Update LLVM for rebase to r212749.

Includes a cherry-pick of: r212948 - fixes a small issue with atomic calls Change-Id: Ib97bd980b59f18142a69506400911a6009d9df18
author: Stephen Hines <srhines@google.com> 2014-07-21 00:45:20 -0700
committer: Stephen Hines <srhines@google.com> 2014-07-21 00:45:20 -0700
commit: c6a4f5e819217e1e12c458aed8e7b122e23a3a58 (patch)
tree: 81b7dd2bb4370a392f31d332a566c903b5744764 /test
parent: 19c6fbb3e8aaf74093afa08013134b61fa08f245 (diff)
download: external_llvm-c6a4f5e819217e1e12c458aed8e7b122e23a3a58.zip
external_llvm-c6a4f5e819217e1e12c458aed8e7b122e23a3a58.tar.gz
external_llvm-c6a4f5e819217e1e12c458aed8e7b122e23a3a58.tar.bz2
1021 files changed, 39653 insertions, 4439 deletions
diff --git a/test/Analysis/BasicAA/cs-cs.ll b/test/Analysis/BasicAA/cs-cs.ll
new file mode 100644
index 0000000..682e4b6
--- /dev/null
+++ b/test/Analysis/BasicAA/cs-cs.ll
@@ -0,0 +1,221 @@
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "arm-apple-ios"
+
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+define <8 x i16> @test1(i8* %p, <8 x i16> %y) {
+entry:
+  %q = getelementptr i8* %p, i64 16
+  %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+  call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
+  %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+  %c = add <8 x i16> %a, %b
+  ret <8 x i16> %c
+
+; CHECK-LABEL: Function: test1:
+
+; CHECK: NoAlias:      i8* %p, i8* %q
+; CHECK: Just Ref:  Ptr: i8* %p        <->  %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
+; CHECK: NoModRef:  Ptr: i8* %q        <->  %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
+; CHECK: NoModRef:  Ptr: i8* %p        <->  call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK: Both ModRef:  Ptr: i8* %q     <->  call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK: Just Ref:  Ptr: i8* %p        <->  %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
+; CHECK: NoModRef:  Ptr: i8* %q        <->  %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
+; CHECK: NoModRef:   %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <->   call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK: NoModRef:   %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <->   %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
+; CHECK: NoModRef:   call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) <->   %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
+; CHECK: NoModRef:   call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) <->   %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
+; CHECK: NoModRef:   %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <->   %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
+; CHECK: NoModRef:   %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <->   call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
+}
+
+define void @test2(i8* %P, i8* %Q) nounwind ssp {
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  ret void
+
+; CHECK-LABEL: Function: test2:
+
+; CHECK:   MayAlias:     i8* %P, i8* %Q
+; CHECK:   Both ModRef:  Ptr: i8* %P     <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK:   Both ModRef:  Ptr: i8* %Q     <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK:   Both ModRef:  Ptr: i8* %P     <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK:   Both ModRef:  Ptr: i8* %Q     <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK:   Both ModRef:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK:   Both ModRef:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+}
+
+define void @test2a(i8* noalias %P, i8* noalias %Q) nounwind ssp {
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  ret void
+
+; CHECK-LABEL: Function: test2a:
+
+; CHECK: NoAlias:      i8* %P, i8* %Q
+; CHECK: Just Mod:  Ptr: i8* %P        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref:  Ptr: i8* %Q        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:  Ptr: i8* %P        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref:  Ptr: i8* %Q        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+}
+
+define void @test2b(i8* noalias %P, i8* noalias %Q) nounwind ssp {
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  %R = getelementptr i8* %P, i64 12
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+  ret void
+
+; CHECK-LABEL: Function: test2b:
+
+; CHECK: NoAlias:      i8* %P, i8* %Q
+; CHECK: NoAlias:      i8* %P, i8* %R
+; CHECK: NoAlias:      i8* %Q, i8* %R
+; CHECK: Just Mod:  Ptr: i8* %P        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref:  Ptr: i8* %Q        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: NoModRef:  Ptr: i8* %R        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: NoModRef:  Ptr: i8* %P        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref:  Ptr: i8* %Q        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:  Ptr: i8* %R        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: NoModRef:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: NoModRef:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+}
+
+define void @test2c(i8* noalias %P, i8* noalias %Q) nounwind ssp {
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  %R = getelementptr i8* %P, i64 11
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+  ret void
+
+; CHECK-LABEL: Function: test2c:
+
+; CHECK: NoAlias:      i8* %P, i8* %Q
+; CHECK: NoAlias:      i8* %P, i8* %R
+; CHECK: NoAlias:      i8* %Q, i8* %R
+; CHECK: Just Mod:  Ptr: i8* %P        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref:  Ptr: i8* %Q        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:  Ptr: i8* %R        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: NoModRef:  Ptr: i8* %P        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref:  Ptr: i8* %Q        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:  Ptr: i8* %R        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+}
+
+define void @test2d(i8* noalias %P, i8* noalias %Q) nounwind ssp {
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  %R = getelementptr i8* %P, i64 -12
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+  ret void
+
+; CHECK-LABEL: Function: test2d:
+
+; CHECK: NoAlias:      i8* %P, i8* %Q
+; CHECK: NoAlias:      i8* %P, i8* %R
+; CHECK: NoAlias:      i8* %Q, i8* %R
+; CHECK: Just Mod:  Ptr: i8* %P        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref:  Ptr: i8* %Q        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: NoModRef:  Ptr: i8* %R        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: NoModRef:  Ptr: i8* %P        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref:  Ptr: i8* %Q        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:  Ptr: i8* %R        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: NoModRef:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: NoModRef:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+}
+
+define void @test2e(i8* noalias %P, i8* noalias %Q) nounwind ssp {
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  %R = getelementptr i8* %P, i64 -11
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+  ret void
+
+; CHECK-LABEL: Function: test2e:
+
+; CHECK: NoAlias:      i8* %P, i8* %Q
+; CHECK: NoAlias:      i8* %P, i8* %R
+; CHECK: NoAlias:      i8* %Q, i8* %R
+; CHECK: Just Mod:  Ptr: i8* %P        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref:  Ptr: i8* %Q        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: NoModRef:  Ptr: i8* %R        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:  Ptr: i8* %P        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref:  Ptr: i8* %Q        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:  Ptr: i8* %R        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+}
+
+define void @test3(i8* %P, i8* %Q) nounwind ssp {
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  ret void
+
+; CHECK-LABEL: Function: test3:
+
+; CHECK: MayAlias:     i8* %P, i8* %Q
+; CHECK: Both ModRef:  Ptr: i8* %P     <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+; CHECK: Both ModRef:  Ptr: i8* %Q     <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+; CHECK: Both ModRef:  Ptr: i8* %P     <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Both ModRef:  Ptr: i8* %Q     <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Both ModRef:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Both ModRef:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+}
+
+define void @test3a(i8* noalias %P, i8* noalias %Q) nounwind ssp {
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  ret void
+
+; CHECK-LABEL: Function: test3a:
+
+; CHECK: NoAlias:      i8* %P, i8* %Q
+; CHECK: Just Mod:  Ptr: i8* %P        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+; CHECK: Just Ref:  Ptr: i8* %Q        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+; CHECK: Just Mod:  Ptr: i8* %P        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref:  Ptr: i8* %Q        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+}
+
+define void @test4(i8* %P, i8* noalias %Q) nounwind ssp {
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  ret void
+
+; CHECK-LABEL: Function: test4:
+
+; CHECK: NoAlias:      i8* %P, i8* %Q
+; CHECK: Just Mod:  Ptr: i8* %P        <->  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false)
+; CHECK: NoModRef:  Ptr: i8* %Q        <->  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false)
+; CHECK: Just Mod:  Ptr: i8* %P        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref:  Ptr: i8* %Q        <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:   tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false)
+}
+
+define void @test5(i8* %P, i8* %Q, i8* %R) nounwind ssp {
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
+  ret void
+
+; CHECK-LABEL: Function: test5:
+
+; CHECK: MayAlias:     i8* %P, i8* %Q
+; CHECK: MayAlias:     i8* %P, i8* %R
+; CHECK: MayAlias:     i8* %Q, i8* %R
+; CHECK: Both ModRef:  Ptr: i8* %P     <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Both ModRef:  Ptr: i8* %Q     <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Both ModRef:  Ptr: i8* %R     <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Both ModRef:  Ptr: i8* %P     <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
+; CHECK: Both ModRef:  Ptr: i8* %Q     <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
+; CHECK: Both ModRef:  Ptr: i8* %R     <->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
+; CHECK: Both ModRef:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
+; CHECK: Both ModRef:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Analysis/CostModel/AArch64/lit.local.cfg b/test/Analysis/CostModel/AArch64/lit.local.cfg
index c420349..7184443 100644
--- a/test/Analysis/CostModel/AArch64/lit.local.cfg
+++ b/test/Analysis/CostModel/AArch64/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
+if not 'AArch64' in config.root.targets:
     config.unsupported = True
diff --git a/test/Analysis/CostModel/ARM/lit.local.cfg b/test/Analysis/CostModel/ARM/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/Analysis/CostModel/ARM/lit.local.cfg
+++ b/test/Analysis/CostModel/ARM/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Analysis/CostModel/PowerPC/lit.local.cfg b/test/Analysis/CostModel/PowerPC/lit.local.cfg
index 2e46300..5d33887 100644
--- a/test/Analysis/CostModel/PowerPC/lit.local.cfg
+++ b/test/Analysis/CostModel/PowerPC/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'PowerPC' in targets:
+if not 'PowerPC' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll b/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll
new file mode 100644
index 0000000..2e162f0
--- /dev/null
+++ b/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll
@@ -0,0 +1,347 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-ssse3 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+sse3,+ssse3 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSSE3
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+
+
+; Verify the cost model for alternate shuffles.
+
+; shufflevector instructions with illegal 64-bit vector types.
+; 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
+; 64-bit packed float vectors (v2f32) are widened to type v4f32.
+
+define <2 x i32> @test_v2i32(<2 x i32> %a, <2 x i32> %b) {
+  %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x i32> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2i32':
+; SSE2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+define <2 x float> @test_v2f32(<2 x float> %a, <2 x float> %b) {
+  %1 = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x float> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2f32':
+; SSE2: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+define <2 x i32> @test_v2i32_2(<2 x i32> %a, <2 x i32> %b) {
+  %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 2, i32 1>
+  ret <2 x i32> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2i32_2':
+; SSE2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+define <2 x float> @test_v2f32_2(<2 x float> %a, <2 x float> %b) {
+  %1 = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 2, i32 1>
+  ret <2 x float> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2f32_2':
+; SSE2: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+; Test shuffles on packed vectors of two elements.
+
+define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) {
+  %1 = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x i64> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2i64':
+; SSE2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
+  %1 = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2f64':
+; SSE2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+define <2 x i64> @test_v2i64_2(<2 x i64> %a, <2 x i64> %b) {
+  %1 = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
+  ret <2 x i64> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2i64_2':
+; SSE2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+define <2 x double> @test_v2f64_2(<2 x double> %a, <2 x double> %b) {
+  %1 = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
+  ret <2 x double> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2f64_2':
+; SSE2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+; Test shuffles on packed vectors of four elements.
+
+define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) {
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i32> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4i32':
+; SSE2: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+define <4 x i32> @test_v4i32_2(<4 x i32> %a, <4 x i32> %b) {
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  ret <4 x i32> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4i32_2':
+; SSE2: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) {
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4f32':
+; SSE2: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+define <4 x float> @test_v4f32_2(<4 x float> %a, <4 x float> %b) {
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  ret <4 x float> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4f32_2':
+; SSE2: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {
+  %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i64> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4i64':
+; SSE2: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+define <4 x i64> @test_v4i64_2(<4 x i64> %a, <4 x i64> %b) {
+  %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  ret <4 x i64> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4i64_2':
+; SSE2: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
+  %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x double> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4f64':
+; SSE2: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+define <4 x double> @test_v4f64_2(<4 x double> %a, <4 x double> %b) {
+  %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  ret <4 x double> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4f64_2':
+; SSE2: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+; Test shuffles on packed vectors of eight elements.
+define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
+  %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x i16> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i16':
+; SSE2: Cost Model: {{.*}} 8 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+define <8 x i16> @test_v8i16_2(<8 x i16> %a, <8 x i16> %b) {
+  %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
+  ret <8 x i16> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i16_2':
+; SSE2: Cost Model: {{.*}} 8 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {
+  %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x i32> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i32':
+; SSE2: Cost Model: {{.*}} 4 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 4 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+define <8 x i32> @test_v8i32_2(<8 x i32> %a, <8 x i32> %b) {
+  %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
+  ret <8 x i32> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i32_2':
+; SSE2: Cost Model: {{.*}} 4 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 4 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
+  %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x float> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8f32':
+; SSE2: Cost Model: {{.*}} 4 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 4 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+define <8 x float> @test_v8f32_2(<8 x float> %a, <8 x float> %b) {
+  %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
+  ret <8 x float> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8f32_2':
+; SSE2: Cost Model: {{.*}} 4 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 4 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+; Test shuffles on packed vectors of sixteen elements.
+define <16 x i8> @test_v16i8(<16 x i8> %a, <16 x i8> %b) {
+  %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+  ret <16 x i8> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i8':
+; SSE2: Cost Model: {{.*}} 48 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
+
+
+define <16 x i8> @test_v16i8_2(<16 x i8> %a, <16 x i8> %b) {
+  %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
+  ret <16 x i8> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i8_2':
+; SSE2: Cost Model: {{.*}} 48 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
+
+
+define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
+  %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+  ret <16 x i16> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i16':
+; SSE2: Cost Model: {{.*}} 16 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 5 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+
+define <16 x i16> @test_v16i16_2(<16 x i16> %a, <16 x i16> %b) {
+  %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
+  ret <16 x i16> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i16_2':
+; SSE2: Cost Model: {{.*}} 16 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 5 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+
+define <32 x i8> @test_v32i8(<32 x i8> %a, <32 x i8> %b) {
+  %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 33, i32 2, i32 35, i32 4, i32 37, i32 6, i32 39, i32 8, i32 41, i32 10, i32 43, i32 12, i32 45, i32 14, i32 47, i32 16, i32 49, i32 18, i32 51, i32 20, i32 53, i32 22, i32 55, i32 24, i32 57, i32 26, i32 59, i32 28, i32 61, i32 30, i32 63>
+  ret <32 x i8> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v32i8':
+; SSE2: Cost Model: {{.*}} 96 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 9 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 9 for instruction:   %1 = shufflevector
+
+
+define <32 x i8> @test_v32i8_2(<32 x i8> %a, <32 x i8> %b) {
+  %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
+  ret <32 x i8> %1
+}
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v32i8_2':
+; SSE2: Cost Model: {{.*}} 96 for instruction:   %1 = shufflevector
+; SSSE3: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 9 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 9 for instruction:   %1 = shufflevector
+
diff --git a/test/Analysis/CostModel/X86/lit.local.cfg b/test/Analysis/CostModel/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Analysis/CostModel/X86/lit.local.cfg
+++ b/test/Analysis/CostModel/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
index 48bec08..5a88c4c 100644
--- a/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
+++ b/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
@@ -8,6 +8,15 @@
 ;       A[i][j] = 1.0;
 ; }
 
+; Inst:  %val = load double* %arrayidx
+; In Loop with Header: for.j
+; AddRec: {{0,+,(%m * sizeof(double))}<%for.i>,+,sizeof(double)}<%for.j>
+; Base offset: %A
+; ArrayDecl[UnknownSize][%m] with elements of sizeof(double) bytes.
+; ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
+
+; Inst:  store double %val, double* %arrayidx
+; In Loop with Header: for.j
 ; AddRec: {{%A,+,(8 * %m)}<%for.i>,+,8}<%for.j>
 ; CHECK: Base offset: %A
 ; CHECK: ArrayDecl[UnknownSize][%m] with elements of sizeof(double) bytes.
@@ -26,7 +35,8 @@ for.j:
   %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j ]
   %vlaarrayidx.sum = add i64 %j, %tmp
   %arrayidx = getelementptr inbounds double* %A, i64 %vlaarrayidx.sum
-  store double 1.0, double* %arrayidx
+  %val = load double* %arrayidx
+  store double %val, double* %arrayidx
   %j.inc = add nsw i64 %j, 1
   %j.exitcond = icmp eq i64 %j.inc, %m
   br i1 %j.exitcond, label %for.i.inc, label %for.j
diff --git a/test/Assembler/addrspacecast-alias.ll b/test/Assembler/addrspacecast-alias.ll
index 052a141..d751659 100644
--- a/test/Assembler/addrspacecast-alias.ll
+++ b/test/Assembler/addrspacecast-alias.ll
@@ -3,5 +3,5 @@
 ; Test that global aliases are allowed to be constant addrspacecast
 
 @i = internal addrspace(1) global i8 42
-@ia = alias internal addrspace(2) i8 addrspace(3)*, i8 addrspace(1)* @i
-; CHECK: @ia = alias internal addrspace(2) i8 addrspace(3)*, i8 addrspace(1)* @i
+@ia = alias internal addrspacecast (i8 addrspace(1)* @i to i8 addrspace(2)* addrspace(3)*)
+; CHECK: @ia = alias internal addrspacecast (i8 addrspace(2)* addrspace(1)* bitcast (i8 addrspace(1)* @i to i8 addrspace(2)* addrspace(1)*) to i8 addrspace(2)* addrspace(3)*)
diff --git a/test/Assembler/alias-addrspace.ll b/test/Assembler/alias-addrspace.ll
deleted file mode 100644
index 6d378e4..0000000
--- a/test/Assembler/alias-addrspace.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as %s 2>&1 | FileCheck %s
-
-@foo = global i32 42
-@bar = alias internal addrspace(1) i32* @foo
-
-CHECK: error: A type is required if addrspace is given
diff --git a/test/Assembler/alias-to-alias.ll b/test/Assembler/alias-to-alias.ll
deleted file mode 100644
index 1ea99bb..0000000
--- a/test/Assembler/alias-to-alias.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN:  not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
-; CHECK: Alias must point to function or variable
-
-@b1 = alias i32* @c1
-@c1 = alias i32* @b1
diff --git a/test/Assembler/alias-to-alias2.ll b/test/Assembler/alias-to-alias2.ll
deleted file mode 100644
index a8a0196..0000000
--- a/test/Assembler/alias-to-alias2.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN:  not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
-; CHECK: error: Alias is pointed by alias b1
-
-@g = global i32 42
-
-@b1 = alias i32* @c1
-@c1 = alias i32* @g
diff --git a/test/Assembler/alias-type.ll b/test/Assembler/alias-type.ll
deleted file mode 100644
index ead3e95..0000000
--- a/test/Assembler/alias-type.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as %s 2>&1 | FileCheck %s
-
-@foo = global i32 42
-@bar = alias i32 @foo
-
-CHECK: error: An alias must have pointer type
diff --git a/test/Assembler/atomic.ll b/test/Assembler/atomic.ll
index a2ae58e..d7ccd99 100644
--- a/test/Assembler/atomic.ll
+++ b/test/Assembler/atomic.ll
@@ -16,6 +16,8 @@ define void @f(i32* %x) {
   cmpxchg volatile i32* %x, i32 0, i32 1 acq_rel acquire
   ; CHECK: cmpxchg i32* %x, i32 42, i32 0 acq_rel monotonic
   cmpxchg i32* %x, i32 42, i32 0 acq_rel monotonic
+  ; CHECK: cmpxchg weak i32* %x, i32 13, i32 0 seq_cst monotonic
+  cmpxchg weak i32* %x, i32 13, i32 0 seq_cst monotonic
   ; CHECK: atomicrmw add i32* %x, i32 10 seq_cst
   atomicrmw add i32* %x, i32 10 seq_cst
   ; CHECK: atomicrmw volatile xchg  i32* %x, i32 10 monotonic
diff --git a/test/Assembler/invalid-comdat.ll b/test/Assembler/invalid-comdat.ll
new file mode 100644
index 0000000..987e1e1
--- /dev/null
+++ b/test/Assembler/invalid-comdat.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+@v = global i32 0, comdat $v
+; CHECK: use of undefined comdat '$v'
diff --git a/test/Assembler/invalid-comdat2.ll b/test/Assembler/invalid-comdat2.ll
new file mode 100644
index 0000000..ed656ef
--- /dev/null
+++ b/test/Assembler/invalid-comdat2.ll
@@ -0,0 +1,5 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+$v = comdat any
+$v = comdat any
+; CHECK: redefinition of comdat '$v'
diff --git a/test/Assembler/upgrade-loop-metadata.ll b/test/Assembler/upgrade-loop-metadata.ll
new file mode 100644
index 0000000..f664bdf
--- /dev/null
+++ b/test/Assembler/upgrade-loop-metadata.ll
@@ -0,0 +1,41 @@
+; Test to make sure loop vectorizer metadata is automatically upgraded.
+;
+; Run using opt as well to ensure that the metadata is upgraded when parsing
+; assembly.
+;
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: opt -S < %s | FileCheck %s
+
+define void @_Z28loop_with_vectorize_metadatav() {
+entry:
+  %i = alloca i32, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 16
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1
+
+for.body:                                         ; preds = %for.cond
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %1 = load i32* %i, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; CHECK: !{metadata !"llvm.loop.vectorize.unroll", i32 4}
+; CHECK: !{metadata !"llvm.loop.vectorize.width", i32 8}
+; CHECK: !{metadata !"llvm.loop.vectorize.enable", i1 true}
+
+!0 = metadata !{metadata !"clang version 3.5.0 (trunk 211528)"}
+!1 = metadata !{metadata !1, metadata !2, metadata !3, metadata !4, metadata !4}
+!2 = metadata !{metadata !"llvm.vectorizer.unroll", i32 4}
+!3 = metadata !{metadata !"llvm.vectorizer.width", i32 8}
+!4 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
diff --git a/test/Bindings/Ocaml/target.ml b/test/Bindings/Ocaml/target.ml
index 26cd129..0a2283a 100644
--- a/test/Bindings/Ocaml/target.ml
+++ b/test/Bindings/Ocaml/target.ml
@@ -46,7 +46,7 @@ let test_target_data () =
   let layout = "e-p:32:32-f64:32:64-v64:32:64-v128:32:128-n32-S32" in
   let dl     = DL.of_string layout in
   let sty    = struct_type context [| i32_type; i64_type |] in
-  
+
   assert_equal (DL.as_string dl) layout;
   assert_equal (DL.byte_order dl) Endian.Little;
   assert_equal (DL.pointer_size dl) 4;
@@ -86,7 +86,8 @@ let test_target_machine () =
   assert_equal (TM.triple machine) (Target.default_triple ());
   assert_equal (TM.cpu machine) "";
   assert_equal (TM.features machine) "";
-  ignore (TM.data_layout machine)
+  ignore (TM.data_layout machine);
+  TM.set_verbose_asm true machine
 
 
 (*===-- Code Emission -----------------------------------------------------===*)
diff --git a/test/Bindings/llvm-c/lit.local.cfg b/test/Bindings/llvm-c/lit.local.cfg
index d83ebee..75b22c0 100644
--- a/test/Bindings/llvm-c/lit.local.cfg
+++ b/test/Bindings/llvm-c/lit.local.cfg
@@ -1,5 +1,4 @@
-targets = set(config.root.targets_to_build.split())
-if not "X86" in targets:
+if not "X86" in config.root.targets:
     config.unsupported = True
-if not "ARM" in targets:
+if not "ARM" in config.root.targets:
     config.unsupported = True
diff --git a/test/Bitcode/atomic.ll b/test/Bitcode/atomic.ll
new file mode 100644
index 0000000..37815a7
--- /dev/null
+++ b/test/Bitcode/atomic.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as %s -o - | llvm-dis | FileCheck %s
+
+define void @test_cmpxchg(i32* %addr, i32 %desired, i32 %new) {
+  cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+  ; CHECK: cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+
+  cmpxchg volatile i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+  ; CHECK: cmpxchg volatile i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+
+  cmpxchg weak i32* %addr, i32 %desired, i32 %new acq_rel acquire
+  ; CHECK: cmpxchg weak i32* %addr, i32 %desired, i32 %new acq_rel acquire
+
+  cmpxchg weak volatile i32* %addr, i32 %desired, i32 %new singlethread release monotonic
+  ; CHECK: cmpxchg weak volatile i32* %addr, i32 %desired, i32 %new singlethread release monotonic
+
+  ret void
+}
+\ No newline at end of file
diff --git a/test/Bitcode/attributes.ll b/test/Bitcode/attributes.ll
index 02e1bb1..49366de 100644
--- a/test/Bitcode/attributes.ll
+++ b/test/Bitcode/attributes.ll
@@ -203,7 +203,7 @@ define void @f34()
 ; CHECK: define void @f34()
 {
         call void @nobuiltin() nobuiltin
-; CHECK: call void @nobuiltin() #24
+; CHECK: call void @nobuiltin() #25
         ret void;
 }
 
@@ -223,6 +223,12 @@ define nonnull i8* @f37(i8* nonnull %a) {
         ret i8* %a
 }
 
+define void @f38() unnamed_addr jumptable {
+; CHECK: define void @f38() unnamed_addr #24
+    call void bitcast (void (i8*)* @f36 to void ()*)()
+    unreachable
+}
+
 ; CHECK: attributes #0 = { noreturn }
 ; CHECK: attributes #1 = { nounwind }
 ; CHECK: attributes #2 = { readnone }
@@ -247,5 +253,5 @@ define nonnull i8* @f37(i8* nonnull %a) {
 ; CHECK: attributes #21 = { sspstrong }
 ; CHECK: attributes #22 = { minsize }
 ; CHECK: attributes #23 = { noinline optnone }
-; CHECK: attributes #24 = { nobuiltin }
-
+; CHECK: attributes #24 = { jumptable }
+; CHECK: attributes #25 = { nobuiltin }
diff --git a/test/Bitcode/memInstructions.3.2.ll b/test/Bitcode/memInstructions.3.2.ll
index 21c3deb..e4cb6bd 100644
--- a/test/Bitcode/memInstructions.3.2.ll
+++ b/test/Bitcode/memInstructions.3.2.ll
@@ -223,68 +223,88 @@ define void @cmpxchg(i32* %ptr,i32 %cmp,i32 %new){
 entry:
   ;cmpxchg [volatile] <ty>* <pointer>, <ty> <cmp>, <ty> <new> [singlethread] <ordering>
 
-; CHECK: %res1 = cmpxchg i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
+; CHECK: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
+; CHECK-NEXT: %res1 = extractvalue { i32, i1 } [[TMP]], 0
   %res1 = cmpxchg i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
   
-; CHECK-NEXT: %res2 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
+; CHECK-NEXT: %res2 = extractvalue { i32, i1 } [[TMP]], 0
   %res2 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
   
-; CHECK-NEXT: %res3 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
+; CHECK-NEXT: %res3 = extractvalue { i32, i1 } [[TMP]], 0
   %res3 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
   
-; CHECK-NEXT: %res4 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
+; CHECK-NEXT: %res4 = extractvalue { i32, i1 } [[TMP]], 0
   %res4 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
   
   
-; CHECK-NEXT: %res5 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acquire acquire
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new acquire acquire
+; CHECK-NEXT: %res5 = extractvalue { i32, i1 } [[TMP]], 0
   %res5 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acquire acquire
   
-; CHECK-NEXT: %res6 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acquire acquire
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acquire acquire
+; CHECK-NEXT: %res6 = extractvalue { i32, i1 } [[TMP]], 0
   %res6 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acquire acquire
   
-; CHECK-NEXT: %res7 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
+; CHECK-NEXT: %res7 = extractvalue { i32, i1 } [[TMP]], 0
   %res7 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
   
-; CHECK-NEXT: %res8 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
+; CHECK-NEXT: %res8 = extractvalue { i32, i1 } [[TMP]], 0
   %res8 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
   
   
-; CHECK-NEXT: %res9 = cmpxchg i32* %ptr, i32 %cmp, i32 %new release monotonic
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new release monotonic
+; CHECK-NEXT: %res9 = extractvalue { i32, i1 } [[TMP]], 0
   %res9 = cmpxchg i32* %ptr, i32 %cmp, i32 %new release monotonic
   
-; CHECK-NEXT: %res10 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new release monotonic
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new release monotonic
+; CHECK-NEXT: %res10 = extractvalue { i32, i1 } [[TMP]], 0
   %res10 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new release monotonic
   
-; CHECK-NEXT: %res11 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
+; CHECK-NEXT: %res11 = extractvalue { i32, i1 } [[TMP]], 0
   %res11 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
   
-; CHECK-NEXT: %res12 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
+; CHECK-NEXT: %res12 = extractvalue { i32, i1 } [[TMP]], 0
   %res12 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
   
   
-; CHECK-NEXT: %res13 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
+; CHECK-NEXT: %res13 = extractvalue { i32, i1 } [[TMP]], 0
   %res13 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
   
-; CHECK-NEXT: %res14 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
+; CHECK-NEXT: %res14 = extractvalue { i32, i1 } [[TMP]], 0
   %res14 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
   
-; CHECK-NEXT: %res15 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
+; CHECK-NEXT: %res15 = extractvalue { i32, i1 } [[TMP]], 0
   %res15 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
   
-; CHECK-NEXT: %res16 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
+; CHECK-NEXT: %res16 = extractvalue { i32, i1 } [[TMP]], 0
   %res16 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
   
   
-; CHECK-NEXT: %res17 = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
+; CHECK-NEXT: %res17 = extractvalue { i32, i1 } [[TMP]], 0
   %res17 = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
   
-; CHECK-NEXT: %res18 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
+; CHECK-NEXT: %res18 = extractvalue { i32, i1 } [[TMP]], 0
   %res18 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
   
-; CHECK-NEXT: %res19 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
+; CHECK-NEXT: %res19 = extractvalue { i32, i1 } [[TMP]], 0
   %res19 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
   
-; CHECK-NEXT: %res20 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
+; CHECK-NEXT: %res20 = extractvalue { i32, i1 } [[TMP]], 0
   %res20 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
 
   ret void
diff --git a/test/Bitcode/old-aliases.ll b/test/Bitcode/old-aliases.ll
index 4ef47c0..7a0eea2 100644
--- a/test/Bitcode/old-aliases.ll
+++ b/test/Bitcode/old-aliases.ll
@@ -10,13 +10,13 @@
 ; CHECK: @v2 = global [1 x i32] zeroinitializer
 
 @v3 = alias bitcast (i32* @v1 to i16*)
-; CHECK: @v3 = alias i16, i32* @v1
+; CHECK: @v3 = alias bitcast (i32* @v1 to i16*)
 
 @v4 = alias getelementptr ([1 x i32]* @v2, i32 0, i32 0)
-; CHECK: @v4 = alias i32, [1 x i32]* @v2
+; CHECK: @v4 = alias getelementptr inbounds ([1 x i32]* @v2, i32 0, i32 0)
 
 @v5 = alias i32 addrspace(2)* addrspacecast (i32 addrspace(0)* @v1 to i32 addrspace(2)*)
-; CHECK: @v5 = alias addrspace(2) i32, i32* @v1
+; CHECK: @v5 = alias addrspacecast (i32* @v1 to i32 addrspace(2)*)
 
 @v6 = alias i16* @v3
-; CHECK: @v6 = alias i16, i32* @v1
+; CHECK: @v6 = alias i16* @v3
diff --git a/test/Bitcode/upgrade-loop-metadata.ll b/test/Bitcode/upgrade-loop-metadata.ll
new file mode 100644
index 0000000..1a45056
--- /dev/null
+++ b/test/Bitcode/upgrade-loop-metadata.ll
@@ -0,0 +1,37 @@
+; Test to make sure loop vectorizer metadata is automatically upgraded.
+;
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+define void @_Z28loop_with_vectorize_metadatav() {
+entry:
+  %i = alloca i32, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 16
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1
+
+for.body:                                         ; preds = %for.cond
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %1 = load i32* %i, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; CHECK: !{metadata !"llvm.loop.vectorize.unroll", i32 4}
+; CHECK: !{metadata !"llvm.loop.vectorize.width", i32 8}
+; CHECK: !{metadata !"llvm.loop.vectorize.enable", i1 true}
+
+!0 = metadata !{metadata !"clang version 3.5.0 (trunk 211528)"}
+!1 = metadata !{metadata !1, metadata !2, metadata !3, metadata !4, metadata !4}
+!2 = metadata !{metadata !"llvm.vectorizer.unroll", i32 4}
+!3 = metadata !{metadata !"llvm.vectorizer.width", i32 8}
+!4 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
diff --git a/test/Bitcode/upgrade-loop-metadata.ll.bc b/test/Bitcode/upgrade-loop-metadata.ll.bc
new file mode 100644
index 0000000..3f218cb
--- /dev/null
+++ b/test/Bitcode/upgrade-loop-metadata.ll.bc
diff --git a/test/Bitcode/weak-cmpxchg-upgrade.ll b/test/Bitcode/weak-cmpxchg-upgrade.ll
new file mode 100644
index 0000000..dbcd150
--- /dev/null
+++ b/test/Bitcode/weak-cmpxchg-upgrade.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+; cmpxchg-upgrade.ll.bc was produced by running a version of llvm-as from just
+; before the IR change on this file.
+
+define i32 @test(i32* %addr, i32 %old, i32 %new) {
+; CHECK:  [[TMP:%.*]] = cmpxchg i32* %addr, i32 %old, i32 %new seq_cst monotonic
+; CHECK:  %val = extractvalue { i32, i1 } [[TMP]], 0
+  %val = cmpxchg i32* %addr, i32 %old, i32 %new seq_cst monotonic
+  ret i32 %val
+}
+
+define i32 @test(i32* %addr, i32 %old, i32 %new) {
+  ret i1 %val
+}
diff --git a/test/Bitcode/weak-cmpxchg-upgrade.ll.bc b/test/Bitcode/weak-cmpxchg-upgrade.ll.bc
new file mode 100644
index 0000000..f713c31
--- /dev/null
+++ b/test/Bitcode/weak-cmpxchg-upgrade.ll.bc
diff --git a/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll b/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll
new file mode 100644
index 0000000..2df9c37
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll
@@ -0,0 +1,55 @@
+; RUN: llc -O3 -mcpu=cortex-a53 -mtriple=aarch64--linux-gnu %s -o - | FileCheck %s
+; PR20188: don't crash when merging sexts.
+
+; CHECK: foo:
+define void @foo() unnamed_addr align 2 {
+entry:
+  br label %invoke.cont145
+
+invoke.cont145:
+  %or.cond = and i1 undef, false
+  br i1 %or.cond, label %if.then274, label %invoke.cont145
+
+if.then274:
+  %0 = load i32* null, align 4
+  br i1 undef, label %invoke.cont291, label %if.else313
+
+invoke.cont291:
+  %idxprom.i.i.i605 = sext i32 %0 to i64
+  %arrayidx.i.i.i607 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i605
+  %idxprom.i.i.i596 = sext i32 %0 to i64
+  %arrayidx.i.i.i598 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i596
+  br label %if.end356
+
+if.else313:
+  %cmp314 = fcmp olt double undef, 0.000000e+00
+  br i1 %cmp314, label %invoke.cont317, label %invoke.cont353
+
+invoke.cont317:
+  br i1 undef, label %invoke.cont326, label %invoke.cont334
+
+invoke.cont326:
+  %idxprom.i.i.i587 = sext i32 %0 to i64
+  %arrayidx.i.i.i589 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i587
+  %sub329 = fsub fast double undef, undef
+  br label %invoke.cont334
+
+invoke.cont334:
+  %lo.1 = phi double [ %sub329, %invoke.cont326 ], [ undef, %invoke.cont317 ]
+  br i1 undef, label %invoke.cont342, label %if.end356
+
+invoke.cont342:
+  %idxprom.i.i.i578 = sext i32 %0 to i64
+  %arrayidx.i.i.i580 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i578
+  br label %if.end356
+
+invoke.cont353:
+  %idxprom.i.i.i572 = sext i32 %0 to i64
+  %arrayidx.i.i.i574 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i572
+  br label %if.end356
+
+if.end356:
+  %lo.2 = phi double [ 0.000000e+00, %invoke.cont291 ], [ %lo.1, %invoke.cont342 ], [ undef, %invoke.cont353 ], [ %lo.1, %invoke.cont334 ]
+  call void null(i32 %0, double %lo.2)
+  unreachable
+}
diff --git a/test/CodeGen/AArch64/aarch64-address-type-promotion.ll b/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
new file mode 100644
index 0000000..ee90d19
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -o - | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
+target triple = "arm64-apple-macosx10.9"
+
+; Check that sexts get promoted above adds.
+define void @foo(i32* nocapture %a, i32 %i) {
+entry:
+; CHECK-LABEL: _foo:
+; CHECK: add
+; CHECK-NEXT: ldp
+; CHECK-NEXT: add
+; CHECK-NEXT: str
+; CHECK-NEXT: ret
+  %add = add nsw i32 %i, 1
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32* %a, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  %add1 = add nsw i32 %i, 2
+  %idxprom2 = sext i32 %add1 to i64
+  %arrayidx3 = getelementptr inbounds i32* %a, i64 %idxprom2
+  %1 = load i32* %arrayidx3, align 4
+  %add4 = add nsw i32 %1, %0
+  %idxprom5 = sext i32 %i to i64
+  %arrayidx6 = getelementptr inbounds i32* %a, i64 %idxprom5
+  store i32 %add4, i32* %arrayidx6, align 4
+  ret void
+}
diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll
index a2266b1..ceea8a0 100644
--- a/test/CodeGen/AArch64/addsub_ext.ll
+++ b/test/CodeGen/AArch64/addsub_ext.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s
 
 @var8 = global i8 0
 @var16 = global i16 0
diff --git a/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll b/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll
index d1840d3..7da2d2c 100644
--- a/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll
+++ b/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll
@@ -2,14 +2,14 @@
 ; RUN: llc -mtriple=arm64-linux-gnu -relocation-model=pic < %s | FileCheck %s --check-prefix=CHECK-LINUX
 ; <rdar://problem/11392109>
 
-define hidden void @t() optsize ssp {
+define hidden void @t(i64* %addr) optsize ssp {
 entry:
-  store i64 zext (i32 ptrtoint (i64 (i32)* @x to i32) to i64), i64* undef, align 8
+  store i64 zext (i32 ptrtoint (i64 (i32)* @x to i32) to i64), i64* %addr, align 8
 ; CHECK:             adrp    x{{[0-9]+}}, _x@GOTPAGE
 ; CHECK:        ldr     x{{[0-9]+}}, [x{{[0-9]+}}, _x@GOTPAGEOFF]
 ; CHECK-NEXT:        and     x{{[0-9]+}}, x{{[0-9]+}}, #0xffffffff
 ; CHECK-NEXT:        str     x{{[0-9]+}}, [x{{[0-9]+}}]
-  unreachable
+  ret void
 }
 
 declare i64 @x(i32) optsize
diff --git a/test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll b/test/CodeGen/AArch64/arm64-AnInfiniteLoopInDAGCombine.ll
index a73b707..a73b707 100644
--- a/test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll
+++ b/test/CodeGen/AArch64/arm64-AnInfiniteLoopInDAGCombine.ll
diff --git a/test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll
index 1b2d543..1b2d543 100644
--- a/test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll
+++ b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll
diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll
index b713f0d..ccf1371 100644
--- a/test/CodeGen/AArch64/arm64-aapcs.ll
+++ b/test/CodeGen/AArch64/arm64-aapcs.ll
@@ -101,3 +101,11 @@ define fp128 @test_fp128([8 x float] %arg0, fp128 %arg1) {
 ; CHECK: ldr {{q[0-9]+}}, [sp]
   ret fp128 %arg1
 }
+
+; Check if VPR can be correctly pass by stack.
+define <2 x double> @test_vreg_stack([8 x <2 x double>], <2 x double> %varg_stack) {
+entry:
+; CHECK-LABEL: test_vreg_stack:
+; CHECK: ldr {{q[0-9]+}}, [sp]
+  ret <2 x double> %varg_stack;
+}
diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll
index e2de434..a955029 100644
--- a/test/CodeGen/AArch64/arm64-abi.ll
+++ b/test/CodeGen/AArch64/arm64-abi.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
+; RUN: llc < %s -debug -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
 ; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s
+; REQUIRES: asserts
 target triple = "arm64-apple-darwin"
 
 ; rdar://9932559
@@ -8,15 +9,15 @@ entry:
 ; CHECK-LABEL: i8i16callee:
 ; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5.
 ; They are i8, i16, i8 and i8.
-; CHECK: ldrsb	{{w[0-9]+}}, [sp, #5]
-; CHECK: ldrsh	{{w[0-9]+}}, [sp, #2]
-; CHECK: ldrsb	{{w[0-9]+}}, [sp]
-; CHECK: ldrsb	{{w[0-9]+}}, [sp, #4]
+; CHECK-DAG: ldrsb {{w[0-9]+}}, [sp, #5]
+; CHECK-DAG: ldrsb {{w[0-9]+}}, [sp, #4]
+; CHECK-DAG: ldrsh {{w[0-9]+}}, [sp, #2]
+; CHECK-DAG: ldrsb {{w[0-9]+}}, [sp]
 ; FAST-LABEL: i8i16callee:
-; FAST: ldrb  {{w[0-9]+}}, [sp, #5]
-; FAST: ldrb  {{w[0-9]+}}, [sp, #4]
-; FAST: ldrh  {{w[0-9]+}}, [sp, #2]
-; FAST: ldrb  {{w[0-9]+}}, [sp]
+; FAST-DAG: ldrsb  {{w[0-9]+}}, [sp, #5]
+; FAST-DAG: ldrsb  {{w[0-9]+}}, [sp, #4]
+; FAST-DAG: ldrsh  {{w[0-9]+}}, [sp, #2]
+; FAST-DAG: ldrsb  {{w[0-9]+}}, [sp]
   %conv = sext i8 %a4 to i64
   %conv3 = sext i16 %a5 to i64
   %conv8 = sext i8 %b1 to i64
@@ -44,10 +45,10 @@ entry:
 ; CHECK: i8i16caller
 ; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5.
 ; They are i8, i16, i8 and i8.
-; CHECK: strb {{w[0-9]+}}, [sp, #5]
-; CHECK: strb {{w[0-9]+}}, [sp, #4]
-; CHECK: strh {{w[0-9]+}}, [sp, #2]
-; CHECK: strb {{w[0-9]+}}, [sp]
+; CHECK-DAG: strb {{w[0-9]+}}, [sp, #5]
+; CHECK-DAG: strb {{w[0-9]+}}, [sp, #4]
+; CHECK-DAG: strh {{w[0-9]+}}, [sp, #2]
+; CHECK-DAG: strb {{w[0-9]+}}, [sp]
 ; CHECK: bl
 ; FAST: i8i16caller
 ; FAST: strb {{w[0-9]+}}, [sp]
diff --git a/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll b/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll
index 34d6287..38661a5 100644
--- a/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll
+++ b/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -o - | FileCheck %s
+; RUN: llc %s -o - -aarch64-atomic-cfg-tidy=0 | FileCheck %s
 ; Check that ANDS (tst) is not merged with ADD when the immediate
 ; is not 0.
 ; <rdar://problem/16693089>
@@ -8,18 +8,18 @@ target triple = "arm64-apple-ios"
 ; CHECK-LABEL: tst1:
 ; CHECK: add [[REG:w[0-9]+]], w{{[0-9]+}}, #1
 ; CHECK: tst [[REG]], #0x1
-define void @tst1() {
+define void @tst1(i1 %tst, i32 %true) {
 entry:
-  br i1 undef, label %for.end, label %for.body
+  br i1 %tst, label %for.end, label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %result.09 = phi i32 [ %add2.result.0, %for.body ], [ 1, %entry ]
   %i.08 = phi i32 [ %inc, %for.body ], [ 2, %entry ]
   %and = and i32 %i.08, 1
   %cmp1 = icmp eq i32 %and, 0
-  %add2.result.0 = select i1 %cmp1, i32 undef, i32 %result.09
+  %add2.result.0 = select i1 %cmp1, i32 %true, i32 %result.09
   %inc = add nsw i32 %i.08, 1
-  %cmp = icmp slt i32 %i.08, undef
+  %cmp = icmp slt i32 %i.08, %true
   br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
 
 for.cond.for.end_crit_edge:                       ; preds = %for.body
diff --git a/test/CodeGen/AArch64/arm64-arith.ll b/test/CodeGen/AArch64/arm64-arith.ll
index ed9b569..f36e706 100644
--- a/test/CodeGen/AArch64/arm64-arith.ll
+++ b/test/CodeGen/AArch64/arm64-arith.ll
@@ -260,3 +260,11 @@ define i64 @f3(i64 %a) nounwind readnone ssp {
   %res = mul nsw i64 %a, 17
   ret i64 %res
 }
+
+define i32 @f4(i32 %a) nounwind readnone ssp {
+; CHECK-LABEL: f4:
+; CHECK-NEXT: add w0, w0, w0, lsl #1
+; CHECK-NEXT: ret
+  %res = mul i32 %a, 3
+  ret i32 %res
+}
diff --git a/test/CodeGen/AArch64/arm64-atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll
index 3b43aa1..3377849 100644
--- a/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -13,7 +13,8 @@ define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
 ; CHECK: stxp   [[SCRATCH_RES:w[0-9]+]], x4, x5, [x[[ADDR]]]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 ; CHECK: [[DONE]]:
-  %val = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
+  %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
+  %val = extractvalue { i128, i1 } %pair, 0
   ret i128 %val
 }
 
@@ -21,8 +22,10 @@ define void @fetch_and_nand(i128* %p, i128 %bits) {
 ; CHECK-LABEL: fetch_and_nand:
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK-DAG: bic    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
-; CHECK-DAG: bic    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
+; CHECK-DAG: and    [[TMP_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
+; CHECK-DAG: and    [[TMP_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
+; CHECK-DAG: mvn    [[SCRATCH_REGLO:x[0-9]+]], [[TMP_REGLO]]
+; CHECK-DAG: mvn    [[SCRATCH_REGHI:x[0-9]+]], [[TMP_REGHI]]
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
diff --git a/test/CodeGen/AArch64/arm64-atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll
index aa9b284..b56f91d 100644
--- a/test/CodeGen/AArch64/arm64-atomic.ll
+++ b/test/CodeGen/AArch64/arm64-atomic.ll
@@ -10,7 +10,8 @@ define i32 @val_compare_and_swap(i32* %p) {
 ; CHECK: stxr   [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0]
 ; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
 ; CHECK: [[LABEL2]]:
-  %val = cmpxchg i32* %p, i32 7, i32 4 acquire acquire
+  %pair = cmpxchg i32* %p, i32 7, i32 4 acquire acquire
+  %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
 
@@ -25,7 +26,8 @@ define i64 @val_compare_and_swap_64(i64* %p) {
 ; CHECK: stxr   [[SCRATCH_REG:w[0-9]+]], x[[NEWVAL_REG]], [x0]
 ; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
 ; CHECK: [[LABEL2]]:
-  %val = cmpxchg i64* %p, i64 7, i64 4 monotonic monotonic
+  %pair = cmpxchg i64* %p, i64 7, i64 4 monotonic monotonic
+  %val = extractvalue { i64, i1 } %pair, 0
   ret i64 %val
 }
 
@@ -33,7 +35,8 @@ define i32 @fetch_and_nand(i32* %p) {
 ; CHECK-LABEL: fetch_and_nand:
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldxr   w[[DEST_REG:[0-9]+]], [x0]
-; CHECK: and    [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], #0xfffffff8
+; CHECK: mvn    [[TMP_REG:w[0-9]+]], w[[DEST_REG]]
+; CHECK: orr    [[SCRATCH2_REG:w[0-9]+]], [[TMP_REG]], #0xfffffff8
 ; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
 ; CHECK: stlxr   [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
 ; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
@@ -46,8 +49,9 @@ define i64 @fetch_and_nand_64(i64* %p) {
 ; CHECK-LABEL: fetch_and_nand_64:
 ; CHECK: mov    x[[ADDR:[0-9]+]], x0
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxr   [[DEST_REG:x[0-9]+]], [x[[ADDR]]]
-; CHECK: and    [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0xfffffffffffffff8
+; CHECK: ldaxr   x[[DEST_REG:[0-9]+]], [x[[ADDR]]]
+; CHECK: mvn    w[[TMP_REG:[0-9]+]], w[[DEST_REG]]
+; CHECK: orr    [[SCRATCH2_REG:x[0-9]+]], x[[TMP_REG]], #0xfffffffffffffff8
 ; CHECK: stlxr   [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
 ; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
 
diff --git a/test/CodeGen/AArch64/arm64-build-vector.ll b/test/CodeGen/AArch64/arm64-build-vector.ll
index c109263..d0f6db0 100644
--- a/test/CodeGen/AArch64/arm64-build-vector.ll
+++ b/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -33,3 +33,27 @@ define <4 x float>  @foo(float %a, float %b, float %c, float %d) nounwind {
   %4 = insertelement <4 x float> %3, float %d, i32 3
   ret <4 x float> %4
 }
+
+define <8 x i16> @build_all_zero(<8 x i16> %a) #1 {
+; CHECK-LABEL: build_all_zero:
+; CHECK: movz	w[[GREG:[0-9]+]], #0xae80
+; CHECK-NEXT:	fmov	s[[FREG:[0-9]+]], w[[GREG]]
+; CHECK-NEXT:	mul.8h	v0, v0, v[[FREG]]
+  %b = add <8 x i16> %a, <i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
+  %c = mul <8 x i16> %b, <i16 -20864, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
+  ret <8 x i16> %c
+}
+
+; There is an optimization in DAG Combiner as following:
+;   fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
+;        -> (BUILD_VECTOR A, B, ..., C, D, ...)
+; This case checks when A,B and C,D are different types, there should be no
+; assertion failure.
+define <8 x i16> @concat_2_build_vector(<4 x i16> %in0) {
+; CHECK-LABEL: concat_2_build_vector:
+; CHECK: movi
+  %vshl_n = shl <4 x i16> %in0, <i16 8, i16 8, i16 8, i16 8>
+  %vshl_n2 = shl <4 x i16> %vshl_n, <i16 9, i16 9, i16 9, i16 9>
+  %shuffle.i = shufflevector <4 x i16> %vshl_n2, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %shuffle.i
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll b/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll
deleted file mode 100644
index d862b1e..0000000
--- a/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
-
-; CHECK: fptosi_1
-; CHECK: fcvtzs.2d
-; CHECK: xtn.2s
-; CHECK: ret
-define void @fptosi_1() nounwind noinline ssp {
-entry:
-  %0 = fptosi <2 x double> undef to <2 x i32>
-  store <2 x i32> %0, <2 x i32>* undef, align 8
-  ret void
-}
-
-; CHECK: fptoui_1
-; CHECK: fcvtzu.2d
-; CHECK: xtn.2s
-; CHECK: ret
-define void @fptoui_1() nounwind noinline ssp {
-entry:
-  %0 = fptoui <2 x double> undef to <2 x i32>
-  store <2 x i32> %0, <2 x i32>* undef, align 8
-  ret void
-}
-
diff --git a/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll b/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll
deleted file mode 100644
index daaf1e0..0000000
--- a/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
-
-define <2 x double> @f1(<2 x i32> %v) nounwind readnone {
-; CHECK-LABEL: f1:
-; CHECK: sshll.2d v0, v0, #0
-; CHECK-NEXT: scvtf.2d v0, v0
-; CHECK-NEXT: ret
-  %conv = sitofp <2 x i32> %v to <2 x double>
-  ret <2 x double> %conv
-}
-define <2 x double> @f2(<2 x i32> %v) nounwind readnone {
-; CHECK-LABEL: f2:
-; CHECK: ushll.2d v0, v0, #0
-; CHECK-NEXT: ucvtf.2d v0, v0
-; CHECK-NEXT: ret
-  %conv = uitofp <2 x i32> %v to <2 x double>
-  ret <2 x double> %conv
-}
-
-; CHECK: autogen_SD19655
-; CHECK: scvtf
-; CHECK: ret
-define void @autogen_SD19655() {
-  %T = load <2 x i64>* undef
-  %F = sitofp <2 x i64> undef to <2 x float>
-  store <2 x float> %F, <2 x float>* undef
-  ret void
-}
-
diff --git a/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
new file mode 100644
index 0000000..7123e5e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -march=arm64 | FileCheck %s
+
+
+define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
+; CHECK: fptosi_v4f64_to_v4i16
+; CHECK-DAG: fcvtzs  v[[LHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs  v[[RHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: xtn  v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: xtn  v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d
+; CHECK:     uzp1  v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h
+  %tmp1 = load <4 x double>* %ptr
+  %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
+
+define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
+; CHECK: fptosi_v4f64_to_v4i8
+; CHECK-DAG:  fcvtzs  v[[CONV3:[0-9]+]].2d, v3.2d
+; CHECK-DAG:  fcvtzs  v[[CONV2:[0-9]+]].2d, v2.2d
+; CHECK-DAG:  fcvtzs  v[[CONV1:[0-9]+]].2d, v1.2d
+; CHECK-DAG:  fcvtzs  v[[CONV0:[0-9]+]].2d, v0.2d
+; CHECK-DAG:  xtn  v[[NA3:[0-9]+]].2s, v[[CONV3]].2d
+; CHECK-DAG:  xtn  v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
+; CHECK-DAG:  xtn  v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
+; CHECK-DAG:  xtn  v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
+; CHECK-DAG:  uzp1  v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h
+; CHECK-DAG:  uzp1  v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h
+; CHECK:      uzp1  v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
+  %tmp1 = load <8 x double>* %ptr
+  %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
+  ret <8 x i8> %tmp2
+}
+
diff --git a/test/CodeGen/AArch64/arm64-cse.ll b/test/CodeGen/AArch64/arm64-cse.ll
index bb14c89..5d62cfe 100644
--- a/test/CodeGen/AArch64/arm64-cse.ll
+++ b/test/CodeGen/AArch64/arm64-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 < %s | FileCheck %s
+; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 | FileCheck %s
 target triple = "arm64-apple-ios"
 
 ; rdar://12462006
diff --git a/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
index 2cf0135..6eed48b 100644
--- a/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
+++ b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
@@ -1,5 +1,8 @@
 ; RUN: llc -mcpu=cyclone < %s | FileCheck %s
 
+; r208640 broke ppc64/Linux self-hosting; xfailing while this is worked on.
+; XFAIL: *
+
 target datalayout = "e-i64:64-n32:64-S128"
 target triple = "arm64-apple-ios"
 
diff --git a/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll
index 2e4b658..ce132c6 100644
--- a/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll
+++ b/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll
@@ -13,12 +13,12 @@ target triple = "arm64-apple-ios"
 
 ; CHECK-LABEL: XX:
 ; CHECK: ldr
-define void @XX(%class.A* %K) {
+define i32 @XX(%class.A* %K, i1 %tst, i32* %addr, %class.C** %ppC, %class.C* %pC) {
 entry:
-  br i1 undef, label %if.then, label %lor.rhs.i
+  br i1 %tst, label %if.then, label %lor.rhs.i
 
 lor.rhs.i:                                        ; preds = %entry
-  %tmp = load i32* undef, align 4
+  %tmp = load i32* %addr, align 4
   %y.i.i.i = getelementptr inbounds %class.A* %K, i64 0, i32 1
   %tmp1 = load i64* %y.i.i.i, align 8
   %U.sroa.3.8.extract.trunc.i = trunc i64 %tmp1 to i32
@@ -30,17 +30,17 @@ lor.rhs.i:                                        ; preds = %entry
   %add16.i = add nsw i32 %add12.i, %div15.i
   %rem.i.i = srem i32 %add16.i, %tmp
   %idxprom = sext i32 %rem.i.i to i64
-  %arrayidx = getelementptr inbounds %class.C** undef, i64 %idxprom
-  %tobool533 = icmp eq %class.C* undef, null
+  %arrayidx = getelementptr inbounds %class.C** %ppC, i64 %idxprom
+  %tobool533 = icmp eq %class.C* %pC, null
   br i1 %tobool533, label %while.end, label %while.body
 
 if.then:                                          ; preds = %entry
-  unreachable
+  ret i32 42
 
 while.body:                                       ; preds = %lor.rhs.i
-  unreachable
+  ret i32 5
 
 while.end:                                        ; preds = %lor.rhs.i
   %tmp3 = load %class.C** %arrayidx, align 8
-  unreachable
+  ret i32 50
 }
diff --git a/test/CodeGen/AArch64/arm64-early-ifcvt.ll b/test/CodeGen/AArch64/arm64-early-ifcvt.ll
index 17d783a..44150c2 100644
--- a/test/CodeGen/AArch64/arm64-early-ifcvt.ll
+++ b/test/CodeGen/AArch64/arm64-early-ifcvt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -stress-early-ifcvt | FileCheck %s
+; RUN: llc < %s -stress-early-ifcvt -aarch64-atomic-cfg-tidy=0 | FileCheck %s
 target triple = "arm64-apple-macosx"
 
 ; CHECK: mm2
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
index a3d5f6c..1152988 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
@@ -133,3 +133,16 @@ define void @t8() {
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 4, i32 1, i1 false)
   ret void
 }
+
+define void @test_distant_memcpy(i8* %dst) {
+; ARM64-LABEL: test_distant_memcpy:
+; ARM64: mov [[ARRAY:x[0-9]+]], sp
+; ARM64: movz [[OFFSET:x[0-9]+]], #0x1f40
+; ARM64: add x[[ADDR:[0-9]+]], [[ARRAY]], [[OFFSET]]
+; ARM64: ldrb [[BYTE:w[0-9]+]], [x[[ADDR]]]
+; ARM64: strb [[BYTE]], [x0]
+  %array = alloca i8, i32 8192
+  %elem = getelementptr i8* %array, i32 8000
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %elem, i64 1, i32 1, i1 false)
+  ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-fp128.ll b/test/CodeGen/AArch64/arm64-fp128.ll
index 57bbb93..b1d5010 100644
--- a/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/test/CodeGen/AArch64/arm64-fp128.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -aarch64-atomic-cfg-tidy=0 < %s | FileCheck %s
 
 @lhs = global fp128 zeroinitializer, align 16
 @rhs = global fp128 zeroinitializer, align 16
diff --git a/test/CodeGen/AArch64/arm64-frame-index.ll b/test/CodeGen/AArch64/arm64-frame-index.ll
index 4a91ff3..321f335 100644
--- a/test/CodeGen/AArch64/arm64-frame-index.ll
+++ b/test/CodeGen/AArch64/arm64-frame-index.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -mtriple=arm64-apple-ios < %s | FileCheck %s
+; RUN: llc -march=arm64 -mtriple=arm64-apple-ios -aarch64-atomic-cfg-tidy=0 < %s | FileCheck %s
 ; rdar://11935841
 
 define void @t1() nounwind ssp {
diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
index f88bd6a..bc7ed7f 100644
--- a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
+++ b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
@@ -122,3 +122,82 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
 }
 
 declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*)
+
+; Regression Test for PR20057.
+;
+; Cortex-A53 machine model stalls on A53UnitFPMDS contention. Instructions that
+; are otherwise ready are jammed in the pending queue.
+; CHECK: ********** MI Scheduling **********
+; CHECK: testResourceConflict
+; CHECK: *** Final schedule for BB#0 ***
+; CHECK: BRK
+; CHECK: ********** INTERVALS **********
+define void @testResourceConflict(float* %ptr) {
+entry:
+  %add1 = fadd float undef, undef
+  %mul2 = fmul float undef, undef
+  %add3 = fadd float %mul2, undef
+  %mul4 = fmul float undef, %add3
+  %add5 = fadd float %mul4, undef
+  %sub6 = fsub float 0.000000e+00, undef
+  %sub7 = fsub float %add5, undef
+  %div8 = fdiv float 1.000000e+00, undef
+  %mul9 = fmul float %div8, %sub7
+  %mul14 = fmul float %sub6, %div8
+  %mul10 = fsub float -0.000000e+00, %mul14
+  %mul15 = fmul float undef, %div8
+  %mul11 = fsub float -0.000000e+00, %mul15
+  %mul12 = fmul float 0.000000e+00, %div8
+  %mul13 = fmul float %add1, %mul9
+  %mul21 = fmul float %add5, %mul11
+  %add22 = fadd float %mul13, %mul21
+  store float %add22, float* %ptr, align 4
+  %mul28 = fmul float %add1, %mul10
+  %mul33 = fmul float %add5, %mul12
+  %add34 = fadd float %mul33, %mul28
+  store float %add34, float* %ptr, align 4
+  %mul240 = fmul float undef, %mul9
+  %add246 = fadd float %mul240, undef
+  store float %add246, float* %ptr, align 4
+  %mul52 = fmul float undef, %mul10
+  %mul57 = fmul float undef, %mul12
+  %add58 = fadd float %mul57, %mul52
+  store float %add58, float* %ptr, align 4
+  %mul27 = fmul float 0.000000e+00, %mul9
+  %mul81 = fmul float undef, %mul10
+  %add82 = fadd float %mul27, %mul81
+  store float %add82, float* %ptr, align 4
+  call void @llvm.trap()
+  unreachable
+}
+
+declare void @llvm.trap()
+
+; Regression test for PR20057: "permanent hazard"'
+; Resource contention on LDST.
+; CHECK: ********** MI Scheduling **********
+; CHECK: testLdStConflict
+; CHECK: *** Final schedule for BB#1 ***
+; CHECK: LD4Fourv2d
+; CHECK: STRQui
+; CHECK: ********** INTERVALS **********
+define void @testLdStConflict() {
+entry:
+  br label %loop
+
+loop:
+  %0 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i8(i8* null)
+  %ptr = bitcast i8* undef to <2 x i64>*
+  store <2 x i64> zeroinitializer, <2 x i64>* %ptr, align 4
+  %ptr1 = bitcast i8* undef to <2 x i64>*
+  store <2 x i64> zeroinitializer, <2 x i64>* %ptr1, align 4
+  %ptr2 = bitcast i8* undef to <2 x i64>*
+  store <2 x i64> zeroinitializer, <2 x i64>* %ptr2, align 4
+  %ptr3 = bitcast i8* undef to <2 x i64>*
+  store <2 x i64> zeroinitializer, <2 x i64>* %ptr3, align 4
+  %ptr4 = bitcast i8* undef to <2 x i64>*
+  store <2 x i64> zeroinitializer, <2 x i64>* %ptr4, align 4
+  br label %loop
+}
+
+declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i8(i8*)
diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A57.ll b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
new file mode 100644
index 0000000..238474a
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
@@ -0,0 +1,112 @@
+; REQUIRES: asserts
+;
+; The Cortext-A57 machine model will avoid scheduling load instructions in
+; succession because loads on the A57 have a latency of 4 cycles and they all
+; issue to the same pipeline. Instead, it will move other instructions between
+; the loads to avoid unnecessary stalls. The generic machine model schedules 4
+; loads consecutively for this case and will cause stalls.
+;
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+; CHECK: ********** MI Scheduling **********
+; CHECK: main:BB#2
+; CHECK LDR
+; CHECK Latency : 4
+; CHECK: *** Final schedule for BB#2 ***
+; CHECK: LDR
+; CHECK: LDR
+; CHECK-NOT: LDR
+; CHECK: {{.*}}
+; CHECK: ********** MI Scheduling **********
+
+@main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4
+@main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %x = alloca [8 x i32], align 4
+  %y = alloca [8 x i32], align 4
+  %i = alloca i32, align 4
+  %xx = alloca i32, align 4
+  %yy = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = bitcast [8 x i32]* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false)
+  %1 = bitcast [8 x i32]* %y to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false)
+  store i32 0, i32* %xx, align 4
+  store i32 0, i32* %yy, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %2 = load i32* %i, align 4
+  %cmp = icmp slt i32 %2, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %3 = load i32* %yy, align 4
+  %4 = load i32* %i, align 4
+  %idxprom = sext i32 %4 to i64
+  %arrayidx = getelementptr inbounds [8 x i32]* %x, i32 0, i64 %idxprom
+  %5 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %5, 1
+  store i32 %add, i32* %xx, align 4
+  %6 = load i32* %xx, align 4
+  %add1 = add nsw i32 %6, 12
+  store i32 %add1, i32* %xx, align 4
+  %7 = load i32* %xx, align 4
+  %add2 = add nsw i32 %7, 23
+  store i32 %add2, i32* %xx, align 4
+  %8 = load i32* %xx, align 4
+  %add3 = add nsw i32 %8, 34
+  store i32 %add3, i32* %xx, align 4
+  %9 = load i32* %i, align 4
+  %idxprom4 = sext i32 %9 to i64
+  %arrayidx5 = getelementptr inbounds [8 x i32]* %y, i32 0, i64 %idxprom4
+  %10 = load i32* %arrayidx5, align 4
+
+  %add4 = add nsw i32 %9, %add
+  %add5 = add nsw i32 %10, %add1
+  %add6 = add nsw i32 %add4, %add5
+
+  %add7 = add nsw i32 %9, %add3
+  %add8 = add nsw i32 %10, %add4
+  %add9 = add nsw i32 %add7, %add8
+
+  %add10 = add nsw i32 %9, %add6
+  %add11 = add nsw i32 %10, %add7
+  %add12 = add nsw i32 %add10, %add11
+
+  %add13 = add nsw i32 %9, %add9
+  %add14 = add nsw i32 %10, %add10
+  %add15 = add nsw i32 %add13, %add14
+
+  store i32 %add15, i32* %xx, align 4
+
+  %div = sdiv i32 %4, %5
+
+  store i32 %div, i32* %yy, align 4
+
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %11 = load i32* %i, align 4
+  %inc = add nsw i32 %11, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %12 = load i32* %xx, align 4
+  %13 = load i32* %yy, align 4
+  %add67 = add nsw i32 %12, %13
+  ret i32 %add67
+}
+
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll
index 97bfb5c..07373cc 100644
--- a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll
+++ b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll
@@ -6,9 +6,10 @@
 ;
 ; CHECK: ********** MI Scheduling **********
 ; CHECK: shiftable
-; CHECK: *** Final schedule for BB#0 ***
-; CHECK: ADDXrr %vreg0, %vreg2
-; CHECK: ADDXrs %vreg0, %vreg2, 5
+; CHECK: SU(2):   %vreg2<def> = SUBXri %vreg1, 20, 0
+; CHECK:   Successors:
+; CHECK-NEXT:    val SU(4): Latency=1 Reg=%vreg2
+; CHECK-NEXT:    val SU(3): Latency=2 Reg=%vreg2
 ; CHECK: ********** INTERVALS **********
 define i64 @shiftable(i64 %A, i64 %B) {
         %tmp0 = sub i64 %B, 20
diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll
index cfc2ebf..1cfba82 100644
--- a/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -842,7 +842,7 @@ define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
 
 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
 ; CHECK-LABEL: testDUP.v1i8:
-; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+; CHECK: dup v0.8b, v0.b[0]
   %b = extractelement <1 x i8> %a, i32 0
   %c = insertelement <8 x i8> undef, i8 %b, i32 0
   %d = insertelement <8 x i8> %c, i8 %b, i32 1
@@ -857,7 +857,7 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
 
 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
 ; CHECK-LABEL: testDUP.v1i16:
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+; CHECK: dup v0.8h, v0.h[0]
   %b = extractelement <1 x i16> %a, i32 0
   %c = insertelement <8 x i16> undef, i16 %b, i32 0
   %d = insertelement <8 x i16> %c, i16 %b, i32 1
@@ -872,7 +872,7 @@ define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
 
 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
 ; CHECK-LABEL: testDUP.v1i32:
-; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+; CHECK: dup v0.4s, v0.s[0]
   %b = extractelement <1 x i32> %a, i32 0
   %c = insertelement <4 x i32> undef, i32 %b, i32 0
   %d = insertelement <4 x i32> %c, i32 %b, i32 1
@@ -1411,35 +1411,35 @@ define <16 x i8> @concat_vector_v16i8_const() {
 
 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
 ; CHECK-LABEL: concat_vector_v4i16:
-; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+; CHECK: dup v0.4h, v0.h[0]
  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %r
 }
 
 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
 ; CHECK-LABEL: concat_vector_v4i32:
-; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+; CHECK: dup v0.4s, v0.s[0]
  %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %r
 }
 
 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
 ; CHECK-LABEL: concat_vector_v8i8:
-; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+; CHECK: dup v0.8b, v0.b[0]
  %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %r
 }
 
 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
 ; CHECK-LABEL: concat_vector_v8i16:
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+; CHECK: dup v0.8h, v0.h[0]
  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r
 }
 
 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
 ; CHECK-LABEL: concat_vector_v16i8:
-; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
+; CHECK: dup v0.16b, v0.b[0]
  %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %r
 }
diff --git a/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
index 255b90d..95c582a 100644
--- a/test/CodeGen/AArch64/arm64-neon-select_cc.ll
+++ b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
@@ -136,8 +136,8 @@ define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d )
 
 define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
 ; CHECK-LABEL: test_select_cc_v1f32:
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel s0, s2, s3, eq
+; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
+; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b
   %cmp31 = fcmp oeq float %a, %b
   %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
   ret <1 x float> %e
diff --git a/test/CodeGen/AArch64/arm64-shrink-v1i64.ll b/test/CodeGen/AArch64/arm64-shrink-v1i64.ll
new file mode 100644
index 0000000..f31a570
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-shrink-v1i64.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=arm64 < %s
+
+; The DAGCombiner tries to do following shrink:
+;     Convert x+y to (VT)((SmallVT)x+(SmallVT)y)
+; But currently it can't handle vector type and will trigger an assertion failure
+; when it tries to generate an add mixed using vector type and scaler type.
+; This test checks that such assertion failur should not happen.
+define <1 x i64> @dotest(<1 x i64> %in0) {
+entry:
+  %0 = add <1 x i64> %in0, %in0
+  %vshl_n = shl <1 x i64> %0, <i64 32>
+  %vsra_n = ashr <1 x i64> %vshl_n, <i64 32>
+  ret <1 x i64> %vsra_n
+}
diff --git a/test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll b/test/CodeGen/AArch64/arm64-sqshl-uqshl-i64Contant.ll
index 3949b85..3949b85 100644
--- a/test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll
+++ b/test/CodeGen/AArch64/arm64-sqshl-uqshl-i64Contant.ll
diff --git a/test/CodeGen/AArch64/arm64-vcvt.ll b/test/CodeGen/AArch64/arm64-vcvt.ll
index 8c9e4e9..6570f0e 100644
--- a/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -665,19 +665,19 @@ define <2 x double> @ucvtf_2dc(<2 x i64> %A) nounwind {
 ;CHECK-LABEL: autogen_SD28458:
 ;CHECK: fcvt
 ;CHECK: ret
-define void @autogen_SD28458() {
-  %Tr53 = fptrunc <8 x double> undef to <8 x float>
-  store <8 x float> %Tr53, <8 x float>* undef
+define void @autogen_SD28458(<8 x double> %val.f64, <8 x float>* %addr.f32) {
+  %Tr53 = fptrunc <8 x double> %val.f64 to <8 x float>
+  store <8 x float> %Tr53, <8 x float>* %addr.f32
   ret void
 }
 
 ;CHECK-LABEL: autogen_SD19225:
 ;CHECK: fcvt
 ;CHECK: ret
-define void @autogen_SD19225() {
-  %A = load <8 x float>* undef
+define void @autogen_SD19225(<8 x double>* %addr.f64, <8 x float>* %addr.f32) {
+  %A = load <8 x float>* %addr.f32
   %Tr53 = fpext <8 x float> %A to <8 x double>
-  store <8 x double> %Tr53, <8 x double>* undef
+  store <8 x double> %Tr53, <8 x double>* %addr.f64
   ret void
 }
 
diff --git a/test/CodeGen/AArch64/arm64-vshift.ll b/test/CodeGen/AArch64/arm64-vshift.ll
index 82ae486..65bd50c 100644
--- a/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/test/CodeGen/AArch64/arm64-vshift.ll
@@ -1313,6 +1313,15 @@ define <8 x i8> @uqshli8b(<8 x i8>* %A) nounwind {
         ret <8 x i8> %tmp3
 }
 
+define <8 x i8> @uqshli8b_1(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: uqshli8b_1:
+;CHECK: movi.8b [[REG:v[0-9]+]], #0x8
+;CHECK: uqshl.8b v0, v0, [[REG]]
+        %tmp1 = load <8 x i8>* %A
+        %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
+        ret <8 x i8> %tmp3
+}
+
 define <4 x i16> @uqshli4h(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: uqshli4h:
 ;CHECK: uqshl.4h v0, {{v[0-9]+}}, #1
diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll
index 6cffbde..0c300de 100644
--- a/test/CodeGen/AArch64/arm64-xaluo.ll
+++ b/test/CodeGen/AArch64/arm64-xaluo.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-atomic-cfg-tidy=0 | FileCheck %s
 
 ;
 ; Get the actual value of the overflow bit.
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index 58b5d1d..26301b9 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -878,7 +878,9 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8:
-   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
+   %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
+   %old = extractvalue { i8, i1 } %pair, 0
+
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
@@ -889,8 +891,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w[[OLD]], w0
 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
-  ; As above, w1 is a reasonable guess.
-; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK: stxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
 ; CHECK-NOT: dmb
 
@@ -900,7 +901,9 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 
 define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16:
-   %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
+   %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
+   %old = extractvalue { i16, i1 } %pair, 0
+
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
@@ -911,8 +914,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w[[OLD]], w0
 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
-  ; As above, w1 is a reasonable guess.
-; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK: stlxrh [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
 ; CHECK-NOT: dmb
 
@@ -922,7 +924,9 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 
 define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32:
-   %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
+   %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
+   %old = extractvalue { i32, i1 } %pair, 0
+
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
@@ -933,8 +937,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w[[OLD]], w0
 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
-  ; As above, w1 is a reasonable guess.
-; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
 ; CHECK-NOT: dmb
 
@@ -944,7 +947,9 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 
 define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64:
-   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
+   %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
+   %old = extractvalue { i64, i1 } %pair, 0
+
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll
index 1eec4cc..3a5dbdc 100644
--- a/test/CodeGen/AArch64/blockaddress.ll
+++ b/test/CodeGen/AArch64/blockaddress.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s
 
 @addr = global i8* null
 
diff --git a/test/CodeGen/AArch64/branch-relax-asm.ll b/test/CodeGen/AArch64/branch-relax-asm.ll
new file mode 100644
index 0000000..7409c84
--- /dev/null
+++ b/test/CodeGen/AArch64/branch-relax-asm.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=aarch64-apple-ios7.0 -disable-block-placement -aarch64-tbz-offset-bits=4 -o - %s | FileCheck %s
+define i32 @test_asm_length(i32 %in) {
+; CHECK-LABEL: test_asm_length:
+
+  ; It would be more natural to use just one "tbnz %false" here, but if the
+  ; number of instructions in the asm is counted reasonably, that block is out
+  ; of the limited range we gave tbz. So branch relaxation has to invert the
+  ; condition.
+; CHECK:     tbz w0, #0, [[TRUE:LBB[0-9]+_[0-9]+]]
+; CHECK:     b [[FALSE:LBB[0-9]+_[0-9]+]]
+
+; CHECK: [[TRUE]]:
+; CHECK:     orr w0, wzr, #0x4
+; CHECK:     nop
+; CHECK:     nop
+; CHECK:     nop
+; CHECK:     nop
+; CHECK:     nop
+; CHECK:     nop
+; CHECK:     ret
+
+; CHECK: [[FALSE]]:
+; CHECK:     ret
+
+  %val = and i32 %in, 1
+  %tst = icmp eq i32 %val, 0
+  br i1 %tst, label %true, label %false
+
+true:
+  call void asm sideeffect "nop\0A\09nop\0A\09nop\0A\09nop\0A\09nop\0A\09nop", ""()
+  ret i32 4
+
+false:
+  ret i32 0
+}
diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll
index 591f483..9524044 100644
--- a/test/CodeGen/AArch64/breg.ll
+++ b/test/CodeGen/AArch64/breg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s
 
 @stored_label = global i8* null
 
diff --git a/test/CodeGen/AArch64/cmpxchg-idioms.ll b/test/CodeGen/AArch64/cmpxchg-idioms.ll
new file mode 100644
index 0000000..0c008c2
--- /dev/null
+++ b/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -0,0 +1,93 @@
+; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s
+
+define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
+; CHECK-LABEL: test_return:
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxr [[LOADED:w[0-9]+]], [x0]
+; CHECK: cmp [[LOADED]], w1
+; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]]
+
+; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x0]
+; CHECK: cbnz [[STATUS]], [[LOOP]]
+
+; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: orr w0, wzr, #0x1
+; CHECK: ret
+
+; CHECK: [[FAILED]]:
+; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: mov w0, wzr
+; CHECK: ret
+
+  %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
+  %success = extractvalue { i32, i1 } %pair, 1
+  %conv = zext i1 %success to i32
+  ret i32 %conv
+}
+
+define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {
+; CHECK-LABEL: test_return_bool:
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxrb [[LOADED:w[0-9]+]], [x0]
+; CHECK: cmp [[LOADED]], w1, uxtb
+; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]]
+
+; CHECK: stlxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x0]
+; CHECK: cbnz [[STATUS]], [[LOOP]]
+
+; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
+  ; FIXME: DAG combine should be able to deal with this.
+; CHECK: orr [[TMP:w[0-9]+]], wzr, #0x1
+; CHECK: eor w0, [[TMP]], #0x1
+; CHECK: ret
+
+; CHECK: [[FAILED]]:
+; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: mov [[TMP:w[0-9]+]], wzr
+; CHECK: eor w0, [[TMP]], #0x1
+; CHECK: ret
+
+  %pair = cmpxchg i8* %value, i8 %oldValue, i8 %newValue acq_rel monotonic
+  %success = extractvalue { i8, i1 } %pair, 1
+  %failure = xor i1 %success, 1
+  ret i1 %failure
+}
+
+define void @test_conditional(i32* %p, i32 %oldval, i32 %newval) {
+; CHECK-LABEL: test_conditional:
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxr [[LOADED:w[0-9]+]], [x0]
+; CHECK: cmp [[LOADED]], w1
+; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]]
+
+; CHECK: stlxr [[STATUS:w[0-9]+]], w2, [x0]
+; CHECK: cbnz [[STATUS]], [[LOOP]]
+
+; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: b _bar
+
+; CHECK: [[FAILED]]:
+; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: b _baz
+
+  %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
+  %success = extractvalue { i32, i1 } %pair, 1
+  br i1 %success, label %true, label %false
+
+true:
+  tail call void @bar() #2
+  br label %end
+
+false:
+  tail call void @baz() #2
+  br label %end
+
+end:
+  ret void
+}
+
+declare void @bar()
+declare void @baz()
diff --git a/test/CodeGen/AArch64/compiler-ident.ll b/test/CodeGen/AArch64/compiler-ident.ll
new file mode 100644
index 0000000..0350571
--- /dev/null
+++ b/test/CodeGen/AArch64/compiler-ident.ll
@@ -0,0 +1,12 @@
+; RUN: llc -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+
+; ModuleID = 'compiler-ident.c'
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK: .ident  "some LLVM version"
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"some LLVM version"}
+
diff --git a/test/CodeGen/AArch64/complex-fp-to-int.ll b/test/CodeGen/AArch64/complex-fp-to-int.ll
new file mode 100644
index 0000000..13cf762
--- /dev/null
+++ b/test/CodeGen/AArch64/complex-fp-to-int.ll
@@ -0,0 +1,141 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <2 x i64> @test_v2f32_to_signed_v2i64(<2 x float> %in) {
+; CHECK-LABEL: test_v2f32_to_signed_v2i64:
+; CHECK: fcvtl [[VAL64:v[0-9]+]].2d, v0.2s
+; CHECK: fcvtzs.2d v0, [[VAL64]]
+
+  %val = fptosi <2 x float> %in to <2 x i64>
+  ret <2 x i64> %val
+}
+
+define <2 x i64> @test_v2f32_to_unsigned_v2i64(<2 x float> %in) {
+; CHECK-LABEL: test_v2f32_to_unsigned_v2i64:
+; CHECK: fcvtl [[VAL64:v[0-9]+]].2d, v0.2s
+; CHECK: fcvtzu.2d v0, [[VAL64]]
+
+  %val = fptoui <2 x float> %in to <2 x i64>
+  ret <2 x i64> %val
+}
+
+define <2 x i16> @test_v2f32_to_signed_v2i16(<2 x float> %in) {
+; CHECK-LABEL: test_v2f32_to_signed_v2i16:
+; CHECK: fcvtzs.2s v0, v0
+
+  %val = fptosi <2 x float> %in to <2 x i16>
+  ret <2 x i16> %val
+}
+
+define <2 x i16> @test_v2f32_to_unsigned_v2i16(<2 x float> %in) {
+; CHECK-LABEL: test_v2f32_to_unsigned_v2i16:
+; CHECK: fcvtzs.2s v0, v0
+
+  %val = fptoui <2 x float> %in to <2 x i16>
+  ret <2 x i16> %val
+}
+
+define <2 x i8> @test_v2f32_to_signed_v2i8(<2 x float> %in) {
+; CHECK-LABEL: test_v2f32_to_signed_v2i8:
+; CHECK: fcvtzs.2s v0, v0
+
+  %val = fptosi <2 x float> %in to <2 x i8>
+  ret <2 x i8> %val
+}
+
+define <2 x i8> @test_v2f32_to_unsigned_v2i8(<2 x float> %in) {
+; CHECK-LABEL: test_v2f32_to_unsigned_v2i8:
+; CHECK: fcvtzs.2s v0, v0
+
+  %val = fptoui <2 x float> %in to <2 x i8>
+  ret <2 x i8> %val
+}
+
+define <4 x i16> @test_v4f32_to_signed_v4i16(<4 x float> %in) {
+; CHECK-LABEL: test_v4f32_to_signed_v4i16:
+; CHECK: fcvtzs.4s [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.4h v0, [[VAL64]]
+
+  %val = fptosi <4 x float> %in to <4 x i16>
+  ret <4 x i16> %val
+}
+
+define <4 x i16> @test_v4f32_to_unsigned_v4i16(<4 x float> %in) {
+; CHECK-LABEL: test_v4f32_to_unsigned_v4i16:
+; CHECK: fcvtzu.4s [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.4h v0, [[VAL64]]
+
+  %val = fptoui <4 x float> %in to <4 x i16>
+  ret <4 x i16> %val
+}
+
+define <4 x i8> @test_v4f32_to_signed_v4i8(<4 x float> %in) {
+; CHECK-LABEL: test_v4f32_to_signed_v4i8:
+; CHECK: fcvtzs.4s [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.4h v0, [[VAL64]]
+
+  %val = fptosi <4 x float> %in to <4 x i8>
+  ret <4 x i8> %val
+}
+
+define <4 x i8> @test_v4f32_to_unsigned_v4i8(<4 x float> %in) {
+; CHECK-LABEL: test_v4f32_to_unsigned_v4i8:
+; CHECK: fcvtzs.4s [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.4h v0, [[VAL64]]
+
+  %val = fptoui <4 x float> %in to <4 x i8>
+  ret <4 x i8> %val
+}
+
+define <2 x i32> @test_v2f64_to_signed_v2i32(<2 x double> %in) {
+; CHECK-LABEL: test_v2f64_to_signed_v2i32:
+; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.2s v0, [[VAL64]]
+
+  %val = fptosi <2 x double> %in to <2 x i32>
+  ret <2 x i32> %val
+}
+
+define <2 x i32> @test_v2f64_to_unsigned_v2i32(<2 x double> %in) {
+; CHECK-LABEL: test_v2f64_to_unsigned_v2i32:
+; CHECK: fcvtzu.2d [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.2s v0, [[VAL64]]
+
+  %val = fptoui <2 x double> %in to <2 x i32>
+  ret <2 x i32> %val
+}
+
+define <2 x i16> @test_v2f64_to_signed_v2i16(<2 x double> %in) {
+; CHECK-LABEL: test_v2f64_to_signed_v2i16:
+; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.2s v0, [[VAL64]]
+
+  %val = fptosi <2 x double> %in to <2 x i16>
+  ret <2 x i16> %val
+}
+
+define <2 x i16> @test_v2f64_to_unsigned_v2i16(<2 x double> %in) {
+; CHECK-LABEL: test_v2f64_to_unsigned_v2i16:
+; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.2s v0, [[VAL64]]
+
+  %val = fptoui <2 x double> %in to <2 x i16>
+  ret <2 x i16> %val
+}
+
+define <2 x i8> @test_v2f64_to_signed_v2i8(<2 x double> %in) {
+; CHECK-LABEL: test_v2f64_to_signed_v2i8:
+; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.2s v0, [[VAL64]]
+
+  %val = fptosi <2 x double> %in to <2 x i8>
+  ret <2 x i8> %val
+}
+
+define <2 x i8> @test_v2f64_to_unsigned_v2i8(<2 x double> %in) {
+; CHECK-LABEL: test_v2f64_to_unsigned_v2i8:
+; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.2s v0, [[VAL64]]
+
+  %val = fptoui <2 x double> %in to <2 x i8>
+  ret <2 x i8> %val
+}
diff --git a/test/CodeGen/AArch64/complex-int-to-fp.ll b/test/CodeGen/AArch64/complex-int-to-fp.ll
new file mode 100644
index 0000000..5c943f9
--- /dev/null
+++ b/test/CodeGen/AArch64/complex-int-to-fp.ll
@@ -0,0 +1,164 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+; CHECK: autogen_SD19655
+; CHECK: scvtf
+; CHECK: ret
+define void @autogen_SD19655(<2 x i64>* %addr, <2 x float>* %addrfloat) {
+  %T = load <2 x i64>* %addr
+  %F = sitofp <2 x i64> %T to <2 x float>
+  store <2 x float> %F, <2 x float>* %addrfloat
+  ret void
+}
+
+define <2 x double> @test_signed_v2i32_to_v2f64(<2 x i32> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v2i32_to_v2f64:
+; CHECK: sshll.2d [[VAL64:v[0-9]+]], v0, #0
+; CHECK-NEXT: scvtf.2d v0, [[VAL64]]
+; CHECK-NEXT: ret
+  %conv = sitofp <2 x i32> %v to <2 x double>
+  ret <2 x double> %conv
+}
+
+define <2 x double> @test_unsigned_v2i32_to_v2f64(<2 x i32> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v2i32_to_v2f64
+; CHECK: ushll.2d [[VAL64:v[0-9]+]], v0, #0
+; CHECK-NEXT: ucvtf.2d v0, [[VAL64]]
+; CHECK-NEXT: ret
+  %conv = uitofp <2 x i32> %v to <2 x double>
+  ret <2 x double> %conv
+}
+
+define <2 x double> @test_signed_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v2i16_to_v2f64:
+; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #16
+; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #16
+; CHECK: sshll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
+; CHECK: scvtf.2d v0, [[VAL64]]
+
+  %conv = sitofp <2 x i16> %v to <2 x double>
+  ret <2 x double> %conv
+}
+define <2 x double> @test_unsigned_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v2i16_to_v2f64
+; CHECK: movi d[[MASK:[0-9]+]], #0x00ffff0000ffff
+; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
+; CHECK: ushll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
+; CHECK: ucvtf.2d v0, [[VAL64]]
+
+  %conv = uitofp <2 x i16> %v to <2 x double>
+  ret <2 x double> %conv
+}
+
+define <2 x double> @test_signed_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v2i8_to_v2f64:
+; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #24
+; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #24
+; CHECK: sshll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
+; CHECK: scvtf.2d v0, [[VAL64]]
+
+  %conv = sitofp <2 x i8> %v to <2 x double>
+  ret <2 x double> %conv
+}
+define <2 x double> @test_unsigned_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v2i8_to_v2f64
+; CHECK: movi d[[MASK:[0-9]+]], #0x0000ff000000ff
+; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
+; CHECK: ushll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
+; CHECK: ucvtf.2d v0, [[VAL64]]
+
+  %conv = uitofp <2 x i8> %v to <2 x double>
+  ret <2 x double> %conv
+}
+
+define <2 x float> @test_signed_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v2i64_to_v2f32:
+; CHECK: scvtf.2d [[VAL64:v[0-9]+]], v0
+; CHECK: fcvtn v0.2s, [[VAL64]].2d
+
+  %conv = sitofp <2 x i64> %v to <2 x float>
+  ret <2 x float> %conv
+}
+define <2 x float> @test_unsigned_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v2i64_to_v2f32
+; CHECK: ucvtf.2d [[VAL64:v[0-9]+]], v0
+; CHECK: fcvtn v0.2s, [[VAL64]].2d
+
+  %conv = uitofp <2 x i64> %v to <2 x float>
+  ret <2 x float> %conv
+}
+
+define <2 x float> @test_signed_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v2i16_to_v2f32:
+; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #16
+; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #16
+; CHECK: scvtf.2s v0, [[VAL32]]
+
+  %conv = sitofp <2 x i16> %v to <2 x float>
+  ret <2 x float> %conv
+}
+define <2 x float> @test_unsigned_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v2i16_to_v2f32
+; CHECK: movi d[[MASK:[0-9]+]], #0x00ffff0000ffff
+; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
+; CHECK: ucvtf.2s v0, [[VAL32]]
+
+  %conv = uitofp <2 x i16> %v to <2 x float>
+  ret <2 x float> %conv
+}
+
+define <2 x float> @test_signed_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v2i8_to_v2f32:
+; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #24
+; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #24
+; CHECK: scvtf.2s v0, [[VAL32]]
+
+  %conv = sitofp <2 x i8> %v to <2 x float>
+  ret <2 x float> %conv
+}
+define <2 x float> @test_unsigned_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v2i8_to_v2f32
+; CHECK: movi d[[MASK:[0-9]+]], #0x0000ff000000ff
+; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
+; CHECK: ucvtf.2s v0, [[VAL32]]
+
+  %conv = uitofp <2 x i8> %v to <2 x float>
+  ret <2 x float> %conv
+}
+
+define <4 x float> @test_signed_v4i16_to_v4f32(<4 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v4i16_to_v4f32:
+; CHECK: sshll.4s [[VAL32:v[0-9]+]], v0, #0
+; CHECK: scvtf.4s v0, [[VAL32]]
+
+  %conv = sitofp <4 x i16> %v to <4 x float>
+  ret <4 x float> %conv
+}
+
+define <4 x float> @test_unsigned_v4i16_to_v4f32(<4 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v4i16_to_v4f32
+; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0
+; CHECK: ucvtf.4s v0, [[VAL32]]
+
+  %conv = uitofp <4 x i16> %v to <4 x float>
+  ret <4 x float> %conv
+}
+
+define <4 x float> @test_signed_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v4i8_to_v4f32:
+; CHECK: shl.4h [[TMP:v[0-9]+]], v0, #8
+; CHECK: sshr.4h [[VAL16:v[0-9]+]], [[TMP]], #8
+; CHECK: sshll.4s [[VAL32:v[0-9]+]], [[VAL16]], #0
+; CHECK: scvtf.4s v0, [[VAL32]]
+
+  %conv = sitofp <4 x i8> %v to <4 x float>
+  ret <4 x float> %conv
+}
+define <4 x float> @test_unsigned_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v4i8_to_v4f32
+; CHECK: bic.4h v0, #0xff, lsl #8
+; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0
+; CHECK: ucvtf.4s v0, [[VAL32]]
+
+  %conv = uitofp <4 x i8> %v to <4 x float>
+  ret <4 x float> %conv
+}
diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll
index 1b51928..fbea4a6 100644
--- a/test/CodeGen/AArch64/directcond.ll
+++ b/test/CodeGen/AArch64/directcond.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-atomic-cfg-tidy=0 | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -aarch64-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-NOFP %s
 
 define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) {
 ; CHECK-LABEL: test_select_i32:
diff --git a/test/CodeGen/AArch64/f16-convert.ll b/test/CodeGen/AArch64/f16-convert.ll
new file mode 100644
index 0000000..6fabdc5
--- /dev/null
+++ b/test/CodeGen/AArch64/f16-convert.ll
@@ -0,0 +1,254 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios -asm-verbose=false | FileCheck %s
+
+define float @load0(i16* nocapture readonly %a) nounwind {
+; CHECK-LABEL: load0:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0]
+; CHECK-NEXT: fcvt s0, [[HREG]]
+; CHECK-NEXT: ret
+
+  %tmp = load i16* %a, align 2
+  %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+  ret float %tmp1
+}
+
+define double @load1(i16* nocapture readonly %a) nounwind {
+; CHECK-LABEL: load1:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0]
+; CHECK-NEXT: fcvt d0, [[HREG]]
+; CHECK-NEXT: ret
+
+  %tmp = load i16* %a, align 2
+  %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+  %conv = fpext float %tmp1 to double
+  ret double %conv
+}
+
+define float @load2(i16* nocapture readonly %a, i32 %i) nounwind {
+; CHECK-LABEL: load2:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, w1, sxtw #1]
+; CHECK-NEXT: fcvt s0, [[HREG]]
+; CHECK-NEXT: ret
+
+  %idxprom = sext i32 %i to i64
+  %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
+  %tmp = load i16* %arrayidx, align 2
+  %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+  ret float %tmp1
+}
+
+define double @load3(i16* nocapture readonly %a, i32 %i) nounwind {
+; CHECK-LABEL: load3:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, w1, sxtw #1]
+; CHECK-NEXT: fcvt d0, [[HREG]]
+; CHECK-NEXT: ret
+
+  %idxprom = sext i32 %i to i64
+  %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
+  %tmp = load i16* %arrayidx, align 2
+  %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+  %conv = fpext float %tmp1 to double
+  ret double %conv
+}
+
+define float @load4(i16* nocapture readonly %a, i64 %i) nounwind {
+; CHECK-LABEL: load4:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, x1, lsl #1]
+; CHECK-NEXT: fcvt s0, [[HREG]]
+; CHECK-NEXT: ret
+
+  %arrayidx = getelementptr inbounds i16* %a, i64 %i
+  %tmp = load i16* %arrayidx, align 2
+  %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+  ret float %tmp1
+}
+
+define double @load5(i16* nocapture readonly %a, i64 %i) nounwind {
+; CHECK-LABEL: load5:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, x1, lsl #1]
+; CHECK-NEXT: fcvt d0, [[HREG]]
+; CHECK-NEXT: ret
+
+  %arrayidx = getelementptr inbounds i16* %a, i64 %i
+  %tmp = load i16* %arrayidx, align 2
+  %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+  %conv = fpext float %tmp1 to double
+  ret double %conv
+}
+
+define float @load6(i16* nocapture readonly %a) nounwind {
+; CHECK-LABEL: load6:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, #20]
+; CHECK-NEXT: fcvt s0, [[HREG]]
+; CHECK-NEXT: ret
+
+  %arrayidx = getelementptr inbounds i16* %a, i64 10
+  %tmp = load i16* %arrayidx, align 2
+  %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+  ret float %tmp1
+}
+
+define double @load7(i16* nocapture readonly %a) nounwind {
+; CHECK-LABEL: load7:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, #20]
+; CHECK-NEXT: fcvt d0, [[HREG]]
+; CHECK-NEXT: ret
+
+  %arrayidx = getelementptr inbounds i16* %a, i64 10
+  %tmp = load i16* %arrayidx, align 2
+  %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+  %conv = fpext float %tmp1 to double
+  ret double %conv
+}
+
+define float @load8(i16* nocapture readonly %a) nounwind {
+; CHECK-LABEL: load8:
+; CHECK-NEXT: ldur [[HREG:h[0-9]+]], [x0, #-20]
+; CHECK-NEXT: fcvt s0, [[HREG]]
+; CHECK-NEXT: ret
+
+  %arrayidx = getelementptr inbounds i16* %a, i64 -10
+  %tmp = load i16* %arrayidx, align 2
+  %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+  ret float %tmp1
+}
+
+define double @load9(i16* nocapture readonly %a) nounwind {
+; CHECK-LABEL: load9:
+; CHECK-NEXT: ldur [[HREG:h[0-9]+]], [x0, #-20]
+; CHECK-NEXT: fcvt d0, [[HREG]]
+; CHECK-NEXT: ret
+
+  %arrayidx = getelementptr inbounds i16* %a, i64 -10
+  %tmp = load i16* %arrayidx, align 2
+  %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+  %conv = fpext float %tmp1 to double
+  ret double %conv
+}
+
+define void @store0(i16* nocapture %a, float %val) nounwind {
+; CHECK-LABEL: store0:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: str  h0, [x0]
+; CHECK-NEXT: ret
+
+  %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+  store i16 %tmp, i16* %a, align 2
+  ret void
+}
+
+define void @store1(i16* nocapture %a, double %val) nounwind {
+; CHECK-LABEL: store1:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: str  h0, [x0]
+; CHECK-NEXT: ret
+
+  %conv = fptrunc double %val to float
+  %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+  store i16 %tmp, i16* %a, align 2
+  ret void
+}
+
+define void @store2(i16* nocapture %a, i32 %i, float %val) nounwind {
+; CHECK-LABEL: store2:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: str h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+
+  %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+  %idxprom = sext i32 %i to i64
+  %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
+  store i16 %tmp, i16* %arrayidx, align 2
+  ret void
+}
+
+define void @store3(i16* nocapture %a, i32 %i, double %val) nounwind {
+; CHECK-LABEL: store3:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: str h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+
+  %conv = fptrunc double %val to float
+  %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+  %idxprom = sext i32 %i to i64
+  %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
+  store i16 %tmp, i16* %arrayidx, align 2
+  ret void
+}
+
+define void @store4(i16* nocapture %a, i64 %i, float %val) nounwind {
+; CHECK-LABEL: store4:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: str h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+
+  %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+  %arrayidx = getelementptr inbounds i16* %a, i64 %i
+  store i16 %tmp, i16* %arrayidx, align 2
+  ret void
+}
+
+define void @store5(i16* nocapture %a, i64 %i, double %val) nounwind {
+; CHECK-LABEL: store5:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: str h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+
+  %conv = fptrunc double %val to float
+  %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+  %arrayidx = getelementptr inbounds i16* %a, i64 %i
+  store i16 %tmp, i16* %arrayidx, align 2
+  ret void
+}
+
+define void @store6(i16* nocapture %a, float %val) nounwind {
+; CHECK-LABEL: store6:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: str h0, [x0, #20]
+; CHECK-NEXT: ret
+
+  %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+  %arrayidx = getelementptr inbounds i16* %a, i64 10
+  store i16 %tmp, i16* %arrayidx, align 2
+  ret void
+}
+
+define void @store7(i16* nocapture %a, double %val) nounwind {
+; CHECK-LABEL: store7:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: str h0, [x0, #20]
+; CHECK-NEXT: ret
+
+  %conv = fptrunc double %val to float
+  %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+  %arrayidx = getelementptr inbounds i16* %a, i64 10
+  store i16 %tmp, i16* %arrayidx, align 2
+  ret void
+}
+
+define void @store8(i16* nocapture %a, float %val) nounwind {
+; CHECK-LABEL: store8:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: stur h0, [x0, #-20]
+; CHECK-NEXT: ret
+
+  %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+  %arrayidx = getelementptr inbounds i16* %a, i64 -10
+  store i16 %tmp, i16* %arrayidx, align 2
+  ret void
+}
+
+define void @store9(i16* nocapture %a, double %val) nounwind {
+; CHECK-LABEL: store9:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: stur h0, [x0, #-20]
+; CHECK-NEXT: ret
+
+  %conv = fptrunc double %val to float
+  %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+  %arrayidx = getelementptr inbounds i16* %a, i64 -10
+  store i16 %tmp, i16* %arrayidx, align 2
+  ret void
+}
+
+declare i16 @llvm.convert.to.fp16(float) nounwind readnone
+declare float @llvm.convert.from.fp16(i16) nounwind readnone
diff --git a/test/CodeGen/AArch64/fast-isel-mul.ll b/test/CodeGen/AArch64/fast-isel-mul.ll
new file mode 100644
index 0000000..d02c67f
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-mul.ll
@@ -0,0 +1,40 @@
+; RUN: llc -fast-isel -fast-isel-abort -mtriple=aarch64 -o - %s | FileCheck %s
+
+@var8 = global i8 0
+@var16 = global i16 0
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_mul8(i8 %lhs, i8 %rhs) {
+; CHECK-LABEL: test_mul8:
+; CHECK: mul w0, w0, w1
+;  %lhs = load i8* @var8
+;  %rhs = load i8* @var8
+  %prod = mul i8 %lhs, %rhs
+  store i8 %prod, i8* @var8
+  ret void
+}
+
+define void @test_mul16(i16 %lhs, i16 %rhs) {
+; CHECK-LABEL: test_mul16:
+; CHECK: mul w0, w0, w1
+  %prod = mul i16 %lhs, %rhs
+  store i16 %prod, i16* @var16
+  ret void
+}
+
+define void @test_mul32(i32 %lhs, i32 %rhs) {
+; CHECK-LABEL: test_mul32:
+; CHECK: mul w0, w0, w1
+  %prod = mul i32 %lhs, %rhs
+  store i32 %prod, i32* @var32
+  ret void
+}
+
+define void @test_mul64(i64 %lhs, i64 %rhs) {
+; CHECK-LABEL: test_mul64:
+; CHECK: mul x0, x0, x1
+  %prod = mul i64 %lhs, %rhs
+  store i64 %prod, i64* @var64
+  ret void
+}
diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll
index c9b0b9f..77bbcdd 100644
--- a/test/CodeGen/AArch64/flags-multiuse.ll
+++ b/test/CodeGen/AArch64/flags-multiuse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -o - %s | FileCheck %s
 
 ; LLVM should be able to cope with multiple uses of the same flag-setting
 ; instruction at different points of a routine. Either by rematerializing the
diff --git a/test/CodeGen/AArch64/funcptr_cast.ll b/test/CodeGen/AArch64/funcptr_cast.ll
new file mode 100644
index 0000000..a00b7bc
--- /dev/null
+++ b/test/CodeGen/AArch64/funcptr_cast.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define i8 @test() {
+; CHECK-LABEL: @test
+; CHECK: adrp {{x[0-9]+}}, foo
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, :lo12:foo
+; CHECK: ldrb w0, [{{x[0-9]+}}]
+entry:
+  %0 = load i8* bitcast (void (...)* @foo to i8*), align 1
+  ret i8 %0
+}
+
+declare void @foo(...)
diff --git a/test/CodeGen/AArch64/global-merge-1.ll b/test/CodeGen/AArch64/global-merge-1.ll
new file mode 100644
index 0000000..68aba5e
--- /dev/null
+++ b/test/CodeGen/AArch64/global-merge-1.ll
@@ -0,0 +1,26 @@
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+
+; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+
+@m = internal global i32 0, align 4
+@n = internal global i32 0, align 4
+
+define void @f1(i32 %a1, i32 %a2) {
+;CHECK-APPLE-IOS: adrp	x8, __MergedGlobals@PAGE
+;CHECK-APPLE-IOS-NOT: adrp
+;CHECK-APPLE-IOS: add	x8, x8, __MergedGlobals@PAGEOFF
+  store i32 %a1, i32* @m, align 4
+  store i32 %a2, i32* @n, align 4
+  ret void
+}
+
+;CHECK:	.type	_MergedGlobals,@object  // @_MergedGlobals
+;CHECK:	.local	_MergedGlobals
+;CHECK:	.comm	_MergedGlobals,8,8
+
+;CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3 ; @_MergedGlobals
diff --git a/test/CodeGen/AArch64/global-merge-2.ll b/test/CodeGen/AArch64/global-merge-2.ll
new file mode 100644
index 0000000..a773566
--- /dev/null
+++ b/test/CodeGen/AArch64/global-merge-2.ll
@@ -0,0 +1,51 @@
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+
+@x = global i32 0, align 4
+@y = global i32 0, align 4
+@z = global i32 0, align 4
+
+define void @f1(i32 %a1, i32 %a2) {
+;CHECK-APPLE-IOS-LABEL: _f1:
+;CHECK-APPLE-IOS: adrp	x8, __MergedGlobals_x@PAGE
+;CHECK-APPLE-IOS: add	x8, x8, __MergedGlobals_x@PAGEOFF
+;CHECK-APPLE-IOS-NOT: adrp
+  store i32 %a1, i32* @x, align 4
+  store i32 %a2, i32* @y, align 4
+  ret void
+}
+
+define void @g1(i32 %a1, i32 %a2) {
+;CHECK-APPLE-IOS-LABEL: _g1:
+;CHECK-APPLE-IOS: adrp	x8, __MergedGlobals_x@PAGE
+;CHECK-APPLE-IOS: add	x8, x8, __MergedGlobals_x@PAGEOFF
+;CHECK-APPLE-IOS-NOT: adrp
+  store i32 %a1, i32* @y, align 4
+  store i32 %a2, i32* @z, align 4
+  ret void
+}
+
+;CHECK:	.type	_MergedGlobals_x,@object // @_MergedGlobals_x
+;CHECK:	.globl	_MergedGlobals_x
+;CHECK:	.align	3
+;CHECK: _MergedGlobals_x:
+;CHECK:	.size	_MergedGlobals_x, 12
+
+;CHECK:	.globl	x
+;CHECK: x = _MergedGlobals_x
+;CHECK:	.globl	y
+;CHECK: y = _MergedGlobals_x+4
+;CHECK:	.globl	z
+;CHECK: z = _MergedGlobals_x+8
+
+;CHECK-APPLE-IOS: .globl	__MergedGlobals_x       ; @_MergedGlobals_x
+;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_x,12,3
+
+;CHECK-APPLE-IOS: .globl	_x
+;CHECK-APPLE-IOS: _x = __MergedGlobals_x
+;CHECK-APPLE-IOS: .globl	_y
+;CHECK-APPLE-IOS: _y = __MergedGlobals_x+4
+;CHECK-APPLE-IOS: .globl	_z
+;CHECK-APPLE-IOS: _z = __MergedGlobals_x+8
+;CHECK-APPLE-IOS: .subsections_via_symbols
diff --git a/test/CodeGen/AArch64/global-merge-3.ll b/test/CodeGen/AArch64/global-merge-3.ll
new file mode 100644
index 0000000..d455d40
--- /dev/null
+++ b/test/CodeGen/AArch64/global-merge-3.ll
@@ -0,0 +1,51 @@
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+
+@x = global [1000 x i32] zeroinitializer, align 1
+@y = global [1000 x i32] zeroinitializer, align 1
+@z = internal global i32 1, align 4
+
+define void @f1(i32 %a1, i32 %a2, i32 %a3) {
+;CHECK-APPLE-IOS: adrp	x8, __MergedGlobals_x@PAGE
+;CHECK-APPLE-IOS-NOT: adrp
+;CHECK-APPLE-IOS: add	x8, x8, __MergedGlobals_x@PAGEOFF
+;CHECK-APPLE-IOS: adrp	x9, __MergedGlobals_y@PAGE
+;CHECK-APPLE-IOS: add	x9, x9, __MergedGlobals_y@PAGEOFF
+  %x3 = getelementptr inbounds [1000 x i32]* @x, i32 0, i64 3
+  %y3 = getelementptr inbounds [1000 x i32]* @y, i32 0, i64 3
+  store i32 %a1, i32* %x3, align 4
+  store i32 %a2, i32* %y3, align 4
+  store i32 %a3, i32* @z, align 4
+  ret void
+}
+
+;CHECK:	.type	_MergedGlobals_x,@object // @_MergedGlobals_x
+;CHECK: .globl	_MergedGlobals_x
+;CHECK: .align	4
+;CHECK: _MergedGlobals_x:
+;CHECK: .size	_MergedGlobals_x, 4004
+
+;CHECK: .type	_MergedGlobals_y,@object // @_MergedGlobals_y
+;CHECK: .globl	_MergedGlobals_y
+;CHECK: _MergedGlobals_y:
+;CHECK: .size	_MergedGlobals_y, 4000
+
+;CHECK-APPLE-IOS: .globl	__MergedGlobals_x       ; @_MergedGlobals_x
+;CHECK-APPLE-IOS: .align	4
+;CHECK-APPLE-IOS:  __MergedGlobals_x:
+;CHECK-APPLE-IOS: .long 1
+;CHECK-APPLE-IOS: .space	4000
+
+;CHECK-APPLE-IOS: .globl	__MergedGlobals_y       ; @_MergedGlobals_y
+;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_y,4000,4
+
+;CHECK:	.globl	x
+;CHECK: x = _MergedGlobals_x+4
+;CHECK:	.globl	y
+;CHECK: y = _MergedGlobals_y
+
+;CHECK-APPLE-IOS:.globl	_x
+;CHECK-APPLE-IOS: _x = __MergedGlobals_x+4
+;CHECK-APPLE-IOS:.globl	_y
+;CHECK-APPLE-IOS: _y = __MergedGlobals_y
diff --git a/test/Transforms/GlobalMerge/AArch64/arm64.ll b/test/CodeGen/AArch64/global-merge-4.ll
index eea474a..a525ccd 100644
--- a/test/Transforms/GlobalMerge/AArch64/arm64.ll
+++ b/test/CodeGen/AArch64/global-merge-4.ll
@@ -1,23 +1,4 @@
-; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O0 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O0 -o - -global-merge=true | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s
-; RUN: llc %s -O1 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - -global-merge=true | FileCheck -check-prefix=MERGE %s
-
-; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
-; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4
-; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
-
-; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
-; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2
-; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2
-; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2
-; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -o - | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
 target triple = "arm64-apple-ios7.0.0"
@@ -83,6 +64,10 @@ define internal i32* @returnFoo() #1 {
   ret i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0)
 }
 
+;CHECK:	.type	_MergedGlobals,@object  // @_MergedGlobals
+;CHECK:	.local	_MergedGlobals
+;CHECK:	.comm	_MergedGlobals,60,16
+
 attributes #0 = { nounwind ssp }
 attributes #1 = { nounwind readnone ssp }
 attributes #2 = { nounwind }
diff --git a/test/CodeGen/AArch64/global-merge.ll b/test/CodeGen/AArch64/global-merge.ll
new file mode 100644
index 0000000..aed1dc4
--- /dev/null
+++ b/test/CodeGen/AArch64/global-merge.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck --check-prefix=NO-MERGE %s
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 -global-merge-on-external=true | FileCheck --check-prefix=NO-MERGE %s
+
+; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE
+; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE
+
+; FIXME: add O1/O2 test for aarch64-none-linux-gnu and aarch64-apple-ios
+
+@m = internal global i32 0, align 4
+@n = internal global i32 0, align 4
+
+define void @f1(i32 %a1, i32 %a2) {
+; CHECK-LABEL: f1:
+; CHECK: adrp x{{[0-9]+}}, _MergedGlobals
+; CHECK-NOT: adrp
+
+; CHECK-APPLE-IOS-LABEL: f1:
+; CHECK-APPLE-IOS: adrp x{{[0-9]+}}, __MergedGlobals
+; CHECK-APPLE-IOS-NOT: adrp
+  store i32 %a1, i32* @m, align 4
+  store i32 %a2, i32* @n, align 4
+  ret void
+}
+
+; CHECK:        .local _MergedGlobals
+; CHECK:        .comm  _MergedGlobals,8,8
+; NO-MERGE-NOT: .local _MergedGlobals
+
+; CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3
+; CHECK-APPLE-IOS-NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,8,3
diff --git a/test/CodeGen/AArch64/i128-fast-isel-fallback.ll b/test/CodeGen/AArch64/i128-fast-isel-fallback.ll
new file mode 100644
index 0000000..1cffbf3
--- /dev/null
+++ b/test/CodeGen/AArch64/i128-fast-isel-fallback.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O0 -mtriple=arm64-apple-ios7.0 -mcpu=generic < %s | FileCheck %s
+
+; Function Attrs: nounwind ssp
+define void @test1() {
+  %1 = sext i32 0 to i128
+  call void  @test2(i128 %1)
+  ret void
+
+; The i128 is 0 so the we can test to make sure it is propogated into the x
+; registers that make up the i128 pair
+
+; CHECK:  mov  x0, xzr
+; CHECK:  mov  x1, x0
+; CHECK:  bl  _test2
+
+}
+
+declare void @test2(i128)
diff --git a/test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll b/test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll
new file mode 100644
index 0000000..645214a
--- /dev/null
+++ b/test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll
@@ -0,0 +1,26 @@
+; We actually need to use -filetype=obj in this test because if we output
+; assembly, the current code path will bypass the parser and just write the
+; raw text out to the Streamer. We need to actually parse the inlineasm to
+; demonstrate the bug. Going the asm->obj route does not show the issue.
+; RUN: llc -mtriple=aarch64   < %s -filetype=obj | llvm-objdump -arch=aarch64 -d - | FileCheck %s
+
+; CHECK-LABEL: foo:
+; CHECK:       a0 79 95 d2 	 movz	x0, #0xabcd
+; CHECK:       c0 03 5f d6   ret
+define i32 @foo() nounwind {
+entry:
+  %0 = tail call i32 asm sideeffect "ldr $0,=0xabcd", "=r"() nounwind
+  ret i32 %0
+}
+; CHECK-LABEL: bar:
+; CHECK:        40 00 00 58                                      ldr    x0, #8
+; CHECK:        c0 03 5f d6                                      ret
+; Make sure the constant pool entry comes after the return
+; CHECK-LABEL:        $d.1:
+define i32 @bar() nounwind {
+entry:
+  %0 = tail call i32 asm sideeffect "ldr $0,=0x10001", "=r"() nounwind
+  ret i32 %0
+}
+
+
diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll
index 1dfb789..69fbd99 100644
--- a/test/CodeGen/AArch64/jump-table.ll
+++ b/test/CodeGen/AArch64/jump-table.ll
@@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
-; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -o - %s | FileCheck --check-prefix=CHECK-PIC %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-atomic-cfg-tidy=0 -o - %s | FileCheck --check-prefix=CHECK-PIC %s
 
 define i32 @test_jumptable(i32 %in) {
 ; CHECK: test_jumptable
diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll
index 1ce5c95..e4f4295 100644
--- a/test/CodeGen/AArch64/ldst-opt.ll
+++ b/test/CodeGen/AArch64/ldst-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -o - %s | FileCheck %s
 
 ; This file contains tests for the AArch64 load/store optimizer.
 
@@ -166,6 +166,217 @@ bar:
 
 ; Check the following transform:
 ;
+; add x8, x8, #16
+;  ...
+; ldr X, [x8]
+;  ->
+; ldr X, [x8, #16]!
+;
+; with X being either w0, x0, s0, d0 or q0.
+
+%pre.struct.i32 = type { i32, i32, i32}
+%pre.struct.i64 = type { i32, i64, i64}
+%pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>}
+%pre.struct.float = type { i32, float, float}
+%pre.struct.double = type { i32, double, double}
+
+define i32 @load-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond,
+                                   %pre.struct.i32* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-word2
+; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #4]!
+  br i1 %cond, label %if.then, label %if.end
+if.then:
+  %load1 = load %pre.struct.i32** %this
+  %gep1 = getelementptr inbounds %pre.struct.i32* %load1, i64 0, i32 1
+  br label %return
+if.end:
+  %gep2 = getelementptr inbounds %pre.struct.i32* %load2, i64 0, i32 2
+  br label %return
+return:
+  %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ]
+  %ret = load i32* %retptr
+  ret i32 %ret
+}
+
+define i64 @load-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond,
+                                         %pre.struct.i64* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-doubleword2
+; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #8]!
+  br i1 %cond, label %if.then, label %if.end
+if.then:
+  %load1 = load %pre.struct.i64** %this
+  %gep1 = getelementptr inbounds %pre.struct.i64* %load1, i64 0, i32 1
+  br label %return
+if.end:
+  %gep2 = getelementptr inbounds %pre.struct.i64* %load2, i64 0, i32 2
+  br label %return
+return:
+  %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ]
+  %ret = load i64* %retptr
+  ret i64 %ret
+}
+
+define <2 x i64> @load-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond,
+                                             %pre.struct.i128* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-quadword2
+; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #16]!
+  br i1 %cond, label %if.then, label %if.end
+if.then:
+  %load1 = load %pre.struct.i128** %this
+  %gep1 = getelementptr inbounds %pre.struct.i128* %load1, i64 0, i32 1
+  br label %return
+if.end:
+  %gep2 = getelementptr inbounds %pre.struct.i128* %load2, i64 0, i32 2
+  br label %return
+return:
+  %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ]
+  %ret = load <2 x i64>* %retptr
+  ret <2 x i64> %ret
+}
+
+define float @load-pre-indexed-float2(%pre.struct.float** %this, i1 %cond,
+                                      %pre.struct.float* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-float2
+; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #4]!
+  br i1 %cond, label %if.then, label %if.end
+if.then:
+  %load1 = load %pre.struct.float** %this
+  %gep1 = getelementptr inbounds %pre.struct.float* %load1, i64 0, i32 1
+  br label %return
+if.end:
+  %gep2 = getelementptr inbounds %pre.struct.float* %load2, i64 0, i32 2
+  br label %return
+return:
+  %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ]
+  %ret = load float* %retptr
+  ret float %ret
+}
+
+define double @load-pre-indexed-double2(%pre.struct.double** %this, i1 %cond,
+                                        %pre.struct.double* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-double2
+; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #8]!
+  br i1 %cond, label %if.then, label %if.end
+if.then:
+  %load1 = load %pre.struct.double** %this
+  %gep1 = getelementptr inbounds %pre.struct.double* %load1, i64 0, i32 1
+  br label %return
+if.end:
+  %gep2 = getelementptr inbounds %pre.struct.double* %load2, i64 0, i32 2
+  br label %return
+return:
+  %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ]
+  %ret = load double* %retptr
+  ret double %ret
+}
+
+; Check the following transform:
+;
+; add x8, x8, #16
+;  ...
+; str X, [x8]
+;  ->
+; str X, [x8, #16]!
+;
+; with X being either w0, x0, s0, d0 or q0.
+
+define void @store-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond,
+                                     %pre.struct.i32* %load2,
+                                     i32 %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-word2
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]!
+  br i1 %cond, label %if.then, label %if.end
+if.then:
+  %load1 = load %pre.struct.i32** %this
+  %gep1 = getelementptr inbounds %pre.struct.i32* %load1, i64 0, i32 1
+  br label %return
+if.end:
+  %gep2 = getelementptr inbounds %pre.struct.i32* %load2, i64 0, i32 2
+  br label %return
+return:
+  %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ]
+  store i32 %val, i32* %retptr
+  ret void
+}
+
+define void @store-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond,
+                                           %pre.struct.i64* %load2,
+                                           i64 %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-doubleword2
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #8]!
+  br i1 %cond, label %if.then, label %if.end
+if.then:
+  %load1 = load %pre.struct.i64** %this
+  %gep1 = getelementptr inbounds %pre.struct.i64* %load1, i64 0, i32 1
+  br label %return
+if.end:
+  %gep2 = getelementptr inbounds %pre.struct.i64* %load2, i64 0, i32 2
+  br label %return
+return:
+  %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ]
+  store i64 %val, i64* %retptr
+  ret void
+}
+
+define void @store-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond,
+                                         %pre.struct.i128* %load2,
+                                         <2 x i64> %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-quadword2
+; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #16]!
+  br i1 %cond, label %if.then, label %if.end
+if.then:
+  %load1 = load %pre.struct.i128** %this
+  %gep1 = getelementptr inbounds %pre.struct.i128* %load1, i64 0, i32 1
+  br label %return
+if.end:
+  %gep2 = getelementptr inbounds %pre.struct.i128* %load2, i64 0, i32 2
+  br label %return
+return:
+  %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ]
+  store <2 x i64> %val, <2 x i64>* %retptr
+  ret void
+}
+
+define void @store-pre-indexed-float2(%pre.struct.float** %this, i1 %cond,
+                                      %pre.struct.float* %load2,
+                                      float %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-float2
+; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #4]!
+  br i1 %cond, label %if.then, label %if.end
+if.then:
+  %load1 = load %pre.struct.float** %this
+  %gep1 = getelementptr inbounds %pre.struct.float* %load1, i64 0, i32 1
+  br label %return
+if.end:
+  %gep2 = getelementptr inbounds %pre.struct.float* %load2, i64 0, i32 2
+  br label %return
+return:
+  %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ]
+  store float %val, float* %retptr
+  ret void
+}
+
+define void @store-pre-indexed-double2(%pre.struct.double** %this, i1 %cond,
+                                      %pre.struct.double* %load2,
+                                      double %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-double2
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #8]!
+  br i1 %cond, label %if.then, label %if.end
+if.then:
+  %load1 = load %pre.struct.double** %this
+  %gep1 = getelementptr inbounds %pre.struct.double* %load1, i64 0, i32 1
+  br label %return
+if.end:
+  %gep2 = getelementptr inbounds %pre.struct.double* %load2, i64 0, i32 2
+  br label %return
+return:
+  %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ]
+  store double %val, double* %retptr
+  ret void
+}
+
+; Check the following transform:
+;
 ; ldr X, [x20]
 ;  ...
 ; add x20, x20, #32
@@ -294,8 +505,263 @@ exit:
   ret void
 }
 
+; Check the following transform:
+;
+; str X, [x20]
+;  ...
+; add x20, x20, #32
+;  ->
+; str X, [x20], #32
+;
+; with X being either w0, x0, s0, d0 or q0.
+
+define void @store-post-indexed-word(i32* %array, i64 %count, i32 %val) nounwind {
+; CHECK-LABEL: store-post-indexed-word
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #16
+entry:
+  %gep1 = getelementptr i32* %array, i64 2
+  br label %body
+
+body:
+  %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ]
+  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+  %gep2 = getelementptr i32* %iv2, i64 -1
+  %load = load i32* %gep2
+  call void @use-word(i32 %load)
+  store i32 %val, i32* %iv2
+  %iv.next = add i64 %iv, -4
+  %gep3 = getelementptr i32* %iv2, i64 4
+  %cond = icmp eq i64 %iv.next, 0
+  br i1 %cond, label %exit, label %body
+
+exit:
+  ret void
+}
+
+define void @store-post-indexed-doubleword(i64* %array, i64 %count, i64 %val) nounwind {
+; CHECK-LABEL: store-post-indexed-doubleword
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #32
+entry:
+  %gep1 = getelementptr i64* %array, i64 2
+  br label %body
+
+body:
+  %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ]
+  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+  %gep2 = getelementptr i64* %iv2, i64 -1
+  %load = load i64* %gep2
+  call void @use-doubleword(i64 %load)
+  store i64 %val, i64* %iv2
+  %iv.next = add i64 %iv, -4
+  %gep3 = getelementptr i64* %iv2, i64 4
+  %cond = icmp eq i64 %iv.next, 0
+  br i1 %cond, label %exit, label %body
+
+exit:
+  ret void
+}
+
+define void @store-post-indexed-quadword(<2 x i64>* %array, i64 %count, <2 x i64> %val) nounwind {
+; CHECK-LABEL: store-post-indexed-quadword
+; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #64
+entry:
+  %gep1 = getelementptr <2 x i64>* %array, i64 2
+  br label %body
+
+body:
+  %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ]
+  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+  %gep2 = getelementptr <2 x i64>* %iv2, i64 -1
+  %load = load <2 x i64>* %gep2
+  call void @use-quadword(<2 x i64> %load)
+  store <2 x i64> %val, <2 x i64>* %iv2
+  %iv.next = add i64 %iv, -4
+  %gep3 = getelementptr <2 x i64>* %iv2, i64 4
+  %cond = icmp eq i64 %iv.next, 0
+  br i1 %cond, label %exit, label %body
+
+exit:
+  ret void
+}
+
+define void @store-post-indexed-float(float* %array, i64 %count, float %val) nounwind {
+; CHECK-LABEL: store-post-indexed-float
+; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #16
+entry:
+  %gep1 = getelementptr float* %array, i64 2
+  br label %body
+
+body:
+  %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ]
+  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+  %gep2 = getelementptr float* %iv2, i64 -1
+  %load = load float* %gep2
+  call void @use-float(float %load)
+  store float %val, float* %iv2
+  %iv.next = add i64 %iv, -4
+  %gep3 = getelementptr float* %iv2, i64 4
+  %cond = icmp eq i64 %iv.next, 0
+  br i1 %cond, label %exit, label %body
+
+exit:
+  ret void
+}
+
+define void @store-post-indexed-double(double* %array, i64 %count, double %val) nounwind {
+; CHECK-LABEL: store-post-indexed-double
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #32
+entry:
+  %gep1 = getelementptr double* %array, i64 2
+  br label %body
+
+body:
+  %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ]
+  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+  %gep2 = getelementptr double* %iv2, i64 -1
+  %load = load double* %gep2
+  call void @use-double(double %load)
+  store double %val, double* %iv2
+  %iv.next = add i64 %iv, -4
+  %gep3 = getelementptr double* %iv2, i64 4
+  %cond = icmp eq i64 %iv.next, 0
+  br i1 %cond, label %exit, label %body
+
+exit:
+  ret void
+}
+
 declare void @use-word(i32)
 declare void @use-doubleword(i64)
 declare void @use-quadword(<2 x i64>)
 declare void @use-float(float)
 declare void @use-double(double)
+
+; Check the following transform:
+;
+; (ldr|str) X, [x20]
+;  ...
+; sub x20, x20, #16
+;  ->
+; (ldr|str) X, [x20], #-16
+;
+; with X being either w0, x0, s0, d0 or q0.
+
+define void @post-indexed-sub-word(i32* %a, i32* %b, i64 %count) nounwind {
+; CHECK-LABEL: post-indexed-sub-word
+; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #-8
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #-8
+  br label %for.body
+for.body:
+  %phi1 = phi i32* [ %gep4, %for.body ], [ %b, %0 ]
+  %phi2 = phi i32* [ %gep3, %for.body ], [ %a, %0 ]
+  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
+  %gep1 = getelementptr i32* %phi1, i64 -1
+  %load1 = load i32* %gep1
+  %gep2 = getelementptr i32* %phi2, i64 -1
+  store i32 %load1, i32* %gep2
+  %load2 = load i32* %phi1
+  store i32 %load2, i32* %phi2
+  %dec.i = add nsw i64 %i, -1
+  %gep3 = getelementptr i32* %phi2, i64 -2
+  %gep4 = getelementptr i32* %phi1, i64 -2
+  %cond = icmp sgt i64 %dec.i, 0
+  br i1 %cond, label %for.body, label %end
+end:
+  ret void
+}
+
+define void @post-indexed-sub-doubleword(i64* %a, i64* %b, i64 %count) nounwind {
+; CHECK-LABEL: post-indexed-sub-doubleword
+; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-16
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-16
+  br label %for.body
+for.body:
+  %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
+  %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
+  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
+  %gep1 = getelementptr i64* %phi1, i64 -1
+  %load1 = load i64* %gep1
+  %gep2 = getelementptr i64* %phi2, i64 -1
+  store i64 %load1, i64* %gep2
+  %load2 = load i64* %phi1
+  store i64 %load2, i64* %phi2
+  %dec.i = add nsw i64 %i, -1
+  %gep3 = getelementptr i64* %phi2, i64 -2
+  %gep4 = getelementptr i64* %phi1, i64 -2
+  %cond = icmp sgt i64 %dec.i, 0
+  br i1 %cond, label %for.body, label %end
+end:
+  ret void
+}
+
+define void @post-indexed-sub-quadword(<2 x i64>* %a, <2 x i64>* %b, i64 %count) nounwind {
+; CHECK-LABEL: post-indexed-sub-quadword
+; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #-32
+; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #-32
+  br label %for.body
+for.body:
+  %phi1 = phi <2 x i64>* [ %gep4, %for.body ], [ %b, %0 ]
+  %phi2 = phi <2 x i64>* [ %gep3, %for.body ], [ %a, %0 ]
+  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
+  %gep1 = getelementptr <2 x i64>* %phi1, i64 -1
+  %load1 = load <2 x i64>* %gep1
+  %gep2 = getelementptr <2 x i64>* %phi2, i64 -1
+  store <2 x i64> %load1, <2 x i64>* %gep2
+  %load2 = load <2 x i64>* %phi1
+  store <2 x i64> %load2, <2 x i64>* %phi2
+  %dec.i = add nsw i64 %i, -1
+  %gep3 = getelementptr <2 x i64>* %phi2, i64 -2
+  %gep4 = getelementptr <2 x i64>* %phi1, i64 -2
+  %cond = icmp sgt i64 %dec.i, 0
+  br i1 %cond, label %for.body, label %end
+end:
+  ret void
+}
+
+define void @post-indexed-sub-float(float* %a, float* %b, i64 %count) nounwind {
+; CHECK-LABEL: post-indexed-sub-float
+; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #-8
+; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #-8
+  br label %for.body
+for.body:
+  %phi1 = phi float* [ %gep4, %for.body ], [ %b, %0 ]
+  %phi2 = phi float* [ %gep3, %for.body ], [ %a, %0 ]
+  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
+  %gep1 = getelementptr float* %phi1, i64 -1
+  %load1 = load float* %gep1
+  %gep2 = getelementptr float* %phi2, i64 -1
+  store float %load1, float* %gep2
+  %load2 = load float* %phi1
+  store float %load2, float* %phi2
+  %dec.i = add nsw i64 %i, -1
+  %gep3 = getelementptr float* %phi2, i64 -2
+  %gep4 = getelementptr float* %phi1, i64 -2
+  %cond = icmp sgt i64 %dec.i, 0
+  br i1 %cond, label %for.body, label %end
+end:
+  ret void
+}
+
+define void @post-indexed-sub-double(double* %a, double* %b, i64 %count) nounwind {
+; CHECK-LABEL: post-indexed-sub-double
+; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #-16
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #-16
+  br label %for.body
+for.body:
+  %phi1 = phi double* [ %gep4, %for.body ], [ %b, %0 ]
+  %phi2 = phi double* [ %gep3, %for.body ], [ %a, %0 ]
+  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
+  %gep1 = getelementptr double* %phi1, i64 -1
+  %load1 = load double* %gep1
+  %gep2 = getelementptr double* %phi2, i64 -1
+  store double %load1, double* %gep2
+  %load2 = load double* %phi1
+  store double %load2, double* %phi2
+  %dec.i = add nsw i64 %i, -1
+  %gep3 = getelementptr double* %phi2, i64 -2
+  %gep4 = getelementptr double* %phi1, i64 -2
+  %cond = icmp sgt i64 %dec.i, 0
+  br i1 %cond, label %for.body, label %end
+end:
+  ret void
+}
diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg
index 77493d8..125995c 100644
--- a/test/CodeGen/AArch64/lit.local.cfg
+++ b/test/CodeGen/AArch64/lit.local.cfg
@@ -2,8 +2,7 @@ import re
 
 config.suffixes = ['.ll']
 
-targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
+if not 'AArch64' in config.root.targets:
     config.unsupported = True
 
 # For now we don't test arm64-win32.
diff --git a/test/CodeGen/AArch64/memcpy-f128.ll b/test/CodeGen/AArch64/memcpy-f128.ll
new file mode 100644
index 0000000..76db297
--- /dev/null
+++ b/test/CodeGen/AArch64/memcpy-f128.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s
+
+%structA = type { i128 }
+@stubA = internal unnamed_addr constant %structA zeroinitializer, align 8
+
+; Make sure we don't hit llvm_unreachable.
+
+define void @test1() {
+; CHECK-LABEL: @test1
+; CHECK: adrp
+; CHECK: ldr q0
+; CHECK: str q0
+; CHECK: ret
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* bitcast (%structA* @stubA to i8*), i64 48, i32 8, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
diff --git a/test/CodeGen/AArch64/mul_pow2.ll b/test/CodeGen/AArch64/mul_pow2.ll
new file mode 100644
index 0000000..efc0ec8
--- /dev/null
+++ b/test/CodeGen/AArch64/mul_pow2.ll
@@ -0,0 +1,123 @@
+; RUN: llc < %s -march=aarch64 | FileCheck %s
+
+; Convert mul x, pow2 to shift.
+; Convert mul x, pow2 +/- 1 to shift + add/sub.
+
+define i32 @test2(i32 %x) {
+; CHECK-LABEL: test2
+; CHECK: lsl w0, w0, #1
+
+  %mul = shl nsw i32 %x, 1
+  ret i32 %mul
+}
+
+define i32 @test3(i32 %x) {
+; CHECK-LABEL: test3
+; CHECK: add w0, w0, w0, lsl #1
+
+  %mul = mul nsw i32 %x, 3
+  ret i32 %mul
+}
+
+define i32 @test4(i32 %x) {
+; CHECK-LABEL: test4
+; CHECK: lsl w0, w0, #2
+
+  %mul = shl nsw i32 %x, 2
+  ret i32 %mul
+}
+
+define i32 @test5(i32 %x) {
+; CHECK-LABEL: test5
+; CHECK: add w0, w0, w0, lsl #2
+
+
+  %mul = mul nsw i32 %x, 5
+  ret i32 %mul
+}
+
+define i32 @test7(i32 %x) {
+; CHECK-LABEL: test7
+; CHECK: lsl {{w[0-9]+}}, w0, #3
+; CHECK: sub w0, {{w[0-9]+}}, w0
+
+  %mul = mul nsw i32 %x, 7
+  ret i32 %mul
+}
+
+define i32 @test8(i32 %x) {
+; CHECK-LABEL: test8
+; CHECK: lsl w0, w0, #3
+
+  %mul = shl nsw i32 %x, 3
+  ret i32 %mul
+}
+
+define i32 @test9(i32 %x) {
+; CHECK-LABEL: test9
+; CHECK: add w0, w0, w0, lsl #3
+
+  %mul = mul nsw i32 %x, 9
+  ret i32 %mul
+}
+
+; Convert mul x, -pow2 to shift.
+; Convert mul x, -(pow2 +/- 1) to shift + add/sub.
+
+define i32 @ntest2(i32 %x) {
+; CHECK-LABEL: ntest2
+; CHECK: neg w0, w0, lsl #1
+
+  %mul = mul nsw i32 %x, -2
+  ret i32 %mul
+}
+
+define i32 @ntest3(i32 %x) {
+; CHECK-LABEL: ntest3
+; CHECK: add {{w[0-9]+}}, w0, w0, lsl #1
+; CHECK: neg w0, {{w[0-9]+}}
+
+  %mul = mul nsw i32 %x, -3
+  ret i32 %mul
+}
+
+define i32 @ntest4(i32 %x) {
+; CHECK-LABEL: ntest4
+; CHECK:neg w0, w0, lsl #2
+
+  %mul = mul nsw i32 %x, -4
+  ret i32 %mul
+}
+
+define i32 @ntest5(i32 %x) {
+; CHECK-LABEL: ntest5
+; CHECK: add {{w[0-9]+}}, w0, w0, lsl #2
+; CHECK: neg w0, {{w[0-9]+}}
+  %mul = mul nsw i32 %x, -5
+  ret i32 %mul
+}
+
+define i32 @ntest7(i32 %x) {
+; CHECK-LABEL: ntest7
+; CHECK: sub w0, w0, w0, lsl #3
+
+  %mul = mul nsw i32 %x, -7
+  ret i32 %mul
+}
+
+define i32 @ntest8(i32 %x) {
+; CHECK-LABEL: ntest8
+; CHECK: neg w0, w0, lsl #3
+
+  %mul = mul nsw i32 %x, -8
+  ret i32 %mul
+}
+
+define i32 @ntest9(i32 %x) {
+; CHECK-LABEL: ntest9
+; CHECK: add {{w[0-9]+}}, w0, w0, lsl #3
+; CHECK: neg w0, {{w[0-9]+}}
+
+  %mul = mul nsw i32 %x, -9
+  ret i32 %mul
+}
diff --git a/test/CodeGen/AArch64/regress-tail-livereg.ll b/test/CodeGen/AArch64/regress-tail-livereg.ll
index e32ac84..03c3f33 100644
--- a/test/CodeGen/AArch64/regress-tail-livereg.ll
+++ b/test/CodeGen/AArch64/regress-tail-livereg.ll
@@ -17,3 +17,17 @@ define void @foo() {
 ; CHECK: br {{x([0-79]|1[0-8])}}
        ret void
 }
+
+; No matter how tempting it is, LLVM should not use x30 since that'll be
+; restored to its incoming value before the "br".
+define void @test_x30_tail() {
+; CHECK-LABEL: test_x30_tail:
+; CHECK: mov [[DEST:x[0-9]+]], x30
+; CHECK: br [[DEST]]
+  %addr = call i8* @llvm.returnaddress(i32 0)
+  %faddr = bitcast i8* %addr to void()*
+  tail call void %faddr()
+  ret void
+}
+
+declare i8* @llvm.returnaddress(i32)
diff --git a/test/CodeGen/AArch64/trunc-v1i64.ll b/test/CodeGen/AArch64/trunc-v1i64.ll
new file mode 100644
index 0000000..159b8e0
--- /dev/null
+++ b/test/CodeGen/AArch64/trunc-v1i64.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
+
+; An optimization in DAG Combiner to fold
+; (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...))
+; will generate nodes like:
+;     v1i32 trunc v1i64, v1i16 trunc v1i64, v1i8 trunc v1i64.
+; And such nodes will be defaultly scalarized in type legalization. But such
+; scalarization will cause an assertion failure, as v1i64 is a legal type in
+; AArch64. We change the default behaviour from be scalarized to be widen.
+
+; FIXME: Currently XTN is generated for v1i32, but it can be optimized.
+; Just like v1i16 and v1i8, there is no XTN generated.
+
+define <2 x i32> @test_v1i32_0(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i32_0:
+; CHECK: xtn v0.2s, v0.2d
+  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
+  %2 = trunc <2 x i64> %1 to <2 x i32>
+  ret <2 x i32> %2
+}
+
+define <2 x i32> @test_v1i32_1(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i32_1:
+; CHECK: xtn v0.2s, v0.2d
+; CHECK-NEXT: dup v0.2s, v0.s[0]
+  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 undef, i32 0>
+  %2 = trunc <2 x i64> %1 to <2 x i32>
+  ret <2 x i32> %2
+}
+
+define <4 x i16> @test_v1i16_0(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i16_0:
+; CHECK-NOT: xtn
+  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+  %2 = trunc <4 x i64> %1 to <4 x i16>
+  ret <4 x i16> %2
+}
+
+define <4 x i16> @test_v1i16_1(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i16_1:
+; CHECK-NOT: xtn
+; CHECK: dup v0.4h, v0.h[0]
+  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 undef>
+  %2 = trunc <4 x i64> %1 to <4 x i16>
+  ret <4 x i16> %2
+}
+
+define <8 x i8> @test_v1i8_0(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i8_0:
+; CHECK-NOT: xtn
+  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = trunc <8 x i64> %1 to <8 x i8>
+  ret <8 x i8> %2
+}
+
+define <8 x i8> @test_v1i8_1(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i8_1:
+; CHECK-NOT: xtn
+; CHECK: dup v0.8b, v0.b[0]
+  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = trunc <8 x i64> %1 to <8 x i8>
+  ret <8 x i8> %2
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll
index 8a2fe26..5dc7b5d 100644
--- a/test/CodeGen/AArch64/tst-br.ll
+++ b/test/CodeGen/AArch64/tst-br.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-atomic-cfg-tidy=0 | FileCheck %s
 
 ; We've got the usual issues with LLVM reordering blocks here. The
 ; tests are correct for the current order, but who knows when that
diff --git a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
index ca5ae8b..2597b41 100644
--- a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
+++ b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
+; RUN: llc -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 < %s | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "armv7-eabi"
 
diff --git a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
index 4fb2be0..38eb0ea 100644
--- a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
+++ b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a8 -mattr=-neonfp < %s | FileCheck %s
+; RUN: llc -mcpu=cortex-a8 -mattr=-neonfp -arm-atomic-cfg-tidy=0 < %s | FileCheck %s
 ; PR5423
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
index 35995b7..b040b2d 100644
--- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
+++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -4,22 +4,26 @@
 
 %struct.foo = type { i64, i64 }
 
-define zeroext i8 @t(%struct.foo* %this) noreturn optsize {
+define zeroext i8 @t(%struct.foo* %this, i1 %tst) noreturn optsize {
 entry:
 ; ARM-LABEL:       t:
-; ARM:       str r2, [r1], r0
+; ARM-DAG:       mov r[[ADDR:[0-9]+]], #8
+; ARM-DAG:       mov [[VAL:r[0-9]+]], #0
+; ARM:       str [[VAL]], [r[[ADDR]]], r0
 
 ; THUMB-LABEL:     t:
-; THUMB-NOT: str r0, [r1], r0
-; THUMB:     str r1, [r0]
+; THUMB-DAG:       movs r[[ADDR:[0-9]+]], #8
+; THUMB-DAG:       movs [[VAL:r[0-9]+]], #0
+; THUMB-NOT: str {{[a-z0-9]+}}, [{{[a-z0-9]+}}], {{[a-z0-9]+}}
+; THUMB:     str [[VAL]], [r[[ADDR]]]
   %0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1]
   store i32 0, i32* inttoptr (i32 8 to i32*), align 8
-  br i1 undef, label %bb.nph96, label %bb3
+  br i1 %tst, label %bb.nph96, label %bb3
 
 bb3:                                              ; preds = %entry
   %1 = load i64* %0, align 4                      ; <i64> [#uses=0]
-  unreachable
+  ret i8 42
 
 bb.nph96:                                         ; preds = %entry
-  unreachable
+  ret i8 3
 }
diff --git a/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll b/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll
index 32d350e9..e7e0580 100644
--- a/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll
+++ b/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=arm1136jf-s | FileCheck %s
+; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=arm1136jf-s -arm-atomic-cfg-tidy=0 | FileCheck %s
 ; Radar 8589805: Counting the number of microcoded operations, such as for an
 ; LDM instruction, was causing an assertion failure because the microop count
 ; was being treated as an instruction count.
@@ -11,7 +11,7 @@
 define i32 @test(i32 %x) {
 entry:
   %0 = tail call signext i16 undef(i32* undef)
-  switch i32 undef, label %bb3 [
+  switch i32 %x, label %bb3 [
     i32 0, label %bb4
     i32 1, label %bb1
     i32 2, label %bb2
diff --git a/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll b/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
index 85a1137..3950c9e 100644
--- a/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
+++ b/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -asm-verbose=false -O3 -mtriple=armv6-apple-darwin -relocation-model=pic  -mcpu=arm1136jf-s | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -O3 -mtriple=armv6-apple-darwin -relocation-model=pic  -mcpu=arm1136jf-s -arm-atomic-cfg-tidy=0 | FileCheck %s
 ; rdar://8959122 illegal register operands for UMULL instruction
 ;   in cfrac nightly test.
 ; Armv6 generates a umull that must write to two distinct destination regs.
@@ -7,7 +7,7 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:64-n32"
 target triple = "armv6-apple-darwin10"
 
-define void @ptoa() nounwind {
+define void @ptoa(i1 %tst, i8* %p8, i8 %val8) nounwind {
 entry:
   br i1 false, label %bb3, label %bb
 
@@ -16,7 +16,7 @@ bb:                                               ; preds = %entry
 
 bb3:                                              ; preds = %bb, %entry
   %0 = call noalias i8* @malloc() nounwind
-  br i1 undef, label %bb46, label %bb8
+  br i1 %tst, label %bb46, label %bb8
 
 bb8:                                              ; preds = %bb3
   %1 = getelementptr inbounds i8* %0, i32 0
@@ -35,7 +35,7 @@ bb8:                                              ; preds = %bb3
   %7 = or i8 %6, 48
   %8 = add i8 %6, 87
   %iftmp.5.0.1 = select i1 %5, i8 %7, i8 %8
-  store i8 %iftmp.5.0.1, i8* undef, align 1
+  store i8 %iftmp.5.0.1, i8* %p8, align 1
   ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
   ; CHECK-NOT: [[REGISTER]],
   ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -49,7 +49,7 @@ bb8:                                              ; preds = %bb3
   %13 = or i8 %12, 48
   %14 = add i8 %12, 87
   %iftmp.5.0.2 = select i1 %11, i8 %13, i8 %14
-  store i8 %iftmp.5.0.2, i8* undef, align 1
+  store i8 %iftmp.5.0.2, i8* %p8, align 1
   ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
   ; CHECK-NOT: [[REGISTER]],
   ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -73,8 +73,8 @@ bb8:                                              ; preds = %bb3
   %21 = udiv i32 %2, 100000
   %22 = urem i32 %21, 10
   %23 = icmp ult i32 %22, 10
-  %iftmp.5.0.5 = select i1 %23, i8 0, i8 undef
-  store i8 %iftmp.5.0.5, i8* undef, align 1
+  %iftmp.5.0.5 = select i1 %23, i8 0, i8 %val8
+  store i8 %iftmp.5.0.5, i8* %p8, align 1
   ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
   ; CHECK-NOT: [[REGISTER]],
   ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -88,7 +88,7 @@ bb8:                                              ; preds = %bb3
   %28 = or i8 %27, 48
   %29 = add i8 %27, 87
   %iftmp.5.0.6 = select i1 %26, i8 %28, i8 %29
-  store i8 %iftmp.5.0.6, i8* undef, align 1
+  store i8 %iftmp.5.0.6, i8* %p8, align 1
   ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
   ; CHECK-NOT: [[REGISTER]],
   ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -102,7 +102,7 @@ bb8:                                              ; preds = %bb3
   %34 = or i8 %33, 48
   %35 = add i8 %33, 87
   %iftmp.5.0.7 = select i1 %32, i8 %34, i8 %35
-  store i8 %iftmp.5.0.7, i8* undef, align 1
+  store i8 %iftmp.5.0.7, i8* %p8, align 1
   ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
   ; CHECK-NOT: [[REGISTER]],
   ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -117,7 +117,7 @@ bb8:                                              ; preds = %bb3
   %41 = add i8 %39, 87
   %iftmp.5.0.8 = select i1 %38, i8 %40, i8 %41
   store i8 %iftmp.5.0.8, i8* null, align 1
-  unreachable
+  br label %bb46
 
 bb46:                                             ; preds = %bb3
   ret void
diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
index bc72e12..837feb6 100644
--- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
+++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
@@ -8,7 +8,7 @@
 
 @oStruct = external global %struct.Outer, align 4
 
-define void @main() nounwind {
+define void @main(i8 %val8) nounwind {
 ; CHECK-LABEL: main:
 ; CHECK-NOT: ldrd
 ; CHECK: mul
@@ -28,7 +28,7 @@ for.body:                                         ; preds = %_Z14printIsNotZeroi
   br i1 %tobool.i14, label %_Z14printIsNotZeroi.exit17, label %if.then.i16
 
 if.then.i16:                                      ; preds = %_Z14printIsNotZeroi.exit
-  unreachable
+  ret void
 
 _Z14printIsNotZeroi.exit17:                       ; preds = %_Z14printIsNotZeroi.exit
   br label %_Z14printIsNotZeroi.exit17.for.body_crit_edge
@@ -36,7 +36,7 @@ _Z14printIsNotZeroi.exit17:                       ; preds = %_Z14printIsNotZeroi
 _Z14printIsNotZeroi.exit17.for.body_crit_edge:    ; preds = %_Z14printIsNotZeroi.exit17
   %b.phi.trans.insert = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %inc, i32 3
   %tmp3.pre = load i8* %b.phi.trans.insert, align 1
-  %phitmp27 = icmp eq i8 undef, 0
+  %phitmp27 = icmp eq i8 %val8, 0
   br label %for.body
 
 for.end:                                          ; preds = %_Z14printIsNotZeroi.exit17
diff --git a/test/CodeGen/ARM/2012-11-14-subs_carry.ll b/test/CodeGen/ARM/2012-11-14-subs_carry.ll
index 8df295a..3308330 100644
--- a/test/CodeGen/ARM/2012-11-14-subs_carry.ll
+++ b/test/CodeGen/ARM/2012-11-14-subs_carry.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s
 
 ;CHECK-LABEL: foo:
 ;CHECK: adds
diff --git a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
index 480d087..162f863 100644
--- a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
+++ b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
@@ -42,34 +42,34 @@ UnifiedReturnBlock:
   ret i32 %tmp13
 }
 
-define hidden fastcc void @t3(i8** %retaddr) {
+define hidden fastcc void @t3(i8** %retaddr, i1 %tst, i8* %p8) {
 ; CHECK-LABEL: t3:
 ; CHECK: Block address taken
 ; CHECK-NOT: Address of block that was removed by CodeGen
 bb:
   store i8* blockaddress(@t3, %KBBlockZero_return_1), i8** %retaddr
-  br i1 undef, label %bb77, label %bb7.i
+  br i1 %tst, label %bb77, label %bb7.i
 
 bb7.i:                                            ; preds = %bb35
   br label %bb2.i
 
 KBBlockZero_return_1:                             ; preds = %KBBlockZero.exit
-  unreachable
+  ret void
 
 KBBlockZero_return_0:                             ; preds = %KBBlockZero.exit
-  unreachable
+  ret void
 
 bb77:                                             ; preds = %bb26, %bb12, %bb
   ret void
 
 bb2.i:                                            ; preds = %bb6.i350, %bb7.i
-  br i1 undef, label %bb6.i350, label %KBBlockZero.exit
+  br i1 %tst, label %bb6.i350, label %KBBlockZero.exit
 
 bb6.i350:                                         ; preds = %bb2.i
   br label %bb2.i
 
 KBBlockZero.exit:                                 ; preds = %bb2.i
-  indirectbr i8* undef, [label %KBBlockZero_return_1, label %KBBlockZero_return_0]
+  indirectbr i8* %p8, [label %KBBlockZero_return_1, label %KBBlockZero_return_0]
 }
 
 @foo = global i32 ()* null
diff --git a/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll b/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
index a438c1f..05a4ef0 100644
--- a/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
+++ b/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 | FileCheck %s
 ; ModuleID = 'bugpoint-reduced-simplified.bc'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
 target triple = "armv7--linux-gnueabi"
diff --git a/test/CodeGen/ARM/Windows/dllimport.ll b/test/CodeGen/ARM/Windows/dllimport.ll
new file mode 100644
index 0000000..bc737bd
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/dllimport.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple thumbv7-windows -filetype asm -o - %s | FileCheck %s
+
+; ModuleID = 'dllimport.c'
+
+@var = external dllimport global i32
+@ext = external global i32
+declare dllimport arm_aapcs_vfpcc i32 @external()
+declare arm_aapcs_vfpcc i32 @internal()
+
+define arm_aapcs_vfpcc i32 @get_var() {
+  %1 = load i32* @var, align 4
+  ret i32 %1
+}
+
+; CHECK-LABEL: get_var
+; CHECK: movw r0, :lower16:__imp_var
+; CHECK: movt r0, :upper16:__imp_var
+; CHECK: ldr r0, [r0]
+; CHECK: ldr r0, [r0]
+; CHECK: bx lr
+
+define arm_aapcs_vfpcc i32 @get_ext() {
+  %1 = load i32* @ext, align 4
+  ret i32 %1
+}
+
+; CHECK-LABEL: get_ext
+; CHECK: movw r0, :lower16:ext
+; CHECK: movt r0, :upper16:ext
+; CHECK: ldr r0, [r0]
+; CHECK: bx lr
+
+define arm_aapcs_vfpcc i32* @get_var_pointer() {
+  ret i32* @var
+}
+
+; CHECK-LABEL: get_var_pointer
+; CHECK:  movw r0, :lower16:__imp_var
+; CHECK:  movt r0, :upper16:__imp_var
+; CHECK:  ldr r0, [r0]
+; CHECK:  bx lr
+
+define arm_aapcs_vfpcc i32 @call_external() {
+  %call = tail call arm_aapcs_vfpcc i32 @external()
+  ret i32 %call
+}
+
+; CHECK-LABEL: call_external
+; CHECK: movw r0, :lower16:__imp_external
+; CHECK: movt r0, :upper16:__imp_external
+; CHECK: ldr r0, [r0]
+; CHECK: bx r0
+
+define arm_aapcs_vfpcc i32 @call_internal() {
+  %call = tail call arm_aapcs_vfpcc i32 @internal()
+  ret i32 %call
+}
+
+; CHECK-LABEL: call_internal
+; CHECK: b internal
+
diff --git a/test/CodeGen/ARM/Windows/global-minsize.ll b/test/CodeGen/ARM/Windows/global-minsize.ll
new file mode 100644
index 0000000..c0be36c
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/global-minsize.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=thumbv7-windows -filetype asm -o - %s | FileCheck %s
+
+@i = internal global i32 0, align 4
+
+; Function Attrs: minsize
+define arm_aapcs_vfpcc i32* @function() #0 {
+entry:
+  ret i32* @i
+}
+
+attributes #0 = { minsize }
+
+; CHECK: function:
+; CHECK:   movw  r0, :lower16:i
+; CHECK:   movt  r0, :upper16:i
+; CHECK:   bx    lr
diff --git a/test/CodeGen/ARM/Windows/long-calls.ll b/test/CodeGen/ARM/Windows/long-calls.ll
new file mode 100644
index 0000000..e35f414
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/long-calls.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=thumbv7-windows -mcpu=cortex-a9 -arm-long-calls -o - %s \
+; RUN:    | FileCheck %s
+
+declare arm_aapcs_vfpcc void @callee()
+
+define arm_aapcs_vfpcc void @caller() nounwind {
+entry:
+  tail call void @callee()
+  ret void
+}
+
+; CHECK-LABEL: caller
+; CHECK: ldr [[REG:r[0-9]+]], [[CPI:.LCPI[_0-9]+]]
+; CHECK: bx [[REG]]
+; CHECK: .align 2
+; CHECK: [[CPI]]:
+; CHECK: .long callee
+
diff --git a/test/CodeGen/ARM/Windows/structors.ll b/test/CodeGen/ARM/Windows/structors.ll
new file mode 100644
index 0000000..a1a9026
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/structors.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple thumbv7-windows-itanium -o - %s | FileCheck %s
+
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @function, i8* null }]
+
+define arm_aapcs_vfpcc void @function() {
+entry:
+  ret void
+}
+
+; CHECK: .section .CRT$XCU,"rd"
+; CHECK: .long function
+
diff --git a/test/CodeGen/ARM/Windows/vla.ll b/test/CodeGen/ARM/Windows/vla.ll
new file mode 100644
index 0000000..56901de
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/vla.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -o - %s \
+; RUN:  | FileCheck %s -check-prefix CHECK-SMALL-CODE
+; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -code-model=large -o - %s \
+; RUN:  | FileCheck %s -check-prefix CHECK-LARGE-CODE
+; RUN: llc -mtriple=thumbv7-windows-msvc -mcpu=cortex-a9 -o - %s \
+; RUN:  | FileCheck %s -check-prefix CHECK-MSVC
+
+define arm_aapcs_vfpcc i8 @function(i32 %sz, i32 %idx) {
+entry:
+  %vla = alloca i8, i32 %sz, align 1
+  %arrayidx = getelementptr inbounds i8* %vla, i32 %idx
+  %0 = load volatile i8* %arrayidx, align 1
+  ret i8 %0
+}
+
+; CHECK-SMALL-CODE:   adds [[R4:r[0-9]+]], #7
+; CHECK-SMALL-CODE:   bic [[R4]], [[R4]], #7
+; CHECK-SMALL-CODE:   lsrs r4, [[R4]], #2
+; CHECK-SMALL-CODE:   bl __chkstk
+; CHECK-SMALL-CODE:   sub.w sp, sp, r4
+
+; CHECK-LARGE-CODE:   adds  [[R4:r[0-9]+]], #7
+; CHECK-LARGE-CODE:   bic   [[R4]], [[R4]], #7
+; CHECK-LARGE-CODE:   lsrs  r4, [[R4]], #2
+; CHECK-LARGE-CODE:   movw  [[IP:r[0-9]+]], :lower16:__chkstk
+; CHECK-LARGE-CODE:   movt  [[IP]], :upper16:__chkstk
+; CHECK-LARGE-CODE:   blx   [[IP]]
+; CHECK-LARGE-CODE:   sub.w sp, sp, r4
+
+; CHECK-MSVC-NOT: __chkstk
+
diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll
index 4de305b..f55ae10 100644
--- a/test/CodeGen/ARM/aliases.ll
+++ b/test/CodeGen/ARM/aliases.ll
@@ -29,7 +29,7 @@ define i32 @foo_f() {
 
 @bar_i = alias internal i32* @bar
 
-@A = alias i64, i32* @bar
+@A = alias bitcast (i32* @bar to i64*)
 
 define i32 @test() {
 entry:
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll
index bf827d6..14eef83 100644
--- a/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -1,8 +1,8 @@
-; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck -check-prefix=ARM %s
-; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck -check-prefix=THUMB %s
-; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \
+; RUN: llc -mtriple=arm-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=ARM %s
+; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=THUMB %s
+; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \
 ; RUN:   | FileCheck -check-prefix=T2 %s
-; RUN: llc -mtriple=thumbv8-eabi %s -o - | FileCheck -check-prefix=V8 %s
+; RUN: llc -mtriple=thumbv8-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=V8 %s
 
 ; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified.
 
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index 9913f30..462c185 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -171,9 +171,10 @@ define i64 @test6(i64* %ptr, i64 %val) {
 
 define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK-LABEL: test7:
-; CHECK: dmb {{ish$}}
+; CHECK-DAG: mov [[VAL1LO:r[0-9]+]], r1
+; CHECK-DAG: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK-LE-DAG: eor     [[MISMATCH_LO:r[0-9]+]], [[REG1]], r1
+; CHECK-LE-DAG: eor     [[MISMATCH_LO:r[0-9]+]], [[REG1]], [[VAL1LO]]
 ; CHECK-LE-DAG: eor     [[MISMATCH_HI:r[0-9]+]], [[REG2]], r2
 ; CHECK-BE-DAG: eor     [[MISMATCH_LO:r[0-9]+]], [[REG2]], r2
 ; CHECK-BE-DAG: eor     [[MISMATCH_HI:r[0-9]+]], [[REG1]], r1
@@ -189,16 +190,17 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB-LE-DAG: eor.w     [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2
 ; CHECK-THUMB-LE-DAG: eor.w     [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3
-; CHECK-THUMB-BE-DAG: eor.w     [[MISMATCH_HI:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB-BE-DAG: eor.w     [[MISMATCH_LO:[a-z0-9]+]], [[REG2]]
-; CHECK-THUMB: orrs    [[MISMATCH_HI]], [[MISMATCH_LO]]
+; CHECK-THUMB-BE-DAG: eor.w     [[MISMATCH_HI:[a-z0-9]+]], [[REG1]], r2
+; CHECK-THUMB-BE-DAG: eor.w     [[MISMATCH_LO:[a-z0-9]+]], [[REG2]], r3
+; CHECK-THUMB-LE: orrs    [[MISMATCH_HI]], [[MISMATCH_LO]]
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
-  %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst
+  %pair = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst
+  %r = extractvalue { i64, i1 } %pair, 0
   ret i64 %r
 }
 
diff --git a/test/CodeGen/ARM/atomic-cmp.ll b/test/CodeGen/ARM/atomic-cmp.ll
index a473807..629b16d 100644
--- a/test/CodeGen/ARM/atomic-cmp.ll
+++ b/test/CodeGen/ARM/atomic-cmp.ll
@@ -11,5 +11,6 @@ define i8 @t(i8* %a, i8 %b, i8 %c) nounwind {
 ; T2: ldrexb
 ; T2: strexb
   %tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic monotonic
-  ret i8 %tmp0
+  %tmp1 = extractvalue { i8, i1 } %tmp0, 0
+  ret i8 %tmp1
 }
diff --git a/test/CodeGen/ARM/atomic-load-store.ll b/test/CodeGen/ARM/atomic-load-store.ll
index 45a263d..49342d2 100644
--- a/test/CodeGen/ARM/atomic-load-store.ll
+++ b/test/CodeGen/ARM/atomic-load-store.ll
@@ -5,13 +5,13 @@
 ; RUN: llc < %s -mtriple=armv4-apple-ios | FileCheck %s -check-prefix=ARMV4
 
 define void @test1(i32* %ptr, i32 %val1) {
-; ARM: test1
+; ARM-LABEL: test1
 ; ARM: dmb {{ish$}}
 ; ARM-NEXT: str
 ; ARM-NEXT: dmb {{ish$}}
-; THUMBONE: test1
+; THUMBONE-LABEL: test1
 ; THUMBONE: __sync_lock_test_and_set_4
-; THUMBTWO: test1
+; THUMBTWO-LABEL: test1
 ; THUMBTWO: dmb {{ish$}}
 ; THUMBTWO-NEXT: str
 ; THUMBTWO-NEXT: dmb {{ish$}}
@@ -20,12 +20,12 @@ define void @test1(i32* %ptr, i32 %val1) {
 }
 
 define i32 @test2(i32* %ptr) {
-; ARM: test2
+; ARM-LABEL: test2
 ; ARM: ldr
 ; ARM-NEXT: dmb {{ish$}}
-; THUMBONE: test2
+; THUMBONE-LABEL: test2
 ; THUMBONE: __sync_val_compare_and_swap_4
-; THUMBTWO: test2
+; THUMBTWO-LABEL: test2
 ; THUMBTWO: ldr
 ; THUMBTWO-NEXT: dmb {{ish$}}
   %val = load atomic i32* %ptr seq_cst, align 4
@@ -33,22 +33,35 @@ define i32 @test2(i32* %ptr) {
 }
 
 define void @test3(i8* %ptr1, i8* %ptr2) {
-; ARM: test3
+; ARM-LABEL: test3
+; ARM-NOT: dmb
 ; ARM: ldrb
+; ARM-NOT: dmb
 ; ARM: strb
-; THUMBTWO: test3
+; ARM-NOT: dmb
+; ARM: bx lr
+
+; THUMBTWO-LABEL: test3
+; THUMBTWO-NOT: dmb
 ; THUMBTWO: ldrb
+; THUMBTWO-NOT: dmb
 ; THUMBTWO: strb
-; THUMBONE: test3
+; THUMBTWO-NOT: dmb
+; THUMBTWO: bx lr
+
+; THUMBONE-LABEL: test3
+; THUMBONE-NOT: dmb
 ; THUMBONE: ldrb
+; THUMBONE-NOT: dmb
 ; THUMBONE: strb
+; THUMBONE-NOT: dmb
   %val = load atomic i8* %ptr1 unordered, align 1
   store atomic i8 %val, i8* %ptr2 unordered, align 1
   ret void
 }
 
 define void @test4(i8* %ptr1, i8* %ptr2) {
-; THUMBONE: test4
+; THUMBONE-LABEL: test4
 ; THUMBONE: ___sync_val_compare_and_swap_1
 ; THUMBONE: ___sync_lock_test_and_set_1
   %val = load atomic i8* %ptr1 seq_cst, align 1
@@ -57,14 +70,14 @@ define void @test4(i8* %ptr1, i8* %ptr2) {
 }
 
 define i64 @test_old_load_64bit(i64* %p) {
-; ARMV4: test_old_load_64bit
+; ARMV4-LABEL: test_old_load_64bit
 ; ARMV4: ___sync_val_compare_and_swap_8
   %1 = load atomic i64* %p seq_cst, align 8
   ret i64 %1
 }
 
 define void @test_old_store_64bit(i64* %p, i64 %v) {
-; ARMV4: test_old_store_64bit
+; ARMV4-LABEL: test_old_store_64bit
 ; ARMV4: ___sync_lock_test_and_set_8
   store atomic i64 %v, i64* %p seq_cst, align 8
   ret void
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
index ac8e949..b988242 100644
--- a/test/CodeGen/ARM/atomic-op.ll
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -198,7 +198,8 @@ entry:
 define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) {
 ; CHECK-LABEL: test_cmpxchg_fail_order:
 
-  %oldval = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+  %oldval = extractvalue { i32, i1 } %pair, 0
 ; CHECK:     dmb ish
 ; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
 ; CHECK:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
@@ -216,7 +217,8 @@ define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) {
 define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) {
 ; CHECK-LABEL: test_cmpxchg_fail_order1:
 
-  %oldval = cmpxchg i32* %addr, i32 %desired, i32 %new acquire acquire
+  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new acquire acquire
+  %oldval = extractvalue { i32, i1 } %pair, 0
 ; CHECK-NOT:     dmb ish
 ; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
 ; CHECK:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll
index a39565e..7072aaa 100644
--- a/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -1051,7 +1051,8 @@ define void @test_atomic_load_umax_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_cmpxchg_i8(i8 zeroext %wanted, i8 zeroext %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8:
-   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
+   %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
+   %old = extractvalue { i8, i1 } %pair, 0
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
@@ -1077,7 +1078,8 @@ define i8 @test_atomic_cmpxchg_i8(i8 zeroext %wanted, i8 zeroext %new) nounwind
 
 define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16:
-   %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
+   %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
+   %old = extractvalue { i16, i1 } %pair, 0
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
@@ -1103,7 +1105,8 @@ define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounw
 
 define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32:
-   %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
+   %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
+   %old = extractvalue { i32, i1 } %pair, 0
    store i32 %old, i32* @var32
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
@@ -1130,7 +1133,8 @@ define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 
 define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64:
-   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
+   %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
+   %old = extractvalue { i64, i1 } %pair, 0
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
diff --git a/test/CodeGen/ARM/big-endian-neon-extend.ll b/test/CodeGen/ARM/big-endian-neon-extend.ll
new file mode 100644
index 0000000..931c6c3
--- /dev/null
+++ b/test/CodeGen/ARM/big-endian-neon-extend.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s -mtriple armeb-eabi -mattr v7,neon -o - | FileCheck %s
+
+define void @vector_ext_2i8_to_2i64( <2 x i8>* %loadaddr, <2 x i64>* %storeaddr ) {
+; CHECK-LABEL: vector_ext_2i8_to_2i64:
+; CHECK:       vld1.16 {[[REG:d[0-9]+]]
+; CHECK:       vmov.i64 {{q[0-9]+}}, #0xff
+; CHECK:       vrev16.8  [[REG]], [[REG]]
+; CHECK:       vmovl.u8  {{q[0-9]+}}, [[REG]]
+  %1 = load <2 x i8>* %loadaddr
+  %2 = zext <2 x i8> %1 to <2 x i64>
+  store <2 x i64> %2, <2 x i64>* %storeaddr
+  ret void
+}
+
+define void @vector_ext_2i16_to_2i64( <2 x i16>* %loadaddr, <2 x i64>* %storeaddr ) {
+; CHECK-LABEL: vector_ext_2i16_to_2i64:
+; CHECK:       vld1.32 {[[REG:d[0-9]+]]
+; CHECK:       vmov.i64 {{q[0-9]+}}, #0xffff
+; CHECK:       vrev32.16  [[REG]], [[REG]]
+; CHECK:       vmovl.u16  {{q[0-9]+}}, [[REG]]
+  %1 = load <2 x i16>* %loadaddr
+  %2 = zext <2 x i16> %1 to <2 x i64>
+  store <2 x i64> %2, <2 x i64>* %storeaddr
+  ret void
+}
+
+
+define void @vector_ext_2i8_to_2i32( <2 x i8>* %loadaddr, <2 x i32>* %storeaddr ) {
+; CHECK-LABEL: vector_ext_2i8_to_2i32:
+; CHECK:       vld1.16 {[[REG:d[0-9]+]]
+; CHECK:       vrev16.8  [[REG]], [[REG]]
+  %1 = load <2 x i8>* %loadaddr
+  %2 = zext <2 x i8> %1 to <2 x i32>
+  store <2 x i32> %2, <2 x i32>* %storeaddr
+  ret void
+}
+
+define void @vector_ext_2i16_to_2i32( <2 x i16>* %loadaddr, <2 x i32>* %storeaddr ) {
+; CHECK-LABEL: vector_ext_2i16_to_2i32:
+; CHECK:       vld1.32 {[[REG:d[0-9]+]]
+; CHECK:       vrev32.16  [[REG]], [[REG]]
+; CHECK:       vmovl.u16  {{q[0-9]+}}, [[REG]]
+  %1 = load <2 x i16>* %loadaddr
+  %2 = zext <2 x i16> %1 to <2 x i32>
+  store <2 x i32> %2, <2 x i32>* %storeaddr
+  ret void
+}
+
+define void @vector_ext_2i8_to_2i16( <2 x i8>* %loadaddr, <2 x i16>* %storeaddr ) {
+; CHECK-LABEL: vector_ext_2i8_to_2i16:
+; CHECK:       vld1.16 {[[REG:d[0-9]+]]
+; CHECK:       vrev16.8  [[REG]], [[REG]]
+; CHECK:       vmovl.u8  {{q[0-9]+}}, [[REG]]
+  %1 = load <2 x i8>* %loadaddr
+  %2 = zext <2 x i8> %1 to <2 x i16>
+  store <2 x i16> %2, <2 x i16>* %storeaddr
+  ret void
+}
+
+define void @vector_ext_4i8_to_4i32( <4 x i8>* %loadaddr, <4 x i32>* %storeaddr ) {
+; CHECK-LABEL: vector_ext_4i8_to_4i32:
+; CHECK:       vld1.32 {[[REG:d[0-9]+]]
+; CHECK:       vrev32.8  [[REG]], [[REG]]
+; CHECK:       vmovl.u8  {{q[0-9]+}}, [[REG]]
+  %1 = load <4 x i8>* %loadaddr
+  %2 = zext <4 x i8> %1 to <4 x i32>
+  store <4 x i32> %2, <4 x i32>* %storeaddr
+  ret void
+}
+
+define void @vector_ext_4i8_to_4i16( <4 x i8>* %loadaddr, <4 x i16>* %storeaddr ) {
+; CHECK-LABEL: vector_ext_4i8_to_4i16:
+; CHECK:       vld1.32 {[[REG:d[0-9]+]]
+; CHECK:       vrev32.8  [[REG]], [[REG]]
+; CHECK:       vmovl.u8  {{q[0-9]+}}, [[REG]]
+  %1 = load <4 x i8>* %loadaddr
+  %2 = zext <4 x i8> %1 to <4 x i16>
+  store <4 x i16> %2, <4 x i16>* %storeaddr
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/big-endian-neon-trunc-store.ll b/test/CodeGen/ARM/big-endian-neon-trunc-store.ll
new file mode 100644
index 0000000..65147ad
--- /dev/null
+++ b/test/CodeGen/ARM/big-endian-neon-trunc-store.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple armeb-eabi -mattr v7,neon -o - | FileCheck %s
+
+define void @vector_trunc_store_2i64_to_2i16( <2 x i64>* %loadaddr, <2 x i16>* %storeaddr ) {
+; CHECK-LABEL: vector_trunc_store_2i64_to_2i16:
+; CHECK:       vmovn.i64  [[REG:d[0-9]+]]
+; CHECK:       vrev32.16  [[REG]], [[REG]]
+; CHECK:       vuzp.16    [[REG]], [[REG2:d[0-9]+]]
+; CHECK:       vrev32.16  [[REG]], [[REG2]]
+  %1 = load <2 x i64>* %loadaddr
+  %2 = trunc <2 x i64> %1 to <2 x i16>
+  store <2 x i16> %2, <2 x i16>* %storeaddr
+  ret void
+}
+
+define void @vector_trunc_store_4i32_to_4i8( <4 x i32>* %loadaddr, <4 x i8>* %storeaddr ) {
+; CHECK-LABEL: vector_trunc_store_4i32_to_4i8:
+; CHECK:       vmovn.i32 [[REG:d[0-9]+]]
+; CHECK:       vrev16.8  [[REG]], [[REG]]
+; CHECK:       vuzp.8    [[REG]], [[REG2:d[0-9]+]]
+; CHECK:       vrev32.8  [[REG]], [[REG2]]
+  %1 = load <4 x i32>* %loadaddr
+  %2 = trunc <4 x i32> %1 to <4 x i8>
+  store <4 x i8> %2, <4 x i8>* %storeaddr
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/big-endian-ret-f64.ll b/test/CodeGen/ARM/big-endian-ret-f64.ll
new file mode 100644
index 0000000..614bfc0
--- /dev/null
+++ b/test/CodeGen/ARM/big-endian-ret-f64.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=armebv7a-eabi %s -O0 -o - | FileCheck %s
+; RUN: llc -mtriple=armebv8a-eabi %s -O0 -o - | FileCheck %s
+
+define double @fn() {
+; CHECK-LABEL: fn
+; CHECK: ldr r0, [sp]
+; CHECK: ldr r1, [sp, #4]
+  %r = alloca double, align 8
+  %1 = load double* %r, align 8
+  ret double %1
+}
+
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index 40694bf..a35fd74 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=armv6-apple-ios5.0 -mattr=+vfp2 | FileCheck %s -check-prefix=CHECKV6
-; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D
-; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 \
+; RUN: llc < %s -mtriple=armv6-apple-ios5.0 -mattr=+vfp2 -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=CHECKV6
+; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=CHECKT2D
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-atomic-cfg-tidy=0 \
 ; RUN:    | FileCheck %s -check-prefix=CHECKELF
 
 ; Enable tailcall optimization for iOS 5.0
diff --git a/test/CodeGen/ARM/cmpxchg-idioms.ll b/test/CodeGen/ARM/cmpxchg-idioms.ll
new file mode 100644
index 0000000..fb88575
--- /dev/null
+++ b/test/CodeGen/ARM/cmpxchg-idioms.ll
@@ -0,0 +1,107 @@
+; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s | FileCheck %s
+
+define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
+; CHECK-LABEL: test_return:
+
+; CHECK: dmb ishst
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
+; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
+; CHECK: cmp [[LOADED]], r1
+; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]]
+
+; CHECK: strex [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
+; CHECK: cmp [[STATUS]], #0
+; CHECK: bne [[LOOP]]
+
+; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: movs r0, #1
+; CHECK: dmb ish
+; CHECK: bx lr
+
+; CHECK: [[FAILED]]:
+; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: movs r0, #0
+; CHECK: dmb ish
+; CHECK: bx lr
+
+  %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
+  %success = extractvalue { i32, i1 } %pair, 1
+  %conv = zext i1 %success to i32
+  ret i32 %conv
+}
+
+define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {
+; CHECK-LABEL: test_return_bool:
+
+; CHECK: uxtb [[OLDBYTE:r[0-9]+]], r1
+; CHECK: dmb ishst
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
+; CHECK: ldrexb [[LOADED:r[0-9]+]], [r0]
+; CHECK: cmp [[LOADED]], [[OLDBYTE]]
+; CHECK: bne [[FAIL:LBB[0-9]+_[0-9]+]]
+
+; CHECK: strexb [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
+; CHECK: cmp [[STATUS]], #0
+; CHECK: bne [[LOOP]]
+
+  ; FIXME: this eor is redundant. Need to teach DAG combine that.
+; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: movs [[TMP:r[0-9]+]], #1
+; CHECK: eor r0, [[TMP]], #1
+; CHECK: bx lr
+
+; CHECK: [[FAIL]]:
+; CHECK: movs [[TMP:r[0-9]+]], #0
+; CHECK: eor r0, [[TMP]], #1
+; CHECK: bx lr
+
+
+  %pair = cmpxchg i8* %value, i8 %oldValue, i8 %newValue acq_rel monotonic
+  %success = extractvalue { i8, i1 } %pair, 1
+  %failure = xor i1 %success, 1
+  ret i1 %failure
+}
+
+define void @test_conditional(i32* %p, i32 %oldval, i32 %newval) {
+; CHECK-LABEL: test_conditional:
+
+; CHECK: dmb ishst
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
+; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
+; CHECK: cmp [[LOADED]], r1
+; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]]
+
+; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0]
+; CHECK: cmp [[STATUS]], #0
+; CHECK: bne [[LOOP]]
+
+; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: dmb ish
+; CHECK: b.w _bar
+
+; CHECK: [[FAILED]]:
+; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: dmb ish
+; CHECK: b.w _baz
+
+  %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
+  %success = extractvalue { i32, i1 } %pair, 1
+  br i1 %success, label %true, label %false
+
+true:
+  tail call void @bar() #2
+  br label %end
+
+false:
+  tail call void @baz() #2
+  br label %end
+
+end:
+  ret void
+}
+
+declare void @bar()
+declare void @baz()
diff --git a/test/CodeGen/ARM/cmpxchg-weak.ll b/test/CodeGen/ARM/cmpxchg-weak.ll
new file mode 100644
index 0000000..126e330
--- /dev/null
+++ b/test/CodeGen/ARM/cmpxchg-weak.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
+
+define void @test_cmpxchg_weak(i32 *%addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: test_cmpxchg_weak:
+
+  %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+  %oldval = extractvalue { i32, i1 } %pair, 0
+; CHECK:     dmb ish
+; CHECK:     ldrex   [[LOADED:r[0-9]+]], [r0]
+; CHECK:     cmp     [[LOADED]], r1
+; CHECK:     strexeq [[SUCCESS:r[0-9]+]], r2, [r0]
+; CHECK:     cmpeq   [[SUCCESS]], #0
+; CHECK:     bne     [[DONE:LBB[0-9]+_[0-9]+]]
+; CHECK:     dmb     ish
+; CHECK: [[DONE]]:
+; CHECK:     str     r3, [r0]
+; CHECK:     bx      lr
+
+  store i32 %oldval, i32* %addr
+  ret void
+}
+
+
+define i1 @test_cmpxchg_weak_to_bool(i32, i32 *%addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: test_cmpxchg_weak_to_bool:
+
+  %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+  %success = extractvalue { i32, i1 } %pair, 1
+
+; CHECK:      dmb     ish
+; CHECK:      mov     r0, #0
+; CHECK:      ldrex   [[LOADED:r[0-9]+]], [r1]
+; CHECK:      cmp     [[LOADED]], r2
+; CHECK:      strexeq [[STATUS:r[0-9]+]], r3, [r1]
+; CHECK:      cmpeq   [[STATUS]], #0
+; CHECK:      bne     [[DONE:LBB[0-9]+_[0-9]+]]
+; CHECK:      dmb     ish
+; CHECK:      mov     r0, #1
+; CHECK: [[DONE]]:
+; CHECK:      bx      lr
+
+  ret i1 %success
+}
diff --git a/test/CodeGen/ARM/data-in-code-annotations.ll b/test/CodeGen/ARM/data-in-code-annotations.ll
index da70178..5eb81b2 100644
--- a/test/CodeGen/ARM/data-in-code-annotations.ll
+++ b/test/CodeGen/ARM/data-in-code-annotations.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-darwin -arm-atomic-cfg-tidy=0 | FileCheck %s
 
 define double @f1() nounwind {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index e8bf3ba..31d0324 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -59,7 +59,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !24 = metadata !{i32 11, i32 81, metadata !1, null}
 !25 = metadata !{i32 11, i32 101, metadata !1, null}
 !26 = metadata !{i32 12, i32 3, metadata !27, null}
-!27 = metadata !{i32 786443, metadata !1, i32 11, i32 107, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 786443, metadata !2, metadata !1, i32 11, i32 107, i32 0} ; [ DW_TAG_lexical_block ]
 !28 = metadata !{i32 13, i32 5, metadata !27, null}
 !29 = metadata !{i32 14, i32 1, metadata !27, null}
 !30 = metadata !{metadata !1}
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
index 6cbe4b4..5ad5e59 100644
--- a/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -231,10 +231,10 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
 !133 = metadata !{i32 609, i32 175, metadata !23, null}
 !134 = metadata !{i32 786689, metadata !23, metadata !"data", metadata !24, i32 67109473, metadata !108, i32 0, null} ; [ DW_TAG_arg_variable ]
 !135 = metadata !{i32 609, i32 190, metadata !23, null}
-!136 = metadata !{i32 786688, metadata !23, metadata !"mydata", metadata !24, i32 604, metadata !50, i32 0, null, i64 1, i64 20, i64 2, i64 1, i64 4, i64 2, i64 1, i64 24} ; [ DW_TAG_auto_variable ]
+!136 = metadata !{i32 786688, metadata !23, metadata !"mydata", metadata !24, i32 604, metadata !50, i32 0, null, metadata !163} ; [ DW_TAG_auto_variable ]
 !137 = metadata !{i32 604, i32 49, metadata !23, null}
-!138 = metadata !{i32 786688, metadata !23, metadata !"self", metadata !40, i32 604, metadata !90, i32 0, null, i64 1, i64 24} ; [ DW_TAG_auto_variable ]
-!139 = metadata !{i32 786688, metadata !23, metadata !"semi", metadata !24, i32 607, metadata !125, i32 0, null, i64 1, i64 28} ; [ DW_TAG_auto_variable ]
+!138 = metadata !{i32 786688, metadata !23, metadata !"self", metadata !40, i32 604, metadata !90, i32 0, null, metadata !164} ; [ DW_TAG_auto_variable ]
+!139 = metadata !{i32 786688, metadata !23, metadata !"semi", metadata !24, i32 607, metadata !125, i32 0, null, metadata !165} ; [ DW_TAG_auto_variable ]
 !140 = metadata !{i32 607, i32 30, metadata !23, null}
 !141 = metadata !{i32 610, i32 17, metadata !142, null}
 !142 = metadata !{i32 786443, metadata !152, metadata !23, i32 609, i32 200, i32 94} ; [ DW_TAG_lexical_block ]
@@ -258,3 +258,6 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
 !160 = metadata !{metadata !"header.h", metadata !"/Volumes/Sandbox/llvm"}
 !161 = metadata !{metadata !"header2.h", metadata !"/Volumes/Sandbox/llvm"}
 !162 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!163 = metadata !{i64 1, i64 20, i64 2, i64 1, i64 4, i64 2, i64 1, i64 24}
+!164 = metadata !{i64 1, i64 24}
+!165 = metadata !{i64 1, i64 28}
diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll
index 695a20b..eb0120f 100644
--- a/test/CodeGen/ARM/fold-stack-adjust.ll
+++ b/test/CodeGen/ARM/fold-stack-adjust.ll
@@ -12,11 +12,11 @@ declare void @bar(i8*)
 
 define void @check_simple() minsize {
 ; CHECK-LABEL: check_simple:
-; CHECK: push {r3, r4, r5, r6, r7, lr}
+; CHECK: push.w {r7, r8, r9, r10, r11, lr}
 ; CHECK-NOT: sub sp, sp,
 ; ...
 ; CHECK-NOT: add sp, sp,
-; CHECK: pop {r0, r1, r2, r3, r7, pc}
+; CHECK: pop.w {r0, r1, r2, r3, r11, pc}
 
 ; CHECK-T1-LABEL: check_simple:
 ; CHECK-T1: push {r3, r4, r5, r6, r7, lr}
@@ -44,11 +44,11 @@ define void @check_simple() minsize {
 
 define void @check_simple_too_big() minsize {
 ; CHECK-LABEL: check_simple_too_big:
-; CHECK: push {r7, lr}
+; CHECK: push.w {r11, lr}
 ; CHECK: sub sp,
 ; ...
 ; CHECK: add sp,
-; CHECK: pop {r7, pc}
+; CHECK: pop.w {r11, pc}
   %var = alloca i8, i32 64
   call void @bar(i8* %var)
   ret void
@@ -93,11 +93,11 @@ define void @check_vfp_fold() minsize {
 ; folded in except that doing so would clobber the value being returned.
 define i64 @check_no_return_clobber() minsize {
 ; CHECK-LABEL: check_no_return_clobber:
-; CHECK: push {r1, r2, r3, r4, r5, r6, r7, lr}
+; CHECK: push.w {r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NOT: sub sp,
 ; ...
 ; CHECK: add sp, #24
-; CHECK: pop {r7, pc}
+; CHECK: pop.w {r11, pc}
 
   ; Just to keep iOS FileCheck within previous function:
 ; CHECK-IOS-LABEL: check_no_return_clobber:
@@ -176,9 +176,9 @@ define void @test_varsize(...) minsize {
 
 ; CHECK-LABEL: test_varsize:
 ; CHECK: sub	sp, #16
-; CHECK: push	{r5, r6, r7, lr}
+; CHECK: push.w {r9, r10, r11, lr}
 ; ...
-; CHECK: pop.w	{r2, r3, r7, lr}
+; CHECK: pop.w	{r2, r3, r11, lr}
 ; CHECK: add	sp, #16
 ; CHECK: bx	lr
 
diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll
index c721756..f50d0b9 100644
--- a/test/CodeGen/ARM/fptoint.ll
+++ b/test/CodeGen/ARM/fptoint.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+v6,+vfp2 %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -arm-atomic-cfg-tidy=0 -mattr=+v6,+vfp2 %s -o - | FileCheck %s
 
 @i = weak global i32 0		; <i32*> [#uses=2]
 @u = weak global i32 0		; <i32*> [#uses=2]
diff --git a/test/Transforms/GlobalMerge/ARM/arm.ll b/test/CodeGen/ARM/global-merge-1.ll
index 8c77de6..341597e 100644
--- a/test/Transforms/GlobalMerge/ARM/arm.ll
+++ b/test/CodeGen/ARM/global-merge-1.ll
@@ -1,9 +1,9 @@
 ; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O0 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O0 -o - -global-merge=true | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O0 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O0 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s
 ; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s
-; RUN: llc %s -O1 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - -global-merge=true | FileCheck -check-prefix=MERGE %s
+; RUN: llc %s -O1 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s
 
 ; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
 ; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
diff --git a/test/CodeGen/ARM/ifcvt-branch-weight.ll b/test/CodeGen/ARM/ifcvt-branch-weight.ll
index cd8a561..a994d3d 100644
--- a/test/CodeGen/ARM/ifcvt-branch-weight.ll
+++ b/test/CodeGen/ARM/ifcvt-branch-weight.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv8 -print-machineinstrs=if-converter -o /dev/null 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8 -print-machineinstrs=if-converter -arm-atomic-cfg-tidy=0 -o /dev/null 2>&1 | FileCheck %s
 
 %struct.S = type { i8* (i8*)*, [1 x i8] }
 define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly {
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll
index 26c7272..509c182 100644
--- a/test/CodeGen/ARM/ifcvt10.ll
+++ b/test/CodeGen/ARM/ifcvt10.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -arm-atomic-cfg-tidy=0 -mcpu=cortex-a9 | FileCheck %s
 ; rdar://8402126
 ; Make sure if-converter is not predicating vldmia and ldmia. These are
 ; micro-coded and would have long issue latency even if predicated on
diff --git a/test/CodeGen/ARM/indirectbr-3.ll b/test/CodeGen/ARM/indirectbr-3.ll
index 5a9c459..291fedb 100644
--- a/test/CodeGen/ARM/indirectbr-3.ll
+++ b/test/CodeGen/ARM/indirectbr-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s
 
 ; If ARMBaseInstrInfo::AnalyzeBlocks returns the wrong value, which was possible
 ; for blocks with indirect branches, the IfConverter could end up deleting
diff --git a/test/CodeGen/ARM/interrupt-attr.ll b/test/CodeGen/ARM/interrupt-attr.ll
index c5be667..cb67dd9 100644
--- a/test/CodeGen/ARM/interrupt-attr.ll
+++ b/test/CodeGen/ARM/interrupt-attr.ll
@@ -35,15 +35,15 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
   ; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to
   ; appropriate sentinel so no special return needed).
 ; CHECK-M-LABEL: irq_fn:
-; CHECK-M: push {r4, r6, r7, lr}
-; CHECK-M: add r7, sp, #8
+; CHECK-M: push.w {r4, r10, r11, lr}
+; CHECK-M: add.w r11, sp, #8
 ; CHECK-M: mov r4, sp
 ; CHECK-M: bic r4, r4, #7
 ; CHECK-M: mov sp, r4
 ; CHECK-M: blx _bar
-; CHECK-M: sub.w r4, r7, #8
+; CHECK-M: sub.w r4, r11, #8
 ; CHECK-M: mov sp, r4
-; CHECK-M: pop {r4, r6, r7, pc}
+; CHECK-M: pop.w {r4, r10, r11, pc}
 
   call arm_aapcscc void @bar()
   ret void
diff --git a/test/CodeGen/ARM/intrinsics-memory-barrier.ll b/test/CodeGen/ARM/intrinsics-memory-barrier.ll
new file mode 100644
index 0000000..5ee0b3e
--- /dev/null
+++ b/test/CodeGen/ARM/intrinsics-memory-barrier.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -mtriple=armv7 -mattr=+db | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7 -mattr=+db | FileCheck %s
+
+; CHECK-LABEL: test
+define void @test() {
+  call void @llvm.arm.dmb(i32 3)     ; CHECK: dmb osh
+  call void @llvm.arm.dsb(i32 7)     ; CHECK: dsb nsh
+  call void @llvm.arm.isb(i32 15)    ; CHECK: isb sy
+  ret void
+}
+
+; Important point is that the compiler should not reorder memory access
+; instructions around DMB.
+; Failure to do so, two STRs will collapse into one STRD.
+; CHECK-LABEL: test_dmb_reordering
+define void @test_dmb_reordering(i32 %a, i32 %b, i32* %d) {
+  store i32 %a, i32* %d              ; CHECK: str {{r[0-9]+}}, [{{r[0-9]+}}]
+
+  call void @llvm.arm.dmb(i32 15)    ; CHECK: dmb sy
+
+  %d1 = getelementptr i32* %d, i32 1
+  store i32 %b, i32* %d1             ; CHECK: str {{r[0-9]+}}, [{{r[0-9]+}}, #4]
+
+  ret void
+}
+
+; Similarly for DSB.
+; CHECK-LABEL: test_dsb_reordering
+define void @test_dsb_reordering(i32 %a, i32 %b, i32* %d) {
+  store i32 %a, i32* %d              ; CHECK: str {{r[0-9]+}}, [{{r[0-9]+}}]
+
+  call void @llvm.arm.dsb(i32 15)    ; CHECK: dsb sy
+
+  %d1 = getelementptr i32* %d, i32 1
+  store i32 %b, i32* %d1             ; CHECK: str {{r[0-9]+}}, [{{r[0-9]+}}, #4]
+
+  ret void
+}
+
+; And ISB.
+; CHECK-LABEL: test_isb_reordering
+define void @test_isb_reordering(i32 %a, i32 %b, i32* %d) {
+  store i32 %a, i32* %d              ; CHECK: str {{r[0-9]+}}, [{{r[0-9]+}}]
+
+  call void @llvm.arm.isb(i32 15)    ; CHECK: isb sy
+
+  %d1 = getelementptr i32* %d, i32 1
+  store i32 %b, i32* %d1             ; CHECK: str {{r[0-9]+}}, [{{r[0-9]+}}, #4]
+
+  ret void
+}
+
+declare void @llvm.arm.dmb(i32)
+declare void @llvm.arm.dsb(i32)
+declare void @llvm.arm.isb(i32)
diff --git a/test/CodeGen/ARM/jump_tables.ll b/test/CodeGen/ARM/jump_tables.ll
new file mode 100644
index 0000000..907a86c
--- /dev/null
+++ b/test/CodeGen/ARM/jump_tables.ll
@@ -0,0 +1,32 @@
+; RUN: llc <%s -mtriple=arm-unknown-linux-gnueabi -jump-table-type=single | FileCheck --check-prefix=ARM %s
+; RUN: llc <%s -mtriple=thumb-unknown-linux-gnueabi -jump-table-type=single | FileCheck --check-prefix=THUMB %s
+
+define void @indirect_fun() unnamed_addr jumptable {
+  ret void
+}
+define void ()* @get_fun() {
+  ret void ()* @indirect_fun
+
+; ARM:         ldr     r0, [[LABEL:.*]]
+; ARM:         mov     pc, lr
+; ARM: [[LABEL]]:
+; ARM:         .long   __llvm_jump_instr_table_0_1
+
+; THUMB:         ldr     r0, [[LABEL:.*]]
+; THUMB:         bx      lr
+; THUMB: [[LABEL]]:
+; THUMB:         .long   __llvm_jump_instr_table_0_1
+}
+
+; ARM:         .globl  __llvm_jump_instr_table_0_1
+; ARM:         .align  3
+; ARM:         .type   __llvm_jump_instr_table_0_1,%function
+; ARM: __llvm_jump_instr_table_0_1:
+; ARM:         b     indirect_fun(PLT)
+
+; THUMB:         .globl  __llvm_jump_instr_table_0_1
+; THUMB:         .align  3
+; THUMB:         .thumb_func
+; THUMB:         .type   __llvm_jump_instr_table_0_1,%function
+; THUMB: __llvm_jump_instr_table_0_1:
+; THUMB:         b     indirect_fun(PLT)
diff --git a/test/CodeGen/ARM/ldstrex-m.ll b/test/CodeGen/ARM/ldstrex-m.ll
new file mode 100644
index 0000000..b50699f
--- /dev/null
+++ b/test/CodeGen/ARM/ldstrex-m.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 | FileCheck %s
+
+; CHECK-LABEL: f0:
+; CHECK-NOT: ldrexd
+define i64 @f0(i64* %p) nounwind readonly {
+entry:
+  %0 = load atomic i64* %p seq_cst, align 8
+  ret i64 %0
+}
+
+; CHECK-LABEL: f1:
+; CHECK-NOT: strexd
+define void @f1(i64* %p) nounwind readonly {
+entry:
+  store atomic i64 0, i64* %p seq_cst, align 8
+  ret void
+}
+
+; CHECK-LABEL: f2:
+; CHECK-NOT: ldrexd
+; CHECK-NOT: strexd
+define i64 @f2(i64* %p) nounwind readonly {
+entry:
+  %0 = atomicrmw add i64* %p, i64 1 seq_cst
+  ret i64 %0
+}
+
+; CHECK-LABEL: f3:
+; CHECK: ldr
+define i32 @f3(i32* %p) nounwind readonly {
+entry:
+  %0 = load atomic i32* %p seq_cst, align 4
+  ret i32 %0
+}
+
+; CHECK-LABEL: f4:
+; CHECK: ldrb
+define i8 @f4(i8* %p) nounwind readonly {
+entry:
+  %0 = load atomic i8* %p seq_cst, align 4
+  ret i8 %0
+}
+
+; CHECK-LABEL: f5:
+; CHECK: str
+define void @f5(i32* %p) nounwind readonly {
+entry:
+  store atomic i32 0, i32* %p seq_cst, align 4
+  ret void
+}
+
+; CHECK-LABEL: f6:
+; CHECK: ldrex
+; CHECK: strex
+define i32 @f6(i32* %p) nounwind readonly {
+entry:
+  %0 = atomicrmw add i32* %p, i32 1 seq_cst
+  ret i32 %0
+}
diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/CodeGen/ARM/lit.local.cfg
+++ b/test/CodeGen/ARM/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
index 1dafa00..3ad60d4 100644
--- a/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -1,4 +1,4 @@
-; RUN: llc -regalloc=greedy < %s | FileCheck %s
+; RUN: llc -regalloc=greedy -arm-atomic-cfg-tidy=0 < %s | FileCheck %s
 
 ; LSR shouldn't introduce more induction variables than needed, increasing
 ; register pressure and therefore spilling. There is more room for improvement
diff --git a/test/CodeGen/ARM/metadata-default.ll b/test/CodeGen/ARM/metadata-default.ll
new file mode 100644
index 0000000..f6a3fe2
--- /dev/null
+++ b/test/CodeGen/ARM/metadata-default.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "armv7--none-eabi"
+
+define i32 @f(i64 %z) {
+	ret i32 0
+}
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = metadata !{i32 1, metadata !"wchar_size", i32 4}
+!1 = metadata !{i32 1, metadata !"min_enum_size", i32 4}
+
+; CHECK: .eabi_attribute 18, 4   @ Tag_ABI_PCS_wchar_t
+; CHECK: .eabi_attribute 26, 2   @ Tag_ABI_enum_size
diff --git a/test/CodeGen/ARM/metadata-short-enums.ll b/test/CodeGen/ARM/metadata-short-enums.ll
new file mode 100644
index 0000000..bccd332
--- /dev/null
+++ b/test/CodeGen/ARM/metadata-short-enums.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "armv7--none-eabi"
+
+define i32 @f(i64 %z) {
+	ret i32 0
+}
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = metadata !{i32 1, metadata !"wchar_size", i32 4}
+!1 = metadata !{i32 1, metadata !"min_enum_size", i32 1}
+
+; CHECK: .eabi_attribute 18, 4   @ Tag_ABI_PCS_wchar_t
+; CHECK: .eabi_attribute 26, 1   @ Tag_ABI_enum_size
diff --git a/test/CodeGen/ARM/metadata-short-wchar.ll b/test/CodeGen/ARM/metadata-short-wchar.ll
new file mode 100644
index 0000000..6de9bf1
--- /dev/null
+++ b/test/CodeGen/ARM/metadata-short-wchar.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "armv7--none-eabi"
+
+define i32 @f(i64 %z) {
+	ret i32 0
+}
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = metadata !{i32 1, metadata !"wchar_size", i32 2}
+!1 = metadata !{i32 1, metadata !"min_enum_size", i32 4}
+
+; CHECK: .eabi_attribute 18, 2   @ Tag_ABI_PCS_wchar_t
+; CHECK: .eabi_attribute 26, 2   @ Tag_ABI_enum_size
diff --git a/test/CodeGen/ARM/misched-copy-arm.ll b/test/CodeGen/ARM/misched-copy-arm.ll
index 26adf0c..bb2d42c 100644
--- a/test/CodeGen/ARM/misched-copy-arm.ll
+++ b/test/CodeGen/ARM/misched-copy-arm.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc -mtriple=thumb-eabi -mcpu=swift -pre-RA-sched=source -join-globalcopies -enable-misched -verify-misched -debug-only=misched %s -o - 2>&1 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=swift -pre-RA-sched=source -join-globalcopies -enable-misched -verify-misched -debug-only=misched -arm-atomic-cfg-tidy=0 %s -o - 2>&1 | FileCheck %s
 ;
 ; Loop counter copies should be eliminated.
 ; There is also a MUL here, but we don't care where it is scheduled.
diff --git a/test/CodeGen/ARM/none-macho.ll b/test/CodeGen/ARM/none-macho.ll
index 2795b8c..60c2171 100644
--- a/test/CodeGen/ARM/none-macho.ll
+++ b/test/CodeGen/ARM/none-macho.ll
@@ -48,8 +48,8 @@ define i32 @test_frame_ptr() {
 ; CHECK-LABEL: test_frame_ptr:
   call void @test_trap()
 
-  ; Frame pointer is r7 as for Darwin
-; CHECK: mov r7, sp
+  ; Frame pointer is r11.
+; CHECK: mov r11, sp
   ret i32 42
 }
 
@@ -63,11 +63,9 @@ define void @test_two_areas(%big_arr* %addr) {
   ; This goes with the choice of r7 as FP (largely). FP and LR have to be stored
   ; consecutively on the stack for the frame record to be valid, which means we
   ; need the 2 register-save areas employed by iOS.
-; CHECK-NON-FAST: push {r4, r5, r6, r7, lr}
-; CHECK-NON-FAST: push.w {r8, r9, r10, r11}
+; CHECK-NON-FAST: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; ...
-; CHECK-NON-FAST: pop.w {r8, r9, r10, r11}
-; CHECK-NON-FAST: pop {r4, r5, r6, r7, pc}
+; CHECK-NON-FAST: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
   ret void
 }
 
diff --git a/test/CodeGen/ARM/null-streamer.ll b/test/CodeGen/ARM/null-streamer.ll
new file mode 100644
index 0000000..350c45e
--- /dev/null
+++ b/test/CodeGen/ARM/null-streamer.ll
@@ -0,0 +1,7 @@
+; Test the null streamer with a terget streamer.
+; RUN: llc -O0 -filetype=null -mtriple=arm-linux < %s
+
+define i32 @main()  {
+entry:
+  ret i32 0
+}
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index b245674..feed5ad 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 -regalloc=basic | FileCheck %s
 ; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's.
 
 %struct.int16x8_t = type { <8 x i16> }
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index b924663..4fa97ea 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-elf -mattr=+neon | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-elf -mattr=+neon -arm-atomic-cfg-tidy=0 | FileCheck %s
 ; PR4789
 
 %bar = type { float, float, float }
diff --git a/test/CodeGen/ARM/struct-byval-frame-index.ll b/test/CodeGen/ARM/struct-byval-frame-index.ll
index 465ee12..0fd55ec 100644
--- a/test/CodeGen/ARM/struct-byval-frame-index.ll
+++ b/test/CodeGen/ARM/struct-byval-frame-index.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=cortex-a15 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a15 -verify-machineinstrs -arm-atomic-cfg-tidy=0 | FileCheck %s
 
 ; Check a spill right after a function call with large struct byval is correctly
 ; generated.
diff --git a/test/CodeGen/ARM/twoaddrinstr.ll b/test/CodeGen/ARM/twoaddrinstr.ll
index 8da875f..01df3b4 100644
--- a/test/CodeGen/ARM/twoaddrinstr.ll
+++ b/test/CodeGen/ARM/twoaddrinstr.ll
@@ -1,5 +1,5 @@
 ; Tests for the two-address instruction pass.
-; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 -arm-atomic-cfg-tidy=0 %s -o - | FileCheck %s
 
 define void @PR13378() nounwind {
 ; This was orriginally a crasher trying to schedule the instructions.
diff --git a/test/CodeGen/ARM/va_arg.ll b/test/CodeGen/ARM/va_arg.ll
index f18b498..d901a74 100644
--- a/test/CodeGen/ARM/va_arg.ll
+++ b/test/CodeGen/ARM/va_arg.ll
@@ -24,13 +24,13 @@ entry:
 ; CHECK-NOT:	bfc
 ; CHECK: bx	lr
 
-define double @test2(i32 %a, i32 %b, ...) nounwind optsize {
+define double @test2(i32 %a, i32* %b, ...) nounwind optsize {
 entry:
   %ap = alloca i8*, align 4                       ; <i8**> [#uses=3]
   %ap1 = bitcast i8** %ap to i8*                  ; <i8*> [#uses=2]
   call void @llvm.va_start(i8* %ap1)
   %0 = va_arg i8** %ap, i32                       ; <i32> [#uses=0]
-  store i32 %0, i32* undef
+  store i32 %0, i32* %b
   %1 = va_arg i8** %ap, double                    ; <double> [#uses=1]
   call void @llvm.va_end(i8* %ap1)
   ret double %1
diff --git a/test/CodeGen/ARM/vldm-sched-a9.ll b/test/CodeGen/ARM/vldm-sched-a9.ll
index d0a9ac6..f2e5eb9 100644
--- a/test/CodeGen/ARM/vldm-sched-a9.ll
+++ b/test/CodeGen/ARM/vldm-sched-a9.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mtriple=armv7-linux-gnueabihf -float-abi=hard -mcpu=cortex-a9 -O3 | FileCheck %s
+; RUN: llc < %s -march=arm -mtriple=armv7-linux-gnueabihf -arm-atomic-cfg-tidy=0 -float-abi=hard -mcpu=cortex-a9 -O3 | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32-S64"
 
diff --git a/test/CodeGen/ARM/widen-vmovs.ll b/test/CodeGen/ARM/widen-vmovs.ll
index 1efbc73..316cfab 100644
--- a/test/CodeGen/ARM/widen-vmovs.ll
+++ b/test/CodeGen/ARM/widen-vmovs.ll
@@ -17,7 +17,7 @@ target triple = "thumbv7-apple-ios"
 ; - Register liveness is verified.
 ; - The execution domain switch to vorr works across basic blocks.
 
-define void @Mm() nounwind {
+define void @Mm(i32 %in, float* %addr) nounwind {
 entry:
   br label %for.body4
 
@@ -27,10 +27,10 @@ for.body4:
 for.body.i:
   %tmp3.i = phi float [ 1.000000e+10, %for.body4 ], [ %add.i, %for.body.i ]
   %add.i = fadd float %tmp3.i, 1.000000e+10
-  %exitcond.i = icmp eq i32 undef, 41
+  %exitcond.i = icmp eq i32 %in, 41
   br i1 %exitcond.i, label %rInnerproduct.exit, label %for.body.i
 
 rInnerproduct.exit:
-  store float %add.i, float* undef, align 4
+  store float %add.i, float* %addr, align 4
   br label %for.body4
 }
diff --git a/test/CodeGen/CPP/atomic.ll b/test/CodeGen/CPP/atomic.ll
new file mode 100644
index 0000000..e79c45d
--- /dev/null
+++ b/test/CodeGen/CPP/atomic.ll
@@ -0,0 +1,89 @@
+; RUN: llc -march=cpp -o - %s | FileCheck %s
+
+define void @test_atomicrmw(i32* %addr, i32 %inc) {
+  %inst0 = atomicrmw xchg i32* %addr, i32 %inc seq_cst
+  ; CHECK: AtomicRMWInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicRMWInst(AtomicRMWInst::Xchg, {{.*}}, SequentiallyConsistent, CrossThread
+  ; CHECK: [[INST]]->setName("inst0");
+  ; CHECK: [[INST]]->setVolatile(false);
+
+  %inst1 = atomicrmw add i32* %addr, i32 %inc seq_cst
+  ; CHECK: AtomicRMWInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicRMWInst(AtomicRMWInst::Add, {{.*}}, SequentiallyConsistent, CrossThread
+  ; CHECK: [[INST]]->setName("inst1");
+  ; CHECK: [[INST]]->setVolatile(false);
+
+  %inst2 = atomicrmw volatile sub i32* %addr, i32 %inc singlethread monotonic
+  ; CHECK: AtomicRMWInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicRMWInst(AtomicRMWInst::Sub, {{.*}}, Monotonic, SingleThread
+  ; CHECK: [[INST]]->setName("inst2");
+  ; CHECK: [[INST]]->setVolatile(true);
+
+  %inst3 = atomicrmw and i32* %addr, i32 %inc acq_rel
+  ; CHECK: AtomicRMWInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicRMWInst(AtomicRMWInst::And, {{.*}}, AcquireRelease, CrossThread
+  ; CHECK: [[INST]]->setName("inst3");
+  ; CHECK: [[INST]]->setVolatile(false);
+
+  %inst4 = atomicrmw nand i32* %addr, i32 %inc release
+  ; CHECK: AtomicRMWInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicRMWInst(AtomicRMWInst::Nand, {{.*}}, Release, CrossThread
+  ; CHECK: [[INST]]->setName("inst4");
+  ; CHECK: [[INST]]->setVolatile(false);
+
+  %inst5 = atomicrmw volatile or i32* %addr, i32 %inc singlethread seq_cst
+  ; CHECK: AtomicRMWInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicRMWInst(AtomicRMWInst::Or, {{.*}}, SequentiallyConsistent, SingleThread
+  ; CHECK: [[INST]]->setName("inst5");
+  ; CHECK: [[INST]]->setVolatile(true);
+
+  %inst6 = atomicrmw xor i32* %addr, i32 %inc release
+  ; CHECK: AtomicRMWInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicRMWInst(AtomicRMWInst::Xor, {{.*}}, Release, CrossThread
+  ; CHECK: [[INST]]->setName("inst6");
+  ; CHECK: [[INST]]->setVolatile(false);
+
+  %inst7 = atomicrmw volatile max i32* %addr, i32 %inc singlethread monotonic
+  ; CHECK: AtomicRMWInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicRMWInst(AtomicRMWInst::Max, {{.*}}, Monotonic, SingleThread
+  ; CHECK: [[INST]]->setName("inst7");
+  ; CHECK: [[INST]]->setVolatile(true);
+
+  %inst8 = atomicrmw min i32* %addr, i32 %inc acquire
+  ; CHECK: AtomicRMWInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicRMWInst(AtomicRMWInst::Min, {{.*}}, Acquire, CrossThread
+  ; CHECK: [[INST]]->setName("inst8");
+  ; CHECK: [[INST]]->setVolatile(false);
+
+  %inst9 = atomicrmw volatile umax i32* %addr, i32 %inc monotonic
+  ; CHECK: AtomicRMWInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicRMWInst(AtomicRMWInst::UMax, {{.*}}, Monotonic, CrossThread
+  ; CHECK: [[INST]]->setName("inst9");
+  ; CHECK: [[INST]]->setVolatile(true);
+
+  %inst10 = atomicrmw umin i32* %addr, i32 %inc singlethread release
+  ; CHECK: AtomicRMWInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicRMWInst(AtomicRMWInst::UMin, {{.*}}, Release, SingleThread
+  ; CHECK: [[INST]]->setName("inst10");
+  ; CHECK: [[INST]]->setVolatile(false);
+
+
+  ret void
+}
+
+define void @test_cmpxchg(i32* %addr, i32 %desired, i32 %new) {
+  %inst0 = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+  ; CHECK: AtomicCmpXchgInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicCmpXchgInst({{.*}}, SequentiallyConsistent, Monotonic, CrossThread
+  ; CHECK: [[INST]]->setName("inst0");
+  ; CHECK: [[INST]]->setVolatile(false);
+  ; CHECK: [[INST]]->setWeak(false);
+
+  %inst1 = cmpxchg volatile i32* %addr, i32 %desired, i32 %new singlethread acq_rel acquire
+  ; CHECK: AtomicCmpXchgInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicCmpXchgInst({{.*}}, AcquireRelease, Acquire, SingleThread
+  ; CHECK: [[INST]]->setName("inst1");
+  ; CHECK: [[INST]]->setVolatile(true);
+  ; CHECK: [[INST]]->setWeak(false);
+
+  %inst2 = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+  ; CHECK: AtomicCmpXchgInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicCmpXchgInst({{.*}}, SequentiallyConsistent, Monotonic, CrossThread
+  ; CHECK: [[INST]]->setName("inst2");
+  ; CHECK: [[INST]]->setVolatile(false);
+  ; CHECK: [[INST]]->setWeak(true);
+
+  %inst3 = cmpxchg weak volatile i32* %addr, i32 %desired, i32 %new singlethread acq_rel acquire
+  ; CHECK: AtomicCmpXchgInst* [[INST:[a-zA-Z0-9_]+]] = new AtomicCmpXchgInst({{.*}}, AcquireRelease, Acquire, SingleThread
+  ; CHECK: [[INST]]->setName("inst3");
+  ; CHECK: [[INST]]->setVolatile(true);
+  ; CHECK: [[INST]]->setWeak(true);
+
+  ret void
+}
diff --git a/test/CodeGen/CPP/lit.local.cfg b/test/CodeGen/CPP/lit.local.cfg
index 4063dd1..3ff5c6b 100644
--- a/test/CodeGen/CPP/lit.local.cfg
+++ b/test/CodeGen/CPP/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'CppBackend' in targets:
+if not 'CppBackend' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/Generic/MachineBranchProb.ll b/test/CodeGen/Generic/MachineBranchProb.ll
index 802ee2c..0e98280 100644
--- a/test/CodeGen/Generic/MachineBranchProb.ll
+++ b/test/CodeGen/Generic/MachineBranchProb.ll
@@ -1,5 +1,8 @@
 ; RUN: llc < %s -print-machineinstrs=expand-isel-pseudos -o /dev/null 2>&1 | FileCheck %s
 
+; ARM & AArch64 run an extra SimplifyCFG which disrupts this test.
+; XFAIL: arm,aarch64
+
 ; Make sure we have the correct weight attached to each successor.
 define i32 @test2(i32 %x) nounwind uwtable readnone ssp {
 ; CHECK: Machine code for function test2:
diff --git a/test/CodeGen/Generic/select.ll b/test/CodeGen/Generic/select.ll
index 77636eb..c4841b7 100644
--- a/test/CodeGen/Generic/select.ll
+++ b/test/CodeGen/Generic/select.ll
@@ -192,4 +192,3 @@ define <1 x i32> @checkScalariseVSELECT(<1 x i32> %a, <1 x i32> %b) {
         %s = select <1 x i1> %cond, <1 x i32> %a, <1 x i32> %b
         ret <1 x i32> %s
 }
-
diff --git a/test/CodeGen/Generic/stop-after.ll b/test/CodeGen/Generic/stop-after.ll
index 557e097..5e0e350 100644
--- a/test/CodeGen/Generic/stop-after.ll
+++ b/test/CodeGen/Generic/stop-after.ll
@@ -5,6 +5,6 @@
 ; STOP: Loop Strength Reduction
 ; STOP-NEXT: Machine Function Analysis
 
-; START: -machine-branch-prob -gc-lowering
+; START: -machine-branch-prob -jump-instr-tables -gc-lowering
 ; START: FunctionPass Manager
 ; START-NEXT: Lower Garbage Collection Instructions
diff --git a/test/CodeGen/Hexagon/lit.local.cfg b/test/CodeGen/Hexagon/lit.local.cfg
index e96bab8..ba72ff6 100644
--- a/test/CodeGen/Hexagon/lit.local.cfg
+++ b/test/CodeGen/Hexagon/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'Hexagon' in targets:
+if not 'Hexagon' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/MSP430/lit.local.cfg b/test/CodeGen/MSP430/lit.local.cfg
index a18fe6f..b1cf1fb 100644
--- a/test/CodeGen/MSP430/lit.local.cfg
+++ b/test/CodeGen/MSP430/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'MSP430' in targets:
+if not 'MSP430' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index e274bc0..3c1bb39 100644
--- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips < %s | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips32 < %s | FileCheck %s
 ; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
 
 %struct.DWstruct = type { i32, i32 }
diff --git a/test/CodeGen/Mips/2013-11-18-fp64-const0.ll b/test/CodeGen/Mips/2013-11-18-fp64-const0.ll
index f8390d9..6a210a0 100644
--- a/test/CodeGen/Mips/2013-11-18-fp64-const0.ll
+++ b/test/CodeGen/Mips/2013-11-18-fp64-const0.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -march=mips -mattr=-fp64 < %s | FileCheck -check-prefix=CHECK-FP32 %s
-; RUN: llc -march=mips -mattr=+fp64 < %s | FileCheck -check-prefix=CHECK-FP64 %s
+; RUN: llc -march=mips -mcpu=mips32r2 -mattr=+fp64 < %s | FileCheck -check-prefix=CHECK-FP64 %s
 
 ; This test case is a simplified version of an llvm-stress generated test with
 ; seed=3718491962.
diff --git a/test/CodeGen/Mips/Fast-ISel/loadstore2.ll b/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
new file mode 100644
index 0000000..f113a0e
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
@@ -0,0 +1,83 @@
+; ModuleID = 'loadstore2.c'
+target datalayout = "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"
+target triple = "mips--linux-gnu"
+
+@c2 = common global i8 0, align 1
+@c1 = common global i8 0, align 1
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+
+@s2 = common global i16 0, align 2
+@s1 = common global i16 0, align 2
+@i2 = common global i32 0, align 4
+@i1 = common global i32 0, align 4
+@f2 = common global float 0.000000e+00, align 4
+@f1 = common global float 0.000000e+00, align 4
+@d2 = common global double 0.000000e+00, align 8
+@d1 = common global double 0.000000e+00, align 8
+
+; Function Attrs: nounwind
+define void @cfoo() #0 {
+entry:
+  %0 = load i8* @c2, align 1
+  store i8 %0, i8* @c1, align 1
+; CHECK-LABEL:	cfoo:
+; CHECK:	lbu	$[[REGc:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:	sb	$[[REGc]], 0(${{[0-9]+}})
+
+
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @sfoo() #0 {
+entry:
+  %0 = load i16* @s2, align 2
+  store i16 %0, i16* @s1, align 2
+; CHECK-LABEL:	sfoo:
+; CHECK:	lhu	$[[REGs:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:	sh	$[[REGs]], 0(${{[0-9]+}})
+
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @ifoo() #0 {
+entry:
+  %0 = load i32* @i2, align 4
+  store i32 %0, i32* @i1, align 4
+; CHECK-LABEL:	ifoo:
+; CHECK:	lw	$[[REGi:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:	sw	$[[REGi]], 0(${{[0-9]+}})
+
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @ffoo() #0 {
+entry:
+  %0 = load float* @f2, align 4
+  store float %0, float* @f1, align 4
+; CHECK-LABEL:	ffoo:
+; CHECK:	lwc1	$f[[REGf:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:	swc1	$f[[REGf]], 0(${{[0-9]+}})
+
+
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @dfoo() #0 {
+entry:
+  %0 = load double* @d2, align 8
+  store double %0, double* @d1, align 8
+; CHECK-LABEL:        dfoo:
+; CHECK:        ldc1    $f[[REGd:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:        sdc1    $f[[REGd]], 0(${{[0-9]+}})
+; CHECK:        .end    dfoo
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll b/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll
new file mode 100644
index 0000000..6759c01
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+
+@f = common global float 0.000000e+00, align 4
+@de = common global double 0.000000e+00, align 8
+
+; Function Attrs: nounwind
+define void @f1() #0 {
+entry:
+  store float 0x3FFA76C8C0000000, float* @f, align 4
+  ret void
+; CHECK:  .ent  f1
+; CHECK:  lui  $[[REG1:[0-9]+]], 16339
+; CHECK:  ori  $[[REG2:[0-9]+]], $[[REG1]], 46662
+; CHECK:  mtc1  $[[REG2]], $f[[REG3:[0-9]+]]
+; CHECK:  lw  $[[REG4:[0-9]+]], %got(f)(${{[0-9]+}})
+; CHECK:  swc1  $f[[REG3]], 0($[[REG4]])
+; CHECK:   .end  f1
+
+}
+
+; Function Attrs: nounwind
+define void @d1() #0 {
+entry:
+  store double 1.234567e+00, double* @de, align 8
+; CHECK:  .ent  d1
+; CHECK:  lui  $[[REG1a:[0-9]+]], 16371
+; CHECK:  ori  $[[REG2a:[0-9]+]], $[[REG1a]], 49353
+; CHECK:  lui  $[[REG1b:[0-9]+]], 21403
+; CHECK:  ori  $[[REG2b:[0-9]+]], $[[REG1b]], 34951
+; CHECK:  mtc1  $[[REG2b]], $f[[REG3:[0-9]+]]
+; CHECK:  mthc1  $[[REG2a]], $f[[REG3]]
+; CHECK:  sdc1  $f[[REG3]], 0(${{[0-9]+}})
+; CHECK:  .end  d1
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/abiflags-xx.ll b/test/CodeGen/Mips/abiflags-xx.ll
new file mode 100644
index 0000000..b8aa071
--- /dev/null
+++ b/test/CodeGen/Mips/abiflags-xx.ll
@@ -0,0 +1,6 @@
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -mattr=fpxx %s -o - | FileCheck %s
+; XFAIL: *
+
+; CHECK: .nan    legacy
+; CHECK: .module fp=xx
+
diff --git a/test/CodeGen/Mips/abiflags32.ll b/test/CodeGen/Mips/abiflags32.ll
new file mode 100644
index 0000000..093964f
--- /dev/null
+++ b/test/CodeGen/Mips/abiflags32.ll
@@ -0,0 +1,12 @@
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck %s
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -mattr=fp64 %s -o - | FileCheck  -check-prefix=CHECK-64 %s
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips64 -mattr=-n64,n32 %s -o - | FileCheck  -check-prefix=CHECK-64n %s
+
+; CHECK: .nan    legacy
+; CHECK: .module fp=32
+
+; CHECK-64: .nan    legacy
+; CHECK-64: .module fp=64
+
+; CHECK-64n: .nan    legacy
+; CHECK-64n: .module fp=64
diff --git a/test/CodeGen/Mips/analyzebranch.ll b/test/CodeGen/Mips/analyzebranch.ll
index 8ec5d93..4b5d097 100644
--- a/test/CodeGen/Mips/analyzebranch.ll
+++ b/test/CodeGen/Mips/analyzebranch.ll
@@ -1,9 +1,25 @@
-; RUN: llc -march=mips < %s | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips32   < %s | FileCheck %s -check-prefix=ALL -check-prefix=FCC
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=FCC
+; RUN: llc -march=mips -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR -check-prefix=32-GPR
+; RUN: llc -march=mips64 -mcpu=mips4    < %s | FileCheck %s -check-prefix=ALL -check-prefix=FCC
+; RUN: llc -march=mips64 -mcpu=mips64   < %s | FileCheck %s -check-prefix=ALL -check-prefix=FCC
+; RUN: llc -march=mips64 -mcpu=mips64r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=FCC
+; RUN: llc -march=mips64 -mcpu=mips64r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR -check-prefix=64-GPR
 
 define double @foo(double %a, double %b) nounwind readnone {
 entry:
-; CHECK: bc1f $BB
-; CHECK: nop
+; ALL-LABEL: foo:
+
+; FCC:           bc1f $BB
+; FCC:           nop
+
+; 32-GPR:        mtc1      $zero, $[[Z:f[0-9]]]
+; 32-GPR:        mthc1     $zero, $[[Z:f[0-9]]]
+; 64-GPR:        dmtc1     $zero, $[[Z:f[0-9]]]
+; GPR:           cmp.lt.d  $[[FGRCC:f[0-9]+]], $[[Z]], $f12
+; GPR:           mfc1      $[[GPRCC:[0-9]+]], $[[FGRCC]]
+; GPR-NOT:       not       $[[GPRCC]], $[[GPRCC]]
+; GPR:           bnez      $[[GPRCC]], $BB
 
   %cmp = fcmp ogt double %a, 0.000000e+00
   br i1 %cmp, label %if.end6, label %if.else
@@ -25,8 +41,17 @@ return:                                           ; preds = %if.else, %if.end6
 
 define void @f1(float %f) nounwind {
 entry:
-; CHECK: bc1f $BB
-; CHECK: nop
+; ALL-LABEL: f1:
+
+; FCC:           bc1f $BB
+; FCC:           nop
+
+; GPR:           mtc1     $zero, $[[Z:f[0-9]]]
+; GPR:           cmp.eq.s $[[FGRCC:f[0-9]+]], $f12, $[[Z]]
+; GPR:           mfc1     $[[GPRCC:[0-9]+]], $[[FGRCC]]
+; GPR-NOT:       not      $[[GPRCC]], $[[GPRCC]]
+; GPR:           beqz     $[[GPRCC]], $BB
+
   %cmp = fcmp une float %f, 0.000000e+00
   br i1 %cmp, label %if.then, label %if.end
 
diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll
index 77d7bf3..066d42c 100644
--- a/test/CodeGen/Mips/atomic.ll
+++ b/test/CodeGen/Mips/atomic.ll
@@ -1,5 +1,14 @@
-; RUN: llc -march=mipsel --disable-machine-licm < %s | FileCheck %s -check-prefix=CHECK-EL
-; RUN: llc -march=mips   --disable-machine-licm < %s | FileCheck %s -check-prefix=CHECK-EB
+; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32   < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=NO-SEB-SEH  -check-prefix=CHECK-EL
+; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL
+; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips4    < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=NO-SEB-SEH  -check-prefix=CHECK-EL
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64   < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=NO-SEB-SEH  -check-prefix=CHECK-EL
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL
+
+; Keep one big-endian check so that we don't reduce testing, but don't add more
+; since endianness doesn't affect the body of the atomic operations.
+; RUN: llc -march=mips   --disable-machine-licm -mcpu=mips32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=CHECK-EB
 
 @x = common global i32 0, align 4
 
@@ -8,21 +17,16 @@ entry:
   %0 = atomicrmw add i32* @x, i32 %incr monotonic
   ret i32 %0
 
-; CHECK-EL-LABEL:   AtomicLoadAdd32:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $[[R1:[0-9]+]], 0($[[R0]])
-; CHECK-EL:   addu    $[[R2:[0-9]+]], $[[R1]], $4
-; CHECK-EL:   sc      $[[R2]], 0($[[R0]])
-; CHECK-EL:   beqz    $[[R2]], $[[BB0]]
-
-; CHECK-EB-LABEL:   AtomicLoadAdd32:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $[[R1:[0-9]+]], 0($[[R0]])
-; CHECK-EB:   addu    $[[R2:[0-9]+]], $[[R1]], $4
-; CHECK-EB:   sc      $[[R2]], 0($[[R0]])
-; CHECK-EB:   beqz    $[[R2]], $[[BB0]]
+; ALL-LABEL: AtomicLoadAdd32:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(x)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(x)(
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R1:[0-9]+]], 0($[[R0]])
+; ALL:           addu    $[[R2:[0-9]+]], $[[R1]], $4
+; ALL:           sc      $[[R2]], 0($[[R0]])
+; ALL:           beqz    $[[R2]], $[[BB0]]
 }
 
 define i32 @AtomicLoadNand32(i32 %incr) nounwind {
@@ -30,23 +34,17 @@ entry:
   %0 = atomicrmw nand i32* @x, i32 %incr monotonic
   ret i32 %0
 
-; CHECK-EL-LABEL:   AtomicLoadNand32:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $[[R1:[0-9]+]], 0($[[R0]])
-; CHECK-EL:   and     $[[R3:[0-9]+]], $[[R1]], $4
-; CHECK-EL:   nor     $[[R2:[0-9]+]], $zero, $[[R3]]
-; CHECK-EL:   sc      $[[R2]], 0($[[R0]])
-; CHECK-EL:   beqz    $[[R2]], $[[BB0]]
-
-; CHECK-EB-LABEL:   AtomicLoadNand32:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $[[R1:[0-9]+]], 0($[[R0]])
-; CHECK-EB:   and     $[[R3:[0-9]+]], $[[R1]], $4
-; CHECK-EB:   nor     $[[R2:[0-9]+]], $zero, $[[R3]]
-; CHECK-EB:   sc      $[[R2]], 0($[[R0]])
-; CHECK-EB:   beqz    $[[R2]], $[[BB0]]
+; ALL-LABEL: AtomicLoadNand32:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(x)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(x)(
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R1:[0-9]+]], 0($[[R0]])
+; ALL:           and     $[[R3:[0-9]+]], $[[R1]], $4
+; ALL:           nor     $[[R2:[0-9]+]], $zero, $[[R3]]
+; ALL:           sc      $[[R2]], 0($[[R0]])
+; ALL:           beqz    $[[R2]], $[[BB0]]
 }
 
 define i32 @AtomicSwap32(i32 %newval) nounwind {
@@ -57,19 +55,15 @@ entry:
   %0 = atomicrmw xchg i32* @x, i32 %tmp monotonic
   ret i32 %0
 
-; CHECK-EL-LABEL:   AtomicSwap32:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      ${{[0-9]+}}, 0($[[R0]])
-; CHECK-EL:   sc      $[[R2:[0-9]+]], 0($[[R0]])
-; CHECK-EL:   beqz    $[[R2]], $[[BB0]]
-
-; CHECK-EB-LABEL:   AtomicSwap32:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      ${{[0-9]+}}, 0($[[R0]])
-; CHECK-EB:   sc      $[[R2:[0-9]+]], 0($[[R0]])
-; CHECK-EB:   beqz    $[[R2]], $[[BB0]]
+; ALL-LABEL: AtomicSwap32:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(x)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(x)
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      ${{[0-9]+}}, 0($[[R0]])
+; ALL:           sc      $[[R2:[0-9]+]], 0($[[R0]])
+; ALL:           beqz    $[[R2]], $[[BB0]]
 }
 
 define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind {
@@ -78,25 +72,20 @@ entry:
   store i32 %newval, i32* %newval.addr, align 4
   %tmp = load i32* %newval.addr, align 4
   %0 = cmpxchg i32* @x, i32 %oldval, i32 %tmp monotonic monotonic
-  ret i32 %0
+  %1 = extractvalue { i32, i1 } %0, 0
+  ret i32 %1
+
+; ALL-LABEL: AtomicCmpSwap32:
 
-; CHECK-EL-LABEL:   AtomicCmpSwap32:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $2, 0($[[R0]])
-; CHECK-EL:   bne     $2, $4, $[[BB1:[A-Z_0-9]+]]
-; CHECK-EL:   sc      $[[R2:[0-9]+]], 0($[[R0]])
-; CHECK-EL:   beqz    $[[R2]], $[[BB0]]
-; CHECK-EL:   $[[BB1]]:
-
-; CHECK-EB-LABEL:   AtomicCmpSwap32:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $2, 0($[[R0]])
-; CHECK-EB:   bne     $2, $4, $[[BB1:[A-Z_0-9]+]]
-; CHECK-EB:   sc      $[[R2:[0-9]+]], 0($[[R0]])
-; CHECK-EB:   beqz    $[[R2]], $[[BB0]]
-; CHECK-EB:   $[[BB1]]:
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(x)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(x)(
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $2, 0($[[R0]])
+; ALL:           bne     $2, $4, $[[BB1:[A-Z_0-9]+]]
+; ALL:           sc      $[[R2:[0-9]+]], 0($[[R0]])
+; ALL:           beqz    $[[R2]], $[[BB0]]
+; ALL:       $[[BB1]]:
 }
 
 
@@ -108,56 +97,38 @@ entry:
   %0 = atomicrmw add i8* @y, i8 %incr monotonic
   ret i8 %0
 
-; CHECK-EL-LABEL:   AtomicLoadAdd8:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK-EL:   sllv    $[[R9:[0-9]+]], $4, $[[R4]]
-
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EL:   addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK-EL:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
-; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EL:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK-EL:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EL:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EL:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
-; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
-; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EL:   sra     $2, $[[R17]], 24
-
-; CHECK-EB-LABEL:   AtomicLoadAdd8:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
-; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
-; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
-; CHECK-EB:   sllv    $[[R9:[0-9]+]], $4, $[[R5]]
-
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EB:   addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK-EB:   and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
-; CHECK-EB:   and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
-; CHECK-EB:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK-EB:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EB:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EB:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
-; CHECK-EB:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EB:   sra     $2, $[[R17]], 24
+; ALL-LABEL: AtomicLoadAdd8:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(y)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(y)(
+
+; ALL:           addiu   $[[R1:[0-9]+]], $zero, -4
+; ALL:           and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; ALL:           andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EB:      xori    $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      sll     $[[R5:[0-9]+]], $[[R4]], 3
+; CHECK-EL:      sll     $[[R5:[0-9]+]], $[[R3]], 3
+; ALL:           ori     $[[R6:[0-9]+]], $zero, 255
+; ALL:           sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; ALL:           nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; ALL:           sllv    $[[R9:[0-9]+]], $4, $[[R5]]
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R10:[0-9]+]], 0($[[R2]])
+; ALL:           addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; ALL:           and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
+; ALL:           and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
+; ALL:           or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; ALL:           sc      $[[R14]], 0($[[R2]])
+; ALL:           beqz    $[[R14]], $[[BB0]]
+
+; ALL:           and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
+; ALL:           srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
+
+; NO-SEB-SEH:    sll     $[[R17:[0-9]+]], $[[R16]], 24
+; NO-SEB-SEH:    sra     $2, $[[R17]], 24
+
+; HAS-SEB-SEH:   seb     $2, $[[R16]]
 }
 
 define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind {
@@ -165,56 +136,38 @@ entry:
   %0 = atomicrmw sub i8* @y, i8 %incr monotonic
   ret i8 %0
 
-; CHECK-EL-LABEL:   AtomicLoadSub8:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK-EL:   sllv     $[[R9:[0-9]+]], $4, $[[R4]]
-
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EL:   subu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK-EL:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
-; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EL:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK-EL:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EL:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EL:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
-; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
-; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EL:   sra     $2, $[[R17]], 24
-
-; CHECK-EB-LABEL:   AtomicLoadSub8:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; ALL-LABEL: AtomicLoadSub8:
+
+; MIPS32-ANY: lw      $[[R0:[0-9]+]], %got(y)
+; MIPS64-ANY: ld      $[[R0:[0-9]+]], %got_disp(y)(
+
+; ALL:        addiu   $[[R1:[0-9]+]], $zero, -4
+; ALL:        and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; ALL:        andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EL:   sll     $[[R5:[0-9]+]], $[[R3]], 3
 ; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
 ; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
-; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
-; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
-; CHECK-EB:   sllv    $[[R9:[0-9]+]], $4, $[[R5]]
-
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EB:   subu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK-EB:   and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
-; CHECK-EB:   and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
-; CHECK-EB:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK-EB:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EB:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EB:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
-; CHECK-EB:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EB:   sra     $2, $[[R17]], 24
+; ALL:        ori     $[[R6:[0-9]+]], $zero, 255
+; ALL:        sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; ALL:        nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; ALL:        sllv    $[[R9:[0-9]+]], $4, $[[R5]]
+
+; ALL:    $[[BB0:[A-Z_0-9]+]]:
+; ALL:        ll      $[[R10:[0-9]+]], 0($[[R2]])
+; ALL:        subu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; ALL:        and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
+; ALL:        and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
+; ALL:        or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; ALL:        sc      $[[R14]], 0($[[R2]])
+; ALL:        beqz    $[[R14]], $[[BB0]]
+
+; ALL:        and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
+; ALL:        srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
+
+; NO-SEB-SEH: sll     $[[R17:[0-9]+]], $[[R16]], 24
+; NO-SEB-SEH: sra     $2, $[[R17]], 24
+
+; HAS-SEB-SEH:seb     $2, $[[R16]]
 }
 
 define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind {
@@ -222,58 +175,39 @@ entry:
   %0 = atomicrmw nand i8* @y, i8 %incr monotonic
   ret i8 %0
 
-; CHECK-EL-LABEL:   AtomicLoadNand8:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK-EL:   sllv    $[[R9:[0-9]+]], $4, $[[R4]]
-
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EL:   and     $[[R18:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK-EL:   nor     $[[R11:[0-9]+]], $zero, $[[R18]]
-; CHECK-EL:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
-; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EL:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK-EL:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EL:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EL:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
-; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
-; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EL:   sra     $2, $[[R17]], 24
-
-; CHECK-EB-LABEL:   AtomicLoadNand8:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
-; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
-; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
-; CHECK-EB:   sllv    $[[R9:[0-9]+]], $4, $[[R5]]
-
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EB:   and     $[[R18:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK-EB:   nor     $[[R11:[0-9]+]], $zero, $[[R18]]
-; CHECK-EB:   and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
-; CHECK-EB:   and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
-; CHECK-EB:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK-EB:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EB:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EB:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
-; CHECK-EB:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EB:   sra     $2, $[[R17]], 24
+; ALL-LABEL: AtomicLoadNand8:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(y)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(y)(
+
+; ALL:           addiu   $[[R1:[0-9]+]], $zero, -4
+; ALL:           and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; ALL:           andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EL:      sll     $[[R5:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      xori    $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      sll     $[[R5:[0-9]+]], $[[R4]], 3
+; ALL:           ori     $[[R6:[0-9]+]], $zero, 255
+; ALL:           sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; ALL:           nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; ALL:           sllv    $[[R9:[0-9]+]], $4, $[[R5]]
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R10:[0-9]+]], 0($[[R2]])
+; ALL:           and     $[[R18:[0-9]+]], $[[R10]], $[[R9]]
+; ALL:           nor     $[[R11:[0-9]+]], $zero, $[[R18]]
+; ALL:           and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
+; ALL:           and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
+; ALL:           or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; ALL:           sc      $[[R14]], 0($[[R2]])
+; ALL:           beqz    $[[R14]], $[[BB0]]
+
+; ALL:           and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
+; ALL:           srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
+
+; NO-SEB-SEH:    sll     $[[R17:[0-9]+]], $[[R16]], 24
+; NO-SEB-SEH:    sra     $2, $[[R17]], 24
+
+; HAS-SEB-SEH:   seb     $2, $[[R16]]
 }
 
 define signext i8 @AtomicSwap8(i8 signext %newval) nounwind {
@@ -281,121 +215,126 @@ entry:
   %0 = atomicrmw xchg i8* @y, i8 %newval monotonic
   ret i8 %0
 
-; CHECK-EL-LABEL:   AtomicSwap8:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK-EL:   sllv    $[[R9:[0-9]+]], $4, $[[R4]]
-
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EL:   and     $[[R18:[0-9]+]], $[[R9]], $[[R6]]
-; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EL:   or      $[[R14:[0-9]+]], $[[R13]], $[[R18]]
-; CHECK-EL:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EL:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EL:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
-; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
-; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EL:   sra     $2, $[[R17]], 24
-
-; CHECK-EB-LABEL:   AtomicSwap8:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
-; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
-; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
-; CHECK-EB:   sllv    $[[R9:[0-9]+]], $4, $[[R5]]
-
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK-EB:   and     $[[R18:[0-9]+]], $[[R9]], $[[R7]]
-; CHECK-EB:   and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
-; CHECK-EB:   or      $[[R14:[0-9]+]], $[[R13]], $[[R18]]
-; CHECK-EB:   sc      $[[R14]], 0($[[R2]])
-; CHECK-EB:   beqz    $[[R14]], $[[BB0]]
-
-; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK-EB:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
-; CHECK-EB:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EB:   sra     $2, $[[R17]], 24
+; ALL-LABEL: AtomicSwap8:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(y)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(y)(
+
+; ALL:           addiu   $[[R1:[0-9]+]], $zero, -4
+; ALL:           and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; ALL:           andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EL:      sll     $[[R5:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      xori    $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      sll     $[[R5:[0-9]+]], $[[R4]], 3
+; ALL:           ori     $[[R6:[0-9]+]], $zero, 255
+; ALL:           sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; ALL:           nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; ALL:           sllv    $[[R9:[0-9]+]], $4, $[[R5]]
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R10:[0-9]+]], 0($[[R2]])
+; ALL:           and     $[[R18:[0-9]+]], $[[R9]], $[[R7]]
+; ALL:           and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
+; ALL:           or      $[[R14:[0-9]+]], $[[R13]], $[[R18]]
+; ALL:           sc      $[[R14]], 0($[[R2]])
+; ALL:           beqz    $[[R14]], $[[BB0]]
+
+; ALL:           and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
+; ALL:           srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
+
+; NO-SEB-SEH:    sll     $[[R17:[0-9]+]], $[[R16]], 24
+; NO-SEB-SEH:    sra     $2, $[[R17]], 24
+
+; HAS-SEB-SEH:   seb     $2, $[[R16]]
 }
 
 define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwind {
 entry:
-  %0 = cmpxchg i8* @y, i8 %oldval, i8 %newval monotonic monotonic
+  %pair0 = cmpxchg i8* @y, i8 %oldval, i8 %newval monotonic monotonic
+  %0 = extractvalue { i8, i1 } %pair0, 0
   ret i8 %0
 
-; CHECK-EL-LABEL:   AtomicCmpSwap8:
-; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK-EL:   andi    $[[R8:[0-9]+]], $4, 255
-; CHECK-EL:   sllv    $[[R9:[0-9]+]], $[[R8]], $[[R4]]
-; CHECK-EL:   andi    $[[R10:[0-9]+]], $5, 255
-; CHECK-EL:   sllv    $[[R11:[0-9]+]], $[[R10]], $[[R4]]
-
-; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EL:   ll      $[[R12:[0-9]+]], 0($[[R2]])
-; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R12]], $[[R6]]
-; CHECK-EL:   bne     $[[R13]], $[[R9]], $[[BB1:[A-Z_0-9]+]]
-
-; CHECK-EL:   and     $[[R14:[0-9]+]], $[[R12]], $[[R7]]
-; CHECK-EL:   or      $[[R15:[0-9]+]], $[[R14]], $[[R11]]
-; CHECK-EL:   sc      $[[R15]], 0($[[R2]])
-; CHECK-EL:   beqz    $[[R15]], $[[BB0]]
-
-; CHECK-EL:   $[[BB1]]:
-; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R13]], $[[R4]]
-; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK-EL:   sra     $2, $[[R17]], 24
-
-; CHECK-EB-LABEL:   AtomicCmpSwap8:
-; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
-; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
-; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
-; CHECK-EB:   andi    $[[R9:[0-9]+]], $4, 255
-; CHECK-EB:   sllv    $[[R10:[0-9]+]], $[[R9]], $[[R5]]
-; CHECK-EB:   andi    $[[R11:[0-9]+]], $5, 255
-; CHECK-EB:   sllv    $[[R12:[0-9]+]], $[[R11]], $[[R5]]
-
-; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK-EB:   ll      $[[R13:[0-9]+]], 0($[[R2]])
-; CHECK-EB:   and     $[[R14:[0-9]+]], $[[R13]], $[[R7]]
-; CHECK-EB:   bne     $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]]
-
-; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R13]], $[[R8]]
-; CHECK-EB:   or      $[[R16:[0-9]+]], $[[R15]], $[[R12]]
-; CHECK-EB:   sc      $[[R16]], 0($[[R2]])
-; CHECK-EB:   beqz    $[[R16]], $[[BB0]]
-
-; CHECK-EB:   $[[BB1]]:
-; CHECK-EB:   srlv    $[[R17:[0-9]+]], $[[R14]], $[[R5]]
-; CHECK-EB:   sll     $[[R18:[0-9]+]], $[[R17]], 24
-; CHECK-EB:   sra     $2, $[[R18]], 24
+; ALL-LABEL: AtomicCmpSwap8:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(y)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(y)(
+
+; ALL:           addiu   $[[R1:[0-9]+]], $zero, -4
+; ALL:           and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; ALL:           andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EL:      sll     $[[R5:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      xori    $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EB:      sll     $[[R5:[0-9]+]], $[[R4]], 3
+; ALL:           ori     $[[R6:[0-9]+]], $zero, 255
+; ALL:           sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; ALL:           nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; ALL:           andi    $[[R9:[0-9]+]], $4, 255
+; ALL:           sllv    $[[R10:[0-9]+]], $[[R9]], $[[R5]]
+; ALL:           andi    $[[R11:[0-9]+]], $5, 255
+; ALL:           sllv    $[[R12:[0-9]+]], $[[R11]], $[[R5]]
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R13:[0-9]+]], 0($[[R2]])
+; ALL:           and     $[[R14:[0-9]+]], $[[R13]], $[[R7]]
+; ALL:           bne     $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]]
+
+; ALL:           and     $[[R15:[0-9]+]], $[[R13]], $[[R8]]
+; ALL:           or      $[[R16:[0-9]+]], $[[R15]], $[[R12]]
+; ALL:           sc      $[[R16]], 0($[[R2]])
+; ALL:           beqz    $[[R16]], $[[BB0]]
+
+; ALL:       $[[BB1]]:
+; ALL:           srlv    $[[R17:[0-9]+]], $[[R14]], $[[R5]]
+
+; NO-SEB-SEH:    sll     $[[R18:[0-9]+]], $[[R17]], 24
+; NO-SEB-SEH:    sra     $2, $[[R18]], 24
+
+; HAS-SEB-SEH:   seb     $2, $[[R17]]
+}
+
+; Check one i16 so that we cover the seh sign extend
+@z = common global i16 0, align 1
+
+define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind {
+entry:
+  %0 = atomicrmw add i16* @z, i16 %incr monotonic
+  ret i16 %0
+
+; ALL-LABEL: AtomicLoadAdd16:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(z)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(z)(
+
+; ALL:           addiu   $[[R1:[0-9]+]], $zero, -4
+; ALL:           and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; ALL:           andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EB:      xori    $[[R4:[0-9]+]], $[[R3]], 2
+; CHECK-EB:      sll     $[[R5:[0-9]+]], $[[R4]], 3
+; CHECK-EL:      sll     $[[R5:[0-9]+]], $[[R3]], 3
+; ALL:           ori     $[[R6:[0-9]+]], $zero, 65535
+; ALL:           sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; ALL:           nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; ALL:           sllv    $[[R9:[0-9]+]], $4, $[[R5]]
+
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R10:[0-9]+]], 0($[[R2]])
+; ALL:           addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; ALL:           and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
+; ALL:           and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
+; ALL:           or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; ALL:           sc      $[[R14]], 0($[[R2]])
+; ALL:           beqz    $[[R14]], $[[BB0]]
+
+; ALL:           and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
+; ALL:           srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
+
+; NO-SEB-SEH:    sll     $[[R17:[0-9]+]], $[[R16]], 16
+; NO-SEB-SEH:    sra     $2, $[[R17]], 16
+
+; MIPS32R2:      seh     $2, $[[R16]]
 }
 
+
 @countsint = common global i32 0, align 4
 
 define i32 @CheckSync(i32 %v) nounwind noinline {
@@ -403,19 +342,13 @@ entry:
   %0 = atomicrmw add i32* @countsint, i32 %v seq_cst
   ret i32 %0 
 
-; CHECK-EL-LABEL:   CheckSync:
-; CHECK-EL:   sync 0
-; CHECK-EL:   ll
-; CHECK-EL:   sc
-; CHECK-EL:   beq
-; CHECK-EL:   sync 0
-
-; CHECK-EB-LABEL:   CheckSync:
-; CHECK-EB:   sync 0
-; CHECK-EB:   ll
-; CHECK-EB:   sc
-; CHECK-EB:   beq
-; CHECK-EB:   sync 0
+; ALL-LABEL: CheckSync:
+
+; ALL:           sync
+; ALL:           ll
+; ALL:           sc
+; ALL:           beq
+; ALL:           sync
 }
 
 ; make sure that this assertion in
@@ -429,8 +362,29 @@ entry:
 
 define i32 @zeroreg() nounwind {
 entry:
-  %0 = cmpxchg i32* @a, i32 1, i32 0 seq_cst seq_cst
+  %pair0 = cmpxchg i32* @a, i32 1, i32 0 seq_cst seq_cst
+  %0 = extractvalue { i32, i1 } %pair0, 0
   %1 = icmp eq i32 %0, 1
   %conv = zext i1 %1 to i32
   ret i32 %conv
 }
+
+; Check that MIPS32R6 has the correct offset range.
+; FIXME: At the moment, we don't seem to do addr+offset for any atomic load/store.
+define i32 @AtomicLoadAdd32_OffGt9Bit(i32 %incr) nounwind {
+entry:
+  %0 = atomicrmw add i32* getelementptr(i32* @x, i32 256), i32 %incr monotonic
+  ret i32 %0
+
+; ALL-LABEL: AtomicLoadAdd32_OffGt9Bit:
+
+; MIPS32-ANY:    lw      $[[R0:[0-9]+]], %got(x)
+; MIPS64-ANY:    ld      $[[R0:[0-9]+]], %got_disp(x)(
+
+; ALL:           addiu   $[[PTR:[0-9]+]], $[[R0]], 1024
+; ALL:       $[[BB0:[A-Z_0-9]+]]:
+; ALL:           ll      $[[R1:[0-9]+]], 0($[[PTR]])
+; ALL:           addu    $[[R2:[0-9]+]], $[[R1]], $4
+; ALL:           sc      $[[R2]], 0($[[PTR]])
+; ALL:           beqz    $[[R2]], $[[BB0]]
+}
diff --git a/test/CodeGen/Mips/atomicops.ll b/test/CodeGen/Mips/atomicops.ll
index dc07c63..c264152 100644
--- a/test/CodeGen/Mips/atomicops.ll
+++ b/test/CodeGen/Mips/atomicops.ll
@@ -20,7 +20,8 @@ entry:
   %add.i = add nsw i32 %0, 2
   %1 = load volatile i32* %x, align 4
   %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %add.i, i32 %1) nounwind
-  %2 = cmpxchg i32* %x, i32 1, i32 2 seq_cst seq_cst
+  %pair = cmpxchg i32* %x, i32 1, i32 2 seq_cst seq_cst
+  %2 = extractvalue { i32, i1 } %pair, 0
   %3 = load volatile i32* %x, align 4
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %2, i32 %3) nounwind
   %4 = atomicrmw xchg i32* %x, i32 1 seq_cst
diff --git a/test/CodeGen/Mips/buildpairextractelementf64.ll b/test/CodeGen/Mips/buildpairextractelementf64.ll
index b9bf2b6..88d1d07 100644
--- a/test/CodeGen/Mips/buildpairextractelementf64.ll
+++ b/test/CodeGen/Mips/buildpairextractelementf64.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=FP32 -check-prefix=CHECK
 ; RUN: llc -march=mips  < %s | FileCheck %s -check-prefix=FP32 -check-prefix=CHECK
-; RUN: llc -march=mipsel -mattr=+fp64 < %s | FileCheck %s -check-prefix=FP64 -check-prefix=CHECK
-; RUN: llc -march=mips -mattr=+fp64 < %s | FileCheck %s -check-prefix=FP64 -check-prefix=CHECK
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+fp64 < %s | FileCheck %s -check-prefix=FP64 -check-prefix=CHECK
+; RUN: llc -march=mips -mcpu=mips32r2 -mattr=+fp64 < %s | FileCheck %s -check-prefix=FP64 -check-prefix=CHECK
 
 @a = external global i32
 
diff --git a/test/CodeGen/Mips/cconv/callee-saved-fpxx.ll b/test/CodeGen/Mips/cconv/callee-saved-fpxx.ll
new file mode 100644
index 0000000..4b28b99
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/callee-saved-fpxx.ll
@@ -0,0 +1,58 @@
+; RUN: llc -march=mips -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX %s
+; RUN: llc -march=mipsel -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX %s
+; RUN: llc -march=mips -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX-INV %s
+; RUN: llc -march=mipsel -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX-INV %s
+
+; RUN-TODO: llc -march=mips64 -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX %s
+; RUN-TODO: llc -march=mips64el -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX %s
+; RUN-TODO: llc -march=mips64 -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX-INV --check-prefix=O32-FPXX-INV %s
+; RUN-TODO: llc -march=mips64el -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX-INV --check-prefix=O32-FPXX-INV %s
+
+define void @fpu_clobber() nounwind {
+entry:
+    call void asm "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f12},~{$f13},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+    ret void
+}
+
+; O32-FPXX-LABEL: fpu_clobber:
+; O32-FPXX-INV-NOT:   sdc1 $f0,
+; O32-FPXX-INV-NOT:   sdc1 $f1,
+; O32-FPXX-INV-NOT:   sdc1 $f2,
+; O32-FPXX-INV-NOT:   sdc1 $f3,
+; O32-FPXX-INV-NOT:   sdc1 $f4,
+; O32-FPXX-INV-NOT:   sdc1 $f5,
+; O32-FPXX-INV-NOT:   sdc1 $f6,
+; O32-FPXX-INV-NOT:   sdc1 $f7,
+; O32-FPXX-INV-NOT:   sdc1 $f8,
+; O32-FPXX-INV-NOT:   sdc1 $f9,
+; O32-FPXX-INV-NOT:   sdc1 $f10,
+; O32-FPXX-INV-NOT:   sdc1 $f11,
+; O32-FPXX-INV-NOT:   sdc1 $f12,
+; O32-FPXX-INV-NOT:   sdc1 $f13,
+; O32-FPXX-INV-NOT:   sdc1 $f14,
+; O32-FPXX-INV-NOT:   sdc1 $f15,
+; O32-FPXX-INV-NOT:   sdc1 $f16,
+; O32-FPXX-INV-NOT:   sdc1 $f17,
+; O32-FPXX-INV-NOT:   sdc1 $f18,
+; O32-FPXX-INV-NOT:   sdc1 $f19,
+; O32-FPXX-INV-NOT:   sdc1 $f21,
+; O32-FPXX-INV-NOT:   sdc1 $f23,
+; O32-FPXX-INV-NOT:   sdc1 $f25,
+; O32-FPXX-INV-NOT:   sdc1 $f27,
+; O32-FPXX-INV-NOT:   sdc1 $f29,
+; O32-FPXX-INV-NOT:   sdc1 $f31,
+
+; O32-FPXX:           addiu $sp, $sp, -48
+; O32-FPXX-DAG:       sdc1 [[F20:\$f20]], [[OFF20:[0-9]+]]($sp)
+; O32-FPXX-DAG:       sdc1 [[F22:\$f22]], [[OFF22:[0-9]+]]($sp)
+; O32-FPXX-DAG:       sdc1 [[F24:\$f24]], [[OFF24:[0-9]+]]($sp)
+; O32-FPXX-DAG:       sdc1 [[F26:\$f26]], [[OFF26:[0-9]+]]($sp)
+; O32-FPXX-DAG:       sdc1 [[F28:\$f28]], [[OFF28:[0-9]+]]($sp)
+; O32-FPXX-DAG:       sdc1 [[F30:\$f30]], [[OFF30:[0-9]+]]($sp)
+; O32-FPXX-DAG:       ldc1 [[F20]], [[OFF20]]($sp)
+; O32-FPXX-DAG:       ldc1 [[F22]], [[OFF22]]($sp)
+; O32-FPXX-DAG:       ldc1 [[F24]], [[OFF24]]($sp)
+; O32-FPXX-DAG:       ldc1 [[F26]], [[OFF26]]($sp)
+; O32-FPXX-DAG:       ldc1 [[F28]], [[OFF28]]($sp)
+; O32-FPXX-DAG:       ldc1 [[F30]], [[OFF30]]($sp)
+; O32-FPXX:           addiu $sp, $sp, 48
diff --git a/test/CodeGen/Mips/cconv/callee-saved-fpxx1.ll b/test/CodeGen/Mips/cconv/callee-saved-fpxx1.ll
new file mode 100644
index 0000000..489879e
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/callee-saved-fpxx1.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=mips -mattr=+o32,+fp64 < %s | FileCheck --check-prefix=O32-FP64-INV %s
+; RUN: llc -march=mipsel -mattr=+o32,+fp64 < %s | FileCheck --check-prefix=O32-FP64-INV %s
+
+; RUN: llc -march=mips -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=O32-FPXX %s
+; RUN: llc -march=mipsel -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=O32-FPXX %s
+
+; RUN-TODO: llc -march=mips64 -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=O32-FPXX %s
+; RUN-TODO: llc -march=mips64el -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=O32-FPXX %s
+
+define void @fpu_clobber() nounwind {
+entry:
+    call void asm "# Clobber", "~{$f21}"()
+    ret void
+}
+
+; O32-FPXX-LABEL: fpu_clobber:
+
+; O32-FPXX:           addiu $sp, $sp, -8
+
+; O32-FP64-INV-NOT:   sdc1 $f20,
+; O32-FPXX-DAG:       sdc1 [[F20:\$f20]], [[OFF20:[0-9]+]]($sp)
+; O32-FPXX-DAG:       ldc1 [[F20]], [[OFF20]]($sp)
+
+; O32-FPXX:           addiu $sp, $sp, 8
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index b9732eb..999bdb4 100644
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -1,17 +1,43 @@
-; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=O32
-; RUN: llc -march=mips -regalloc=basic < %s | FileCheck %s -check-prefix=O32
-; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 < %s | FileCheck %s -check-prefix=N64
-; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck %s -check-prefix=N64
+; RUN: llc -march=mips     -mcpu=mips32                 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32-CMOV
+; RUN: llc -march=mips     -mcpu=mips32 -regalloc=basic < %s | FileCheck %s -check-prefix=ALL -check-prefix=32-CMOV
+; RUN: llc -march=mips     -mcpu=mips32r2               < %s | FileCheck %s -check-prefix=ALL -check-prefix=32-CMOV
+; RUN: llc -march=mips     -mcpu=mips32r6               < %s | FileCheck %s -check-prefix=ALL -check-prefix=32-CMP
+; RUN: llc -march=mips64el -mcpu=mips4                  < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-CMOV
+; RUN: llc -march=mips64el -mcpu=mips64                 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-CMOV
+; RUN: llc -march=mips64el -mcpu=mips64r6               < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-CMP
 
 @i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
 @i3 = common global i32* null, align 4
 
-; O32-DAG:  lw $[[R0:[0-9]+]], %got(i3)
-; O32-DAG:  addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(i1)
-; O32:      movn $[[R0]], $[[R1]], ${{[0-9]+}}
-; N64-DAG:  ldr $[[R0:[0-9]+]]
-; N64-DAG:  ld $[[R1:[0-9]+]], %got_disp(i1)
-; N64:      movn $[[R0]], $[[R1]], ${{[0-9]+}}
+; ALL-LABEL: cmov1:
+
+; 32-CMOV-DAG:  lw $[[R0:[0-9]+]], %got(i3)
+; 32-CMOV-DAG:  addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(i1)
+; 32-CMOV-DAG:  movn $[[R0]], $[[R1]], $4
+; 32-CMOV-DAG:  lw $2, 0($[[R0]])
+
+; 32-CMP-DAG:   lw $[[R0:[0-9]+]], %got(i3)
+; 32-CMP-DAG:   addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(i1)
+; 32-CMP-DAG:   selnez $[[T0:[0-9]+]], $[[R1]], $4
+; 32-CMP-DAG:   seleqz $[[T1:[0-9]+]], $[[R0]], $4
+; 32-CMP-DAG:   or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
+; 32-CMP-DAG:   lw $2, 0($[[T2]])
+
+; 64-CMOV-DAG:  ldr $[[R0:[0-9]+]]
+; 64-CMOV-DAG:  ld $[[R1:[0-9]+]], %got_disp(i1)
+; 64-CMOV-DAG:  movn $[[R0]], $[[R1]], $4
+
+; 64-CMP-DAG:   ld $[[R0:[0-9]+]], %got_disp(i3)(
+; 64-CMP-DAG:   daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got_disp(i1)
+; FIXME: This sll works around an implementation detail in the code generator
+;        (setcc's result is i32 so bits 32-63 are undefined). It's not really
+;        needed.
+; 64-CMP-DAG:   sll $[[CC:[0-9]+]], $4, 0
+; 64-CMP-DAG:   selnez $[[T0:[0-9]+]], $[[R1]], $[[CC]]
+; 64-CMP-DAG:   seleqz $[[T1:[0-9]+]], $[[R0]], $[[CC]]
+; 64-CMP-DAG:   or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
+; 64-CMP-DAG:   ld $2, 0($[[T2]])
+
 define i32* @cmov1(i32 %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
@@ -23,14 +49,35 @@ entry:
 @c = global i32 1, align 4
 @d = global i32 0, align 4
 
-; O32-LABEL: cmov2:
-; O32: addiu $[[R1:[0-9]+]], ${{[a-z0-9]+}}, %got(d)
-; O32: addiu $[[R0:[0-9]+]], ${{[a-z0-9]+}}, %got(c)
-; O32: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
-; N64-LABEL: cmov2:
-; N64: daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got_disp(d)
-; N64: daddiu $[[R0:[0-9]+]], ${{[0-9]+}}, %got_disp(c)
-; N64: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
+; ALL-LABEL: cmov2:
+
+; 32-CMOV-DAG:  addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(d)
+; 32-CMOV-DAG:  addiu $[[R0:[0-9]+]], ${{[0-9]+}}, %got(c)
+; 32-CMOV-DAG:  movn  $[[R1]], $[[R0]], $4
+; 32-CMOV-DAG:  lw $2, 0($[[R0]])
+
+; 32-CMP-DAG:   addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(d)
+; 32-CMP-DAG:   addiu $[[R0:[0-9]+]], ${{[0-9]+}}, %got(c)
+; 32-CMP-DAG:   selnez $[[T0:[0-9]+]], $[[R0]], $4
+; 32-CMP-DAG:   seleqz $[[T1:[0-9]+]], $[[R1]], $4
+; 32-CMP-DAG:   or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
+; 32-CMP-DAG:   lw $2, 0($[[T2]])
+
+; 64-CMOV:      daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got_disp(d)
+; 64-CMOV:      daddiu $[[R0:[0-9]+]], ${{[0-9]+}}, %got_disp(c)
+; 64-CMOV:      movn  $[[R1]], $[[R0]], $4
+
+; 64-CMP-DAG:   daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got_disp(d)
+; 64-CMP-DAG:   daddiu $[[R0:[0-9]+]], ${{[0-9]+}}, %got_disp(c)
+; FIXME: This sll works around an implementation detail in the code generator
+;        (setcc's result is i32 so bits 32-63 are undefined). It's not really
+;        needed.
+; 64-CMP-DAG:   sll $[[CC:[0-9]+]], $4, 0
+; 64-CMP-DAG:   selnez $[[T0:[0-9]+]], $[[R0]], $[[CC]]
+; 64-CMP-DAG:   seleqz $[[T1:[0-9]+]], $[[R1]], $[[CC]]
+; 64-CMP-DAG:   or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
+; 64-CMP-DAG:   lw $2, 0($[[T2]])
+
 define i32 @cmov2(i32 %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
@@ -40,9 +87,28 @@ entry:
   ret i32 %cond
 }
 
-; O32-LABEL: cmov3:
-; O32: xori $[[R0:[0-9]+]], ${{[0-9]+}}, 234
-; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: cmov3:
+
+; We won't check the result register since we can't know if the move is first
+; or last. We do know it will be either one of two registers so we can at least
+; check that.
+
+; 32-CMOV:      xori $[[R0:[0-9]+]], $4, 234
+; 32-CMOV:      movz ${{[26]}}, $5, $[[R0]]
+
+; 32-CMP-DAG:   xori $[[CC:[0-9]+]], $4, 234
+; 32-CMP-DAG:   seleqz $[[T0:[0-9]+]], $5, $[[CC]]
+; 32-CMP-DAG:   selnez $[[T1:[0-9]+]], $6, $[[CC]]
+; 32-CMP-DAG:   or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV:      xori $[[R0:[0-9]+]], $4, 234
+; 64-CMOV:      movz ${{[26]}}, $5, $[[R0]]
+
+; 64-CMP-DAG:   xori $[[CC:[0-9]+]], $4, 234
+; 64-CMP-DAG:   seleqz $[[T0:[0-9]+]], $5, $[[CC]]
+; 64-CMP-DAG:   selnez $[[T1:[0-9]+]], $6, $[[CC]]
+; 64-CMP-DAG:   or $2, $[[T0]], $[[T1]]
+
 define i32 @cmov3(i32 %a, i32 %b, i32 %c) nounwind readnone {
 entry:
   %cmp = icmp eq i32 %a, 234
@@ -50,9 +116,36 @@ entry:
   ret i32 %cond
 }
 
-; N64-LABEL: cmov4:
-; N64: xori $[[R0:[0-9]+]], ${{[0-9]+}}, 234
-; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: cmov4:
+
+; We won't check the result register since we can't know if the move is first
+; or last. We do know it will be one of two registers so we can at least check
+; that.
+
+; 32-CMOV-DAG: xori $[[R0:[0-9]+]], $4, 234
+; 32-CMOV-DAG: lw $[[R1:2]], 16($sp)
+; 32-CMOV-DAG: lw $[[R2:3]], 20($sp)
+; 32-CMOV-DAG: movz $[[R1]], $6, $[[R0]]
+; 32-CMOV-DAG: movz $[[R2]], $7, $[[R0]]
+
+; 32-CMP-DAG:  xori $[[R0:[0-9]+]], $4, 234
+; 32-CMP-DAG:  lw $[[R1:[0-9]+]], 16($sp)
+; 32-CMP-DAG:  lw $[[R2:[0-9]+]], 20($sp)
+; 32-CMP-DAG:  seleqz $[[T0:[0-9]+]], $6, $[[R0]]
+; 32-CMP-DAG:  seleqz $[[T1:[0-9]+]], $7, $[[R0]]
+; 32-CMP-DAG:  selnez $[[T2:[0-9]+]], $[[R1]], $[[R0]]
+; 32-CMP-DAG:  selnez $[[T3:[0-9]+]], $[[R2]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T2]]
+; 32-CMP-DAG:  or $3, $[[T1]], $[[T3]]
+
+; 64-CMOV: xori $[[R0:[0-9]+]], $4, 234
+; 64-CMOV: movz ${{[26]}}, $5, $[[R0]]
+
+; 64-CMP-DAG:  xori $[[R0:[0-9]+]], $4, 234
+; 64-CMP-DAG:  seleqz $[[T0:[0-9]+]], $5, $[[R0]]
+; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $6, $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
 define i64 @cmov4(i32 %a, i64 %b, i64 %c) nounwind readnone {
 entry:
   %cmp = icmp eq i32 %a, 234
@@ -68,9 +161,33 @@ entry:
 ;  (movz t, (setlt a, N + 1), f)
 ; if N + 1 fits in 16-bit.
 
-; O32-LABEL: slti0:
-; O32: slti $[[R0:[0-9]+]], ${{[0-9]+}}, 32767
-; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: slti0:
+
+; 32-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: slti $[[R0:[0-9]+]], $4, 32767
+; 32-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  slti $[[R0:[0-9]+]], $4, 32767
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 32-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 32-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: slti $[[R0:[0-9]+]], $4, 32767
+; 64-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMP-DAG:  slti $[[R0:[0-9]+]], $4, 32767
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 64-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @slti0(i32 %a) {
 entry:
@@ -79,19 +196,72 @@ entry:
   ret i32 %cond
 }
 
-; O32-LABEL: slti1:
-; O32: slt ${{[0-9]+}}
+; ALL-LABEL: slti1:
+
+; 32-CMOV-DAG: addiu $[[I7:[0-9]+]], $zero, 7
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: addiu $[[R1:[0-9]+]], $zero, 32767
+; 32-CMOV-DAG: slt $[[R0:[0-9]+]], $[[R1]], $4
+; 32-CMOV-DAG: movn $[[I5]], $[[I7]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I7:[0-9]+]], $zero, 7
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  addiu $[[I32767:[0-9]+]], $zero, 32767
+; 32-CMP-DAG:  slt $[[R0:[0-9]+]], $[[I32767]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 32-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I7]], $[[R0]]
+; 32-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I7:[0-9]+]], $zero, 7
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: addiu $[[R1:[0-9]+]], $zero, 32767
+; 64-CMOV-DAG: slt $[[R0:[0-9]+]], $[[R1]], $4
+; 64-CMOV-DAG: movn $[[I5]], $[[I7]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I7:[0-9]+]], $zero, 7
+; 64-CMP-DAG:  addiu $[[I5:2]], $zero, 5
+; 64-CMP-DAG:  addiu $[[R1:[0-9]+]], $zero, 32767
+; 64-CMP-DAG:  slt $[[R0:[0-9]+]], $[[R1]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 64-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I7]], $[[R0]]
+; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @slti1(i32 %a) {
 entry:
   %cmp = icmp sgt i32 %a, 32767
-  %cond = select i1 %cmp, i32 3, i32 5
+  %cond = select i1 %cmp, i32 7, i32 5
   ret i32 %cond
 }
 
-; O32-LABEL: slti2:
-; O32: slti $[[R0:[0-9]+]], ${{[0-9]+}}, -32768
-; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: slti2:
+
+; 32-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: slti $[[R0:[0-9]+]], $4, -32768
+; 32-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  slti $[[R0:[0-9]+]], $4, -32768
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 32-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 32-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: slti $[[R0:[0-9]+]], $4, -32768
+; 64-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMP-DAG:  slti $[[R0:[0-9]+]], $4, -32768
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 64-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @slti2(i32 %a) {
 entry:
@@ -100,8 +270,41 @@ entry:
   ret i32 %cond
 }
 
-; O32-LABEL: slti3:
-; O32: slt ${{[0-9]+}}
+; ALL-LABEL: slti3:
+
+; 32-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: lui $[[R1:[0-9]+]], 65535
+; 32-CMOV-DAG: ori $[[R1]], $[[R1]], 32766
+; 32-CMOV-DAG: slt $[[R0:[0-9]+]], $[[R1]], $4
+; 32-CMOV-DAG: movn $[[I5]], $[[I3]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  lui $[[IMM:[0-9]+]], 65535
+; 32-CMP-DAG:  ori $[[IMM]], $[[IMM]], 32766
+; 32-CMP-DAG:  slt $[[R0:[0-9]+]], $[[I32767]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 32-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 32-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: lui $[[R1:[0-9]+]], 65535
+; 64-CMOV-DAG: ori $[[R1]], $[[R1]], 32766
+; 64-CMOV-DAG: slt $[[R0:[0-9]+]], $[[R1]], $4
+; 64-CMOV-DAG: movn $[[I5]], $[[I3]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMP-DAG:  addiu $[[I5:2]], $zero, 5
+; 64-CMP-DAG:  lui $[[IMM:[0-9]+]], 65535
+; 64-CMP-DAG:  ori $[[IMM]], $[[IMM]], 32766
+; 64-CMP-DAG:  slt $[[R0:[0-9]+]], $[[IMM]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 64-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @slti3(i32 %a) {
 entry:
@@ -112,30 +315,117 @@ entry:
 
 ; 64-bit patterns.
 
-; N64-LABEL: slti64_0:
-; N64: slti $[[R0:[0-9]+]], ${{[0-9]+}}, 32767
-; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: slti64_0:
+
+; 32-CMOV-DAG:  slt $[[CC:[0-9]+]], $zero, $4
+; 32-CMOV-DAG:  addiu $[[I32766:[0-9]+]], $zero, 32766
+; 32-CMOV-DAG:  sltu $[[R1:[0-9]+]], $[[I32766]], $5
+; 32-CMOV-DAG:  movz $[[CC:[0-9]+]], $[[R1]], $4
+; 32-CMOV-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMOV-DAG:  addiu $[[I4:3]], $zero, 4
+; 32-CMOV-DAG:  movn $[[I4]], $[[I5]], $[[CC]]
+; 32-CMOV-DAG:  addiu $2, $zero, 0
+
+; 32-CMP-DAG:   slt $[[CC0:[0-9]+]], $zero, $4
+; 32-CMP-DAG:   addiu $[[I32766:[0-9]+]], $zero, 32766
+; 32-CMP-DAG:   sltu $[[CC1:[0-9]+]], $[[I32766]], $5
+; 32-CMP-DAG:   selnez $[[CC2:[0-9]+]], $[[CC0]], $4
+; 32-CMP-DAG:   seleqz $[[CC3:[0-9]+]], $[[CC1]], $4
+; 32-CMP:       or $[[CC:[0-9]+]], $[[CC3]], $[[CC2]]
+; 32-CMP-DAG:   addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:   addiu $[[I4:[0-9]+]], $zero, 4
+; 32-CMP-DAG:   seleqz $[[T0:[0-9]+]], $[[I4]], $[[CC]]
+; 32-CMP-DAG:   selnez $[[T1:[0-9]+]], $[[I5]], $[[CC]]
+; 32-CMP-DAG:   or $3, $[[T1]], $[[T0]]
+; 32-CMP-DAG:   addiu $2, $zero, 0
+
+; 64-CMOV-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMOV-DAG:  addiu $[[I4:2]], $zero, 4
+; 64-CMOV-DAG:  slti $[[R0:[0-9]+]], $4, 32767
+; 64-CMOV-DAG:  movz $[[I4]], $[[I5]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMP-DAG:  addiu $[[I4:[0-9]+]], $zero, 4
+; 64-CMP-DAG:  slti $[[R0:[0-9]+]], $4, 32767
+; FIXME: We can do better than this by adding/subtracting the result of slti
+;        to/from one of the constants.
+; 64-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I4]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i64 @slti64_0(i64 %a) {
 entry:
   %cmp = icmp sgt i64 %a, 32766
-  %conv = select i1 %cmp, i64 3, i64 4
+  %conv = select i1 %cmp, i64 5, i64 4
   ret i64 %conv
 }
 
-; N64-LABEL: slti64_1:
-; N64: slt ${{[0-9]+}}
+; ALL-LABEL: slti64_1:
+
+; 32-CMOV-DAG:  slt $[[CC:[0-9]+]], $zero, $4
+; 32-CMOV-DAG:  addiu $[[I32766:[0-9]+]], $zero, 32767
+; 32-CMOV-DAG:  sltu $[[R1:[0-9]+]], $[[I32766]], $5
+; 32-CMOV-DAG:  movz $[[CC:[0-9]+]], $[[R1]], $4
+; 32-CMOV-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMOV-DAG:  addiu $[[I4:3]], $zero, 4
+; 32-CMOV-DAG:  movn $[[I4]], $[[I5]], $[[CC]]
+; 32-CMOV-DAG:  addiu $2, $zero, 0
+
+; 32-CMP-DAG:   slt $[[CC0:[0-9]+]], $zero, $4
+; 32-CMP-DAG:   addiu $[[I32766:[0-9]+]], $zero, 32767
+; 32-CMP-DAG:   sltu $[[CC1:[0-9]+]], $[[I32766]], $5
+; 32-CMP-DAG:   selnez $[[CC2:[0-9]+]], $[[CC0]], $4
+; 32-CMP-DAG:   seleqz $[[CC3:[0-9]+]], $[[CC1]], $4
+; 32-CMP:       or $[[CC:[0-9]+]], $[[CC3]], $[[CC2]]
+; 32-CMP-DAG:   addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:   addiu $[[I4:[0-9]+]], $zero, 4
+; 32-CMP-DAG:   seleqz $[[T0:[0-9]+]], $[[I4]], $[[CC]]
+; 32-CMP-DAG:   selnez $[[T1:[0-9]+]], $[[I5]], $[[CC]]
+; 32-CMP-DAG:   or $3, $[[T1]], $[[T0]]
+; 32-CMP-DAG:   addiu $2, $zero, 0
+
+; 64-CMOV-DAG: daddiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMOV-DAG: daddiu $[[I4:2]], $zero, 4
+; 64-CMOV-DAG: daddiu $[[R1:[0-9]+]], $zero, 32767
+; 64-CMOV-DAG: slt $[[R0:[0-9]+]], $[[R1]], $4
+; 64-CMOV-DAG: movn $[[I4]], $[[I5]], $[[R0]]
+
+; 64-CMP-DAG:  daddiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMP-DAG:  daddiu $[[I4:2]], $zero, 4
+; 64-CMP-DAG:  daddiu $[[R1:[0-9]+]], $zero, 32767
+; 64-CMP-DAG:  slt $[[R0:[0-9]+]], $[[R1]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 64-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I4]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i64 @slti64_1(i64 %a) {
 entry:
   %cmp = icmp sgt i64 %a, 32767
-  %conv = select i1 %cmp, i64 3, i64 4
+  %conv = select i1 %cmp, i64 5, i64 4
   ret i64 %conv
 }
 
-; N64-LABEL: slti64_2:
-; N64: slti $[[R0:[0-9]+]], ${{[0-9]+}}, -32768
-; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: slti64_2:
+
+; FIXME: The 32-bit versions of this test are too complicated to reasonably
+;        match at the moment. They do show some missing optimizations though
+;        such as:
+;           (movz $a, $b, (neg $c)) -> (movn $a, $b, $c)
+
+; 64-CMOV-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMOV-DAG:  addiu $[[I4:2]], $zero, 4
+; 64-CMOV-DAG:  slti $[[R0:[0-9]+]], $4, -32768
+; 64-CMOV-DAG:  movz $[[I4]], $[[I3]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMP-DAG:  addiu $[[I4:[0-9]+]], $zero, 4
+; 64-CMP-DAG:  slti $[[R0:[0-9]+]], $4, -32768
+; FIXME: We can do better than this by adding/subtracting the result of slti
+;        to/from one of the constants.
+; 64-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I4]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i64 @slti64_2(i64 %a) {
 entry:
@@ -144,21 +434,64 @@ entry:
   ret i64 %conv
 }
 
-; N64-LABEL: slti64_3:
-; N64: slt ${{[0-9]+}}
+; ALL-LABEL: slti64_3:
+
+; FIXME: The 32-bit versions of this test are too complicated to reasonably
+;        match at the moment. They do show some missing optimizations though
+;        such as:
+;           (movz $a, $b, (neg $c)) -> (movn $a, $b, $c)
+
+; 64-CMOV-DAG: daddiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMOV-DAG: daddiu $[[I4:2]], $zero, 4
+; 64-CMOV-DAG: daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, 32766
+; 64-CMOV-DAG: slt $[[R0:[0-9]+]], $[[R1]], $4
+; 64-CMOV-DAG: movn $[[I4]], $[[I5]], $[[R0]]
+
+; 64-CMP-DAG:  daddiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMP-DAG:  daddiu $[[I4:2]], $zero, 4
+; 64-CMP-DAG:  daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, 32766
+; 64-CMP-DAG:  slt $[[R0:[0-9]+]], $[[R1]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 64-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I4]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i64 @slti64_3(i64 %a) {
 entry:
   %cmp = icmp sgt i64 %a, -32770
-  %conv = select i1 %cmp, i64 3, i64 4
+  %conv = select i1 %cmp, i64 5, i64 4
   ret i64 %conv
 }
 
 ; sltiu instructions.
 
-; O32-LABEL: sltiu0:
-; O32: sltiu $[[R0:[0-9]+]], ${{[0-9]+}}, 32767
-; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: sltiu0:
+
+; 32-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: sltiu $[[R0:[0-9]+]], $4, 32767
+; 32-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  sltiu $[[R0:[0-9]+]], $4, 32767
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 32-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 32-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: sltiu $[[R0:[0-9]+]], $4, 32767
+; 64-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMP-DAG:  sltiu $[[R0:[0-9]+]], $4, 32767
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 64-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @sltiu0(i32 %a) {
 entry:
@@ -167,19 +500,72 @@ entry:
   ret i32 %cond
 }
 
-; O32-LABEL: sltiu1:
-; O32: sltu ${{[0-9]+}}
+; ALL-LABEL: sltiu1:
+
+; 32-CMOV-DAG: addiu $[[I7:[0-9]+]], $zero, 7
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: addiu $[[R1:[0-9]+]], $zero, 32767
+; 32-CMOV-DAG: sltu $[[R0:[0-9]+]], $[[R1]], $4
+; 32-CMOV-DAG: movn $[[I5]], $[[I7]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I7:[0-9]+]], $zero, 7
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  addiu $[[I32767:[0-9]+]], $zero, 32767
+; 32-CMP-DAG:  sltu $[[R0:[0-9]+]], $[[I32767]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 32-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I7]], $[[R0]]
+; 32-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I7:[0-9]+]], $zero, 7
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: addiu $[[R1:[0-9]+]], $zero, 32767
+; 64-CMOV-DAG: sltu $[[R0:[0-9]+]], $[[R1]], $4
+; 64-CMOV-DAG: movn $[[I5]], $[[I7]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I7:[0-9]+]], $zero, 7
+; 64-CMP-DAG:  addiu $[[I5:2]], $zero, 5
+; 64-CMP-DAG:  addiu $[[R1:[0-9]+]], $zero, 32767
+; 64-CMP-DAG:  sltu $[[R0:[0-9]+]], $[[R1]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 64-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I7]], $[[R0]]
+; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @sltiu1(i32 %a) {
 entry:
   %cmp = icmp ugt i32 %a, 32767
-  %cond = select i1 %cmp, i32 3, i32 5
+  %cond = select i1 %cmp, i32 7, i32 5
   ret i32 %cond
 }
 
-; O32-LABEL: sltiu2:
-; O32: sltiu $[[R0:[0-9]+]], ${{[0-9]+}}, -32768
-; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+; ALL-LABEL: sltiu2:
+
+; 32-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: sltiu $[[R0:[0-9]+]], $4, -32768
+; 32-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  sltiu $[[R0:[0-9]+]], $4, -32768
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 32-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 32-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: sltiu $[[R0:[0-9]+]], $4, -32768
+; 64-CMOV-DAG: movz $[[I5]], $[[I3]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 64-CMP-DAG:  sltiu $[[R0:[0-9]+]], $4, -32768
+; FIXME: We can do better than this by using selccz to choose between +0 and +2
+; 64-CMP-DAG:  seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @sltiu2(i32 %a) {
 entry:
@@ -188,8 +574,41 @@ entry:
   ret i32 %cond
 }
 
-; O32-LABEL: sltiu3:
-; O32: sltu ${{[0-9]+}}
+; ALL-LABEL: sltiu3:
+
+; 32-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 32-CMOV-DAG: lui $[[R1:[0-9]+]], 65535
+; 32-CMOV-DAG: ori $[[R1]], $[[R1]], 32766
+; 32-CMOV-DAG: sltu $[[R0:[0-9]+]], $[[R1]], $4
+; 32-CMOV-DAG: movn $[[I5]], $[[I3]], $[[R0]]
+
+; 32-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 32-CMP-DAG:  addiu $[[I5:[0-9]+]], $zero, 5
+; 32-CMP-DAG:  lui $[[IMM:[0-9]+]], 65535
+; 32-CMP-DAG:  ori $[[IMM]], $[[IMM]], 32766
+; 32-CMP-DAG:  sltu $[[R0:[0-9]+]], $[[I32767]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 32-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 32-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 32-CMP-DAG:  or $2, $[[T0]], $[[T1]]
+
+; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMOV-DAG: addiu $[[I5:2]], $zero, 5
+; 64-CMOV-DAG: lui $[[R1:[0-9]+]], 65535
+; 64-CMOV-DAG: ori $[[R1]], $[[R1]], 32766
+; 64-CMOV-DAG: sltu $[[R0:[0-9]+]], $[[R1]], $4
+; 64-CMOV-DAG: movn $[[I5]], $[[I3]], $[[R0]]
+
+; 64-CMP-DAG:  addiu $[[I3:[0-9]+]], $zero, 3
+; 64-CMP-DAG:  addiu $[[I5:2]], $zero, 5
+; 64-CMP-DAG:  lui $[[IMM:[0-9]+]], 65535
+; 64-CMP-DAG:  ori $[[IMM]], $[[IMM]], 32766
+; 64-CMP-DAG:  sltu $[[R0:[0-9]+]], $[[IMM]], $4
+; FIXME: We can do better than this by using selccz to choose between -0 and -2
+; 64-CMP-DAG:  selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]]
+; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
+; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
 define i32 @sltiu3(i32 %a) {
 entry:
@@ -210,11 +629,25 @@ define i32 @slti4(i32 %a) nounwind readnone {
   ret i32 %2
 }
 
-; O32-LABEL: slti4:
-; O32-DAG: slti [[R1:\$[0-9]+]], $4, 7
-; O32-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
-; O32-NOT: movn
-; O32:.size slti4
+; ALL-LABEL: slti4:
+
+; 32-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
+; 32-CMOV-DAG: addiu $2, [[R1]], 3
+; 32-CMOV-NOT: movn
+
+; 32-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
+; 32-CMP-DAG:  addiu $2, [[R1]], 3
+; 32-CMP-NOT:  seleqz
+; 32-CMP-NOT:  selnez
+
+; 64-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
+; 64-CMOV-DAG: addiu $2, [[R1]], 3
+; 64-CMOV-NOT: movn
+
+; 64-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
+; 64-CMP-DAG:  addiu $2, [[R1]], 3
+; 64-CMP-NOT:  seleqz
+; 64-CMP-NOT:  selnez
 
 define i32 @slti5(i32 %a) nounwind readnone {
   %1 = icmp slt i32 %a, 7
@@ -222,11 +655,25 @@ define i32 @slti5(i32 %a) nounwind readnone {
   ret i32 %2
 }
 
-; O32-LABEL: slti5:
-; O32-DAG: slti [[R1:\$[0-9]+]], $4, 7
-; O32-DAG: addiu [[R3:\$[0-9]+]], [[R2:\$[a-z0-9]+]], -4
-; O32-NOT: movn
-; O32:.size slti5
+; ALL-LABEL: slti5:
+
+; 32-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
+; 32-CMOV-DAG: addiu [[R3:\$[0-9]+]], [[R2:\$[a-z0-9]+]], -4
+; 32-CMOV-NOT: movn
+
+; 32-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
+; 32-CMP-DAG:  addiu [[R3:\$[0-9]+]], [[R2:\$[a-z0-9]+]], -4
+; 32-CMP-NOT:  seleqz
+; 32-CMP-NOT:  selnez
+
+; 64-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
+; 64-CMOV-DAG: addiu [[R3:\$[0-9]+]], [[R2:\$[a-z0-9]+]], -4
+; 64-CMOV-NOT: movn
+
+; 64-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
+; 64-CMP-DAG:  addiu [[R3:\$[0-9]+]], [[R2:\$[a-z0-9]+]], -4
+; 64-CMP-NOT:  seleqz
+; 64-CMP-NOT:  selnez
 
 define i32 @slti6(i32 %a) nounwind readnone {
   %1 = icmp slt i32 %a, 7
@@ -234,9 +681,26 @@ define i32 @slti6(i32 %a) nounwind readnone {
   ret i32 %2
 }
 
-; O32-LABEL: slti6:
-; O32-DAG: slti [[R1:\$[0-9]+]], $4, 7
-; O32-DAG: xori [[R1]], [[R1]], 1
-; O32-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
-; O32-NOT: movn
-; O32:.size slti6
+; ALL-LABEL: slti6:
+
+; 32-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
+; 32-CMOV-DAG: xori [[R1]], [[R1]], 1
+; 32-CMOV-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
+; 32-CMOV-NOT: movn
+
+; 32-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
+; 32-CMP-DAG:  xori [[R1]], [[R1]], 1
+; 32-CMP-DAG:  addiu [[R2:\$[0-9]+]], [[R1]], 3
+; 32-CMP-NOT:  seleqz
+; 32-CMP-NOT:  selnez
+
+; 64-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
+; 64-CMOV-DAG: xori [[R1]], [[R1]], 1
+; 64-CMOV-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
+; 64-CMOV-NOT: movn
+
+; 64-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
+; 64-CMP-DAG:  xori [[R1]], [[R1]], 1
+; 64-CMP-DAG:  addiu [[R2:\$[0-9]+]], [[R1]], 3
+; 64-CMP-NOT:  seleqz
+; 64-CMP-NOT:  selnez
diff --git a/test/CodeGen/Mips/countleading.ll b/test/CodeGen/Mips/countleading.ll
new file mode 100644
index 0000000..6e63cff
--- /dev/null
+++ b/test/CodeGen/Mips/countleading.ll
@@ -0,0 +1,90 @@
+; RUN: llc -march=mipsel -mcpu=mips32   < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32-R1-R2 -check-prefix=MIPS32-GT-R1 %s
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32-R1-R2 -check-prefix=MIPS32-GT-R1 %s
+; RUN: llc -march=mipsel -mcpu=mips32r6 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32-R6    -check-prefix=MIPS32-GT-R1 %s
+; RUN: llc -march=mips64el -mcpu=mips4    < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS4 %s
+; RUN: llc -march=mips64el -mcpu=mips64   < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64-GT-R1 %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64-GT-R1 %s
+; R!N: llc -march=mips64el -mcpu=mips64r6 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64-GT-R1 %s
+
+; Prefixes:
+;   ALL      - All
+;   MIPS32-GT-R1 - MIPS64r1 and above (does not include MIPS64's)
+;   MIPS64-GT-R1 - MIPS64r1 and above
+
+define i32 @ctlz_i32(i32 %X) nounwind readnone {
+entry:
+; ALL-LABEL: ctlz_i32:
+
+; MIPS4-NOT:     clz
+
+; MIPS32-GT-R1:  clz $2, $4
+
+; MIPS64-GT-R1:  clz $2, $4
+
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X, i1 true)
+  ret i32 %tmp1
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+
+define i32 @ctlo_i32(i32 %X) nounwind readnone {
+entry:
+; ALL-LABEL: ctlo_i32:
+
+; MIPS4-NOT:     clo
+
+; MIPS32-GT-R1:  clo $2, $4
+
+; MIPS64-GT-R1:  clo $2, $4
+
+  %neg = xor i32 %X, -1
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg, i1 true)
+  ret i32 %tmp1
+}
+
+define i64 @ctlz_i64(i64 %X) nounwind readnone {
+entry:
+; ALL-LABEL: ctlz_i64:
+
+; MIPS4-NOT:     dclz
+
+; MIPS32-GT-R1-DAG: clz $[[R0:[0-9]+]], $4
+; MIPS32-GT-R1-DAG: clz $[[R1:[0-9]+]], $5
+; MIPS32-GT-R1-DAG: addiu $[[R2:2+]], $[[R0]], 32
+; MIPS32-R1-R2-DAG: movn $[[R2]], $[[R1]], $5
+; MIPS32-R6-DAG:    seleqz $[[R5:[0-9]+]], $[[R2]], $5
+; MIPS32-R6-DAG:    selnez $[[R6:[0-9]+]], $[[R1]], $5
+; MIPS32-R6-DAG:    or $2, $[[R6]], $[[R5]]
+; MIPS32-GT-R1-DAG: addiu $3, $zero, 0
+
+; MIPS64-GT-R1:  dclz $2, $4
+
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
+  ret i64 %tmp1
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+
+define i64 @ctlo_i64(i64 %X) nounwind readnone {
+entry:
+; ALL-LABEL: ctlo_i64:
+
+; MIPS4-NOT:     dclo
+
+; MIPS32-GT-R1-DAG: clo $[[R0:[0-9]+]], $4
+; MIPS32-GT-R1-DAG: clo $[[R1:[0-9]+]], $5
+; MIPS32-GT-R1-DAG: addiu $[[R2:2+]], $[[R0]], 32
+; MIPS32-GT-R1-DAG: addiu $[[R3:[0-9]+]], $zero, -1
+; MIPS32-GT-R1-DAG: xor $[[R4:[0-9]+]], $5, $[[R3]]
+; MIPS32-R1-R2-DAG: movn $[[R2]], $[[R1]], $[[R4]]
+; MIPS32-R6-DAG:    selnez $[[R5:[0-9]+]], $[[R1]], $[[R4]]
+; MIPS32-R6-DAG:    seleqz $[[R6:[0-9]+]], $[[R2]], $[[R4]]
+; MIPS32-R6-DAG:    or $2, $[[R5]], $[[R6]]
+; MIPS32-GT-R1-DAG: addiu $3, $zero, 0
+
+; MIPS64-GT-R1:  dclo $2, $4
+
+  %neg = xor i64 %X, -1
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
+  ret i64 %tmp1
+}
diff --git a/test/CodeGen/Mips/divrem.ll b/test/CodeGen/Mips/divrem.ll
index b631c3b..97f8360 100644
--- a/test/CodeGen/Mips/divrem.ll
+++ b/test/CodeGen/Mips/divrem.ll
@@ -1,77 +1,223 @@
-; RUN: llc -march=mips -verify-machineinstrs < %s |\
-; RUN: FileCheck %s -check-prefix=TRAP
-; RUN: llc -march=mips -mno-check-zero-division < %s |\
-; RUN: FileCheck %s -check-prefix=NOCHECK
+; RUN: llc -march=mips   -mcpu=mips32   -verify-machineinstrs    < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC32 -check-prefix=ACC32-TRAP
+; RUN: llc -march=mips   -mcpu=mips32r2 -verify-machineinstrs    < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC32 -check-prefix=ACC32-TRAP
+; RUN: llc -march=mips   -mcpu=mips32r6 -verify-machineinstrs    < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=GPR32-TRAP
+; RUN: llc -march=mips64 -mcpu=mips64   -verify-machineinstrs    < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC64 -check-prefix=ACC64-TRAP
+; RUN: llc -march=mips64 -mcpu=mips64r2 -verify-machineinstrs    < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC64 -check-prefix=ACC64-TRAP
+; RUN: llc -march=mips64 -mcpu=mips64r6 -verify-machineinstrs    < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=GPR64-TRAP
 
-; TRAP-LABEL: sdiv1:
-; TRAP: div $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; TRAP: teq $[[R0]], $zero, 7
-; TRAP: mflo
+; RUN: llc -march=mips   -mcpu=mips32   -mno-check-zero-division < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC32 -check-prefix=NOCHECK
+; RUN: llc -march=mips   -mcpu=mips32r2 -mno-check-zero-division < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC32 -check-prefix=NOCHECK
+; RUN: llc -march=mips   -mcpu=mips32r6 -mno-check-zero-division < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=NOCHECK
+; RUN: llc -march=mips64 -mcpu=mips64   -mno-check-zero-division < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC64 -check-prefix=NOCHECK
+; RUN: llc -march=mips64 -mcpu=mips64r2 -mno-check-zero-division < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC64 -check-prefix=NOCHECK
+; RUN: llc -march=mips64 -mcpu=mips64r6 -mno-check-zero-division < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=NOCHECK
 
-; NOCHECK-LABEL: sdiv1:
-; NOCHECK-NOT: teq
-; NOCHECK: .end sdiv1
+; FileCheck Prefixes:
+;   ALL - All targets
+;   ACC32 - Accumulator based multiply/divide on 32-bit targets
+;   ACC64 - Same as ACC32 but only for 64-bit targets
+;   GPR32 - GPR based multiply/divide on 32-bit targets
+;   GPR64 - Same as GPR32 but only for 64-bit targets
+;   ACC32-TRAP - Same as TRAP and ACC32 combined
+;   ACC64-TRAP - Same as TRAP and ACC64 combined
+;   GPR32-TRAP - Same as TRAP and GPR32 combined
+;   GPR64-TRAP - Same as TRAP and GPR64 combined
+;   NOCHECK - Division by zero will not be detected
 
 @g0 = common global i32 0, align 4
 @g1 = common global i32 0, align 4
 
 define i32 @sdiv1(i32 %a0, i32 %a1) nounwind readnone {
 entry:
+; ALL-LABEL: sdiv1:
+
+; ACC32:         div $zero, $4, $5
+; ACC32-TRAP:    teq $5, $zero, 7
+
+; ACC64:         div $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR32:         div $2, $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+
+; GPR64:         div $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC32:         mflo $2
+; ACC64:         mflo $2
+
+; ALL: .end sdiv1
+
   %div = sdiv i32 %a0, %a1
   ret i32 %div
 }
 
-; TRAP-LABEL: srem1:
-; TRAP: div $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; TRAP: teq $[[R0]], $zero, 7
-; TRAP: mfhi
-
 define i32 @srem1(i32 %a0, i32 %a1) nounwind readnone {
 entry:
+; ALL-LABEL: srem1:
+
+; ACC32:         div $zero, $4, $5
+; ACC32-TRAP:    teq $5, $zero, 7
+
+; ACC64:         div $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR32:         mod $2, $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+
+; GPR64:         mod $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC32:         mfhi $2
+; ACC64:         mfhi $2
+
+; ALL: .end srem1
+
   %rem = srem i32 %a0, %a1
   ret i32 %rem
 }
 
-; TRAP-LABEL: udiv1:
-; TRAP: divu $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; TRAP: teq $[[R0]], $zero, 7
-; TRAP: mflo
-
 define i32 @udiv1(i32 %a0, i32 %a1) nounwind readnone {
 entry:
+; ALL-LABEL: udiv1:
+
+; ACC32:         divu $zero, $4, $5
+; ACC32-TRAP:    teq $5, $zero, 7
+
+; ACC64:         divu $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR32:         divu $2, $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+
+; GPR64:         divu $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC32:         mflo $2
+; ACC64:         mflo $2
+
+; ALL: .end udiv1
   %div = udiv i32 %a0, %a1
   ret i32 %div
 }
 
-; TRAP-LABEL: urem1:
-; TRAP: divu $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; TRAP: teq $[[R0]], $zero, 7
-; TRAP: mfhi
-
 define i32 @urem1(i32 %a0, i32 %a1) nounwind readnone {
 entry:
+; ALL-LABEL: urem1:
+
+; ACC32:         divu $zero, $4, $5
+; ACC32-TRAP:    teq $5, $zero, 7
+
+; ACC64:         divu $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR32:         modu $2, $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+
+; GPR64:         modu $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC32:         mfhi $2
+; ACC64:         mfhi $2
+
+; ALL: .end urem1
+
   %rem = urem i32 %a0, %a1
   ret i32 %rem
 }
 
-; TRAP: div $zero,
 define i32 @sdivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
 entry:
+; ALL-LABEL: sdivrem1:
+
+; ACC32:         div $zero, $4, $5
+; ACC32-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; ACC32:         mflo $2
+; ACC32:         mfhi $[[R0:[0-9]+]]
+; ACC32:         sw $[[R0]], 0(${{[0-9]+}})
+
+; ACC64:         div $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; ACC64:         mflo $2
+; ACC64:         mfhi $[[R0:[0-9]+]]
+; ACC64:         sw $[[R0]], 0(${{[0-9]+}})
+
+; GPR32:         mod $[[R0:[0-9]+]], $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; GPR32:         sw $[[R0]], 0(${{[0-9]+}})
+; GPR32-DAG:     div $2, $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+
+; GPR64:         mod $[[R0:[0-9]+]], $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; GPR64:         sw $[[R0]], 0(${{[0-9]+}})
+; GPR64-DAG:     div $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+
+; ALL: .end sdivrem1
+
   %rem = srem i32 %a0, %a1
   store i32 %rem, i32* %r, align 4
   %div = sdiv i32 %a0, %a1
   ret i32 %div
 }
 
-; TRAP: divu $zero,
 define i32 @udivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
 entry:
+; ALL-LABEL: udivrem1:
+
+; ACC32:         divu $zero, $4, $5
+; ACC32-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; ACC32:         mflo $2
+; ACC32:         mfhi $[[R0:[0-9]+]]
+; ACC32:         sw $[[R0]], 0(${{[0-9]+}})
+
+; ACC64:         divu $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; ACC64:         mflo $2
+; ACC64:         mfhi $[[R0:[0-9]+]]
+; ACC64:         sw $[[R0]], 0(${{[0-9]+}})
+
+; GPR32:         modu $[[R0:[0-9]+]], $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; GPR32:         sw $[[R0]], 0(${{[0-9]+}})
+; GPR32-DAG:     divu $2, $4, $5
+; GPR32-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+
+; GPR64:         modu $[[R0:[0-9]+]], $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; GPR64:         sw $[[R0]], 0(${{[0-9]+}})
+; GPR64-DAG:     divu $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+
+; ALL: .end udivrem1
+
   %rem = urem i32 %a0, %a1
   store i32 %rem, i32* %r, align 4
   %div = udiv i32 %a0, %a1
   ret i32 %div
 }
 
+; FIXME: It's not clear what this is supposed to test.
 define i32 @killFlags() {
 entry:
   %0 = load i32* @g0, align 4
@@ -79,3 +225,164 @@ entry:
   %div = sdiv i32 %0, %1
   ret i32 %div
 }
+
+define i64 @sdiv2(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; ALL-LABEL: sdiv2:
+
+; ACC32:         lw $25, %call16(__divdi3)(
+; ACC32:         jalr $25
+
+; ACC64:         ddiv $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR64:         ddiv $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC64:         mflo $2
+
+; ALL: .end sdiv2
+
+  %div = sdiv i64 %a0, %a1
+  ret i64 %div
+}
+
+define i64 @srem2(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; ALL-LABEL: srem2:
+
+; ACC32:         lw $25, %call16(__moddi3)(
+; ACC32:         jalr $25
+
+; ACC64:         div $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR64:         dmod $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC64:         mfhi $2
+
+; ALL: .end srem2
+
+  %rem = srem i64 %a0, %a1
+  ret i64 %rem
+}
+
+define i64 @udiv2(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; ALL-LABEL: udiv2:
+
+; ACC32:         lw $25, %call16(__udivdi3)(
+; ACC32:         jalr $25
+
+; ACC64:         divu $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR64:         ddivu $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC64:         mflo $2
+
+; ALL: .end udiv2
+  %div = udiv i64 %a0, %a1
+  ret i64 %div
+}
+
+define i64 @urem2(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; ALL-LABEL: urem2:
+
+; ACC32:         lw $25, %call16(__umoddi3)(
+; ACC32:         jalr $25
+
+; ACC64:         divu $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+
+; GPR64:         dmodu $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+
+; NOCHECK-NOT:   teq
+
+; ACC64:         mfhi $2
+
+; ALL: .end urem2
+
+  %rem = urem i64 %a0, %a1
+  ret i64 %rem
+}
+
+define i64 @sdivrem2(i64 %a0, i64 %a1, i64* nocapture %r) nounwind {
+entry:
+; ALL-LABEL: sdivrem2:
+
+; sdivrem2 is too complex to effectively check. We can at least check for the
+; calls though.
+; ACC32:         lw $25, %call16(__moddi3)(
+; ACC32:         jalr $25
+; ACC32:         lw $25, %call16(__divdi3)(
+; ACC32:         jalr $25
+
+; ACC64:         ddiv $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; ACC64:         mflo $2
+; ACC64:         mfhi $[[R0:[0-9]+]]
+; ACC64:         sd $[[R0]], 0(${{[0-9]+}})
+
+; GPR64:         dmod $[[R0:[0-9]+]], $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; GPR64:         sd $[[R0]], 0(${{[0-9]+}})
+
+; GPR64-DAG:     ddiv $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+
+; ALL: .end sdivrem2
+
+  %rem = srem i64 %a0, %a1
+  store i64 %rem, i64* %r, align 8
+  %div = sdiv i64 %a0, %a1
+  ret i64 %div
+}
+
+define i64 @udivrem2(i64 %a0, i64 %a1, i64* nocapture %r) nounwind {
+entry:
+; ALL-LABEL: udivrem2:
+
+; udivrem2 is too complex to effectively check. We can at least check for the
+; calls though.
+; ACC32:         lw $25, %call16(__umoddi3)(
+; ACC32:         jalr $25
+; ACC32:         lw $25, %call16(__udivdi3)(
+; ACC32:         jalr $25
+
+; ACC64:         ddivu $zero, $4, $5
+; ACC64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; ACC64:         mflo $2
+; ACC64:         mfhi $[[R0:[0-9]+]]
+; ACC64:         sd $[[R0]], 0(${{[0-9]+}})
+
+; GPR64:         dmodu $[[R0:[0-9]+]], $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+; GPR64:         sd $[[R0]], 0(${{[0-9]+}})
+
+; GPR64-DAG:     ddivu $2, $4, $5
+; GPR64-TRAP:    teq $5, $zero, 7
+; NOCHECK-NOT:   teq
+
+; ALL: .end udivrem2
+
+  %rem = urem i64 %a0, %a1
+  store i64 %rem, i64* %r, align 8
+  %div = udiv i64 %a0, %a1
+  ret i64 %div
+}
diff --git a/test/CodeGen/Mips/dsp-r1.ll b/test/CodeGen/Mips/dsp-r1.ll
index acdd17d..fbd9703 100644
--- a/test/CodeGen/Mips/dsp-r1.ll
+++ b/test/CodeGen/Mips/dsp-r1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mattr=+dsp < %s | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+dsp < %s | FileCheck %s
 
 define i32 @test__builtin_mips_extr_w1(i32 %i0, i32, i64 %a0) nounwind {
 entry:
diff --git a/test/CodeGen/Mips/eh-return32.ll b/test/CodeGen/Mips/eh-return32.ll
index c3003b3..748050c 100644
--- a/test/CodeGen/Mips/eh-return32.ll
+++ b/test/CodeGen/Mips/eh-return32.ll
@@ -1,4 +1,6 @@
-; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips32   -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=NOT-R6
+; RUN: llc -march=mipsel -mcpu=mips32r2 -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=NOT-R6
+; RUN: llc -march=mipsel -mcpu=mips32r6 -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=R6
 
 declare void @llvm.eh.return.i32(i32, i8*)
 declare void @foo(...)
@@ -9,7 +11,7 @@ entry:
   call void @llvm.eh.return.i32(i32 %offset, i8* %handler)
   unreachable
 
-; CHECK:        f1
+; CHECK:    f1:
 ; CHECK:        addiu   $sp, $sp, -[[spoffset:[0-9]+]]
 
 ; check that $a0-$a3 are saved on stack.
@@ -41,7 +43,8 @@ entry:
 ; CHECK:        addiu   $sp, $sp, [[spoffset]]
 ; CHECK:        move    $25, $2
 ; CHECK:        move    $ra, $2
-; CHECK:        jr      $ra
+; NOT-R6:       jr      $ra # <MCInst #{{[0-9]+}} JR
+; R6:           jr      $ra # <MCInst #{{[0-9]+}} JALR
 ; CHECK:        addu    $sp, $sp, $3
 }
 
@@ -50,7 +53,7 @@ entry:
   call void @llvm.eh.return.i32(i32 %offset, i8* %handler)
   unreachable
 
-; CHECK:        f2
+; CHECK:    f2:
 ; CHECK:        addiu   $sp, $sp, -[[spoffset:[0-9]+]]
 
 ; check that $a0-$a3 are saved on stack.
@@ -80,6 +83,7 @@ entry:
 ; CHECK:        addiu   $sp, $sp, [[spoffset]]
 ; CHECK:        move    $25, $2
 ; CHECK:        move    $ra, $2
-; CHECK:        jr      $ra
+; NOT-R6:       jr      $ra # <MCInst #{{[0-9]+}} JR
+; R6:           jr      $ra # <MCInst #{{[0-9]+}} JALR
 ; CHECK:        addu    $sp, $sp, $3
 }
diff --git a/test/CodeGen/Mips/eh-return64.ll b/test/CodeGen/Mips/eh-return64.ll
index 8c5af50..74a4323 100644
--- a/test/CodeGen/Mips/eh-return64.ll
+++ b/test/CodeGen/Mips/eh-return64.ll
@@ -1,5 +1,7 @@
-; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s
-; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips4    -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=NOT-R6
+; RUN: llc -march=mips64el -mcpu=mips64   -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=NOT-R6
+; RUN: llc -march=mips64el -mcpu=mips64r2 -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=NOT-R6
+; RUN: llc -march=mips64el -mcpu=mips64r6 -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=R6
 
 declare void @llvm.eh.return.i64(i64, i8*)
 declare void @foo(...)
@@ -10,7 +12,7 @@ entry:
   call void @llvm.eh.return.i64(i64 %offset, i8* %handler)
   unreachable
 
-; CHECK:        f1
+; CHECK:    f1:
 ; CHECK:        daddiu  $sp, $sp, -[[spoffset:[0-9]+]]
 
 ; check that $a0-$a3 are saved on stack.
@@ -42,9 +44,9 @@ entry:
 ; CHECK:        daddiu  $sp, $sp, [[spoffset]]
 ; CHECK:        move    $25, $2
 ; CHECK:        move    $ra, $2
-; CHECK:        jr      $ra
+; NOT-R6:       jr      $ra # <MCInst #{{[0-9]+}} JR
+; R6:           jr      $ra # <MCInst #{{[0-9]+}} JALR
 ; CHECK:        daddu   $sp, $sp, $3
-
 }
 
 define void @f2(i64 %offset, i8* %handler) {
@@ -52,7 +54,7 @@ entry:
   call void @llvm.eh.return.i64(i64 %offset, i8* %handler)
   unreachable
 
-; CHECK:        f2
+; CHECK:    f2:
 ; CHECK:        .cfi_startproc
 ; CHECK:        daddiu  $sp, $sp, -[[spoffset:[0-9]+]]
 ; CHECK:        .cfi_def_cfa_offset [[spoffset]]
@@ -84,7 +86,8 @@ entry:
 ; CHECK:        daddiu  $sp, $sp, [[spoffset]]
 ; CHECK:        move    $25, $2
 ; CHECK:        move    $ra, $2
-; CHECK:        jr      $ra
+; NOT-R6:       jr      $ra # <MCInst #{{[0-9]+}} JR
+; R6:           jr      $ra # <MCInst #{{[0-9]+}} JALR
 ; CHECK:        daddu   $sp, $sp, $3
 ; CHECK:        .cfi_endproc
 }
diff --git a/test/CodeGen/Mips/ehframe-indirect.ll b/test/CodeGen/Mips/ehframe-indirect.ll
new file mode 100644
index 0000000..e78497a
--- /dev/null
+++ b/test/CodeGen/Mips/ehframe-indirect.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=mipsel-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-linux-android < %s | FileCheck %s
+
+define i32 @main() {
+; CHECK: .cfi_startproc
+; CHECK: .cfi_personality 128, DW.ref.__gxx_personality_v0
+
+entry:
+  invoke void @foo() to label %cont unwind label %lpad
+; CHECK: foo
+; CHECK: jalr
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8*
+    bitcast (i32 (...)* @__gxx_personality_v0 to i8*) catch i8* null
+  ret i32 0
+
+cont:
+  ret i32 0
+}
+; CHECK: .cfi_endproc
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @foo()
+
+; CHECK: .hidden DW.ref.__gxx_personality_v0
+; CHECK: .weak DW.ref.__gxx_personality_v0
+; CHECK: .section .data.DW.ref.__gxx_personality_v0,"aGw",@progbits,DW.ref.__gxx_personality_v0,comdat
+; CHECK: .align 2
+; CHECK: .type DW.ref.__gxx_personality_v0,@object
+; CHECK: .size DW.ref.__gxx_personality_v0, 4
+; CHECK: DW.ref.__gxx_personality_v0:
+; CHECK: .4byte __gxx_personality_v0
diff --git a/test/CodeGen/Mips/fcmp.ll b/test/CodeGen/Mips/fcmp.ll
new file mode 100644
index 0000000..b775983
--- /dev/null
+++ b/test/CodeGen/Mips/fcmp.ll
@@ -0,0 +1,783 @@
+; RUN: llc < %s -march=mipsel   -mcpu=mips32   | FileCheck %s -check-prefix=ALL -check-prefix=32-C
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=32-C
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=32-CMP
+; RUN: llc < %s -march=mips64el -mcpu=mips4    | FileCheck %s -check-prefix=ALL -check-prefix=64-C
+; RUN: llc < %s -march=mips64el -mcpu=mips64   | FileCheck %s -check-prefix=ALL -check-prefix=64-C
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=64-C
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL -check-prefix=64-CMP
+
+define i32 @false_f32(float %a, float %b) nounwind {
+; ALL-LABEL: false_f32:
+; ALL:           addiu $2, $zero, 0
+
+  %1 = fcmp false float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @oeq_f32(float %a, float %b) nounwind {
+; ALL-LABEL: oeq_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.eq.s $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.eq.s $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.eq.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.eq.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp oeq float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ogt_f32(float %a, float %b) nounwind {
+; ALL-LABEL: ogt_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ule.s $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ule.s $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.lt.s $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.lt.s $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ogt float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @oge_f32(float %a, float %b) nounwind {
+; ALL-LABEL: oge_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ult.s $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ult.s $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.le.s $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.le.s $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp oge float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @olt_f32(float %a, float %b) nounwind {
+; ALL-LABEL: olt_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.olt.s $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.olt.s $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.lt.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.lt.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp olt float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ole_f32(float %a, float %b) nounwind {
+; ALL-LABEL: ole_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ole.s $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ole.s $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.le.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.le.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ole float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @one_f32(float %a, float %b) nounwind {
+; ALL-LABEL: one_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ueq.s $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ueq.s $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 32-CMP-DAG:    andi $2, $[[T2]], 1
+
+; 64-CMP-DAG:    cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 64-CMP-DAG:    andi $2, $[[T2]], 1
+
+  %1 = fcmp one float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ord_f32(float %a, float %b) nounwind {
+; ALL-LABEL: ord_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.un.s $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.un.s $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.un.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 32-CMP-DAG:    andi $2, $[[T2]], 1
+
+; 64-CMP-DAG:    cmp.un.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 64-CMP-DAG:    andi $2, $[[T2]], 1
+
+  %1 = fcmp ord float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ueq_f32(float %a, float %b) nounwind {
+; ALL-LABEL: ueq_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ueq.s $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ueq.s $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ueq float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ugt_f32(float %a, float %b) nounwind {
+; ALL-LABEL: ugt_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ole.s $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ole.s $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ult.s $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ult.s $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ugt float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @uge_f32(float %a, float %b) nounwind {
+; ALL-LABEL: uge_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.olt.s $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.olt.s $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ule.s $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ule.s $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp uge float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ult_f32(float %a, float %b) nounwind {
+; ALL-LABEL: ult_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ult.s $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ult.s $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ult.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ult.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ult float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ule_f32(float %a, float %b) nounwind {
+; ALL-LABEL: ule_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ule.s $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ule.s $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ule.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ule.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ule float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @une_f32(float %a, float %b) nounwind {
+; ALL-LABEL: une_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.eq.s $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.eq.s $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.eq.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 32-CMP-DAG:    andi $2, $[[T2]], 1
+
+; 64-CMP-DAG:    cmp.eq.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 64-CMP-DAG:    andi $2, $[[T2]], 1
+
+  %1 = fcmp une float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @uno_f32(float %a, float %b) nounwind {
+; ALL-LABEL: uno_f32:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.un.s $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.un.s $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.un.s $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.un.s $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp uno float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @true_f32(float %a, float %b) nounwind {
+; ALL-LABEL: true_f32:
+; ALL:           addiu $2, $zero, 1
+
+  %1 = fcmp true float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @false_f64(double %a, double %b) nounwind {
+; ALL-LABEL: false_f64:
+; ALL:           addiu $2, $zero, 0
+
+  %1 = fcmp false double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @oeq_f64(double %a, double %b) nounwind {
+; ALL-LABEL: oeq_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.eq.d $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.eq.d $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.eq.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.eq.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp oeq double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ogt_f64(double %a, double %b) nounwind {
+; ALL-LABEL: ogt_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ule.d $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ule.d $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.lt.d $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.lt.d $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ogt double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @oge_f64(double %a, double %b) nounwind {
+; ALL-LABEL: oge_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ult.d $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ult.d $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.le.d $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.le.d $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp oge double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @olt_f64(double %a, double %b) nounwind {
+; ALL-LABEL: olt_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.olt.d $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.olt.d $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.lt.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.lt.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp olt double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ole_f64(double %a, double %b) nounwind {
+; ALL-LABEL: ole_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ole.d $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ole.d $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.le.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.le.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ole double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @one_f64(double %a, double %b) nounwind {
+; ALL-LABEL: one_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ueq.d $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ueq.d $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 32-CMP-DAG:    andi $2, $[[T2]], 1
+
+; 64-CMP-DAG:    cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 64-CMP-DAG:    andi $2, $[[T2]], 1
+
+  %1 = fcmp one double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ord_f64(double %a, double %b) nounwind {
+; ALL-LABEL: ord_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.un.d $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.un.d $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.un.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 32-CMP-DAG:    andi $2, $[[T2]], 1
+
+; 64-CMP-DAG:    cmp.un.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 64-CMP-DAG:    andi $2, $[[T2]], 1
+
+  %1 = fcmp ord double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ueq_f64(double %a, double %b) nounwind {
+; ALL-LABEL: ueq_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ueq.d $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ueq.d $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ueq double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ugt_f64(double %a, double %b) nounwind {
+; ALL-LABEL: ugt_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ole.d $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ole.d $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ult.d $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ult.d $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ugt double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @uge_f64(double %a, double %b) nounwind {
+; ALL-LABEL: uge_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.olt.d $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.olt.d $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ule.d $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ule.d $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp uge double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ult_f64(double %a, double %b) nounwind {
+; ALL-LABEL: ult_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ult.d $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ult.d $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ult.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ult.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ult double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @ule_f64(double %a, double %b) nounwind {
+; ALL-LABEL: ule_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.ule.d $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.ule.d $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.ule.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.ule.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp ule double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @une_f64(double %a, double %b) nounwind {
+; ALL-LABEL: une_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.eq.d $f12, $f14
+; 32-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.eq.d $f12, $f13
+; 64-C-DAG:      movf $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.eq.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 32-CMP-DAG:    andi $2, $[[T2]], 1
+
+; 64-CMP-DAG:    cmp.eq.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    not $[[T2:[0-9]+]], $[[T1]]
+; 64-CMP-DAG:    andi $2, $[[T2]], 1
+
+  %1 = fcmp une double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @uno_f64(double %a, double %b) nounwind {
+; ALL-LABEL: uno_f64:
+
+; 32-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 32-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG:      c.un.d $f12, $f14
+; 32-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 64-C-DAG:      addiu $[[T0:2]], $zero, 0
+; 64-C-DAG:      addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG:      c.un.d $f12, $f13
+; 64-C-DAG:      movt $[[T0]], $1, $fcc0
+
+; 32-CMP-DAG:    cmp.un.d $[[T0:f[0-9]+]], $f12, $f14
+; 32-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 32-CMP-DAG:    andi $2, $[[T1]], 1
+
+; 64-CMP-DAG:    cmp.un.d $[[T0:f[0-9]+]], $f12, $f13
+; 64-CMP-DAG:    mfc1 $[[T1:[0-9]+]], $[[T0]]
+; 64-CMP-DAG:    andi $2, $[[T1]], 1
+
+  %1 = fcmp uno double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @true_f64(double %a, double %b) nounwind {
+; ALL-LABEL: true_f64:
+; ALL:           addiu $2, $zero, 1
+
+  %1 = fcmp true double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
diff --git a/test/CodeGen/Mips/fcopysign.ll b/test/CodeGen/Mips/fcopysign.ll
index 44c4117..3a9d9c7 100644
--- a/test/CodeGen/Mips/fcopysign.ll
+++ b/test/CodeGen/Mips/fcopysign.ll
@@ -17,7 +17,7 @@ entry:
 
 ; 32R2: ext  $[[EXT:[0-9]+]], ${{[0-9]+}}, 31, 1
 ; 32R2: ins  $[[INS:[0-9]+]], $[[EXT]], 31, 1
-; 32R2: mtc1 $[[INS]], $f1
+; 32R2: mthc1 $[[INS]], $f0
 
 ; 64: daddiu $[[T0:[0-9]+]], $zero, 1
 ; 64: dsll   $[[MSK1:[0-9]+]], $[[T0]], 63
diff --git a/test/CodeGen/Mips/fmadd1.ll b/test/CodeGen/Mips/fmadd1.ll
index a9a8e21..271631e 100644
--- a/test/CodeGen/Mips/fmadd1.ll
+++ b/test/CodeGen/Mips/fmadd1.ll
@@ -5,15 +5,54 @@
 ; IEEE 754 (1985) and IEEE 754 (2008). These instructions are therefore only
 ; available when -enable-no-nans-fp-math is given.
 
-; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -enable-no-nans-fp-math | FileCheck %s -check-prefix=32R2 -check-prefix=CHECK
-; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=64R2 -check-prefix=CHECK
-; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2NAN -check-prefix=CHECK
-; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2NAN -check-prefix=CHECK
+; RUN: llc < %s -march=mipsel   -mcpu=mips32              -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=32   -check-prefix=32-NONAN
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r2            -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=32R2 -check-prefix=32R2-NONAN
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r6            -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=32R6 -check-prefix=32R6-NONAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64   -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=64   -check-prefix=64-NONAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=64R2 -check-prefix=64R2-NONAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=64R6 -check-prefix=64R6-NONAN
+; RUN: llc < %s -march=mipsel   -mcpu=mips32              | FileCheck %s -check-prefix=ALL -check-prefix=32 -check-prefix=32-NAN
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r2            | FileCheck %s -check-prefix=ALL -check-prefix=32R2 -check-prefix=32R2-NAN
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r6            | FileCheck %s -check-prefix=ALL -check-prefix=32R6 -check-prefix=32R6-NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64   -mattr=n64 | FileCheck %s -check-prefix=ALL -check-prefix=64   -check-prefix=64-NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=ALL -check-prefix=64R2 -check-prefix=64R2-NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -mattr=n64 | FileCheck %s -check-prefix=ALL -check-prefix=64R6 -check-prefix=64R6-NAN
 
 define float @FOO0float(float %a, float %b, float %c) nounwind readnone {
 entry:
-; CHECK-LABEL: FOO0float:
-; CHECK: madd.s 
+; ALL-LABEL: FOO0float:
+
+; 32-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
+; 32-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        add.s $f0, $[[T1]], $[[T2]]
+
+; 32R2:          mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2:          madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2:          mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2:          add.s $f0, $[[T1]], $[[T2]]
+
+; 32R6-DAG:      mtc1 $6, $[[T0:f[0-9]+]]
+; 32R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-DAG:      add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-DAG:      add.s $f0, $[[T1]], $[[T2]]
+
+; 64-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        add.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        add.s $f0, $[[T1]], $[[T2]]
+
+; 64R2:          madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2:          mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2:          add.s $f0, $[[T0]], $[[T1]]
+
+; 64R6-DAG:      mul.s $[[T0:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      add.s $[[T1:f[0-9]+]], $[[T0]], $f14
+; 64R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      add.s $f0, $[[T1]], $[[T2]]
+
   %mul = fmul float %a, %b
   %add = fadd float %mul, %c
   %add1 = fadd float %add, 0.000000e+00
@@ -22,8 +61,39 @@ entry:
 
 define float @FOO1float(float %a, float %b, float %c) nounwind readnone {
 entry:
-; CHECK-LABEL: FOO1float:
-; CHECK: msub.s 
+; ALL-LABEL: FOO1float:
+
+; 32-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
+; 32-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        add.s $f0, $[[T1]], $[[T2]]
+
+; 32R2:          mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2:          msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2:          mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2:          add.s $f0, $[[T1]], $[[T2]]
+
+; 32R6-DAG:      mtc1 $6, $[[T0:f[0-9]+]]
+; 32R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-DAG:      sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-DAG:      add.s $f0, $[[T1]], $[[T2]]
+
+; 64-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        sub.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        add.s $f0, $[[T1]], $[[T2]]
+
+; 64R2:          msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2:          mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2:          add.s $f0, $[[T0]], $[[T1]]
+
+; 64R6-DAG:      mul.s $[[T0:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      sub.s $[[T1:f[0-9]+]], $[[T0]], $f14
+; 64R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      add.s $f0, $[[T1]], $[[T2]]
+
   %mul = fmul float %a, %b
   %sub = fsub float %mul, %c
   %add = fadd float %sub, 0.000000e+00
@@ -32,11 +102,44 @@ entry:
 
 define float @FOO2float(float %a, float %b, float %c) nounwind readnone {
 entry:
-; CHECK-LABEL: FOO2float:
-; 32R2: nmadd.s 
-; 64R2: nmadd.s 
-; 32R2NAN: madd.s 
-; 64R2NAN: madd.s 
+; ALL-LABEL: FOO2float:
+
+; 32-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
+; 32-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        sub.s $f0, $[[T2]], $[[T1]]
+
+; 32R2-NONAN:    mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NONAN:    nmadd.s $f0, $[[T0]], $f12, $f14
+
+; 32R2-NAN:      mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NAN:      madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN:      sub.s  $f0, $[[T2]], $[[T1]]
+
+; 32R6-DAG:      mtc1 $6, $[[T0:f[0-9]+]]
+; 32R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-DAG:      add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-DAG:      sub.s $f0, $[[T2]], $[[T1]]
+
+; 64-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        add.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        sub.s $f0, $[[T2]], $[[T1]]
+
+; 64R2-NONAN:    nmadd.s $f0, $f14, $f12, $f13
+
+; 64R2-NAN:      madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN:      mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN:      sub.s  $f0, $[[T1]], $[[T0]]
+
+; 64R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      add.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      sub.s $f0, $[[T2]], $[[T1]]
+
   %mul = fmul float %a, %b
   %add = fadd float %mul, %c
   %sub = fsub float 0.000000e+00, %add
@@ -45,11 +148,36 @@ entry:
 
 define float @FOO3float(float %a, float %b, float %c) nounwind readnone {
 entry:
-; CHECK-LABEL: FOO3float:
-; 32R2: nmsub.s 
-; 64R2: nmsub.s 
-; 32R2NAN: msub.s 
-; 64R2NAN: msub.s 
+; ALL-LABEL: FOO3float:
+
+; 32-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
+; 32-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        sub.s $f0, $[[T2]], $[[T1]]
+
+; 32R2-NONAN:    mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NONAN:    nmsub.s $f0, $[[T0]], $f12, $f14
+
+; 32R2-NAN:      mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NAN:      msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN:      sub.s  $f0, $[[T2]], $[[T1]]
+
+; 64-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        sub.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        sub.s $f0, $[[T2]], $[[T1]]
+
+; 64R2-NAN:      msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN:      mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN:      sub.s  $f0, $[[T1]], $[[T0]]
+
+; 64R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      sub.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      sub.s $f0, $[[T2]], $[[T1]]
+
   %mul = fmul float %a, %b
   %sub = fsub float %mul, %c
   %sub1 = fsub float 0.000000e+00, %sub
@@ -58,8 +186,40 @@ entry:
 
 define double @FOO10double(double %a, double %b, double %c) nounwind readnone {
 entry:
-; CHECK-LABEL: FOO10double:
-; CHECK: madd.d
+; ALL-LABEL: FOO10double:
+
+; 32-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        add.d $f0, $[[T1]], $[[T2]]
+
+; 32R2:          ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2:          madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2:          mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2:          mthc1 $zero, $[[T2]]
+; 32R2:          add.d $f0, $[[T1]], $[[T2]]
+
+; 32R6-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-DAG:      add.d $f0, $[[T1]], $[[T2]]
+
+; 64-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        add.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        add.d $f0, $[[T1]], $[[T2]]
+
+; 64R2:          madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2:          mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2:          add.d $f0, $[[T0]], $[[T1]]
+
+; 64R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      add.d $f0, $[[T1]], $[[T2]]
+
   %mul = fmul double %a, %b
   %add = fadd double %mul, %c
   %add1 = fadd double %add, 0.000000e+00
@@ -68,8 +228,40 @@ entry:
 
 define double @FOO11double(double %a, double %b, double %c) nounwind readnone {
 entry:
-; CHECK-LABEL: FOO11double:
-; CHECK: msub.d
+; ALL-LABEL: FOO11double:
+
+; 32-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        add.d $f0, $[[T1]], $[[T2]]
+
+; 32R2:          ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2:          msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2:          mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2:          mthc1 $zero, $[[T2]]
+; 32R2:          add.d $f0, $[[T1]], $[[T2]]
+
+; 32R6-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-DAG:      add.d $f0, $[[T1]], $[[T2]]
+
+; 64-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        add.d $f0, $[[T1]], $[[T2]]
+
+; 64R2:          msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2:          mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2:          add.d $f0, $[[T0]], $[[T1]]
+
+; 64R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      add.d $f0, $[[T1]], $[[T2]]
+
   %mul = fmul double %a, %b
   %sub = fsub double %mul, %c
   %add = fadd double %sub, 0.000000e+00
@@ -78,11 +270,45 @@ entry:
 
 define double @FOO12double(double %a, double %b, double %c) nounwind readnone {
 entry:
-; CHECK-LABEL: FOO12double:
-; 32R2: nmadd.d 
-; 64R2: nmadd.d 
-; 32R2NAN: madd.d 
-; 64R2NAN: madd.d 
+; ALL-LABEL: FOO12double:
+
+; 32-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        sub.d $f0, $[[T2]], $[[T1]]
+
+; 32R2-NONAN:    ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NONAN:    nmadd.d $f0, $[[T0]], $f12, $f14
+
+; 32R2-NAN:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NAN:      madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN:      mthc1 $zero, $[[T2]]
+; 32R2-NAN:      sub.d $f0, $[[T2]], $[[T1]]
+
+; 32R6-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-DAG:      sub.d $f0, $[[T2]], $[[T1]]
+
+; 64-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        add.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        sub.d $f0, $[[T2]], $[[T1]]
+
+; 64R2-NONAN:    nmadd.d $f0, $f14, $f12, $f13
+
+; 64R2-NAN:      madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN:      mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN:      sub.d $f0, $[[T1]], $[[T0]]
+
+; 64R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      sub.d $f0, $[[T2]], $[[T1]]
+
   %mul = fmul double %a, %b
   %add = fadd double %mul, %c
   %sub = fsub double 0.000000e+00, %add
@@ -91,11 +317,45 @@ entry:
 
 define double @FOO13double(double %a, double %b, double %c) nounwind readnone {
 entry:
-; CHECK-LABEL: FOO13double:
-; 32R2: nmsub.d 
-; 64R2: nmsub.d 
-; 32R2NAN: msub.d 
-; 64R2NAN: msub.d 
+; ALL-LABEL: FOO13double:
+
+; 32-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-DAG:        sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-DAG:        sub.d $f0, $[[T2]], $[[T1]]
+
+; 32R2-NONAN:    ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NONAN:    nmsub.d $f0, $[[T0]], $f12, $f14
+
+; 32R2-NAN:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NAN:      msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN:      mthc1 $zero, $[[T2]]
+; 32R2-NAN:      sub.d $f0, $[[T2]], $[[T1]]
+
+; 32R6-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-DAG:      sub.d $f0, $[[T2]], $[[T1]]
+
+; 64-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64-DAG:        sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64-DAG:        dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64-DAG:        sub.d $f0, $[[T2]], $[[T1]]
+
+; 64R2-NONAN:    nmsub.d $f0, $f14, $f12, $f13
+
+; 64R2-NAN:      msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN:      mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN:      sub.d $f0, $[[T1]], $[[T0]]
+
+; 64R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-DAG:      sub.d $f0, $[[T2]], $[[T1]]
+
   %mul = fmul double %a, %b
   %sub = fsub double %mul, %c
   %sub1 = fsub double 0.000000e+00, %sub
diff --git a/test/CodeGen/Mips/fp-indexed-ls.ll b/test/CodeGen/Mips/fp-indexed-ls.ll
index d8c37e7..787e131 100644
--- a/test/CodeGen/Mips/fp-indexed-ls.ll
+++ b/test/CodeGen/Mips/fp-indexed-ls.ll
@@ -1,6 +1,13 @@
-; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
-; RUN: llc -mtriple=mipsel-none-nacl-gnu -mcpu=mips32r2 < %s \
-; RUN:  | FileCheck %s -check-prefix=CHECK-NACL
+; RUN: llc -march=mipsel   -mcpu=mips32   < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32R1
+; RUN: llc -march=mipsel   -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32R2
+; RUN: llc -march=mipsel   -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32R6
+; RUN: llc -march=mips64el -mcpu=mips4    -mattr=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS4
+; RUN: llc -march=mips64el -mcpu=mips64   -mattr=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS4
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS4
+; RUN: llc -march=mips64el -mcpu=mips64r6 -mattr=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64R6
+
+; Check that [ls][dwu]xc1 are not emitted for nacl.
+; RUN: llc -mtriple=mipsel-none-nacl-gnu -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=CHECK-NACL
 
 %struct.S = type <{ [4 x float] }>
 %struct.S2 = type <{ [4 x double] }>
@@ -14,8 +21,30 @@
 
 define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
 entry:
-; CHECK: lwxc1
+; ALL-LABEL: foo0:
+
+; MIPS32R1:      sll $[[T1:[0-9]+]], $5, 2
+; MIPS32R1:      addu $[[T3:[0-9]+]], $4, $[[T1]]
+; MIPS32R1:      lwc1 $f0, 0($[[T3]])
+
+; MIPS32R2:      sll $[[T1:[0-9]+]], $5, 2
+; MIPS32R2:      lwxc1 $f0, $[[T1]]($4)
+
+; MIPS32R6:      sll $[[T1:[0-9]+]], $5, 2
+; MIPS32R6:      addu $[[T3:[0-9]+]], $4, $[[T1]]
+; MIPS32R6:      lwc1 $f0, 0($[[T3]])
+
+; MIPS4:         sll $[[T0:[0-9]+]], $5, 0
+; MIPS4:         dsll $[[T1:[0-9]+]], $[[T0]], 2
+; MIPS4:         lwxc1 $f0, $[[T1]]($4)
+
+; MIPS64R6:      sll $[[T0:[0-9]+]], $5, 0
+; MIPS64R6:      dsll $[[T1:[0-9]+]], $[[T0]], 2
+; MIPS64R6:      daddu $[[T3:[0-9]+]], $4, $[[T1]]
+; MIPS64R6:      lwc1 $f0, 0($[[T3]])
+
 ; CHECK-NACL-NOT: lwxc1
+
   %arrayidx = getelementptr inbounds float* %b, i32 %o
   %0 = load float* %arrayidx, align 4
   ret float %0
@@ -23,8 +52,30 @@ entry:
 
 define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
 entry:
-; CHECK: ldxc1
+; ALL-LABEL: foo1:
+
+; MIPS32R1:      sll $[[T1:[0-9]+]], $5, 3
+; MIPS32R1:      addu $[[T3:[0-9]+]], $4, $[[T1]]
+; MIPS32R1:      ldc1 $f0, 0($[[T3]])
+
+; MIPS32R2:      sll $[[T1:[0-9]+]], $5, 3
+; MIPS32R2:      ldxc1 $f0, $[[T1]]($4)
+
+; MIPS32R6:      sll $[[T1:[0-9]+]], $5, 3
+; MIPS32R6:      addu $[[T3:[0-9]+]], $4, $[[T1]]
+; MIPS32R6:      ldc1 $f0, 0($[[T3]])
+
+; MIPS4:         sll $[[T0:[0-9]+]], $5, 0
+; MIPS4:         dsll $[[T1:[0-9]+]], $[[T0]], 3
+; MIPS4:         ldxc1 $f0, $[[T1]]($4)
+
+; MIPS64R6:      sll $[[T0:[0-9]+]], $5, 0
+; MIPS64R6:      dsll $[[T1:[0-9]+]], $[[T0]], 3
+; MIPS64R6:      daddu $[[T3:[0-9]+]], $4, $[[T1]]
+; MIPS64R6:      ldc1 $f0, 0($[[T3]])
+
 ; CHECK-NACL-NOT: ldxc1
+
   %arrayidx = getelementptr inbounds double* %b, i32 %o
   %0 = load double* %arrayidx, align 8
   ret double %0
@@ -32,7 +83,23 @@ entry:
 
 define float @foo2(i32 %b, i32 %c) nounwind readonly {
 entry:
-; CHECK-NOT: luxc1
+; ALL-LABEL: foo2:
+
+; luxc1 did not exist in MIPS32r1
+; MIPS32R1-NOT:  luxc1
+
+; luxc1 is a misnomer since it aligns the given pointer downwards and performs
+; an aligned load. We mustn't use it to handle unaligned loads.
+; MIPS32R2-NOT:  luxc1
+
+; luxc1 was removed in MIPS32r6
+; MIPS32R6-NOT:  luxc1
+
+; MIPS4-NOT:     luxc1
+
+; luxc1 was removed in MIPS64r6
+; MIPS64R6-NOT:  luxc1
+
   %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
   %0 = load float* %arrayidx1, align 1
   ret float %0
@@ -40,8 +107,28 @@ entry:
 
 define void @foo3(float* nocapture %b, i32 %o) nounwind {
 entry:
-; CHECK: swxc1
+; ALL-LABEL: foo3:
+
+; MIPS32R1-DAG:  lwc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS32R1-DAG:  addu $[[T1:[0-9]+]], $4, ${{[0-9]+}}
+; MIPS32R1-DAG:  swc1 $[[T0]], 0($[[T1]])
+
+; MIPS32R2:      lwc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS32R2:      swxc1 $[[T0]], ${{[0-9]+}}($4)
+
+; MIPS32R6-DAG:  lwc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS32R6-DAG:  addu $[[T1:[0-9]+]], $4, ${{[0-9]+}}
+; MIPS32R6-DAG:  swc1 $[[T0]], 0($[[T1]])
+
+; MIPS4:         lwc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS4:         swxc1 $[[T0]], ${{[0-9]+}}($4)
+
+; MIPS64R6-DAG:  lwc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS64R6-DAG:  daddu $[[T1:[0-9]+]], $4, ${{[0-9]+}}
+; MIPS64R6-DAG:  swc1 $[[T0]], 0($[[T1]])
+
 ; CHECK-NACL-NOT: swxc1
+
   %0 = load float* @gf, align 4
   %arrayidx = getelementptr inbounds float* %b, i32 %o
   store float %0, float* %arrayidx, align 4
@@ -50,8 +137,28 @@ entry:
 
 define void @foo4(double* nocapture %b, i32 %o) nounwind {
 entry:
-; CHECK: sdxc1
+; ALL-LABEL: foo4:
+
+; MIPS32R1-DAG:  ldc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS32R1-DAG:  addu $[[T1:[0-9]+]], $4, ${{[0-9]+}}
+; MIPS32R1-DAG:  sdc1 $[[T0]], 0($[[T1]])
+
+; MIPS32R2:      ldc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS32R2:      sdxc1 $[[T0]], ${{[0-9]+}}($4)
+
+; MIPS32R6-DAG:  ldc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS32R6-DAG:  addu $[[T1:[0-9]+]], $4, ${{[0-9]+}}
+; MIPS32R6-DAG:  sdc1 $[[T0]], 0($[[T1]])
+
+; MIPS4:         ldc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS4:         sdxc1 $[[T0]], ${{[0-9]+}}($4)
+
+; MIPS64R6-DAG:  ldc1 $[[T0:f0]], 0(${{[0-9]+}})
+; MIPS64R6-DAG:  daddu $[[T1:[0-9]+]], $4, ${{[0-9]+}}
+; MIPS64R6-DAG:  sdc1 $[[T0]], 0($[[T1]])
+
 ; CHECK-NACL-NOT: sdxc1
+
   %0 = load double* @gd, align 8
   %arrayidx = getelementptr inbounds double* %b, i32 %o
   store double %0, double* %arrayidx, align 8
@@ -60,7 +167,18 @@ entry:
 
 define void @foo5(i32 %b, i32 %c) nounwind {
 entry:
-; CHECK-NOT: suxc1
+; ALL-LABEL: foo5:
+
+; MIPS32R1-NOT:  suxc1
+
+; MIPS32R2-NOT:  suxc1
+
+; MIPS32R6-NOT:  suxc1
+
+; MIPS4-NOT:     suxc1
+
+; MIPS64R6-NOT:  suxc1
+
   %0 = load float* @gf, align 4
   %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
   store float %0, float* %arrayidx1, align 1
@@ -69,8 +187,18 @@ entry:
 
 define double @foo6(i32 %b, i32 %c) nounwind readonly {
 entry:
-; CHECK: foo6
-; CHECK-NOT: luxc1
+; ALL-LABEL: foo6:
+
+; MIPS32R1-NOT:  luxc1
+
+; MIPS32R2-NOT:  luxc1
+
+; MIPS32R6-NOT:  luxc1
+
+; MIPS4-NOT:     luxc1
+
+; MIPS64R6-NOT:  luxc1
+
   %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
   %0 = load double* %arrayidx1, align 1
   ret double %0
@@ -78,8 +206,18 @@ entry:
 
 define void @foo7(i32 %b, i32 %c) nounwind {
 entry:
-; CHECK: foo7
-; CHECK-NOT: suxc1
+; ALL-LABEL: foo7:
+
+; MIPS32R1-NOT:  suxc1
+
+; MIPS32R2-NOT:  suxc1
+
+; MIPS32R6-NOT:  suxc1
+
+; MIPS4-NOT:     suxc1
+
+; MIPS64R6-NOT:  suxc1
+
   %0 = load double* @gd, align 8
   %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
   store double %0, double* %arrayidx1, align 1
@@ -88,16 +226,36 @@ entry:
 
 define float @foo8() nounwind readonly {
 entry:
-; CHECK: foo8
-; CHECK-NOT: luxc1
+; ALL-LABEL: foo8:
+
+; MIPS32R1-NOT:  luxc1
+
+; MIPS32R2-NOT:  luxc1
+
+; MIPS32R6-NOT:  luxc1
+
+; MIPS4-NOT:     luxc1
+
+; MIPS64R6-NOT:  luxc1
+
   %0 = load float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
   ret float %0
 }
 
 define void @foo9(float %f) nounwind {
 entry:
-; CHECK: foo9
-; CHECK-NOT: suxc1
+; ALL-LABEL: foo9:
+
+; MIPS32R1-NOT:  suxc1
+
+; MIPS32R2-NOT:  suxc1
+
+; MIPS32R6-NOT:  suxc1
+
+; MIPS4-NOT:     suxc1
+
+; MIPS64R6-NOT:  suxc1
+
   store float %f, float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
   ret void
 }
diff --git a/test/CodeGen/Mips/fpbr.ll b/test/CodeGen/Mips/fpbr.ll
index a136557..311b830 100644
--- a/test/CodeGen/Mips/fpbr.ll
+++ b/test/CodeGen/Mips/fpbr.ll
@@ -1,9 +1,25 @@
-; RUN: llc  < %s -march=mipsel | FileCheck %s
+; RUN: llc < %s -march=mipsel -mcpu=mips32   | FileCheck %s -check-prefix=ALL -check-prefix=FCC -check-prefix=32-FCC
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=FCC -check-prefix=32-FCC
+; RUN: llc < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=GPR -check-prefix=32-GPR
+; RUN: llc < %s -march=mips64el -mcpu=mips64   | FileCheck %s -check-prefix=ALL -check-prefix=FCC -check-prefix=64-FCC
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=FCC -check-prefix=64-FCC
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL -check-prefix=GPR -check-prefix=64-GPR
 
 define void @func0(float %f2, float %f3) nounwind {
 entry:
-; CHECK: c.eq.s
-; CHECK: bc1f
+; ALL-LABEL: func0:
+
+; 32-FCC:        c.eq.s $f12, $f14
+; 64-FCC:        c.eq.s $f12, $f13
+; FCC:           bc1f   $BB0_2
+
+; 32-GPR:        cmp.eq.s $[[FGRCC:f[0-9]+]], $f12, $f14
+; 64-GPR:        cmp.eq.s $[[FGRCC:f[0-9]+]], $f12, $f13
+; GPR:           mfc1     $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
+; FIXME: We ought to be able to transform not+bnez -> beqz
+; GPR:           not      $[[GPRCC]], $[[GPRCC]]
+; GPR:           bnez     $[[GPRCC]], $BB0_2
+
   %cmp = fcmp oeq float %f2, %f3
   br i1 %cmp, label %if.then, label %if.else
 
@@ -25,8 +41,18 @@ declare void @g1(...)
 
 define void @func1(float %f2, float %f3) nounwind {
 entry:
-; CHECK: c.olt.s
-; CHECK: bc1f
+; ALL-LABEL: func1:
+
+; 32-FCC:        c.olt.s $f12, $f14
+; 64-FCC:        c.olt.s $f12, $f13
+; FCC:           bc1f    $BB1_2
+
+; 32-GPR:        cmp.ule.s $[[FGRCC:f[0-9]+]], $f14, $f12
+; 64-GPR:        cmp.ule.s $[[FGRCC:f[0-9]+]], $f13, $f12
+; GPR:           mfc1     $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
+; GPR-NOT:       not      $[[GPRCC]], $[[GPRCC]]
+; GPR:           bnez     $[[GPRCC]], $BB1_2
+
   %cmp = fcmp olt float %f2, %f3
   br i1 %cmp, label %if.then, label %if.else
 
@@ -44,8 +70,18 @@ if.end:                                           ; preds = %if.else, %if.then
 
 define void @func2(float %f2, float %f3) nounwind {
 entry:
-; CHECK: c.ole.s
-; CHECK: bc1t
+; ALL-LABEL: func2:
+
+; 32-FCC:        c.ole.s $f12, $f14
+; 64-FCC:        c.ole.s $f12, $f13
+; FCC:           bc1t    $BB2_2
+
+; 32-GPR:        cmp.ult.s $[[FGRCC:f[0-9]+]], $f14, $f12
+; 64-GPR:        cmp.ult.s $[[FGRCC:f[0-9]+]], $f13, $f12
+; GPR:           mfc1     $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
+; GPR-NOT:       not      $[[GPRCC]], $[[GPRCC]]
+; GPR:           beqz     $[[GPRCC]], $BB2_2
+
   %cmp = fcmp ugt float %f2, %f3
   br i1 %cmp, label %if.else, label %if.then
 
@@ -63,8 +99,19 @@ if.end:                                           ; preds = %if.else, %if.then
 
 define void @func3(double %f2, double %f3) nounwind {
 entry:
-; CHECK: c.eq.d
-; CHECK: bc1f
+; ALL-LABEL: func3:
+
+; 32-FCC:        c.eq.d $f12, $f14
+; 64-FCC:        c.eq.d $f12, $f13
+; FCC:           bc1f $BB3_2
+
+; 32-GPR:        cmp.eq.d $[[FGRCC:f[0-9]+]], $f12, $f14
+; 64-GPR:        cmp.eq.d $[[FGRCC:f[0-9]+]], $f12, $f13
+; GPR:           mfc1     $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
+; FIXME: We ought to be able to transform not+bnez -> beqz
+; GPR:           not      $[[GPRCC]], $[[GPRCC]]
+; GPR:           bnez     $[[GPRCC]], $BB3_2
+
   %cmp = fcmp oeq double %f2, %f3
   br i1 %cmp, label %if.then, label %if.else
 
@@ -82,8 +129,18 @@ if.end:                                           ; preds = %if.else, %if.then
 
 define void @func4(double %f2, double %f3) nounwind {
 entry:
-; CHECK: c.olt.d
-; CHECK: bc1f
+; ALL-LABEL: func4:
+
+; 32-FCC:        c.olt.d $f12, $f14
+; 64-FCC:        c.olt.d $f12, $f13
+; FCC:           bc1f $BB4_2
+
+; 32-GPR:        cmp.ule.d $[[FGRCC:f[0-9]+]], $f14, $f12
+; 64-GPR:        cmp.ule.d $[[FGRCC:f[0-9]+]], $f13, $f12
+; GPR:           mfc1     $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
+; GPR-NOT:       not      $[[GPRCC]], $[[GPRCC]]
+; GPR:           bnez     $[[GPRCC]], $BB4_2
+
   %cmp = fcmp olt double %f2, %f3
   br i1 %cmp, label %if.then, label %if.else
 
@@ -101,8 +158,18 @@ if.end:                                           ; preds = %if.else, %if.then
 
 define void @func5(double %f2, double %f3) nounwind {
 entry:
-; CHECK: c.ole.d
-; CHECK: bc1t
+; ALL-LABEL: func5:
+
+; 32-FCC:        c.ole.d $f12, $f14
+; 64-FCC:        c.ole.d $f12, $f13
+; FCC:           bc1t $BB5_2
+
+; 32-GPR:        cmp.ult.d $[[FGRCC:f[0-9]+]], $f14, $f12
+; 64-GPR:        cmp.ult.d $[[FGRCC:f[0-9]+]], $f13, $f12
+; GPR:           mfc1     $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]]
+; GPR-NOT:       not      $[[GPRCC]], $[[GPRCC]]
+; GPR:           beqz     $[[GPRCC]], $BB5_2
+
   %cmp = fcmp ugt double %f2, %f3
   br i1 %cmp, label %if.else, label %if.then
 
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
index 9464918..a67ddce 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
@@ -1,6 +1,7 @@
 ; Positive test for inline register constraints
 ;
-; RUN: llc -march=mipsel < %s | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
 
 define i32 @main() nounwind {
 entry:
diff --git a/test/CodeGen/Mips/lit.local.cfg b/test/CodeGen/Mips/lit.local.cfg
index 1fa54b4..a3183a2 100644
--- a/test/CodeGen/Mips/lit.local.cfg
+++ b/test/CodeGen/Mips/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'Mips' in targets:
+if not 'Mips' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/Mips/llvm-ir/call.ll b/test/CodeGen/Mips/llvm-ir/call.ll
new file mode 100644
index 0000000..4cbf43c
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/call.ll
@@ -0,0 +1,166 @@
+; Test the 'call' instruction and the tailcall variant.
+
+; FIXME: We should remove the need for -enable-mips-tail-calls
+; RUN: llc -march=mips   -mcpu=mips32   -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
+; RUN: llc -march=mips   -mcpu=mips32r2 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
+; RUN: llc -march=mips   -mcpu=mips32r6 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
+; RUN: llc -march=mips64 -mcpu=mips4    -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
+; RUN: llc -march=mips64 -mcpu=mips64   -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
+; RUN: llc -march=mips64 -mcpu=mips64r2 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
+; RUN: llc -march=mips64 -mcpu=mips64r6 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
+
+declare void @extern_void_void()
+declare i32 @extern_i32_void()
+declare float @extern_float_void()
+
+define i32 @call_void_void() {
+; ALL-LABEL: call_void_void:
+
+; O32:           lw $[[TGT:[0-9]+]], %call16(extern_void_void)($gp)
+
+; N64:           ld $[[TGT:[0-9]+]], %call16(extern_void_void)($gp)
+
+; ALL:           jalr $[[TGT]]
+
+  call void @extern_void_void()
+  ret i32 0
+}
+
+define i32 @call_i32_void() {
+; ALL-LABEL: call_i32_void:
+
+; O32:           lw $[[TGT:[0-9]+]], %call16(extern_i32_void)($gp)
+
+; N64:           ld $[[TGT:[0-9]+]], %call16(extern_i32_void)($gp)
+
+; ALL:           jalr $[[TGT]]
+
+  %1 = call i32 @extern_i32_void()
+  %2 = add i32 %1, 1
+  ret i32 %2
+}
+
+define float @call_float_void() {
+; ALL-LABEL: call_float_void:
+
+; FIXME: Not sure why we don't use $gp directly on such a simple test. We should
+;        look into it at some point.
+; O32:           addu $[[GP:[0-9]+]], ${{[0-9]+}}, $25
+; O32:           lw $[[TGT:[0-9]+]], %call16(extern_float_void)($[[GP]])
+
+; N64:           ld $[[TGT:[0-9]+]], %call16(extern_float_void)($gp)
+
+; ALL:           jalr $[[TGT]]
+
+; O32:           move $gp, $[[GP]]
+
+  %1 = call float @extern_float_void()
+  %2 = fadd float %1, 1.0
+  ret float %2
+}
+
+define void @musttail_call_void_void() {
+; ALL-LABEL: musttail_call_void_void:
+
+; O32:           lw $[[TGT:[0-9]+]], %call16(extern_void_void)($gp)
+
+; N64:           ld $[[TGT:[0-9]+]], %call16(extern_void_void)($gp)
+
+; NOT-R6:        jr $[[TGT]]
+; R6:            r6.jr $[[TGT]]
+
+  musttail call void @extern_void_void()
+  ret void
+}
+
+define i32 @musttail_call_i32_void() {
+; ALL-LABEL: musttail_call_i32_void:
+
+; O32:           lw $[[TGT:[0-9]+]], %call16(extern_i32_void)($gp)
+
+; N64:           ld $[[TGT:[0-9]+]], %call16(extern_i32_void)($gp)
+
+; NOT-R6:        jr $[[TGT]]
+; R6:            r6.jr $[[TGT]]
+
+  %1 = musttail call i32 @extern_i32_void()
+  ret i32 %1
+}
+
+define float @musttail_call_float_void() {
+; ALL-LABEL: musttail_call_float_void:
+
+; O32:           lw $[[TGT:[0-9]+]], %call16(extern_float_void)($gp)
+
+; N64:           ld $[[TGT:[0-9]+]], %call16(extern_float_void)($gp)
+
+; NOT-R6:        jr $[[TGT]]
+; R6:            r6.jr $[[TGT]]
+
+  %1 = musttail call float @extern_float_void()
+  ret float %1
+}
+
+define i32 @indirect_call_void_void(void ()* %addr) {
+; ALL-LABEL: indirect_call_void_void:
+
+; ALL:           move $25, $4
+; ALL:           jalr $25
+
+  call void %addr()
+  ret i32 0
+}
+
+define i32 @indirect_call_i32_void(i32 ()* %addr) {
+; ALL-LABEL: indirect_call_i32_void:
+
+; ALL:           move $25, $4
+; ALL:           jalr $25
+
+  %1 = call i32 %addr()
+  %2 = add i32 %1, 1
+  ret i32 %2
+}
+
+define float @indirect_call_float_void(float ()* %addr) {
+; ALL-LABEL: indirect_call_float_void:
+
+; ALL:           move $25, $4
+; ALL:           jalr $25
+
+  %1 = call float %addr()
+  %2 = fadd float %1, 1.0
+  ret float %2
+}
+
+; We can't use 'musttail' here because the verifier is too conservative and
+; prohibits any prototype difference.
+define void @tail_indirect_call_void_void(void ()* %addr) {
+; ALL-LABEL: tail_indirect_call_void_void:
+
+; ALL:           move $25, $4
+; ALL:           jr $25
+
+  tail call void %addr()
+  ret void
+}
+
+define i32 @tail_indirect_call_i32_void(i32 ()* %addr) {
+; ALL-LABEL: tail_indirect_call_i32_void:
+
+; ALL:           move $25, $4
+; ALL:           jr $25
+
+  %1 = tail call i32 %addr()
+  ret i32 %1
+}
+
+define float @tail_indirect_call_float_void(float ()* %addr) {
+; ALL-LABEL: tail_indirect_call_float_void:
+
+; ALL:           move $25, $4
+; ALL:           jr $25
+
+  %1 = tail call float %addr()
+  ret float %1
+}
diff --git a/test/CodeGen/Mips/llvm-ir/indirectbr.ll b/test/CodeGen/Mips/llvm-ir/indirectbr.ll
new file mode 100644
index 0000000..d8fd787
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/indirectbr.ll
@@ -0,0 +1,34 @@
+; Test all important variants of the unconditional 'br' instruction.
+
+; RUN: llc -march=mips   -mcpu=mips32   -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
+; RUN: llc -march=mips   -mcpu=mips32r2 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
+; RUN: llc -march=mips   -mcpu=mips32r6 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=R6
+; RUN: llc -march=mips64 -mcpu=mips4    -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64   -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64r2 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64r6 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=R6
+
+define i32 @br(i8 *%addr) {
+; ALL-LABEL: br:
+; NOT-R6:        jr $4 # <MCInst #{{[0-9]+}} JR
+; R6:            jr $4 # <MCInst #{{[0-9]+}} JALR
+
+; ALL: $BB0_1: # %L1
+; NOT-R6:        jr $ra # <MCInst #{{[0-9]+}} JR
+; R6:            jr $ra # <MCInst #{{[0-9]+}} JALR
+; ALL:           addiu $2, $zero, 0
+
+; ALL: $BB0_2: # %L2
+; NOT-R6:        jr $ra # <MCInst #{{[0-9]+}} JR
+; R6:            jr $ra # <MCInst #{{[0-9]+}} JALR
+; ALL:           addiu $2, $zero, 1
+
+entry:
+  indirectbr i8* %addr, [label %L1, label %L2]
+
+L1:
+  ret i32 0
+
+L2:
+  ret i32 1
+}
diff --git a/test/CodeGen/Mips/llvm-ir/ret.ll b/test/CodeGen/Mips/llvm-ir/ret.ll
new file mode 100644
index 0000000..8f5b115
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/ret.ll
@@ -0,0 +1,205 @@
+; Test all important variants of the 'ret' instruction.
+;
+; For non-void returns it is necessary to have something to return so we also
+; test constant generation here.
+;
+; We'll test pointer returns in a separate file since the relocation model
+; affects it and it's undesirable to repeat the non-pointer returns for each
+; relocation model.
+
+; RUN: llc -march=mips   -mcpu=mips32   -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=NO-MTHC1 -check-prefix=NOT-R6
+; RUN: llc -march=mips   -mcpu=mips32r2 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=MTHC1 -check-prefix=NOT-R6
+; RUN: llc -march=mips   -mcpu=mips32r6 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=MTHC1 -check-prefix=R6
+; RUN: llc -march=mips64 -mcpu=mips4    -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=DMTC1 -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64   -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=DMTC1 -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64r2 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=DMTC1 -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64r6 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=DMTC1 -check-prefix=R6
+
+define void @ret_void() {
+; ALL-LABEL: ret_void:
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret void
+}
+
+define i8 @ret_i8() {
+; ALL-LABEL: ret_i8:
+; ALL-DAG:       addiu $2, $zero, 3
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i8 3
+}
+
+define i16 @ret_i16_3() {
+; ALL-LABEL: ret_i16_3:
+; ALL-DAG:       addiu $2, $zero, 3
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i16 3
+}
+
+define i16 @ret_i16_256() {
+; ALL-LABEL: ret_i16_256:
+; ALL-DAG:       addiu $2, $zero, 256
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i16 256
+}
+
+define i16 @ret_i16_257() {
+; ALL-LABEL: ret_i16_257:
+; ALL-DAG:       addiu $2, $zero, 257
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i16 257
+}
+
+define i32 @ret_i32_257() {
+; ALL-LABEL: ret_i32_257:
+; ALL-DAG:       addiu $2, $zero, 257
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i32 257
+}
+
+define i32 @ret_i32_65536() {
+; ALL-LABEL: ret_i32_65536:
+; ALL-DAG:       lui $2, 1
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i32 65536
+}
+
+define i32 @ret_i32_65537() {
+; ALL-LABEL: ret_i32_65537:
+; ALL:           lui $[[T0:[0-9]+]], 1
+; ALL-DAG:       ori $2, $[[T0]], 1
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i32 65537
+}
+
+define i64 @ret_i64_65537() {
+; ALL-LABEL: ret_i64_65537:
+; ALL:           lui $[[T0:[0-9]+]], 1
+
+; GPR32-DAG:     ori $3, $[[T0]], 1
+; GPR32-DAG:     addiu $2, $zero, 0
+
+; GPR64-DAG:     daddiu $2, $[[T0]], 1
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i64 65537
+}
+
+define i64 @ret_i64_281479271677952() {
+; ALL-LABEL: ret_i64_281479271677952:
+; ALL-DAG:       lui $[[T0:[0-9]+]], 1
+
+; GPR32-DAG:     ori $2, $[[T0]], 1
+; GPR32-DAG:     addiu $3, $zero, 0
+
+; GPR64-DAG:     daddiu $[[T1:[0-9]+]], $[[T0]], 1
+; GPR64-DAG:     dsll $2, $[[T1]], 32
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i64 281479271677952
+}
+
+define i64 @ret_i64_281479271809026() {
+; ALL-LABEL: ret_i64_281479271809026:
+; GPR32-DAG:     lui $[[T0:[0-9]+]], 1
+; GPR32-DAG:     lui $[[T1:[0-9]+]], 2
+; GPR32-DAG:     ori $2, $[[T0]], 1
+; GPR32-DAG:     ori $3, $[[T1]], 2
+
+; GPR64-DAG:     ori  $[[T0:[0-9]+]], $zero, 32769
+; GPR64-DAG:     dsll $[[T1:[0-9]+]], $[[T0]], 16
+; GPR64-DAG:     daddiu $[[T0:[0-9]+]], $[[T0]], -32767
+; GPR64-DAG:     dsll $[[T1:[0-9]+]], $[[T0]], 17
+; GPR64-DAG:     daddiu $2, $[[T1]], 2
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret i64 281479271809026
+}
+
+define float @ret_float_0x0() {
+; ALL-LABEL: ret_float_0x0:
+
+; NO-MTHC1-DAG:  mtc1 $zero, $f0
+
+; MTHC1-DAG:     mtc1 $zero, $f0
+
+; DMTC-DAG:      dmtc1 $zero, $f0
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret float 0x0000000000000000
+}
+
+define float @ret_float_0x3() {
+; ALL-LABEL: ret_float_0x3:
+
+; Use a constant pool
+; O32-DAG:       lwc1 $f0, %lo($CPI
+; N64-DAG:       lwc1 $f0, %got_ofst($CPI
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+; float constants are written as double constants
+  ret float 0x36b8000000000000
+}
+
+define double @ret_double_0x0() {
+; ALL-LABEL: ret_double_0x0:
+
+; NO-MTHC1-DAG:  mtc1 $zero, $f0
+; NO-MTHC1-DAG:  mtc1 $zero, $f1
+
+; MTHC1-DAG:     mtc1 $zero, $f0
+; MTHC1-DAG:     mthc1 $zero, $f0
+
+; DMTC-DAG:      dmtc1 $zero, $f0
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret double 0x0000000000000000
+}
+
+define double @ret_double_0x3() {
+; ALL-LABEL: ret_double_0x3:
+
+; Use a constant pool
+; O32-DAG:       ldc1 $f0, %lo($CPI
+; N64-DAG:       ldc1 $f0, %got_ofst($CPI
+
+; NOT-R6-DAG:    jr $ra # <MCInst #{{[0-9]+}} JR
+; R6-DAG:        jr $ra # <MCInst #{{[0-9]+}} JALR
+
+  ret double 0x0000000000000003
+}
diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll
index c7fe6fd..a403744 100644
--- a/test/CodeGen/Mips/longbranch.ll
+++ b/test/CodeGen/Mips/longbranch.ll
@@ -7,6 +7,8 @@
 ; RUN:   < %s | FileCheck %s -check-prefix=N64
 ; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=micromips \
 ; RUN:   -force-mips-long-branch -O3 < %s | FileCheck %s -check-prefix=MICROMIPS
+; RUN: llc -mtriple=mipsel-none-nacl -force-mips-long-branch -O3 < %s \
+; RUN:   | FileCheck %s -check-prefix=NACL
 
 
 @x = external global i32
@@ -126,4 +128,36 @@ end:
 ; MICROMIPS:   $[[BB2]]:
 ; MICROMIPS:        jr      $ra
 ; MICROMIPS:        nop
+
+
+; Check the NaCl version.  Check that sp change is not in the branch delay slot
+; of "jr $1" instruction.  Check that target of indirect branch "jr $1" is
+; bundle aligned.
+
+; NACL:        lui     $[[R0:[0-9]+]], %hi(_gp_disp)
+; NACL:        addiu   $[[R0]], $[[R0]], %lo(_gp_disp)
+; NACL:        bnez    $4, $[[BB0:BB[0-9_]+]]
+; NACL:        addu    $[[GP:[0-9]+]], $[[R0]], $25
+
+; Check for long branch expansion:
+; NACL:             addiu   $sp, $sp, -8
+; NACL-NEXT:        sw      $ra, 0($sp)
+; NACL-NEXT:        lui     $1, %hi(($[[BB2:BB[0-9_]+]])-($[[BB1:BB[0-9_]+]]))
+; NACL-NEXT:        bal     $[[BB1]]
+; NACL-NEXT:        addiu   $1, $1, %lo(($[[BB2]])-($[[BB1]]))
+; NACL-NEXT:   $[[BB1]]:
+; NACL-NEXT:        addu    $1, $ra, $1
+; NACL-NEXT:        lw      $ra, 0($sp)
+; NACL-NEXT:        addiu   $sp, $sp, 8
+; NACL-NEXT:        jr      $1
+; NACL-NEXT:        nop
+
+; NACL:        $[[BB0]]:
+; NACL:             lw      $[[R1:[0-9]+]], %got(x)($[[GP]])
+; NACL:             addiu   $[[R2:[0-9]+]], $zero, 1
+; NACL:             sw      $[[R2]], 0($[[R1]])
+; NACL:             .align  4
+; NACL-NEXT:   $[[BB2]]:
+; NACL:             jr      $ra
+; NACL:             nop
 }
diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll
index 0dbb2c2..8222967 100644
--- a/test/CodeGen/Mips/madd-msub.ll
+++ b/test/CodeGen/Mips/madd-msub.ll
@@ -1,9 +1,49 @@
-; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=32
-; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s -check-prefix=DSP
+; RUN: llc -march=mips -mcpu=mips32   < %s | FileCheck %s -check-prefix=ALL -check-prefix=32
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32
+; RUN: llc -march=mips -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R6
+; RUN: llc -march=mips -mcpu=mips32 -mattr=dsp < %s | FileCheck %s -check-prefix=DSP
+; RUN: llc -march=mips -mcpu=mips64   < %s | FileCheck %s -check-prefix=ALL -check-prefix=64
+; RUN: llc -march=mips -mcpu=mips64r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64
+; RUN: llc -march=mips -mcpu=mips64r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64R6
+
+; FIXME: The MIPS16 test should check its output
 ; RUN: llc -march=mips -mcpu=mips16 < %s
 
-; 32: madd ${{[0-9]+}}
-; DSP: madd $ac
+; ALL-LABEL: madd1:
+
+; 32-DAG:        sra $[[T0:[0-9]+]], $6, 31
+; 32-DAG:        mtlo $6
+; 32-DAG:        [[m:m]]add ${{[45]}}, ${{[45]}}
+; 32-DAG:        [[m]]fhi $2
+; 32-DAG:        [[m]]flo $3
+
+; DSP-DAG:       sra $[[T0:[0-9]+]], $6, 31
+; DSP-DAG:       mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG:       madd $[[AC]], ${{[45]}}, ${{[45]}}
+; DSP-DAG:       mfhi $2, $[[AC]]
+; DSP-DAG:       mflo $3, $[[AC]]
+
+; 32R6-DAG:      mul  $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      addu $[[T1:[0-9]+]], $[[T0]], $6
+; 32R6-DAG:      sltu $[[T2:[0-9]+]], $[[T1]], $6
+; 32R6-DAG:      sra  $[[T3:[0-9]+]], $6, 31
+; 32R6-DAG:      addu $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+; 32R6-DAG:      muh  $[[T5:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      addu $2, $[[T5]], $[[T4]]
+
+; 64-DAG:        sll $[[T0:[0-9]+]], $4, 0
+; 64-DAG:        sll $[[T1:[0-9]+]], $5, 0
+; 64-DAG:        d[[m:m]]ult $[[T1]], $[[T0]]
+; 64-DAG:        [[m]]flo $[[T2:[0-9]+]]
+; 64-DAG:        sll $[[T3:[0-9]+]], $6, 0
+; 64-DAG:        daddu $2, $[[T2]], $[[T3]]
+
+; 64R6-DAG:      sll $[[T0:[0-9]+]], $4, 0
+; 64R6-DAG:      sll $[[T1:[0-9]+]], $5, 0
+; 64R6-DAG:      dmul $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+; 64R6-DAG:      sll $[[T3:[0-9]+]], $6, 0
+; 64R6-DAG:      daddu $2, $[[T2]], $[[T3]]
+
 define i64 @madd1(i32 %a, i32 %b, i32 %c) nounwind readnone {
 entry:
   %conv = sext i32 %a to i64
@@ -14,8 +54,47 @@ entry:
   ret i64 %add
 }
 
-; 32: maddu ${{[0-9]+}}
-; DSP: maddu $ac
+; ALL-LABEL: madd2:
+
+; FIXME: We don't really need this instruction
+; 32-DAG:        addiu $[[T0:[0-9]+]], $zero, 0
+; 32-DAG:        mtlo $6
+; 32-DAG:        [[m:m]]addu ${{[45]}}, ${{[45]}}
+; 32-DAG:        [[m]]fhi $2
+; 32-DAG:        [[m]]flo $3
+
+; DSP-DAG:       addiu $[[T0:[0-9]+]], $zero, 0
+; DSP-DAG:       mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG:       maddu $[[AC]], ${{[45]}}, ${{[45]}}
+; DSP-DAG:       mfhi $2, $[[AC]]
+; DSP-DAG:       mflo $3, $[[AC]]
+
+; 32R6-DAG:      mul  $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      addu $[[T1:[0-9]+]], $[[T0]], $6
+; 32R6-DAG:      sltu $[[T2:[0-9]+]], $[[T1]], $6
+; FIXME: There's a redundant move here. We should remove it
+; 32R6-DAG:      muhu $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      addu $2, $[[T3]], $[[T2]]
+
+; 64-DAG:        dsll $[[T0:[0-9]+]], $4, 32
+; 64-DAG:        dsrl $[[T1:[0-9]+]], $[[T0]], 32
+; 64-DAG:        dsll $[[T2:[0-9]+]], $5, 32
+; 64-DAG:        dsrl $[[T3:[0-9]+]], $[[T2]], 32
+; 64-DAG:        d[[m:m]]ult $[[T3]], $[[T1]]
+; 64-DAG:        [[m]]flo $[[T4:[0-9]+]]
+; 64-DAG:        dsll $[[T5:[0-9]+]], $6, 32
+; 64-DAG:        dsrl $[[T6:[0-9]+]], $[[T5]], 32
+; 64-DAG:        daddu $2, $[[T4]], $[[T6]]
+
+; 64R6-DAG:      dsll $[[T0:[0-9]+]], $4, 32
+; 64R6-DAG:      dsrl $[[T1:[0-9]+]], $[[T0]], 32
+; 64R6-DAG:      dsll $[[T2:[0-9]+]], $5, 32
+; 64R6-DAG:      dsrl $[[T3:[0-9]+]], $[[T2]], 32
+; 64R6-DAG:      dmul $[[T4:[0-9]+]], $[[T3]], $[[T1]]
+; 64R6-DAG:      dsll $[[T5:[0-9]+]], $6, 32
+; 64R6-DAG:      dsrl $[[T6:[0-9]+]], $[[T5]], 32
+; 64R6-DAG:      daddu $2, $[[T4]], $[[T6]]
+
 define i64 @madd2(i32 %a, i32 %b, i32 %c) nounwind readnone {
 entry:
   %conv = zext i32 %a to i64
@@ -26,8 +105,38 @@ entry:
   ret i64 %add
 }
 
-; 32: madd ${{[0-9]+}}
-; DSP: madd $ac
+; ALL-LABEL: madd3:
+
+; 32-DAG:        mthi $6
+; 32-DAG:        mtlo $7
+; 32-DAG:        [[m:m]]add ${{[45]}}, ${{[45]}}
+; 32-DAG:        [[m]]fhi $2
+; 32-DAG:        [[m]]flo $3
+
+; DSP-DAG:       mthi $[[AC:ac[0-3]+]], $6
+; DSP-DAG:       mtlo $[[AC]], $7
+; DSP-DAG:       madd $[[AC]], ${{[45]}}, ${{[45]}}
+; DSP-DAG:       mfhi $2, $[[AC]]
+; DSP-DAG:       mflo $3, $[[AC]]
+
+; 32R6-DAG:      mul  $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      addu $[[T1:[0-9]+]], $[[T0]], $7
+; 32R6-DAG:      sltu $[[T2:[0-9]+]], $[[T1]], $7
+; 32R6-DAG:      addu $[[T4:[0-9]+]], $[[T2]], $6
+; 32R6-DAG:      muh  $[[T5:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      addu $2, $[[T5]], $[[T4]]
+
+; 64-DAG:        sll $[[T0:[0-9]+]], $4, 0
+; 64-DAG:        sll $[[T1:[0-9]+]], $5, 0
+; 64-DAG:        d[[m:m]]ult $[[T1]], $[[T0]]
+; 64-DAG:        [[m]]flo $[[T2:[0-9]+]]
+; 64-DAG:        daddu $2, $[[T2]], $6
+
+; 64R6-DAG:      sll $[[T0:[0-9]+]], $4, 0
+; 64R6-DAG:      sll $[[T1:[0-9]+]], $5, 0
+; 64R6-DAG:      dmul $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+; 64R6-DAG:      daddu $2, $[[T2]], $6
+
 define i64 @madd3(i32 %a, i32 %b, i64 %c) nounwind readnone {
 entry:
   %conv = sext i32 %a to i64
@@ -37,8 +146,41 @@ entry:
   ret i64 %add
 }
 
-; 32: msub ${{[0-9]+}}
-; DSP: msub $ac
+; ALL-LABEL: msub1:
+
+; 32-DAG:        sra $[[T0:[0-9]+]], $6, 31
+; 32-DAG:        mtlo $6
+; 32-DAG:        [[m:m]]sub ${{[45]}}, ${{[45]}}
+; 32-DAG:        [[m]]fhi $2
+; 32-DAG:        [[m]]flo $3
+
+; DSP-DAG:       sra $[[T0:[0-9]+]], $6, 31
+; DSP-DAG:       mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG:       msub $[[AC]], ${{[45]}}, ${{[45]}}
+; DSP-DAG:       mfhi $2, $[[AC]]
+; DSP-DAG:       mflo $3, $[[AC]]
+
+; 32R6-DAG:      muh  $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      mul  $[[T1:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      sltu $[[T3:[0-9]+]], $6, $[[T1]]
+; 32R6-DAG:      addu $[[T4:[0-9]+]], $[[T3]], $[[T0]]
+; 32R6-DAG:      sra  $[[T5:[0-9]+]], $6, 31
+; 32R6-DAG:      subu $2, $[[T5]], $[[T4]]
+; 32R6-DAG:      subu $3, $6, $[[T1]]
+
+; 64-DAG:        sll $[[T0:[0-9]+]], $4, 0
+; 64-DAG:        sll $[[T1:[0-9]+]], $5, 0
+; 64-DAG:        d[[m:m]]ult $[[T1]], $[[T0]]
+; 64-DAG:        [[m]]flo $[[T2:[0-9]+]]
+; 64-DAG:        sll $[[T3:[0-9]+]], $6, 0
+; 64-DAG:        dsubu $2, $[[T3]], $[[T2]]
+
+; 64R6-DAG:      sll $[[T0:[0-9]+]], $4, 0
+; 64R6-DAG:      sll $[[T1:[0-9]+]], $5, 0
+; 64R6-DAG:      dmul $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+; 64R6-DAG:      sll $[[T3:[0-9]+]], $6, 0
+; 64R6-DAG:      dsubu $2, $[[T3]], $[[T2]]
+
 define i64 @msub1(i32 %a, i32 %b, i32 %c) nounwind readnone {
 entry:
   %conv = sext i32 %c to i64
@@ -49,8 +191,48 @@ entry:
   ret i64 %sub
 }
 
-; 32: msubu ${{[0-9]+}}
-; DSP: msubu $ac
+; ALL-LABEL: msub2:
+
+; FIXME: We don't really need this instruction
+; 32-DAG:        addiu $[[T0:[0-9]+]], $zero, 0
+; 32-DAG:        mtlo $6
+; 32-DAG:        [[m:m]]subu ${{[45]}}, ${{[45]}}
+; 32-DAG:        [[m]]fhi $2
+; 32-DAG:        [[m]]flo $3
+
+; DSP-DAG:       addiu $[[T0:[0-9]+]], $zero, 0
+; DSP-DAG:       mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG:       msubu $[[AC]], ${{[45]}}, ${{[45]}}
+; DSP-DAG:       mfhi $2, $[[AC]]
+; DSP-DAG:       mflo $3, $[[AC]]
+
+; 32R6-DAG:      muhu $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      mul $[[T1:[0-9]+]], ${{[45]}}, ${{[45]}}
+
+; 32R6-DAG:      sltu $[[T2:[0-9]+]], $6, $[[T1]]
+; 32R6-DAG:      addu $[[T3:[0-9]+]], $[[T2]], $[[T0]]
+; 32R6-DAG:      negu $2, $[[T3]]
+; 32R6-DAG:      subu $3, $6, $[[T1]]
+
+; 64-DAG:        dsll $[[T0:[0-9]+]], $4, 32
+; 64-DAG:        dsrl $[[T1:[0-9]+]], $[[T0]], 32
+; 64-DAG:        dsll $[[T2:[0-9]+]], $5, 32
+; 64-DAG:        dsrl $[[T3:[0-9]+]], $[[T2]], 32
+; 64-DAG:        d[[m:m]]ult $[[T3]], $[[T1]]
+; 64-DAG:        [[m]]flo $[[T4:[0-9]+]]
+; 64-DAG:        dsll $[[T5:[0-9]+]], $6, 32
+; 64-DAG:        dsrl $[[T6:[0-9]+]], $[[T5]], 32
+; 64-DAG:        dsubu $2, $[[T6]], $[[T4]]
+
+; 64R6-DAG:      dsll $[[T0:[0-9]+]], $4, 32
+; 64R6-DAG:      dsrl $[[T1:[0-9]+]], $[[T0]], 32
+; 64R6-DAG:      dsll $[[T2:[0-9]+]], $5, 32
+; 64R6-DAG:      dsrl $[[T3:[0-9]+]], $[[T2]], 32
+; 64R6-DAG:      dmul $[[T4:[0-9]+]], $[[T3]], $[[T1]]
+; 64R6-DAG:      dsll $[[T5:[0-9]+]], $6, 32
+; 64R6-DAG:      dsrl $[[T6:[0-9]+]], $[[T5]], 32
+; 64R6-DAG:      dsubu $2, $[[T6]], $[[T4]]
+
 define i64 @msub2(i32 %a, i32 %b, i32 %c) nounwind readnone {
 entry:
   %conv = zext i32 %c to i64
@@ -61,8 +243,39 @@ entry:
   ret i64 %sub
 }
 
-; 32: msub ${{[0-9]+}}
-; DSP: msub $ac
+; ALL-LABEL: msub3:
+
+; FIXME: We don't really need this instruction
+; 32-DAG:        mthi $6
+; 32-DAG:        mtlo $7
+; 32-DAG:        [[m:m]]sub ${{[45]}}, ${{[45]}}
+; 32-DAG:        [[m]]fhi $2
+; 32-DAG:        [[m]]flo $3
+
+; DSP-DAG:       addiu $[[T0:[0-9]+]], $zero, 0
+; DSP-DAG:       mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG:       msub $[[AC]], ${{[45]}}, ${{[45]}}
+; DSP-DAG:       mfhi $2, $[[AC]]
+; DSP-DAG:       mflo $3, $[[AC]]
+
+; 32R6-DAG:      muh $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      mul $[[T1:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG:      sltu $[[T2:[0-9]+]], $7, $[[T1]]
+; 32R6-DAG:      addu $[[T3:[0-9]+]], $[[T2]], $[[T0]]
+; 32R6-DAG:      subu $2, $6, $[[T3]]
+; 32R6-DAG:      subu $3, $7, $[[T1]]
+
+; 64-DAG:        sll $[[T0:[0-9]+]], $4, 0
+; 64-DAG:        sll $[[T1:[0-9]+]], $5, 0
+; 64-DAG:        d[[m:m]]ult $[[T1]], $[[T0]]
+; 64-DAG:        [[m]]flo $[[T2:[0-9]+]]
+; 64-DAG:        dsubu $2, $6, $[[T2]]
+
+; 64R6-DAG:      sll $[[T0:[0-9]+]], $4, 0
+; 64R6-DAG:      sll $[[T1:[0-9]+]], $5, 0
+; 64R6-DAG:      dmul $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+; 64R6-DAG:      dsubu $2, $6, $[[T2]]
+
 define i64 @msub3(i32 %a, i32 %b, i64 %c) nounwind readnone {
 entry:
   %conv = sext i32 %a to i64
diff --git a/test/CodeGen/Mips/mips16ex.ll b/test/CodeGen/Mips/mips16ex.ll
index ecb30b5..a1a9919 100644
--- a/test/CodeGen/Mips/mips16ex.ll
+++ b/test/CodeGen/Mips/mips16ex.ll
@@ -1,6 +1,8 @@
 ; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
 
-;16: $eh_func_begin0=.
+;16: .cfi_personality
+;16-NEXT: [[TMP:.*]]:
+;16-NEXT: $eh_func_begin0 = ([[TMP]])
 @.str = private unnamed_addr constant [7 x i8] c"hello\0A\00", align 1
 @_ZTIi = external constant i8*
 @.str1 = private unnamed_addr constant [15 x i8] c"exception %i \0A\00", align 1
diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll
index 4d590b6..7f7d515 100644
--- a/test/CodeGen/Mips/mips64-f128.ll
+++ b/test/CodeGen/Mips/mips64-f128.ll
@@ -1,7 +1,11 @@
 ; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips4 -soft-float -O1 \
-; RUN:     -disable-mips-delay-filler < %s | FileCheck %s
+; RUN:     -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=C_CC_FMT
 ; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64 -soft-float -O1 \
-; RUN:     -disable-mips-delay-filler < %s | FileCheck %s
+; RUN:     -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=C_CC_FMT
+; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64r2 -soft-float -O1 \
+; RUN:     -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=C_CC_FMT
+; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64r6 -soft-float -O1 \
+; RUN:     -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=CMP_CC_FMT
 
 @gld0 = external global fp128
 @gld1 = external global fp128
@@ -9,8 +13,8 @@
 @gf1 = external global float
 @gd1 = external global double
 
-; CHECK-LABEL: addLD:
-; CHECK: ld $25, %call16(__addtf3)
+; ALL-LABEL: addLD:
+; ALL: ld $25, %call16(__addtf3)
 
 define fp128 @addLD() {
 entry:
@@ -20,8 +24,8 @@ entry:
   ret fp128 %add
 }
 
-; CHECK-LABEL: subLD:
-; CHECK: ld $25, %call16(__subtf3)
+; ALL-LABEL: subLD:
+; ALL: ld $25, %call16(__subtf3)
 
 define fp128 @subLD() {
 entry:
@@ -31,8 +35,8 @@ entry:
   ret fp128 %sub
 }
 
-; CHECK-LABEL: mulLD:
-; CHECK: ld $25, %call16(__multf3)
+; ALL-LABEL: mulLD:
+; ALL: ld $25, %call16(__multf3)
 
 define fp128 @mulLD() {
 entry:
@@ -42,8 +46,8 @@ entry:
   ret fp128 %mul
 }
 
-; CHECK-LABEL: divLD:
-; CHECK: ld $25, %call16(__divtf3)
+; ALL-LABEL: divLD:
+; ALL: ld $25, %call16(__divtf3)
 
 define fp128 @divLD() {
 entry:
@@ -53,8 +57,8 @@ entry:
   ret fp128 %div
 }
 
-; CHECK-LABEL: conv_LD_char:
-; CHECK: ld $25, %call16(__floatsitf)
+; ALL-LABEL: conv_LD_char:
+; ALL: ld $25, %call16(__floatsitf)
 
 define fp128 @conv_LD_char(i8 signext %a) {
 entry:
@@ -62,8 +66,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_short:
-; CHECK: ld $25, %call16(__floatsitf)
+; ALL-LABEL: conv_LD_short:
+; ALL: ld $25, %call16(__floatsitf)
 
 define fp128 @conv_LD_short(i16 signext %a) {
 entry:
@@ -71,8 +75,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_int:
-; CHECK: ld $25, %call16(__floatsitf)
+; ALL-LABEL: conv_LD_int:
+; ALL: ld $25, %call16(__floatsitf)
 
 define fp128 @conv_LD_int(i32 %a) {
 entry:
@@ -80,8 +84,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_LL:
-; CHECK: ld $25, %call16(__floatditf)
+; ALL-LABEL: conv_LD_LL:
+; ALL: ld $25, %call16(__floatditf)
 
 define fp128 @conv_LD_LL(i64 %a) {
 entry:
@@ -89,8 +93,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_UChar:
-; CHECK: ld $25, %call16(__floatunsitf)
+; ALL-LABEL: conv_LD_UChar:
+; ALL: ld $25, %call16(__floatunsitf)
 
 define fp128 @conv_LD_UChar(i8 zeroext %a) {
 entry:
@@ -98,8 +102,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_UShort:
-; CHECK: ld $25, %call16(__floatunsitf)
+; ALL-LABEL: conv_LD_UShort:
+; ALL: ld $25, %call16(__floatunsitf)
 
 define fp128 @conv_LD_UShort(i16 zeroext %a) {
 entry:
@@ -107,8 +111,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_UInt:
-; CHECK: ld $25, %call16(__floatunsitf)
+; ALL-LABEL: conv_LD_UInt:
+; ALL: ld $25, %call16(__floatunsitf)
 
 define fp128 @conv_LD_UInt(i32 %a) {
 entry:
@@ -116,8 +120,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_ULL:
-; CHECK: ld $25, %call16(__floatunditf)
+; ALL-LABEL: conv_LD_ULL:
+; ALL: ld $25, %call16(__floatunditf)
 
 define fp128 @conv_LD_ULL(i64 %a) {
 entry:
@@ -125,8 +129,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_char_LD:
-; CHECK: ld $25, %call16(__fixtfsi)
+; ALL-LABEL: conv_char_LD:
+; ALL: ld $25, %call16(__fixtfsi)
 
 define signext i8 @conv_char_LD(fp128 %a) {
 entry:
@@ -134,8 +138,8 @@ entry:
   ret i8 %conv
 }
 
-; CHECK-LABEL: conv_short_LD:
-; CHECK: ld $25, %call16(__fixtfsi)
+; ALL-LABEL: conv_short_LD:
+; ALL: ld $25, %call16(__fixtfsi)
 
 define signext i16 @conv_short_LD(fp128 %a) {
 entry:
@@ -143,8 +147,8 @@ entry:
   ret i16 %conv
 }
 
-; CHECK-LABEL: conv_int_LD:
-; CHECK: ld $25, %call16(__fixtfsi)
+; ALL-LABEL: conv_int_LD:
+; ALL: ld $25, %call16(__fixtfsi)
 
 define i32 @conv_int_LD(fp128 %a) {
 entry:
@@ -152,8 +156,8 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: conv_LL_LD:
-; CHECK: ld $25, %call16(__fixtfdi)
+; ALL-LABEL: conv_LL_LD:
+; ALL: ld $25, %call16(__fixtfdi)
 
 define i64 @conv_LL_LD(fp128 %a) {
 entry:
@@ -161,8 +165,8 @@ entry:
   ret i64 %conv
 }
 
-; CHECK-LABEL: conv_UChar_LD:
-; CHECK: ld $25, %call16(__fixtfsi)
+; ALL-LABEL: conv_UChar_LD:
+; ALL: ld $25, %call16(__fixtfsi)
 
 define zeroext i8 @conv_UChar_LD(fp128 %a) {
 entry:
@@ -170,8 +174,8 @@ entry:
   ret i8 %conv
 }
 
-; CHECK-LABEL: conv_UShort_LD:
-; CHECK: ld $25, %call16(__fixtfsi)
+; ALL-LABEL: conv_UShort_LD:
+; ALL: ld $25, %call16(__fixtfsi)
 
 define zeroext i16 @conv_UShort_LD(fp128 %a) {
 entry:
@@ -179,8 +183,8 @@ entry:
   ret i16 %conv
 }
 
-; CHECK-LABEL: conv_UInt_LD:
-; CHECK: ld $25, %call16(__fixunstfsi)
+; ALL-LABEL: conv_UInt_LD:
+; ALL: ld $25, %call16(__fixunstfsi)
 
 define i32 @conv_UInt_LD(fp128 %a) {
 entry:
@@ -188,8 +192,8 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: conv_ULL_LD:
-; CHECK: ld $25, %call16(__fixunstfdi)
+; ALL-LABEL: conv_ULL_LD:
+; ALL: ld $25, %call16(__fixunstfdi)
 
 define i64 @conv_ULL_LD(fp128 %a) {
 entry:
@@ -197,8 +201,8 @@ entry:
   ret i64 %conv
 }
 
-; CHECK-LABEL: conv_LD_float:
-; CHECK: ld $25, %call16(__extendsftf2)
+; ALL-LABEL: conv_LD_float:
+; ALL: ld $25, %call16(__extendsftf2)
 
 define fp128 @conv_LD_float(float %a) {
 entry:
@@ -206,8 +210,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_LD_double:
-; CHECK: ld $25, %call16(__extenddftf2)
+; ALL-LABEL: conv_LD_double:
+; ALL: ld $25, %call16(__extenddftf2)
 
 define fp128 @conv_LD_double(double %a) {
 entry:
@@ -215,8 +219,8 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: conv_float_LD:
-; CHECK: ld $25, %call16(__trunctfsf2)
+; ALL-LABEL: conv_float_LD:
+; ALL: ld $25, %call16(__trunctfsf2)
 
 define float @conv_float_LD(fp128 %a) {
 entry:
@@ -224,8 +228,8 @@ entry:
   ret float %conv
 }
 
-; CHECK-LABEL: conv_double_LD:
-; CHECK: ld $25, %call16(__trunctfdf2)
+; ALL-LABEL: conv_double_LD:
+; ALL: ld $25, %call16(__trunctfdf2)
 
 define double @conv_double_LD(fp128 %a) {
 entry:
@@ -233,13 +237,13 @@ entry:
   ret double %conv
 }
 
-; CHECK-LABEL:             libcall1_fabsl:
-; CHECK-DAG: ld      $[[R0:[0-9]+]], 8($[[R4:[0-9]+]])
-; CHECK-DAG: daddiu  $[[R1:[0-9]+]], $zero, 1
-; CHECK-DAG: dsll    $[[R2:[0-9]+]], $[[R1]], 63
-; CHECK-DAG: daddiu  $[[R3:[0-9]+]], $[[R2]], -1
-; CHECK-DAG: and     $4, $[[R0]], $[[R3]]
-; CHECK-DAG: ld      $2, 0($[[R4]])
+; ALL-LABEL:             libcall1_fabsl:
+; ALL-DAG: ld      $[[R0:[0-9]+]], 8($[[R4:[0-9]+]])
+; ALL-DAG: daddiu  $[[R1:[0-9]+]], $zero, 1
+; ALL-DAG: dsll    $[[R2:[0-9]+]], $[[R1]], 63
+; ALL-DAG: daddiu  $[[R3:[0-9]+]], $[[R2]], -1
+; ALL-DAG: and     $4, $[[R0]], $[[R3]]
+; ALL-DAG: ld      $2, 0($[[R4]])
 
 define fp128 @libcall1_fabsl() {
 entry:
@@ -250,8 +254,8 @@ entry:
 
 declare fp128 @fabsl(fp128) #1
 
-; CHECK-LABEL: libcall1_ceill:
-; CHECK: ld $25, %call16(ceill)
+; ALL-LABEL: libcall1_ceill:
+; ALL: ld $25, %call16(ceill)
 
 define fp128 @libcall1_ceill() {
 entry:
@@ -262,8 +266,8 @@ entry:
 
 declare fp128 @ceill(fp128) #1
 
-; CHECK-LABEL: libcall1_sinl:
-; CHECK: ld $25, %call16(sinl)
+; ALL-LABEL: libcall1_sinl:
+; ALL: ld $25, %call16(sinl)
 
 define fp128 @libcall1_sinl() {
 entry:
@@ -274,8 +278,8 @@ entry:
 
 declare fp128 @sinl(fp128) #2
 
-; CHECK-LABEL: libcall1_cosl:
-; CHECK: ld $25, %call16(cosl)
+; ALL-LABEL: libcall1_cosl:
+; ALL: ld $25, %call16(cosl)
 
 define fp128 @libcall1_cosl() {
 entry:
@@ -286,8 +290,8 @@ entry:
 
 declare fp128 @cosl(fp128) #2
 
-; CHECK-LABEL: libcall1_expl:
-; CHECK: ld $25, %call16(expl)
+; ALL-LABEL: libcall1_expl:
+; ALL: ld $25, %call16(expl)
 
 define fp128 @libcall1_expl() {
 entry:
@@ -298,8 +302,8 @@ entry:
 
 declare fp128 @expl(fp128) #2
 
-; CHECK-LABEL: libcall1_exp2l:
-; CHECK: ld $25, %call16(exp2l)
+; ALL-LABEL: libcall1_exp2l:
+; ALL: ld $25, %call16(exp2l)
 
 define fp128 @libcall1_exp2l() {
 entry:
@@ -310,8 +314,8 @@ entry:
 
 declare fp128 @exp2l(fp128) #2
 
-; CHECK-LABEL: libcall1_logl:
-; CHECK: ld $25, %call16(logl)
+; ALL-LABEL: libcall1_logl:
+; ALL: ld $25, %call16(logl)
 
 define fp128 @libcall1_logl() {
 entry:
@@ -322,8 +326,8 @@ entry:
 
 declare fp128 @logl(fp128) #2
 
-; CHECK-LABEL: libcall1_log2l:
-; CHECK: ld $25, %call16(log2l)
+; ALL-LABEL: libcall1_log2l:
+; ALL: ld $25, %call16(log2l)
 
 define fp128 @libcall1_log2l() {
 entry:
@@ -334,8 +338,8 @@ entry:
 
 declare fp128 @log2l(fp128) #2
 
-; CHECK-LABEL: libcall1_log10l:
-; CHECK: ld $25, %call16(log10l)
+; ALL-LABEL: libcall1_log10l:
+; ALL: ld $25, %call16(log10l)
 
 define fp128 @libcall1_log10l() {
 entry:
@@ -346,8 +350,8 @@ entry:
 
 declare fp128 @log10l(fp128) #2
 
-; CHECK-LABEL: libcall1_nearbyintl:
-; CHECK: ld $25, %call16(nearbyintl)
+; ALL-LABEL: libcall1_nearbyintl:
+; ALL: ld $25, %call16(nearbyintl)
 
 define fp128 @libcall1_nearbyintl() {
 entry:
@@ -358,8 +362,8 @@ entry:
 
 declare fp128 @nearbyintl(fp128) #1
 
-; CHECK-LABEL: libcall1_floorl:
-; CHECK: ld $25, %call16(floorl)
+; ALL-LABEL: libcall1_floorl:
+; ALL: ld $25, %call16(floorl)
 
 define fp128 @libcall1_floorl() {
 entry:
@@ -370,8 +374,8 @@ entry:
 
 declare fp128 @floorl(fp128) #1
 
-; CHECK-LABEL: libcall1_sqrtl:
-; CHECK: ld $25, %call16(sqrtl)
+; ALL-LABEL: libcall1_sqrtl:
+; ALL: ld $25, %call16(sqrtl)
 
 define fp128 @libcall1_sqrtl() {
 entry:
@@ -382,8 +386,8 @@ entry:
 
 declare fp128 @sqrtl(fp128) #2
 
-; CHECK-LABEL: libcall1_rintl:
-; CHECK: ld $25, %call16(rintl)
+; ALL-LABEL: libcall1_rintl:
+; ALL: ld $25, %call16(rintl)
 
 define fp128 @libcall1_rintl() {
 entry:
@@ -394,8 +398,8 @@ entry:
 
 declare fp128 @rintl(fp128) #1
 
-; CHECK-LABEL: libcall_powil:
-; CHECK: ld $25, %call16(__powitf2)
+; ALL-LABEL: libcall_powil:
+; ALL: ld $25, %call16(__powitf2)
 
 define fp128 @libcall_powil(fp128 %a, i32 %b) {
 entry:
@@ -405,18 +409,18 @@ entry:
 
 declare fp128 @llvm.powi.f128(fp128, i32) #3
 
-; CHECK-LABEL:     libcall2_copysignl:
-; CHECK-DAG: daddiu $[[R2:[0-9]+]], $zero, 1
-; CHECK-DAG: dsll   $[[R3:[0-9]+]], $[[R2]], 63
-; CHECK-DAG: ld     $[[R0:[0-9]+]], %got_disp(gld1)
-; CHECK-DAG: ld     $[[R1:[0-9]+]], 8($[[R0]])
-; CHECK-DAG: and    $[[R4:[0-9]+]], $[[R1]], $[[R3]]
-; CHECK-DAG: ld     $[[R5:[0-9]+]], %got_disp(gld0)
-; CHECK-DAG: ld     $[[R6:[0-9]+]], 8($[[R5]])
-; CHECK-DAG: daddiu $[[R7:[0-9]+]], $[[R3]], -1
-; CHECK-DAG: and    $[[R8:[0-9]+]], $[[R6]], $[[R7]]
-; CHECK-DAG: or     $4, $[[R8]], $[[R4]]
-; CHECK-DAG: ld     $2, 0($[[R5]])
+; ALL-LABEL:     libcall2_copysignl:
+; ALL-DAG: daddiu $[[R2:[0-9]+]], $zero, 1
+; ALL-DAG: dsll   $[[R3:[0-9]+]], $[[R2]], 63
+; ALL-DAG: ld     $[[R0:[0-9]+]], %got_disp(gld1)
+; ALL-DAG: ld     $[[R1:[0-9]+]], 8($[[R0]])
+; ALL-DAG: and    $[[R4:[0-9]+]], $[[R1]], $[[R3]]
+; ALL-DAG: ld     $[[R5:[0-9]+]], %got_disp(gld0)
+; ALL-DAG: ld     $[[R6:[0-9]+]], 8($[[R5]])
+; ALL-DAG: daddiu $[[R7:[0-9]+]], $[[R3]], -1
+; ALL-DAG: and    $[[R8:[0-9]+]], $[[R6]], $[[R7]]
+; ALL-DAG: or     $4, $[[R8]], $[[R4]]
+; ALL-DAG: ld     $2, 0($[[R5]])
 
 define fp128 @libcall2_copysignl() {
 entry:
@@ -428,8 +432,8 @@ entry:
 
 declare fp128 @copysignl(fp128, fp128) #1
 
-; CHECK-LABEL: libcall2_powl:
-; CHECK: ld $25, %call16(powl)
+; ALL-LABEL: libcall2_powl:
+; ALL: ld $25, %call16(powl)
 
 define fp128 @libcall2_powl() {
 entry:
@@ -441,8 +445,8 @@ entry:
 
 declare fp128 @powl(fp128, fp128) #2
 
-; CHECK-LABEL: libcall2_fmodl:
-; CHECK: ld $25, %call16(fmodl)
+; ALL-LABEL: libcall2_fmodl:
+; ALL: ld $25, %call16(fmodl)
 
 define fp128 @libcall2_fmodl() {
 entry:
@@ -454,8 +458,8 @@ entry:
 
 declare fp128 @fmodl(fp128, fp128) #2
 
-; CHECK-LABEL: libcall3_fmal:
-; CHECK: ld $25, %call16(fmal)
+; ALL-LABEL: libcall3_fmal:
+; ALL: ld $25, %call16(fmal)
 
 define fp128 @libcall3_fmal() {
 entry:
@@ -468,8 +472,8 @@ entry:
 
 declare fp128 @llvm.fma.f128(fp128, fp128, fp128) #4
 
-; CHECK-LABEL: cmp_lt:
-; CHECK: ld $25, %call16(__lttf2)
+; ALL-LABEL: cmp_lt:
+; ALL: ld $25, %call16(__lttf2)
 
 define i32 @cmp_lt(fp128 %a, fp128 %b) {
 entry:
@@ -478,8 +482,8 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: cmp_le:
-; CHECK: ld $25, %call16(__letf2)
+; ALL-LABEL: cmp_le:
+; ALL: ld $25, %call16(__letf2)
 
 define i32 @cmp_le(fp128 %a, fp128 %b) {
 entry:
@@ -488,8 +492,8 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: cmp_gt:
-; CHECK: ld $25, %call16(__gttf2)
+; ALL-LABEL: cmp_gt:
+; ALL: ld $25, %call16(__gttf2)
 
 define i32 @cmp_gt(fp128 %a, fp128 %b) {
 entry:
@@ -498,8 +502,8 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: cmp_ge:
-; CHECK: ld $25, %call16(__getf2)
+; ALL-LABEL: cmp_ge:
+; ALL: ld $25, %call16(__getf2)
 
 define i32 @cmp_ge(fp128 %a, fp128 %b) {
 entry:
@@ -508,8 +512,8 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: cmp_eq:
-; CHECK: ld $25, %call16(__eqtf2)
+; ALL-LABEL: cmp_eq:
+; ALL: ld $25, %call16(__eqtf2)
 
 define i32 @cmp_eq(fp128 %a, fp128 %b) {
 entry:
@@ -518,8 +522,8 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: cmp_ne:
-; CHECK: ld $25, %call16(__netf2)
+; ALL-LABEL: cmp_ne:
+; ALL: ld $25, %call16(__netf2)
 
 define i32 @cmp_ne(fp128 %a, fp128 %b) {
 entry:
@@ -528,10 +532,10 @@ entry:
   ret i32 %conv
 }
 
-; CHECK-LABEL: load_LD_LD:
-; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1)
-; CHECK: ld $2, 0($[[R0]])
-; CHECK: ld $4, 8($[[R0]])
+; ALL-LABEL: load_LD_LD:
+; ALL: ld $[[R0:[0-9]+]], %got_disp(gld1)
+; ALL: ld $2, 0($[[R0]])
+; ALL: ld $4, 8($[[R0]])
 
 define fp128 @load_LD_LD() {
 entry:
@@ -539,11 +543,11 @@ entry:
   ret fp128 %0
 }
 
-; CHECK-LABEL: load_LD_float:
-; CHECK: ld   $[[R0:[0-9]+]], %got_disp(gf1)
-; CHECK: lw   $4, 0($[[R0]])
-; CHECK: ld   $25, %call16(__extendsftf2)
-; CHECK: jalr $25
+; ALL-LABEL: load_LD_float:
+; ALL: ld   $[[R0:[0-9]+]], %got_disp(gf1)
+; ALL: lw   $4, 0($[[R0]])
+; ALL: ld   $25, %call16(__extendsftf2)
+; ALL: jalr $25
 
 define fp128 @load_LD_float() {
 entry:
@@ -552,11 +556,11 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: load_LD_double:
-; CHECK: ld   $[[R0:[0-9]+]], %got_disp(gd1)
-; CHECK: ld   $4, 0($[[R0]])
-; CHECK: ld   $25, %call16(__extenddftf2)
-; CHECK: jalr $25
+; ALL-LABEL: load_LD_double:
+; ALL: ld   $[[R0:[0-9]+]], %got_disp(gd1)
+; ALL: ld   $4, 0($[[R0]])
+; ALL: ld   $25, %call16(__extenddftf2)
+; ALL: jalr $25
 
 define fp128 @load_LD_double() {
 entry:
@@ -565,13 +569,13 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK-LABEL: store_LD_LD:
-; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1)
-; CHECK: ld $[[R1:[0-9]+]], 0($[[R0]])
-; CHECK: ld $[[R2:[0-9]+]], 8($[[R0]])
-; CHECK: ld $[[R3:[0-9]+]], %got_disp(gld0)
-; CHECK: sd $[[R2]], 8($[[R3]])
-; CHECK: sd $[[R1]], 0($[[R3]])
+; ALL-LABEL: store_LD_LD:
+; ALL: ld $[[R0:[0-9]+]], %got_disp(gld1)
+; ALL: ld $[[R1:[0-9]+]], 0($[[R0]])
+; ALL: ld $[[R2:[0-9]+]], 8($[[R0]])
+; ALL: ld $[[R3:[0-9]+]], %got_disp(gld0)
+; ALL: sd $[[R2]], 8($[[R3]])
+; ALL: sd $[[R1]], 0($[[R3]])
 
 define void @store_LD_LD() {
 entry:
@@ -580,14 +584,14 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: store_LD_float:
-; CHECK: ld   $[[R0:[0-9]+]], %got_disp(gld1)
-; CHECK: ld   $4, 0($[[R0]])
-; CHECK: ld   $5, 8($[[R0]])
-; CHECK: ld   $25, %call16(__trunctfsf2)
-; CHECK: jalr $25
-; CHECK: ld   $[[R1:[0-9]+]], %got_disp(gf1)
-; CHECK: sw   $2, 0($[[R1]])
+; ALL-LABEL: store_LD_float:
+; ALL: ld   $[[R0:[0-9]+]], %got_disp(gld1)
+; ALL: ld   $4, 0($[[R0]])
+; ALL: ld   $5, 8($[[R0]])
+; ALL: ld   $25, %call16(__trunctfsf2)
+; ALL: jalr $25
+; ALL: ld   $[[R1:[0-9]+]], %got_disp(gf1)
+; ALL: sw   $2, 0($[[R1]])
 
 define void @store_LD_float() {
 entry:
@@ -597,14 +601,14 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: store_LD_double:
-; CHECK: ld   $[[R0:[0-9]+]], %got_disp(gld1)
-; CHECK: ld   $4, 0($[[R0]])
-; CHECK: ld   $5, 8($[[R0]])
-; CHECK: ld   $25, %call16(__trunctfdf2)
-; CHECK: jalr $25
-; CHECK: ld   $[[R1:[0-9]+]], %got_disp(gd1)
-; CHECK: sd   $2, 0($[[R1]])
+; ALL-LABEL: store_LD_double:
+; ALL: ld   $[[R0:[0-9]+]], %got_disp(gld1)
+; ALL: ld   $4, 0($[[R0]])
+; ALL: ld   $5, 8($[[R0]])
+; ALL: ld   $25, %call16(__trunctfdf2)
+; ALL: jalr $25
+; ALL: ld   $[[R1:[0-9]+]], %got_disp(gd1)
+; ALL: sd   $2, 0($[[R1]])
 
 define void @store_LD_double() {
 entry:
@@ -614,11 +618,22 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: select_LD:
-; CHECK: movn $8, $6, $4
-; CHECK: movn $9, $7, $4
-; CHECK: move $2, $8
-; CHECK: move $4, $9
+; ALL-LABEL: select_LD:
+; C_CC_FMT:      movn $8, $6, $4
+; C_CC_FMT:      movn $9, $7, $4
+; C_CC_FMT:      move $2, $8
+; C_CC_FMT:      move $4, $9
+
+; FIXME: This sll works around an implementation detail in the code generator
+;        (setcc's result is i32 so bits 32-63 are undefined). It's not really
+;        needed.
+; CMP_CC_FMT-DAG: sll $[[CC:[0-9]+]], $4, 0
+; CMP_CC_FMT-DAG: seleqz $[[EQ1:[0-9]+]], $8, $[[CC]]
+; CMP_CC_FMT-DAG: selnez $[[NE1:[0-9]+]], $6, $[[CC]]
+; CMP_CC_FMT-DAG: or $2, $[[NE1]], $[[EQ1]]
+; CMP_CC_FMT-DAG: seleqz $[[EQ2:[0-9]+]], $9, $[[CC]]
+; CMP_CC_FMT-DAG: selnez $[[NE2:[0-9]+]], $7, $[[CC]]
+; CMP_CC_FMT-DAG: or $4, $[[NE2]], $[[EQ2]]
 
 define fp128 @select_LD(i32 %a, i64, fp128 %b, fp128 %c) {
 entry:
@@ -627,18 +642,27 @@ entry:
   ret fp128 %cond
 }
 
-; CHECK-LABEL: selectCC_LD:
-; CHECK: move $[[R0:[0-9]+]], $11
-; CHECK: move $[[R1:[0-9]+]], $10
-; CHECK: move $[[R2:[0-9]+]], $9
-; CHECK: move $[[R3:[0-9]+]], $8
-; CHECK: ld   $25, %call16(__gttf2)($gp)
-; CHECK: jalr $25
-; CHECK: slti $1, $2, 1
-; CHECK: movz $[[R1]], $[[R3]], $1
-; CHECK: movz $[[R0]], $[[R2]], $1
-; CHECK: move $2, $[[R1]]
-; CHECK: move $4, $[[R0]]
+; ALL-LABEL: selectCC_LD:
+; ALL:           move $[[R0:[0-9]+]], $11
+; ALL:           move $[[R1:[0-9]+]], $10
+; ALL:           move $[[R2:[0-9]+]], $9
+; ALL:           move $[[R3:[0-9]+]], $8
+; ALL:           ld   $25, %call16(__gttf2)($gp)
+; ALL:           jalr $25
+
+; C_CC_FMT:      slti $[[CC:[0-9]+]], $2, 1
+; C_CC_FMT:      movz $[[R1]], $[[R3]], $[[CC]]
+; C_CC_FMT:      movz $[[R0]], $[[R2]], $[[CC]]
+; C_CC_FMT:      move $2, $[[R1]]
+; C_CC_FMT:      move $4, $[[R0]]
+
+; CMP_CC_FMT:    slt $[[CC:[0-9]+]], $zero, $2
+; CMP_CC_FMT:    seleqz $[[EQ1:[0-9]+]], $[[R1]], $[[CC]]
+; CMP_CC_FMT:    selnez $[[NE1:[0-9]+]], $[[R3]], $[[CC]]
+; CMP_CC_FMT:    or $2, $[[NE1]], $[[EQ1]]
+; CMP_CC_FMT:    seleqz $[[EQ2:[0-9]+]], $[[R0]], $[[CC]]
+; CMP_CC_FMT:    selnez $[[NE2:[0-9]+]], $[[R2]], $[[CC]]
+; CMP_CC_FMT:    or $4, $[[NE2]], $[[EQ2]]
 
 define fp128 @selectCC_LD(fp128 %a, fp128 %b, fp128 %c, fp128 %d) {
 entry:
diff --git a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
deleted file mode 100644
index bbdc05c..0000000
--- a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
+++ /dev/null
@@ -1,110 +0,0 @@
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
-
-%struct.S = type <{ [4 x float] }>
-%struct.S2 = type <{ [4 x double] }>
-%struct.S3 = type <{ i8, float }>
-
-@s = external global [4 x %struct.S]
-@gf = external global float
-@gd = external global double
-@s2 = external global [4 x %struct.S2]
-@s3 = external global %struct.S3
-
-define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
-entry:
-; CHECK: lwxc1
-  %idxprom = zext i32 %o to i64
-  %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
-  %0 = load float* %arrayidx, align 4
-  ret float %0
-}
-
-define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
-entry:
-; CHECK: ldxc1
-  %idxprom = zext i32 %o to i64
-  %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
-  %0 = load double* %arrayidx, align 8
-  ret double %0
-}
-
-define float @foo2(i32 %b, i32 %c) nounwind readonly {
-entry:
-; CHECK-NOT: luxc1
-  %idxprom = zext i32 %c to i64
-  %idxprom1 = zext i32 %b to i64
-  %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
-  %0 = load float* %arrayidx2, align 1
-  ret float %0
-}
-
-define void @foo3(float* nocapture %b, i32 %o) nounwind {
-entry:
-; CHECK: swxc1
-  %0 = load float* @gf, align 4
-  %idxprom = zext i32 %o to i64
-  %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
-  store float %0, float* %arrayidx, align 4
-  ret void
-}
-
-define void @foo4(double* nocapture %b, i32 %o) nounwind {
-entry:
-; CHECK: sdxc1
-  %0 = load double* @gd, align 8
-  %idxprom = zext i32 %o to i64
-  %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
-  store double %0, double* %arrayidx, align 8
-  ret void
-}
-
-define void @foo5(i32 %b, i32 %c) nounwind {
-entry:
-; CHECK-NOT: suxc1
-  %0 = load float* @gf, align 4
-  %idxprom = zext i32 %c to i64
-  %idxprom1 = zext i32 %b to i64
-  %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
-  store float %0, float* %arrayidx2, align 1
-  ret void
-}
-
-define double @foo6(i32 %b, i32 %c) nounwind readonly {
-entry:
-; CHECK: foo6
-; CHECK-NOT: luxc1
-  %idxprom = zext i32 %c to i64
-  %idxprom1 = zext i32 %b to i64
-  %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
-  %0 = load double* %arrayidx2, align 1
-  ret double %0
-}
-
-define void @foo7(i32 %b, i32 %c) nounwind {
-entry:
-; CHECK: foo7
-; CHECK-NOT: suxc1
-  %0 = load double* @gd, align 8
-  %idxprom = zext i32 %c to i64
-  %idxprom1 = zext i32 %b to i64
-  %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
-  store double %0, double* %arrayidx2, align 1
-  ret void
-}
-
-define float @foo8() nounwind readonly {
-entry:
-; CHECK: foo8
-; CHECK-NOT: luxc1
-  %0 = load float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
-  ret float %0
-}
-
-define void @foo9(float %f) nounwind {
-entry:
-; CHECK: foo9
-; CHECK-NOT: suxc1
-  store float %f, float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
-  ret void
-}
-
diff --git a/test/CodeGen/Mips/mips64countleading.ll b/test/CodeGen/Mips/mips64countleading.ll
deleted file mode 100644
index 252f323..0000000
--- a/test/CodeGen/Mips/mips64countleading.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck -check-prefix=CHECK -check-prefix=MIPS4 %s
-; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck -check-prefix=CHECK -check-prefix=MIPS64 %s
-
-define i64 @t1(i64 %X) nounwind readnone {
-entry:
-; CHECK-LABEL: t1:
-; MIPS4-NOT: dclz
-; MIPS64: dclz
-  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
-  ret i64 %tmp1
-}
-
-declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
-
-define i64 @t3(i64 %X) nounwind readnone {
-entry:
-; CHECK-LABEL: t3:
-; MIPS4-NOT: dclo
-; MIPS64: dclo
-  %neg = xor i64 %X, -1
-  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
-  ret i64 %tmp1
-}
-
diff --git a/test/CodeGen/Mips/mips64instrs.ll b/test/CodeGen/Mips/mips64instrs.ll
index 58f11f1..ed617be 100644
--- a/test/CodeGen/Mips/mips64instrs.ll
+++ b/test/CodeGen/Mips/mips64instrs.ll
@@ -1,99 +1,128 @@
-; RUN: llc -march=mips64el -mcpu=mips4 -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=MIPS4 %s
-; RUN: llc -march=mips64el -mcpu=mips64 -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=MIPS64 %s
+; RUN: llc -march=mips64el -mcpu=mips4 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS4 -check-prefix=ACCMULDIV %s
+; RUN: llc -march=mips64el -mcpu=mips64 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HAS-DCLO -check-prefix=ACCMULDIV %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HAS-DCLO -check-prefix=ACCMULDIV %s
+; RUN: llc -march=mips64el -mcpu=mips64r6 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HAS-DCLO -check-prefix=GPRMULDIV %s
 
 @gll0 = common global i64 0, align 8
 @gll1 = common global i64 0, align 8
 
 define i64 @f0(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: daddu
+; ALL-LABEL: f0:
+; ALL:           daddu $2, ${{[45]}}, ${{[45]}}
   %add = add nsw i64 %a1, %a0
   ret i64 %add
 }
 
 define i64 @f1(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: dsubu
+; ALL-LABEL: f1:
+; ALL:           dsubu $2, $4, $5
   %sub = sub nsw i64 %a0, %a1
   ret i64 %sub
 }
 
 define i64 @f4(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: and
+; ALL-LABEL: f4:
+; ALL:           and $2, ${{[45]}}, ${{[45]}}
   %and = and i64 %a1, %a0
   ret i64 %and
 }
 
 define i64 @f5(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: or
+; ALL-LABEL: f5:
+; ALL:           or $2, ${{[45]}}, ${{[45]}}
   %or = or i64 %a1, %a0
   ret i64 %or
 }
 
 define i64 @f6(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: xor
+; ALL-LABEL: f6:
+; ALL:           xor $2, ${{[45]}}, ${{[45]}}
   %xor = xor i64 %a1, %a0
   ret i64 %xor
 }
 
 define i64 @f7(i64 %a0) nounwind readnone {
 entry:
-; CHECK: daddiu ${{[0-9]+}}, ${{[0-9]+}}, 20
+; ALL-LABEL: f7:
+; ALL:           daddiu $2, $4, 20
   %add = add nsw i64 %a0, 20
   ret i64 %add
 }
 
 define i64 @f8(i64 %a0) nounwind readnone {
 entry:
-; CHECK: daddiu ${{[0-9]+}}, ${{[0-9]+}}, -20
+; ALL-LABEL: f8:
+; ALL:           daddiu $2, $4, -20
   %sub = add nsw i64 %a0, -20
   ret i64 %sub
 }
 
 define i64 @f9(i64 %a0) nounwind readnone {
 entry:
-; CHECK: andi ${{[0-9]+}}, ${{[0-9]+}}, 20
+; ALL-LABEL: f9:
+; ALL:           andi $2, $4, 20
   %and = and i64 %a0, 20
   ret i64 %and
 }
 
 define i64 @f10(i64 %a0) nounwind readnone {
 entry:
-; CHECK: ori ${{[0-9]+}}, ${{[0-9]+}}, 20
+; ALL-LABEL: f10:
+; ALL:           ori $2, $4, 20
   %or = or i64 %a0, 20
   ret i64 %or
 }
 
 define i64 @f11(i64 %a0) nounwind readnone {
 entry:
-; CHECK: xori ${{[0-9]+}}, ${{[0-9]+}}, 20
+; ALL-LABEL: f11:
+; ALL:           xori $2, $4, 20
   %xor = xor i64 %a0, 20
   ret i64 %xor
 }
 
 define i64 @f12(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK: mult
+; ALL-LABEL: f12:
+
+; ACCMULDIV:     mult ${{[45]}}, ${{[45]}}
+; GPRMULDIV:     dmul $2, ${{[45]}}, ${{[45]}}
+
   %mul = mul nsw i64 %b, %a
   ret i64 %mul
 }
 
 define i64 @f13(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK: mult
+; ALL-LABEL: f13:
+
+; ACCMULDIV:     mult ${{[45]}}, ${{[45]}}
+; GPRMULDIV:     dmul $2, ${{[45]}}, ${{[45]}}
+
   %mul = mul i64 %b, %a
   ret i64 %mul
 }
 
 define i64 @f14(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK-LABEL: f14:
-; CHECK: ddiv $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; CHECK: teq $[[R0]], $zero, 7
-; CHECK: mflo
+; ALL-LABEL: f14:
+; ALL-DAG:       ld $[[P0:[0-9]+]], %got_disp(gll0)(
+; ALL-DAG:       ld $[[P1:[0-9]+]], %got_disp(gll1)(
+; ALL-DAG:       ld $[[T0:[0-9]+]], 0($[[P0]])
+; ALL-DAG:       ld $[[T1:[0-9]+]], 0($[[P1]])
+
+; ACCMULDIV:     ddiv $zero, $[[T0]], $[[T1]]
+; ACCMULDIV:     teq $[[T1]], $zero, 7
+; ACCMULDIV:     mflo $2
+
+; GPRMULDIV:     ddiv $2, $[[T0]], $[[T1]]
+; GPRMULDIV:     teq $[[T1]], $zero, 7
+
   %0 = load i64* @gll0, align 8
   %1 = load i64* @gll1, align 8
   %div = sdiv i64 %0, %1
@@ -102,10 +131,19 @@ entry:
 
 define i64 @f15() nounwind readnone {
 entry:
-; CHECK-LABEL: f15:
-; CHECK: ddivu $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; CHECK: teq $[[R0]], $zero, 7
-; CHECK: mflo
+; ALL-LABEL: f15:
+; ALL-DAG:       ld $[[P0:[0-9]+]], %got_disp(gll0)(
+; ALL-DAG:       ld $[[P1:[0-9]+]], %got_disp(gll1)(
+; ALL-DAG:       ld $[[T0:[0-9]+]], 0($[[P0]])
+; ALL-DAG:       ld $[[T1:[0-9]+]], 0($[[P1]])
+
+; ACCMULDIV:     ddivu $zero, $[[T0]], $[[T1]]
+; ACCMULDIV:     teq $[[T1]], $zero, 7
+; ACCMULDIV:     mflo $2
+
+; GPRMULDIV:     ddivu $2, $[[T0]], $[[T1]]
+; GPRMULDIV:     teq $[[T1]], $zero, 7
+
   %0 = load i64* @gll0, align 8
   %1 = load i64* @gll1, align 8
   %div = udiv i64 %0, %1
@@ -114,20 +152,30 @@ entry:
 
 define i64 @f16(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK-LABEL: f16:
-; CHECK: ddiv $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; CHECK: teq $[[R0]], $zero, 7
-; CHECK: mfhi
+; ALL-LABEL: f16:
+
+; ACCMULDIV:     ddiv $zero, $4, $5
+; ACCMULDIV:     teq $5, $zero, 7
+; ACCMULDIV:     mfhi $2
+
+; GPRMULDIV:     dmod $2, $4, $5
+; GPRMULDIV:     teq $5, $zero, 7
+
   %rem = srem i64 %a, %b
   ret i64 %rem
 }
 
 define i64 @f17(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK-LABEL: f17:
-; CHECK: ddivu $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
-; CHECK: teq $[[R0]], $zero, 7
-; CHECK: mfhi
+; ALL-LABEL: f17:
+
+; ACCMULDIV:     ddivu $zero, $4, $5
+; ACCMULDIV:     teq $5, $zero, 7
+; ACCMULDIV:     mfhi $2
+
+; GPRMULDIV:     dmodu $2, $4, $5
+; GPRMULDIV:     teq $5, $zero, 7
+
   %rem = urem i64 %a, %b
   ret i64 %rem
 }
@@ -136,24 +184,26 @@ declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
 
 define i64 @f18(i64 %X) nounwind readnone {
 entry:
-; CHECK-LABEL: f18:
+; ALL-LABEL: f18:
 
 ; The MIPS4 version is too long to reasonably test. At least check we don't get dclz
-; MIPS4-NOT: dclz
+; MIPS4-NOT:     dclz
+
+; HAS-DCLO:      dclz $2, $4
 
-; MIPS64: dclz $2, $4
   %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
   ret i64 %tmp1
 }
 
 define i64 @f19(i64 %X) nounwind readnone {
 entry:
-; CHECK-LABEL: f19:
+; ALL-LABEL: f19:
 
 ; The MIPS4 version is too long to reasonably test. At least check we don't get dclo
-; MIPS4-NOT: dclo
+; MIPS4-NOT:     dclo
+
+; HAS-DCLO:      dclo $2, $4
 
-; MIPS64: dclo $2, $4
   %neg = xor i64 %X, -1
   %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
   ret i64 %tmp1
@@ -161,8 +211,8 @@ entry:
 
 define i64 @f20(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK-LABEL: f20:
-; CHECK: nor
+; ALL-LABEL: f20:
+; ALL:           nor $2, ${{[45]}}, ${{[45]}}
   %or = or i64 %b, %a
   %neg = xor i64 %or, -1
   ret i64 %neg
diff --git a/test/CodeGen/Mips/mips64muldiv.ll b/test/CodeGen/Mips/mips64muldiv.ll
index 39c73e9..32d05a9 100644
--- a/test/CodeGen/Mips/mips64muldiv.ll
+++ b/test/CodeGen/Mips/mips64muldiv.ll
@@ -1,50 +1,79 @@
-; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s
-; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC
+; RUN: llc -march=mips64el -mcpu=mips64r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=ACC
+; RUN: llc -march=mips64el -mcpu=mips64r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR
+
+; FileCheck prefixes:
+;   ALL - All targets
+;   ACC - Targets with accumulator based mul/div (i.e. pre-MIPS32r6)
+;   GPR - Targets with register based mul/div (i.e. MIPS32r6)
 
 define i64 @m0(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: dmult
-; CHECK: mflo
+; ALL-LABEL: m0:
+; ACC:           dmult ${{[45]}}, ${{[45]}}
+; ACC:           mflo $2
+; GPR:           dmul $2, ${{[45]}}, ${{[45]}}
   %mul = mul i64 %a1, %a0
   ret i64 %mul
 }
 
 define i64 @m1(i64 %a) nounwind readnone {
 entry:
-; CHECK: dmult
-; CHECK: mfhi
+; ALL-LABEL: m1:
+; ALL:           lui $[[T0:[0-9]+]], 21845
+; ALL:           addiu $[[T0]], $[[T0]], 21845
+; ALL:           dsll $[[T0]], $[[T0]], 16
+; ALL:           addiu $[[T0]], $[[T0]], 21845
+; ALL:           dsll $[[T0]], $[[T0]], 16
+; ALL:           addiu $[[T0]], $[[T0]], 21846
+
+; ACC:           dmult $4, $[[T0]]
+; ACC:           mfhi $[[T1:[0-9]+]]
+; GPR:           dmuh $[[T1:[0-9]+]], $4, $[[T0]]
+
+; ALL:           dsrl $2, $[[T1]], 63
+; ALL:           daddu $2, $[[T1]], $2
   %div = sdiv i64 %a, 3
   ret i64 %div
 }
 
 define i64 @d0(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: ddivu
-; CHECK: mflo
+; ALL-LABEL: d0:
+; ACC:           ddivu $zero, $4, $5
+; ACC:           mflo $2
+; GPR:           ddivu $2, $4, $5
   %div = udiv i64 %a0, %a1
   ret i64 %div
 }
 
 define i64 @d1(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: ddiv
-; CHECK: mflo
+; ALL-LABEL: d1:
+; ACC:           ddiv $zero, $4, $5
+; ACC:           mflo $2
+; GPR:           ddiv $2, $4, $5
   %div = sdiv i64 %a0, %a1
   ret i64 %div
 }
 
 define i64 @d2(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: ddivu
-; CHECK: mfhi
+; ALL-LABEL: d2:
+; ACC:           ddivu $zero, $4, $5
+; ACC:           mfhi $2
+; GPR:           dmodu $2, $4, $5
   %rem = urem i64 %a0, %a1
   ret i64 %rem
 }
 
 define i64 @d3(i64 %a0, i64 %a1) nounwind readnone {
 entry:
-; CHECK: ddiv
-; CHECK: mfhi
+; ALL-LABEL: d3:
+; ACC:           ddiv $zero, $4, $5
+; ACC:           mfhi $2
+; GPR:           dmod $2, $4, $5
   %rem = srem i64 %a0, %a1
   ret i64 %rem
 }
diff --git a/test/CodeGen/Mips/mno-ldc1-sdc1.ll b/test/CodeGen/Mips/mno-ldc1-sdc1.ll
index f4854f8..244b03d 100644
--- a/test/CodeGen/Mips/mno-ldc1-sdc1.ll
+++ b/test/CodeGen/Mips/mno-ldc1-sdc1.ll
@@ -1,33 +1,113 @@
-; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 -mcpu=mips32r2 \
-; RUN: < %s | FileCheck %s -check-prefix=LE-PIC
-; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 < %s | \
-; RUN: FileCheck %s -check-prefix=LE-STATIC
-; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 < %s | \
-; RUN: FileCheck %s -check-prefix=BE-PIC
+; Check that [sl]dc1 are normally emitted. MIPS32r2 should have [sl]dxc1 too.
+; RUN: llc -march=mipsel -mcpu=mips32   < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R1-LDC1
 ; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | \
-; RUN: FileCheck %s -check-prefix=CHECK-LDC1-SDC1
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R2-LDXC1
+; RUN: llc -march=mipsel -mcpu=mips32r6 < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R6-LDC1
+
+; Check that -mno-ldc1-sdc1 disables [sl]dc1
+; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32   < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R1 \
+; RUN:             -check-prefix=32R1-LE -check-prefix=32R1-LE-PIC
+; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32r2 < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R2 \
+; RUN:             -check-prefix=32R2-LE -check-prefix=32R2-LE-PIC
+; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32r6 < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R6 \
+; RUN:             -check-prefix=32R6-LE -check-prefix=32R6-LE-PIC
+
+; Check again for big-endian
+; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32   < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R1 \
+; RUN:             -check-prefix=32R1-BE -check-prefix=32R1-BE-PIC
+; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32r2 < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R2 \
+; RUN:             -check-prefix=32R2-BE -check-prefix=32R2-BE-PIC
+; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32r6 < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R6 \
+; RUN:             -check-prefix=32R6-BE -check-prefix=32R6-BE-PIC
+
+; Check again for the static relocation model
+; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32   < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R1 \
+; RUN:             -check-prefix=32R1-LE -check-prefix=32R1-LE-STATIC
+; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32r2 < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R2 \
+; RUN:             -check-prefix=32R2-LE -check-prefix=32R2-LE-STATIC
+; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 \
+; RUN:   -mcpu=mips32r6 < %s | \
+; RUN:   FileCheck %s -check-prefix=ALL -check-prefix=32R6 \
+; RUN:             -check-prefix=32R6-LE -check-prefix=32R6-LE-STATIC
 
 @g0 = common global double 0.000000e+00, align 8
 
-; LE-PIC-LABEL: test_ldc1:
-; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
-; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
-; LE-PIC-DAG: mtc1 $[[R0]], $f0
-; LE-PIC-DAG: mtc1 $[[R1]], $f1
-; LE-STATIC-LABEL: test_ldc1:
-; LE-STATIC-DAG: lui $[[R0:[0-9]+]], %hi(g0)
-; LE-STATIC-DAG: lw $[[R1:[0-9]+]], %lo(g0)($[[R0]])
-; LE-STATIC-DAG: addiu $[[R2:[0-9]+]], $[[R0]], %lo(g0)
-; LE-STATIC-DAG: lw $[[R3:[0-9]+]], 4($[[R2]])
-; LE-STATIC-DAG: mtc1 $[[R1]], $f0
-; LE-STATIC-DAG: mtc1 $[[R3]], $f1
-; BE-PIC-LABEL: test_ldc1:
-; BE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
-; BE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
-; BE-PIC-DAG: mtc1 $[[R1]], $f0
-; BE-PIC-DAG: mtc1 $[[R0]], $f1
-; CHECK-LDC1-SDC1-LABEL: test_ldc1:
-; CHECK-LDC1-SDC1: ldc1 $f{{[0-9]+}}
+; ALL-LABEL: test_ldc1:
+
+; 32R1-LE-PIC-DAG:    lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R1-LE-PIC-DAG:    lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R1-LE-PIC-DAG:    mtc1 $[[R0]], $f0
+; 32R1-LE-PIC-DAG:    mtc1 $[[R1]], $f1
+
+; 32R2-LE-PIC-DAG:    lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R2-LE-PIC-DAG:    lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R2-LE-PIC-DAG:    mtc1 $[[R0]], $f0
+; 32R2-LE-PIC-DAG:    mthc1 $[[R1]], $f0
+
+; 32R6-LE-PIC-DAG:    lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R6-LE-PIC-DAG:    lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R6-LE-PIC-DAG:    mtc1 $[[R0]], $f0
+; 32R6-LE-PIC-DAG:    mthc1 $[[R1]], $f0
+
+; 32R1-LE-STATIC-DAG: lui $[[R0:[0-9]+]], %hi(g0)
+; 32R1-LE-STATIC-DAG: lw $[[R1:[0-9]+]], %lo(g0)($[[R0]])
+; 32R1-LE-STATIC-DAG: addiu $[[R2:[0-9]+]], $[[R0]], %lo(g0)
+; 32R1-LE-STATIC-DAG: lw $[[R3:[0-9]+]], 4($[[R2]])
+; 32R1-LE-STATIC-DAG: mtc1 $[[R1]], $f0
+; 32R1-LE-STATIC-DAG: mtc1 $[[R3]], $f1
+
+; 32R2-LE-STATIC-DAG: lui $[[R0:[0-9]+]], %hi(g0)
+; 32R2-LE-STATIC-DAG: lw $[[R1:[0-9]+]], %lo(g0)($[[R0]])
+; 32R2-LE-STATIC-DAG: addiu $[[R2:[0-9]+]], $[[R0]], %lo(g0)
+; 32R2-LE-STATIC-DAG: lw $[[R3:[0-9]+]], 4($[[R2]])
+; 32R2-LE-STATIC-DAG: mtc1 $[[R1]], $f0
+; 32R2-LE-STATIC-DAG: mthc1 $[[R3]], $f0
+
+; 32R6-LE-STATIC-DAG: lui $[[R0:[0-9]+]], %hi(g0)
+; 32R6-LE-STATIC-DAG: lw $[[R1:[0-9]+]], %lo(g0)($[[R0]])
+; 32R6-LE-STATIC-DAG: addiu $[[R2:[0-9]+]], $[[R0]], %lo(g0)
+; 32R6-LE-STATIC-DAG: lw $[[R3:[0-9]+]], 4($[[R2]])
+; 32R6-LE-STATIC-DAG: mtc1 $[[R1]], $f0
+; 32R6-LE-STATIC-DAG: mthc1 $[[R3]], $f0
+
+; 32R1-BE-PIC-DAG:    lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R1-BE-PIC-DAG:    lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R1-BE-PIC-DAG:    mtc1 $[[R1]], $f0
+; 32R1-BE-PIC-DAG:    mtc1 $[[R0]], $f1
+
+; 32R2-BE-PIC-DAG:    lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R2-BE-PIC-DAG:    lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R2-BE-PIC-DAG:    mtc1 $[[R1]], $f0
+; 32R2-BE-PIC-DAG:    mthc1 $[[R0]], $f0
+
+; 32R6-BE-PIC-DAG:    lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R6-BE-PIC-DAG:    lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R6-BE-PIC-DAG:    mtc1 $[[R1]], $f0
+; 32R6-BE-PIC-DAG:    mthc1 $[[R0]], $f0
+
+; 32R1-LDC1:          ldc1 $f0, 0(${{[0-9]+}})
+
+; 32R2-LDXC1:         ldc1 $f0, 0(${{[0-9]+}})
+
+; 32R6-LDC1:          ldc1 $f0, 0(${{[0-9]+}})
 
 define double @test_ldc1() {
 entry:
@@ -35,25 +115,64 @@ entry:
   ret double %0
 }
 
-; LE-PIC-LABEL: test_sdc1:
-; LE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
-; LE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
-; LE-PIC-DAG: sw $[[R0]], 0(${{[0-9]+}})
-; LE-PIC-DAG: sw $[[R1]], 4(${{[0-9]+}})
-; LE-STATIC-LABEL: test_sdc1:
-; LE-STATIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
-; LE-STATIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
-; LE-STATIC-DAG: lui $[[R2:[0-9]+]], %hi(g0)
-; LE-STATIC-DAG: sw $[[R0]], %lo(g0)($[[R2]])
-; LE-STATIC-DAG: addiu $[[R3:[0-9]+]], $[[R2]], %lo(g0)
-; LE-STATIC-DAG: sw $[[R1]], 4($[[R3]])
-; BE-PIC-LABEL: test_sdc1:
-; BE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
-; BE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
-; BE-PIC-DAG: sw $[[R1]], 0(${{[0-9]+}})
-; BE-PIC-DAG: sw $[[R0]], 4(${{[0-9]+}})
-; CHECK-LDC1-SDC1-LABEL: test_sdc1:
-; CHECK-LDC1-SDC1: sdc1 $f{{[0-9]+}}
+; ALL-LABEL: test_sdc1:
+
+; 32R1-LE-PIC-DAG:    mfc1 $[[R0:[0-9]+]], $f12
+; 32R1-LE-PIC-DAG:    mfc1 $[[R1:[0-9]+]], $f13
+; 32R1-LE-PIC-DAG:    sw $[[R0]], 0(${{[0-9]+}})
+; 32R1-LE-PIC-DAG:    sw $[[R1]], 4(${{[0-9]+}})
+
+; 32R2-LE-PIC-DAG:    mfc1 $[[R0:[0-9]+]], $f12
+; 32R2-LE-PIC-DAG:    mfc1 $[[R1:[0-9]+]], $f13
+; 32R2-LE-PIC-DAG:    sw $[[R0]], 0(${{[0-9]+}})
+; 32R2-LE-PIC-DAG:    sw $[[R1]], 4(${{[0-9]+}})
+
+; 32R6-LE-PIC-DAG:    mfc1 $[[R0:[0-9]+]], $f12
+; 32R6-LE-PIC-DAG:    mfhc1 $[[R1:[0-9]+]], $f12
+; 32R6-LE-PIC-DAG:    sw $[[R0]], 0(${{[0-9]+}})
+; 32R6-LE-PIC-DAG:    sw $[[R1]], 4(${{[0-9]+}})
+
+; 32R1-LE-STATIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; 32R1-LE-STATIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; 32R1-LE-STATIC-DAG: lui $[[R2:[0-9]+]], %hi(g0)
+; 32R1-LE-STATIC-DAG: sw $[[R0]], %lo(g0)($[[R2]])
+; 32R1-LE-STATIC-DAG: addiu $[[R3:[0-9]+]], $[[R2]], %lo(g0)
+; 32R1-LE-STATIC-DAG: sw $[[R1]], 4($[[R3]])
+
+; 32R2-LE-STATIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; 32R2-LE-STATIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; 32R2-LE-STATIC-DAG: lui $[[R2:[0-9]+]], %hi(g0)
+; 32R2-LE-STATIC-DAG: sw $[[R0]], %lo(g0)($[[R2]])
+; 32R2-LE-STATIC-DAG: addiu $[[R3:[0-9]+]], $[[R2]], %lo(g0)
+; 32R2-LE-STATIC-DAG: sw $[[R1]], 4($[[R3]])
+
+; 32R6-LE-STATIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; 32R6-LE-STATIC-DAG: mfhc1 $[[R1:[0-9]+]], $f12
+; 32R6-LE-STATIC-DAG: lui $[[R2:[0-9]+]], %hi(g0)
+; 32R6-LE-STATIC-DAG: sw $[[R0]], %lo(g0)($[[R2]])
+; 32R6-LE-STATIC-DAG: addiu $[[R3:[0-9]+]], $[[R2]], %lo(g0)
+; 32R6-LE-STATIC-DAG: sw $[[R1]], 4($[[R3]])
+
+; 32R1-BE-PIC-DAG:    mfc1 $[[R0:[0-9]+]], $f12
+; 32R1-BE-PIC-DAG:    mfc1 $[[R1:[0-9]+]], $f13
+; 32R1-BE-PIC-DAG:    sw $[[R1]], 0(${{[0-9]+}})
+; 32R1-BE-PIC-DAG:    sw $[[R0]], 4(${{[0-9]+}})
+
+; 32R2-BE-PIC-DAG:    mfc1 $[[R0:[0-9]+]], $f12
+; 32R2-BE-PIC-DAG:    mfc1 $[[R1:[0-9]+]], $f13
+; 32R2-BE-PIC-DAG:    sw $[[R1]], 0(${{[0-9]+}})
+; 32R2-BE-PIC-DAG:    sw $[[R0]], 4(${{[0-9]+}})
+
+; 32R6-BE-PIC-DAG:    mfc1 $[[R0:[0-9]+]], $f12
+; 32R6-BE-PIC-DAG:    mfhc1 $[[R1:[0-9]+]], $f12
+; 32R6-BE-PIC-DAG:    sw $[[R1]], 0(${{[0-9]+}})
+; 32R6-BE-PIC-DAG:    sw $[[R0]], 4(${{[0-9]+}})
+
+; 32R1-LDC1:          sdc1 $f{{[0-9]+}}, 0(${{[0-9]+}})
+
+; 32R2-LDXC1:         sdc1 $f{{[0-9]+}}, 0(${{[0-9]+}})
+
+; 32R6-LDC1:          sdc1 $f{{[0-9]+}}, 0(${{[0-9]+}})
 
 define void @test_sdc1(double %a) {
 entry:
@@ -61,14 +180,35 @@ entry:
   ret void
 }
 
+; ALL-LABEL: test_ldxc1:
+
+; 32R1-LE-DAG:   lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R1-LE-DAG:   lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R1-BE-DAG:   lw $[[R0:[0-9]+]], 4(${{[0-9]+}})
+; 32R1-BE-DAG:   lw $[[R1:[0-9]+]], 0(${{[0-9]+}})
+; 32R1-DAG:      mtc1 $[[R0]], $f0
+; 32R1-DAG:      mtc1 $[[R1]], $f1
+
+; 32R2-LE-DAG:   lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R2-LE-DAG:   lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R2-BE-DAG:   lw $[[R0:[0-9]+]], 4(${{[0-9]+}})
+; 32R2-BE-DAG:   lw $[[R1:[0-9]+]], 0(${{[0-9]+}})
+; 32R2-DAG:      mtc1 $[[R0]], $f0
+; 32R2-DAG:      mthc1 $[[R1]], $f0
 
-; LE-PIC-LABEL: test_ldxc1:
-; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
-; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
-; LE-PIC-DAG: mtc1 $[[R0]], $f0
-; LE-PIC-DAG: mtc1 $[[R1]], $f1
-; CHECK-LDC1-SDC1-LABEL: test_ldxc1:
-; CHECK-LDC1-SDC1: ldxc1 $f{{[0-9]+}}
+; 32R6-LE-DAG:   lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R6-LE-DAG:   lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R6-BE-DAG:   lw $[[R0:[0-9]+]], 4(${{[0-9]+}})
+; 32R6-BE-DAG:   lw $[[R1:[0-9]+]], 0(${{[0-9]+}})
+; 32R6-DAG:      mtc1 $[[R0]], $f0
+; 32R6-DAG:      mthc1 $[[R1]], $f0
+
+; 32R1-LDC1:     ldc1 $f0, 0(${{[0-9]+}})
+
+; 32R2-LDXC1:    sll $[[OFFSET:[0-9]+]], $5, 3
+; 32R2-LDXC1:    ldxc1 $f0, $[[OFFSET]]($4)
+
+; 32R6-LDC1:     ldc1 $f0, 0(${{[0-9]+}})
 
 define double @test_ldxc1(double* nocapture readonly %a, i32 %i) {
 entry:
@@ -77,13 +217,29 @@ entry:
   ret double %0
 }
 
-; LE-PIC-LABEL: test_sdxc1:
-; LE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
-; LE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
-; LE-PIC-DAG: sw $[[R0]], 0(${{[0-9]+}})
-; LE-PIC-DAG: sw $[[R1]], 4(${{[0-9]+}})
-; CHECK-LDC1-SDC1-LABEL: test_sdxc1:
-; CHECK-LDC1-SDC1: sdxc1 $f{{[0-9]+}}
+; ALL-LABEL: test_sdxc1:
+
+; 32R1-DAG:      mfc1 $[[R0:[0-9]+]], $f12
+; 32R1-DAG:      mfc1 $[[R1:[0-9]+]], $f13
+; 32R1-DAG:      sw $[[R0]], 0(${{[0-9]+}})
+; 32R1-DAG:      sw $[[R1]], 4(${{[0-9]+}})
+
+; 32R2-DAG:      mfc1 $[[R0:[0-9]+]], $f12
+; 32R2-DAG:      mfc1 $[[R1:[0-9]+]], $f13
+; 32R2-DAG:      sw $[[R0]], 0(${{[0-9]+}})
+; 32R2-DAG:      sw $[[R1]], 4(${{[0-9]+}})
+
+; 32R6-DAG:      mfc1 $[[R0:[0-9]+]], $f12
+; 32R6-DAG:      mfhc1 $[[R1:[0-9]+]], $f12
+; 32R6-DAG:      sw $[[R0]], 0(${{[0-9]+}})
+; 32R6-DAG:      sw $[[R1]], 4(${{[0-9]+}})
+
+; 32R1-LDC1:     sdc1 $f{{[0-9]+}}, 0(${{[0-9]+}})
+
+; 32R2-LDXC1:    sll $[[OFFSET:[0-9]+]], $7, 3
+; 32R2-LDXC1:    sdxc1 $f{{[0-9]+}}, $[[OFFSET]]($6)
+
+; 32R6-LDC1:     sdc1 $f{{[0-9]+}}, 0(${{[0-9]+}})
 
 define void @test_sdxc1(double %b, double* nocapture %a, i32 %i) {
 entry:
diff --git a/test/CodeGen/Mips/msa/special.ll b/test/CodeGen/Mips/msa/special.ll
index f65a14f..b9badf5 100644
--- a/test/CodeGen/Mips/msa/special.ll
+++ b/test/CodeGen/Mips/msa/special.ll
@@ -4,6 +4,10 @@
 ; RUN:   FileCheck %s --check-prefix=MIPS32
 ; RUN: llc -march=mips64 -mcpu=mips64r2 -mattr=+msa,+fp64 < %s | \
 ; RUN:   FileCheck %s --check-prefix=MIPS64
+; RUN: llc -march=mips -mcpu=mips32r6 -mattr=+msa < %s | \
+; RUN:   FileCheck %s --check-prefix=MIPS32
+; RUN: llc -march=mips64 -mcpu=mips64r6 -mattr=+msa < %s | \
+; RUN:   FileCheck %s --check-prefix=MIPS64
 
 define i32 @llvm_mips_lsa_test(i32 %a, i32 %b) nounwind {
 entry:
diff --git a/test/CodeGen/Mips/no-odd-spreg.ll b/test/CodeGen/Mips/no-odd-spreg.ll
new file mode 100644
index 0000000..b42ed6a
--- /dev/null
+++ b/test/CodeGen/Mips/no-odd-spreg.ll
@@ -0,0 +1,54 @@
+; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=ODDSPREG
+; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOODDSPREG
+; RUN: llc -march=mipsel -mcpu=mips32r6 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=ODDSPREG
+; RUN: llc -march=mipsel -mcpu=mips32r6 -mattr=fp64,+nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOODDSPREG
+
+; ODDSPREG:       .module oddspreg
+; NOODDSPREG:     .module nooddspreg
+
+define float @two_floats(float %a) {
+entry:
+  ; Clobber all except $f12 and $f13
+  ;
+  ; The intention is that if odd single precision registers are permitted, the
+  ; allocator will choose $f12 and $f13 to avoid the spill/reload.
+  ;
+  ; On the other hand, if odd single precision registers are not permitted, it
+  ; will be forced to spill/reload either %a or %0.
+
+  %0 = fadd float %a, 1.0
+  call void asm "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+  %1 = fadd float %a, %0
+  ret float %1
+}
+
+; ALL-LABEL:  two_floats:
+; ODDSPREG:       add.s $f13, $f12, ${{f[0-9]+}}
+; ODDSPREG-NOT:   swc1
+; ODDSPREG-NOT:   lwc1
+; ODDSPREG:       add.s $f0, $f12, $f13
+
+; NOODDSPREG:     add.s $[[T0:f[0-9]*[02468]]], $f12, ${{f[0-9]+}}
+; NOODDSPREG:     swc1 $[[T0]],
+; NOODDSPREG:     lwc1 $[[T1:f[0-9]*[02468]]],
+; NOODDSPREG:     add.s $f0, $f12, $[[T1]]
+
+define double @two_doubles(double %a) {
+entry:
+  ; Clobber all except $f12 and $f13
+  ;
+  ; -mno-odd-sp-reg doesn't need to affect double precision values so both cases
+  ; use $f12 and $f13.
+
+  %0 = fadd double %a, 1.0
+  call void asm "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+  %1 = fadd double %a, %0
+  ret double %1
+}
+
+; ALL-LABEL: two_doubles:
+; ALL:           add.d $[[T0:f[0-9]+]], $f12, ${{f[0-9]+}}
+; ALL:           add.d $f0, $f12, $[[T0]]
+
+
+; INVALID: -mattr=+nooddspreg is not currently permitted for a 32-bit FPU register file (FR=0 mode).
diff --git a/test/CodeGen/Mips/null-streamer.ll b/test/CodeGen/Mips/null-streamer.ll
new file mode 100644
index 0000000..56cebbf
--- /dev/null
+++ b/test/CodeGen/Mips/null-streamer.ll
@@ -0,0 +1,7 @@
+; Test the null streamer with a terget streamer.
+; RUN: llc -O0 -filetype=null -mtriple=mips-linux < %s
+
+define i32 @main()  {
+entry:
+  ret i32 0
+}
diff --git a/test/CodeGen/Mips/prevent-hoisting.ll b/test/CodeGen/Mips/prevent-hoisting.ll
new file mode 100644
index 0000000..da665c2
--- /dev/null
+++ b/test/CodeGen/Mips/prevent-hoisting.ll
@@ -0,0 +1,144 @@
+; RUN: llc -march=mipsel -O3 < %s | FileCheck %s
+
+
+; MIPS direct branches implicitly define register $at. This test makes sure that
+; code hoisting optimization (which moves identical instructions at the start of
+; two basic blocks to the common predecessor block) takes this into account and
+; doesn't move definition of $at to the predecessor block (which would make $at
+; live-in at the start of successor block).
+
+
+; CHECK-LABEL: readLumaCoeff8x8_CABAC
+
+; The check for "addiu" instruction is added so that we can match the correct "b" instruction.
+; CHECK:           addiu ${{[0-9]+}}, $zero, -1
+; CHECK:           b $[[BB0:BB[0-9_]+]]
+
+; Check that sll instruction that writes to $1 starts basic block.
+; CHECK:       {{BB[0-9_#]+}}: 
+; CHECK-NEXT:      sll $1, $[[R0:[0-9]+]], 4
+
+; Check that identical sll instruction starts another basic block.
+; CHECK:       [[BB0]]:
+; CHECK-NEXT:      sll $1, $[[R0]], 4
+
+
+%struct.img_par = type { i32, i32, i32, i32, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [16 x [16 x i16]], [6 x [32 x i32]], [16 x [16 x i32]], [4 x [12 x [4 x [4 x i32]]]], [16 x i32], i8**, i32*, i32***, i32**, i32, i32, i32, i32, %struct.Slice*, %struct.macroblock*, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32***, i32***, i32****, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [3 x [2 x i32]], i32, i32, i32, i32, %struct.timeb, %struct.timeb, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.Slice = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.datapartition*, %struct.MotionInfoContexts*, %struct.TextureInfoContexts*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (%struct.img_par*, %struct.inp_par*)*, i32, i32, i32, i32 }
+%struct.datapartition = type { %struct.Bitstream*, %struct.DecodingEnvironment, i32 (%struct.syntaxelement*, %struct.img_par*, %struct.datapartition*)* }
+%struct.Bitstream = type { i32, i32, i32, i32, i8*, i32 }
+%struct.DecodingEnvironment = type { i32, i32, i32, i32, i32, i8*, i32* }
+%struct.syntaxelement = type { i32, i32, i32, i32, i32, i32, i32, i32, void (i32, i32, i32*, i32*)*, void (%struct.syntaxelement*, %struct.img_par*, %struct.DecodingEnvironment*)* }
+%struct.MotionInfoContexts = type { [4 x [11 x %struct.BiContextType]], [2 x [9 x %struct.BiContextType]], [2 x [10 x %struct.BiContextType]], [2 x [6 x %struct.BiContextType]], [4 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x %struct.BiContextType] }
+%struct.BiContextType = type { i16, i8 }
+%struct.TextureInfoContexts = type { [2 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x [4 x %struct.BiContextType]], [10 x [4 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]] }
+%struct.inp_par = type { [1000 x i8], [1000 x i8], [1000 x i8], i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.macroblock = type { i32, [2 x i32], i32, i32, %struct.macroblock*, %struct.macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], i32, i64, i64, i32, i32, [4 x i8], [4 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.DecRefPicMarking_s = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s* }
+%struct.timeb = type { i32, i16, i16, i16 }
+
+@assignSE2partition = external global [0 x [20 x i32]]
+@FIELD_SCAN8x8 = external constant [64 x [2 x i8]]
+
+
+define void @readLumaCoeff8x8_CABAC(%struct.img_par* %img, i32 %b8) {
+
+  %1 = load i32* undef, align 4
+  br i1 false, label %2, label %3
+
+; <label>:2                                       ; preds = %0
+  br label %3
+
+; <label>:3                                       ; preds = %2, %0
+  br i1 undef, label %switch.lookup, label %4
+
+switch.lookup:                                    ; preds = %3
+  br label %4
+
+; <label>:4                                       ; preds = %switch.lookup, %3
+  br i1 undef, label %5, label %6
+
+; <label>:5                                       ; preds = %4
+  br label %6
+
+; <label>:6                                       ; preds = %5, %4
+  %7 = phi [2 x i8]* [ getelementptr inbounds ([64 x [2 x i8]]* @FIELD_SCAN8x8, i32 0, i32 0), %4 ], [ null, %5 ]
+  br i1 undef, label %switch.lookup6, label %8
+
+switch.lookup6:                                   ; preds = %6
+  br label %8
+
+; <label>:8                                       ; preds = %switch.lookup6, %6
+  br i1 undef, label %.loopexit, label %9
+
+; <label>:9                                       ; preds = %8
+  %10 = and i32 %b8, 1
+  %11 = shl nuw nsw i32 %10, 3
+  %12 = getelementptr inbounds %struct.Slice* null, i32 0, i32 9
+  br i1 undef, label %.preheader, label %.preheader11
+
+.preheader11:                                     ; preds = %21, %9
+  %k.014 = phi i32 [ %27, %21 ], [ 0, %9 ]
+  %coef_ctr.013 = phi i32 [ %23, %21 ], [ -1, %9 ]
+  br i1 false, label %13, label %14
+
+; <label>:13                                      ; preds = %.preheader11
+  br label %15
+
+; <label>:14                                      ; preds = %.preheader11
+  br label %15
+
+; <label>:15                                      ; preds = %14, %13
+  %16 = getelementptr inbounds [0 x [20 x i32]]* @assignSE2partition, i32 0, i32 %1, i32 undef
+  %17 = load i32* %16, align 4
+  %18 = getelementptr inbounds %struct.datapartition* null, i32 %17, i32 2
+  %19 = load i32 (%struct.syntaxelement*, %struct.img_par*, %struct.datapartition*)** %18, align 4
+  %20 = call i32 %19(%struct.syntaxelement* undef, %struct.img_par* %img, %struct.datapartition* undef)
+  br i1 false, label %.loopexit, label %21
+
+; <label>:21                                      ; preds = %15
+  %22 = add i32 %coef_ctr.013, 1
+  %23 = add i32 %22, 0
+  %24 = getelementptr inbounds [2 x i8]* %7, i32 %23, i32 0
+  %25 = add nsw i32 0, %11
+  %26 = getelementptr inbounds %struct.img_par* %img, i32 0, i32 27, i32 undef, i32 %25
+  store i32 0, i32* %26, align 4
+  %27 = add nsw i32 %k.014, 1
+  %28 = icmp slt i32 %27, 65
+  br i1 %28, label %.preheader11, label %.loopexit
+
+.preheader:                                       ; preds = %36, %9
+  %k.110 = phi i32 [ %45, %36 ], [ 0, %9 ]
+  %coef_ctr.29 = phi i32 [ %39, %36 ], [ -1, %9 ]
+  br i1 false, label %29, label %30
+
+; <label>:29                                      ; preds = %.preheader
+  br label %31
+
+; <label>:30                                      ; preds = %.preheader
+  br label %31
+
+; <label>:31                                      ; preds = %30, %29
+  %32 = getelementptr inbounds [0 x [20 x i32]]* @assignSE2partition, i32 0, i32 %1, i32 undef
+  %33 = load i32* %32, align 4
+  %34 = getelementptr inbounds %struct.datapartition* null, i32 %33
+  %35 = call i32 undef(%struct.syntaxelement* undef, %struct.img_par* %img, %struct.datapartition* %34)
+  br i1 false, label %.loopexit, label %36
+
+; <label>:36                                      ; preds = %31
+  %37 = load i32* undef, align 4
+  %38 = add i32 %coef_ctr.29, 1
+  %39 = add i32 %38, %37
+  %40 = getelementptr inbounds [2 x i8]* %7, i32 %39, i32 0
+  %41 = load i8* %40, align 1
+  %42 = zext i8 %41 to i32
+  %43 = add nsw i32 %42, %11
+  %44 = getelementptr inbounds %struct.img_par* %img, i32 0, i32 27, i32 undef, i32 %43
+  store i32 0, i32* %44, align 4
+  %45 = add nsw i32 %k.110, 1
+  %46 = icmp slt i32 %45, 65
+  br i1 %46, label %.preheader, label %.loopexit
+
+.loopexit:                                        ; preds = %36, %31, %21, %15, %8
+  ret void
+}
diff --git a/test/CodeGen/Mips/select.ll b/test/CodeGen/Mips/select.ll
index 06e2a86..eb2198b 100644
--- a/test/CodeGen/Mips/select.ll
+++ b/test/CodeGen/Mips/select.ll
@@ -1,135 +1,705 @@
-; RUN: llc  < %s -march=mipsel | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -march=mipsel   -mcpu=mips32   | FileCheck %s -check-prefix=ALL -check-prefix=32
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=32R2
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=32R6
+; RUN: llc < %s -march=mips64el -mcpu=mips64   | FileCheck %s -check-prefix=ALL -check-prefix=64
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=64R2
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL -check-prefix=64R6
 
 @d2 = external global double
 @d3 = external global double
 
-define i32 @sel1(i32 %s, i32 %f0, i32 %f1) nounwind readnone {
+define i32 @i32_icmp_ne_i32_val(i32 %s, i32 %f0, i32 %f1) nounwind readnone {
 entry:
-; CHECK: movn
+; ALL-LABEL: i32_icmp_ne_i32_val:
+
+; 32:            movn $5, $6, $4
+; 32:            move $2, $5
+
+; 32R2:          movn $5, $6, $4
+; 32R2:          move $2, $5
+
+; 32R6-DAG:      seleqz $[[T0:[0-9]+]], $5, $4
+; 32R6-DAG:      selnez $[[T1:[0-9]+]], $6, $4
+; 32R6:          or $2, $[[T1]], $[[T0]]
+
+; 64:            movn $5, $6, $4
+; 64:            move $2, $5
+
+; 64R2:          movn $5, $6, $4
+; 64R2:          move $2, $5
+
+; 64R6-DAG:      seleqz $[[T0:[0-9]+]], $5, $4
+; 64R6-DAG:      selnez $[[T1:[0-9]+]], $6, $4
+; 64R6:          or $2, $[[T1]], $[[T0]]
+
   %tobool = icmp ne i32 %s, 0
   %cond = select i1 %tobool, i32 %f1, i32 %f0
   ret i32 %cond
 }
 
-define float @sel2(i32 %s, float %f0, float %f1) nounwind readnone {
+define i64 @i32_icmp_ne_i64_val(i32 %s, i64 %f0, i64 %f1) nounwind readnone {
+entry:
+; ALL-LABEL: i32_icmp_ne_i64_val:
+
+; 32-DAG:        lw $[[F1:[0-9]+]], 16($sp)
+; 32-DAG:        movn $6, $[[F1]], $4
+; 32-DAG:        lw $[[F1H:[0-9]+]], 20($sp)
+; 32:            movn $7, $[[F1H]], $4
+; 32:            move $2, $6
+; 32:            move $3, $7
+
+; 32R2-DAG:      lw $[[F1:[0-9]+]], 16($sp)
+; 32R2-DAG:      movn $6, $[[F1]], $4
+; 32R2-DAG:      lw $[[F1H:[0-9]+]], 20($sp)
+; 32R2:          movn $7, $[[F1H]], $4
+; 32R2:          move $2, $6
+; 32R2:          move $3, $7
+
+; 32R6-DAG:      lw $[[F1:[0-9]+]], 16($sp)
+; 32R6-DAG:      seleqz $[[T0:[0-9]+]], $6, $4
+; 32R6-DAG:      selnez $[[T1:[0-9]+]], $[[F1]], $4
+; 32R6:          or $2, $[[T1]], $[[T0]]
+; 32R6-DAG:      lw $[[F1H:[0-9]+]], 20($sp)
+; 32R6-DAG:      seleqz $[[T0:[0-9]+]], $7, $4
+; 32R6-DAG:      selnez $[[T1:[0-9]+]], $[[F1H]], $4
+; 32R6:          or $3, $[[T1]], $[[T0]]
+
+; 64:            movn $5, $6, $4
+; 64:            move $2, $5
+
+; 64R2:          movn $5, $6, $4
+; 64R2:          move $2, $5
+
+; FIXME: This sll works around an implementation detail in the code generator
+;        (setcc's result is i32 so bits 32-63 are undefined). It's not really
+;        needed.
+; 64R6-DAG:      sll $[[CC:[0-9]+]], $4, 0
+; 64R6-DAG:      seleqz $[[T0:[0-9]+]], $5, $[[CC]]
+; 64R6-DAG:      selnez $[[T1:[0-9]+]], $6, $[[CC]]
+; 64R6:          or $2, $[[T1]], $[[T0]]
+
+  %tobool = icmp ne i32 %s, 0
+  %cond = select i1 %tobool, i64 %f1, i64 %f0
+  ret i64 %cond
+}
+
+define i64 @i64_icmp_ne_i64_val(i64 %s, i64 %f0, i64 %f1) nounwind readnone {
 entry:
-; CHECK: movn.s
+; ALL-LABEL: i64_icmp_ne_i64_val:
+
+; 32-DAG:        or $[[CC:[0-9]+]], $4
+; 32-DAG:        lw $[[F1:[0-9]+]], 16($sp)
+; 32-DAG:        movn $6, $[[F1]], $[[CC]]
+; 32-DAG:        lw $[[F1H:[0-9]+]], 20($sp)
+; 32:            movn $7, $[[F1H]], $[[CC]]
+; 32:            move $2, $6
+; 32:            move $3, $7
+
+; 32R2-DAG:      or $[[CC:[0-9]+]], $4
+; 32R2-DAG:      lw $[[F1:[0-9]+]], 16($sp)
+; 32R2-DAG:      movn $6, $[[F1]], $[[CC]]
+; 32R2-DAG:      lw $[[F1H:[0-9]+]], 20($sp)
+; 32R2:          movn $7, $[[F1H]], $[[CC]]
+; 32R2:          move $2, $6
+; 32R2:          move $3, $7
+
+; 32R6-DAG:      lw $[[F1:[0-9]+]], 16($sp)
+; 32R6-DAG:      or $[[T2:[0-9]+]], $4, $5
+; 32R6-DAG:      seleqz $[[T0:[0-9]+]], $6, $[[T2]]
+; 32R6-DAG:      selnez $[[T1:[0-9]+]], $[[F1]], $[[T2]]
+; 32R6:          or $2, $[[T1]], $[[T0]]
+; 32R6-DAG:      lw $[[F1H:[0-9]+]], 20($sp)
+; 32R6-DAG:      seleqz $[[T0:[0-9]+]], $7, $[[T2]]
+; 32R6-DAG:      selnez $[[T1:[0-9]+]], $[[F1H]], $[[T2]]
+; 32R6:          or $3, $[[T1]], $[[T0]]
+
+; 64:            movn $5, $6, $4
+; 64:            move $2, $5
+
+; 64R2:          movn $5, $6, $4
+; 64R2:          move $2, $5
+
+; 64R6-DAG:      seleqz $[[T0:[0-9]+]], $5, $4
+; 64R6-DAG:      selnez $[[T1:[0-9]+]], $6, $4
+; 64R6:          or $2, $[[T1]], $[[T0]]
+
+  %tobool = icmp ne i64 %s, 0
+  %cond = select i1 %tobool, i64 %f1, i64 %f0
+  ret i64 %cond
+}
+
+define float @i32_icmp_ne_f32_val(i32 %s, float %f0, float %f1) nounwind readnone {
+entry:
+; ALL-LABEL: i32_icmp_ne_f32_val:
+
+; 32-DAG:        mtc1 $5, $[[F0:f[0-9]+]]
+; 32-DAG:        mtc1 $6, $[[F1:f0]]
+; 32:            movn.s $[[F1]], $[[F0]], $4
+
+; 32R2-DAG:      mtc1 $5, $[[F0:f[0-9]+]]
+; 32R2-DAG:      mtc1 $6, $[[F1:f0]]
+; 32R2:          movn.s $[[F1]], $[[F0]], $4
+
+; 32R6-DAG:      mtc1 $5, $[[F0:f[0-9]+]]
+; 32R6-DAG:      mtc1 $6, $[[F1:f[0-9]+]]
+; 32R6:          sltu $[[T0:[0-9]+]], $zero, $4
+; 32R6:          mtc1 $[[T0]], $[[CC:f0]]
+; 32R6:          sel.s $[[CC]], $[[F1]], $[[F0]]
+
+; 64:            movn.s $f14, $f13, $4
+; 64:            mov.s $f0, $f14
+
+; 64R2:          movn.s $f14, $f13, $4
+; 64R2:          mov.s $f0, $f14
+
+; 64R6:          sltu $[[T0:[0-9]+]], $zero, $4
+; 64R6:          mtc1 $[[T0]], $[[CC:f0]]
+; 64R6:          sel.s $[[CC]], $f14, $f13
+
   %tobool = icmp ne i32 %s, 0
   %cond = select i1 %tobool, float %f0, float %f1
   ret float %cond
 }
 
-define double @sel2_1(i32 %s, double %f0, double %f1) nounwind readnone {
+define double @i32_icmp_ne_f64_val(i32 %s, double %f0, double %f1) nounwind readnone {
 entry:
-; CHECK: movn.d
+; ALL-LABEL: i32_icmp_ne_f64_val:
+
+; 32-DAG:        mtc1 $6, $[[F0:f[1-3]*[02468]+]]
+; 32-DAG:        mtc1 $7, $[[F0H:f[1-3]*[13579]+]]
+; 32-DAG:        ldc1 $[[F1:f0]], 16($sp)
+; 32:            movn.d $[[F1]], $[[F0]], $4
+
+; 32R2-DAG:      mtc1 $6, $[[F0:f[0-9]+]]
+; 32R2-DAG:      mthc1 $7, $[[F0]]
+; 32R2-DAG:      ldc1 $[[F1:f0]], 16($sp)
+; 32R2:          movn.d $[[F1]], $[[F0]], $4
+
+; 32R6-DAG:      mtc1 $6, $[[F0:f[0-9]+]]
+; 32R6-DAG:      mthc1 $7, $[[F0]]
+; 32R6-DAG:      sltu $[[T0:[0-9]+]], $zero, $4
+; 32R6-DAG:      mtc1 $[[T0]], $[[CC:f0]]
+; 32R6-DAG:      ldc1 $[[F1:f[0-9]+]], 16($sp)
+; 32R6:          sel.d $[[CC]], $[[F1]], $[[F0]]
+
+; 64:            movn.d $f14, $f13, $4
+; 64:            mov.d $f0, $f14
+
+; 64R2:          movn.d $f14, $f13, $4
+; 64R2:          mov.d $f0, $f14
+
+; 64R6-DAG:      sltu $[[T0:[0-9]+]], $zero, $4
+; 64R6-DAG:      mtc1 $[[T0]], $[[CC:f0]]
+; 64R6:          sel.d $[[CC]], $f14, $f13
+
   %tobool = icmp ne i32 %s, 0
   %cond = select i1 %tobool, double %f0, double %f1
   ret double %cond
 }
 
-define float @sel3(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
+define float @f32_fcmp_oeq_f32_val(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
 entry:
-; CHECK: c.eq.s
-; CHECK: movt.s
+; ALL-LABEL: f32_fcmp_oeq_f32_val:
+
+; 32-DAG:        mtc1 $6, $[[F2:f[0-9]+]]
+; 32-DAG:        mtc1 $7, $[[F3:f[0-9]+]]
+; 32:            c.eq.s $[[F2]], $[[F3]]
+; 32:            movt.s $f14, $f12, $fcc0
+; 32:            mov.s $f0, $f14
+
+; 32R2-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R2-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R2:          c.eq.s $[[F2]], $[[F3]]
+; 32R2:          movt.s $f14, $f12, $fcc0
+; 32R2:          mov.s $f0, $f14
+
+; 32R6-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R6-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R6:          cmp.eq.s $[[CC:f0]], $[[F2]], $[[F3]]
+; 32R6:          sel.s $[[CC]], $f14, $f12
+
+; 64:            c.eq.s $f14, $f15
+; 64:            movt.s $f13, $f12, $fcc0
+; 64:            mov.s $f0, $f13
+
+; 64R2:          c.eq.s $f14, $f15
+; 64R2:          movt.s $f13, $f12, $fcc0
+; 64R2:          mov.s $f0, $f13
+
+; 64R6:          cmp.eq.s $[[CC:f0]], $f14, $f15
+; 64R6:          sel.s $[[CC]], $f13, $f12
+
   %cmp = fcmp oeq float %f2, %f3
   %cond = select i1 %cmp, float %f0, float %f1
   ret float %cond
 }
 
-define float @sel4(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
+define float @f32_fcmp_olt_f32_val(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
 entry:
-; CHECK: c.olt.s
-; CHECK: movt.s
+; ALL-LABEL: f32_fcmp_olt_f32_val:
+
+; 32-DAG:        mtc1 $6, $[[F2:f[0-9]+]]
+; 32-DAG:        mtc1 $7, $[[F3:f[0-9]+]]
+; 32:            c.olt.s $[[F2]], $[[F3]]
+; 32:            movt.s $f14, $f12, $fcc0
+; 32:            mov.s $f0, $f14
+
+; 32R2-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R2-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R2:          c.olt.s $[[F2]], $[[F3]]
+; 32R2:          movt.s $f14, $f12, $fcc0
+; 32R2:          mov.s $f0, $f14
+
+; 32R6-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R6-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R6:          cmp.lt.s $[[CC:f0]], $[[F2]], $[[F3]]
+; 32R6:          sel.s $[[CC]], $f14, $f12
+
+; 64:            c.olt.s $f14, $f15
+; 64:            movt.s $f13, $f12, $fcc0
+; 64:            mov.s $f0, $f13
+
+; 64R2:          c.olt.s $f14, $f15
+; 64R2:          movt.s $f13, $f12, $fcc0
+; 64R2:          mov.s $f0, $f13
+
+; 64R6:          cmp.lt.s $[[CC:f0]], $f14, $f15
+; 64R6:          sel.s $[[CC]], $f13, $f12
+
   %cmp = fcmp olt float %f2, %f3
   %cond = select i1 %cmp, float %f0, float %f1
   ret float %cond
 }
 
-define float @sel5(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
+define float @f32_fcmp_ogt_f32_val(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
 entry:
-; CHECK: c.ule.s
-; CHECK: movf.s
+; ALL-LABEL: f32_fcmp_ogt_f32_val:
+
+; 32-DAG:        mtc1 $6, $[[F2:f[0-9]+]]
+; 32-DAG:        mtc1 $7, $[[F3:f[0-9]+]]
+; 32:            c.ule.s $[[F2]], $[[F3]]
+; 32:            movf.s $f14, $f12, $fcc0
+; 32:            mov.s $f0, $f14
+
+; 32R2-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R2-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R2:          c.ule.s $[[F2]], $[[F3]]
+; 32R2:          movf.s $f14, $f12, $fcc0
+; 32R2:          mov.s $f0, $f14
+
+; 32R6-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R6-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R6:          cmp.lt.s $[[CC:f0]], $[[F3]], $[[F2]]
+; 32R6:          sel.s $[[CC]], $f14, $f12
+
+; 64:            c.ule.s $f14, $f15
+; 64:            movf.s $f13, $f12, $fcc0
+; 64:            mov.s $f0, $f13
+
+; 64R2:          c.ule.s $f14, $f15
+; 64R2:          movf.s $f13, $f12, $fcc0
+; 64R2:          mov.s $f0, $f13
+
+; 64R6:          cmp.lt.s $[[CC:f0]], $f15, $f14
+; 64R6:          sel.s $[[CC]], $f13, $f12
+
   %cmp = fcmp ogt float %f2, %f3
   %cond = select i1 %cmp, float %f0, float %f1
   ret float %cond
 }
 
-define double @sel5_1(double %f0, double %f1, float %f2, float %f3) nounwind readnone {
+define double @f32_fcmp_ogt_f64_val(double %f0, double %f1, float %f2, float %f3) nounwind readnone {
 entry:
-; CHECK: c.ule.s
-; CHECK: movf.d
+; ALL-LABEL: f32_fcmp_ogt_f64_val:
+
+; 32-DAG:        lwc1 $[[F2:f[0-9]+]], 16($sp)
+; 32-DAG:        lwc1 $[[F3:f[0-9]+]], 20($sp)
+; 32:            c.ule.s $[[F2]], $[[F3]]
+; 32:            movf.d $f14, $f12, $fcc0
+; 32:            mov.d $f0, $f14
+
+; 32R2-DAG:      lwc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R2-DAG:      lwc1 $[[F3:f[0-9]+]], 20($sp)
+; 32R2:          c.ule.s $[[F2]], $[[F3]]
+; 32R2:          movf.d $f14, $f12, $fcc0
+; 32R2:          mov.d $f0, $f14
+
+; 32R6-DAG:      lwc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R6-DAG:      lwc1 $[[F3:f[0-9]+]], 20($sp)
+; 32R6:          cmp.lt.s $[[CC:f0]], $[[F3]], $[[F2]]
+; 32R6:          sel.d $[[CC]], $f14, $f12
+
+; 64:            c.ule.s $f14, $f15
+; 64:            movf.d $f13, $f12, $fcc0
+; 64:            mov.d $f0, $f13
+
+; 64R2:          c.ule.s $f14, $f15
+; 64R2:          movf.d $f13, $f12, $fcc0
+; 64R2:          mov.d $f0, $f13
+
+; 64R6:          cmp.lt.s $[[CC:f0]], $f15, $f14
+; 64R6:          sel.d $[[CC]], $f13, $f12
+
   %cmp = fcmp ogt float %f2, %f3
   %cond = select i1 %cmp, double %f0, double %f1
   ret double %cond
 }
 
-define double @sel6(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
+define double @f64_fcmp_oeq_f64_val(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
 entry:
-; CHECK: c.eq.d
-; CHECK: movt.d
+; ALL-LABEL: f64_fcmp_oeq_f64_val:
+
+; 32-DAG:        ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32-DAG:        ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32:            c.eq.d $[[F2]], $[[F3]]
+; 32:            movt.d $f14, $f12, $fcc0
+; 32:            mov.d $f0, $f14
+
+; 32R2-DAG:      ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R2-DAG:      ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32R2:          c.eq.d $[[F2]], $[[F3]]
+; 32R2:          movt.d $f14, $f12, $fcc0
+; 32R2:          mov.d $f0, $f14
+
+; 32R6-DAG:      ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R6-DAG:      ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32R6:          cmp.eq.d $[[CC:f0]], $[[F2]], $[[F3]]
+; 32R6:          sel.d $[[CC]], $f14, $f12
+
+; 64:            c.eq.d $f14, $f15
+; 64:            movt.d $f13, $f12, $fcc0
+; 64:            mov.d $f0, $f13
+
+; 64R2:          c.eq.d $f14, $f15
+; 64R2:          movt.d $f13, $f12, $fcc0
+; 64R2:          mov.d $f0, $f13
+
+; 64R6:          cmp.eq.d $[[CC:f0]], $f14, $f15
+; 64R6:          sel.d $[[CC]], $f13, $f12
+
   %cmp = fcmp oeq double %f2, %f3
   %cond = select i1 %cmp, double %f0, double %f1
   ret double %cond
 }
 
-define double @sel7(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
+define double @f64_fcmp_olt_f64_val(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
 entry:
-; CHECK: c.olt.d
-; CHECK: movt.d
+; ALL-LABEL: f64_fcmp_olt_f64_val:
+
+; 32-DAG:        ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32-DAG:        ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32:            c.olt.d $[[F2]], $[[F3]]
+; 32:            movt.d $f14, $f12, $fcc0
+; 32:            mov.d $f0, $f14
+
+; 32R2-DAG:      ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R2-DAG:      ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32R2:          c.olt.d $[[F2]], $[[F3]]
+; 32R2:          movt.d $f14, $f12, $fcc0
+; 32R2:          mov.d $f0, $f14
+
+; 32R6-DAG:      ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R6-DAG:      ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32R6:          cmp.lt.d $[[CC:f0]], $[[F2]], $[[F3]]
+; 32R6:          sel.d $[[CC]], $f14, $f12
+
+; 64:            c.olt.d $f14, $f15
+; 64:            movt.d $f13, $f12, $fcc0
+; 64:            mov.d $f0, $f13
+
+; 64R2:          c.olt.d $f14, $f15
+; 64R2:          movt.d $f13, $f12, $fcc0
+; 64R2:          mov.d $f0, $f13
+
+; 64R6:          cmp.lt.d $[[CC:f0]], $f14, $f15
+; 64R6:          sel.d $[[CC]], $f13, $f12
+
   %cmp = fcmp olt double %f2, %f3
   %cond = select i1 %cmp, double %f0, double %f1
   ret double %cond
 }
 
-define double @sel8(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
+define double @f64_fcmp_ogt_f64_val(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
 entry:
-; CHECK: c.ule.d
-; CHECK: movf.d
+; ALL-LABEL: f64_fcmp_ogt_f64_val:
+
+; 32-DAG:        ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32-DAG:        ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32:            c.ule.d $[[F2]], $[[F3]]
+; 32:            movf.d $f14, $f12, $fcc0
+; 32:            mov.d $f0, $f14
+
+; 32R2-DAG:      ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R2-DAG:      ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32R2:          c.ule.d $[[F2]], $[[F3]]
+; 32R2:          movf.d $f14, $f12, $fcc0
+; 32R2:          mov.d $f0, $f14
+
+; 32R6-DAG:      ldc1 $[[F2:f[0-9]+]], 16($sp)
+; 32R6-DAG:      ldc1 $[[F3:f[0-9]+]], 24($sp)
+; 32R6:          cmp.lt.d $[[CC:f0]], $[[F3]], $[[F2]]
+; 32R6:          sel.d $[[CC]], $f14, $f12
+
+; 64:            c.ule.d $f14, $f15
+; 64:            movf.d $f13, $f12, $fcc0
+; 64:            mov.d $f0, $f13
+
+; 64R2:          c.ule.d $f14, $f15
+; 64R2:          movf.d $f13, $f12, $fcc0
+; 64R2:          mov.d $f0, $f13
+
+; 64R6:          cmp.lt.d $[[CC:f0]], $f15, $f14
+; 64R6:          sel.d $[[CC]], $f13, $f12
+
   %cmp = fcmp ogt double %f2, %f3
   %cond = select i1 %cmp, double %f0, double %f1
   ret double %cond
 }
 
-define float @sel8_1(float %f0, float %f1, double %f2, double %f3) nounwind readnone {
+define float @f64_fcmp_ogt_f32_val(float %f0, float %f1, double %f2, double %f3) nounwind readnone {
 entry:
-; CHECK: c.ule.d
-; CHECK: movf.s
+; ALL-LABEL: f64_fcmp_ogt_f32_val:
+
+; 32-DAG:        mtc1 $6, $[[F2:f[1-3]*[02468]+]]
+; 32-DAG:        mtc1 $7, $[[F2H:f[1-3]*[13579]+]]
+; 32-DAG:        ldc1 $[[F3:f[0-9]+]], 16($sp)
+; 32:            c.ule.d $[[F2]], $[[F3]]
+; 32:            movf.s $f14, $f12, $fcc0
+; 32:            mov.s $f0, $f14
+
+; 32R2-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R2-DAG:      mthc1 $7, $[[F2]]
+; 32R2-DAG:      ldc1 $[[F3:f[0-9]+]], 16($sp)
+; 32R2:          c.ule.d $[[F2]], $[[F3]]
+; 32R2:          movf.s $f14, $f12, $fcc0
+; 32R2:          mov.s $f0, $f14
+
+; 32R6-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R6-DAG:      mthc1 $7, $[[F2]]
+; 32R6-DAG:      ldc1 $[[F3:f[0-9]+]], 16($sp)
+; 32R6:          cmp.lt.d $[[CC:f0]], $[[F3]], $[[F2]]
+; 32R6:          sel.s $[[CC]], $f14, $f12
+
+; 64:            c.ule.d $f14, $f15
+; 64:            movf.s $f13, $f12, $fcc0
+; 64:            mov.s $f0, $f13
+
+; 64R2:          c.ule.d $f14, $f15
+; 64R2:          movf.s $f13, $f12, $fcc0
+; 64R2:          mov.s $f0, $f13
+
+; 64R6:          cmp.lt.d $[[CC:f0]], $f15, $f14
+; 64R6:          sel.s $[[CC]], $f13, $f12
+
   %cmp = fcmp ogt double %f2, %f3
   %cond = select i1 %cmp, float %f0, float %f1
   ret float %cond
 }
 
-define i32 @sel9(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_oeq_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
 entry:
-; CHECK: c.eq.s
-; CHECK: movt
+; ALL-LABEL: f32_fcmp_oeq_i32_val:
+
+; 32-DAG:        mtc1 $6, $[[F2:f[0-9]+]]
+; 32-DAG:        mtc1 $7, $[[F3:f[0-9]+]]
+; 32:            c.eq.s $[[F2]], $[[F3]]
+; 32:            movt $5, $4, $fcc0
+; 32:            move $2, $5
+
+; 32R2-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R2-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R2:          c.eq.s $[[F2]], $[[F3]]
+; 32R2:          movt $5, $4, $fcc0
+; 32R2:          move $2, $5
+
+; 32R6-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R6-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R6:          cmp.eq.s $[[CC:f[0-9]+]], $[[F2]], $[[F3]]
+; 32R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 32R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 32R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 32R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 32R6:          or $2, $[[NE]], $[[EQ]]
+
+; 64:            c.eq.s $f14, $f15
+; 64:            movt $5, $4, $fcc0
+; 64:            move $2, $5
+
+; 64R2:          c.eq.s $f14, $f15
+; 64R2:          movt $5, $4, $fcc0
+; 64R2:          move $2, $5
+
+; 64R6:          cmp.eq.s $[[CC:f[0-9]+]], $f14, $f15
+; 64R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 64R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 64R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 64R6:          or $2, $[[NE]], $[[EQ]]
+
   %cmp = fcmp oeq float %f2, %f3
   %cond = select i1 %cmp, i32 %f0, i32 %f1
   ret i32 %cond
 }
 
-define i32 @sel10(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_olt_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
 entry:
-; CHECK: c.olt.s
-; CHECK: movt
+; ALL-LABEL: f32_fcmp_olt_i32_val:
+
+; 32-DAG:        mtc1 $6, $[[F2:f[0-9]+]]
+; 32-DAG:        mtc1 $7, $[[F3:f[0-9]+]]
+; 32:            c.olt.s $[[F2]], $[[F3]]
+; 32:            movt $5, $4, $fcc0
+; 32:            move $2, $5
+
+; 32R2-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R2-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R2:          c.olt.s $[[F2]], $[[F3]]
+; 32R2:          movt $5, $4, $fcc0
+; 32R2:          move $2, $5
+
+; 32R6-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R6-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R6:          cmp.lt.s $[[CC:f[0-9]+]], $[[F2]], $[[F3]]
+; 32R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 32R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 32R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 32R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 32R6:          or $2, $[[NE]], $[[EQ]]
+
+; 64:            c.olt.s $f14, $f15
+; 64:            movt $5, $4, $fcc0
+; 64:            move $2, $5
+
+; 64R2:          c.olt.s $f14, $f15
+; 64R2:          movt $5, $4, $fcc0
+; 64R2:          move $2, $5
+
+; 64R6:          cmp.lt.s $[[CC:f[0-9]+]], $f14, $f15
+; 64R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 64R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 64R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 64R6:          or $2, $[[NE]], $[[EQ]]
   %cmp = fcmp olt float %f2, %f3
   %cond = select i1 %cmp, i32 %f0, i32 %f1
   ret i32 %cond
 }
 
-define i32 @sel11(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_ogt_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
 entry:
-; CHECK: c.ule.s
-; CHECK: movf
+; ALL-LABEL: f32_fcmp_ogt_i32_val:
+
+; 32-DAG:        mtc1 $6, $[[F2:f[0-9]+]]
+; 32-DAG:        mtc1 $7, $[[F3:f[0-9]+]]
+; 32:            c.ule.s $[[F2]], $[[F3]]
+; 32:            movf $5, $4, $fcc0
+; 32:            move $2, $5
+
+; 32R2-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R2-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R2:          c.ule.s $[[F2]], $[[F3]]
+; 32R2:          movf $5, $4, $fcc0
+; 32R2:          move $2, $5
+
+; 32R6-DAG:      mtc1 $6, $[[F2:f[0-9]+]]
+; 32R6-DAG:      mtc1 $7, $[[F3:f[0-9]+]]
+; 32R6:          cmp.lt.s $[[CC:f[0-9]+]], $[[F3]], $[[F2]]
+; 32R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 32R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 32R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 32R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 32R6:          or $2, $[[NE]], $[[EQ]]
+
+; 64:            c.ule.s $f14, $f15
+; 64:            movf $5, $4, $fcc0
+; 64:            move $2, $5
+
+; 64R2:          c.ule.s $f14, $f15
+; 64R2:          movf $5, $4, $fcc0
+; 64R2:          move $2, $5
+
+; 64R6:          cmp.lt.s $[[CC:f[0-9]+]], $f15, $f14
+; 64R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 64R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 64R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 64R6:          or $2, $[[NE]], $[[EQ]]
+
   %cmp = fcmp ogt float %f2, %f3
   %cond = select i1 %cmp, i32 %f0, i32 %f1
   ret i32 %cond
 }
 
-define i32 @sel12(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_oeq_i32_val(i32 %f0, i32 %f1) nounwind readonly {
 entry:
-; CHECK: c.eq.d
-; CHECK: movt
+; ALL-LABEL: f64_fcmp_oeq_i32_val:
+
+; 32-DAG:        addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32-DAG:        addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32-DAG:        lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32-DAG:        ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32-DAG:        lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32-DAG:        ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32:            c.eq.d $[[TMP]], $[[TMP1]]
+; 32:            movt $5, $4, $fcc0
+; 32:            move $2, $5
+
+; 32R2-DAG:      addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32R2-DAG:      addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32R2-DAG:      lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32R2-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32R2-DAG:      lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32R2-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32R2:          c.eq.d $[[TMP]], $[[TMP1]]
+; 32R2:          movt $5, $4, $fcc0
+; 32R2:          move $2, $5
+
+; 32R6-DAG:      addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32R6-DAG:      addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32R6-DAG:      lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32R6-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32R6-DAG:      lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32R6-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32R6:          cmp.eq.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]]
+; 32R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 32R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 32R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 32R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 32R6:          or $2, $[[NE]], $[[EQ]]
+
+; 64-DAG:        daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_oeq_i32_val)))
+; 64-DAG:        daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64-DAG:        ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64-DAG:        ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64-DAG:        ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64-DAG:        ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64:            c.eq.d $[[TMP]], $[[TMP1]]
+; 64:            movt $5, $4, $fcc0
+; 64:            move $2, $5
+
+; 64R2-DAG:      daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_oeq_i32_val)))
+; 64R2-DAG:      daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64R2-DAG:      ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64R2-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64R2-DAG:      ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64R2-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64R2:          c.eq.d $[[TMP]], $[[TMP1]]
+; 64R2:          movt $5, $4, $fcc0
+; 64R2:          move $2, $5
+
+; 64R6-DAG:      daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_oeq_i32_val)))
+; 64R6-DAG:      daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64R6-DAG:      ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64R6-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64R6-DAG:      ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64R6-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64R6:          cmp.eq.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]]
+; 64R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 64R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 64R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 64R6:          or $2, $[[NE]], $[[EQ]]
+
   %tmp = load double* @d2, align 8
   %tmp1 = load double* @d3, align 8
   %cmp = fcmp oeq double %tmp, %tmp1
@@ -137,10 +707,76 @@ entry:
   ret i32 %cond
 }
 
-define i32 @sel13(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_olt_i32_val(i32 %f0, i32 %f1) nounwind readonly {
 entry:
-; CHECK: c.olt.d
-; CHECK: movt
+; ALL-LABEL: f64_fcmp_olt_i32_val:
+
+; 32-DAG:        addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32-DAG:        addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32-DAG:        lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32-DAG:        ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32-DAG:        lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32-DAG:        ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32:            c.olt.d $[[TMP]], $[[TMP1]]
+; 32:            movt $5, $4, $fcc0
+; 32:            move $2, $5
+
+; 32R2-DAG:      addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32R2-DAG:      addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32R2-DAG:      lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32R2-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32R2-DAG:      lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32R2-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32R2:          c.olt.d $[[TMP]], $[[TMP1]]
+; 32R2:          movt $5, $4, $fcc0
+; 32R2:          move $2, $5
+
+; 32R6-DAG:      addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32R6-DAG:      addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32R6-DAG:      lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32R6-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32R6-DAG:      lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32R6-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32R6:          cmp.lt.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]]
+; 32R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 32R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 32R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 32R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 32R6:          or $2, $[[NE]], $[[EQ]]
+
+; 64-DAG:        daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_olt_i32_val)))
+; 64-DAG:        daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64-DAG:        ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64-DAG:        ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64-DAG:        ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64-DAG:        ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64:            c.olt.d $[[TMP]], $[[TMP1]]
+; 64:            movt $5, $4, $fcc0
+; 64:            move $2, $5
+
+; 64R2-DAG:      daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_olt_i32_val)))
+; 64R2-DAG:      daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64R2-DAG:      ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64R2-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64R2-DAG:      ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64R2-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64R2:          c.olt.d $[[TMP]], $[[TMP1]]
+; 64R2:          movt $5, $4, $fcc0
+; 64R2:          move $2, $5
+
+; 64R6-DAG:      daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_olt_i32_val)))
+; 64R6-DAG:      daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64R6-DAG:      ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64R6-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64R6-DAG:      ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64R6-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64R6:          cmp.lt.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]]
+; 64R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 64R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 64R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 64R6:          or $2, $[[NE]], $[[EQ]]
+
   %tmp = load double* @d2, align 8
   %tmp1 = load double* @d3, align 8
   %cmp = fcmp olt double %tmp, %tmp1
@@ -148,10 +784,76 @@ entry:
   ret i32 %cond
 }
 
-define i32 @sel14(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_ogt_i32_val(i32 %f0, i32 %f1) nounwind readonly {
 entry:
-; CHECK: c.ule.d
-; CHECK: movf
+; ALL-LABEL: f64_fcmp_ogt_i32_val:
+
+; 32-DAG:        addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32-DAG:        addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32-DAG:        lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32-DAG:        ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32-DAG:        lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32-DAG:        ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32:            c.ule.d $[[TMP]], $[[TMP1]]
+; 32:            movf $5, $4, $fcc0
+; 32:            move $2, $5
+
+; 32R2-DAG:      addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32R2-DAG:      addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32R2-DAG:      lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32R2-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32R2-DAG:      lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32R2-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32R2:          c.ule.d $[[TMP]], $[[TMP1]]
+; 32R2:          movf $5, $4, $fcc0
+; 32R2:          move $2, $5
+
+; 32R6-DAG:      addiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(_gp_disp)
+; 32R6-DAG:      addu $[[GOT:[0-9]+]], $[[T0]], $25
+; 32R6-DAG:      lw $[[D2:[0-9]+]], %got(d2)($1)
+; 32R6-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 32R6-DAG:      lw $[[D3:[0-9]+]], %got(d3)($1)
+; 32R6-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 32R6:          cmp.lt.d $[[CC:f[0-9]+]], $[[TMP1]], $[[TMP]]
+; 32R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 32R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 32R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 32R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 32R6:          or $2, $[[NE]], $[[EQ]]
+
+; 64-DAG:        daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_ogt_i32_val)))
+; 64-DAG:        daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64-DAG:        ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64-DAG:        ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64-DAG:        ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64-DAG:        ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64:            c.ule.d $[[TMP]], $[[TMP1]]
+; 64:            movf $5, $4, $fcc0
+; 64:            move $2, $5
+
+; 64R2-DAG:      daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_ogt_i32_val)))
+; 64R2-DAG:      daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64R2-DAG:      ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64R2-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64R2-DAG:      ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64R2-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64R2:          c.ule.d $[[TMP]], $[[TMP1]]
+; 64R2:          movf $5, $4, $fcc0
+; 64R2:          move $2, $5
+
+; 64R6-DAG:      daddiu $[[T0:[0-9]+]], ${{[0-9]+}}, %lo(%neg(%gp_rel(f64_fcmp_ogt_i32_val)))
+; 64R6-DAG:      daddu $[[GOT:[0-9]+]], $[[T0]], $25
+; 64R6-DAG:      ld $[[D2:[0-9]+]], %got_disp(d2)($1)
+; 64R6-DAG:      ldc1 $[[TMP:f[0-9]+]], 0($[[D2]])
+; 64R6-DAG:      ld $[[D3:[0-9]+]], %got_disp(d3)($1)
+; 64R6-DAG:      ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]])
+; 64R6:          cmp.lt.d $[[CC:f[0-9]+]], $[[TMP1]], $[[TMP]]
+; 64R6:          mfc1 $[[CCGPR:[0-9]+]], $[[CC]]
+; 64R6:          andi $[[CCGPR]], $[[CCGPR]], 1
+; 64R6:          seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]]
+; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
+; 64R6:          or $2, $[[NE]], $[[EQ]]
+
   %tmp = load double* @d2, align 8
   %tmp1 = load double* @d3, align 8
   %cmp = fcmp ogt double %tmp, %tmp1
diff --git a/test/CodeGen/Mips/selectcc.ll b/test/CodeGen/Mips/selectcc.ll
index aeef60e..9790a0a 100644
--- a/test/CodeGen/Mips/selectcc.ll
+++ b/test/CodeGen/Mips/selectcc.ll
@@ -1,5 +1,7 @@
-; RUN: llc -march=mipsel < %s
-; RUN: llc -march=mipsel -pre-RA-sched=source < %s | FileCheck %s --check-prefix=SOURCE-SCHED
+; RUN: llc -march=mipsel -mcpu=mips32 < %s
+; RUN: llc -march=mipsel -mcpu=mips32 -pre-RA-sched=source < %s | FileCheck %s --check-prefix=SOURCE-SCHED
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s
+; RUN: llc -march=mipsel -mcpu=mips32r2 -pre-RA-sched=source < %s | FileCheck %s --check-prefix=SOURCE-SCHED
 
 @gf0 = external global float
 @gf1 = external global float
@@ -16,13 +18,11 @@ entry:
 ; SOURCE-SCHED: lw
 ; SOURCE-SCHED: lui
 ; SOURCE-SCHED: sw
-; SOURCE-SCHED: addiu
-; SOURCE-SCHED: addiu
-; SOURCE-SCHED: c.olt.s
-; SOURCE-SCHED: movt
+; SOURCE-SCHED: lw
+; SOURCE-SCHED: lwc1
 ; SOURCE-SCHED: mtc1
+; SOURCE-SCHED: c.olt.s
 ; SOURCE-SCHED: jr
-
   store float 0.000000e+00, float* @gf0, align 4
   store float 1.000000e+00, float* @gf1, align 4
   %cmp = fcmp olt float %a, %b
diff --git a/test/CodeGen/Mips/tls-alias.ll b/test/CodeGen/Mips/tls-alias.ll
index 80fbe87..b61f84e 100644
--- a/test/CodeGen/Mips/tls-alias.ll
+++ b/test/CodeGen/Mips/tls-alias.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=mipsel -relocation-model=pic -disable-mips-delay-filler < %s | FileCheck %s
 
 @foo = thread_local global i32 42
-@bar = hidden alias i32* @foo
+@bar = hidden thread_local alias i32* @foo
 
 define i32* @zed() {
 ; CHECK-DAG: __tls_get_addr
diff --git a/test/CodeGen/Mips/zeroreg.ll b/test/CodeGen/Mips/zeroreg.ll
index e0e93e2..a1b6cb0 100644
--- a/test/CodeGen/Mips/zeroreg.ll
+++ b/test/CodeGen/Mips/zeroreg.ll
@@ -1,21 +1,109 @@
-; RUN: llc < %s -march=mipsel | FileCheck %s
+; RUN: llc < %s -march=mipsel -mcpu=mips32   | FileCheck %s -check-prefix=ALL -check-prefix=32-CMOV
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=32-CMOV
+; RUN: llc < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=32R6
+; RUN: llc < %s -march=mipsel -mcpu=mips4    | FileCheck %s -check-prefix=ALL -check-prefix=64-CMOV
+; RUN: llc < %s -march=mipsel -mcpu=mips64   | FileCheck %s -check-prefix=ALL -check-prefix=64-CMOV
+; RUN: llc < %s -march=mipsel -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=64-CMOV
+; RUN: llc < %s -march=mipsel -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL -check-prefix=64R6
 
 @g1 = external global i32
 
-define i32 @foo0(i32 %s) nounwind readonly {
+define i32 @sel_icmp_nez_i32_z0(i32 %s) nounwind readonly {
 entry:
-; CHECK:     movn ${{[0-9]+}}, $zero
+; ALL-LABEL: sel_icmp_nez_i32_z0:
+
+; 32-CMOV:       lw $2, 0(${{[0-9]+}})
+; 32-CMOV:       movn $2, $zero, $4
+
+; 32R6:          lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R6:          seleqz $2, $[[R0]], $4
+
+; 64-CMOV:       lw $2, 0(${{[0-9]+}})
+; 64-CMOV:       movn $2, $zero, $4
+
+; 64R6:          lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 64R6:          seleqz $2, $[[R0]], $4
+
   %tobool = icmp ne i32 %s, 0
   %0 = load i32* @g1, align 4
   %cond = select i1 %tobool, i32 0, i32 %0
   ret i32 %cond
 }
 
-define i32 @foo1(i32 %s) nounwind readonly {
+define i32 @sel_icmp_nez_i32_z1(i32 %s) nounwind readonly {
 entry:
-; CHECK:     movz ${{[0-9]+}}, $zero
+; ALL-LABEL: sel_icmp_nez_i32_z1:
+
+; 32-CMOV:       lw $2, 0(${{[0-9]+}})
+; 32-CMOV:       movz $2, $zero, $4
+
+; 32R6:          lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R6:          selnez $2, $[[R0]], $4
+
+; 64-CMOV:       lw $2, 0(${{[0-9]+}})
+; 64-CMOV:       movz $2, $zero, $4
+
+; 64R6:          lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 64R6:          selnez $2, $[[R0]], $4
+
   %tobool = icmp ne i32 %s, 0
   %0 = load i32* @g1, align 4
   %cond = select i1 %tobool, i32 %0, i32 0
   ret i32 %cond
 }
+
+@g2 = external global i64
+
+define i64 @sel_icmp_nez_i64_z0(i64 %s) nounwind readonly {
+entry:
+; ALL-LABEL: sel_icmp_nez_i64_z0:
+
+; 32-CMOV-DAG:   lw $[[R0:2]], 0(${{[0-9]+}})
+; 32-CMOV-DAG:   lw $[[R1:3]], 4(${{[0-9]+}})
+; 32-CMOV-DAG:   movn $[[R0]], $zero, $4
+; 32-CMOV-DAG:   movn $[[R1]], $zero, $4
+
+; 32R6-DAG:      lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R6-DAG:      lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R6-DAG:      or $[[CC:[0-9]+]], $4, $5
+; 32R6-DAG:      seleqz $2, $[[R0]], $[[CC]]
+; 32R6-DAG:      seleqz $3, $[[R1]], $[[CC]]
+
+; 64-CMOV:       ld $2, 0(${{[0-9]+}})
+; 64-CMOV:       movn $2, $zero, $4
+
+; 64R6:          ld $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 64R6:          seleqz $2, $[[R0]], $4
+
+  %tobool = icmp ne i64 %s, 0
+  %0 = load i64* @g2, align 4
+  %cond = select i1 %tobool, i64 0, i64 %0
+  ret i64 %cond
+}
+
+define i64 @sel_icmp_nez_i64_z1(i64 %s) nounwind readonly {
+entry:
+; ALL-LABEL: sel_icmp_nez_i64_z1:
+
+; 32-CMOV-DAG:   lw $[[R0:2]], 0(${{[0-9]+}})
+; 32-CMOV-DAG:   lw $[[R1:3]], 4(${{[0-9]+}})
+; 32-CMOV-DAG:   movz $[[R0]], $zero, $4
+; 32-CMOV-DAG:   movz $[[R1]], $zero, $4
+
+; 32R6-DAG:      lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 32R6-DAG:      lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; 32R6-DAG:      or $[[CC:[0-9]+]], $4, $5
+; 32R6-DAG:      selnez $2, $[[R0]], $[[CC]]
+; 32R6-DAG:      selnez $3, $[[R1]], $[[CC]]
+
+; 64-CMOV:       ld $2, 0(${{[0-9]+}})
+; 64-CMOV:       movz $2, $zero, $4
+
+; 64R6:          ld $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; 64R6:          selnez $2, $[[R0]], $4
+
+  %tobool = icmp ne i64 %s, 0
+  %0 = load i64* @g2, align 4
+  %cond = select i1 %tobool, i64 %0, i64 0
+  ret i64 %cond
+}
diff --git a/test/CodeGen/NVPTX/access-non-generic.ll b/test/CodeGen/NVPTX/access-non-generic.ll
index 0622aa3..c225abf 100644
--- a/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/test/CodeGen/NVPTX/access-non-generic.ll
@@ -74,13 +74,13 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
   ret float %sum5
 }
 
-; Verifies nvptx-favor-non-generic keeps addrspacecasts between pointers of
-; different element types.
+; When hoisting an addrspacecast between different pointer types, replace the
+; addrspacecast with a bitcast.
 define i32 @ld_int_from_float() {
 ; IR-LABEL: @ld_int_from_float
-; IR: addrspacecast
+; IR: load i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*)
 ; PTX-LABEL: ld_int_from_float(
-; PTX: cvta.shared.u{{(32|64)}}
+; PTX: ld.shared.u{{(32|64)}}
   %1 = load i32* addrspacecast(float addrspace(3)* @scalar to i32*), align 4
   ret i32 %1
 }
diff --git a/test/CodeGen/NVPTX/arg-lowering.ll b/test/CodeGen/NVPTX/arg-lowering.ll
new file mode 100644
index 0000000..f7b8a14
--- /dev/null
+++ b/test/CodeGen/NVPTX/arg-lowering.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; CHECK: .visible .func  (.param .align 16 .b8 func_retval0[16]) foo0(
+; CHECK:          .param .align 4 .b8 foo0_param_0[8]
+define <4 x float> @foo0({float, float} %arg0) {
+  ret <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>
+}
+
+; CHECK: .visible .func  (.param .align 8 .b8 func_retval0[8]) foo1(
+; CHECK:          .param .align 8 .b8 foo1_param_0[16]
+define <2 x float> @foo1({float, float, i64} %arg0) {
+  ret <2 x float> <float 1.0, float 1.0>
+}
diff --git a/test/CodeGen/NVPTX/atomics.ll b/test/CodeGen/NVPTX/atomics.ll
new file mode 100644
index 0000000..10ab73d
--- /dev/null
+++ b/test/CodeGen/NVPTX/atomics.ll
@@ -0,0 +1,141 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+
+; CHECK: atom0
+define i32 @atom0(i32* %addr, i32 %val) {
+; CHECK: atom.add.u32
+  %ret = atomicrmw add i32* %addr, i32 %val seq_cst
+  ret i32 %ret
+}
+
+; CHECK: atom1
+define i64 @atom1(i64* %addr, i64 %val) {
+; CHECK: atom.add.u64
+  %ret = atomicrmw add i64* %addr, i64 %val seq_cst
+  ret i64 %ret
+}
+
+; CHECK: atom2
+define i32 @atom2(i32* %subr, i32 %val) {
+; CHECK: neg.s32
+; CHECK: atom.add.u32
+  %ret = atomicrmw sub i32* %subr, i32 %val seq_cst
+  ret i32 %ret
+}
+
+; CHECK: atom3
+define i64 @atom3(i64* %subr, i64 %val) {
+; CHECK: neg.s64
+; CHECK: atom.add.u64
+  %ret = atomicrmw sub i64* %subr, i64 %val seq_cst
+  ret i64 %ret
+}
+
+; CHECK: atom4
+define i32 @atom4(i32* %subr, i32 %val) {
+; CHECK: atom.and.b32
+  %ret = atomicrmw and i32* %subr, i32 %val seq_cst
+  ret i32 %ret
+}
+
+; CHECK: atom5
+define i64 @atom5(i64* %subr, i64 %val) {
+; CHECK: atom.and.b64
+  %ret = atomicrmw and i64* %subr, i64 %val seq_cst
+  ret i64 %ret
+}
+
+;; NAND not yet supported
+;define i32 @atom6(i32* %subr, i32 %val) {
+;  %ret = atomicrmw nand i32* %subr, i32 %val seq_cst
+;  ret i32 %ret
+;}
+
+;define i64 @atom7(i64* %subr, i64 %val) {
+;  %ret = atomicrmw nand i64* %subr, i64 %val seq_cst
+;  ret i64 %ret
+;}
+
+; CHECK: atom8
+define i32 @atom8(i32* %subr, i32 %val) {
+; CHECK: atom.or.b32
+  %ret = atomicrmw or i32* %subr, i32 %val seq_cst
+  ret i32 %ret
+}
+
+; CHECK: atom9
+define i64 @atom9(i64* %subr, i64 %val) {
+; CHECK: atom.or.b64
+  %ret = atomicrmw or i64* %subr, i64 %val seq_cst
+  ret i64 %ret
+}
+
+; CHECK: atom10
+define i32 @atom10(i32* %subr, i32 %val) {
+; CHECK: atom.xor.b32
+  %ret = atomicrmw xor i32* %subr, i32 %val seq_cst
+  ret i32 %ret
+}
+
+; CHECK: atom11
+define i64 @atom11(i64* %subr, i64 %val) {
+; CHECK: atom.xor.b64
+  %ret = atomicrmw xor i64* %subr, i64 %val seq_cst
+  ret i64 %ret
+}
+
+; CHECK: atom12
+define i32 @atom12(i32* %subr, i32 %val) {
+; CHECK: atom.max.s32
+  %ret = atomicrmw max i32* %subr, i32 %val seq_cst
+  ret i32 %ret
+}
+
+; CHECK: atom13
+define i64 @atom13(i64* %subr, i64 %val) {
+; CHECK: atom.max.s64
+  %ret = atomicrmw max i64* %subr, i64 %val seq_cst
+  ret i64 %ret
+}
+
+; CHECK: atom14
+define i32 @atom14(i32* %subr, i32 %val) {
+; CHECK: atom.min.s32
+  %ret = atomicrmw min i32* %subr, i32 %val seq_cst
+  ret i32 %ret
+}
+
+; CHECK: atom15
+define i64 @atom15(i64* %subr, i64 %val) {
+; CHECK: atom.min.s64
+  %ret = atomicrmw min i64* %subr, i64 %val seq_cst
+  ret i64 %ret
+}
+
+; CHECK: atom16
+define i32 @atom16(i32* %subr, i32 %val) {
+; CHECK: atom.max.u32
+  %ret = atomicrmw umax i32* %subr, i32 %val seq_cst
+  ret i32 %ret
+}
+
+; CHECK: atom17
+define i64 @atom17(i64* %subr, i64 %val) {
+; CHECK: atom.max.u64
+  %ret = atomicrmw umax i64* %subr, i64 %val seq_cst
+  ret i64 %ret
+}
+
+; CHECK: atom18
+define i32 @atom18(i32* %subr, i32 %val) {
+; CHECK: atom.min.u32
+  %ret = atomicrmw umin i32* %subr, i32 %val seq_cst
+  ret i32 %ret
+}
+
+; CHECK: atom19
+define i64 @atom19(i64* %subr, i64 %val) {
+; CHECK: atom.min.u64
+  %ret = atomicrmw umin i64* %subr, i64 %val seq_cst
+  ret i64 %ret
+}
diff --git a/test/CodeGen/NVPTX/bfe.ll b/test/CodeGen/NVPTX/bfe.ll
new file mode 100644
index 0000000..2e816fe
--- /dev/null
+++ b/test/CodeGen/NVPTX/bfe.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+
+; CHECK: bfe0
+define i32 @bfe0(i32 %a) {
+; CHECK: bfe.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, 4, 4
+; CHECK-NOT: shr
+; CHECK-NOT: and
+  %val0 = ashr i32 %a, 4
+  %val1 = and i32 %val0, 15
+  ret i32 %val1
+}
+
+; CHECK: bfe1
+define i32 @bfe1(i32 %a) {
+; CHECK: bfe.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, 3, 3
+; CHECK-NOT: shr
+; CHECK-NOT: and
+  %val0 = ashr i32 %a, 3
+  %val1 = and i32 %val0, 7
+  ret i32 %val1
+}
+
+; CHECK: bfe2
+define i32 @bfe2(i32 %a) {
+; CHECK: bfe.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, 5, 3
+; CHECK-NOT: shr
+; CHECK-NOT: and
+  %val0 = ashr i32 %a, 5
+  %val1 = and i32 %val0, 7
+  ret i32 %val1
+}
diff --git a/test/CodeGen/NVPTX/envreg.ll b/test/CodeGen/NVPTX/envreg.ll
new file mode 100644
index 0000000..a341b49
--- /dev/null
+++ b/test/CodeGen/NVPTX/envreg.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg0()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg1()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg2()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg3()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg4()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg5()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg6()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg7()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg8()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg9()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg10()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg11()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg12()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg13()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg14()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg15()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg16()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg17()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg18()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg19()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg20()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg21()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg22()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg23()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg24()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg25()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg26()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg27()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg28()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg29()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg30()
+declare i32 @llvm.nvvm.read.ptx.sreg.envreg31()
+
+
+; CHECK: foo
+define i32 @foo() {
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg0
+  %val0 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg0()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg1
+  %val1 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg1()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg2
+  %val2 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg2()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg3
+  %val3 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg3()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg4
+  %val4 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg4()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg5
+  %val5 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg5()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg6
+  %val6 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg6()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg7
+  %val7 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg7()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg8
+  %val8 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg8()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg9
+  %val9 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg9()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg10
+  %val10 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg10()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg11
+  %val11 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg11()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg12
+  %val12 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg12()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg13
+  %val13 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg13()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg14
+  %val14 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg14()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg15
+  %val15 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg15()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg16
+  %val16 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg16()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg17
+  %val17 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg17()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg18
+  %val18 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg18()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg19
+  %val19 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg19()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg20
+  %val20 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg20()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg21
+  %val21 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg21()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg22
+  %val22 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg22()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg23
+  %val23 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg23()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg24
+  %val24 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg24()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg25
+  %val25 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg25()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg26
+  %val26 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg26()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg27
+  %val27 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg27()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg28
+  %val28 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg28()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg29
+  %val29 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg29()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg30
+  %val30 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg30()
+; CHECK: mov.b32 %r{{[0-9]+}}, %envreg31
+  %val31 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg31()
+
+
+  %ret0 = add i32 %val0, %val1
+  %ret1 = add i32 %ret0, %val2
+  %ret2 = add i32 %ret1, %val3
+  %ret3 = add i32 %ret2, %val4
+  %ret4 = add i32 %ret3, %val5
+  %ret5 = add i32 %ret4, %val6
+  %ret6 = add i32 %ret5, %val7
+  %ret7 = add i32 %ret6, %val8
+  %ret8 = add i32 %ret7, %val9
+  %ret9 = add i32 %ret8, %val10
+  %ret10 = add i32 %ret9, %val11
+  %ret11 = add i32 %ret10, %val12
+  %ret12 = add i32 %ret11, %val13
+  %ret13 = add i32 %ret12, %val14
+  %ret14 = add i32 %ret13, %val15
+  %ret15 = add i32 %ret14, %val16
+  %ret16 = add i32 %ret15, %val17
+  %ret17 = add i32 %ret16, %val18
+  %ret18 = add i32 %ret17, %val19
+  %ret19 = add i32 %ret18, %val20
+  %ret20 = add i32 %ret19, %val21
+  %ret21 = add i32 %ret20, %val22
+  %ret22 = add i32 %ret21, %val23
+  %ret23 = add i32 %ret22, %val24
+  %ret24 = add i32 %ret23, %val25
+  %ret25 = add i32 %ret24, %val26
+  %ret26 = add i32 %ret25, %val27
+  %ret27 = add i32 %ret26, %val28
+  %ret28 = add i32 %ret27, %val29
+  %ret29 = add i32 %ret28, %val30
+  %ret30 = add i32 %ret29, %val31
+
+  ret i32 %ret30
+}
diff --git a/test/CodeGen/NVPTX/gvar-init.ll b/test/CodeGen/NVPTX/gvar-init.ll
new file mode 100644
index 0000000..8c95942
--- /dev/null
+++ b/test/CodeGen/NVPTX/gvar-init.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; Error out if initializer is given for address spaces that do not support initializers
+; XFAIL: *
+@g0 = addrspace(3) global i32 42
diff --git a/test/CodeGen/NVPTX/imad.ll b/test/CodeGen/NVPTX/imad.ll
new file mode 100644
index 0000000..67421c7
--- /dev/null
+++ b/test/CodeGen/NVPTX/imad.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; CHECK: imad
+define i32 @imad(i32 %a, i32 %b, i32 %c) {
+; CHECK: mad.lo.s32
+  %val0 = mul i32 %a, %b
+  %val1 = add i32 %val0, %c
+  ret i32 %val1
+}
diff --git a/test/CodeGen/NVPTX/inline-asm.ll b/test/CodeGen/NVPTX/inline-asm.ll
index d76eb42..6f0578d 100644
--- a/test/CodeGen/NVPTX/inline-asm.ll
+++ b/test/CodeGen/NVPTX/inline-asm.ll
@@ -7,3 +7,10 @@ entry:
   %0 = call float asm "ex2.approx.ftz.f32 $0, $1;", "=f,f"(float %x)
   ret float %0
 }
+
+define i32 @foo(i1 signext %cond, i32 %a, i32 %b) #0 {
+entry:
+; CHECK: selp.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}}
+  %0 = tail call i32 asm "selp.b32 $0, $1, $2, $3;", "=r,r,r,b"(i32 %a, i32 %b, i1 %cond)
+  ret i32 %0
+}
diff --git a/test/CodeGen/NVPTX/isspacep.ll b/test/CodeGen/NVPTX/isspacep.ll
new file mode 100644
index 0000000..47fa7a6
--- /dev/null
+++ b/test/CodeGen/NVPTX/isspacep.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+declare i1 @llvm.nvvm.isspacep.const(i8*) readnone noinline
+declare i1 @llvm.nvvm.isspacep.global(i8*) readnone noinline
+declare i1 @llvm.nvvm.isspacep.local(i8*) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared(i8*) readnone noinline
+
+; CHECK: is_const
+define i1 @is_const(i8* %addr) {
+; CHECK: isspacep.const
+  %v = tail call i1 @llvm.nvvm.isspacep.const(i8* %addr)
+  ret i1 %v
+}
+
+; CHECK: is_global
+define i1 @is_global(i8* %addr) {
+; CHECK: isspacep.global
+  %v = tail call i1 @llvm.nvvm.isspacep.global(i8* %addr)
+  ret i1 %v
+}
+
+; CHECK: is_local
+define i1 @is_local(i8* %addr) {
+; CHECK: isspacep.local
+  %v = tail call i1 @llvm.nvvm.isspacep.local(i8* %addr)
+  ret i1 %v
+}
+
+; CHECK: is_shared
+define i1 @is_shared(i8* %addr) {
+; CHECK: isspacep.shared
+  %v = tail call i1 @llvm.nvvm.isspacep.shared(i8* %addr)
+  ret i1 %v
+}
+
diff --git a/test/CodeGen/NVPTX/ldu-i8.ll b/test/CodeGen/NVPTX/ldu-i8.ll
index 81a82b2..9cc6675 100644
--- a/test/CodeGen/NVPTX/ldu-i8.ll
+++ b/test/CodeGen/NVPTX/ldu-i8.ll
@@ -2,13 +2,15 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
 
-declare i8 @llvm.nvvm.ldu.global.i.i8(i8*)
+declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*)
 
 define i8 @foo(i8* %a) {
 ; Ensure we properly truncate off the high-order 24 bits
 ; CHECK:        ldu.global.u8
 ; CHECK:        cvt.u32.u16
 ; CHECK:        and.b32         %r{{[0-9]+}}, %r{{[0-9]+}}, 255
-  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8(i8* %a)
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a), !align !0
   ret i8 %val
 }
+
+!0 = metadata !{i32 4}
diff --git a/test/CodeGen/NVPTX/ldu-ldg.ll b/test/CodeGen/NVPTX/ldu-ldg.ll
new file mode 100644
index 0000000..3b0619f
--- /dev/null
+++ b/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+
+declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
+declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
+declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
+
+
+; CHECK: func0
+define i8 @func0(i8 addrspace(1)* %ptr) {
+; ldu.global.u8
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
+  ret i8 %val
+}
+
+; CHECK: func1
+define i32 @func1(i32 addrspace(1)* %ptr) {
+; ldu.global.u32
+  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
+  ret i32 %val
+}
+
+; CHECK: func2
+define i8 @func2(i8 addrspace(1)* %ptr) {
+; ld.global.nc.u8
+  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
+  ret i8 %val
+}
+
+; CHECK: func3
+define i32 @func3(i32 addrspace(1)* %ptr) {
+; ld.global.nc.u32
+  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
+  ret i32 %val
+}
+
+
+
+!0 = metadata !{i32 4}
diff --git a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
index 26cadc4..55707ea 100644
--- a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
+++ b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
@@ -7,9 +7,9 @@ define void @reg_plus_offset(i32* %a) {
 ; CHECK:        ldu.global.u32  %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
 ; CHECK:        ldu.global.u32  %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
   %p2 = getelementptr i32* %a, i32 8
-  %t1 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p2), !align !1
+  %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2), !align !1
   %p3 = getelementptr i32* %a, i32 9
-  %t2 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p3), !align !1
+  %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3), !align !1
   %t3 = mul i32 %t1, %t2
   store i32 %t3, i32* %a
   ret void
@@ -17,5 +17,5 @@ define void @reg_plus_offset(i32* %a) {
 
 !1 = metadata !{ i32 4 }
 
-declare i32 @llvm.nvvm.ldu.global.i.i32(i32*)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*)
 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
diff --git a/test/CodeGen/NVPTX/lit.local.cfg b/test/CodeGen/NVPTX/lit.local.cfg
index 85cf8c2..2cb98eb 100644
--- a/test/CodeGen/NVPTX/lit.local.cfg
+++ b/test/CodeGen/NVPTX/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'NVPTX' in targets:
+if not 'NVPTX' in config.root.targets:
     config.unsupported = True
diff --git a/test/CodeGen/NVPTX/managed.ll b/test/CodeGen/NVPTX/managed.ll
new file mode 100644
index 0000000..4d7e781
--- /dev/null
+++ b/test/CodeGen/NVPTX/managed.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+
+; CHECK: .visible .global .align 4 .u32 device_g;
+@device_g = addrspace(1) global i32 zeroinitializer
+; CHECK: .visible .global .attribute(.managed) .align 4 .u32 managed_g;
+@managed_g = addrspace(1) global i32 zeroinitializer
+
+
+!nvvm.annotations = !{!0}
+!0 = metadata !{i32 addrspace(1)* @managed_g, metadata !"managed", i32 1}
diff --git a/test/CodeGen/NVPTX/mulwide.ll b/test/CodeGen/NVPTX/mulwide.ll
new file mode 100644
index 0000000..927946c
--- /dev/null
+++ b/test/CodeGen/NVPTX/mulwide.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; CHECK: mulwide16
+define i32 @mulwide16(i16 %a, i16 %b) {
+; CHECK: mul.wide.s16
+  %val0 = sext i16 %a to i32
+  %val1 = sext i16 %b to i32
+  %val2 = mul i32 %val0, %val1
+  ret i32 %val2
+}
+
+; CHECK: mulwideu16
+define i32 @mulwideu16(i16 %a, i16 %b) {
+; CHECK: mul.wide.u16
+  %val0 = zext i16 %a to i32
+  %val1 = zext i16 %b to i32
+  %val2 = mul i32 %val0, %val1
+  ret i32 %val2
+}
+
+; CHECK: mulwide32
+define i64 @mulwide32(i32 %a, i32 %b) {
+; CHECK: mul.wide.s32
+  %val0 = sext i32 %a to i64
+  %val1 = sext i32 %b to i64
+  %val2 = mul i64 %val0, %val1
+  ret i64 %val2
+}
+
+; CHECK: mulwideu32
+define i64 @mulwideu32(i32 %a, i32 %b) {
+; CHECK: mul.wide.u32
+  %val0 = zext i32 %a to i64
+  %val1 = zext i32 %b to i64
+  %val2 = mul i64 %val0, %val1
+  ret i64 %val2
+}
diff --git a/test/CodeGen/NVPTX/nvvm-reflect.ll b/test/CodeGen/NVPTX/nvvm-reflect.ll
index 0d02194..21e9c69 100644
--- a/test/CodeGen/NVPTX/nvvm-reflect.ll
+++ b/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=0 -O2 | FileCheck %s --check-prefix=USE_MUL_0
 ; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=1 -O2 | FileCheck %s --check-prefix=USE_MUL_1
 
-@str = private addrspace(4) unnamed_addr constant [8 x i8] c"USE_MUL\00"
+@str = private unnamed_addr addrspace(4) constant [8 x i8] c"USE_MUL\00"
 
 declare i32 @__nvvm_reflect(i8*)
 declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)
@@ -32,3 +32,17 @@ exit:
   %ret = phi float [%ret1, %use_mul], [%ret2, %use_add]
   ret float %ret
 }
+
+declare i32 @llvm.nvvm.reflect.p0i8(i8*)
+
+; USE_MUL_0: define i32 @intrinsic
+; USE_MUL_1: define i32 @intrinsic
+define i32 @intrinsic() {
+; USE_MUL_0-NOT: call i32 @llvm.nvvm.reflect
+; USE_MUL_0: ret i32 0
+; USE_MUL_1-NOT: call i32 @llvm.nvvm.reflect
+; USE_MUL_1: ret i32 1
+  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0))
+  %reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)
+  ret i32 %reflect
+}
diff --git a/test/CodeGen/NVPTX/rotate.ll b/test/CodeGen/NVPTX/rotate.ll
new file mode 100644
index 0000000..dfc8b4f
--- /dev/null
+++ b/test/CodeGen/NVPTX/rotate.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck --check-prefix=SM20 %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck --check-prefix=SM35 %s
+
+
+declare i32 @llvm.nvvm.rotate.b32(i32, i32)
+declare i64 @llvm.nvvm.rotate.b64(i64, i32)
+declare i64 @llvm.nvvm.rotate.right.b64(i64, i32)
+
+; SM20: rotate32
+; SM35: rotate32
+define i32 @rotate32(i32 %a, i32 %b) {
+; SM20: shl.b32
+; SM20: sub.s32
+; SM20: shr.b32
+; SM20: add.u32
+; SM35: shf.l.wrap.b32
+  %val = tail call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 %b)
+  ret i32 %val
+}
+
+; SM20: rotate64
+; SM35: rotate64
+define i64 @rotate64(i64 %a, i32 %b) {
+; SM20: shl.b64
+; SM20: sub.u32
+; SM20: shr.b64
+; SM20: add.u64
+; SM35: shf.l.wrap.b32
+; SM35: shf.l.wrap.b32
+  %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 %b)
+  ret i64 %val
+}
+
+; SM20: rotateright64
+; SM35: rotateright64
+define i64 @rotateright64(i64 %a, i32 %b) {
+; SM20: shr.b64
+; SM20: sub.u32
+; SM20: shl.b64
+; SM20: add.u64
+; SM35: shf.r.wrap.b32
+; SM35: shf.r.wrap.b32
+  %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 %b)
+  ret i64 %val
+}
+
+; SM20: rotl0
+; SM35: rotl0
+define i32 @rotl0(i32 %x) {
+; SM20: shl.b32
+; SM20: shr.b32
+; SM20: add.u32
+; SM35: shf.l.wrap.b32
+  %t0 = shl i32 %x, 8
+  %t1 = lshr i32 %x, 24
+  %t2 = or i32 %t0, %t1
+  ret i32 %t2
+}
diff --git a/test/CodeGen/NVPTX/shift-parts.ll b/test/CodeGen/NVPTX/shift-parts.ll
new file mode 100644
index 0000000..748297c
--- /dev/null
+++ b/test/CodeGen/NVPTX/shift-parts.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; CHECK: shift_parts_left_128
+define void @shift_parts_left_128(i128* %val, i128* %amtptr) {
+; CHECK: shl.b64
+; CHECK: mov.u32
+; CHECK: sub.s32
+; CHECK: shr.u64
+; CHECK: or.b64
+; CHECK: add.s32
+; CHECK: shl.b64
+; CHECK: setp.gt.s32
+; CHECK: selp.b64
+; CHECK: shl.b64
+  %amt = load i128* %amtptr
+  %a = load i128* %val
+  %val0 = shl i128 %a, %amt
+  store i128 %val0, i128* %val
+  ret void
+}
+
+; CHECK: shift_parts_right_128
+define void @shift_parts_right_128(i128* %val, i128* %amtptr) {
+; CHECK: shr.u64
+; CHECK: sub.s32
+; CHECK: shl.b64
+; CHECK: or.b64
+; CHECK: add.s32
+; CHECK: shr.s64
+; CHECK: setp.gt.s32
+; CHECK: selp.b64
+; CHECK: shr.s64
+  %amt = load i128* %amtptr
+  %a = load i128* %val
+  %val0 = ashr i128 %a, %amt
+  store i128 %val0, i128* %val
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/weak-global.ll b/test/CodeGen/NVPTX/weak-global.ll
new file mode 100644
index 0000000..2bef4c5
--- /dev/null
+++ b/test/CodeGen/NVPTX/weak-global.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; CHECK: .weak .global .align 4 .u32 g
+@g = common addrspace(1) global i32 zeroinitializer
+
+define i32 @func0() {
+  %val = load i32 addrspace(1)* @g
+  ret i32 %val
+}
diff --git a/test/CodeGen/NVPTX/weak-linkage.ll b/test/CodeGen/NVPTX/weak-linkage.ll
new file mode 100644
index 0000000..7a13357
--- /dev/null
+++ b/test/CodeGen/NVPTX/weak-linkage.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+
+; CHECK: .weak .func foo
+define weak void @foo() {
+  ret void
+}
+
+; CHECK: .visible .func bar
+define void @bar() {
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/Atomics-32.ll b/test/CodeGen/PowerPC/Atomics-32.ll
index b5c03e2..b7f23b1 100644
--- a/test/CodeGen/PowerPC/Atomics-32.ll
+++ b/test/CodeGen/PowerPC/Atomics-32.ll
@@ -529,63 +529,73 @@ define void @test_compare_and_swap() nounwind {
 entry:
   %0 = load i8* @uc, align 1
   %1 = load i8* @sc, align 1
-  %2 = cmpxchg i8* @sc, i8 %0, i8 %1 monotonic monotonic
+  %pair2 = cmpxchg i8* @sc, i8 %0, i8 %1 monotonic monotonic
+  %2 = extractvalue { i8, i1 } %pair2, 0
   store i8 %2, i8* @sc, align 1
   %3 = load i8* @uc, align 1
   %4 = load i8* @sc, align 1
-  %5 = cmpxchg i8* @uc, i8 %3, i8 %4 monotonic monotonic
+  %pair5 = cmpxchg i8* @uc, i8 %3, i8 %4 monotonic monotonic
+  %5 = extractvalue { i8, i1 } %pair5, 0
   store i8 %5, i8* @uc, align 1
   %6 = load i8* @uc, align 1
   %7 = zext i8 %6 to i16
   %8 = load i8* @sc, align 1
   %9 = sext i8 %8 to i16
   %10 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %11 = cmpxchg i16* %10, i16 %7, i16 %9 monotonic monotonic
+  %pair11 = cmpxchg i16* %10, i16 %7, i16 %9 monotonic monotonic
+  %11 = extractvalue { i16, i1 } %pair11, 0
   store i16 %11, i16* @ss, align 2
   %12 = load i8* @uc, align 1
   %13 = zext i8 %12 to i16
   %14 = load i8* @sc, align 1
   %15 = sext i8 %14 to i16
   %16 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %17 = cmpxchg i16* %16, i16 %13, i16 %15 monotonic monotonic
+  %pair17 = cmpxchg i16* %16, i16 %13, i16 %15 monotonic monotonic
+  %17 = extractvalue { i16, i1 } %pair17, 0
   store i16 %17, i16* @us, align 2
   %18 = load i8* @uc, align 1
   %19 = zext i8 %18 to i32
   %20 = load i8* @sc, align 1
   %21 = sext i8 %20 to i32
   %22 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %23 = cmpxchg i32* %22, i32 %19, i32 %21 monotonic monotonic
+  %pair23 = cmpxchg i32* %22, i32 %19, i32 %21 monotonic monotonic
+  %23 = extractvalue { i32, i1 } %pair23, 0
   store i32 %23, i32* @si, align 4
   %24 = load i8* @uc, align 1
   %25 = zext i8 %24 to i32
   %26 = load i8* @sc, align 1
   %27 = sext i8 %26 to i32
   %28 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %29 = cmpxchg i32* %28, i32 %25, i32 %27 monotonic monotonic
+  %pair29 = cmpxchg i32* %28, i32 %25, i32 %27 monotonic monotonic
+  %29 = extractvalue { i32, i1 } %pair29, 0
   store i32 %29, i32* @ui, align 4
   %30 = load i8* @uc, align 1
   %31 = zext i8 %30 to i32
   %32 = load i8* @sc, align 1
   %33 = sext i8 %32 to i32
   %34 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %35 = cmpxchg i32* %34, i32 %31, i32 %33 monotonic monotonic
+  %pair35 = cmpxchg i32* %34, i32 %31, i32 %33 monotonic monotonic
+  %35 = extractvalue { i32, i1 } %pair35, 0
   store i32 %35, i32* @sl, align 4
   %36 = load i8* @uc, align 1
   %37 = zext i8 %36 to i32
   %38 = load i8* @sc, align 1
   %39 = sext i8 %38 to i32
   %40 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %41 = cmpxchg i32* %40, i32 %37, i32 %39 monotonic monotonic
+  %pair41 = cmpxchg i32* %40, i32 %37, i32 %39 monotonic monotonic
+  %41 = extractvalue { i32, i1 } %pair41, 0
   store i32 %41, i32* @ul, align 4
   %42 = load i8* @uc, align 1
   %43 = load i8* @sc, align 1
-  %44 = cmpxchg i8* @sc, i8 %42, i8 %43 monotonic monotonic
+  %pair44 = cmpxchg i8* @sc, i8 %42, i8 %43 monotonic monotonic
+  %44 = extractvalue { i8, i1 } %pair44, 0
   %45 = icmp eq i8 %44, %42
   %46 = zext i1 %45 to i32
   store i32 %46, i32* @ui, align 4
   %47 = load i8* @uc, align 1
   %48 = load i8* @sc, align 1
-  %49 = cmpxchg i8* @uc, i8 %47, i8 %48 monotonic monotonic
+  %pair49 = cmpxchg i8* @uc, i8 %47, i8 %48 monotonic monotonic
+  %49 = extractvalue { i8, i1 } %pair49, 0
   %50 = icmp eq i8 %49, %47
   %51 = zext i1 %50 to i32
   store i32 %51, i32* @ui, align 4
@@ -594,7 +604,8 @@ entry:
   %54 = load i8* @sc, align 1
   %55 = sext i8 %54 to i16
   %56 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %57 = cmpxchg i16* %56, i16 %53, i16 %55 monotonic monotonic
+  %pair57 = cmpxchg i16* %56, i16 %53, i16 %55 monotonic monotonic
+  %57 = extractvalue { i16, i1 } %pair57, 0
   %58 = icmp eq i16 %57, %53
   %59 = zext i1 %58 to i32
   store i32 %59, i32* @ui, align 4
@@ -603,7 +614,8 @@ entry:
   %62 = load i8* @sc, align 1
   %63 = sext i8 %62 to i16
   %64 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %65 = cmpxchg i16* %64, i16 %61, i16 %63 monotonic monotonic
+  %pair65 = cmpxchg i16* %64, i16 %61, i16 %63 monotonic monotonic
+  %65 = extractvalue { i16, i1 } %pair65, 0
   %66 = icmp eq i16 %65, %61
   %67 = zext i1 %66 to i32
   store i32 %67, i32* @ui, align 4
@@ -612,7 +624,8 @@ entry:
   %70 = load i8* @sc, align 1
   %71 = sext i8 %70 to i32
   %72 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %73 = cmpxchg i32* %72, i32 %69, i32 %71 monotonic monotonic
+  %pair73 = cmpxchg i32* %72, i32 %69, i32 %71 monotonic monotonic
+  %73 = extractvalue { i32, i1 } %pair73, 0
   %74 = icmp eq i32 %73, %69
   %75 = zext i1 %74 to i32
   store i32 %75, i32* @ui, align 4
@@ -621,7 +634,8 @@ entry:
   %78 = load i8* @sc, align 1
   %79 = sext i8 %78 to i32
   %80 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %81 = cmpxchg i32* %80, i32 %77, i32 %79 monotonic monotonic
+  %pair81 = cmpxchg i32* %80, i32 %77, i32 %79 monotonic monotonic
+  %81 = extractvalue { i32, i1 } %pair81, 0
   %82 = icmp eq i32 %81, %77
   %83 = zext i1 %82 to i32
   store i32 %83, i32* @ui, align 4
@@ -630,7 +644,8 @@ entry:
   %86 = load i8* @sc, align 1
   %87 = sext i8 %86 to i32
   %88 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %89 = cmpxchg i32* %88, i32 %85, i32 %87 monotonic monotonic
+  %pair89 = cmpxchg i32* %88, i32 %85, i32 %87 monotonic monotonic
+  %89 = extractvalue { i32, i1 } %pair89, 0
   %90 = icmp eq i32 %89, %85
   %91 = zext i1 %90 to i32
   store i32 %91, i32* @ui, align 4
@@ -639,7 +654,8 @@ entry:
   %94 = load i8* @sc, align 1
   %95 = sext i8 %94 to i32
   %96 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %97 = cmpxchg i32* %96, i32 %93, i32 %95 monotonic monotonic
+  %pair97 = cmpxchg i32* %96, i32 %93, i32 %95 monotonic monotonic
+  %97 = extractvalue { i32, i1 } %pair97, 0
   %98 = icmp eq i32 %97, %93
   %99 = zext i1 %98 to i32
   store i32 %99, i32* @ui, align 4
diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll
index 4588bc0..c701fef 100644
--- a/test/CodeGen/PowerPC/Frames-alloca.ll
+++ b/test/CodeGen/PowerPC/Frames-alloca.ll
@@ -12,15 +12,15 @@
 ; CHECK-PPC32-NOFP: stw r31, -4(r1)
 ; CHECK-PPC32-NOFP: lwz r1, 0(r1)
 ; CHECK-PPC32-NOFP: lwz r31, -4(r1)
-; CHECK-PPC32-RS: stwu r1, -80(r1)
-; CHECK-PPC32-RS-NOFP: stwu r1, -80(r1)
+; CHECK-PPC32-RS: stwu r1, -48(r1)
+; CHECK-PPC32-RS-NOFP: stwu r1, -48(r1)
 
 ; CHECK-PPC64: std r31, -8(r1)
-; CHECK-PPC64: stdu r1, -128(r1)
+; CHECK-PPC64: stdu r1, -64(r1)
 ; CHECK-PPC64: ld r1, 0(r1)
 ; CHECK-PPC64: ld r31, -8(r1)
 ; CHECK-PPC64-NOFP: std r31, -8(r1)
-; CHECK-PPC64-NOFP: stdu r1, -128(r1)
+; CHECK-PPC64-NOFP: stdu r1, -64(r1)
 ; CHECK-PPC64-NOFP: ld r1, 0(r1)
 ; CHECK-PPC64-NOFP: ld r31, -8(r1)
 
diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll
index d07fea7..0ccea42 100644
--- a/test/CodeGen/PowerPC/Frames-large.ll
+++ b/test/CodeGen/PowerPC/Frames-large.ll
@@ -15,9 +15,9 @@ define i32* @f1() nounwind {
 
 ; PPC32-NOFP: _f1:
 ; PPC32-NOFP: 	lis r0, -1
-; PPC32-NOFP: 	ori r0, r0, 32704
+; PPC32-NOFP: 	ori r0, r0, 32736
 ; PPC32-NOFP: 	stwux r1, r1, r0
-; PPC32-NOFP: 	addi r3, r1, 68
+; PPC32-NOFP: 	addi r3, r1, 36
 ; PPC32-NOFP: 	lwz r1, 0(r1)
 ; PPC32-NOFP: 	blr 
 
@@ -25,10 +25,10 @@ define i32* @f1() nounwind {
 ; PPC32-FP: _f1:
 ; PPC32-FP:	lis r0, -1
 ; PPC32-FP:	stw r31, -4(r1)
-; PPC32-FP:	ori r0, r0, 32704
+; PPC32-FP:	ori r0, r0, 32736
 ; PPC32-FP:	stwux r1, r1, r0
 ; PPC32-FP:	mr r31, r1
-; PPC32-FP:	addi r3, r31, 64
+; PPC32-FP:	addi r3, r31, 32
 ; PPC32-FP:	lwz r1, 0(r1)
 ; PPC32-FP:	lwz r31, -4(r1)
 ; PPC32-FP:	blr 
@@ -36,9 +36,9 @@ define i32* @f1() nounwind {
 
 ; PPC64-NOFP: _f1:
 ; PPC64-NOFP: 	lis r0, -1
-; PPC64-NOFP: 	ori r0, r0, 32656
+; PPC64-NOFP: 	ori r0, r0, 32720
 ; PPC64-NOFP: 	stdux r1, r1, r0
-; PPC64-NOFP: 	addi r3, r1, 116
+; PPC64-NOFP: 	addi r3, r1, 52
 ; PPC64-NOFP: 	ld r1, 0(r1)
 ; PPC64-NOFP: 	blr 
 
@@ -46,10 +46,10 @@ define i32* @f1() nounwind {
 ; PPC64-FP: _f1:
 ; PPC64-FP:	lis r0, -1
 ; PPC64-FP:	std r31, -8(r1)
-; PPC64-FP:	ori r0, r0, 32640
+; PPC64-FP:	ori r0, r0, 32704
 ; PPC64-FP:	stdux r1, r1, r0
 ; PPC64-FP:	mr r31, r1
-; PPC64-FP:	addi r3, r31, 124
+; PPC64-FP:	addi r3, r31, 60
 ; PPC64-FP:	ld r1, 0(r1)
 ; PPC64-FP:	ld r31, -8(r1)
 ; PPC64-FP:	blr 
diff --git a/test/CodeGen/PowerPC/Frames-small.ll b/test/CodeGen/PowerPC/Frames-small.ll
index 0f6bd10..28c1a5b 100644
--- a/test/CodeGen/PowerPC/Frames-small.ll
+++ b/test/CodeGen/PowerPC/Frames-small.ll
@@ -1,25 +1,25 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1
 ; RUN: not grep "stw r31, -4(r1)" %t1
-; RUN: grep "stwu r1, -16448(r1)" %t1
-; RUN: grep "addi r1, r1, 16448" %t1
+; RUN: grep "stwu r1, -16416(r1)" %t1
+; RUN: grep "addi r1, r1, 16416" %t1
 ; RUN: llc < %s -march=ppc32 | \
 ; RUN: not grep "lwz r31, -4(r1)"
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
 ; RUN:   -o %t2
 ; RUN: grep "stw r31, -4(r1)" %t2
-; RUN: grep "stwu r1, -16448(r1)" %t2
-; RUN: grep "addi r1, r1, 16448" %t2
+; RUN: grep "stwu r1, -16416(r1)" %t2
+; RUN: grep "addi r1, r1, 16416" %t2
 ; RUN: grep "lwz r31, -4(r1)" %t2
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3
 ; RUN: not grep "std r31, -8(r1)" %t3
-; RUN: grep "stdu r1, -16496(r1)" %t3
-; RUN: grep "addi r1, r1, 16496" %t3
+; RUN: grep "stdu r1, -16432(r1)" %t3
+; RUN: grep "addi r1, r1, 16432" %t3
 ; RUN: not grep "ld r31, -8(r1)" %t3
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
 ; RUN:   -o %t4
 ; RUN: grep "std r31, -8(r1)" %t4
-; RUN: grep "stdu r1, -16512(r1)" %t4
-; RUN: grep "addi r1, r1, 16512" %t4
+; RUN: grep "stdu r1, -16448(r1)" %t4
+; RUN: grep "addi r1, r1, 16448" %t4
 ; RUN: grep "ld r31, -8(r1)" %t4
 
 define i32* @f1() {
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index 083df47..997a016 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -11,7 +11,8 @@ define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
 define i32 @exchange_and_cmp(i32* %mem) nounwind {
 ; CHECK-LABEL: exchange_and_cmp:
 ; CHECK: lwarx
-  %tmp = cmpxchg i32* %mem, i32 0, i32 1 monotonic monotonic
+  %tmppair = cmpxchg i32* %mem, i32 0, i32 1 monotonic monotonic
+  %tmp = extractvalue { i32, i1 } %tmppair, 0
 ; CHECK: stwcx.
 ; CHECK: stwcx.
   ret i32 %tmp
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 261335e..843250f 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -11,7 +11,8 @@ define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
 define i64 @exchange_and_cmp(i64* %mem) nounwind {
 ; CHECK-LABEL: exchange_and_cmp:
 ; CHECK: ldarx
-  %tmp = cmpxchg i64* %mem, i64 0, i64 1 monotonic monotonic
+  %tmppair = cmpxchg i64* %mem, i64 0, i64 1 monotonic monotonic
+  %tmp = extractvalue { i64, i1 } %tmppair, 0
 ; CHECK: stdcx.
 ; CHECK: stdcx.
   ret i64 %tmp
diff --git a/test/CodeGen/PowerPC/early-ret2.ll b/test/CodeGen/PowerPC/early-ret2.ll
index a8e456f..1784777 100644
--- a/test/CodeGen/PowerPC/early-ret2.ll
+++ b/test/CodeGen/PowerPC/early-ret2.ll
@@ -11,7 +11,7 @@ while.body.lr.ph:                                 ; preds = %entry
   br i1 undef, label %while.end, label %while.body
 
 while.body:                                       ; preds = %while.body, %while.body.lr.ph
-  br i1 false, label %while.end, label %while.body, !llvm.vectorizer.already_vectorized !0
+  br i1 false, label %while.end, label %while.body, !llvm.loop.vectorize.already_vectorized !0
 
 while.end:                                        ; preds = %while.body, %while.body.lr.ph, %entry
   ret void
diff --git a/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll b/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
index db0d8ed..ac41e8c 100644
--- a/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
+++ b/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
@@ -116,18 +116,6 @@ entry:
   ret void
 }
 
-define void @fptoui_float_i64(float %a) nounwind ssp {
-entry:
-; ELF64: fptoui_float_i64
-  %b.addr = alloca i64, align 4
-  %conv = fptoui float %a to i64
-; ELF64: fctiduz
-; ELF64: stfd
-; ELF64: ld
-  store i64 %conv, i64* %b.addr, align 4
-  ret void
-}
-
 define void @fptoui_double_i32(double %a) nounwind ssp {
 entry:
 ; ELF64: fptoui_double_i32
@@ -140,14 +128,3 @@ entry:
   ret void
 }
 
-define void @fptoui_double_i64(double %a) nounwind ssp {
-entry:
-; ELF64: fptoui_double_i64
-  %b.addr = alloca i64, align 8
-  %conv = fptoui double %a to i64
-; ELF64: fctiduz
-; ELF64: stfd
-; ELF64: ld
-  store i64 %conv, i64* %b.addr, align 8
-  ret void
-}
diff --git a/test/CodeGen/PowerPC/fast-isel-conversion.ll b/test/CodeGen/PowerPC/fast-isel-conversion.ll
index a31c312..5e00675 100644
--- a/test/CodeGen/PowerPC/fast-isel-conversion.ll
+++ b/test/CodeGen/PowerPC/fast-isel-conversion.ll
@@ -1,15 +1,24 @@
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 | FileCheck %s --check-prefix=PPC970
+
+;; Tests for 970 don't use -fast-isel-abort because we intentionally punt
+;; to SelectionDAG in some cases.
 
 ; Test sitofp
 
 define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp {
 entry:
 ; ELF64: sitofp_single_i64
+; PPC970: sitofp_single_i64
   %b.addr = alloca float, align 4
   %conv = sitofp i64 %a to float
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfids
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
+; PPC970: frsp
   store float %conv, float* %b.addr, align 4
   ret void
 }
@@ -17,11 +26,16 @@ entry:
 define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp {
 entry:
 ; ELF64: sitofp_single_i32
+; PPC970: sitofp_single_i32
   %b.addr = alloca float, align 4
   %conv = sitofp i32 %a to float
 ; ELF64: std
 ; ELF64: lfiwax
 ; ELF64: fcfids
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
+; PPC970: frsp
   store float %conv, float* %b.addr, align 4
   ret void
 }
@@ -29,12 +43,18 @@ entry:
 define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp {
 entry:
 ; ELF64: sitofp_single_i16
+; PPC970: sitofp_single_i16
   %b.addr = alloca float, align 4
   %conv = sitofp i16 %a to float
 ; ELF64: extsh
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfids
+; PPC970: extsh
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
+; PPC970: frsp
   store float %conv, float* %b.addr, align 4
   ret void
 }
@@ -42,12 +62,18 @@ entry:
 define void @sitofp_single_i8(i8 %a) nounwind ssp {
 entry:
 ; ELF64: sitofp_single_i8
+; PPC970: sitofp_single_i8
   %b.addr = alloca float, align 4
   %conv = sitofp i8 %a to float
 ; ELF64: extsb
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfids
+; PPC970: extsb
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
+; PPC970: frsp
   store float %conv, float* %b.addr, align 4
   ret void
 }
@@ -55,11 +81,15 @@ entry:
 define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: sitofp_double_i32
+; PPC970: sitofp_double_i32
   %b.addr = alloca double, align 8
   %conv = sitofp i32 %a to double
 ; ELF64: std
 ; ELF64: lfiwax
 ; ELF64: fcfid
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
   store double %conv, double* %b.addr, align 8
   ret void
 }
@@ -67,11 +97,15 @@ entry:
 define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: sitofp_double_i64
+; PPC970: sitofp_double_i64
   %b.addr = alloca double, align 8
   %conv = sitofp i64 %a to double
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfid
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
   store double %conv, double* %b.addr, align 8
   ret void
 }
@@ -79,12 +113,17 @@ entry:
 define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: sitofp_double_i16
+; PPC970: sitofp_double_i16
   %b.addr = alloca double, align 8
   %conv = sitofp i16 %a to double
 ; ELF64: extsh
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfid
+; PPC970: extsh
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
   store double %conv, double* %b.addr, align 8
   ret void
 }
@@ -92,12 +131,17 @@ entry:
 define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: sitofp_double_i8
+; PPC970: sitofp_double_i8
   %b.addr = alloca double, align 8
   %conv = sitofp i8 %a to double
 ; ELF64: extsb
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfid
+; PPC970: extsb
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
   store double %conv, double* %b.addr, align 8
   ret void
 }
@@ -107,11 +151,13 @@ entry:
 define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp {
 entry:
 ; ELF64: uitofp_single_i64
+; PPC970: uitofp_single_i64
   %b.addr = alloca float, align 4
   %conv = uitofp i64 %a to float
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfidus
+; PPC970-NOT: fcfidus
   store float %conv, float* %b.addr, align 4
   ret void
 }
@@ -119,11 +165,14 @@ entry:
 define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
 entry:
 ; ELF64: uitofp_single_i32
+; PPC970: uitofp_single_i32
   %b.addr = alloca float, align 4
   %conv = uitofp i32 %a to float
 ; ELF64: std
 ; ELF64: lfiwzx
 ; ELF64: fcfidus
+; PPC970-NOT: lfiwzx
+; PPC970-NOT: fcfidus
   store float %conv, float* %b.addr, align 4
   ret void
 }
@@ -131,12 +180,18 @@ entry:
 define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
 entry:
 ; ELF64: uitofp_single_i16
+; PPC970: uitofp_single_i16
   %b.addr = alloca float, align 4
   %conv = uitofp i16 %a to float
 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfidus
+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
+; PPC970: frsp
   store float %conv, float* %b.addr, align 4
   ret void
 }
@@ -144,12 +199,18 @@ entry:
 define void @uitofp_single_i8(i8 %a) nounwind ssp {
 entry:
 ; ELF64: uitofp_single_i8
+; PPC970: uitofp_single_i8
   %b.addr = alloca float, align 4
   %conv = uitofp i8 %a to float
 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfidus
+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
+; PPC970: frsp
   store float %conv, float* %b.addr, align 4
   ret void
 }
@@ -157,11 +218,13 @@ entry:
 define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: uitofp_double_i64
+; PPC970: uitofp_double_i64
   %b.addr = alloca double, align 8
   %conv = uitofp i64 %a to double
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfidu
+; PPC970-NOT: fcfidu
   store double %conv, double* %b.addr, align 8
   ret void
 }
@@ -169,11 +232,14 @@ entry:
 define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: uitofp_double_i32
+; PPC970: uitofp_double_i32
   %b.addr = alloca double, align 8
   %conv = uitofp i32 %a to double
 ; ELF64: std
 ; ELF64: lfiwzx
 ; ELF64: fcfidu
+; PPC970-NOT: lfiwzx
+; PPC970-NOT: fcfidu
   store double %conv, double* %b.addr, align 8
   ret void
 }
@@ -181,12 +247,17 @@ entry:
 define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: uitofp_double_i16
+; PPC970: uitofp_double_i16
   %b.addr = alloca double, align 8
   %conv = uitofp i16 %a to double
 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfidu
+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
   store double %conv, double* %b.addr, align 8
   ret void
 }
@@ -194,12 +265,17 @@ entry:
 define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: uitofp_double_i8
+; PPC970: uitofp_double_i8
   %b.addr = alloca double, align 8
   %conv = uitofp i8 %a to double
 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfidu
+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
   store double %conv, double* %b.addr, align 8
   ret void
 }
@@ -209,11 +285,15 @@ entry:
 define void @fptosi_float_i32(float %a) nounwind ssp {
 entry:
 ; ELF64: fptosi_float_i32
+; PPC970: fptosi_float_i32
   %b.addr = alloca i32, align 4
   %conv = fptosi float %a to i32
 ; ELF64: fctiwz
 ; ELF64: stfd
 ; ELF64: lwa
+; PPC970: fctiwz
+; PPC970: stfd
+; PPC970: lwa
   store i32 %conv, i32* %b.addr, align 4
   ret void
 }
@@ -221,11 +301,15 @@ entry:
 define void @fptosi_float_i64(float %a) nounwind ssp {
 entry:
 ; ELF64: fptosi_float_i64
+; PPC970: fptosi_float_i64
   %b.addr = alloca i64, align 4
   %conv = fptosi float %a to i64
 ; ELF64: fctidz
 ; ELF64: stfd
 ; ELF64: ld
+; PPC970: fctidz
+; PPC970: stfd
+; PPC970: ld
   store i64 %conv, i64* %b.addr, align 4
   ret void
 }
@@ -233,11 +317,15 @@ entry:
 define void @fptosi_double_i32(double %a) nounwind ssp {
 entry:
 ; ELF64: fptosi_double_i32
+; PPC970: fptosi_double_i32
   %b.addr = alloca i32, align 8
   %conv = fptosi double %a to i32
 ; ELF64: fctiwz
 ; ELF64: stfd
 ; ELF64: lwa
+; PPC970: fctiwz
+; PPC970: stfd
+; PPC970: lwa
   store i32 %conv, i32* %b.addr, align 8
   ret void
 }
@@ -245,11 +333,15 @@ entry:
 define void @fptosi_double_i64(double %a) nounwind ssp {
 entry:
 ; ELF64: fptosi_double_i64
+; PPC970: fptosi_double_i64
   %b.addr = alloca i64, align 8
   %conv = fptosi double %a to i64
 ; ELF64: fctidz
 ; ELF64: stfd
 ; ELF64: ld
+; PPC970: fctidz
+; PPC970: stfd
+; PPC970: ld
   store i64 %conv, i64* %b.addr, align 8
   ret void
 }
@@ -259,11 +351,15 @@ entry:
 define void @fptoui_float_i32(float %a) nounwind ssp {
 entry:
 ; ELF64: fptoui_float_i32
+; PPC970: fptoui_float_i32
   %b.addr = alloca i32, align 4
   %conv = fptoui float %a to i32
 ; ELF64: fctiwuz
 ; ELF64: stfd
 ; ELF64: lwz
+; PPC970: fctidz
+; PPC970: stfd
+; PPC970: lwz
   store i32 %conv, i32* %b.addr, align 4
   ret void
 }
@@ -271,11 +367,13 @@ entry:
 define void @fptoui_float_i64(float %a) nounwind ssp {
 entry:
 ; ELF64: fptoui_float_i64
+; PPC970: fptoui_float_i64
   %b.addr = alloca i64, align 4
   %conv = fptoui float %a to i64
 ; ELF64: fctiduz
 ; ELF64: stfd
 ; ELF64: ld
+; PPC970-NOT: fctiduz
   store i64 %conv, i64* %b.addr, align 4
   ret void
 }
@@ -283,11 +381,15 @@ entry:
 define void @fptoui_double_i32(double %a) nounwind ssp {
 entry:
 ; ELF64: fptoui_double_i32
+; PPC970: fptoui_double_i32
   %b.addr = alloca i32, align 8
   %conv = fptoui double %a to i32
 ; ELF64: fctiwuz
 ; ELF64: stfd
 ; ELF64: lwz
+; PPC970: fctidz
+; PPC970: stfd
+; PPC970: lwz
   store i32 %conv, i32* %b.addr, align 8
   ret void
 }
@@ -295,11 +397,13 @@ entry:
 define void @fptoui_double_i64(double %a) nounwind ssp {
 entry:
 ; ELF64: fptoui_double_i64
+; PPC970: fptoui_double_i64
   %b.addr = alloca i64, align 8
   %conv = fptoui double %a to i64
 ; ELF64: fctiduz
 ; ELF64: stfd
 ; ELF64: ld
+; PPC970-NOT: fctiduz
   store i64 %conv, i64* %b.addr, align 8
   ret void
 }
diff --git a/test/CodeGen/PowerPC/func-addr.ll b/test/CodeGen/PowerPC/func-addr.ll
new file mode 100644
index 0000000..4533c62
--- /dev/null
+++ b/test/CodeGen/PowerPC/func-addr.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple powerpc64-linux < %s | FileCheck %s
+; RUN: llc -O0 -mtriple powerpc64-linux < %s | FileCheck %s
+
+define void @foo()  {
+  ret void
+}
+declare i32 @bar(i8*)
+
+; CHECK-LABEL: {{^}}zed:
+; CHECK:        addis 3, 2, foo@toc@ha
+; CHECK-NEXT:   addi 3, 3, foo@toc@l
+; CHECK-NEXT:   bl bar
+
+define  void @zed() {
+  call i32 @bar(i8* bitcast (void ()* @foo to i8*))
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/hello-reloc.s b/test/CodeGen/PowerPC/hello-reloc.s
index 1e3fb8f..97dfbb5 100644
--- a/test/CodeGen/PowerPC/hello-reloc.s
+++ b/test/CodeGen/PowerPC/hello-reloc.s
@@ -62,17 +62,17 @@ L_.str:                                 ; @.str
 ; DARWIN-G4-DUMP:AddressSize: 32bit
 ; DARWIN-G4-DUMP:Relocations [
 ; DARWIN-G4-DUMP:  Section __text {
-; DARWIN-G4-DUMP:    0x34 1 2 0 PPC_RELOC_BR24 0 -
-; DARWIN-G4-DUMP:    0x30 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 _main
-; DARWIN-G4-DUMP:    0x0 0 2 n/a PPC_RELOC_PAIR 1 _main
-; DARWIN-G4-DUMP:    0x2C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 _main
-; DARWIN-G4-DUMP:    0x60 0 2 n/a PPC_RELOC_PAIR 1 _main
+; DARWIN-G4-DUMP:    0x34 1 2 0 PPC_RELOC_BR24 0 0x3
+; DARWIN-G4-DUMP:    0x30 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 0x74
+; DARWIN-G4-DUMP:    0x0 0 2 n/a PPC_RELOC_PAIR 1 0x14
+; DARWIN-G4-DUMP:    0x2C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 0x74
+; DARWIN-G4-DUMP:    0x60 0 2 n/a PPC_RELOC_PAIR 1 0x14
 ; DARWIN-G4-DUMP:  }
 ; DARWIN-G4-DUMP:  Section __picsymbolstub1 {
-; DARWIN-G4-DUMP:    0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 _main
-; DARWIN-G4-DUMP:    0x0 0 2 n/a PPC_RELOC_PAIR 1 _main
-; DARWIN-G4-DUMP:    0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 _main
-; DARWIN-G4-DUMP:    0x18 0 2 n/a PPC_RELOC_PAIR 1 _main
+; DARWIN-G4-DUMP:    0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 0x70
+; DARWIN-G4-DUMP:    0x0 0 2 n/a PPC_RELOC_PAIR 1 0x58
+; DARWIN-G4-DUMP:    0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 0x70
+; DARWIN-G4-DUMP:    0x18 0 2 n/a PPC_RELOC_PAIR 1 0x58
 ; DARWIN-G4-DUMP:  }
 ; DARWIN-G4-DUMP:  Section __la_symbol_ptr {
 ; DARWIN-G4-DUMP:    0x0 0 2 1 PPC_RELOC_VANILLA 0 dyld_stub_binding_helper
diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg
index 2e46300..5d33887 100644
--- a/test/CodeGen/PowerPC/lit.local.cfg
+++ b/test/CodeGen/PowerPC/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'PowerPC' in targets:
+if not 'PowerPC' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/PowerPC/ppc64-altivec-abi.ll b/test/CodeGen/PowerPC/ppc64-altivec-abi.ll
new file mode 100644
index 0000000..0bed329
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-altivec-abi.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ppc64 -mattr=+altivec | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Verify that in the 64-bit Linux ABI, vector arguments take up space
+; in the parameter save area.
+
+define i64 @callee(i64 %a, <4 x i32> %b, i64 %c, <4 x i32> %d, i64 %e) {
+entry:
+  ret i64 %e
+}
+; CHECK-LABEL: callee:
+; CHECK: ld 3, 112(1)
+
+define void @caller(i64 %x, <4 x i32> %y) {
+entry:
+  tail call void @test(i64 %x, <4 x i32> %y, i64 %x, <4 x i32> %y, i64 %x)
+  ret void
+}
+; CHECK-LABEL: caller:
+; CHECK: std 3, 112(1)
+
+declare void @test(i64, <4 x i32>, i64, <4 x i32>, i64)
+
diff --git a/test/CodeGen/PowerPC/ppc64-byval-align.ll b/test/CodeGen/PowerPC/ppc64-byval-align.ll
new file mode 100644
index 0000000..0e73cf2
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-byval-align.ll
@@ -0,0 +1,56 @@
+; RUN: llc -O1 < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.test = type { i64, [8 x i8] }
+%struct.pad = type { [8 x i64] }
+
+@gt = common global %struct.test zeroinitializer, align 16
+@gp = common global %struct.pad zeroinitializer, align 8
+
+define signext i32 @callee1(i32 signext %x, %struct.test* byval align 16 nocapture readnone %y, i32 signext %z) {
+entry:
+  ret i32 %z
+}
+; CHECK-LABEL: @callee1
+; CHECK: mr 3, 7
+; CHECK: blr
+
+declare signext i32 @test1(i32 signext, %struct.test* byval align 16, i32 signext)
+define void @caller1(i32 signext %z) {
+entry:
+  %call = tail call signext i32 @test1(i32 signext 0, %struct.test* byval align 16 @gt, i32 signext %z)
+  ret void
+}
+; CHECK-LABEL: @caller1
+; CHECK: mr [[REG:[0-9]+]], 3
+; CHECK: mr 7, [[REG]]
+; CHECK: bl test1
+
+define i64 @callee2(%struct.pad* byval nocapture readnone %x, i32 signext %y, %struct.test* byval align 16 nocapture readonly %z) {
+entry:
+  %x1 = getelementptr inbounds %struct.test* %z, i64 0, i32 0
+  %0 = load i64* %x1, align 16
+  ret i64 %0
+}
+; CHECK-LABEL: @callee2
+; CHECK: ld [[REG:[0-9]+]], 128(1)
+; CHECK: mr 3, [[REG]]
+; CHECK: blr
+
+declare i64 @test2(%struct.pad* byval, i32 signext, %struct.test* byval align 16)
+define void @caller2(i64 %z) {
+entry:
+  %tmp = alloca %struct.test, align 16
+  %.compoundliteral.sroa.0.0..sroa_idx = getelementptr inbounds %struct.test* %tmp, i64 0, i32 0
+  store i64 %z, i64* %.compoundliteral.sroa.0.0..sroa_idx, align 16
+  %call = call i64 @test2(%struct.pad* byval @gp, i32 signext 0, %struct.test* byval align 16 %tmp)
+  ret void
+}
+; CHECK-LABEL: @caller2
+; CHECK: std 3, [[OFF:[0-9]+]](1)
+; CHECK: ld [[REG:[0-9]+]], [[OFF]](1)
+; CHECK: std [[REG]], 128(1)
+; CHECK: bl test2
+
diff --git a/test/CodeGen/PowerPC/ppc64-calls.ll b/test/CodeGen/PowerPC/ppc64-calls.ll
index 1f3bb71..31794be 100644
--- a/test/CodeGen/PowerPC/ppc64-calls.ll
+++ b/test/CodeGen/PowerPC/ppc64-calls.ll
@@ -42,12 +42,18 @@ define void @test_indirect(void ()* nocapture %fp) nounwind {
   ret void
 }
 
-; Absolute vales should be have the TOC restore 'nop'
+; Absolute values must use the regular indirect call sequence
+; The main purpose of this test is to ensure that BLA is not
+; used on 64-bit SVR4 (as e.g. on Darwin).
 define void @test_abs() nounwind {
 ; CHECK-LABEL: test_abs:
   tail call void inttoptr (i64 1024 to void ()*)() nounwind
-; CHECK: bla 1024
-; CHECK-NEXT: nop
+; CHECK: ld [[FP:[0-9]+]], 1024(0)
+; CHECK: ld 11, 1040(0)
+; CHECK: ld 2, 1032(0)
+; CHECK-NEXT: mtctr [[FP]]
+; CHECK-NEXT: bctrl
+; CHECK-NEXT: ld 2, 40(1)
   ret void
 }
 
diff --git a/test/CodeGen/PowerPC/ppc64-smallarg.ll b/test/CodeGen/PowerPC/ppc64-smallarg.ll
new file mode 100644
index 0000000..0d5b078
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-smallarg.ll
@@ -0,0 +1,59 @@
+; Verify that small structures and float arguments are passed in the
+; least significant part of a stack slot doubleword.
+
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.large_arg = type { [8 x i64] }
+%struct.small_arg = type { i16, i8 }
+
+@gl = common global %struct.large_arg zeroinitializer, align 8
+@gs = common global %struct.small_arg zeroinitializer, align 2
+@gf = common global float 0.000000e+00, align 4
+
+define void @callee1(%struct.small_arg* noalias nocapture sret %agg.result, %struct.large_arg* byval nocapture readnone %pad, %struct.small_arg* byval nocapture readonly %x) {
+entry:
+  %0 = bitcast %struct.small_arg* %x to i32*
+  %1 = bitcast %struct.small_arg* %agg.result to i32*
+  %2 = load i32* %0, align 2
+  store i32 %2, i32* %1, align 2
+  ret void
+}
+; CHECK: @callee1
+; CHECK: lwz {{[0-9]+}}, 124(1)
+; CHECK: blr
+
+define void @caller1() {
+entry:
+  %tmp = alloca %struct.small_arg, align 2
+  call void @test1(%struct.small_arg* sret %tmp, %struct.large_arg* byval @gl, %struct.small_arg* byval @gs)
+  ret void
+}
+; CHECK: @caller1
+; CHECK: stw {{[0-9]+}}, 124(1)
+; CHECK: bl test1
+
+declare void @test1(%struct.small_arg* sret, %struct.large_arg* byval, %struct.small_arg* byval)
+
+define float @callee2(float %pad1, float %pad2, float %pad3, float %pad4, float %pad5, float %pad6, float %pad7, float %pad8, float %pad9, float %pad10, float %pad11, float %pad12, float %pad13, float %x) {
+entry:
+  ret float %x
+}
+; CHECK: @callee2
+; CHECK: lfs {{[0-9]+}}, 156(1)
+; CHECK: blr
+
+define void @caller2() {
+entry:
+  %0 = load float* @gf, align 4
+  %call = tail call float @test2(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %0)
+  ret void
+}
+; CHECK: @caller2
+; CHECK: stfs {{[0-9]+}}, 156(1)
+; CHECK: bl test2
+
+declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float)
+
diff --git a/test/CodeGen/PowerPC/ppc64le-smallarg.ll b/test/CodeGen/PowerPC/ppc64le-smallarg.ll
new file mode 100644
index 0000000..fcb1e92
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64le-smallarg.ll
@@ -0,0 +1,59 @@
+; Verify that small structures and float arguments are passed in the
+; least significant part of a stack slot doubleword.
+
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+%struct.large_arg = type { [8 x i64] }
+%struct.small_arg = type { i16, i8 }
+
+@gl = common global %struct.large_arg zeroinitializer, align 8
+@gs = common global %struct.small_arg zeroinitializer, align 2
+@gf = common global float 0.000000e+00, align 4
+
+define void @callee1(%struct.small_arg* noalias nocapture sret %agg.result, %struct.large_arg* byval nocapture readnone %pad, %struct.small_arg* byval nocapture readonly %x) {
+entry:
+  %0 = bitcast %struct.small_arg* %x to i32*
+  %1 = bitcast %struct.small_arg* %agg.result to i32*
+  %2 = load i32* %0, align 2
+  store i32 %2, i32* %1, align 2
+  ret void
+}
+; CHECK: @callee1
+; CHECK: lwz {{[0-9]+}}, 120(1)
+; CHECK: blr
+
+define void @caller1() {
+entry:
+  %tmp = alloca %struct.small_arg, align 2
+  call void @test1(%struct.small_arg* sret %tmp, %struct.large_arg* byval @gl, %struct.small_arg* byval @gs)
+  ret void
+}
+; CHECK: @caller1
+; CHECK: stw {{[0-9]+}}, 120(1)
+; CHECK: bl test1
+
+declare void @test1(%struct.small_arg* sret, %struct.large_arg* byval, %struct.small_arg* byval)
+
+define float @callee2(float %pad1, float %pad2, float %pad3, float %pad4, float %pad5, float %pad6, float %pad7, float %pad8, float %pad9, float %pad10, float %pad11, float %pad12, float %pad13, float %x) {
+entry:
+  ret float %x
+}
+; CHECK: @callee2
+; CHECK: lfs {{[0-9]+}}, 152(1)
+; CHECK: blr
+
+define void @caller2() {
+entry:
+  %0 = load float* @gf, align 4
+  %call = tail call float @test2(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %0)
+  ret void
+}
+; CHECK: @caller2
+; CHECK: stfs {{[0-9]+}}, 152(1)
+; CHECK: bl test2
+
+declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float)
+
diff --git a/test/CodeGen/PowerPC/ppcf128-endian.ll b/test/CodeGen/PowerPC/ppcf128-endian.ll
new file mode 100644
index 0000000..2a5f13a
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppcf128-endian.ll
@@ -0,0 +1,154 @@
+; RUN: llc -mcpu=pwr7 -mattr=+altivec < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+@g = common global ppc_fp128 0xM00000000000000000000000000000000, align 16
+
+define void @callee(ppc_fp128 %x) {
+entry:
+  %x.addr = alloca ppc_fp128, align 16
+  store ppc_fp128 %x, ppc_fp128* %x.addr, align 16
+  %0 = load ppc_fp128* %x.addr, align 16
+  store ppc_fp128 %0, ppc_fp128* @g, align 16
+  ret void
+}
+; CHECK: @callee
+; CHECK: ld [[REG:[0-9]+]], .LC
+; CHECK: stfd 2, 8([[REG]])
+; CHECK: stfd 1, 0([[REG]])
+; CHECK: blr
+
+define void @caller() {
+entry:
+  %0 = load ppc_fp128* @g, align 16
+  call void @test(ppc_fp128 %0)
+  ret void
+}
+; CHECK: @caller
+; CHECK: ld [[REG:[0-9]+]], .LC
+; CHECK: lfd 2, 8([[REG]])
+; CHECK: lfd 1, 0([[REG]])
+; CHECK: bl test
+
+declare void @test(ppc_fp128)
+
+define void @caller_const() {
+entry:
+  call void @test(ppc_fp128 0xM3FF00000000000000000000000000000)
+  ret void
+}
+; CHECK: .LCPI[[LC:[0-9]+]]_0:
+; CHECK: .long   1065353216
+; CHECK: .LCPI[[LC]]_1:
+; CHECK: .long   0
+; CHECK: @caller_const
+; CHECK: addi [[REG0:[0-9]+]], {{[0-9]+}}, .LCPI[[LC]]_0
+; CHECK: addi [[REG1:[0-9]+]], {{[0-9]+}}, .LCPI[[LC]]_1
+; CHECK: lfs 1, 0([[REG0]])
+; CHECK: lfs 2, 0([[REG1]])
+; CHECK: bl test
+
+define ppc_fp128 @result() {
+entry:
+  %0 = load ppc_fp128* @g, align 16
+  ret ppc_fp128 %0
+}
+; CHECK: @result
+; CHECK: ld [[REG:[0-9]+]], .LC
+; CHECK: lfd 1, 0([[REG]])
+; CHECK: lfd 2, 8([[REG]])
+; CHECK: blr
+
+define void @use_result() {
+entry:
+  %call = tail call ppc_fp128 @test_result() #3
+  store ppc_fp128 %call, ppc_fp128* @g, align 16
+  ret void
+}
+; CHECK: @use_result
+; CHECK: bl test_result
+; CHECK: ld [[REG:[0-9]+]], .LC
+; CHECK: stfd 2, 8([[REG]])
+; CHECK: stfd 1, 0([[REG]])
+; CHECK: blr
+
+declare ppc_fp128 @test_result()
+
+define void @caller_result() {
+entry:
+  %call = tail call ppc_fp128 @test_result()
+  tail call void @test(ppc_fp128 %call)
+  ret void
+}
+; CHECK: @caller_result
+; CHECK: bl test_result
+; CHECK-NEXT: nop
+; CHECK-NEXT: bl test
+; CHECK-NEXT: nop
+
+define i128 @convert_from(ppc_fp128 %x) {
+entry:
+  %0 = bitcast ppc_fp128 %x to i128
+  ret i128 %0
+}
+; CHECK: @convert_from
+; CHECK: stfd 1, [[OFF1:.*]](1)
+; CHECK: stfd 2, [[OFF2:.*]](1)
+; CHECK: ld 3, [[OFF1]](1)
+; CHECK: ld 4, [[OFF2]](1)
+; CHECK: blr
+
+define ppc_fp128 @convert_to(i128 %x) {
+entry:
+  %0 = bitcast i128 %x to ppc_fp128
+  ret ppc_fp128 %0
+}
+; CHECK: @convert_to
+; CHECK: std 3, [[OFF1:.*]](1)
+; CHECK: std 4, [[OFF2:.*]](1)
+; CHECK: lfd 1, [[OFF1]](1)
+; CHECK: lfd 2, [[OFF2]](1)
+; CHECK: blr
+
+define ppc_fp128 @convert_to2(i128 %x) {
+entry:
+  %shl = shl i128 %x, 1
+  %0 = bitcast i128 %shl to ppc_fp128
+  ret ppc_fp128 %0
+}
+
+; CHECK: @convert_to
+; CHECK: std 3, [[OFF1:.*]](1)
+; CHECK: std 4, [[OFF2:.*]](1)
+; CHECK: lfd 1, [[OFF1]](1)
+; CHECK: lfd 2, [[OFF2]](1)
+; CHECK: blr
+
+define double @convert_vector(<4 x i32> %x) {
+entry:
+  %cast = bitcast <4 x i32> %x to ppc_fp128
+  %conv = fptrunc ppc_fp128 %cast to double
+  ret double %conv
+}
+; CHECK: @convert_vector
+; CHECK: addi [[REG:[0-9]+]], 1, [[OFF:.*]]
+; CHECK: stvx 2, 0, [[REG]]
+; CHECK: lfd 1, [[OFF]](1)
+; CHECK: blr
+
+declare void @llvm.va_start(i8*)
+
+define double @vararg(i32 %a, ...) {
+entry:
+  %va = alloca i8*, align 8
+  %va1 = bitcast i8** %va to i8*
+  call void @llvm.va_start(i8* %va1)
+  %arg = va_arg i8** %va, ppc_fp128
+  %conv = fptrunc ppc_fp128 %arg to double
+  ret double %conv
+}
+; CHECK: @vararg
+; CHECK: lfd 1, 0({{[0-9]+}})
+; CHECK: blr
+
diff --git a/test/CodeGen/PowerPC/resolvefi-basereg.ll b/test/CodeGen/PowerPC/resolvefi-basereg.ll
new file mode 100644
index 0000000..62c2d13
--- /dev/null
+++ b/test/CodeGen/PowerPC/resolvefi-basereg.ll
@@ -0,0 +1,362 @@
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+
+; Due to a bug in resolveFrameIndex we ended up with invalid addresses
+; containing a base register 0.  Verify that this no longer happens.
+; CHECK-NOT: (0)
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.Info = type { i32, i32, i8*, i8*, i8*, [32 x i8*], i64, [32 x i64], i64, i64, i64, [32 x i64] }
+%struct.S1998 = type { [2 x i32*], i64, i64, double, i16, i32, [29 x %struct.anon], i16, i8, i32, [8 x i8] }
+%struct.anon = type { [16 x double], i32, i16, i32, [3 x i8], [6 x i8], [4 x i32], i8 }
+
+@info = global %struct.Info zeroinitializer, align 8
+@fails = global i32 0, align 4
+@intarray = global [256 x i32] zeroinitializer, align 4
+@s1998 = global %struct.S1998 zeroinitializer, align 16
+@a1998 = external global [5 x %struct.S1998]
+
+define void @test1998() {
+entry:
+  %i = alloca i32, align 4
+  %j = alloca i32, align 4
+  %tmp = alloca i32, align 4
+  %agg.tmp = alloca %struct.S1998, align 16
+  %agg.tmp111 = alloca %struct.S1998, align 16
+  %agg.tmp112 = alloca %struct.S1998, align 16
+  %agg.tmp113 = alloca %struct.S1998, align 16
+  %agg.tmp114 = alloca %struct.S1998, align 16
+  %agg.tmp115 = alloca %struct.S1998, align 16
+  %agg.tmp116 = alloca %struct.S1998, align 16
+  %agg.tmp117 = alloca %struct.S1998, align 16
+  %agg.tmp118 = alloca %struct.S1998, align 16
+  %agg.tmp119 = alloca %struct.S1998, align 16
+  call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.S1998* @s1998 to i8*), i8 0, i64 5168, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* bitcast ([5 x %struct.S1998]* @a1998 to i8*), i8 0, i64 25840, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.Info* @info to i8*), i8 0, i64 832, i32 8, i1 false)
+  store i8* bitcast (%struct.S1998* @s1998 to i8*), i8** getelementptr inbounds (%struct.Info* @info, i32 0, i32 2), align 8
+  store i8* bitcast ([5 x %struct.S1998]* @a1998 to i8*), i8** getelementptr inbounds (%struct.Info* @info, i32 0, i32 3), align 8
+  store i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 3) to i8*), i8** getelementptr inbounds (%struct.Info* @info, i32 0, i32 4), align 8
+  store i64 5168, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 6), align 8
+  store i64 16, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
+  store i64 16, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 9), align 8
+  store i64 16, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 10), align 8
+  %0 = load i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
+  %sub = sub i64 %0, 1
+  %and = and i64 ptrtoint (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 3) to i64), %sub
+  %tobool = icmp ne i64 %and, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %1 = load i32* @fails, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @fails, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  store i32 0, i32* %i, align 4
+  store i32 0, i32* %j, align 4
+  %2 = load i32* %i, align 4
+  %idxprom = sext i32 %2 to i64
+  %arrayidx = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom
+  store i8* bitcast (i32** getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 0, i64 1) to i8*), i8** %arrayidx, align 8
+  %3 = load i32* %i, align 4
+  %idxprom1 = sext i32 %3 to i64
+  %arrayidx2 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom1
+  store i64 8, i64* %arrayidx2, align 8
+  %4 = load i32* %i, align 4
+  %idxprom3 = sext i32 %4 to i64
+  %arrayidx4 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom3
+  store i64 8, i64* %arrayidx4, align 8
+  store i32* getelementptr inbounds ([256 x i32]* @intarray, i32 0, i64 190), i32** getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 0, i64 1), align 8
+  store i32* getelementptr inbounds ([256 x i32]* @intarray, i32 0, i64 241), i32** getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 0, i64 1), align 8
+  %5 = load i32* %i, align 4
+  %inc5 = add nsw i32 %5, 1
+  store i32 %inc5, i32* %i, align 4
+  %6 = load i32* %i, align 4
+  %idxprom6 = sext i32 %6 to i64
+  %arrayidx7 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom6
+  store i8* bitcast (i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 1) to i8*), i8** %arrayidx7, align 8
+  %7 = load i32* %i, align 4
+  %idxprom8 = sext i32 %7 to i64
+  %arrayidx9 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom8
+  store i64 8, i64* %arrayidx9, align 8
+  %8 = load i32* %i, align 4
+  %idxprom10 = sext i32 %8 to i64
+  %arrayidx11 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom10
+  store i64 8, i64* %arrayidx11, align 8
+  store i64 -3866974208859106459, i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 1), align 8
+  store i64 -185376695371304091, i64* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 1), align 8
+  %9 = load i32* %i, align 4
+  %inc12 = add nsw i32 %9, 1
+  store i32 %inc12, i32* %i, align 4
+  %10 = load i32* %i, align 4
+  %idxprom13 = sext i32 %10 to i64
+  %arrayidx14 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom13
+  store i8* bitcast (i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 2) to i8*), i8** %arrayidx14, align 8
+  %11 = load i32* %i, align 4
+  %idxprom15 = sext i32 %11 to i64
+  %arrayidx16 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom15
+  store i64 8, i64* %arrayidx16, align 8
+  %12 = load i32* %i, align 4
+  %idxprom17 = sext i32 %12 to i64
+  %arrayidx18 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom17
+  store i64 8, i64* %arrayidx18, align 8
+  store i64 -963638028680427187, i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 2), align 8
+  store i64 7510542175772455554, i64* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 2), align 8
+  %13 = load i32* %i, align 4
+  %inc19 = add nsw i32 %13, 1
+  store i32 %inc19, i32* %i, align 4
+  %14 = load i32* %i, align 4
+  %idxprom20 = sext i32 %14 to i64
+  %arrayidx21 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom20
+  store i8* bitcast (double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 3) to i8*), i8** %arrayidx21, align 8
+  %15 = load i32* %i, align 4
+  %idxprom22 = sext i32 %15 to i64
+  %arrayidx23 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom22
+  store i64 8, i64* %arrayidx23, align 8
+  %16 = load i32* %i, align 4
+  %idxprom24 = sext i32 %16 to i64
+  %arrayidx25 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom24
+  store i64 16, i64* %arrayidx25, align 8
+  store double 0xC0F8783300000000, double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 3), align 16
+  store double 0xC10DF3CCC0000000, double* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 3), align 16
+  %17 = load i32* %i, align 4
+  %inc26 = add nsw i32 %17, 1
+  store i32 %inc26, i32* %i, align 4
+  %18 = load i32* %i, align 4
+  %idxprom27 = sext i32 %18 to i64
+  %arrayidx28 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom27
+  store i8* bitcast (i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 4) to i8*), i8** %arrayidx28, align 8
+  %19 = load i32* %i, align 4
+  %idxprom29 = sext i32 %19 to i64
+  %arrayidx30 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom29
+  store i64 2, i64* %arrayidx30, align 8
+  %20 = load i32* %i, align 4
+  %idxprom31 = sext i32 %20 to i64
+  %arrayidx32 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom31
+  store i64 2, i64* %arrayidx32, align 8
+  store i16 -15897, i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 4), align 2
+  store i16 30935, i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 4), align 2
+  %21 = load i32* %i, align 4
+  %inc33 = add nsw i32 %21, 1
+  store i32 %inc33, i32* %i, align 4
+  store i32 -419541644, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 5), align 4
+  store i32 2125926812, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 5), align 4
+  %22 = load i32* %j, align 4
+  %inc34 = add nsw i32 %22, 1
+  store i32 %inc34, i32* %j, align 4
+  %23 = load i32* %i, align 4
+  %idxprom35 = sext i32 %23 to i64
+  %arrayidx36 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom35
+  store i8* bitcast (double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 0, i64 0) to i8*), i8** %arrayidx36, align 8
+  %24 = load i32* %i, align 4
+  %idxprom37 = sext i32 %24 to i64
+  %arrayidx38 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom37
+  store i64 8, i64* %arrayidx38, align 8
+  %25 = load i32* %i, align 4
+  %idxprom39 = sext i32 %25 to i64
+  %arrayidx40 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom39
+  store i64 8, i64* %arrayidx40, align 8
+  store double 0xC0FC765780000000, double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 0, i64 0), align 8
+  store double 0xC1025CD7A0000000, double* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 0, i64 0), align 8
+  %26 = load i32* %i, align 4
+  %inc41 = add nsw i32 %26, 1
+  store i32 %inc41, i32* %i, align 4
+  %bf.load = load i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 1), align 8
+  %bf.clear = and i32 %bf.load, 7
+  %bf.set = or i32 %bf.clear, 16
+  store i32 %bf.set, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 1), align 8
+  %bf.load42 = load i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 1), align 8
+  %bf.clear43 = and i32 %bf.load42, 7
+  %bf.set44 = or i32 %bf.clear43, 24
+  store i32 %bf.set44, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 1), align 8
+  %27 = load i32* %j, align 4
+  %inc45 = add nsw i32 %27, 1
+  store i32 %inc45, i32* %j, align 4
+  %bf.load46 = load i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 2), align 4
+  %bf.clear47 = and i16 %bf.load46, 127
+  store i16 %bf.clear47, i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 2), align 4
+  %bf.load48 = load i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 2), align 4
+  %bf.clear49 = and i16 %bf.load48, 127
+  store i16 %bf.clear49, i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 2), align 4
+  %28 = load i32* %j, align 4
+  %inc50 = add nsw i32 %28, 1
+  store i32 %inc50, i32* %j, align 4
+  %bf.load51 = load i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 3), align 8
+  %bf.clear52 = and i32 %bf.load51, 63
+  store i32 %bf.clear52, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 3), align 8
+  %bf.load53 = load i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 3), align 8
+  %bf.clear54 = and i32 %bf.load53, 63
+  %bf.set55 = or i32 %bf.clear54, 64
+  store i32 %bf.set55, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 3), align 8
+  %29 = load i32* %j, align 4
+  %inc56 = add nsw i32 %29, 1
+  store i32 %inc56, i32* %j, align 4
+  %bf.load57 = load i24* bitcast ([3 x i8]* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 4) to i24*), align 4
+  %bf.clear58 = and i24 %bf.load57, 63
+  store i24 %bf.clear58, i24* bitcast ([3 x i8]* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 4) to i24*), align 4
+  %bf.load59 = load i24* bitcast ([3 x i8]* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 4) to i24*), align 4
+  %bf.clear60 = and i24 %bf.load59, 63
+  store i24 %bf.clear60, i24* bitcast ([3 x i8]* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 4) to i24*), align 4
+  %30 = load i32* %j, align 4
+  %inc61 = add nsw i32 %30, 1
+  store i32 %inc61, i32* %j, align 4
+  %31 = load i32* %i, align 4
+  %idxprom62 = sext i32 %31 to i64
+  %arrayidx63 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom62
+  store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 5), i8** %arrayidx63, align 8
+  %32 = load i32* %i, align 4
+  %idxprom64 = sext i32 %32 to i64
+  %arrayidx65 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom64
+  store i64 1, i64* %arrayidx65, align 8
+  %33 = load i32* %i, align 4
+  %idxprom66 = sext i32 %33 to i64
+  %arrayidx67 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom66
+  store i64 1, i64* %arrayidx67, align 8
+  store i8 -83, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 5), align 1
+  store i8 -67, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 5, i64 5), align 1
+  %34 = load i32* %i, align 4
+  %inc68 = add nsw i32 %34, 1
+  store i32 %inc68, i32* %i, align 4
+  %35 = load i32* %i, align 4
+  %idxprom69 = sext i32 %35 to i64
+  %arrayidx70 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom69
+  store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 1), i8** %arrayidx70, align 8
+  %36 = load i32* %i, align 4
+  %idxprom71 = sext i32 %36 to i64
+  %arrayidx72 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom71
+  store i64 1, i64* %arrayidx72, align 8
+  %37 = load i32* %i, align 4
+  %idxprom73 = sext i32 %37 to i64
+  %arrayidx74 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom73
+  store i64 1, i64* %arrayidx74, align 8
+  store i8 34, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 1), align 1
+  store i8 64, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 5, i64 1), align 1
+  %38 = load i32* %i, align 4
+  %inc75 = add nsw i32 %38, 1
+  store i32 %inc75, i32* %i, align 4
+  %39 = load i32* %i, align 4
+  %idxprom76 = sext i32 %39 to i64
+  %arrayidx77 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom76
+  store i8* bitcast (i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 6, i64 3) to i8*), i8** %arrayidx77, align 8
+  %40 = load i32* %i, align 4
+  %idxprom78 = sext i32 %40 to i64
+  %arrayidx79 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom78
+  store i64 4, i64* %arrayidx79, align 8
+  %41 = load i32* %i, align 4
+  %idxprom80 = sext i32 %41 to i64
+  %arrayidx81 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom80
+  store i64 4, i64* %arrayidx81, align 8
+  store i32 -3, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 6, i64 3), align 4
+  store i32 -3, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 6, i64 3), align 4
+  %42 = load i32* %i, align 4
+  %inc82 = add nsw i32 %42, 1
+  store i32 %inc82, i32* %i, align 4
+  %43 = load i32* %i, align 4
+  %idxprom83 = sext i32 %43 to i64
+  %arrayidx84 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom83
+  store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 7), i8** %arrayidx84, align 8
+  %44 = load i32* %i, align 4
+  %idxprom85 = sext i32 %44 to i64
+  %arrayidx86 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom85
+  store i64 1, i64* %arrayidx86, align 8
+  %45 = load i32* %i, align 4
+  %idxprom87 = sext i32 %45 to i64
+  %arrayidx88 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom87
+  store i64 1, i64* %arrayidx88, align 8
+  store i8 106, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 7), align 1
+  store i8 -102, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 7), align 1
+  %46 = load i32* %i, align 4
+  %inc89 = add nsw i32 %46, 1
+  store i32 %inc89, i32* %i, align 4
+  %47 = load i32* %i, align 4
+  %idxprom90 = sext i32 %47 to i64
+  %arrayidx91 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom90
+  store i8* bitcast (i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 7) to i8*), i8** %arrayidx91, align 8
+  %48 = load i32* %i, align 4
+  %idxprom92 = sext i32 %48 to i64
+  %arrayidx93 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom92
+  store i64 2, i64* %arrayidx93, align 8
+  %49 = load i32* %i, align 4
+  %idxprom94 = sext i32 %49 to i64
+  %arrayidx95 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom94
+  store i64 2, i64* %arrayidx95, align 8
+  store i16 29665, i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 7), align 2
+  store i16 7107, i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 7), align 2
+  %50 = load i32* %i, align 4
+  %inc96 = add nsw i32 %50, 1
+  store i32 %inc96, i32* %i, align 4
+  %51 = load i32* %i, align 4
+  %idxprom97 = sext i32 %51 to i64
+  %arrayidx98 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom97
+  store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 8), i8** %arrayidx98, align 8
+  %52 = load i32* %i, align 4
+  %idxprom99 = sext i32 %52 to i64
+  %arrayidx100 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom99
+  store i64 1, i64* %arrayidx100, align 8
+  %53 = load i32* %i, align 4
+  %idxprom101 = sext i32 %53 to i64
+  %arrayidx102 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom101
+  store i64 1, i64* %arrayidx102, align 8
+  store i8 52, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 8), align 1
+  store i8 -86, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 8), align 1
+  %54 = load i32* %i, align 4
+  %inc103 = add nsw i32 %54, 1
+  store i32 %inc103, i32* %i, align 4
+  %55 = load i32* %i, align 4
+  %idxprom104 = sext i32 %55 to i64
+  %arrayidx105 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom104
+  store i8* bitcast (i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 9) to i8*), i8** %arrayidx105, align 8
+  %56 = load i32* %i, align 4
+  %idxprom106 = sext i32 %56 to i64
+  %arrayidx107 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom106
+  store i64 4, i64* %arrayidx107, align 8
+  %57 = load i32* %i, align 4
+  %idxprom108 = sext i32 %57 to i64
+  %arrayidx109 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom108
+  store i64 4, i64* %arrayidx109, align 8
+  store i32 -54118453, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 9), align 4
+  store i32 1668755823, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 9), align 4
+  %58 = load i32* %i, align 4
+  %inc110 = add nsw i32 %58, 1
+  store i32 %inc110, i32* %i, align 4
+  store i32 %inc110, i32* %tmp
+  %59 = load i32* %tmp
+  %60 = load i32* %i, align 4
+  store i32 %60, i32* getelementptr inbounds (%struct.Info* @info, i32 0, i32 0), align 4
+  %61 = load i32* %j, align 4
+  store i32 %61, i32* getelementptr inbounds (%struct.Info* @info, i32 0, i32 1), align 4
+  %62 = bitcast %struct.S1998* %agg.tmp111 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %62, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
+  %63 = bitcast %struct.S1998* %agg.tmp112 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %63, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
+  call void @check1998(%struct.S1998* sret %agg.tmp, %struct.S1998* byval align 16 %agg.tmp111, %struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 1), %struct.S1998* byval align 16 %agg.tmp112)
+  call void @checkx1998(%struct.S1998* byval align 16 %agg.tmp)
+  %64 = bitcast %struct.S1998* %agg.tmp113 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %64, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
+  %65 = bitcast %struct.S1998* %agg.tmp114 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %65, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
+  %66 = bitcast %struct.S1998* %agg.tmp115 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %66, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
+  call void (i32, ...)* @check1998va(i32 signext 1, double 1.000000e+00, %struct.S1998* byval align 16 %agg.tmp113, i64 2, %struct.S1998* byval align 16 %agg.tmp114, %struct.S1998* byval align 16 %agg.tmp115)
+  %67 = bitcast %struct.S1998* %agg.tmp116 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %67, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
+  %68 = bitcast %struct.S1998* %agg.tmp117 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %68, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
+  %69 = bitcast %struct.S1998* %agg.tmp118 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %69, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
+  %70 = bitcast %struct.S1998* %agg.tmp119 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %70, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
+  call void (i32, ...)* @check1998va(i32 signext 2, %struct.S1998* byval align 16 %agg.tmp116, %struct.S1998* byval align 16 %agg.tmp117, ppc_fp128 0xM40000000000000000000000000000000, %struct.S1998* byval align 16 %agg.tmp118, %struct.S1998* byval align 16 %agg.tmp119)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+
+declare void @check1998(%struct.S1998* sret, %struct.S1998* byval align 16, %struct.S1998*, %struct.S1998* byval align 16)
+declare void @check1998va(i32 signext, ...)
+declare void @checkx1998(%struct.S1998* byval align 16 %arg)
+
diff --git a/test/CodeGen/PowerPC/svr4-redzone.ll b/test/CodeGen/PowerPC/svr4-redzone.ll
index 7c51b67..bee3ac3 100644
--- a/test/CodeGen/PowerPC/svr4-redzone.ll
+++ b/test/CodeGen/PowerPC/svr4-redzone.ll
@@ -36,4 +36,4 @@ entry:
 ; PPC32: stwu 1, -240(1)
 
 ; PPC64-LABEL: bigstack:
-; PPC64: stdu 1, -352(1)
+; PPC64: stdu 1, -288(1)
diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll
index 4bce8c8..2733089 100644
--- a/test/CodeGen/PowerPC/vec_cmp.ll
+++ b/test/CodeGen/PowerPC/vec_cmp.ll
@@ -36,7 +36,7 @@ define <8 x i8> @v8si8_cmp(<8 x i8> %x, <8 x i8> %y) nounwind readnone {
 ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
-; Adicional tests for v16i8 since it is a altivec native type
+; Additional tests for v16i8 since it is a altivec native type
 
 define <16 x i8> @v16si8_cmp_eq(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
   %cmp = icmp eq <16 x i8> %x, %y
@@ -165,7 +165,7 @@ define <4 x i16> @v4si16_cmp(<4 x i16> %x, <4 x i16> %y) nounwind readnone {
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
-; Adicional tests for v8i16 since it is an altivec native type
+; Additional tests for v8i16 since it is an altivec native type
 
 define <8 x i16> @v8si16_cmp_eq(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
 entry:
@@ -298,7 +298,7 @@ define <2 x i32> @v2si32_cmp(<2 x i32> %x, <2 x i32> %y) nounwind readnone {
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
-; Adicional tests for v4si32 since it is an altivec native type
+; Additional tests for v4si32 since it is an altivec native type
 
 define <4 x i32> @v4si32_cmp_eq(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
 entry:
@@ -449,7 +449,7 @@ entry:
 ; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
-; Adicional tests for v4f32 since it is a altivec native type
+; Additional tests for v4f32 since it is a altivec native type
 
 define <4 x float> @v4f32_cmp_eq(<4 x float> %x, <4 x float> %y) nounwind readnone {
 entry:
diff --git a/test/CodeGen/PowerPC/vec_misaligned.ll b/test/CodeGen/PowerPC/vec_misaligned.ll
index d7ed64a..304a84d 100644
--- a/test/CodeGen/PowerPC/vec_misaligned.ll
+++ b/test/CodeGen/PowerPC/vec_misaligned.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec | FileCheck %s -check-prefix=CHECK-LE
 
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin8"
@@ -8,6 +10,8 @@ target triple = "powerpc-apple-darwin8"
 
 define void @foo(i32 %x, ...) {
 entry:
+; CHECK: foo:
+; CHECK-LE: foo:
 	%x_addr = alloca i32		; <i32*> [#uses=1]
 	%ap = alloca i8*		; <i8**> [#uses=3]
 	%ap.0 = alloca i8*		; <i8**> [#uses=3]
@@ -27,6 +31,10 @@ entry:
 	%tmp8 = getelementptr %struct.u16qi* %tmp6, i32 0, i32 0		; <<16 x i8>*> [#uses=1]
 	%tmp9 = getelementptr %struct.u16qi* %tmp7, i32 0, i32 0		; <<16 x i8>*> [#uses=1]
 	%tmp10 = load <16 x i8>* %tmp9, align 4		; <<16 x i8>> [#uses=1]
+; CHECK: lvsl
+; CHECK: vperm
+; CHECK-LE: lvsr
+; CHECK-LE: vperm
 	store <16 x i8> %tmp10, <16 x i8>* %tmp8, align 4
 	br label %return
 
diff --git a/test/CodeGen/PowerPC/vec_mul.ll b/test/CodeGen/PowerPC/vec_mul.ll
index c376751..8a44815 100644
--- a/test/CodeGen/PowerPC/vec_mul.ll
+++ b/test/CodeGen/PowerPC/vec_mul.ll
@@ -1,4 +1,6 @@
 ; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -march=ppc32 -mattr=+altivec | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -march=ppc64 -mattr=+altivec | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -march=ppc64 -mattr=+altivec | FileCheck %s -check-prefix=CHECK-LE
 
 define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
 	%tmp = load <4 x i32>* %X		; <<4 x i32>> [#uses=1]
@@ -9,6 +11,9 @@ define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
 ; CHECK-LABEL: test_v4i32:
 ; CHECK: vmsumuhm
 ; CHECK-NOT: mullw
+; CHECK-LE-LABEL: test_v4i32:
+; CHECK-LE: vmsumuhm
+; CHECK-LE-NOT: mullw
 
 define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
 	%tmp = load <8 x i16>* %X		; <<8 x i16>> [#uses=1]
@@ -19,6 +24,9 @@ define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
 ; CHECK-LABEL: test_v8i16:
 ; CHECK: vmladduhm
 ; CHECK-NOT: mullw
+; CHECK-LE-LABEL: test_v8i16:
+; CHECK-LE: vmladduhm
+; CHECK-LE-NOT: mullw
 
 define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
 	%tmp = load <16 x i8>* %X		; <<16 x i8>> [#uses=1]
@@ -30,6 +38,11 @@ define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
 ; CHECK: vmuloub
 ; CHECK: vmuleub
 ; CHECK-NOT: mullw
+; CHECK-LE-LABEL: test_v16i8:
+; CHECK-LE: vmuloub [[REG1:[0-9]+]]
+; CHECK-LE: vmuleub [[REG2:[0-9]+]]
+; CHECK-LE: vperm {{[0-9]+}}, [[REG2]], [[REG1]]
+; CHECK-LE-NOT: mullw
 
 define <4 x float> @test_float(<4 x float>* %X, <4 x float>* %Y) {
 	%tmp = load <4 x float>* %X
@@ -44,3 +57,7 @@ define <4 x float> @test_float(<4 x float>* %X, <4 x float>* %Y) {
 ; CHECK: vspltisw [[ZNEG:[0-9]+]], -1
 ; CHECK: vslw     {{[0-9]+}}, [[ZNEG]], [[ZNEG]]
 ; CHECK: vmaddfp
+; CHECK-LE-LABEL: test_float:
+; CHECK-LE: vspltisw [[ZNEG:[0-9]+]], -1
+; CHECK-LE: vslw     {{[0-9]+}}, [[ZNEG]], [[ZNEG]]
+; CHECK-LE: vmaddfp
diff --git a/test/CodeGen/PowerPC/vec_shuffle_le.ll b/test/CodeGen/PowerPC/vec_shuffle_le.ll
new file mode 100644
index 0000000..635721c
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_shuffle_le.ll
@@ -0,0 +1,191 @@
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec | FileCheck %s
+
+define void @VPKUHUM_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VPKUHUM_xy:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = load <16 x i8>* %B
+        %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+; CHECK: vpkuhum
+        store <16 x i8> %tmp3, <16 x i8>* %A
+        ret void
+}
+
+define void @VPKUHUM_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VPKUHUM_xx:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK: vpkuhum
+        store <16 x i8> %tmp2, <16 x i8>* %A
+        ret void
+}
+
+define void @VPKUWUM_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VPKUWUM_xy:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = load <16 x i8>* %B
+        %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29>
+; CHECK: vpkuwum
+        store <16 x i8> %tmp3, <16 x i8>* %A
+        ret void
+}
+
+define void @VPKUWUM_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VPKUWUM_xx:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
+; CHECK: vpkuwum
+        store <16 x i8> %tmp2, <16 x i8>* %A
+        ret void
+}
+
+define void @VMRGLB_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VMRGLB_xy:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = load <16 x i8>* %B
+        %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+; CHECK: vmrglb
+        store <16 x i8> %tmp3, <16 x i8>* %A
+        ret void
+}
+
+define void @VMRGLB_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VMRGLB_xx:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+; CHECK: vmrglb
+        store <16 x i8> %tmp2, <16 x i8>* %A
+        ret void
+}
+
+define void @VMRGHB_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VMRGHB_xy:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = load <16 x i8>* %B
+        %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK: vmrghb
+        store <16 x i8> %tmp3, <16 x i8>* %A
+        ret void
+}
+
+define void @VMRGHB_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VMRGHB_xx:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
+; CHECK: vmrghb
+        store <16 x i8> %tmp2, <16 x i8>* %A
+        ret void
+}
+
+define void @VMRGLH_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VMRGLH_xy:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = load <16 x i8>* %B
+        %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 2, i32 3, i32 18, i32 19, i32 4, i32 5, i32 20, i32 21, i32 6, i32 7, i32 22, i32 23>
+; CHECK: vmrglh
+        store <16 x i8> %tmp3, <16 x i8>* %A
+        ret void
+}
+
+define void @VMRGLH_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VMRGLH_xx:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 4, i32 5, i32 4, i32 5, i32 6, i32 7, i32 6, i32 7>
+; CHECK: vmrglh
+        store <16 x i8> %tmp2, <16 x i8>* %A
+        ret void
+}
+
+define void @VMRGHH_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VMRGHH_xy:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = load <16 x i8>* %B
+        %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 9, i32 24, i32 25, i32 10, i32 11, i32 26, i32 27, i32 12, i32 13, i32 28, i32 29, i32 14, i32 15, i32 30, i32 31>
+; CHECK: vmrghh
+        store <16 x i8> %tmp3, <16 x i8>* %A
+        ret void
+}
+
+define void @VMRGHH_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VMRGHH_xx:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11, i32 12, i32 13, i32 12, i32 13, i32 14, i32 15, i32 14, i32 15>
+; CHECK: vmrghh
+        store <16 x i8> %tmp2, <16 x i8>* %A
+        ret void
+}
+
+define void @VMRGLW_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VMRGLW_xy:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = load <16 x i8>* %B
+        %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23>
+; CHECK: vmrglw
+        store <16 x i8> %tmp3, <16 x i8>* %A
+        ret void
+}
+
+define void @VMRGLW_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VMRGLW_xx:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+; CHECK: vmrglw
+        store <16 x i8> %tmp2, <16 x i8>* %A
+        ret void
+}
+
+define void @VMRGHW_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VMRGHW_xy:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = load <16 x i8>* %B
+        %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15, i32 28, i32 29, i32 30, i32 31>
+; CHECK: vmrghw
+        store <16 x i8> %tmp3, <16 x i8>* %A
+        ret void
+}
+
+define void @VMRGHW_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VMRGHW_xx:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
+; CHECK: vmrghw
+        store <16 x i8> %tmp2, <16 x i8>* %A
+        ret void
+}
+
+define void @VSLDOI_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VSLDOI_xy:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = load <16 x i8>* %B
+        %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
+; CHECK: vsldoi
+        store <16 x i8> %tmp3, <16 x i8>* %A
+        ret void
+}
+
+define void @VSLDOI_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VSLDOI_xx:
+        %tmp = load <16 x i8>* %A
+        %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
+; CHECK: vsldoi
+        store <16 x i8> %tmp2, <16 x i8>* %A
+        ret void
+}
+
diff --git a/test/CodeGen/PowerPC/vperm-instcombine.ll b/test/CodeGen/PowerPC/vperm-instcombine.ll
new file mode 100644
index 0000000..d9084c8
--- /dev/null
+++ b/test/CodeGen/PowerPC/vperm-instcombine.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define <16 x i8> @foo() nounwind ssp {
+; CHECK: @foo
+;; Arguments are {0,1,...,15},{16,17,...,31},{30,28,26,...,0}
+  %1 = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> <i32 50462976, i32 117835012, i32 185207048, i32 252579084>, <4 x i32> <i32 319951120, i32 387323156, i32 454695192, i32 522067228>, <16 x i8> <i8 30, i8 28, i8 26, i8 24, i8 22, i8 20, i8 18, i8 16, i8 14, i8 12, i8 10, i8 8, i8 6, i8 4, i8 2, i8 0>)
+  %2 = bitcast <4 x i32> %1 to <16 x i8>
+  ret <16 x i8> %2
+;; Revised arguments are {16,17,...31},{0,1,...,15},{1,3,5,...,31}
+;; optimized into the following:
+; CHECK: ret <16 x i8> <i8 17, i8 19, i8 21, i8 23, i8 25, i8 27, i8 29, i8 31, i8 1, i8 3, i8 5, i8 7, i8 9, i8 11, i8 13, i8 15>
+}
+
+declare <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32>, <4 x i32>, <16 x i8>)
diff --git a/test/CodeGen/PowerPC/vperm-lowering.ll b/test/CodeGen/PowerPC/vperm-lowering.ll
new file mode 100644
index 0000000..d55d26c
--- /dev/null
+++ b/test/CodeGen/PowerPC/vperm-lowering.ll
@@ -0,0 +1,66 @@
+; RUN: llc -O0 -fast-isel=false -mcpu=ppc64 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define <16 x i8> @foo() nounwind ssp {
+  %1 = shufflevector <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, <16 x i32> <i32 0, i32 5, i32 10, i32 15, i32 20, i32 25, i32 30, i32 3, i32 8, i32 13, i32 18, i32 23, i32 28, i32 1, i32 6, i32 11>
+  ret <16 x i8> %1
+}
+
+; CHECK: .LCPI0_0:
+; CHECK: .byte 31
+; CHECK: .byte 26
+; CHECK: .byte 21
+; CHECK: .byte 16
+; CHECK: .byte 11
+; CHECK: .byte 6
+; CHECK: .byte 1
+; CHECK: .byte 28
+; CHECK: .byte 23
+; CHECK: .byte 18
+; CHECK: .byte 13
+; CHECK: .byte 8
+; CHECK: .byte 3
+; CHECK: .byte 30
+; CHECK: .byte 25
+; CHECK: .byte 20
+; CHECK: .LCPI0_1:
+; CHECK: .byte 0
+; CHECK: .byte 1
+; CHECK: .byte 2
+; CHECK: .byte 3
+; CHECK: .byte 4
+; CHECK: .byte 5
+; CHECK: .byte 6
+; CHECK: .byte 7
+; CHECK: .byte 8
+; CHECK: .byte 9
+; CHECK: .byte 10
+; CHECK: .byte 11
+; CHECK: .byte 12
+; CHECK: .byte 13
+; CHECK: .byte 14
+; CHECK: .byte 15
+; CHECK: .LCPI0_2:
+; CHECK: .byte 16
+; CHECK: .byte 17
+; CHECK: .byte 18
+; CHECK: .byte 19
+; CHECK: .byte 20
+; CHECK: .byte 21
+; CHECK: .byte 22
+; CHECK: .byte 23
+; CHECK: .byte 24
+; CHECK: .byte 25
+; CHECK: .byte 26
+; CHECK: .byte 27
+; CHECK: .byte 28
+; CHECK: .byte 29
+; CHECK: .byte 30
+; CHECK: .byte 31
+; CHECK: foo:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LCPI0_2@toc@ha
+; CHECK: addi [[REG2:[0-9]+]], [[REG1]], .LCPI0_2@toc@l
+; CHECK: lvx [[REG3:[0-9]+]], 0, [[REG2]]
+; CHECK: vperm {{[0-9]+}}, [[REG3]], {{[0-9]+}}, {{[0-9]+}}
diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll
index c9eaeda..f733d90 100644
--- a/test/CodeGen/R600/add_i64.ll
+++ b/test/CodeGen/R600/add_i64.ll
@@ -70,9 +70,9 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add
 }
 
 ; SI-LABEL: @trunc_i64_add_to_i32
-; SI: S_LOAD_DWORD [[SREG0:s[0-9]+]],
-; SI: S_LOAD_DWORD [[SREG1:s[0-9]+]],
-; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], [[SREG1]], [[SREG0]]
+; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]]
+; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]]
+; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
 ; SI-NOT: ADDC
 ; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
 ; SI: BUFFER_STORE_DWORD [[VRESULT]],
diff --git a/test/CodeGen/R600/and.ll b/test/CodeGen/R600/and.ll
index ee9bc83..cf11481 100644
--- a/test/CodeGen/R600/and.ll
+++ b/test/CodeGen/R600/and.ll
@@ -1,13 +1,12 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-;EG-CHECK: @test2
-;EG-CHECK: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; FUNC-LABEL: @test2
+; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK: @test2
-;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -18,17 +17,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   ret void
 }
 
-;EG-CHECK: @test4
-;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; FUNC-LABEL: @test4
+; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK: @test4
-;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -38,3 +36,75 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: @s_and_i32
+; SI: S_AND_B32
+define void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+  %and = and i32 %a, %b
+  store i32 %and, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_and_constant_i32
+; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
+define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
+  %and = and i32 %a, 1234567
+  store i32 %and, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_and_i32
+; SI: V_AND_B32
+define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
+  %a = load i32 addrspace(1)* %aptr, align 4
+  %b = load i32 addrspace(1)* %bptr, align 4
+  %and = and i32 %a, %b
+  store i32 %and, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_and_constant_i32
+; SI: V_AND_B32
+define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
+  %a = load i32 addrspace(1)* %aptr, align 4
+  %and = and i32 %a, 1234567
+  store i32 %and, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_and_i64
+; SI: S_AND_B64
+define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %and = and i64 %a, %b
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @s_and_constant_i64
+; SI: S_AND_B64
+define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
+  %and = and i64 %a, 281474976710655
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @v_and_i64
+; SI: V_AND_B32
+; SI: V_AND_B32
+define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
+  %a = load i64 addrspace(1)* %aptr, align 8
+  %b = load i64 addrspace(1)* %bptr, align 8
+  %and = and i64 %a, %b
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @v_and_constant_i64
+; SI: V_AND_B32
+; SI: V_AND_B32
+define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+  %a = load i64 addrspace(1)* %aptr, align 8
+  %and = and i64 %a, 1234567
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/R600/array-ptr-calc-i32.ll
index c2362da..3230353 100644
--- a/test/CodeGen/R600/array-ptr-calc-i32.ll
+++ b/test/CodeGen/R600/array-ptr-calc-i32.ll
@@ -10,7 +10,12 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
 
 ; SI-LABEL: @test_private_array_ptr_calc:
 ; SI: V_ADD_I32_e32 [[PTRREG:v[0-9]+]]
-; SI: V_MOVRELD_B32_e32 {{v[0-9]+}}, [[PTRREG]]
+;
+; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
+; alloca to a vector.  It currently fails because it does not know how
+; to interpret:
+; getelementptr [4 x i32]* %alloca, i32 1, i32 %b
+; SI: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]]
 define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
   %alloca = alloca [4 x i32], i32 4, align 16
   %tid = call i32 @llvm.SI.tid() readnone
diff --git a/test/CodeGen/R600/atomic_cmp_swap_local.ll b/test/CodeGen/R600/atomic_cmp_swap_local.ll
new file mode 100644
index 0000000..eb9539e
--- /dev/null
+++ b/test/CodeGen/R600/atomic_cmp_swap_local.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_offset:
+; SI: S_LOAD_DWORD [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
+; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
+; SI: DS_CMPST_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]], 0x10, [M0]
+; SI: S_ENDPGM
+define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
+  %result = extractvalue { i32, i1 } %pair, 0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i64_offset:
+; SI: S_LOAD_DWORDX2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: S_MOV_B64  s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
+; SI-DAG: V_MOV_B32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
+; SI-DAG: V_MOV_B32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
+; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: V_MOV_B32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
+; SI-DAG: V_MOV_B32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
+; SI: DS_CMPST_RTN_B64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}}, 0x20, [M0]
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
+; SI: S_ENDPGM
+define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
+  %result = extractvalue { i64, i1 } %pair, 0
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/atomic_load_add.ll b/test/CodeGen/R600/atomic_load_add.ll
index cb0242c..c26f9cd 100644
--- a/test/CodeGen/R600/atomic_load_add.ll
+++ b/test/CodeGen/R600/atomic_load_add.ll
@@ -1,23 +1,38 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 
-; R600-CHECK-LABEL: @atomic_add_local
-; R600-CHECK: LDS_ADD *
-; SI-CHECK-LABEL: @atomic_add_local
-; SI-CHECK: DS_ADD_U32_RTN
+; FUNC-LABEL: @atomic_add_local
+; R600: LDS_ADD *
+; SI: DS_ADD_RTN_U32
 define void @atomic_add_local(i32 addrspace(3)* %local) {
-entry:
-   %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
+   %unused = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
    ret void
 }
 
-; R600-CHECK-LABEL: @atomic_add_ret_local
-; R600-CHECK: LDS_ADD_RET *
-; SI-CHECK-LABEL: @atomic_add_ret_local
-; SI-CHECK: DS_ADD_U32_RTN
+; FUNC-LABEL: @atomic_add_local_const_offset
+; R600: LDS_ADD *
+; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
+  %gep = getelementptr i32 addrspace(3)* %local, i32 4
+  %val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: @atomic_add_ret_local
+; R600: LDS_ADD_RET *
+; SI: DS_ADD_RTN_U32
 define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
-entry:
-  %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
-  store i32 %0, i32 addrspace(1)* %out
+  %val = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @atomic_add_ret_local_const_offset
+; R600: LDS_ADD_RET *
+; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
+define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
+  %gep = getelementptr i32 addrspace(3)* %local, i32 5
+  %val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
+  store i32 %val, i32 addrspace(1)* %out
   ret void
 }
diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll
index 7c26e52..3569d91 100644
--- a/test/CodeGen/R600/atomic_load_sub.ll
+++ b/test/CodeGen/R600/atomic_load_sub.ll
@@ -1,23 +1,38 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; R600-CHECK-LABEL: @atomic_sub_local
-; R600-CHECK: LDS_SUB *
-; SI-CHECK-LABEL: @atomic_sub_local
-; SI-CHECK: DS_SUB_U32_RTN
+; FUNC-LABEL: @atomic_sub_local
+; R600: LDS_SUB *
+; SI: DS_SUB_RTN_U32
 define void @atomic_sub_local(i32 addrspace(3)* %local) {
-entry:
-   %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
+   %unused = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
    ret void
 }
 
-; R600-CHECK-LABEL: @atomic_sub_ret_local
-; R600-CHECK: LDS_SUB_RET *
-; SI-CHECK-LABEL: @atomic_sub_ret_local
-; SI-CHECK: DS_SUB_U32_RTN
+; FUNC-LABEL: @atomic_sub_local_const_offset
+; R600: LDS_SUB *
+; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
+  %gep = getelementptr i32 addrspace(3)* %local, i32 4
+  %val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: @atomic_sub_ret_local
+; R600: LDS_SUB_RET *
+; SI: DS_SUB_RTN_U32
 define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
-entry:
-  %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
-  store i32 %0, i32 addrspace(1)* %out
+  %val = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @atomic_sub_ret_local_const_offset
+; R600: LDS_SUB_RET *
+; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
+define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
+  %gep = getelementptr i32 addrspace(3)* %local, i32 5
+  %val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
+  store i32 %val, i32 addrspace(1)* %out
   ret void
 }
diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll
index 6b68376..511e8ef 100644
--- a/test/CodeGen/R600/big_alu.ll
+++ b/test/CodeGen/R600/big_alu.ll
@@ -101,7 +101,7 @@ IF137:                                            ; preds = %main_body
   %88 = insertelement <4 x float> %87, float %32, i32 2
   %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3
   %90 = call float @llvm.AMDGPU.dp4(<4 x float> %85, <4 x float> %89)
-  %91 = call float @llvm.AMDGPU.rsq(float %90)
+  %91 = call float @llvm.AMDGPU.rsq.f32(float %90)
   %92 = fmul float %30, %91
   %93 = fmul float %31, %91
   %94 = fmul float %32, %91
@@ -344,7 +344,7 @@ ENDIF136:                                         ; preds = %main_body, %ENDIF15
   %325 = insertelement <4 x float> %324, float %318, i32 2
   %326 = insertelement <4 x float> %325, float 0.000000e+00, i32 3
   %327 = call float @llvm.AMDGPU.dp4(<4 x float> %322, <4 x float> %326)
-  %328 = call float @llvm.AMDGPU.rsq(float %327)
+  %328 = call float @llvm.AMDGPU.rsq.f32(float %327)
   %329 = fmul float %314, %328
   %330 = fmul float %316, %328
   %331 = fmul float %318, %328
@@ -377,7 +377,7 @@ ENDIF136:                                         ; preds = %main_body, %ENDIF15
   %358 = insertelement <4 x float> %357, float %45, i32 2
   %359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3
   %360 = call float @llvm.AMDGPU.dp4(<4 x float> %355, <4 x float> %359)
-  %361 = call float @llvm.AMDGPU.rsq(float %360)
+  %361 = call float @llvm.AMDGPU.rsq.f32(float %360)
   %362 = fmul float %45, %361
   %363 = call float @fabs(float %362)
   %364 = fmul float %176, 0x3FECCCCCC0000000
@@ -403,7 +403,7 @@ ENDIF136:                                         ; preds = %main_body, %ENDIF15
   %384 = insertelement <4 x float> %383, float %45, i32 2
   %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3
   %386 = call float @llvm.AMDGPU.dp4(<4 x float> %381, <4 x float> %385)
-  %387 = call float @llvm.AMDGPU.rsq(float %386)
+  %387 = call float @llvm.AMDGPU.rsq.f32(float %386)
   %388 = fmul float %45, %387
   %389 = call float @fabs(float %388)
   %390 = fmul float %176, 0x3FF51EB860000000
@@ -1041,7 +1041,7 @@ IF179:                                            ; preds = %ENDIF175
   %896 = insertelement <4 x float> %895, float %45, i32 2
   %897 = insertelement <4 x float> %896, float 0.000000e+00, i32 3
   %898 = call float @llvm.AMDGPU.dp4(<4 x float> %893, <4 x float> %897)
-  %899 = call float @llvm.AMDGPU.rsq(float %898)
+  %899 = call float @llvm.AMDGPU.rsq.f32(float %898)
   %900 = fmul float %45, %899
   %901 = call float @fabs(float %900)
   %902 = fmul float %176, 0x3FECCCCCC0000000
@@ -1150,7 +1150,7 @@ ENDIF178:                                         ; preds = %ENDIF175, %IF179
 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
 
 ; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq(float) #1
+declare float @llvm.AMDGPU.rsq.f32(float) #1
 
 ; Function Attrs: readnone
 declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1
diff --git a/test/CodeGen/R600/bitcast.ll b/test/CodeGen/R600/bitcast.ll
index 5bfc008..0be79e6 100644
--- a/test/CodeGen/R600/bitcast.ll
+++ b/test/CodeGen/R600/bitcast.ll
@@ -1,9 +1,11 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 ; This test just checks that the compiler doesn't crash.
-; CHECK-LABEL: @v32i8_to_v8i32
-; CHECK: S_ENDPGM
 
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+; FUNC-LABEL: @v32i8_to_v8i32
+; SI: S_ENDPGM
 define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
 entry:
   %1 = load <32 x i8> addrspace(2)* %0
@@ -15,12 +17,8 @@ entry:
   ret void
 }
 
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
-
-; CHECK-LABEL: @i8ptr_v16i8ptr
-; CHECK: S_ENDPGM
+; FUNC-LABEL: @i8ptr_v16i8ptr
+; SI: S_ENDPGM
 define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
@@ -28,3 +26,53 @@ entry:
   store <16 x i8> %1, <16 x i8> addrspace(1)* %out
   ret void
 }
+
+define void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+  %load = load float addrspace(1)* %in, align 4
+  %bc = bitcast float %load to <2 x i16>
+  store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4
+  ret void
+}
+
+define void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
+  %load = load <2 x i16> addrspace(1)* %in, align 4
+  %bc = bitcast <2 x i16> %load to float
+  store float %bc, float addrspace(1)* %out, align 4
+  ret void
+}
+
+define void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
+  %load = load <4 x i8> addrspace(1)* %in, align 4
+  %bc = bitcast <4 x i8> %load to i32
+  store i32 %bc, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %load = load i32 addrspace(1)* %in, align 4
+  %bc = bitcast i32 %load to <4 x i8>
+  store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bitcast_v2i32_to_f64
+; SI: S_ENDPGM
+define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %val = load <2 x i32> addrspace(1)* %in, align 8
+  %add = add <2 x i32> %val, <i32 4, i32 9>
+  %bc = bitcast <2 x i32> %add to double
+  store double %bc, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @bitcast_f64_to_v2i32
+; SI: S_ENDPGM
+define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
+  %val = load double addrspace(1)* %in, align 8
+  %add = fadd double %val, 4.0
+  %bc = bitcast double %add to <2 x i32>
+  store <2 x i32> %bc, <2 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/bswap.ll b/test/CodeGen/R600/bswap.ll
new file mode 100644
index 0000000..6aebe85
--- /dev/null
+++ b/test/CodeGen/R600/bswap.ll
@@ -0,0 +1,50 @@
+; RUN: llc -march=r600 -mcpu=SI < %s
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) nounwind readnone
+declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone
+
+define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
+  store i32 %bswap, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
+  %val = load <2 x i32> addrspace(1)* %in, align 8
+  %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
+  store <2 x i32> %bswap, <2 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
+define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
+  %val = load <4 x i32> addrspace(1)* %in, align 16
+  %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
+  store <4 x i32> %bswap, <4 x i32> addrspace(1)* %out, align 16
+  ret void
+}
+
+define void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+  %val = load i64 addrspace(1)* %in, align 8
+  %bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone
+  store i64 %bswap, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+define void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) nounwind {
+  %val = load <2 x i64> addrspace(1)* %in, align 16
+  %bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) nounwind readnone
+  store <2 x i64> %bswap, <2 x i64> addrspace(1)* %out, align 16
+  ret void
+}
+
+define void @test_bswap_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) nounwind {
+  %val = load <4 x i64> addrspace(1)* %in, align 32
+  %bswap = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %val) nounwind readnone
+  store <4 x i64> %bswap, <4 x i64> addrspace(1)* %out, align 32
+  ret void
+}
diff --git a/test/CodeGen/R600/ctlz_zero_undef.ll b/test/CodeGen/R600/ctlz_zero_undef.ll
new file mode 100644
index 0000000..15b5188
--- /dev/null
+++ b/test/CodeGen/R600/ctlz_zero_undef.ll
@@ -0,0 +1,57 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
+
+; FUNC-LABEL: @s_ctlz_zero_undef_i32:
+; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
+; SI: S_FLBIT_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: BUFFER_STORE_DWORD [[VRESULT]],
+; SI: S_ENDPGM
+define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
+  store i32 %ctlz, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_ctlz_zero_undef_i32:
+; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
+; SI: V_FFBH_U32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+  %val = load i32 addrspace(1)* %valptr, align 4
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
+  store i32 %ctlz, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_ctlz_zero_undef_v2i32:
+; SI: BUFFER_LOAD_DWORDX2
+; SI: V_FFBH_U32_e32
+; SI: V_FFBH_U32_e32
+; SI: BUFFER_STORE_DWORDX2
+; SI: S_ENDPGM
+define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
+  %val = load <2 x i32> addrspace(1)* %valptr, align 8
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
+  store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @v_ctlz_zero_undef_v4i32:
+; SI: BUFFER_LOAD_DWORDX4
+; SI: V_FFBH_U32_e32
+; SI: V_FFBH_U32_e32
+; SI: V_FFBH_U32_e32
+; SI: V_FFBH_U32_e32
+; SI: BUFFER_STORE_DWORDX4
+; SI: S_ENDPGM
+define void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
+  %val = load <4 x i32> addrspace(1)* %valptr, align 16
+  %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
+  store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
+  ret void
+}
diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll
new file mode 100644
index 0000000..15be8e1
--- /dev/null
+++ b/test/CodeGen/R600/ctpop.ll
@@ -0,0 +1,284 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
+declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
+declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
+
+; FUNC-LABEL: @s_ctpop_i32:
+; SI: S_LOAD_DWORD [[SVAL:s[0-9]+]],
+; SI: S_BCNT1_I32_B32 [[SRESULT:s[0-9]+]], [[SVAL]]
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: BUFFER_STORE_DWORD [[VRESULT]],
+; SI: S_ENDPGM
+
+; EG: BCNT_INT
+define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
+  store i32 %ctpop, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; XXX - Why 0 in register?
+; FUNC-LABEL: @v_ctpop_i32:
+; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
+; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
+; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+
+; EG: BCNT_INT
+define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
+  store i32 %ctpop, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_ctpop_add_chain_i32
+; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
+; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
+; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
+; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
+; SI-NOT: ADD
+; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+
+; EG: BCNT_INT
+; EG: BCNT_INT
+define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
+  %val0 = load i32 addrspace(1)* %in0, align 4
+  %val1 = load i32 addrspace(1)* %in1, align 4
+  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
+  %ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone
+  %add = add i32 %ctpop0, %ctpop1
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_ctpop_v2i32:
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: S_ENDPGM
+
+; EG: BCNT_INT
+; EG: BCNT_INT
+define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
+  %val = load <2 x i32> addrspace(1)* %in, align 8
+  %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
+  store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @v_ctpop_v4i32:
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: S_ENDPGM
+
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
+  %val = load <4 x i32> addrspace(1)* %in, align 16
+  %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
+  store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
+  ret void
+}
+
+; FUNC-LABEL: @v_ctpop_v8i32:
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: S_ENDPGM
+
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
+  %val = load <8 x i32> addrspace(1)* %in, align 32
+  %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
+  store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
+  ret void
+}
+
+; FUNC-LABEL: @v_ctpop_v16i32:
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: V_BCNT_U32_B32_e32
+; SI: S_ENDPGM
+
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
+  %val = load <16 x i32> addrspace(1)* %in, align 32
+  %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
+  store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
+  ret void
+}
+
+; FUNC-LABEL: @v_ctpop_i32_add_inline_constant:
+; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
+; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+
+; EG: BCNT_INT
+define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
+  %add = add i32 %ctpop, 4
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_ctpop_i32_add_inline_constant_inv:
+; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
+; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+
+; EG: BCNT_INT
+define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
+  %add = add i32 4, %ctpop
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_ctpop_i32_add_literal:
+; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
+; SI: V_MOV_B32_e32 [[LIT:v[0-9]+]], 0x1869f
+; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
+  %add = add i32 %ctpop, 99999
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_ctpop_i32_add_var:
+; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
+; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
+; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+
+; EG: BCNT_INT
+define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
+  %add = add i32 %ctpop, %const
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_ctpop_i32_add_var_inv:
+; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
+; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
+; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+
+; EG: BCNT_INT
+define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
+  %add = add i32 %const, %ctpop
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_ctpop_i32_add_vvar_inv
+; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], {{.*}} + 0x0
+; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], {{.*}} + 0x10
+; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+
+; EG: BCNT_INT
+define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
+  %gep = getelementptr i32 addrspace(1)* %constptr, i32 4
+  %const = load i32 addrspace(1)* %gep, align 4
+  %add = add i32 %const, %ctpop
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FIXME: We currently disallow SALU instructions in all branches,
+; but there are some cases when the should be allowed.
+
+; FUNC-LABEL: @ctpop_i32_in_br
+; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
+; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+; EG: BCNT_INT
+define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
+entry:
+  %0 = icmp eq i32 %cond, 0
+  br i1 %0, label %if, label %else
+
+if:
+  %1 = load i32 addrspace(1)* %in
+  %2 = call i32 @llvm.ctpop.i32(i32 %1)
+  br label %endif
+
+else:
+  %3 = getelementptr i32 addrspace(1)* %in, i32 1
+  %4 = load i32 addrspace(1)* %3
+  br label %endif
+
+endif:
+  %5 = phi i32 [%2, %if], [%4, %else]
+  store i32 %5, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/R600/ctpop64.ll
new file mode 100644
index 0000000..b36ecc6
--- /dev/null
+++ b/test/CodeGen/R600/ctpop64.ll
@@ -0,0 +1,122 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
+declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) nounwind readnone
+declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>) nounwind readnone
+declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone
+
+; FUNC-LABEL: @s_ctpop_i64:
+; SI: S_LOAD_DWORDX2 [[SVAL:s\[[0-9]+:[0-9]+\]]],
+; SI: S_BCNT1_I32_B64 [[SRESULT:s[0-9]+]], [[SVAL]]
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: BUFFER_STORE_DWORD [[VRESULT]],
+; SI: S_ENDPGM
+define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
+  %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
+  %truncctpop = trunc i64 %ctpop to i32
+  store i32 %truncctpop, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_ctpop_i64:
+; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
+; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
+; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
+; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+  %val = load i64 addrspace(1)* %in, align 8
+  %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
+  %truncctpop = trunc i64 %ctpop to i32
+  store i32 %truncctpop, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_ctpop_v2i64:
+; SI: S_BCNT1_I32_B64
+; SI: S_BCNT1_I32_B64
+; SI: S_ENDPGM
+define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind {
+  %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
+  %truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
+  store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @s_ctpop_v4i64:
+; SI: S_BCNT1_I32_B64
+; SI: S_BCNT1_I32_B64
+; SI: S_BCNT1_I32_B64
+; SI: S_BCNT1_I32_B64
+; SI: S_ENDPGM
+define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind {
+  %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
+  %truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
+  store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16
+  ret void
+}
+
+; FUNC-LABEL: @v_ctpop_v2i64:
+; SI: V_BCNT_U32_B32
+; SI: V_BCNT_U32_B32
+; SI: V_BCNT_U32_B32
+; SI: V_BCNT_U32_B32
+; SI: S_ENDPGM
+define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
+  %val = load <2 x i64> addrspace(1)* %in, align 16
+  %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
+  %truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
+  store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @v_ctpop_v4i64:
+; SI: V_BCNT_U32_B32
+; SI: V_BCNT_U32_B32
+; SI: V_BCNT_U32_B32
+; SI: V_BCNT_U32_B32
+; SI: V_BCNT_U32_B32
+; SI: V_BCNT_U32_B32
+; SI: V_BCNT_U32_B32
+; SI: V_BCNT_U32_B32
+; SI: S_ENDPGM
+define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
+  %val = load <4 x i64> addrspace(1)* %in, align 32
+  %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
+  %truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
+  store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16
+  ret void
+}
+
+; FIXME: We currently disallow SALU instructions in all branches,
+; but there are some cases when the should be allowed.
+
+; FUNC-LABEL: @ctpop_i64_in_br
+; SI: V_BCNT_U32_B32_e64 [[BCNT_LO:v[0-9]+]], v{{[0-9]+}}, 0
+; SI: V_BCNT_U32_B32_e32 v[[BCNT:[0-9]+]], v{{[0-9]+}}, [[BCNT_LO]]
+; SI: V_MOV_B32_e32 v[[ZERO:[0-9]+]], 0
+; SI: BUFFER_STORE_DWORDX2 v[
+; SI: [[BCNT]]:[[ZERO]]]
+; SI: S_ENDPGM
+define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i32 %cond) {
+entry:
+  %0 = icmp eq i32 %cond, 0
+  br i1 %0, label %if, label %else
+
+if:
+  %1 = load i64 addrspace(1)* %in
+  %2 = call i64 @llvm.ctpop.i64(i64 %1)
+  br label %endif
+
+else:
+  %3 = getelementptr i64 addrspace(1)* %in, i32 1
+  %4 = load i64 addrspace(1)* %3
+  br label %endif
+
+endif:
+  %5 = phi i64 [%2, %if], [%4, %else]
+  store i64 %5, i64 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/cttz_zero_undef.ll b/test/CodeGen/R600/cttz_zero_undef.ll
new file mode 100644
index 0000000..cf44f8e
--- /dev/null
+++ b/test/CodeGen/R600/cttz_zero_undef.ll
@@ -0,0 +1,57 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
+declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone
+
+; FUNC-LABEL: @s_cttz_zero_undef_i32:
+; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
+; SI: S_FF1_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: BUFFER_STORE_DWORD [[VRESULT]],
+; SI: S_ENDPGM
+define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+  %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
+  store i32 %cttz, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_cttz_zero_undef_i32:
+; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
+; SI: V_FFBL_B32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+  %val = load i32 addrspace(1)* %valptr, align 4
+  %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
+  store i32 %cttz, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_cttz_zero_undef_v2i32:
+; SI: BUFFER_LOAD_DWORDX2
+; SI: V_FFBL_B32_e32
+; SI: V_FFBL_B32_e32
+; SI: BUFFER_STORE_DWORDX2
+; SI: S_ENDPGM
+define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
+  %val = load <2 x i32> addrspace(1)* %valptr, align 8
+  %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
+  store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @v_cttz_zero_undef_v4i32:
+; SI: BUFFER_LOAD_DWORDX4
+; SI: V_FFBL_B32_e32
+; SI: V_FFBL_B32_e32
+; SI: V_FFBL_B32_e32
+; SI: V_FFBL_B32_e32
+; SI: BUFFER_STORE_DWORDX4
+; SI: S_ENDPGM
+define void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
+  %val = load <4 x i32> addrspace(1)* %valptr, align 16
+  %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
+  store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16
+  ret void
+}
diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/R600/cvt_f32_ubyte.ll
new file mode 100644
index 0000000..fe97a44
--- /dev/null
+++ b/test/CodeGen/R600/cvt_f32_ubyte.ll
@@ -0,0 +1,171 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: @load_i8_to_f32:
+; SI: BUFFER_LOAD_UBYTE [[LOADREG:v[0-9]+]],
+; SI-NOT: BFE
+; SI-NOT: LSHR
+; SI: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
+; SI: BUFFER_STORE_DWORD [[CONV]],
+define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
+  %load = load i8 addrspace(1)* %in, align 1
+  %cvt = uitofp i8 %load to float
+  store float %cvt, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @load_v2i8_to_v2f32:
+; SI: BUFFER_LOAD_USHORT [[LOADREG:v[0-9]+]],
+; SI-NOT: BFE
+; SI-NOT: LSHR
+; SI-NOT: AND
+; SI-DAG: V_CVT_F32_UBYTE1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
+; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
+; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
+  %load = load <2 x i8> addrspace(1)* %in, align 1
+  %cvt = uitofp <2 x i8> %load to <2 x float>
+  store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
+; SI-LABEL: @load_v3i8_to_v3f32:
+; SI-NOT: BFE
+; SI-NOT: V_CVT_F32_UBYTE3_e32
+; SI-DAG: V_CVT_F32_UBYTE2_e32
+; SI-DAG: V_CVT_F32_UBYTE1_e32
+; SI-DAG: V_CVT_F32_UBYTE0_e32
+; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
+  %load = load <3 x i8> addrspace(1)* %in, align 1
+  %cvt = uitofp <3 x i8> %load to <3 x float>
+  store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
+; SI-LABEL: @load_v4i8_to_v4f32:
+; SI: BUFFER_LOAD_DWORD [[LOADREG:v[0-9]+]],
+; SI-NOT: BFE
+; SI-NOT: LSHR
+; SI-DAG: V_CVT_F32_UBYTE3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
+; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, [[LOADREG]]
+; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, [[LOADREG]]
+; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
+; SI: BUFFER_STORE_DWORDX4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
+  %load = load <4 x i8> addrspace(1)* %in, align 1
+  %cvt = uitofp <4 x i8> %load to <4 x float>
+  store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
+; XXX - This should really still be able to use the V_CVT_F32_UBYTE0
+; for each component, but computeKnownBits doesn't handle vectors very
+; well.
+
+; SI-LABEL: @load_v4i8_to_v4f32_2_uses:
+; SI: BUFFER_LOAD_UBYTE
+; SI: V_CVT_F32_UBYTE0_e32
+; SI: BUFFER_LOAD_UBYTE
+; SI: V_CVT_F32_UBYTE0_e32
+; SI: BUFFER_LOAD_UBYTE
+; SI: V_CVT_F32_UBYTE0_e32
+; SI: BUFFER_LOAD_UBYTE
+; SI: V_CVT_F32_UBYTE0_e32
+
+; XXX - replace with this when v4i8 loads aren't scalarized anymore.
+; XSI: BUFFER_LOAD_DWORD
+; XSI: V_CVT_F32_U32_e32
+; XSI: V_CVT_F32_U32_e32
+; XSI: V_CVT_F32_U32_e32
+; XSI: V_CVT_F32_U32_e32
+; SI: S_ENDPGM
+define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
+  %load = load <4 x i8> addrspace(1)* %in, align 4
+  %cvt = uitofp <4 x i8> %load to <4 x float>
+  store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
+  %add = add <4 x i8> %load, <i8 9, i8 9, i8 9, i8 9> ; Second use of %load
+  store <4 x i8> %add, <4 x i8> addrspace(1)* %out2, align 4
+  ret void
+}
+
+; Make sure this doesn't crash.
+; SI-LABEL: @load_v7i8_to_v7f32:
+; SI: S_ENDPGM
+define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
+  %load = load <7 x i8> addrspace(1)* %in, align 1
+  %cvt = uitofp <7 x i8> %load to <7 x float>
+  store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
+; SI-LABEL: @load_v8i8_to_v8f32:
+; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
+; SI-NOT: BFE
+; SI-NOT: LSHR
+; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-NOT: BFE
+; SI-NOT: LSHR
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
+  %load = load <8 x i8> addrspace(1)* %in, align 1
+  %cvt = uitofp <8 x i8> %load to <8 x float>
+  store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
+; SI-LABEL: @i8_zext_inreg_i32_to_f32:
+; SI: BUFFER_LOAD_DWORD [[LOADREG:v[0-9]+]],
+; SI: V_ADD_I32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
+; SI-NEXT: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[ADD]]
+; SI: BUFFER_STORE_DWORD [[CONV]],
+define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
+  %load = load i32 addrspace(1)* %in, align 4
+  %add = add i32 %load, 2
+  %inreg = and i32 %add, 255
+  %cvt = uitofp i32 %inreg to float
+  store float %cvt, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @i8_zext_inreg_hi1_to_f32:
+define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
+  %load = load i32 addrspace(1)* %in, align 4
+  %inreg = and i32 %load, 65280
+  %shr = lshr i32 %inreg, 8
+  %cvt = uitofp i32 %shr to float
+  store float %cvt, float addrspace(1)* %out, align 4
+  ret void
+}
+
+
+; We don't get these ones because of the zext, but instcombine removes
+; them so it shouldn't really matter.
+define void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
+  %load = load i8 addrspace(1)* %in, align 1
+  %ext = zext i8 %load to i32
+  %cvt = uitofp i32 %ext to float
+  store float %cvt, float addrspace(1)* %out, align 4
+  ret void
+}
+
+define void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
+  %load = load <4 x i8> addrspace(1)* %in, align 1
+  %ext = zext <4 x i8> %load to <4 x i32>
+  %cvt = uitofp <4 x i32> %ext to <4 x float>
+  store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
+  ret void
+}
diff --git a/test/CodeGen/R600/default-fp-mode.ll b/test/CodeGen/R600/default-fp-mode.ll
new file mode 100644
index 0000000..214b2c2
--- /dev/null
+++ b/test/CodeGen/R600/default-fp-mode.ll
@@ -0,0 +1,10 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: @test_kernel
+; SI: FloatMode: 240
+; SI: IeeeMode: 0
+define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
diff --git a/test/CodeGen/R600/fceil.ll b/test/CodeGen/R600/fceil.ll
index b8b945f..458363a 100644
--- a/test/CodeGen/R600/fceil.ll
+++ b/test/CodeGen/R600/fceil.ll
@@ -1,84 +1,131 @@
-; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
-declare double @llvm.ceil.f64(double) nounwind readnone
-declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
-declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone
-declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
-declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
-declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
+declare float @llvm.ceil.f32(float) nounwind readnone
+declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone
+declare <3 x float> @llvm.ceil.v3f32(<3 x float>) nounwind readnone
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
+declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone
+declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone
 
-; CI-LABEL: @fceil_f64:
-; CI: V_CEIL_F64_e32
-define void @fceil_f64(double addrspace(1)* %out, double %x) {
-  %y = call double @llvm.ceil.f64(double %x) nounwind readnone
-  store double %y, double addrspace(1)* %out
+; FUNC-LABEL: @fceil_f32:
+; SI: V_CEIL_F32_e32
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
+; EG: CEIL {{\*? *}}[[RESULT]]
+define void @fceil_f32(float addrspace(1)* %out, float %x) {
+  %y = call float @llvm.ceil.f32(float %x) nounwind readnone
+  store float %y, float addrspace(1)* %out
   ret void
 }
 
-; CI-LABEL: @fceil_v2f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
-  %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone
-  store <2 x double> %y, <2 x double> addrspace(1)* %out
+; FUNC-LABEL: @fceil_v2f32:
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
+; EG: CEIL {{\*? *}}[[RESULT]]
+; EG: CEIL {{\*? *}}[[RESULT]]
+define void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
+  %y = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x) nounwind readnone
+  store <2 x float> %y, <2 x float> addrspace(1)* %out
   ret void
 }
 
-; FIXME-CI-LABEL: @fceil_v3f64:
-; FIXME-CI: V_CEIL_F64_e32
-; FIXME-CI: V_CEIL_F64_e32
-; FIXME-CI: V_CEIL_F64_e32
-; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
-;   %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone
-;   store <3 x double> %y, <3 x double> addrspace(1)* %out
-;   ret void
-; }
+; FUNC-LABEL: @fceil_v3f32:
+; FIXME-SI: V_CEIL_F32_e32
+; FIXME-SI: V_CEIL_F32_e32
+; FIXME-SI: V_CEIL_F32_e32
+; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
+; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
+define void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
+  %y = call <3 x float> @llvm.ceil.v3f32(<3 x float> %x) nounwind readnone
+  store <3 x float> %y, <3 x float> addrspace(1)* %out
+  ret void
+}
 
-; CI-LABEL: @fceil_v4f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
-  %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
-  store <4 x double> %y, <4 x double> addrspace(1)* %out
+; FUNC-LABEL: @fceil_v4f32:
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
+; EG: CEIL {{\*? *}}[[RESULT]]
+; EG: CEIL {{\*? *}}[[RESULT]]
+; EG: CEIL {{\*? *}}[[RESULT]]
+; EG: CEIL {{\*? *}}[[RESULT]]
+define void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
+  %y = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
+  store <4 x float> %y, <4 x float> addrspace(1)* %out
   ret void
 }
 
-; CI-LABEL: @fceil_v8f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
-  %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone
-  store <8 x double> %y, <8 x double> addrspace(1)* %out
+; FUNC-LABEL: @fceil_v8f32:
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
+; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
+define void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
+  %y = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) nounwind readnone
+  store <8 x float> %y, <8 x float> addrspace(1)* %out
   ret void
 }
 
-; CI-LABEL: @fceil_v16f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
-  %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
-  store <16 x double> %y, <16 x double> addrspace(1)* %out
+; FUNC-LABEL: @fceil_v16f32:
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; SI: V_CEIL_F32_e32
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}}
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT4:T[0-9]+]]{{\.[XYZW]}}
+; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
+; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
+define void @fceil_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) {
+  %y = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) nounwind readnone
+  store <16 x float> %y, <16 x float> addrspace(1)* %out
   ret void
 }
diff --git a/test/CodeGen/R600/fceil64.ll b/test/CodeGen/R600/fceil64.ll
new file mode 100644
index 0000000..b42aefa
--- /dev/null
+++ b/test/CodeGen/R600/fceil64.ll
@@ -0,0 +1,103 @@
+; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare double @llvm.ceil.f64(double) nounwind readnone
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
+declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone
+declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
+declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
+declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
+
+; FUNC-LABEL: @fceil_f64:
+; CI: V_CEIL_F64_e32
+; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
+; SI: S_LSHR_B64
+; SI: S_NOT_B64
+; SI: S_AND_B64
+; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI: CMP_LT_I32
+; SI: CNDMASK_B32
+; SI: CNDMASK_B32
+; SI: CMP_GT_I32
+; SI: CNDMASK_B32
+; SI: CNDMASK_B32
+; SI: CMP_GT_F64
+; SI: CNDMASK_B32
+; SI: CMP_NE_I32
+; SI: CNDMASK_B32
+; SI: CNDMASK_B32
+; SI: V_ADD_F64
+define void @fceil_f64(double addrspace(1)* %out, double %x) {
+  %y = call double @llvm.ceil.f64(double %x) nounwind readnone
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fceil_v2f64:
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
+  %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone
+  store <2 x double> %y, <2 x double> addrspace(1)* %out
+  ret void
+}
+
+; FIXME-FUNC-LABEL: @fceil_v3f64:
+; FIXME-CI: V_CEIL_F64_e32
+; FIXME-CI: V_CEIL_F64_e32
+; FIXME-CI: V_CEIL_F64_e32
+; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
+;   %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone
+;   store <3 x double> %y, <3 x double> addrspace(1)* %out
+;   ret void
+; }
+
+; FUNC-LABEL: @fceil_v4f64:
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
+  %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
+  store <4 x double> %y, <4 x double> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fceil_v8f64:
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
+  %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone
+  store <8 x double> %y, <8 x double> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fceil_v16f64:
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+; CI: V_CEIL_F64_e32
+define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
+  %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
+  store <16 x double> %y, <16 x double> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fcopysign.f32.ll b/test/CodeGen/R600/fcopysign.f32.ll
new file mode 100644
index 0000000..7b4425b
--- /dev/null
+++ b/test/CodeGen/R600/fcopysign.f32.ll
@@ -0,0 +1,50 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+declare float @llvm.copysign.f32(float, float) nounwind readnone
+declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+; Try to identify arg based on higher address.
+; FUNC-LABEL: @test_copysign_f32:
+; SI: S_LOAD_DWORD [[SSIGN:s[0-9]+]], {{.*}} 0xc
+; SI: V_MOV_B32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
+; SI-DAG: S_LOAD_DWORD [[SMAG:s[0-9]+]], {{.*}} 0xb
+; SI-DAG: V_MOV_B32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
+; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
+; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+
+; EG: BFI_INT
+define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind {
+  %result = call float @llvm.copysign.f32(float %mag, float %sign)
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_copysign_v2f32:
+; SI: S_ENDPGM
+
+; EG: BFI_INT
+; EG: BFI_INT
+define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %mag, <2 x float> %sign) nounwind {
+  %result = call <2 x float> @llvm.copysign.v2f32(<2 x float> %mag, <2 x float> %sign)
+  store <2 x float> %result, <2 x float> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_copysign_v4f32:
+; SI: S_ENDPGM
+
+; EG: BFI_INT
+; EG: BFI_INT
+; EG: BFI_INT
+; EG: BFI_INT
+define void @test_copysign_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %mag, <4 x float> %sign) nounwind {
+  %result = call <4 x float> @llvm.copysign.v4f32(<4 x float> %mag, <4 x float> %sign)
+  store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
diff --git a/test/CodeGen/R600/fcopysign.f64.ll b/test/CodeGen/R600/fcopysign.f64.ll
new file mode 100644
index 0000000..ea7a6db
--- /dev/null
+++ b/test/CodeGen/R600/fcopysign.f64.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare double @llvm.copysign.f64(double, double) nounwind readnone
+declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind readnone
+
+; FUNC-LABEL: @test_copysign_f64:
+; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: V_MOV_B32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
+; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: V_MOV_B32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
+; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
+; SI: V_BFI_B32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
+; SI: V_MOV_B32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
+; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
+; SI: S_ENDPGM
+define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind {
+  %result = call double @llvm.copysign.f64(double %mag, double %sign)
+  store double %result, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_copysign_v2f64:
+; SI: S_ENDPGM
+define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind {
+  %result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
+  store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_copysign_v4f64:
+; SI: S_ENDPGM
+define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind {
+  %result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
+  store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/ffloor.ll b/test/CodeGen/R600/ffloor.ll
index 51d2b89..31c6116 100644
--- a/test/CodeGen/R600/ffloor.ll
+++ b/test/CodeGen/R600/ffloor.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
+; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare double @llvm.floor.f64(double) nounwind readnone
 declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
@@ -7,15 +8,34 @@ declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
 declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
 declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
 
-; CI-LABEL: @ffloor_f64:
+; FUNC-LABEL: @ffloor_f64:
 ; CI: V_FLOOR_F64_e32
+
+; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
+; SI: S_LSHR_B64
+; SI: S_NOT_B64
+; SI: S_AND_B64
+; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI: CMP_LT_I32
+; SI: CNDMASK_B32
+; SI: CNDMASK_B32
+; SI: CMP_GT_I32
+; SI: CNDMASK_B32
+; SI: CNDMASK_B32
+; SI: CMP_LT_F64
+; SI: CNDMASK_B32
+; SI: CMP_NE_I32
+; SI: CNDMASK_B32
+; SI: CNDMASK_B32
+; SI: V_ADD_F64
 define void @ffloor_f64(double addrspace(1)* %out, double %x) {
   %y = call double @llvm.floor.f64(double %x) nounwind readnone
   store double %y, double addrspace(1)* %out
   ret void
 }
 
-; CI-LABEL: @ffloor_v2f64:
+; FUNC-LABEL: @ffloor_v2f64:
 ; CI: V_FLOOR_F64_e32
 ; CI: V_FLOOR_F64_e32
 define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
@@ -24,7 +44,7 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
   ret void
 }
 
-; FIXME-CI-LABEL: @ffloor_v3f64:
+; FIXME-FUNC-LABEL: @ffloor_v3f64:
 ; FIXME-CI: V_FLOOR_F64_e32
 ; FIXME-CI: V_FLOOR_F64_e32
 ; FIXME-CI: V_FLOOR_F64_e32
@@ -34,7 +54,7 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
 ;   ret void
 ; }
 
-; CI-LABEL: @ffloor_v4f64:
+; FUNC-LABEL: @ffloor_v4f64:
 ; CI: V_FLOOR_F64_e32
 ; CI: V_FLOOR_F64_e32
 ; CI: V_FLOOR_F64_e32
@@ -45,7 +65,7 @@ define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
   ret void
 }
 
-; CI-LABEL: @ffloor_v8f64:
+; FUNC-LABEL: @ffloor_v8f64:
 ; CI: V_FLOOR_F64_e32
 ; CI: V_FLOOR_F64_e32
 ; CI: V_FLOOR_F64_e32
@@ -60,7 +80,7 @@ define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
   ret void
 }
 
-; CI-LABEL: @ffloor_v16f64:
+; FUNC-LABEL: @ffloor_v16f64:
 ; CI: V_FLOOR_F64_e32
 ; CI: V_FLOOR_F64_e32
 ; CI: V_FLOOR_F64_e32
diff --git a/test/CodeGen/R600/fma.ll b/test/CodeGen/R600/fma.ll
index 51e9d29..d72ffec 100644
--- a/test/CodeGen/R600/fma.ll
+++ b/test/CodeGen/R600/fma.ll
@@ -1,8 +1,15 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; CHECK: @fma_f32
-; CHECK: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+
+; FUNC-LABEL: @fma_f32
+; SI: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
 define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                      float addrspace(1)* %in2, float addrspace(1)* %in3) {
    %r0 = load float addrspace(1)* %in1
@@ -13,11 +20,36 @@ define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
    ret void
 }
 
-declare float @llvm.fma.f32(float, float, float)
+; FUNC-LABEL: @fma_v2f32
+; SI: V_FMA_F32
+; SI: V_FMA_F32
+define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
+                       <2 x float> addrspace(1)* %in2, <2 x float> addrspace(1)* %in3) {
+   %r0 = load <2 x float> addrspace(1)* %in1
+   %r1 = load <2 x float> addrspace(1)* %in2
+   %r2 = load <2 x float> addrspace(1)* %in3
+   %r3 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2)
+   store <2 x float> %r3, <2 x float> addrspace(1)* %out
+   ret void
+}
 
-; CHECK: @fma_f64
-; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; FUNC-LABEL: @fma_v4f32
+; SI: V_FMA_F32
+; SI: V_FMA_F32
+; SI: V_FMA_F32
+; SI: V_FMA_F32
+define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
+                       <4 x float> addrspace(1)* %in2, <4 x float> addrspace(1)* %in3) {
+   %r0 = load <4 x float> addrspace(1)* %in1
+   %r1 = load <4 x float> addrspace(1)* %in2
+   %r2 = load <4 x float> addrspace(1)* %in3
+   %r3 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2)
+   store <4 x float> %r3, <4 x float> addrspace(1)* %out
+   ret void
+}
 
+; FUNC-LABEL: @fma_f64
+; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2, double addrspace(1)* %in3) {
    %r0 = load double addrspace(1)* %in1
@@ -28,4 +60,30 @@ define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
    ret void
 }
 
-declare double @llvm.fma.f64(double, double, double)
+; FUNC-LABEL: @fma_v2f64
+; SI: V_FMA_F64
+; SI: V_FMA_F64
+define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
+                       <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
+   %r0 = load <2 x double> addrspace(1)* %in1
+   %r1 = load <2 x double> addrspace(1)* %in2
+   %r2 = load <2 x double> addrspace(1)* %in3
+   %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
+   store <2 x double> %r3, <2 x double> addrspace(1)* %out
+   ret void
+}
+
+; FUNC-LABEL: @fma_v4f64
+; SI: V_FMA_F64
+; SI: V_FMA_F64
+; SI: V_FMA_F64
+; SI: V_FMA_F64
+define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
+                       <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
+   %r0 = load <4 x double> addrspace(1)* %in1
+   %r1 = load <4 x double> addrspace(1)* %in2
+   %r2 = load <4 x double> addrspace(1)* %in3
+   %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
+   store <4 x double> %r3, <4 x double> addrspace(1)* %out
+   ret void
+}
diff --git a/test/CodeGen/R600/fnearbyint.ll b/test/CodeGen/R600/fnearbyint.ll
new file mode 100644
index 0000000..1c1d731
--- /dev/null
+++ b/test/CodeGen/R600/fnearbyint.ll
@@ -0,0 +1,57 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s
+
+; This should have the exactly the same output as the test for rint,
+; so no need to check anything.
+
+declare float @llvm.nearbyint.f32(float) #0
+declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #0
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #0
+declare double @llvm.nearbyint.f64(double) #0
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) #0
+
+
+define void @fnearbyint_f32(float addrspace(1)* %out, float %in) #1 {
+entry:
+  %0 = call float @llvm.nearbyint.f32(float %in)
+  store float %0, float addrspace(1)* %out
+  ret void
+}
+
+define void @fnearbyint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #1 {
+entry:
+  %0 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %in)
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+define void @fnearbyint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #1 {
+entry:
+  %0 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %in)
+  store <4 x float> %0, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define void @nearbyint_f64(double addrspace(1)* %out, double %in) {
+entry:
+  %0 = call double @llvm.nearbyint.f64(double %in)
+  store double %0, double addrspace(1)* %out
+  ret void
+}
+define void @nearbyint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+entry:
+  %0 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %in)
+  store <2 x double> %0, <2 x double> addrspace(1)* %out
+  ret void
+}
+
+define void @nearbyint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+entry:
+  %0 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %in)
+  store <4 x double> %0, <4 x double> addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/R600/fp16_to_fp32.ll b/test/CodeGen/R600/fp16_to_fp32.ll
new file mode 100644
index 0000000..fa2e379
--- /dev/null
+++ b/test/CodeGen/R600/fp16_to_fp32.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare i16 @llvm.convert.to.fp16(float) nounwind readnone
+
+; SI-LABEL: @test_convert_fp16_to_fp32:
+; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
+; SI: V_CVT_F16_F32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: BUFFER_STORE_SHORT [[RESULT]]
+define void @test_convert_fp16_to_fp32(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+  %val = load float addrspace(1)* %in, align 4
+  %cvt = call i16 @llvm.convert.to.fp16(float %val) nounwind readnone
+  store i16 %cvt, i16 addrspace(1)* %out, align 2
+  ret void
+}
diff --git a/test/CodeGen/R600/fp32_to_fp16.ll b/test/CodeGen/R600/fp32_to_fp16.ll
new file mode 100644
index 0000000..9997cd3
--- /dev/null
+++ b/test/CodeGen/R600/fp32_to_fp16.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.convert.from.fp16(i16) nounwind readnone
+
+; SI-LABEL: @test_convert_fp16_to_fp32:
+; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
+; SI: V_CVT_F32_F16_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: BUFFER_STORE_DWORD [[RESULT]]
+define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
+  %val = load i16 addrspace(1)* %in, align 2
+  %cvt = call float @llvm.convert.from.fp16(i16 %val) nounwind readnone
+  store float %cvt, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/fp_to_sint_i64.ll b/test/CodeGen/R600/fp_to_sint_i64.ll
new file mode 100644
index 0000000..ec3e198
--- /dev/null
+++ b/test/CodeGen/R600/fp_to_sint_i64.ll
@@ -0,0 +1,12 @@
+; FIXME: Merge into fp_to_sint.ll when EG/NI supports 64-bit types
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s
+
+; SI-LABEL: @fp_to_sint_i64
+; Check that the compiler doesn't crash with a "cannot select" error
+; SI: S_ENDPGM
+define void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) {
+entry:
+  %0 = fptosi float %in to i64
+  store i64 %0, i64 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll
index 1445a20..f5e5708 100644
--- a/test/CodeGen/R600/fsub64.ll
+++ b/test/CodeGen/R600/fsub64.ll
@@ -1,8 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
-
-; CHECK: @fsub_f64
-; CHECK: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}, 0, 0, 0, 0, 2
+; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
+; SI-LABEL: @fsub_f64:
+; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                       double addrspace(1)* %in2) {
    %r0 = load double addrspace(1)* %in1
diff --git a/test/CodeGen/R600/ftrunc.ll b/test/CodeGen/R600/ftrunc.ll
index 6b235ff..0d7d467 100644
--- a/test/CodeGen/R600/ftrunc.ll
+++ b/test/CodeGen/R600/ftrunc.ll
@@ -1,84 +1,119 @@
-; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG --check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s
 
-declare double @llvm.trunc.f64(double) nounwind readnone
-declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
-declare <3 x double> @llvm.trunc.v3f64(<3 x double>) nounwind readnone
-declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
-declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone
-declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone
+declare float @llvm.trunc.f32(float) nounwind readnone
+declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone
+declare <3 x float> @llvm.trunc.v3f32(<3 x float>) nounwind readnone
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
+declare <8 x float> @llvm.trunc.v8f32(<8 x float>) nounwind readnone
+declare <16 x float> @llvm.trunc.v16f32(<16 x float>) nounwind readnone
 
-; CI-LABEL: @ftrunc_f64:
-; CI: V_TRUNC_F64_e32
-define void @ftrunc_f64(double addrspace(1)* %out, double %x) {
-  %y = call double @llvm.trunc.f64(double %x) nounwind readnone
-  store double %y, double addrspace(1)* %out
+; FUNC-LABEL: @ftrunc_f32:
+; EG: TRUNC
+; SI: V_TRUNC_F32_e32
+define void @ftrunc_f32(float addrspace(1)* %out, float %x) {
+  %y = call float @llvm.trunc.f32(float %x) nounwind readnone
+  store float %y, float addrspace(1)* %out
   ret void
 }
 
-; CI-LABEL: @ftrunc_v2f64:
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
-  %y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) nounwind readnone
-  store <2 x double> %y, <2 x double> addrspace(1)* %out
+; FUNC-LABEL: @ftrunc_v2f32:
+; EG: TRUNC
+; EG: TRUNC
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
+  %y = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x) nounwind readnone
+  store <2 x float> %y, <2 x float> addrspace(1)* %out
   ret void
 }
 
-; FIXME-CI-LABEL: @ftrunc_v3f64:
-; FIXME-CI: V_TRUNC_F64_e32
-; FIXME-CI: V_TRUNC_F64_e32
-; FIXME-CI: V_TRUNC_F64_e32
-; define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
-;   %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) nounwind readnone
-;   store <3 x double> %y, <3 x double> addrspace(1)* %out
+; FIXME-FUNC-LABEL: @ftrunc_v3f32:
+; FIXME-EG: TRUNC
+; FIXME-EG: TRUNC
+; FIXME-EG: TRUNC
+; FIXME-SI: V_TRUNC_F32_e32
+; FIXME-SI: V_TRUNC_F32_e32
+; FIXME-SI: V_TRUNC_F32_e32
+; define void @ftrunc_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
+;   %y = call <3 x float> @llvm.trunc.v3f32(<3 x float> %x) nounwind readnone
+;   store <3 x float> %y, <3 x float> addrspace(1)* %out
 ;   ret void
 ; }
 
-; CI-LABEL: @ftrunc_v4f64:
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
-  %y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
-  store <4 x double> %y, <4 x double> addrspace(1)* %out
+; FUNC-LABEL: @ftrunc_v4f32:
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+define void @ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
+  %y = call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
+  store <4 x float> %y, <4 x float> addrspace(1)* %out
   ret void
 }
 
-; CI-LABEL: @ftrunc_v8f64:
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
-  %y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) nounwind readnone
-  store <8 x double> %y, <8 x double> addrspace(1)* %out
+; FUNC-LABEL: @ftrunc_v8f32:
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
+  %y = call <8 x float> @llvm.trunc.v8f32(<8 x float> %x) nounwind readnone
+  store <8 x float> %y, <8 x float> addrspace(1)* %out
   ret void
 }
 
-; CI-LABEL: @ftrunc_v16f64:
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
-  %y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) nounwind readnone
-  store <16 x double> %y, <16 x double> addrspace(1)* %out
+; FUNC-LABEL: @ftrunc_v16f32:
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+define void @ftrunc_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) {
+  %y = call <16 x float> @llvm.trunc.v16f32(<16 x float> %x) nounwind readnone
+  store <16 x float> %y, <16 x float> addrspace(1)* %out
   ret void
 }
diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll
index 0176061..db64a6f 100644
--- a/test/CodeGen/R600/gv-const-addrspace.ll
+++ b/test/CodeGen/R600/gv-const-addrspace.ll
@@ -6,7 +6,7 @@
 
 ; XXX: Test on SI once 64-bit adds are supportes.
 
-@float_gv = internal addrspace(2) unnamed_addr constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
+@float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
 
 ; FUNC-LABEL: @float
 
@@ -25,7 +25,7 @@ entry:
   ret void
 }
 
-@i32_gv = internal addrspace(2) unnamed_addr constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4
+@i32_gv = internal unnamed_addr addrspace(2) constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4
 
 ; FUNC-LABEL: @i32
 
@@ -47,7 +47,7 @@ entry:
 
 %struct.foo = type { float, [5 x i32] }
 
-@struct_foo_gv = internal addrspace(2) unnamed_addr constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
+@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
 
 ; FUNC-LABEL: @struct_foo_gv_load
 
diff --git a/test/CodeGen/R600/indirect-private-64.ll b/test/CodeGen/R600/indirect-private-64.ll
index 4d1f734..b127b7e 100644
--- a/test/CodeGen/R600/indirect-private-64.ll
+++ b/test/CodeGen/R600/indirect-private-64.ll
@@ -3,10 +3,8 @@
 declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
 
 ; SI-LABEL: @private_access_f64_alloca:
-; SI: V_MOVRELD_B32_e32
-; SI: V_MOVRELD_B32_e32
-; SI: V_MOVRELS_B32_e32
-; SI: V_MOVRELS_B32_e32
+; SI: DS_WRITE_B64
+; SI: DS_READ_B64
 define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
   %val = load double addrspace(1)* %in, align 8
   %array = alloca double, i32 16, align 8
@@ -19,14 +17,10 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
 }
 
 ; SI-LABEL: @private_access_v2f64_alloca:
-; SI: V_MOVRELD_B32_e32
-; SI: V_MOVRELD_B32_e32
-; SI: V_MOVRELD_B32_e32
-; SI: V_MOVRELD_B32_e32
-; SI: V_MOVRELS_B32_e32
-; SI: V_MOVRELS_B32_e32
-; SI: V_MOVRELS_B32_e32
-; SI: V_MOVRELS_B32_e32
+; SI: DS_WRITE_B64
+; SI: DS_WRITE_B64
+; SI: DS_READ_B64
+; SI: DS_READ_B64
 define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
   %val = load <2 x double> addrspace(1)* %in, align 16
   %array = alloca <2 x double>, i32 16, align 16
@@ -39,10 +33,8 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out
 }
 
 ; SI-LABEL: @private_access_i64_alloca:
-; SI: V_MOVRELD_B32_e32
-; SI: V_MOVRELD_B32_e32
-; SI: V_MOVRELS_B32_e32
-; SI: V_MOVRELS_B32_e32
+; SI: DS_WRITE_B64
+; SI: DS_READ_B64
 define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
   %val = load i64 addrspace(1)* %in, align 8
   %array = alloca i64, i32 16, align 8
@@ -55,14 +47,10 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
 }
 
 ; SI-LABEL: @private_access_v2i64_alloca:
-; SI: V_MOVRELD_B32_e32
-; SI: V_MOVRELD_B32_e32
-; SI: V_MOVRELD_B32_e32
-; SI: V_MOVRELD_B32_e32
-; SI: V_MOVRELS_B32_e32
-; SI: V_MOVRELS_B32_e32
-; SI: V_MOVRELS_B32_e32
-; SI: V_MOVRELS_B32_e32
+; SI: DS_WRITE_B64
+; SI: DS_WRITE_B64
+; SI: DS_READ_B64
+; SI: DS_READ_B64
 define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
   %val = load <2 x i64> addrspace(1)* %in, align 16
   %array = alloca <2 x i64>, i32 16, align 16
diff --git a/test/CodeGen/R600/input-mods.ll b/test/CodeGen/R600/input-mods.ll
new file mode 100644
index 0000000..13bfbab
--- /dev/null
+++ b/test/CodeGen/R600/input-mods.ll
@@ -0,0 +1,26 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
+;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK
+
+;EG-CHECK-LABEL: @test
+;EG-CHECK: EXP_IEEE *
+;CM-CHECK-LABEL: @test
+;CM-CHECK: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X|
+;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X|
+;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X|
+;CM-CHECK: EXP_IEEE * T{{[0-9]+}}.W (MASKED), -|T{{[0-9]+}}.X|
+
+define void @test(<4 x float> inreg %reg0) #0 {
+   %r0 = extractelement <4 x float> %reg0, i32 0
+   %r1 = call float @llvm.fabs.f32(float %r0)
+   %r2 = fsub float -0.000000e+00, %r1
+   %r3 = call float @llvm.exp2.f32(float %r2)
+   %vec = insertelement <4 x float> undef, float %r3, i32 0
+   call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
+   ret void
+}
+
+declare float @llvm.exp2.f32(float) readnone
+declare float @llvm.fabs.f32(float) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/large-alloca.ll b/test/CodeGen/R600/large-alloca.ll
new file mode 100644
index 0000000..d8be6d4
--- /dev/null
+++ b/test/CodeGen/R600/large-alloca.ll
@@ -0,0 +1,14 @@
+; XFAIL: *
+; REQUIRES: asserts
+; RUN: llc -march=r600 -mcpu=SI < %s
+
+define void @large_alloca(i32 addrspace(1)* %out, i32 %x, i32 %y) nounwind {
+  %large = alloca [8192 x i32], align 4
+  %gep = getelementptr [8192 x i32]* %large, i32 0, i32 8191
+  store i32 %x, i32* %gep
+  %gep1 = getelementptr [8192 x i32]* %large, i32 0, i32 %y
+  %0 = load i32* %gep1
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
diff --git a/test/CodeGen/R600/large-constant-initializer.ll b/test/CodeGen/R600/large-constant-initializer.ll
new file mode 100644
index 0000000..552cd05
--- /dev/null
+++ b/test/CodeGen/R600/large-constant-initializer.ll
@@ -0,0 +1,19 @@
+; XFAIL: *
+; REQUIRES: asserts
+; RUN: llc -march=r600 -mcpu=SI < %s
+
+@gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4
+
+define void @opencv_cvtfloat_crash(i32 addrspace(1)* %out, i32 %x) nounwind {
+  %val = load i32 addrspace(2)* getelementptr ([239 x i32] addrspace(2)* @gv, i64 0, i64 239), align 4
+  %mul12 = mul nsw i32 %val, 7
+  br i1 undef, label %exit, label %bb
+
+bb:
+  %cmp = icmp slt i32 %x, 0
+  br label %exit
+
+exit:
+  ret void
+}
+
diff --git a/test/CodeGen/R600/lds-output-queue.ll b/test/CodeGen/R600/lds-output-queue.ll
index af0db0d..d5dc061 100644
--- a/test/CodeGen/R600/lds-output-queue.ll
+++ b/test/CodeGen/R600/lds-output-queue.ll
@@ -8,7 +8,7 @@
 ; CHECK-NOT: ALU clause
 ; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
 
-@local_mem = internal addrspace(3) unnamed_addr global [2 x i32] [i32 1, i32 2], align 4
+@local_mem = internal unnamed_addr addrspace(3) global [2 x i32] [i32 1, i32 2], align 4
 
 define void @lds_input_queue(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index) {
 entry:
diff --git a/test/CodeGen/R600/lds-size.ll b/test/CodeGen/R600/lds-size.ll
index 2185180..9182e25 100644
--- a/test/CodeGen/R600/lds-size.ll
+++ b/test/CodeGen/R600/lds-size.ll
@@ -6,7 +6,7 @@
 ; CHECK-LABEL: @test
 ; CHECK: .long   166120
 ; CHECK-NEXT: .long   1
-@lds = internal addrspace(3) unnamed_addr global i32 zeroinitializer, align 4
+@lds = internal unnamed_addr addrspace(3) global i32 zeroinitializer, align 4
 
 define void @test(i32 addrspace(1)* %out, i32 %cond) {
 entry:
diff --git a/test/CodeGen/R600/lit.local.cfg b/test/CodeGen/R600/lit.local.cfg
index 2d8930a..ad9ce25 100644
--- a/test/CodeGen/R600/lit.local.cfg
+++ b/test/CodeGen/R600/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'R600' in targets:
+if not 'R600' in config.root.targets:
     config.unsupported = True
diff --git a/test/CodeGen/R600/llvm.AMDGPU.abs.ll b/test/CodeGen/R600/llvm.AMDGPU.abs.ll
new file mode 100644
index 0000000..a0a47b7
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.abs.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone
+
+; Legacy name
+declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone
+
+; FUNC-LABEL: @s_abs_i32
+; SI: S_SUB_I32
+; SI: S_MAX_I32
+; SI: S_ENDPGM
+
+; EG: SUB_INT
+; EG: MAX_INT
+define void @s_abs_i32(i32 addrspace(1)* %out, i32 %src) nounwind {
+  %abs = call i32 @llvm.AMDGPU.abs(i32 %src) nounwind readnone
+  store i32 %abs, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_abs_i32
+; SI: V_SUB_I32_e32
+; SI: V_MAX_I32_e32
+; SI: S_ENDPGM
+
+; EG: SUB_INT
+; EG: MAX_INT
+define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
+  %val = load i32 addrspace(1)* %src, align 4
+  %abs = call i32 @llvm.AMDGPU.abs(i32 %val) nounwind readnone
+  store i32 %abs, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @abs_i32_legacy_amdil
+; SI: V_SUB_I32_e32
+; SI: V_MAX_I32_e32
+; SI: S_ENDPGM
+
+; EG: SUB_INT
+; EG: MAX_INT
+define void @abs_i32_legacy_amdil(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
+  %val = load i32 addrspace(1)* %src, align 4
+  %abs = call i32 @llvm.AMDIL.abs.i32(i32 %val) nounwind readnone
+  store i32 %abs, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.brev.ll b/test/CodeGen/R600/llvm.AMDGPU.brev.ll
new file mode 100644
index 0000000..68a5ad0
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.brev.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.AMDGPU.brev(i32) nounwind readnone
+
+; FUNC-LABEL: @s_brev_i32:
+; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
+; SI: S_BREV_B32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: BUFFER_STORE_DWORD [[VRESULT]],
+; SI: S_ENDPGM
+define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+  %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
+  store i32 %ctlz, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_brev_i32:
+; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
+; SI: V_BFREV_B32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+  %val = load i32 addrspace(1)* %valptr, align 4
+  %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
+  store i32 %ctlz, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll
new file mode 100644
index 0000000..d608953
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+declare float @llvm.AMDGPU.clamp.f32(float, float, float) nounwind readnone
+declare float @llvm.AMDIL.clamp.f32(float, float, float) nounwind readnone
+
+; FUNC-LABEL: @clamp_0_1_f32
+; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
+; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0, 1, 0
+; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: S_ENDPGM
+
+; EG: MOV_SAT
+define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
+  %clamp = call float @llvm.AMDGPU.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone
+  store float %clamp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @clamp_0_1_amdil_legacy_f32
+; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
+; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0, 1, 0
+; SI: BUFFER_STORE_DWORD [[RESULT]]
+define void @clamp_0_1_amdil_legacy_f32(float addrspace(1)* %out, float %src) nounwind {
+  %clamp = call float @llvm.AMDIL.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone
+  store float %clamp, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll b/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
new file mode 100644
index 0000000..6facb47
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.AMDGPU.cvt.f32.ubyte0(i32) nounwind readnone
+declare float @llvm.AMDGPU.cvt.f32.ubyte1(i32) nounwind readnone
+declare float @llvm.AMDGPU.cvt.f32.ubyte2(i32) nounwind readnone
+declare float @llvm.AMDGPU.cvt.f32.ubyte3(i32) nounwind readnone
+
+; SI-LABEL: @test_unpack_byte0_to_float:
+; SI: V_CVT_F32_UBYTE0
+define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte0(i32 %val) nounwind readnone
+  store float %cvt, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_unpack_byte1_to_float:
+; SI: V_CVT_F32_UBYTE1
+define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte1(i32 %val) nounwind readnone
+  store float %cvt, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_unpack_byte2_to_float:
+; SI: V_CVT_F32_UBYTE2
+define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte2(i32 %val) nounwind readnone
+  store float %cvt, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_unpack_byte3_to_float:
+; SI: V_CVT_F32_UBYTE3
+define void @test_unpack_byte3_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte3(i32 %val) nounwind readnone
+  store float %cvt, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
new file mode 100644
index 0000000..c8c7357
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone
+declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone
+
+; SI-LABEL: @test_div_fixup_f32:
+; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]]
+; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: V_DIV_FIXUP_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
+  %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_div_fixup_f64:
+; SI: V_DIV_FIXUP_F64
+define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
+  %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
+  store double %result, double addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
new file mode 100644
index 0000000..4f1e827
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.AMDGPU.div.fmas.f32(float, float, float) nounwind readnone
+declare double @llvm.AMDGPU.div.fmas.f64(double, double, double) nounwind readnone
+
+; SI-LABEL: @test_div_fmas_f32:
+; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]]
+; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: V_DIV_FMAS_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
+  %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c) nounwind readnone
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_div_fmas_f64:
+; SI: V_DIV_FMAS_F64
+define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
+  %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c) nounwind readnone
+  store double %result, double addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
new file mode 100644
index 0000000..527c8da
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone
+declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone
+
+; SI-LABEL @test_div_scale_f32_1:
+; SI: V_DIV_SCALE_F32
+define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind {
+  %a = load float addrspace(1)* %aptr, align 4
+  %b = load float addrspace(1)* %bptr, align 4
+  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+  %result0 = extractvalue { float, i1 } %result, 0
+  store float %result0, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f32_2:
+; SI: V_DIV_SCALE_F32
+define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind {
+  %a = load float addrspace(1)* %aptr, align 4
+  %b = load float addrspace(1)* %bptr, align 4
+  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+  %result0 = extractvalue { float, i1 } %result, 0
+  store float %result0, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f64_1:
+; SI: V_DIV_SCALE_F64
+define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr, double addrspace(1)* %cptr) nounwind {
+  %a = load double addrspace(1)* %aptr, align 8
+  %b = load double addrspace(1)* %bptr, align 8
+  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+  %result0 = extractvalue { double, i1 } %result, 0
+  store double %result0, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f64_1:
+; SI: V_DIV_SCALE_F64
+define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr, double addrspace(1)* %cptr) nounwind {
+  %a = load double addrspace(1)* %aptr, align 8
+  %b = load double addrspace(1)* %bptr, align 8
+  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+  %result0 = extractvalue { double, i1 } %result, 0
+  store double %result0, double addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.ll b/test/CodeGen/R600/llvm.AMDGPU.fract.ll
new file mode 100644
index 0000000..72ec1c5
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.fract.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone
+
+; Legacy name
+declare float @llvm.AMDIL.fraction.f32(float) nounwind readnone
+
+; FUNC-LABEL: @fract_f32
+; SI: V_FRACT_F32
+; EG: FRACT
+define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
+  %val = load float addrspace(1)* %src, align 4
+  %fract = call float @llvm.AMDGPU.fract.f32(float %val) nounwind readnone
+  store float %fract, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @fract_f32_legacy_amdil
+; SI: V_FRACT_F32
+; EG: FRACT
+define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
+  %val = load float addrspace(1)* %src, align 4
+  %fract = call float @llvm.AMDIL.fraction.f32(float %val) nounwind readnone
+  store float %fract, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll b/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll
new file mode 100644
index 0000000..51964ee
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+declare float @llvm.AMDGPU.legacy.rsq(float) nounwind readnone
+
+; FUNC-LABEL: @rsq_legacy_f32
+; SI: V_RSQ_LEGACY_F32_e32
+; EG: RECIPSQRT_IEEE
+define void @rsq_legacy_f32(float addrspace(1)* %out, float %src) nounwind {
+  %rsq = call float @llvm.AMDGPU.legacy.rsq(float %src) nounwind readnone
+  store float %rsq, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll
new file mode 100644
index 0000000..ca5260d
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll
@@ -0,0 +1,58 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
+declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
+
+
+declare float @llvm.sqrt.f32(float) nounwind readnone
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+; FUNC-LABEL: @rcp_f32
+; SI: V_RCP_F32_e32
+define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind {
+  %rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone
+  store float %rcp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @rcp_f64
+; SI: V_RCP_F64_e32
+define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind {
+  %rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone
+  store double %rcp, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @rcp_pat_f32
+; SI: V_RCP_F32_e32
+define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
+  %rcp = fdiv float 1.0, %src
+  store float %rcp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @rcp_pat_f64
+; SI: V_RCP_F64_e32
+define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
+  %rcp = fdiv double 1.0, %src
+  store double %rcp, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @rsq_rcp_pat_f32
+; SI: V_RSQ_F32_e32
+define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
+  %sqrt = call float @llvm.sqrt.f32(float %src) nounwind readnone
+  %rcp = call float @llvm.AMDGPU.rcp.f32(float %sqrt) nounwind readnone
+  store float %rcp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @rsq_rcp_pat_f64
+; SI: V_RSQ_F64_e32
+define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
+  %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone
+  %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone
+  store double %rcp, double addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll
new file mode 100644
index 0000000..100d6ff
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
+
+; FUNC-LABEL: @rsq_clamped_f64
+; SI: V_RSQ_CLAMP_F64_e32
+define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
+  %rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone
+  store double %rsq_clamped, double addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll
new file mode 100644
index 0000000..683df73
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone
+
+; FUNC-LABEL: @rsq_clamped_f32
+; SI: V_RSQ_CLAMP_F32_e32
+; EG: RECIPSQRT_CLAMPED
+define void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind {
+  %rsq_clamped = call float @llvm.AMDGPU.rsq.clamped.f32(float %src) nounwind readnone
+  store float %rsq_clamped, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
new file mode 100644
index 0000000..27cf6b2
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+declare float @llvm.AMDGPU.rsq.f32(float) nounwind readnone
+
+; FUNC-LABEL: @rsq_f32
+; SI: V_RSQ_F32_e32
+; EG: RECIPSQRT_IEEE
+define void @rsq_f32(float addrspace(1)* %out, float %src) nounwind {
+  %rsq = call float @llvm.AMDGPU.rsq.f32(float %src) nounwind readnone
+  store float %rsq, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
new file mode 100644
index 0000000..1c736d4
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone
+
+; SI-LABEL: @test_trig_preop_f64:
+; SI-DAG: BUFFER_LOAD_DWORD [[SEG:v[0-9]+]]
+; SI-DAG: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]],
+; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]]
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
+; SI: S_ENDPGM
+define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %a = load double addrspace(1)* %aptr, align 8
+  %b = load i32 addrspace(1)* %bptr, align 4
+  %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone
+  store double %result, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: @test_trig_preop_f64_imm_segment:
+; SI: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]],
+; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
+; SI: S_ENDPGM
+define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
+  %a = load double addrspace(1)* %aptr, align 8
+  %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone
+  store double %result, double addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.SI.gather4.ll b/test/CodeGen/R600/llvm.SI.gather4.ll
new file mode 100644
index 0000000..8402faa
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.gather4.ll
@@ -0,0 +1,508 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+;CHECK-LABEL: @gather4_v2
+;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_v2() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4
+;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_cl
+;CHECK: IMAGE_GATHER4_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_cl() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_l
+;CHECK: IMAGE_GATHER4_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_l() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_b
+;CHECK: IMAGE_GATHER4_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_b() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_b_cl
+;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_b_cl() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_b_cl_v8
+;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_b_cl_v8() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_lz_v2
+;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_lz_v2() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_lz
+;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_lz() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+
+
+;CHECK-LABEL: @gather4_o
+;CHECK: IMAGE_GATHER4_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_o() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_cl_o
+;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_cl_o() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_cl_o_v8
+;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_cl_o_v8() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_l_o
+;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_l_o() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_l_o_v8
+;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_l_o_v8() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_b_o
+;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_b_o() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_b_o_v8
+;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_b_o_v8() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_b_cl_o
+;CHECK: IMAGE_GATHER4_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_b_cl_o() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_lz_o
+;CHECK: IMAGE_GATHER4_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_lz_o() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+
+
+;CHECK-LABEL: @gather4_c
+;CHECK: IMAGE_GATHER4_C {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_cl
+;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_cl() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_cl_v8
+;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_cl_v8() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_l
+;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_l() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_l_v8
+;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_l_v8() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_b
+;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_b() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_b_v8
+;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_b_v8() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_b_cl
+;CHECK: IMAGE_GATHER4_C_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_b_cl() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_lz
+;CHECK: IMAGE_GATHER4_C_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_lz() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+
+
+;CHECK-LABEL: @gather4_c_o
+;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_o() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_o_v8
+;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_o_v8() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_cl_o
+;CHECK: IMAGE_GATHER4_C_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_cl_o() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_l_o
+;CHECK: IMAGE_GATHER4_C_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_l_o() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_b_o
+;CHECK: IMAGE_GATHER4_C_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_b_o() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_b_cl_o
+;CHECK: IMAGE_GATHER4_C_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_b_cl_o() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_lz_o
+;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_lz_o() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+;CHECK-LABEL: @gather4_c_lz_o_v8
+;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @gather4_c_lz_o_v8() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  %r2 = extractelement <4 x float> %r, i32 2
+  %r3 = extractelement <4 x float> %r, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+  ret void
+}
+
+
+
+declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
+declare <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
+declare <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
+declare <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/llvm.SI.getlod.ll b/test/CodeGen/R600/llvm.SI.getlod.ll
new file mode 100644
index 0000000..a7a17ec
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.getlod.ll
@@ -0,0 +1,44 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+;CHECK-LABEL: @getlod
+;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @getlod() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.getlod.i32(i32 undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1)
+  ret void
+}
+
+;CHECK-LABEL: @getlod_v2
+;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @getlod_v2() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.getlod.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1)
+  ret void
+}
+
+;CHECK-LABEL: @getlod_v4
+;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @getlod_v4() #0 {
+main_body:
+  %r = call <4 x float> @llvm.SI.getlod.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %r0 = extractelement <4 x float> %r, i32 0
+  %r1 = extractelement <4 x float> %r, i32 1
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1)
+  ret void
+}
+
+
+declare <4 x float> @llvm.SI.getlod.i32(i32, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.getlod.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.SI.getlod.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/llvm.exp2.ll b/test/CodeGen/R600/llvm.exp2.ll
index 13bfbab..119d5ef 100644
--- a/test/CodeGen/R600/llvm.exp2.ll
+++ b/test/CodeGen/R600/llvm.exp2.ll
@@ -1,26 +1,79 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
-;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK --check-prefix=FUNC
+;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK --check-prefix=FUNC
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
 
-;EG-CHECK-LABEL: @test
-;EG-CHECK: EXP_IEEE *
-;CM-CHECK-LABEL: @test
-;CM-CHECK: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X|
-;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X|
-;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X|
-;CM-CHECK: EXP_IEEE * T{{[0-9]+}}.W (MASKED), -|T{{[0-9]+}}.X|
+;FUNC-LABEL: @test
+;EG-CHECK: EXP_IEEE
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
+;SI-CHECK: V_EXP_F32
 
-define void @test(<4 x float> inreg %reg0) #0 {
-   %r0 = extractelement <4 x float> %reg0, i32 0
-   %r1 = call float @llvm.fabs.f32(float %r0)
-   %r2 = fsub float -0.000000e+00, %r1
-   %r3 = call float @llvm.exp2.f32(float %r2)
-   %vec = insertelement <4 x float> undef, float %r3, i32 0
-   call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
+define void @test(float addrspace(1)* %out, float %in) {
+entry:
+   %0 = call float @llvm.exp2.f32(float %in)
+   store float %0, float addrspace(1)* %out
    ret void
 }
 
-declare float @llvm.exp2.f32(float) readnone
-declare float @llvm.fabs.f32(float) readnone
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+;FUNC-LABEL: @testv2
+;EG-CHECK: EXP_IEEE
+;EG-CHECK: EXP_IEEE
+; FIXME: We should be able to merge these packets together on Cayman so we
+; have a maximum of 4 instructions.
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
+;SI-CHECK: V_EXP_F32
+;SI-CHECK: V_EXP_F32
+
+define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+entry:
+  %0 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %in)
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
 
-attributes #0 = { "ShaderType"="0" }
+;FUNC-LABEL: @testv4
+;EG-CHECK: EXP_IEEE
+;EG-CHECK: EXP_IEEE
+;EG-CHECK: EXP_IEEE
+;EG-CHECK: EXP_IEEE
+; FIXME: We should be able to merge these packets together on Cayman so we
+; have a maximum of 4 instructions.
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
+;SI-CHECK: V_EXP_F32
+;SI-CHECK: V_EXP_F32
+;SI-CHECK: V_EXP_F32
+;SI-CHECK: V_EXP_F32
+define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+entry:
+  %0 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
+  store <4 x float> %0, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+declare float @llvm.exp2.f32(float) readnone
+declare <2 x float> @llvm.exp2.v2f32(<2 x float>) readnone
+declare <4 x float> @llvm.exp2.v4f32(<4 x float>) readnone
diff --git a/test/CodeGen/R600/llvm.log2.ll b/test/CodeGen/R600/llvm.log2.ll
new file mode 100644
index 0000000..4cba2d4
--- /dev/null
+++ b/test/CodeGen/R600/llvm.log2.ll
@@ -0,0 +1,79 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK --check-prefix=FUNC
+;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK --check-prefix=FUNC
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
+
+;FUNC-LABEL: @test
+;EG-CHECK: LOG_IEEE
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
+;SI-CHECK: V_LOG_F32
+
+define void @test(float addrspace(1)* %out, float %in) {
+entry:
+   %0 = call float @llvm.log2.f32(float %in)
+   store float %0, float addrspace(1)* %out
+   ret void
+}
+
+;FUNC-LABEL: @testv2
+;EG-CHECK: LOG_IEEE
+;EG-CHECK: LOG_IEEE
+; FIXME: We should be able to merge these packets together on Cayman so we
+; have a maximum of 4 instructions.
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
+;SI-CHECK: V_LOG_F32
+;SI-CHECK: V_LOG_F32
+
+define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+entry:
+  %0 = call <2 x float> @llvm.log2.v2f32(<2 x float> %in)
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+;FUNC-LABEL: @testv4
+;EG-CHECK: LOG_IEEE
+;EG-CHECK: LOG_IEEE
+;EG-CHECK: LOG_IEEE
+;EG-CHECK: LOG_IEEE
+; FIXME: We should be able to merge these packets together on Cayman so we
+; have a maximum of 4 instructions.
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
+;SI-CHECK: V_LOG_F32
+;SI-CHECK: V_LOG_F32
+;SI-CHECK: V_LOG_F32
+;SI-CHECK: V_LOG_F32
+define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+entry:
+  %0 = call <4 x float> @llvm.log2.v4f32(<4 x float> %in)
+  store <4 x float> %0, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+declare float @llvm.log2.f32(float) readnone
+declare <2 x float> @llvm.log2.v2f32(<2 x float>) readnone
+declare <4 x float> @llvm.log2.v4f32(<4 x float>) readnone
diff --git a/test/CodeGen/R600/llvm.rint.f64.ll b/test/CodeGen/R600/llvm.rint.f64.ll
index a7a909a..3e2884b 100644
--- a/test/CodeGen/R600/llvm.rint.f64.ll
+++ b/test/CodeGen/R600/llvm.rint.f64.ll
@@ -1,30 +1,38 @@
 ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @f64
+; FUNC-LABEL: @rint_f64
 ; CI: V_RNDNE_F64_e32
-define void @f64(double addrspace(1)* %out, double %in) {
+
+; SI-DAG: V_ADD_F64
+; SI-DAG: V_ADD_F64
+; SI-DAG V_CMP_GT_F64_e64
+; SI: V_CNDMASK_B32
+; SI: V_CNDMASK_B32
+; SI: S_ENDPGM
+define void @rint_f64(double addrspace(1)* %out, double %in) {
 entry:
   %0 = call double @llvm.rint.f64(double %in)
   store double %0, double addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @v2f64
+; FUNC-LABEL: @rint_v2f64
 ; CI: V_RNDNE_F64_e32
 ; CI: V_RNDNE_F64_e32
-define void @v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+define void @rint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
 entry:
   %0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in)
   store <2 x double> %0, <2 x double> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @v4f64
+; FUNC-LABEL: @rint_v4f64
 ; CI: V_RNDNE_F64_e32
 ; CI: V_RNDNE_F64_e32
 ; CI: V_RNDNE_F64_e32
 ; CI: V_RNDNE_F64_e32
-define void @v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+define void @rint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
 entry:
   %0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in)
   store <4 x double> %0, <4 x double> addrspace(1)* %out
diff --git a/test/CodeGen/R600/llvm.rint.ll b/test/CodeGen/R600/llvm.rint.ll
index db8352f..209bb43 100644
--- a/test/CodeGen/R600/llvm.rint.ll
+++ b/test/CodeGen/R600/llvm.rint.ll
@@ -1,31 +1,31 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @f32
+; FUNC-LABEL: @rint_f32
 ; R600: RNDNE
 
 ; SI: V_RNDNE_F32_e32
-define void @f32(float addrspace(1)* %out, float %in) {
+define void @rint_f32(float addrspace(1)* %out, float %in) {
 entry:
-  %0 = call float @llvm.rint.f32(float %in)
+  %0 = call float @llvm.rint.f32(float %in) #0
   store float %0, float addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @v2f32
+; FUNC-LABEL: @rint_v2f32
 ; R600: RNDNE
 ; R600: RNDNE
 
 ; SI: V_RNDNE_F32_e32
 ; SI: V_RNDNE_F32_e32
-define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+define void @rint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
 entry:
-  %0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in)
+  %0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) #0
   store <2 x float> %0, <2 x float> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @v4f32
+; FUNC-LABEL: @rint_v4f32
 ; R600: RNDNE
 ; R600: RNDNE
 ; R600: RNDNE
@@ -35,15 +35,27 @@ entry:
 ; SI: V_RNDNE_F32_e32
 ; SI: V_RNDNE_F32_e32
 ; SI: V_RNDNE_F32_e32
-define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+define void @rint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
 entry:
-  %0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in)
+  %0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) #0
   store <4 x float> %0, <4 x float> addrspace(1)* %out
   ret void
 }
 
+; FUNC-LABEL: @legacy_amdil_round_nearest_f32
+; R600: RNDNE
+
+; SI: V_RNDNE_F32_e32
+define void @legacy_amdil_round_nearest_f32(float addrspace(1)* %out, float %in) {
+entry:
+  %0 = call float @llvm.AMDIL.round.nearest.f32(float %in) #0
+  store float %0, float addrspace(1)* %out
+  ret void
+}
+
+declare float @llvm.AMDIL.round.nearest.f32(float) #0
 declare float @llvm.rint.f32(float) #0
 declare <2 x float> @llvm.rint.v2f32(<2 x float>) #0
 declare <4 x float> @llvm.rint.v4f32(<4 x float>) #0
 
-attributes #0 = { nounwind readonly }
+attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index 1486c4d..a57df5c 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -696,8 +696,7 @@ entry:
 ; R600-CHECK: LDS_READ_RET
 ; R600-CHECK: LDS_READ_RET
 ; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_B32
-; SI-CHECK: DS_READ_B32
+; SI-CHECK: DS_READ_B64
 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
 entry:
   %0 = load <2 x float> addrspace(3)* %in
diff --git a/test/CodeGen/R600/local-atomics.ll b/test/CodeGen/R600/local-atomics.ll
new file mode 100644
index 0000000..5a44951
--- /dev/null
+++ b/test/CodeGen/R600/local-atomics.ll
@@ -0,0 +1,254 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; FUNC-LABEL: @lds_atomic_xchg_ret_i32:
+; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
+; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
+; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_xchg_ret_i32_offset:
+; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; XXX - Is it really necessary to load 4 into VGPR?
+; FUNC-LABEL: @lds_atomic_add_ret_i32:
+; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
+; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
+; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: DS_ADD_RTN_U32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_add_ret_i32_offset:
+; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_inc_ret_i32:
+; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
+; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
+; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0
+; SI: S_ENDPGM
+define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_inc_ret_i32_offset:
+; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
+; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
+; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_sub_ret_i32:
+; SI: DS_SUB_RTN_U32
+; SI: S_ENDPGM
+define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_sub_ret_i32_offset:
+; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_dec_ret_i32:
+; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
+; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
+; SI: DS_DEC_RTN_U32  v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0
+; SI: S_ENDPGM
+define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_dec_ret_i32_offset:
+; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
+; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
+; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_and_ret_i32:
+; SI: DS_AND_RTN_B32
+; SI: S_ENDPGM
+define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_and_ret_i32_offset:
+; SI: DS_AND_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_or_ret_i32:
+; SI: DS_OR_RTN_B32
+; SI: S_ENDPGM
+define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_or_ret_i32_offset:
+; SI: DS_OR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_xor_ret_i32:
+; SI: DS_XOR_RTN_B32
+; SI: S_ENDPGM
+define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_xor_ret_i32_offset:
+; SI: DS_XOR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FIXME: There is no atomic nand instr
+; XFUNC-LABEL: @lds_atomic_nand_ret_i32:uction, so we somehow need to expand this.
+; define void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
+;   store i32 %result, i32 addrspace(1)* %out, align 4
+;   ret void
+; }
+
+; FUNC-LABEL: @lds_atomic_min_ret_i32:
+; SI: DS_MIN_RTN_I32
+; SI: S_ENDPGM
+define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_min_ret_i32_offset:
+; SI: DS_MIN_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_max_ret_i32:
+; SI: DS_MAX_RTN_I32
+; SI: S_ENDPGM
+define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_max_ret_i32_offset:
+; SI: DS_MAX_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_umin_ret_i32:
+; SI: DS_MIN_RTN_U32
+; SI: S_ENDPGM
+define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_umin_ret_i32_offset:
+; SI: DS_MIN_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_umax_ret_i32:
+; SI: DS_MAX_RTN_U32
+; SI: S_ENDPGM
+define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_umax_ret_i32_offset:
+; SI: DS_MAX_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/local-atomics64.ll b/test/CodeGen/R600/local-atomics64.ll
new file mode 100644
index 0000000..849b033
--- /dev/null
+++ b/test/CodeGen/R600/local-atomics64.ll
@@ -0,0 +1,251 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; FUNC-LABEL: @lds_atomic_xchg_ret_i64:
+; SI: DS_WRXCHG_RTN_B64
+; SI: S_ENDPGM
+define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_xchg_ret_i64_offset:
+; SI: DS_WRXCHG_RTN_B64 {{.*}} 0x20
+; SI: S_ENDPGM
+define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_add_ret_i64:
+; SI: DS_ADD_RTN_U64
+; SI: S_ENDPGM
+define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_add_ret_i64_offset:
+; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9
+; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
+; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
+; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI: DS_ADD_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}, 0x20, [M0]
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
+; SI: S_ENDPGM
+define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i64 4
+  %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_inc_ret_i64:
+; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
+; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
+; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
+; SI: DS_INC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}},
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
+; SI: S_ENDPGM
+define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_inc_ret_i64_offset:
+; SI: DS_INC_RTN_U64 {{.*}} 0x20
+; SI: S_ENDPGM
+define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_sub_ret_i64:
+; SI: DS_SUB_RTN_U64
+; SI: S_ENDPGM
+define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_sub_ret_i64_offset:
+; SI: DS_SUB_RTN_U64 {{.*}} 0x20
+; SI: S_ENDPGM
+define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_dec_ret_i64:
+; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
+; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
+; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
+; SI: DS_DEC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}},
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
+; SI: S_ENDPGM
+define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_dec_ret_i64_offset:
+; SI: DS_DEC_RTN_U64 {{.*}} 0x20
+; SI: S_ENDPGM
+define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_and_ret_i64:
+; SI: DS_AND_RTN_B64
+; SI: S_ENDPGM
+define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_and_ret_i64_offset:
+; SI: DS_AND_RTN_B64 {{.*}} 0x20
+; SI: S_ENDPGM
+define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_or_ret_i64:
+; SI: DS_OR_RTN_B64
+; SI: S_ENDPGM
+define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_or_ret_i64_offset:
+; SI: DS_OR_RTN_B64 {{.*}} 0x20
+; SI: S_ENDPGM
+define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_xor_ret_i64:
+; SI: DS_XOR_RTN_B64
+; SI: S_ENDPGM
+define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_xor_ret_i64_offset:
+; SI: DS_XOR_RTN_B64 {{.*}} 0x20
+; SI: S_ENDPGM
+define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FIXME: There is no atomic nand instr
+; XFUNC-LABEL: @lds_atomic_nand_ret_i64:uction, so we somehow need to expand this.
+; define void @lds_atomic_nand_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i32 4 seq_cst
+;   store i64 %result, i64 addrspace(1)* %out, align 8
+;   ret void
+; }
+
+; FUNC-LABEL: @lds_atomic_min_ret_i64:
+; SI: DS_MIN_RTN_I64
+; SI: S_ENDPGM
+define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_min_ret_i64_offset:
+; SI: DS_MIN_RTN_I64 {{.*}} 0x20
+; SI: S_ENDPGM
+define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_max_ret_i64:
+; SI: DS_MAX_RTN_I64
+; SI: S_ENDPGM
+define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_max_ret_i64_offset:
+; SI: DS_MAX_RTN_I64 {{.*}} 0x20
+; SI: S_ENDPGM
+define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_umin_ret_i64:
+; SI: DS_MIN_RTN_U64
+; SI: S_ENDPGM
+define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_umin_ret_i64_offset:
+; SI: DS_MIN_RTN_U64 {{.*}} 0x20
+; SI: S_ENDPGM
+define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_umax_ret_i64:
+; SI: DS_MAX_RTN_U64
+; SI: S_ENDPGM
+define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @lds_atomic_umax_ret_i64_offset:
+; SI: DS_MAX_RTN_U64 {{.*}} 0x20
+; SI: S_ENDPGM
+define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll
index 1e42285..e29e4cc 100644
--- a/test/CodeGen/R600/local-memory-two-objects.ll
+++ b/test/CodeGen/R600/local-memory-two-objects.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
 ; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-@local_memory_two_objects.local_mem0 = internal addrspace(3) unnamed_addr global [4 x i32] zeroinitializer, align 4
-@local_memory_two_objects.local_mem1 = internal addrspace(3) unnamed_addr global [4 x i32] zeroinitializer, align 4
+@local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4
+@local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4
 
 ; EG-CHECK: @local_memory_two_objects
 
diff --git a/test/CodeGen/R600/local-memory.ll b/test/CodeGen/R600/local-memory.ll
index 6ebe41d..51af484 100644
--- a/test/CodeGen/R600/local-memory.ll
+++ b/test/CodeGen/R600/local-memory.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 ; RUN: llc < %s -march=r600 -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=CI-CHECK %s
 
-@local_memory.local_mem = internal addrspace(3) unnamed_addr global [128 x i32] zeroinitializer, align 4
+@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] zeroinitializer, align 4
 
 ; EG-CHECK-LABEL: @local_memory
 ; SI-CHECK-LABEL: @local_memory
diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll
index 6ed754c..d231e92 100644
--- a/test/CodeGen/R600/mul.ll
+++ b/test/CodeGen/R600/mul.ll
@@ -1,14 +1,14 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s -check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 ; mul24 and mad24 are affected
 
-;FUNC-LABEL: @test2
-;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; FUNC-LABEL: @test2
+; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -19,16 +19,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   ret void
 }
 
-;FUNC-LABEL: @test4
-;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; FUNC-LABEL: @test4
+; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -39,11 +39,11 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   ret void
 }
 
-; SI-CHECK-LABEL: @trunc_i64_mul_to_i32
-; SI-CHECK: S_LOAD_DWORD
-; SI-CHECK: S_LOAD_DWORD
-; SI-CHECK: V_MUL_LO_I32
-; SI-CHECK: BUFFER_STORE_DWORD
+; FUNC-LABEL: @trunc_i64_mul_to_i32
+; SI: S_LOAD_DWORD
+; SI: S_LOAD_DWORD
+; SI: V_MUL_LO_I32
+; SI: BUFFER_STORE_DWORD
 define void @trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
   %mul = mul i64 %b, %a
   %trunc = trunc i64 %mul to i32
diff --git a/test/CodeGen/R600/no-initializer-constant-addrspace.ll b/test/CodeGen/R600/no-initializer-constant-addrspace.ll
new file mode 100644
index 0000000..ab82e7e
--- /dev/null
+++ b/test/CodeGen/R600/no-initializer-constant-addrspace.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=r600 -mcpu=SI -o /dev/null %s
+; RUN: llc -march=r600 -mcpu=cypress -o /dev/null %s
+
+@extern_const_addrspace = external unnamed_addr addrspace(2) constant [5 x i32], align 4
+
+; FUNC-LABEL: @load_extern_const_init
+define void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
+  %val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+@undef_const_addrspace = unnamed_addr addrspace(2) constant [5 x i32] undef, align 4
+
+; FUNC-LABEL: @load_undef_const_init
+define void @load_undef_const_init(i32 addrspace(1)* %out) nounwind {
+  %val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll
index 9878366..91a70b7 100644
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -116,9 +116,9 @@ define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64
 }
 
 ; SI-LABEL: @trunc_i64_or_to_i32
-; SI: S_LOAD_DWORD [[SREG0:s[0-9]+]],
-; SI: S_LOAD_DWORD [[SREG1:s[0-9]+]],
-; SI: S_OR_B32 [[SRESULT:s[0-9]+]], [[SREG1]], [[SREG0]]
+; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]]
+; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]]
+; SI: S_OR_B32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
 ; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
 ; SI: BUFFER_STORE_DWORD [[VRESULT]],
 define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
diff --git a/test/CodeGen/R600/parallelandifcollapse.ll b/test/CodeGen/R600/parallelandifcollapse.ll
index 4afaf68..8a269e0 100644
--- a/test/CodeGen/R600/parallelandifcollapse.ll
+++ b/test/CodeGen/R600/parallelandifcollapse.ll
@@ -7,6 +7,12 @@
 ; CHECK: AND_INT
 ; CHECK-NEXT: AND_INT
 ; CHECK-NEXT: OR_INT
+
+; FIXME: For some reason having the allocas here allowed the flatten cfg pass
+; to do its transfomation, however now that we are using local memory for
+; allocas, the transformation isn't happening.
+; XFAIL: *
+
 define void @_Z9chk1D_512v() #0 {
 entry:
   %a0 = alloca i32, align 4
diff --git a/test/CodeGen/R600/parallelorifcollapse.ll b/test/CodeGen/R600/parallelorifcollapse.ll
index b0db7cd..feca688 100644
--- a/test/CodeGen/R600/parallelorifcollapse.ll
+++ b/test/CodeGen/R600/parallelorifcollapse.ll
@@ -3,6 +3,11 @@
 ;
 ; CFG flattening should use parallel-or to generate branch conditions and
 ; then merge if-regions with the same bodies.
+
+; FIXME: For some reason having the allocas here allowed the flatten cfg pass
+; to do its transfomation, however now that we are using local memory for
+; allocas, the transformation isn't happening.
+; XFAIL: *
 ;
 ; CHECK: OR_INT
 ; CHECK-NEXT: OR_INT
diff --git a/test/CodeGen/R600/private-memory-atomics.ll b/test/CodeGen/R600/private-memory-atomics.ll
new file mode 100644
index 0000000..def4f9d
--- /dev/null
+++ b/test/CodeGen/R600/private-memory-atomics.ll
@@ -0,0 +1,31 @@
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s
+
+; This works because promote allocas pass replaces these with LDS atomics.
+
+; Private atomics have no real use, but at least shouldn't crash on it.
+define void @atomicrmw_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+entry:
+  %tmp = alloca [2 x i32]
+  %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
+  %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+  store i32 0, i32* %tmp1
+  store i32 1, i32* %tmp2
+  %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+  %tmp4 = atomicrmw add i32* %tmp3, i32 7 acq_rel
+  store i32 %tmp4, i32 addrspace(1)* %out
+  ret void
+}
+
+define void @cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+entry:
+  %tmp = alloca [2 x i32]
+  %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
+  %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+  store i32 0, i32* %tmp1
+  store i32 1, i32* %tmp2
+  %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+  %tmp4 = cmpxchg i32* %tmp3, i32 0, i32 1 acq_rel monotonic
+  %val = extractvalue { i32, i1 } %tmp4, 0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/private-memory-broken.ll b/test/CodeGen/R600/private-memory-broken.ll
new file mode 100644
index 0000000..4086085
--- /dev/null
+++ b/test/CodeGen/R600/private-memory-broken.ll
@@ -0,0 +1,20 @@
+; RUN: not llc -verify-machineinstrs -march=r600 -mcpu=SI %s -o /dev/null 2>&1 | FileCheck %s
+
+; Make sure promote alloca pass doesn't crash
+
+; CHECK: unsupported call
+
+declare i32 @foo(i32*) nounwind
+
+define void @call_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+entry:
+  %tmp = alloca [2 x i32]
+  %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
+  %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+  store i32 0, i32* %tmp1
+  store i32 1, i32* %tmp2
+  %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+  %val = call i32 @foo(i32* %tmp3) nounwind
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll
index d3453f2..89122be 100644
--- a/test/CodeGen/R600/private-memory.ll
+++ b/test/CodeGen/R600/private-memory.ll
@@ -1,24 +1,19 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC
 ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
 
-; This test checks that uses and defs of the AR register happen in the same
-; instruction clause.
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 
 ; FUNC-LABEL: @mova_same_clause
 
-; R600-CHECK: MOVA_INT
-; R600-CHECK-NOT: ALU clause
-; R600-CHECK: 0 + AR.x
-; R600-CHECK: MOVA_INT
-; R600-CHECK-NOT: ALU clause
-; R600-CHECK: 0 + AR.x
-
-; SI-CHECK: V_READFIRSTLANE_B32 vcc_lo
-; SI-CHECK: V_MOVRELD
-; SI-CHECK: S_CBRANCH
-; SI-CHECK: V_READFIRSTLANE_B32 vcc_lo
-; SI-CHECK: V_MOVRELD
-; SI-CHECK: S_CBRANCH
+; R600-CHECK: LDS_WRITE
+; R600-CHECK: LDS_WRITE
+; R600-CHECK: LDS_READ
+; R600-CHECK: LDS_READ
+
+; SI-CHECK: DS_WRITE_B32
+; SI-CHECK: DS_WRITE_B32
+; SI-CHECK: DS_READ_B32
+; SI-CHECK: DS_READ_B32
 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
 entry:
   %stack = alloca [5 x i32], align 4
@@ -114,12 +109,8 @@ for.end:
 
 ; FUNC-LABEL: @short_array
 
-; R600-CHECK: MOV {{\** *}}T{{[0-9]\.[XYZW]}}, literal
-; R600-CHECK: 65536
-; R600-CHECK: *
 ; R600-CHECK: MOVA_INT
 
-; SI-CHECK: V_MOV_B32_e32 v{{[0-9]}}, 0x10000
 ; SI-CHECK: V_MOVRELS_B32_e32
 define void @short_array(i32 addrspace(1)* %out, i32 %index) {
 entry:
@@ -137,10 +128,7 @@ entry:
 
 ; FUNC-LABEL: @char_array
 
-; R600-CHECK: OR_INT {{\** *}}T{{[0-9]\.[XYZW]}}, {{[PVT0-9]+\.[XYZW]}}, literal
-; R600-CHECK: 256
-; R600-CHECK: *
-; R600-CHECK-NEXT: MOVA_INT
+; R600-CHECK: MOVA_INT
 
 ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 0x100
 ; SI-CHECK: V_MOVRELS_B32_e32
@@ -185,7 +173,9 @@ entry:
 ; Test that two stack objects are not stored in the same register
 ; The second stack object should be in T3.X
 ; FUNC-LABEL: @no_overlap
-; R600-CHECK: MOV {{\** *}}T3.X
+; R600_CHECK: MOV
+; R600_CHECK: [[CHAN:[XYZW]]]+
+; R600-CHECK-NOT: [[CHAN]]+
 ; SI-CHECK: V_MOV_B32_e32 v3
 define void @no_overlap(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -211,6 +201,85 @@ entry:
   ret void
 }
 
+define void @char_array_array(i32 addrspace(1)* %out, i32 %index) {
+entry:
+  %alloca = alloca [2 x [2 x i8]]
+  %gep0 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
+  %gep1 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1
+  store i8 0, i8* %gep0
+  store i8 1, i8* %gep1
+  %gep2 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
+  %load = load i8* %gep2
+  %sext = sext i8 %load to i32
+  store i32 %sext, i32 addrspace(1)* %out
+  ret void
+}
 
+define void @i32_array_array(i32 addrspace(1)* %out, i32 %index) {
+entry:
+  %alloca = alloca [2 x [2 x i32]]
+  %gep0 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
+  %gep1 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
+  store i32 0, i32* %gep0
+  store i32 1, i32* %gep1
+  %gep2 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
+  %load = load i32* %gep2
+  store i32 %load, i32 addrspace(1)* %out
+  ret void
+}
+
+define void @i64_array_array(i64 addrspace(1)* %out, i32 %index) {
+entry:
+  %alloca = alloca [2 x [2 x i64]]
+  %gep0 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
+  %gep1 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1
+  store i64 0, i64* %gep0
+  store i64 1, i64* %gep1
+  %gep2 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
+  %load = load i64* %gep2
+  store i64 %load, i64 addrspace(1)* %out
+  ret void
+}
+
+%struct.pair32 = type { i32, i32 }
+
+define void @struct_array_array(i32 addrspace(1)* %out, i32 %index) {
+entry:
+  %alloca = alloca [2 x [2 x %struct.pair32]]
+  %gep0 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
+  %gep1 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1
+  store i32 0, i32* %gep0
+  store i32 1, i32* %gep1
+  %gep2 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
+  %load = load i32* %gep2
+  store i32 %load, i32 addrspace(1)* %out
+  ret void
+}
+
+define void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) {
+entry:
+  %alloca = alloca [2 x %struct.pair32]
+  %gep0 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
+  %gep1 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0
+  store i32 0, i32* %gep0
+  store i32 1, i32* %gep1
+  %gep2 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
+  %load = load i32* %gep2
+  store i32 %load, i32 addrspace(1)* %out
+  ret void
+}
+
+define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+entry:
+  %tmp = alloca [2 x i32]
+  %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
+  %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+  store i32 0, i32* %tmp1
+  store i32 1, i32* %tmp2
+  %cmp = icmp eq i32 %in, 0
+  %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
+  %load = load i32* %sel
+  store i32 %load, i32 addrspace(1)* %out
+  ret void
+}
 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
index f322bc7..55eb56d 100644
--- a/test/CodeGen/R600/pv.ll
+++ b/test/CodeGen/R600/pv.ll
@@ -103,7 +103,7 @@ main_body:
   %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3
   %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95)
   %97 = call float @fabs(float %96)
-  %98 = call float @llvm.AMDGPU.rsq(float %97)
+  %98 = call float @llvm.AMDGPU.rsq.f32(float %97)
   %99 = fmul float %4, %98
   %100 = fmul float %5, %98
   %101 = fmul float %6, %98
@@ -225,7 +225,7 @@ declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
 declare float @fabs(float) #2
 
 ; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq(float) #1
+declare float @llvm.AMDGPU.rsq.f32(float) #1
 
 ; Function Attrs: readnone
 declare float @llvm.AMDIL.clamp.(float, float, float) #1
diff --git a/test/CodeGen/R600/reorder-stores.ll b/test/CodeGen/R600/reorder-stores.ll
new file mode 100644
index 0000000..be2fcc6
--- /dev/null
+++ b/test/CodeGen/R600/reorder-stores.ll
@@ -0,0 +1,104 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: @no_reorder_v2f64_global_load_store
+; SI: BUFFER_LOAD_DWORDX2
+; SI: BUFFER_LOAD_DWORDX2
+; SI: BUFFER_LOAD_DWORDX2
+; SI: BUFFER_LOAD_DWORDX2
+; SI: BUFFER_STORE_DWORDX2
+; SI: BUFFER_STORE_DWORDX2
+; SI: BUFFER_STORE_DWORDX2
+; SI: BUFFER_STORE_DWORDX2
+; SI: S_ENDPGM
+define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
+  %tmp1 = load <2 x double> addrspace(1)* %x, align 16
+  %tmp4 = load <2 x double> addrspace(1)* %y, align 16
+  store <2 x double> %tmp4, <2 x double> addrspace(1)* %x, align 16
+  store <2 x double> %tmp1, <2 x double> addrspace(1)* %y, align 16
+  ret void
+}
+
+; SI-LABEL: @no_reorder_scalarized_v2f64_local_load_store
+; SI: DS_READ_B64
+; SI: DS_READ_B64
+; SI: DS_WRITE_B64
+; SI: DS_WRITE_B64
+; SI: S_ENDPGM
+define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
+  %tmp1 = load <2 x double> addrspace(3)* %x, align 16
+  %tmp4 = load <2 x double> addrspace(3)* %y, align 16
+  store <2 x double> %tmp4, <2 x double> addrspace(3)* %x, align 16
+  store <2 x double> %tmp1, <2 x double> addrspace(3)* %y, align 16
+  ret void
+}
+
+; SI-LABEL: @no_reorder_split_v8i32_global_load_store
+; SI: BUFFER_LOAD_DWORD
+; SI: BUFFER_LOAD_DWORD
+; SI: BUFFER_LOAD_DWORD
+; SI: BUFFER_LOAD_DWORD
+
+; SI: BUFFER_LOAD_DWORD
+; SI: BUFFER_LOAD_DWORD
+; SI: BUFFER_LOAD_DWORD
+; SI: BUFFER_LOAD_DWORD
+
+; SI: BUFFER_LOAD_DWORD
+; SI: BUFFER_LOAD_DWORD
+; SI: BUFFER_LOAD_DWORD
+; SI: BUFFER_LOAD_DWORD
+
+; SI: BUFFER_LOAD_DWORD
+; SI: BUFFER_LOAD_DWORD
+; SI: BUFFER_LOAD_DWORD
+; SI: BUFFER_LOAD_DWORD
+
+
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: BUFFER_STORE_DWORD
+; SI: S_ENDPGM
+define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
+  %tmp1 = load <8 x i32> addrspace(1)* %x, align 32
+  %tmp4 = load <8 x i32> addrspace(1)* %y, align 32
+  store <8 x i32> %tmp4, <8 x i32> addrspace(1)* %x, align 32
+  store <8 x i32> %tmp1, <8 x i32> addrspace(1)* %y, align 32
+  ret void
+}
+
+; SI-LABEL: @no_reorder_extload_64
+; SI: DS_READ_B64
+; SI: DS_READ_B64
+; SI: DS_WRITE_B64
+; SI-NOT: DS_READ
+; SI: DS_WRITE_B64
+; SI: S_ENDPGM
+define void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
+  %tmp1 = load <2 x i32> addrspace(3)* %x, align 8
+  %tmp4 = load <2 x i32> addrspace(3)* %y, align 8
+  %tmp1ext = zext <2 x i32> %tmp1 to <2 x i64>
+  %tmp4ext = zext <2 x i32> %tmp4 to <2 x i64>
+  %tmp7 = add <2 x i64> %tmp1ext, <i64 1, i64 1>
+  %tmp9 = add <2 x i64> %tmp4ext, <i64 1, i64 1>
+  %trunctmp9 = trunc <2 x i64> %tmp9 to <2 x i32>
+  %trunctmp7 = trunc <2 x i64> %tmp7 to <2 x i32>
+  store <2 x i32> %trunctmp9, <2 x i32> addrspace(3)* %x, align 8
+  store <2 x i32> %trunctmp7, <2 x i32> addrspace(3)* %y, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/rotl.i64.ll b/test/CodeGen/R600/rotl.i64.ll
new file mode 100644
index 0000000..bda0b66
--- /dev/null
+++ b/test/CodeGen/R600/rotl.i64.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: @s_rotl_i64:
+; SI: S_LSHL_B64
+; SI: S_SUB_I32
+; SI: S_LSHR_B64
+; SI: S_OR_B64
+define void @s_rotl_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
+entry:
+  %0 = shl i64 %x, %y
+  %1 = sub i64 64, %y
+  %2 = lshr i64 %x, %1
+  %3 = or i64 %0, %2
+  store i64 %3, i64 addrspace(1)* %in
+  ret void
+}
+
+; FUNC-LABEL: @v_rotl_i64:
+; SI: V_LSHL_B64
+; SI: V_SUB_I32
+; SI: V_LSHR_B64
+; SI: V_OR_B32
+; SI: V_OR_B32
+define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
+entry:
+  %x = load i64 addrspace(1)* %xptr, align 8
+  %y = load i64 addrspace(1)* %yptr, align 8
+  %tmp0 = shl i64 %x, %y
+  %tmp1 = sub i64 64, %y
+  %tmp2 = lshr i64 %x, %tmp1
+  %tmp3 = or i64 %tmp0, %tmp2
+  store i64 %tmp3, i64 addrspace(1)* %in, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/rotl.ll b/test/CodeGen/R600/rotl.ll
new file mode 100644
index 0000000..83f657f
--- /dev/null
+++ b/test/CodeGen/R600/rotl.ll
@@ -0,0 +1,54 @@
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: @rotl_i32:
+; R600: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x
+; R600-NEXT: 32
+; R600: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}}
+
+; SI: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
+; SI: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]]
+; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]]
+define void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
+entry:
+  %0 = shl i32 %x, %y
+  %1 = sub i32 32, %y
+  %2 = lshr i32 %x, %1
+  %3 = or i32 %0, %2
+  store i32 %3, i32 addrspace(1)* %in
+  ret void
+}
+
+; FUNC-LABEL: @rotl_v2i32
+; SI: S_SUB_I32
+; SI: V_ALIGNBIT_B32
+; SI: S_SUB_I32
+; SI: V_ALIGNBIT_B32
+define void @rotl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
+entry:
+  %0 = shl <2 x i32> %x, %y
+  %1 = sub <2 x i32> <i32 32, i32 32>, %y
+  %2 = lshr <2 x i32> %x, %1
+  %3 = or <2 x i32> %0, %2
+  store <2 x i32> %3, <2 x i32> addrspace(1)* %in
+  ret void
+}
+
+; FUNC-LABEL: @rotl_v4i32
+; SI: S_SUB_I32
+; SI: V_ALIGNBIT_B32
+; SI: S_SUB_I32
+; SI: V_ALIGNBIT_B32
+; SI: S_SUB_I32
+; SI: V_ALIGNBIT_B32
+; SI: S_SUB_I32
+; SI: V_ALIGNBIT_B32
+define void @rotl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
+entry:
+  %0 = shl <4 x i32> %x, %y
+  %1 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
+  %2 = lshr <4 x i32> %x, %1
+  %3 = or <4 x i32> %0, %2
+  store <4 x i32> %3, <4 x i32> addrspace(1)* %in
+  ret void
+}
diff --git a/test/CodeGen/R600/rotr.i64.ll b/test/CodeGen/R600/rotr.i64.ll
new file mode 100644
index 0000000..c264751
--- /dev/null
+++ b/test/CodeGen/R600/rotr.i64.ll
@@ -0,0 +1,58 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: @s_rotr_i64
+; SI: S_LSHR_B64
+; SI: S_SUB_I32
+; SI: S_LSHL_B64
+; SI: S_OR_B64
+define void @s_rotr_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
+entry:
+  %tmp0 = sub i64 64, %y
+  %tmp1 = shl i64 %x, %tmp0
+  %tmp2 = lshr i64 %x, %y
+  %tmp3 = or i64 %tmp1, %tmp2
+  store i64 %tmp3, i64 addrspace(1)* %in
+  ret void
+}
+
+; FUNC-LABEL: @v_rotr_i64
+; SI: V_LSHR_B64
+; SI: V_SUB_I32
+; SI: V_LSHL_B64
+; SI: V_OR_B32
+; SI: V_OR_B32
+define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
+entry:
+  %x = load i64 addrspace(1)* %xptr, align 8
+  %y = load i64 addrspace(1)* %yptr, align 8
+  %tmp0 = sub i64 64, %y
+  %tmp1 = shl i64 %x, %tmp0
+  %tmp2 = lshr i64 %x, %y
+  %tmp3 = or i64 %tmp1, %tmp2
+  store i64 %tmp3, i64 addrspace(1)* %in
+  ret void
+}
+
+; FUNC-LABEL: @s_rotr_v2i64
+define void @s_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> %x, <2 x i64> %y) {
+entry:
+  %tmp0 = sub <2 x i64> <i64 64, i64 64>, %y
+  %tmp1 = shl <2 x i64> %x, %tmp0
+  %tmp2 = lshr <2 x i64> %x, %y
+  %tmp3 = or <2 x i64> %tmp1, %tmp2
+  store <2 x i64> %tmp3, <2 x i64> addrspace(1)* %in
+  ret void
+}
+
+; FUNC-LABEL: @v_rotr_v2i64
+define void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) {
+entry:
+  %x = load <2 x i64> addrspace(1)* %xptr, align 8
+  %y = load <2 x i64> addrspace(1)* %yptr, align 8
+  %tmp0 = sub <2 x i64> <i64 64, i64 64>, %y
+  %tmp1 = shl <2 x i64> %x, %tmp0
+  %tmp2 = lshr <2 x i64> %x, %y
+  %tmp3 = or <2 x i64> %tmp1, %tmp2
+  store <2 x i64> %tmp3, <2 x i64> addrspace(1)* %in
+  ret void
+}
diff --git a/test/CodeGen/R600/rotr.ll b/test/CodeGen/R600/rotr.ll
index edf7aee..a5a4da4 100644
--- a/test/CodeGen/R600/rotr.ll
+++ b/test/CodeGen/R600/rotr.ll
@@ -1,37 +1,52 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; R600-CHECK-LABEL: @rotr:
-; R600-CHECK: BIT_ALIGN_INT
+; FUNC-LABEL: @rotr_i32:
+; R600: BIT_ALIGN_INT
 
-; SI-CHECK-LABEL: @rotr:
-; SI-CHECK: V_ALIGNBIT_B32
-define void @rotr(i32 addrspace(1)* %in, i32 %x, i32 %y) {
+; SI: V_ALIGNBIT_B32
+define void @rotr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
 entry:
-  %0 = sub i32 32, %y
-  %1 = shl i32 %x, %0
-  %2 = lshr i32 %x, %y
-  %3 = or i32 %1, %2
-  store i32 %3, i32 addrspace(1)* %in
+  %tmp0 = sub i32 32, %y
+  %tmp1 = shl i32 %x, %tmp0
+  %tmp2 = lshr i32 %x, %y
+  %tmp3 = or i32 %tmp1, %tmp2
+  store i32 %tmp3, i32 addrspace(1)* %in
   ret void
 }
 
-; R600-CHECK-LABEL: @rotl:
-; R600-CHECK: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x
-; R600-CHECK-NEXT: 32
-; R600-CHECK: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}}
+; FUNC-LABEL: @rotr_v2i32:
+; R600: BIT_ALIGN_INT
+; R600: BIT_ALIGN_INT
 
+; SI: V_ALIGNBIT_B32
+; SI: V_ALIGNBIT_B32
+define void @rotr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
+entry:
+  %tmp0 = sub <2 x i32> <i32 32, i32 32>, %y
+  %tmp1 = shl <2 x i32> %x, %tmp0
+  %tmp2 = lshr <2 x i32> %x, %y
+  %tmp3 = or <2 x i32> %tmp1, %tmp2
+  store <2 x i32> %tmp3, <2 x i32> addrspace(1)* %in
+  ret void
+}
+
+; FUNC-LABEL: @rotr_v4i32:
+; R600: BIT_ALIGN_INT
+; R600: BIT_ALIGN_INT
+; R600: BIT_ALIGN_INT
+; R600: BIT_ALIGN_INT
 
-; SI-CHECK-LABEL: @rotl:
-; SI-CHECK: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
-; SI-CHECK: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]]
-; SI-CHECK: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]]
-define void @rotl(i32 addrspace(1)* %in, i32 %x, i32 %y) {
+; SI: V_ALIGNBIT_B32
+; SI: V_ALIGNBIT_B32
+; SI: V_ALIGNBIT_B32
+; SI: V_ALIGNBIT_B32
+define void @rotr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
 entry:
-  %0 = shl i32 %x, %y
-  %1 = sub i32 32, %y
-  %2 = lshr i32 %x, %1
-  %3 = or i32 %0, %2
-  store i32 %3, i32 addrspace(1)* %in
+  %tmp0 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
+  %tmp1 = shl <4 x i32> %x, %tmp0
+  %tmp2 = lshr <4 x i32> %x, %y
+  %tmp3 = or <4 x i32> %tmp1, %tmp2
+  store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %in
   ret void
 }
diff --git a/test/CodeGen/R600/rsq.ll b/test/CodeGen/R600/rsq.ll
new file mode 100644
index 0000000..87c0570
--- /dev/null
+++ b/test/CodeGen/R600/rsq.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.sqrt.f32(float) nounwind readnone
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+; SI-LABEL: @rsq_f32
+; SI: V_RSQ_F32_e32
+; SI: S_ENDPGM
+define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+  %val = load float addrspace(1)* %in, align 4
+  %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
+  %div = fdiv float 1.0, %sqrt
+  store float %div, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @rsq_f64
+; SI: V_RSQ_F64_e32
+; SI: S_ENDPGM
+define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
+  %val = load double addrspace(1)* %in, align 4
+  %sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
+  %div = fdiv double 1.0, %sqrt
+  store double %div, double addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/saddo.ll b/test/CodeGen/R600/saddo.ll
new file mode 100644
index 0000000..c80480e
--- /dev/null
+++ b/test/CodeGen/R600/saddo.ll
@@ -0,0 +1,62 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
+
+; FUNC-LABEL: @saddo_i64_zext
+define void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
+  %val = extractvalue { i64, i1 } %sadd, 0
+  %carry = extractvalue { i64, i1 } %sadd, 1
+  %ext = zext i1 %carry to i64
+  %add2 = add i64 %val, %ext
+  store i64 %add2, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @s_saddo_i32
+define void @s_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
+  %sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
+  %val = extractvalue { i32, i1 } %sadd, 0
+  %carry = extractvalue { i32, i1 } %sadd, 1
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
+
+; FUNC-LABEL: @v_saddo_i32
+define void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %a = load i32 addrspace(1)* %aptr, align 4
+  %b = load i32 addrspace(1)* %bptr, align 4
+  %sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
+  %val = extractvalue { i32, i1 } %sadd, 0
+  %carry = extractvalue { i32, i1 } %sadd, 1
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
+
+; FUNC-LABEL: @s_saddo_i64
+define void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
+  %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
+  %val = extractvalue { i64, i1 } %sadd, 0
+  %carry = extractvalue { i64, i1 } %sadd, 1
+  store i64 %val, i64 addrspace(1)* %out, align 8
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
+
+; FUNC-LABEL: @v_saddo_i64
+; SI: V_ADD_I32
+; SI: V_ADDC_U32
+define void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+  %a = load i64 addrspace(1)* %aptr, align 4
+  %b = load i64 addrspace(1)* %bptr, align 4
+  %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
+  %val = extractvalue { i64, i1 } %sadd, 0
+  %carry = extractvalue { i64, i1 } %sadd, 1
+  store i64 %val, i64 addrspace(1)* %out, align 8
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
diff --git a/test/CodeGen/R600/scalar_to_vector.ll b/test/CodeGen/R600/scalar_to_vector.ll
new file mode 100644
index 0000000..bcccb06
--- /dev/null
+++ b/test/CodeGen/R600/scalar_to_vector.ll
@@ -0,0 +1,80 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: @scalar_to_vector_v2i32
+; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
+; SI: V_LSHRREV_B32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
+; SI: BUFFER_STORE_SHORT [[RESULT]]
+; SI: BUFFER_STORE_SHORT [[RESULT]]
+; SI: BUFFER_STORE_SHORT [[RESULT]]
+; SI: BUFFER_STORE_SHORT [[RESULT]]
+; SI: S_ENDPGM
+define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %tmp1 = load i32 addrspace(1)* %in, align 4
+  %bc = bitcast i32 %tmp1 to <2 x i16>
+  %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @scalar_to_vector_v2f32
+; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
+; SI: V_LSHRREV_B32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
+; SI: BUFFER_STORE_SHORT [[RESULT]]
+; SI: BUFFER_STORE_SHORT [[RESULT]]
+; SI: BUFFER_STORE_SHORT [[RESULT]]
+; SI: BUFFER_STORE_SHORT [[RESULT]]
+; SI: S_ENDPGM
+define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+  %tmp1 = load float addrspace(1)* %in, align 4
+  %bc = bitcast float %tmp1 to <2 x i16>
+  %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
+  ret void
+}
+
+; Getting a SCALAR_TO_VECTOR seems to be tricky. These cases managed
+; to produce one, but for some reason never made it to selection.
+
+
+; define void @scalar_to_vector_test2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+;   %tmp1 = load i32 addrspace(1)* %in, align 4
+;   %bc = bitcast i32 %tmp1 to <4 x i8>
+
+;   %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+;   store <8 x i8> %tmp2, <8 x i8> addrspace(1)* %out, align 4
+;   ret void
+; }
+
+; define void @scalar_to_vector_test3(<4 x i32> addrspace(1)* %out) nounwind {
+;   %newvec0 = insertelement <2 x i64> undef, i64 12345, i32 0
+;   %newvec1 = insertelement <2 x i64> %newvec0, i64 undef, i32 1
+;   %bc = bitcast <2 x i64> %newvec1 to <4 x i32>
+;   %add = add <4 x i32> %bc, <i32 1, i32 2, i32 3, i32 4>
+;   store <4 x i32> %add, <4 x i32> addrspace(1)* %out, align 16
+;   ret void
+; }
+
+; define void @scalar_to_vector_test4(<8 x i16> addrspace(1)* %out) nounwind {
+;   %newvec0 = insertelement <4 x i32> undef, i32 12345, i32 0
+;   %bc = bitcast <4 x i32> %newvec0 to <8 x i16>
+;   %add = add <8 x i16> %bc, <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>
+;   store <8 x i16> %add, <8 x i16> addrspace(1)* %out, align 16
+;   ret void
+; }
+
+; define void @scalar_to_vector_test5(<4 x i16> addrspace(1)* %out) nounwind {
+;   %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0
+;   %bc = bitcast <2 x i32> %newvec0 to <4 x i16>
+;   %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
+;   store <4 x i16> %add, <4 x i16> addrspace(1)* %out, align 16
+;   ret void
+; }
+
+; define void @scalar_to_vector_test6(<4 x i16> addrspace(1)* %out) nounwind {
+;   %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0
+;   %bc = bitcast <2 x i32> %newvec0 to <4 x i16>
+;   %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
+;   store <4 x i16> %add, <4 x i16> addrspace(1)* %out, align 16
+;   ret void
+; }
diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll
index 3dd10c8..e922d5c 100644
--- a/test/CodeGen/R600/sdiv.ll
+++ b/test/CodeGen/R600/sdiv.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 ; The code generated by sdiv is long and complex and may frequently change.
 ; The goal of this test is to make sure the ISel doesn't fail.
@@ -9,9 +10,9 @@
 ; This was fixed by adding an additional pattern in R600Instructions.td to
 ; match this pattern with a CNDGE_INT.
 
-; CHECK: CF_END
-
-define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+; FUNC-LABEL: @sdiv_i32
+; EG: CF_END
+define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
   %num = load i32 addrspace(1) * %in
   %den = load i32 addrspace(1) * %den_ptr
@@ -19,3 +20,84 @@ define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   store i32 %result, i32 addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: @sdiv_i32_4
+define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %num = load i32 addrspace(1) * %in
+  %result = sdiv i32 %num, 4
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+; Multiply by a weird constant to make sure setIntDivIsCheap is
+; working.
+
+; FUNC-LABEL: @slow_sdiv_i32_3435
+; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
+; SI: V_MOV_B32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
+; SI: V_MUL_HI_I32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]]
+; SI: V_ADD_I32
+; SI: V_LSHRREV_B32
+; SI: V_ASHRREV_I32
+; SI: V_ADD_I32
+; SI: BUFFER_STORE_DWORD
+; SI: S_ENDPGM
+define void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %num = load i32 addrspace(1) * %in
+  %result = sdiv i32 %num, 3435
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %den_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
+  %num = load <2 x i32> addrspace(1) * %in
+  %den = load <2 x i32> addrspace(1) * %den_ptr
+  %result = sdiv <2 x i32> %num, %den
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+define void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %num = load <2 x i32> addrspace(1) * %in
+  %result = sdiv <2 x i32> %num, <i32 4, i32 4>
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+define void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %den_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+  %num = load <4 x i32> addrspace(1) * %in
+  %den = load <4 x i32> addrspace(1) * %den_ptr
+  %result = sdiv <4 x i32> %num, %den
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+define void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %num = load <4 x i32> addrspace(1) * %in
+  %result = sdiv <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; Tests for 64-bit divide bypass.
+; define void @test_get_quotient(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+;   %result = sdiv i64 %a, %b
+;   store i64 %result, i64 addrspace(1)* %out, align 8
+;   ret void
+; }
+
+; define void @test_get_remainder(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+;   %result = srem i64 %a, %b
+;   store i64 %result, i64 addrspace(1)* %out, align 8
+;   ret void
+; }
+
+; define void @test_get_quotient_and_remainder(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+;   %resultdiv = sdiv i64 %a, %b
+;   %resultrem = srem i64 %a, %b
+;   %result = add i64 %resultdiv, %resultrem
+;   store i64 %result, i64 addrspace(1)* %out, align 8
+;   ret void
+; }
diff --git a/test/CodeGen/R600/setcc-equivalent.ll b/test/CodeGen/R600/setcc-equivalent.ll
index 4c50aa3..f796748 100644
--- a/test/CodeGen/R600/setcc-equivalent.ll
+++ b/test/CodeGen/R600/setcc-equivalent.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
+; XFAIL: *
 
 ; EG-LABEL: @and_setcc_setcc_i32
 ; EG: AND_INT
diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll
index c581d86..c7d5bf9 100644
--- a/test/CodeGen/R600/sgpr-copy.ll
+++ b/test/CodeGen/R600/sgpr-copy.ll
@@ -70,7 +70,7 @@ main_body:
   %55 = fadd float %54, %53
   %56 = fmul float %45, %45
   %57 = fadd float %55, %56
-  %58 = call float @llvm.AMDGPU.rsq(float %57)
+  %58 = call float @llvm.AMDGPU.rsq.f32(float %57)
   %59 = fmul float %43, %58
   %60 = fmul float %44, %58
   %61 = fmul float %45, %58
@@ -212,7 +212,7 @@ declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
 declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
 
 ; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq(float) #3
+declare float @llvm.AMDGPU.rsq.f32(float) #3
 
 ; Function Attrs: readnone
 declare float @llvm.AMDIL.exp.(float) #3
diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll
index 4a6aab4..43fab2a 100644
--- a/test/CodeGen/R600/shl.ll
+++ b/test/CodeGen/R600/shl.ll
@@ -39,5 +39,118 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
   ret void
 }
 
-; XXX: Add SI test for i64 shl once i64 stores and i64 function arguments are
-; supported.
+;EG-CHECK: @shl_i64
+;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
+;EG-CHECK: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
+;EG-CHECK: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
+;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+;EG-CHECK-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]]
+;EG-CHECK-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
+;EG-CHECK-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}}
+;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
+;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
+
+;SI-CHECK: @shl_i64
+;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+  %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
+  %a = load i64 addrspace(1) * %in
+  %b = load i64 addrspace(1) * %b_ptr
+  %result = shl i64 %a, %b
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK: @shl_v2i64
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHA]]
+;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHB]]
+;EG-CHECK-DAG: LSHR {{.*}}, 1
+;EG-CHECK-DAG: LSHR {{.*}}, 1
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]]
+;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]]
+;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]]
+;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]]
+;EG-CHECK-DAG: LSHL
+;EG-CHECK-DAG: LSHL
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
+;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
+;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+
+;SI-CHECK: @shl_v2i64
+;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+  %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
+  %a = load <2 x i64> addrspace(1) * %in
+  %b = load <2 x i64> addrspace(1) * %b_ptr
+  %result = shl <2 x i64> %a, %b
+  store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK: @shl_v4i64
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHA]]
+;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHB]]
+;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHC]]
+;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHD]]
+;EG-CHECK-DAG: LSHR {{.*}}, 1
+;EG-CHECK-DAG: LSHR {{.*}}, 1
+;EG-CHECK-DAG: LSHR {{.*}}, 1
+;EG-CHECK-DAG: LSHR {{.*}}, 1
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]]
+;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]]
+;EG-CHECK-DAG: LSHL {{.*}}, [[SHC]]
+;EG-CHECK-DAG: LSHL {{.*}}, [[SHD]]
+;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]]
+;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]]
+;EG-CHECK-DAG: LSHL {{.*}}, [[SHC]]
+;EG-CHECK-DAG: LSHL {{.*}}, [[SHD]]
+;EG-CHECK-DAG: LSHL
+;EG-CHECK-DAG: LSHL
+;EG-CHECK-DAG: LSHL
+;EG-CHECK-DAG: LSHL
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal
+;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
+;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
+;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
+;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+
+;SI-CHECK: @shl_v4i64
+;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
+  %a = load <4 x i64> addrspace(1) * %in
+  %b = load <4 x i64> addrspace(1) * %b_ptr
+  %result = shl <4 x i64> %a, %b
+  store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll
index b34a757..53a0965 100644
--- a/test/CodeGen/R600/si-sgpr-spill.ll
+++ b/test/CodeGen/R600/si-sgpr-spill.ll
@@ -203,7 +203,7 @@ main_body:
   %198 = fadd float %197, %196
   %199 = fmul float %97, %97
   %200 = fadd float %198, %199
-  %201 = call float @llvm.AMDGPU.rsq(float %200)
+  %201 = call float @llvm.AMDGPU.rsq.f32(float %200)
   %202 = fmul float %95, %201
   %203 = fmul float %96, %201
   %204 = fmul float %202, %29
@@ -384,7 +384,7 @@ IF67:                                             ; preds = %LOOP65
   %355 = fadd float %354, %353
   %356 = fmul float %352, %352
   %357 = fadd float %355, %356
-  %358 = call float @llvm.AMDGPU.rsq(float %357)
+  %358 = call float @llvm.AMDGPU.rsq.f32(float %357)
   %359 = fmul float %350, %358
   %360 = fmul float %351, %358
   %361 = fmul float %352, %358
@@ -512,7 +512,7 @@ IF67:                                             ; preds = %LOOP65
   %483 = fadd float %482, %481
   %484 = fmul float %109, %109
   %485 = fadd float %483, %484
-  %486 = call float @llvm.AMDGPU.rsq(float %485)
+  %486 = call float @llvm.AMDGPU.rsq.f32(float %485)
   %487 = fmul float %107, %486
   %488 = fmul float %108, %486
   %489 = fmul float %109, %486
@@ -541,7 +541,7 @@ IF67:                                             ; preds = %LOOP65
   %512 = fadd float %511, %510
   %513 = fmul float %97, %97
   %514 = fadd float %512, %513
-  %515 = call float @llvm.AMDGPU.rsq(float %514)
+  %515 = call float @llvm.AMDGPU.rsq.f32(float %514)
   %516 = fmul float %95, %515
   %517 = fmul float %96, %515
   %518 = fmul float %97, %515
@@ -658,7 +658,7 @@ declare i32 @llvm.SI.tid() #2
 declare float @ceil(float) #3
 
 ; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq(float) #2
+declare float @llvm.AMDGPU.rsq.f32(float) #2
 
 ; Function Attrs: nounwind readnone
 declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1
@@ -887,7 +887,7 @@ main_body:
   %212 = fadd float %211, %210
   %213 = fmul float %209, %209
   %214 = fadd float %212, %213
-  %215 = call float @llvm.AMDGPU.rsq(float %214)
+  %215 = call float @llvm.AMDGPU.rsq.f32(float %214)
   %216 = fmul float %205, %215
   %217 = fmul float %207, %215
   %218 = fmul float %209, %215
@@ -1123,7 +1123,7 @@ IF189:                                            ; preds = %LOOP
   %434 = fsub float -0.000000e+00, %433
   %435 = fadd float 0x3FF00068E0000000, %434
   %436 = call float @llvm.AMDIL.clamp.(float %435, float 0.000000e+00, float 1.000000e+00)
-  %437 = call float @llvm.AMDGPU.rsq(float %436)
+  %437 = call float @llvm.AMDGPU.rsq.f32(float %436)
   %438 = fmul float %437, %436
   %439 = fsub float -0.000000e+00, %436
   %440 = call float @llvm.AMDGPU.cndlt(float %439, float %438, float 0.000000e+00)
@@ -1147,7 +1147,7 @@ IF189:                                            ; preds = %LOOP
   %458 = fadd float %457, %456
   %459 = fmul float %455, %455
   %460 = fadd float %458, %459
-  %461 = call float @llvm.AMDGPU.rsq(float %460)
+  %461 = call float @llvm.AMDGPU.rsq.f32(float %460)
   %462 = fmul float %451, %461
   %463 = fmul float %453, %461
   %464 = fmul float %455, %461
@@ -1257,7 +1257,7 @@ ENDIF197:                                         ; preds = %IF189, %IF198
   %559 = fadd float %558, %557
   %560 = fmul float %556, %556
   %561 = fadd float %559, %560
-  %562 = call float @llvm.AMDGPU.rsq(float %561)
+  %562 = call float @llvm.AMDGPU.rsq.f32(float %561)
   %563 = fmul float %562, %561
   %564 = fsub float -0.000000e+00, %561
   %565 = call float @llvm.AMDGPU.cndlt(float %564, float %563, float 0.000000e+00)
diff --git a/test/CodeGen/R600/sign_extend.ll b/test/CodeGen/R600/sign_extend.ll
index 1212cee..e3bee50 100644
--- a/test/CodeGen/R600/sign_extend.ll
+++ b/test/CodeGen/R600/sign_extend.ll
@@ -1,12 +1,61 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+; SI-LABEL: @s_sext_i1_to_i32:
+; SI: V_CNDMASK_B32_e64
+; SI: S_ENDPGM
+define void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp eq i32 %a, %b
+  %sext = sext i1 %cmp to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
 
-; CHECK: V_ASHR
-define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c)  {
+; SI-LABEL: @test:
+; SI: V_ASHR
+; SI: S_ENDPG
+define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
 entry:
-  %0 = mul i32 %a, %b
-  %1 = add i32 %0, %c
-  %2 = sext i32 %1 to i64
-  store i64 %2, i64 addrspace(1)* %out
+  %mul = mul i32 %a, %b
+  %add = add i32 %mul, %c
+  %sext = sext i32 %add to i64
+  store i64 %sext, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: @s_sext_i1_to_i64:
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: S_ENDPGM
+define void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp eq i32 %a, %b
+  %sext = sext i1 %cmp to i64
+  store i64 %sext, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: @s_sext_i32_to_i64:
+; SI: S_ASHR_I32
+; SI: S_ENDPGM
+define void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nounwind {
+  %sext = sext i32 %a to i64
+  store i64 %sext, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: @v_sext_i32_to_i64:
+; SI: V_ASHR
+; SI: S_ENDPGM
+define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %val = load i32 addrspace(1)* %in, align 4
+  %sext = sext i32 %val to i64
+  store i64 %sext, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: @s_sext_i16_to_i64:
+; SI: S_ENDPGM
+define void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind {
+  %sext = sext i16 %a to i64
+  store i64 %sext, i64 addrspace(1)* %out, align 8
   ret void
 }
diff --git a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
index d9f60ea..dee4326 100644
--- a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
+++ b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
@@ -1,5 +1,7 @@
 ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
 
+; XFAIL: *
+
 ; 64-bit select was originally lowered with a build_pair, and this
 ; could be simplified to 1 cndmask instead of 2, but that broken when
 ; it started being implemented with a v2i32 build_vector and
@@ -12,9 +14,10 @@ define void @trunc_select_i64(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) {
   ret void
 }
 
+; FIXME: Fix truncating store for local memory
 ; SI-LABEL: @trunc_load_alloca_i64:
-; SI: V_MOVRELS_B32
-; SI-NOT: V_MOVRELS_B32
+; SI: DS_READ_B32
+; SI-NOT: DS_READ_B64
 ; SI: S_ENDPGM
 define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) {
   %idx = add i32 %a, %b
diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/R600/sint_to_fp.ll
index 9241799..b27dfda 100644
--- a/test/CodeGen/R600/sint_to_fp.ll
+++ b/test/CodeGen/R600/sint_to_fp.ll
@@ -29,3 +29,25 @@ define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspac
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: @sint_to_fp_i1_f32:
+; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI-NEXT: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, -1.000000e+00, [[CMP]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %fp = uitofp i1 %cmp to float
+  store float %fp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @sint_to_fp_i1_f32_load:
+; SI: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, -1.000000e+00
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) {
+  %fp = sitofp i1 %in to float
+  store float %fp, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/sint_to_fp64.ll b/test/CodeGen/R600/sint_to_fp64.ll
index 5abc9d1..12b8cf5 100644
--- a/test/CodeGen/R600/sint_to_fp64.ll
+++ b/test/CodeGen/R600/sint_to_fp64.ll
@@ -1,9 +1,35 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-; CHECK: @sint_to_fp64
-; CHECK: V_CVT_F64_I32_e32
+; SI: @sint_to_fp64
+; SI: V_CVT_F64_I32_e32
 define void @sint_to_fp64(double addrspace(1)* %out, i32 %in) {
   %result = sitofp i32 %in to double
   store double %result, double addrspace(1)* %out
   ret void
 }
+
+; SI-LABEL: @sint_to_fp_i1_f64:
+; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; FIXME: We should the VGPR sources for V_CNDMASK are copied from SGPRs,
+; we should be able to fold the SGPRs into the V_CNDMASK instructions.
+; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
+; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
+; SI: BUFFER_STORE_DWORDX2
+; SI: S_ENDPGM
+define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %fp = sitofp i1 %cmp to double
+  store double %fp, double addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @sint_to_fp_i1_f64_load:
+; SI: V_CNDMASK_B32_e64 [[IRESULT:v[0-9]]], 0, -1
+; SI-NEXT: V_CVT_F64_I32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
+; SI: S_ENDPGM
+define void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
+  %fp = sitofp i1 %in to double
+  store double %fp, double addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll
index fe9df10..9eb3dc5 100644
--- a/test/CodeGen/R600/sra.ll
+++ b/test/CodeGen/R600/sra.ll
@@ -52,3 +52,133 @@ entry:
   ret void
 }
 
+;EG-CHECK-LABEL: @ashr_i64_2
+;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
+;EG-CHECK: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
+;EG-CHECK: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
+;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+;EG-CHECK-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
+;EG-CHECK-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
+;EG-CHECK-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
+;EG-CHECK-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
+;EG-CHECK-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
+;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
+
+;SI-CHECK-LABEL: @ashr_i64_2
+;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+entry:
+  %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
+  %a = load i64 addrspace(1) * %in
+  %b = load i64 addrspace(1) * %b_ptr
+  %result = ashr i64 %a, %b
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK-LABEL: @ashr_v2i64
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]]
+;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]]
+;EG-CHECK-DAG: LSHL {{.*}}, 1
+;EG-CHECK-DAG: LSHL {{.*}}, 1
+;EG-CHECK-DAG: ASHR {{.*}}, [[SHA]]
+;EG-CHECK-DAG: ASHR {{.*}}, [[SHB]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
+;EG-CHECK-DAG: OR_INT
+;EG-CHECK-DAG: OR_INT
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: ASHR
+;EG-CHECK-DAG: ASHR
+;EG-CHECK-DAG: ASHR {{.*}}, literal
+;EG-CHECK-DAG: ASHR {{.*}}, literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+
+;SI-CHECK-LABEL: @ashr_v2i64
+;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+  %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
+  %a = load <2 x i64> addrspace(1) * %in
+  %b = load <2 x i64> addrspace(1) * %b_ptr
+  %result = ashr <2 x i64> %a, %b
+  store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK-LABEL: @ashr_v4i64
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]]
+;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]]
+;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHC]]
+;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHD]]
+;EG-CHECK-DAG: LSHL {{.*}}, 1
+;EG-CHECK-DAG: LSHL {{.*}}, 1
+;EG-CHECK-DAG: LSHL {{.*}}, 1
+;EG-CHECK-DAG: LSHL {{.*}}, 1
+;EG-CHECK-DAG: ASHR {{.*}}, [[SHA]]
+;EG-CHECK-DAG: ASHR {{.*}}, [[SHB]]
+;EG-CHECK-DAG: ASHR {{.*}}, [[SHC]]
+;EG-CHECK-DAG: ASHR {{.*}}, [[SHD]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
+;EG-CHECK-DAG: OR_INT
+;EG-CHECK-DAG: OR_INT
+;EG-CHECK-DAG: OR_INT
+;EG-CHECK-DAG: OR_INT
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: ASHR
+;EG-CHECK-DAG: ASHR
+;EG-CHECK-DAG: ASHR
+;EG-CHECK-DAG: ASHR
+;EG-CHECK-DAG: ASHR {{.*}}, literal
+;EG-CHECK-DAG: ASHR {{.*}}, literal
+;EG-CHECK-DAG: ASHR {{.*}}, literal
+;EG-CHECK-DAG: ASHR {{.*}}, literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+
+;SI-CHECK-LABEL: @ashr_v4i64
+;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
+  %a = load <4 x i64> addrspace(1) * %in
+  %b = load <4 x i64> addrspace(1) * %b_ptr
+  %result = ashr <4 x i64> %a, %b
+  store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
diff --git a/test/CodeGen/R600/srem.ll b/test/CodeGen/R600/srem.ll
new file mode 100644
index 0000000..65e3395
--- /dev/null
+++ b/test/CodeGen/R600/srem.ll
@@ -0,0 +1,50 @@
+; RUN: llc -march=r600 -mcpu=SI < %s
+; RUN: llc -march=r600 -mcpu=redwood < %s
+
+define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in
+  %den = load i32 addrspace(1) * %den_ptr
+  %result = srem i32 %num, %den
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %num = load i32 addrspace(1) * %in
+  %result = srem i32 %num, 4
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %den_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
+  %num = load <2 x i32> addrspace(1) * %in
+  %den = load <2 x i32> addrspace(1) * %den_ptr
+  %result = srem <2 x i32> %num, %den
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+define void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %num = load <2 x i32> addrspace(1) * %in
+  %result = srem <2 x i32> %num, <i32 4, i32 4>
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+define void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %den_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+  %num = load <4 x i32> addrspace(1) * %in
+  %den = load <4 x i32> addrspace(1) * %den_ptr
+  %result = srem <4 x i32> %num, %den
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+define void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %num = load <4 x i32> addrspace(1) * %in
+  %result = srem <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/R600/srl.ll
index 7637355..44ad73f 100644
--- a/test/CodeGen/R600/srl.ll
+++ b/test/CodeGen/R600/srl.ll
@@ -39,3 +39,129 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
+
+;EG-CHECK: @lshr_i64
+;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
+;EG-CHECK: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
+;EG-CHECK: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
+;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+;EG-CHECK-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
+;EG-CHECK-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
+;EG-CHECK-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
+;EG-CHECK-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
+;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
+;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
+
+;SI-CHECK: @lshr_i64
+;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+  %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
+  %a = load i64 addrspace(1) * %in
+  %b = load i64 addrspace(1) * %b_ptr
+  %result = lshr i64 %a, %b
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK: @lshr_v2i64
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]]
+;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]]
+;EG-CHECK-DAG: LSHL {{.*}}, 1
+;EG-CHECK-DAG: LSHL {{.*}}, 1
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
+;EG-CHECK-DAG: OR_INT
+;EG-CHECK-DAG: OR_INT
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: LSHR
+;EG-CHECK-DAG: LSHR
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
+;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
+;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+
+;SI-CHECK: @lshr_v2i64
+;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+  %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
+  %a = load <2 x i64> addrspace(1) * %in
+  %b = load <2 x i64> addrspace(1) * %b_ptr
+  %result = lshr <2 x i64> %a, %b
+  store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+
+;EG-CHECK: @lshr_v4i64
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]]
+;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]]
+;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]]
+;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHC]]
+;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHD]]
+;EG-CHECK-DAG: LSHL {{.*}}, 1
+;EG-CHECK-DAG: LSHL {{.*}}, 1
+;EG-CHECK-DAG: LSHL {{.*}}, 1
+;EG-CHECK-DAG: LSHL {{.*}}, 1
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHC]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHD]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHC]]
+;EG-CHECK-DAG: LSHR {{.*}}, [[SHD]]
+;EG-CHECK-DAG: OR_INT
+;EG-CHECK-DAG: OR_INT
+;EG-CHECK-DAG: OR_INT
+;EG-CHECK-DAG: OR_INT
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal
+;EG-CHECK-DAG: LSHR
+;EG-CHECK-DAG: LSHR
+;EG-CHECK-DAG: LSHR
+;EG-CHECK-DAG: LSHR
+;EG-CHECK-DAG: LSHR
+;EG-CHECK-DAG: LSHR
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal
+;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal
+;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
+;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
+;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
+;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+;EG-CHECK-DAG: CNDE_INT
+
+;SI-CHECK: @lshr_v4i64
+;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
+  %a = load <4 x i64> addrspace(1) * %in
+  %b = load <4 x i64> addrspace(1) * %b_ptr
+  %result = lshr <4 x i64> %a, %b
+  store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/ssubo.ll b/test/CodeGen/R600/ssubo.ll
new file mode 100644
index 0000000..b330276
--- /dev/null
+++ b/test/CodeGen/R600/ssubo.ll
@@ -0,0 +1,64 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
+
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
+
+; FUNC-LABEL: @ssubo_i64_zext
+define void @ssubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
+  %val = extractvalue { i64, i1 } %ssub, 0
+  %carry = extractvalue { i64, i1 } %ssub, 1
+  %ext = zext i1 %carry to i64
+  %add2 = add i64 %val, %ext
+  store i64 %add2, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @s_ssubo_i32
+define void @s_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
+  %ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
+  %val = extractvalue { i32, i1 } %ssub, 0
+  %carry = extractvalue { i32, i1 } %ssub, 1
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
+
+; FUNC-LABEL: @v_ssubo_i32
+define void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %a = load i32 addrspace(1)* %aptr, align 4
+  %b = load i32 addrspace(1)* %bptr, align 4
+  %ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
+  %val = extractvalue { i32, i1 } %ssub, 0
+  %carry = extractvalue { i32, i1 } %ssub, 1
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
+
+; FUNC-LABEL: @s_ssubo_i64
+; SI: S_SUB_I32
+; SI: S_SUBB_U32
+define void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
+  %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
+  %val = extractvalue { i64, i1 } %ssub, 0
+  %carry = extractvalue { i64, i1 } %ssub, 1
+  store i64 %val, i64 addrspace(1)* %out, align 8
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
+
+; FUNC-LABEL: @v_ssubo_i64
+; SI: V_SUB_I32_e32
+; SI: V_SUBB_U32_e32
+define void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+  %a = load i64 addrspace(1)* %aptr, align 4
+  %b = load i64 addrspace(1)* %bptr, align 4
+  %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
+  %val = extractvalue { i64, i1 } %ssub, 0
+  %carry = extractvalue { i64, i1 } %ssub, 1
+  store i64 %val, i64 addrspace(1)* %out, align 8
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll
index c0c8ccc..dd27533 100644
--- a/test/CodeGen/R600/store.ll
+++ b/test/CodeGen/R600/store.ll
@@ -263,8 +263,7 @@ entry:
 ; CM-CHECK: LDS_WRITE
 ; CM-CHECK: LDS_WRITE
 ; SI-CHECK-LABEL: @store_local_v2i32
-; SI-CHECK: DS_WRITE_B32
-; SI-CHECK: DS_WRITE_B32
+; SI-CHECK: DS_WRITE_B64
 define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
 entry:
   store <2 x i32> %in, <2 x i32> addrspace(3)* %out
diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll
index e321ed6..8e64148 100644
--- a/test/CodeGen/R600/sub.ll
+++ b/test/CodeGen/R600/sub.ll
@@ -1,5 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+;RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() readnone
 
 ;FUNC-LABEL: @test2
 ;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@@ -37,23 +39,37 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   ret void
 }
 
-;FUNC_LABEL: @test5
+; FUNC-LABEL: @s_sub_i64:
+; SI: S_SUB_I32
+; SI: S_SUBB_U32
 
-;EG-DAG: SETGE_UINT
-;EG-DAG: CNDE_INT
-;EG-DAG: SUB_INT
-;EG-DAG: SUB_INT
-;EG-DAG: SUB_INT
+; EG-DAG: SETGE_UINT
+; EG-DAG: CNDE_INT
+; EG-DAG: SUB_INT
+; EG-DAG: SUB_INT
+; EG-DAG: SUB_INT
+define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind {
+  %result = sub i64 %a, %b
+  store i64 %result, i64 addrspace(1)* %out, align 8
+  ret void
+}
 
-;SI: S_XOR_B64
-;SI-DAG: S_ADD_I32
-;SI-DAG: S_ADDC_U32
-;SI-DAG: S_ADD_I32
-;SI-DAG: S_ADDC_U32
+; FUNC-LABEL: @v_sub_i64:
+; SI: V_SUB_I32_e32
+; SI: V_SUBB_U32_e32
 
-define void @test5(i64 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
-  %0 = sub i64 %a, %b
-  store i64 %0, i64 addrspace(1)* %out
+; EG-DAG: SETGE_UINT
+; EG-DAG: CNDE_INT
+; EG-DAG: SUB_INT
+; EG-DAG: SUB_INT
+; EG-DAG: SUB_INT
+define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() readnone
+  %a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid
+  %b_ptr = getelementptr i64 addrspace(1)* %inB, i32 %tid
+  %a = load i64 addrspace(1)* %a_ptr
+  %b = load i64 addrspace(1)* %b_ptr
+  %result = sub i64 %a, %b
+  store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
diff --git a/test/CodeGen/R600/uaddo.ll b/test/CodeGen/R600/uaddo.ll
index 3b69687..a80e502 100644
--- a/test/CodeGen/R600/uaddo.ll
+++ b/test/CodeGen/R600/uaddo.ll
@@ -1,8 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
 
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
 declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
 
-; SI-LABEL: @uaddo_i64_zext
+; FUNC-LABEL: @uaddo_i64_zext
 ; SI: ADD
 ; SI: ADDC
 ; SI: ADDC
@@ -15,3 +17,53 @@ define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   store i64 %add2, i64 addrspace(1)* %out, align 8
   ret void
 }
+
+; FUNC-LABEL: @s_uaddo_i32
+; SI: S_ADD_I32
+define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
+  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind
+  %val = extractvalue { i32, i1 } %uadd, 0
+  %carry = extractvalue { i32, i1 } %uadd, 1
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
+
+; FUNC-LABEL: @v_uaddo_i32
+; SI: V_ADD_I32
+define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %a = load i32 addrspace(1)* %aptr, align 4
+  %b = load i32 addrspace(1)* %bptr, align 4
+  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind
+  %val = extractvalue { i32, i1 } %uadd, 0
+  %carry = extractvalue { i32, i1 } %uadd, 1
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
+
+; FUNC-LABEL: @s_uaddo_i64
+; SI: S_ADD_I32
+; SI: S_ADDC_U32
+define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
+  %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
+  %val = extractvalue { i64, i1 } %uadd, 0
+  %carry = extractvalue { i64, i1 } %uadd, 1
+  store i64 %val, i64 addrspace(1)* %out, align 8
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
+
+; FUNC-LABEL: @v_uaddo_i64
+; SI: V_ADD_I32
+; SI: V_ADDC_U32
+define void @v_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+  %a = load i64 addrspace(1)* %aptr, align 4
+  %b = load i64 addrspace(1)* %bptr, align 4
+  %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
+  %val = extractvalue { i64, i1 } %uadd, 0
+  %carry = extractvalue { i64, i1 } %uadd, 1
+  store i64 %val, i64 addrspace(1)* %out, align 8
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
diff --git a/test/CodeGen/R600/udivrem.ll b/test/CodeGen/R600/udivrem.ll
new file mode 100644
index 0000000..5f5753a
--- /dev/null
+++ b/test/CodeGen/R600/udivrem.ll
@@ -0,0 +1,358 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
+
+; FUNC-LABEL: @test_udivrem
+; EG: RECIP_UINT
+; EG-DAG: MULHI
+; EG-DAG: MULLO_INT
+; EG-DAG: SUB_INT
+; EG: CNDE_INT
+; EG: MULHI
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG: CNDE_INT
+; EG: MULHI
+; EG: MULLO_INT
+; EG: SUB_INT
+; EG-DAG: SETGE_UINT
+; EG-DAG: SETGE_UINT
+; EG: AND_INT
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+
+; SI: V_RCP_IFLAG_F32_e32 [[RCP:v[0-9]+]]
+; SI-DAG: V_MUL_HI_U32 [[RCP_HI:v[0-9]+]], [[RCP]]
+; SI-DAG: V_MUL_LO_I32 [[RCP_LO:v[0-9]+]], [[RCP]]
+; SI-DAG: V_SUB_I32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
+; SI: V_CNDMASK_B32_e64
+; SI: V_MUL_HI_U32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
+; SI-DAG: V_ADD_I32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
+; SI-DAG: V_SUBREV_I32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
+; SI: V_CNDMASK_B32_e64
+; SI: V_MUL_HI_U32 [[Quotient:v[0-9]+]]
+; SI: V_MUL_LO_I32 [[Num_S_Remainder:v[0-9]+]]
+; SI-DAG: V_SUB_I32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI: V_AND_B32_e32 [[Tmp1:v[0-9]+]]
+; SI-DAG: V_ADD_I32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
+; SI-DAG: V_SUBREV_I32_e32 [[Quotient_S_One:v[0-9]+]],
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_ADD_I32_e32 [[Remainder_A_Den:v[0-9]+]],
+; SI-DAG: V_SUBREV_I32_e32 [[Remainder_S_Den:v[0-9]+]],
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI: S_ENDPGM
+define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
+  %result0 = udiv i32 %x, %y
+  store i32 %result0, i32 addrspace(1)* %out
+  %result1 = urem i32 %x, %y
+  store i32 %result1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @test_udivrem_v2
+; EG-DAG: RECIP_UINT
+; EG-DAG: MULHI
+; EG-DAG: MULLO_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: MULHI
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: MULHI
+; EG-DAG: MULLO_INT
+; EG-DAG: SUB_INT
+; EG-DAG: SETGE_UINT
+; EG-DAG: SETGE_UINT
+; EG-DAG: AND_INT
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: RECIP_UINT
+; EG-DAG: MULHI
+; EG-DAG: MULLO_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: MULHI
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: MULHI
+; EG-DAG: MULLO_INT
+; EG-DAG: SUB_INT
+; EG-DAG: SETGE_UINT
+; EG-DAG: SETGE_UINT
+; EG-DAG: AND_INT
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+
+; SI-DAG: V_RCP_IFLAG_F32_e32 [[FIRST_RCP:v[0-9]+]]
+; SI-DAG: V_MUL_HI_U32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: V_MUL_LO_I32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: V_SUB_I32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_MUL_HI_U32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
+; SI-DAG: V_ADD_I32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: V_SUBREV_I32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_MUL_HI_U32 [[FIRST_Quotient:v[0-9]+]]
+; SI-DAG: V_MUL_LO_I32 [[FIRST_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: V_SUB_I32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_AND_B32_e32 [[FIRST_Tmp1:v[0-9]+]]
+; SI-DAG: V_ADD_I32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
+; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_ADD_I32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_RCP_IFLAG_F32_e32 [[SECOND_RCP:v[0-9]+]]
+; SI-DAG: V_MUL_HI_U32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: V_MUL_LO_I32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: V_SUB_I32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_MUL_HI_U32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
+; SI-DAG: V_ADD_I32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: V_SUBREV_I32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_MUL_HI_U32 [[SECOND_Quotient:v[0-9]+]]
+; SI-DAG: V_MUL_LO_I32 [[SECOND_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: V_SUB_I32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_AND_B32_e32 [[SECOND_Tmp1:v[0-9]+]]
+; SI-DAG: V_ADD_I32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
+; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_ADD_I32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI: S_ENDPGM
+define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) {
+  %result0 = udiv <2 x i32> %x, %y
+  store <2 x i32> %result0, <2 x i32> addrspace(1)* %out
+  %result1 = urem <2 x i32> %x, %y
+  store <2 x i32> %result1, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+
+; FUNC-LABEL: @test_udivrem_v4
+; EG-DAG: RECIP_UINT
+; EG-DAG: MULHI
+; EG-DAG: MULLO_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: MULHI
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: MULHI
+; EG-DAG: MULLO_INT
+; EG-DAG: SUB_INT
+; EG-DAG: SETGE_UINT
+; EG-DAG: SETGE_UINT
+; EG-DAG: AND_INT
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: RECIP_UINT
+; EG-DAG: MULHI
+; EG-DAG: MULLO_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: MULHI
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: MULHI
+; EG-DAG: MULLO_INT
+; EG-DAG: SUB_INT
+; EG-DAG: SETGE_UINT
+; EG-DAG: SETGE_UINT
+; EG-DAG: AND_INT
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: RECIP_UINT
+; EG-DAG: MULHI
+; EG-DAG: MULLO_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: MULHI
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: MULHI
+; EG-DAG: MULLO_INT
+; EG-DAG: SUB_INT
+; EG-DAG: SETGE_UINT
+; EG-DAG: SETGE_UINT
+; EG-DAG: AND_INT
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: RECIP_UINT
+; EG-DAG: MULHI
+; EG-DAG: MULLO_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: MULHI
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: MULHI
+; EG-DAG: MULLO_INT
+; EG-DAG: SUB_INT
+; EG-DAG: SETGE_UINT
+; EG-DAG: SETGE_UINT
+; EG-DAG: AND_INT
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: ADD_INT
+; EG-DAG: SUB_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+
+; SI-DAG: V_RCP_IFLAG_F32_e32 [[FIRST_RCP:v[0-9]+]]
+; SI-DAG: V_MUL_HI_U32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: V_MUL_LO_I32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: V_SUB_I32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_MUL_HI_U32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
+; SI-DAG: V_ADD_I32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: V_SUBREV_I32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_MUL_HI_U32 [[FIRST_Quotient:v[0-9]+]]
+; SI-DAG: V_MUL_LO_I32 [[FIRST_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: V_SUB_I32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_AND_B32_e32 [[FIRST_Tmp1:v[0-9]+]]
+; SI-DAG: V_ADD_I32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
+; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_ADD_I32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_RCP_IFLAG_F32_e32 [[SECOND_RCP:v[0-9]+]]
+; SI-DAG: V_MUL_HI_U32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: V_MUL_LO_I32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: V_SUB_I32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_MUL_HI_U32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
+; SI-DAG: V_ADD_I32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: V_SUBREV_I32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_MUL_HI_U32 [[SECOND_Quotient:v[0-9]+]]
+; SI-DAG: V_MUL_LO_I32 [[SECOND_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: V_SUB_I32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_AND_B32_e32 [[SECOND_Tmp1:v[0-9]+]]
+; SI-DAG: V_ADD_I32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
+; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_ADD_I32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_RCP_IFLAG_F32_e32 [[THIRD_RCP:v[0-9]+]]
+; SI-DAG: V_MUL_HI_U32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
+; SI-DAG: V_MUL_LO_I32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
+; SI-DAG: V_SUB_I32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_MUL_HI_U32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
+; SI-DAG: V_ADD_I32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: V_SUBREV_I32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_MUL_HI_U32 [[THIRD_Quotient:v[0-9]+]]
+; SI-DAG: V_MUL_LO_I32 [[THIRD_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: V_SUB_I32_e32 [[THIRD_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[THIRD_Num_S_Remainder]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_AND_B32_e32 [[THIRD_Tmp1:v[0-9]+]]
+; SI-DAG: V_ADD_I32_e32 [[THIRD_Quotient_A_One:v[0-9]+]], {{.*}}, [[THIRD_Quotient]]
+; SI-DAG: V_SUBREV_I32_e32 [[THIRD_Quotient_S_One:v[0-9]+]],
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_ADD_I32_e32 [[THIRD_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: V_SUBREV_I32_e32 [[THIRD_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_RCP_IFLAG_F32_e32 [[FOURTH_RCP:v[0-9]+]]
+; SI-DAG: V_MUL_HI_U32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
+; SI-DAG: V_MUL_LO_I32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
+; SI-DAG: V_SUB_I32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_MUL_HI_U32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
+; SI-DAG: V_ADD_I32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_MUL_HI_U32 [[FOURTH_Quotient:v[0-9]+]]
+; SI-DAG: V_MUL_LO_I32 [[FOURTH_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: V_SUB_I32_e32 [[FOURTH_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FOURTH_Num_S_Remainder]]
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_AND_B32_e32 [[FOURTH_Tmp1:v[0-9]+]]
+; SI-DAG: V_ADD_I32_e32 [[FOURTH_Quotient_A_One:v[0-9]+]], {{.*}}, [[FOURTH_Quotient]]
+; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_Quotient_S_One:v[0-9]+]],
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_ADD_I32_e32 [[FOURTH_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: V_CNDMASK_B32_e64
+; SI-DAG: V_CNDMASK_B32_e64
+; SI: S_ENDPGM
+define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
+  %result0 = udiv <4 x i32> %x, %y
+  store <4 x i32> %result0, <4 x i32> addrspace(1)* %out
+  %result1 = urem <4 x i32> %x, %y
+  store <4 x i32> %result1, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/uint_to_fp.f64.ll b/test/CodeGen/R600/uint_to_fp.f64.ll
index 75150c2..9a41796 100644
--- a/test/CodeGen/R600/uint_to_fp.f64.ll
+++ b/test/CodeGen/R600/uint_to_fp.f64.ll
@@ -2,8 +2,35 @@
 
 ; SI-LABEL: @uint_to_fp_f64_i32
 ; SI: V_CVT_F64_U32_e32
+; SI: S_ENDPGM
 define void @uint_to_fp_f64_i32(double addrspace(1)* %out, i32 %in) {
   %cast = uitofp i32 %in to double
   store double %cast, double addrspace(1)* %out, align 8
   ret void
 }
+
+; SI-LABEL: @uint_to_fp_i1_f64:
+; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; FIXME: We should the VGPR sources for V_CNDMASK are copied from SGPRs,
+; we should be able to fold the SGPRs into the V_CNDMASK instructions.
+; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
+; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
+; SI: BUFFER_STORE_DWORDX2
+; SI: S_ENDPGM
+define void @uint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %fp = uitofp i1 %cmp to double
+  store double %fp, double addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @uint_to_fp_i1_f64_load:
+; SI: V_CNDMASK_B32_e64 [[IRESULT:v[0-9]]], 0, 1
+; SI-NEXT: V_CVT_F64_U32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
+; SI: S_ENDPGM
+define void @uint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
+  %fp = uitofp i1 %in to double
+  store double %fp, double addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll
index a5ac355..8f5d42d 100644
--- a/test/CodeGen/R600/uint_to_fp.ll
+++ b/test/CodeGen/R600/uint_to_fp.ll
@@ -1,28 +1,30 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 
-; R600-CHECK-LABEL: @uint_to_fp_v2i32
-; R600-CHECK-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
-; R600-CHECK-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-; SI-CHECK-LABEL: @uint_to_fp_v2i32
-; SI-CHECK: V_CVT_F32_U32_e32
-; SI-CHECK: V_CVT_F32_U32_e32
+; FUNC-LABEL: @uint_to_fp_v2i32
+; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
+; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
+
+; SI: V_CVT_F32_U32_e32
+; SI: V_CVT_F32_U32_e32
+; SI: S_ENDPGM
 define void @uint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
   %result = uitofp <2 x i32> %in to <2 x float>
   store <2 x float> %result, <2 x float> addrspace(1)* %out
   ret void
 }
 
-; R600-CHECK-LABEL: @uint_to_fp_v4i32
-; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI-CHECK-LABEL: @uint_to_fp_v4i32
-; SI-CHECK: V_CVT_F32_U32_e32
-; SI-CHECK: V_CVT_F32_U32_e32
-; SI-CHECK: V_CVT_F32_U32_e32
-; SI-CHECK: V_CVT_F32_U32_e32
+; FUNC-LABEL: @uint_to_fp_v4i32
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+; SI: V_CVT_F32_U32_e32
+; SI: V_CVT_F32_U32_e32
+; SI: V_CVT_F32_U32_e32
+; SI: V_CVT_F32_U32_e32
+; SI: S_ENDPGM
 define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %value = load <4 x i32> addrspace(1) * %in
   %result = uitofp <4 x i32> %value to <4 x float>
@@ -30,17 +32,39 @@ define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspac
   ret void
 }
 
-; R600-CHECK-LABEL: @uint_to_fp_i64_f32
-; R600-CHECK: UINT_TO_FLT
-; R600-CHECK: UINT_TO_FLT
-; R600-CHECK: MULADD_IEEE
-; SI-CHECK-LABEL: @uint_to_fp_i64_f32
-; SI-CHECK: V_CVT_F32_U32_e32
-; SI-CHECK: V_CVT_F32_U32_e32
-; SI-CHECK: V_MAD_F32
+; FUNC-LABEL: @uint_to_fp_i64_f32
+; R600: UINT_TO_FLT
+; R600: UINT_TO_FLT
+; R600: MULADD_IEEE
+; SI: V_CVT_F32_U32_e32
+; SI: V_CVT_F32_U32_e32
+; SI: V_MAD_F32
+; SI: S_ENDPGM
 define void @uint_to_fp_i64_f32(float addrspace(1)* %out, i64 %in) {
 entry:
   %0 = uitofp i64 %in to float
   store float %0, float addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: @uint_to_fp_i1_f32:
+; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI-NEXT: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, 1.000000e+00, [[CMP]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @uint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %fp = uitofp i1 %cmp to float
+  store float %fp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @uint_to_fp_i1_f32_load:
+; SI: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, 1.000000e+00
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) {
+  %fp = uitofp i1 %in to float
+  store float %fp, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/usubo.ll b/test/CodeGen/R600/usubo.ll
new file mode 100644
index 0000000..d57a2c7
--- /dev/null
+++ b/test/CodeGen/R600/usubo.ll
@@ -0,0 +1,66 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
+
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
+
+; FUNC-LABEL: @usubo_i64_zext
+define void @usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
+  %val = extractvalue { i64, i1 } %usub, 0
+  %carry = extractvalue { i64, i1 } %usub, 1
+  %ext = zext i1 %carry to i64
+  %add2 = add i64 %val, %ext
+  store i64 %add2, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @s_usubo_i32
+; SI: S_SUB_I32
+define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
+  %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind
+  %val = extractvalue { i32, i1 } %usub, 0
+  %carry = extractvalue { i32, i1 } %usub, 1
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
+
+; FUNC-LABEL: @v_usubo_i32
+; SI: V_SUBREV_I32_e32
+define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %a = load i32 addrspace(1)* %aptr, align 4
+  %b = load i32 addrspace(1)* %bptr, align 4
+  %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind
+  %val = extractvalue { i32, i1 } %usub, 0
+  %carry = extractvalue { i32, i1 } %usub, 1
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
+
+; FUNC-LABEL: @s_usubo_i64
+; SI: S_SUB_I32
+; SI: S_SUBB_U32
+define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
+  %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
+  %val = extractvalue { i64, i1 } %usub, 0
+  %carry = extractvalue { i64, i1 } %usub, 1
+  store i64 %val, i64 addrspace(1)* %out, align 8
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
+
+; FUNC-LABEL: @v_usubo_i64
+; SI: V_SUB_I32
+; SI: V_SUBB_U32
+define void @v_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+  %a = load i64 addrspace(1)* %aptr, align 4
+  %b = load i64 addrspace(1)* %bptr, align 4
+  %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
+  %val = extractvalue { i64, i1 } %usub, 0
+  %carry = extractvalue { i64, i1 } %usub, 1
+  store i64 %val, i64 addrspace(1)* %out, align 8
+  store i1 %carry, i1 addrspace(1)* %carryout
+  ret void
+}
diff --git a/test/CodeGen/R600/vector-alloca.ll b/test/CodeGen/R600/vector-alloca.ll
new file mode 100644
index 0000000..6543f6d
--- /dev/null
+++ b/test/CodeGen/R600/vector-alloca.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+
+; FUNC-LABEL: @vector_read
+; EG: MOV
+; EG: MOV
+; EG: MOV
+; EG: MOV
+; EG: MOVA_INT
+define void @vector_read(i32 addrspace(1)* %out, i32 %index) {
+entry:
+  %0 = alloca [4 x i32]
+  %x = getelementptr [4 x i32]* %0, i32 0, i32 0
+  %y = getelementptr [4 x i32]* %0, i32 0, i32 1
+  %z = getelementptr [4 x i32]* %0, i32 0, i32 2
+  %w = getelementptr [4 x i32]* %0, i32 0, i32 3
+  store i32 0, i32* %x
+  store i32 1, i32* %y
+  store i32 2, i32* %z
+  store i32 3, i32* %w
+  %1 = getelementptr [4 x i32]* %0, i32 0, i32 %index
+  %2 = load i32* %1
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @vector_write
+; EG: MOV
+; EG: MOV
+; EG: MOV
+; EG: MOV
+; EG: MOVA_INT
+; EG: MOVA_INT
+define void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
+entry:
+  %0 = alloca [4 x i32]
+  %x = getelementptr [4 x i32]* %0, i32 0, i32 0
+  %y = getelementptr [4 x i32]* %0, i32 0, i32 1
+  %z = getelementptr [4 x i32]* %0, i32 0, i32 2
+  %w = getelementptr [4 x i32]* %0, i32 0, i32 3
+  store i32 0, i32* %x
+  store i32 0, i32* %y
+  store i32 0, i32* %z
+  store i32 0, i32* %w
+  %1 = getelementptr [4 x i32]* %0, i32 0, i32 %w_index
+  store i32 1, i32* %1
+  %2 = getelementptr [4 x i32]* %0, i32 0, i32 %r_index
+  %3 = load i32* %2
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+; This test should be optimize to:
+; store i32 0, i32 addrspace(1)* %out
+; FUNC-LABEL: @bitcast_gep
+; CHECK: STORE_RAW
+define void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
+entry:
+  %0 = alloca [4 x i32]
+  %x = getelementptr [4 x i32]* %0, i32 0, i32 0
+  %y = getelementptr [4 x i32]* %0, i32 0, i32 1
+  %z = getelementptr [4 x i32]* %0, i32 0, i32 2
+  %w = getelementptr [4 x i32]* %0, i32 0, i32 3
+  store i32 0, i32* %x
+  store i32 0, i32* %y
+  store i32 0, i32* %z
+  store i32 0, i32* %w
+  %1 = getelementptr [4 x i32]* %0, i32 0, i32 1
+  %2 = bitcast i32* %1 to [4 x i32]*
+  %3 = getelementptr [4 x i32]* %2, i32 0, i32 0
+  %4 = load i32* %3
+  store i32 %4, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll
index 5a5c86d..ab618cf 100644
--- a/test/CodeGen/R600/xor.ll
+++ b/test/CodeGen/R600/xor.ll
@@ -90,3 +90,69 @@ define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32
   store i32 %result, i32 addrspace(1)* %out
   ret void
 }
+
+; SI-CHECK-LABEL: @vector_xor_i64
+; SI-CHECK: V_XOR_B32_e32
+; SI-CHECK: V_XOR_B32_e32
+; SI-CHECK: S_ENDPGM
+define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
+  %a = load i64 addrspace(1)* %in0
+  %b = load i64 addrspace(1)* %in1
+  %result = xor i64 %a, %b
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @scalar_xor_i64
+; SI-CHECK: S_XOR_B64
+; SI-CHECK: S_ENDPGM
+define void @scalar_xor_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %result = xor i64 %a, %b
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @scalar_not_i64
+; SI-CHECK: S_NOT_B64
+define void @scalar_not_i64(i64 addrspace(1)* %out, i64 %a) {
+  %result = xor i64 %a, -1
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @vector_not_i64
+; SI-CHECK: V_NOT_B32
+; SI-CHECK: V_NOT_B32
+define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
+  %a = load i64 addrspace(1)* %in0
+  %b = load i64 addrspace(1)* %in1
+  %result = xor i64 %a, -1
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+; Test that we have a pattern to match xor inside a branch.
+; Note that in the future the backend may be smart enough to
+; use an SALU instruction for this.
+
+; SI-CHECK-LABEL: @xor_cf
+; SI-CHECK: V_XOR
+; SI-CHECK: V_XOR
+define void @xor_cf(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b) {
+entry:
+  %0 = icmp eq i64 %a, 0
+  br i1 %0, label %if, label %else
+
+if:
+  %1 = xor i64 %a, %b
+  br label %endif
+
+else:
+  %2 = load i64 addrspace(1)* %in
+  br label %endif
+
+endif:
+  %3 = phi i64 [%1, %if], [%2, %else]
+  store i64 %3, i64 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/SPARC/atomics.ll b/test/CodeGen/SPARC/atomics.ll
index 5e41300..ee6c1f8 100644
--- a/test/CodeGen/SPARC/atomics.ll
+++ b/test/CodeGen/SPARC/atomics.ll
@@ -38,7 +38,8 @@ entry:
 
 define i32 @test_cmpxchg_i32(i32 %a, i32* %ptr) {
 entry:
-  %b = cmpxchg i32* %ptr, i32 %a, i32 123 monotonic monotonic
+  %pair = cmpxchg i32* %ptr, i32 %a, i32 123 monotonic monotonic
+  %b = extractvalue { i32, i1 } %pair, 0
   ret i32 %b
 }
 
@@ -48,7 +49,8 @@ entry:
 
 define i64 @test_cmpxchg_i64(i64 %a, i64* %ptr) {
 entry:
-  %b = cmpxchg i64* %ptr, i64 %a, i64 123 monotonic monotonic
+  %pair = cmpxchg i64* %ptr, i64 %a, i64 123 monotonic monotonic
+  %b = extractvalue { i64, i1 } %pair, 0
   ret i64 %b
 }
 
diff --git a/test/CodeGen/SPARC/lit.local.cfg b/test/CodeGen/SPARC/lit.local.cfg
index 4d344fa..fa6a54e 100644
--- a/test/CodeGen/SPARC/lit.local.cfg
+++ b/test/CodeGen/SPARC/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'Sparc' in targets:
+if not 'Sparc' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/SystemZ/Large/lit.local.cfg b/test/CodeGen/SystemZ/Large/lit.local.cfg
index 9a02f84..4f22a97 100644
--- a/test/CodeGen/SystemZ/Large/lit.local.cfg
+++ b/test/CodeGen/SystemZ/Large/lit.local.cfg
@@ -5,6 +5,5 @@ config.suffixes = ['.py']
 if config.root.host_arch not in ['SystemZ']:
     config.unsupported = True
 
-targets = set(config.root.targets_to_build.split())
-if not 'SystemZ' in targets:
+if not 'SystemZ' in config.root.targets:
     config.unsupported = True
diff --git a/test/CodeGen/SystemZ/cmpxchg-01.ll b/test/CodeGen/SystemZ/cmpxchg-01.ll
index bb0b18a..5118aad 100644
--- a/test/CodeGen/SystemZ/cmpxchg-01.ll
+++ b/test/CodeGen/SystemZ/cmpxchg-01.ll
@@ -32,7 +32,8 @@ define i8 @f1(i8 %dummy, i8 *%src, i8 %cmp, i8 %swap) {
 ; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT: rll
 ; CHECK-SHIFT: rll {{%r[0-9]+}}, %r5, -8([[NEGSHIFT]])
-  %res = cmpxchg i8 *%src, i8 %cmp, i8 %swap seq_cst seq_cst
+  %pair = cmpxchg i8 *%src, i8 %cmp, i8 %swap seq_cst seq_cst
+  %res = extractvalue { i8, i1 } %pair, 0
   ret i8 %res
 }
 
@@ -50,6 +51,7 @@ define i8 @f2(i8 *%src) {
 ; CHECK-SHIFT: risbg
 ; CHECK-SHIFT: risbg [[SWAP]], {{%r[0-9]+}}, 32, 55, 0
 ; CHECK-SHIFT: br %r14
-  %res = cmpxchg i8 *%src, i8 42, i8 88 seq_cst seq_cst
+  %pair = cmpxchg i8 *%src, i8 42, i8 88 seq_cst seq_cst
+  %res = extractvalue { i8, i1 } %pair, 0
   ret i8 %res
 }
diff --git a/test/CodeGen/SystemZ/cmpxchg-02.ll b/test/CodeGen/SystemZ/cmpxchg-02.ll
index 8d46a8c..9eb0628 100644
--- a/test/CodeGen/SystemZ/cmpxchg-02.ll
+++ b/test/CodeGen/SystemZ/cmpxchg-02.ll
@@ -32,7 +32,8 @@ define i16 @f1(i16 %dummy, i16 *%src, i16 %cmp, i16 %swap) {
 ; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT: rll
 ; CHECK-SHIFT: rll {{%r[0-9]+}}, %r5, -16([[NEGSHIFT]])
-  %res = cmpxchg i16 *%src, i16 %cmp, i16 %swap seq_cst seq_cst
+  %pair = cmpxchg i16 *%src, i16 %cmp, i16 %swap seq_cst seq_cst
+  %res = extractvalue { i16, i1 } %pair, 0
   ret i16 %res
 }
 
@@ -50,6 +51,7 @@ define i16 @f2(i16 *%src) {
 ; CHECK-SHIFT: risbg
 ; CHECK-SHIFT: risbg [[SWAP]], {{%r[0-9]+}}, 32, 47, 0
 ; CHECK-SHIFT: br %r14
-  %res = cmpxchg i16 *%src, i16 42, i16 88 seq_cst seq_cst
+  %pair = cmpxchg i16 *%src, i16 42, i16 88 seq_cst seq_cst
+  %res = extractvalue { i16, i1 } %pair, 0
   ret i16 %res
 }
diff --git a/test/CodeGen/SystemZ/cmpxchg-03.ll b/test/CodeGen/SystemZ/cmpxchg-03.ll
index f6a2ad0..c5fab4d 100644
--- a/test/CodeGen/SystemZ/cmpxchg-03.ll
+++ b/test/CodeGen/SystemZ/cmpxchg-03.ll
@@ -7,7 +7,8 @@ define i32 @f1(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK-LABEL: f1:
 ; CHECK: cs %r2, %r3, 0(%r4)
 ; CHECK: br %r14
-  %val = cmpxchg i32 *%src, i32 %cmp, i32 %swap seq_cst seq_cst
+  %pair = cmpxchg i32 *%src, i32 %cmp, i32 %swap seq_cst seq_cst
+  %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
 
@@ -17,7 +18,8 @@ define i32 @f2(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK: cs %r2, %r3, 4092(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1023
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
 
@@ -27,7 +29,8 @@ define i32 @f3(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK: csy %r2, %r3, 4096(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1024
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
 
@@ -37,7 +40,8 @@ define i32 @f4(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK: csy %r2, %r3, 524284(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
 
@@ -49,7 +53,8 @@ define i32 @f5(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK: cs %r2, %r3, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131072
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
 
@@ -59,7 +64,8 @@ define i32 @f6(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK: csy %r2, %r3, -4(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
 
@@ -69,7 +75,8 @@ define i32 @f7(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK: csy %r2, %r3, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
 
@@ -81,7 +88,8 @@ define i32 @f8(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK: cs %r2, %r3, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131073
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
 
@@ -93,7 +101,8 @@ define i32 @f9(i32 %cmp, i32 %swap, i64 %src, i64 %index) {
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
   %ptr = inttoptr i64 %add1 to i32 *
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
 
@@ -106,7 +115,8 @@ define i32 @f10(i32 %cmp, i32 %swap, i64 %src, i64 %index) {
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i32 *
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
+  %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
 
@@ -116,7 +126,8 @@ define i32 @f11(i32 %dummy, i32 %swap, i32 *%ptr) {
 ; CHECK: lhi %r2, 1001
 ; CHECK: cs %r2, %r3, 0(%r4)
 ; CHECK: br %r14
-  %val = cmpxchg i32 *%ptr, i32 1001, i32 %swap seq_cst seq_cst
+  %pair = cmpxchg i32 *%ptr, i32 1001, i32 %swap seq_cst seq_cst
+  %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
 
@@ -126,6 +137,7 @@ define i32 @f12(i32 %cmp, i32 *%ptr) {
 ; CHECK: lhi [[SWAP:%r[0-9]+]], 1002
 ; CHECK: cs %r2, [[SWAP]], 0(%r3)
 ; CHECK: br %r14
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 1002 seq_cst seq_cst
+  %pair = cmpxchg i32 *%ptr, i32 %cmp, i32 1002 seq_cst seq_cst
+  %val = extractvalue { i32, i1 } %pair, 0
   ret i32 %val
 }
diff --git a/test/CodeGen/SystemZ/cmpxchg-04.ll b/test/CodeGen/SystemZ/cmpxchg-04.ll
index 069bad6..ba1493e 100644
--- a/test/CodeGen/SystemZ/cmpxchg-04.ll
+++ b/test/CodeGen/SystemZ/cmpxchg-04.ll
@@ -7,7 +7,8 @@ define i64 @f1(i64 %cmp, i64 %swap, i64 *%src) {
 ; CHECK-LABEL: f1:
 ; CHECK: csg %r2, %r3, 0(%r4)
 ; CHECK: br %r14
-  %val = cmpxchg i64 *%src, i64 %cmp, i64 %swap seq_cst seq_cst
+  %pairval = cmpxchg i64 *%src, i64 %cmp, i64 %swap seq_cst seq_cst
+  %val = extractvalue { i64, i1 } %pairval, 0
   ret i64 %val
 }
 
@@ -17,7 +18,8 @@ define i64 @f2(i64 %cmp, i64 %swap, i64 *%src) {
 ; CHECK: csg %r2, %r3, 524280(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65535
-  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
+  %pairval = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
+  %val = extractvalue { i64, i1 } %pairval, 0
   ret i64 %val
 }
 
@@ -29,7 +31,8 @@ define i64 @f3(i64 %cmp, i64 %swap, i64 *%src) {
 ; CHECK: csg %r2, %r3, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65536
-  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
+  %pairval = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
+  %val = extractvalue { i64, i1 } %pairval, 0
   ret i64 %val
 }
 
@@ -39,7 +42,8 @@ define i64 @f4(i64 %cmp, i64 %swap, i64 *%src) {
 ; CHECK: csg %r2, %r3, -8(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -1
-  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
+  %pairval = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
+  %val = extractvalue { i64, i1 } %pairval, 0
   ret i64 %val
 }
 
@@ -49,7 +53,8 @@ define i64 @f5(i64 %cmp, i64 %swap, i64 *%src) {
 ; CHECK: csg %r2, %r3, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65536
-  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
+  %pairval = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
+  %val = extractvalue { i64, i1 } %pairval, 0
   ret i64 %val
 }
 
@@ -61,7 +66,8 @@ define i64 @f6(i64 %cmp, i64 %swap, i64 *%src) {
 ; CHECK: csg %r2, %r3, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65537
-  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
+  %pairval = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
+  %val = extractvalue { i64, i1 } %pairval, 0
   ret i64 %val
 }
 
@@ -73,7 +79,8 @@ define i64 @f7(i64 %cmp, i64 %swap, i64 %src, i64 %index) {
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
   %ptr = inttoptr i64 %add1 to i64 *
-  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
+  %pairval = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
+  %val = extractvalue { i64, i1 } %pairval, 0
   ret i64 %val
 }
 
@@ -83,7 +90,8 @@ define i64 @f8(i64 %dummy, i64 %swap, i64 *%ptr) {
 ; CHECK: lghi %r2, 1001
 ; CHECK: csg %r2, %r3, 0(%r4)
 ; CHECK: br %r14
-  %val = cmpxchg i64 *%ptr, i64 1001, i64 %swap seq_cst seq_cst
+  %pairval = cmpxchg i64 *%ptr, i64 1001, i64 %swap seq_cst seq_cst
+  %val = extractvalue { i64, i1 } %pairval, 0
   ret i64 %val
 }
 
@@ -93,6 +101,7 @@ define i64 @f9(i64 %cmp, i64 *%ptr) {
 ; CHECK: lghi [[SWAP:%r[0-9]+]], 1002
 ; CHECK: csg %r2, [[SWAP]], 0(%r3)
 ; CHECK: br %r14
-  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 1002 seq_cst seq_cst
+  %pairval = cmpxchg i64 *%ptr, i64 %cmp, i64 1002 seq_cst seq_cst
+  %val = extractvalue { i64, i1 } %pairval, 0
   ret i64 %val
 }
diff --git a/test/CodeGen/SystemZ/lit.local.cfg b/test/CodeGen/SystemZ/lit.local.cfg
index b12af09..5c02dd3 100644
--- a/test/CodeGen/SystemZ/lit.local.cfg
+++ b/test/CodeGen/SystemZ/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'SystemZ' in targets:
+if not 'SystemZ' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll b/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll
new file mode 100644
index 0000000..ae66369
--- /dev/null
+++ b/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s
+; XFAIL: *
+
+define void @foo(i32* %A) #0 {
+entry:
+; CHECK-LABEL: foo:
+; CHECK: push {r7, lr}
+; CHECK: ldm [[REG0:r[0-9]]]!,
+; CHECK-NEXT: subs [[REG0]]
+; CHECK-NEXT: bl
+  %0 = load i32* %A, align 4
+  %arrayidx1 = getelementptr inbounds i32* %A, i32 1
+  %1 = load i32* %arrayidx1, align 4
+  tail call void @bar(i32* %A, i32 %0, i32 %1) #2
+  ret void
+}
+
+declare void @bar(i32*, i32, i32) #1
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index 6bc39af..6c6de55 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s -check-prefix=CHECK -check-prefix=RA_GREEDY
-; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s -check-prefix=CHECK -check-prefix=RA_BASIC
+; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s
 
 	%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
 	%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
@@ -45,8 +45,7 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
 ; CHECK: sub sp, #
 ; CHECK: mov r[[R0:[0-9]+]], sp
 ; CHECK: str r{{[0-9+]}}, [r[[R0]]
-; RA_GREEDY: str r{{[0-9+]}}, [r[[R0]]
-; RA_BASIC: stm r[[R0]]!
+; CHECK: str r{{[0-9+]}}, [r[[R0]]
 ; CHECK-NOT: ldr r0, [sp
 ; CHECK: mov r[[R1:[0-9]+]], sp
 ; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r{{[0-9]+}}
diff --git a/test/CodeGen/Thumb/fastcc.ll b/test/CodeGen/Thumb/fastcc.ll
new file mode 100644
index 0000000..98ff684
--- /dev/null
+++ b/test/CodeGen/Thumb/fastcc.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mcpu=arm926ej-s -mattr=+vfp2
+
+; This is a regression test, to ensure that fastcc functions are correctly
+; handled when compiling for a processor which has a floating-point unit which
+; is not accessible from the selected instruction set.
+
+target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv5e-none-linux-gnueabi"
+
+; Function Attrs: optsize
+define fastcc void @_foo(float %walpha) #0 {
+entry:
+  br label %for.body13
+
+for.body13:                                       ; preds = %for.body13, %entry
+  br i1 undef, label %for.end182.critedge, label %for.body13
+
+for.end182.critedge:                              ; preds = %for.body13
+  %conv183 = fpext float %walpha to double
+  %mul184 = fmul double %conv183, 8.200000e-01
+  %conv185 = fptrunc double %mul184 to float
+  %conv188 = fpext float %conv185 to double
+  %mul189 = fmul double %conv188, 6.000000e-01
+  %conv190 = fptrunc double %mul189 to float
+  br label %for.body193
+
+for.body193:                                      ; preds = %for.body193, %for.end182.critedge
+  %mul195 = fmul float %conv190, undef
+  br label %for.body193
+}
+
+attributes #0 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.5.0 "}
diff --git a/test/CodeGen/Thumb/lit.local.cfg b/test/CodeGen/Thumb/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/CodeGen/Thumb/lit.local.cfg
+++ b/test/CodeGen/Thumb/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/Thumb/thumb-ldm.ll b/test/CodeGen/Thumb/thumb-ldm.ll
index dd98e6f..95f3edc 100644
--- a/test/CodeGen/Thumb/thumb-ldm.ll
+++ b/test/CodeGen/Thumb/thumb-ldm.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s
+; XFAIL: *
 
 @X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
 
diff --git a/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll b/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll
index 06cfd9b..dedc82b 100644
--- a/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll
+++ b/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s
+; XFAIL: *
 
 @d = external global [64 x i32]
 @s = external global [64 x i32]
diff --git a/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll b/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
index e014453..09e0ed1 100644
--- a/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
+++ b/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-atomic-cfg-tidy=0 | FileCheck %s
 
 @csize = external global [100 x [20 x [4 x i8]]]		; <[100 x [20 x [4 x i8]]]*> [#uses=1]
 @vsize = external global [100 x [20 x [4 x i8]]]		; <[100 x [20 x [4 x i8]]]*> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
index 940cfd1..c8eac8d 100644
--- a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -arm-atomic-cfg-tidy=0 | FileCheck %s
 ; PR4659
 ; PR4682
 
diff --git a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
index 52066d3..a9a2478 100644
--- a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
+++ b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-cgp-branch-opts | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-cgp-branch-opts -arm-atomic-cfg-tidy=0 | FileCheck %s
 
 %struct.pix_pos = type { i32, i32, i32, i32, i32, i32 }
 
diff --git a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
index 1b8bdb1..8beb5b1 100644
--- a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
+++ b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -O3 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 -O3 | FileCheck %s
 ; rdar://7493908
 
 ; Make sure the result of the first dynamic_alloc isn't copied back to sp more
diff --git a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
index 810bfb7..f3046e1 100644
--- a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
+++ b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic -arm-atomic-cfg-tidy=0 | FileCheck %s
 ; rdar://8115404
 ; Tail merging must not split an IT block.
 
diff --git a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
index 75f5439..3d89390 100644
--- a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
+++ b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
@@ -1,5 +1,5 @@
 ; rdar://8465407
-; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s
 
 %struct.buf = type opaque
 
diff --git a/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll b/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
index b1ce3bb..240df83 100644
--- a/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
+++ b/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=thumbv7-apple-darwin10 < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv7-apple-darwin10 -arm-atomic-cfg-tidy=0 < %s | FileCheck %s
 
 %struct.op = type { %struct.op*, %struct.op*, %struct.op* ()*, i32, i16, i16, i8, i8 }
 
diff --git a/test/CodeGen/Thumb2/buildvector-crash.ll b/test/CodeGen/Thumb2/buildvector-crash.ll
index 8a3c895..16e2298 100644
--- a/test/CodeGen/Thumb2/buildvector-crash.ll
+++ b/test/CodeGen/Thumb2/buildvector-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O3 -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -O3 -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 -mcpu=cortex-a8 | FileCheck %s
 ; Formerly crashed, 3573915.
 
 define void @RotateStarsFP_Vec() nounwind {
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
index a9f948c..88c7f0f 100644
--- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 | FileCheck %s
 
 define void @fht(float* nocapture %fz, i16 signext %n) nounwind {
 ; CHECK-LABEL: fht:
diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll
index 36544d1..d20eef0 100644
--- a/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-atomic-cfg-tidy=0 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s
 ; rdar://7352504
 ; Make sure we use "str r9, [sp, #+28]" instead of "sub.w r4, r7, #256" followed by "str r9, [r4, #-32]".
 
diff --git a/test/CodeGen/Thumb2/lit.local.cfg b/test/CodeGen/Thumb2/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/CodeGen/Thumb2/lit.local.cfg
+++ b/test/CodeGen/Thumb2/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/Thumb2/thumb2-branch.ll b/test/CodeGen/Thumb2/thumb2-branch.ll
index a00b22d..332ed50 100644
--- a/test/CodeGen/Thumb2/thumb2-branch.ll
+++ b/test/CodeGen/Thumb2/thumb2-branch.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 -arm-atomic-cfg-tidy=0 | FileCheck %s
 ; If-conversion defeats the purpose of this test, which is to check
 ; conditional branch generation, so a call to make sure it doesn't
 ; happen and we get actual branches.
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
index 893bd0f..f0f7916 100644
--- a/test/CodeGen/Thumb2/thumb2-cbnz.ll
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 | FileCheck %s
 ; rdar://7354379
 
 declare double @foo(double) nounwind readnone
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
index 403cd48..a861912 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-default-it | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv8-apple-ios -arm-no-restrict-it | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 -arm-default-it | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8-apple-ios -arm-atomic-cfg-tidy=0 -arm-no-restrict-it | FileCheck %s
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
index a71aa3f..79667d4 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv8-apple-darwin -arm-no-restrict-it | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-atomic-cfg-tidy=0 -arm-default-it | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8-apple-darwin -arm-atomic-cfg-tidy=0 -arm-no-restrict-it | FileCheck %s
 
 ; There shouldn't be a unconditional branch at end of bb52.
 ; rdar://7184787
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index 52c1063..94f4725 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-elf -mattr=+neon | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-elf -mattr=+neon -arm-atomic-cfg-tidy=0 | FileCheck %s
 ; PR4789
 
 %bar = type { float, float, float }
diff --git a/test/CodeGen/Thumb2/tpsoft.ll b/test/CodeGen/Thumb2/tpsoft.ll
new file mode 100644
index 0000000..6ab8bf0
--- /dev/null
+++ b/test/CodeGen/Thumb2/tpsoft.ll
@@ -0,0 +1,54 @@
+; RUN: llc  %s -mtriple=thumbv7-linux-gnueabi -o - | \
+; RUN:    FileCheck  -check-prefix=ELFASM %s
+; RUN: llc  %s -mtriple=thumbebv7-linux-gnueabi -o - | \
+; RUN:    FileCheck  -check-prefix=ELFASM %s
+; RUN: llc  %s -mtriple=thumbv7-linux-gnueabi -filetype=obj -o - | \
+; RUN:    llvm-readobj -s -sd | FileCheck  -check-prefix=ELFOBJ -check-prefix=ELFOBJ-LE %s
+; RUN: llc  %s -mtriple=thumbebv7-linux-gnueabi -filetype=obj -o - | \
+; RUN:    llvm-readobj -s -sd | FileCheck  -check-prefix=ELFOBJ -check-prefix=ELFOBJ-BE %s
+
+;; Make sure that bl __aeabi_read_tp is materialized and fixed up correctly
+;; in the obj case.
+
+@i = external thread_local global i32
+@a = external global i8
+@b = external global [10 x i8]
+
+define arm_aapcs_vfpcc i32 @main() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  switch i32 %0, label %bb2 [
+    i32 12, label %bb
+    i32 13, label %bb1
+  ]
+
+bb:                                               ; preds = %entry
+  %1 = tail call arm_aapcs_vfpcc  i32 @foo(i8* @a) nounwind
+  ret i32 %1
+; ELFASM:       	bl	__aeabi_read_tp
+
+
+; ELFOBJ:      Sections [
+; ELFOBJ:        Section {
+; ELFOBJ:          Name: .text
+; ELFOBJ-LE:          SectionData (
+;;;                  BL __aeabi_read_tp is ---------+
+;;;                                                 V
+; ELFOBJ-LE-NEXT:     0000: 2DE90048 0E487844 0168FFF7 FEFF4058
+; ELFOBJ-BE:          SectionData (
+;;;                  BL __aeabi_read_tp is ---------+
+;;;                                                 V
+; ELFOBJ-BE-NEXT:     0000: E92D4800 480E4478 6801F7FF FFFE5840
+
+
+bb1:                                              ; preds = %entry
+  %2 = tail call arm_aapcs_vfpcc  i32 @bar(i32* bitcast ([10 x i8]* @b to i32*)) nounwind
+  ret i32 %2
+
+bb2:                                              ; preds = %entry
+  ret i32 -1
+}
+
+declare arm_aapcs_vfpcc i32 @foo(i8*)
+
+declare arm_aapcs_vfpcc i32 @bar(i32*)
diff --git a/test/CodeGen/Thumb2/v8_IT_3.ll b/test/CodeGen/Thumb2/v8_IT_3.ll
index 4dca246..a028dee 100644
--- a/test/CodeGen/Thumb2/v8_IT_3.ll
+++ b/test/CodeGen/Thumb2/v8_IT_3.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv8 -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC
-; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC
+; RUN: llc < %s -mtriple=thumbv8 -arm-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7 -arm-atomic-cfg-tidy=0 -arm-restrict-it | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8 -arm-atomic-cfg-tidy=0 -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC
+; RUN: llc < %s -mtriple=thumbv7 -arm-atomic-cfg-tidy=0 -arm-restrict-it -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC
 
 %struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* }
 %struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 }
diff --git a/test/CodeGen/Thumb2/v8_IT_5.ll b/test/CodeGen/Thumb2/v8_IT_5.ll
index 2f352d6..2da75ad 100644
--- a/test/CodeGen/Thumb2/v8_IT_5.ll
+++ b/test/CodeGen/Thumb2/v8_IT_5.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8 -arm-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7 -arm-atomic-cfg-tidy=0 -arm-restrict-it | FileCheck %s
 ; CHECK: it	ne
 ; CHECK-NEXT: cmpne
 ; CHECK-NEXT: bne [[JUMPTARGET:.LBB[0-9]+_[0-9]+]]
diff --git a/test/CodeGen/X86/2007-05-05-Personality.ll b/test/CodeGen/X86/2007-05-05-Personality.ll
index 5b8fe72..b99c58c 100644
--- a/test/CodeGen/X86/2007-05-05-Personality.ll
+++ b/test/CodeGen/X86/2007-05-05-Personality.ll
@@ -1,12 +1,14 @@
 ; RUN: llc < %s -mtriple=i686-pc-linux-gnu -o -     | FileCheck %s  --check-prefix=LIN
-; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s  --check-prefix=LIN
 ; RUN: llc < %s -mtriple=i386-pc-mingw32 -o -       | FileCheck %s  --check-prefix=WIN
 ; RUN: llc < %s -mtriple=i686-pc-windows-gnu -o -   | FileCheck %s  --check-prefix=WIN
+; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s  --check-prefix=WIN64
 
 ; LIN: .cfi_personality 0, __gnat_eh_personality
 ; LIN: .cfi_lsda 0, .Lexception0
 ; WIN: .cfi_personality 0, ___gnat_eh_personality
 ; WIN: .cfi_lsda 0, Lexception0
+; WIN64: .seh_handler __gnat_eh_personality
+; WIN64: .seh_handlerdata
 
 @error = external global i8
 
@@ -15,7 +17,7 @@ entry:
   invoke void @raise()
           to label %eh_then unwind label %unwind
 
-unwind:                                           ; preds = %entry 
+unwind:                                           ; preds = %entry
   %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*)
               catch i8* @error
   %eh_select = extractvalue { i8*, i32 } %eh_ptr, 1
diff --git a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
deleted file mode 100644
index 0ae1897..0000000
--- a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep -- -86
-
-define i16 @f(<4 x float>* %tmp116117.i1061.i) nounwind {
-entry:
-	alloca [4 x <4 x float>]		; <[4 x <4 x float>]*>:0 [#uses=167]
-	alloca [4 x <4 x float>]		; <[4 x <4 x float>]*>:1 [#uses=170]
-	alloca [4 x <4 x i32>]		; <[4 x <4 x i32>]*>:2 [#uses=12]
-	%.sub6235.i = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0		; <<4 x float>*> [#uses=76]
-	%.sub.i = getelementptr [4 x <4 x float>]* %1, i32 0, i32 0		; <<4 x float>*> [#uses=59]
-
-	%tmp124.i1062.i = getelementptr <4 x float>* %tmp116117.i1061.i, i32 63		; <<4 x float>*> [#uses=1]
-	%tmp125.i1063.i = load <4 x float>* %tmp124.i1062.i		; <<4 x float>> [#uses=5]
-	%tmp828.i1077.i = shufflevector <4 x float> %tmp125.i1063.i, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >		; <<4 x float>> [#uses=4]
-	%tmp704.i1085.i = load <4 x float>* %.sub6235.i		; <<4 x float>> [#uses=1]
-	%tmp712.i1086.i = call <4 x float> @llvm.x86.sse.max.ps( <4 x float> %tmp704.i1085.i, <4 x float> %tmp828.i1077.i )		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp712.i1086.i, <4 x float>* %.sub.i
-
-	%tmp2587.i1145.gep.i = getelementptr [4 x <4 x float>]* %1, i32 0, i32 0, i32 2		; <float*> [#uses=1]
-	%tmp5334.i = load float* %tmp2587.i1145.gep.i		; <float> [#uses=5]
-	%tmp2723.i1170.i = insertelement <4 x float> undef, float %tmp5334.i, i32 2		; <<4 x float>> [#uses=5]
-	store <4 x float> %tmp2723.i1170.i, <4 x float>* %.sub6235.i
-
-	%tmp1406.i1367.i = shufflevector <4 x float> %tmp2723.i1170.i, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 >		; <<4 x float>> [#uses=1]
-	%tmp84.i1413.i = load <4 x float>* %.sub6235.i		; <<4 x float>> [#uses=1]
-	%tmp89.i1415.i = fmul <4 x float> %tmp84.i1413.i, %tmp1406.i1367.i		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp89.i1415.i, <4 x float>* %.sub.i
-        ret i16 0
-}
-
-declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
diff --git a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
index e64375a..a0106d7 100644
--- a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
+++ b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
@@ -8,7 +8,7 @@ target triple = "i386-pc-linux-gnu"
 @__resp = thread_local global %struct.__res_state* @_res		; <%struct.__res_state**> [#uses=1]
 @_res = global %struct.__res_state zeroinitializer, section ".bss"		; <%struct.__res_state*> [#uses=1]
 
-@__libc_resp = hidden alias %struct.__res_state** @__resp		; <%struct.__res_state**> [#uses=2]
+@__libc_resp = hidden thread_local alias %struct.__res_state** @__resp		; <%struct.__res_state**> [#uses=2]
 
 define i32 @foo() {
 ; CHECK-LABEL: foo:
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 1259cf4..dfb98bb 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s
 ; CHECK: subq    $40, %rsp
-; CHECK: movaps  %xmm8, (%rsp)
-; CHECK: movaps  %xmm7, 16(%rsp)
+; CHECK: movaps  %xmm8, 16(%rsp)
+; CHECK: movaps  %xmm7, (%rsp)
 
 define i32 @a() nounwind {
 entry:
diff --git a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
index f9bf310..850f678 100644
--- a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
+++ b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
@@ -11,9 +11,9 @@ entry:
 ; CHECK: movl 4([[REG]]), %edx
 ; CHECK: LBB0_1:
 ; CHECK: movl %eax, %ebx
-; CHECK: addl {{%[a-z]+}}, %ebx
+; CHECK: addl $1, %ebx
 ; CHECK: movl %edx, %ecx
-; CHECK: adcl {{%[a-z]+}}, %ecx
+; CHECK: adcl $0, %ecx
 ; CHECK: lock
 ; CHECK-NEXT: cmpxchg8b ([[REG]])
 ; CHECK-NEXT: jne
diff --git a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
index b45ac22..4181c26 100644
--- a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
+++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
@@ -24,7 +24,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.lv = !{!0, !14, !15, !16, !17, !24, !25, !28}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !3, i32 11, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !31, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !31, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* null, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786451, metadata !31, metadata !3, metadata !"foo", i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 3, size 32, align 32, offset 0] [def] [from ]
 !3 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ]
 !4 = metadata !{i32 786449, metadata !31, i32 4, metadata !"4.2.1 LLVM build", i1 true, metadata !"", i32 0, metadata !32, metadata !32, metadata !33, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
diff --git a/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll b/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
index f69cedc..ebf51a5 100644
--- a/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
+++ b/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
@@ -18,7 +18,8 @@ entry:
 loop:
 ; CHECK: lock
 ; CHECK-NEXT: cmpxchg8b
-  %r = cmpxchg i64* %ptr, i64 0, i64 1 monotonic monotonic
+  %pair = cmpxchg i64* %ptr, i64 0, i64 1 monotonic monotonic
+  %r = extractvalue { i64, i1 } %pair, 0
   %stored1  = icmp eq i64 %r, 0
   br i1 %stored1, label %loop, label %continue
 continue:
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index f016528..625a351 100644
--- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -8,11 +8,11 @@ target triple = "x86_64-apple-darwin10.0.0"
 ; CHECK: DW_TAG_subprogram
 ; CHECK: DW_TAG_variable
 ; CHECK: DW_TAG_variable
+; CHECK-NEXT:   DW_AT_location
 ; CHECK-NEXT:   DW_AT_name {{.*}} "z_s"
 ; CHECK-NEXT:   DW_AT_decl_file
 ; CHECK-NEXT:   DW_AT_decl_line
 ; CHECK-NEXT:   DW_AT_type{{.*}}{[[TYPE:.*]]}
-; CHECK-NEXT:   DW_AT_location
 ; CHECK: [[TYPE]]:
 ; CHECK-NEXT: DW_AT_name {{.*}} "int"
 
diff --git a/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
index 650839a..36667de 100644
--- a/test/CodeGen/X86/2012-11-30-misched-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
@@ -69,15 +69,15 @@ declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)
 !1 = metadata !{metadata !2}
 !2 = metadata !{}
 !4 = metadata !{i32 786688, metadata !5, metadata !"num1", metadata !14, i32 815, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [num1] [line 815]
-!5 = metadata !{i32 786443, metadata !6, i32 815, i32 0, metadata !14, i32 177} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!6 = metadata !{i32 786443, metadata !7, i32 812, i32 0, metadata !14, i32 176} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!7 = metadata !{i32 786443, metadata !8, i32 807, i32 0, metadata !14, i32 175} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!8 = metadata !{i32 786443, metadata !9, i32 440, i32 0, metadata !14, i32 94} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!9 = metadata !{i32 786443, metadata !10, i32 435, i32 0, metadata !14, i32 91} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!10 = metadata !{i32 786443, metadata !11, i32 434, i32 0, metadata !14, i32 90} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!11 = metadata !{i32 786443, metadata !12, i32 250, i32 0, metadata !14, i32 24} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!12 = metadata !{i32 786443, metadata !13, i32 249, i32 0, metadata !14, i32 23} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!13 = metadata !{i32 786443, metadata !2, i32 221, i32 0, metadata !14, i32 19} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!5 = metadata !{i32 786443, metadata !14, metadata !6, i32 815, i32 0, i32 177} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!6 = metadata !{i32 786443, metadata !14, metadata !7, i32 812, i32 0, i32 176} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!7 = metadata !{i32 786443, metadata !14, metadata !8, i32 807, i32 0, i32 175} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!8 = metadata !{i32 786443, metadata !14, metadata !9, i32 440, i32 0, i32 94} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!9 = metadata !{i32 786443, metadata !14, metadata !10, i32 435, i32 0, i32 91} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!10 = metadata !{i32 786443, metadata !14, metadata !11, i32 434, i32 0, i32 90} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!11 = metadata !{i32 786443, metadata !14, metadata !12, i32 250, i32 0, i32 24} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!12 = metadata !{i32 786443, metadata !14, metadata !13, i32 249, i32 0, i32 23} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!13 = metadata !{i32 786443, metadata !14, metadata !2, i32 221, i32 0, i32 19} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
 !14 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
 !15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 160, i64 8, i32 0, i32 0, metadata !16, metadata !17, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char]
 !16 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
diff --git a/test/CodeGen/X86/2014-05-29-factorial.ll b/test/CodeGen/X86/2014-05-29-factorial.ll
new file mode 100644
index 0000000..987a21d
--- /dev/null
+++ b/test/CodeGen/X86/2014-05-29-factorial.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; CHECK: decq [[X:%rdi|%rcx]]
+; CHECK-NOT: testq [[X]], [[X]]
+
+define i64 @fact2(i64 %x) {
+entry:
+  br label %while.body
+
+while.body:
+  %result.06 = phi i64 [ %mul, %while.body ], [ 1, %entry ]
+  %x.addr.05 = phi i64 [ %dec, %while.body ], [ %x, %entry ]
+  %mul = mul nsw i64 %result.06, %x.addr.05
+  %dec = add nsw i64 %x.addr.05, -1
+  %cmp = icmp sgt i64 %dec, 0
+  br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit:
+  %mul.lcssa = phi i64 [ %mul, %while.body ]
+  br label %while.end
+
+while.end:
+  %result.0.lcssa = phi i64 [ %mul.lcssa, %while.end.loopexit ]
+  ret i64 %result.0.lcssa
+}
diff --git a/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll b/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll
new file mode 100644
index 0000000..4580795
--- /dev/null
+++ b/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; CHECK: addl
+
+; The two additions are the same , but have different flags.
+; In theory this code should never be generated by the frontend, but this 
+; tries to test that two identical instructions with two different flags
+; actually generate two different nodes.
+;
+; Normally the combiner would see this condition without the flags 
+; and optimize the result of the sub into a register clear
+; (the final result would be 0). With the different flags though the combiner 
+; needs to keep the add + sub nodes, because the two nodes result as different
+; nodes and so cannot assume that the subtraction of the two nodes
+; generates 0 as result
+define i32 @foo(i32 %a, i32 %b) {
+  %1 = add i32 %a, %b
+  %2 = add nsw i32 %a, %b
+  %3 = sub i32 %1, %2
+  ret i32 %3
+}
diff --git a/test/CodeGen/X86/Atomics-64.ll b/test/CodeGen/X86/Atomics-64.ll
index c274688..c392e94 100644
--- a/test/CodeGen/X86/Atomics-64.ll
+++ b/test/CodeGen/X86/Atomics-64.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86-64 > %t.x86-64
-; RUN: llc < %s -march=x86 > %t.x86
+; RUN: llc < %s -march=x86 -mattr=cx16 > %t.x86
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin8"
 
@@ -704,7 +704,8 @@ entry:
   %3 = zext i8 %2 to i32
   %4 = trunc i32 %3 to i8
   %5 = trunc i32 %1 to i8
-  %6 = cmpxchg i8* @sc, i8 %4, i8 %5 monotonic monotonic
+  %pair6 = cmpxchg i8* @sc, i8 %4, i8 %5 monotonic monotonic
+  %6 = extractvalue { i8, i1 } %pair6, 0
   store i8 %6, i8* @sc, align 1
   %7 = load i8* @sc, align 1
   %8 = zext i8 %7 to i32
@@ -712,7 +713,8 @@ entry:
   %10 = zext i8 %9 to i32
   %11 = trunc i32 %10 to i8
   %12 = trunc i32 %8 to i8
-  %13 = cmpxchg i8* @uc, i8 %11, i8 %12 monotonic monotonic
+  %pair13 = cmpxchg i8* @uc, i8 %11, i8 %12 monotonic monotonic
+  %13 = extractvalue { i8, i1 } %pair13, 0
   store i8 %13, i8* @uc, align 1
   %14 = load i8* @sc, align 1
   %15 = sext i8 %14 to i16
@@ -722,7 +724,8 @@ entry:
   %19 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %20 = trunc i32 %18 to i16
   %21 = trunc i32 %16 to i16
-  %22 = cmpxchg i16* %19, i16 %20, i16 %21 monotonic monotonic
+  %pair22 = cmpxchg i16* %19, i16 %20, i16 %21 monotonic monotonic
+  %22 = extractvalue { i16, i1 } %pair22, 0
   store i16 %22, i16* @ss, align 2
   %23 = load i8* @sc, align 1
   %24 = sext i8 %23 to i16
@@ -732,49 +735,56 @@ entry:
   %28 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %29 = trunc i32 %27 to i16
   %30 = trunc i32 %25 to i16
-  %31 = cmpxchg i16* %28, i16 %29, i16 %30 monotonic monotonic
+  %pair31 = cmpxchg i16* %28, i16 %29, i16 %30 monotonic monotonic
+  %31 = extractvalue { i16, i1 } %pair31, 0
   store i16 %31, i16* @us, align 2
   %32 = load i8* @sc, align 1
   %33 = sext i8 %32 to i32
   %34 = load i8* @uc, align 1
   %35 = zext i8 %34 to i32
   %36 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %37 = cmpxchg i32* %36, i32 %35, i32 %33 monotonic monotonic
+  %pair37 = cmpxchg i32* %36, i32 %35, i32 %33 monotonic monotonic
+  %37 = extractvalue { i32, i1 } %pair37, 0
   store i32 %37, i32* @si, align 4
   %38 = load i8* @sc, align 1
   %39 = sext i8 %38 to i32
   %40 = load i8* @uc, align 1
   %41 = zext i8 %40 to i32
   %42 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %43 = cmpxchg i32* %42, i32 %41, i32 %39 monotonic monotonic
+  %pair43 = cmpxchg i32* %42, i32 %41, i32 %39 monotonic monotonic
+  %43 = extractvalue { i32, i1 } %pair43, 0
   store i32 %43, i32* @ui, align 4
   %44 = load i8* @sc, align 1
   %45 = sext i8 %44 to i64
   %46 = load i8* @uc, align 1
   %47 = zext i8 %46 to i64
   %48 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
-  %49 = cmpxchg i64* %48, i64 %47, i64 %45 monotonic monotonic
+  %pair49 = cmpxchg i64* %48, i64 %47, i64 %45 monotonic monotonic
+  %49 = extractvalue { i64, i1 } %pair49, 0
   store i64 %49, i64* @sl, align 8
   %50 = load i8* @sc, align 1
   %51 = sext i8 %50 to i64
   %52 = load i8* @uc, align 1
   %53 = zext i8 %52 to i64
   %54 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
-  %55 = cmpxchg i64* %54, i64 %53, i64 %51 monotonic monotonic
+  %pair55 = cmpxchg i64* %54, i64 %53, i64 %51 monotonic monotonic
+  %55 = extractvalue { i64, i1 } %pair55, 0
   store i64 %55, i64* @ul, align 8
   %56 = load i8* @sc, align 1
   %57 = sext i8 %56 to i64
   %58 = load i8* @uc, align 1
   %59 = zext i8 %58 to i64
   %60 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
-  %61 = cmpxchg i64* %60, i64 %59, i64 %57 monotonic monotonic
+  %pair61 = cmpxchg i64* %60, i64 %59, i64 %57 monotonic monotonic
+  %61 = extractvalue { i64, i1 } %pair61, 0
   store i64 %61, i64* @sll, align 8
   %62 = load i8* @sc, align 1
   %63 = sext i8 %62 to i64
   %64 = load i8* @uc, align 1
   %65 = zext i8 %64 to i64
   %66 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
-  %67 = cmpxchg i64* %66, i64 %65, i64 %63 monotonic monotonic
+  %pair67 = cmpxchg i64* %66, i64 %65, i64 %63 monotonic monotonic
+  %67 = extractvalue { i64, i1 } %pair67, 0
   store i64 %67, i64* @ull, align 8
   %68 = load i8* @sc, align 1
   %69 = zext i8 %68 to i32
@@ -782,7 +792,8 @@ entry:
   %71 = zext i8 %70 to i32
   %72 = trunc i32 %71 to i8
   %73 = trunc i32 %69 to i8
-  %74 = cmpxchg i8* @sc, i8 %72, i8 %73 monotonic monotonic
+  %pair74 = cmpxchg i8* @sc, i8 %72, i8 %73 monotonic monotonic
+  %74 = extractvalue { i8, i1 } %pair74, 0
   %75 = icmp eq i8 %74, %72
   %76 = zext i1 %75 to i8
   %77 = zext i8 %76 to i32
@@ -793,7 +804,8 @@ entry:
   %81 = zext i8 %80 to i32
   %82 = trunc i32 %81 to i8
   %83 = trunc i32 %79 to i8
-  %84 = cmpxchg i8* @uc, i8 %82, i8 %83 monotonic monotonic
+  %pair84 = cmpxchg i8* @uc, i8 %82, i8 %83 monotonic monotonic
+  %84 = extractvalue { i8, i1 } %pair84, 0
   %85 = icmp eq i8 %84, %82
   %86 = zext i1 %85 to i8
   %87 = zext i8 %86 to i32
@@ -805,7 +817,8 @@ entry:
   %92 = zext i8 %91 to i32
   %93 = trunc i32 %92 to i8
   %94 = trunc i32 %90 to i8
-  %95 = cmpxchg i8* bitcast (i16* @ss to i8*), i8 %93, i8 %94 monotonic monotonic
+  %pair95 = cmpxchg i8* bitcast (i16* @ss to i8*), i8 %93, i8 %94 monotonic monotonic
+  %95 = extractvalue { i8, i1 } %pair95, 0
   %96 = icmp eq i8 %95, %93
   %97 = zext i1 %96 to i8
   %98 = zext i8 %97 to i32
@@ -817,7 +830,8 @@ entry:
   %103 = zext i8 %102 to i32
   %104 = trunc i32 %103 to i8
   %105 = trunc i32 %101 to i8
-  %106 = cmpxchg i8* bitcast (i16* @us to i8*), i8 %104, i8 %105 monotonic monotonic
+  %pair106 = cmpxchg i8* bitcast (i16* @us to i8*), i8 %104, i8 %105 monotonic monotonic
+  %106 = extractvalue { i8, i1 } %pair106, 0
   %107 = icmp eq i8 %106, %104
   %108 = zext i1 %107 to i8
   %109 = zext i8 %108 to i32
@@ -828,7 +842,8 @@ entry:
   %113 = zext i8 %112 to i32
   %114 = trunc i32 %113 to i8
   %115 = trunc i32 %111 to i8
-  %116 = cmpxchg i8* bitcast (i32* @si to i8*), i8 %114, i8 %115 monotonic monotonic
+  %pair116 = cmpxchg i8* bitcast (i32* @si to i8*), i8 %114, i8 %115 monotonic monotonic
+  %116 = extractvalue { i8, i1 } %pair116, 0
   %117 = icmp eq i8 %116, %114
   %118 = zext i1 %117 to i8
   %119 = zext i8 %118 to i32
@@ -839,7 +854,8 @@ entry:
   %123 = zext i8 %122 to i32
   %124 = trunc i32 %123 to i8
   %125 = trunc i32 %121 to i8
-  %126 = cmpxchg i8* bitcast (i32* @ui to i8*), i8 %124, i8 %125 monotonic monotonic
+  %pair126 = cmpxchg i8* bitcast (i32* @ui to i8*), i8 %124, i8 %125 monotonic monotonic
+  %126 = extractvalue { i8, i1 } %pair126, 0
   %127 = icmp eq i8 %126, %124
   %128 = zext i1 %127 to i8
   %129 = zext i8 %128 to i32
@@ -850,7 +866,8 @@ entry:
   %133 = zext i8 %132 to i64
   %134 = trunc i64 %133 to i8
   %135 = trunc i64 %131 to i8
-  %136 = cmpxchg i8* bitcast (i64* @sl to i8*), i8 %134, i8 %135 monotonic monotonic
+  %pair136 = cmpxchg i8* bitcast (i64* @sl to i8*), i8 %134, i8 %135 monotonic monotonic
+  %136 = extractvalue { i8, i1 } %pair136, 0
   %137 = icmp eq i8 %136, %134
   %138 = zext i1 %137 to i8
   %139 = zext i8 %138 to i32
@@ -861,7 +878,8 @@ entry:
   %143 = zext i8 %142 to i64
   %144 = trunc i64 %143 to i8
   %145 = trunc i64 %141 to i8
-  %146 = cmpxchg i8* bitcast (i64* @ul to i8*), i8 %144, i8 %145 monotonic monotonic
+  %pair146 = cmpxchg i8* bitcast (i64* @ul to i8*), i8 %144, i8 %145 monotonic monotonic
+  %146 = extractvalue { i8, i1 } %pair146, 0
   %147 = icmp eq i8 %146, %144
   %148 = zext i1 %147 to i8
   %149 = zext i8 %148 to i32
@@ -872,7 +890,8 @@ entry:
   %153 = zext i8 %152 to i64
   %154 = trunc i64 %153 to i8
   %155 = trunc i64 %151 to i8
-  %156 = cmpxchg i8* bitcast (i64* @sll to i8*), i8 %154, i8 %155 monotonic monotonic
+  %pair156 = cmpxchg i8* bitcast (i64* @sll to i8*), i8 %154, i8 %155 monotonic monotonic
+  %156 = extractvalue { i8, i1 } %pair156, 0
   %157 = icmp eq i8 %156, %154
   %158 = zext i1 %157 to i8
   %159 = zext i8 %158 to i32
@@ -883,7 +902,8 @@ entry:
   %163 = zext i8 %162 to i64
   %164 = trunc i64 %163 to i8
   %165 = trunc i64 %161 to i8
-  %166 = cmpxchg i8* bitcast (i64* @ull to i8*), i8 %164, i8 %165 monotonic monotonic
+  %pair166 = cmpxchg i8* bitcast (i64* @ull to i8*), i8 %164, i8 %165 monotonic monotonic
+  %166 = extractvalue { i8, i1 } %pair166, 0
   %167 = icmp eq i8 %166, %164
   %168 = zext i1 %167 to i8
   %169 = zext i8 %168 to i32
diff --git a/test/CodeGen/X86/GC/lit.local.cfg b/test/CodeGen/X86/GC/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/CodeGen/X86/GC/lit.local.cfg
+++ b/test/CodeGen/X86/GC/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/X86/aliases.ll b/test/CodeGen/X86/aliases.ll
index 8487c60..bf55644 100644
--- a/test/CodeGen/X86/aliases.ll
+++ b/test/CodeGen/X86/aliases.ll
@@ -1,4 +1,20 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=false \
+; RUN: -relocation-model=pic | FileCheck %s
+
+@thread_var = thread_local global i32 42, align 4
+@thread_alias = thread_local(localdynamic) alias i32* @thread_var
+
+; CHECK-LABEL: get_thread_var
+define i32* @get_thread_var() {
+; CHECK: leal    thread_var@TLSGD
+  ret i32* @thread_var
+}
+
+; CHECK-LABEL: get_thread_alias
+define i32* @get_thread_alias() {
+; CHECK: leal    thread_alias@TLSLD
+  ret i32* @thread_alias
+}
 
 @bar = global i32 42
 
@@ -22,7 +38,7 @@ define i32 @foo_f() {
 @bar_i = alias internal i32* @bar
 
 ; CHECK-DAG: .globl	A
-@A = alias i64, i32* @bar
+@A = alias bitcast (i32* @bar to i64*)
 
 ; CHECK-DAG: .globl	bar_h
 ; CHECK-DAG: .hidden	bar_h
@@ -32,6 +48,19 @@ define i32 @foo_f() {
 ; CHECK-DAG: .protected	bar_p
 @bar_p = protected alias i32* @bar
 
+; CHECK-DAG: test2 = bar+4
+@test2 = alias getelementptr(i32 *@bar, i32 1)
+
+; CHECK-DAG: test3 = 42
+@test3 = alias inttoptr(i32 42 to i32*)
+
+; CHECK-DAG: test4 = bar
+@test4 = alias inttoptr(i64 ptrtoint (i32* @bar to i64) to i32*)
+
+; CHECK-DAG: test5 = test2-bar
+@test5 = alias inttoptr(i32 sub (i32 ptrtoint (i32* @test2 to i32),
+                                 i32 ptrtoint (i32* @bar to i32)) to i32*)
+
 ; CHECK-DAG: .globl	test
 define i32 @test() {
 entry:
diff --git a/test/CodeGen/X86/atom-fixup-lea4.ll b/test/CodeGen/X86/atom-fixup-lea4.ll
new file mode 100644
index 0000000..668574b
--- /dev/null
+++ b/test/CodeGen/X86/atom-fixup-lea4.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mcpu=atom -mtriple=x86_64-linux
+
+%struct.ValueWrapper = type { double }
+%struct.ValueWrapper.6 = type { %struct.ValueWrapper.7 }
+%struct.ValueWrapper.7 = type { %struct.ValueWrapper.8 }
+%struct.ValueWrapper.8 = type { %struct.ValueWrapper }
+
+; Function Attrs: uwtable
+define linkonce_odr void @_ZN12ValueWrapperIS_IS_IS_IdEEEEC2Ev(%struct.ValueWrapper.6* %this) unnamed_addr #0 align 2 {
+entry:
+  %this.addr = alloca %struct.ValueWrapper.6*, align 8
+  store %struct.ValueWrapper.6* %this, %struct.ValueWrapper.6** %this.addr, align 8
+  %this1 = load %struct.ValueWrapper.6** %this.addr
+  %value = getelementptr inbounds %struct.ValueWrapper.6* %this1, i32 0, i32 0
+  call void @_ZN12ValueWrapperIS_IS_IdEEEC2Ev(%struct.ValueWrapper.7* %value)
+  ret void
+}
+
+; Function Attrs: uwtable
+declare void @_ZN12ValueWrapperIS_IS_IdEEEC2Ev(%struct.ValueWrapper.7*) unnamed_addr #0 align 2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
diff --git a/test/CodeGen/X86/atomic-load-store-wide.ll b/test/CodeGen/X86/atomic-load-store-wide.ll
index 17e04f0..7352d5a 100644
--- a/test/CodeGen/X86/atomic-load-store-wide.ll
+++ b/test/CodeGen/X86/atomic-load-store-wide.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mcpu=corei7 -march=x86 -verify-machineinstrs | FileCheck %s
 
 ; 64-bit load/store on x86-32
 ; FIXME: The generated code can be substantially improved.
diff --git a/test/CodeGen/X86/atomic-minmax-i6432.ll b/test/CodeGen/X86/atomic-minmax-i6432.ll
index 1cfbc49..ffb7a3f 100644
--- a/test/CodeGen/X86/atomic-minmax-i6432.ll
+++ b/test/CodeGen/X86/atomic-minmax-i6432.ll
@@ -1,6 +1,5 @@
-; RUN: llc -march=x86 -mattr=+cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX
-; RUN: llc -march=x86 -mattr=-cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=NOCMOV
-; RUN: llc -march=x86 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC
+; RUN: llc -march=x86 -mattr=+cmov,cx16 -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX
+; RUN: llc -march=x86 -mattr=cx16 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC
 
 @sc64 = external global i64
 
@@ -9,87 +8,39 @@ define void @atomic_maxmin_i6432() {
   %1 = atomicrmw max  i64* @sc64, i64 5 acquire
 ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
 ; LINUX: cmpl
-; LINUX: setl
-; LINUX: cmpl
-; LINUX: setl
+; LINUX: seta
 ; LINUX: cmovne
 ; LINUX: cmovne
 ; LINUX: lock
 ; LINUX-NEXT: cmpxchg8b
 ; LINUX: jne [[LABEL]]
-; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
-; NOCMOV: cmpl
-; NOCMOV: setl
-; NOCMOV: cmpl
-; NOCMOV: setl
-; NOCMOV: jne
-; NOCMOV: jne
-; NOCMOV: lock
-; NOCMOV-NEXT: cmpxchg8b
-; NOCMOV: jne [[LABEL]]
   %2 = atomicrmw min  i64* @sc64, i64 6 acquire
 ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
 ; LINUX: cmpl
-; LINUX: setg
-; LINUX: cmpl
-; LINUX: setg
+; LINUX: setb
 ; LINUX: cmovne
 ; LINUX: cmovne
 ; LINUX: lock
 ; LINUX-NEXT: cmpxchg8b
 ; LINUX: jne [[LABEL]]
-; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
-; NOCMOV: cmpl
-; NOCMOV: setg
-; NOCMOV: cmpl
-; NOCMOV: setg
-; NOCMOV: jne
-; NOCMOV: jne
-; NOCMOV: lock
-; NOCMOV-NEXT: cmpxchg8b
-; NOCMOV: jne [[LABEL]]
   %3 = atomicrmw umax i64* @sc64, i64 7 acquire
 ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
 ; LINUX: cmpl
-; LINUX: setb
-; LINUX: cmpl
-; LINUX: setb
+; LINUX: seta
 ; LINUX: cmovne
 ; LINUX: cmovne
 ; LINUX: lock
 ; LINUX-NEXT: cmpxchg8b
 ; LINUX: jne [[LABEL]]
-; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
-; NOCMOV: cmpl
-; NOCMOV: setb
-; NOCMOV: cmpl
-; NOCMOV: setb
-; NOCMOV: jne
-; NOCMOV: jne
-; NOCMOV: lock
-; NOCMOV-NEXT: cmpxchg8b
-; NOCMOV: jne [[LABEL]]
   %4 = atomicrmw umin i64* @sc64, i64 8 acquire
 ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
 ; LINUX: cmpl
-; LINUX: seta
-; LINUX: cmpl
-; LINUX: seta
+; LINUX: setb
 ; LINUX: cmovne
 ; LINUX: cmovne
 ; LINUX: lock
 ; LINUX-NEXT: cmpxchg8b
 ; LINUX: jne [[LABEL]]
-; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
-; NOCMOV: cmpl
-; NOCMOV: seta
-; NOCMOV: cmpl
-; NOCMOV: seta
-; NOCMOV: jne
-; NOCMOV: jne
-; NOCMOV: lock
-; NOCMOV-NEXT: cmpxchg8b
-; NOCMOV: jne [[LABEL]]
   ret void
 }
 
@@ -98,8 +49,8 @@ define void @atomic_maxmin_i6432() {
 
 define void @tf_bug(i8* %ptr) nounwind {
 ; PIC-LABEL: tf_bug:
-; PIC: movl _id-L1$pb(
-; PIC: movl (_id-L1$pb)+4(
+; PIC-DAG: movl _id-L1$pb(
+; PIC-DAG: movl (_id-L1$pb)+4(
   %tmp1 = atomicrmw add i64* @id, i64 1 seq_cst
   %tmp2 = add i64 %tmp1, 1
   %tmp3 = bitcast i8* %ptr to i64*
diff --git a/test/CodeGen/X86/atomic-ops-ancient-64.ll b/test/CodeGen/X86/atomic-ops-ancient-64.ll
new file mode 100644
index 0000000..18749b9
--- /dev/null
+++ b/test/CodeGen/X86/atomic-ops-ancient-64.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s
+
+define i64 @test_add(i64* %addr, i64 %inc) {
+; CHECK-LABEL: test_add:
+; CHECK: calll __sync_fetch_and_add_8
+  %old = atomicrmw add i64* %addr, i64 %inc seq_cst
+  ret i64 %old
+}
+
+define i64 @test_sub(i64* %addr, i64 %inc) {
+; CHECK-LABEL: test_sub:
+; CHECK: calll __sync_fetch_and_sub_8
+  %old = atomicrmw sub i64* %addr, i64 %inc seq_cst
+  ret i64 %old
+}
+
+define i64 @test_and(i64* %andr, i64 %inc) {
+; CHECK-LABEL: test_and:
+; CHECK: calll __sync_fetch_and_and_8
+  %old = atomicrmw and i64* %andr, i64 %inc seq_cst
+  ret i64 %old
+}
+
+define i64 @test_or(i64* %orr, i64 %inc) {
+; CHECK-LABEL: test_or:
+; CHECK: calll __sync_fetch_and_or_8
+  %old = atomicrmw or i64* %orr, i64 %inc seq_cst
+  ret i64 %old
+}
+
+define i64 @test_xor(i64* %xorr, i64 %inc) {
+; CHECK-LABEL: test_xor:
+; CHECK: calll __sync_fetch_and_xor_8
+  %old = atomicrmw xor i64* %xorr, i64 %inc seq_cst
+  ret i64 %old
+}
+
+define i64 @test_nand(i64* %nandr, i64 %inc) {
+; CHECK-LABEL: test_nand:
+; CHECK: calll __sync_fetch_and_nand_8
+  %old = atomicrmw nand i64* %nandr, i64 %inc seq_cst
+  ret i64 %old
+}
diff --git a/test/CodeGen/X86/atomic128.ll b/test/CodeGen/X86/atomic128.ll
new file mode 100644
index 0000000..741d290
--- /dev/null
+++ b/test/CodeGen/X86/atomic128.ll
@@ -0,0 +1,316 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s
+
+@var = global i128 0
+
+define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
+; CHECK-LABEL: val_compare_and_swap:
+; CHECK: movq %rsi, %rax
+; CHECK: movq %rcx, %rbx
+; CHECK: movq %r8, %rcx
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+
+  %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
+  %val = extractvalue { i128, i1 } %pair, 0
+  ret i128 %val
+}
+
+define void @fetch_and_nand(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_nand:
+; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG:     movq (%rdi), %rax
+; CHECK-DAG:     movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK:         movq %rdx, %rcx
+; CHECK:         andq [[INCHI]], %rcx
+; CHECK:         movq %rax, %rbx
+  ; INCLO equivalent comes in in %rsi, so it makes sense it stays there.
+; CHECK:         andq %rsi, %rbx
+; CHECK:         notq %rbx
+; CHECK:         notq %rcx
+; CHECK:         lock
+; CHECK:         cmpxchg16b (%rdi)
+; CHECK:         jne [[LOOP]]
+
+; CHECK:         movq %rax, _var
+; CHECK:         movq %rdx, _var+8
+  %val = atomicrmw nand i128* %p, i128 %bits release
+  store i128 %val, i128* @var, align 16
+  ret void
+}
+
+define void @fetch_and_or(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_or:
+; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG:     movq (%rdi), %rax
+; CHECK-DAG:     movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK:         movq %rax, %rbx
+  ; INCLO equivalent comes in in %rsi, so it makes sense it stays there.
+; CHECK:         orq %rsi, %rbx
+; CHECK:         movq %rdx, %rcx
+; CHECK:         orq [[INCHI]], %rcx
+; CHECK:         lock
+; CHECK:         cmpxchg16b (%rdi)
+; CHECK:         jne [[LOOP]]
+
+; CHECK:         movq %rax, _var
+; CHECK:         movq %rdx, _var+8
+
+  %val = atomicrmw or i128* %p, i128 %bits seq_cst
+  store i128 %val, i128* @var, align 16
+  ret void
+}
+
+define void @fetch_and_add(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_add:
+; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG:     movq (%rdi), %rax
+; CHECK-DAG:     movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK:         movq %rax, %rbx
+  ; INCLO equivalent comes in in %rsi, so it makes sense it stays there.
+; CHECK:         addq %rsi, %rbx
+; CHECK:         movq %rdx, %rcx
+; CHECK:         adcq [[INCHI]], %rcx
+; CHECK:         lock
+; CHECK:         cmpxchg16b (%rdi)
+; CHECK:         jne [[LOOP]]
+
+; CHECK:         movq %rax, _var
+; CHECK:         movq %rdx, _var+8
+
+  %val = atomicrmw add i128* %p, i128 %bits seq_cst
+  store i128 %val, i128* @var, align 16
+  ret void
+}
+
+define void @fetch_and_sub(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_sub:
+; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG:     movq (%rdi), %rax
+; CHECK-DAG:     movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK:         movq %rax, %rbx
+  ; INCLO equivalent comes in in %rsi, so it makes sense it stays there.
+; CHECK:         subq %rsi, %rbx
+; CHECK:         movq %rdx, %rcx
+; CHECK:         sbbq [[INCHI]], %rcx
+; CHECK:         lock
+; CHECK:         cmpxchg16b (%rdi)
+; CHECK:         jne [[LOOP]]
+
+; CHECK:         movq %rax, _var
+; CHECK:         movq %rdx, _var+8
+
+  %val = atomicrmw sub i128* %p, i128 %bits seq_cst
+  store i128 %val, i128* @var, align 16
+  ret void
+}
+
+define void @fetch_and_min(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_min:
+; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG:     movq (%rdi), %rax
+; CHECK-DAG:     movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK:         cmpq %rsi, %rax
+; CHECK:         setbe [[CMP:%[a-z0-9]+]]
+; CHECK:         cmpq [[INCHI]], %rdx
+; CHECK:         setle [[HICMP:%[a-z0-9]+]]
+; CHECK:         je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
+
+; CHECK:         movb [[HICMP]], [[CMP]]
+; CHECK: [[USE_LO]]:
+; CHECK:         testb [[CMP]], [[CMP]]
+; CHECK:         movq %rsi, %rbx
+; CHECK:         cmovneq %rax, %rbx
+; CHECK:         movq [[INCHI]], %rcx
+; CHECK:         cmovneq %rdx, %rcx
+; CHECK:         lock
+; CHECK:         cmpxchg16b (%rdi)
+; CHECK:         jne [[LOOP]]
+
+; CHECK:         movq %rax, _var
+; CHECK:         movq %rdx, _var+8
+
+  %val = atomicrmw min i128* %p, i128 %bits seq_cst
+  store i128 %val, i128* @var, align 16
+  ret void
+}
+
+define void @fetch_and_max(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_max:
+; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG:     movq (%rdi), %rax
+; CHECK-DAG:     movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK:         cmpq %rsi, %rax
+; CHECK:         setae [[CMP:%[a-z0-9]+]]
+; CHECK:         cmpq [[INCHI]], %rdx
+; CHECK:         setge [[HICMP:%[a-z0-9]+]]
+; CHECK:         je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
+
+; CHECK:         movb [[HICMP]], [[CMP]]
+; CHECK: [[USE_LO]]:
+; CHECK:         testb [[CMP]], [[CMP]]
+; CHECK:         movq %rsi, %rbx
+; CHECK:         cmovneq %rax, %rbx
+; CHECK:         movq [[INCHI]], %rcx
+; CHECK:         cmovneq %rdx, %rcx
+; CHECK:         lock
+; CHECK:         cmpxchg16b (%rdi)
+; CHECK:         jne [[LOOP]]
+
+; CHECK:         movq %rax, _var
+; CHECK:         movq %rdx, _var+8
+
+  %val = atomicrmw max i128* %p, i128 %bits seq_cst
+  store i128 %val, i128* @var, align 16
+  ret void
+}
+
+define void @fetch_and_umin(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_umin:
+; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG:     movq (%rdi), %rax
+; CHECK-DAG:     movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK:         cmpq %rsi, %rax
+; CHECK:         setbe [[CMP:%[a-z0-9]+]]
+; CHECK:         cmpq [[INCHI]], %rdx
+; CHECK:         setbe [[HICMP:%[a-z0-9]+]]
+; CHECK:         je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
+
+; CHECK:         movb [[HICMP]], [[CMP]]
+; CHECK: [[USE_LO]]:
+; CHECK:         testb [[CMP]], [[CMP]]
+; CHECK:         movq %rsi, %rbx
+; CHECK:         cmovneq %rax, %rbx
+; CHECK:         movq [[INCHI]], %rcx
+; CHECK:         cmovneq %rdx, %rcx
+; CHECK:         lock
+; CHECK:         cmpxchg16b (%rdi)
+; CHECK:         jne [[LOOP]]
+
+; CHECK:         movq %rax, _var
+; CHECK:         movq %rdx, _var+8
+
+  %val = atomicrmw umin i128* %p, i128 %bits seq_cst
+  store i128 %val, i128* @var, align 16
+  ret void
+}
+
+define void @fetch_and_umax(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_umax:
+; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG:     movq (%rdi), %rax
+; CHECK-DAG:     movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK:         cmpq %rax, %rsi
+; CHECK:         setb [[CMP:%[a-z0-9]+]]
+; CHECK:         cmpq [[INCHI]], %rdx
+; CHECK:         seta [[HICMP:%[a-z0-9]+]]
+; CHECK:         je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
+
+; CHECK:         movb [[HICMP]], [[CMP]]
+; CHECK: [[USE_LO]]:
+; CHECK:         testb [[CMP]], [[CMP]]
+; CHECK:         movq %rsi, %rbx
+; CHECK:         cmovneq %rax, %rbx
+; CHECK:         movq [[INCHI]], %rcx
+; CHECK:         cmovneq %rdx, %rcx
+; CHECK:         lock
+; CHECK:         cmpxchg16b (%rdi)
+; CHECK:         jne [[LOOP]]
+
+; CHECK:         movq %rax, _var
+; CHECK:         movq %rdx, _var+8
+
+  %val = atomicrmw umax i128* %p, i128 %bits seq_cst
+  store i128 %val, i128* @var, align 16
+  ret void
+}
+
+define i128 @atomic_load_seq_cst(i128* %p) {
+; CHECK-LABEL: atomic_load_seq_cst:
+; CHECK: xorl %eax, %eax
+; CHECK: xorl %edx, %edx
+; CHECK: xorl %ebx, %ebx
+; CHECK: xorl %ecx, %ecx
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+
+   %r = load atomic i128* %p seq_cst, align 16
+   ret i128 %r
+}
+
+define i128 @atomic_load_relaxed(i128* %p) {
+; CHECK: atomic_load_relaxed:
+; CHECK: xorl %eax, %eax
+; CHECK: xorl %edx, %edx
+; CHECK: xorl %ebx, %ebx
+; CHECK: xorl %ecx, %ecx
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+
+   %r = load atomic i128* %p monotonic, align 16
+   ret i128 %r
+}
+
+define void @atomic_store_seq_cst(i128* %p, i128 %in) {
+; CHECK-LABEL: atomic_store_seq_cst:
+; CHECK:         movq %rdx, %rcx
+; CHECK:         movq %rsi, %rbx
+; CHECK:         movq (%rdi), %rax
+; CHECK:         movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK:         lock
+; CHECK:         cmpxchg16b (%rdi)
+; CHECK:         jne [[LOOP]]
+; CHECK-NOT:     callq ___sync_lock_test_and_set_16
+
+   store atomic i128 %in, i128* %p seq_cst, align 16
+   ret void
+}
+
+define void @atomic_store_release(i128* %p, i128 %in) {
+; CHECK-LABEL: atomic_store_release:
+; CHECK:         movq %rdx, %rcx
+; CHECK:         movq %rsi, %rbx
+; CHECK:         movq (%rdi), %rax
+; CHECK:         movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK:         lock
+; CHECK:         cmpxchg16b (%rdi)
+; CHECK:         jne [[LOOP]]
+
+   store atomic i128 %in, i128* %p release, align 16
+   ret void
+}
+
+define void @atomic_store_relaxed(i128* %p, i128 %in) {
+; CHECK-LABEL: atomic_store_relaxed:
+; CHECK:         movq %rdx, %rcx
+; CHECK:         movq %rsi, %rbx
+; CHECK:         movq (%rdi), %rax
+; CHECK:         movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK:         lock
+; CHECK:         cmpxchg16b (%rdi)
+; CHECK:         jne [[LOOP]]
+
+   store atomic i128 %in, i128* %p unordered, align 16
+   ret void
+}
diff --git a/test/CodeGen/X86/atomic16.ll b/test/CodeGen/X86/atomic16.ll
index 45d3ff4..faaa4c4 100644
--- a/test/CodeGen/X86/atomic16.ll
+++ b/test/CodeGen/X86/atomic16.ll
@@ -4,8 +4,8 @@
 @sc16 = external global i16
 
 define void @atomic_fetch_add16() nounwind {
-; X64:   atomic_fetch_add16
-; X32:   atomic_fetch_add16
+; X64-LABEL:   atomic_fetch_add16
+; X32-LABEL:   atomic_fetch_add16
 entry:
 ; 32-bit
   %t1 = atomicrmw add  i16* @sc16, i16 1 acquire
@@ -34,8 +34,8 @@ entry:
 }
 
 define void @atomic_fetch_sub16() nounwind {
-; X64:   atomic_fetch_sub16
-; X32:   atomic_fetch_sub16
+; X64-LABEL:   atomic_fetch_sub16
+; X32-LABEL:   atomic_fetch_sub16
   %t1 = atomicrmw sub  i16* @sc16, i16 1 acquire
 ; X64:       lock
 ; X64:       decw
@@ -62,18 +62,18 @@ define void @atomic_fetch_sub16() nounwind {
 }
 
 define void @atomic_fetch_and16() nounwind {
-; X64:   atomic_fetch_and16
-; X32:   atomic_fetch_and16
+; X64-LABEL:   atomic_fetch_and16
+; X32-LABEL:   atomic_fetch_and16
   %t1 = atomicrmw and  i16* @sc16, i16 3 acquire
 ; X64:       lock
 ; X64:       andw $3, {{.*}} # encoding: [0xf0,0x66
 ; X32:       lock
 ; X32:       andw $3
   %t2 = atomicrmw and  i16* @sc16, i16 5 acquire
-; X64:       andw
+; X64:       andl
 ; X64:       lock
 ; X64:       cmpxchgw
-; X32:       andw
+; X32:       andl
 ; X32:       lock
 ; X32:       cmpxchgw
   %t3 = atomicrmw and  i16* @sc16, i16 %t2 acquire
@@ -87,18 +87,18 @@ define void @atomic_fetch_and16() nounwind {
 }
 
 define void @atomic_fetch_or16() nounwind {
-; X64:   atomic_fetch_or16
-; X32:   atomic_fetch_or16
+; X64-LABEL:   atomic_fetch_or16
+; X32-LABEL:   atomic_fetch_or16
   %t1 = atomicrmw or   i16* @sc16, i16 3 acquire
 ; X64:       lock
 ; X64:       orw $3, {{.*}} # encoding: [0xf0,0x66
 ; X32:       lock
 ; X32:       orw $3
   %t2 = atomicrmw or   i16* @sc16, i16 5 acquire
-; X64:       orw
+; X64:       orl
 ; X64:       lock
 ; X64:       cmpxchgw
-; X32:       orw
+; X32:       orl
 ; X32:       lock
 ; X32:       cmpxchgw
   %t3 = atomicrmw or   i16* @sc16, i16 %t2 acquire
@@ -112,18 +112,18 @@ define void @atomic_fetch_or16() nounwind {
 }
 
 define void @atomic_fetch_xor16() nounwind {
-; X64:   atomic_fetch_xor16
-; X32:   atomic_fetch_xor16
+; X64-LABEL:   atomic_fetch_xor16
+; X32-LABEL:   atomic_fetch_xor16
   %t1 = atomicrmw xor  i16* @sc16, i16 3 acquire
 ; X64:       lock
 ; X64:       xorw $3, {{.*}} # encoding: [0xf0,0x66
 ; X32:       lock
 ; X32:       xorw $3
   %t2 = atomicrmw xor  i16* @sc16, i16 5 acquire
-; X64:       xorw
+; X64:       xorl
 ; X64:       lock
 ; X64:       cmpxchgw
-; X32:       xorw
+; X32:       xorl
 ; X32:       lock
 ; X32:       cmpxchgw
   %t3 = atomicrmw xor  i16* @sc16, i16 %t2 acquire
@@ -137,15 +137,15 @@ define void @atomic_fetch_xor16() nounwind {
 }
 
 define void @atomic_fetch_nand16(i16 %x) nounwind {
-; X64:   atomic_fetch_nand16
-; X32:   atomic_fetch_nand16
+; X64-LABEL:   atomic_fetch_nand16
+; X32-LABEL:   atomic_fetch_nand16
   %t1 = atomicrmw nand i16* @sc16, i16 %x acquire
-; X64:       andw
-; X64:       notw
+; X64:       andl
+; X64:       notl
 ; X64:       lock
 ; X64:       cmpxchgw
-; X32:       andw
-; X32:       notw
+; X32:       andl
+; X32:       notl
 ; X32:       lock
 ; X32:       cmpxchgw
   ret void
@@ -155,12 +155,16 @@ define void @atomic_fetch_nand16(i16 %x) nounwind {
 
 define void @atomic_fetch_max16(i16 %x) nounwind {
   %t1 = atomicrmw max  i16* @sc16, i16 %x acquire
-; X64:       cmpw
+; X64:       movswl
+; X64:       movswl
+; X64:       subl
 ; X64:       cmov
 ; X64:       lock
 ; X64:       cmpxchgw
 
-; X32:       cmpw
+; X32:       movswl
+; X32:       movswl
+; X32:       subl
 ; X32:       cmov
 ; X32:       lock
 ; X32:       cmpxchgw
@@ -171,12 +175,16 @@ define void @atomic_fetch_max16(i16 %x) nounwind {
 
 define void @atomic_fetch_min16(i16 %x) nounwind {
   %t1 = atomicrmw min  i16* @sc16, i16 %x acquire
-; X64:       cmpw
+; X64:       movswl
+; X64:       movswl
+; X64:       subl
 ; X64:       cmov
 ; X64:       lock
 ; X64:       cmpxchgw
 
-; X32:       cmpw
+; X32:       movswl
+; X32:       movswl
+; X32:       subl
 ; X32:       cmov
 ; X32:       lock
 ; X32:       cmpxchgw
@@ -187,12 +195,16 @@ define void @atomic_fetch_min16(i16 %x) nounwind {
 
 define void @atomic_fetch_umax16(i16 %x) nounwind {
   %t1 = atomicrmw umax i16* @sc16, i16 %x acquire
-; X64:       cmpw
+; X64:       movzwl
+; X64:       movzwl
+; X64:       subl
 ; X64:       cmov
 ; X64:       lock
 ; X64:       cmpxchgw
 
-; X32:       cmpw
+; X32:       movzwl
+; X32:       movzwl
+; X32:       subl
 ; X32:       cmov
 ; X32:       lock
 ; X32:       cmpxchgw
@@ -203,11 +215,16 @@ define void @atomic_fetch_umax16(i16 %x) nounwind {
 
 define void @atomic_fetch_umin16(i16 %x) nounwind {
   %t1 = atomicrmw umin i16* @sc16, i16 %x acquire
-; X64:       cmpw
+; X64:       movzwl
+; X64:       movzwl
+; X64:       subl
 ; X64:       cmov
 ; X64:       lock
 ; X64:       cmpxchgw
-; X32:       cmpw
+
+; X32:       movzwl
+; X32:       movzwl
+; X32:       subl
 ; X32:       cmov
 ; X32:       lock
 ; X32:       cmpxchgw
diff --git a/test/CodeGen/X86/atomic32.ll b/test/CodeGen/X86/atomic32.ll
index 474c0e6..4f2cbe0 100644
--- a/test/CodeGen/X86/atomic32.ll
+++ b/test/CodeGen/X86/atomic32.ll
@@ -5,8 +5,8 @@
 @sc32 = external global i32
 
 define void @atomic_fetch_add32() nounwind {
-; X64:   atomic_fetch_add32
-; X32:   atomic_fetch_add32
+; X64-LABEL:   atomic_fetch_add32:
+; X32-LABEL:   atomic_fetch_add32:
 entry:
 ; 32-bit
   %t1 = atomicrmw add  i32* @sc32, i32 1 acquire
@@ -35,8 +35,8 @@ entry:
 }
 
 define void @atomic_fetch_sub32() nounwind {
-; X64:   atomic_fetch_sub32
-; X32:   atomic_fetch_sub32
+; X64-LABEL:   atomic_fetch_sub32:
+; X32-LABEL:   atomic_fetch_sub32:
   %t1 = atomicrmw sub  i32* @sc32, i32 1 acquire
 ; X64:       lock
 ; X64:       decl
@@ -63,8 +63,8 @@ define void @atomic_fetch_sub32() nounwind {
 }
 
 define void @atomic_fetch_and32() nounwind {
-; X64:   atomic_fetch_and32
-; X32:   atomic_fetch_and32
+; X64-LABEL:   atomic_fetch_and32:
+; X32-LABEL:   atomic_fetch_and32:
   %t1 = atomicrmw and  i32* @sc32, i32 3 acquire
 ; X64:       lock
 ; X64:       andl $3
@@ -88,8 +88,8 @@ define void @atomic_fetch_and32() nounwind {
 }
 
 define void @atomic_fetch_or32() nounwind {
-; X64:   atomic_fetch_or32
-; X32:   atomic_fetch_or32
+; X64-LABEL:   atomic_fetch_or32:
+; X32-LABEL:   atomic_fetch_or32:
   %t1 = atomicrmw or   i32* @sc32, i32 3 acquire
 ; X64:       lock
 ; X64:       orl $3
@@ -113,8 +113,8 @@ define void @atomic_fetch_or32() nounwind {
 }
 
 define void @atomic_fetch_xor32() nounwind {
-; X64:   atomic_fetch_xor32
-; X32:   atomic_fetch_xor32
+; X64-LABEL:   atomic_fetch_xor32:
+; X32-LABEL:   atomic_fetch_xor32:
   %t1 = atomicrmw xor  i32* @sc32, i32 3 acquire
 ; X64:       lock
 ; X64:       xorl $3
@@ -138,8 +138,8 @@ define void @atomic_fetch_xor32() nounwind {
 }
 
 define void @atomic_fetch_nand32(i32 %x) nounwind {
-; X64:   atomic_fetch_nand32
-; X32:   atomic_fetch_nand32
+; X64-LABEL:   atomic_fetch_nand32:
+; X32-LABEL:   atomic_fetch_nand32:
   %t1 = atomicrmw nand i32* @sc32, i32 %x acquire
 ; X64:       andl
 ; X64:       notl
@@ -155,19 +155,22 @@ define void @atomic_fetch_nand32(i32 %x) nounwind {
 }
 
 define void @atomic_fetch_max32(i32 %x) nounwind {
+; X64-LABEL: atomic_fetch_max32:
+; X32-LABEL: atomic_fetch_max32:
+
   %t1 = atomicrmw max  i32* @sc32, i32 %x acquire
-; X64:       cmpl
+; X64:       subl
 ; X64:       cmov
 ; X64:       lock
 ; X64:       cmpxchgl
 
-; X32:       cmpl
+; X32:       subl
 ; X32:       cmov
 ; X32:       lock
 ; X32:       cmpxchgl
 
-; NOCMOV:    cmpl
-; NOCMOV:    jl
+; NOCMOV:    subl
+; NOCMOV:    jge
 ; NOCMOV:    lock
 ; NOCMOV:    cmpxchgl
   ret void
@@ -177,19 +180,23 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
 }
 
 define void @atomic_fetch_min32(i32 %x) nounwind {
+; X64-LABEL: atomic_fetch_min32:
+; X32-LABEL: atomic_fetch_min32:
+; NOCMOV-LABEL: atomic_fetch_min32:
+
   %t1 = atomicrmw min  i32* @sc32, i32 %x acquire
-; X64:       cmpl
+; X64:       subl
 ; X64:       cmov
 ; X64:       lock
 ; X64:       cmpxchgl
 
-; X32:       cmpl
+; X32:       subl
 ; X32:       cmov
 ; X32:       lock
 ; X32:       cmpxchgl
 
-; NOCMOV:    cmpl
-; NOCMOV:    jg
+; NOCMOV:    subl
+; NOCMOV:    jle
 ; NOCMOV:    lock
 ; NOCMOV:    cmpxchgl
   ret void
@@ -199,19 +206,23 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
 }
 
 define void @atomic_fetch_umax32(i32 %x) nounwind {
+; X64-LABEL: atomic_fetch_umax32:
+; X32-LABEL: atomic_fetch_umax32:
+; NOCMOV-LABEL: atomic_fetch_umax32:
+
   %t1 = atomicrmw umax i32* @sc32, i32 %x acquire
-; X64:       cmpl
+; X64:       subl
 ; X64:       cmov
 ; X64:       lock
 ; X64:       cmpxchgl
 
-; X32:       cmpl
+; X32:       subl
 ; X32:       cmov
 ; X32:       lock
 ; X32:       cmpxchgl
 
-; NOCMOV:    cmpl
-; NOCMOV:    jb
+; NOCMOV:    subl
+; NOCMOV:    ja
 ; NOCMOV:    lock
 ; NOCMOV:    cmpxchgl
   ret void
@@ -221,19 +232,23 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
 }
 
 define void @atomic_fetch_umin32(i32 %x) nounwind {
+; X64-LABEL: atomic_fetch_umin32:
+; X32-LABEL: atomic_fetch_umin32:
+; NOCMOV-LABEL: atomic_fetch_umin32:
+
   %t1 = atomicrmw umin i32* @sc32, i32 %x acquire
-; X64:       cmpl
+; X64:       subl
 ; X64:       cmov
 ; X64:       lock
 ; X64:       cmpxchgl
 
-; X32:       cmpl
+; X32:       subl
 ; X32:       cmov
 ; X32:       lock
 ; X32:       cmpxchgl
 
-; NOCMOV:    cmpl
-; NOCMOV:    ja
+; NOCMOV:    subl
+; NOCMOV:    jb
 ; NOCMOV:    lock
 ; NOCMOV:    cmpxchgl
   ret void
@@ -243,6 +258,9 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
 }
 
 define void @atomic_fetch_cmpxchg32() nounwind {
+; X64-LABEL: atomic_fetch_cmpxchg32:
+; X32-LABEL: atomic_fetch_cmpxchg32:
+
   %t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire acquire
 ; X64:       lock
 ; X64:       cmpxchgl
@@ -254,6 +272,9 @@ define void @atomic_fetch_cmpxchg32() nounwind {
 }
 
 define void @atomic_fetch_store32(i32 %x) nounwind {
+; X64-LABEL: atomic_fetch_store32:
+; X32-LABEL: atomic_fetch_store32:
+
   store atomic i32 %x, i32* @sc32 release, align 4
 ; X64-NOT:   lock
 ; X64:       movl
@@ -265,6 +286,9 @@ define void @atomic_fetch_store32(i32 %x) nounwind {
 }
 
 define void @atomic_fetch_swap32(i32 %x) nounwind {
+; X64-LABEL: atomic_fetch_swap32:
+; X32-LABEL: atomic_fetch_swap32:
+
   %t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
 ; X64-NOT:   lock
 ; X64:       xchgl
diff --git a/test/CodeGen/X86/atomic64.ll b/test/CodeGen/X86/atomic64.ll
index 4f55edc..11b4e68 100644
--- a/test/CodeGen/X86/atomic64.ll
+++ b/test/CodeGen/X86/atomic64.ll
@@ -3,7 +3,8 @@
 @sc64 = external global i64
 
 define void @atomic_fetch_add64() nounwind {
-; X64:   atomic_fetch_add64
+; X64-LABEL:   atomic_fetch_add64:
+; X32-LABEL:   atomic_fetch_add64:
 entry:
   %t1 = atomicrmw add  i64* @sc64, i64 1 acquire
 ; X64:       lock
@@ -22,7 +23,8 @@ entry:
 }
 
 define void @atomic_fetch_sub64() nounwind {
-; X64:   atomic_fetch_sub64
+; X64-LABEL:   atomic_fetch_sub64:
+; X32-LABEL:   atomic_fetch_sub64:
   %t1 = atomicrmw sub  i64* @sc64, i64 1 acquire
 ; X64:       lock
 ; X64:       decq
@@ -40,7 +42,8 @@ define void @atomic_fetch_sub64() nounwind {
 }
 
 define void @atomic_fetch_and64() nounwind {
-; X64:   atomic_fetch_and64
+; X64-LABEL:   atomic_fetch_and64:
+; X32-LABEL:   atomic_fetch_and64:
   %t1 = atomicrmw and  i64* @sc64, i64 3 acquire
 ; X64:       lock
 ; X64:       andq $3
@@ -56,7 +59,8 @@ define void @atomic_fetch_and64() nounwind {
 }
 
 define void @atomic_fetch_or64() nounwind {
-; X64:   atomic_fetch_or64
+; X64-LABEL:   atomic_fetch_or64:
+; X32-LABEL:   atomic_fetch_or64:
   %t1 = atomicrmw or   i64* @sc64, i64 3 acquire
 ; X64:       lock
 ; X64:       orq $3
@@ -72,7 +76,8 @@ define void @atomic_fetch_or64() nounwind {
 }
 
 define void @atomic_fetch_xor64() nounwind {
-; X64:   atomic_fetch_xor64
+; X64-LABEL:   atomic_fetch_xor64:
+; X32-LABEL:   atomic_fetch_xor64:
   %t1 = atomicrmw xor  i64* @sc64, i64 3 acquire
 ; X64:       lock
 ; X64:       xorq $3
@@ -88,8 +93,8 @@ define void @atomic_fetch_xor64() nounwind {
 }
 
 define void @atomic_fetch_nand64(i64 %x) nounwind {
-; X64:   atomic_fetch_nand64
-; X32:   atomic_fetch_nand64
+; X64-LABEL:   atomic_fetch_nand64:
+; X32-LABEL:   atomic_fetch_nand64:
   %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
 ; X64:       andq
 ; X64:       notq
@@ -107,8 +112,10 @@ define void @atomic_fetch_nand64(i64 %x) nounwind {
 }
 
 define void @atomic_fetch_max64(i64 %x) nounwind {
+; X64-LABEL:   atomic_fetch_max64:
+; X32-LABEL:   atomic_fetch_max64:
   %t1 = atomicrmw max  i64* @sc64, i64 %x acquire
-; X64:       cmpq
+; X64:       subq
 ; X64:       cmov
 ; X64:       lock
 ; X64:       cmpxchgq
@@ -126,8 +133,10 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
 }
 
 define void @atomic_fetch_min64(i64 %x) nounwind {
+; X64-LABEL:   atomic_fetch_min64:
+; X32-LABEL:   atomic_fetch_min64:
   %t1 = atomicrmw min  i64* @sc64, i64 %x acquire
-; X64:       cmpq
+; X64:       subq
 ; X64:       cmov
 ; X64:       lock
 ; X64:       cmpxchgq
@@ -145,8 +154,10 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
 }
 
 define void @atomic_fetch_umax64(i64 %x) nounwind {
+; X64-LABEL:   atomic_fetch_umax64:
+; X32-LABEL:   atomic_fetch_umax64:
   %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
-; X64:       cmpq
+; X64:       subq
 ; X64:       cmov
 ; X64:       lock
 ; X64:       cmpxchgq
@@ -164,8 +175,10 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
 }
 
 define void @atomic_fetch_umin64(i64 %x) nounwind {
+; X64-LABEL:   atomic_fetch_umin64:
+; X32-LABEL:   atomic_fetch_umin64:
   %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
-; X64:       cmpq
+; X64:       subq
 ; X64:       cmov
 ; X64:       lock
 ; X64:       cmpxchgq
@@ -183,6 +196,8 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
 }
 
 define void @atomic_fetch_cmpxchg64() nounwind {
+; X64-LABEL:   atomic_fetch_cmpxchg64:
+; X32-LABEL:   atomic_fetch_cmpxchg64:
   %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire
 ; X64:       lock
 ; X64:       cmpxchgq
@@ -194,6 +209,8 @@ define void @atomic_fetch_cmpxchg64() nounwind {
 }
 
 define void @atomic_fetch_store64(i64 %x) nounwind {
+; X64-LABEL:   atomic_fetch_store64:
+; X32-LABEL:   atomic_fetch_store64:
   store atomic i64 %x, i64* @sc64 release, align 8
 ; X64-NOT:   lock
 ; X64:       movq
@@ -205,6 +222,8 @@ define void @atomic_fetch_store64(i64 %x) nounwind {
 }
 
 define void @atomic_fetch_swap64(i64 %x) nounwind {
+; X64-LABEL:   atomic_fetch_swap64:
+; X32-LABEL:   atomic_fetch_swap64:
   %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
 ; X64-NOT:   lock
 ; X64:       xchgq
diff --git a/test/CodeGen/X86/atomic6432.ll b/test/CodeGen/X86/atomic6432.ll
index c0f7267..1c4b0f4 100644
--- a/test/CodeGen/X86/atomic6432.ll
+++ b/test/CodeGen/X86/atomic6432.ll
@@ -3,7 +3,8 @@
 @sc64 = external global i64
 
 define void @atomic_fetch_add64() nounwind {
-; X32:   atomic_fetch_add64
+; X64-LABEL:   atomic_fetch_add64:
+; X32-LABEL:   atomic_fetch_add64:
 entry:
   %t1 = atomicrmw add  i64* @sc64, i64 1 acquire
 ; X32:       addl
@@ -30,20 +31,21 @@ entry:
 }
 
 define void @atomic_fetch_sub64() nounwind {
-; X32:   atomic_fetch_sub64
+; X64-LABEL:   atomic_fetch_sub64:
+; X32-LABEL:   atomic_fetch_sub64:
   %t1 = atomicrmw sub  i64* @sc64, i64 1 acquire
-; X32:       subl
-; X32:       sbbl
+; X32:       addl $-1
+; X32:       adcl $-1
 ; X32:       lock
 ; X32:       cmpxchg8b
   %t2 = atomicrmw sub  i64* @sc64, i64 3 acquire
-; X32:       subl
-; X32:       sbbl
+; X32:       addl $-3
+; X32:       adcl $-1
 ; X32:       lock
 ; X32:       cmpxchg8b
   %t3 = atomicrmw sub  i64* @sc64, i64 5 acquire
-; X32:       subl
-; X32:       sbbl
+; X32:       addl $-5
+; X32:       adcl $-1
 ; X32:       lock
 ; X32:       cmpxchg8b
   %t4 = atomicrmw sub  i64* @sc64, i64 %t3 acquire
@@ -56,15 +58,16 @@ define void @atomic_fetch_sub64() nounwind {
 }
 
 define void @atomic_fetch_and64() nounwind {
-; X32:   atomic_fetch_and64
+; X64-LABEL:   atomic_fetch_and:64
+; X32-LABEL:   atomic_fetch_and64:
   %t1 = atomicrmw and  i64* @sc64, i64 3 acquire
-; X32:       andl
-; X32:       andl
+; X32:       andl $3
+; X32-NOT:       andl
 ; X32:       lock
 ; X32:       cmpxchg8b
-  %t2 = atomicrmw and  i64* @sc64, i64 5 acquire
-; X32:       andl
-; X32:       andl
+  %t2 = atomicrmw and  i64* @sc64, i64 4294967297 acquire
+; X32:       andl $1
+; X32:       andl $1
 ; X32:       lock
 ; X32:       cmpxchg8b
   %t3 = atomicrmw and  i64* @sc64, i64 %t2 acquire
@@ -77,15 +80,16 @@ define void @atomic_fetch_and64() nounwind {
 }
 
 define void @atomic_fetch_or64() nounwind {
-; X32:   atomic_fetch_or64
+; X64-LABEL:   atomic_fetch_or64:
+; X32-LABEL:   atomic_fetch_or64:
   %t1 = atomicrmw or   i64* @sc64, i64 3 acquire
-; X32:       orl
-; X32:       orl
+; X32:       orl $3
+; X32-NOT:       orl
 ; X32:       lock
 ; X32:       cmpxchg8b
-  %t2 = atomicrmw or   i64* @sc64, i64 5 acquire
-; X32:       orl
-; X32:       orl
+  %t2 = atomicrmw or   i64* @sc64, i64 4294967297 acquire
+; X32:       orl $1
+; X32:       orl $1
 ; X32:       lock
 ; X32:       cmpxchg8b
   %t3 = atomicrmw or   i64* @sc64, i64 %t2 acquire
@@ -98,15 +102,16 @@ define void @atomic_fetch_or64() nounwind {
 }
 
 define void @atomic_fetch_xor64() nounwind {
-; X32:   atomic_fetch_xor64
+; X64-LABEL:   atomic_fetch_xor:64
+; X32-LABEL:   atomic_fetch_xor64:
   %t1 = atomicrmw xor  i64* @sc64, i64 3 acquire
 ; X32:       xorl
-; X32:       xorl
+; X32-NOT:       xorl
 ; X32:       lock
 ; X32:       cmpxchg8b
-  %t2 = atomicrmw xor  i64* @sc64, i64 5 acquire
-; X32:       xorl
-; X32:       xorl
+  %t2 = atomicrmw xor  i64* @sc64, i64 4294967297 acquire
+; X32:       xorl $1
+; X32:       xorl $1
 ; X32:       lock
 ; X32:       cmpxchg8b
   %t3 = atomicrmw xor  i64* @sc64, i64 %t2 acquire
@@ -119,7 +124,8 @@ define void @atomic_fetch_xor64() nounwind {
 }
 
 define void @atomic_fetch_nand64(i64 %x) nounwind {
-; X32:   atomic_fetch_nand64
+; X64-LABEL:   atomic_fetch_nand64:
+; X32-LABEL:   atomic_fetch_nand64:
   %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
 ; X32:       andl
 ; X32:       andl
@@ -132,10 +138,11 @@ define void @atomic_fetch_nand64(i64 %x) nounwind {
 }
 
 define void @atomic_fetch_max64(i64 %x) nounwind {
+; X64-LABEL:   atomic_fetch_max:64
+; X32-LABEL:   atomic_fetch_max64:
   %t1 = atomicrmw max  i64* @sc64, i64 %x acquire
-; X32:       cmpl
-; X32:       cmpl
-; X32:       cmov
+; X32:       subl
+; X32:       subl
 ; X32:       cmov
 ; X32:       cmov
 ; X32:       lock
@@ -145,10 +152,11 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
 }
 
 define void @atomic_fetch_min64(i64 %x) nounwind {
+; X64-LABEL:   atomic_fetch_min64:
+; X32-LABEL:   atomic_fetch_min64:
   %t1 = atomicrmw min  i64* @sc64, i64 %x acquire
-; X32:       cmpl
-; X32:       cmpl
-; X32:       cmov
+; X32:       subl
+; X32:       subl
 ; X32:       cmov
 ; X32:       cmov
 ; X32:       lock
@@ -158,10 +166,11 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
 }
 
 define void @atomic_fetch_umax64(i64 %x) nounwind {
+; X64-LABEL:   atomic_fetch_umax:64
+; X32-LABEL:   atomic_fetch_umax64:
   %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
-; X32:       cmpl
-; X32:       cmpl
-; X32:       cmov
+; X32:       subl
+; X32:       subl
 ; X32:       cmov
 ; X32:       cmov
 ; X32:       lock
@@ -171,10 +180,11 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
 }
 
 define void @atomic_fetch_umin64(i64 %x) nounwind {
+; X64-LABEL:   atomic_fetch_umin64:
+; X32-LABEL:   atomic_fetch_umin64:
   %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
-; X32:       cmpl
-; X32:       cmpl
-; X32:       cmov
+; X32:       subl
+; X32:       subl
 ; X32:       cmov
 ; X32:       cmov
 ; X32:       lock
@@ -184,6 +194,8 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
 }
 
 define void @atomic_fetch_cmpxchg64() nounwind {
+; X64-LABEL:   atomic_fetch_cmpxchg:64
+; X32-LABEL:   atomic_fetch_cmpxchg64:
   %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire
 ; X32:       lock
 ; X32:       cmpxchg8b
@@ -192,6 +204,8 @@ define void @atomic_fetch_cmpxchg64() nounwind {
 }
 
 define void @atomic_fetch_store64(i64 %x) nounwind {
+; X64-LABEL:   atomic_fetch_store64:
+; X32-LABEL:   atomic_fetch_store64:
   store atomic i64 %x, i64* @sc64 release, align 8
 ; X32:       lock
 ; X32:       cmpxchg8b
@@ -200,6 +214,8 @@ define void @atomic_fetch_store64(i64 %x) nounwind {
 }
 
 define void @atomic_fetch_swap64(i64 %x) nounwind {
+; X64-LABEL:   atomic_fetch_swap64:
+; X32-LABEL:   atomic_fetch_swap64:
   %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
 ; X32:       lock
 ; X32:       xchg8b
diff --git a/test/CodeGen/X86/atomic8.ll b/test/CodeGen/X86/atomic8.ll
index 203b26f..5eef9b2 100644
--- a/test/CodeGen/X86/atomic8.ll
+++ b/test/CodeGen/X86/atomic8.ll
@@ -4,8 +4,8 @@
 @sc8 = external global i8
 
 define void @atomic_fetch_add8() nounwind {
-; X64:   atomic_fetch_add8
-; X32:   atomic_fetch_add8
+; X64-LABEL:   atomic_fetch_add8:
+; X32-LABEL:   atomic_fetch_add8:
 entry:
 ; 32-bit
   %t1 = atomicrmw add  i8* @sc8, i8 1 acquire
@@ -34,8 +34,8 @@ entry:
 }
 
 define void @atomic_fetch_sub8() nounwind {
-; X64:   atomic_fetch_sub8
-; X32:   atomic_fetch_sub8
+; X64-LABEL:   atomic_fetch_sub8:
+; X32-LABEL:   atomic_fetch_sub8:
   %t1 = atomicrmw sub  i8* @sc8, i8 1 acquire
 ; X64:       lock
 ; X64:       decb
@@ -62,8 +62,8 @@ define void @atomic_fetch_sub8() nounwind {
 }
 
 define void @atomic_fetch_and8() nounwind {
-; X64:   atomic_fetch_and8
-; X32:   atomic_fetch_and8
+; X64-LABEL:   atomic_fetch_and8:
+; X32-LABEL:   atomic_fetch_and8:
   %t1 = atomicrmw and  i8* @sc8, i8 3 acquire
 ; X64:       lock
 ; X64:       andb $3
@@ -87,8 +87,8 @@ define void @atomic_fetch_and8() nounwind {
 }
 
 define void @atomic_fetch_or8() nounwind {
-; X64:   atomic_fetch_or8
-; X32:   atomic_fetch_or8
+; X64-LABEL:   atomic_fetch_or8:
+; X32-LABEL:   atomic_fetch_or8:
   %t1 = atomicrmw or   i8* @sc8, i8 3 acquire
 ; X64:       lock
 ; X64:       orb $3
@@ -112,8 +112,8 @@ define void @atomic_fetch_or8() nounwind {
 }
 
 define void @atomic_fetch_xor8() nounwind {
-; X64:   atomic_fetch_xor8
-; X32:   atomic_fetch_xor8
+; X64-LABEL:   atomic_fetch_xor8:
+; X32-LABEL:   atomic_fetch_xor8:
   %t1 = atomicrmw xor  i8* @sc8, i8 3 acquire
 ; X64:       lock
 ; X64:       xorb $3
@@ -137,8 +137,8 @@ define void @atomic_fetch_xor8() nounwind {
 }
 
 define void @atomic_fetch_nand8(i8 %x) nounwind {
-; X64:   atomic_fetch_nand8
-; X32:   atomic_fetch_nand8
+; X64-LABEL:   atomic_fetch_nand8:
+; X32-LABEL:   atomic_fetch_nand8:
   %t1 = atomicrmw nand i8* @sc8, i8 %x acquire
 ; X64:       andb
 ; X64:       notb
@@ -154,14 +154,18 @@ define void @atomic_fetch_nand8(i8 %x) nounwind {
 }
 
 define void @atomic_fetch_max8(i8 %x) nounwind {
+; X64-LABEL:   atomic_fetch_max8:
+; X32-LABEL:   atomic_fetch_max8:
   %t1 = atomicrmw max  i8* @sc8, i8 %x acquire
-; X64:       cmpb
-; X64:       cmov
+; X64:       movsbl
+; X64:       movsbl
+; X64:       subl
 ; X64:       lock
 ; X64:       cmpxchgb
 
-; X32:       cmpb
-; X32:       cmov
+; X32:       movsbl
+; X32:       movsbl
+; X32:       subl
 ; X32:       lock
 ; X32:       cmpxchgb
   ret void
@@ -170,14 +174,18 @@ define void @atomic_fetch_max8(i8 %x) nounwind {
 }
 
 define void @atomic_fetch_min8(i8 %x) nounwind {
+; X64-LABEL:   atomic_fetch_min8:
+; X32-LABEL:   atomic_fetch_min8:
   %t1 = atomicrmw min  i8* @sc8, i8 %x acquire
-; X64:       cmpb
-; X64:       cmov
+; X64:       movsbl
+; X64:       movsbl
+; X64:       subl
 ; X64:       lock
 ; X64:       cmpxchgb
 
-; X32:       cmpb
-; X32:       cmov
+; X32:       movsbl
+; X32:       movsbl
+; X32:       subl
 ; X32:       lock
 ; X32:       cmpxchgb
   ret void
@@ -186,14 +194,18 @@ define void @atomic_fetch_min8(i8 %x) nounwind {
 }
 
 define void @atomic_fetch_umax8(i8 %x) nounwind {
+; X64-LABEL:   atomic_fetch_umax8:
+; X32-LABEL:   atomic_fetch_umax8:
   %t1 = atomicrmw umax i8* @sc8, i8 %x acquire
-; X64:       cmpb
-; X64:       cmov
+; X64:       movzbl
+; X64:       movzbl
+; X64:       subl
 ; X64:       lock
 ; X64:       cmpxchgb
 
-; X32:       cmpb
-; X32:       cmov
+; X32:       movzbl
+; X32:       movzbl
+; X32:       subl
 ; X32:       lock
 ; X32:       cmpxchgb
   ret void
@@ -202,13 +214,18 @@ define void @atomic_fetch_umax8(i8 %x) nounwind {
 }
 
 define void @atomic_fetch_umin8(i8 %x) nounwind {
+; X64-LABEL:   atomic_fetch_umin8:
+; X32-LABEL:   atomic_fetch_umin8:
   %t1 = atomicrmw umin i8* @sc8, i8 %x acquire
-; X64:       cmpb
-; X64:       cmov
+; X64:       movzbl
+; X64:       movzbl
+; X64:       subl
 ; X64:       lock
 ; X64:       cmpxchgb
-; X32:       cmpb
-; X32:       cmov
+
+; X32:       movzbl
+; X32:       movzbl
+; X32:       subl
 ; X32:       lock
 ; X32:       cmpxchgb
   ret void
@@ -217,6 +234,8 @@ define void @atomic_fetch_umin8(i8 %x) nounwind {
 }
 
 define void @atomic_fetch_cmpxchg8() nounwind {
+; X64-LABEL:   atomic_fetch_cmpxchg8:
+; X32-LABEL:   atomic_fetch_cmpxchg8:
   %t1 = cmpxchg i8* @sc8, i8 0, i8 1 acquire acquire
 ; X64:       lock
 ; X64:       cmpxchgb
@@ -228,6 +247,8 @@ define void @atomic_fetch_cmpxchg8() nounwind {
 }
 
 define void @atomic_fetch_store8(i8 %x) nounwind {
+; X64-LABEL:   atomic_fetch_store8:
+; X32-LABEL:   atomic_fetch_store8:
   store atomic i8 %x, i8* @sc8 release, align 4
 ; X64-NOT:   lock
 ; X64:       movb
@@ -239,6 +260,8 @@ define void @atomic_fetch_store8(i8 %x) nounwind {
 }
 
 define void @atomic_fetch_swap8(i8 %x) nounwind {
+; X64-LABEL:   atomic_fetch_swap8:
+; X32-LABEL:   atomic_fetch_swap8:
   %t1 = atomicrmw xchg i8* @sc8, i8 %x acquire
 ; X64-NOT:   lock
 ; X64:       xchgb
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index b3045ed..d0ab28a 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov,cx16 -verify-machineinstrs | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
@@ -101,26 +101,28 @@ entry:
 	%neg1 = sub i32 0, 10		; <i32> [#uses=1]
         ; CHECK: lock
         ; CHECK: cmpxchgl
-  %16 = cmpxchg i32* %val2, i32 %neg1, i32 1 monotonic monotonic
+  %pair16 = cmpxchg i32* %val2, i32 %neg1, i32 1 monotonic monotonic
+  %16 = extractvalue { i32, i1 } %pair16, 0
 	store i32 %16, i32* %old
         ; CHECK: lock
         ; CHECK: cmpxchgl
-  %17 = cmpxchg i32* %val2, i32 1976, i32 1 monotonic monotonic
+  %pair17 = cmpxchg i32* %val2, i32 1976, i32 1 monotonic monotonic
+  %17 = extractvalue { i32, i1 } %pair17, 0
 	store i32 %17, i32* %old
         ; CHECK: movl  [[R17atomic:.*]], %eax
-        ; CHECK: movl	$1401, %[[R17mask:[a-z]*]]
-        ; CHECK: andl	%eax, %[[R17mask]]
-        ; CHECK: notl	%[[R17mask]]
+        ; CHECK: movl %eax, %[[R17mask:[a-z]*]]
+        ; CHECK: notl %[[R17mask]]
+        ; CHECK: orl $-1402, %[[R17mask]]
         ; CHECK: lock
         ; CHECK: cmpxchgl	%[[R17mask]], [[R17atomic]]
         ; CHECK: jne
         ; CHECK: movl	%eax,
   %18 = atomicrmw nand i32* %val2, i32 1401 monotonic
   store i32 %18, i32* %old
-        ; CHECK: andl
-        ; CHECK: andl
         ; CHECK: notl
         ; CHECK: notl
+        ; CHECK: orl $252645135
+        ; CHECK: orl $252645135
         ; CHECK: lock
         ; CHECK: cmpxchg8b
   %19 = atomicrmw nand i64* %temp64, i64 17361641481138401520 monotonic
@@ -133,6 +135,7 @@ entry:
 ; CHECK: lock
 ; CHECK:	cmpxchgl	%{{.*}}, %gs:(%{{.*}})
 
-  %0 = cmpxchg i32 addrspace(256)* %P, i32 0, i32 1 monotonic monotonic
+  %pair0 = cmpxchg i32 addrspace(256)* %P, i32 0, i32 1 monotonic monotonic
+  %0 = extractvalue { i32, i1 } %pair0, 0
   ret void
 }
diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll
index e21c7a0..d2a22d7 100644
--- a/test/CodeGen/X86/avx-blend.ll
+++ b/test/CodeGen/X86/avx-blend.ll
@@ -110,7 +110,7 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
 
 ;CHECK-LABEL: vsel_double4:
 ;CHECK-NOT: vinsertf128
-;CHECK: vshufpd $10
+;CHECK: vblendpd $10
 ;CHECK-NEXT: ret
 define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
@@ -158,3 +158,45 @@ define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd)
 
 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+;; 4 tests for shufflevectors that optimize to blend + immediate
+; CHECK-LABEL: @blend_shufflevector_4xfloat
+define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
+; Equivalent select mask is <i1 true, i1 false, i1 true, i1 false>.
+; Big endian representation is 0101 = 5.
+; '1' means takes the first argument, '0' means takes the second argument.
+; This is the opposite of the intel syntax, thus we expect
+; Inverted mask: 1010 = 10.
+; According to the ABI:
+; a is in xmm0 => first argument is xmm0.
+; b is in xmm1 => second argument is xmm1.
+; Result is in xmm0 => destination argument.
+; CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
+; CHECK: ret
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %1
+}
+
+; CHECK-LABEL: @blend_shufflevector_8xfloat
+define <8 x float> @blend_shufflevector_8xfloat(<8 x float> %a, <8 x float> %b) {
+; CHECK: vblendps $190, %ymm1, %ymm0, %ymm0
+; CHECK: ret
+  %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 6, i32 15>
+  ret <8 x float> %1
+}
+
+; CHECK-LABEL: @blend_shufflevector_4xdouble
+define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
+; CHECK: vblendpd $2, %ymm1, %ymm0, %ymm0
+; CHECK: ret
+  %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  ret <4 x double> %1
+}
+
+; CHECK-LABEL: @blend_shufflevector_4xi64
+define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK: vblendpd $13, %ymm1, %ymm0, %ymm0
+; CHECK: ret
+  %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
+  ret <4 x i64> %1
+}
diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll
index 7337815..3e051bf 100644
--- a/test/CodeGen/X86/avx-intel-ocl.ll
+++ b/test/CodeGen/X86/avx-intel-ocl.ll
@@ -7,21 +7,21 @@ declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
 declare <16 x float> @func_float16(<16 x float>, <16 x float>)
 declare i32 @func_int(i32, i32)
 
-; WIN64: testf16_inp
+; WIN64-LABEL: testf16_inp
 ; WIN64: vaddps  {{.*}}, {{%ymm[0-1]}}
 ; WIN64: vaddps  {{.*}}, {{%ymm[0-1]}}
 ; WIN64: leaq    {{.*}}(%rsp), %rcx
 ; WIN64: call
 ; WIN64: ret
 
-; X32: testf16_inp
+; X32-LABEL: testf16_inp
 ; X32: movl    %eax, (%esp)
 ; X32: vaddps  {{.*}}, {{%ymm[0-1]}}
 ; X32: vaddps  {{.*}}, {{%ymm[0-1]}}
 ; X32: call
 ; X32: ret
 
-; X64: testf16_inp
+; X64-LABEL: testf16_inp
 ; X64: vaddps  {{.*}}, {{%ymm[0-1]}}
 ; X64: vaddps  {{.*}}, {{%ymm[0-1]}}
 ; X64: leaq    {{.*}}(%rsp), %rdi
@@ -41,14 +41,14 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
 ;test calling conventions - preserved registers
 
 ; preserved ymm6-ymm15
-; WIN64: testf16_regs
+; WIN64-LABEL: testf16_regs
 ; WIN64: call
 ; WIN64: vaddps  {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
 ; WIN64: vaddps  {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
 ; WIN64: ret
 
 ; preserved ymm8-ymm15
-; X64: testf16_regs
+; X64-LABEL: testf16_regs
 ; X64: call
 ; X64: vaddps  {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
 ; X64: vaddps  {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
@@ -65,28 +65,30 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
 }
 
 ; test calling conventions - prolog and epilog
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
+; WIN64-LABEL: test_prolog_epilog
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
 ; WIN64: call
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+
+; X64-LABEL: test_prolog_epilog
 ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
 ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
 ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
@@ -111,12 +113,14 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
 
 ; test functions with integer parameters
 ; pass parameters on stack for 32-bit platform
+; X32-LABEL: test_int
 ; X32: movl {{.*}}, 4(%esp)
 ; X32: movl {{.*}}, (%esp)
 ; X32: call
 ; X32: addl {{.*}}, %eax
 
 ; pass parameters in registers for 64-bit platform
+; X64-LABEL: test_int
 ; X64: leal {{.*}}, %edi
 ; X64: movl {{.*}}, %esi
 ; X64: call
@@ -128,21 +132,21 @@ define i32 @test_int(i32 %a, i32 %b) nounwind {
 	ret i32 %c
 }
 
-; WIN64: test_float4
+; WIN64-LABEL: test_float4
 ; WIN64-NOT: vzeroupper
 ; WIN64: call
 ; WIN64-NOT: vzeroupper
 ; WIN64: call
 ; WIN64: ret
 
-; X64: test_float4
+; X64-LABEL: test_float4
 ; X64-NOT: vzeroupper
 ; X64: call
 ; X64-NOT: vzeroupper
 ; X64: call
 ; X64: ret
 
-; X32: test_float4
+; X32-LABEL: test_float4
 ; X32: vzeroupper
 ; X32: call
 ; X32: vzeroupper
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 0be83f6..ce31161 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -2219,14 +2219,6 @@ define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
 declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
 
 
-define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) {
-  ; CHECK: vbroadcastsd
-  %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1]
-  ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly
-
-
 define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
   ; CHECK: vbroadcastf128
   %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
@@ -2243,22 +2235,6 @@ define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
 
 
-define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) {
-  ; CHECK: vbroadcastss
-  %res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1]
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly
-
-
-define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) {
-  ; CHECK: vbroadcastss
-  %res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1]
-  ret <8 x float> %res
-}
-declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly
-
-
 define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
   ; CHECK: vextractf128
   %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index f407ba4..4a996d7 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -5,8 +5,10 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
   %b = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 5, i32 undef, i32 undef>
   ret <4 x float> %b
 ; CHECK-LABEL: test1:
-; CHECK: vshufps
-; CHECK: vpshufd
+;; TODO: This test could be improved by removing the xor instruction and
+;; having vinsertps zero out the needed elements.
+; CHECK: vxorps
+; CHECK: vinsertps
 }
 
 ; rdar://10538417
@@ -23,7 +25,7 @@ define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
   %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
   ret <4 x i64> %c
 ; CHECK-LABEL: test3:
-; CHECK: vperm2f128
+; CHECK: vblendpd
 ; CHECK: ret
 }
 
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index 5d07815..b1b2f8b 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -43,13 +43,10 @@ entry:
   ret <4 x double> %vecinit6.i
 }
 
-; Test this simple opt:
+; Test this turns into a broadcast:
 ;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
-; To:
-;   shuffle (vload ptr)), undef, <1, 1, 1, 1>
-; CHECK: vmovdqa
-; CHECK-NEXT: vpshufd $-1
-; CHECK-NEXT: vinsertf128  $1
+;   
+; CHECK: vbroadcastss
 define <8 x float> @funcE() nounwind {
 allocas:
   %udx495 = alloca [18 x [18 x float]], align 32
diff --git a/test/CodeGen/X86/avx-vperm2f128.ll b/test/CodeGen/X86/avx-vperm2f128.ll
index caa21e5..c20775b 100644
--- a/test/CodeGen/X86/avx-vperm2f128.ll
+++ b/test/CodeGen/X86/avx-vperm2f128.ll
@@ -9,7 +9,7 @@ entry:
 }
 
 ; CHECK: _B
-; CHECK: vperm2f128 $48
+; CHECK: vblendps $240
 define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
diff --git a/test/CodeGen/X86/avx-vshufp.ll b/test/CodeGen/X86/avx-vshufp.ll
index 45883b7..ad3dbc1 100644
--- a/test/CodeGen/X86/avx-vshufp.ll
+++ b/test/CodeGen/X86/avx-vshufp.ll
@@ -32,14 +32,14 @@ entry:
   ret <8 x i32> %shuffle
 }
 
-; CHECK: vshufpd  $10, %ymm
+; CHECK: vblendpd  $10, %ymm
 define <4 x double> @B(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x double> %shuffle
 }
 
-; CHECK: vshufpd  $10, (%{{.*}}), %ymm
+; CHECK: vblendpd  $10, (%{{.*}}), %ymm
 define <4 x double> @B2(<4 x double>* %a, <4 x double>* %b) nounwind uwtable readnone ssp {
 entry:
   %a2 = load <4 x double>* %a
@@ -48,14 +48,14 @@ entry:
   ret <4 x double> %shuffle
 }
 
-; CHECK: vshufpd  $10, %ymm
+; CHECK: vblendpd  $10, %ymm
 define <4 x i64> @B3(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i64> %shuffle
 }
 
-; CHECK: vshufpd  $10, (%{{.*}}), %ymm
+; CHECK: vblendpd  $10, (%{{.*}}), %ymm
 define <4 x i64> @B4(<4 x i64>* %a, <4 x i64>* %b) nounwind uwtable readnone ssp {
 entry:
   %a2 = load <4 x i64>* %a
@@ -71,7 +71,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
-; CHECK: vshufpd  $2, %ymm
+; CHECK: vblendpd  $2, %ymm
 define <4 x double> @D(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 undef>
diff --git a/test/CodeGen/X86/avx2-shuffle.ll b/test/CodeGen/X86/avx2-shuffle.ll
index 0e6dd29..185b989 100644
--- a/test/CodeGen/X86/avx2-shuffle.ll
+++ b/test/CodeGen/X86/avx2-shuffle.ll
@@ -60,6 +60,24 @@ define <4 x i64> @blend_test4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline
   ret <4 x i64> %t
 }
 
+;; 2 tests for shufflevectors that optimize to blend + immediate
+; CHECK-LABEL: @blend_test5
+; CHECK: vpblendd $10, %xmm1, %xmm0, %xmm0
+; CHECK: ret
+define <4 x i32> @blend_test5(<4 x i32> %a, <4 x i32> %b) {
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i32> %1
+}
+
+; CHECK-LABEL: @blend_test6
+; CHECK: vpblendw $134, %ymm1, %ymm0, %ymm0
+; CHECK: ret
+define <16 x i16> @blend_test6(<16 x i16> %a, <16 x i16> %b) {
+  %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32  3, i32  4, i32  5, i32  6, i32 23,
+                                                               i32 8, i32 25, i32 26, i32 11, i32 12, i32 13, i32 14, i32 31>
+  ret <16 x i16> %1
+}
+
 ; CHECK: vpshufhw $27, %ymm
 define <16 x i16> @vpshufhw(<16 x i16> %src1) nounwind uwtable readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index 2476ea1..f5cda96 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -192,6 +192,14 @@ define <16 x double> @uitof64(<16 x i32> %a) nounwind {
   ret <16 x double> %b
 }
 
+; CHECK-LABEL: uitof64_256
+; CHECK: vcvtudq2pd
+; CHECK: ret
+define <4 x double> @uitof64_256(<4 x i32> %a) nounwind {
+  %b = uitofp <4 x i32> %a to <4 x double>
+  ret <4 x double> %b
+}
+
 ; CHECK-LABEL: uitof32
 ; CHECK: vcvtudq2ps
 ; CHECK: ret
diff --git a/test/CodeGen/X86/avx512-inc-dec.ll b/test/CodeGen/X86/avx512-inc-dec.ll
new file mode 100644
index 0000000..f04ca87
--- /dev/null
+++ b/test/CodeGen/X86/avx512-inc-dec.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+;CHECK-LABEL: test
+;CHECK-NOT: dec
+;CHECK_NOT: enc
+;CHECK: ret
+define i32 @test(i32 %a, i32 %b) {
+ %a1 = add i32 %a, -1
+ %b1 = add i32 %b, 1
+ %res = mul i32 %a1, %b1
+ ret i32 %res
+}
+
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index e19841a..18cfcfe 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -311,7 +311,6 @@ define <8 x i64> @test_conflict_q(<8 x i64> %a) {
 
 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
 
-
 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
   ; CHECK: vpconflictd 
   %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
@@ -324,6 +323,57 @@ define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
   ret <8 x i64> %res
 }
 
+define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
+  ; CHECK: movw $-1, %ax
+  ; CHECK: vpxor
+  ; CHECK: vplzcntd
+  %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
+
+define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
+  ; CHECK: movb $-1, %al
+  ; CHECK: vpxor
+  ; CHECK: vplzcntq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
+
+
+define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
+  ; CHECK: vplzcntd
+  %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
+  ; CHECK: vplzcntq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
+  ; CHECK-LABEL: test_ctlz_d
+  ; CHECK: vplzcntd
+  %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly
+
+define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
+  ; CHECK-LABEL: test_ctlz_q
+  ; CHECK: vplzcntq
+  %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly
+
 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK: vblendmps
   %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
@@ -544,4 +594,20 @@ define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%p
   ret <16 x float> %res
 }
 
+define <16 x float> @test_vpermt2ps_mask(<16 x float>%x, <16 x float>%y, <16 x i32>%perm, i16 %mask) {
+; CHECK-LABEL: test_vpermt2ps_mask:
+; CHECK: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x7f,0xc1]
+  %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 %mask)
+  ret <16 x float> %res
+}
+
 declare <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
+
+define <8 x i64> @test_vmovntdqa(i8 *%x) {
+; CHECK-LABEL: test_vmovntdqa:
+; CHECK: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07]
+  %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %x)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
diff --git a/test/CodeGen/X86/avx512-nontemporal.ll b/test/CodeGen/X86/avx512-nontemporal.ll
new file mode 100644
index 0000000..ef50cdb
--- /dev/null
+++ b/test/CodeGen/X86/avx512-nontemporal.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 -mattr=+avx512f | FileCheck %s
+
+define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, i32 %D, <8 x i64> %E, <8 x i64> %EE) {
+; CHECK: vmovntps %z
+  %cast = bitcast i8* %B to <16 x float>*
+  %A2 = fadd <16 x float> %A, %AA
+  store <16 x float> %A2, <16 x float>* %cast, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+  %cast1 = bitcast i8* %B to <8 x i64>*
+  %E2 = add <8 x i64> %E, %EE
+  store <8 x i64> %E2, <8 x i64>* %cast1, align 64, !nontemporal !0
+; CHECK: vmovntpd %z
+  %cast2 = bitcast i8* %B to <8 x double>*
+  %C2 = fadd <8 x double> %C, %CC
+  store <8 x double> %C2, <8 x double>* %cast2, align 64, !nontemporal !0
+  ret void
+}
+
+!0 = metadata !{i32 1}
diff --git a/test/CodeGen/X86/avx512-shuffle.ll b/test/CodeGen/X86/avx512-shuffle.ll
index 23ddc3a..b99e89a 100644
--- a/test/CodeGen/X86/avx512-shuffle.ll
+++ b/test/CodeGen/X86/avx512-shuffle.ll
@@ -56,6 +56,16 @@ define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind {
   ret <8 x double> %c
 }
 
+; The reg variant of vpermt2 with a writemask
+; CHECK-LABEL: test5m:
+; CHECK: vpermt2pd {{.* {%k[1-7]} {z}}}
+define <8 x double> @test5m(<8 x double> %a, <8 x double> %b, i8 %mask) nounwind {
+  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
+  %m = bitcast i8 %mask to <8 x i1>
+  %res = select <8 x i1> %m, <8 x double> %c, <8 x double> zeroinitializer
+  ret <8 x double> %res
+}
+
 ; CHECK-LABEL: test6:
 ; CHECK: vpermq $30
 ; CHECK: ret
@@ -72,6 +82,27 @@ define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
   ret <8 x i64> %c
 }
 
+; The reg variant of vpermt2 with a writemask
+; CHECK-LABEL: test7m:
+; CHECK: vpermt2q {{.* {%k[1-7]} {z}}}
+define <8 x i64> @test7m(<8 x i64> %a, <8 x i64> %b, i8 %mask) nounwind {
+  %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
+  %m = bitcast i8 %mask to <8 x i1>
+  %res = select <8 x i1> %m, <8 x i64> %c, <8 x i64> zeroinitializer
+  ret <8 x i64> %res
+}
+
+; The mem variant of vpermt2 with a writemask
+; CHECK-LABEL: test7mm:
+; CHECK: vpermt2q {{\(.*\).* {%k[1-7]} {z}}}
+define <8 x i64> @test7mm(<8 x i64> %a, <8 x i64> *%pb, i8 %mask) nounwind {
+  %b = load <8 x i64>* %pb
+  %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
+  %m = bitcast i8 %mask to <8 x i1>
+  %res = select <8 x i1> %m, <8 x i64> %c, <8 x i64> zeroinitializer
+  ret <8 x i64> %res
+}
+
 ; CHECK-LABEL: test8:
 ; CHECK: vpermt2d
 ; CHECK: ret
@@ -80,6 +111,27 @@ define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
   ret <16 x i32> %c
 }
 
+; The reg variant of vpermt2 with a writemask
+; CHECK-LABEL: test8m:
+; CHECK: vpermt2d {{.* {%k[1-7]} {z}}}
+define <16 x i32> @test8m(<16 x i32> %a, <16 x i32> %b, i16 %mask) nounwind {
+  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+  %m = bitcast i16 %mask to <16 x i1>
+  %res = select <16 x i1> %m, <16 x i32> %c, <16 x i32> zeroinitializer
+  ret <16 x i32> %res
+}
+
+; The mem variant of vpermt2 with a writemask
+; CHECK-LABEL: test8mm:
+; CHECK: vpermt2d {{\(.*\).* {%k[1-7]} {z}}}
+define <16 x i32> @test8mm(<16 x i32> %a, <16 x i32> *%pb, i16 %mask) nounwind {
+  %b = load <16 x i32> * %pb
+  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+  %m = bitcast i16 %mask to <16 x i1>
+  %res = select <16 x i1> %m, <16 x i32> %c, <16 x i32> zeroinitializer
+  ret <16 x i32> %res
+}
+
 ; CHECK-LABEL: test9:
 ; CHECK: vpermt2ps
 ; CHECK: ret
@@ -88,6 +140,16 @@ define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
   ret <16 x float> %c
 }
 
+; The reg variant of vpermt2 with a writemask
+; CHECK-LABEL: test9m:
+; CHECK: vpermt2ps {{.*}} {%k{{.}}} {z}
+define <16 x float> @test9m(<16 x float> %a, <16 x float> %b, i16 %mask) nounwind {
+  %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+  %m = bitcast i16 %mask to <16 x i1>
+  %res = select <16 x i1> %m, <16 x float> %c, <16 x float> zeroinitializer
+  ret <16 x float> %res
+}
+
 ; CHECK-LABEL: test10:
 ; CHECK: vpermt2ps (
 ; CHECK: ret
diff --git a/test/CodeGen/X86/bswap-vector.ll b/test/CodeGen/X86/bswap-vector.ll
index 3c931db..9dc960d 100644
--- a/test/CodeGen/X86/bswap-vector.ll
+++ b/test/CodeGen/X86/bswap-vector.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -mcpu=x86-64 | FileCheck %s -check-prefix=CHECK-NOSSSE3
 ; RUN: llc < %s -mcpu=core2 | FileCheck %s -check-prefix=CHECK-SSSE3
 ; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK-AVX2
+; RUN: llc < %s -mcpu=core-avx2 -x86-experimental-vector-widening-legalization | FileCheck %s -check-prefix=CHECK-WIDE-AVX2
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
@@ -31,6 +32,10 @@ entry:
 ; CHECK-AVX2-LABEL: @test1
 ; CHECK-AVX2: vpshufb
 ; CHECK-AVX2-NEXT: retq
+
+; CHECK-WIDE-AVX2-LABEL: @test1
+; CHECK-WIDE-AVX2: vpshufb
+; CHECK-WIDE-AVX2-NEXT: retq
 }
 
 define <4 x i32> @test2(<4 x i32> %v) #0 {
@@ -52,6 +57,10 @@ entry:
 ; CHECK-AVX2-LABEL: @test2
 ; CHECK-AVX2: vpshufb
 ; CHECK-AVX2-NEXT: retq
+
+; CHECK-WIDE-AVX2-LABEL: @test2
+; CHECK-WIDE-AVX2: vpshufb
+; CHECK-WIDE-AVX2-NEXT: retq
 }
 
 define <2 x i64> @test3(<2 x i64> %v) #0 {
@@ -71,6 +80,10 @@ entry:
 ; CHECK-AVX2-LABEL: @test3
 ; CHECK-AVX2: vpshufb
 ; CHECK-AVX2-NEXT: retq
+
+; CHECK-WIDE-AVX2-LABEL: @test3
+; CHECK-WIDE-AVX2: vpshufb
+; CHECK-WIDE-AVX2-NEXT: retq
 }
 
 declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
@@ -90,6 +103,10 @@ entry:
 ; CHECK-AVX2-LABEL: @test4
 ; CHECK-AVX2: vpshufb
 ; CHECK-AVX2-NEXT: retq
+
+; CHECK-WIDE-AVX2-LABEL: @test4
+; CHECK-WIDE-AVX2: vpshufb
+; CHECK-WIDE-AVX2-NEXT: retq
 }
 
 define <8 x i32> @test5(<8 x i32> %v) #0 {
@@ -105,6 +122,10 @@ entry:
 ; CHECK-AVX2-LABEL: @test5
 ; CHECK-AVX2: vpshufb
 ; CHECK-AVX2-NEXT: retq
+
+; CHECK-WIDE-AVX2-LABEL: @test5
+; CHECK-WIDE-AVX2: vpshufb
+; CHECK-WIDE-AVX2-NEXT: retq
 }
 
 define <4 x i64> @test6(<4 x i64> %v) #0 {
@@ -120,6 +141,10 @@ entry:
 ; CHECK-AVX2-LABEL: @test6
 ; CHECK-AVX2: vpshufb
 ; CHECK-AVX2-NEXT: retq
+
+; CHECK-WIDE-AVX2-LABEL: @test6
+; CHECK-WIDE-AVX2: vpshufb
+; CHECK-WIDE-AVX2-NEXT: retq
 }
 
 declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
@@ -138,6 +163,10 @@ entry:
 ; CHECK-AVX2: vpshufb
 ; CHECK-AVX2: vpsrld $16
 ; CHECK-AVX2-NEXT: retq
+
+; CHECK-WIDE-AVX2-LABEL: @test7
+; CHECK-WIDE-AVX2: vpshufb
+; CHECK-WIDE-AVX2-NEXT: retq
 }
 
 attributes #0 = { nounwind uwtable }
diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll
index cdcdc96..149d537 100644
--- a/test/CodeGen/X86/cmp.ll
+++ b/test/CodeGen/X86/cmp.ll
@@ -198,3 +198,16 @@ define i32 @test14(i32 %mask, i32 %base, i32 %intra) #0 {
 ; CHECK: 	shrl	$7, %edi
 ; CHECK-NEXT: 	cmovnsl	%edx, %esi
 }
+
+; PR19964
+define zeroext i1 @test15(i32 %bf.load, i32 %n) {
+  %bf.lshr = lshr i32 %bf.load, 16
+  %cmp2 = icmp eq i32 %bf.lshr, 0
+  %cmp5 = icmp uge i32 %bf.lshr, %n
+  %.cmp5 = or i1 %cmp2, %cmp5
+  ret i1 %.cmp5
+
+; CHECK-LABEL: test15:
+; CHECK:  shrl	$16, %edi
+; CHECK:  cmpl	%esi, %edi
+}
diff --git a/test/CodeGen/X86/cmpxchg-i1.ll b/test/CodeGen/X86/cmpxchg-i1.ll
new file mode 100644
index 0000000..a21ab59
--- /dev/null
+++ b/test/CodeGen/X86/cmpxchg-i1.ll
@@ -0,0 +1,87 @@
+; RUN: llc -mtriple=x86_64 -o - %s | FileCheck %s
+
+define i1 @try_cmpxchg(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: try_cmpxchg:
+; CHECK: cmpxchgl
+; CHECK-NOT: cmp
+; CHECK: sete %al
+; CHECK: retq
+  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+  %success = extractvalue { i32, i1 } %pair, 1
+  ret i1 %success
+}
+
+define void @cmpxchg_flow(i64* %addr, i64 %desired, i64 %new) {
+; CHECK-LABEL: cmpxchg_flow:
+; CHECK: cmpxchgq
+; CHECK-NOT: cmp
+; CHECK-NOT: set
+; CHECK: {{jne|jeq}}
+  %pair = cmpxchg i64* %addr, i64 %desired, i64 %new seq_cst seq_cst
+  %success = extractvalue { i64, i1 } %pair, 1
+  br i1 %success, label %true, label %false
+
+true:
+  call void @foo()
+  ret void
+
+false:
+  call void @bar()
+  ret void
+}
+
+define i64 @cmpxchg_sext(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: cmpxchg_sext:
+; CHECK-DAG: cmpxchgl
+; CHECK-NOT: cmpl
+; CHECK: sete %al
+; CHECK: retq
+  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+  %success = extractvalue { i32, i1 } %pair, 1
+  %mask = sext i1 %success to i64
+  ret i64 %mask
+}
+
+define i32 @cmpxchg_zext(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: cmpxchg_zext:
+; CHECK: cmpxchgl
+; CHECK-NOT: cmp
+; CHECK: sete [[BYTE:%[a-z0-9]+]]
+; CHECK: movzbl [[BYTE]], %eax
+  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+  %success = extractvalue { i32, i1 } %pair, 1
+  %mask = zext i1 %success to i32
+  ret i32 %mask
+}
+
+
+define i32 @cmpxchg_use_eflags_and_val(i32* %addr, i32 %offset) {
+; CHECK-LABEL: cmpxchg_use_eflags_and_val:
+; CHECK: movl (%rdi), %e[[OLDVAL:[a-z0-9]+]]
+
+; CHECK: [[LOOPBB:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: leal (%r[[OLDVAL]],%rsi), [[NEW:%[a-z0-9]+]]
+; CHECK: cmpxchgl [[NEW]], (%rdi)
+; CHECK-NOT: cmpl
+; CHECK: jne [[LOOPBB]]
+
+  ; Result already in %eax
+; CHECK: retq
+entry:
+  %init = load atomic i32* %addr seq_cst, align 4
+  br label %loop
+
+loop:
+  %old = phi i32 [%init, %entry], [%oldval, %loop]
+  %new = add i32 %old, %offset
+  %pair = cmpxchg i32* %addr, i32 %old, i32 %new seq_cst seq_cst
+  %oldval = extractvalue { i32, i1 } %pair, 0
+  %success = extractvalue { i32, i1 } %pair, 1
+  br i1 %success, label %done, label %loop
+
+done:
+  ret i32 %oldval
+}
+
+declare void @foo()
+declare void @bar()
diff --git a/test/CodeGen/X86/cmpxchg-i128-i1.ll b/test/CodeGen/X86/cmpxchg-i128-i1.ll
new file mode 100644
index 0000000..4dd3001
--- /dev/null
+++ b/test/CodeGen/X86/cmpxchg-i128-i1.ll
@@ -0,0 +1,83 @@
+; RUN: llc -mcpu=core-avx2 -mtriple=x86_64 -o - %s | FileCheck %s
+
+define i1 @try_cmpxchg(i128* %addr, i128 %desired, i128 %new) {
+; CHECK-LABEL: try_cmpxchg:
+; CHECK: cmpxchg16b
+; CHECK-NOT: cmp
+; CHECK: sete %al
+; CHECK: retq
+  %pair = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
+  %success = extractvalue { i128, i1 } %pair, 1
+  ret i1 %success
+}
+
+define void @cmpxchg_flow(i128* %addr, i128 %desired, i128 %new) {
+; CHECK-LABEL: cmpxchg_flow:
+; CHECK: cmpxchg16b
+; CHECK-NOT: cmp
+; CHECK-NOT: set
+; CHECK: {{jne|jeq}}
+  %pair = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
+  %success = extractvalue { i128, i1 } %pair, 1
+  br i1 %success, label %true, label %false
+
+true:
+  call void @foo()
+  ret void
+
+false:
+  call void @bar()
+  ret void
+}
+
+; Can't use the flags here because cmpxchg16b only sets ZF.
+define i1 @cmpxchg_arithcmp(i128* %addr, i128 %desired, i128 %new) {
+; CHECK-LABEL: cmpxchg_arithcmp:
+; CHECK: cmpxchg16b
+; CHECK: cmpq
+; CHECK: retq
+  %pair = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
+  %oldval = extractvalue { i128, i1 } %pair, 0
+  %success = icmp sge i128 %oldval, %desired
+  ret i1 %success
+}
+
+define i128 @cmpxchg_zext(i128* %addr, i128 %desired, i128 %new) {
+; CHECK-LABEL: cmpxchg_zext:
+; CHECK: cmpxchg16b
+; CHECK-NOT: cmpq
+; CHECK: sete [[BYTE:%[a-z0-9]+]]
+; CHECK: movzbl [[BYTE]], %eax
+  %pair = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
+  %success = extractvalue { i128, i1 } %pair, 1
+  %mask = zext i1 %success to i128
+  ret i128 %mask
+}
+
+
+define i128 @cmpxchg_use_eflags_and_val(i128* %addr, i128 %offset) {
+; CHECK-LABEL: cmpxchg_use_eflags_and_val:
+
+; CHECK: cmpxchg16b
+; CHECK-NOT: cmpq
+; CHECK: jne
+entry:
+  %init = load atomic i128* %addr seq_cst, align 16
+  br label %loop
+
+loop:
+  %old = phi i128 [%init, %entry], [%oldval, %loop]
+  %new = add i128 %old, %offset
+
+  %pair = cmpxchg i128* %addr, i128 %old, i128 %new seq_cst seq_cst
+  %oldval = extractvalue { i128, i1 } %pair, 0
+  %success = extractvalue { i128, i1 } %pair, 1
+
+  br i1 %success, label %done, label %loop
+
+done:
+  ret i128 %old
+}
+
+declare void @foo()
+declare void @bar()
diff --git a/test/CodeGen/X86/coalescer-remat.ll b/test/CodeGen/X86/coalescer-remat.ll
index 468b70b..bb08a0e 100644
--- a/test/CodeGen/X86/coalescer-remat.ll
+++ b/test/CodeGen/X86/coalescer-remat.ll
@@ -5,7 +5,8 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = cmpxchg i64* @val, i64 0, i64 1 monotonic monotonic
+  %t0 = cmpxchg i64* @val, i64 0, i64 1 monotonic monotonic
+  %0 = extractvalue { i64, i1 } %t0, 0
   %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8]* @"\01LC", i32 0, i64 0), i64 %0) nounwind
   ret i32 0
 }
diff --git a/test/CodeGen/X86/coff-comdat.ll b/test/CodeGen/X86/coff-comdat.ll
new file mode 100644
index 0000000..bf27b2f
--- /dev/null
+++ b/test/CodeGen/X86/coff-comdat.ll
@@ -0,0 +1,92 @@
+; RUN: llc -mtriple i386-pc-win32 < %s | FileCheck %s
+
+$f1 = comdat any
+@v1 = global i32 0, comdat $f1
+define void @f1() comdat $f1 {
+  ret void
+}
+
+$f2 = comdat exactmatch
+@v2 = global i32 0, comdat $f2
+define void @f2() comdat $f2 {
+  ret void
+}
+
+$f3 = comdat largest
+@v3 = global i32 0, comdat $f3
+define void @f3() comdat $f3 {
+  ret void
+}
+
+$f4 = comdat noduplicates
+@v4 = global i32 0, comdat $f4
+define void @f4() comdat $f4 {
+  ret void
+}
+
+$f5 = comdat samesize
+@v5 = global i32 0, comdat $f5
+define void @f5() comdat $f5 {
+  ret void
+}
+
+$f6 = comdat samesize
+@v6 = global i32 0, comdat $f6
+@f6 = global i32 0, comdat $f6
+
+$"\01@f7@0" = comdat any
+define x86_fastcallcc void @"\01@v7@0"() comdat $"\01@f7@0" {
+  ret void
+}
+define x86_fastcallcc void @"\01@f7@0"() comdat $"\01@f7@0" {
+  ret void
+}
+
+$f8 = comdat any
+define x86_fastcallcc void @v8() comdat $f8 {
+  ret void
+}
+define x86_fastcallcc void @f8() comdat $f8 {
+  ret void
+}
+
+$vftable = comdat largest
+
+@some_name = private unnamed_addr constant [2 x i8*] zeroinitializer, comdat $vftable
+@vftable = alias getelementptr([2 x i8*]* @some_name, i32 0, i32 1)
+
+; CHECK: .section        .text,"xr",discard,_f1
+; CHECK: .globl  _f1
+; CHECK: .section        .text,"xr",same_contents,_f2
+; CHECK: .globl  _f2
+; CHECK: .section        .text,"xr",largest,_f3
+; CHECK: .globl  _f3
+; CHECK: .section        .text,"xr",one_only,_f4
+; CHECK: .globl  _f4
+; CHECK: .section        .text,"xr",same_size,_f5
+; CHECK: .globl  _f5
+; CHECK: .section        .text,"xr",associative,@f7@0
+; CHECK: .globl  @v7@0
+; CHECK: .section        .text,"xr",discard,@f7@0
+; CHECK: .globl  @f7@0
+; CHECK: .section        .text,"xr",associative,@f8@0
+; CHECK: .globl  @v8@0
+; CHECK: .section        .text,"xr",discard,@f8@0
+; CHECK: .globl  @f8@0
+; CHECK: .section        .bss,"bw",associative,_f1
+; CHECK: .globl  _v1
+; CHECK: .section        .bss,"bw",associative,_f2
+; CHECK: .globl  _v2
+; CHECK: .section        .bss,"bw",associative,_f3
+; CHECK: .globl  _v3
+; CHECK: .section        .bss,"bw",associative,_f4
+; CHECK: .globl  _v4
+; CHECK: .section        .bss,"bw",associative,_f5
+; CHECK: .globl  _v5
+; CHECK: .section        .bss,"bw",associative,_f6
+; CHECK: .globl  _v6
+; CHECK: .section        .bss,"bw",same_size,_f6
+; CHECK: .globl  _f6
+; CHECK: .section        .rdata,"rd",largest,_vftable
+; CHECK: .globl  _vftable
+; CHECK: _vftable = L_some_name+4
diff --git a/test/CodeGen/X86/coff-comdat2.ll b/test/CodeGen/X86/coff-comdat2.ll
new file mode 100644
index 0000000..6744b5b
--- /dev/null
+++ b/test/CodeGen/X86/coff-comdat2.ll
@@ -0,0 +1,9 @@
+; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s
+
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+$foo = comdat largest
+@foo = global i32 0
+@bar = global i32 0, comdat $foo
+; CHECK: Associative COMDAT symbol 'foo' is not a key for it's COMDAT.
diff --git a/test/CodeGen/X86/coff-comdat3.ll b/test/CodeGen/X86/coff-comdat3.ll
new file mode 100644
index 0000000..76e464b
--- /dev/null
+++ b/test/CodeGen/X86/coff-comdat3.ll
@@ -0,0 +1,8 @@
+; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s
+
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+$foo = comdat largest
+@bar = global i32 0, comdat $foo
+; CHECK: Associative COMDAT symbol 'foo' does not exist.
diff --git a/test/CodeGen/X86/combine-64bit-vec-binop.ll b/test/CodeGen/X86/combine-64bit-vec-binop.ll
new file mode 100644
index 0000000..8440fda
--- /dev/null
+++ b/test/CodeGen/X86/combine-64bit-vec-binop.ll
@@ -0,0 +1,273 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+
+
+define double @test1_add(double %A, double %B) {
+  %1 = bitcast double %A to <2 x i32>
+  %2 = bitcast double %B to <2 x i32>
+  %add = add <2 x i32> %1, %2
+  %3 = bitcast <2 x i32> %add to double
+  ret double %3
+}
+; CHECK-LABEL: test1_add
+; SSE41: paddd
+; AVX: vpaddd
+; CHECK-NEXT: ret
+
+
+define double @test2_add(double %A, double %B) {
+  %1 = bitcast double %A to <4 x i16>
+  %2 = bitcast double %B to <4 x i16>
+  %add = add <4 x i16> %1, %2
+  %3 = bitcast <4 x i16> %add to double
+  ret double %3
+}
+; CHECK-LABEL: test2_add
+; SSE41: paddw
+; AVX: vpaddw
+; CHECK-NEXT: ret
+
+define double @test3_add(double %A, double %B) {
+  %1 = bitcast double %A to <8 x i8>
+  %2 = bitcast double %B to <8 x i8>
+  %add = add <8 x i8> %1, %2
+  %3 = bitcast <8 x i8> %add to double
+  ret double %3
+}
+; CHECK-LABEL: test3_add
+; SSE41: paddb
+; AVX: vpaddb
+; CHECK-NEXT: ret
+
+
+define double @test1_sub(double %A, double %B) {
+  %1 = bitcast double %A to <2 x i32>
+  %2 = bitcast double %B to <2 x i32>
+  %sub = sub <2 x i32> %1, %2
+  %3 = bitcast <2 x i32> %sub to double
+  ret double %3
+}
+; CHECK-LABEL: test1_sub
+; SSE41: psubd
+; AVX: vpsubd
+; CHECK-NEXT: ret
+
+
+define double @test2_sub(double %A, double %B) {
+  %1 = bitcast double %A to <4 x i16>
+  %2 = bitcast double %B to <4 x i16>
+  %sub = sub <4 x i16> %1, %2
+  %3 = bitcast <4 x i16> %sub to double
+  ret double %3
+}
+; CHECK-LABEL: test2_sub
+; SSE41: psubw
+; AVX: vpsubw
+; CHECK-NEXT: ret
+
+
+define double @test3_sub(double %A, double %B) {
+  %1 = bitcast double %A to <8 x i8>
+  %2 = bitcast double %B to <8 x i8>
+  %sub = sub <8 x i8> %1, %2
+  %3 = bitcast <8 x i8> %sub to double
+  ret double %3
+}
+; CHECK-LABEL: test3_sub
+; SSE41: psubb
+; AVX: vpsubb
+; CHECK-NEXT: ret
+
+
+define double @test1_mul(double %A, double %B) {
+  %1 = bitcast double %A to <2 x i32>
+  %2 = bitcast double %B to <2 x i32>
+  %mul = mul <2 x i32> %1, %2
+  %3 = bitcast <2 x i32> %mul to double
+  ret double %3
+}
+; CHECK-LABEL: test1_mul
+; SSE41: pmulld
+; AVX: vpmulld
+; CHECK-NEXT: ret
+
+
+define double @test2_mul(double %A, double %B) {
+  %1 = bitcast double %A to <4 x i16>
+  %2 = bitcast double %B to <4 x i16>
+  %mul = mul <4 x i16> %1, %2
+  %3 = bitcast <4 x i16> %mul to double
+  ret double %3
+}
+; CHECK-LABEL: test2_mul
+; SSE41: pmullw
+; AVX: vpmullw
+; CHECK-NEXT: ret
+
+; There is no legal ISD::MUL with type MVT::v8i16.
+define double @test3_mul(double %A, double %B) {
+  %1 = bitcast double %A to <8 x i8>
+  %2 = bitcast double %B to <8 x i8>
+  %mul = mul <8 x i8> %1, %2
+  %3 = bitcast <8 x i8> %mul to double
+  ret double %3
+}
+; CHECK-LABEL: test3_mul
+; CHECK: pmullw
+; CHECK-NEXT: pshufb
+; CHECK-NEXT: ret
+
+
+define double @test1_and(double %A, double %B) {
+  %1 = bitcast double %A to <2 x i32>
+  %2 = bitcast double %B to <2 x i32>
+  %and = and <2 x i32> %1, %2
+  %3 = bitcast <2 x i32> %and to double
+  ret double %3
+}
+; CHECK-LABEL: test1_and
+; SSE41: andps
+; AVX: vandps
+; CHECK-NEXT: ret
+
+
+define double @test2_and(double %A, double %B) {
+  %1 = bitcast double %A to <4 x i16>
+  %2 = bitcast double %B to <4 x i16>
+  %and = and <4 x i16> %1, %2
+  %3 = bitcast <4 x i16> %and to double
+  ret double %3
+}
+; CHECK-LABEL: test2_and
+; SSE41: andps
+; AVX: vandps
+; CHECK-NEXT: ret
+
+
+define double @test3_and(double %A, double %B) {
+  %1 = bitcast double %A to <8 x i8>
+  %2 = bitcast double %B to <8 x i8>
+  %and = and <8 x i8> %1, %2
+  %3 = bitcast <8 x i8> %and to double
+  ret double %3
+}
+; CHECK-LABEL: test3_and
+; SSE41: andps
+; AVX: vandps
+; CHECK-NEXT: ret
+
+
+define double @test1_or(double %A, double %B) {
+  %1 = bitcast double %A to <2 x i32>
+  %2 = bitcast double %B to <2 x i32>
+  %or = or <2 x i32> %1, %2
+  %3 = bitcast <2 x i32> %or to double
+  ret double %3
+}
+; CHECK-LABEL: test1_or
+; SSE41: orps
+; AVX: vorps
+; CHECK-NEXT: ret
+
+
+define double @test2_or(double %A, double %B) {
+  %1 = bitcast double %A to <4 x i16>
+  %2 = bitcast double %B to <4 x i16>
+  %or = or <4 x i16> %1, %2
+  %3 = bitcast <4 x i16> %or to double
+  ret double %3
+}
+; CHECK-LABEL: test2_or
+; SSE41: orps
+; AVX: vorps
+; CHECK-NEXT: ret
+
+
+define double @test3_or(double %A, double %B) {
+  %1 = bitcast double %A to <8 x i8>
+  %2 = bitcast double %B to <8 x i8>
+  %or = or <8 x i8> %1, %2
+  %3 = bitcast <8 x i8> %or to double
+  ret double %3
+}
+; CHECK-LABEL: test3_or
+; SSE41: orps
+; AVX: vorps
+; CHECK-NEXT: ret
+
+
+define double @test1_xor(double %A, double %B) {
+  %1 = bitcast double %A to <2 x i32>
+  %2 = bitcast double %B to <2 x i32>
+  %xor = xor <2 x i32> %1, %2
+  %3 = bitcast <2 x i32> %xor to double
+  ret double %3
+}
+; CHECK-LABEL: test1_xor
+; SSE41: xorps
+; AVX: vxorps
+; CHECK-NEXT: ret
+
+
+define double @test2_xor(double %A, double %B) {
+  %1 = bitcast double %A to <4 x i16>
+  %2 = bitcast double %B to <4 x i16>
+  %xor = xor <4 x i16> %1, %2
+  %3 = bitcast <4 x i16> %xor to double
+  ret double %3
+}
+; CHECK-LABEL: test2_xor
+; SSE41: xorps
+; AVX: vxorps
+; CHECK-NEXT: ret
+
+
+define double @test3_xor(double %A, double %B) {
+  %1 = bitcast double %A to <8 x i8>
+  %2 = bitcast double %B to <8 x i8>
+  %xor = xor <8 x i8> %1, %2
+  %3 = bitcast <8 x i8> %xor to double
+  ret double %3
+}
+; CHECK-LABEL: test3_xor
+; SSE41: xorps
+; AVX: vxorps
+; CHECK-NEXT: ret
+
+
+define double @test_fadd(double %A, double %B) {
+  %1 = bitcast double %A to <2 x float>
+  %2 = bitcast double %B to <2 x float>
+  %add = fadd <2 x float> %1, %2
+  %3 = bitcast <2 x float> %add to double
+  ret double %3
+}
+; CHECK-LABEL: test_fadd
+; SSE41: addps
+; AVX: vaddps
+; CHECK-NEXT: ret
+
+define double @test_fsub(double %A, double %B) {
+  %1 = bitcast double %A to <2 x float>
+  %2 = bitcast double %B to <2 x float>
+  %sub = fsub <2 x float> %1, %2
+  %3 = bitcast <2 x float> %sub to double
+  ret double %3
+}
+; CHECK-LABEL: test_fsub
+; SSE41: subps
+; AVX: vsubps
+; CHECK-NEXT: ret
+
+define double @test_fmul(double %A, double %B) {
+  %1 = bitcast double %A to <2 x float>
+  %2 = bitcast double %B to <2 x float>
+  %mul = fmul <2 x float> %1, %2
+  %3 = bitcast <2 x float> %mul to double
+  ret double %3
+}
+; CHECK-LABEL: test_fmul
+; SSE41: mulps
+; AVX: vmulps
+; CHECK-NEXT: ret
+
diff --git a/test/CodeGen/X86/combine-or.ll b/test/CodeGen/X86/combine-or.ll
index c1ce533..ff807b9 100644
--- a/test/CodeGen/X86/combine-or.ll
+++ b/test/CodeGen/X86/combine-or.ll
@@ -25,7 +25,7 @@ define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
 }
 ; CHECK-LABEL: test2
 ; CHECK-NOT: xorps
-; CHECK: shufps
+; CHECK: movsd
 ; CHECK: ret
 
 
@@ -74,7 +74,7 @@ define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
 }
 ; CHECK-LABEL: test6
 ; CHECK-NOT: xorps
-; CHECK: shufps
+; CHECK: blendps $12
 ; CHECK-NEXT: ret
 
 
@@ -86,7 +86,7 @@ define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
 }
 ; CHECK-LABEL: test7
 ; CHECK-NOT: xorps
-; CHECK: shufps
+; CHECK: blendps $12
 ; CHECK-NEXT: ret
 
 
@@ -111,7 +111,7 @@ define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
 }
 ; CHECK-LABEL: test9
 ; CHECK-NOT: xorps
-; CHECK: shufps
+; CHECK: movsd
 ; CHECK: ret
 
 
diff --git a/test/CodeGen/X86/combine-vec-shuffle-2.ll b/test/CodeGen/X86/combine-vec-shuffle-2.ll
new file mode 100644
index 0000000..7ab7f80
--- /dev/null
+++ b/test/CodeGen/X86/combine-vec-shuffle-2.ll
@@ -0,0 +1,164 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
+
+; Check that DAGCombiner correctly folds the following pairs of shuffles
+; using the following rules:
+;  1. shuffle(shuffle(x, y), undef) -> x
+;  2. shuffle(shuffle(x, y), undef) -> y
+;  3. shuffle(shuffle(x, y), undef) -> shuffle(x, undef)
+;  4. shuffle(shuffle(x, y), undef) -> shuffle(undef, y)
+;
+; Rules 3. and 4. are used only if the resulting shuffle mask is legal.
+
+define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test1
+; Mask: [3,0,0,1]
+; CHECK: pshufd $67
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test2
+; Mask: [2,0,0,3]
+; CHECK: pshufd $-62
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test3
+; Mask: [2,0,0,3]
+; CHECK: pshufd $-62
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 7, i32 1>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 4, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test4
+; Mask: [0,0,0,1]
+; CHECK: pshufd $64
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test5(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 4, i32 3>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test5
+; Mask: [1,1]
+; CHECK: movhlps
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test6(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 4>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test6
+; Mask: [2,0,0,0]
+; CHECK: pshufd $2
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test7(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test7
+; Mask: [0,2,0,2]
+; CHECK: pshufd $-120
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test8(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test8
+; Mask: [1,0,3,0]
+; CHECK: pshufd $49
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test9(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 2, i32 5>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test9
+; Mask: [1,3,0,2]
+; CHECK: pshufd $-115
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test10(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test10
+; Mask: [1,0,1,0]
+; CHECK: pshufd $17
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test11(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 2, i32 5, i32 4>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 0>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test11
+; Mask: [1,0,2,1]
+; CHECK: pshufd $97
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test12(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 0, i32 2, i32 4>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 0, i32 4>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test12
+; Mask: [0,0,0,0]
+; CHECK: pshufd $0
+; CHECK-NEXT: ret
+
+
+; The following pair of shuffles is folded into vector %A.
+define <4 x i32> @test13(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 4, i32 2, i32 6>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 0, i32 2, i32 4>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test13
+; CHECK-NOT: pshufd
+; CHECK: ret
+
+
+; The following pair of shuffles is folded into vector %B.
+define <4 x i32> @test14(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 4, i32 1, i32 4>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test14
+; CHECK-NOT: pshufd
+; CHECK: ret
+
diff --git a/test/CodeGen/X86/computeKnownBits_urem.ll b/test/CodeGen/X86/computeKnownBits_urem.ll
new file mode 100644
index 0000000..9902e6f
--- /dev/null
+++ b/test/CodeGen/X86/computeKnownBits_urem.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
+define i32 @main() #0 {
+entry:
+  %a = alloca i32, align 4
+  store i32 1, i32* %a, align 4
+  %0 = load i32* %a, align 4
+  %or = or i32 1, %0
+  %and = and i32 1, %or
+  %rem = urem i32 %and, 1
+  %add = add i32 %rem, 1
+  ret i32 %add
+}
+; CHECK: $1, %eax
+; CHECK-NEXT: retq
diff --git a/test/CodeGen/X86/cvt16.ll b/test/CodeGen/X86/cvt16.ll
new file mode 100644
index 0000000..951b5c3
--- /dev/null
+++ b/test/CodeGen/X86/cvt16.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c | FileCheck %s -check-prefix=CHECK -check-prefix=LIBCALL
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c | FileCheck %s -check-prefix=CHECK -check-prefix=F16C
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -soft-float=1 -mattr=-f16c | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFLOAT
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -soft-float=1 -mattr=+f16c | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFLOAT
+
+; This is a test for float to half float conversions on x86-64.
+;
+; If flag -soft-float is set, or if there is no F16C support, then:
+; 1) half float to float conversions are
+;    translated into calls to __gnu_h2f_ieee defined
+;    by the compiler runtime library;
+; 2) float to half float conversions are translated into calls
+;    to __gnu_f2h_ieee which expected to be defined by the
+;    compiler runtime library.
+;
+; Otherwise (we have F16C support):
+; 1) half float to float conversion are translated using
+;    vcvtph2ps instructions;
+; 2) float to half float conversions are translated using
+;    vcvtps2ph instructions
+
+
+define void @test1(float %src, i16* %dest) {
+  %1 = tail call i16 @llvm.convert.to.fp16(float %src)
+  store i16 %1, i16* %dest, align 2
+  ret void
+}
+; CHECK-LABEL: test1
+; LIBCALL: callq  __gnu_f2h_ieee
+; SOFTFLOAT: callq  __gnu_f2h_ieee
+; F16C: vcvtps2ph
+; CHECK: ret
+
+
+define float @test2(i16* nocapture %src) {
+  %1 = load i16* %src, align 2
+  %2 = tail call float @llvm.convert.from.fp16(i16 %1)
+  ret float %2
+}
+; CHECK-LABEL: test2:
+; LIBCALL: jmp  __gnu_h2f_ieee
+; SOFTFLOAT: callq  __gnu_h2f_ieee
+; F16C: vcvtph2ps
+; F16C: ret
+
+
+define float @test3(float %src) nounwind uwtable readnone {
+  %1 = tail call i16 @llvm.convert.to.fp16(float %src)
+  %2 = tail call float @llvm.convert.from.fp16(i16 %1)
+  ret float %2
+}
+
+; CHECK-LABEL: test3:
+; LIBCALL: callq  __gnu_f2h_ieee
+; LIBCALL: jmp   __gnu_h2f_ieee
+; SOFTFLOAT: callq  __gnu_f2h_ieee
+; SOFTFLOAT: callq  __gnu_h2f_ieee
+; F16C: vcvtps2ph
+; F16C-NEXT: vcvtph2ps
+; F16C: ret
+
+declare float @llvm.convert.from.fp16(i16) nounwind readnone
+declare i16 @llvm.convert.to.fp16(float) nounwind readnone
+
diff --git a/test/CodeGen/X86/dagcombine-and-setcc.ll b/test/CodeGen/X86/dagcombine-and-setcc.ll
new file mode 100644
index 0000000..e7336a9
--- /dev/null
+++ b/test/CodeGen/X86/dagcombine-and-setcc.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture readonly, ...)
+
+; On X86 1 is true and 0 is false, so we can't perform the combine:
+; (and (setgt X,  true), (setgt Y,  true)) -> (setgt (or X, Y), true)
+; This combine only works if the true value is -1.
+
+
+;CHECK: cmpl
+;CHECK: setg
+;CHECK: cmpl
+;CHECK: setg
+;CHECK: andb
+
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+; Function Attrs: optsize ssp uwtable
+define i32 @foo(i32 %a, i32 %b, i32 * %c) {
+if.else429:
+  %cmp.i1144 = icmp eq i32* %c, null
+  %cmp430 = icmp slt i32 %a, 2
+  %cmp432 = icmp slt i32 %b, 2
+  %or.cond710 = or i1 %cmp430, %cmp432
+  %or.cond710.not = xor i1 %or.cond710, true
+  %brmerge1448 = or i1 %cmp.i1144, %or.cond710.not
+  br i1 %brmerge1448, label %ret1, label %ret2
+
+ret1:
+  ret i32 0
+
+ret2:
+  ret i32 1
+}
+
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) {
+  %res = alloca i32, align 4
+  %t = call i32 @foo(i32 1, i32 2, i32* %res) #3
+  %v = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %t)
+  ret i32 0
+}
+
+
+
diff --git a/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll b/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
index 23f8335..4912213 100644
--- a/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
+++ b/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
@@ -52,58 +52,153 @@ define void @_Z3barii(i32 %param1, i32 %param2) #0 {
 entry:
   %var1 = alloca %struct.AAA3, align 1
   %var2 = alloca %struct.AAA3, align 1
-  %tobool = icmp eq i32 %param2, 0
-  br i1 %tobool, label %if.end, label %if.then
+  tail call void @llvm.dbg.value(metadata !{i32 %param1}, i64 0, metadata !30), !dbg !47
+  tail call void @llvm.dbg.value(metadata !{i32 %param2}, i64 0, metadata !31), !dbg !47
+  tail call void @llvm.dbg.value(metadata !48, i64 0, metadata !32), !dbg !49
+  %tobool = icmp eq i32 %param2, 0, !dbg !50
+  br i1 %tobool, label %if.end, label %if.then, !dbg !50
 
 if.then:                                          ; preds = %entry
-  %call = call i8* @_Z5i2stri(i32 %param2)
-  br label %if.end
+  %call = tail call i8* @_Z5i2stri(i32 %param2), !dbg !52
+  tail call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !32), !dbg !49
+  br label %if.end, !dbg !54
 
 if.end:                                           ; preds = %entry, %if.then
-  call void @llvm.dbg.value(metadata !{%struct.AAA3* %var1}, i64 0, metadata !60)
-  call void @llvm.dbg.value(metadata !62, i64 0, metadata !63)
-  %arraydecay.i = getelementptr inbounds %struct.AAA3* %var1, i64 0, i32 0, i64 0
-  call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0))
-  call void @llvm.dbg.declare(metadata !{%struct.AAA3* %var2}, metadata !38)
-  %arraydecay.i5 = getelementptr inbounds %struct.AAA3* %var2, i64 0, i32 0, i64 0
-  call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0))
-  %tobool1 = icmp eq i32 %param1, 0
-  br i1 %tobool1, label %if.else, label %if.then2
+  tail call void @llvm.dbg.value(metadata !{%struct.AAA3* %var1}, i64 0, metadata !33), !dbg !55
+  tail call void @llvm.dbg.value(metadata !{%struct.AAA3* %var1}, i64 0, metadata !56), !dbg !57
+  tail call void @llvm.dbg.value(metadata !58, i64 0, metadata !59), !dbg !60
+  %arraydecay.i = getelementptr inbounds %struct.AAA3* %var1, i64 0, i32 0, i64 0, !dbg !61
+  call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)), !dbg !61
+  call void @llvm.dbg.value(metadata !{%struct.AAA3* %var2}, i64 0, metadata !34), !dbg !63
+  call void @llvm.dbg.value(metadata !{%struct.AAA3* %var2}, i64 0, metadata !64), !dbg !65
+  call void @llvm.dbg.value(metadata !58, i64 0, metadata !66), !dbg !67
+  %arraydecay.i5 = getelementptr inbounds %struct.AAA3* %var2, i64 0, i32 0, i64 0, !dbg !68
+  call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)), !dbg !68
+  %tobool1 = icmp eq i32 %param1, 0, !dbg !69
+  call void @llvm.dbg.value(metadata !{%struct.AAA3* %var2}, i64 0, metadata !34), !dbg !63
+  br i1 %tobool1, label %if.else, label %if.then2, !dbg !69
 
 if.then2:                                         ; preds = %if.end
-  call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0))
-  br label %if.end3
+  call void @llvm.dbg.value(metadata !{%struct.AAA3* %var2}, i64 0, metadata !71), !dbg !73
+  call void @llvm.dbg.value(metadata !74, i64 0, metadata !75), !dbg !76
+  call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0)), !dbg !76
+  br label %if.end3, !dbg !72
 
 if.else:                                          ; preds = %if.end
-  call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0))
+  call void @llvm.dbg.value(metadata !{%struct.AAA3* %var2}, i64 0, metadata !77), !dbg !79
+  call void @llvm.dbg.value(metadata !80, i64 0, metadata !81), !dbg !82
+  call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0)), !dbg !82
   br label %if.end3
 
 if.end3:                                          ; preds = %if.else, %if.then2
-  call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0))
-  ret void
+  call void @llvm.dbg.value(metadata !{%struct.AAA3* %var1}, i64 0, metadata !33), !dbg !55
+  call void @llvm.dbg.value(metadata !{%struct.AAA3* %var1}, i64 0, metadata !83), !dbg !85
+  call void @llvm.dbg.value(metadata !58, i64 0, metadata !86), !dbg !87
+  call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)), !dbg !87
+  ret void, !dbg !88
 }
 
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
-
-declare i8* @_Z5i2stri(i32) #2
+declare i8* @_Z5i2stri(i32) #1
 
-declare void @_Z3fooPcjPKc(i8*, i32, i8*) #2
+declare void @_Z3fooPcjPKc(i8*, i32, i8*) #1
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata) #2
 
 attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.module.flags = !{!48, !49}
-!llvm.ident = !{!50}
-
-!38 = metadata !{i32 786688, null, metadata !"var2", null, i32 20, null, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [var2] [line 20]
-!48 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!50 = metadata !{metadata !"clang version 3.5 (202418)"}
-!60 = metadata !{i32 786689, null, metadata !"this", null, i32 16777216, null, i32 1088, null} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!62 = metadata !{i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)}
-!63 = metadata !{i32 786689, null, metadata !"value", null, i32 33554439, null, i32 0, null} ; [ DW_TAG_arg_variable ] [value] [line 7]
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!44, !45}
+!llvm.ident = !{!46}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !3, metadata !23, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"dbg-changes-codegen-branch-folding.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !1, null, metadata !"AAA3", i32 4, i64 32, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS4AAA3"} ; [ DW_TAG_structure_type ] [AAA3] [line 4, size 32, align 8, offset 0] [def] [from ]
+!5 = metadata !{metadata !6, metadata !11, metadata !17, metadata !18}
+!6 = metadata !{i32 786445, metadata !1, metadata !"_ZTS4AAA3", metadata !"text", i32 8, i64 32, i64 8, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] [text] [line 8, size 32, align 8, offset 0] [from ]
+!7 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 8, i32 0, i32 0, metadata !8, metadata !9, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char]
+!8 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786465, i64 0, i64 4}        ; [ DW_TAG_subrange_type ] [0, 3]
+!11 = metadata !{i32 786478, metadata !1, metadata !"_ZTS4AAA3", metadata !"AAA3", metadata !"AAA3", metadata !"", i32 5, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, null, i32 5} ; [ DW_TAG_subprogram ] [line 5] [AAA3]
+!12 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = metadata !{null, metadata !14, metadata !15}
+!14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS4AAA3"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS4AAA3]
+!15 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!16 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from char]
+!17 = metadata !{i32 786478, metadata !1, metadata !"_ZTS4AAA3", metadata !"operator=", metadata !"operator=", metadata !"_ZN4AAA3aSEPKc", i32 6, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, null, i32 6} ; [ DW_TAG_subprogram ] [line 6] [operator=]
+!18 = metadata !{i32 786478, metadata !1, metadata !"_ZTS4AAA3", metadata !"operator const char *", metadata !"operator const char *", metadata !"_ZNK4AAA3cvPKcEv", i32 7, metadata !19, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, null, i32 7} ; [ DW_TAG_subprogram ] [line 7] [operator const char *]
+!19 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!20 = metadata !{metadata !15, metadata !21}
+!21 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !22} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from ]
+!22 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS4AAA3"} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from _ZTS4AAA3]
+!23 = metadata !{metadata !24, metadata !35, metadata !40}
+!24 = metadata !{i32 786478, metadata !1, metadata !25, metadata !"bar", metadata !"bar", metadata !"_Z3barii", i32 11, metadata !26, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32, i32)* @_Z3barii, null, null, metadata !29, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [bar]
+!25 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
+!26 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !27, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!27 = metadata !{null, metadata !28, metadata !28}
+!28 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!29 = metadata !{metadata !30, metadata !31, metadata !32, metadata !33, metadata !34}
+!30 = metadata !{i32 786689, metadata !24, metadata !"param1", metadata !25, i32 16777227, metadata !28, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [param1] [line 11]
+!31 = metadata !{i32 786689, metadata !24, metadata !"param2", metadata !25, i32 33554443, metadata !28, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [param2] [line 11]
+!32 = metadata !{i32 786688, metadata !24, metadata !"temp", metadata !25, i32 12, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [temp] [line 12]
+!33 = metadata !{i32 786688, metadata !24, metadata !"var1", metadata !25, i32 17, metadata !"_ZTS4AAA3", i32 0, i32 0} ; [ DW_TAG_auto_variable ] [var1] [line 17]
+!34 = metadata !{i32 786688, metadata !24, metadata !"var2", metadata !25, i32 18, metadata !"_ZTS4AAA3", i32 0, i32 0} ; [ DW_TAG_auto_variable ] [var2] [line 18]
+!35 = metadata !{i32 786478, metadata !1, metadata !"_ZTS4AAA3", metadata !"operator=", metadata !"operator=", metadata !"_ZN4AAA3aSEPKc", i32 6, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, metadata !17, metadata !36, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [operator=]
+!36 = metadata !{metadata !37, metadata !39}
+!37 = metadata !{i32 786689, metadata !35, metadata !"this", null, i32 16777216, metadata !38, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!38 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS4AAA3"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS4AAA3]
+!39 = metadata !{i32 786689, metadata !35, metadata !"value", metadata !25, i32 33554438, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [value] [line 6]
+!40 = metadata !{i32 786478, metadata !1, metadata !"_ZTS4AAA3", metadata !"AAA3", metadata !"AAA3", metadata !"_ZN4AAA3C2EPKc", i32 5, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, metadata !11, metadata !41, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [AAA3]
+!41 = metadata !{metadata !42, metadata !43}
+!42 = metadata !{i32 786689, metadata !40, metadata !"this", null, i32 16777216, metadata !38, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!43 = metadata !{i32 786689, metadata !40, metadata !"value", metadata !25, i32 33554437, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [value] [line 5]
+!44 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!45 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!46 = metadata !{metadata !"clang version 3.5.0 "}
+!47 = metadata !{i32 11, i32 0, metadata !24, null}
+!48 = metadata !{i8* null}
+!49 = metadata !{i32 12, i32 0, metadata !24, null}
+!50 = metadata !{i32 14, i32 0, metadata !51, null}
+!51 = metadata !{i32 786443, metadata !1, metadata !24, i32 14, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
+!52 = metadata !{i32 15, i32 0, metadata !53, null}
+!53 = metadata !{i32 786443, metadata !1, metadata !51, i32 14, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
+!54 = metadata !{i32 16, i32 0, metadata !53, null}
+!55 = metadata !{i32 17, i32 0, metadata !24, null}
+!56 = metadata !{i32 786689, metadata !40, metadata !"this", null, i32 16777216, metadata !38, i32 1088, metadata !55} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!57 = metadata !{i32 0, i32 0, metadata !40, metadata !55}
+!58 = metadata !{i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)}
+!59 = metadata !{i32 786689, metadata !40, metadata !"value", metadata !25, i32 33554437, metadata !15, i32 0, metadata !55} ; [ DW_TAG_arg_variable ] [value] [line 5]
+!60 = metadata !{i32 5, i32 0, metadata !40, metadata !55}
+!61 = metadata !{i32 5, i32 0, metadata !62, metadata !55}
+!62 = metadata !{i32 786443, metadata !1, metadata !40, i32 5, i32 0, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
+!63 = metadata !{i32 18, i32 0, metadata !24, null}
+!64 = metadata !{i32 786689, metadata !40, metadata !"this", null, i32 16777216, metadata !38, i32 1088, metadata !63} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!65 = metadata !{i32 0, i32 0, metadata !40, metadata !63}
+!66 = metadata !{i32 786689, metadata !40, metadata !"value", metadata !25, i32 33554437, metadata !15, i32 0, metadata !63} ; [ DW_TAG_arg_variable ] [value] [line 5]
+!67 = metadata !{i32 5, i32 0, metadata !40, metadata !63}
+!68 = metadata !{i32 5, i32 0, metadata !62, metadata !63}
+!69 = metadata !{i32 20, i32 0, metadata !70, null}
+!70 = metadata !{i32 786443, metadata !1, metadata !24, i32 20, i32 0, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
+!71 = metadata !{i32 786689, metadata !35, metadata !"this", null, i32 16777216, metadata !38, i32 1088, metadata !72} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!72 = metadata !{i32 21, i32 0, metadata !70, null}
+!73 = metadata !{i32 0, i32 0, metadata !35, metadata !72}
+!74 = metadata !{i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0)}
+!75 = metadata !{i32 786689, metadata !35, metadata !"value", metadata !25, i32 33554438, metadata !15, i32 0, metadata !72} ; [ DW_TAG_arg_variable ] [value] [line 6]
+!76 = metadata !{i32 6, i32 0, metadata !35, metadata !72}
+!77 = metadata !{i32 786689, metadata !35, metadata !"this", null, i32 16777216, metadata !38, i32 1088, metadata !78} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!78 = metadata !{i32 23, i32 0, metadata !70, null}
+!79 = metadata !{i32 0, i32 0, metadata !35, metadata !78}
+!80 = metadata !{i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0)}
+!81 = metadata !{i32 786689, metadata !35, metadata !"value", metadata !25, i32 33554438, metadata !15, i32 0, metadata !78} ; [ DW_TAG_arg_variable ] [value] [line 6]
+!82 = metadata !{i32 6, i32 0, metadata !35, metadata !78}
+!83 = metadata !{i32 786689, metadata !35, metadata !"this", null, i32 16777216, metadata !38, i32 1088, metadata !84} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!84 = metadata !{i32 24, i32 0, metadata !24, null}
+!85 = metadata !{i32 0, i32 0, metadata !35, metadata !84}
+!86 = metadata !{i32 786689, metadata !35, metadata !"value", metadata !25, i32 33554438, metadata !15, i32 0, metadata !84} ; [ DW_TAG_arg_variable ] [value] [line 6]
+!87 = metadata !{i32 6, i32 0, metadata !35, metadata !84}
+!88 = metadata !{i32 25, i32 0, metadata !24, null}
diff --git a/test/CodeGen/X86/dllexport-x86_64.ll b/test/CodeGen/X86/dllexport-x86_64.ll
index f4dec4f..0d5afa1 100644
--- a/test/CodeGen/X86/dllexport-x86_64.ll
+++ b/test/CodeGen/X86/dllexport-x86_64.ll
@@ -73,7 +73,7 @@ define weak_odr dllexport void @weak1() {
 @weak_alias = dllexport alias weak_odr void()* @f1
 
 @blob = global [6 x i8] c"\B8*\00\00\00\C3", section ".text", align 16
-@blob_alias = dllexport alias i32 (), [6 x i8]* @blob
+@blob_alias = dllexport alias bitcast ([6 x i8]* @blob to i32 ()*)
 
 ; CHECK: .section .drectve
 ; WIN32: " /EXPORT:Var1,DATA"
diff --git a/test/CodeGen/X86/elf-comdat.ll b/test/CodeGen/X86/elf-comdat.ll
new file mode 100644
index 0000000..c7e6df7
--- /dev/null
+++ b/test/CodeGen/X86/elf-comdat.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple x86_64-pc-linux-gnu < %s | FileCheck %s
+
+$f = comdat any
+@v = global i32 0, comdat $f
+define void @f() comdat $f {
+  ret void
+}
+; CHECK: .section        .text.f,"axG",@progbits,f,comdat
+; CHECK: .globl  f
+; CHECK: .section        .bss.v,"aGw",@nobits,f,comdat
+; CHECK: .globl  v
diff --git a/test/CodeGen/X86/elf-comdat2.ll b/test/CodeGen/X86/elf-comdat2.ll
new file mode 100644
index 0000000..209da39
--- /dev/null
+++ b/test/CodeGen/X86/elf-comdat2.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple x86_64-pc-linux-gnu < %s | FileCheck %s
+
+$foo = comdat any
+@bar = global i32 42, comdat $foo
+@foo = global i32 42
+
+; CHECK:      .type   bar,@object
+; CHECK-NEXT: .section        .data.bar,"aGw",@progbits,foo,comdat
+; CHECK-NEXT: .globl  bar
+; CHECK:      .type   foo,@object
+; CHECK-NEXT: .data
+; CHECK-NEXT: .globl  foo
diff --git a/test/CodeGen/X86/fast-isel-args-fail2.ll b/test/CodeGen/X86/fast-isel-args-fail2.ll
new file mode 100644
index 0000000..08de472
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-args-fail2.ll
@@ -0,0 +1,10 @@
+; RUN: not --crash llc < %s -fast-isel -fast-isel-abort-args -mtriple=x86_64-apple-darwin10
+; REQUIRES: asserts
+
+%struct.s0 = type { x86_fp80, x86_fp80 }
+
+; FastISel cannot handle this case yet. Make sure that we abort.
+define i8* @args_fail(%struct.s0* byval nocapture readonly align 16 %y) {
+  %1 = bitcast %struct.s0* %y to i8*
+  ret i8* %1
+}
diff --git a/test/CodeGen/X86/fast-isel-args.ll b/test/CodeGen/X86/fast-isel-args.ll
index 0f36265..8c86a9c 100644
--- a/test/CodeGen/X86/fast-isel-args.ll
+++ b/test/CodeGen/X86/fast-isel-args.ll
@@ -23,3 +23,27 @@ entry:
   %add2 = add nsw i64 %add, %conv1
   ret i64 %add2
 }
+
+define float @t4(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h) {
+entry:
+  %add1 = fadd float %a, %b
+  %add2 = fadd float %c, %d
+  %add3 = fadd float %e, %f
+  %add4 = fadd float %g, %h
+  %add5 = fadd float %add1, %add2
+  %add6 = fadd float %add3, %add4
+  %add7 = fadd float %add5, %add6
+  ret float %add7
+}
+
+define double @t5(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h) {
+entry:
+  %add1 = fadd double %a, %b
+  %add2 = fadd double %c, %d
+  %add3 = fadd double %e, %f
+  %add4 = fadd double %g, %h
+  %add5 = fadd double %add1, %add2
+  %add6 = fadd double %add3, %add4
+  %add7 = fadd double %add5, %add6
+  ret double %add7
+}
diff --git a/test/CodeGen/X86/fast-isel-branch_weights.ll b/test/CodeGen/X86/fast-isel-branch_weights.ll
new file mode 100644
index 0000000..bc41395
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-branch_weights.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s                             -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
+
+; Test if the BBs are reordred according to their branch weights.
+define i64 @branch_weights_test(i64 %a, i64 %b) {
+; CHECK-LABEL: branch_weights_test
+; CHECK-LABEL: success
+; CHECK-LABEL: fail
+  %1 = icmp ult i64 %a, %b
+  br i1 %1, label %fail, label %success, !prof !0
+
+fail:
+  ret i64 -1
+
+success:
+  ret i64 0
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 0, i32 2147483647}
diff --git a/test/CodeGen/X86/fast-isel-cmp-branch2.ll b/test/CodeGen/X86/fast-isel-cmp-branch2.ll
new file mode 100644
index 0000000..7e45c49
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-cmp-branch2.ll
@@ -0,0 +1,294 @@
+; RUN: llc < %s                             -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
+
+define i32 @fcmp_oeq(float %x, float %y) {
+; CHECK-LABEL: fcmp_oeq
+; CHECK:       ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  jne {{LBB.+_1}}
+; CHECK-NEXT:  jnp {{LBB.+_2}}
+  %1 = fcmp oeq float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ogt(float %x, float %y) {
+; CHECK-LABEL: fcmp_ogt
+; CHECK:       ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  jbe {{LBB.+_1}}
+  %1 = fcmp ogt float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_oge(float %x, float %y) {
+; CHECK-LABEL: fcmp_oge
+; CHECK:       ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  jb {{LBB.+_1}}
+  %1 = fcmp oge float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_olt(float %x, float %y) {
+; CHECK-LABEL: fcmp_olt
+; CHECK:       ucomiss  %xmm0, %xmm1
+; CHECK-NEXT:  jbe {{LBB.+_1}}
+  %1 = fcmp olt float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ole(float %x, float %y) {
+; CHECK-LABEL: fcmp_ole
+; CHECK:       ucomiss  %xmm0, %xmm1
+; CHECK-NEXT:  jb {{LBB.+_1}}
+  %1 = fcmp ole float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_one(float %x, float %y) {
+; CHECK-LABEL: fcmp_one
+; CHECK:       ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  je {{LBB.+_1}}
+  %1 = fcmp one float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ord(float %x, float %y) {
+; CHECK-LABEL: fcmp_ord
+; CHECK:       ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  jp {{LBB.+_1}}
+  %1 = fcmp ord float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_uno(float %x, float %y) {
+; CHECK-LABEL: fcmp_uno
+; CHECK:       ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  jp {{LBB.+_2}}
+  %1 = fcmp uno float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ueq(float %x, float %y) {
+; CHECK-LABEL: fcmp_ueq
+; CHECK:       ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  je {{LBB.+_2}}
+  %1 = fcmp ueq float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ugt(float %x, float %y) {
+; CHECK-LABEL: fcmp_ugt
+; CHECK:       ucomiss  %xmm0, %xmm1
+; CHECK-NEXT:  jae {{LBB.+_1}}
+  %1 = fcmp ugt float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_uge(float %x, float %y) {
+; CHECK-LABEL: fcmp_uge
+; CHECK:       ucomiss  %xmm0, %xmm1
+; CHECK-NEXT:  ja {{LBB.+_1}}
+  %1 = fcmp uge float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ult(float %x, float %y) {
+; CHECK-LABEL: fcmp_ult
+; CHECK:       ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  jae {{LBB.+_1}}
+  %1 = fcmp ult float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ule(float %x, float %y) {
+; CHECK-LABEL: fcmp_ule
+; CHECK:       ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  ja {{LBB.+_1}}
+  %1 = fcmp ule float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_une(float %x, float %y) {
+; CHECK-LABEL: fcmp_une
+; CHECK:       ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  jne {{LBB.+_2}}
+; CHECK-NEXT:  jp  {{LBB.+_2}}
+; CHECK-NEXT:  jmp {{LBB.+_1}}
+  %1 = fcmp une float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_eq(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_eq
+; CHECK:       cmpl %esi, %edi
+; CHECK-NEXT:  jne {{LBB.+_1}}
+  %1 = icmp eq i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_ne(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_ne
+; CHECK:       cmpl %esi, %edi
+; CHECK-NEXT:  je {{LBB.+_1}}
+  %1 = icmp ne i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_ugt(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_ugt
+; CHECK:       cmpl %esi, %edi
+; CHECK-NEXT:  jbe {{LBB.+_1}}
+  %1 = icmp ugt i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_uge(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_uge
+; CHECK:       cmpl %esi, %edi
+; CHECK-NEXT:  jb {{LBB.+_1}}
+  %1 = icmp uge i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_ult(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_ult
+; CHECK:       cmpl %esi, %edi
+; CHECK-NEXT:  jae {{LBB.+_1}}
+  %1 = icmp ult i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_ule(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_ule
+; CHECK:       cmpl %esi, %edi
+; CHECK-NEXT:  ja {{LBB.+_1}}
+  %1 = icmp ule i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_sgt(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_sgt
+; CHECK:       cmpl %esi, %edi
+; CHECK-NEXT:  jle {{LBB.+_1}}
+  %1 = icmp sgt i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_sge(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_sge
+; CHECK:       cmpl %esi, %edi
+; CHECK-NEXT:  jl {{LBB.+_1}}
+  %1 = icmp sge i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_slt(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_slt
+; CHECK:       cmpl %esi, %edi
+; CHECK-NEXT:  jge {{LBB.+_1}}
+  %1 = icmp slt i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_sle(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_sle
+; CHECK:       cmpl %esi, %edi
+; CHECK-NEXT:  jg {{LBB.+_1}}
+  %1 = icmp sle i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
diff --git a/test/CodeGen/X86/fast-isel-cmp-branch3.ll b/test/CodeGen/X86/fast-isel-cmp-branch3.ll
new file mode 100644
index 0000000..a3f6851
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-cmp-branch3.ll
@@ -0,0 +1,470 @@
+; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
+
+define i32 @fcmp_oeq1(float %x) {
+; CHECK-LABEL: fcmp_oeq1
+; CHECK:       ucomiss  %xmm0, %xmm0
+; CHECK-NEXT:  jp {{LBB.+_1}}
+  %1 = fcmp oeq float %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_oeq2(float %x) {
+; CHECK-LABEL: fcmp_oeq2
+; CHECK:       xorps    %xmm1, %xmm1
+; CHECK-NEXT:  ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  jne {{LBB.+_1}}
+; CHECK-NEXT:  jnp {{LBB.+_2}}
+  %1 = fcmp oeq float %x, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ogt1(float %x) {
+; CHECK-LABEL: fcmp_ogt1
+; CHECK-NOT:   ucomiss
+; CHECK:       movl $1, %eax
+  %1 = fcmp ogt float %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ogt2(float %x) {
+; CHECK-LABEL: fcmp_ogt2
+; CHECK:       xorps    %xmm1, %xmm1
+; CHECK-NEXT:  ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  jbe {{LBB.+_1}}
+  %1 = fcmp ogt float %x, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_oge1(float %x) {
+; CHECK-LABEL: fcmp_oge1
+; CHECK:       ucomiss  %xmm0, %xmm0
+; CHECK-NEXT:  jp {{LBB.+_1}}
+  %1 = fcmp oge float %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_oge2(float %x) {
+; CHECK-LABEL: fcmp_oge2
+; CHECK:       xorps    %xmm1, %xmm1
+; CHECK-NEXT:  ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  jb {{LBB.+_1}}
+  %1 = fcmp oge float %x, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_olt1(float %x) {
+; CHECK-LABEL: fcmp_olt1
+; CHECK-NOT:   ucomiss
+; CHECK:       movl $1, %eax
+  %1 = fcmp olt float %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_olt2(float %x) {
+; CHECK-LABEL: fcmp_olt2
+; CHECK:       xorps    %xmm1, %xmm1
+; CHECK-NEXT:  ucomiss  %xmm0, %xmm1
+; CHECK-NEXT:  jbe {{LBB.+_1}}
+  %1 = fcmp olt float %x, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ole1(float %x) {
+; CHECK-LABEL: fcmp_ole1
+; CHECK:       ucomiss  %xmm0, %xmm0
+; CHECK-NEXT:  jp {{LBB.+_1}}
+  %1 = fcmp ole float %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ole2(float %x) {
+; CHECK-LABEL: fcmp_ole2
+; CHECK:       xorps    %xmm1, %xmm1
+; CHECK-NEXT:  ucomiss  %xmm0, %xmm1
+; CHECK-NEXT:  jb {{LBB.+_1}}
+  %1 = fcmp ole float %x, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_one1(float %x) {
+; CHECK-LABEL: fcmp_one1
+; CHECK-NOT:   ucomiss
+; CHECK:       movl $1, %eax
+  %1 = fcmp one float %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_one2(float %x) {
+; CHECK-LABEL: fcmp_one2
+; CHECK:       xorps    %xmm1, %xmm1
+; CHECK-NEXT:  ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  je {{LBB.+_1}}
+  %1 = fcmp one float %x, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ord1(float %x) {
+; CHECK-LABEL: fcmp_ord1
+; CHECK:       ucomiss  %xmm0, %xmm0
+; CHECK-NEXT:  jp {{LBB.+_1}}
+  %1 = fcmp ord float %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ord2(float %x) {
+; CHECK-LABEL: fcmp_ord2
+; CHECK:       ucomiss  %xmm0, %xmm0
+; CHECK-NEXT:  jp {{LBB.+_1}}
+  %1 = fcmp ord float %x, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_uno1(float %x) {
+; CHECK-LABEL: fcmp_uno1
+; CHECK:       ucomiss  %xmm0, %xmm0
+; CHECK-NEXT:  jp {{LBB.+_2}}
+  %1 = fcmp uno float %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_uno2(float %x) {
+; CHECK-LABEL: fcmp_uno2
+; CHECK:       ucomiss  %xmm0, %xmm0
+; CHECK-NEXT:  jp {{LBB.+_2}}
+  %1 = fcmp uno float %x, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ueq1(float %x) {
+; CHECK-LABEL: fcmp_ueq1
+; CHECK-NOT:   ucomiss
+  %1 = fcmp ueq float %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ueq2(float %x) {
+; CHECK-LABEL: fcmp_ueq2
+; CHECK:       xorps    %xmm1, %xmm1
+; CHECK-NEXT:  ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  je {{LBB.+_2}}
+  %1 = fcmp ueq float %x, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ugt1(float %x) {
+; CHECK-LABEL: fcmp_ugt1
+; CHECK:       ucomiss  %xmm0, %xmm0
+; CHECK-NEXT:  jnp {{LBB.+_1}}
+  %1 = fcmp ugt float %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ugt2(float %x) {
+; CHECK-LABEL: fcmp_ugt2
+; CHECK:       xorps    %xmm1, %xmm1
+; CHECK-NEXT:  ucomiss  %xmm0, %xmm1
+; CHECK-NEXT:  jae {{LBB.+_1}}
+  %1 = fcmp ugt float %x, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_uge1(float %x) {
+; CHECK-LABEL: fcmp_uge1
+; CHECK-NOT:   ucomiss
+  %1 = fcmp uge float %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_uge2(float %x) {
+; CHECK-LABEL: fcmp_uge2
+; CHECK:       xorps    %xmm1, %xmm1
+; CHECK-NEXT:  ucomiss  %xmm0, %xmm1
+; CHECK-NEXT:  ja {{LBB.+_1}}
+  %1 = fcmp uge float %x, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ult1(float %x) {
+; CHECK-LABEL: fcmp_ult1
+; CHECK:       ucomiss  %xmm0, %xmm0
+; CHECK-NEXT:  jnp {{LBB.+_1}}
+  %1 = fcmp ult float %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ult2(float %x) {
+; CHECK-LABEL: fcmp_ult2
+; CHECK:       xorps    %xmm1, %xmm1
+; CHECK-NEXT:  ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  jae {{LBB.+_1}}
+  %1 = fcmp ult float %x, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ule1(float %x) {
+; CHECK-LABEL: fcmp_ule1
+; CHECK-NOT:   ucomiss
+  %1 = fcmp ule float %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ule2(float %x) {
+; CHECK-LABEL: fcmp_ule2
+; CHECK:       xorps    %xmm1, %xmm1
+; CHECK-NEXT:  ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  ja {{LBB.+_1}}
+  %1 = fcmp ule float %x, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_une1(float %x) {
+; CHECK-LABEL: fcmp_une1
+; CHECK:       ucomiss  %xmm0, %xmm0
+; CHECK-NEXT:  jnp {{LBB.+_1}}
+  %1 = fcmp une float %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_une2(float %x) {
+; CHECK-LABEL: fcmp_une2
+; CHECK:       xorps    %xmm1, %xmm1
+; CHECK-NEXT:  ucomiss  %xmm1, %xmm0
+; CHECK-NEXT:  jne {{LBB.+_2}}
+; CHECK-NEXT:  jp {{LBB.+_2}}
+; CHECK-NEXT:  jmp {{LBB.+_1}}
+  %1 = fcmp une float %x, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_eq(i32 %x) {
+; CHECK-LABEL: icmp_eq
+; CHECK-NOT:   cmpl
+; CHECK:       movl $0, %eax
+  %1 = icmp eq i32 %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_ne(i32 %x) {
+; CHECK-LABEL: icmp_ne
+; CHECK-NOT:   cmpl
+; CHECK:       movl $1, %eax
+  %1 = icmp ne i32 %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_ugt(i32 %x) {
+; CHECK-LABEL: icmp_ugt
+; CHECK-NOT:   cmpl
+; CHECK:       movl $1, %eax
+  %1 = icmp ugt i32 %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_uge(i32 %x) {
+; CHECK-LABEL: icmp_uge
+; CHECK-NOT:   cmpl
+; CHECK:       movl $0, %eax
+  %1 = icmp uge i32 %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_ult(i32 %x) {
+; CHECK-LABEL: icmp_ult
+; CHECK-NOT:   cmpl
+; CHECK:       movl $1, %eax
+  %1 = icmp ult i32 %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_ule(i32 %x) {
+; CHECK-LABEL: icmp_ule
+; CHECK-NOT:   cmpl
+; CHECK:       movl $0, %eax
+  %1 = icmp ule i32 %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_sgt(i32 %x) {
+; CHECK-LABEL: icmp_sgt
+; CHECK-NOT:   cmpl
+; CHECK:       movl $1, %eax
+  %1 = icmp sgt i32 %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_sge(i32 %x) {
+; CHECK-LABEL: icmp_sge
+; CHECK-NOT:   cmpl
+; CHECK:       movl $0, %eax
+  %1 = icmp sge i32 %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_slt(i32 %x) {
+; CHECK-LABEL: icmp_slt
+; CHECK-NOT:   cmpl
+; CHECK:       movl $1, %eax
+  %1 = icmp slt i32 %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_sle(i32 %x) {
+; CHECK-LABEL: icmp_sle
+; CHECK-NOT:   cmpl
+; CHECK:       movl $0, %eax
+  %1 = icmp sle i32 %x, %x
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
diff --git a/test/CodeGen/X86/fast-isel-cmp.ll b/test/CodeGen/X86/fast-isel-cmp.ll
new file mode 100644
index 0000000..1b72cfc
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-cmp.ll
@@ -0,0 +1,689 @@
+; RUN: llc < %s                             -mtriple=x86_64-apple-darwin10 | FileCheck %s --check-prefix=SDAG
+; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s --check-prefix=FAST
+
+define zeroext i1 @fcmp_oeq(float %x, float %y) {
+; SDAG-LABEL: fcmp_oeq
+; SDAG:       cmpeqss  %xmm1, %xmm0
+; SDAG-NEXT:  movd     %xmm0, %eax
+; SDAG-NEXT:  andl     $1, %eax
+; FAST-LABEL: fcmp_oeq
+; FAST:       ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  sete     %al
+; FAST-NEXT:  setnp    %cl
+; FAST-NEXT:  andb     %al, %cl
+  %1 = fcmp oeq float %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ogt(float %x, float %y) {
+; SDAG-LABEL: fcmp_ogt
+; SDAG:       ucomiss  %xmm1, %xmm0
+; SDAG-NEXT:  seta     %al
+; FAST:       ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  seta     %al
+  %1 = fcmp ogt float %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_oge(float %x, float %y) {
+; SDAG-LABEL: fcmp_oge
+; SDAG:       ucomiss  %xmm1, %xmm0
+; SDAG-NEXT:  setae    %al
+; FAST-LABEL: fcmp_oge
+; FAST:       ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  setae    %al
+  %1 = fcmp oge float %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_olt(float %x, float %y) {
+; SDAG-LABEL: fcmp_olt
+; SDAG:       ucomiss  %xmm0, %xmm1
+; SDAG-NEXT:  seta     %al
+; FAST-LABEL: fcmp_olt
+; FAST:       ucomiss  %xmm0, %xmm1
+; FAST-NEXT:  seta     %al
+  %1 = fcmp olt float %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ole(float %x, float %y) {
+; SDAG-LABEL: fcmp_ole
+; SDAG:       ucomiss  %xmm0, %xmm1
+; SDAG-NEXT:  setae    %al
+; FAST-LABEL: fcmp_ole
+; FAST:       ucomiss  %xmm0, %xmm1
+; FAST-NEXT:  setae    %al
+  %1 = fcmp ole float %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_one(float %x, float %y) {
+; SDAG-LABEL: fcmp_one
+; SDAG:       ucomiss  %xmm1, %xmm0
+; SDAG-NEXT:  setne    %al
+; FAST-LABEL: fcmp_one
+; FAST:       ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  setne    %al
+  %1 = fcmp one float %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ord(float %x, float %y) {
+; SDAG-LABEL: fcmp_ord
+; SDAG:       ucomiss  %xmm1, %xmm0
+; SDAG-NEXT:  setnp    %al
+; FAST-LABEL: fcmp_ord
+; FAST:       ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  setnp    %al
+  %1 = fcmp ord float %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_uno(float %x, float %y) {
+; SDAG-LABEL: fcmp_uno
+; SDAG:       ucomiss  %xmm1, %xmm0
+; SDAG-NEXT:  setp     %al
+; FAST-LABEL: fcmp_uno
+; FAST:       ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  setp     %al
+  %1 = fcmp uno float %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ueq(float %x, float %y) {
+; SDAG-LABEL: fcmp_ueq
+; SDAG:       ucomiss  %xmm1, %xmm0
+; SDAG-NEXT:  sete     %al
+; FAST-LABEL: fcmp_ueq
+; FAST:       ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  sete     %al
+  %1 = fcmp ueq float %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ugt(float %x, float %y) {
+; SDAG-LABEL: fcmp_ugt
+; SDAG:       ucomiss  %xmm0, %xmm1
+; SDAG-NEXT:  setb     %al
+; FAST-LABEL: fcmp_ugt
+; FAST:       ucomiss  %xmm0, %xmm1
+; FAST-NEXT:  setb     %al
+  %1 = fcmp ugt float %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_uge(float %x, float %y) {
+; SDAG-LABEL: fcmp_uge
+; SDAG:       ucomiss  %xmm0, %xmm1
+; SDAG-NEXT:  setbe    %al
+; FAST-LABEL: fcmp_uge
+; FAST:       ucomiss  %xmm0, %xmm1
+; FAST-NEXT:  setbe    %al
+  %1 = fcmp uge float %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ult(float %x, float %y) {
+; SDAG-LABEL: fcmp_ult
+; SDAG:       ucomiss  %xmm1, %xmm0
+; SDAG-NEXT:  setb     %al
+; FAST-LABEL: fcmp_ult
+; FAST:       ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  setb     %al
+  %1 = fcmp ult float %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ule(float %x, float %y) {
+; SDAG-LABEL: fcmp_ule
+; SDAG:       ucomiss  %xmm1, %xmm0
+; SDAG-NEXT:  setbe    %al
+; FAST-LABEL: fcmp_ule
+; FAST:       ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  setbe    %al
+  %1 = fcmp ule float %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_une(float %x, float %y) {
+; SDAG-LABEL: fcmp_une
+; SDAG:       cmpneqss %xmm1, %xmm0
+; SDAG-NEXT:  movd     %xmm0, %eax
+; SDAG-NEXT:  andl     $1, %eax
+; FAST-LABEL: fcmp_une
+; FAST:       ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  setne    %al
+; FAST-NEXT:  setp     %cl
+; FAST-NEXT:  orb      %al, %cl
+  %1 = fcmp une float %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_eq(i32 %x, i32 %y) {
+; SDAG-LABEL: icmp_eq
+; SDAG:       cmpl     %esi, %edi
+; SDAG-NEXT:  sete     %al
+; FAST-LABEL: icmp_eq
+; FAST:       cmpl     %esi, %edi
+; FAST-NEXT:  sete     %al
+  %1 = icmp eq i32 %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_ne(i32 %x, i32 %y) {
+; SDAG-LABEL: icmp_ne
+; SDAG:       cmpl     %esi, %edi
+; SDAG-NEXT:  setne    %al
+; FAST-LABEL: icmp_ne
+; FAST:       cmpl     %esi, %edi
+; FAST-NEXT:  setne    %al
+  %1 = icmp ne i32 %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_ugt(i32 %x, i32 %y) {
+; SDAG-LABEL: icmp_ugt
+; SDAG:       cmpl     %edi, %esi
+; SDAG-NEXT:  setb     %al
+; FAST-LABEL: icmp_ugt
+; FAST:       cmpl     %esi, %edi
+; FAST-NEXT:  seta     %al
+  %1 = icmp ugt i32 %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_uge(i32 %x, i32 %y) {
+; SDAG-LABEL: icmp_uge
+; SDAG:       cmpl     %esi, %edi
+; SDAG-NEXT:  setae    %al
+; FAST-LABEL: icmp_uge
+; FAST:       cmpl     %esi, %edi
+; FAST-NEXT:  setae    %al
+  %1 = icmp uge i32 %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_ult(i32 %x, i32 %y) {
+; SDAG-LABEL: icmp_ult
+; SDAG:       cmpl     %esi, %edi
+; SDAG-NEXT:  setb     %al
+; FAST-LABEL: icmp_ult
+; FAST:       cmpl     %esi, %edi
+; FAST-NEXT:  setb     %al
+  %1 = icmp ult i32 %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_ule(i32 %x, i32 %y) {
+; SDAG-LABEL: icmp_ule
+; SDAG:       cmpl     %esi, %edi
+; SDAG-NEXT:  setbe    %al
+; FAST-LABEL: icmp_ule
+; FAST:       cmpl     %esi, %edi
+; FAST-NEXT:  setbe    %al
+  %1 = icmp ule i32 %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_sgt(i32 %x, i32 %y) {
+; SDAG-LABEL: icmp_sgt
+; SDAG:       cmpl     %esi, %edi
+; SDAG-NEXT:  setg     %al
+; FAST-LABEL: icmp_sgt
+; FAST:       cmpl     %esi, %edi
+; FAST-NEXT:  setg     %al
+  %1 = icmp sgt i32 %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_sge(i32 %x, i32 %y) {
+; SDAG-LABEL: icmp_sge
+; SDAG:       cmpl     %esi, %edi
+; SDAG-NEXT:  setge    %al
+; FAST-LABEL: icmp_sge
+; FAST:       cmpl     %esi, %edi
+; FAST-NEXT:  setge    %al
+  %1 = icmp sge i32 %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_slt(i32 %x, i32 %y) {
+; SDAG-LABEL: icmp_slt
+; SDAG:       cmpl     %esi, %edi
+; SDAG-NEXT:  setl     %al
+; FAST-LABEL: icmp_slt
+; FAST:       cmpl     %esi, %edi
+; FAST-NEXT:  setl     %al
+  %1 = icmp slt i32 %x, %y
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_sle(i32 %x, i32 %y) {
+; SDAG-LABEL: icmp_sle
+; SDAG:       cmpl     %esi, %edi
+; SDAG-NEXT:  setle    %al
+; FAST-LABEL: icmp_sle
+; FAST:       cmpl     %esi, %edi
+; FAST-NEXT:  setle    %al
+  %1 = icmp sle i32 %x, %y
+  ret i1 %1
+}
+
+; Test cmp folding and condition optimization.
+define zeroext i1 @fcmp_oeq2(float %x) {
+; SDAG-LABEL: fcmp_oeq2
+; SDAG:       ucomiss  %xmm0, %xmm0
+; SDAG-NEXT:  setnp    %al
+; FAST-LABEL: fcmp_oeq2
+; FAST:       ucomiss  %xmm0, %xmm0
+; FAST-NEXT:  setnp    %al
+  %1 = fcmp oeq float %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_oeq3(float %x) {
+; SDAG-LABEL: fcmp_oeq3
+; SDAG:       xorps    %xmm1, %xmm1
+; SDAG-NEXT:  cmpeqss  %xmm1, %xmm0
+; SDAG-NEXT:  movd     %xmm0, %eax
+; SDAG-NEXT:  andl     $1, %eax
+; FAST-LABEL: fcmp_oeq3
+; FAST:       xorps    %xmm1, %xmm1
+; FAST-NEXT:  ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  sete     %al
+; FAST-NEXT:  setnp    %cl
+; FAST-NEXT:  andb     %al, %cl
+  %1 = fcmp oeq float %x, 0.000000e+00
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ogt2(float %x) {
+; SDAG-LABEL: fcmp_ogt2
+; SDAG:       xorl     %eax, %eax
+; FAST-LABEL: fcmp_ogt2
+; FAST:       xorl     %eax, %eax
+  %1 = fcmp ogt float %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ogt3(float %x) {
+; SDAG-LABEL: fcmp_ogt3
+; SDAG:       xorps    %xmm1, %xmm1
+; SDAG-NEXT:  ucomiss  %xmm1, %xmm0
+; SDAG-NEXT:  seta     %al
+; FAST-LABEL: fcmp_ogt3
+; FAST:       xorps    %xmm1, %xmm1
+; FAST-NEXT:  ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  seta     %al
+  %1 = fcmp ogt float %x, 0.000000e+00
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_oge2(float %x) {
+; SDAG-LABEL: fcmp_oge2
+; SDAG:       ucomiss  %xmm0, %xmm0
+; SDAG-NEXT:  setnp    %al
+; FAST-LABEL: fcmp_oge2
+; FAST:       ucomiss  %xmm0, %xmm0
+; FAST-NEXT:  setnp    %al
+  %1 = fcmp oge float %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_oge3(float %x) {
+; SDAG-LABEL: fcmp_oge3
+; SDAG:       xorps    %xmm1, %xmm1
+; SDAG-NEXT:  ucomiss  %xmm1, %xmm0
+; SDAG-NEXT:  setae    %al
+; FAST-LABEL: fcmp_oge3
+; FAST:       xorps    %xmm1, %xmm1
+; FAST-NEXT:  ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  setae    %al
+  %1 = fcmp oge float %x, 0.000000e+00
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_olt2(float %x) {
+; SDAG-LABEL: fcmp_olt2
+; SDAG:       xorl     %eax, %eax
+; FAST-LABEL: fcmp_olt2
+; FAST:       xorl     %eax, %eax
+  %1 = fcmp olt float %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_olt3(float %x) {
+; SDAG-LABEL: fcmp_olt3
+; SDAG:       xorps    %xmm1, %xmm1
+; SDAG-NEXT:  ucomiss  %xmm0, %xmm1
+; SDAG-NEXT:  seta     %al
+; FAST-LABEL: fcmp_olt3
+; FAST:       xorps    %xmm1, %xmm1
+; FAST-NEXT:  ucomiss  %xmm0, %xmm1
+; FAST-NEXT:  seta     %al
+  %1 = fcmp olt float %x, 0.000000e+00
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ole2(float %x) {
+; SDAG-LABEL: fcmp_ole2
+; SDAG:       ucomiss  %xmm0, %xmm0
+; SDAG-NEXT:  setnp    %al
+; FAST-LABEL: fcmp_ole2
+; FAST:       ucomiss  %xmm0, %xmm0
+; FAST-NEXT:  setnp    %al
+  %1 = fcmp ole float %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ole3(float %x) {
+; SDAG-LABEL: fcmp_ole3
+; SDAG:       xorps    %xmm1, %xmm1
+; SDAG-NEXT:  ucomiss  %xmm0, %xmm1
+; SDAG-NEXT:  setae    %al
+; FAST-LABEL: fcmp_ole3
+; FAST:       xorps    %xmm1, %xmm1
+; FAST-NEXT:  ucomiss  %xmm0, %xmm1
+; FAST-NEXT:  setae    %al
+  %1 = fcmp ole float %x, 0.000000e+00
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_one2(float %x) {
+; SDAG-LABEL: fcmp_one2
+; SDAG:       xorl     %eax, %eax
+; FAST-LABEL: fcmp_one2
+; FAST:       xorl     %eax, %eax
+  %1 = fcmp one float %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_one3(float %x) {
+; SDAG-LABEL: fcmp_one3
+; SDAG:       xorps    %xmm1, %xmm1
+; SDAG-NEXT:  ucomiss  %xmm1, %xmm0
+; SDAG-NEXT:  setne    %al
+; FAST-LABEL: fcmp_one3
+; FAST:       xorps    %xmm1, %xmm1
+; FAST-NEXT:  ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  setne    %al
+  %1 = fcmp one float %x, 0.000000e+00
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ord2(float %x) {
+; SDAG-LABEL: fcmp_ord2
+; SDAG:       ucomiss  %xmm0, %xmm0
+; SDAG-NEXT:  setnp    %al
+; FAST-LABEL: fcmp_ord2
+; FAST:       ucomiss  %xmm0, %xmm0
+; FAST-NEXT:  setnp    %al
+  %1 = fcmp ord float %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ord3(float %x) {
+; SDAG-LABEL: fcmp_ord3
+; SDAG:       ucomiss  %xmm0, %xmm0
+; SDAG-NEXT:  setnp    %al
+; FAST-LABEL: fcmp_ord3
+; FAST:       ucomiss  %xmm0, %xmm0
+; FAST-NEXT:  setnp    %al
+  %1 = fcmp ord float %x, 0.000000e+00
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_uno2(float %x) {
+; SDAG-LABEL: fcmp_uno2
+; SDAG:       ucomiss  %xmm0, %xmm0
+; SDAG-NEXT:  setp     %al
+; FAST-LABEL: fcmp_uno2
+; FAST:       ucomiss  %xmm0, %xmm0
+; FAST-NEXT:  setp     %al
+  %1 = fcmp uno float %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_uno3(float %x) {
+; SDAG-LABEL: fcmp_uno3
+; SDAG:       ucomiss  %xmm0, %xmm0
+; SDAG-NEXT:  setp     %al
+; FAST-LABEL: fcmp_uno3
+; FAST:       ucomiss  %xmm0, %xmm0
+; FAST-NEXT:  setp     %al
+  %1 = fcmp uno float %x, 0.000000e+00
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ueq2(float %x) {
+; SDAG-LABEL: fcmp_ueq2
+; SDAG:       movb     $1, %al
+; FAST-LABEL: fcmp_ueq2
+; FAST:       movb     $1, %al
+  %1 = fcmp ueq float %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ueq3(float %x) {
+; SDAG-LABEL: fcmp_ueq3
+; SDAG:       xorps    %xmm1, %xmm1
+; SDAG-NEXT:  ucomiss  %xmm1, %xmm0
+; SDAG-NEXT:  sete     %al
+; FAST-LABEL: fcmp_ueq3
+; FAST:       xorps    %xmm1, %xmm1
+; FAST-NEXT:  ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  sete     %al
+  %1 = fcmp ueq float %x, 0.000000e+00
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ugt2(float %x) {
+; SDAG-LABEL: fcmp_ugt2
+; SDAG:       ucomiss  %xmm0, %xmm0
+; SDAG-NEXT:  setp     %al
+; FAST-LABEL: fcmp_ugt2
+; FAST:       ucomiss  %xmm0, %xmm0
+; FAST-NEXT:  setp     %al
+  %1 = fcmp ugt float %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ugt3(float %x) {
+; SDAG-LABEL: fcmp_ugt3
+; SDAG:       xorps    %xmm1, %xmm1
+; SDAG-NEXT:  ucomiss  %xmm0, %xmm1
+; SDAG-NEXT:  setb     %al
+; FAST-LABEL: fcmp_ugt3
+; FAST:       xorps    %xmm1, %xmm1
+; FAST-NEXT:  ucomiss  %xmm0, %xmm1
+; FAST-NEXT:  setb     %al
+  %1 = fcmp ugt float %x, 0.000000e+00
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_uge2(float %x) {
+; SDAG-LABEL: fcmp_uge2
+; SDAG:       movb     $1, %al
+; FAST-LABEL: fcmp_uge2
+; FAST:       movb     $1, %al
+  %1 = fcmp uge float %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_uge3(float %x) {
+; SDAG-LABEL: fcmp_uge3
+; SDAG:       xorps    %xmm1, %xmm1
+; SDAG-NEXT:  ucomiss  %xmm0, %xmm1
+; SDAG-NEXT:  setbe    %al
+; FAST-LABEL: fcmp_uge3
+; FAST:       xorps    %xmm1, %xmm1
+; FAST-NEXT:  ucomiss  %xmm0, %xmm1
+; FAST-NEXT:  setbe    %al
+  %1 = fcmp uge float %x, 0.000000e+00
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ult2(float %x) {
+; SDAG-LABEL: fcmp_ult2
+; SDAG:       ucomiss  %xmm0, %xmm0
+; SDAG-NEXT:  setp     %al
+; FAST-LABEL: fcmp_ult2
+; FAST:       ucomiss  %xmm0, %xmm0
+; FAST-NEXT:  setp     %al
+  %1 = fcmp ult float %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ult3(float %x) {
+; SDAG-LABEL: fcmp_ult3
+; SDAG:       xorps    %xmm1, %xmm1
+; SDAG-NEXT:  ucomiss  %xmm1, %xmm0
+; SDAG-NEXT:  setb     %al
+; FAST-LABEL: fcmp_ult3
+; FAST:       xorps    %xmm1, %xmm1
+; FAST-NEXT:  ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  setb     %al
+  %1 = fcmp ult float %x, 0.000000e+00
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ule2(float %x) {
+; SDAG-LABEL: fcmp_ule2
+; SDAG:       movb     $1, %al
+; FAST-LABEL: fcmp_ule2
+; FAST:       movb     $1, %al
+  %1 = fcmp ule float %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ule3(float %x) {
+; SDAG-LABEL: fcmp_ule3
+; SDAG:       xorps    %xmm1, %xmm1
+; SDAG-NEXT:  ucomiss  %xmm1, %xmm0
+; SDAG-NEXT:  setbe    %al
+; FAST-LABEL: fcmp_ule3
+; FAST:       xorps    %xmm1, %xmm1
+; FAST-NEXT:  ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  setbe    %al
+  %1 = fcmp ule float %x, 0.000000e+00
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_une2(float %x) {
+; SDAG-LABEL: fcmp_une2
+; SDAG:       ucomiss  %xmm0, %xmm0
+; SDAG-NEXT:  setp     %al
+; FAST-LABEL: fcmp_une2
+; FAST:       ucomiss  %xmm0, %xmm0
+; FAST-NEXT:  setp     %al
+  %1 = fcmp une float %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_une3(float %x) {
+; SDAG-LABEL: fcmp_une3
+; SDAG:       xorps    %xmm1, %xmm1
+; SDAG-NEXT:  cmpneqss %xmm1, %xmm0
+; SDAG-NEXT:  movd     %xmm0, %eax
+; SDAG-NEXT:  andl     $1, %eax
+; FAST-LABEL: fcmp_une3
+; FAST:       xorps    %xmm1, %xmm1
+; FAST-NEXT:  ucomiss  %xmm1, %xmm0
+; FAST-NEXT:  setne    %al
+; FAST-NEXT:  setp     %cl
+; FAST-NEXT:  orb      %al, %cl
+  %1 = fcmp une float %x, 0.000000e+00
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_eq2(i32 %x) {
+; SDAG-LABEL: icmp_eq2
+; SDAG:       movb     $1, %al
+; FAST-LABEL: icmp_eq2
+; FAST:       movb     $1, %al
+  %1 = icmp eq i32 %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_ne2(i32 %x) {
+; SDAG-LABEL: icmp_ne2
+; SDAG:       xorl     %eax, %eax
+; FAST-LABEL: icmp_ne2
+; FAST:       xorl     %eax, %eax
+  %1 = icmp ne i32 %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_ugt2(i32 %x) {
+; SDAG-LABEL: icmp_ugt2
+; SDAG:       xorl     %eax, %eax
+; FAST-LABEL: icmp_ugt2
+; FAST:       xorl     %eax, %eax
+  %1 = icmp ugt i32 %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_uge2(i32 %x) {
+; SDAG-LABEL: icmp_uge2
+; SDAG:       movb     $1, %al
+; FAST-LABEL: icmp_uge2
+; FAST:       movb     $1, %al
+  %1 = icmp uge i32 %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_ult2(i32 %x) {
+; SDAG-LABEL: icmp_ult2
+; SDAG:       xorl     %eax, %eax
+; FAST-LABEL: icmp_ult2
+; FAST:       xorl     %eax, %eax
+  %1 = icmp ult i32 %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_ule2(i32 %x) {
+; SDAG-LABEL: icmp_ule2
+; SDAG:       movb     $1, %al
+; FAST-LABEL: icmp_ule2
+; FAST:       movb     $1, %al
+  %1 = icmp ule i32 %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_sgt2(i32 %x) {
+; SDAG-LABEL: icmp_sgt2
+; SDAG:       xorl     %eax, %eax
+; FAST-LABEL: icmp_sgt2
+; FAST:       xorl     %eax, %eax
+  %1 = icmp sgt i32 %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_sge2(i32 %x) {
+; SDAG-LABEL: icmp_sge2
+; SDAG:       movb     $1, %al
+; FAST-LABEL: icmp_sge2
+; FAST:       movb     $1, %al
+  %1 = icmp sge i32 %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_slt2(i32 %x) {
+; SDAG-LABEL: icmp_slt2
+; SDAG:       xorl     %eax, %eax
+; FAST-LABEL: icmp_slt2
+; FAST:       xorl     %eax, %eax
+  %1 = icmp slt i32 %x, %x
+  ret i1 %1
+}
+
+define zeroext i1 @icmp_sle2(i32 %x) {
+; SDAG-LABEL: icmp_sle2
+; SDAG:       movb     $1, %al
+; FAST-LABEL: icmp_sle2
+; FAST:       movb     $1, %al
+  %1 = icmp sle i32 %x, %x
+  ret i1 %1
+}
+
diff --git a/test/CodeGen/X86/fast-isel-fold-mem.ll b/test/CodeGen/X86/fast-isel-fold-mem.ll
new file mode 100644
index 0000000..a945779
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-fold-mem.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s                             -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin | FileCheck %s
+
+define i64 @fold_load(i64* %a, i64 %b) {
+; CHECK-LABEL: fold_load
+; CHECK:       addq  (%rdi), %rsi
+; CHECK-NEXT:  movq  %rsi, %rax
+  %1 = load i64* %a, align 8
+  %2 = add i64 %1, %b
+  ret i64 %2
+}
+
diff --git a/test/CodeGen/X86/fast-isel-select-cmov.ll b/test/CodeGen/X86/fast-isel-select-cmov.ll
new file mode 100644
index 0000000..8008e28
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-select-cmov.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10                  | FileCheck %s
+
+; Test conditional move for the supported types (i16, i32, and i32) and
+; conditon input (argument or cmp). Currently i8 is not supported.
+
+define zeroext i16 @select_cmov_i16(i1 zeroext %cond, i16 zeroext %a, i16 zeroext %b) {
+; CHECK-LABEL: select_cmov_i16
+; CHECK:       testb   $1, %dil
+; CHECK-NEXT:  cmovew  %dx, %si
+; CHECK-NEXT:  movzwl  %si, %eax
+  %1 = select i1 %cond, i16 %a, i16 %b
+  ret i16 %1
+}
+
+define zeroext i16 @select_cmp_cmov_i16(i16 zeroext %a, i16 zeroext %b) {
+; CHECK-LABEL: select_cmp_cmov_i16
+; CHECK:       cmpw    %si, %di
+; CHECK-NEXT:  cmovbw  %di, %si
+; CHECK-NEXT:  movzwl  %si, %eax
+  %1 = icmp ult i16 %a, %b
+  %2 = select i1 %1, i16 %a, i16 %b
+  ret i16 %2
+}
+
+define i32 @select_cmov_i32(i1 zeroext %cond, i32 %a, i32 %b) {
+; CHECK-LABEL: select_cmov_i32
+; CHECK:       testb   $1, %dil
+; CHECK-NEXT:  cmovel  %edx, %esi
+; CHECK-NEXT:  movl    %esi, %eax
+  %1 = select i1 %cond, i32 %a, i32 %b
+  ret i32 %1
+}
+
+define i32 @select_cmp_cmov_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: select_cmp_cmov_i32
+; CHECK:       cmpl    %esi, %edi
+; CHECK-NEXT:  cmovbl  %edi, %esi
+; CHECK-NEXT:  movl    %esi, %eax
+  %1 = icmp ult i32 %a, %b
+  %2 = select i1 %1, i32 %a, i32 %b
+  ret i32 %2
+}
+
+define i64 @select_cmov_i64(i1 zeroext %cond, i64 %a, i64 %b) {
+; CHECK-LABEL: select_cmov_i64
+; CHECK:       testb   $1, %dil
+; CHECK-NEXT:  cmoveq  %rdx, %rsi
+; CHECK-NEXT:  movq    %rsi, %rax
+  %1 = select i1 %cond, i64 %a, i64 %b
+  ret i64 %1
+}
+
+define i64 @select_cmp_cmov_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: select_cmp_cmov_i64
+; CHECK:       cmpq    %rsi, %rdi
+; CHECK-NEXT:  cmovbq  %rdi, %rsi
+; CHECK-NEXT:  movq    %rsi, %rax
+  %1 = icmp ult i64 %a, %b
+  %2 = select i1 %1, i64 %a, i64 %b
+  ret i64 %2
+}
+
diff --git a/test/CodeGen/X86/fast-isel-select-cmov2.ll b/test/CodeGen/X86/fast-isel-select-cmov2.ll
new file mode 100644
index 0000000..658098f
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-select-cmov2.ll
@@ -0,0 +1,255 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10                             | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s
+
+; Test all the cmp predicates that can feed an integer conditional move.
+
+define i64 @select_fcmp_false_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_false_cmov
+; CHECK:       movq %rsi, %rax
+; CHECK-NEXT:  retq
+  %1 = fcmp false double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_oeq_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_oeq_cmov
+; CHECK:       ucomisd %xmm1, %xmm0
+; CHECK-NEXT:  setnp %al
+; CHECK-NEXT:  sete %cl
+; CHECK-NEXT:  testb %al, %cl
+; CHECK-NEXT:  cmoveq %rsi, %rdi
+  %1 = fcmp oeq double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_ogt_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_ogt_cmov
+; CHECK:       ucomisd %xmm1, %xmm0
+; CHECK-NEXT:  cmovbeq %rsi, %rdi
+  %1 = fcmp ogt double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_oge_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_oge_cmov
+; CHECK:       ucomisd %xmm1, %xmm0
+; CHECK-NEXT:  cmovbq %rsi, %rdi
+  %1 = fcmp oge double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_olt_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_olt_cmov
+; CHECK:       ucomisd %xmm0, %xmm1
+; CHECK-NEXT:  cmovbeq %rsi, %rdi
+  %1 = fcmp olt double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_ole_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_ole_cmov
+; CHECK:       ucomisd %xmm0, %xmm1
+; CHECK-NEXT:  cmovbq %rsi, %rdi
+  %1 = fcmp ole double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_one_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_one_cmov
+; CHECK:       ucomisd %xmm1, %xmm0
+; CHECK-NEXT:  cmoveq %rsi, %rdi
+  %1 = fcmp one double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_ord_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_ord_cmov
+; CHECK:       ucomisd %xmm1, %xmm0
+; CHECK-NEXT:  cmovpq %rsi, %rdi
+  %1 = fcmp ord double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_uno_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_uno_cmov
+; CHECK:       ucomisd %xmm1, %xmm0
+; CHECK-NEXT:  cmovnpq %rsi, %rdi
+  %1 = fcmp uno double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_ueq_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_ueq_cmov
+; CHECK:       ucomisd %xmm1, %xmm0
+; CHECK-NEXT:  cmovneq %rsi, %rdi
+  %1 = fcmp ueq double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_ugt_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_ugt_cmov
+; CHECK:       ucomisd %xmm0, %xmm1
+; CHECK-NEXT:  cmovaeq %rsi, %rdi
+  %1 = fcmp ugt double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_uge_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_uge_cmov
+; CHECK:       ucomisd %xmm0, %xmm1
+; CHECK-NEXT:  cmovaq %rsi, %rdi
+  %1 = fcmp uge double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_ult_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_ult_cmov
+; CHECK:       ucomisd %xmm1, %xmm0
+; CHECK-NEXT:  cmovaeq %rsi, %rdi
+  %1 = fcmp ult double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_ule_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_ule_cmov
+; CHECK:       ucomisd %xmm1, %xmm0
+; CHECK-NEXT:  cmovaq %rsi, %rdi
+  %1 = fcmp ule double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_une_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_une_cmov
+; CHECK:       ucomisd %xmm1, %xmm0
+; CHECK-NEXT:  setp %al
+; CHECK-NEXT:  setne %cl
+; CHECK-NEXT:  orb %al, %cl
+; CHECK-NEXT:  cmoveq %rsi, %rdi
+  %1 = fcmp une double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_fcmp_true_cmov(double %a, double %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_fcmp_true_cmov
+; CHECK:       movq %rdi, %rax
+  %1 = fcmp true double %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_icmp_eq_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_icmp_eq_cmov
+; CHECK:       cmpq    %rsi, %rdi
+; CHECK-NEXT:  cmovneq %rcx, %rdx
+; CHECK-NEXT:  movq    %rdx, %rax
+  %1 = icmp eq i64 %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_icmp_ne_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_icmp_ne_cmov
+; CHECK:       cmpq    %rsi, %rdi
+; CHECK-NEXT:  cmoveq  %rcx, %rdx
+; CHECK-NEXT:  movq    %rdx, %rax
+  %1 = icmp ne i64 %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_icmp_ugt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_icmp_ugt_cmov
+; CHECK:       cmpq    %rsi, %rdi
+; CHECK-NEXT:  cmovbeq %rcx, %rdx
+; CHECK-NEXT:  movq    %rdx, %rax
+  %1 = icmp ugt i64 %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+
+define i64 @select_icmp_uge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_icmp_uge_cmov
+; CHECK:       cmpq    %rsi, %rdi
+; CHECK-NEXT:  cmovbq  %rcx, %rdx
+; CHECK-NEXT:  movq    %rdx, %rax
+  %1 = icmp uge i64 %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_icmp_ult_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_icmp_ult_cmov
+; CHECK:       cmpq    %rsi, %rdi
+; CHECK-NEXT:  cmovaeq %rcx, %rdx
+; CHECK-NEXT:  movq    %rdx, %rax
+  %1 = icmp ult i64 %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_icmp_ule_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_icmp_ule_cmov
+; CHECK:       cmpq    %rsi, %rdi
+; CHECK-NEXT:  cmovaq  %rcx, %rdx
+; CHECK-NEXT:  movq    %rdx, %rax
+  %1 = icmp ule i64 %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_icmp_sgt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_icmp_sgt_cmov
+; CHECK:       cmpq    %rsi, %rdi
+; CHECK-NEXT:  cmovleq %rcx, %rdx
+; CHECK-NEXT:  movq    %rdx, %rax
+  %1 = icmp sgt i64 %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_icmp_sge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_icmp_sge_cmov
+; CHECK:       cmpq    %rsi, %rdi
+; CHECK-NEXT:  cmovlq  %rcx, %rdx
+; CHECK-NEXT:  movq    %rdx, %rax
+  %1 = icmp sge i64 %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_icmp_slt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_icmp_slt_cmov
+; CHECK:       cmpq    %rsi, %rdi
+; CHECK-NEXT:  cmovgeq %rcx, %rdx
+; CHECK-NEXT:  movq    %rdx, %rax
+  %1 = icmp slt i64 %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
+define i64 @select_icmp_sle_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
+; CHECK-LABEL: select_icmp_sle_cmov
+; CHECK:       cmpq    %rsi, %rdi
+; CHECK-NEXT:  cmovgq  %rcx, %rdx
+; CHECK-NEXT:  movq    %rdx, %rax
+  %1 = icmp sle i64 %a, %b
+  %2 = select i1 %1, i64 %c, i64 %d
+  ret i64 %2
+}
+
diff --git a/test/CodeGen/X86/fast-isel-select-cmp.ll b/test/CodeGen/X86/fast-isel-select-cmp.ll
new file mode 100644
index 0000000..1af30e9
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-select-cmp.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -O0 -mtriple=x86_64-apple-darwin10 | FileCheck %s
+
+; Test if we do not fold the cmp into select if the instructions are in
+; different basic blocks.
+
+define i32 @select_cmp_cmov_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: select_cmp_cmov_i32
+; CHECK-LABEL: continue
+; CHECK-NOT:   cmp
+  %1 = icmp ult i32 %a, %b
+  br i1 %1, label %continue, label %exit
+
+continue:
+  %2 = select i1 %1, i32 %a, i32 %b
+  ret i32 %2
+
+exit:
+  ret i32 -1
+}
+
+define float @select_fcmp_oeq_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_oeq_f32
+; CHECK-LABEL: continue
+; CHECK-NOT:   cmp
+  %1 = fcmp oeq float %a, %b
+  br i1 %1, label %continue, label %exit
+
+continue:
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+
+exit:
+  ret float -1.0
+}
+
+define float @select_fcmp_one_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_one_f32
+; CHECK-LABEL: continue
+; CHECK-NOT:   ucomi
+  %1 = fcmp one float %a, %b
+  br i1 %1, label %continue, label %exit
+
+continue:
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+
+exit:
+  ret float -1.0
+}
+
diff --git a/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll b/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll
new file mode 100644
index 0000000..1ec4d64
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll
@@ -0,0 +1,138 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10                                              | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort                  | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10                             -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort -mcpu=corei7-avx | FileCheck %s
+
+
+define float @select_fcmp_one_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_one_f32
+; CHECK:       ucomiss %xmm1, %xmm0
+; CHECK-NEXT:  jne [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  movaps %xmm2, %xmm0
+  %1 = fcmp one float %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define double @select_fcmp_one_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_one_f64
+; CHECK:       ucomisd %xmm1, %xmm0
+; CHECK-NEXT:  jne [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  movaps  %xmm2, %xmm0
+  %1 = fcmp one double %a, %b
+  %2 = select i1 %1, double %c, double %d
+  ret double %2
+}
+
+define float @select_icmp_eq_f32(i64 %a, i64 %b, float %c, float %d) {
+; CHECK-LABEL: select_icmp_eq_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  je [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp eq i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_ne_f32(i64 %a, i64 %b, float %c, float %d) {
+; CHECK-LABEL: select_icmp_ne_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jne [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp ne i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_ugt_f32(i64 %a, i64 %b, float %c, float %d) {
+; CHECK-LABEL: select_icmp_ugt_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  ja [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp ugt i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_uge_f32(i64 %a, i64 %b, float %c, float %d) {
+; CHECK-LABEL: select_icmp_uge_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jae [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp uge i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_ult_f32(i64 %a, i64 %b, float %c, float %d) {
+; CHECK-LABEL: select_icmp_ult_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jb [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp ult i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_ule_f32(i64 %a, i64 %b, float %c, float %d) {
+; CHECK-LABEL: select_icmp_ule_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jbe [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp ule i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_sgt_f32(i64 %a, i64 %b, float %c, float %d) {
+; CHECK-LABEL: select_icmp_sgt_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jg [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp sgt i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_sge_f32(i64 %a, i64 %b, float %c, float %d) {
+; CHECK-LABEL: select_icmp_sge_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jge [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp sge i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_slt_f32(i64 %a, i64 %b, float %c, float %d) {
+; CHECK-LABEL: select_icmp_slt_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jl [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp slt i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_sle_f32(i64 %a, i64 %b, float %c, float %d) {
+; CHECK-LABEL: select_icmp_sle_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jle [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp sle i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
diff --git a/test/CodeGen/X86/fast-isel-select-sse.ll b/test/CodeGen/X86/fast-isel-select-sse.ll
new file mode 100644
index 0000000..3c03a03
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-select-sse.ll
@@ -0,0 +1,391 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10                                              | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort                  | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10                             -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX
+
+; Test all cmp predicates that can be used with SSE.
+
+define float @select_fcmp_oeq_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_oeq_f32
+; CHECK:       cmpeqss %xmm1, %xmm0
+; CHECK-NEXT:  andps   %xmm0, %xmm2
+; CHECK-NEXT:  andnps  %xmm3, %xmm0
+; CHECK-NEXT:  orps    %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_oeq_f32
+; AVX:       vcmpeqss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandps   %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnps  %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorps    %xmm1, %xmm0, %xmm0
+  %1 = fcmp oeq float %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define double @select_fcmp_oeq_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_oeq_f64
+; CHECK:       cmpeqsd %xmm1, %xmm0
+; CHECK-NEXT:  andpd   %xmm0, %xmm2
+; CHECK-NEXT:  andnpd  %xmm3, %xmm0
+; CHECK-NEXT:  orpd    %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_oeq_f64
+; AVX:       vcmpeqsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandpd   %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnpd  %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorpd    %xmm1, %xmm0, %xmm0
+  %1 = fcmp oeq double %a, %b
+  %2 = select i1 %1, double %c, double %d
+  ret double %2
+}
+
+define float @select_fcmp_ogt_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_ogt_f32
+; CHECK:       cmpltss %xmm0, %xmm1
+; CHECK-NEXT:  andps   %xmm1, %xmm2
+; CHECK-NEXT:  andnps  %xmm3, %xmm1
+; CHECK-NEXT:  orps    %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_ogt_f32
+; AVX:       vcmpltss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:  vandps   %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnps  %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorps    %xmm1, %xmm0, %xmm0
+  %1 = fcmp ogt float %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define double @select_fcmp_ogt_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_ogt_f64
+; CHECK:       cmpltsd %xmm0, %xmm1
+; CHECK-NEXT:  andpd   %xmm1, %xmm2
+; CHECK-NEXT:  andnpd  %xmm3, %xmm1
+; CHECK-NEXT:  orpd    %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_ogt_f64
+; AVX:       vcmpltsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:  vandpd   %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnpd  %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorpd    %xmm1, %xmm0, %xmm0
+  %1 = fcmp ogt double %a, %b
+  %2 = select i1 %1, double %c, double %d
+  ret double %2
+}
+
+define float @select_fcmp_oge_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_oge_f32
+; CHECK:       cmpless %xmm0, %xmm1
+; CHECK-NEXT:  andps   %xmm1, %xmm2
+; CHECK-NEXT:  andnps  %xmm3, %xmm1
+; CHECK-NEXT:  orps    %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_oge_f32
+; AVX:       vcmpless %xmm0, %xmm1, %xmm0
+; AVX-NEXT:  vandps   %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnps  %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorps    %xmm1, %xmm0, %xmm0
+  %1 = fcmp oge float %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define double @select_fcmp_oge_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_oge_f64
+; CHECK:       cmplesd %xmm0, %xmm1
+; CHECK-NEXT:  andpd   %xmm1, %xmm2
+; CHECK-NEXT:  andnpd  %xmm3, %xmm1
+; CHECK-NEXT:  orpd    %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_oge_f64
+; AVX:       vcmplesd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:  vandpd   %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnpd  %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorpd    %xmm1, %xmm0, %xmm0
+  %1 = fcmp oge double %a, %b
+  %2 = select i1 %1, double %c, double %d
+  ret double %2
+}
+
+define float @select_fcmp_olt_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_olt_f32
+; CHECK:       cmpltss %xmm1, %xmm0
+; CHECK-NEXT:  andps   %xmm0, %xmm2
+; CHECK-NEXT:  andnps  %xmm3, %xmm0
+; CHECK-NEXT:  orps    %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_olt_f32
+; AVX:       vcmpltss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandps   %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnps  %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorps    %xmm1, %xmm0, %xmm0
+  %1 = fcmp olt float %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define double @select_fcmp_olt_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_olt_f64
+; CHECK:       cmpltsd %xmm1, %xmm0
+; CHECK-NEXT:  andpd   %xmm0, %xmm2
+; CHECK-NEXT:  andnpd  %xmm3, %xmm0
+; CHECK-NEXT:  orpd    %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_olt_f64
+; AVX:       vcmpltsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandpd   %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnpd  %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorpd    %xmm1, %xmm0, %xmm0
+  %1 = fcmp olt double %a, %b
+  %2 = select i1 %1, double %c, double %d
+  ret double %2
+}
+
+define float @select_fcmp_ole_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_ole_f32
+; CHECK:       cmpless %xmm1, %xmm0
+; CHECK-NEXT:  andps   %xmm0, %xmm2
+; CHECK-NEXT:  andnps  %xmm3, %xmm0
+; CHECK-NEXT:  orps    %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_ole_f32
+; AVX:       vcmpless %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandps   %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnps  %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorps    %xmm1, %xmm0, %xmm0
+  %1 = fcmp ole float %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define double @select_fcmp_ole_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_ole_f64
+; CHECK:       cmplesd %xmm1, %xmm0
+; CHECK-NEXT:  andpd   %xmm0, %xmm2
+; CHECK-NEXT:  andnpd  %xmm3, %xmm0
+; CHECK-NEXT:  orpd    %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_ole_f64
+; AVX:       vcmplesd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandpd   %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnpd  %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorpd    %xmm1, %xmm0, %xmm0
+  %1 = fcmp ole double %a, %b
+  %2 = select i1 %1, double %c, double %d
+  ret double %2
+}
+
+define float @select_fcmp_ord_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_ord_f32
+; CHECK:       cmpordss %xmm1, %xmm0
+; CHECK-NEXT:  andps    %xmm0, %xmm2
+; CHECK-NEXT:  andnps   %xmm3, %xmm0
+; CHECK-NEXT:  orps     %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_ord_f32
+; AVX:       vcmpordss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandps    %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnps   %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorps     %xmm1, %xmm0, %xmm0
+  %1 = fcmp ord float %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define double @select_fcmp_ord_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_ord_f64
+; CHECK:       cmpordsd %xmm1, %xmm0
+; CHECK-NEXT:  andpd    %xmm0, %xmm2
+; CHECK-NEXT:  andnpd   %xmm3, %xmm0
+; CHECK-NEXT:  orpd     %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_ord_f64
+; AVX:       vcmpordsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandpd    %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnpd   %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorpd     %xmm1, %xmm0, %xmm0
+  %1 = fcmp ord double %a, %b
+  %2 = select i1 %1, double %c, double %d
+  ret double %2
+}
+
+define float @select_fcmp_uno_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_uno_f32
+; CHECK:       cmpunordss %xmm1, %xmm0
+; CHECK-NEXT:  andps      %xmm0, %xmm2
+; CHECK-NEXT:  andnps     %xmm3, %xmm0
+; CHECK-NEXT:  orps       %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_uno_f32
+; AVX:       vcmpunordss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandps      %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnps     %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorps       %xmm1, %xmm0, %xmm0
+  %1 = fcmp uno float %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define double @select_fcmp_uno_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_uno_f64
+; CHECK:       cmpunordsd %xmm1, %xmm0
+; CHECK-NEXT:  andpd      %xmm0, %xmm2
+; CHECK-NEXT:  andnpd     %xmm3, %xmm0
+; CHECK-NEXT:  orpd       %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_uno_f64
+; AVX:       vcmpunordsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandpd      %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnpd     %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorpd       %xmm1, %xmm0, %xmm0
+  %1 = fcmp uno double %a, %b
+  %2 = select i1 %1, double %c, double %d
+  ret double %2
+}
+
+define float @select_fcmp_ugt_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_ugt_f32
+; CHECK:       cmpnless %xmm1, %xmm0
+; CHECK-NEXT:  andps    %xmm0, %xmm2
+; CHECK-NEXT:  andnps   %xmm3, %xmm0
+; CHECK-NEXT:  orps     %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_ugt_f32
+; AVX:       vcmpnless %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandps    %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnps   %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorps     %xmm1, %xmm0, %xmm0
+  %1 = fcmp ugt float %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define double @select_fcmp_ugt_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_ugt_f64
+; CHECK:       cmpnlesd %xmm1, %xmm0
+; CHECK-NEXT:  andpd    %xmm0, %xmm2
+; CHECK-NEXT:  andnpd   %xmm3, %xmm0
+; CHECK-NEXT:  orpd     %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_ugt_f64
+; AVX:       vcmpnlesd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandpd    %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnpd   %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorpd     %xmm1, %xmm0, %xmm0
+  %1 = fcmp ugt double %a, %b
+  %2 = select i1 %1, double %c, double %d
+  ret double %2
+}
+
+define float @select_fcmp_uge_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_uge_f32
+; CHECK:       cmpnltss %xmm1, %xmm0
+; CHECK-NEXT:  andps    %xmm0, %xmm2
+; CHECK-NEXT:  andnps   %xmm3, %xmm0
+; CHECK-NEXT:  orps     %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_uge_f32
+; AVX:       vcmpnltss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandps    %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnps   %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorps     %xmm1, %xmm0, %xmm0
+  %1 = fcmp uge float %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define double @select_fcmp_uge_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_uge_f64
+; CHECK:       cmpnltsd %xmm1, %xmm0
+; CHECK-NEXT:  andpd    %xmm0, %xmm2
+; CHECK-NEXT:  andnpd   %xmm3, %xmm0
+; CHECK-NEXT:  orpd     %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_uge_f64
+; AVX:       vcmpnltsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandpd    %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnpd   %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorpd     %xmm1, %xmm0, %xmm0
+  %1 = fcmp uge double %a, %b
+  %2 = select i1 %1, double %c, double %d
+  ret double %2
+}
+
+define float @select_fcmp_ult_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_ult_f32
+; CHECK:       cmpnless %xmm0, %xmm1
+; CHECK-NEXT:  andps    %xmm1, %xmm2
+; CHECK-NEXT:  andnps   %xmm3, %xmm1
+; CHECK-NEXT:  orps     %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_ult_f32
+; AVX:       vcmpnless %xmm0, %xmm1, %xmm0
+; AVX-NEXT:  vandps    %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnps   %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorps     %xmm1, %xmm0, %xmm0
+  %1 = fcmp ult float %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define double @select_fcmp_ult_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_ult_f64
+; CHECK:       cmpnlesd %xmm0, %xmm1
+; CHECK-NEXT:  andpd    %xmm1, %xmm2
+; CHECK-NEXT:  andnpd   %xmm3, %xmm1
+; CHECK-NEXT:  orpd     %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_ult_f64
+; AVX:       vcmpnlesd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:  vandpd    %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnpd   %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorpd     %xmm1, %xmm0, %xmm0
+  %1 = fcmp ult double %a, %b
+  %2 = select i1 %1, double %c, double %d
+  ret double %2
+}
+
+define float @select_fcmp_ule_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_ule_f32
+; CHECK:       cmpnltss %xmm0, %xmm1
+; CHECK-NEXT:  andps    %xmm1, %xmm2
+; CHECK-NEXT:  andnps   %xmm3, %xmm1
+; CHECK-NEXT:  orps     %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_ule_f32
+; AVX:       vcmpnltss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:  vandps    %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnps   %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorps     %xmm1, %xmm0, %xmm0
+  %1 = fcmp ule float %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define double @select_fcmp_ule_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_ule_f64
+; CHECK:       cmpnltsd %xmm0, %xmm1
+; CHECK-NEXT:  andpd    %xmm1, %xmm2
+; CHECK-NEXT:  andnpd   %xmm3, %xmm1
+; CHECK-NEXT:  orpd     %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_ule_f64
+; AVX:       vcmpnltsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:  vandpd    %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnpd   %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorpd     %xmm1, %xmm0, %xmm0
+  %1 = fcmp ule double %a, %b
+  %2 = select i1 %1, double %c, double %d
+  ret double %2
+}
+
+define float @select_fcmp_une_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_une_f32
+; CHECK:       cmpneqss %xmm1, %xmm0
+; CHECK-NEXT:  andps    %xmm0, %xmm2
+; CHECK-NEXT:  andnps   %xmm3, %xmm0
+; CHECK-NEXT:  orps     %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_une_f32
+; AVX:       vcmpneqss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandps    %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnps   %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorps     %xmm1, %xmm0, %xmm0
+  %1 = fcmp une float %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define double @select_fcmp_une_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_une_f64
+; CHECK:       cmpneqsd %xmm1, %xmm0
+; CHECK-NEXT:  andpd    %xmm0, %xmm2
+; CHECK-NEXT:  andnpd   %xmm3, %xmm0
+; CHECK-NEXT:  orpd     %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_une_f64
+; AVX:       vcmpneqsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:  vandpd    %xmm2, %xmm0, %xmm1
+; AVX-NEXT:  vandnpd   %xmm3, %xmm0, %xmm0
+; AVX-NEXT:  vorpd     %xmm1, %xmm0, %xmm0
+  %1 = fcmp une double %a, %b
+  %2 = select i1 %1, double %c, double %d
+  ret double %2
+}
+
diff --git a/test/CodeGen/X86/fast-isel-select.ll b/test/CodeGen/X86/fast-isel-select.ll
index 53158bc..7b3c99f 100644
--- a/test/CodeGen/X86/fast-isel-select.ll
+++ b/test/CodeGen/X86/fast-isel-select.ll
@@ -4,10 +4,10 @@
 ; lsb is zero.
 ; <rdar://problem/15651765>
 
-; CHECK-LABEL: fastisel_select: 
+; CHECK-LABEL: fastisel_select:
 ; CHECK: subb {{%[a-z0-9]+}}, [[RES:%[a-z0-9]+]]
 ; CHECK: testb $1, [[RES]]
-; CHECK: cmovel
+; CHECK: cmovnel %edi, %esi
 define i32 @fastisel_select(i1 %exchSub2211_, i1 %trunc_8766) {
   %shuffleInternal15257_8932 = sub i1 %exchSub2211_, %trunc_8766
   %counter_diff1345 = select i1 %shuffleInternal15257_8932, i32 1204476887, i32 0
diff --git a/test/CodeGen/X86/fast-isel-sse12-fptoint.ll b/test/CodeGen/X86/fast-isel-sse12-fptoint.ll
new file mode 100644
index 0000000..769c987
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-sse12-fptoint.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=AVX
+
+define i32 @cvt_test1(float %a) {
+; SSE-LABEL: cvt_test1
+; SSE:       cvttss2si %xmm0, %eax
+; AVX-LABEL: cvt_test1
+; AVX:       vcvttss2si %xmm0, %eax
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 0.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 0.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 0.000000e+00, i32 3
+  %5 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %4)
+  ret i32 %5
+}
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
+
+define i64 @cvt_test2(float %a) {
+; SSE-LABEL: cvt_test2
+; SSE:       cvttss2si %xmm0, %rax
+; AVX-LABEL: cvt_test2
+; AVX:       vcvttss2si %xmm0, %rax
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 0.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 0.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 0.000000e+00, i32 3
+  %5 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %4)
+  ret i64 %5
+}
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
+
+define i32 @cvt_test3(double %a) {
+; SSE-LABEL: cvt_test3
+; SSE:       cvttsd2si %xmm0, %eax
+; AVX-LABEL: cvt_test3
+; AVX:       vcvttsd2si %xmm0, %eax
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 0.000000e+00, i32 1
+  %3 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %2)
+  ret i32 %3
+}
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
+
+define i64 @cvt_test4(double %a) {
+; SSE-LABEL: cvt_test4
+; SSE:       cvttsd2si %xmm0, %rax
+; AVX-LABEL: cvt_test4
+; AVX:       vcvttsd2si %xmm0, %rax
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 0.000000e+00, i32 1
+  %3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %2)
+  ret i64 %3
+}
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
diff --git a/test/CodeGen/X86/float-asmprint.ll b/test/CodeGen/X86/float-asmprint.ll
index 4aeae7f..5de9700 100644
--- a/test/CodeGen/X86/float-asmprint.ll
+++ b/test/CodeGen/X86/float-asmprint.ll
@@ -16,8 +16,9 @@
 ; CHECK-NEXT: .size
 
 ; CHECK: varppc128:
-; CHECK-NEXT: .quad 0                         # ppc_fp128 -0
-; CHECK-NEXT: .quad -9223372036854775808
+; For ppc_fp128, the high double always comes first.
+; CHECK-NEXT: .quad -9223372036854775808      # ppc_fp128 -0
+; CHECK-NEXT: .quad 0
 ; CHECK-NEXT: .size
 
 ; CHECK: var80:
diff --git a/test/CodeGen/X86/frameaddr.ll b/test/CodeGen/X86/frameaddr.ll
new file mode 100644
index 0000000..6c1ca25
--- /dev/null
+++ b/test/CodeGen/X86/frameaddr.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -march=x86                                | FileCheck %s --check-prefix=CHECK-32
+; RUN: llc < %s -march=x86    -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-32
+; RUN: llc < %s -march=x86-64                             | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-64
+
+define i8* @test1() nounwind {
+entry:
+; CHECK-32-LABEL: test1
+; CHECK-32:       push
+; CHECK-32-NEXT:  movl %esp, %ebp
+; CHECK-32-NEXT:  movl %ebp, %eax
+; CHECK-32-NEXT:  pop
+; CHECK-32-NEXT:  ret
+; CHECK-64-LABEL: test1
+; CHECK-64:       push
+; CHECK-64-NEXT:  movq %rsp, %rbp
+; CHECK-64-NEXT:  movq %rbp, %rax
+; CHECK-64-NEXT:  pop
+; CHECK-64-NEXT:  ret
+  %0 = tail call i8* @llvm.frameaddress(i32 0)
+  ret i8* %0
+}
+
+define i8* @test2() nounwind {
+entry:
+; CHECK-32-LABEL: test2
+; CHECK-32:       push
+; CHECK-32-NEXT:  movl %esp, %ebp
+; CHECK-32-NEXT:  movl (%ebp), %eax
+; CHECK-32-NEXT:  movl (%eax), %eax
+; CHECK-32-NEXT:  pop
+; CHECK-32-NEXT:  ret
+; CHECK-64-LABEL: test2
+; CHECK-64:       push
+; CHECK-64-NEXT:  movq %rsp, %rbp
+; CHECK-64-NEXT:  movq (%rbp), %rax
+; CHECK-64-NEXT:  movq (%rax), %rax
+; CHECK-64-NEXT:  pop
+; CHECK-64-NEXT:  ret
+  %0 = tail call i8* @llvm.frameaddress(i32 2)
+  ret i8* %0
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll
index 8c328ec..a732eb1 100644
--- a/test/CodeGen/X86/gcc_except_table.ll
+++ b/test/CodeGen/X86/gcc_except_table.ll
@@ -13,14 +13,14 @@ define i32 @main() uwtable optsize ssp {
 ; APPLE: GCC_except_table0:
 ; APPLE: Lexception0:
 
-; MINGW64: .cfi_startproc
-; MINGW64: .cfi_personality 0, __gxx_personality_v0
-; MINGW64: .cfi_lsda 0, .Lexception0
-; MINGW64: .cfi_def_cfa_offset 16
+; MINGW64: .seh_proc
+; MINGW64: .seh_handler __gxx_personality_v0
+; MINGW64: .seh_setframe 5, 0
 ; MINGW64: callq _Unwind_Resume
-; MINGW64: .cfi_endproc
+; MINGW64: .seh_handlerdata
 ; MINGW64: GCC_except_table0:
 ; MINGW64: Lexception0:
+; MINGW64: .seh_endproc
 
 ; MINGW32: .cfi_startproc
 ; MINGW32: .cfi_personality 0, ___gxx_personality_v0
diff --git a/test/CodeGen/X86/haddsub-2.ll b/test/CodeGen/X86/haddsub-2.ll
new file mode 100644
index 0000000..ff939a9
--- /dev/null
+++ b/test/CodeGen/X86/haddsub-2.ll
@@ -0,0 +1,802 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE3
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse3,+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSSE3
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+
+
+
+define <4 x float> @hadd_ps_test1(<4 x float> %A, <4 x float> %B) {
+  %vecext = extractelement <4 x float> %A, i32 0
+  %vecext1 = extractelement <4 x float> %A, i32 1
+  %add = fadd float %vecext, %vecext1
+  %vecinit = insertelement <4 x float> undef, float %add, i32 0
+  %vecext2 = extractelement <4 x float> %A, i32 2
+  %vecext3 = extractelement <4 x float> %A, i32 3
+  %add4 = fadd float %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
+  %vecext6 = extractelement <4 x float> %B, i32 0
+  %vecext7 = extractelement <4 x float> %B, i32 1
+  %add8 = fadd float %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2
+  %vecext10 = extractelement <4 x float> %B, i32 2
+  %vecext11 = extractelement <4 x float> %B, i32 3
+  %add12 = fadd float %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3
+  ret <4 x float> %vecinit13
+}
+; CHECK-LABEL: hadd_ps_test1
+; CHECK: haddps
+; CHECK-NEXT: ret
+
+
+define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) {
+  %vecext = extractelement <4 x float> %A, i32 2
+  %vecext1 = extractelement <4 x float> %A, i32 3
+  %add = fadd float %vecext, %vecext1
+  %vecinit = insertelement <4 x float> undef, float %add, i32 1
+  %vecext2 = extractelement <4 x float> %A, i32 0
+  %vecext3 = extractelement <4 x float> %A, i32 1
+  %add4 = fadd float %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 0
+  %vecext6 = extractelement <4 x float> %B, i32 2
+  %vecext7 = extractelement <4 x float> %B, i32 3
+  %add8 = fadd float %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 3
+  %vecext10 = extractelement <4 x float> %B, i32 0
+  %vecext11 = extractelement <4 x float> %B, i32 1
+  %add12 = fadd float %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 2
+  ret <4 x float> %vecinit13
+}
+; CHECK-LABEL: hadd_ps_test2
+; CHECK: haddps
+; CHECK-NEXT: ret
+
+
+define <4 x float> @hsub_ps_test1(<4 x float> %A, <4 x float> %B) {
+  %vecext = extractelement <4 x float> %A, i32 0
+  %vecext1 = extractelement <4 x float> %A, i32 1
+  %sub = fsub float %vecext, %vecext1
+  %vecinit = insertelement <4 x float> undef, float %sub, i32 0
+  %vecext2 = extractelement <4 x float> %A, i32 2
+  %vecext3 = extractelement <4 x float> %A, i32 3
+  %sub4 = fsub float %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x float> %vecinit, float %sub4, i32 1
+  %vecext6 = extractelement <4 x float> %B, i32 0
+  %vecext7 = extractelement <4 x float> %B, i32 1
+  %sub8 = fsub float %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x float> %vecinit5, float %sub8, i32 2
+  %vecext10 = extractelement <4 x float> %B, i32 2
+  %vecext11 = extractelement <4 x float> %B, i32 3
+  %sub12 = fsub float %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x float> %vecinit9, float %sub12, i32 3
+  ret <4 x float> %vecinit13
+}
+; CHECK-LABEL: hsub_ps_test1
+; CHECK: hsubps
+; CHECK-NEXT: ret
+
+
+define <4 x float> @hsub_ps_test2(<4 x float> %A, <4 x float> %B) {
+  %vecext = extractelement <4 x float> %A, i32 2
+  %vecext1 = extractelement <4 x float> %A, i32 3
+  %sub = fsub float %vecext, %vecext1
+  %vecinit = insertelement <4 x float> undef, float %sub, i32 1
+  %vecext2 = extractelement <4 x float> %A, i32 0
+  %vecext3 = extractelement <4 x float> %A, i32 1
+  %sub4 = fsub float %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x float> %vecinit, float %sub4, i32 0
+  %vecext6 = extractelement <4 x float> %B, i32 2
+  %vecext7 = extractelement <4 x float> %B, i32 3
+  %sub8 = fsub float %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x float> %vecinit5, float %sub8, i32 3
+  %vecext10 = extractelement <4 x float> %B, i32 0
+  %vecext11 = extractelement <4 x float> %B, i32 1
+  %sub12 = fsub float %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x float> %vecinit9, float %sub12, i32 2
+  ret <4 x float> %vecinit13
+}
+; CHECK-LABEL: hsub_ps_test2
+; CHECK: hsubps
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) {
+  %vecext = extractelement <4 x i32> %A, i32 0
+  %vecext1 = extractelement <4 x i32> %A, i32 1
+  %add = add i32 %vecext, %vecext1
+  %vecinit = insertelement <4 x i32> undef, i32 %add, i32 0
+  %vecext2 = extractelement <4 x i32> %A, i32 2
+  %vecext3 = extractelement <4 x i32> %A, i32 3
+  %add4 = add i32 %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %add4, i32 1
+  %vecext6 = extractelement <4 x i32> %B, i32 0
+  %vecext7 = extractelement <4 x i32> %B, i32 1
+  %add8 = add i32 %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %add8, i32 2
+  %vecext10 = extractelement <4 x i32> %B, i32 2
+  %vecext11 = extractelement <4 x i32> %B, i32 3
+  %add12 = add i32 %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %add12, i32 3
+  ret <4 x i32> %vecinit13
+}
+; CHECK-LABEL: phadd_d_test1
+; SSE3-NOT: phaddd
+; SSSE3: phaddd
+; AVX: vphaddd
+; AVX2 vphaddd
+; CHECK: ret
+
+
+define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) {
+  %vecext = extractelement <4 x i32> %A, i32 2
+  %vecext1 = extractelement <4 x i32> %A, i32 3
+  %add = add i32 %vecext, %vecext1
+  %vecinit = insertelement <4 x i32> undef, i32 %add, i32 1
+  %vecext2 = extractelement <4 x i32> %A, i32 0
+  %vecext3 = extractelement <4 x i32> %A, i32 1
+  %add4 = add i32 %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %add4, i32 0
+  %vecext6 = extractelement <4 x i32> %B, i32 3
+  %vecext7 = extractelement <4 x i32> %B, i32 2
+  %add8 = add i32 %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %add8, i32 3
+  %vecext10 = extractelement <4 x i32> %B, i32 1
+  %vecext11 = extractelement <4 x i32> %B, i32 0
+  %add12 = add i32 %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %add12, i32 2
+  ret <4 x i32> %vecinit13
+}
+; CHECK-LABEL: phadd_d_test2
+; SSE3-NOT: phaddd
+; SSSE3: phaddd
+; AVX: vphaddd
+; AVX2 vphaddd
+; CHECK: ret
+
+
+define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) {
+  %vecext = extractelement <4 x i32> %A, i32 0
+  %vecext1 = extractelement <4 x i32> %A, i32 1
+  %sub = sub i32 %vecext, %vecext1
+  %vecinit = insertelement <4 x i32> undef, i32 %sub, i32 0
+  %vecext2 = extractelement <4 x i32> %A, i32 2
+  %vecext3 = extractelement <4 x i32> %A, i32 3
+  %sub4 = sub i32 %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %sub4, i32 1
+  %vecext6 = extractelement <4 x i32> %B, i32 0
+  %vecext7 = extractelement <4 x i32> %B, i32 1
+  %sub8 = sub i32 %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %sub8, i32 2
+  %vecext10 = extractelement <4 x i32> %B, i32 2
+  %vecext11 = extractelement <4 x i32> %B, i32 3
+  %sub12 = sub i32 %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %sub12, i32 3
+  ret <4 x i32> %vecinit13
+}
+; CHECK-LABEL: phsub_d_test1
+; SSE3-NOT: phsubd
+; SSSE3: phsubd
+; AVX: vphsubd
+; AVX2 vphsubd
+; CHECK: ret
+
+
+define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) {
+  %vecext = extractelement <4 x i32> %A, i32 2
+  %vecext1 = extractelement <4 x i32> %A, i32 3
+  %sub = sub i32 %vecext, %vecext1
+  %vecinit = insertelement <4 x i32> undef, i32 %sub, i32 1
+  %vecext2 = extractelement <4 x i32> %A, i32 0
+  %vecext3 = extractelement <4 x i32> %A, i32 1
+  %sub4 = sub i32 %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %sub4, i32 0
+  %vecext6 = extractelement <4 x i32> %B, i32 2
+  %vecext7 = extractelement <4 x i32> %B, i32 3
+  %sub8 = sub i32 %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %sub8, i32 3
+  %vecext10 = extractelement <4 x i32> %B, i32 0
+  %vecext11 = extractelement <4 x i32> %B, i32 1
+  %sub12 = sub i32 %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %sub12, i32 2
+  ret <4 x i32> %vecinit13
+}
+; CHECK-LABEL: phsub_d_test2
+; SSE3-NOT: phsubd
+; SSSE3: phsubd
+; AVX: vphsubd
+; AVX2 vphsubd
+; CHECK: ret
+
+
+define <2 x double> @hadd_pd_test1(<2 x double> %A, <2 x double> %B) {
+  %vecext = extractelement <2 x double> %A, i32 0
+  %vecext1 = extractelement <2 x double> %A, i32 1
+  %add = fadd double %vecext, %vecext1
+  %vecinit = insertelement <2 x double> undef, double %add, i32 0
+  %vecext2 = extractelement <2 x double> %B, i32 0
+  %vecext3 = extractelement <2 x double> %B, i32 1
+  %add2 = fadd double %vecext2, %vecext3
+  %vecinit2 = insertelement <2 x double> %vecinit, double %add2, i32 1
+  ret <2 x double> %vecinit2
+}
+; CHECK-LABEL: hadd_pd_test1
+; CHECK: haddpd
+; CHECK-NEXT: ret
+
+
+define <2 x double> @hadd_pd_test2(<2 x double> %A, <2 x double> %B) {
+  %vecext = extractelement <2 x double> %A, i32 1
+  %vecext1 = extractelement <2 x double> %A, i32 0
+  %add = fadd double %vecext, %vecext1
+  %vecinit = insertelement <2 x double> undef, double %add, i32 0
+  %vecext2 = extractelement <2 x double> %B, i32 1
+  %vecext3 = extractelement <2 x double> %B, i32 0
+  %add2 = fadd double %vecext2, %vecext3
+  %vecinit2 = insertelement <2 x double> %vecinit, double %add2, i32 1
+  ret <2 x double> %vecinit2
+}
+; CHECK-LABEL: hadd_pd_test2
+; CHECK: haddpd
+; CHECK-NEXT: ret
+
+
+define <2 x double> @hsub_pd_test1(<2 x double> %A, <2 x double> %B) {
+  %vecext = extractelement <2 x double> %A, i32 0
+  %vecext1 = extractelement <2 x double> %A, i32 1
+  %sub = fsub double %vecext, %vecext1
+  %vecinit = insertelement <2 x double> undef, double %sub, i32 0
+  %vecext2 = extractelement <2 x double> %B, i32 0
+  %vecext3 = extractelement <2 x double> %B, i32 1
+  %sub2 = fsub double %vecext2, %vecext3
+  %vecinit2 = insertelement <2 x double> %vecinit, double %sub2, i32 1
+  ret <2 x double> %vecinit2
+}
+; CHECK-LABEL: hsub_pd_test1
+; CHECK: hsubpd
+; CHECK-NEXT: ret
+
+
+define <2 x double> @hsub_pd_test2(<2 x double> %A, <2 x double> %B) {
+  %vecext = extractelement <2 x double> %B, i32 0
+  %vecext1 = extractelement <2 x double> %B, i32 1
+  %sub = fsub double %vecext, %vecext1
+  %vecinit = insertelement <2 x double> undef, double %sub, i32 1
+  %vecext2 = extractelement <2 x double> %A, i32 0
+  %vecext3 = extractelement <2 x double> %A, i32 1
+  %sub2 = fsub double %vecext2, %vecext3
+  %vecinit2 = insertelement <2 x double> %vecinit, double %sub2, i32 0
+  ret <2 x double> %vecinit2
+}
+; CHECK-LABEL: hsub_pd_test2
+; CHECK: hsubpd
+; CHECK-NEXT: ret
+
+
+define <4 x double> @avx_vhadd_pd_test(<4 x double> %A, <4 x double> %B) {
+  %vecext = extractelement <4 x double> %A, i32 0
+  %vecext1 = extractelement <4 x double> %A, i32 1
+  %add = fadd double %vecext, %vecext1
+  %vecinit = insertelement <4 x double> undef, double %add, i32 0
+  %vecext2 = extractelement <4 x double> %A, i32 2
+  %vecext3 = extractelement <4 x double> %A, i32 3
+  %add4 = fadd double %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x double> %vecinit, double %add4, i32 1
+  %vecext6 = extractelement <4 x double> %B, i32 0
+  %vecext7 = extractelement <4 x double> %B, i32 1
+  %add8 = fadd double %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x double> %vecinit5, double %add8, i32 2
+  %vecext10 = extractelement <4 x double> %B, i32 2
+  %vecext11 = extractelement <4 x double> %B, i32 3
+  %add12 = fadd double %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x double> %vecinit9, double %add12, i32 3
+  ret <4 x double> %vecinit13
+}
+; CHECK-LABEL: avx_vhadd_pd_test
+; SSE3: haddpd
+; SSE3-NEXT: haddpd
+; SSSE3: haddpd
+; SSSE3: haddpd
+; AVX: vhaddpd
+; AVX: vhaddpd
+; AVX2: vhaddpd
+; AVX2: vhaddpd
+; CHECK: ret
+
+
+define <4 x double> @avx_vhsub_pd_test(<4 x double> %A, <4 x double> %B) {
+  %vecext = extractelement <4 x double> %A, i32 0
+  %vecext1 = extractelement <4 x double> %A, i32 1
+  %sub = fsub double %vecext, %vecext1
+  %vecinit = insertelement <4 x double> undef, double %sub, i32 0
+  %vecext2 = extractelement <4 x double> %A, i32 2
+  %vecext3 = extractelement <4 x double> %A, i32 3
+  %sub4 = fsub double %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x double> %vecinit, double %sub4, i32 1
+  %vecext6 = extractelement <4 x double> %B, i32 0
+  %vecext7 = extractelement <4 x double> %B, i32 1
+  %sub8 = fsub double %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x double> %vecinit5, double %sub8, i32 2
+  %vecext10 = extractelement <4 x double> %B, i32 2
+  %vecext11 = extractelement <4 x double> %B, i32 3
+  %sub12 = fsub double %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x double> %vecinit9, double %sub12, i32 3
+  ret <4 x double> %vecinit13
+}
+; CHECK-LABEL: avx_vhsub_pd_test
+; SSE3: hsubpd
+; SSE3-NEXT: hsubpd
+; SSSE3: hsubpd
+; SSSE3-NEXT: hsubpd
+; AVX: vhsubpd
+; AVX: vhsubpd
+; AVX2: vhsubpd
+; AVX2: vhsubpd
+; CHECK: ret
+
+
+define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
+  %vecext = extractelement <8 x i32> %A, i32 0
+  %vecext1 = extractelement <8 x i32> %A, i32 1
+  %add = add i32 %vecext, %vecext1
+  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
+  %vecext2 = extractelement <8 x i32> %A, i32 2
+  %vecext3 = extractelement <8 x i32> %A, i32 3
+  %add4 = add i32 %vecext2, %vecext3
+  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 1
+  %vecext6 = extractelement <8 x i32> %A, i32 4
+  %vecext7 = extractelement <8 x i32> %A, i32 5
+  %add8 = add i32 %vecext6, %vecext7
+  %vecinit9 = insertelement <8 x i32> %vecinit5, i32 %add8, i32 2
+  %vecext10 = extractelement <8 x i32> %A, i32 6
+  %vecext11 = extractelement <8 x i32> %A, i32 7
+  %add12 = add i32 %vecext10, %vecext11
+  %vecinit13 = insertelement <8 x i32> %vecinit9, i32 %add12, i32 3
+  %vecext14 = extractelement <8 x i32> %B, i32 0
+  %vecext15 = extractelement <8 x i32> %B, i32 1
+  %add16 = add i32 %vecext14, %vecext15
+  %vecinit17 = insertelement <8 x i32> %vecinit13, i32 %add16, i32 4
+  %vecext18 = extractelement <8 x i32> %B, i32 2
+  %vecext19 = extractelement <8 x i32> %B, i32 3
+  %add20 = add i32 %vecext18, %vecext19
+  %vecinit21 = insertelement <8 x i32> %vecinit17, i32 %add20, i32 5
+  %vecext22 = extractelement <8 x i32> %B, i32 4
+  %vecext23 = extractelement <8 x i32> %B, i32 5
+  %add24 = add i32 %vecext22, %vecext23
+  %vecinit25 = insertelement <8 x i32> %vecinit21, i32 %add24, i32 6
+  %vecext26 = extractelement <8 x i32> %B, i32 6
+  %vecext27 = extractelement <8 x i32> %B, i32 7
+  %add28 = add i32 %vecext26, %vecext27
+  %vecinit29 = insertelement <8 x i32> %vecinit25, i32 %add28, i32 7
+  ret <8 x i32> %vecinit29
+}
+; CHECK-LABEL: avx2_vphadd_d_test
+; SSE3-NOT: phaddd
+; SSSE3: phaddd
+; SSSE3-NEXT: phaddd
+; AVX: vphaddd
+; AVX: vphaddd
+; AVX2: vphaddd
+; AVX2: vphaddd
+; CHECK: ret
+
+define <16 x i16> @avx2_vphadd_w_test(<16 x i16> %a, <16 x i16> %b) {
+  %vecext = extractelement <16 x i16> %a, i32 0
+  %vecext1 = extractelement <16 x i16> %a, i32 1
+  %add = add i16 %vecext, %vecext1
+  %vecinit = insertelement <16 x i16> undef, i16 %add, i32 0
+  %vecext4 = extractelement <16 x i16> %a, i32 2
+  %vecext6 = extractelement <16 x i16> %a, i32 3
+  %add8 = add i16 %vecext4, %vecext6
+  %vecinit10 = insertelement <16 x i16> %vecinit, i16 %add8, i32 1
+  %vecext11 = extractelement <16 x i16> %a, i32 4
+  %vecext13 = extractelement <16 x i16> %a, i32 5
+  %add15 = add i16 %vecext11, %vecext13
+  %vecinit17 = insertelement <16 x i16> %vecinit10, i16 %add15, i32 2
+  %vecext18 = extractelement <16 x i16> %a, i32 6
+  %vecext20 = extractelement <16 x i16> %a, i32 7
+  %add22 = add i16 %vecext18, %vecext20
+  %vecinit24 = insertelement <16 x i16> %vecinit17, i16 %add22, i32 3
+  %vecext25 = extractelement <16 x i16> %a, i32 8
+  %vecext27 = extractelement <16 x i16> %a, i32 9
+  %add29 = add i16 %vecext25, %vecext27
+  %vecinit31 = insertelement <16 x i16> %vecinit24, i16 %add29, i32 4
+  %vecext32 = extractelement <16 x i16> %a, i32 10
+  %vecext34 = extractelement <16 x i16> %a, i32 11
+  %add36 = add i16 %vecext32, %vecext34
+  %vecinit38 = insertelement <16 x i16> %vecinit31, i16 %add36, i32 5
+  %vecext39 = extractelement <16 x i16> %a, i32 12
+  %vecext41 = extractelement <16 x i16> %a, i32 13
+  %add43 = add i16 %vecext39, %vecext41
+  %vecinit45 = insertelement <16 x i16> %vecinit38, i16 %add43, i32 6
+  %vecext46 = extractelement <16 x i16> %a, i32 14
+  %vecext48 = extractelement <16 x i16> %a, i32 15
+  %add50 = add i16 %vecext46, %vecext48
+  %vecinit52 = insertelement <16 x i16> %vecinit45, i16 %add50, i32 7
+  %vecext53 = extractelement <16 x i16> %b, i32 0
+  %vecext55 = extractelement <16 x i16> %b, i32 1
+  %add57 = add i16 %vecext53, %vecext55
+  %vecinit59 = insertelement <16 x i16> %vecinit52, i16 %add57, i32 8
+  %vecext60 = extractelement <16 x i16> %b, i32 2
+  %vecext62 = extractelement <16 x i16> %b, i32 3
+  %add64 = add i16 %vecext60, %vecext62
+  %vecinit66 = insertelement <16 x i16> %vecinit59, i16 %add64, i32 9
+  %vecext67 = extractelement <16 x i16> %b, i32 4
+  %vecext69 = extractelement <16 x i16> %b, i32 5
+  %add71 = add i16 %vecext67, %vecext69
+  %vecinit73 = insertelement <16 x i16> %vecinit66, i16 %add71, i32 10
+  %vecext74 = extractelement <16 x i16> %b, i32 6
+  %vecext76 = extractelement <16 x i16> %b, i32 7
+  %add78 = add i16 %vecext74, %vecext76
+  %vecinit80 = insertelement <16 x i16> %vecinit73, i16 %add78, i32 11
+  %vecext81 = extractelement <16 x i16> %b, i32 8
+  %vecext83 = extractelement <16 x i16> %b, i32 9
+  %add85 = add i16 %vecext81, %vecext83
+  %vecinit87 = insertelement <16 x i16> %vecinit80, i16 %add85, i32 12
+  %vecext88 = extractelement <16 x i16> %b, i32 10
+  %vecext90 = extractelement <16 x i16> %b, i32 11
+  %add92 = add i16 %vecext88, %vecext90
+  %vecinit94 = insertelement <16 x i16> %vecinit87, i16 %add92, i32 13
+  %vecext95 = extractelement <16 x i16> %b, i32 12
+  %vecext97 = extractelement <16 x i16> %b, i32 13
+  %add99 = add i16 %vecext95, %vecext97
+  %vecinit101 = insertelement <16 x i16> %vecinit94, i16 %add99, i32 14
+  %vecext102 = extractelement <16 x i16> %b, i32 14
+  %vecext104 = extractelement <16 x i16> %b, i32 15
+  %add106 = add i16 %vecext102, %vecext104
+  %vecinit108 = insertelement <16 x i16> %vecinit101, i16 %add106, i32 15
+  ret <16 x i16> %vecinit108
+}
+; CHECK-LABEL: avx2_vphadd_w_test
+; SSE3-NOT: phaddw
+; SSSE3: phaddw
+; SSSE3-NEXT: phaddw
+; AVX: vphaddw
+; AVX: vphaddw
+; AVX2: vphaddw
+; AVX2: vphaddw
+; CHECK: ret
+
+
+; Verify that we don't select horizontal subs in the following functions.
+
+define <4 x i32> @not_a_hsub_1(<4 x i32> %A, <4 x i32> %B) {
+  %vecext = extractelement <4 x i32> %A, i32 0
+  %vecext1 = extractelement <4 x i32> %A, i32 1
+  %sub = sub i32 %vecext, %vecext1
+  %vecinit = insertelement <4 x i32> undef, i32 %sub, i32 0
+  %vecext2 = extractelement <4 x i32> %A, i32 2
+  %vecext3 = extractelement <4 x i32> %A, i32 3
+  %sub4 = sub i32 %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %sub4, i32 1
+  %vecext6 = extractelement <4 x i32> %B, i32 1
+  %vecext7 = extractelement <4 x i32> %B, i32 0
+  %sub8 = sub i32 %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %sub8, i32 2
+  %vecext10 = extractelement <4 x i32> %B, i32 3
+  %vecext11 = extractelement <4 x i32> %B, i32 2
+  %sub12 = sub i32 %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %sub12, i32 3
+  ret <4 x i32> %vecinit13
+}
+; CHECK-LABEL: not_a_hsub_1
+; CHECK-NOT: phsubd
+; CHECK: ret
+
+
+define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) {
+  %vecext = extractelement <4 x float> %A, i32 2
+  %vecext1 = extractelement <4 x float> %A, i32 3
+  %sub = fsub float %vecext, %vecext1
+  %vecinit = insertelement <4 x float> undef, float %sub, i32 1
+  %vecext2 = extractelement <4 x float> %A, i32 0
+  %vecext3 = extractelement <4 x float> %A, i32 1
+  %sub4 = fsub float %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x float> %vecinit, float %sub4, i32 0
+  %vecext6 = extractelement <4 x float> %B, i32 3
+  %vecext7 = extractelement <4 x float> %B, i32 2
+  %sub8 = fsub float %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x float> %vecinit5, float %sub8, i32 3
+  %vecext10 = extractelement <4 x float> %B, i32 0
+  %vecext11 = extractelement <4 x float> %B, i32 1
+  %sub12 = fsub float %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x float> %vecinit9, float %sub12, i32 2
+  ret <4 x float> %vecinit13
+}
+; CHECK-LABEL: not_a_hsub_2
+; CHECK-NOT: hsubps
+; CHECK: ret
+
+
+define <2 x double> @not_a_hsub_3(<2 x double> %A, <2 x double> %B) {
+  %vecext = extractelement <2 x double> %B, i32 0
+  %vecext1 = extractelement <2 x double> %B, i32 1
+  %sub = fsub double %vecext, %vecext1
+  %vecinit = insertelement <2 x double> undef, double %sub, i32 1
+  %vecext2 = extractelement <2 x double> %A, i32 1
+  %vecext3 = extractelement <2 x double> %A, i32 0
+  %sub2 = fsub double %vecext2, %vecext3
+  %vecinit2 = insertelement <2 x double> %vecinit, double %sub2, i32 0
+  ret <2 x double> %vecinit2
+}
+; CHECK-LABEL: not_a_hsub_3
+; CHECK-NOT: hsubpd
+; CHECK: ret
+
+
+; Test AVX horizontal add/sub of packed single/double precision
+; floating point values from 256-bit vectors.
+
+define <8 x float> @avx_vhadd_ps(<8 x float> %a, <8 x float> %b) {
+  %vecext = extractelement <8 x float> %a, i32 0
+  %vecext1 = extractelement <8 x float> %a, i32 1
+  %add = fadd float %vecext, %vecext1
+  %vecinit = insertelement <8 x float> undef, float %add, i32 0
+  %vecext2 = extractelement <8 x float> %a, i32 2
+  %vecext3 = extractelement <8 x float> %a, i32 3
+  %add4 = fadd float %vecext2, %vecext3
+  %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 1
+  %vecext6 = extractelement <8 x float> %b, i32 0
+  %vecext7 = extractelement <8 x float> %b, i32 1
+  %add8 = fadd float %vecext6, %vecext7
+  %vecinit9 = insertelement <8 x float> %vecinit5, float %add8, i32 2
+  %vecext10 = extractelement <8 x float> %b, i32 2
+  %vecext11 = extractelement <8 x float> %b, i32 3
+  %add12 = fadd float %vecext10, %vecext11
+  %vecinit13 = insertelement <8 x float> %vecinit9, float %add12, i32 3
+  %vecext14 = extractelement <8 x float> %a, i32 4
+  %vecext15 = extractelement <8 x float> %a, i32 5
+  %add16 = fadd float %vecext14, %vecext15
+  %vecinit17 = insertelement <8 x float> %vecinit13, float %add16, i32 4
+  %vecext18 = extractelement <8 x float> %a, i32 6
+  %vecext19 = extractelement <8 x float> %a, i32 7
+  %add20 = fadd float %vecext18, %vecext19
+  %vecinit21 = insertelement <8 x float> %vecinit17, float %add20, i32 5
+  %vecext22 = extractelement <8 x float> %b, i32 4
+  %vecext23 = extractelement <8 x float> %b, i32 5
+  %add24 = fadd float %vecext22, %vecext23
+  %vecinit25 = insertelement <8 x float> %vecinit21, float %add24, i32 6
+  %vecext26 = extractelement <8 x float> %b, i32 6
+  %vecext27 = extractelement <8 x float> %b, i32 7
+  %add28 = fadd float %vecext26, %vecext27
+  %vecinit29 = insertelement <8 x float> %vecinit25, float %add28, i32 7
+  ret <8 x float> %vecinit29
+}
+; CHECK-LABEL: avx_vhadd_ps
+; SSE3: haddps
+; SSE3-NEXT: haddps
+; SSSE3: haddps
+; SSSE3-NEXT: haddps
+; AVX: vhaddps
+; AVX2: vhaddps
+; CHECK: ret
+
+
+define <8 x float> @avx_vhsub_ps(<8 x float> %a, <8 x float> %b) {
+  %vecext = extractelement <8 x float> %a, i32 0
+  %vecext1 = extractelement <8 x float> %a, i32 1
+  %sub = fsub float %vecext, %vecext1
+  %vecinit = insertelement <8 x float> undef, float %sub, i32 0
+  %vecext2 = extractelement <8 x float> %a, i32 2
+  %vecext3 = extractelement <8 x float> %a, i32 3
+  %sub4 = fsub float %vecext2, %vecext3
+  %vecinit5 = insertelement <8 x float> %vecinit, float %sub4, i32 1
+  %vecext6 = extractelement <8 x float> %b, i32 0
+  %vecext7 = extractelement <8 x float> %b, i32 1
+  %sub8 = fsub float %vecext6, %vecext7
+  %vecinit9 = insertelement <8 x float> %vecinit5, float %sub8, i32 2
+  %vecext10 = extractelement <8 x float> %b, i32 2
+  %vecext11 = extractelement <8 x float> %b, i32 3
+  %sub12 = fsub float %vecext10, %vecext11
+  %vecinit13 = insertelement <8 x float> %vecinit9, float %sub12, i32 3
+  %vecext14 = extractelement <8 x float> %a, i32 4
+  %vecext15 = extractelement <8 x float> %a, i32 5
+  %sub16 = fsub float %vecext14, %vecext15
+  %vecinit17 = insertelement <8 x float> %vecinit13, float %sub16, i32 4
+  %vecext18 = extractelement <8 x float> %a, i32 6
+  %vecext19 = extractelement <8 x float> %a, i32 7
+  %sub20 = fsub float %vecext18, %vecext19
+  %vecinit21 = insertelement <8 x float> %vecinit17, float %sub20, i32 5
+  %vecext22 = extractelement <8 x float> %b, i32 4
+  %vecext23 = extractelement <8 x float> %b, i32 5
+  %sub24 = fsub float %vecext22, %vecext23
+  %vecinit25 = insertelement <8 x float> %vecinit21, float %sub24, i32 6
+  %vecext26 = extractelement <8 x float> %b, i32 6
+  %vecext27 = extractelement <8 x float> %b, i32 7
+  %sub28 = fsub float %vecext26, %vecext27
+  %vecinit29 = insertelement <8 x float> %vecinit25, float %sub28, i32 7
+  ret <8 x float> %vecinit29
+}
+; CHECK-LABEL: avx_vhsub_ps
+; SSE3: hsubps
+; SSE3-NEXT: hsubps
+; SSSE3: hsubps
+; SSSE3-NEXT: hsubps
+; AVX: vhsubps
+; AVX2: vhsubps
+; CHECK: ret
+
+
+define <4 x double> @avx_hadd_pd(<4 x double> %a, <4 x double> %b) {
+  %vecext = extractelement <4 x double> %a, i32 0
+  %vecext1 = extractelement <4 x double> %a, i32 1
+  %add = fadd double %vecext, %vecext1
+  %vecinit = insertelement <4 x double> undef, double %add, i32 0
+  %vecext2 = extractelement <4 x double> %b, i32 0
+  %vecext3 = extractelement <4 x double> %b, i32 1
+  %add4 = fadd double %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x double> %vecinit, double %add4, i32 1
+  %vecext6 = extractelement <4 x double> %a, i32 2
+  %vecext7 = extractelement <4 x double> %a, i32 3
+  %add8 = fadd double %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x double> %vecinit5, double %add8, i32 2
+  %vecext10 = extractelement <4 x double> %b, i32 2
+  %vecext11 = extractelement <4 x double> %b, i32 3
+  %add12 = fadd double %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x double> %vecinit9, double %add12, i32 3
+  ret <4 x double> %vecinit13
+}
+; CHECK-LABEL: avx_hadd_pd
+; SSE3: haddpd
+; SSE3-NEXT: haddpd
+; SSSE3: haddpd
+; SSSE3-NEXT: haddpd
+; AVX: vhaddpd
+; AVX2: vhaddpd
+; CHECK: ret
+
+
+define <4 x double> @avx_hsub_pd(<4 x double> %a, <4 x double> %b) {
+  %vecext = extractelement <4 x double> %a, i32 0
+  %vecext1 = extractelement <4 x double> %a, i32 1
+  %sub = fsub double %vecext, %vecext1
+  %vecinit = insertelement <4 x double> undef, double %sub, i32 0
+  %vecext2 = extractelement <4 x double> %b, i32 0
+  %vecext3 = extractelement <4 x double> %b, i32 1
+  %sub4 = fsub double %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x double> %vecinit, double %sub4, i32 1
+  %vecext6 = extractelement <4 x double> %a, i32 2
+  %vecext7 = extractelement <4 x double> %a, i32 3
+  %sub8 = fsub double %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x double> %vecinit5, double %sub8, i32 2
+  %vecext10 = extractelement <4 x double> %b, i32 2
+  %vecext11 = extractelement <4 x double> %b, i32 3
+  %sub12 = fsub double %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x double> %vecinit9, double %sub12, i32 3
+  ret <4 x double> %vecinit13
+}
+; CHECK-LABEL: avx_hsub_pd
+; SSE3: hsubpd
+; SSE3-NEXT: hsubpd
+; SSSE3: hsubpd
+; SSSE3-NEXT: hsubpd
+; AVX: vhsubpd
+; AVX2: vhsubpd
+; CHECK: ret
+
+
+; Test AVX2 horizontal add of packed integer values from 256-bit vectors.
+
+define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) {
+  %vecext = extractelement <8 x i32> %a, i32 0
+  %vecext1 = extractelement <8 x i32> %a, i32 1
+  %add = add i32 %vecext, %vecext1
+  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
+  %vecext2 = extractelement <8 x i32> %a, i32 2
+  %vecext3 = extractelement <8 x i32> %a, i32 3
+  %add4 = add i32 %vecext2, %vecext3
+  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 1
+  %vecext6 = extractelement <8 x i32> %b, i32 0
+  %vecext7 = extractelement <8 x i32> %b, i32 1
+  %add8 = add i32 %vecext6, %vecext7
+  %vecinit9 = insertelement <8 x i32> %vecinit5, i32 %add8, i32 2
+  %vecext10 = extractelement <8 x i32> %b, i32 2
+  %vecext11 = extractelement <8 x i32> %b, i32 3
+  %add12 = add i32 %vecext10, %vecext11
+  %vecinit13 = insertelement <8 x i32> %vecinit9, i32 %add12, i32 3
+  %vecext14 = extractelement <8 x i32> %a, i32 4
+  %vecext15 = extractelement <8 x i32> %a, i32 5
+  %add16 = add i32 %vecext14, %vecext15
+  %vecinit17 = insertelement <8 x i32> %vecinit13, i32 %add16, i32 4
+  %vecext18 = extractelement <8 x i32> %a, i32 6
+  %vecext19 = extractelement <8 x i32> %a, i32 7
+  %add20 = add i32 %vecext18, %vecext19
+  %vecinit21 = insertelement <8 x i32> %vecinit17, i32 %add20, i32 5
+  %vecext22 = extractelement <8 x i32> %b, i32 4
+  %vecext23 = extractelement <8 x i32> %b, i32 5
+  %add24 = add i32 %vecext22, %vecext23
+  %vecinit25 = insertelement <8 x i32> %vecinit21, i32 %add24, i32 6
+  %vecext26 = extractelement <8 x i32> %b, i32 6
+  %vecext27 = extractelement <8 x i32> %b, i32 7
+  %add28 = add i32 %vecext26, %vecext27
+  %vecinit29 = insertelement <8 x i32> %vecinit25, i32 %add28, i32 7
+  ret <8 x i32> %vecinit29
+}
+; CHECK-LABEL: avx2_hadd_d
+; SSE3-NOT: phaddd
+; SSSE3: phaddd
+; SSSE3-NEXT: phaddd
+; AVX: vphaddd
+; AVX: vphaddd
+; AVX2: vphaddd
+; AVX2-NOT: vphaddd
+; CHECK: ret
+
+
+define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) {
+  %vecext = extractelement <16 x i16> %a, i32 0
+  %vecext1 = extractelement <16 x i16> %a, i32 1
+  %add = add i16 %vecext, %vecext1
+  %vecinit = insertelement <16 x i16> undef, i16 %add, i32 0
+  %vecext4 = extractelement <16 x i16> %a, i32 2
+  %vecext6 = extractelement <16 x i16> %a, i32 3
+  %add8 = add i16 %vecext4, %vecext6
+  %vecinit10 = insertelement <16 x i16> %vecinit, i16 %add8, i32 1
+  %vecext11 = extractelement <16 x i16> %a, i32 4
+  %vecext13 = extractelement <16 x i16> %a, i32 5
+  %add15 = add i16 %vecext11, %vecext13
+  %vecinit17 = insertelement <16 x i16> %vecinit10, i16 %add15, i32 2
+  %vecext18 = extractelement <16 x i16> %a, i32 6
+  %vecext20 = extractelement <16 x i16> %a, i32 7
+  %add22 = add i16 %vecext18, %vecext20
+  %vecinit24 = insertelement <16 x i16> %vecinit17, i16 %add22, i32 3
+  %vecext25 = extractelement <16 x i16> %a, i32 8
+  %vecext27 = extractelement <16 x i16> %a, i32 9
+  %add29 = add i16 %vecext25, %vecext27
+  %vecinit31 = insertelement <16 x i16> %vecinit24, i16 %add29, i32 8
+  %vecext32 = extractelement <16 x i16> %a, i32 10
+  %vecext34 = extractelement <16 x i16> %a, i32 11
+  %add36 = add i16 %vecext32, %vecext34
+  %vecinit38 = insertelement <16 x i16> %vecinit31, i16 %add36, i32 9
+  %vecext39 = extractelement <16 x i16> %a, i32 12
+  %vecext41 = extractelement <16 x i16> %a, i32 13
+  %add43 = add i16 %vecext39, %vecext41
+  %vecinit45 = insertelement <16 x i16> %vecinit38, i16 %add43, i32 10
+  %vecext46 = extractelement <16 x i16> %a, i32 14
+  %vecext48 = extractelement <16 x i16> %a, i32 15
+  %add50 = add i16 %vecext46, %vecext48
+  %vecinit52 = insertelement <16 x i16> %vecinit45, i16 %add50, i32 11
+  %vecext53 = extractelement <16 x i16> %b, i32 0
+  %vecext55 = extractelement <16 x i16> %b, i32 1
+  %add57 = add i16 %vecext53, %vecext55
+  %vecinit59 = insertelement <16 x i16> %vecinit52, i16 %add57, i32 4
+  %vecext60 = extractelement <16 x i16> %b, i32 2
+  %vecext62 = extractelement <16 x i16> %b, i32 3
+  %add64 = add i16 %vecext60, %vecext62
+  %vecinit66 = insertelement <16 x i16> %vecinit59, i16 %add64, i32 5
+  %vecext67 = extractelement <16 x i16> %b, i32 4
+  %vecext69 = extractelement <16 x i16> %b, i32 5
+  %add71 = add i16 %vecext67, %vecext69
+  %vecinit73 = insertelement <16 x i16> %vecinit66, i16 %add71, i32 6
+  %vecext74 = extractelement <16 x i16> %b, i32 6
+  %vecext76 = extractelement <16 x i16> %b, i32 7
+  %add78 = add i16 %vecext74, %vecext76
+  %vecinit80 = insertelement <16 x i16> %vecinit73, i16 %add78, i32 7
+  %vecext81 = extractelement <16 x i16> %b, i32 8
+  %vecext83 = extractelement <16 x i16> %b, i32 9
+  %add85 = add i16 %vecext81, %vecext83
+  %vecinit87 = insertelement <16 x i16> %vecinit80, i16 %add85, i32 12
+  %vecext88 = extractelement <16 x i16> %b, i32 10
+  %vecext90 = extractelement <16 x i16> %b, i32 11
+  %add92 = add i16 %vecext88, %vecext90
+  %vecinit94 = insertelement <16 x i16> %vecinit87, i16 %add92, i32 13
+  %vecext95 = extractelement <16 x i16> %b, i32 12
+  %vecext97 = extractelement <16 x i16> %b, i32 13
+  %add99 = add i16 %vecext95, %vecext97
+  %vecinit101 = insertelement <16 x i16> %vecinit94, i16 %add99, i32 14
+  %vecext102 = extractelement <16 x i16> %b, i32 14
+  %vecext104 = extractelement <16 x i16> %b, i32 15
+  %add106 = add i16 %vecext102, %vecext104
+  %vecinit108 = insertelement <16 x i16> %vecinit101, i16 %add106, i32 15
+  ret <16 x i16> %vecinit108
+}
+; CHECK-LABEL: avx2_hadd_w
+; SSE3-NOT: phaddw
+; SSSE3: phaddw
+; SSSE3-NEXT: phaddw
+; AVX: vphaddw
+; AVX: vphaddw
+; AVX2: vphaddw
+; AVX2-NOT: vphaddw
+; CHECK: ret
+
diff --git a/test/CodeGen/X86/haddsub-undef.ll b/test/CodeGen/X86/haddsub-undef.ll
new file mode 100644
index 0000000..954a9d9
--- /dev/null
+++ b/test/CodeGen/X86/haddsub-undef.ll
@@ -0,0 +1,325 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+
+; Verify that we correctly fold horizontal binop even in the presence of UNDEFs.
+
+define <4 x float> @test1_undef(<4 x float> %a, <4 x float> %b) {
+  %vecext = extractelement <4 x float> %a, i32 0
+  %vecext1 = extractelement <4 x float> %a, i32 1
+  %add = fadd float %vecext, %vecext1
+  %vecinit = insertelement <4 x float> undef, float %add, i32 0
+  %vecext2 = extractelement <4 x float> %a, i32 2
+  %vecext3 = extractelement <4 x float> %a, i32 3
+  %add4 = fadd float %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
+  %vecext10 = extractelement <4 x float> %b, i32 2
+  %vecext11 = extractelement <4 x float> %b, i32 3
+  %add12 = fadd float %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x float> %vecinit5, float %add12, i32 3
+  ret <4 x float> %vecinit13
+}
+; CHECK-LABEL: test1_undef
+; SSE: haddps
+; AVX: vhaddps
+; AVX2: vhaddps
+; CHECK-NEXT: ret
+
+
+define <4 x float> @test2_undef(<4 x float> %a, <4 x float> %b) {
+  %vecext = extractelement <4 x float> %a, i32 0
+  %vecext1 = extractelement <4 x float> %a, i32 1
+  %add = fadd float %vecext, %vecext1
+  %vecinit = insertelement <4 x float> undef, float %add, i32 0
+  %vecext6 = extractelement <4 x float> %b, i32 0
+  %vecext7 = extractelement <4 x float> %b, i32 1
+  %add8 = fadd float %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x float> %vecinit, float %add8, i32 2
+  %vecext10 = extractelement <4 x float> %b, i32 2
+  %vecext11 = extractelement <4 x float> %b, i32 3
+  %add12 = fadd float %vecext10, %vecext11
+  %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3
+  ret <4 x float> %vecinit13
+}
+; CHECK-LABEL: test2_undef
+; SSE: haddps
+; AVX: vhaddps
+; AVX2: vhaddps
+; CHECK-NEXT: ret
+
+
+define <4 x float> @test3_undef(<4 x float> %a, <4 x float> %b) {
+  %vecext = extractelement <4 x float> %a, i32 0
+  %vecext1 = extractelement <4 x float> %a, i32 1
+  %add = fadd float %vecext, %vecext1
+  %vecinit = insertelement <4 x float> undef, float %add, i32 0
+  %vecext2 = extractelement <4 x float> %a, i32 2
+  %vecext3 = extractelement <4 x float> %a, i32 3
+  %add4 = fadd float %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
+  %vecext6 = extractelement <4 x float> %b, i32 0
+  %vecext7 = extractelement <4 x float> %b, i32 1
+  %add8 = fadd float %vecext6, %vecext7
+  %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2
+  ret <4 x float> %vecinit9
+}
+; CHECK-LABEL: test3_undef
+; SSE: haddps
+; AVX: vhaddps
+; AVX2: vhaddps
+; CHECK-NEXT: ret
+
+
+define <4 x float> @test4_undef(<4 x float> %a, <4 x float> %b) {
+  %vecext = extractelement <4 x float> %a, i32 0
+  %vecext1 = extractelement <4 x float> %a, i32 1
+  %add = fadd float %vecext, %vecext1
+  %vecinit = insertelement <4 x float> undef, float %add, i32 0
+  ret <4 x float> %vecinit
+}
+; CHECK-LABEL: test4_undef
+; CHECK-NOT: haddps
+; CHECK: ret
+
+
+define <2 x double> @test5_undef(<2 x double> %a, <2 x double> %b) {
+  %vecext = extractelement <2 x double> %a, i32 0
+  %vecext1 = extractelement <2 x double> %a, i32 1
+  %add = fadd double %vecext, %vecext1
+  %vecinit = insertelement <2 x double> undef, double %add, i32 0
+  ret <2 x double> %vecinit
+}
+; CHECK-LABEL: test5_undef
+; CHECK-NOT: haddpd
+; CHECK: ret
+
+
+define <4 x float> @test6_undef(<4 x float> %a, <4 x float> %b) {
+  %vecext = extractelement <4 x float> %a, i32 0
+  %vecext1 = extractelement <4 x float> %a, i32 1
+  %add = fadd float %vecext, %vecext1
+  %vecinit = insertelement <4 x float> undef, float %add, i32 0
+  %vecext2 = extractelement <4 x float> %a, i32 2
+  %vecext3 = extractelement <4 x float> %a, i32 3
+  %add4 = fadd float %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
+  ret <4 x float> %vecinit5
+}
+; CHECK-LABEL: test6_undef
+; SSE: haddps
+; AVX: vhaddps
+; AVX2: vhaddps
+; CHECK-NEXT: ret
+
+
+define <4 x float> @test7_undef(<4 x float> %a, <4 x float> %b) {
+  %vecext = extractelement <4 x float> %b, i32 0
+  %vecext1 = extractelement <4 x float> %b, i32 1
+  %add = fadd float %vecext, %vecext1
+  %vecinit = insertelement <4 x float> undef, float %add, i32 2
+  %vecext2 = extractelement <4 x float> %b, i32 2
+  %vecext3 = extractelement <4 x float> %b, i32 3
+  %add4 = fadd float %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 3
+  ret <4 x float> %vecinit5
+}
+; CHECK-LABEL: test7_undef
+; SSE: haddps
+; AVX: vhaddps
+; AVX2: vhaddps
+; CHECK-NEXT: ret
+
+
+define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) {
+  %vecext = extractelement <4 x float> %a, i32 0
+  %vecext1 = extractelement <4 x float> %a, i32 1
+  %add = fadd float %vecext, %vecext1
+  %vecinit = insertelement <4 x float> undef, float %add, i32 0
+  %vecext2 = extractelement <4 x float> %a, i32 2
+  %vecext3 = extractelement <4 x float> %a, i32 3
+  %add4 = fadd float %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 2
+  ret <4 x float> %vecinit5
+}
+; CHECK-LABEL: test8_undef
+; CHECK-NOT: haddps
+; CHECK: ret
+
+
+define <4 x float> @test9_undef(<4 x float> %a, <4 x float> %b) {
+  %vecext = extractelement <4 x float> %a, i32 0
+  %vecext1 = extractelement <4 x float> %a, i32 1
+  %add = fadd float %vecext, %vecext1
+  %vecinit = insertelement <4 x float> undef, float %add, i32 0
+  %vecext2 = extractelement <4 x float> %b, i32 2
+  %vecext3 = extractelement <4 x float> %b, i32 3
+  %add4 = fadd float %vecext2, %vecext3
+  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 3
+  ret <4 x float> %vecinit5
+}
+; CHECK-LABEL: test9_undef
+; CHECK: haddps
+; CHECK-NEXT: ret
+
+define <8 x float> @test10_undef(<8 x float> %a, <8 x float> %b) {
+  %vecext = extractelement <8 x float> %a, i32 0
+  %vecext1 = extractelement <8 x float> %a, i32 1
+  %add = fadd float %vecext, %vecext1
+  %vecinit = insertelement <8 x float> undef, float %add, i32 0
+  %vecext2 = extractelement <8 x float> %b, i32 2
+  %vecext3 = extractelement <8 x float> %b, i32 3
+  %add4 = fadd float %vecext2, %vecext3
+  %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 3
+  ret <8 x float> %vecinit5
+}
+; CHECK-LABEL: test10_undef
+; SSE: haddps
+; AVX: vhaddps
+; AVX2: vhaddps
+; CHECK-NOT: haddps
+; CHECK: ret
+
+define <8 x float> @test11_undef(<8 x float> %a, <8 x float> %b) {
+  %vecext = extractelement <8 x float> %a, i32 0
+  %vecext1 = extractelement <8 x float> %a, i32 1
+  %add = fadd float %vecext, %vecext1
+  %vecinit = insertelement <8 x float> undef, float %add, i32 0
+  %vecext2 = extractelement <8 x float> %b, i32 4
+  %vecext3 = extractelement <8 x float> %b, i32 5
+  %add4 = fadd float %vecext2, %vecext3
+  %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 6
+  ret <8 x float> %vecinit5
+}
+; CHECK-LABEL: test11_undef
+; SSE-NOT: haddps
+; AVX: vhaddps
+; AVX2: vhaddps
+; CHECK: ret
+
+define <8 x float> @test12_undef(<8 x float> %a, <8 x float> %b) {
+  %vecext = extractelement <8 x float> %a, i32 0
+  %vecext1 = extractelement <8 x float> %a, i32 1
+  %add = fadd float %vecext, %vecext1
+  %vecinit = insertelement <8 x float> undef, float %add, i32 0
+  %vecext2 = extractelement <8 x float> %a, i32 2
+  %vecext3 = extractelement <8 x float> %a, i32 3
+  %add4 = fadd float %vecext2, %vecext3
+  %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 1
+  ret <8 x float> %vecinit5
+}
+; CHECK-LABEL: test12_undef
+; SSE: haddps
+; AVX: vhaddps
+; AVX2: vhaddps
+; CHECK-NOT: haddps
+; CHECK: ret
+
+define <8 x float> @test13_undef(<8 x float> %a, <8 x float> %b) {
+  %vecext = extractelement <8 x float> %a, i32 0
+  %vecext1 = extractelement <8 x float> %a, i32 1
+  %add1 = fadd float %vecext, %vecext1
+  %vecinit1 = insertelement <8 x float> undef, float %add1, i32 0
+  %vecext2 = extractelement <8 x float> %a, i32 2
+  %vecext3 = extractelement <8 x float> %a, i32 3
+  %add2 = fadd float %vecext2, %vecext3
+  %vecinit2 = insertelement <8 x float> %vecinit1, float %add2, i32 1
+  %vecext4 = extractelement <8 x float> %a, i32 4
+  %vecext5 = extractelement <8 x float> %a, i32 5
+  %add3 = fadd float %vecext4, %vecext5
+  %vecinit3 = insertelement <8 x float> %vecinit2, float %add3, i32 2
+  %vecext6 = extractelement <8 x float> %a, i32 6
+  %vecext7 = extractelement <8 x float> %a, i32 7
+  %add4 = fadd float %vecext6, %vecext7
+  %vecinit4 = insertelement <8 x float> %vecinit3, float %add4, i32 3
+  ret <8 x float> %vecinit4
+}
+; CHECK-LABEL: test13_undef
+; SSE: haddps
+; SSE-NOT: haddps
+; AVX: vhaddps
+; AVX2: vhaddps
+; CHECK-NOT: haddps
+; CHECK: ret
+
+define <8 x i32> @test14_undef(<8 x i32> %a, <8 x i32> %b) {
+  %vecext = extractelement <8 x i32> %a, i32 0
+  %vecext1 = extractelement <8 x i32> %a, i32 1
+  %add = add i32 %vecext, %vecext1
+  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
+  %vecext2 = extractelement <8 x i32> %b, i32 2
+  %vecext3 = extractelement <8 x i32> %b, i32 3
+  %add4 = add i32 %vecext2, %vecext3
+  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 3
+  ret <8 x i32> %vecinit5
+}
+; CHECK-LABEL: test14_undef
+; SSE: phaddd
+; AVX: vphaddd
+; AVX2: vphaddd
+; CHECK-NOT: phaddd
+; CHECK: ret
+
+; On AVX2, the following sequence can be folded into a single horizontal add.
+; If the Subtarget doesn't support AVX2, then we avoid emitting two packed 
+; integer horizontal adds instead of two scalar adds followed by vector inserts.
+define <8 x i32> @test15_undef(<8 x i32> %a, <8 x i32> %b) {
+  %vecext = extractelement <8 x i32> %a, i32 0
+  %vecext1 = extractelement <8 x i32> %a, i32 1
+  %add = add i32 %vecext, %vecext1
+  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
+  %vecext2 = extractelement <8 x i32> %b, i32 4
+  %vecext3 = extractelement <8 x i32> %b, i32 5
+  %add4 = add i32 %vecext2, %vecext3
+  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 6
+  ret <8 x i32> %vecinit5
+}
+; CHECK-LABEL: test15_undef
+; SSE-NOT: phaddd
+; AVX-NOT: vphaddd
+; AVX2: vphaddd
+; CHECK: ret
+
+define <8 x i32> @test16_undef(<8 x i32> %a, <8 x i32> %b) {
+  %vecext = extractelement <8 x i32> %a, i32 0
+  %vecext1 = extractelement <8 x i32> %a, i32 1
+  %add = add i32 %vecext, %vecext1
+  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
+  %vecext2 = extractelement <8 x i32> %a, i32 2
+  %vecext3 = extractelement <8 x i32> %a, i32 3
+  %add4 = add i32 %vecext2, %vecext3
+  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 1
+  ret <8 x i32> %vecinit5
+}
+; CHECK-LABEL: test16_undef
+; SSE: phaddd
+; AVX: vphaddd
+; AVX2: vphaddd
+; CHECK-NOT: haddps
+; CHECK: ret
+
+define <8 x i32> @test17_undef(<8 x i32> %a, <8 x i32> %b) {
+  %vecext = extractelement <8 x i32> %a, i32 0
+  %vecext1 = extractelement <8 x i32> %a, i32 1
+  %add1 = add i32 %vecext, %vecext1
+  %vecinit1 = insertelement <8 x i32> undef, i32 %add1, i32 0
+  %vecext2 = extractelement <8 x i32> %a, i32 2
+  %vecext3 = extractelement <8 x i32> %a, i32 3
+  %add2 = add i32 %vecext2, %vecext3
+  %vecinit2 = insertelement <8 x i32> %vecinit1, i32 %add2, i32 1
+  %vecext4 = extractelement <8 x i32> %a, i32 4
+  %vecext5 = extractelement <8 x i32> %a, i32 5
+  %add3 = add i32 %vecext4, %vecext5
+  %vecinit3 = insertelement <8 x i32> %vecinit2, i32 %add3, i32 2
+  %vecext6 = extractelement <8 x i32> %a, i32 6
+  %vecext7 = extractelement <8 x i32> %a, i32 7
+  %add4 = add i32 %vecext6, %vecext7
+  %vecinit4 = insertelement <8 x i32> %vecinit3, i32 %add4, i32 3
+  ret <8 x i32> %vecinit4
+}
+; CHECK-LABEL: test17_undef
+; SSE: phaddd
+; AVX: vphaddd
+; AVX2: vphaddd
+; CHECK-NOT: haddps
+; CHECK: ret
+
diff --git a/test/CodeGen/X86/i8-umulo.ll b/test/CodeGen/X86/i8-umulo.ll
new file mode 100644
index 0000000..ba846f3
--- /dev/null
+++ b/test/CodeGen/X86/i8-umulo.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mcpu=generic -march=x86 < %s | FileCheck %s
+; PR19858
+
+declare {i8, i1} @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
+define i8 @testumulo(i32 %argc) {
+; CHECK: imulw
+; CHECK: testb %{{.+}}, %{{.+}}
+; CHECK: je [[NOOVERFLOWLABEL:.+]]
+; CHECK: {{.*}}[[NOOVERFLOWLABEL]]:
+; CHECK-NEXT: movb
+; CHECK-NEXT: retl
+top:
+  %RHS = trunc i32 %argc to i8
+  %umul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 25, i8 %RHS)
+  %ex = extractvalue { i8, i1 } %umul, 1
+  br i1 %ex, label %overflow, label %nooverlow
+
+overflow:
+  ret i8 %RHS
+
+nooverlow:
+  %umul.value = extractvalue { i8, i1 } %umul, 0
+  ret i8 %umul.value
+}
diff --git a/test/CodeGen/X86/jump_table_alias.ll b/test/CodeGen/X86/jump_table_alias.ll
new file mode 100644
index 0000000..f3691fd
--- /dev/null
+++ b/test/CodeGen/X86/jump_table_alias.ll
@@ -0,0 +1,33 @@
+; RUN: llc <%s -jump-table-type=single | FileCheck %s
+target triple = "x86_64-unknown-linux-gnu"
+define i32 @f() unnamed_addr jumptable {
+entry:
+  ret i32 0
+}
+
+@i = alias internal i32 ()* @f
+@j = alias i32 ()* @f
+
+define i32 @main(i32 %argc, i8** %argv) {
+  %temp = alloca i32 ()*, align 8
+  store i32 ()* @i, i32()** %temp, align 8
+; CHECK: movq    $__llvm_jump_instr_table_0_1
+  %1 = load i32 ()** %temp, align 8
+; CHECK: movl    $__llvm_jump_instr_table_0_1
+  %2 = call i32 ()* %1()
+  %3 = call i32 ()* @i()
+; CHECK: callq   i
+  %4 = call i32 ()* @j()
+; CHECK: callq   j
+  ret i32 %3
+}
+
+; There should only be one table, even though there are two GlobalAliases,
+; because they both alias the same value.
+
+; CHECK:         .globl  __llvm_jump_instr_table_0_1
+; CHECK:         .align  8, 0x90
+; CHECK:         .type   __llvm_jump_instr_table_0_1,@function
+; CHECK: __llvm_jump_instr_table_0_1:
+; CHECK:         jmp     f@PLT
+
diff --git a/test/CodeGen/X86/jump_table_bitcast.ll b/test/CodeGen/X86/jump_table_bitcast.ll
new file mode 100644
index 0000000..33a798f
--- /dev/null
+++ b/test/CodeGen/X86/jump_table_bitcast.ll
@@ -0,0 +1,46 @@
+; RUN: llc <%s -jump-table-type=single | FileCheck %s
+target triple = "x86_64-unknown-linux-gnu"
+define i32 @f() unnamed_addr jumptable {
+  ret i32 0
+}
+
+define i32 @g(i8* %a) unnamed_addr jumptable {
+  ret i32 0
+}
+
+define void @h(void ()* %func) unnamed_addr jumptable {
+  ret void
+}
+
+define i32 @main() {
+  %g = alloca i32 (...)*, align 8
+  store i32 (...)* bitcast (i32 ()* @f to i32 (...)*), i32 (...)** %g, align 8
+; CHECK: movq    $__llvm_jump_instr_table_0_[[ENTRY:1|2|3]], (%rsp)
+; CHECK: movl    $__llvm_jump_instr_table_0_[[ENTRY]], %ecx
+  %1 = load i32 (...)** %g, align 8
+  %call = call i32 (...)* %1()
+  call void (void ()*)* @h(void ()* bitcast (void (void ()*)* @h to void ()*))
+; CHECK: movl    $__llvm_jump_instr_table_0_{{1|2|3}}, %edi
+; CHECK: callq   h
+
+  %a = call i32 (i32*)* bitcast (i32 (i8*)* @g to i32(i32*)*)(i32* null)
+; CHECK: callq g
+  ret i32 %a
+}
+
+; CHECK:         .globl  __llvm_jump_instr_table_0_1
+; CHECK:         .align  8, 0x90
+; CHECK:         .type   __llvm_jump_instr_table_0_1,@function
+; CHECK: __llvm_jump_instr_table_0_1:
+; CHECK:         jmp     {{f|g|h}}@PLT
+; CHECK:         .globl  __llvm_jump_instr_table_0_2
+; CHECK:         .align  8, 0x90
+; CHECK:         .type   __llvm_jump_instr_table_0_2,@function
+; CHECK: __llvm_jump_instr_table_0_2:
+; CHECK:         jmp     {{f|g|h}}@PLT
+; CHECK:         .globl  __llvm_jump_instr_table_0_3
+; CHECK:         .align  8, 0x90
+; CHECK:         .type   __llvm_jump_instr_table_0_3,@function
+; CHECK: __llvm_jump_instr_table_0_3:
+; CHECK:         jmp     {{f|g|h}}@PLT
+
diff --git a/test/CodeGen/X86/jump_tables.ll b/test/CodeGen/X86/jump_tables.ll
new file mode 100644
index 0000000..5a0aed0
--- /dev/null
+++ b/test/CodeGen/X86/jump_tables.ll
@@ -0,0 +1,272 @@
+; RUN: llc <%s -jump-table-type=single | FileCheck --check-prefix=SINGLE %s
+; RUN: llc <%s -jump-table-type=arity | FileCheck --check-prefix=ARITY %s
+; RUN: llc <%s -jump-table-type=simplified | FileCheck --check-prefix=SIMPL %s
+; RUN: llc <%s -jump-table-type=full | FileCheck --check-prefix=FULL %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.fun_struct = type { i32 (...)* }
+
+define void @indirect_fun() unnamed_addr jumptable {
+  ret void
+}
+
+define void @indirect_fun_match() unnamed_addr jumptable {
+  ret void
+}
+
+define i32 @indirect_fun_i32() unnamed_addr jumptable {
+  ret i32 0
+}
+
+define i32 @indirect_fun_i32_1(i32 %a) unnamed_addr jumptable {
+  ret i32 %a
+}
+
+define i32 @indirect_fun_i32_2(i32 %a, i32 %b) unnamed_addr jumptable {
+  ret i32 %a
+}
+
+define i32* @indirect_fun_i32S_2(i32* %a, i32 %b) unnamed_addr jumptable {
+  ret i32* %a
+}
+
+define void @indirect_fun_struct(%struct.fun_struct %fs) unnamed_addr jumptable {
+  ret void
+}
+
+define void @indirect_fun_fun(i32 (...)* %fun, i32 %a) unnamed_addr jumptable {
+  ret void
+}
+
+define i32 @indirect_fun_fun_ret(i32 (...)* %fun, i32 %a) unnamed_addr jumptable {
+  ret i32 %a
+}
+
+define void @indirect_fun_array([19 x i8] %a) unnamed_addr jumptable {
+  ret void
+}
+
+define void @indirect_fun_vec(<3 x i32> %a) unnamed_addr jumptable {
+  ret void
+}
+
+define void @indirect_fun_vec_2(<4 x float> %a) unnamed_addr jumptable {
+  ret void
+}
+
+define i32 @m(void ()* %fun) {
+  call void ()* %fun()
+  ret i32 0
+}
+
+define void ()* @get_fun() {
+  ret void ()* @indirect_fun
+; SINGLE: movl    $__llvm_jump_instr_table_0_
+; ARITY: movl    $__llvm_jump_instr_table_
+; SIMPL: movl    $__llvm_jump_instr_table_
+; FULL: movl    $__llvm_jump_instr_table_
+}
+
+define i32 @main(i32 %argc, i8** %argv) {
+  %f = call void ()* ()* @get_fun()
+  %a = call i32 @m(void ()* %f)
+  ret i32 %a
+}
+
+; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_1
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_1,@function
+; SINGLE-DAG: __llvm_jump_instr_table_0_1:
+; SINGLE-DAG:         jmp     indirect_fun_array@PLT
+; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_2
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_2,@function
+; SINGLE-DAG: __llvm_jump_instr_table_0_2:
+; SINGLE-DAG:         jmp     indirect_fun_i32_2@PLT
+; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_3
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_3,@function
+; SINGLE-DAG: __llvm_jump_instr_table_0_3:
+; SINGLE-DAG:         jmp     indirect_fun_vec_2@PLT
+; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_4
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_4,@function
+; SINGLE-DAG: __llvm_jump_instr_table_0_4:
+; SINGLE-DAG:         jmp     indirect_fun_i32S_2@PLT
+; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_5
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_5,@function
+; SINGLE-DAG: __llvm_jump_instr_table_0_5:
+; SINGLE-DAG:         jmp     indirect_fun_struct@PLT
+; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_6
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_6,@function
+; SINGLE-DAG: __llvm_jump_instr_table_0_6:
+; SINGLE-DAG:         jmp     indirect_fun_i32_1@PLT
+; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_7
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_7,@function
+; SINGLE-DAG: __llvm_jump_instr_table_0_7:
+; SINGLE-DAG:         jmp     indirect_fun_i32@PLT
+; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_8
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_8,@function
+; SINGLE-DAG: __llvm_jump_instr_table_0_8:
+; SINGLE-DAG:         jmp     indirect_fun_fun@PLT
+; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_9
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_9,@function
+; SINGLE-DAG: __llvm_jump_instr_table_0_9:
+; SINGLE-DAG:         jmp     indirect_fun_fun_ret@PLT
+; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_10
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_10,@function
+; SINGLE-DAG: __llvm_jump_instr_table_0_10:
+; SINGLE-DAG:         jmp     indirect_fun@PLT
+; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_11
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_11,@function
+; SINGLE-DAG: __llvm_jump_instr_table_0_11:
+; SINGLE-DAG:         jmp     indirect_fun_match@PLT
+; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_12
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_12,@function
+; SINGLE-DAG: __llvm_jump_instr_table_0_12:
+; SINGLE-DAG:         jmp     indirect_fun_vec@PLT
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         ud2
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         ud2
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         ud2
+; SINGLE-DAG:         .align  8, 0x90
+; SINGLE-DAG:         ud2
+
+
+; ARITY-DAG:         .globl  __llvm_jump_instr_table_2_1
+; ARITY-DAG:         .align  8, 0x90
+; ARITY-DAG:         .type   __llvm_jump_instr_table_2_1,@function
+; ARITY-DAG: __llvm_jump_instr_table_2_1:
+; ARITY-DAG:         jmp     indirect_fun{{.*}}@PLT
+; ARITY-DAG:         .align  8, 0x90
+; ARITY-DAG:         ud2
+; ARITY-DAG:         .globl  __llvm_jump_instr_table_0_1
+; ARITY-DAG:         .align  8, 0x90
+; ARITY-DAG:         .type   __llvm_jump_instr_table_0_1,@function
+; ARITY-DAG: __llvm_jump_instr_table_0_1:
+; ARITY-DAG:         jmp     indirect_fun{{.*}}@PLT
+; ARITY-DAG:         .globl  __llvm_jump_instr_table_1_1
+; ARITY-DAG:         .align  8, 0x90
+; ARITY-DAG:         .type   __llvm_jump_instr_table_1_1,@function
+; ARITY-DAG: __llvm_jump_instr_table_1_1:
+; ARITY-DAG:         jmp     indirect_fun{{.*}}@PLT
+
+; SIMPL-DAG:         .globl  __llvm_jump_instr_table_2_1
+; SIMPL-DAG:         .align  8, 0x90
+; SIMPL-DAG:         .type   __llvm_jump_instr_table_2_1,@function
+; SIMPL-DAG: __llvm_jump_instr_table_2_1:
+; SIMPL-DAG:         jmp     indirect_fun{{.*}}@PLT
+; SIMPL-DAG:         .align  8, 0x90
+; SIMPL-DAG:         ud2
+; SIMPL-DAG:         .globl  __llvm_jump_instr_table_0_1
+; SIMPL-DAG:         .align  8, 0x90
+; SIMPL-DAG:         .type   __llvm_jump_instr_table_0_1,@function
+; SIMPL-DAG: __llvm_jump_instr_table_0_1:
+; SIMPL-DAG:         jmp     indirect_fun{{.*}}@PLT
+; SIMPL-DAG:         .globl  __llvm_jump_instr_table_1_1
+; SIMPL-DAG:         .align  8, 0x90
+; SIMPL-DAG:         .type   __llvm_jump_instr_table_1_1,@function
+; SIMPL-DAG: __llvm_jump_instr_table_1_1:
+; SIMPL-DAG:         jmp     indirect_fun{{.*}}@PLT
+; SIMPL-DAG:         .globl  __llvm_jump_instr_table_3_1
+; SIMPL-DAG:         .align  8, 0x90
+; SIMPL-DAG:         .type   __llvm_jump_instr_table_3_1,@function
+; SIMPL-DAG: __llvm_jump_instr_table_3_1:
+; SIMPL-DAG:         jmp     indirect_fun{{.*}}@PLT
+; SIMPL-DAG:         .globl  __llvm_jump_instr_table_4_1
+; SIMPL-DAG:         .align  8, 0x90
+; SIMPL-DAG:         .type   __llvm_jump_instr_table_4_1,@function
+; SIMPL-DAG: __llvm_jump_instr_table_4_1:
+; SIMPL-DAG:         jmp     indirect_fun{{.*}}@PLT
+
+
+; FULL-DAG:        .globl  __llvm_jump_instr_table_10_1
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        .type   __llvm_jump_instr_table_10_1,@function
+; FULL-DAG:__llvm_jump_instr_table_10_1:
+; FULL-DAG:        jmp     indirect_fun_i32_1@PLT
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        ud2
+; FULL-DAG:        .globl  __llvm_jump_instr_table_9_1
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        .type   __llvm_jump_instr_table_9_1,@function
+; FULL-DAG:__llvm_jump_instr_table_9_1:
+; FULL-DAG:        jmp     indirect_fun_i32_2@PLT
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        ud2
+; FULL-DAG:        .globl  __llvm_jump_instr_table_7_1
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        .type   __llvm_jump_instr_table_7_1,@function
+; FULL-DAG:__llvm_jump_instr_table_7_1:
+; FULL-DAG:        jmp     indirect_fun_i32S_2@PLT
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        ud2
+; FULL-DAG:        .globl  __llvm_jump_instr_table_3_1
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        .type   __llvm_jump_instr_table_3_1,@function
+; FULL-DAG:__llvm_jump_instr_table_3_1:
+; FULL-DAG:        jmp     indirect_fun_vec_2@PLT
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        ud2
+; FULL-DAG:        .globl  __llvm_jump_instr_table_2_1
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        .type   __llvm_jump_instr_table_2_1,@function
+; FULL-DAG:__llvm_jump_instr_table_2_1:
+; FULL-DAG:        jmp     indirect_fun@PLT
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        ud2
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        ud2
+; FULL-DAG:        .globl  __llvm_jump_instr_table_8_1
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        .type   __llvm_jump_instr_table_8_1,@function
+; FULL-DAG:__llvm_jump_instr_table_8_1:
+; FULL-DAG:        jmp     indirect_fun_i32@PLT
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        ud2
+; FULL-DAG:        .globl  __llvm_jump_instr_table_1_1
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        .type   __llvm_jump_instr_table_1_1,@function
+; FULL-DAG:__llvm_jump_instr_table_1_1:
+; FULL-DAG:        jmp     indirect_fun_array@PLT
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        ud2
+; FULL-DAG:        .globl  __llvm_jump_instr_table_0_1
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        .type   __llvm_jump_instr_table_0_1,@function
+; FULL-DAG:__llvm_jump_instr_table_0_1:
+; FULL-DAG:        jmp     indirect_fun_vec@PLT
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        ud2
+; FULL-DAG:        .globl  __llvm_jump_instr_table_6_1
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        .type   __llvm_jump_instr_table_6_1,@function
+; FULL-DAG:__llvm_jump_instr_table_6_1:
+; FULL-DAG:        jmp     indirect_fun_struct@PLT
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        ud2
+; FULL-DAG:        .globl  __llvm_jump_instr_table_5_1
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        .type   __llvm_jump_instr_table_5_1,@function
+; FULL-DAG:__llvm_jump_instr_table_5_1:
+; FULL-DAG:        jmp     indirect_fun_fun@PLT
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        ud2
+; FULL-DAG:        .globl  __llvm_jump_instr_table_4_1
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        .type   __llvm_jump_instr_table_4_1,@function
+; FULL-DAG:__llvm_jump_instr_table_4_1:
+; FULL-DAG:        jmp     indirect_fun_fun_ret@PLT
+; FULL-DAG:        .align  8, 0x90
+; FULL-DAG:        ud2
diff --git a/test/CodeGen/X86/libcall-sret.ll b/test/CodeGen/X86/libcall-sret.ll
new file mode 100644
index 0000000..67b99ac
--- /dev/null
+++ b/test/CodeGen/X86/libcall-sret.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=i686-linux-gnu -o - %s | FileCheck %s
+
+@var = global i128 0
+
+; We were trying to convert the i128 operation into a libcall, but failing to
+; perform sret demotion when we couldn't return the result in registers. Make
+; sure we marshal the return properly:
+
+define void @test_sret_libcall(i128 %l, i128 %r) {
+; CHECK-LABEL: test_sret_libcall:
+
+  ; Stack for call: 4(sret ptr), 16(i128 %l), 16(128 %r). So next logical
+  ; (aligned) place for the actual sret data is %esp + 40.
+; CHECK: leal 40(%esp), [[SRET_ADDR:%[a-z]+]]
+; CHECK: movl [[SRET_ADDR]], (%esp)
+; CHECK: calll __multi3
+; CHECK-DAG: movl 40(%esp), [[RES0:%[a-z]+]]
+; CHECK-DAG: movl 44(%esp), [[RES1:%[a-z]+]]
+; CHECK-DAG: movl 48(%esp), [[RES2:%[a-z]+]]
+; CHECK-DAG: movl 52(%esp), [[RES3:%[a-z]+]]
+; CHECK-DAG: movl [[RES0]], var
+; CHECK-DAG: movl [[RES1]], var+4
+; CHECK-DAG: movl [[RES2]], var+8
+; CHECK-DAG: movl [[RES3]], var+12
+  %prod = mul i128 %l, %r
+  store i128 %prod, i128* @var
+  ret void
+}
diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg
index 3d91b03..8ed58f1 100644
--- a/test/CodeGen/X86/lit.local.cfg
+++ b/test/CodeGen/X86/lit.local.cfg
@@ -6,7 +6,6 @@
 # cleanly.
 config.suffixes = ['.ll', '.test', '.txt']
 
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/X86/lower-bitcast.ll b/test/CodeGen/X86/lower-bitcast.ll
index b9b29a5..f47161e 100644
--- a/test/CodeGen/X86/lower-bitcast.ll
+++ b/test/CodeGen/X86/lower-bitcast.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
 
 
 define double @test1(double %A) {
@@ -9,14 +10,19 @@ define double @test1(double %A) {
 }
 ; FIXME: Ideally we should be able to fold the entire body of @test1 into a
 ; single paddd instruction. At the moment we produce the sequence 
-; pshufd+paddq+pshufd.
-
+; pshufd+paddq+pshufd. This is fixed with the widening legalization.
+;
 ; CHECK-LABEL: test1
 ; CHECK-NOT: movsd
 ; CHECK: pshufd
-; CHECK-NEXT: paddq
+; CHECK-NEXT: paddd
 ; CHECK-NEXT: pshufd
 ; CHECK-NEXT: ret
+;
+; CHECK-WIDE-LABEL: test1
+; CHECK-WIDE-NOT: movsd
+; CHECK-WIDE: paddd
+; CHECK-WIDE-NEXT: ret
 
 
 define double @test2(double %A, double %B) {
@@ -26,17 +32,15 @@ define double @test2(double %A, double %B) {
   %3 = bitcast <2 x i32> %add to double
   ret double %3
 }
-; FIXME: Ideally we should be able to fold the entire body of @test2 into a
-; single 'paddd %xmm1, %xmm0' instruction. At the moment we produce the
-; sequence pshufd+pshufd+paddq+pshufd.
-
 ; CHECK-LABEL: test2
 ; CHECK-NOT: movsd
-; CHECK: pshufd
-; CHECK-NEXT: pshufd
-; CHECK-NEXT: paddq
-; CHECK-NEXT: pshufd
+; CHECK: paddd
 ; CHECK-NEXT: ret
+;
+; CHECK-WIDE-LABEL: test2
+; CHECK-WIDE-NOT: movsd
+; CHECK-WIDE: paddd
+; CHECK-WIDE-NEXT: ret
 
 
 define i64 @test3(i64 %A) {
@@ -50,6 +54,12 @@ define i64 @test3(i64 %A) {
 ; CHECK: addps
 ; CHECK-NOT: pshufd
 ; CHECK: ret
+;
+; CHECK-WIDE-LABEL: test3
+; CHECK-WIDE-NOT: pshufd
+; CHECK-WIDE: addps
+; CHECK-WIDE-NOT: pshufd
+; CHECK-WIDE: ret
 
 
 define i64 @test4(i64 %A) {
@@ -59,13 +69,20 @@ define i64 @test4(i64 %A) {
   ret i64 %2
 }
 ; FIXME: At the moment we still produce the sequence pshufd+paddq+pshufd.
-; Ideally, we should fold that sequence into a single paddd.
-
+; Ideally, we should fold that sequence into a single paddd. This is fixed with
+; the widening legalization.
+;
 ; CHECK-LABEL: test4
 ; CHECK: pshufd
 ; CHECK-NEXT: paddq
 ; CHECK-NEXT: pshufd
 ; CHECK: ret
+;
+; CHECK-WIDE-LABEL: test4
+; CHECK-WIDE: movd %{{rdi|rcx}},
+; CHECK-WIDE-NEXT: paddd
+; CHECK-WIDE-NEXT: movd {{.*}}, %rax
+; CHECK-WIDE: ret
 
 
 define double @test5(double %A) {
@@ -77,6 +94,10 @@ define double @test5(double %A) {
 ; CHECK-LABEL: test5
 ; CHECK: addps
 ; CHECK-NEXT: ret
+;
+; CHECK-WIDE-LABEL: test5
+; CHECK-WIDE: addps
+; CHECK-WIDE-NEXT: ret
 
 
 define double @test6(double %A) {
@@ -86,14 +107,20 @@ define double @test6(double %A) {
   ret double %2
 }
 ; FIXME: Ideally we should be able to fold the entire body of @test6 into a
-; single paddw instruction.
-
+; single paddw instruction. This is fixed with the widening legalization.
+;
 ; CHECK-LABEL: test6
 ; CHECK-NOT: movsd
 ; CHECK: punpcklwd
-; CHECK-NEXT: paddd
+; CHECK-NEXT: paddw
 ; CHECK-NEXT: pshufb
 ; CHECK-NEXT: ret
+;
+; CHECK-WIDE-LABEL: test6
+; CHECK-WIDE-NOT: mov
+; CHECK-WIDE-NOT: punpcklwd
+; CHECK-WIDE: paddw
+; CHECK-WIDE-NEXT: ret
 
 
 define double @test7(double %A, double %B) {
@@ -103,17 +130,17 @@ define double @test7(double %A, double %B) {
   %3 = bitcast <4 x i16> %add to double
   ret double %3
 }
-; FIXME: Ideally we should be able to fold the entire body of @test7 into a
-; single 'paddw %xmm1, %xmm0' instruction. At the moment we produce the
-; sequence pshufd+pshufd+paddd+pshufd.
-
 ; CHECK-LABEL: test7
 ; CHECK-NOT: movsd
-; CHECK: punpcklwd
-; CHECK-NEXT: punpcklwd
-; CHECK-NEXT: paddd
-; CHECK-NEXT: pshufb
+; CHECK-NOT: punpcklwd
+; CHECK: paddw
 ; CHECK-NEXT: ret
+;
+; CHECK-WIDE-LABEL: test7
+; CHECK-WIDE-NOT: movsd
+; CHECK-WIDE-NOT: punpcklwd
+; CHECK-WIDE: paddw
+; CHECK-WIDE-NEXT: ret
 
 
 define double @test8(double %A) {
@@ -124,14 +151,20 @@ define double @test8(double %A) {
 }
 ; FIXME: Ideally we should be able to fold the entire body of @test8 into a
 ; single paddb instruction. At the moment we produce the sequence 
-; pshufd+paddw+pshufd.
-
+; pshufd+paddw+pshufd. This is fixed with the widening legalization.
+;
 ; CHECK-LABEL: test8
 ; CHECK-NOT: movsd
 ; CHECK: punpcklbw
-; CHECK-NEXT: paddw
+; CHECK-NEXT: paddb
 ; CHECK-NEXT: pshufb
 ; CHECK-NEXT: ret
+;
+; CHECK-WIDE-LABEL: test8
+; CHECK-WIDE-NOT: movsd
+; CHECK-WIDE-NOT: punpcklbw
+; CHECK-WIDE: paddb
+; CHECK-WIDE-NEXT: ret
 
 
 define double @test9(double %A, double %B) {
@@ -141,15 +174,15 @@ define double @test9(double %A, double %B) {
   %3 = bitcast <8 x i8> %add to double
   ret double %3
 }
-; FIXME: Ideally we should be able to fold the entire body of @test9 into a
-; single 'paddb %xmm1, %xmm0' instruction. At the moment we produce the
-; sequence pshufd+pshufd+paddw+pshufd.
-
 ; CHECK-LABEL: test9
 ; CHECK-NOT: movsd
-; CHECK: punpcklbw
-; CHECK-NEXT: punpcklbw
-; CHECK-NEXT: paddw
-; CHECK-NEXT: pshufb
+; CHECK-NOT: punpcklbw
+; CHECK: paddb
 ; CHECK-NEXT: ret
+;
+; CHECK-WIDE-LABEL: test9
+; CHECK-WIDE-NOT: movsd
+; CHECK-WIDE-NOT: punpcklbw
+; CHECK-WIDE: paddb
+; CHECK-WIDE-NEXT: ret
 
diff --git a/test/CodeGen/X86/macho-comdat.ll b/test/CodeGen/X86/macho-comdat.ll
new file mode 100644
index 0000000..3c2d997
--- /dev/null
+++ b/test/CodeGen/X86/macho-comdat.ll
@@ -0,0 +1,6 @@
+; RUN: not llc -mtriple x86_64-apple-darwin < %s 2> %t
+; RUN: FileCheck < %t %s
+
+$f = comdat any
+@v = global i32 0, comdat $f
+; CHECK: LLVM ERROR: MachO doesn't support COMDATs, 'f' cannot be lowered.
diff --git a/test/CodeGen/X86/null-streamer.ll b/test/CodeGen/X86/null-streamer.ll
index 7c0e82f..fa77fcb 100644
--- a/test/CodeGen/X86/null-streamer.ll
+++ b/test/CodeGen/X86/null-streamer.ll
@@ -1,6 +1,7 @@
 ; Check the MCNullStreamer operates correctly, at least on a minimal test case.
 ;
 ; RUN: llc -filetype=null -o %t -march=x86 %s
+; RUN: llc -filetype=null -o %t -mtriple=i686-cygwin %s
 
 define void @f0()  {
   ret void
@@ -9,3 +10,20 @@ define void @f0()  {
 define void @f1() {
   ret void
 }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !13}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !" ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !2, metadata !""}
+!1 = metadata !{metadata !"", metadata !""}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"", metadata !"", metadata !"", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* null, null, null, metadata !2, i32 2}
+!5 = metadata !{i32 786473, metadata !1}
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null}
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786484, i32 0, null, metadata !"i", metadata !"i", metadata !"_ZL1i", metadata !5, i32 1, metadata !8, i32 1, i32 1, null, null}
+!11 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/pr20020.ll b/test/CodeGen/X86/pr20020.ll
new file mode 100644
index 0000000..83dae36
--- /dev/null
+++ b/test/CodeGen/X86/pr20020.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -disable-lsr -post-RA-scheduler=1 -break-anti-dependencies=critical  | FileCheck %s
+
+; In PR20020, the critical anti-dependency breaker algorithm mistakenly
+; changes the register operands of an 'xorl %eax, %eax' to 'xorl %ecx, %ecx'
+; and then immediately reloads %rcx with a value based on the wrong %rax
+
+; CHECK-NOT: xorl %ecx, %ecx
+; CHECK: leaq 1(%rax), %rcx
+
+
+%struct.planet = type { double, double, double }
+
+; Function Attrs: nounwind ssp uwtable
+define void @advance(i32 %nbodies, %struct.planet* nocapture %bodies) #0 {
+entry:
+  %cmp4 = icmp sgt i32 %nbodies, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.end38
+
+for.body.preheader:                               ; preds = %entry
+  %gep = getelementptr %struct.planet* %bodies, i64 1, i32 1
+  %gep13 = bitcast double* %gep to %struct.planet*
+  %0 = add i32 %nbodies, -1
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.inc20
+  %iv19 = phi i32 [ %0, %for.body.preheader ], [ %iv.next, %for.inc20 ]
+  %iv = phi %struct.planet* [ %gep13, %for.body.preheader ], [ %gep14, %for.inc20 ]
+  %iv9 = phi i64 [ %iv.next10, %for.inc20 ], [ 0, %for.body.preheader ]
+  %iv.next10 = add nuw nsw i64 %iv9, 1
+  %1 = trunc i64 %iv.next10 to i32
+  %cmp22 = icmp slt i32 %1, %nbodies
+  br i1 %cmp22, label %for.body3.lr.ph, label %for.inc20
+
+for.body3.lr.ph:                                  ; preds = %for.body
+  %x = getelementptr inbounds %struct.planet* %bodies, i64 %iv9, i32 0
+  %y = getelementptr inbounds %struct.planet* %bodies, i64 %iv9, i32 1
+  %vx = getelementptr inbounds %struct.planet* %bodies, i64 %iv9, i32 2
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
+  %iv20 = phi i32 [ %iv.next21, %for.body3 ], [ %iv19, %for.body3.lr.ph ]
+  %iv15 = phi %struct.planet* [ %gep16, %for.body3 ], [ %iv, %for.body3.lr.ph ]
+  %iv1517 = bitcast %struct.planet* %iv15 to double*
+  %2 = load double* %x, align 8
+  %gep18 = getelementptr double* %iv1517, i64 -1
+  %3 = load double* %gep18, align 8
+  %sub = fsub double %2, %3
+  %4 = load double* %y, align 8
+  %5 = load double* %iv1517, align 8
+  %sub8 = fsub double %4, %5
+  %add10 = fadd double %sub, %sub8
+  %call = tail call double @sqrt(double %sub8) #2
+  store double %add10, double* %vx, align 8
+  %gep16 = getelementptr %struct.planet* %iv15, i64 1
+  %iv.next21 = add i32 %iv20, -1
+  %exitcond = icmp eq i32 %iv.next21, 0
+  br i1 %exitcond, label %for.inc20, label %for.body3
+
+for.inc20:                                        ; preds = %for.body3, %for.body
+  %lftr.wideiv11 = trunc i64 %iv.next10 to i32
+  %gep14 = getelementptr %struct.planet* %iv, i64 1
+  %iv.next = add i32 %iv19, -1
+  %exitcond12 = icmp eq i32 %lftr.wideiv11, %nbodies
+  br i1 %exitcond12, label %for.end38, label %for.body
+
+for.end38:                                        ; preds = %for.inc20, %entry
+  ret void
+}
+
+; Function Attrs: nounwind
+declare double @sqrt(double) #1
+
+attributes #0 = { "no-frame-pointer-elim-non-leaf" }
diff --git a/test/CodeGen/X86/pr20088.ll b/test/CodeGen/X86/pr20088.ll
new file mode 100644
index 0000000..3a82962
--- /dev/null
+++ b/test/CodeGen/X86/pr20088.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
+
+declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
+
+define <16 x i8> @foo(<16 x i8> %x) {
+; CHECK: vpblendvb
+  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> zeroinitializer, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> %x)
+  ret <16 x i8> %res;
+}
diff --git a/test/CodeGen/X86/pr5145.ll b/test/CodeGen/X86/pr5145.ll
index d048db8..32a797b 100644
--- a/test/CodeGen/X86/pr5145.ll
+++ b/test/CodeGen/X86/pr5145.ll
@@ -5,29 +5,29 @@ define void @atomic_maxmin_i8() {
 ; CHECK: atomic_maxmin_i8
   %1 = atomicrmw max  i8* @sc8, i8 5 acquire
 ; CHECK: [[LABEL1:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: cmovl
+; CHECK: movsbl
+; CHECK: cmpl
 ; CHECK: lock
 ; CHECK-NEXT: cmpxchgb
 ; CHECK: jne [[LABEL1]]
   %2 = atomicrmw min  i8* @sc8, i8 6 acquire
 ; CHECK: [[LABEL3:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: cmovg
+; CHECK: movsbl
+; CHECK: cmpl
 ; CHECK: lock
 ; CHECK-NEXT: cmpxchgb
 ; CHECK: jne [[LABEL3]]
   %3 = atomicrmw umax i8* @sc8, i8 7 acquire
 ; CHECK: [[LABEL5:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: cmovb
+; CHECK: movzbl
+; CHECK: cmpl
 ; CHECK: lock
 ; CHECK-NEXT: cmpxchgb
 ; CHECK: jne [[LABEL5]]
   %4 = atomicrmw umin i8* @sc8, i8 8 acquire
 ; CHECK: [[LABEL7:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: cmova
+; CHECK: movzbl
+; CHECK: cmpl
 ; CHECK: lock
 ; CHECK-NEXT: cmpxchgb
 ; CHECK: jne [[LABEL7]]
diff --git a/test/CodeGen/X86/pshufd-combine-crash.ll b/test/CodeGen/X86/pshufd-combine-crash.ll
new file mode 100644
index 0000000..84c69e3
--- /dev/null
+++ b/test/CodeGen/X86/pshufd-combine-crash.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -debug
+
+; REQUIRES: asserts
+
+; Test that the dag combiner doesn't assert if we try to replace a sequence of two
+; v4f32 X86ISD::PSHUFD nodes with a single PSHUFD.
+
+
+define <4 x float> @test(<4 x float> %V) {
+  %1 = shufflevector <4 x float> %V, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 1>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 1>
+  ret <4 x float> %2
+}
+
diff --git a/test/CodeGen/X86/rdpmc.ll b/test/CodeGen/X86/rdpmc.ll
new file mode 100644
index 0000000..7f1ca46
--- /dev/null
+++ b/test/CodeGen/X86/rdpmc.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86-64 -mcpu=generic | FileCheck %s --check-prefix=CHECK --check-prefix=X86-64
+; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s --check-prefix=CHECK --check-prefix=X86
+
+; Verify that we correctly lower the "Read Performance-Monitoring Counters"
+; x86 builtin.
+
+
+define i64 @test_builtin_read_pmc(i32 %ID) {
+  %1 = tail call i64 @llvm.x86.rdpmc(i32 %ID)
+  ret i64 %1
+}
+; CHECK-LABEL: test_builtin_read_pmc
+; CHECK: rdpmc
+; X86-NOT: shlq
+; X86-NOT: or
+; X86-64: shlq
+; X86-64: or
+; CHECK-NOT: mov
+; CHECK: ret
+
+declare i64 @llvm.x86.rdpmc(i32 %ID)
+
diff --git a/test/CodeGen/X86/shift-parts.ll b/test/CodeGen/X86/shift-parts.ll
index ce4f538..ddad307 100644
--- a/test/CodeGen/X86/shift-parts.ll
+++ b/test/CodeGen/X86/shift-parts.ll
@@ -1,10 +1,12 @@
-; RUN: llc < %s -march=x86-64 | grep shrdq
+; RUN: llc -march=x86-64 < %s | FileCheck %s
 ; PR4736
 
 %0 = type { i32, i8, [35 x i8] }
 
 @g_144 = external global %0, align 8              ; <%0*> [#uses=1]
 
+; CHECK: shrdq
+
 define i32 @int87(i32 %uint64p_8) nounwind {
 entry:
   %srcval4 = load i320* bitcast (%0* @g_144 to i320*), align 8 ; <i320> [#uses=1]
diff --git a/test/CodeGen/X86/sqrt.ll b/test/CodeGen/X86/sqrt.ll
new file mode 100644
index 0000000..be7c6e8
--- /dev/null
+++ b/test/CodeGen/X86/sqrt.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2                             | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx                             | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=AVX
+
+define float @test_sqrt_f32(float %a) {
+; SSE2-LABEL: test_sqrt_f32
+; SSE2:       sqrtss %xmm0, %xmm0
+; AVX-LABEL:  test_sqrt_f32
+; AVX:        vsqrtss %xmm0, %xmm0
+  %res = call float @llvm.sqrt.f32(float %a)
+  ret float %res
+}
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
+define double @test_sqrt_f64(double %a) {
+; SSE2-LABEL: test_sqrt_f64
+; SSE2:       sqrtsd %xmm0, %xmm0
+; AVX-LABEL:  test_sqrt_f64
+; AVX:        vsqrtsd %xmm0, %xmm0
+  %res = call double @llvm.sqrt.f64(double %a)
+  ret double %res
+}
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll
index cfc892d..c906ecd 100644
--- a/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -717,3 +717,30 @@ define void @test_x86_sse2_pause() {
   ret void 
 }
 declare void @llvm.x86.sse2.pause() nounwind
+
+define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
+; CHECK-LABEL: test_x86_sse2_pshuf_d:
+; CHECK: pshufd $27
+entry:
+   %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
+   ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone
+
+define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
+; CHECK-LABEL: test_x86_sse2_pshufl_w:
+; CHECK: pshuflw $27
+entry:
+   %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
+   ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone
+
+define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
+; CHECK-LABEL: test_x86_sse2_pshufh_w:
+; CHECK: pshufhw $27
+entry:
+   %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
+   ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone
diff --git a/test/CodeGen/X86/sse3-avx-addsub-2.ll b/test/CodeGen/X86/sse3-avx-addsub-2.ll
new file mode 100644
index 0000000..b7706cc
--- /dev/null
+++ b/test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -0,0 +1,318 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+
+
+; Verify that we correctly generate 'addsub' instructions from
+; a sequence of vector extracts + float add/sub + vector inserts.
+
+define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
+  %1 = extractelement <4 x float> %A, i32 0
+  %2 = extractelement <4 x float> %B, i32 0
+  %sub = fsub float %1, %2
+  %3 = extractelement <4 x float> %A, i32 2
+  %4 = extractelement <4 x float> %B, i32 2
+  %sub2 = fsub float %3, %4
+  %5 = extractelement <4 x float> %A, i32 1
+  %6 = extractelement <4 x float> %B, i32 1
+  %add = fadd float %5, %6
+  %7 = extractelement <4 x float> %A, i32 3
+  %8 = extractelement <4 x float> %B, i32 3
+  %add2 = fadd float %7, %8
+  %vecinsert1 = insertelement <4 x float> undef, float %add, i32 1
+  %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3
+  %vecinsert3 = insertelement <4 x float> %vecinsert2, float %sub, i32 0
+  %vecinsert4 = insertelement <4 x float> %vecinsert3, float %sub2, i32 2
+  ret <4 x float> %vecinsert4
+}
+; CHECK-LABEL: test1
+; SSE: addsubps
+; AVX: vaddsubps
+; CHECK-NEXT: ret
+
+
+define <4 x float> @test2(<4 x float> %A, <4 x float> %B) {
+  %1 = extractelement <4 x float> %A, i32 2
+  %2 = extractelement <4 x float> %B, i32 2
+  %sub2 = fsub float %1, %2
+  %3 = extractelement <4 x float> %A, i32 3
+  %4 = extractelement <4 x float> %B, i32 3
+  %add2 = fadd float %3, %4
+  %vecinsert1 = insertelement <4 x float> undef, float %sub2, i32 2
+  %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3
+  ret <4 x float> %vecinsert2
+}
+; CHECK-LABEL: test2
+; SSE: addsubps
+; AVX: vaddsubps
+; CHECK-NEXT: ret
+
+
+define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
+  %1 = extractelement <4 x float> %A, i32 0
+  %2 = extractelement <4 x float> %B, i32 0
+  %sub = fsub float %1, %2
+  %3 = extractelement <4 x float> %A, i32 3
+  %4 = extractelement <4 x float> %B, i32 3
+  %add = fadd float %4, %3
+  %vecinsert1 = insertelement <4 x float> undef, float %sub, i32 0
+  %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add, i32 3
+  ret <4 x float> %vecinsert2
+}
+; CHECK-LABEL: test3
+; SSE: addsubps
+; AVX: vaddsubps
+; CHECK-NEXT: ret
+
+
+define <4 x float> @test4(<4 x float> %A, <4 x float> %B) {
+  %1 = extractelement <4 x float> %A, i32 2
+  %2 = extractelement <4 x float> %B, i32 2
+  %sub = fsub float %1, %2
+  %3 = extractelement <4 x float> %A, i32 1
+  %4 = extractelement <4 x float> %B, i32 1
+  %add = fadd float %3, %4
+  %vecinsert1 = insertelement <4 x float> undef, float %sub, i32 2
+  %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add, i32 1
+  ret <4 x float> %vecinsert2
+}
+; CHECK-LABEL: test4
+; SSE: addsubps
+; AVX: vaddsubps
+; CHECK-NEXT: ret
+
+
+define <4 x float> @test5(<4 x float> %A, <4 x float> %B) {
+  %1 = extractelement <4 x float> %A, i32 0
+  %2 = extractelement <4 x float> %B, i32 0
+  %sub2 = fsub float %1, %2
+  %3 = extractelement <4 x float> %A, i32 1
+  %4 = extractelement <4 x float> %B, i32 1
+  %add2 = fadd float %3, %4
+  %vecinsert1 = insertelement <4 x float> undef, float %sub2, i32 0
+  %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 1
+  ret <4 x float> %vecinsert2
+}
+; CHECK-LABEL: test5
+; SSE: addsubps
+; AVX: vaddsubps
+; CHECK-NEXT: ret
+
+
+define <4 x float> @test6(<4 x float> %A, <4 x float> %B) {
+  %1 = extractelement <4 x float> %A, i32 0
+  %2 = extractelement <4 x float> %B, i32 0
+  %sub = fsub float %1, %2
+  %3 = extractelement <4 x float> %A, i32 2
+  %4 = extractelement <4 x float> %B, i32 2
+  %sub2 = fsub float %3, %4
+  %5 = extractelement <4 x float> %A, i32 1
+  %6 = extractelement <4 x float> %B, i32 1
+  %add = fadd float %5, %6
+  %7 = extractelement <4 x float> %A, i32 3
+  %8 = extractelement <4 x float> %B, i32 3
+  %add2 = fadd float %7, %8
+  %vecinsert1 = insertelement <4 x float> undef, float %add, i32 1
+  %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3
+  %vecinsert3 = insertelement <4 x float> %vecinsert2, float %sub, i32 0
+  %vecinsert4 = insertelement <4 x float> %vecinsert3, float %sub2, i32 2
+  ret <4 x float> %vecinsert4
+}
+; CHECK-LABEL: test6
+; SSE: addsubps
+; AVX: vaddsubps
+; CHECK-NEXT: ret
+
+
+define <4 x double> @test7(<4 x double> %A, <4 x double> %B) {
+  %1 = extractelement <4 x double> %A, i32 0
+  %2 = extractelement <4 x double> %B, i32 0
+  %sub = fsub double %1, %2
+  %3 = extractelement <4 x double> %A, i32 2
+  %4 = extractelement <4 x double> %B, i32 2
+  %sub2 = fsub double %3, %4
+  %5 = extractelement <4 x double> %A, i32 1
+  %6 = extractelement <4 x double> %B, i32 1
+  %add = fadd double %5, %6
+  %7 = extractelement <4 x double> %A, i32 3
+  %8 = extractelement <4 x double> %B, i32 3
+  %add2 = fadd double %7, %8
+  %vecinsert1 = insertelement <4 x double> undef, double %add, i32 1
+  %vecinsert2 = insertelement <4 x double> %vecinsert1, double %add2, i32 3
+  %vecinsert3 = insertelement <4 x double> %vecinsert2, double %sub, i32 0
+  %vecinsert4 = insertelement <4 x double> %vecinsert3, double %sub2, i32 2
+  ret <4 x double> %vecinsert4
+}
+; CHECK-LABEL: test7
+; SSE: addsubpd
+; SSE-NEXT: addsubpd
+; AVX: vaddsubpd
+; AVX-NOT: vaddsubpd
+; CHECK: ret
+
+
+define <2 x double> @test8(<2 x double> %A, <2 x double> %B) {
+  %1 = extractelement <2 x double> %A, i32 0
+  %2 = extractelement <2 x double> %B, i32 0
+  %sub = fsub double %1, %2
+  %3 = extractelement <2 x double> %A, i32 1
+  %4 = extractelement <2 x double> %B, i32 1
+  %add = fadd double %3, %4
+  %vecinsert1 = insertelement <2 x double> undef, double %sub, i32 0
+  %vecinsert2 = insertelement <2 x double> %vecinsert1, double %add, i32 1
+  ret <2 x double> %vecinsert2
+}
+; CHECK-LABEL: test8
+; SSE: addsubpd
+; AVX: vaddsubpd
+; CHECK: ret
+
+
+define <8 x float> @test9(<8 x float> %A, <8 x float> %B) {
+  %1 = extractelement <8 x float> %A, i32 0
+  %2 = extractelement <8 x float> %B, i32 0
+  %sub = fsub float %1, %2
+  %3 = extractelement <8 x float> %A, i32 2
+  %4 = extractelement <8 x float> %B, i32 2
+  %sub2 = fsub float %3, %4
+  %5 = extractelement <8 x float> %A, i32 1
+  %6 = extractelement <8 x float> %B, i32 1
+  %add = fadd float %5, %6
+  %7 = extractelement <8 x float> %A, i32 3
+  %8 = extractelement <8 x float> %B, i32 3
+  %add2 = fadd float %7, %8
+  %9 = extractelement <8 x float> %A, i32 4
+  %10 = extractelement <8 x float> %B, i32 4
+  %sub3 = fsub float %9, %10
+  %11 = extractelement <8 x float> %A, i32 6
+  %12 = extractelement <8 x float> %B, i32 6
+  %sub4 = fsub float %11, %12
+  %13 = extractelement <8 x float> %A, i32 5
+  %14 = extractelement <8 x float> %B, i32 5
+  %add3 = fadd float %13, %14
+  %15 = extractelement <8 x float> %A, i32 7
+  %16 = extractelement <8 x float> %B, i32 7
+  %add4 = fadd float %15, %16
+  %vecinsert1 = insertelement <8 x float> undef, float %add, i32 1
+  %vecinsert2 = insertelement <8 x float> %vecinsert1, float %add2, i32 3
+  %vecinsert3 = insertelement <8 x float> %vecinsert2, float %sub, i32 0
+  %vecinsert4 = insertelement <8 x float> %vecinsert3, float %sub2, i32 2
+  %vecinsert5 = insertelement <8 x float> %vecinsert4, float %add3, i32 5
+  %vecinsert6 = insertelement <8 x float> %vecinsert5, float %add4, i32 7
+  %vecinsert7 = insertelement <8 x float> %vecinsert6, float %sub3, i32 4
+  %vecinsert8 = insertelement <8 x float> %vecinsert7, float %sub4, i32 6
+  ret <8 x float> %vecinsert8
+}
+; CHECK-LABEL: test9
+; SSE: addsubps
+; SSE-NEXT: addsubps
+; AVX: vaddsubps
+; AVX-NOT: vaddsubps
+; CHECK: ret
+
+
+; Verify that we don't generate addsub instruction for the following
+; functions.
+define <4 x float> @test10(<4 x float> %A, <4 x float> %B) {
+  %1 = extractelement <4 x float> %A, i32 0
+  %2 = extractelement <4 x float> %B, i32 0
+  %sub = fsub float %1, %2
+  %vecinsert1 = insertelement <4 x float> undef, float %sub, i32 0
+  ret <4 x float> %vecinsert1
+}
+; CHECK-LABEL: test10
+; CHECK-NOT: addsubps
+; CHECK: ret
+
+
+define <4 x float> @test11(<4 x float> %A, <4 x float> %B) {
+  %1 = extractelement <4 x float> %A, i32 2
+  %2 = extractelement <4 x float> %B, i32 2
+  %sub = fsub float %1, %2
+  %vecinsert1 = insertelement <4 x float> undef, float %sub, i32 2
+  ret <4 x float> %vecinsert1
+}
+; CHECK-LABEL: test11
+; CHECK-NOT: addsubps
+; CHECK: ret
+
+
+define <4 x float> @test12(<4 x float> %A, <4 x float> %B) {
+  %1 = extractelement <4 x float> %A, i32 1
+  %2 = extractelement <4 x float> %B, i32 1
+  %add = fadd float %1, %2
+  %vecinsert1 = insertelement <4 x float> undef, float %add, i32 1
+  ret <4 x float> %vecinsert1
+}
+; CHECK-LABEL: test12
+; CHECK-NOT: addsubps
+; CHECK: ret
+
+
+define <4 x float> @test13(<4 x float> %A, <4 x float> %B) {
+  %1 = extractelement <4 x float> %A, i32 3
+  %2 = extractelement <4 x float> %B, i32 3
+  %add = fadd float %1, %2
+  %vecinsert1 = insertelement <4 x float> undef, float %add, i32 3
+  ret <4 x float> %vecinsert1
+}
+; CHECK-LABEL: test13
+; CHECK-NOT: addsubps
+; CHECK: ret
+
+
+define <4 x float> @test14(<4 x float> %A, <4 x float> %B) {
+  %1 = extractelement <4 x float> %A, i32 0
+  %2 = extractelement <4 x float> %B, i32 0
+  %sub = fsub float %1, %2
+  %3 = extractelement <4 x float> %A, i32 2
+  %4 = extractelement <4 x float> %B, i32 2
+  %sub2 = fsub float %3, %4
+  %vecinsert1 = insertelement <4 x float> undef, float %sub, i32 0
+  %vecinsert2 = insertelement <4 x float> %vecinsert1, float %sub2, i32 2
+  ret <4 x float> %vecinsert2
+}
+; CHECK-LABEL: test14
+; CHECK-NOT: addsubps
+; CHECK: ret
+
+
+define <4 x float> @test15(<4 x float> %A, <4 x float> %B) {
+  %1 = extractelement <4 x float> %A, i32 1
+  %2 = extractelement <4 x float> %B, i32 1
+  %add = fadd float %1, %2
+  %3 = extractelement <4 x float> %A, i32 3
+  %4 = extractelement <4 x float> %B, i32 3
+  %add2 = fadd float %3, %4
+  %vecinsert1 = insertelement <4 x float> undef, float %add, i32 1
+  %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3
+  ret <4 x float> %vecinsert2
+}
+; CHECK-LABEL: test15
+; CHECK-NOT: addsubps
+; CHECK: ret
+
+
+define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
+  %1 = extractelement <4 x float> %A, i32 0
+  %2 = extractelement <4 x float> %B, i32 0
+  %sub = fsub float %1, undef
+  %3 = extractelement <4 x float> %A, i32 2
+  %4 = extractelement <4 x float> %B, i32 2
+  %sub2 = fsub float %3, %4
+  %5 = extractelement <4 x float> %A, i32 1
+  %6 = extractelement <4 x float> %B, i32 1
+  %add = fadd float %5, undef
+  %7 = extractelement <4 x float> %A, i32 3
+  %8 = extractelement <4 x float> %B, i32 3
+  %add2 = fadd float %7, %8
+  %vecinsert1 = insertelement <4 x float> undef, float %add, i32 1
+  %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3
+  %vecinsert3 = insertelement <4 x float> %vecinsert2, float %sub, i32 0
+  %vecinsert4 = insertelement <4 x float> %vecinsert3, float %sub2, i32 2
+  ret <4 x float> %vecinsert4
+}
+; CHECK-LABEL: test16
+; CHECK-NOT: addsubps
+; CHECK: ret
+
+
diff --git a/test/CodeGen/X86/sse3-avx-addsub.ll b/test/CodeGen/X86/sse3-avx-addsub.ll
new file mode 100644
index 0000000..8b66743
--- /dev/null
+++ b/test/CodeGen/X86/sse3-avx-addsub.ll
@@ -0,0 +1,296 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s -check-prefix=SSE -check-prefix=CHECK
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX -check-prefix=CHECK
+
+; Test ADDSUB ISel patterns.
+
+; Functions below are obtained from the following source:
+;
+; typedef double double2 __attribute__((ext_vector_type(2)));
+; typedef double double4 __attribute__((ext_vector_type(4)));
+; typedef float float4 __attribute__((ext_vector_type(4)));
+; typedef float float8 __attribute__((ext_vector_type(8)));
+;
+; float4 test1(float4 A, float4 B) {
+;   float4 X = A - B;
+;   float4 Y = A + B;
+;   return (float4){X[0], Y[1], X[2], Y[3]};
+; }
+;
+; float8 test2(float8 A, float8 B) {
+;   float8 X = A - B;
+;   float8 Y = A + B;
+;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
+; }
+;
+; double4 test3(double4 A, double4 B) {
+;   double4 X = A - B;
+;   double4 Y = A + B;
+;   return (double4){X[0], Y[1], X[2], Y[3]};
+; }
+;
+; double2 test4(double2 A, double2 B) {
+;   double2 X = A - B;
+;   double2 Y = A + B;
+;   return (double2){X[0], Y[1]};
+; }
+
+define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
+  %sub = fsub <4 x float> %A, %B
+  %add = fadd <4 x float> %A, %B
+  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %vecinit6
+}
+; CHECK-LABEL: test1
+; SSE: addsubps
+; AVX: vaddsubps
+; CHECK-NEXT: ret
+
+
+define <8 x float> @test2(<8 x float> %A, <8 x float> %B) {
+  %sub = fsub <8 x float> %A, %B
+  %add = fadd <8 x float> %A, %B
+  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x float> %vecinit14
+}
+; CHECK-LABEL: test2
+; SSE: addsubps
+; SSE-NEXT: addsubps
+; AVX: vaddsubps
+; AVX-NOT: vaddsubps
+; CHECK: ret
+
+
+define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {
+  %sub = fsub <4 x double> %A, %B
+  %add = fadd <4 x double> %A, %B
+  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x double> %vecinit6
+}
+; CHECK-LABEL: test3
+; SSE: addsubpd
+; SSE: addsubpd
+; AVX: vaddsubpd
+; AVX-NOT: vaddsubpd
+; CHECK: ret
+
+
+define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 {
+  %add = fadd <2 x double> %A, %B
+  %sub = fsub <2 x double> %A, %B
+  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %vecinit2
+}
+; CHECK-LABEL: test4
+; SSE: addsubpd
+; AVX: vaddsubpd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
+  %1 = load <4 x float>* %B
+  %add = fadd <4 x float> %A, %1
+  %sub = fsub <4 x float> %A, %1
+  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %vecinit6
+}
+; CHECK-LABEL: test1b
+; SSE: addsubps
+; AVX: vaddsubps
+; CHECK-NEXT: ret
+
+
+define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
+  %1 = load <8 x float>* %B
+  %add = fadd <8 x float> %A, %1
+  %sub = fsub <8 x float> %A, %1
+  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x float> %vecinit14
+}
+; CHECK-LABEL: test2b
+; SSE: addsubps
+; SSE-NEXT: addsubps
+; AVX: vaddsubps
+; AVX-NOT: vaddsubps
+; CHECK: ret
+
+
+define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
+  %1 = load <4 x double>* %B
+  %add = fadd <4 x double> %A, %1
+  %sub = fsub <4 x double> %A, %1
+  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x double> %vecinit6
+}
+; CHECK-LABEL: test3b
+; SSE: addsubpd
+; SSE: addsubpd
+; AVX: vaddsubpd
+; AVX-NOT: vaddsubpd
+; CHECK: ret
+
+
+define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
+  %1 = load <2 x double>* %B
+  %sub = fsub <2 x double> %A, %1
+  %add = fadd <2 x double> %A, %1
+  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %vecinit2
+}
+; CHECK-LABEL: test4b
+; SSE: addsubpd
+; AVX: vaddsubpd
+; CHECK-NEXT: ret
+
+; Functions below are obtained from the following source:
+;
+; float4 test1(float4 A, float4 B) {
+;   float4 X = A + B;
+;   float4 Y = A - B;
+;   return (float4){X[0], Y[1], X[2], Y[3]};
+; }
+;
+; float8 test2(float8 A, float8 B) {
+;   float8 X = A + B;
+;   float8 Y = A - B;
+;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
+; }
+;
+; double4 test3(double4 A, double4 B) {
+;   double4 X = A + B;
+;   double4 Y = A - B;
+;   return (double4){X[0], Y[1], X[2], Y[3]};
+; }
+;
+; double2 test4(double2 A, double2 B) {
+;   double2 X = A + B;
+;   double2 Y = A - B;
+;   return (double2){X[0], Y[1]};
+; }
+
+define <4 x float> @test5(<4 x float> %A, <4 x float> %B) {
+  %sub = fsub <4 x float> %A, %B
+  %add = fadd <4 x float> %A, %B
+  %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %vecinit6
+}
+; CHECK-LABEL: test5
+; SSE: xorps
+; SSE-NEXT: addsubps
+; AVX: vxorps
+; AVX-NEXT: vaddsubps
+; CHECK: ret
+
+
+define <8 x float> @test6(<8 x float> %A, <8 x float> %B) {
+  %sub = fsub <8 x float> %A, %B
+  %add = fadd <8 x float> %A, %B
+  %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x float> %vecinit14
+}
+; CHECK-LABEL: test6
+; SSE: xorps
+; SSE-NEXT: addsubps
+; SSE: xorps
+; SSE-NEXT: addsubps
+; AVX: vxorps
+; AVX-NEXT: vaddsubps
+; AVX-NOT: vxorps
+; AVX-NOT: vaddsubps
+; CHECK: ret
+
+
+define <4 x double> @test7(<4 x double> %A, <4 x double> %B) {
+  %sub = fsub <4 x double> %A, %B
+  %add = fadd <4 x double> %A, %B
+  %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x double> %vecinit6
+}
+; CHECK-LABEL: test7
+; SSE: xorpd
+; SSE-NEXT: addsubpd
+; SSE: xorpd
+; SSE-NEXT: addsubpd
+; AVX: vxorpd
+; AVX-NEXT: vaddsubpd
+; AVX-NOT: vxorpd
+; AVX-NOT: vaddsubpd
+; CHECK: ret
+
+
+define <2 x double> @test8(<2 x double> %A, <2 x double> %B) #0 {
+  %add = fadd <2 x double> %A, %B
+  %sub = fsub <2 x double> %A, %B
+  %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %vecinit2
+}
+; CHECK-LABEL: test8
+; SSE: xorpd
+; SSE-NEXT: addsubpd
+; AVX: vxorpd
+; AVX-NEXT: vaddsubpd
+; CHECK: ret
+
+
+define <4 x float> @test5b(<4 x float> %A, <4 x float> %B) {
+  %sub = fsub <4 x float> %A, %B
+  %add = fadd <4 x float> %B, %A
+  %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %vecinit6
+}
+; CHECK-LABEL: test5
+; SSE: xorps
+; SSE-NEXT: addsubps
+; AVX: vxorps
+; AVX-NEXT: vaddsubps
+; CHECK: ret
+
+
+define <8 x float> @test6b(<8 x float> %A, <8 x float> %B) {
+  %sub = fsub <8 x float> %A, %B
+  %add = fadd <8 x float> %B, %A
+  %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x float> %vecinit14
+}
+; CHECK-LABEL: test6
+; SSE: xorps
+; SSE-NEXT: addsubps
+; SSE: xorps
+; SSE-NEXT: addsubps
+; AVX: vxorps
+; AVX-NEXT: vaddsubps
+; AVX-NOT: vxorps
+; AVX-NOT: vaddsubps
+; CHECK: ret
+
+
+define <4 x double> @test7b(<4 x double> %A, <4 x double> %B) {
+  %sub = fsub <4 x double> %A, %B
+  %add = fadd <4 x double> %B, %A
+  %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x double> %vecinit6
+}
+; CHECK-LABEL: test7
+; SSE: xorpd
+; SSE-NEXT: addsubpd
+; SSE: xorpd
+; SSE-NEXT: addsubpd
+; AVX: vxorpd
+; AVX-NEXT: vaddsubpd
+; AVX-NOT: vxorpd
+; AVX-NOT: vaddsubpd
+; CHECK: ret
+
+
+define <2 x double> @test8b(<2 x double> %A, <2 x double> %B) #0 {
+  %add = fadd <2 x double> %B, %A
+  %sub = fsub <2 x double> %A, %B
+  %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %vecinit2
+}
+; CHECK-LABEL: test8
+; SSE: xorpd
+; SSE-NEXT: addsubpd
+; AVX: vxorpd
+; AVX-NEXT: vaddsubpd
+; CHECK: ret
+
diff --git a/test/CodeGen/X86/sse41-blend.ll b/test/CodeGen/X86/sse41-blend.ll
index 8ad7987..3a48121 100644
--- a/test/CodeGen/X86/sse41-blend.ll
+++ b/test/CodeGen/X86/sse41-blend.ll
@@ -117,6 +117,24 @@ define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
   %1 = select <16 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i8> %xyzw, <16 x i8> %abcd
   ret <16 x i8> %1
 }
+
 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
+
+;; 2 tests for shufflevectors that optimize to blend + immediate
+; CHECK-LABEL: @blend_shufflevector_4xfloat
+; CHECK: blendps $6, %xmm1, %xmm0
+; CHECK: ret
+define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+  ret <4 x float> %1
+}
+
+; CHECK-LABEL: @blend_shufflevector_8xi16
+; CHECK: pblendw $134, %xmm1, %xmm0
+; CHECK: ret
+define <8 x i16> @blend_shufflevector_8xi16(<8 x i16> %a, <8 x i16> %b) {
+  %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 15>
+  ret <8 x i16> %1
+}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index a3c6201..6726a3e 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -692,3 +692,25 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl
   %13 = fadd <4 x float> %11, %12
   ret <4 x float> %13
 }
+
+define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) {
+; CHECK-LABEL: insertps_with_undefs:
+; CHECK-NOT: shufps
+; CHECK: insertps    $32, %xmm0
+; CHECK: ret
+  %1 = load float* %b, align 4
+  %2 = insertelement <4 x float> undef, float %1, i32 0
+  %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 4, i32 undef, i32 0, i32 7>
+  ret <4 x float> %result
+}
+
+; Test for a bug in X86ISelLowering.cpp:getINSERTPS where we were using
+; the destination index to change the load, instead of the source index.
+define <4 x float> @pr20087(<4 x float> %a, <4 x float> *%ptr) {
+; CHECK-LABEL: pr20087:
+; CHECK: insertps  $48
+; CHECK: ret
+  %load = load <4 x float> *%ptr
+  %ret = shufflevector <4 x float> %load, <4 x float> %a, <4 x i32> <i32 4, i32 undef, i32 6, i32 2>
+  ret <4 x float> %ret
+}
diff --git a/test/CodeGen/X86/stackmap-fast-isel.ll b/test/CodeGen/X86/stackmap-fast-isel.ll
new file mode 100644
index 0000000..0b7e6db
--- /dev/null
+++ b/test/CodeGen/X86/stackmap-fast-isel.ll
@@ -0,0 +1,165 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -disable-fp-elim                             | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -disable-fp-elim -fast-isel -fast-isel-abort | FileCheck %s
+
+; CHECK-LABEL:  .section  __LLVM_STACKMAPS,__llvm_stackmaps
+; CHECK-NEXT:  __LLVM_StackMaps:
+; Header
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 0
+; Num Functions
+; CHECK-NEXT:   .long 4
+; Num LargeConstants
+; CHECK-NEXT:   .long 3
+; Num Callsites
+; CHECK-NEXT:   .long 7
+
+; Functions and stack size
+; CHECK-NEXT:   .quad _constantargs
+; CHECK-NEXT:   .quad 8
+; CHECK-NEXT:   .quad _liveConstant
+; CHECK-NEXT:   .quad 8
+; CHECK-NEXT:   .quad _directFrameIdx
+; CHECK-NEXT:   .quad 40
+; CHECK-NEXT:   .quad _longid
+; CHECK-NEXT:   .quad 8
+
+; Large Constants
+; CHECK-NEXT:   .quad   2147483648
+; CHECK-NEXT:   .quad   4294967295
+; CHECK-NEXT:   .quad   4294967296
+
+; Callsites
+; Constant arguments
+;
+; CHECK-NEXT:   .quad   1
+; CHECK-NEXT:   .long   L{{.*}}-_constantargs
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  12
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   -1
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   -1
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   65536
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   2000000000
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   2147483647
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   -1
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   -1
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+; LargeConstant at index 0
+; CHECK-NEXT:   .byte   5
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+; LargeConstant at index 1
+; CHECK-NEXT:   .byte   5
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   1
+; LargeConstant at index 2
+; CHECK-NEXT:   .byte   5
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   2
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   -1
+
+define void @constantargs() {
+entry:
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 15, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1)
+  ret void
+}
+
+; Map a constant value.
+;
+; CHECK-LABEL:  .long L{{.*}}-_liveConstant
+; CHECK-NEXT:   .short 0
+; 1 location
+; CHECK-NEXT:   .short 1
+; Loc 0: SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   33
+
+define void @liveConstant() {
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 5, i32 33)
+  ret void
+}
+
+; Directly map an alloca's address.
+;
+; Callsite 16
+; CHECK-LABEL:  .long L{{.*}}-_directFrameIdx
+; CHECK-NEXT:   .short 0
+; 1 location
+; CHECK-NEXT:   .short	1
+; Loc 0: Direct RBP - ofs
+; CHECK-NEXT:   .byte	2
+; CHECK-NEXT:   .byte	8
+; CHECK-NEXT:   .short	6
+; CHECK-NEXT:   .long
+
+define void @directFrameIdx() {
+entry:
+  %metadata1 = alloca i64, i32 3, align 8
+  store i64 11, i64* %metadata1
+  store i64 12, i64* %metadata1
+  store i64 13, i64* %metadata1
+  call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 0, i64* %metadata1)
+  ret void
+}
+
+; Test a 64-bit ID.
+;
+; CHECK:        .quad 4294967295
+; CHECK-LABEL:  .long L{{.*}}-_longid
+; CHECK:        .quad 4294967296
+; CHECK-LABEL:  .long L{{.*}}-_longid
+; CHECK:        .quad 9223372036854775807
+; CHECK-LABEL:  .long L{{.*}}-_longid
+; CHECK:        .quad -1
+; CHECK-LABEL:  .long L{{.*}}-_longid
+define void @longid() {
+entry:
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4294967295, i32 0)
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4294967296, i32 0)
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 9223372036854775807, i32 0)
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 -1, i32 0)
+  ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
diff --git a/test/CodeGen/X86/stackmap-liveness.ll b/test/CodeGen/X86/stackmap-liveness.ll
index 9ce5254..897595d 100644
--- a/test/CodeGen/X86/stackmap-liveness.ll
+++ b/test/CodeGen/X86/stackmap-liveness.ll
@@ -1,6 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -disable-fp-elim | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -disable-fp-elim -enable-stackmap-liveness| FileCheck -check-prefix=STACK %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -disable-fp-elim -enable-patchpoint-liveness| FileCheck -check-prefix=PATCH %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -disable-fp-elim -enable-patchpoint-liveness=false | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -disable-fp-elim                                   | FileCheck -check-prefix=PATCH %s
 ;
 ; Note: Print verbose stackmaps using -debug-only=stackmaps.
 
@@ -37,36 +36,21 @@ entry:
 ; Align
 ; CHECK-NEXT:   .align  3
 
-; StackMap 1 (stackmap liveness information enabled)
-; STACK-LABEL:  .long L{{.*}}-_stackmap_liveness
-; STACK-NEXT:   .short  0
-; STACK-NEXT:   .short  0
-; Padding
-; STACK-NEXT:   .short  0
-; Num LiveOut Entries: 2
-; STACK-NEXT:   .short  2
-; LiveOut Entry 1: %RSP (8 bytes)
-; STACK-NEXT:   .short  7
-; STACK-NEXT:   .byte 0
-; STACK-NEXT:   .byte 8
-; LiveOut Entry 2: %YMM2 (16 bytes) --> %XMM2
-; STACK-NEXT:   .short  19
-; STACK-NEXT:   .byte 0
-; STACK-NEXT:   .byte 16
-; Align
-; STACK-NEXT:   .align  3
-
 ; StackMap 1 (patchpoint liveness information enabled)
 ; PATCH-LABEL:  .long L{{.*}}-_stackmap_liveness
 ; PATCH-NEXT:   .short  0
 ; PATCH-NEXT:   .short  0
 ; Padding
 ; PATCH-NEXT:   .short  0
-; Num LiveOut Entries: 0
-; PATCH-NEXT:   .short  0
+; Num LiveOut Entries: 1
+; PATCH-NEXT:   .short  1
+; LiveOut Entry 1: %YMM2 (16 bytes) --> %XMM2
+; PATCH-NEXT:   .short  19
+; PATCH-NEXT:   .byte 0
+; PATCH-NEXT:   .byte 16
 ; Align
 ; PATCH-NEXT:   .align  3
-  call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 5)
+  call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 1, i32 12, i8* null, i32 0)
   %a2 = call i64 asm sideeffect "", "={r8}"() nounwind
   %a3 = call i8 asm sideeffect "", "={ah}"() nounwind
   %a4 = call <4 x double> asm sideeffect "", "={ymm0}"() nounwind
@@ -83,52 +67,37 @@ entry:
 ; Align
 ; CHECK-NEXT:   .align  3
 
-; StackMap 2 (stackmap liveness information enabled)
-; STACK-LABEL:  .long L{{.*}}-_stackmap_liveness
-; STACK-NEXT:   .short  0
-; STACK-NEXT:   .short  0
-; Padding
-; STACK-NEXT:   .short  0
-; Num LiveOut Entries: 6
-; STACK-NEXT:   .short  6
-; LiveOut Entry 1: %RAX (1 bytes) --> %AL or %AH
-; STACK-NEXT:   .short  0
-; STACK-NEXT:   .byte 0
-; STACK-NEXT:   .byte 1
-; LiveOut Entry 2: %RSP (8 bytes)
-; STACK-NEXT:   .short  7
-; STACK-NEXT:   .byte 0
-; STACK-NEXT:   .byte 8
-; LiveOut Entry 3: %R8 (8 bytes)
-; STACK-NEXT:   .short  8
-; STACK-NEXT:   .byte 0
-; STACK-NEXT:   .byte 8
-; LiveOut Entry 4: %YMM0 (32 bytes)
-; STACK-NEXT:   .short  17
-; STACK-NEXT:   .byte 0
-; STACK-NEXT:   .byte 32
-; LiveOut Entry 5: %YMM1 (32 bytes)
-; STACK-NEXT:   .short  18
-; STACK-NEXT:   .byte 0
-; STACK-NEXT:   .byte 32
-; LiveOut Entry 6: %YMM2 (16 bytes) --> %XMM2
-; STACK-NEXT:   .short  19
-; STACK-NEXT:   .byte 0
-; STACK-NEXT:   .byte 16
-; Align
-; STACK-NEXT:   .align  3
-
 ; StackMap 2 (patchpoint liveness information enabled)
 ; PATCH-LABEL:  .long L{{.*}}-_stackmap_liveness
 ; PATCH-NEXT:   .short  0
 ; PATCH-NEXT:   .short  0
 ; Padding
 ; PATCH-NEXT:   .short  0
-; Num LiveOut Entries: 0
+; Num LiveOut Entries: 5
+; PATCH-NEXT:   .short  5
+; LiveOut Entry 1: %RAX (1 bytes) --> %AL or %AH
 ; PATCH-NEXT:   .short  0
+; PATCH-NEXT:   .byte 0
+; PATCH-NEXT:   .byte 1
+; LiveOut Entry 2: %R8 (8 bytes)
+; PATCH-NEXT:   .short  8
+; PATCH-NEXT:   .byte 0
+; PATCH-NEXT:   .byte 8
+; LiveOut Entry 3: %YMM0 (32 bytes)
+; PATCH-NEXT:   .short  17
+; PATCH-NEXT:   .byte 0
+; PATCH-NEXT:   .byte 32
+; LiveOut Entry 4: %YMM1 (32 bytes)
+; PATCH-NEXT:   .short  18
+; PATCH-NEXT:   .byte 0
+; PATCH-NEXT:   .byte 32
+; LiveOut Entry 5: %YMM2 (16 bytes) --> %XMM2
+; PATCH-NEXT:   .short  19
+; PATCH-NEXT:   .byte 0
+; PATCH-NEXT:   .byte 16
 ; Align
 ; PATCH-NEXT:   .align  3
-  call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 2, i32 5)
+  call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 2, i32 12, i8* null, i32 0)
   call void asm sideeffect "", "{r8},{ah},{ymm0},{ymm1}"(i64 %a2, i8 %a3, <4 x double> %a4, <4 x double> %a5) nounwind
 
 ; StackMap 3 (no liveness information available)
@@ -142,36 +111,25 @@ entry:
 ; Align
 ; CHECK-NEXT:   .align  3
 
-; StackMap 3 (stackmap liveness information enabled)
-; STACK-LABEL:  .long L{{.*}}-_stackmap_liveness
-; STACK-NEXT:   .short  0
-; STACK-NEXT:   .short  0
-; Padding
-; STACK-NEXT:   .short  0
-; Num LiveOut Entries: 2
-; STACK-NEXT:   .short  2
-; LiveOut Entry 1: %RSP (8 bytes)
-; STACK-NEXT:   .short  7
-; STACK-NEXT:   .byte 0
-; STACK-NEXT:   .byte 8
-; LiveOut Entry 2: %YMM2 (16 bytes) --> %XMM2
-; STACK-NEXT:   .short  19
-; STACK-NEXT:   .byte 0
-; STACK-NEXT:   .byte 16
-; Align
-; STACK-NEXT:   .align  3
-
 ; StackMap 3 (patchpoint liveness information enabled)
 ; PATCH-LABEL:  .long L{{.*}}-_stackmap_liveness
 ; PATCH-NEXT:   .short  0
 ; PATCH-NEXT:   .short  0
 ; Padding
 ; PATCH-NEXT:   .short  0
-; Num LiveOut Entries: 0
-; PATCH-NEXT:   .short  0
+; Num LiveOut Entries: 2
+; PATCH-NEXT:   .short  2
+; LiveOut Entry 1: %RSP (8 bytes)
+; PATCH-NEXT:   .short  7
+; PATCH-NEXT:   .byte 0
+; PATCH-NEXT:   .byte 8
+; LiveOut Entry 2: %YMM2 (16 bytes) --> %XMM2
+; PATCH-NEXT:   .short  19
+; PATCH-NEXT:   .byte 0
+; PATCH-NEXT:   .byte 16
 ; Align
 ; PATCH-NEXT:   .align  3
-  call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 5)
+  call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 12, i8* null, i32 0)
   call void asm sideeffect "", "{xmm2}"(<2 x double> %a1) nounwind
   ret void
 }
@@ -179,33 +137,6 @@ entry:
 define void @mixed_liveness() {
 entry:
   %a1 = call <2 x double> asm sideeffect "", "={xmm2}"() nounwind
-; StackMap 4 (stackmap liveness information enabled)
-; STACK-LABEL:  .long L{{.*}}-_mixed_liveness
-; STACK-NEXT:   .short  0
-; STACK-NEXT:   .short  0
-; Padding
-; STACK-NEXT:   .short  0
-; Num LiveOut Entries: 1
-; STACK-NEXT:   .short  1
-; LiveOut Entry 1: %YMM2 (16 bytes) --> %XMM2
-; STACK-NEXT:   .short  19
-; STACK-NEXT:   .byte 0
-; STACK-NEXT:   .byte 16
-; Align
-; STACK-NEXT:   .align  3
-
-
-; StackMap 5 (stackmap liveness information enabled)
-; STACK-LABEL:  .long L{{.*}}-_mixed_liveness
-; STACK-NEXT:   .short  0
-; STACK-NEXT:   .short  0
-; Padding
-; STACK-NEXT:   .short  0
-; Num LiveOut Entries: 0
-; STACK-NEXT:   .short  0
-; Align
-; STACK-NEXT:   .align  3
-
 ; StackMap 4 (patchpoint liveness information enabled)
 ; PATCH-LABEL:  .long L{{.*}}-_mixed_liveness
 ; PATCH-NEXT:   .short  0
diff --git a/test/CodeGen/X86/swizzle-2.ll b/test/CodeGen/X86/swizzle-2.ll
new file mode 100644
index 0000000..4b1f903
--- /dev/null
+++ b/test/CodeGen/X86/swizzle-2.ll
@@ -0,0 +1,515 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
+
+; Test that we correctly fold a shuffle that performs a swizzle of another
+; shuffle node according to the rule
+;  shuffle (shuffle (x, undef, M0), undef, M1) -> shuffle(x, undef, M2)
+;
+; We only do this if the resulting mask is legal to avoid introducing an
+; illegal shuffle that is expanded into a sub-optimal sequence of instructions
+; during lowering stage.
+
+
+define <4 x i32> @swizzle_1(<4 x i32> %v) {
+  %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: swizzle_1
+; Mask: [1,0,3,2]
+; CHECK: pshufd $-79
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @swizzle_2(<4 x i32> %v) {
+  %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 0, i32 2>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 0, i32 2>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: swizzle_2
+; Mask: [2,1,3,0]
+; CHECK: pshufd $54
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @swizzle_3(<4 x i32> %v) {
+  %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: swizzle_3
+; Mask: [1,0,3,2]
+; CHECK: pshufd $-79
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @swizzle_4(<4 x i32> %v) {
+  %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: swizzle_4
+; Mask: [3,1,0,2]
+; CHECK: pshufd $-121
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @swizzle_5(<4 x i32> %v) {
+  %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: swizzle_5
+; Mask: [2,3,0,1]
+; CHECK: pshufd $78
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @swizzle_6(<4 x i32> %v) {
+  %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: swizzle_6
+; Mask: [2,0,1,3]
+; CHECK: pshufd $-46
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @swizzle_7(<4 x i32> %v) {
+  %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: swizzle_7
+; Mask: [0,2,3,1]
+; CHECK: pshufd $120
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @swizzle_8(<4 x i32> %v) {
+  %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 1>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 1>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: swizzle_8
+; Mask: [1,3,2,0]
+; CHECK: pshufd $45
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @swizzle_9(<4 x i32> %v) {
+  %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: swizzle_9
+; Mask: [2,3,0,1]
+; CHECK: pshufd $78
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @swizzle_10(<4 x i32> %v) {
+  %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: swizzle_10
+; Mask: [1,2,0,3]
+; CHECK: pshufd $-55
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @swizzle_11(<4 x i32> %v) {
+  %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: swizzle_11
+; Mask: [3,2,1,0]
+; CHECK: pshufd $27
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @swizzle_12(<4 x i32> %v) {
+  %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 1>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 1>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: swizzle_12
+; Mask: [0,3,1,2]
+; CHECK: pshufd $-100
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @swizzle_13(<4 x i32> %v) {
+  %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: swizzle_13
+; Mask: [3,2,1,0]
+; CHECK: pshufd $27
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @swizzle_14(<4 x i32> %v) {
+  %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: swizzle_14
+; Mask: [3,0,2,1]
+; CHECK: pshufd $99
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_15(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_15
+; Mask: [1,0,3,2]
+; CHECK: pshufd $-79
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_16(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 1, i32 0, i32 2>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 3, i32 1, i32 0, i32 2>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_16
+; Mask: [2,1,3,0]
+; CHECK: pshufd $54
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_17(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_17
+; Mask: [1,0,3,2]
+; CHECK: pshufd $-79
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_18(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_18
+; Mask: [3,1,0,2]
+; CHECK: pshufd $-121
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_19(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_19
+; Mask: [2,3,0,1]
+; CHECK: pshufd $78
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_20(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_20
+; Mask: [2,0,1,3]
+; CHECK: pshufd $-46
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_21(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_21
+; Mask: [0,2,3,1]
+; CHECK: pshufd $120
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_22(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 1>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 1>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_22
+; Mask: [1,3,2,0]
+; CHECK: pshufd $45
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_23(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_23
+; Mask: [2,3,0,1]
+; CHECK: pshufd $78
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_24(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_24
+; Mask: [1,2,0,3]
+; CHECK: pshufd $-55
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_25(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_25
+; Mask: [3,2,1,0]
+; CHECK: pshufd $27
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_26(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 1>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 1>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_26
+; Mask: [0,3,1,2]
+; CHECK: pshufd $-100
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_27(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_27
+; Mask: [3,2,1,0]
+; CHECK: pshufd $27
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_28(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_28
+; Mask: [3,0,2,1]
+; CHECK: pshufd $99
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x float> @swizzle_29(<4 x float> %v) {
+  %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
+  ret <4 x float> %2
+}
+; CHECK-LABEL: swizzle_29
+; Mask: [1,3,2,0]
+; CHECK: pshufd $45
+; CHECK-NOT: pshufd
+; CHECK-NEXT: ret
+
+; Make sure that we combine the shuffles from each function below into a single
+; legal shuffle (either pshuflw or pshufb depending on the masks).
+
+define <8 x i16> @swizzle_30(<8 x i16> %v) {
+  %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 7, i32 5, i32 6, i32 4>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 7, i32 5, i32 6, i32 4>
+  ret <8 x i16> %2
+}
+; CHECK-LABEL: swizzle_30
+; Mask: [1,3,2,0,5,7,6,4]
+; CHECK: pshuflw $45
+; CHECK-NOT: pshufb
+; CHECK-NEXT: ret
+
+
+define <8 x i16> @swizzle_31(<8 x i16> %v) {
+  %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 3, i32 0, i32 2, i32 1, i32 7, i32 5, i32 6, i32 4>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 0, i32 2, i32 1, i32 7, i32 5, i32 6, i32 4>
+  ret <8 x i16> %2
+}
+; CHECK-LABEL: swizzle_31
+; Mask: [1,3,2,0,4,5,6,7]
+; CHECK: pshuflw $45
+; CHECK-NOT: pshufb
+; CHECK: ret
+
+
+define <8 x i16> @swizzle_32(<8 x i16> %v) {
+  %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 7, i32 5, i32 6, i32 4>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 7, i32 5, i32 6, i32 4>
+  ret <8 x i16> %2
+}
+; CHECK-LABEL: swizzle_32
+; Mask: [2,3,0,1,4,5,6,7] --> equivalent to pshufd mask [1,0,2,3]
+; CHECK: pshufd $-31
+; CHECK-NOT: pshufb
+; CHECK: ret
+
+define <8 x i16> @swizzle_33(<8 x i16> %v) {
+  %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 4, i32 6, i32 5, i32 7, i32 2, i32 3, i32 1, i32 0>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 6, i32 5, i32 7, i32 2, i32 3, i32 1, i32 0>
+  ret <8 x i16> %2
+}
+; CHECK-LABEL: swizzle_33
+; CHECK: pshufb
+; CHECK-NOT: pshufb
+; CHECK-NOT: shufpd
+; CHECK: ret
+
+
+define <8 x i16> @swizzle_34(<8 x i16> %v) {
+  %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 4, i32 7, i32 6, i32 5, i32 1, i32 2, i32 0, i32 3>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 7, i32 6, i32 5, i32 1, i32 2, i32 0, i32 3>
+  ret <8 x i16> %2
+}
+; CHECK-LABEL: swizzle_34
+; CHECK: pshufb
+; CHECK-NOT: pshufb
+; CHECK-NOT: shufpd
+; CHECK: ret
+
+
+define <8 x i16> @swizzle_35(<8 x i16> %v) {
+  %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 4, i32 6, i32 5, i32 1, i32 3, i32 0, i32 2>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 4, i32 6, i32 5, i32 1, i32 3, i32 0, i32 2>
+  ret <8 x i16> %2
+}
+; CHECK-LABEL: swizzle_35
+; CHECK: pshufb
+; CHECK-NOT: pshufb
+; CHECK: ret
+
+
+define <8 x i16> @swizzle_36(<8 x i16> %v) {
+  %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 4, i32 6, i32 7, i32 5, i32 0, i32 1, i32 3, i32 2>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 6, i32 7, i32 5, i32 0, i32 1, i32 3, i32 2>
+  ret <8 x i16> %2
+}
+; CHECK-LABEL: swizzle_36
+; CHECK: pshufb
+; CHECK-NOT: pshufb
+; CHECK-NOT: shufpd
+; CHECK: ret
+
+
+define <8 x i16> @swizzle_37(<8 x i16> %v) {
+  %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 7, i32 5, i32 6, i32 4>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 7, i32 4, i32 6, i32 5>
+  ret <8 x i16> %2
+}
+; CHECK-LABEL: swizzle_37
+; Mask: [0,1,2,3,4,7,6,5]
+; CHECK: pshufhw $108
+; CHECK-NOT: pshufb
+; CHECK: ret
+
+
+define <8 x i16> @swizzle_38(<8 x i16> %v) {
+  %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 5, i32 6, i32 4, i32 7, i32 0, i32 2, i32 1, i32 3>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 5, i32 6, i32 4, i32 7, i32 0, i32 2, i32 1, i32 3>
+  ret <8 x i16> %2
+}
+; CHECK-LABEL: swizzle_38
+; CHECK: pshufb
+; CHECK-NOT: pshufb
+; CHECK-NOT: shufpd
+; CHECK: ret
+
+
+define <8 x i16> @swizzle_39(<8 x i16> %v) {
+  %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 5, i32 4, i32 6, i32 7, i32 3, i32 2, i32 1, i32 0>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 5, i32 4, i32 6, i32 7, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i16> %2
+}
+; CHECK-LABEL: swizzle_39
+; CHECK: pshufb
+; CHECK-NOT: pshufb
+; CHECK-NOT: shufpd
+; CHECK: ret
+
+
+define <8 x i16> @swizzle_40(<8 x i16> %v) {
+  %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 6, i32 4, i32 7, i32 5, i32 1, i32 0, i32 3, i32 2>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 6, i32 4, i32 7, i32 5, i32 1, i32 0, i32 3, i32 2>
+  ret <8 x i16> %2
+}
+; CHECK-LABEL: swizzle_40
+; CHECK: pshufb
+; CHECK-NOT: pshufb
+; CHECK-NOT: shufpd
+; CHECK: ret
+
+
+define <8 x i16> @swizzle_41(<8 x i16> %v) {
+  %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 6, i32 7, i32 5, i32 4, i32 0, i32 1, i32 3, i32 2>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 6, i32 7, i32 5, i32 4, i32 0, i32 1, i32 3, i32 2>
+  ret <8 x i16> %2
+}
+; CHECK-LABEL: swizzle_41
+; CHECK: pshufb
+; CHECK-NOT: pshufb
+; CHECK-NOT: shufpd
+; CHECK: ret
+
+
+define <8 x i16> @swizzle_42(<8 x i16> %v) {
+  %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 2, i32 7, i32 6, i32 4, i32 5>
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 2, i32 7, i32 6, i32 4, i32 5>
+  ret <8 x i16> %2
+}
+; CHECK-LABEL: swizzle_42
+; Mask: [0,1,2,3,5,4,7,6]
+; CHECK: pshufhw $-79
+; CHECK-NOT: pshufb
+; CHECK: ret
+
+
diff --git a/test/CodeGen/X86/swizzle-avx2.ll b/test/CodeGen/X86/swizzle-avx2.ll
new file mode 100644
index 0000000..29dfa6c
--- /dev/null
+++ b/test/CodeGen/X86/swizzle-avx2.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 | FileCheck %s
+
+; Test that we correctly fold a shuffle that performs a swizzle of another
+; shuffle node according to the rule
+;  shuffle (shuffle (x, undef, M0), undef, M1) -> shuffle(x, undef, M2)
+;
+; We only do this if the resulting mask is legal to avoid introducing an
+; illegal shuffle that is expanded into a sub-optimal sequence of instructions
+; during lowering stage.
+
+; Check that we produce a single vector permute / shuffle in all cases.
+
+define <8 x i32> @swizzle_1(<8 x i32> %v) {
+  %1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 7, i32 5, i32 6, i32 4>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 7, i32 5, i32 6, i32 4>
+  ret <8 x i32> %2
+}
+; CHECK-LABEL: swizzle_1
+; CHECK: vpermd
+; CHECK-NOT: vpermd
+; CHECK: ret
+
+
+define <8 x i32> @swizzle_2(<8 x i32> %v) {
+  %1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>
+  ret <8 x i32> %2
+}
+; CHECK-LABEL: swizzle_2
+; CHECK: vpshufd $78
+; CHECK-NOT: vpermd
+; CHECK-NOT: vpshufd
+; CHECK: ret
+
+
+define <8 x i32> @swizzle_3(<8 x i32> %v) {
+  %1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
+  ret <8 x i32> %2
+}
+; CHECK-LABEL: swizzle_3
+; CHECK: vpshufd $78
+; CHECK-NOT: vpermd
+; CHECK-NOT: vpshufd
+; CHECK: ret
+
+
+define <8 x i32> @swizzle_4(<8 x i32> %v) {
+  %1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 4, i32 7, i32 5, i32 6, i32 3, i32 2, i32 0, i32 1>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 4, i32 7, i32 5, i32 6, i32 3, i32 2, i32 0, i32 1>
+  ret <8 x i32> %2
+}
+; CHECK-LABEL: swizzle_4
+; CHECK: vpermd
+; CHECK-NOT: vpermd
+; CHECK: ret
+
+
+define <8 x i32> @swizzle_5(<8 x i32> %v) {
+  %1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 7, i32 4, i32 6, i32 5, i32 0, i32 2, i32 1, i32 3>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 7, i32 4, i32 6, i32 5, i32 0, i32 2, i32 1, i32 3>
+  ret <8 x i32> %2
+}
+; CHECK-LABEL: swizzle_5
+; CHECK: vpermd
+; CHECK-NOT: vpermd
+; CHECK: ret
+
+
+define <8 x i32> @swizzle_6(<8 x i32> %v) {
+  %1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 0, i32 4, i32 7, i32 6, i32 5>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 0, i32 4, i32 7, i32 6, i32 5>
+  ret <8 x i32> %2
+}
+; CHECK-LABEL: swizzle_6
+; CHECK: vpermd
+; CHECK-NOT: vpermd
+; CHECK: ret
+
+
+define <8 x i32> @swizzle_7(<8 x i32> %v) {
+  %1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 1, i32 2, i32 5, i32 4, i32 6, i32 7>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 1, i32 2, i32 5, i32 4, i32 6, i32 7>
+  ret <8 x i32> %2
+}
+; CHECK-LABEL: swizzle_7
+; CHECK: vpermd
+; CHECK-NOT: vpermd
+; CHECK: ret
+
+
diff --git a/test/CodeGen/X86/testb-je-fusion.ll b/test/CodeGen/X86/testb-je-fusion.ll
new file mode 100644
index 0000000..9e946ae
--- /dev/null
+++ b/test/CodeGen/X86/testb-je-fusion.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s
+
+; testb should be scheduled right before je to enable macro-fusion.
+
+; CHECK: testb $2, %{{[abcd]}}h
+; CHECK-NEXT: je
+
+define i32 @check_flag(i32 %flags, ...) nounwind {
+entry:
+  %and = and i32 %flags, 512
+  %tobool = icmp eq i32 %and, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+  br label %if.end
+
+if.end:
+  %hasflag = phi i32 [ 1, %if.then ], [ 0, %entry ]
+  ret i32 %hasflag
+}
diff --git a/test/CodeGen/X86/vec_cast2.ll b/test/CodeGen/X86/vec_cast2.ll
index 5f6e7a8..1a6c05d 100644
--- a/test/CodeGen/X86/vec_cast2.ll
+++ b/test/CodeGen/X86/vec_cast2.ll
@@ -1,8 +1,20 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
 
 ;CHECK-LABEL: foo1_8:
 ;CHECK: vcvtdq2ps
 ;CHECK: ret
+;
+;CHECK-WIDE-LABEL: foo1_8:
+;CHECK-WIDE:      vpmovzxbd %xmm0, %xmm1
+;CHECK-WIDE-NEXT: vpslld $24, %xmm1, %xmm1
+;CHECK-WIDE-NEXT: vpsrad $24, %xmm1, %xmm1
+;CHECK-WIDE-NEXT: vpshufb {{.*}}, %xmm0, %xmm0
+;CHECK-WIDE-NEXT: vpslld $24, %xmm0, %xmm0
+;CHECK-WIDE-NEXT: vpsrad $24, %xmm0, %xmm0
+;CHECK-WIDE-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+;CHECK-WIDE-NEXT: vcvtdq2ps %ymm0, %ymm0
+;CHECK-WIDE-NEXT: ret
 define <8 x float> @foo1_8(<8 x i8> %src) {
   %res = sitofp <8 x i8> %src to <8 x float>
   ret <8 x float> %res
@@ -11,6 +23,13 @@ define <8 x float> @foo1_8(<8 x i8> %src) {
 ;CHECK-LABEL: foo1_4:
 ;CHECK: vcvtdq2ps
 ;CHECK: ret
+;
+;CHECK-WIDE-LABEL: foo1_4:
+;CHECK-WIDE:      vpmovzxbd %xmm0, %xmm0
+;CHECK-WIDE-NEXT: vpslld $24, %xmm0, %xmm0
+;CHECK-WIDE-NEXT: vpsrad $24, %xmm0, %xmm0
+;CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
+;CHECK-WIDE-NEXT: ret
 define <4 x float> @foo1_4(<4 x i8> %src) {
   %res = sitofp <4 x i8> %src to <4 x float>
   ret <4 x float> %res
@@ -19,6 +38,10 @@ define <4 x float> @foo1_4(<4 x i8> %src) {
 ;CHECK-LABEL: foo2_8:
 ;CHECK: vcvtdq2ps
 ;CHECK: ret
+;
+;CHECK-WIDE-LABEL: foo2_8:
+;CHECK-WIDE: vcvtdq2ps %ymm{{.*}}, %ymm{{.*}}
+;CHECK-WIDE: ret
 define <8 x float> @foo2_8(<8 x i8> %src) {
   %res = uitofp <8 x i8> %src to <8 x float>
   ret <8 x float> %res
@@ -27,6 +50,10 @@ define <8 x float> @foo2_8(<8 x i8> %src) {
 ;CHECK-LABEL: foo2_4:
 ;CHECK: vcvtdq2ps
 ;CHECK: ret
+;
+;CHECK-WIDE-LABEL: foo2_4:
+;CHECK-WIDE: vcvtdq2ps %xmm{{.*}}, %xmm{{.*}}
+;CHECK-WIDE: ret
 define <4 x float> @foo2_4(<4 x i8> %src) {
   %res = uitofp <4 x i8> %src to <4 x float>
   ret <4 x float> %res
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
index a02e383..28f2a90 100644
--- a/test/CodeGen/X86/vec_splat.ll
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s -check-prefix=SSE2
 ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse3 | FileCheck %s -check-prefix=SSE3
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX
 
 define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
 	%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0		; <<4 x float>> [#uses=1]
@@ -37,6 +38,23 @@ define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind {
 define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind {
   %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
   %2 = load <4 x float>* %1, align 16
+  %3 = trunc i64 %j to i32
+  %4 = extractelement <4 x float> %2, i32 %3
+  %5 = insertelement <4 x float> undef, float %4, i32 0
+  %6 = insertelement <4 x float> %5, float %4, i32 1
+  %7 = insertelement <4 x float> %6, float %4, i32 2
+  %8 = insertelement <4 x float> %7, float %4, i32 3
+  ret <4 x float> %8
+  
+; AVX-LABEL: load_extract_splat
+; AVX-NOT: rsp
+; AVX: vbroadcastss
+}
+
+; Fold extract of a load into the load's address computation. This avoids spilling to the stack.
+define <4 x float> @load_extract_splat1(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind {
+  %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
+  %2 = load <4 x float>* %1, align 16
   %3 = extractelement <4 x float> %2, i64 %j
   %4 = insertelement <4 x float> undef, float %3, i32 0
   %5 = insertelement <4 x float> %4, float %3, i32 1
@@ -44,7 +62,7 @@ define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly %ptr, i64
   %7 = insertelement <4 x float> %6, float %3, i32 3
   ret <4 x float> %7
   
-; AVX-LABEL: load_extract_splat
+; AVX-LABEL: load_extract_splat1
 ; AVX-NOT: movs
 ; AVX: vbroadcastss
 }
diff --git a/test/CodeGen/X86/vec_split.ll b/test/CodeGen/X86/vec_split.ll
index f9e7c20..bc2c663 100644
--- a/test/CodeGen/X86/vec_split.ll
+++ b/test/CodeGen/X86/vec_split.ll
@@ -40,3 +40,36 @@ define <32 x i16> @split32(<32 x i16> %a, <32 x i16> %b, <32 x i8> %__mask) {
   %2 = select <32 x i1> %1, <32 x i16> %a, <32 x i16> %b
   ret <32 x i16> %2
 }
+
+; PR19492
+define i128 @split128(<2 x i128> %a, <2 x i128> %b) {
+; SSE4-LABEL: split128:
+; SSE4: addq
+; SSE4: adcq
+; SSE4: addq
+; SSE4: adcq
+; SSE4: addq
+; SSE4: adcq
+; SSE4: ret
+; AVX1-LABEL: split128:
+; AVX1: addq
+; AVX1: adcq
+; AVX1: addq
+; AVX1: adcq
+; AVX1: addq
+; AVX1: adcq
+; AVX1: ret
+; AVX2-LABEL: split128:
+; AVX2: addq
+; AVX2: adcq
+; AVX2: addq
+; AVX2: adcq
+; AVX2: addq
+; AVX2: adcq
+; AVX2: ret
+  %add = add nsw <2 x i128> %a, %b
+  %rdx.shuf = shufflevector <2 x i128> %add, <2 x i128> undef, <2 x i32> <i32 undef, i32 0>
+  %bin.rdx = add <2 x i128> %add, %rdx.shuf
+  %e = extractelement <2 x i128> %bin.rdx, i32 1
+  ret i128 %e
+}
diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll
index 9c68f44..3f7ee3a 100644
--- a/test/CodeGen/X86/vector-gep.ll
+++ b/test/CodeGen/X86/vector-gep.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7-avx | FileCheck %s
 ; RUN: opt -instsimplify -disable-output < %s
 
 ;CHECK-LABEL: AGEP0:
diff --git a/test/CodeGen/X86/vector-idiv.ll b/test/CodeGen/X86/vector-idiv.ll
index 4c30184..b6d43e9 100644
--- a/test/CodeGen/X86/vector-idiv.ll
+++ b/test/CodeGen/X86/vector-idiv.ll
@@ -8,7 +8,7 @@ define <4 x i32> @test1(<4 x i32> %a) {
 
 ; SSE41-LABEL: test1:
 ; SSE41: pmuludq
-; SSE41: pshufd	$57
+; SSE41: pshufd	$49
 ; SSE41: pmuludq
 ; SSE41: shufps	$-35
 ; SSE41: psubd
@@ -18,7 +18,7 @@ define <4 x i32> @test1(<4 x i32> %a) {
 
 ; AVX-LABEL: test1:
 ; AVX: vpmuludq
-; AVX: vpshufd	$57
+; AVX: vpshufd	$49
 ; AVX: vpmuludq
 ; AVX: vshufps	$-35
 ; AVX: vpsubd
@@ -32,11 +32,11 @@ define <8 x i32> @test2(<8 x i32> %a) {
   ret <8 x i32> %div
 
 ; AVX-LABEL: test2:
-; AVX: vpermd
+; AVX: vpbroadcastd
+; AVX: vpalignr $4
 ; AVX: vpmuludq
-; AVX: vshufps	$-35
 ; AVX: vpmuludq
-; AVX: vshufps	$-35
+; AVX: vpblendd $170
 ; AVX: vpsubd
 ; AVX: vpsrld $1
 ; AVX: vpadd
@@ -107,6 +107,12 @@ define <16 x i16> @test6(<16 x i16> %a) {
 define <16 x i8> @test7(<16 x i8> %a) {
   %div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
   ret <16 x i8> %div
+
+; FIXME: scalarized
+; SSE41-LABEL: test7:
+; SSE41: pext
+; AVX-LABEL: test7:
+; AVX: pext
 }
 
 define <4 x i32> @test8(<4 x i32> %a) {
@@ -115,8 +121,8 @@ define <4 x i32> @test8(<4 x i32> %a) {
 
 ; SSE41-LABEL: test8:
 ; SSE41: pmuldq
-; SSE41: pshufd	$57
-; SSE41-NOT: pshufd	$57
+; SSE41: pshufd	$49
+; SSE41-NOT: pshufd	$49
 ; SSE41: pmuldq
 ; SSE41: shufps	$-35
 ; SSE41: pshufd	$-40
@@ -130,8 +136,8 @@ define <4 x i32> @test8(<4 x i32> %a) {
 ; SSE: pand
 ; SSE: paddd
 ; SSE: pmuludq
-; SSE: pshufd	$57
-; SSE-NOT: pshufd	$57
+; SSE: pshufd	$49
+; SSE-NOT: pshufd	$49
 ; SSE: pmuludq
 ; SSE: shufps	$-35
 ; SSE: pshufd	$-40
@@ -143,8 +149,8 @@ define <4 x i32> @test8(<4 x i32> %a) {
 
 ; AVX-LABEL: test8:
 ; AVX: vpmuldq
-; AVX: vpshufd	$57
-; AVX-NOT: vpshufd	$57
+; AVX: vpshufd	$49
+; AVX-NOT: vpshufd	$49
 ; AVX: vpmuldq
 ; AVX: vshufps	$-35
 ; AVX: vpshufd	$-40
@@ -159,12 +165,11 @@ define <8 x i32> @test9(<8 x i32> %a) {
   ret <8 x i32> %div
 
 ; AVX-LABEL: test9:
+; AVX: vpalignr $4
 ; AVX: vpbroadcastd
 ; AVX: vpmuldq
-; AVX: vshufps	$-35
 ; AVX: vpmuldq
-; AVX: vshufps	$-35
-; AVX: vpshufd	$-40
+; AVX: vpblendd $170
 ; AVX: vpadd
 ; AVX: vpsrld $31
 ; AVX: vpsrad $2
@@ -177,10 +182,10 @@ define <8 x i32> @test10(<8 x i32> %a) {
 
 ; AVX-LABEL: test10:
 ; AVX: vpbroadcastd
+; AVX: vpalignr $4
 ; AVX: vpmuludq
-; AVX: vshufps	$-35
 ; AVX: vpmuludq
-; AVX: vshufps	$-35
+; AVX: vpblendd $170
 ; AVX: vpsubd
 ; AVX: vpsrld $1
 ; AVX: vpadd
@@ -193,12 +198,11 @@ define <8 x i32> @test11(<8 x i32> %a) {
   ret <8 x i32> %rem
 
 ; AVX-LABEL: test11:
+; AVX: vpalignr $4
 ; AVX: vpbroadcastd
 ; AVX: vpmuldq
-; AVX: vshufps	$-35
 ; AVX: vpmuldq
-; AVX: vshufps	$-35
-; AVX: vpshufd	$-40
+; AVX: vpblendd $170
 ; AVX: vpadd
 ; AVX: vpsrld $31
 ; AVX: vpsrad $2
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
new file mode 100644
index 0000000..4da7e42
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -0,0 +1,196 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,2,4,5,6,7]
+; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,6,6]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    punpcklwd %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    punpckhwd %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,6,6]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_0101010101010101
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23
+; CHECK-SSE2:         punpcklbw %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    punpcklbw %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm1
+; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12
+; CHECK-SSE2:         pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT:    punpckhbw %xmm1, %xmm2
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm2 = xmm2[0,1,2,3,7,6,5,4]
+; CHECK-SSE2-NEXT:    punpcklbw %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-SSE2-NEXT:    packuswb %xmm2, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20
+; CHECK-SSE2:         pxor %xmm2, %xmm2
+; CHECK-SSE2-NEXT:    punpcklbw %xmm2, %xmm1
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; CHECK-SSE2-NEXT:    punpcklbw %xmm2, %xmm0
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-SSE2-NEXT:    packuswb %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20
+; CHECK-SSE2:         pxor %xmm2, %xmm2
+; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm3
+; CHECK-SSE2-NEXT:    punpcklbw %xmm2, %xmm3
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm3 = xmm3[0,1,2,3,7,6,5,4]
+; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm4
+; CHECK-SSE2-NEXT:    punpckhbw %xmm2, %xmm4
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm4 = xmm4[3,2,1,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    shufpd {{.*}} # xmm4 = xmm4[0],xmm3[1]
+; CHECK-SSE2-NEXT:    punpckhbw %xmm2, %xmm1
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; CHECK-SSE2-NEXT:    punpcklbw %xmm2, %xmm0
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; CHECK-SSE2-NEXT:    packuswb %xmm4, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @zext_to_v8i16_shuffle(<16 x i8> %a) {
+; CHECK-SSE2-LABEL: @zext_to_v8i16_shuffle
+; CHECK-SSE2:         pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    punpcklbw %xmm1, %xmm0
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @zext_to_v4i32_shuffle(<16 x i8> %a) {
+; CHECK-SSE2-LABEL: @zext_to_v4i32_shuffle
+; CHECK-SSE2:         pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    punpcklbw %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    punpcklbw %xmm1, %xmm0
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
+; CHECK-SSE2-LABEL: @trunc_v4i32_shuffle
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pand
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT:    packuswb %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <16 x i8> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll
new file mode 100644
index 0000000..78b4ee7
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -0,0 +1,219 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_00
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_10
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[2,3,0,1]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_11
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_22
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm1[0,1,0,1]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_32
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm1[2,3,0,1]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_33
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm1[2,3,2,3]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2f64_00
+; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[0,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
+  ret <2 x double> %shuffle
+}
+define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2f64_10
+; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[1,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
+  ret <2 x double> %shuffle
+}
+define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2f64_11
+; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[1,1]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
+  ret <2 x double> %shuffle
+}
+define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
+; FIXME: Should these use movapd + shufpd to remove a domain change at the cost
+;        of a mov?
+;
+; CHECK-SSE2-LABEL: @shuffle_v2f64_22
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm1[0,1,0,1]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
+  ret <2 x double> %shuffle
+}
+define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2f64_32
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm1[2,3,0,1]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
+  ret <2 x double> %shuffle
+}
+define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2f64_33
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm1[2,3,2,3]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
+  ret <2 x double> %shuffle
+}
+
+
+define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_02
+; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_02_copy
+; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[0],xmm2[0]
+; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_03
+; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_03_copy
+; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
+; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_12
+; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_12_copy
+; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[1],xmm2[0]
+; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_13
+; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[1],xmm1[1]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_13_copy
+; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[1],xmm2[1]
+; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_20
+; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[0],xmm0[0]
+; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_20_copy
+; CHECK-SSE2:         shufpd {{.*}} # xmm2 = xmm2[0],xmm1[0]
+; CHECK-SSE2-NEXT:    movapd %xmm2, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_21
+; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
+; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_21_copy
+; CHECK-SSE2:         shufpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
+; CHECK-SSE2-NEXT:    movapd %xmm2, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_30
+; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
+; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_30_copy
+; CHECK-SSE2:         shufpd {{.*}} # xmm2 = xmm2[1],xmm1[0]
+; CHECK-SSE2-NEXT:    movapd %xmm2, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_31
+; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[1],xmm0[1]
+; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
+  ret <2 x i64> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v2i64_31_copy
+; CHECK-SSE2:         shufpd {{.*}} # xmm2 = xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT:    movapd %xmm2, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
+  ret <2 x i64> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll
new file mode 100644
index 0000000..7d496fa
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -0,0 +1,170 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define <4 x i32> @shuffle_v4i32_0001(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_0001
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[0,0,0,1]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+  ret <4 x i32> %shuffle
+}
+define <4 x i32> @shuffle_v4i32_0020(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_0020
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[0,0,2,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
+  ret <4 x i32> %shuffle
+}
+define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_0300
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[0,3,0,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
+  ret <4 x i32> %shuffle
+}
+define <4 x i32> @shuffle_v4i32_1000(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_1000
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[1,0,0,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+  ret <4 x i32> %shuffle
+}
+define <4 x i32> @shuffle_v4i32_2200(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_2200
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[2,2,0,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
+  ret <4 x i32> %shuffle
+}
+define <4 x i32> @shuffle_v4i32_3330(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_3330
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[3,3,3,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
+  ret <4 x i32> %shuffle
+}
+define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_3210
+; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[3,2,1,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4f32_0001
+; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[0,0,0,1]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4f32_0020
+; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[0,0,2,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4f32_0300
+; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[0,3,0,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4f32_1000
+; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[1,0,0,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4f32_2200
+; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[2,2,0,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4f32_3330
+; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[3,3,3,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4f32_3210
+; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[3,2,1,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_0124
+; CHECK-SSE2:         shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0]
+; CHECK-SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[2,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+  ret <4 x i32> %shuffle
+}
+define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_0142
+; CHECK-SSE2:         shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0]
+; CHECK-SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,2]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
+  ret <4 x i32> %shuffle
+}
+define <4 x i32> @shuffle_v4i32_0412(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_0412
+; CHECK-SSE2:         shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0]
+; CHECK-SSE2-NEXT:    shufps {{.*}} # xmm1 = xmm1[2,0],xmm0[1,2]
+; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
+  ret <4 x i32> %shuffle
+}
+define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_4012
+; CHECK-SSE2:         shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0]
+; CHECK-SSE2-NEXT:    shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2]
+; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
+  ret <4 x i32> %shuffle
+}
+define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_0145
+; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ret <4 x i32> %shuffle
+}
+define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_0451
+; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,1]
+; CHECK-SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2,3,1]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
+  ret <4 x i32> %shuffle
+}
+define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_4501
+; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[0],xmm0[0]
+; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+  ret <4 x i32> %shuffle
+}
+define <4 x i32> @shuffle_v4i32_4015(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_4015
+; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,1]
+; CHECK-SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[2,0,1,3]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
+  ret <4 x i32> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll
new file mode 100644
index 0000000..5d1922a
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -0,0 +1,493 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_01012323
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,0,1,1]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_67452301
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[3,2,1,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_456789AB
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_00000000
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_00004444
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_31206745
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,3,2]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_44440000
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,1,0,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_75643120
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,3,0,1]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_10545410
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[1,0,3,2,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,4,7,6]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_54105410
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,4,7,6]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_54101054
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_04400440
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,4,4,6]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_40044004
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[2,0,0,2,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_26405173
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,3,2,1]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[1,3,2,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,6,4,7]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_20645173
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,3,2,1]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[1,0,3,2,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,6,4,7]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_26401375
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,3,1,2]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[1,3,2,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_00444444
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_44004444
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[2,2,0,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_04404444
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_04400000
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,0,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_04404567
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_0X444444
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,1,2,2,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_44X04444
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[2,2,2,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_X4404444
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_0127XXXX
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_XXXX4563
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,2,0]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_4563XXXX
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,0,2,3]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_01274563
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,5,4,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,3,1,2]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_45630127
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,3,1,2,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,0,1,3]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,5,4]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_08192a3b
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_0c1d2e3f
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
+; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_4c5d6e7f
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
+; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_48596a7b
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
+; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_08196e7f
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[0,3,2,3]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,3,2,3]
+; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_0c1d6879
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[2,0,2,3]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,3,2,3]
+; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_109832ba
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm1 = xmm0[2,0,3,1,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[2,0,3,1,4,5,6,7]
+; CHECK-SSE2-NEXT:    punpcklqdq %xmm0, %xmm1
+; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_8091a2b3
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    punpcklwd %xmm0, %xmm1
+; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_c4d5e6f7
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm2 = xmm0[2,3,2,3]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm1[2,3,2,3]
+; CHECK-SSE2-NEXT:    punpcklwd %xmm2, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_0213cedf
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm1 = xmm1[0,2,1,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    punpcklqdq %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_032dXXXX
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[2,1,2,3]
+; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,3,2,1,4,5,6,7]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_XXXcXXXX(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_XXXcXXXX
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm1[2,1,2,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,1,2,1,4,5,6,7]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_012dXXXX
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[2,1,2,3]
+; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,1,2,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[1,2,0,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_XXXXcde3
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
+; CHECK-SSE2-NEXT:    punpckhwd %xmm0, %xmm1
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,0,2]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_cde3XXXX
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
+; CHECK-SSE2-NEXT:    punpckhwd %xmm0, %xmm1
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v8i16_012dcde3
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm2 = xmm0[0,1,2,1]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm3 = xmm1[2,1,2,3]
+; CHECK-SSE2-NEXT:    punpckhwd %xmm2, %xmm1
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,7,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[0,2,2,3]
+; CHECK-SSE2-NEXT:    punpcklwd %xmm3, %xmm0
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,1,2,3]
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[1,2,0,3,4,5,6,7]
+; CHECK-SSE2-NEXT:    punpcklqdq %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
+  ret <8 x i16> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll
new file mode 100644
index 0000000..e60ecb7
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -0,0 +1,119 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8)
+declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
+declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8)
+
+define <4 x i32> @combine_pshufd1(<4 x i32> %a) {
+; CHECK-SSE2-LABEL: @combine_pshufd1
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    retq
+  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) 
+  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 27) 
+  ret <4 x i32> %c
+}
+
+define <4 x i32> @combine_pshufd2(<4 x i32> %a) {
+; CHECK-SSE2-LABEL: @combine_pshufd2
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    retq
+  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) 
+  %b.cast = bitcast <4 x i32> %b to <8 x i16>
+  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 -28)
+  %c.cast = bitcast <8 x i16> %c to <4 x i32>
+  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27) 
+  ret <4 x i32> %d
+}
+
+define <4 x i32> @combine_pshufd3(<4 x i32> %a) {
+; CHECK-SSE2-LABEL: @combine_pshufd3
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    retq
+  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) 
+  %b.cast = bitcast <4 x i32> %b to <8 x i16>
+  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 -28)
+  %c.cast = bitcast <8 x i16> %c to <4 x i32>
+  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27) 
+  ret <4 x i32> %d
+}
+
+define <4 x i32> @combine_pshufd4(<4 x i32> %a) {
+; CHECK-SSE2-LABEL: @combine_pshufd4
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-SSE2-NEXT:    retq
+  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -31) 
+  %b.cast = bitcast <4 x i32> %b to <8 x i16>
+  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 27)
+  %c.cast = bitcast <8 x i16> %c to <4 x i32>
+  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -31) 
+  ret <4 x i32> %d
+}
+
+define <4 x i32> @combine_pshufd5(<4 x i32> %a) {
+; CHECK-SSE2-LABEL: @combine_pshufd5
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    retq
+  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -76) 
+  %b.cast = bitcast <4 x i32> %b to <8 x i16>
+  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 27)
+  %c.cast = bitcast <8 x i16> %c to <4 x i32>
+  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -76)
+  ret <4 x i32> %d
+}
+
+define <4 x i32> @combine_pshufd6(<4 x i32> %a) {
+; CHECK-SSE2-LABEL: @combine_pshufd6
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufd $0
+; CHECK-SSE2-NEXT:    retq
+  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 0)
+  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 8)
+  ret <4 x i32> %c
+}
+
+define <8 x i16> @combine_pshuflw1(<8 x i16> %a) {
+; CHECK-SSE2-LABEL: @combine_pshuflw1
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    retq
+  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) 
+  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27) 
+  ret <8 x i16> %c
+}
+
+define <8 x i16> @combine_pshuflw2(<8 x i16> %a) {
+; CHECK-SSE2-LABEL: @combine_pshuflw2
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    retq
+  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
+  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 -28) 
+  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27) 
+  ret <8 x i16> %d
+}
+
+define <8 x i16> @combine_pshuflw3(<8 x i16> %a) {
+; CHECK-SSE2-LABEL: @combine_pshuflw3
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-SSE2-NEXT:    retq
+  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
+  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 27) 
+  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27) 
+  ret <8 x i16> %d
+}
+
+define <8 x i16> @combine_pshufhw1(<8 x i16> %a) {
+; CHECK-SSE2-LABEL: @combine_pshufhw1
+; CHECK-SSE2:       # BB#0:
+; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-SSE2-NEXT:    retq
+  %b = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27)
+  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27) 
+  %d = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %c, i8 27) 
+  ret <8 x i16> %d
+}
+
diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll
index 0cf03fc..42cf06a 100644
--- a/test/CodeGen/X86/vselect.ll
+++ b/test/CodeGen/X86/vselect.ll
@@ -262,3 +262,17 @@ define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK: movsd
 ; CHECK: ret
 
+define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x float> %a1, <2 x float> %b1) {
+; CHECK-LABEL: select_of_shuffles_0
+; CHECK-DAG: movlhps %xmm2, [[REGA:%xmm[0-9]+]]
+; CHECK-DAG: movlhps %xmm3, [[REGB:%xmm[0-9]+]]
+; CHECK: subps [[REGB]], [[REGA]]
+  %1 = shufflevector <2 x float> %a0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %2 = shufflevector <2 x float> %a1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+  %3 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %2, <4 x float> %1
+  %4 = shufflevector <2 x float> %b0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %5 = shufflevector <2 x float> %b1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+  %6 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %5, <4 x float> %4
+  %7 = fsub <4 x float> %3, %6
+  ret <4 x float> %7
+}
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
index 1bc06a7..19b84f1 100644
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -1,8 +1,9 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
-; CHECK: psraw
-; CHECK: psraw
+; RUN: llc < %s -march=x86 -mattr=+sse4.2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
 
 define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
+; CHECK-LABEL: update:
+; CHECK-WIDE-LABEL: update:
 entry:
 	%dst_i.addr = alloca i64*		; <i64**> [#uses=2]
 	%src_i.addr = alloca i64*		; <i64**> [#uses=2]
@@ -44,6 +45,26 @@ forbody:		; preds = %forcond
 	%shr = ashr <8 x i8> %add, < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 >		; <<8 x i8>> [#uses=1]
 	store <8 x i8> %shr, <8 x i8>* %arrayidx10
 	br label %forinc
+; CHECK: %forbody
+; CHECK:      pmovzxbw
+; CHECK-NEXT: paddw
+; CHECK-NEXT: psllw $8
+; CHECK-NEXT: psraw $8
+; CHECK-NEXT: psraw $2
+; CHECK-NEXT: pshufb
+; CHECK-NEXT: movlpd
+;
+; FIXME: We shouldn't require both a movd and an insert.
+; CHECK-WIDE: %forbody
+; CHECK-WIDE:      movd
+; CHECK-WIDE-NEXT: pinsrd
+; CHECK-WIDE-NEXT: paddb
+; CHECK-WIDE-NEXT: psrlw $2
+; CHECK-WIDE-NEXT: pand
+; CHECK-WIDE-NEXT: pxor
+; CHECK-WIDE-NEXT: psubb
+; CHECK-WIDE-NEXT: pextrd
+; CHECK-WIDE-NEXT: movd
 
 forinc:		; preds = %forbody
 	%tmp15 = load i32* %i		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/widen_cast-6.ll b/test/CodeGen/X86/widen_cast-6.ll
index 7c06ad8..46d8dd7 100644
--- a/test/CodeGen/X86/widen_cast-6.ll
+++ b/test/CodeGen/X86/widen_cast-6.ll
@@ -1,9 +1,13 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse4.1 | FileCheck %s
-; CHECK: movd
 
 ; Test bit convert that requires widening in the operand.
 
 define i32 @return_v2hi() nounwind {
+; CHECK-LABEL: @return_v2hi
+; CHECK:      pushl
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: popl
+; CHECK-NEXT: ret
 entry:
 	%retval12 = bitcast <2 x i16> zeroinitializer to i32		; <i32> [#uses=1]
 	ret i32 %retval12
diff --git a/test/CodeGen/X86/widen_conversions.ll b/test/CodeGen/X86/widen_conversions.ll
new file mode 100644
index 0000000..522ab47
--- /dev/null
+++ b/test/CodeGen/X86/widen_conversions.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mcpu=x86-64 -x86-experimental-vector-widening-legalization -x86-experimental-vector-shuffle-lowering | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define <4 x i32> @zext_v4i8_to_v4i32(<4 x i8>* %ptr) {
+; CHECK-LABEL: zext_v4i8_to_v4i32:
+; 
+; CHECK:      movd (%{{.*}}), %[[X:xmm[0-9]+]]
+; CHECK-NEXT: pxor %[[Z:xmm[0-9]+]], %[[Z]]
+; CHECK-NEXT: punpcklbw %[[Z]], %[[X]]
+; CHECK-NEXT: punpcklbw %[[Z]], %[[X]]
+; CHECK-NEXT: ret
+
+  %val = load <4 x i8>* %ptr
+  %ext = zext <4 x i8> %val to <4 x i32>
+  ret <4 x i32> %ext
+}
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 803402b..a355b75 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -33,7 +33,9 @@ entry:
 define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
 entry:
 ; CHECK-LABEL: shuf3:
-; CHECK: shufps
+; CHECK-NOT: movlhps
+; CHECK-NOT: shufps
+; CHECK: pshufd
   %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 
   %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
diff --git a/test/CodeGen/X86/win64_eh.ll b/test/CodeGen/X86/win64_eh.ll
new file mode 100644
index 0000000..f1f874e
--- /dev/null
+++ b/test/CodeGen/X86/win64_eh.ll
@@ -0,0 +1,170 @@
+; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=WIN64
+
+; Check function without prolog
+define void @foo0() uwtable {
+entry:
+  ret void
+}
+; WIN64-LABEL: foo0:
+; WIN64: .seh_proc foo0
+; WIN64: .seh_endprologue
+; WIN64: ret
+; WIN64: .seh_endproc
+
+; Checks a small stack allocation
+define void @foo1() uwtable {
+entry:
+  %baz = alloca [2000 x i16], align 2
+  ret void
+}
+; WIN64-LABEL: foo1:
+; WIN64: .seh_proc foo1
+; WIN64: subq $4000, %rsp
+; WIN64: .seh_stackalloc 4000
+; WIN64: .seh_endprologue
+; WIN64: addq $4000, %rsp
+; WIN64: ret
+; WIN64: .seh_endproc
+
+; Checks a stack allocation requiring call to __chkstk/___chkstk_ms
+define void @foo2() uwtable {
+entry:
+  %baz = alloca [4000 x i16], align 2
+  ret void
+}
+; WIN64-LABEL: foo2:
+; WIN64: .seh_proc foo2
+; WIN64: movabsq $8000, %rax
+; WIN64: callq {{__chkstk|___chkstk_ms}}
+; WIN64: subq %rax, %rsp
+; WIN64: .seh_stackalloc 8000
+; WIN64: .seh_endprologue
+; WIN64: addq $8000, %rsp
+; WIN64: ret
+; WIN64: .seh_endproc
+
+
+; Checks stack push
+define i32 @foo3(i32 %f_arg, i32 %e_arg, i32 %d_arg, i32 %c_arg, i32 %b_arg, i32 %a_arg) uwtable {
+entry:
+  %a = alloca i32
+  %b = alloca i32
+  %c = alloca i32
+  %d = alloca i32
+  %e = alloca i32
+  %f = alloca i32
+  store i32 %a_arg, i32* %a
+  store i32 %b_arg, i32* %b
+  store i32 %c_arg, i32* %c
+  store i32 %d_arg, i32* %d
+  store i32 %e_arg, i32* %e
+  store i32 %f_arg, i32* %f
+  %tmp = load i32* %a
+  %tmp1 = mul i32 %tmp, 2
+  %tmp2 = load i32* %b
+  %tmp3 = mul i32 %tmp2, 3
+  %tmp4 = add i32 %tmp1, %tmp3
+  %tmp5 = load i32* %c
+  %tmp6 = mul i32 %tmp5, 5
+  %tmp7 = add i32 %tmp4, %tmp6
+  %tmp8 = load i32* %d
+  %tmp9 = mul i32 %tmp8, 7
+  %tmp10 = add i32 %tmp7, %tmp9
+  %tmp11 = load i32* %e
+  %tmp12 = mul i32 %tmp11, 11
+  %tmp13 = add i32 %tmp10, %tmp12
+  %tmp14 = load i32* %f
+  %tmp15 = mul i32 %tmp14, 13
+  %tmp16 = add i32 %tmp13, %tmp15
+  ret i32 %tmp16
+}
+; WIN64-LABEL: foo3:
+; WIN64: .seh_proc foo3
+; WIN64: pushq %rsi
+; WIN64: .seh_pushreg 6
+; WIN64: subq $24, %rsp
+; WIN64: .seh_stackalloc 24
+; WIN64: .seh_endprologue
+; WIN64: addq $24, %rsp
+; WIN64: popq %rsi
+; WIN64: ret
+; WIN64: .seh_endproc
+
+
+; Check emission of eh handler and handler data
+declare i32 @_d_eh_personality(i32, i32, i64, i8*, i8*)
+declare void @_d_eh_resume_unwind(i8*)
+
+declare i32 @bar()
+
+define i32 @foo4() #0 {
+entry:
+  %step = alloca i32, align 4
+  store i32 0, i32* %step
+  %tmp = load i32* %step
+
+  %tmp1 = invoke i32 @bar()
+          to label %finally unwind label %landingpad
+
+finally:
+  store i32 1, i32* %step
+  br label %endtryfinally
+
+landingpad:
+  %landing_pad = landingpad { i8*, i32 } personality i32 (i32, i32, i64, i8*, i8*)* @_d_eh_personality
+          cleanup
+  %tmp3 = extractvalue { i8*, i32 } %landing_pad, 0
+  store i32 2, i32* %step
+  call void @_d_eh_resume_unwind(i8* %tmp3)
+  unreachable
+
+endtryfinally:
+  %tmp10 = load i32* %step
+  ret i32 %tmp10
+}
+; WIN64-LABEL: foo4:
+; WIN64: .seh_proc foo4
+; WIN64: .seh_handler _d_eh_personality, @unwind, @except
+; WIN64: subq $56, %rsp
+; WIN64: .seh_stackalloc 56
+; WIN64: .seh_endprologue
+; WIN64: addq $56, %rsp
+; WIN64: ret
+; WIN64: .seh_handlerdata
+; WIN64: .seh_endproc
+
+
+; Check stack re-alignment and xmm spilling
+define void @foo5() uwtable {
+entry:
+  %s = alloca i32, align 64
+  call void asm sideeffect "", "~{rbx},~{rdi},~{xmm6},~{xmm7}"()
+  ret void
+}
+; WIN64-LABEL: foo5:
+; WIN64: .seh_proc foo5
+; WIN64: pushq %rbp
+; WIN64: .seh_pushreg 5
+; WIN64: movq  %rsp, %rbp
+; WIN64: pushq %rdi
+; WIN64: .seh_pushreg 7
+; WIN64: pushq %rbx
+; WIN64: .seh_pushreg 3
+; WIN64: andq  $-64, %rsp
+; WIN64: subq  $128, %rsp
+; WIN64: .seh_stackalloc 48
+; WIN64: .seh_setframe 5, 64
+; WIN64: movaps  %xmm7, -32(%rbp)        # 16-byte Spill
+; WIN64: movaps  %xmm6, -48(%rbp)        # 16-byte Spill
+; WIN64: .seh_savexmm 6, 16
+; WIN64: .seh_savexmm 7, 32
+; WIN64: .seh_endprologue
+; WIN64: movaps  -48(%rbp), %xmm6        # 16-byte Reload
+; WIN64: movaps  -32(%rbp), %xmm7        # 16-byte Reload
+; WIN64: leaq  -16(%rbp), %rsp
+; WIN64: popq  %rbx
+; WIN64: popq  %rdi
+; WIN64: popq  %rbp
+; WIN64: retq
+; WIN64: .seh_endproc
diff --git a/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll b/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
index 5d7a10b..08d0257 100644
--- a/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
+++ b/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
@@ -3,7 +3,7 @@
 ; clang -Oz -c test1.cpp -emit-llvm -S -o
 ; Verify that we generate shld insruction when we are optimizing for size,
 ; even for X86_64 processors that are known to have poor latency double 
-; precision shift instuctions.
+; precision shift instructions.
 ; uint64_t lshift10(uint64_t a, uint64_t b)
 ; {
 ;     return (a << 10) | (b >> 54);
@@ -25,7 +25,7 @@ attributes #0 = { minsize nounwind optsize readnone uwtable "less-precise-fpmad"
 ; clang -Os -c test2.cpp -emit-llvm -S
 ; Verify that we generate shld insruction when we are optimizing for size,
 ; even for X86_64 processors that are known to have poor latency double
-; precision shift instuctions.
+; precision shift instructions.
 ; uint64_t lshift11(uint64_t a, uint64_t b)
 ; {
 ;     return (a << 11) | (b >> 53);
@@ -46,7 +46,7 @@ attributes #1 = { nounwind optsize readnone uwtable "less-precise-fpmad"="false"
 ; clang -O2 -c test2.cpp -emit-llvm -S
 ; Verify that we do not generate shld insruction when we are not optimizing
 ; for size for X86_64 processors that are known to have poor latency double
-; precision shift instuctions.
+; precision shift instructions.
 ; uint64_t lshift12(uint64_t a, uint64_t b)
 ; {
 ;     return (a << 12) | (b >> 52);
diff --git a/test/CodeGen/X86/x86-64-frameaddr.ll b/test/CodeGen/X86/x86-64-frameaddr.ll
deleted file mode 100644
index 7d36a7a..0000000
--- a/test/CodeGen/X86/x86-64-frameaddr.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-
-; CHECK: stack_end_address
-; CHECK: {{movq.+rbp.*$}}
-; CHECK: {{movq.+rbp.*$}}
-; CHECK: ret
-
-define i64* @stack_end_address() nounwind  {
-entry:
-	tail call i8* @llvm.frameaddress( i32 0 )
-	bitcast i8* %0 to i64*
-	ret i64* %1
-}
-
-declare i8* @llvm.frameaddress(i32) nounwind readnone 
diff --git a/test/CodeGen/X86/x86-64-static-relo-movl.ll b/test/CodeGen/X86/x86-64-static-relo-movl.ll
new file mode 100644
index 0000000..71e52bb
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-static-relo-movl.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=x86_64-pc-win32-macho -relocation-model=static -O0 < %s | FileCheck %s
+
+; Ensure that we don't generate a movl and not a lea for a static relocation
+; when compiling for 64 bit.
+
+%struct.MatchInfo = type [64 x i64]
+
+@NO_MATCH = internal constant %struct.MatchInfo zeroinitializer, align 8
+
+define void @setup() {
+  %pending = alloca %struct.MatchInfo, align 8
+  %t = bitcast %struct.MatchInfo* %pending to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t, i8* bitcast (%struct.MatchInfo* @NO_MATCH to i8*), i64 512, i32 8, i1 false)
+  %u = getelementptr inbounds %struct.MatchInfo* %pending, i32 0, i32 2
+  %v = load i64* %u, align 8
+  br label %done
+done:
+  ret void
+
+  ; CHECK: movabsq $_NO_MATCH, {{.*}}
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
diff --git a/test/CodeGen/X86/x86-frameaddr.ll b/test/CodeGen/X86/x86-frameaddr.ll
deleted file mode 100644
index d595874..0000000
--- a/test/CodeGen/X86/x86-frameaddr.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 | grep mov | grep ebp
-
-define i8* @t() nounwind {
-entry:
-	%0 = tail call i8* @llvm.frameaddress(i32 0)
-	ret i8* %0
-}
-
-declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/test/CodeGen/X86/x86-frameaddr2.ll b/test/CodeGen/X86/x86-frameaddr2.ll
deleted file mode 100644
index c509115..0000000
--- a/test/CodeGen/X86/x86-frameaddr2.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 | grep mov | count 3
-
-define i8* @t() nounwind {
-entry:
-	%0 = tail call i8* @llvm.frameaddress(i32 2)
-	ret i8* %0
-}
-
-declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/test/CodeGen/X86/x86-upgrade-avx-vbroadcast.ll b/test/CodeGen/X86/x86-upgrade-avx-vbroadcast.ll
new file mode 100644
index 0000000..d885f1c
--- /dev/null
+++ b/test/CodeGen/X86/x86-upgrade-avx-vbroadcast.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mattr=+avx < %s | FileCheck %s
+
+; Check that we properly upgrade the AVX vbroadcast intrinsics to IR.  The
+; expectation is that we should still get the original instruction back that
+; maps to the intrinsic.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; CHECK-LABEL: test_mm_broadcast_ss:
+define <4 x float> @test_mm_broadcast_ss(float* readonly %__a){
+entry:
+  %0 = bitcast float* %__a to i8*
+; CHECK: vbroadcastss (%{{.*}}), %xmm
+  %1 = tail call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %0)
+  ret <4 x float> %1
+}
+
+; CHECK-LABEL: test_mm256_broadcast_sd:
+define <4 x double> @test_mm256_broadcast_sd(double* readonly %__a) {
+entry:
+  %0 = bitcast double* %__a to i8*
+; CHECK: vbroadcastsd (%{{.*}}), %ymm
+  %1 = tail call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %0)
+  ret <4 x double> %1
+}
+
+; CHECK-LABEL: test_mm256_broadcast_ss:
+define <8 x float> @test_mm256_broadcast_ss(float* readonly %__a) {
+entry:
+  %0 = bitcast float* %__a to i8*
+; CHECK: vbroadcastss (%{{.*}}), %ymm
+  %1 = tail call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %0)
+  ret <8 x float> %1
+}
+
+declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*)
+
+declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*)
+
+declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*)
diff --git a/test/CodeGen/X86/xaluo.ll b/test/CodeGen/X86/xaluo.ll
new file mode 100644
index 0000000..f078631
--- /dev/null
+++ b/test/CodeGen/X86/xaluo.ll
@@ -0,0 +1,743 @@
+; RUN: llc -mtriple=x86_64-darwin-unknown < %s                             | FileCheck %s --check-prefix=DAG
+; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort < %s | FileCheck %s --check-prefix=FAST
+; RUN: llc -mtriple=x86_64-darwin-unknown < %s                             | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort < %s | FileCheck %s
+
+;
+; Get the actual value of the overflow bit.
+;
+; SADDO reg, reg
+define zeroext i1 @saddo.i8(i8 signext %v1, i8 signext %v2, i8* %res) {
+entry:
+; DAG-LABEL:    saddo.i8
+; DAG:          addb %sil, %dil
+; DAG-NEXT:     seto %al
+; FAST-LABEL:   saddo.i8
+; FAST:         addb %sil, %dil
+; FAST-NEXT:    seto %al
+  %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %v1, i8 %v2)
+  %val = extractvalue {i8, i1} %t, 0
+  %obit = extractvalue {i8, i1} %t, 1
+  store i8 %val, i8* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo.i16(i16 %v1, i16 %v2, i16* %res) {
+entry:
+; DAG-LABEL:    saddo.i16
+; DAG:          addw %si, %di
+; DAG-NEXT:     seto %al
+; FAST-LABEL:   saddo.i16
+; FAST:         addw %si, %di
+; FAST-NEXT:    seto %al
+  %t = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %v1, i16 %v2)
+  %val = extractvalue {i16, i1} %t, 0
+  %obit = extractvalue {i16, i1} %t, 1
+  store i16 %val, i16* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo.i32(i32 %v1, i32 %v2, i32* %res) {
+entry:
+; DAG-LABEL:    saddo.i32
+; DAG:          addl %esi, %edi
+; DAG-NEXT:     seto %al
+; FAST-LABEL:   saddo.i32
+; FAST:         addl %esi, %edi
+; FAST-NEXT:    seto %al
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; DAG-LABEL:    saddo.i64
+; DAG:          addq %rsi, %rdi
+; DAG-NEXT:     seto %al
+; FAST-LABEL:   saddo.i64
+; FAST:         addq %rsi, %rdi
+; FAST-NEXT:    seto %al
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+; SADDO reg, imm | imm, reg
+; FIXME: INC isn't supported in FastISel yet
+define zeroext i1 @saddo.i64imm1(i64 %v1, i64* %res) {
+entry:
+; DAG-LABEL:    saddo.i64imm1
+; DAG:          incq %rdi
+; DAG-NEXT:     seto %al
+; FAST-LABEL:   saddo.i64imm1
+; FAST:         addq $1, %rdi
+; FAST-NEXT:    seto %al
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 1)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+; FIXME: DAG doesn't optimize immediates on the LHS.
+define zeroext i1 @saddo.i64imm2(i64 %v1, i64* %res) {
+entry:
+; DAG-LABEL:    saddo.i64imm2
+; DAG:          mov
+; DAG-NEXT:     addq
+; DAG-NEXT:     seto
+; FAST-LABEL:   saddo.i64imm2
+; FAST:         addq $1, %rdi
+; FAST-NEXT:    seto %al
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 1, i64 %v1)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+; Check boundary conditions for large immediates.
+define zeroext i1 @saddo.i64imm3(i64 %v1, i64* %res) {
+entry:
+; DAG-LABEL:    saddo.i64imm3
+; DAG:          addq $-2147483648, %rdi
+; DAG-NEXT:     seto %al
+; FAST-LABEL:   saddo.i64imm3
+; FAST:         addq $-2147483648, %rdi
+; FAST-NEXT:    seto %al
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -2147483648)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo.i64imm4(i64 %v1, i64* %res) {
+entry:
+; DAG-LABEL:    saddo.i64imm4
+; DAG:          movabsq $-21474836489, %[[REG:[a-z]+]]
+; DAG-NEXT:     addq %rdi, %[[REG]]
+; DAG-NEXT:     seto
+; FAST-LABEL:   saddo.i64imm4
+; FAST:         movabsq $-21474836489, %[[REG:[a-z]+]]
+; FAST-NEXT:    addq %rdi, %[[REG]]
+; FAST-NEXT:    seto
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -21474836489)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo.i64imm5(i64 %v1, i64* %res) {
+entry:
+; DAG-LABEL:    saddo.i64imm5
+; DAG:          addq $2147483647, %rdi
+; DAG-NEXT:     seto
+; FAST-LABEL:   saddo.i64imm5
+; FAST:         addq $2147483647, %rdi
+; FAST-NEXT:    seto
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 2147483647)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+; TODO: FastISel shouldn't use movabsq.
+define zeroext i1 @saddo.i64imm6(i64 %v1, i64* %res) {
+entry:
+; DAG-LABEL:    saddo.i64imm6
+; DAG:          movl $2147483648, %ecx
+; DAG:          addq %rdi, %rcx
+; DAG-NEXT:     seto
+; FAST-LABEL:   saddo.i64imm6
+; FAST:         movabsq $2147483648, %[[REG:[a-z]+]]
+; FAST:         addq %rdi, %[[REG]]
+; FAST-NEXT:     seto
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 2147483648)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+; UADDO
+define zeroext i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) {
+entry:
+; DAG-LABEL:    uaddo.i32
+; DAG:          addl %esi, %edi
+; DAG-NEXT:     setb %al
+; FAST-LABEL:   uaddo.i32
+; FAST:         addl %esi, %edi
+; FAST-NEXT:    setb %al
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; DAG-LABEL:    uaddo.i64
+; DAG:          addq %rsi, %rdi
+; DAG-NEXT:     setb %al
+; FAST-LABEL:   uaddo.i64
+; FAST:         addq %rsi, %rdi
+; FAST-NEXT:    setb %al
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+; SSUBO
+define zeroext i1 @ssubo.i32(i32 %v1, i32 %v2, i32* %res) {
+entry:
+; DAG-LABEL:    ssubo.i32
+; DAG:          subl %esi, %edi
+; DAG-NEXT:     seto %al
+; FAST-LABEL:   ssubo.i32
+; FAST:         subl %esi, %edi
+; FAST-NEXT:    seto %al
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; DAG-LABEL:    ssubo.i64
+; DAG:          subq %rsi, %rdi
+; DAG-NEXT:     seto %al
+; FAST-LABEL:   ssubo.i64
+; FAST:         subq %rsi, %rdi
+; FAST-NEXT:    seto %al
+  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+; USUBO
+define zeroext i1 @usubo.i32(i32 %v1, i32 %v2, i32* %res) {
+entry:
+; DAG-LABEL:    usubo.i32
+; DAG:          subl %esi, %edi
+; DAG-NEXT:     setb %al
+; FAST-LABEL:   usubo.i32
+; FAST:         subl %esi, %edi
+; FAST-NEXT:    setb %al
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @usubo.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; DAG-LABEL:    usubo.i64
+; DAG:          subq %rsi, %rdi
+; DAG-NEXT:     setb %al
+; FAST-LABEL:   usubo.i64
+; FAST:         subq %rsi, %rdi
+; FAST-NEXT:    setb %al
+  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+; SMULO
+define zeroext i1 @smulo.i8(i8 %v1, i8 %v2, i8* %res) {
+entry:
+; FAST-LABEL:   smulo.i8
+; FAST:         movb %dil, %al
+; FAST-NEXT:    imulb %sil
+; FAST-NEXT:    seto %cl
+  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
+  %val = extractvalue {i8, i1} %t, 0
+  %obit = extractvalue {i8, i1} %t, 1
+  store i8 %val, i8* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @smulo.i16(i16 %v1, i16 %v2, i16* %res) {
+entry:
+; DAG-LABEL:    smulo.i16
+; DAG:          imulw %si, %di
+; DAG-NEXT:     seto %al
+; FAST-LABEL:   smulo.i16
+; FAST:         imulw %si, %di
+; FAST-NEXT:    seto %al
+  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
+  %val = extractvalue {i16, i1} %t, 0
+  %obit = extractvalue {i16, i1} %t, 1
+  store i16 %val, i16* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) {
+entry:
+; DAG-LABEL:    smulo.i32
+; DAG:          imull %esi, %edi
+; DAG-NEXT:     seto %al
+; FAST-LABEL:   smulo.i32
+; FAST:         imull %esi, %edi
+; FAST-NEXT:    seto %al
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; DAG-LABEL:    smulo.i64
+; DAG:          imulq %rsi, %rdi
+; DAG-NEXT:     seto %al
+; FAST-LABEL:   smulo.i64
+; FAST:         imulq %rsi, %rdi
+; FAST-NEXT:    seto %al
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+; UMULO
+define zeroext i1 @umulo.i8(i8 %v1, i8 %v2, i8* %res) {
+entry:
+; FAST-LABEL:   umulo.i8
+; FAST:         movb %dil, %al
+; FAST-NEXT:    mulb %sil
+; FAST-NEXT:    seto %cl
+  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
+  %val = extractvalue {i8, i1} %t, 0
+  %obit = extractvalue {i8, i1} %t, 1
+  store i8 %val, i8* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @umulo.i16(i16 %v1, i16 %v2, i16* %res) {
+entry:
+; DAG-LABEL:    umulo.i16
+; DAG:          mulw %si
+; DAG-NEXT:     seto
+; FAST-LABEL:   umulo.i16
+; FAST:         mulw %si
+; FAST-NEXT:    seto
+  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
+  %val = extractvalue {i16, i1} %t, 0
+  %obit = extractvalue {i16, i1} %t, 1
+  store i16 %val, i16* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
+entry:
+; DAG-LABEL:    umulo.i32
+; DAG:          mull %esi
+; DAG-NEXT:     seto
+; FAST-LABEL:   umulo.i32
+; FAST:         mull %esi
+; FAST-NEXT:    seto
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; DAG-LABEL:    umulo.i64
+; DAG:          mulq %rsi
+; DAG-NEXT:     seto
+; FAST-LABEL:   umulo.i64
+; FAST:         mulq %rsi
+; FAST-NEXT:    seto
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+;
+; Check the use of the overflow bit in combination with a select instruction.
+;
+define i32 @saddo.select.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL:    saddo.select.i32
+; CHECK:          addl   %esi, %eax
+; CHECK-NEXT:     cmovol %edi, %esi
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i64 @saddo.select.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL:    saddo.select.i64
+; CHECK:          addq   %rsi, %rax
+; CHECK-NEXT:     cmovoq %rdi, %rsi
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i32 @uaddo.select.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL:    uaddo.select.i32
+; CHECK:          addl   %esi, %eax
+; CHECK-NEXT:     cmovbl %edi, %esi
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i64 @uaddo.select.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL:    uaddo.select.i64
+; CHECK:          addq   %rsi, %rax
+; CHECK-NEXT:     cmovbq %rdi, %rsi
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i32 @ssubo.select.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL:    ssubo.select.i32
+; CHECK:          cmpl   %esi, %edi
+; CHECK-NEXT:     cmovol %edi, %esi
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i64 @ssubo.select.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL:    ssubo.select.i64
+; CHECK:          cmpq   %rsi, %rdi
+; CHECK-NEXT:     cmovoq %rdi, %rsi
+  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i32 @usubo.select.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL:    usubo.select.i32
+; CHECK:          cmpl   %esi, %edi
+; CHECK-NEXT:     cmovbl %edi, %esi
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i64 @usubo.select.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL:    usubo.select.i64
+; CHECK:          cmpq   %rsi, %rdi
+; CHECK-NEXT:     cmovbq %rdi, %rsi
+  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i32 @smulo.select.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL:    smulo.select.i32
+; CHECK:          imull  %esi, %eax
+; CHECK-NEXT:     cmovol %edi, %esi
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL:    smulo.select.i64
+; CHECK:          imulq  %rsi, %rax
+; CHECK-NEXT:     cmovoq %rdi, %rsi
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i32 @umulo.select.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL:    umulo.select.i32
+; CHECK:          mull   %esi
+; CHECK-NEXT:     cmovol %edi, %esi
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL:    umulo.select.i64
+; CHECK:          mulq   %rsi
+; CHECK-NEXT:     cmovoq %rdi, %rsi
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+
+;
+; Check the use of the overflow bit in combination with a branch instruction.
+;
+define zeroext i1 @saddo.br.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL:    saddo.br.i32
+; CHECK:          addl   %esi, %edi
+; CHECK-NEXT:     jo
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue, !prof !0
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @saddo.br.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL:    saddo.br.i64
+; CHECK:          addq   %rsi, %rdi
+; CHECK-NEXT:     jo
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue, !prof !0
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @uaddo.br.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL:    uaddo.br.i32
+; CHECK:          addl   %esi, %edi
+; CHECK-NEXT:     jb
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue, !prof !0
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @uaddo.br.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL:    uaddo.br.i64
+; CHECK:          addq   %rsi, %rdi
+; CHECK-NEXT:     jb
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue, !prof !0
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @ssubo.br.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL:    ssubo.br.i32
+; CHECK:          cmpl   %esi, %edi
+; CHECK-NEXT:     jo
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue, !prof !0
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @ssubo.br.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL:    ssubo.br.i64
+; CHECK:          cmpq   %rsi, %rdi
+; CHECK-NEXT:     jo
+  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue, !prof !0
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @usubo.br.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL:    usubo.br.i32
+; CHECK:          cmpl   %esi, %edi
+; CHECK-NEXT:     jb
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue, !prof !0
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @usubo.br.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL:    usubo.br.i64
+; CHECK:          cmpq   %rsi, %rdi
+; CHECK-NEXT:     jb
+  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue, !prof !0
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @smulo.br.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL:    smulo.br.i32
+; CHECK:          imull  %esi, %edi
+; CHECK-NEXT:     jo
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue, !prof !0
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL:    smulo.br.i64
+; CHECK:          imulq  %rsi, %rdi
+; CHECK-NEXT:     jo
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue, !prof !0
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @umulo.br.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL:    umulo.br.i32
+; CHECK:          mull  %esi
+; CHECK-NEXT:     jo
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue, !prof !0
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL:    umulo.br.i64
+; CHECK:          mulq  %rsi
+; CHECK-NEXT:     jo
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue, !prof !0
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+declare {i8,  i1} @llvm.sadd.with.overflow.i8 (i8,  i8 ) nounwind readnone
+declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
+declare {i8,  i1} @llvm.smul.with.overflow.i8 (i8,  i8 ) nounwind readnone
+declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
+declare {i8,  i1} @llvm.umul.with.overflow.i8 (i8,  i8 ) nounwind readnone
+declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
+declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
+
+!0 = metadata !{metadata !"branch_weights", i32 0, i32 2147483647}
diff --git a/test/CodeGen/XCore/dwarf_debug.ll b/test/CodeGen/XCore/dwarf_debug.ll
new file mode 100644
index 0000000..2f4b231
--- /dev/null
+++ b/test/CodeGen/XCore/dwarf_debug.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=xcore-unknown-unknown -O0 | FileCheck %s
+
+; target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32"
+; target triple = "xcore"
+
+; CHECK-LABEL: f
+; CHECK: entsp 2
+; ...the prologue...
+; CHECK: .loc 1 2 0 prologue_end      # :2:0
+; CHECK: add r0, r0, 1
+; CHECK: retsp 2
+define i32 @f(i32 %a) {
+entry:
+  %a.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !11), !dbg !12
+  %0 = load i32* %a.addr, align 4, !dbg !12
+  %add = add nsw i32 %0, 1, !dbg !12
+  ret i32 %add, !dbg !12
+}
+
+declare void @llvm.dbg.declare(metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1}
+!1 = metadata !{metadata !"", metadata !""}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @f, null, null, metadata !2, i32 2}
+!5 = metadata !{i32 786473, metadata !1}
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null}
+!7 = metadata !{metadata !8, metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!11 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !5, i32 16777218, metadata !8, i32 0, i32 0}
+!12 = metadata !{i32 2, i32 0, metadata !4, null}
+
diff --git a/test/CodeGen/XCore/lit.local.cfg b/test/CodeGen/XCore/lit.local.cfg
index 3e84c1b..0b947bb 100644
--- a/test/CodeGen/XCore/lit.local.cfg
+++ b/test/CodeGen/XCore/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'XCore' in targets:
+if not 'XCore' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
index c78b8b8..65907d6 100644
--- a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
+++ b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
@@ -1,5 +1,5 @@
 ; RUN: llc %s -o /dev/null
-; Here variable bar is optimzied away. Do not trip over while trying to generate debug info.
+; Here variable bar is optimized away. Do not trip over while trying to generate debug info.
 
 
 define i32 @foo() nounwind uwtable readnone ssp {
diff --git a/test/DebugInfo/2010-01-19-DbgScope.ll b/test/DebugInfo/2010-01-19-DbgScope.ll
deleted file mode 100644
index 1a7e378..0000000
--- a/test/DebugInfo/2010-01-19-DbgScope.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc -O0 < %s -o /dev/null
-; Ignore unreachable scopes.
-declare void @foo(i32) noreturn
-
-define i32 @bar() nounwind ssp {
-entry:
-  br i1 undef, label %bb, label %bb11, !dbg !0
-
-bb:                                               ; preds = %entry
-  call void @foo(i32 0) noreturn nounwind, !dbg !7
-  unreachable, !dbg !7
-
-bb11:                                             ; preds = %entry
-  ret i32 1, !dbg !11
-}
-
-!llvm.dbg.cu = !{!3}
-!llvm.module.flags = !{!15}
-
-!0 = metadata !{i32 8647, i32 0, metadata !1, null}
-!1 = metadata !{i32 458763, metadata !12, metadata !2, i32 0, i32 0, i32 0}          ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, null, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", i32 8639, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!3 = metadata !{i32 458769, metadata !12, i32 1, metadata !"LLVM build 00", i1 true, metadata !"", i32 0, metadata !13, metadata !13, metadata !14, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 458773, null, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 458788, null, metadata !3, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 8648, i32 0, metadata !8, null}
-!8 = metadata !{i32 458763, metadata !12, metadata !9, i32 0, i32 0, i32 0}          ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 458763, metadata !12, metadata !10, i32 0, i32 0, i32 0}         ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, null, metadata !3, metadata !"bar2", metadata !"bar2", metadata !"bar2", i32 8639, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 8652, i32 0, metadata !1, null}
-!12 = metadata !{metadata !"c-parser.c", metadata !"llvmgcc"}
-!13 = metadata !{i32 0}
-!14 = metadata !{metadata !2}
-!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/AArch64/eh_frame.s b/test/DebugInfo/AArch64/eh_frame.s
index d8d6b6d..12a5896 100644
--- a/test/DebugInfo/AArch64/eh_frame.s
+++ b/test/DebugInfo/AArch64/eh_frame.s
@@ -17,7 +17,7 @@ foo:
 // Output is:
 
 // CHECK: Contents of section .eh_frame:
-// CHECK-NEXT: 0000 10000000 00000000 017a5200 017c1e01  .........zR..|..
+// CHECK-NEXT: 0000 10000000 00000000 037a5200 017c1e01  .........zR..|..
 // CHECK-NEXT: 0010 1b0c1f00 10000000 18000000 00000000  ................
 
 
@@ -30,7 +30,7 @@ foo:
 // -------------------
 // 10000000: length of first CIE = 0x10
 // 00000000: This is a CIE
-// 01: version = 0x1
+// 03: version = 0x3
 // 7a 52 00: augmentation string "zR" -- pointer format is specified
 // 01: code alignment factor 1
 // 7c: data alignment factor -4
diff --git a/test/DebugInfo/AArch64/eh_frame_personality.ll b/test/DebugInfo/AArch64/eh_frame_personality.ll
index d35f2a2..51d6bf8 100644
--- a/test/DebugInfo/AArch64/eh_frame_personality.ll
+++ b/test/DebugInfo/AArch64/eh_frame_personality.ll
@@ -16,7 +16,7 @@ clean:
 }
 
 ; CHECK: Contents of section .eh_frame:
-; CHECK: 0000 1c000000 00000000 017a504c 5200017c  .........zPLR..|
+; CHECK: 0000 1c000000 00000000 037a504c 5200017c  .........zPLR..|
 ; CHECK: 0010 1e0b0000 00000000 00000000 1b0c1f00  ................
 
 ; Don't really care about the rest:
@@ -33,7 +33,7 @@ clean:
 ; ----------
 ; 1c000000: Length = 0x1c
 ; 00000000: This is a CIE
-; 01: Version 1
+; 03: Version 3
 ; 7a 50 4c 52 00: Augmentation string "zPLR" (personality routine, language-specific data, pointer format)
 ; 01: Code alignment factor 1
 ; 78: Data alignment factor: -8
diff --git a/test/DebugInfo/AArch64/lit.local.cfg b/test/DebugInfo/AArch64/lit.local.cfg
index a75a42b..cec29af 100644
--- a/test/DebugInfo/AArch64/lit.local.cfg
+++ b/test/DebugInfo/AArch64/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM64' in targets:
+if not 'AArch64' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/DebugInfo/AArch64/struct_by_value.ll b/test/DebugInfo/AArch64/struct_by_value.ll
index 0023c3d..0e336f7 100644
--- a/test/DebugInfo/AArch64/struct_by_value.ll
+++ b/test/DebugInfo/AArch64/struct_by_value.ll
@@ -1,7 +1,9 @@
 ; A by-value struct is a register-indirect value (breg).
 ; RUN: llc %s -filetype=asm -o - | FileCheck %s
 
-; CHECK: DW_OP_breg0
+; CHECK: DW_AT_location
+; CHECK-NEXT: .byte 112
+; 112 = 0x70 = DW_OP_breg0
 
 ; rdar://problem/13658587
 ;
diff --git a/test/DebugInfo/ARM/lit.local.cfg b/test/DebugInfo/ARM/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/DebugInfo/ARM/lit.local.cfg
+++ b/test/DebugInfo/ARM/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/DebugInfo/COFF/lit.local.cfg b/test/DebugInfo/COFF/lit.local.cfg
index 19840aa..c8625f4 100644
--- a/test/DebugInfo/COFF/lit.local.cfg
+++ b/test/DebugInfo/COFF/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
diff --git a/test/DebugInfo/Inputs/arange-overlap.cc b/test/DebugInfo/Inputs/arange-overlap.cc
new file mode 100644
index 0000000..82e3f12
--- /dev/null
+++ b/test/DebugInfo/Inputs/arange-overlap.cc
@@ -0,0 +1,26 @@
+void call();
+
+struct S {
+  static void foo() { call(); call(); }
+  static void bar() { call(); call(); }
+  static void baz() {}
+};
+
+#ifdef FILE1
+# define FUNC_NAME func1
+# define FUNC_BODY \
+    S::foo(); S::bar(); S::baz();
+#else
+# define FUNC_NAME func2
+# define FUNC_BODY \
+    S::bar();
+#endif
+
+void FUNC_NAME() {
+  FUNC_BODY
+}
+
+// Build instructions:
+// $ clang -g -fPIC -c -DFILE1 arange-overlap.cc -o obj1.o
+// $ clang -g -fPIC -c arange-overlap.cc -o obj2.o
+// $ clang -shared obj1.o obj2.o -o <output>
diff --git a/test/DebugInfo/Inputs/arange-overlap.elf-x86_64 b/test/DebugInfo/Inputs/arange-overlap.elf-x86_64
new file mode 100755
index 0000000..075e9c2
--- /dev/null
+++ b/test/DebugInfo/Inputs/arange-overlap.elf-x86_64
diff --git a/test/DebugInfo/Inputs/fission-ranges.cc b/test/DebugInfo/Inputs/fission-ranges.cc
new file mode 100644
index 0000000..a585bf9
--- /dev/null
+++ b/test/DebugInfo/Inputs/fission-ranges.cc
@@ -0,0 +1,17 @@
+static inline int inlined_f() {
+  volatile int x = 2;
+  return x;
+}
+
+int main() {
+  return inlined_f();
+}
+
+// Build instructions:
+// $ mkdir /tmp/dbginfo
+// $ cp fission-ranges.cc /tmp/dbginfo/
+// $ cd /tmp/dbginfo
+// $ gcc -gsplit-dwarf -O2 -fPIC fission-ranges.cc -c -o obj2.o
+// $ clang -gsplit-dwarf -O2 -fsanitize=address -fPIC -Dmain=foo fission-ranges.cc -c -o obj1.o
+// $ gcc obj1.o obj2.o -shared -o <output>
+// $ objcopy --remove-section=.debug_aranges <output>
diff --git a/test/DebugInfo/Inputs/fission-ranges.elf-x86_64 b/test/DebugInfo/Inputs/fission-ranges.elf-x86_64
new file mode 100755
index 0000000..3d2fd79
--- /dev/null
+++ b/test/DebugInfo/Inputs/fission-ranges.elf-x86_64
diff --git a/test/DebugInfo/Mips/lit.local.cfg b/test/DebugInfo/Mips/lit.local.cfg
index 88262fb..7d12f7a 100644
--- a/test/DebugInfo/Mips/lit.local.cfg
+++ b/test/DebugInfo/Mips/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'Mips' in targets:
+if not 'Mips' in config.root.targets:
     config.unsupported = True
diff --git a/test/DebugInfo/PR20038.ll b/test/DebugInfo/PR20038.ll
new file mode 100644
index 0000000..61145e5
--- /dev/null
+++ b/test/DebugInfo/PR20038.ll
@@ -0,0 +1,168 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; IR generated from clang -O0 with:
+; struct C {
+;   ~C();
+; };
+; extern bool b;
+; void fun4() { b && (C(), 1); }
+; __attribute__((always_inline)) C::~C() { }
+
+; CHECK: DW_TAG_structure_type
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_name {{.*}} "C"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[C_DTOR_DECL:.*]]:  DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "~C"
+
+; CHECK: [[D1_ABS:.*]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_MIPS_linkage_name {{.*}} "_ZN1CD1Ev"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[D1_THIS_ABS:.*]]:   DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "this"
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_name {{.*}} "fun4"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:   DW_TAG_lexical_block
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:     DW_TAG_inlined_subroutine
+; CHECK-NOT: DW_TAG
+; CHECK:       DW_AT_abstract_origin {{.*}} {[[D1_ABS]]}
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:       DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:         DW_AT_abstract_origin {{.*}} {[[D1_THIS_ABS]]}
+
+; FIXME: D2 is actually inlined into D1 but doesn't show up here, possibly due
+; to there being no work in D2 (calling another member function from the dtor
+; causes D2 to show up, calling a free function doesn't).
+
+; CHECK-NOT: DW_TAG
+; CHECK:       NULL
+; CHECK-NOT: DW_TAG
+; CHECK:     NULL
+; CHECK-NOT: DW_TAG
+; CHECK:   NULL
+
+%struct.C = type { i8 }
+
+@b = external global i8
+
+; Function Attrs: nounwind
+define void @_Z4fun4v() #0 {
+entry:
+  %this.addr.i.i = alloca %struct.C*, align 8, !dbg !21
+  %this.addr.i = alloca %struct.C*, align 8, !dbg !22
+  %agg.tmp.ensured = alloca %struct.C, align 1
+  %cleanup.cond = alloca i1
+  %0 = load i8* @b, align 1, !dbg !24
+  %tobool = trunc i8 %0 to i1, !dbg !24
+  store i1 false, i1* %cleanup.cond
+  br i1 %tobool, label %land.rhs, label %land.end, !dbg !24
+
+land.rhs:                                         ; preds = %entry
+  store i1 true, i1* %cleanup.cond, !dbg !25
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry
+  %1 = phi i1 [ false, %entry ], [ true, %land.rhs ]
+  %cleanup.is_active = load i1* %cleanup.cond, !dbg !27
+  br i1 %cleanup.is_active, label %cleanup.action, label %cleanup.done, !dbg !27
+
+cleanup.action:                                   ; preds = %land.end
+  store %struct.C* %agg.tmp.ensured, %struct.C** %this.addr.i, align 8, !dbg !22
+  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr.i}, metadata !29), !dbg !31
+  %this1.i = load %struct.C** %this.addr.i, !dbg !22
+  store %struct.C* %this1.i, %struct.C** %this.addr.i.i, align 8, !dbg !21
+  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr.i.i}, metadata !32), !dbg !33
+  %this1.i.i = load %struct.C** %this.addr.i.i, !dbg !21
+  br label %cleanup.done, !dbg !22
+
+cleanup.done:                                     ; preds = %cleanup.action, %land.end
+  ret void, !dbg !34
+}
+
+; Function Attrs: alwaysinline nounwind
+define void @_ZN1CD1Ev(%struct.C* %this) unnamed_addr #1 align 2 {
+entry:
+  %this.addr.i = alloca %struct.C*, align 8, !dbg !37
+  %this.addr = alloca %struct.C*, align 8
+  store %struct.C* %this, %struct.C** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !29), !dbg !38
+  %this1 = load %struct.C** %this.addr
+  store %struct.C* %this1, %struct.C** %this.addr.i, align 8, !dbg !37
+  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr.i}, metadata !32), !dbg !39
+  %this1.i = load %struct.C** %this.addr.i, !dbg !37
+  ret void, !dbg !37
+}
+
+; Function Attrs: alwaysinline nounwind
+define void @_ZN1CD2Ev(%struct.C* %this) unnamed_addr #1 align 2 {
+entry:
+  %this.addr = alloca %struct.C*, align 8
+  store %struct.C* %this, %struct.C** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !32), !dbg !40
+  %this1 = load %struct.C** %this.addr
+  ret void, !dbg !41
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18, !19}
+!llvm.ident = !{!20}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !11, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/<stdin>] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"<stdin>", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !5, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = metadata !{metadata !"PR20038.cpp", metadata !"/tmp/dbginfo"}
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1C", metadata !"~C", metadata !"~C", metadata !"", i32 2, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 2} ; [ DW_TAG_subprogram ] [line 2] [~C]
+!8 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = metadata !{null, metadata !10}
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
+!11 = metadata !{metadata !12, metadata !16, metadata !17}
+!12 = metadata !{i32 786478, metadata !5, metadata !13, metadata !"fun4", metadata !"fun4", metadata !"_Z4fun4v", i32 5, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z4fun4v, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [fun4]
+!13 = metadata !{i32 786473, metadata !5}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/PR20038.cpp]
+!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = metadata !{null}
+!16 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1C", metadata !"~C", metadata !"~C", metadata !"_ZN1CD2Ev", i32 6, metadata !8, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1CD2Ev, null, metadata !7, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [~C]
+!17 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1C", metadata !"~C", metadata !"~C", metadata !"_ZN1CD1Ev", i32 6, metadata !8, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1CD1Ev, null, metadata !7, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [~C]
+!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!19 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!20 = metadata !{metadata !"clang version 3.5.0 "}
+!21 = metadata !{i32 6, i32 0, metadata !17, metadata !22}
+!22 = metadata !{i32 5, i32 0, metadata !23, null}
+!23 = metadata !{i32 786443, metadata !5, metadata !12, i32 5, i32 0, i32 3, i32 3} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
+!24 = metadata !{i32 5, i32 0, metadata !12, null}
+!25 = metadata !{i32 5, i32 0, metadata !26, null}
+!26 = metadata !{i32 786443, metadata !5, metadata !12, i32 5, i32 0, i32 1, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
+!27 = metadata !{i32 5, i32 0, metadata !28, null}
+!28 = metadata !{i32 786443, metadata !5, metadata !12, i32 5, i32 0, i32 2, i32 2} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
+!29 = metadata !{i32 786689, metadata !17, metadata !"this", null, i32 16777216, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
+!31 = metadata !{i32 0, i32 0, metadata !17, metadata !22}
+!32 = metadata !{i32 786689, metadata !16, metadata !"this", null, i32 16777216, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!33 = metadata !{i32 0, i32 0, metadata !16, metadata !21}
+!34 = metadata !{i32 5, i32 0, metadata !35, null}
+!35 = metadata !{i32 786443, metadata !5, metadata !36, i32 5, i32 0, i32 5, i32 5} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
+!36 = metadata !{i32 786443, metadata !5, metadata !12, i32 5, i32 0, i32 4, i32 4} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
+!37 = metadata !{i32 6, i32 0, metadata !17, null}
+!38 = metadata !{i32 0, i32 0, metadata !17, null}
+!39 = metadata !{i32 0, i32 0, metadata !16, metadata !37}
+!40 = metadata !{i32 0, i32 0, metadata !16, null}
+!41 = metadata !{i32 6, i32 0, metadata !16, null}
diff --git a/test/DebugInfo/PowerPC/lit.local.cfg b/test/DebugInfo/PowerPC/lit.local.cfg
index 193ebeb..0913324 100644
--- a/test/DebugInfo/PowerPC/lit.local.cfg
+++ b/test/DebugInfo/PowerPC/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'PowerPC' in targets:
+if not 'PowerPC' in config.root.targets:
     config.unsupported = True
diff --git a/test/DebugInfo/Sparc/lit.local.cfg b/test/DebugInfo/Sparc/lit.local.cfg
index e4cee97..d86c9e6 100644
--- a/test/DebugInfo/Sparc/lit.local.cfg
+++ b/test/DebugInfo/Sparc/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'Sparc' in targets:
+if not 'Sparc' in config.root.targets:
     config.unsupported = True
diff --git a/test/DebugInfo/SystemZ/eh_frame.s b/test/DebugInfo/SystemZ/eh_frame.s
index 4e7afd5..6189b90 100644
--- a/test/DebugInfo/SystemZ/eh_frame.s
+++ b/test/DebugInfo/SystemZ/eh_frame.s
@@ -11,9 +11,25 @@ check_largest_class:
 	.cfi_offset %r13, -56
 	.cfi_offset %r14, -48
 	.cfi_offset %r15, -40
-	aghi	%r15, -160
-	.cfi_def_cfa_offset 320
-	lmg	%r13, %r15, 264(%r15)
+	aghi	%r15, -224
+	.cfi_def_cfa_offset 384
+	std	%f8, 160(%r15)
+	std	%f9, 168(%r15)
+	std	%f10, 176(%r15)
+	std	%f11, 184(%r15)
+	std	%f12, 192(%r15)
+	std	%f13, 200(%r15)
+	std	%f14, 208(%r15)
+	std	%f15, 216(%r15)
+	.cfi_offset %f8, -224
+	.cfi_offset %f9, -216
+	.cfi_offset %f10, -208
+	.cfi_offset %f11, -200
+	.cfi_offset %f12, -192
+	.cfi_offset %f13, -184
+	.cfi_offset %f14, -176
+	.cfi_offset %f15, -168
+	lmg	%r13, %r15, 328(%r15)
 	br	%r14
 	.size	check_largest_class, .-check_largest_class
 	.cfi_endproc
@@ -22,8 +38,8 @@ check_largest_class:
 #
 # Contents of the .eh_frame section:
 #
-# 00000000 0000001c 00000000 CIE
-#   Version:               1
+# 00000000 0000000000000014 00000000 CIE
+#   Version:               3
 #   Augmentation:          "zR"
 #   Code alignment factor: 1
 #   Data alignment factor: -8
@@ -35,20 +51,29 @@ check_largest_class:
 #   DW_CFA_nop
 #   DW_CFA_nop
 #
-# 00000020 0000001c 00000024 FDE cie=00000000 pc=00000000..00000012
-#   DW_CFA_advance_loc: 6 to 00000006
+# 000000.. 000000000000002c 0000001c FDE cie=00000000 pc=0000000000000000..0000000000000032
+#   DW_CFA_advance_loc: 6 to 0000000000000006
 #   DW_CFA_offset: r13 at cfa-56
 #   DW_CFA_offset: r14 at cfa-48
 #   DW_CFA_offset: r15 at cfa-40
-#   DW_CFA_advance_loc: 4 to 0000000a
-#   DW_CFA_def_cfa_offset: 320
-#   DW_CFA_nop
+#   DW_CFA_advance_loc: 4 to 000000000000000a
+#   DW_CFA_def_cfa_offset: 384
+#   DW_CFA_advance_loc: 32 to 000000000000002a
+#   DW_CFA_offset: r24 at cfa-224
+#   DW_CFA_offset: r28 at cfa-216
+#   DW_CFA_offset: r25 at cfa-208
+#   DW_CFA_offset: r29 at cfa-200
+#   DW_CFA_offset: r26 at cfa-192
+#   DW_CFA_offset: r30 at cfa-184
+#   DW_CFA_offset: r27 at cfa-176
+#   DW_CFA_offset: r31 at cfa-168
 #   DW_CFA_nop
 #   DW_CFA_nop
 #   DW_CFA_nop
 #
 # CHECK: Contents of section .eh_frame:
-# CHECK-NEXT: 0000 00000014 00000000 017a5200 01780e01  .........zR..x..
-# CHECK-NEXT: 0010 1b0c0fa0 01000000 0000001c 0000001c  ................
-# CHECK-NEXT: 0020 00000000 00000012 00468d07 8e068f05  .........F......
-# CHECK-NEXT: 0030 440ec002 00000000                    D.......
+# CHECK-NEXT: 0000 00000014 00000000 037a5200 01780e01  {{.*}}
+# CHECK-NEXT: 0010 1b0c0fa0 01000000 0000002c 0000001c  {{.*}}
+# CHECK-NEXT: 0020 00000000 00000032 00468d07 8e068f05  {{.*}}
+# CHECK-NEXT: 0030 440e8003 60981c9c 1b991a9d 199a189e  {{.*}}
+# CHECK-NEXT: 0040 179b169f 15000000                    {{.*}}
diff --git a/test/DebugInfo/SystemZ/eh_frame_personality.s b/test/DebugInfo/SystemZ/eh_frame_personality.s
index 46b46db..456e0a6 100644
--- a/test/DebugInfo/SystemZ/eh_frame_personality.s
+++ b/test/DebugInfo/SystemZ/eh_frame_personality.s
@@ -37,7 +37,7 @@ DW.ref.__gxx_personality_v0:
 # Contents of the .eh_frame section:
 #
 # 00000000 0000001c 00000000 CIE
-#   Version:               1
+#   Version:               3
 #   Augmentation:          "zPLR"
 #   Code alignment factor: 1
 #   Data alignment factor: -8
@@ -61,7 +61,7 @@ DW.ref.__gxx_personality_v0:
 #   DW_CFA_nop
 #
 # CHECK: Contents of section .eh_frame:
-# CHECK-NEXT: 0000 0000001c 00000000 017a504c 52000178  .........zPLR..x
+# CHECK-NEXT: 0000 0000001c 00000000 037a504c 52000178  .........zPLR..x
 # CHECK-NEXT: 0010 0e079b00 0000001b 1b0c0fa0 01000000  ................
 # CHECK-NEXT: 0020 0000001c 00000024 00000000 00000012  .......$........
 # CHECK-NEXT: 0030 04000000 00468e06 8f05440e c0020000  .....F....D.....
diff --git a/test/DebugInfo/SystemZ/lit.local.cfg b/test/DebugInfo/SystemZ/lit.local.cfg
index b12af09..5c02dd3 100644
--- a/test/DebugInfo/SystemZ/lit.local.cfg
+++ b/test/DebugInfo/SystemZ/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'SystemZ' in targets:
+if not 'SystemZ' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/DebugInfo/SystemZ/variable-loc.ll b/test/DebugInfo/SystemZ/variable-loc.ll
index e0e4156..23df1cb 100644
--- a/test/DebugInfo/SystemZ/variable-loc.ll
+++ b/test/DebugInfo/SystemZ/variable-loc.ll
@@ -14,11 +14,11 @@
 ; CHECK: brasl   %r14, populate_array@PLT
 
 ; DEBUG: DW_TAG_variable
-; DEBUG-NOT: DW_TAG
-; DEBUG: DW_AT_name {{.*}} "main_arr"
 ; Rather hard-coded, but 0x91 => DW_OP_fbreg and 0xa401 is SLEB128 encoded 164.
 ; DEBUG-NOT: DW_TAG
 ; DEBUG: DW_AT_location {{.*}}(<0x3> 91 a4 01 )
+; DEBUG-NOT: DW_TAG
+; DEBUG: DW_AT_name {{.*}} "main_arr"
 
 
 @.str = private unnamed_addr constant [13 x i8] c"Total is %d\0A\00", align 2
diff --git a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
index 1bbfbf4..4dc747f 100644
--- a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
+++ b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
@@ -37,13 +37,19 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !20 = metadata !{metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo"}
 
 ; CHECK: DW_TAG_variable
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp]       ( .debug_str[0x{{[0-9a-f]*}}] = "GLB")
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name [DW_FORM_strp]       ( .debug_str[0x{{[0-9a-f]*}}] = "GLB")
+; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_decl_file [DW_FORM_data1] (0x01)
+; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_decl_line [DW_FORM_data1] (0x01)
 
 ; CHECK: DW_TAG_variable
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp]   ( .debug_str[0x{{[0-9a-f]*}}] = "LOC")
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name [DW_FORM_strp]   ( .debug_str[0x{{[0-9a-f]*}}] = "LOC")
+; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_decl_file [DW_FORM_data1]     (0x01)
+; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_decl_line [DW_FORM_data1]     (0x04)
 
 !21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/DW_AT_location-reference.ll b/test/DebugInfo/X86/DW_AT_location-reference.ll
index 6c5e32c0..f31b0ad 100644
--- a/test/DebugInfo/X86/DW_AT_location-reference.ll
+++ b/test/DebugInfo/X86/DW_AT_location-reference.ll
@@ -31,11 +31,11 @@
 ; // The 'x' variable and its symbol reference location
 ; CHECK: .debug_info contents:
 ; CHECK:      DW_TAG_variable
+; CHECK-NEXT:   DW_AT_location [DW_FORM_sec_offset] (0x00000000)
 ; CHECK-NEXT:   DW_AT_name {{.*}} "x"
 ; CHECK-NEXT:   DW_AT_decl_file
 ; CHECK-NEXT:   DW_AT_decl_line
 ; CHECK-NEXT:   DW_AT_type
-; CHECK-NEXT:   DW_AT_location [DW_FORM_sec_offset] (0x00000000)
 
 ; Check that the location contains only 4 ranges - this verifies that the 4th
 ; and 5th ranges were successfully merged into a single range.
diff --git a/test/DebugInfo/X86/DW_AT_object_pointer.ll b/test/DebugInfo/X86/DW_AT_object_pointer.ll
index 5fa9699..4b9fae8 100644
--- a/test/DebugInfo/X86/DW_AT_object_pointer.ll
+++ b/test/DebugInfo/X86/DW_AT_object_pointer.ll
@@ -7,7 +7,8 @@
 ; CHECK: DW_TAG_class_type
 ; CHECK: DW_AT_object_pointer [DW_FORM_ref4]     (cu + 0x{{[0-9a-f]*}} => {[[PARAM:0x[0-9a-f]*]]})
 ; CHECK: [[PARAM]]:     DW_TAG_formal_parameter
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp]     ( .debug_str[0x{{[0-9a-f]*}}] = "this")
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name [DW_FORM_strp]     ( .debug_str[0x{{[0-9a-f]*}}] = "this")
 
 %class.A = type { i32 }
 
diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll
index b93cdf0..4f45f36 100644
--- a/test/DebugInfo/X86/DW_AT_specification.ll
+++ b/test/DebugInfo/X86/DW_AT_specification.ll
@@ -6,7 +6,8 @@
 ; CHECK: [[BAR_DECL:0x[0-9a-f]*]]: DW_TAG_subprogram
 ; CHECK-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZN3foo3barEv"
 ; CHECK: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_specification {{.*}} {[[BAR_DECL]]}
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_specification {{.*}} {[[BAR_DECL]]}
 
 
 @_ZZN3foo3barEvE1x = constant i32 0, align 4
@@ -36,6 +37,6 @@ entry:
 !21 = metadata !{i32 720934, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ]
 !22 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !25 = metadata !{i32 6, i32 1, metadata !26, null}
-!26 = metadata !{i32 786443, metadata !5, i32 4, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 786443, metadata !6, metadata !5, i32 4, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
 !27 = metadata !{metadata !"nsNativeAppSupportBase.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library"}
 !28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/arguments.ll b/test/DebugInfo/X86/arguments.ll
index 3597b2c..989e4ff 100644
--- a/test/DebugInfo/X86/arguments.ll
+++ b/test/DebugInfo/X86/arguments.ll
@@ -19,10 +19,12 @@
 ; CHECK: DW_AT_MIPS_linkage_name{{.*}}"_Z4func3fooS_"
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_formal_parameter
-; CHECK-NEXT: DW_AT_name{{.*}}"f"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}"f"
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_formal_parameter
-; CHECK-NEXT: DW_AT_name{{.*}}"g"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}"g"
 
 %struct.foo = type { i32 }
 
diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll
index 31b4fa9..e842afe 100644
--- a/test/DebugInfo/X86/block-capture.ll
+++ b/test/DebugInfo/X86/block-capture.ll
@@ -6,13 +6,13 @@
 ; Checks that we emit debug info for the block variable declare.
 ; CHECK: DW_TAG_subprogram
 ; CHECK: DW_TAG_variable
-; CHECK: DW_AT_name {{.*}} "block"
 ; CHECK: DW_AT_location [DW_FORM_sec_offset]
+; CHECK: DW_AT_name {{.*}} "block"
 
 ; DWARF3: DW_TAG_subprogram
 ; DWARF3: DW_TAG_variable
-; DWARF3: DW_AT_name {{.*}} "block"
 ; DWARF3: DW_AT_location [DW_FORM_data4]
+; DWARF3: DW_AT_name {{.*}} "block"
 
 %struct.__block_descriptor = type { i64, i64 }
 %struct.__block_literal_generic = type { i8*, i32, i32, i8*, %struct.__block_descriptor* }
@@ -118,15 +118,16 @@ declare i32 @__objc_personality_v0(...)
 !50 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"block", i32 7, i64 64, i64 64, i64 256, i32 0, metadata !9} ; [ DW_TAG_member ]
 !51 = metadata !{i32 7, i32 18, metadata !28, null}
 !52 = metadata !{i32 7, i32 19, metadata !28, null}
-!53 = metadata !{i32 786688, metadata !28, metadata !"block", metadata !6, i32 5, metadata !9, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ]
+!53 = metadata !{i32 786688, metadata !28, metadata !"block", metadata !6, i32 5, metadata !9, i32 0, i32 0, metadata !65} ; [ DW_TAG_auto_variable ]
 !54 = metadata !{i32 5, i32 27, metadata !28, null}
 !55 = metadata !{i32 8, i32 22, metadata !56, null}
-!56 = metadata !{i32 786443, metadata !57, i32 7, i32 26, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
-!57 = metadata !{i32 786443, metadata !28, i32 7, i32 19, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!56 = metadata !{i32 786443, metadata !6, metadata !57, i32 7, i32 26, i32 2} ; [ DW_TAG_lexical_block ]
+!57 = metadata !{i32 786443, metadata !6, metadata !28, i32 7, i32 19, i32 1} ; [ DW_TAG_lexical_block ]
 !58 = metadata !{i32 10, i32 20, metadata !59, null}
-!59 = metadata !{i32 786443, metadata !60, i32 9, i32 35, metadata !6, i32 4} ; [ DW_TAG_lexical_block ]
-!60 = metadata !{i32 786443, metadata !57, i32 9, i32 35, metadata !6, i32 3} ; [ DW_TAG_lexical_block ]
+!59 = metadata !{i32 786443, metadata !6, metadata !60, i32 9, i32 35, i32 4} ; [ DW_TAG_lexical_block ]
+!60 = metadata !{i32 786443, metadata !6, metadata !57, i32 9, i32 35, i32 3} ; [ DW_TAG_lexical_block ]
 !61 = metadata !{i32 10, i32 21, metadata !28, null}
 !62 = metadata !{i32 9, i32 20, metadata !56, null}
 !63 = metadata !{metadata !"foo.m", metadata !"/Users/echristo"}
 !64 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!65 = metadata !{i64 1, i64 32}
diff --git a/test/DebugInfo/X86/byvalstruct.ll b/test/DebugInfo/X86/byvalstruct.ll
index 731f8db..d787ef3 100644
--- a/test/DebugInfo/X86/byvalstruct.ll
+++ b/test/DebugInfo/X86/byvalstruct.ll
@@ -6,7 +6,8 @@
 ; CHECK: DW_TAG_formal_parameter
 ; CHECK: DW_TAG_formal_parameter
 ; CHECK: DW_TAG_formal_parameter
-; CHECK-NEXT: DW_AT_name {{.*}} "info"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "info"
 ;
 ; generated from
 ;
diff --git a/test/DebugInfo/X86/coff_debug_info_type.ll b/test/DebugInfo/X86/coff_debug_info_type.ll
index a0b8ccc..a1051c3 100644
--- a/test/DebugInfo/X86/coff_debug_info_type.ll
+++ b/test/DebugInfo/X86/coff_debug_info_type.ll
@@ -6,6 +6,8 @@
 ; RUN: llc -mtriple=i686-pc-win32 -filetype=asm -O0 < %s | FileCheck -check-prefix=WIN32 %s
 ; WIN32:    .section .debug$S,"rnd"
 
+; RUN: llc -mtriple=i686-pc-win32 -filetype=null -O0 < %s
+
 ; generated from:
 ; clang -g -S -emit-llvm test.c -o test.ll
 ; int main()
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
index 40300de..ac038f3 100644
--- a/test/DebugInfo/X86/concrete_out_of_line.ll
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -79,7 +79,7 @@ declare void @_Z8moz_freePv(i8*)
 !0 = metadata !{i32 786449, metadata !59, i32 4, metadata !"clang version 3.1 ()", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{}
 !3 = metadata !{metadata !5, metadata !23, metadata !27, metadata !31}
-!5 = metadata !{i32 720942, metadata !6, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [Release]
+!5 = metadata !{i32 720942, metadata !6, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @_ZN17nsAutoRefCnt7ReleaseEv , null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [Release]
 !6 = metadata !{i32 720937, metadata !59} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !10}
@@ -95,7 +95,7 @@ declare void @_Z8moz_freePv(i8*)
 !18 = metadata !{}
 !20 = metadata !{metadata !22}
 !22 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777230, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!23 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [~nsAutoRefCnt]
+!23 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_ZN17nsAutoRefCntD1Ev, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [~nsAutoRefCnt]
 !24 = metadata !{metadata !26}
 !26 = metadata !{i32 786689, metadata !23, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
 !27 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !15, metadata !28, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [~nsAutoRefCnt]
diff --git a/test/DebugInfo/X86/dbg-const-int.ll b/test/DebugInfo/X86/dbg-const-int.ll
index f2f51c9..bf7ee08 100644
--- a/test/DebugInfo/X86/dbg-const-int.ll
+++ b/test/DebugInfo/X86/dbg-const-int.ll
@@ -1,12 +1,14 @@
-; RUN: llc -mtriple=x86_64-apple-darwin12 -filetype=obj %s -o %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin12 -filetype=obj < %s \
+; RUN:    | llvm-dwarfdump -debug-dump=info - | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.6.7"
 ; Radar 9511391
 
 ; CHECK: DW_TAG_variable
-; CHECK: "i"
-; CHECK: DW_AT_const_value [DW_FORM_sdata]   (42)
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_const_value [DW_FORM_sdata]   (42)
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_name {{.*}} "i"
 
 define i32 @foo() nounwind uwtable readnone optsize ssp {
 entry:
diff --git a/test/DebugInfo/X86/dbg-value-const-byref.ll b/test/DebugInfo/X86/dbg-value-const-byref.ll
index baba0cd..23fa352 100644
--- a/test/DebugInfo/X86/dbg-value-const-byref.ll
+++ b/test/DebugInfo/X86/dbg-value-const-byref.ll
@@ -20,9 +20,10 @@
 ;
 ; CHECK: .debug_info contents:
 ; CHECK: DW_TAG_variable
-; CHECK-NEXT: DW_AT_name{{.*}}"i"
 ; CHECK-NOT: DW_TAG
 ; CHECK:     DW_AT_location [DW_FORM_data4]	([[LOC:.*]])
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}"i"
 ; CHECK: .debug_loc contents:
 ; CHECK: [[LOC]]:
 ;        consts 0x00000003
diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
index 1922272..4d18f7d 100644
--- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
+++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
@@ -7,19 +7,22 @@
 
 ; CHECK: DW_TAG_subprogram
 ; CHECK:   DW_AT_abstract_origin {{.*}}{[[ABS:.*]]}
-; FIXME: An out of line definition preceeding an inline usage doesn't properly
-; reference abstract variables.
 ; CHECK:   DW_TAG_formal_parameter
-; CHECK-NEXT:     DW_AT_name {{.*}} "sp"
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_abstract_origin {{.*}}{[[ABS_SP:.*]]}
 ; CHECK:   DW_TAG_formal_parameter
-; CHECK-NEXT:     DW_AT_name {{.*}} "nums"
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_abstract_origin {{.*}}{[[ABS_NUMS:.*]]}
 
 ; CHECK: [[ABS]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
 ; CHECK:   DW_AT_name {{.*}} "foo"
-; CHECK: [[ABS_SP:.*]]:   DW_TAG_formal_parameter
-; CHECK-NEXT:     DW_AT_name {{.*}} "sp"
-; CHECK: [[ABS_NUMS:.*]]:  DW_TAG_formal_parameter
-; CHECK-NEXT:     DW_AT_name {{.*}} "nums"
+; CHECK: [[ABS_SP]]:   DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "sp"
+; CHECK: [[ABS_NUMS]]:  DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "nums"
 
 ;CHECK: DW_TAG_inlined_subroutine
 ;CHECK-NEXT: DW_AT_abstract_origin {{.*}}{[[ABS]]}
@@ -30,9 +33,10 @@
 
 ;CHECK: DW_TAG_formal_parameter
 ;FIXME: Linux shouldn't drop this parameter either...
-;DARWIN-NEXT:   DW_AT_abstract_origin {{.*}}{[[ABS_SP]]}
+;CHECK-NOT: DW_TAG
+;DARWIN:   DW_AT_abstract_origin {{.*}}{[[ABS_SP]]}
 ;DARWIN: DW_TAG_formal_parameter
-;CHECK-NEXT: DW_AT_abstract_origin {{.*}}{[[ABS_NUMS]]}
+;CHECK: DW_AT_abstract_origin {{.*}}{[[ABS_NUMS]]}
 ;CHECK-NOT: DW_TAG_formal_parameter
 
 %struct.S1 = type { float*, i32 }
diff --git a/test/DebugInfo/X86/dbg-value-isel.ll b/test/DebugInfo/X86/dbg-value-isel.ll
index f899f48..155f76f 100644
--- a/test/DebugInfo/X86/dbg-value-isel.ll
+++ b/test/DebugInfo/X86/dbg-value-isel.ll
@@ -92,7 +92,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !8 = metadata !{i32 786689, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !9 = metadata !{i32 1, i32 32, metadata !0, null}
 !10 = metadata !{i32 786688, metadata !11, metadata !"tid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 786443, metadata !0, i32 2, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 786443, metadata !1, metadata !0, i32 2, i32 1, i32 1} ; [ DW_TAG_lexical_block ]
 !12 = metadata !{i32 5, i32 24, metadata !11, null}
 !13 = metadata !{i32 786688, metadata !11, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
 !14 = metadata !{i32 6, i32 25, metadata !11, null}
diff --git a/test/DebugInfo/X86/dbg-value-location.ll b/test/DebugInfo/X86/dbg-value-location.ll
index 9184217..55d1ae6 100644
--- a/test/DebugInfo/X86/dbg-value-location.ll
+++ b/test/DebugInfo/X86/dbg-value-location.ll
@@ -6,11 +6,11 @@ target triple = "x86_64-apple-darwin10.0.0"
 ; rdar://8950491
 
 ;CHECK: DW_TAG_formal_parameter
+;CHECK-NEXT: DW_AT_location
 ;CHECK-NEXT: DW_AT_name {{.*}} "var"
 ;CHECK-NEXT: DW_AT_decl_file
 ;CHECK-NEXT: DW_AT_decl_line
 ;CHECK-NEXT: DW_AT_type
-;CHECK-NEXT: DW_AT_location
 
 @dfm = external global i32, align 4
 
diff --git a/test/DebugInfo/X86/dbg-value-terminator.ll b/test/DebugInfo/X86/dbg-value-terminator.ll
index f08f281..974e0ad 100644
--- a/test/DebugInfo/X86/dbg-value-terminator.ll
+++ b/test/DebugInfo/X86/dbg-value-terminator.ll
@@ -11,84 +11,84 @@
 
 define hidden fastcc %a* @test() #1 {
 entry:
-  %0 = icmp eq %a* undef, null, !dbg !1
-  br i1 %0, label %"14", label %return, !dbg !1
+  %0 = icmp eq %a* undef, null, !dbg !12
+  br i1 %0, label %"14", label %return, !dbg !12
 
 "14":                                             ; preds = %"8"
-  br i1 undef, label %"25", label %"21", !dbg !1
+  br i1 undef, label %"25", label %"21", !dbg !12
 
 "21":                                             ; preds = %"14"
-  br i1 undef, label %may_unswitch_on.exit, label %"6.i", !dbg !1
+  br i1 undef, label %may_unswitch_on.exit, label %"6.i", !dbg !12
 
 "6.i":                                            ; preds = %"21"
-  br i1 undef, label %"10.i", label %may_unswitch_on.exit, !dbg !1
+  br i1 undef, label %"10.i", label %may_unswitch_on.exit, !dbg !12
 
 "10.i":                                           ; preds = %"6.i"
-  br i1 undef, label %may_unswitch_on.exit, label %"12.i", !dbg !1
+  br i1 undef, label %may_unswitch_on.exit, label %"12.i", !dbg !12
 
 "12.i":                                           ; preds = %"10.i"
-  br i1 undef, label %"4.i.i", label %"3.i.i", !dbg !1
+  br i1 undef, label %"4.i.i", label %"3.i.i", !dbg !12
 
 "3.i.i":                                          ; preds = %"12.i"
-  br i1 undef, label %"4.i.i", label %VEC_edge_base_index.exit.i, !dbg !1
+  br i1 undef, label %"4.i.i", label %VEC_edge_base_index.exit.i, !dbg !12
 
 "4.i.i":                                          ; preds = %"3.i.i", %"12.i"
-  unreachable, !dbg !1
+  unreachable, !dbg !12
 
 VEC_edge_base_index.exit.i:                       ; preds = %"3.i.i"
-  br i1 undef, label %may_unswitch_on.exit, label %"16.i", !dbg !1
+  br i1 undef, label %may_unswitch_on.exit, label %"16.i", !dbg !12
 
 "16.i":                                           ; preds = %VEC_edge_base_index.exit.i
-  br i1 undef, label %"4.i6.i", label %"3.i5.i", !dbg !1
+  br i1 undef, label %"4.i6.i", label %"3.i5.i", !dbg !12
 
 "3.i5.i":                                         ; preds = %"16.i"
-  br i1 undef, label %VEC_edge_base_index.exit7.i, label %"4.i6.i", !dbg !1
+  br i1 undef, label %VEC_edge_base_index.exit7.i, label %"4.i6.i", !dbg !12
 
 "4.i6.i":                                         ; preds = %"3.i5.i", %"16.i"
-  unreachable, !dbg !1
+  unreachable, !dbg !12
 
 VEC_edge_base_index.exit7.i:                      ; preds = %"3.i5.i"
-  br i1 undef, label %may_unswitch_on.exit, label %"21.i", !dbg !1
+  br i1 undef, label %may_unswitch_on.exit, label %"21.i", !dbg !12
 
 "21.i":                                           ; preds = %VEC_edge_base_index.exit7.i
-  br i1 undef, label %may_unswitch_on.exit, label %"23.i", !dbg !1
+  br i1 undef, label %may_unswitch_on.exit, label %"23.i", !dbg !12
 
 "23.i":                                           ; preds = %"21.i"
-  br i1 undef, label %may_unswitch_on.exit, label %"26.i", !dbg !1
+  br i1 undef, label %may_unswitch_on.exit, label %"26.i", !dbg !12
 
 "26.i":                                           ; preds = %"34.i", %"23.i"
-  %1 = icmp eq i32 undef, 9, !dbg !1
-  br i1 %1, label %"34.i", label %"28.i", !dbg !1
+  %1 = icmp eq i32 undef, 9, !dbg !12
+  br i1 %1, label %"34.i", label %"28.i", !dbg !12
 
 "28.i":                                           ; preds = %"26.i"
   unreachable
 
 "34.i":                                           ; preds = %"26.i"
-  br i1 undef, label %"26.i", label %"36.i", !dbg !1
+  br i1 undef, label %"26.i", label %"36.i", !dbg !12
 
 "36.i":                                           ; preds = %"34.i"
-  br i1 undef, label %"37.i", label %"38.i", !dbg !1
+  br i1 undef, label %"37.i", label %"38.i", !dbg !12
 
 "37.i":                                           ; preds = %"36.i"
-  br label %"38.i", !dbg !1
+  br label %"38.i", !dbg !12
 
 "38.i":                                           ; preds = %"37.i", %"36.i"
-  br i1 undef, label %"39.i", label %"45.i", !dbg !1
+  br i1 undef, label %"39.i", label %"45.i", !dbg !12
 
 "39.i":                                           ; preds = %"38.i"
-  br i1 undef, label %"41.i", label %may_unswitch_on.exit, !dbg !1
+  br i1 undef, label %"41.i", label %may_unswitch_on.exit, !dbg !12
 
 "41.i":                                           ; preds = %"39.i"
-  br i1 undef, label %may_unswitch_on.exit, label %"42.i", !dbg !1
+  br i1 undef, label %may_unswitch_on.exit, label %"42.i", !dbg !12
 
 "42.i":                                           ; preds = %"41.i"
-  br i1 undef, label %may_unswitch_on.exit, label %"44.i", !dbg !1
+  br i1 undef, label %may_unswitch_on.exit, label %"44.i", !dbg !12
 
 "44.i":                                           ; preds = %"42.i"
-  %2 = load %a** undef, align 8, !dbg !1
-  %3 = bitcast %a* %2 to %a*, !dbg !1
+  %2 = load %a** undef, align 8, !dbg !12
+  %3 = bitcast %a* %2 to %a*, !dbg !12
   call void @llvm.dbg.value(metadata !{%a* %3}, i64 0, metadata !6), !dbg !12
-  br label %may_unswitch_on.exit, !dbg !1
+  br label %may_unswitch_on.exit, !dbg !12
 
 "45.i":                                           ; preds = %"38.i"
   unreachable
@@ -102,7 +102,7 @@ may_unswitch_on.exit:                             ; preds = %"44.i", %"42.i", %"
 
 "return":
   %result = phi %a* [ null, %entry ], [ %4, %may_unswitch_on.exit ]
-  ret %a* %result, !dbg !1
+  ret %a* %result, !dbg !12
 }
 
 attributes #0 = { nounwind readnone }
diff --git a/test/DebugInfo/X86/dbg_value_direct.ll b/test/DebugInfo/X86/dbg_value_direct.ll
index 28b7dc6..db947ac 100644
--- a/test/DebugInfo/X86/dbg_value_direct.ll
+++ b/test/DebugInfo/X86/dbg_value_direct.ll
@@ -170,8 +170,9 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !20 = metadata !{i32 786468}
 !21 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !22 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!23 = metadata !{i32 786689, metadata !4, metadata !"", metadata !5, i32 16777222, metadata !21, i32 0, i32 0, i64 2} ; [ DW_TAG_arg_variable ] [line 6]
+!23 = metadata !{i32 786689, metadata !4, metadata !"", metadata !5, i32 16777222, metadata !21, i32 0, i32 0, metadata !28} ; [ DW_TAG_arg_variable ] [line 6]
 !24 = metadata !{i32 786688, metadata !4, metadata !"a", metadata !5, i32 7, metadata !8, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 7]
 !25 = metadata !{i32 7, i32 0, metadata !4, null}
 !26 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
 !27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!28 = metadata !{i64 2}
diff --git a/test/DebugInfo/X86/debug-info-block-captured-self.ll b/test/DebugInfo/X86/debug-info-block-captured-self.ll
index 87e8f03..95eda60 100644
--- a/test/DebugInfo/X86/debug-info-block-captured-self.ll
+++ b/test/DebugInfo/X86/debug-info-block-captured-self.ll
@@ -7,17 +7,19 @@
 ; This test is split into two parts, the frontend part can be found at
 ; llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m
 ;
-; CHECK:      {{.*}}DW_AT_name{{.*}}_block_invoke{{.*}}
-; CHECK:      DW_TAG_variable
-; CHECK:      {{.*}}DW_AT_name{{.*}}"self"{{.*}}
+; CHECK: {{.*}}DW_AT_name{{.*}}_block_invoke{{.*}}
+; CHECK: DW_TAG_variable
 ; CHECK-NOT:  DW_TAG
-; CHECK:      DW_AT_location
+; CHECK:   DW_AT_location
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_name{{.*}}"self"{{.*}}
 ;
-; CHECK:      {{.*}}DW_AT_name{{.*}}_block_invoke{{.*}}
-; CHECK:      DW_TAG_variable
-; CHECK:      {{.*}}DW_AT_name{{.*}}"self"{{.*}}
+; CHECK: {{.*}}DW_AT_name{{.*}}_block_invoke{{.*}}
+; CHECK: DW_TAG_variable
 ; CHECK-NOT:  DW_TAG
-; CHECK:      DW_AT_location
+; CHECK:   DW_AT_location
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_name{{.*}}"self"{{.*}}
 ;
 ; Generated (and then reduced) from
 ; ----------------------------------------------------------------------
@@ -99,10 +101,12 @@ define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_
 !41 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
 !42 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"", i32 35, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke_2", null, null, metadata !15, i32 35} ; [ DW_TAG_subprogram ] [line 35] [local] [def] [__24-[Main initWithContext:]_block_invoke_2]
 !84 = metadata !{i32 33, i32 0, metadata !38, null}
-!86 = metadata !{i32 786688, metadata !38, metadata !"self", metadata !1, i32 41, metadata !34, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 41]
+!86 = metadata !{i32 786688, metadata !38, metadata !"self", metadata !1, i32 41, metadata !34, i32 0, i32 0, metadata !110} ; [ DW_TAG_auto_variable ] [self] [line 41]
 !87 = metadata !{i32 41, i32 0, metadata !38, null}
 !103 = metadata !{i32 35, i32 0, metadata !42, null}
-!105 = metadata !{i32 786688, metadata !42, metadata !"self", metadata !1, i32 40, metadata !34, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 40]
+!105 = metadata !{i32 786688, metadata !42, metadata !"self", metadata !1, i32 40, metadata !34, i32 0, i32 0, metadata !109} ; [ DW_TAG_auto_variable ] [self] [line 40]
 !106 = metadata !{i32 40, i32 0, metadata !42, null}
 !107 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m", metadata !""}
 !108 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!109 = metadata !{i64 1, i64 32}
+!110 = metadata !{i64 1, i64 32}
diff --git a/test/DebugInfo/X86/debug-info-blocks.ll b/test/DebugInfo/X86/debug-info-blocks.ll
index 430c157..8a1a125 100644
--- a/test/DebugInfo/X86/debug-info-blocks.ll
+++ b/test/DebugInfo/X86/debug-info-blocks.ll
@@ -20,22 +20,23 @@
 ; CHECK-NOT: {{DW_TAG|NULL}}
 ; CHECK: DW_TAG_formal_parameter
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name{{.*}}.block_descriptor
-; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_location
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}.block_descriptor
 
 ; CHECK-NOT: {{DW_TAG|NULL}}
 ; CHECK: DW_TAG_variable
-; CHECK-NEXT: DW_AT_name{{.*}}"self"
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_type{{.*}}{[[APTR:.*]]}
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_artificial
 ; CHECK-NOT: DW_TAG
 ; 0x06 = DW_OP_deref
 ; 0x23 = DW_OP_uconst
 ; 0x91 = DW_OP_fbreg
 ; CHECK: DW_AT_location{{.*}}91 {{[0-9]+}} 06 23 {{[0-9]+}} )
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}"self"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_type{{.*}}{[[APTR:.*]]}
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_artificial
 
 ; CHECK: [[APTR]]:   DW_TAG_pointer_type
 ; CHECK-NEXT: {[[A]]}
@@ -358,7 +359,7 @@ attributes #3 = { nounwind }
 !86 = metadata !{i32 786451, metadata !1, null, metadata !"__block_descriptor_withcopydispose", i32 49, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 49, size 0, align 0, offset 0] [decl] [from ]
 !87 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"self", i32 49, i64 64, i64 64, i64 256, i32 0, metadata !61} ; [ DW_TAG_member ] [self] [line 49, size 64, align 64, offset 256] [from ]
 !88 = metadata !{i32 49, i32 0, metadata !27, null}
-!89 = metadata !{i32 786688, metadata !27, metadata !"self", metadata !32, i32 52, metadata !23, i32 0, i32 0, i64 2, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 52]
+!89 = metadata !{i32 786688, metadata !27, metadata !"self", metadata !32, i32 52, metadata !23, i32 0, i32 0, metadata !111} ; [ DW_TAG_auto_variable ] [self] [line 52]
 !90 = metadata !{i32 52, i32 0, metadata !27, null}
 !91 = metadata !{i32 786688, metadata !92, metadata !"d", metadata !6, i32 50, metadata !93, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 50]
 !92 = metadata !{i32 786443, metadata !5, metadata !27, i32 49, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
@@ -380,3 +381,4 @@ attributes #3 = { nounwind }
 !108 = metadata !{i32 61, i32 0, metadata !36, null}
 !109 = metadata !{i32 62, i32 0, metadata !36, null}
 !110 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!111 = metadata !{i64 2, i64 1, i64 32}
diff --git a/test/DebugInfo/X86/debug-loc-asan.ll b/test/DebugInfo/X86/debug-loc-asan.ll
new file mode 100644
index 0000000..b1980ec
--- /dev/null
+++ b/test/DebugInfo/X86/debug-loc-asan.ll
@@ -0,0 +1,186 @@
+; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+; Verify that we have correct debug info for local variables in code
+; instrumented with AddressSanitizer.
+
+; Generated from the source file test.cc:
+; int bar(int y) {
+;   return y + 2;
+; }
+; with "clang++ -S -emit-llvm -fsanitize=address -O0 -g test.cc"
+
+; First, argument variable "y" resides in %rdi:
+; CHECK: DEBUG_VALUE: bar:y <- RDI
+
+; Then its address is stored in a location on a stack:
+; CHECK: movq %rdi, [[OFFSET:[0-9]+]](%rsp)
+; CHECK-NEXT: [[START_LABEL:.Ltmp[0-9]+]]
+; CHECK-NEXT: DEBUG_VALUE: bar:y <- [RSP+[[OFFSET]]]
+; This location should be valid until the end of the function.
+
+; CHECK: .Ldebug_loc{{[0-9]+}}:
+; We expect two location ranges for the variable.
+
+; First, it is stored in %rdx:
+; CHECK:      .Lset{{[0-9]+}} = .Lfunc_begin0-.Lfunc_begin0
+; CHECK-NEXT: .quad .Lset{{[0-9]+}}
+; CHECK-NEXT: .Lset{{[0-9]+}} = [[START_LABEL]]-.Lfunc_begin0
+; CHECK-NEXT: .quad .Lset{{[0-9]+}}
+; CHECK: DW_OP_reg5
+
+; Then it's addressed via %rsp:
+; CHECK:      .Lset{{[0-9]+}} = [[START_LABEL]]-.Lfunc_begin0
+; CHECK-NEXT: .quad .Lset{{[0-9]+}}
+; CHECK-NEXT: .Lset{{[0-9]+}} = .Lfunc_end0-.Lfunc_begin0
+; CHECK-NEXT: .quad .Lset{{[0-9]+}}
+; CHECK: DW_OP_breg7
+; CHECK-NEXT: [[OFFSET]]
+; CHECK: DW_OP_deref
+
+; ModuleID = 'test.cc'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 1, void ()* @asan.module_ctor }]
+@__asan_option_detect_stack_use_after_return = external global i32
+@__asan_gen_ = private unnamed_addr constant [16 x i8] c"1 32 4 6 y.addr\00", align 1
+
+; Function Attrs: nounwind sanitize_address uwtable
+define i32 @_Z3bari(i32 %y) #0 {
+entry:
+  %MyAlloca = alloca [64 x i8], align 32
+  %0 = ptrtoint [64 x i8]* %MyAlloca to i64
+  %1 = load i32* @__asan_option_detect_stack_use_after_return
+  %2 = icmp ne i32 %1, 0
+  br i1 %2, label %3, label %5
+
+; <label>:3                                       ; preds = %entry
+  %4 = call i64 @__asan_stack_malloc_0(i64 64, i64 %0)
+  br label %5
+
+; <label>:5                                       ; preds = %entry, %3
+  %6 = phi i64 [ %0, %entry ], [ %4, %3 ]
+  %7 = add i64 %6, 32
+  %8 = inttoptr i64 %7 to i32*
+  %9 = inttoptr i64 %6 to i64*
+  store i64 1102416563, i64* %9
+  %10 = add i64 %6, 8
+  %11 = inttoptr i64 %10 to i64*
+  store i64 ptrtoint ([16 x i8]* @__asan_gen_ to i64), i64* %11
+  %12 = add i64 %6, 16
+  %13 = inttoptr i64 %12 to i64*
+  store i64 ptrtoint (i32 (i32)* @_Z3bari to i64), i64* %13
+  %14 = lshr i64 %6, 3
+  %15 = add i64 %14, 2147450880
+  %16 = add i64 %15, 0
+  %17 = inttoptr i64 %16 to i64*
+  store i64 -868083100587789839, i64* %17
+  %18 = ptrtoint i32* %8 to i64
+  %19 = lshr i64 %18, 3
+  %20 = add i64 %19, 2147450880
+  %21 = inttoptr i64 %20 to i8*
+  %22 = load i8* %21
+  %23 = icmp ne i8 %22, 0
+  call void @llvm.dbg.declare(metadata !{i32* %8}, metadata !12)
+  br i1 %23, label %24, label %30
+
+; <label>:24                                      ; preds = %5
+  %25 = and i64 %18, 7
+  %26 = add i64 %25, 3
+  %27 = trunc i64 %26 to i8
+  %28 = icmp sge i8 %27, %22
+  br i1 %28, label %29, label %30
+
+; <label>:29                                      ; preds = %24
+  call void @__asan_report_store4(i64 %18)
+  call void asm sideeffect "", ""()
+  unreachable
+
+; <label>:30                                      ; preds = %24, %5
+  store i32 %y, i32* %8, align 4
+  %31 = ptrtoint i32* %8 to i64, !dbg !13
+  %32 = lshr i64 %31, 3, !dbg !13
+  %33 = add i64 %32, 2147450880, !dbg !13
+  %34 = inttoptr i64 %33 to i8*, !dbg !13
+  %35 = load i8* %34, !dbg !13
+  %36 = icmp ne i8 %35, 0, !dbg !13
+  br i1 %36, label %37, label %43, !dbg !13
+
+; <label>:37                                      ; preds = %30
+  %38 = and i64 %31, 7, !dbg !13
+  %39 = add i64 %38, 3, !dbg !13
+  %40 = trunc i64 %39 to i8, !dbg !13
+  %41 = icmp sge i8 %40, %35, !dbg !13
+  br i1 %41, label %42, label %43
+
+; <label>:42                                      ; preds = %37
+  call void @__asan_report_load4(i64 %31), !dbg !13
+  call void asm sideeffect "", ""()
+  unreachable
+
+; <label>:43                                      ; preds = %37, %30
+  %44 = load i32* %8, align 4, !dbg !13
+  %add = add nsw i32 %44, 2, !dbg !13
+  store i64 1172321806, i64* %9, !dbg !13
+  %45 = icmp ne i64 %6, %0, !dbg !13
+  br i1 %45, label %46, label %53, !dbg !13
+
+; <label>:46                                      ; preds = %43
+  %47 = add i64 %15, 0, !dbg !13
+  %48 = inttoptr i64 %47 to i64*, !dbg !13
+  store i64 -723401728380766731, i64* %48, !dbg !13
+  %49 = add i64 %6, 56, !dbg !13
+  %50 = inttoptr i64 %49 to i64*, !dbg !13
+  %51 = load i64* %50, !dbg !13
+  %52 = inttoptr i64 %51 to i8*, !dbg !13
+  store i8 0, i8* %52, !dbg !13
+  br label %56, !dbg !13
+
+; <label>:53                                      ; preds = %43
+  %54 = add i64 %15, 0, !dbg !13
+  %55 = inttoptr i64 %54 to i64*, !dbg !13
+  store i64 0, i64* %55, !dbg !13
+  br label %56, !dbg !13
+
+; <label>:56                                      ; preds = %53, %46
+  ret i32 %add, !dbg !13
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+define internal void @asan.module_ctor() {
+  call void @__asan_init_v3()
+  ret void
+}
+
+declare void @__asan_init_v3()
+
+declare void @__asan_report_load4(i64)
+
+declare void @__asan_report_store4(i64)
+
+declare i64 @__asan_stack_malloc_0(i64, i64)
+
+attributes #0 = { nounwind sanitize_address uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (209308)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/llvm_cmake_gcc/test.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"test.cc", metadata !"/llvm_cmake_gcc"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"_Z3bari", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3bari, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [bar]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/llvm_cmake_gcc/test.cc]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!11 = metadata !{metadata !"clang version 3.5.0 (209308)"}
+!12 = metadata !{i32 786689, metadata !4, metadata !"y", metadata !5, i32 16777217, metadata !8, i32 0, i32 0, metadata !14} ; [ DW_TAG_arg_variable ] [y] [line 1]
+!13 = metadata !{i32 2, i32 0, metadata !4, null}
+!14 = metadata !{i64 2}
diff --git a/test/DebugInfo/X86/debug-loc-offset.ll b/test/DebugInfo/X86/debug-loc-offset.ll
index 3f4d39d..7866d0e 100644
--- a/test/DebugInfo/X86/debug-loc-offset.ll
+++ b/test/DebugInfo/X86/debug-loc-offset.ll
@@ -3,20 +3,23 @@
 
 ; From the code:
 
-; bar.cpp
+; debug-loc-offset1.cc
 ; int bar (int b) {
 ;   return b+4;
 ; }
 
-; foo.cpp
+; debug-loc-offset2.cc
 ; struct A {
-;   int a;
-;   int b;
-;   int c;
+;   int var;
+;   virtual char foo();
 ; };
 
-; int a (struct A var) {
-;   return var.a;
+; void baz(struct A a) {
+;   int z = 2;
+;   if (a.var > 2)
+;     z++;
+;   if (a.foo() == 'a')
+;     z++;
 ; }
 
 ; Compiled separately for i386-pc-linux-gnu and linked together.
@@ -38,78 +41,113 @@
 
 ; CHECK: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_MIPS_linkage_name [DW_FORM_strp]{{.*}}"_Z1a1A"
+; CHECK: DW_AT_MIPS_linkage_name [DW_FORM_strp]{{.*}}"_Z3baz1A"
 ; CHECK-NOT: {{DW_TAG|NULL}}
 ; CHECK: DW_TAG_formal_parameter
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp]{{.*}}"var"
+; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_location [DW_FORM_sec_offset]   (0x00000000)
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name [DW_FORM_strp]{{.*}}"a"
+
+; CHECK: DW_TAG_variable
+; CHECK: DW_AT_location [DW_FORM_exprloc]
 ; CHECK-NOT: DW_AT_location
 
 ; CHECK: .debug_loc contents:
 ; CHECK: 0x00000000: Beginning address offset: 0x0000000000000000
-; CHECK:                Ending address offset: 0x0000000000000009
+; CHECK:                Ending address offset: 0x000000000000001a
 
-
-%struct.A = type { i32, i32, i32 }
+%struct.A = type { i32 (...)**, i32 }
 
 ; Function Attrs: nounwind
 define i32 @_Z3bari(i32 %b) #0 {
 entry:
   %b.addr = alloca i32, align 4
   store i32 %b, i32* %b.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %b.addr}, metadata !25), !dbg !26
-  %0 = load i32* %b.addr, align 4, !dbg !27
-  %add = add nsw i32 %0, 4, !dbg !27
-  ret i32 %add, !dbg !27
+  call void @llvm.dbg.declare(metadata !{i32* %b.addr}, metadata !21), !dbg !22
+  %0 = load i32* %b.addr, align 4, !dbg !23
+  %add = add nsw i32 %0, 4, !dbg !23
+  ret i32 %add, !dbg !23
 }
 
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata) #1
 
-; Function Attrs: nounwind
-define i32 @_Z1a1A(%struct.A* byval align 4 %var) #0 {
+define void @_Z3baz1A(%struct.A* %a) #2 {
 entry:
-  call void @llvm.dbg.declare(metadata !{%struct.A* %var}, metadata !28), !dbg !29
-  %a = getelementptr inbounds %struct.A* %var, i32 0, i32 0, !dbg !30
-  %0 = load i32* %a, align 4, !dbg !30
-  ret i32 %0, !dbg !30
+  %z = alloca i32, align 4
+  call void @llvm.dbg.declare(metadata !{%struct.A* %a}, metadata !24), !dbg !25
+  call void @llvm.dbg.declare(metadata !{i32* %z}, metadata !26), !dbg !27
+  store i32 2, i32* %z, align 4, !dbg !27
+  %var = getelementptr inbounds %struct.A* %a, i32 0, i32 1, !dbg !28
+  %0 = load i32* %var, align 4, !dbg !28
+  %cmp = icmp sgt i32 %0, 2, !dbg !28
+  br i1 %cmp, label %if.then, label %if.end, !dbg !28
+
+if.then:                                          ; preds = %entry
+  %1 = load i32* %z, align 4, !dbg !30
+  %inc = add nsw i32 %1, 1, !dbg !30
+  store i32 %inc, i32* %z, align 4, !dbg !30
+  br label %if.end, !dbg !30
+
+if.end:                                           ; preds = %if.then, %entry
+  %call = call signext i8 @_ZN1A3fooEv(%struct.A* %a), !dbg !31
+  %conv = sext i8 %call to i32, !dbg !31
+  %cmp1 = icmp eq i32 %conv, 97, !dbg !31
+  br i1 %cmp1, label %if.then2, label %if.end4, !dbg !31
+
+if.then2:                                         ; preds = %if.end
+  %2 = load i32* %z, align 4, !dbg !33
+  %inc3 = add nsw i32 %2, 1, !dbg !33
+  store i32 %inc3, i32* %z, align 4, !dbg !33
+  br label %if.end4, !dbg !33
+
+if.end4:                                          ; preds = %if.then2, %if.end
+  ret void, !dbg !34
 }
 
+declare signext i8 @_ZN1A3fooEv(%struct.A*) #2
+
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0, !9}
-!llvm.module.flags = !{!22, !23}
-!llvm.ident = !{!24, !24}
+!llvm.module.flags = !{!18, !19}
+!llvm.ident = !{!20, !20}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (trunk 204264) (llvm/trunk 204286)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1}
-!1 = metadata !{metadata !"bar.cpp", metadata !"/usr/local/google/home/echristo/tmp"}
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (210479)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/llvm_cmake_gcc/debug-loc-offset1.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"debug-loc-offset1.cc", metadata !"/llvm_cmake_gcc"}
 !2 = metadata !{}
 !3 = metadata !{metadata !4}
 !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"_Z3bari", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3bari, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [bar]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/bar.cpp]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/llvm_cmake_gcc/debug-loc-offset1.cc]
 !6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{metadata !8, metadata !8}
 !8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786449, metadata !10, i32 4, metadata !"clang version 3.5.0 (trunk 204264) (llvm/trunk 204286)", i1 false, metadata !"", i32 0, metadata !2, metadata !11, metadata !17, metadata !2, metadata !2, metadata !"", i32 1}
-!10 = metadata !{metadata !"foo.cpp", metadata !"/usr/local/google/home/echristo/tmp"}
+!9 = metadata !{i32 786449, metadata !10, i32 4, metadata !"clang version 3.5.0 (210479)", i1 false, metadata !"", i32 0, metadata !2, metadata !11, metadata !13, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/llvm_cmake_gcc/debug-loc-offset2.cc] [DW_LANG_C_plus_plus]
+!10 = metadata !{metadata !"debug-loc-offset2.cc", metadata !"/llvm_cmake_gcc"}
 !11 = metadata !{metadata !12}
-!12 = metadata !{i32 786451, metadata !10, null, metadata !"A", i32 1, i64 96, i64 32, i32 0, i32 0, null, metadata !13, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 1, size 96, align 32, offset 0] [def] [from ]
-!13 = metadata !{metadata !14, metadata !15, metadata !16}
-!14 = metadata !{i32 786445, metadata !10, metadata !"_ZTS1A", metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
-!15 = metadata !{i32 786445, metadata !10, metadata !"_ZTS1A", metadata !"b", i32 3, i64 32, i64 32, i64 32, i32 0, metadata !8} ; [ DW_TAG_member ] [b] [line 3, size 32, align 32, offset 32] [from int]
-!16 = metadata !{i32 786445, metadata !10, metadata !"_ZTS1A", metadata !"c", i32 4, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] [c] [line 4, size 32, align 32, offset 64] [from int]
-!17 = metadata !{metadata !18}
-!18 = metadata !{i32 786478, metadata !10, metadata !19, metadata !"a", metadata !"a", metadata !"_Z1a1A", i32 7, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z1a1A, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [a]
-!19 = metadata !{i32 786473, metadata !10}        ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.cpp]
-!20 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!21 = metadata !{metadata !8, metadata !12}
-!22 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!23 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!24 = metadata !{metadata !"clang version 3.5.0 (trunk 204264) (llvm/trunk 204286)"}
-!25 = metadata !{i32 786689, metadata !4, metadata !"b", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 1]
-!26 = metadata !{i32 1, i32 0, metadata !4, null}
-!27 = metadata !{i32 2, i32 0, metadata !4, null}
-!28 = metadata !{i32 786689, metadata !18, metadata !"var", metadata !19, i32 16777223, metadata !"_ZTS1A", i32 0, i32 0}
-!29 = metadata !{i32 7, i32 0, metadata !18, null}
-!30 = metadata !{i32 8, i32 0, metadata !18, null} ; [ DW_TAG_imported_declaration ]
+!12 = metadata !{i32 786451, metadata !10, null, metadata !"A", i32 1, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 1, size 0, align 0, offset 0] [decl] [from ]
+!13 = metadata !{metadata !14}
+!14 = metadata !{i32 786478, metadata !10, metadata !15, metadata !"baz", metadata !"baz", metadata !"_Z3baz1A", i32 6, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.A*)* @_Z3baz1A, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [baz]
+!15 = metadata !{i32 786473, metadata !10}        ; [ DW_TAG_file_type ] [/llvm_cmake_gcc/debug-loc-offset2.cc]
+!16 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = metadata !{null, metadata !12}
+!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!19 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!20 = metadata !{metadata !"clang version 3.5.0 (210479)"}
+!21 = metadata !{i32 786689, metadata !4, metadata !"b", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 1]
+!22 = metadata !{i32 1, i32 0, metadata !4, null}
+!23 = metadata !{i32 2, i32 0, metadata !4, null}
+!24 = metadata !{i32 786689, metadata !14, metadata !"a", metadata !15, i32 16777222, metadata !"_ZTS1A", i32 8192, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 6]
+!25 = metadata !{i32 6, i32 0, metadata !14, null}
+!26 = metadata !{i32 786688, metadata !14, metadata !"z", metadata !15, i32 7, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [z] [line 7]
+!27 = metadata !{i32 7, i32 0, metadata !14, null}
+!28 = metadata !{i32 8, i32 0, metadata !29, null} ; [ DW_TAG_imported_declaration ]
+!29 = metadata !{i32 786443, metadata !10, metadata !14, i32 8, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/llvm_cmake_gcc/debug-loc-offset2.cc]
+!30 = metadata !{i32 9, i32 0, metadata !29, null}
+!31 = metadata !{i32 10, i32 0, metadata !32, null}
+!32 = metadata !{i32 786443, metadata !10, metadata !14, i32 10, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/llvm_cmake_gcc/debug-loc-offset2.cc]
+!33 = metadata !{i32 11, i32 0, metadata !32, null}
+!34 = metadata !{i32 12, i32 0, metadata !14, null}
diff --git a/test/DebugInfo/X86/dwarf-public-names.ll b/test/DebugInfo/X86/dwarf-public-names.ll
index d870ccb..793971a 100644
--- a/test/DebugInfo/X86/dwarf-public-names.ll
+++ b/test/DebugInfo/X86/dwarf-public-names.ll
@@ -43,12 +43,14 @@
 ; LINUX: debug_pubnames
 
 ; Check for each name in the output.
-; LINUX: global_namespace_variable
-; LINUX: global_namespace_function
-; LINUX: static_member_function
-; LINUX: global_variable
-; LINUX: global_function
-; LINUX: member_function
+; LINUX-DAG: "ns"
+; LINUX-DAG: "C::static_member_function"
+; LINUX-DAG: "global_variable"
+; LINUX-DAG: "ns::global_namespace_variable"
+; LINUX-DAG: "ns::global_namespace_function"
+; LINUX-DAG: "global_function"
+; LINUX-DAG: "C::static_member_variable"
+; LINUX-DAG: "C::member_function"
 
 %struct.C = type { i8 }
 
@@ -112,7 +114,7 @@ attributes #1 = { nounwind readnone }
 !18 = metadata !{i32 786478, metadata !4, null, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 13, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !14, metadata !1, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
 !19 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 19, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !1, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
 !20 = metadata !{i32 786478, metadata !4, metadata !21, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 24, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !1, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
-!21 = metadata !{i32 786489, null, metadata !"ns", metadata !4, i32 23} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp]
+!21 = metadata !{i32 786489, metadata !4, null, metadata !"ns", i32 23} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp]
 !22 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !23 = metadata !{null}
 !24 = metadata !{metadata !25, metadata !26, metadata !27}
diff --git a/test/DebugInfo/X86/elf-names.ll b/test/DebugInfo/X86/elf-names.ll
index 176c2af..36fd232 100644
--- a/test/DebugInfo/X86/elf-names.ll
+++ b/test/DebugInfo/X86/elf-names.ll
@@ -96,14 +96,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !35 = metadata !{i32 786689, metadata !31, metadata !"d", metadata !6, i32 33554451, metadata !23, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [d] [line 19]
 !36 = metadata !{i32 12, i32 0, metadata !5, null}
 !37 = metadata !{i32 13, i32 0, metadata !38, null}
-!38 = metadata !{i32 786443, metadata !5, i32 12, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/foo.cpp]
+!38 = metadata !{i32 786443, metadata !6, metadata !5, i32 12, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/foo.cpp]
 !42 = metadata !{i32 14, i32 0, metadata !38, null}
 !43 = metadata !{i32 15, i32 0, metadata !38, null}
 !44 = metadata !{i32 16, i32 0, metadata !38, null}
 !45 = metadata !{i32 17, i32 0, metadata !38, null}
 !46 = metadata !{i32 19, i32 0, metadata !31, null}
 !47 = metadata !{i32 20, i32 0, metadata !48, null}
-!48 = metadata !{i32 786443, metadata !31, i32 19, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/foo.cpp]
+!48 = metadata !{i32 786443, metadata !6, metadata !31, i32 19, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/foo.cpp]
 !49 = metadata !{i32 21, i32 0, metadata !48, null}
 !50 = metadata !{i32 22, i32 0, metadata !48, null}
 !51 = metadata !{i32 23, i32 0, metadata !48, null}
diff --git a/test/DebugInfo/X86/empty-and-one-elem-array.ll b/test/DebugInfo/X86/empty-and-one-elem-array.ll
index 974bd73..f5c37df 100644
--- a/test/DebugInfo/X86/empty-and-one-elem-array.ll
+++ b/test/DebugInfo/X86/empty-and-one-elem-array.ll
@@ -28,6 +28,11 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 ; An empty array should not have an AT_upper_bound attribute. But an array of 1
 ; should.
 
+; CHECK:      DW_TAG_base_type
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp]  ( .debug_str[{{.*}}] = "int")
+; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1]   (0x05)
+; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1]  (0x04)
+
 ; int foo::b[1]:
 ; CHECK: DW_TAG_structure_type
 ; CHECK: DW_AT_name{{.*}}"foo"
@@ -36,11 +41,6 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 ; CHECK-NEXT: DW_AT_name [DW_FORM_strp]  ( .debug_str[{{.*}}] = "b")
 ; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
 
-; CHECK:      DW_TAG_base_type
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp]  ( .debug_str[{{.*}}] = "int")
-; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1]   (0x05)
-; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1]  (0x04)
-
 ; int[1]:
 ; CHECK:      DW_TAG_array_type [{{.*}}] *
 ; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
diff --git a/test/DebugInfo/X86/fission-ranges.ll b/test/DebugInfo/X86/fission-ranges.ll
index 057039c..1358375 100644
--- a/test/DebugInfo/X86/fission-ranges.ll
+++ b/test/DebugInfo/X86/fission-ranges.ll
@@ -44,6 +44,13 @@
 ; Make sure we don't produce any relocations in any .dwo section (though in particular, debug_info.dwo)
 ; HDR-NOT: .rela.{{.*}}.dwo
 
+; Make sure we have enough stuff in the debug_addr to cover the address indexes
+; (6 is the last index in debug_loc.dwo, making 7 entries of 8 bytes each, 7 * 8
+; == 56 base 10 == 38 base 16)
+
+; HDR: .debug_addr 00000038
+; HDR-NOT: .rela.{{.*}}.dwo
+
 ; From the code:
 
 ; extern int c;
diff --git a/test/DebugInfo/X86/formal_parameter.ll b/test/DebugInfo/X86/formal_parameter.ll
index 3445f46..2fdab7a 100644
--- a/test/DebugInfo/X86/formal_parameter.ll
+++ b/test/DebugInfo/X86/formal_parameter.ll
@@ -19,7 +19,8 @@ target triple = "x86_64-apple-macosx10.9.0"
 ; rdar://problem/14874886
 ;
 ; CHECK: DW_TAG_formal_parameter
-; CHECK-NEXT: DW_AT_name {{.*}}map
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}}map
 ; CHECK-NOT: DW_AT_name {{.*}}map
 
 ; Function Attrs: nounwind ssp uwtable
diff --git a/test/DebugInfo/X86/generate-odr-hash.ll b/test/DebugInfo/X86/generate-odr-hash.ll
index e713f14..2256b3e 100644
--- a/test/DebugInfo/X86/generate-odr-hash.ll
+++ b/test/DebugInfo/X86/generate-odr-hash.ll
@@ -156,10 +156,10 @@
 ; Don't emit pubtype entries for type DIEs in the compile unit that just indirect to a type unit.
 ; CHECK-NEXT: unit_size = [[CU_SIZE]]
 ; CHECK-NEXT: Offset Name
-; CHECK-NEXT: [[BAR]] "bar"
-; CHECK-NEXT: [[WOMBAT]] "wombat"
-; CHECK-NEXT: [[FLUFFY]] "echidna::capybara::mongoose::fluffy"
-; CHECK-NEXT: [[WALRUS]] "walrus"
+; CHECK-DAG: [[BAR]] "bar"
+; CHECK-DAG: [[WALRUS]] "(anonymous namespace)::walrus"
+; CHECK-DAG: [[WOMBAT]] "wombat"
+; CHECK-DAG: [[FLUFFY]] "echidna::capybara::mongoose::fluffy"
 
 %struct.bar = type { i8 }
 %"class.echidna::capybara::mongoose::fluffy" = type { i32, i32 }
diff --git a/test/DebugInfo/X86/gnu-public-names.ll b/test/DebugInfo/X86/gnu-public-names.ll
index 4e35dbe..96fa52b 100644
--- a/test/DebugInfo/X86/gnu-public-names.ll
+++ b/test/DebugInfo/X86/gnu-public-names.ll
@@ -1,6 +1,5 @@
 ; RUN: llc -mtriple=x86_64-pc-linux-gnu -generate-gnu-dwarf-pub-sections < %s | FileCheck -check-prefix=ASM %s
 ; RUN: llc -mtriple=x86_64-pc-linux-gnu -generate-gnu-dwarf-pub-sections -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s
-; RUN: llc -mtriple=x86_64-pc-linux-gnu -generate-gnu-dwarf-pub-sections -filetype=obj -dwarf-version=3 < %s | llvm-dwarfdump - | FileCheck %s -check-prefix=DWARF3
 ; ModuleID = 'dwarf-public-names.cpp'
 ;
 ; Generated from:
@@ -46,73 +45,135 @@
 ; ASM-NEXT: .asciz  "C"                     # External Name
 
 ; CHECK: .debug_info contents:
-; CHECK: Compile Unit: length = [[UNIT_SIZE:[0-9a-f]+]]
+; CHECK: Compile Unit:
 ; CHECK: DW_AT_GNU_pubnames [DW_FORM_flag_present]   (true)
 ; CHECK-NOT: DW_AT_GNU_pubtypes [
 
-; CHECK: [[C:[0-9a-f]+]]: DW_TAG_structure_type
+; CHECK: [[C:0x[0-9a-f]+]]: DW_TAG_structure_type
 ; CHECK-NEXT: DW_AT_name {{.*}} "C"
 
-; CHECK: [[STATIC_MEM_DECL:[0-9a-f]+]]: DW_TAG_member
+; CHECK: [[STATIC_MEM_DECL:0x[0-9a-f]+]]: DW_TAG_member
 ; CHECK-NEXT: DW_AT_name {{.*}} "static_member_variable"
 
-; CHECK: [[MEM_FUNC_DECL:[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK: [[MEM_FUNC_DECL:0x[0-9a-f]+]]: DW_TAG_subprogram
 ; CHECK-NEXT: DW_AT_MIPS_linkage_name
 ; CHECK-NEXT: DW_AT_name {{.*}} "member_function"
 
-; CHECK: [[STATIC_MEM_FUNC_DECL:[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK: [[STATIC_MEM_FUNC_DECL:0x[0-9a-f]+]]: DW_TAG_subprogram
 ; CHECK-NEXT: DW_AT_MIPS_linkage_name
 ; CHECK-NEXT: DW_AT_name {{.*}} "static_member_function"
 
-; CHECK: [[INT:[0-9a-f]+]]: DW_TAG_base_type
+; CHECK: [[INT:0x[0-9a-f]+]]: DW_TAG_base_type
 ; CHECK-NEXT: DW_AT_name {{.*}} "int"
 
-; CHECK: [[STATIC_MEM_VAR:[0-9a-f]+]]: DW_TAG_variable
-; CHECK-NEXT: DW_AT_specification {{.*}}[[STATIC_MEM_DECL]]
+; CHECK: [[STATIC_MEM_VAR:0x[0-9a-f]+]]: DW_TAG_variable
+; CHECK-NEXT: DW_AT_specification {{.*}} {[[STATIC_MEM_DECL]]}
 
-; CHECK: [[GLOB_VAR:[0-9a-f]+]]: DW_TAG_variable
+; CHECK: [[GLOB_VAR:0x[0-9a-f]+]]: DW_TAG_variable
 ; CHECK-NEXT: DW_AT_name {{.*}} "global_variable"
 
-; CHECK: [[NS:[0-9a-f]+]]: DW_TAG_namespace
+; CHECK: [[NS:0x[0-9a-f]+]]: DW_TAG_namespace
 ; CHECK-NEXT: DW_AT_name {{.*}} "ns"
 
-; CHECK: [[GLOB_NS_VAR_DECL:[0-9a-f]+]]: DW_TAG_variable
+; CHECK: [[GLOB_NS_VAR_DECL:0x[0-9a-f]+]]: DW_TAG_variable
 ; CHECK-NEXT: DW_AT_name {{.*}} "global_namespace_variable"
 
-; CHECK: [[D_VAR_DECL:[0-9a-f]+]]: DW_TAG_variable
+; CHECK: [[D_VAR_DECL:0x[0-9a-f]+]]: DW_TAG_variable
 ; CHECK-NEXT: DW_AT_name {{.*}} "d"
 
-; CHECK: [[D:[0-9a-f]+]]: DW_TAG_structure_type
+; CHECK: [[D:0x[0-9a-f]+]]: DW_TAG_structure_type
 ; CHECK-NEXT: DW_AT_name {{.*}} "D"
 
-; CHECK: [[GLOB_NS_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK: [[GLOB_NS_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_MIPS_linkage_name
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_name {{.*}} "global_namespace_function"
 
-; CHECK: [[GLOB_NS_VAR:[0-9a-f]+]]: DW_TAG_variable
-; CHECK-NEXT: DW_AT_specification {{.*}}[[GLOB_NS_VAR_DECL]]
+; CHECK: [[GLOB_NS_VAR:0x[0-9a-f]+]]: DW_TAG_variable
+; CHECK-NEXT: DW_AT_specification {{.*}} {[[GLOB_NS_VAR_DECL]]}
 
-; CHECK: [[D_VAR:[0-9a-f]+]]: DW_TAG_variable
-; CHECK-NEXT: DW_AT_specification {{.*}}[[D_VAR_DECL]]
+; CHECK: [[D_VAR:0x[0-9a-f]+]]: DW_TAG_variable
+; CHECK-NEXT: DW_AT_specification {{.*}} {[[D_VAR_DECL]]}
 
-; CHECK: [[MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_specification {{.*}}[[MEM_FUNC_DECL]]
+; CHECK:   DW_AT_name {{.*}} "f3"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[F3_Z:.*]]:   DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "z"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:     DW_AT_location
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:   NULL
+; CHECK-NOT: {{DW_TAG|NULL}}
+
+; CHECK: [[OUTER:.*]]: DW_TAG_namespace
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_name {{.*}} "outer"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[OUTER_ANON:.*]]:  DW_TAG_namespace
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK-NOT:     DW_AT_name
+; CHECK: [[OUTER_ANON_C_DECL:.*]]:     DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK:       DW_AT_name {{.*}} "c"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:     NULL
+; CHECK-NOT: {{DW_TAG|NULL}}
+; FIXME: We probably shouldn't bother describing the implicit
+; import of the preceding anonymous namespace. This should be fixed
+; in clang.
+; CHECK:     DW_TAG_imported_module
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:   NULL
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[OUTER_ANON_C:.*]]: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK-NEXT:   DW_AT_specification {{.*}} {[[OUTER_ANON_C_DECL]]}
+
+; CHECK: [[ANON:.*]]: DW_TAG_namespace
+; CHECK-NOT:   DW_AT_name
+; CHECK: [[ANON_INNER:.*]]:  DW_TAG_namespace
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "inner"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[ANON_INNER_B_DECL:.*]]:     DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK:       DW_AT_name {{.*}} "b"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:     NULL
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[ANON_I_DECL:.*]]:   DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "i"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:   NULL
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[ANON_INNER_B:.*]]: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK-NEXT:   DW_AT_specification {{.*}} {[[ANON_INNER_B_DECL]]}
+; CHECK: [[ANON_I:.*]]: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK-NEXT:   DW_AT_specification {{.*}} {[[ANON_I_DECL]]}
+
+; CHECK: [[MEM_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_specification {{.*}} {[[MEM_FUNC_DECL]]}
 
-; CHECK: [[STATIC_MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK: [[STATIC_MEM_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_specification {{.*}}[[STATIC_MEM_FUNC_DECL]]
+; CHECK: DW_AT_specification {{.*}} {[[STATIC_MEM_FUNC_DECL]]}
 
-; CHECK: [[GLOBAL_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK: [[GLOBAL_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_MIPS_linkage_name
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_name {{.*}} "global_function"
 
 ; CHECK-LABEL: .debug_gnu_pubnames contents:
-; CHECK-NEXT: length = 0x000000e7 version = 0x0002 unit_offset = 0x00000000 unit_size = [[UNIT_SIZE]]
+; CHECK-NEXT: length = {{.*}} version = 0x0002 unit_offset = 0x00000000 unit_size = {{.*}}
 ; CHECK-NEXT: Offset     Linkage  Kind     Name
 ; CHECK-DAG:  [[GLOBAL_FUNC]] EXTERNAL FUNCTION "global_function"
 ; CHECK-DAG:  [[NS]] EXTERNAL TYPE     "ns"
@@ -123,6 +184,20 @@
 ; CHECK-DAG:  [[D_VAR]] EXTERNAL VARIABLE "ns::d"
 ; CHECK-DAG:  [[STATIC_MEM_VAR]] EXTERNAL VARIABLE "C::static_member_variable"
 ; CHECK-DAG:  [[STATIC_MEM_FUNC]] EXTERNAL FUNCTION "C::static_member_function"
+; CHECK-DAG:  [[ANON]] EXTERNAL TYPE "(anonymous namespace)"
+; CHECK-DAG:  [[ANON_INNER]] EXTERNAL TYPE "(anonymous namespace)::inner"
+; CHECK-DAG:  [[OUTER]] EXTERNAL TYPE "outer"
+; CHECK-DAG:  [[OUTER_ANON]] EXTERNAL TYPE "outer::(anonymous namespace)"
+; CHECK-DAG:  [[ANON_I]] STATIC VARIABLE "(anonymous namespace)::i"
+; CHECK-DAG:  [[ANON_INNER_B]] STATIC VARIABLE "(anonymous namespace)::inner::b"
+; CHECK-DAG:  [[OUTER_ANON_C]] STATIC VARIABLE "outer::(anonymous namespace)::c"
+
+; GCC Doesn't put local statics in pubnames, but it seems not unreasonable and
+; comes out naturally from LLVM's implementation, so I'm OK with it for now. If
+; it's demonstrated that this is a major size concern or degrades debug info
+; consumer behavior, feel free to change it.
+
+; CHECK-DAG:  [[F3_Z]] STATIC VARIABLE "f3::z"
 
 
 ; CHECK-LABEL: debug_gnu_pubtypes contents:
@@ -131,92 +206,6 @@
 ; CHECK-DAG:  [[D]] EXTERNAL TYPE     "ns::D"
 ; CHECK-DAG:  [[INT]] STATIC   TYPE     "int"
 
-; DWARF3: .debug_info contents:
-; DWARF3: Compile Unit: length = [[UNIT_SIZE:[0-9a-f]+]]
-; DWARF3: DW_AT_GNU_pubnames [DW_FORM_flag]   (0x01)
-; DWARF3-NOT: DW_AT_GNU_pubtypes [
-
-; DWARF3: [[C:[0-9a-f]+]]: DW_TAG_structure_type
-; DWARF3-NEXT: DW_AT_name {{.*}} "C"
-
-; DWARF3: [[STATIC_MEM_DECL:[0-9a-f]+]]: DW_TAG_member
-; DWARF3-NEXT: DW_AT_name {{.*}} "static_member_variable"
-
-; DWARF3: [[MEM_FUNC_DECL:[0-9a-f]+]]: DW_TAG_subprogram
-; DWARF3-NEXT: DW_AT_MIPS_linkage_name
-; DWARF3-NEXT: DW_AT_name {{.*}} "member_function"
-
-; DWARF3: [[STATIC_MEM_FUNC_DECL:[0-9a-f]+]]: DW_TAG_subprogram
-; DWARF3-NEXT: DW_AT_MIPS_linkage_name
-; DWARF3-NEXT: DW_AT_name {{.*}} "static_member_function"
-
-; DWARF3: [[INT:[0-9a-f]+]]: DW_TAG_base_type
-; DWARF3-NEXT: DW_AT_name {{.*}} "int"
-
-; DWARF3: [[STATIC_MEM_VAR:[0-9a-f]+]]: DW_TAG_variable
-; DWARF3-NEXT: DW_AT_specification {{.*}}[[STATIC_MEM_DECL]]
-
-; DWARF3: [[GLOB_VAR:[0-9a-f]+]]: DW_TAG_variable
-; DWARF3-NEXT: DW_AT_name {{.*}} "global_variable"
-
-; DWARF3: [[NS:[0-9a-f]+]]: DW_TAG_namespace
-; DWARF3-NEXT: DW_AT_name {{.*}} "ns"
-
-; DWARF3: [[GLOB_NS_VAR_DECL:[0-9a-f]+]]: DW_TAG_variable
-; DWARF3-NEXT: DW_AT_name {{.*}} "global_namespace_variable"
-
-; DWARF3: [[D_VAR_DECL:[0-9a-f]+]]: DW_TAG_variable
-; DWARF3-NEXT: DW_AT_name {{.*}} "d"
-
-; DWARF3: [[D:[0-9a-f]+]]: DW_TAG_structure_type
-; DWARF3-NEXT: DW_AT_name {{.*}} "D"
-
-; DWARF3: [[GLOB_NS_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
-; DWARF3-NOT: DW_TAG
-; DWARF3: DW_AT_MIPS_linkage_name
-; DWARF3-NOT: DW_TAG
-; DWARF3: DW_AT_name {{.*}} "global_namespace_function"
-
-; DWARF3: [[GLOB_NS_VAR:[0-9a-f]+]]: DW_TAG_variable
-; DWARF3-NEXT: DW_AT_specification {{.*}}[[GLOB_NS_VAR_DECL]]
-
-; DWARF3: [[D_VAR:[0-9a-f]+]]: DW_TAG_variable
-; DWARF3-NEXT: DW_AT_specification {{.*}}[[D_VAR_DECL]]
-
-; DWARF3: [[MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
-; DWARF3-NOT: DW_TAG
-; DWARF3: DW_AT_specification {{.*}}[[MEM_FUNC_DECL]]
-
-; DWARF3: [[STATIC_MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
-; DWARF3-NOT: DW_TAG
-; DWARF3: DW_AT_specification {{.*}}[[STATIC_MEM_FUNC_DECL]]
-
-; DWARF3: [[GLOBAL_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
-; DWARF3-NOT: DW_TAG
-; DWARF3: DW_AT_MIPS_linkage_name
-; DWARF3-NOT: DW_TAG
-; DWARF3: DW_AT_name {{.*}} "global_function"
-
-; DWARF3-LABEL: .debug_gnu_pubnames contents:
-; DWARF3-NEXT: length = 0x000000e7 version = 0x0002 unit_offset = 0x00000000 unit_size = [[UNIT_SIZE]]
-; DWARF3-NEXT: Offset     Linkage  Kind     Name
-; DWARF3-DAG:  [[GLOBAL_FUNC]] EXTERNAL FUNCTION "global_function"
-; DWARF3-DAG:  [[NS]] EXTERNAL TYPE     "ns"
-; DWARF3-DAG:  [[MEM_FUNC]] EXTERNAL FUNCTION "C::member_function"
-; DWARF3-DAG:  [[GLOB_VAR]] EXTERNAL VARIABLE "global_variable"
-; DWARF3-DAG:  [[GLOB_NS_VAR]] EXTERNAL VARIABLE "ns::global_namespace_variable"
-; DWARF3-DAG:  [[GLOB_NS_FUNC]] EXTERNAL FUNCTION "ns::global_namespace_function"
-; DWARF3-DAG:  [[D_VAR]] EXTERNAL VARIABLE "ns::d"
-; DWARF3-DAG:  [[STATIC_MEM_VAR]] EXTERNAL VARIABLE "C::static_member_variable"
-; DWARF3-DAG:  [[STATIC_MEM_FUNC]] EXTERNAL FUNCTION "C::static_member_function"
-
-
-; DWARF3-LABEL: debug_gnu_pubtypes contents:
-; DWARF3: Offset     Linkage  Kind     Name
-; DWARF3-DAG:  [[C]] EXTERNAL TYPE     "C"
-; DWARF3-DAG:  [[D]] EXTERNAL TYPE     "ns::D"
-; DWARF3-DAG:  [[INT]] STATIC   TYPE     "int"
-
 %struct.C = type { i8 }
 %"struct.ns::D" = type { i32 }
 
@@ -224,16 +213,20 @@
 @global_variable = global %struct.C zeroinitializer, align 1
 @_ZN2ns25global_namespace_variableE = global i32 1, align 4
 @_ZN2ns1dE = global %"struct.ns::D" zeroinitializer, align 4
+@_ZZ2f3vE1z = internal global i32 0, align 4
+@_ZN12_GLOBAL__N_11iE = internal global i32 0, align 4
+@_ZN12_GLOBAL__N_15inner1bE = internal global i32 0, align 4
+@_ZN5outer12_GLOBAL__N_11cE = internal global i32 0, align 4
 
 ; Function Attrs: nounwind uwtable
 define void @_ZN1C15member_functionEv(%struct.C* %this) #0 align 2 {
 entry:
   %this.addr = alloca %struct.C*, align 8
   store %struct.C* %this, %struct.C** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !36), !dbg !38
+  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !50), !dbg !52
   %this1 = load %struct.C** %this.addr
-  store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !39
-  ret void, !dbg !39
+  store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !53
+  ret void, !dbg !54
 }
 
 ; Function Attrs: nounwind readnone
@@ -242,72 +235,108 @@ declare void @llvm.dbg.declare(metadata, metadata) #1
 ; Function Attrs: nounwind uwtable
 define i32 @_ZN1C22static_member_functionEv() #0 align 2 {
 entry:
-  %0 = load i32* @_ZN1C22static_member_variableE, align 4, !dbg !40
-  ret i32 %0, !dbg !40
+  %0 = load i32* @_ZN1C22static_member_variableE, align 4, !dbg !55
+  ret i32 %0, !dbg !55
 }
 
 ; Function Attrs: nounwind uwtable
 define i32 @_Z15global_functionv() #0 {
 entry:
-  ret i32 -1, !dbg !41
+  ret i32 -1, !dbg !56
 }
 
 ; Function Attrs: nounwind uwtable
 define void @_ZN2ns25global_namespace_functionEv() #0 {
 entry:
-  call void @_ZN1C15member_functionEv(%struct.C* @global_variable), !dbg !42
-  ret void, !dbg !42
+  call void @_ZN1C15member_functionEv(%struct.C* @global_variable), !dbg !57
+  ret void, !dbg !58
+}
+
+; Function Attrs: nounwind uwtable
+define i32* @_Z2f3v() #0 {
+entry:
+  ret i32* @_ZZ2f3vE1z, !dbg !59
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z2f7v() #0 {
+entry:
+  %0 = load i32* @_ZN12_GLOBAL__N_11iE, align 4, !dbg !60
+  %call = call i32* @_Z2f3v(), !dbg !60
+  %1 = load i32* %call, align 4, !dbg !60
+  %add = add nsw i32 %0, %1, !dbg !60
+  %2 = load i32* @_ZN12_GLOBAL__N_15inner1bE, align 4, !dbg !60
+  %add1 = add nsw i32 %add, %2, !dbg !60
+  %3 = load i32* @_ZN5outer12_GLOBAL__N_11cE, align 4, !dbg !60
+  %add2 = add nsw i32 %add1, %3, !dbg !60
+  ret i32 %add2, !dbg !60
 }
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!34, !43}
-!llvm.ident = !{!35}
+!llvm.module.flags = !{!47, !48}
+!llvm.ident = !{!49}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 192862) (llvm/trunk 192861)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !21, metadata !29, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/pubnames.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"pubnames.cpp", metadata !"/usr/local/google/home/echristo/tmp"}
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !19, metadata !32, metadata !45, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/pubnames.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"pubnames.cpp", metadata !"/tmp/dbginfo"}
 !2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !17}
+!3 = metadata !{metadata !4, metadata !15}
 !4 = metadata !{i32 786451, metadata !1, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !6, metadata !8, metadata !13}
+!5 = metadata !{metadata !6, metadata !8, metadata !12}
 !6 = metadata !{i32 786445, metadata !1, metadata !"_ZTS1C", metadata !"static_member_variable", i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !7, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int]
 !7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!8 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 2, metadata !9, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !12, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function]
+!8 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 2, metadata !9, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function]
 !9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !10 = metadata !{null, metadata !11}
 !11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
-!12 = metadata !{i32 786468}
-!13 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 3, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
-!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!15 = metadata !{metadata !7}
-!16 = metadata !{i32 786468}
-!17 = metadata !{i32 786451, metadata !1, metadata !18, metadata !"D", i32 21, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null, metadata !"_ZTSN2ns1DE"} ; [ DW_TAG_structure_type ] [D] [line 21, size 32, align 32, offset 0] [def] [from ]
-!18 = metadata !{i32 786489, metadata !1, null, metadata !"ns", i32 17} ; [ DW_TAG_namespace ] [ns] [line 17]
-!19 = metadata !{metadata !20}
-!20 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN2ns1DE", metadata !"A", i32 22, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] [A] [line 22, size 32, align 32, offset 0] [from int]
-!21 = metadata !{metadata !22, metadata !23, metadata !24, metadata !26}
-!22 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !8, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
-!23 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !13, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [static_member_function]
-!24 = metadata !{i32 786478, metadata !1, metadata !25, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 15, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !2, i32 15} ; [ DW_TAG_subprogram ] [line 15] [def] [global_function]
-!25 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/pubnames.cpp]
-!26 = metadata !{i32 786478, metadata !1, metadata !18, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 18, metadata !27, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !2, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [global_namespace_function]
-!27 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!28 = metadata !{null}
-!29 = metadata !{metadata !30, metadata !31, metadata !32, metadata !33}
-!30 = metadata !{i32 786484, i32 0, metadata !4, metadata !"static_member_variable", metadata !"static_member_variable", metadata !"_ZN1C22static_member_variableE", metadata !25, i32 7, metadata !7, i32 0, i32 1, i32* @_ZN1C22static_member_variableE, metadata !6} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def]
-!31 = metadata !{i32 786484, i32 0, null, metadata !"global_variable", metadata !"global_variable", metadata !"", metadata !25, i32 13, metadata !4, i32 0, i32 1, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 13] [def]
-!32 = metadata !{i32 786484, i32 0, metadata !18, metadata !"global_namespace_variable", metadata !"global_namespace_variable", metadata !"_ZN2ns25global_namespace_variableE", metadata !25, i32 19, metadata !7, i32 0, i32 1, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 19] [def]
-!33 = metadata !{i32 786484, i32 0, metadata !18, metadata !"d", metadata !"d", metadata !"_ZN2ns1dE", metadata !25, i32 23, metadata !17, i32 0, i32 1, %"struct.ns::D"* @_ZN2ns1dE, null} ; [ DW_TAG_variable ] [d] [line 23] [def]
-!34 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!35 = metadata !{metadata !"clang version 3.4 (trunk 192862) (llvm/trunk 192861)"}
-!36 = metadata !{i32 786689, metadata !22, metadata !"this", null, i32 16777216, metadata !37, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!37 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
-!38 = metadata !{i32 0, i32 0, metadata !22, null}
-!39 = metadata !{i32 9, i32 0, metadata !22, null}
-!40 = metadata !{i32 11, i32 0, metadata !23, null}
-!41 = metadata !{i32 15, i32 0, metadata !24, null}
-!42 = metadata !{i32 18, i32 0, metadata !26, null}
-
-!43 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!12 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 3, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
+!13 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = metadata !{metadata !7}
+!15 = metadata !{i32 786451, metadata !1, metadata !16, metadata !"D", i32 28, i64 32, i64 32, i32 0, i32 0, null, metadata !17, i32 0, null, null, metadata !"_ZTSN2ns1DE"} ; [ DW_TAG_structure_type ] [D] [line 28, size 32, align 32, offset 0] [def] [from ]
+!16 = metadata !{i32 786489, metadata !1, null, metadata !"ns", i32 23} ; [ DW_TAG_namespace ] [ns] [line 23]
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN2ns1DE", metadata !"A", i32 29, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] [A] [line 29, size 32, align 32, offset 0] [from int]
+!19 = metadata !{metadata !20, metadata !21, metadata !22, metadata !24, metadata !27, metadata !31}
+!20 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !8, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
+!21 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 13, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !12, metadata !2, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
+!22 = metadata !{i32 786478, metadata !1, metadata !23, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 19, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !2, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
+!23 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/pubnames.cpp]
+!24 = metadata !{i32 786478, metadata !1, metadata !16, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 24, metadata !25, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !2, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
+!25 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !26, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!26 = metadata !{null}
+!27 = metadata !{i32 786478, metadata !1, metadata !23, metadata !"f3", metadata !"f3", metadata !"_Z2f3v", i32 37, metadata !28, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32* ()* @_Z2f3v, null, null, metadata !2, i32 37} ; [ DW_TAG_subprogram ] [line 37] [def] [f3]
+!28 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!29 = metadata !{metadata !30}
+!30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!31 = metadata !{i32 786478, metadata !1, metadata !23, metadata !"f7", metadata !"f7", metadata !"_Z2f7v", i32 54, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z2f7v, null, null, metadata !2, i32 54} ; [ DW_TAG_subprogram ] [line 54] [def] [f7]
+!32 = metadata !{metadata !33, metadata !34, metadata !35, metadata !36, metadata !37, metadata !38, metadata !41, metadata !44}
+!33 = metadata !{i32 786484, i32 0, metadata !4, metadata !"static_member_variable", metadata !"static_member_variable", metadata !"_ZN1C22static_member_variableE", metadata !23, i32 7, metadata !7, i32 0, i32 1, i32* @_ZN1C22static_member_variableE, metadata !6} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def]
+!34 = metadata !{i32 786484, i32 0, null, metadata !"global_variable", metadata !"global_variable", metadata !"", metadata !23, i32 17, metadata !"_ZTS1C", i32 0, i32 1, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 17] [def]
+!35 = metadata !{i32 786484, i32 0, metadata !16, metadata !"global_namespace_variable", metadata !"global_namespace_variable", metadata !"_ZN2ns25global_namespace_variableE", metadata !23, i32 27, metadata !7, i32 0, i32 1, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 27] [def]
+!36 = metadata !{i32 786484, i32 0, metadata !16, metadata !"d", metadata !"d", metadata !"_ZN2ns1dE", metadata !23, i32 30, metadata !"_ZTSN2ns1DE", i32 0, i32 1, %"struct.ns::D"* @_ZN2ns1dE, null} ; [ DW_TAG_variable ] [d] [line 30] [def]
+!37 = metadata !{i32 786484, i32 0, metadata !27, metadata !"z", metadata !"z", metadata !"", metadata !23, i32 38, metadata !7, i32 1, i32 1, i32* @_ZZ2f3vE1z, null} ; [ DW_TAG_variable ] [z] [line 38] [local] [def]
+!38 = metadata !{i32 786484, i32 0, metadata !39, metadata !"c", metadata !"c", metadata !"_ZN5outer12_GLOBAL__N_11cE", metadata !23, i32 50, metadata !7, i32 1, i32 1, i32* @_ZN5outer12_GLOBAL__N_11cE, null} ; [ DW_TAG_variable ] [c] [line 50] [local] [def]
+!39 = metadata !{i32 786489, metadata !1, metadata !40, metadata !"", i32 49} ; [ DW_TAG_namespace ] [line 49]
+!40 = metadata !{i32 786489, metadata !1, null, metadata !"outer", i32 48} ; [ DW_TAG_namespace ] [outer] [line 48]
+!41 = metadata !{i32 786484, i32 0, metadata !42, metadata !"b", metadata !"b", metadata !"_ZN12_GLOBAL__N_15inner1bE", metadata !23, i32 44, metadata !7, i32 1, i32 1, i32* @_ZN12_GLOBAL__N_15inner1bE, null} ; [ DW_TAG_variable ] [b] [line 44] [local] [def]
+!42 = metadata !{i32 786489, metadata !1, metadata !43, metadata !"inner", i32 43} ; [ DW_TAG_namespace ] [inner] [line 43]
+!43 = metadata !{i32 786489, metadata !1, null, metadata !"", i32 33} ; [ DW_TAG_namespace ] [line 33]
+!44 = metadata !{i32 786484, i32 0, metadata !43, metadata !"i", metadata !"i", metadata !"_ZN12_GLOBAL__N_11iE", metadata !23, i32 34, metadata !7, i32 1, i32 1, i32* @_ZN12_GLOBAL__N_11iE, null} ; [ DW_TAG_variable ] [i] [line 34] [local] [def]
+!45 = metadata !{metadata !46}
+!46 = metadata !{i32 786490, metadata !40, metadata !39, i32 40} ; [ DW_TAG_imported_module ]
+!47 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!48 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!49 = metadata !{metadata !"clang version 3.5.0 "}
+!50 = metadata !{i32 786689, metadata !20, metadata !"this", null, i32 16777216, metadata !51, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!51 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
+!52 = metadata !{i32 0, i32 0, metadata !20, null}
+!53 = metadata !{i32 10, i32 0, metadata !20, null}
+!54 = metadata !{i32 11, i32 0, metadata !20, null}
+!55 = metadata !{i32 14, i32 0, metadata !21, null}
+!56 = metadata !{i32 20, i32 0, metadata !22, null}
+!57 = metadata !{i32 25, i32 0, metadata !24, null}
+!58 = metadata !{i32 26, i32 0, metadata !24, null}
+!59 = metadata !{i32 39, i32 0, metadata !27, null}
+!60 = metadata !{i32 55, i32 0, metadata !31, null}
diff --git a/test/DebugInfo/X86/lit.local.cfg b/test/DebugInfo/X86/lit.local.cfg
index 19840aa..c8625f4 100644
--- a/test/DebugInfo/X86/lit.local.cfg
+++ b/test/DebugInfo/X86/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
diff --git a/test/DebugInfo/X86/misched-dbg-value.ll b/test/DebugInfo/X86/misched-dbg-value.ll
index 478f221..c713e65 100644
--- a/test/DebugInfo/X86/misched-dbg-value.ll
+++ b/test/DebugInfo/X86/misched-dbg-value.ll
@@ -6,20 +6,33 @@
 ; function parameters.
 ; CHECK: .debug_info contents:
 ; CHECK: DW_TAG_compile_unit
-; CHECK: DW_TAG_subprogram
-; CHECK: Proc8
-; CHECK: DW_TAG_formal_parameter
-; CHECK: Array1Par
-; CHECK: DW_AT_location
-; CHECK: DW_TAG_formal_parameter
-; CHECK: Array2Par
-; CHECK: DW_AT_location
-; CHECK: DW_TAG_formal_parameter
-; CHECK: IntParI1
-; CHECK: DW_AT_location
-; CHECK: DW_TAG_formal_parameter
-; CHECK: IntParI2
-; CHECK: DW_AT_location
+; CHECK:   DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "Proc8"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:     DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:       DW_AT_location
+; CHECK-NOT: DW_TAG
+; CHECK:       DW_AT_name {{.*}} "Array1Par"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:     DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:       DW_AT_location
+; CHECK-NOT: DW_TAG
+; CHECK:       DW_AT_name {{.*}} "Array2Par"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:     DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:       DW_AT_location
+; CHECK-NOT: DW_TAG
+; CHECK:       DW_AT_name {{.*}} "IntParI1"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:     DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:       DW_AT_location
+; CHECK-NOT: DW_TAG
+; CHECK:       DW_AT_name {{.*}} "IntParI2"
 
 %struct.Record = type { %struct.Record*, i32, i32, i32, [31 x i8] }
 
diff --git a/test/DebugInfo/X86/op_deref.ll b/test/DebugInfo/X86/op_deref.ll
index 810ebbc..31003ee 100644
--- a/test/DebugInfo/X86/op_deref.ll
+++ b/test/DebugInfo/X86/op_deref.ll
@@ -1,18 +1,21 @@
-; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=DW-CHECK
-; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj -dwarf-version=3
-; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=DWARF3
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin < %s -filetype=obj \
+; RUN:     | llvm-dwarfdump -debug-dump=info - \
+; RUN:     | FileCheck %s -check-prefix=CHECK -check-prefix=DWARF4
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin < %s -filetype=obj -dwarf-version=3 \
+; RUN:     | llvm-dwarfdump -debug-dump=info - \
+; RUN:     | FileCheck %s -check-prefix=CHECK -check-prefix=DWARF3
 
-; DW-CHECK: DW_AT_name [DW_FORM_strp]  ( .debug_str[0x00000067] = "vla")
 ; FIXME: The location here needs to be fixed, but llvm-dwarfdump doesn't handle
 ; DW_AT_location lists yet.
-; DW-CHECK: DW_AT_location [DW_FORM_sec_offset]                      (0x00000000)
+; DWARF4: DW_AT_location [DW_FORM_sec_offset]                      (0x00000000)
 
-; DWARF3: DW_AT_name [DW_FORM_strp]  ( .debug_str[0x00000067] = "vla")
 ; FIXME: The location here needs to be fixed, but llvm-dwarfdump doesn't handle
 ; DW_AT_location lists yet.
 ; DWARF3: DW_AT_location [DW_FORM_data4]                      (0x00000000)
 
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name [DW_FORM_strp]  ( .debug_str[0x00000067] = "vla")
+
 ; Unfortunately llvm-dwarfdump can't unparse a list of DW_AT_locations
 ; right now, so we check the asm output:
 ; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK
@@ -86,7 +89,7 @@ declare void @llvm.stackrestore(i8*) nounwind
 !11 = metadata !{i32 1, i32 26, metadata !5, null}
 !12 = metadata !{i32 3, i32 13, metadata !13, null}
 !13 = metadata !{i32 786443, metadata !28, metadata !5, i32 2, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!14 = metadata !{i32 786688, metadata !13, metadata !"vla", metadata !6, i32 3, metadata !15, i32 8192, i32 0, i64 2} ; [ DW_TAG_auto_variable ]
+!14 = metadata !{i32 786688, metadata !13, metadata !"vla", metadata !6, i32 3, metadata !15, i32 8192, i32 0, metadata !30} ; [ DW_TAG_auto_variable ]
 !15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
 !16 = metadata !{metadata !17}
 !17 = metadata !{i32 786465, i64 0, i64 -1}        ; [ DW_TAG_subrange_type ]
@@ -102,3 +105,4 @@ declare void @llvm.stackrestore(i8*) nounwind
 !27 = metadata !{i32 8, i32 1, metadata !13, null}
 !28 = metadata !{metadata !"bar.c", metadata !"/Users/echristo/tmp"}
 !29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!30 = metadata !{i64 2}
diff --git a/test/DebugInfo/X86/parameters.ll b/test/DebugInfo/X86/parameters.ll
index 8248cf6..4215c21 100644
--- a/test/DebugInfo/X86/parameters.ll
+++ b/test/DebugInfo/X86/parameters.ll
@@ -23,13 +23,15 @@
 ; }
 
 ; CHECK: debug_info contents
-; CHECK: DW_AT_name{{.*}} = "f"
 ; 0x74 is DW_OP_breg4, showing that the parameter is accessed indirectly
 ; (with a zero offset) from the register parameter
 ; CHECK: DW_AT_location{{.*}}(<0x0{{.}}> 74 00
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}} = "f"
 
-; CHECK: DW_AT_name{{.*}} = "g"
 ; CHECK: DW_AT_location{{.*}}([[G_LOC:0x[0-9]*]])
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}} = "g"
 ; CHECK: debug_loc contents
 ; CHECK-NEXT: [[G_LOC]]: Beginning
 ; CHECK-NEXT:               Ending
diff --git a/test/DebugInfo/X86/pr12831.ll b/test/DebugInfo/X86/pr12831.ll
index 117e426..79d00ed 100644
--- a/test/DebugInfo/X86/pr12831.ll
+++ b/test/DebugInfo/X86/pr12831.ll
@@ -212,7 +212,7 @@ entry:
 !134 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
 !135 = metadata !{i32 19, i32 39, metadata !5, null}
 !136 = metadata !{i32 20, i32 17, metadata !137, null}
-!137 = metadata !{i32 786443, metadata !5, i32 19, i32 51, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!137 = metadata !{i32 786443, metadata !6, metadata !5, i32 19, i32 51, i32 0} ; [ DW_TAG_lexical_block ]
 !138 = metadata !{i32 23, i32 17, metadata !137, null}
 !139 = metadata !{i32 26, i32 15, metadata !137, null}
 !140 = metadata !{i32 786689, metadata !106, metadata !"this", metadata !6, i32 16777224, metadata !141, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
@@ -221,19 +221,19 @@ entry:
 !143 = metadata !{i32 786689, metadata !106, metadata !"__f", metadata !6, i32 33554440, metadata !61, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !144 = metadata !{i32 8, i32 63, metadata !106, null}
 !145 = metadata !{i32 9, i32 9, metadata !146, null}
-!146 = metadata !{i32 786443, metadata !106, i32 8, i32 81, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!146 = metadata !{i32 786443, metadata !6, metadata !106, i32 8, i32 81, i32 1} ; [ DW_TAG_lexical_block ]
 !147 = metadata !{i32 10, i32 13, metadata !146, null}
 !148 = metadata !{i32 4, i32 5, metadata !149, null}
-!149 = metadata !{i32 786443, metadata !107, i32 3, i32 105, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!149 = metadata !{i32 786443, metadata !6, metadata !107, i32 3, i32 105, i32 2} ; [ DW_TAG_lexical_block ]
 !150 = metadata !{i32 786689, metadata !126, metadata !"this", metadata !6, i32 16777224, metadata !141, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
 !151 = metadata !{i32 8, i32 45, metadata !126, null}
 !152 = metadata !{i32 786689, metadata !126, metadata !"__f", metadata !6, i32 33554440, metadata !26, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !153 = metadata !{i32 8, i32 63, metadata !126, null}
 !154 = metadata !{i32 9, i32 9, metadata !155, null}
-!155 = metadata !{i32 786443, metadata !126, i32 8, i32 81, metadata !6, i32 3} ; [ DW_TAG_lexical_block ]
+!155 = metadata !{i32 786443, metadata !6, metadata !126, i32 8, i32 81, i32 3} ; [ DW_TAG_lexical_block ]
 !156 = metadata !{i32 10, i32 13, metadata !155, null}
 !157 = metadata !{i32 4, i32 5, metadata !158, null}
-!158 = metadata !{i32 786443, metadata !127, i32 3, i32 105, metadata !6, i32 4} ; [ DW_TAG_lexical_block ]
+!158 = metadata !{i32 786443, metadata !6, metadata !127, i32 3, i32 105, i32 4} ; [ DW_TAG_lexical_block ]
 !159 = metadata !{i32 786473, metadata !161} ; [ DW_TAG_file_type ]
 !160 = metadata !{metadata !"BPLFunctionWriter2.ii", metadata !"/home/peter/crashdelta"}
 !161 = metadata !{metadata !"BPLFunctionWriter.cpp", metadata !"/home/peter/crashdelta"}
diff --git a/test/DebugInfo/X86/pr19307.ll b/test/DebugInfo/X86/pr19307.ll
new file mode 100644
index 0000000..07e3a42
--- /dev/null
+++ b/test/DebugInfo/X86/pr19307.ll
@@ -0,0 +1,147 @@
+; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+; Generated from the source file pr19307.cc:
+; #include <string>
+; void parse_range(unsigned long long &offset, unsigned long long &limit,
+;                  std::string range) {
+;   if (range.compare(0, 6, "items=") != 0 || range[6] == '-')
+;     offset = 1;
+;   range.erase(0, 6);
+;   limit = 2;
+; }
+; with "clang++ -S -emit-llvm -O0 -g pr19307.cc"
+
+; Location of "range" string is spilled from %rdx to stack and is
+; addressed via %rbp.
+; CHECK: movq %rdx, {{[-0-9]+}}(%rbp)
+; CHECK-NEXT: [[START_LABEL:.Ltmp[0-9]+]]
+; This location should be valid until the end of the function.
+
+; Verify that we have proper range in debug_loc section:
+; CHECK: .Ldebug_loc{{[0-9]+}}:
+; CHECK: DW_OP_breg1
+; CHECK:      .Lset{{[0-9]+}} = [[START_LABEL]]-.Lfunc_begin0
+; CHECK-NEXT: .quad .Lset{{[0-9]+}}
+; CHECK-NEXT: .Lset{{[0-9]+}} = .Lfunc_end0-.Lfunc_begin0
+; CHECK-NEXT: .quad .Lset{{[0-9]+}}
+; CHECK: DW_OP_breg6
+; CHECK: DW_OP_deref
+
+; ModuleID = 'pr19307.cc'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%"class.std::basic_string" = type { %"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" }
+%"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" = type { i8* }
+
+@.str = private unnamed_addr constant [7 x i8] c"items=\00", align 1
+
+; Function Attrs: uwtable
+define void @_Z11parse_rangeRyS_Ss(i64* %offset, i64* %limit, %"class.std::basic_string"* %range) #0 {
+entry:
+  %offset.addr = alloca i64*, align 8
+  %limit.addr = alloca i64*, align 8
+  store i64* %offset, i64** %offset.addr, align 8
+  call void @llvm.dbg.declare(metadata !{i64** %offset.addr}, metadata !45), !dbg !46
+  store i64* %limit, i64** %limit.addr, align 8
+  call void @llvm.dbg.declare(metadata !{i64** %limit.addr}, metadata !47), !dbg !46
+  call void @llvm.dbg.declare(metadata !{%"class.std::basic_string"* %range}, metadata !48), !dbg !49
+  %call = call i32 @_ZNKSs7compareEmmPKc(%"class.std::basic_string"* %range, i64 0, i64 6, i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0)), !dbg !50
+  %cmp = icmp ne i32 %call, 0, !dbg !50
+  br i1 %cmp, label %if.then, label %lor.lhs.false, !dbg !50
+
+lor.lhs.false:                                    ; preds = %entry
+  %call1 = call i8* @_ZNSsixEm(%"class.std::basic_string"* %range, i64 6), !dbg !52
+  %0 = load i8* %call1, !dbg !52
+  %conv = sext i8 %0 to i32, !dbg !52
+  %cmp2 = icmp eq i32 %conv, 45, !dbg !52
+  br i1 %cmp2, label %if.then, label %if.end, !dbg !52
+
+if.then:                                          ; preds = %lor.lhs.false, %entry
+  %1 = load i64** %offset.addr, align 8, !dbg !54
+  store i64 1, i64* %1, align 8, !dbg !54
+  br label %if.end, !dbg !54
+
+if.end:                                           ; preds = %if.then, %lor.lhs.false
+  %call3 = call %"class.std::basic_string"* @_ZNSs5eraseEmm(%"class.std::basic_string"* %range, i64 0, i64 6), !dbg !55
+  %2 = load i64** %limit.addr, align 8, !dbg !56
+  store i64 2, i64* %2, align 8, !dbg !56
+  ret void, !dbg !57
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+declare i32 @_ZNKSs7compareEmmPKc(%"class.std::basic_string"*, i64, i64, i8*) #2
+
+declare i8* @_ZNSsixEm(%"class.std::basic_string"*, i64) #2
+
+declare %"class.std::basic_string"* @_ZNSs5eraseEmm(%"class.std::basic_string"*, i64, i64) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!42, !43}
+!llvm.ident = !{!44}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (209308)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !12, metadata !2, metadata !21, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/llvm_cmake_gcc/pr19307.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"pr19307.cc", metadata !"/llvm_cmake_gcc"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !6, metadata !8}
+!4 = metadata !{i32 786451, metadata !5, null, metadata !"", i32 83, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTS11__mbstate_t"} ; [ DW_TAG_structure_type ] [line 83, size 0, align 0, offset 0] [decl] [from ]
+!5 = metadata !{metadata !"/usr/include/wchar.h", metadata !"/llvm_cmake_gcc"}
+!6 = metadata !{i32 786451, metadata !7, null, metadata !"lconv", i32 54, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTS5lconv"} ; [ DW_TAG_structure_type ] [lconv] [line 54, size 0, align 0, offset 0] [decl] [from ]
+!7 = metadata !{metadata !"/usr/include/locale.h", metadata !"/llvm_cmake_gcc"}
+!8 = metadata !{i32 786434, metadata !9, metadata !10, metadata !"basic_string<char, std::char_traits<char>, std::allocator<char> >", i32 1134, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTSSs"} ; [ DW_TAG_class_type ] [basic_string<char, std::char_traits<char>, std::allocator<char> >] [line 1134, size 0, align 0, offset 0] [decl] [from ]
+!9 = metadata !{metadata !"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/bits/basic_string.tcc", metadata !"/llvm_cmake_gcc"}
+!10 = metadata !{i32 786489, metadata !11, null, metadata !"std", i32 153} ; [ DW_TAG_namespace ] [std] [line 153]
+!11 = metadata !{metadata !"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/x86_64-linux-gnu/bits/c++config.h", metadata !"/llvm_cmake_gcc"}
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786478, metadata !1, metadata !14, metadata !"parse_range", metadata !"parse_range", metadata !"_Z11parse_rangeRyS_Ss", i32 3, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i64*, i64*, %"class.std::basic_string"*)* @_Z11parse_rangeRyS_Ss, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [parse_range]
+!14 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/llvm_cmake_gcc/pr19307.cc]
+!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{null, metadata !17, metadata !17, metadata !19}
+!17 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from long long unsigned int]
+!18 = metadata !{i32 786468, null, null, metadata !"long long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!19 = metadata !{i32 786454, metadata !20, metadata !10, metadata !"string", i32 65, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTSSs"} ; [ DW_TAG_typedef ] [string] [line 65, size 0, align 0, offset 0] [from _ZTSSs]
+!20 = metadata !{metadata !"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/bits/stringfwd.h", metadata !"/llvm_cmake_gcc"}
+!21 = metadata !{metadata !22, metadata !26, metadata !29, metadata !33, metadata !38, metadata !41}
+!22 = metadata !{i32 786490, metadata !23, metadata !25, i32 57} ; [ DW_TAG_imported_module ]
+!23 = metadata !{i32 786489, metadata !24, null, metadata !"__gnu_debug", i32 55} ; [ DW_TAG_namespace ] [__gnu_debug] [line 55]
+!24 = metadata !{metadata !"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/debug/debug.h", metadata !"/llvm_cmake_gcc"}
+!25 = metadata !{i32 786489, metadata !24, metadata !10, metadata !"__debug", i32 49} ; [ DW_TAG_namespace ] [__debug] [line 49]
+!26 = metadata !{i32 786440, metadata !10, metadata !27, i32 66} ; [ DW_TAG_imported_declaration ]
+!27 = metadata !{i32 786454, metadata !5, null, metadata !"mbstate_t", i32 106, i64 0, i64 0, i64 0, i32 0, metadata !28} ; [ DW_TAG_typedef ] [mbstate_t] [line 106, size 0, align 0, offset 0] [from __mbstate_t]
+!28 = metadata !{i32 786454, metadata !5, null, metadata !"__mbstate_t", i32 95, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS11__mbstate_t"} ; [ DW_TAG_typedef ] [__mbstate_t] [line 95, size 0, align 0, offset 0] [from _ZTS11__mbstate_t]
+!29 = metadata !{i32 786440, metadata !10, metadata !30, i32 141} ; [ DW_TAG_imported_declaration ]
+!30 = metadata !{i32 786454, metadata !31, null, metadata !"wint_t", i32 141, i64 0, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_typedef ] [wint_t] [line 141, size 0, align 0, offset 0] [from unsigned int]
+!31 = metadata !{metadata !"/llvm_cmake_gcc/bin/../lib/clang/3.5.0/include/stddef.h", metadata !"/llvm_cmake_gcc"}
+!32 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!33 = metadata !{i32 786440, metadata !34, metadata !36, i32 42} ; [ DW_TAG_imported_declaration ]
+!34 = metadata !{i32 786489, metadata !35, null, metadata !"__gnu_cxx", i32 69} ; [ DW_TAG_namespace ] [__gnu_cxx] [line 69]
+!35 = metadata !{metadata !"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/bits/cpp_type_traits.h", metadata !"/llvm_cmake_gcc"}
+!36 = metadata !{i32 786454, metadata !11, metadata !10, metadata !"size_t", i32 155, i64 0, i64 0, i64 0, i32 0, metadata !37} ; [ DW_TAG_typedef ] [size_t] [line 155, size 0, align 0, offset 0] [from long unsigned int]
+!37 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!38 = metadata !{i32 786440, metadata !34, metadata !39, i32 43} ; [ DW_TAG_imported_declaration ]
+!39 = metadata !{i32 786454, metadata !11, metadata !10, metadata !"ptrdiff_t", i32 156, i64 0, i64 0, i64 0, i32 0, metadata !40} ; [ DW_TAG_typedef ] [ptrdiff_t] [line 156, size 0, align 0, offset 0] [from long int]
+!40 = metadata !{i32 786468, null, null, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
+!41 = metadata !{i32 786440, metadata !10, metadata !"_ZTS5lconv", i32 55} ; [ DW_TAG_imported_declaration ]
+!42 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!43 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!44 = metadata !{metadata !"clang version 3.5.0 (209308)"}
+!45 = metadata !{i32 786689, metadata !13, metadata !"offset", metadata !14, i32 16777219, metadata !17, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [offset] [line 3]
+!46 = metadata !{i32 3, i32 0, metadata !13, null}
+!47 = metadata !{i32 786689, metadata !13, metadata !"limit", metadata !14, i32 33554435, metadata !17, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [limit] [line 3]
+!48 = metadata !{i32 786689, metadata !13, metadata !"range", metadata !14, i32 50331652, metadata !19, i32 8192, i32 0} ; [ DW_TAG_arg_variable ] [range] [line 4]
+!49 = metadata !{i32 4, i32 0, metadata !13, null}
+!50 = metadata !{i32 5, i32 0, metadata !51, null}
+!51 = metadata !{i32 786443, metadata !1, metadata !13, i32 5, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/llvm_cmake_gcc/pr19307.cc]
+!52 = metadata !{i32 5, i32 0, metadata !53, null}
+!53 = metadata !{i32 786443, metadata !1, metadata !51, i32 5, i32 0, i32 1, i32 1} ; [ DW_TAG_lexical_block ] [/llvm_cmake_gcc/pr19307.cc]
+!54 = metadata !{i32 6, i32 0, metadata !51, null}
+!55 = metadata !{i32 7, i32 0, metadata !13, null}
+!56 = metadata !{i32 8, i32 0, metadata !13, null} ; [ DW_TAG_imported_declaration ]
+!57 = metadata !{i32 9, i32 0, metadata !13, null}
+
diff --git a/test/DebugInfo/X86/sret.ll b/test/DebugInfo/X86/sret.ll
index fed4334..faf5158 100644
--- a/test/DebugInfo/X86/sret.ll
+++ b/test/DebugInfo/X86/sret.ll
@@ -3,8 +3,8 @@
 
 ; Based on the debuginfo-tests/sret.cpp code.
 
-; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x72aabf538392d298)
-; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x72aabf538392d298)
+; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x5b59949640ec1580)
+; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x5b59949640ec1580)
 
 %class.A = type { i32 (...)**, i32 }
 %class.B = type { i8 }
diff --git a/test/DebugInfo/X86/subregisters.ll b/test/DebugInfo/X86/subregisters.ll
index 738ab02..d46a95f 100644
--- a/test/DebugInfo/X86/subregisters.ll
+++ b/test/DebugInfo/X86/subregisters.ll
@@ -6,8 +6,8 @@
 ;
 ; rdar://problem/16015314
 ;
+; CHECK:  DW_AT_location [DW_FORM_block1]       (<0x03> 54 93 04 )
 ; CHECK:  DW_AT_name [DW_FORM_strp]{{.*}} "a"
-; CHECK:    DW_AT_location [DW_FORM_block1]       (<0x03> 54 93 04 )
 ;
 ; struct bar {
 ;   int a;
diff --git a/test/DebugInfo/cross-cu-inlining.ll b/test/DebugInfo/cross-cu-inlining.ll
index 266a24d..899558a 100644
--- a/test/DebugInfo/cross-cu-inlining.ll
+++ b/test/DebugInfo/cross-cu-inlining.ll
@@ -56,8 +56,9 @@
 ; CHECK:   DW_AT_abstract_origin {{.*}} {0x[[ABS_FUNC]]}
 ; CHECK:   DW_TAG_formal_parameter
 ; CHECK-NOT: DW_TAG
-; CHECK:     DW_AT_abstract_origin {{.*}} {0x[[ABS_VAR]]}
 ; CHECK:     DW_AT_location
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_abstract_origin {{.*}} {0x[[ABS_VAR]]}
 
 
 @i = external global i32
diff --git a/test/DebugInfo/cross-cu-linkonce-distinct.ll b/test/DebugInfo/cross-cu-linkonce-distinct.ll
new file mode 100644
index 0000000..67eb6c0
--- /dev/null
+++ b/test/DebugInfo/cross-cu-linkonce-distinct.ll
@@ -0,0 +1,95 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Testing that two distinct (distinct by writing them in separate files, while
+; still fulfilling C++'s ODR by having identical token sequences) functions,
+; linked under LTO, get plausible debug info (and don't crash).
+
+; Built from source:
+; $ clang++ a.cpp b.cpp -g -c -emit-llvm
+; $ llvm-link a.bc b.bc -o ab.bc
+
+; This change is intended to tickle a case where the subprogram MDNode
+; associated with the llvm::Function will differ from the subprogram
+; referenced by the DbgLocs in the function.
+
+; $ sed -ie "s/!12, !0/!0, !12/" ab.ll
+; $ cat a.cpp
+; inline int func(int i) {
+;   return i * 2;
+; }
+; int (*x)(int) = &func;
+; $ cat b.cpp
+; inline int func(int i) {
+;   return i * 2;
+; }
+; int (*y)(int) = &func;
+
+; CHECK: DW_TAG_compile_unit
+; CHECK:   DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "func"
+; CHECK: DW_TAG_compile_unit
+
+; FIXME: Maybe we should drop the subprogram here - since the function was
+; emitted in one CU, due to linkonce_odr uniquing. We certainly don't emit the
+; subprogram here if the source location for this definition is the same (see
+; test/DebugInfo/cross-cu-linkonce.ll), though it's very easy to tickle that
+; into failing even without duplicating the source as has been done in this
+; case (two cpp files in different directories, including the same header that
+; contains an inline function - clang will produce distinct subprogram metadata
+; that won't deduplicate owing to the file location information containing the
+; directory of the source file even though the file name is absolute, not
+; relative)
+
+; CHECK: DW_TAG_subprogram
+
+@x = global i32 (i32)* @_Z4funci, align 8
+@y = global i32 (i32)* @_Z4funci, align 8
+
+; Function Attrs: inlinehint nounwind uwtable
+define linkonce_odr i32 @_Z4funci(i32 %i) #0 {
+  %1 = alloca i32, align 4
+  store i32 %i, i32* %1, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %1}, metadata !22), !dbg !23
+  %2 = load i32* %1, align 4, !dbg !24
+  %3 = mul nsw i32 %2, 2, !dbg !24
+  ret i32 %3, !dbg !24
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!12, !0}
+!llvm.module.flags = !{!19, !20}
+!llvm.ident = !{!21, !21}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/a.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"a.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"_Z4funci", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4funci, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/a.cpp]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !5, i32 4, metadata !11, i32 0, i32 1, i32 (i32)** @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
+!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!12 = metadata !{i32 786449, metadata !13, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !14, metadata !17, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/b.cpp] [DW_LANG_C_plus_plus]
+!13 = metadata !{metadata !"b.cpp", metadata !"/tmp/dbginfo"}
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 786478, metadata !13, metadata !16, metadata !"func", metadata !"func", metadata !"_Z4funci", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4funci, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func]
+!16 = metadata !{i32 786473, metadata !13}        ; [ DW_TAG_file_type ] [/tmp/dbginfo/b.cpp]
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !16, i32 4, metadata !11, i32 0, i32 1, i32 (i32)** @y, null} ; [ DW_TAG_variable ] [y] [line 4] [def]
+!19 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!21 = metadata !{metadata !"clang version 3.5.0 "}
+!22 = metadata !{i32 786689, metadata !4, metadata !"i", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 1]
+!23 = metadata !{i32 1, i32 0, metadata !4, null}
+!24 = metadata !{i32 2, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/dead-argument-order.ll b/test/DebugInfo/dead-argument-order.ll
new file mode 100644
index 0000000..ea805a4
--- /dev/null
+++ b/test/DebugInfo/dead-argument-order.ll
@@ -0,0 +1,81 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Built from the following source with clang -O1
+; struct S { int i; };
+; int function(struct S s, int i) { return s.i + i; }
+
+; Due to the X86_64 ABI, 's' is passed in registers and once optimized, the
+; entirety of 's' is never reconstituted, since only the int is required, and
+; thus the variable's location is unknown/dead to debug info.
+
+; Future/current work should enable us to describe partial variables, which, in
+; this case, happens to be the entire variable.
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_name {{.*}} "function"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:   DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "s"
+; CHECK-NOT: DW_TAG
+; FIXME: Even though 's' is never reconstituted into a struct, the one member
+; variable is still live and used, and so we should be able to describe 's's
+; location as the location of that int.
+; CHECK-NOT: DW_AT_location
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:   DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_location
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "i"
+
+
+%struct.S = type { i32 }
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @_Z8function1Si(i32 %s.coerce, i32 %i) #0 {
+entry:
+  tail call void @llvm.dbg.declare(metadata !19, metadata !14), !dbg !20
+  tail call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !15), !dbg !20
+  %add = add nsw i32 %i, %s.coerce, !dbg !20
+  ret i32 %add, !dbg !20
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!16, !17}
+!llvm.ident = !{!18}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !3, metadata !8, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/dead-argument-order.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"dead-argument-order.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !1, null, metadata !"S", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS1S"} ; [ DW_TAG_structure_type ] [S] [line 1, size 32, align 32, offset 0] [def] [from ]
+!5 = metadata !{metadata !6}
+!6 = metadata !{i32 786445, metadata !1, metadata !"_ZTS1S", metadata !"i", i32 1, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] [i] [line 1, size 32, align 32, offset 0] [from int]
+!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786478, metadata !1, metadata !10, metadata !"function", metadata !"function", metadata !"_Z8function1Si", i32 2, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i32)* @_Z8function1Si, null, null, metadata !13, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [function]
+!10 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/dead-argument-order.cpp]
+!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{metadata !7, metadata !4, metadata !7}
+!13 = metadata !{metadata !14, metadata !15}
+!14 = metadata !{i32 786689, metadata !9, metadata !"s", metadata !10, i32 16777218, metadata !"_ZTS1S", i32 0, i32 0} ; [ DW_TAG_arg_variable ] [s] [line 2]
+!15 = metadata !{i32 786689, metadata !9, metadata !"i", metadata !10, i32 33554434, metadata !7, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 2]
+!16 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!17 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!18 = metadata !{metadata !"clang version 3.5.0 "}
+!19 = metadata !{%struct.S* undef}
+!20 = metadata !{i32 2, i32 0, metadata !9, null}
+
diff --git a/test/DebugInfo/dwarf-public-names.ll b/test/DebugInfo/dwarf-public-names.ll
index ca0d721..7218964 100644
--- a/test/DebugInfo/dwarf-public-names.ll
+++ b/test/DebugInfo/dwarf-public-names.ll
@@ -40,12 +40,14 @@
 ; CHECK: version = 0x0002
 
 ; Check for each name in the output.
-; CHECK: global_namespace_variable
-; CHECK: global_namespace_function
-; CHECK: static_member_function
-; CHECK: global_variable
-; CHECK: global_function
-; CHECK: member_function
+; CHECK-DAG: "ns"
+; CHECK-DAG: "C::static_member_function"
+; CHECK-DAG: "global_variable"
+; CHECK-DAG: "ns::global_namespace_variable"
+; CHECK-DAG: "ns::global_namespace_function"
+; CHECK-DAG: "global_function"
+; CHECK-DAG: "C::static_member_variable"
+; CHECK-DAG: "C::member_function"
 
 %struct.C = type { i8 }
 
@@ -109,7 +111,7 @@ attributes #1 = { nounwind readnone }
 !18 = metadata !{i32 786478, metadata !4, null, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 13, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !14, metadata !1, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
 !19 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 19, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !1, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
 !20 = metadata !{i32 786478, metadata !4, metadata !21, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 24, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !1, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
-!21 = metadata !{i32 786489, null, metadata !"ns", metadata !4, i32 23} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp]
+!21 = metadata !{i32 786489, metadata !4, null, metadata !"ns", i32 23} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp]
 !22 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !23 = metadata !{null}
 !24 = metadata !{metadata !25, metadata !26, metadata !27}
diff --git a/test/DebugInfo/global.ll b/test/DebugInfo/global.ll
index c515114..3c97f0c 100644
--- a/test/DebugInfo/global.ll
+++ b/test/DebugInfo/global.ll
@@ -3,6 +3,9 @@
 ; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t
 ; RUN: llvm-dwarfdump %t | FileCheck %s
 
+; Also test that the null streamer doesn't crash with debug info.
+; RUN: %llc_dwarf -O0 -filetype=null < %s
+
 ; generated from the following source compiled to bitcode with clang -g -O1
 ; static int i;
 ; int main() {
diff --git a/test/DebugInfo/incorrect-variable-debugloc.ll b/test/DebugInfo/incorrect-variable-debugloc.ll
new file mode 100644
index 0000000..284704c
--- /dev/null
+++ b/test/DebugInfo/incorrect-variable-debugloc.ll
@@ -0,0 +1,391 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O2 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; This is a test case that's as reduced as I can get it, though I haven't fully
+; understood the mechanisms by which this bug occurs, so perhaps there's further
+; simplification to be had (it's certainly a bit non-obvious what's going on). I
+; hesitate to hand-craft or otherwise simplify the IR compared to what Clang
+; generates as this is a particular tickling of optimizations and debug location
+; propagation I want a realistic example of.
+
+; Generated with clang-tot -cc1 -g -O2 -w -std=c++11  -fsanitize=address,use-after-return -fcxx-exceptions -fexceptions -x c++ incorrect-variable-debug-loc.cpp -emit-llvm
+
+; struct A {
+;   int m_fn1();
+; };
+;
+; struct B {
+;   void __attribute__((always_inline)) m_fn2() { i = 0; }
+;   int i;
+; };
+;
+; struct C {
+;   void m_fn3();
+;   int j;
+;   B b;
+; };
+;
+; int fn1() {
+;   C A;
+;   A.b.m_fn2();
+;   A.m_fn3();
+; }
+; void C::m_fn3() {
+;   A().m_fn1();
+;   b.m_fn2();
+; }
+
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}} "C"
+; CHECK: [[FN3_DECL:.*]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "m_fn3"
+
+; CHECK: DW_AT_specification {{.*}} {[[FN3_DECL]]}
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "this"
+
+%struct.C = type { i32, %struct.B }
+%struct.B = type { i32 }
+%struct.A = type { i8 }
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 1, void ()* @asan.module_ctor }]
+@__asan_option_detect_stack_use_after_return = external global i32
+@__asan_gen_ = private unnamed_addr constant [11 x i8] c"1 32 8 1 A\00", align 1
+@__asan_gen_1 = private unnamed_addr constant [13 x i8] c"1 32 1 3 tmp\00", align 1
+
+; Function Attrs: noreturn sanitize_address
+define i32 @_Z3fn1v() #0 {
+entry:
+  %MyAlloca = alloca [64 x i8], align 32, !dbg !39
+  %0 = ptrtoint [64 x i8]* %MyAlloca to i64, !dbg !39
+  %1 = load i32* @__asan_option_detect_stack_use_after_return, !dbg !39
+  %2 = icmp ne i32 %1, 0, !dbg !39
+  br i1 %2, label %3, label %5
+
+; <label>:3                                       ; preds = %entry
+  %4 = call i64 @__asan_stack_malloc_0(i64 64, i64 %0), !dbg !39
+  br label %5
+
+; <label>:5                                       ; preds = %entry, %3
+  %6 = phi i64 [ %0, %entry ], [ %4, %3 ], !dbg !39
+  %7 = add i64 %6, 32, !dbg !39
+  %8 = inttoptr i64 %7 to %struct.C*, !dbg !39
+  %9 = inttoptr i64 %6 to i64*, !dbg !39
+  store i64 1102416563, i64* %9, !dbg !39
+  %10 = add i64 %6, 8, !dbg !39
+  %11 = inttoptr i64 %10 to i64*, !dbg !39
+  store i64 ptrtoint ([11 x i8]* @__asan_gen_ to i64), i64* %11, !dbg !39
+  %12 = add i64 %6, 16, !dbg !39
+  %13 = inttoptr i64 %12 to i64*, !dbg !39
+  store i64 ptrtoint (i32 ()* @_Z3fn1v to i64), i64* %13, !dbg !39
+  %14 = lshr i64 %6, 3, !dbg !39
+  %15 = add i64 %14, 2147450880, !dbg !39
+  %16 = add i64 %15, 0, !dbg !39
+  %17 = inttoptr i64 %16 to i64*, !dbg !39
+  store i64 -868083117767659023, i64* %17, !dbg !39
+  %i.i = getelementptr inbounds %struct.C* %8, i64 0, i32 1, i32 0, !dbg !39
+  %18 = ptrtoint i32* %i.i to i64, !dbg !39
+  %19 = lshr i64 %18, 3, !dbg !39
+  %20 = add i64 %19, 2147450880, !dbg !39
+  %21 = inttoptr i64 %20 to i8*, !dbg !39
+  %22 = load i8* %21, !dbg !39
+  %23 = icmp ne i8 %22, 0, !dbg !39
+  br i1 %23, label %24, label %30, !dbg !39
+
+; <label>:24                                      ; preds = %5
+  %25 = and i64 %18, 7, !dbg !39
+  %26 = add i64 %25, 3, !dbg !39
+  %27 = trunc i64 %26 to i8, !dbg !39
+  %28 = icmp sge i8 %27, %22, !dbg !39
+  br i1 %28, label %29, label %30
+
+; <label>:29                                      ; preds = %24
+  call void @__asan_report_store4(i64 %18), !dbg !39
+  call void asm sideeffect "", ""()
+  unreachable
+
+; <label>:30                                      ; preds = %24, %5
+  store i32 0, i32* %i.i, align 4, !dbg !39, !tbaa !41
+  tail call void @llvm.dbg.value(metadata !{%struct.C* %8}, i64 0, metadata !27), !dbg !46
+  call void @_ZN1C5m_fn3Ev(%struct.C* %8), !dbg !47
+  unreachable, !dbg !47
+}
+
+; Function Attrs: sanitize_address
+define void @_ZN1C5m_fn3Ev(%struct.C* nocapture %this) #1 align 2 {
+entry:
+  %MyAlloca = alloca [64 x i8], align 32, !dbg !48
+  %0 = ptrtoint [64 x i8]* %MyAlloca to i64, !dbg !48
+  %1 = load i32* @__asan_option_detect_stack_use_after_return, !dbg !48
+  %2 = icmp ne i32 %1, 0, !dbg !48
+  br i1 %2, label %3, label %5
+
+; <label>:3                                       ; preds = %entry
+  %4 = call i64 @__asan_stack_malloc_0(i64 64, i64 %0), !dbg !48
+  br label %5
+
+; <label>:5                                       ; preds = %entry, %3
+  %6 = phi i64 [ %0, %entry ], [ %4, %3 ], !dbg !48
+  %7 = add i64 %6, 32, !dbg !48
+  %8 = inttoptr i64 %7 to %struct.A*, !dbg !48
+  %9 = inttoptr i64 %6 to i64*, !dbg !48
+  store i64 1102416563, i64* %9, !dbg !48
+  %10 = add i64 %6, 8, !dbg !48
+  %11 = inttoptr i64 %10 to i64*, !dbg !48
+  store i64 ptrtoint ([13 x i8]* @__asan_gen_1 to i64), i64* %11, !dbg !48
+  %12 = add i64 %6, 16, !dbg !48
+  %13 = inttoptr i64 %12 to i64*, !dbg !48
+  store i64 ptrtoint (void (%struct.C*)* @_ZN1C5m_fn3Ev to i64), i64* %13, !dbg !48
+  %14 = lshr i64 %6, 3, !dbg !48
+  %15 = add i64 %14, 2147450880, !dbg !48
+  %16 = add i64 %15, 0, !dbg !48
+  %17 = inttoptr i64 %16 to i64*, !dbg !48
+  store i64 -868083113472691727, i64* %17, !dbg !48
+  tail call void @llvm.dbg.value(metadata !{%struct.C* %this}, i64 0, metadata !30), !dbg !48
+  %call = call i32 @_ZN1A5m_fn1Ev(%struct.A* %8), !dbg !49
+  %i.i = getelementptr inbounds %struct.C* %this, i64 0, i32 1, i32 0, !dbg !50
+  %18 = ptrtoint i32* %i.i to i64, !dbg !50
+  %19 = lshr i64 %18, 3, !dbg !50
+  %20 = add i64 %19, 2147450880, !dbg !50
+  %21 = inttoptr i64 %20 to i8*, !dbg !50
+  %22 = load i8* %21, !dbg !50
+  %23 = icmp ne i8 %22, 0, !dbg !50
+  br i1 %23, label %24, label %30, !dbg !50
+
+; <label>:24                                      ; preds = %5
+  %25 = and i64 %18, 7, !dbg !50
+  %26 = add i64 %25, 3, !dbg !50
+  %27 = trunc i64 %26 to i8, !dbg !50
+  %28 = icmp sge i8 %27, %22, !dbg !50
+  br i1 %28, label %29, label %30
+
+; <label>:29                                      ; preds = %24
+  call void @__asan_report_store4(i64 %18), !dbg !50
+  call void asm sideeffect "", ""()
+  unreachable
+
+; <label>:30                                      ; preds = %24, %5
+  store i32 0, i32* %i.i, align 4, !dbg !50, !tbaa !41
+  store i64 1172321806, i64* %9, !dbg !52
+  %31 = icmp ne i64 %6, %0, !dbg !52
+  br i1 %31, label %32, label %39, !dbg !52
+
+; <label>:32                                      ; preds = %30
+  %33 = add i64 %15, 0, !dbg !52
+  %34 = inttoptr i64 %33 to i64*, !dbg !52
+  store i64 -723401728380766731, i64* %34, !dbg !52
+  %35 = add i64 %6, 56, !dbg !52
+  %36 = inttoptr i64 %35 to i64*, !dbg !52
+  %37 = load i64* %36, !dbg !52
+  %38 = inttoptr i64 %37 to i8*, !dbg !52
+  store i8 0, i8* %38, !dbg !52
+  br label %42, !dbg !52
+
+; <label>:39                                      ; preds = %30
+  %40 = add i64 %15, 0, !dbg !52
+  %41 = inttoptr i64 %40 to i64*, !dbg !52
+  store i64 0, i64* %41, !dbg !52
+  br label %42, !dbg !52
+
+; <label>:42                                      ; preds = %39, %32
+  ret void, !dbg !52
+}
+
+declare i32 @_ZN1A5m_fn1Ev(%struct.A*) #2
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #3
+
+define internal void @asan.module_ctor() {
+  tail call void @__asan_init_v3()
+  ret void
+}
+
+declare void @__asan_init_v3()
+
+declare void @__asan_report_load1(i64)
+
+declare void @__asan_load1(i64)
+
+declare void @__asan_report_load2(i64)
+
+declare void @__asan_load2(i64)
+
+declare void @__asan_report_load4(i64)
+
+declare void @__asan_load4(i64)
+
+declare void @__asan_report_load8(i64)
+
+declare void @__asan_load8(i64)
+
+declare void @__asan_report_load16(i64)
+
+declare void @__asan_load16(i64)
+
+declare void @__asan_report_store1(i64)
+
+declare void @__asan_store1(i64)
+
+declare void @__asan_report_store2(i64)
+
+declare void @__asan_store2(i64)
+
+declare void @__asan_report_store4(i64)
+
+declare void @__asan_store4(i64)
+
+declare void @__asan_report_store8(i64)
+
+declare void @__asan_store8(i64)
+
+declare void @__asan_report_store16(i64)
+
+declare void @__asan_store16(i64)
+
+declare void @__asan_report_load_n(i64, i64)
+
+declare void @__asan_report_store_n(i64, i64)
+
+declare void @__asan_loadN(i64, i64)
+
+declare void @__asan_storeN(i64, i64)
+
+declare i8* @__asan_memmove(i8*, i8*, i64)
+
+declare i8* @__asan_memcpy(i8*, i8*, i64)
+
+declare i8* @__asan_memset(i8*, i32, i64)
+
+declare void @__asan_handle_no_return()
+
+declare void @__sanitizer_cov()
+
+declare void @__sanitizer_ptr_cmp(i64, i64)
+
+declare void @__sanitizer_ptr_sub(i64, i64)
+
+declare i64 @__asan_stack_malloc_0(i64, i64)
+
+declare void @__asan_stack_free_0(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_1(i64, i64)
+
+declare void @__asan_stack_free_1(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_2(i64, i64)
+
+declare void @__asan_stack_free_2(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_3(i64, i64)
+
+declare void @__asan_stack_free_3(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_4(i64, i64)
+
+declare void @__asan_stack_free_4(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_5(i64, i64)
+
+declare void @__asan_stack_free_5(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_6(i64, i64)
+
+declare void @__asan_stack_free_6(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_7(i64, i64)
+
+declare void @__asan_stack_free_7(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_8(i64, i64)
+
+declare void @__asan_stack_free_8(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_9(i64, i64)
+
+declare void @__asan_stack_free_9(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_10(i64, i64)
+
+declare void @__asan_stack_free_10(i64, i64, i64)
+
+declare void @__asan_poison_stack_memory(i64, i64)
+
+declare void @__asan_unpoison_stack_memory(i64, i64)
+
+declare void @__asan_before_dynamic_init(i64)
+
+declare void @__asan_after_dynamic_init()
+
+declare void @__asan_register_globals(i64, i64)
+
+declare void @__asan_unregister_globals(i64, i64)
+
+declare void @__sanitizer_cov_module_init(i64)
+
+attributes #0 = { noreturn sanitize_address "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { sanitize_address "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!36, !37}
+!llvm.ident = !{!38}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !3, metadata !21, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/<stdin>] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"<stdin>", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !14}
+!4 = metadata !{i32 786451, metadata !5, null, metadata !"C", i32 10, i64 64, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 10, size 64, align 32, offset 0] [def] [from ]
+!5 = metadata !{metadata !"incorrect-variable-debug-loc.cpp", metadata !"/tmp/dbginfo"}
+!6 = metadata !{metadata !7, metadata !9, metadata !10}
+!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1C", metadata !"j", i32 12, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [j] [line 12, size 32, align 32, offset 0] [from int]
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1C", metadata !"b", i32 13, i64 32, i64 32, i64 32, i32 0, metadata !"_ZTS1B"} ; [ DW_TAG_member ] [b] [line 13, size 32, align 32, offset 32] [from _ZTS1B]
+!10 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1C", metadata !"m_fn3", metadata !"m_fn3", metadata !"_ZN1C5m_fn3Ev", i32 11, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, null, i32 11} ; [ DW_TAG_subprogram ] [line 11] [m_fn3]
+!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{null, metadata !13}
+!13 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
+!14 = metadata !{i32 786451, metadata !5, null, metadata !"B", i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null, metadata !"_ZTS1B"} ; [ DW_TAG_structure_type ] [B] [line 5, size 32, align 32, offset 0] [def] [from ]
+!15 = metadata !{metadata !16, metadata !17}
+!16 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1B", metadata !"i", i32 7, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [i] [line 7, size 32, align 32, offset 0] [from int]
+!17 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1B", metadata !"m_fn2", metadata !"m_fn2", metadata !"_ZN1B5m_fn2Ev", i32 6, metadata !18, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, null, i32 6} ; [ DW_TAG_subprogram ] [line 6] [m_fn2]
+!18 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!19 = metadata !{null, metadata !20}
+!20 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1B]
+!21 = metadata !{metadata !22, metadata !28, metadata !32}
+!22 = metadata !{i32 786478, metadata !5, metadata !23, metadata !"fn1", metadata !"fn1", metadata !"_Z3fn1v", i32 16, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @_Z3fn1v, null, null, metadata !26, i32 16} ; [ DW_TAG_subprogram ] [line 16] [def] [fn1]
+!23 = metadata !{i32 786473, metadata !5}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/incorrect-variable-debug-loc.cpp]
+!24 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!25 = metadata !{metadata !8}
+!26 = metadata !{metadata !27}
+!27 = metadata !{i32 786688, metadata !22, metadata !"A", metadata !23, i32 17, metadata !"_ZTS1C", i32 0, i32 0} ; [ DW_TAG_auto_variable ] [A] [line 17]
+!28 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1C", metadata !"m_fn3", metadata !"m_fn3", metadata !"_ZN1C5m_fn3Ev", i32 21, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%struct.C*)* @_ZN1C5m_fn3Ev, null, metadata !10, metadata !29, i32 21} ; [ DW_TAG_subprogram ] [line 21] [def] [m_fn3]
+!29 = metadata !{metadata !30}
+!30 = metadata !{i32 786689, metadata !28, metadata !"this", null, i32 16777216, metadata !31, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!31 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
+!32 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1B", metadata !"m_fn2", metadata !"m_fn2", metadata !"_ZN1B5m_fn2Ev", i32 6, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, metadata !17, metadata !33, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [m_fn2]
+!33 = metadata !{metadata !34}
+!34 = metadata !{i32 786689, metadata !32, metadata !"this", null, i32 16777216, metadata !35, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!35 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1B]
+!36 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!37 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!38 = metadata !{metadata !"clang version 3.5.0 "}
+!39 = metadata !{i32 6, i32 0, metadata !32, metadata !40}
+!40 = metadata !{i32 18, i32 0, metadata !22, null}
+!41 = metadata !{metadata !42, metadata !43, i64 0}
+!42 = metadata !{metadata !"_ZTS1B", metadata !43, i64 0}
+!43 = metadata !{metadata !"int", metadata !44, i64 0}
+!44 = metadata !{metadata !"omnipotent char", metadata !45, i64 0}
+!45 = metadata !{metadata !"Simple C/C++ TBAA"}
+!46 = metadata !{i32 17, i32 0, metadata !22, null}
+!47 = metadata !{i32 19, i32 0, metadata !22, null}
+!48 = metadata !{i32 0, i32 0, metadata !28, null}
+!49 = metadata !{i32 22, i32 0, metadata !28, null}
+!50 = metadata !{i32 6, i32 0, metadata !32, metadata !51}
+!51 = metadata !{i32 23, i32 0, metadata !28, null}
+!52 = metadata !{i32 24, i32 0, metadata !28, null}
diff --git a/test/DebugInfo/inline-no-debug-info.ll b/test/DebugInfo/inline-no-debug-info.ll
new file mode 100644
index 0000000..2257b89
--- /dev/null
+++ b/test/DebugInfo/inline-no-debug-info.ll
@@ -0,0 +1,69 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+; This was generated from the following source:
+; int a, b;
+; __attribute__((__always_inline__)) static void callee2() { b = 2; }
+; __attribute__((__nodebug__)) void callee() { a = 1; callee2(); }
+; void caller() { callee(); }
+; by running
+;   clang -S test.c -emit-llvm -O1 -gline-tables-only -fno-strict-aliasing
+
+; CHECK-LABEL: @caller(
+
+; This instruction did not have a !dbg metadata in the callee.
+; CHECK: store i32 1, {{.*}}, !dbg [[A:!.*]]
+
+; This instruction came from callee with a !dbg metadata.
+; CHECK: store i32 2, {{.*}}, !dbg [[B:!.*]]
+
+; The remaining instruction from the caller.
+; CHECK: ret void, !dbg [[A]]
+
+; Debug location of the code in caller() and of the inlined code that did not
+; have any debug location before.
+; CHECK-DAG: [[A]] = metadata !{i32 4, i32 0, metadata !{{[01-9]+}}, null}
+
+; Debug location of the inlined code.
+; CHECK-DAG: [[B]] = metadata !{i32 2, i32 0, metadata !{{[01-9]+}}, metadata [[A]]}
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+; Function Attrs: nounwind uwtable
+define void @callee() #0 {
+entry:
+  store i32 1, i32* @a, align 4
+  store i32 2, i32* @b, align 4, !dbg !11
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define void @caller() #0 {
+entry:
+  tail call void @callee(), !dbg !12
+  ret void, !dbg !12
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 (210174)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [/code/llvm/build0/test.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"test.c", metadata !"/code/llvm/build0"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !7}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"caller", metadata !"caller", metadata !"", i32 4, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void ()* @caller, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [caller]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/code/llvm/build0/test.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"callee2", metadata !"callee2", metadata !"", i32 2, metadata !6, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [local] [def] [callee2]
+!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!9 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!10 = metadata !{metadata !"clang version 3.5.0 (210174)"}
+!11 = metadata !{i32 2, i32 0, metadata !7, null}
+!12 = metadata !{i32 4, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/inlined-arguments.ll b/test/DebugInfo/inlined-arguments.ll
index ebc81a6..6979862 100644
--- a/test/DebugInfo/inlined-arguments.ll
+++ b/test/DebugInfo/inlined-arguments.ll
@@ -16,9 +16,11 @@
 
 ; CHECK: DW_AT_name{{.*}}"f1"
 ; CHECK: DW_TAG_formal_parameter
-; CHECK-NEXT: DW_AT_name{{.*}}"x"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}"x"
 ; CHECK: DW_TAG_formal_parameter
-; CHECK-NEXT: DW_AT_name{{.*}}"y"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}"y"
 
 ; Function Attrs: uwtable
 define void @_Z2f2v() #0 {
diff --git a/test/DebugInfo/llvm-symbolizer.test b/test/DebugInfo/llvm-symbolizer.test
index 6aa1287..20d3dda 100644
--- a/test/DebugInfo/llvm-symbolizer.test
+++ b/test/DebugInfo/llvm-symbolizer.test
@@ -17,6 +17,8 @@ RUN: echo "%p/Inputs/macho-universal 0x1f84" >> %t.input
 RUN: echo "%p/Inputs/macho-universal:i386 0x1f67" >> %t.input
 RUN: echo "%p/Inputs/macho-universal:x86_64 0x100000f05" >> %t.input
 RUN: echo "%p/Inputs/llvm-symbolizer-dwo-test 0x400514" >> %t.input
+RUN: echo "%p/Inputs/fission-ranges.elf-x86_64 0x720" >> %t.input
+RUN: echo "%p/Inputs/arange-overlap.elf-x86_64 0x714" >> %t.input
 
 RUN: llvm-symbolizer --functions=linkage --inlining --demangle=false \
 RUN:    --default-arch=i386 < %t.input | FileCheck %s
@@ -90,6 +92,12 @@ CHECK:      _Z3inci
 CHECK: main
 CHECK-NEXT: llvm-symbolizer-dwo-test.cc:11
 
+CHECK: main
+CHECK-NEXT: {{.*}}fission-ranges.cc:6
+
+CHECK: _ZN1S3bazEv
+CHECK-NEXT: {{.*}}arange-overlap.cc:6
+
 RUN: echo "unexisting-file 0x1234" > %t.input2
 RUN: llvm-symbolizer < %t.input2
 
diff --git a/test/DebugInfo/missing-abstract-variable.ll b/test/DebugInfo/missing-abstract-variable.ll
new file mode 100644
index 0000000..59a38cf
--- /dev/null
+++ b/test/DebugInfo/missing-abstract-variable.ll
@@ -0,0 +1,191 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; The formal parameter 'b' for Function 'x' when inlined within 'a' is lost on
+; mips and powerpc64 (and on x86_64 at at least -O2). Presumably this is a
+; SelectionDAG issue (do mips/powerpc64 use FastISel?).
+; XFAIL: mips, powerpc64, s390x
+
+; Build from the following source with clang -O2.
+
+; The important details are that 'x's abstract definition is first built during
+; the definition of 'b', where the parameter to 'x' is constant and so 'x's 's'
+; variable is optimized away. No abstract definition DIE for 's' is constructed.
+; Then, during 'a' emission, the abstract DbgVariable for 's' is created, but
+; the abstract DIE isn't (since the abstract definition for 'b' is already
+; built). This results in 's' inlined in 'a' being emitted with its name, line,
+; file there, rather than referencing an abstract definition.
+
+; extern int t;
+;
+; void f(int);
+;
+; inline void x(bool b) {
+;   if (b) {
+;     int s = t;
+;     f(s);
+;   }
+;   f(0);
+; }
+;
+; void b() {
+;   x(false);
+; }
+;
+; void a(bool u) {
+;   x(u);
+; }
+
+; CHECK: [[ABS_X:.*]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_name {{.*}} "x"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[ABS_B:.*]]:   DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "b"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:     DW_TAG_lexical_block
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:       DW_TAG_lexical_block
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[ABS_S:.*]]:       DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK:         DW_AT_name {{.*}} "s"
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_name {{.*}} "b"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:   DW_TAG_inlined_subroutine
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_abstract_origin {{.*}} {[[ABS_X]]}
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:     DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:       DW_AT_abstract_origin {{.*}} {[[ABS_B]]}
+; Notice 'x's local variable 's' is missing. Not necessarily a bug here,
+; since it's been optimized entirely away and it should be described in
+; abstract subprogram.
+; CHECK-NOT: DW_TAG
+; CHECK: NULL
+; CHECK-NOT: DW_TAG
+; CHECK: NULL
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_name {{.*}} "a"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:   DW_TAG_formal_parameter
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:   DW_TAG_inlined_subroutine
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_abstract_origin {{.*}} {[[ABS_X]]}
+; CHECK-NOT: {{DW_TAG|NULL}}
+; FIXME: This formal parameter goes missing at least at -O2 (& on
+; mips/powerpc), maybe before that. Perhaps SelectionDAG is to blame (and
+; fastisel succeeds).
+; CHECK:     DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:       DW_AT_abstract_origin {{.*}} {[[ABS_B]]}
+
+; The two lexical blocks here are caused by the scope of the if that includes
+; the condition variable, and the scope within the if's composite statement. I'm
+; not sure we really need both of them since there's no variable declared in the
+; outer of the two
+
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:     DW_TAG_lexical_block
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:       DW_TAG_lexical_block
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:         DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK:           DW_AT_abstract_origin {{.*}} {[[ABS_S]]}
+
+@t = external global i32
+
+; Function Attrs: uwtable
+define void @_Z1bv() #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata !24, i64 0, metadata !25), !dbg !27
+  tail call void @_Z1fi(i32 0), !dbg !28
+  ret void, !dbg !29
+}
+
+; Function Attrs: uwtable
+define void @_Z1ab(i1 zeroext %u) #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i1 %u}, i64 0, metadata !13), !dbg !30
+  tail call void @llvm.dbg.value(metadata !{i1 %u}, i64 0, metadata !31), !dbg !33
+  br i1 %u, label %if.then.i, label %_Z1xb.exit, !dbg !34
+
+if.then.i:                                        ; preds = %entry
+  %0 = load i32* @t, align 4, !dbg !35, !tbaa !36
+  tail call void @llvm.dbg.value(metadata !{i32 %0}, i64 0, metadata !40), !dbg !35
+  tail call void @_Z1fi(i32 %0), !dbg !41
+  br label %_Z1xb.exit, !dbg !42
+
+_Z1xb.exit:                                       ; preds = %entry, %if.then.i
+  tail call void @_Z1fi(i32 0), !dbg !43
+  ret void, !dbg !44
+}
+
+declare void @_Z1fi(i32) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21, !22}
+!llvm.ident = !{!23}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/missing-abstract-variables.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"missing-abstract-variables.cc", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !8, metadata !14}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"b", metadata !"b", metadata !"_Z1bv", i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_Z1bv, null, null, metadata !2, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [b]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/missing-abstract-variables.cc]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"a", metadata !"a", metadata !"_Z1ab", i32 17, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i1)* @_Z1ab, null, null, metadata !12, i32 17} ; [ DW_TAG_subprogram ] [line 17] [def] [a]
+!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = metadata !{null, metadata !11}
+!11 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786689, metadata !8, metadata !"u", metadata !5, i32 16777233, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [u] [line 17]
+!14 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"x", metadata !"x", metadata !"_Z1xb", i32 5, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !15, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [x]
+!15 = metadata !{metadata !16, metadata !17}
+!16 = metadata !{i32 786689, metadata !14, metadata !"b", metadata !5, i32 16777221, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 5]
+!17 = metadata !{i32 786688, metadata !18, metadata !"s", metadata !5, i32 7, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 7]
+!18 = metadata !{i32 786443, metadata !1, metadata !19, i32 6, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/missing-abstract-variables.cc]
+!19 = metadata !{i32 786443, metadata !1, metadata !14, i32 6, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/missing-abstract-variables.cc]
+!20 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!21 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!22 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!23 = metadata !{metadata !"clang version 3.5.0 "}
+!24 = metadata !{i1 false}
+!25 = metadata !{i32 786689, metadata !14, metadata !"b", metadata !5, i32 16777221, metadata !11, i32 0, metadata !26} ; [ DW_TAG_arg_variable ] [b] [line 5]
+!26 = metadata !{i32 14, i32 0, metadata !4, null}
+!27 = metadata !{i32 5, i32 0, metadata !14, metadata !26}
+!28 = metadata !{i32 10, i32 0, metadata !14, metadata !26}
+!29 = metadata !{i32 15, i32 0, metadata !4, null}
+!30 = metadata !{i32 17, i32 0, metadata !8, null}
+!31 = metadata !{i32 786689, metadata !14, metadata !"b", metadata !5, i32 16777221, metadata !11, i32 0, metadata !32} ; [ DW_TAG_arg_variable ] [b] [line 5]
+!32 = metadata !{i32 18, i32 0, metadata !8, null}
+!33 = metadata !{i32 5, i32 0, metadata !14, metadata !32}
+!34 = metadata !{i32 6, i32 0, metadata !19, metadata !32}
+!35 = metadata !{i32 7, i32 0, metadata !18, metadata !32}
+!36 = metadata !{metadata !37, metadata !37, i64 0}
+!37 = metadata !{metadata !"int", metadata !38, i64 0}
+!38 = metadata !{metadata !"omnipotent char", metadata !39, i64 0}
+!39 = metadata !{metadata !"Simple C/C++ TBAA"}
+!40 = metadata !{i32 786688, metadata !18, metadata !"s", metadata !5, i32 7, metadata !20, i32 0, metadata !32} ; [ DW_TAG_auto_variable ] [s] [line 7]
+!41 = metadata !{i32 8, i32 0, metadata !18, metadata !32} ; [ DW_TAG_imported_declaration ]
+!42 = metadata !{i32 9, i32 0, metadata !18, metadata !32}
+!43 = metadata !{i32 10, i32 0, metadata !14, metadata !32}
+!44 = metadata !{i32 19, i32 0, metadata !8, null}
diff --git a/test/DebugInfo/nodebug.ll b/test/DebugInfo/nodebug.ll
new file mode 100644
index 0000000..4d86b24
--- /dev/null
+++ b/test/DebugInfo/nodebug.ll
@@ -0,0 +1,51 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf < %s -filetype=obj | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Test that a nodebug function (a function not appearing in the debug info IR
+; metadata subprogram list) with DebugLocs on its IR doesn't cause crashes/does
+; the right thing.
+
+; Build with clang from the following:
+; extern int i;
+; inline __attribute__((always_inline)) void f1() {
+;   i = 3;
+; }
+;
+; __attribute__((nodebug)) void f2() {
+;   f1();
+; }
+
+; Check that there's only one DW_TAG_subprogram, nothing for the 'f2' function.
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_name {{.*}} "f1"
+; CHECK-NOT: DW_TAG_subprogram
+
+@i = external global i32
+
+; Function Attrs: uwtable
+define void @_Z2f2v() #0 {
+entry:
+  store i32 3, i32* @i, align 4, !dbg !11
+  ret void
+}
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/nodebug.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"nodebug.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f1", metadata !"f1", metadata !"_Z2f1v", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [f1]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/nodebug.cpp]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!9 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!10 = metadata !{metadata !"clang version 3.5.0 "}
+!11 = metadata !{i32 3, i32 0, metadata !4, null}
diff --git a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll
index 7c0227d..539c890 100644
--- a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll
+++ b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll
@@ -1,5 +1,5 @@
 ; RUN: %lli_mcjit -relocation-model=pic -code-model=large %s
-; XFAIL: cygwin, win32, mingw, mips, powerpc64, i686, i386, aarch64, arm
+; XFAIL: cygwin, win32, mingw, mips, i686, i386, aarch64, arm
 declare i8* @__cxa_allocate_exception(i64)
 declare void @__cxa_throw(i8*, i8*, i8*)
 declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/MCJIT/lit.local.cfg b/test/ExecutionEngine/MCJIT/lit.local.cfg
index 5dc749d..f981403 100644
--- a/test/ExecutionEngine/MCJIT/lit.local.cfg
+++ b/test/ExecutionEngine/MCJIT/lit.local.cfg
@@ -1,5 +1,5 @@
 root = config.root
-targets = set(root.targets_to_build.split())
+targets = root.targets
 if ('X86' in targets) | ('AArch64' in targets) | ('ARM' in targets) | \
    ('Mips' in targets) | ('PowerPC' in targets) | ('SystemZ' in targets):
     config.unsupported = False
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s b/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s
new file mode 100644
index 0000000..e87b449
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s
@@ -0,0 +1,32 @@
+# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -relocation-model=pic -filetype=obj -o %t.o %s
+# RUN: llvm-rtdyld -triple=x86_64-apple-macosx10.9 -verify -check=%s %t.o
+# RUN: rm %t.o
+
+        .section	__TEXT,__text,regular,pure_instructions
+	.globl	foo
+	.align	4, 0x90
+foo:
+        retq
+
+	.globl	main
+	.align	4, 0x90
+main:
+# Test PC-rel branch.
+# rtdyld-check: decode_operand(insn1, 0) = foo - next_pc(insn1)
+insn1:
+        callq	foo
+
+# Test PC-rel signed.
+# rtdyld-check: decode_operand(insn2, 4) = x - next_pc(insn2)
+insn2:
+	movl	x(%rip), %eax
+	movl	$0, %eax
+	retq
+
+        .section	__DATA,__data
+	.globl	x
+	.align	2
+x:
+        .long   5
+
+.subsections_via_symbols
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/lit.local.cfg b/test/ExecutionEngine/RuntimeDyld/X86/lit.local.cfg
new file mode 100644
index 0000000..e71f3cc
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg
index 7f0b69e..f6673df 100644
--- a/test/ExecutionEngine/lit.local.cfg
+++ b/test/ExecutionEngine/lit.local.cfg
@@ -1,7 +1,10 @@
-if config.root.host_arch in ['PowerPC', 'AArch64', 'ARM64', 'SystemZ']:
+if config.root.host_arch in ['PowerPC', 'AArch64', 'SystemZ']:
     config.unsupported = True
 
 # CMake and autoconf diverge in naming or host_arch
+if 'powerpc64' in config.root.target_triple:
+    config.unsupported = True
+
 if 'aarch64' in config.root.target_triple \
     or 'arm64' in config.root.target_triple:
         config.unsupported = True
diff --git a/test/Feature/alias2.ll b/test/Feature/alias2.ll
index 693ef7c..73c874f 100644
--- a/test/Feature/alias2.ll
+++ b/test/Feature/alias2.ll
@@ -6,14 +6,23 @@
 @v2 = global [1 x i32] zeroinitializer
 ; CHECK: @v2 = global [1 x i32] zeroinitializer
 
-@v3 = alias i16, i32* @v1
-; CHECK: @v3 = alias i16, i32* @v1
+@v3 = global [2 x i16] zeroinitializer
+; CHECK: @v3 = global [2 x i16] zeroinitializer
 
-@v4 = alias i32, [1 x i32]* @v2
-; CHECK: @v4 = alias i32, [1 x i32]* @v2
+@a1 = alias bitcast (i32* @v1 to i16*)
+; CHECK: @a1 = alias bitcast (i32* @v1 to i16*)
 
-@v5 = alias addrspace(2) i32, i32* @v1
-; CHECK: @v5 = alias addrspace(2) i32, i32* @v1
+@a2 = alias bitcast([1 x i32]* @v2 to i32*)
+; CHECK: @a2 = alias getelementptr inbounds ([1 x i32]* @v2, i32 0, i32 0)
 
-@v6 = alias i16, i32* @v1
-; CHECK: @v6 = alias i16, i32* @v1
+@a3 = alias addrspacecast (i32* @v1 to i32 addrspace(2)*)
+; CHECK: @a3 = alias addrspacecast (i32* @v1 to i32 addrspace(2)*)
+
+@a4 = alias bitcast (i32* @v1 to i16*)
+; CHECK: @a4 = alias bitcast (i32* @v1 to i16*)
+
+@a5 = thread_local(localdynamic) alias i32* @v1
+; CHECK: @a5 = thread_local(localdynamic) alias i32* @v1
+
+@a6 = alias getelementptr ([2 x i16]* @v3, i32 1, i32 1)
+; CHECK: @a6 = alias getelementptr ([2 x i16]* @v3, i32 1, i32 1)
diff --git a/test/Feature/aliases.ll b/test/Feature/aliases.ll
index b2ce82a..ad1d1b0 100644
--- a/test/Feature/aliases.ll
+++ b/test/Feature/aliases.ll
@@ -7,6 +7,14 @@
 @bar = global i32 0
 @foo1 = alias i32* @bar
 @foo2 = alias i32* @bar
+@foo3 = alias i32* @foo2
+@foo4 = unnamed_addr alias i32* @foo2
+
+; Make sure the verifier does not complain about references to a global
+; declaration from an initializer.
+@decl = external global i32
+@ptr = global i32* @decl
+@ptr_a = alias i32** @ptr
 
 %FunTy = type i32()
 
@@ -14,10 +22,11 @@ define i32 @foo_f() {
   ret i32 0
 }
 @bar_f = alias weak_odr %FunTy* @foo_f
+@bar_ff = alias i32()* @bar_f
 
 @bar_i = alias internal i32* @bar
 
-@A = alias i64, i32* @bar
+@A = alias bitcast (i32* @bar to i64*)
 
 define i32 @test() {
 entry:
diff --git a/test/Feature/comdat.ll b/test/Feature/comdat.ll
new file mode 100644
index 0000000..05fb87c
--- /dev/null
+++ b/test/Feature/comdat.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+$f = comdat any
+; CHECK: $f = comdat any
+
+$f2 = comdat any
+; CHECK-NOT: f2
+
+@v = global i32 0, comdat $f
+; CHECK: @v = global i32 0, comdat $f
+
+@a = alias i32* @v
+; CHECK: @a = alias i32* @v{{$}}
+
+define void @f() comdat $f {
+  ret void
+}
+; CHECK: define void @f() comdat $f
diff --git a/test/Feature/globalvars.ll b/test/Feature/globalvars.ll
index dad1cf3..84b4bdf 100644
--- a/test/Feature/globalvars.ll
+++ b/test/Feature/globalvars.ll
@@ -16,3 +16,5 @@ define i32 @foo(i32 %blah) {
         ret i32 %blah
 }
 
+hidden dllexport global i32 42
+dllexport global i32 42
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_attr.ll b/test/Instrumentation/AddressSanitizer/X86/asm_attr.ll
index b83a7e9..0667a14 100644
--- a/test/Instrumentation/AddressSanitizer/X86/asm_attr.ll
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_attr.ll
@@ -4,16 +4,16 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 ; CHECK-LABEL: mov_no_attr
-; CHECK-NOT: callq __sanitizer_sanitize_load8@PLT
-; CHECK-NOT: callq __sanitizer_sanitize_store8@PLT
+; CHECK-NOT: callq __asan_report_load@PLT
+; CHECK-NOT: callq __asan_report_store@PLT
 define void @mov_no_attr(i64* %dst, i64* %src) {
   tail call void asm sideeffect "movq ($1), %rax  \0A\09movq %rax, ($0)  \0A\09", "r,r,~{memory},~{rax},~{dirflag},~{fpsr},~{flags}"(i64* %dst, i64* %src)
   ret void
 }
 
 ; CHECK-LABEL: mov_sanitize
-; CHECK: callq __sanitizer_sanitize_load8@PLT
-; CHECK: callq __sanitizer_sanitize_store8@PLT
+; CHECK: callq __asan_report_load8@PLT
+; CHECK: callq __asan_report_store8@PLT
 define void @mov_sanitize(i64* %dst, i64* %src) sanitize_address {
   tail call void asm sideeffect "movq ($1), %rax  \0A\09movq %rax, ($0)  \0A\09", "r,r,~{memory},~{rax},~{dirflag},~{fpsr},~{flags}"(i64* %dst, i64* %src)
   ret void
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_mov.ll b/test/Instrumentation/AddressSanitizer/X86/asm_mov.ll
index 030af7e..ad5e02e 100644
--- a/test/Instrumentation/AddressSanitizer/X86/asm_mov.ll
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_mov.ll
@@ -5,18 +5,35 @@ target triple = "x86_64-unknown-linux-gnu"
 
 ; CHECK-LABEL: mov1b
 ; CHECK: leaq -128(%rsp), %rsp
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: pushq %rcx
 ; CHECK-NEXT: pushq %rdi
+; CHECK-NEXT: pushfq
 ; CHECK-NEXT: leaq {{.*}}, %rdi
-; CHECK-NEXT: callq __sanitizer_sanitize_load1@PLT
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: shrq $3, %rax
+; CHECK-NEXT: movb 2147450880(%rax), %al
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je [[A:.*]]
+; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: andl $7, %ecx
+; CHECK-NEXT: movsbl %al, %eax
+; CHECK-NEXT: cmpl %eax, %ecx
+; CHECK-NEXT: jl {{.*}}
+; CHECK-NEXT: cld
+; CHECK-NEXT: emms
+; CHECK-NEXT: andq $-16, %rsp
+; CHECK-NEXT: callq __asan_report_load1@PLT
+; CHECK-NEXT: [[A]]:
+; CHECK-NEXT: popfq
 ; CHECK-NEXT: popq %rdi
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: leaq 128(%rsp), %rsp
 
 ; CHECK: leaq -128(%rsp), %rsp
-; CHECK-NEXT: pushq %rdi
-; CHECK-NEXT: leaq {{.*}}, %rdi
-; CHECK-NEXT: callq __sanitizer_sanitize_store1@PLT
-; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: leaq 128(%rsp), %rsp
+; CHECK: callq __asan_report_store1@PLT
+; CHECK: leaq 128(%rsp), %rsp
 
 ; CHECK: movb {{.*}}, {{.*}}
 define void @mov1b(i8* %dst, i8* %src) #0 {
@@ -27,18 +44,14 @@ entry:
 
 ; CHECK-LABEL: mov2b
 ; CHECK: leaq -128(%rsp), %rsp
-; CHECK-NEXT: pushq %rdi
-; CHECK-NEXT: leaq {{.*}}, %rdi
-; CHECK-NEXT: callq __sanitizer_sanitize_load2@PLT
-; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: leaq 128(%rsp), %rsp
+; CHECK: leal 1(%ecx), %ecx
+; CHECK: callq __asan_report_load2@PLT
+; CHECK: leaq 128(%rsp), %rsp
 
 ; CHECK: leaq -128(%rsp), %rsp
-; CHECK-NEXT: pushq %rdi
-; CHECK-NEXT: leaq {{.*}}, %rdi
-; CHECK-NEXT: callq __sanitizer_sanitize_store2@PLT
-; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: leaq 128(%rsp), %rsp
+; CHECK: leal 1(%ecx), %ecx
+; CHECK: callq __asan_report_store2@PLT
+; CHECK: leaq 128(%rsp), %rsp
 
 ; CHECK: movw {{.*}}, {{.*}}
 define void @mov2b(i16* %dst, i16* %src) #0 {
@@ -49,18 +62,14 @@ entry:
 
 ; CHECK-LABEL: mov4b
 ; CHECK: leaq -128(%rsp), %rsp
-; CHECK-NEXT: pushq %rdi
-; CHECK-NEXT: leaq {{.*}}, %rdi
-; CHECK-NEXT: callq __sanitizer_sanitize_load4@PLT
-; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: leaq 128(%rsp), %rsp
+; CHECK: addl $3, %ecx
+; CHECK: callq __asan_report_load4@PLT
+; CHECK: leaq 128(%rsp), %rsp
 
 ; CHECK: leaq -128(%rsp), %rsp
-; CHECK-NEXT: pushq %rdi
-; CHECK-NEXT: leaq {{.*}}, %rdi
-; CHECK-NEXT: callq __sanitizer_sanitize_store4@PLT
-; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: leaq 128(%rsp), %rsp
+; CHECK: addl $3, %ecx
+; CHECK: callq __asan_report_store4@PLT
+; CHECK: leaq 128(%rsp), %rsp
 
 ; CHECK: movl {{.*}}, {{.*}}
 define void @mov4b(i32* %dst, i32* %src) #0 {
@@ -71,17 +80,35 @@ entry:
 
 ; CHECK-LABEL: mov8b
 ; CHECK: leaq -128(%rsp), %rsp
-; CHECK-NEXT: pushq %rdi
-; CHECK-NEXT: leaq {{.*}}, %rdi
-; CHECK-NEXT: callq __sanitizer_sanitize_load8@PLT
-; CHECK-NEXT: popq %rdi
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: pushfq
+; CHECK-NEXT: leaq {{.*}}, %rax
+; CHECK-NEXT: shrq $3, %rax
+; CHECK-NEXT: cmpb $0, 2147450880(%rax)
+; CHECK-NEXT: je [[A:.*]]
+; CHECK-NEXT: cld
+; CHECK-NEXT: emms
+; CHECK-NEXT: andq $-16, %rsp
+; CHECK-NEXT: callq __asan_report_load8@PLT
+; CHECK-NEXT: [[A]]:
+; CHECK-NEXT: popfq
+; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: leaq 128(%rsp), %rsp
 
 ; CHECK: leaq -128(%rsp), %rsp
-; CHECK-NEXT: pushq %rdi
-; CHECK-NEXT: leaq {{.*}}, %rdi
-; CHECK-NEXT: callq __sanitizer_sanitize_store8@PLT
-; CHECK-NEXT: popq %rdi
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: pushfq
+; CHECK-NEXT: leaq {{.*}}, %rax
+; CHECK-NEXT: shrq $3, %rax
+; CHECK-NEXT: cmpb $0, 2147450880(%rax)
+; CHECK-NEXT: je [[A:.*]]
+; CHECK-NEXT: cld
+; CHECK-NEXT: emms
+; CHECK-NEXT: andq $-16, %rsp
+; CHECK-NEXT: callq __asan_report_store8@PLT
+; CHECK-NEXT: [[A]]:
+; CHECK-NEXT: popfq
+; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: leaq 128(%rsp), %rsp
 
 ; CHECK: movq {{.*}}, {{.*}}
@@ -93,18 +120,14 @@ entry:
 
 ; CHECK-LABEL: mov16b
 ; CHECK: leaq -128(%rsp), %rsp
-; CHECK-NEXT: pushq %rdi
-; CHECK-NEXT: leaq {{.*}}, %rdi
-; CHECK-NEXT: callq __sanitizer_sanitize_load16@PLT
-; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: leaq 128(%rsp), %rsp
+; CHECK: cmpw $0, 2147450880(%rax)
+; CHECK: callq __asan_report_load16@PLT
+; CHECK: leaq 128(%rsp), %rsp
 
 ; CHECK: leaq -128(%rsp), %rsp
-; CHECK-NEXT: pushq %rdi
-; CHECK-NEXT: leaq {{.*}}, %rdi
-; CHECK-NEXT: callq __sanitizer_sanitize_store16@PLT
-; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: leaq 128(%rsp), %rsp
+; CHECK: cmpw $0, 2147450880(%rax)
+; CHECK: callq __asan_report_store16@PLT
+; CHECK: leaq 128(%rsp), %rsp
 
 ; CHECK: movaps {{.*}}, {{.*}}
 define void @mov16b(<2 x i64>* %dst, <2 x i64>* %src) #0 {
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_mov.s b/test/Instrumentation/AddressSanitizer/X86/asm_mov.s
index df217c0..74a788c 100644
--- a/test/Instrumentation/AddressSanitizer/X86/asm_mov.s
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_mov.s
@@ -7,20 +7,14 @@
 # CHECK-LABEL: mov1b:
 #
 # CHECK: leaq -128(%rsp), %rsp
-# CHECK-NEXT: pushq %rdi
-# CHECK-NEXT: leaq (%rsi), %rdi
-# CHECK-NEXT: callq __sanitizer_sanitize_load1@PLT
-# CHECK-NEXT: popq %rdi
-# CHECK-NEXT: leaq 128(%rsp), %rsp
+# CHECK: callq __asan_report_load1@PLT
+# CHECK: leaq 128(%rsp), %rsp
 #
 # CHECK-NEXT: movb (%rsi), %al
 #
 # CHECK-NEXT: leaq -128(%rsp), %rsp
-# CHECK-NEXT: pushq %rdi
-# CHECK-NEXT: leaq (%rdi), %rdi
-# CHECK-NEXT: callq __sanitizer_sanitize_store1@PLT
-# CHECK-NEXT: popq %rdi
-# CHECK-NEXT: leaq 128(%rsp), %rsp
+# CHECK: callq __asan_report_store1@PLT
+# CHECK: leaq 128(%rsp), %rsp
 #
 # CHECK-NEXT: movb %al, (%rdi)
 mov1b:                                  # @mov1b
@@ -42,20 +36,14 @@ mov1b:                                  # @mov1b
 # CHECK-LABEL: mov16b:
 #
 # CHECK: leaq -128(%rsp), %rsp
-# CHECK-NEXT: pushq %rdi
-# CHECK-NEXT: leaq (%rsi), %rdi
-# CHECK-NEXT: callq __sanitizer_sanitize_load16@PLT
-# CHECK-NEXT: popq %rdi
-# CHECK-NEXT: leaq 128(%rsp), %rsp
+# CHECK: callq __asan_report_load16@PLT
+# CHECK: leaq 128(%rsp), %rsp
 #
 # CHECK-NEXT: movaps (%rsi), %xmm0
 #
 # CHECK-NEXT: leaq -128(%rsp), %rsp
-# CHECK-NEXT: pushq %rdi
-# CHECK-NEXT: leaq (%rdi), %rdi
-# CHECK-NEXT: callq __sanitizer_sanitize_store16@PLT
-# CHECK-NEXT: popq %rdi
-# CHECK-NEXT: leaq 128(%rsp), %rsp
+# CHECK: callq __asan_report_store16@PLT
+# CHECK: leaq 128(%rsp), %rsp
 #
 # CHECK-NEXT: movaps %xmm0, (%rdi)
 mov16b:                                 # @mov16b
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_mov_no_instrumentation.s b/test/Instrumentation/AddressSanitizer/X86/asm_mov_no_instrumentation.s
index cc05527..e3a1541 100644
--- a/test/Instrumentation/AddressSanitizer/X86/asm_mov_no_instrumentation.s
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_mov_no_instrumentation.s
@@ -5,8 +5,8 @@
 	.align	16, 0x90
 	.type	mov1b,@function
 # CHECK-LABEL: mov1b
-# CHECK-NOT: callq __sanitizer_sanitize_load1@PLT
-# CHECK-NOT: callq __sanitizer_sanitize_store1@PLT
+# CHECK-NOT: callq __asan_report_load1@PLT
+# CHECK-NOT: callq __asan_report_store1@PLT
 mov1b:                                  # @mov1b
 	.cfi_startproc
 # BB#0:
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_swap_intel.s b/test/Instrumentation/AddressSanitizer/X86/asm_swap_intel.s
index 8a6a8d5..ca3c54c 100644
--- a/test/Instrumentation/AddressSanitizer/X86/asm_swap_intel.s
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_swap_intel.s
@@ -7,38 +7,26 @@
 # CHECK-LABEL: swap:
 #
 # CHECK: leaq -128(%rsp), %rsp
-# CHECK-NEXT: pushq %rdi
-# CHECK-NEXT: leaq (%rcx), %rdi
-# CHECK-NEXT: callq __sanitizer_sanitize_load8@PLT
-# CHECK-NEXT: popq %rdi
-# CHECK-NEXT: leaq 128(%rsp), %rsp
+# CHECK: callq __asan_report_load8@PLT
+# CHECK: leaq 128(%rsp), %rsp
 #
 # CHECK-NEXT: movq (%rcx), %rax
 #
 # CHECK-NEXT: leaq -128(%rsp), %rsp
-# CHECK-NEXT: pushq %rdi
-# CHECK-NEXT: leaq (%rdx), %rdi
-# CHECK-NEXT: callq __sanitizer_sanitize_load8@PLT
-# CHECK-NEXT: popq %rdi
-# CHECK-NEXT: leaq 128(%rsp), %rsp
+# CHECK: callq __asan_report_load8@PLT
+# CHECK: leaq 128(%rsp), %rsp
 #
 # CHECK-NEXT: movq (%rdx), %rbx
 #
-# CHECK: leaq -128(%rsp), %rsp
-# CHECK-NEXT: pushq %rdi
-# CHECK-NEXT: leaq (%rcx), %rdi
-# CHECK-NEXT: callq __sanitizer_sanitize_store8@PLT
-# CHECK-NEXT: popq %rdi
-# CHECK-NEXT: leaq 128(%rsp), %rsp
+# CHECK-NEXT: leaq -128(%rsp), %rsp
+# CHECK: callq __asan_report_store8@PLT
+# CHECK: leaq 128(%rsp), %rsp
 #
 # CHECK-NEXT: movq %rbx, (%rcx)
 #
 # CHECK-NEXT: leaq -128(%rsp), %rsp
-# CHECK-NEXT: pushq %rdi
-# CHECK-NEXT: leaq (%rdx), %rdi
-# CHECK-NEXT: callq __sanitizer_sanitize_store8@PLT
-# CHECK-NEXT: popq %rdi
-# CHECK-NEXT: leaq 128(%rsp), %rsp
+# CHECK: callq __asan_report_store8@PLT
+# CHECK: leaq 128(%rsp), %rsp
 #
 # CHECK-NEXT: movq %rax, (%rdx)
 swap:                                   # @swap
diff --git a/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg b/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg
+++ b/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Instrumentation/AddressSanitizer/coverage-dbg.ll b/test/Instrumentation/AddressSanitizer/coverage-dbg.ll
index 77d7286..3f7998d 100644
--- a/test/Instrumentation/AddressSanitizer/coverage-dbg.ll
+++ b/test/Instrumentation/AddressSanitizer/coverage-dbg.ll
@@ -2,32 +2,66 @@
 
 ; RUN: opt < %s -asan -asan-module -asan-coverage=1 -S | FileCheck %s
 
+; C++ source:
+; 1: struct A {
+; 2:  int f();
+; 3:  int x;
+; 4: };
+; 5:
+; 6: int A::f() {
+; 7:    return x;
+; 8: }
+; clang++ ../1.cc -O3 -g -S -emit-llvm  -fno-strict-aliasing
+; and add sanitize_address to @_ZN1A1fEv
+
+; Test that __sanitizer_cov call has !dbg pointing to the opening { of A::f().
+; CHECK: call void @__sanitizer_cov(), !dbg [[A:!.*]]
+; CHECK: [[A]] = metadata !{i32 6, i32 0, metadata !{{.*}}, null}
+
+
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; Function Attrs: nounwind readnone uwtable
-define void @_Z1fv() #0 {
+%struct.A = type { i32 }
+
+; Function Attrs: nounwind readonly uwtable
+define i32 @_ZN1A1fEv(%struct.A* nocapture readonly %this) #0 align 2 {
 entry:
-  ret void, !dbg !11
+  tail call void @llvm.dbg.value(metadata !{%struct.A* %this}, i64 0, metadata !15), !dbg !20
+  %x = getelementptr inbounds %struct.A* %this, i64 0, i32 0, !dbg !21
+  %0 = load i32* %x, align 4, !dbg !21
+  ret i32 %0, !dbg !21
 }
 
-; CHECK: call void @__sanitizer_cov(), !dbg !
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
 
-attributes #0 = { sanitize_address nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { sanitize_address nounwind readonly uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!8, !9}
-!llvm.ident = !{!10}
+!llvm.module.flags = !{!17, !18}
+!llvm.ident = !{!19}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (208682)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp//tmp/1.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"/tmp/1.cc", metadata !"/tmp"}
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (210251)", i1 true, metadata !"", i32 0, metadata !2, metadata !3, metadata !12, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/code/llvm/build0/../1.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"../1.cc", metadata !"/code/llvm/build0"}
 !2 = metadata !{}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"_Z1fv", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_Z1fv, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp//tmp/1.cc]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!10 = metadata !{metadata !"clang version 3.5.0 (208682)"}
-!11 = metadata !{i32 2, i32 0, metadata !4, null}
+!4 = metadata !{i32 786451, metadata !1, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
+!5 = metadata !{metadata !6, metadata !8}
+!6 = metadata !{i32 786445, metadata !1, metadata !"_ZTS1A", metadata !"x", i32 3, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] [x] [line 3, size 32, align 32, offset 0] [from int]
+!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"f", metadata !"f", metadata !"_ZN1A1fEv", i32 2, metadata !9, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, null, i32 2} ; [ DW_TAG_subprogram ] [line 2] [f]
+!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = metadata !{metadata !7, metadata !11}
+!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"f", metadata !"f", metadata !"_ZN1A1fEv", i32 6, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.A*)* @_ZN1A1fEv, null, metadata !8, metadata !14, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [f]
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 786689, metadata !13, metadata !"this", null, i32 16777216, metadata !16, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!16 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
+!17 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!18 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!19 = metadata !{metadata !"clang version 3.5.0 (210251)"}
+!20 = metadata !{i32 0, i32 0, metadata !13, null}
+!21 = metadata !{i32 7, i32 0, metadata !13, null}
diff --git a/test/Instrumentation/AddressSanitizer/coverage.ll b/test/Instrumentation/AddressSanitizer/coverage.ll
index 5bc5103..79bb5c1 100644
--- a/test/Instrumentation/AddressSanitizer/coverage.ll
+++ b/test/Instrumentation/AddressSanitizer/coverage.ll
@@ -1,7 +1,20 @@
+; RUN: opt < %s -asan -asan-module -asan-coverage=0 -S | FileCheck %s --check-prefix=CHECK0
 ; RUN: opt < %s -asan -asan-module -asan-coverage=1 -S | FileCheck %s --check-prefix=CHECK1
 ; RUN: opt < %s -asan -asan-module -asan-coverage=2 -S | FileCheck %s --check-prefix=CHECK2
 ; RUN: opt < %s -asan -asan-module -asan-coverage=2 -asan-coverage-block-threshold=10 -S | FileCheck %s --check-prefix=CHECK2
 ; RUN: opt < %s -asan -asan-module -asan-coverage=2 -asan-coverage-block-threshold=1  -S | FileCheck %s --check-prefix=CHECK1
+
+; RUN: opt < %s -asan -asan-module -asan-coverage=0 -asan-globals=0 -S | \
+; RUN:     FileCheck %s --check-prefix=CHECK0
+; RUN: opt < %s -asan -asan-module -asan-coverage=1 -asan-globals=0 -S | \
+; RUN:     FileCheck %s --check-prefix=CHECK1
+; RUN: opt < %s -asan -asan-module -asan-coverage=2 -asan-globals=0 -S | \
+; RUN:     FileCheck %s --check-prefix=CHECK2
+; RUN: opt < %s -asan -asan-module -asan-coverage=2 -asan-coverage-block-threshold=10 \
+; RUN:     -asan-globals=0 -S | FileCheck %s --check-prefix=CHECK2
+; RUN: opt < %s -asan -asan-module -asan-coverage=2 -asan-coverage-block-threshold=1 \
+; RUN:     -asan-globals=0 -S | FileCheck %s --check-prefix=CHECK1
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
 define void @foo(i32* %a) sanitize_address {
@@ -17,6 +30,9 @@ entry:
   ret void
 }
 
+; CHECK0-NOT: call void @__sanitizer_cov(
+; CHECK0-NOT: call void @__sanitizer_cov_module_init(
+
 ; CHECK1-LABEL: define void @foo
 ; CHECK1: %0 = load atomic i8* @__asan_gen_cov_foo monotonic, align 1
 ; CHECK1: %1 = icmp eq i8 0, %0
diff --git a/test/Instrumentation/AddressSanitizer/debug_info.ll b/test/Instrumentation/AddressSanitizer/debug_info.ll
index daf2957..336b98b 100644
--- a/test/Instrumentation/AddressSanitizer/debug_info.ll
+++ b/test/Instrumentation/AddressSanitizer/debug_info.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+; RUN: opt < %s -asan -asan-module -asan-use-after-return=0 -S | FileCheck %s
 
 ; Checks that llvm.dbg.declare instructions are updated 
 ; accordingly as we merge allocas.
@@ -47,8 +47,9 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 ; Verify that debug descriptors for argument and local variable will be replaced
 ; with descriptors that end with OpDeref (encoded as 2).
-;   CHECK: ![[ARG_ID]] = metadata {{.*}} i64 2} ; [ DW_TAG_arg_variable ] [p] [line 1]
-;   CHECK: ![[VAR_ID]] = metadata {{.*}} i64 2} ; [ DW_TAG_auto_variable ] [r] [line 2]
+;   CHECK: ![[ARG_ID]] = {{.*}}metadata ![[OPDEREF:[0-9]+]]} ; [ DW_TAG_arg_variable ] [p] [line 1]
+;   CHECK: ![[OPDEREF]] = metadata !{i64 2}
+;   CHECK: ![[VAR_ID]] = {{.*}}metadata ![[OPDEREF]]} ; [ DW_TAG_auto_variable ] [r] [line 2]
 ; Verify that there are no more variable descriptors.
 ;   CHECK-NOT: DW_TAG_arg_variable
 ;   CHECK-NOT: DW_TAG_auto_variable
diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata.ll
index fbfc096..d02f12a 100644
--- a/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata.ll
+++ b/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata.ll
@@ -5,7 +5,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 @.str_noinst = private unnamed_addr constant [4 x i8] c"aaa\00", section "llvm.metadata"
-@.str_inst = private unnamed_addr constant [4 x i8] c"aaa\00",
+@.str_inst = private unnamed_addr constant [4 x i8] c"aaa\00"
 
 ; CHECK-NOT: {{asan_gen.*str_noinst}}
 ; CHECK: {{asan_gen.*str_inst}}
diff --git a/test/Instrumentation/AddressSanitizer/global_metadata.ll b/test/Instrumentation/AddressSanitizer/global_metadata.ll
new file mode 100644
index 0000000..9641c3e
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/global_metadata.ll
@@ -0,0 +1,63 @@
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Globals:
+@global = global i32 0, align 4
+@dyn_init_global = global i32 0, align 4
+@blacklisted_global = global i32 0, align 4
+@_ZZ4funcvE10static_var = internal global i32 0, align 4
+@.str = private unnamed_addr constant [14 x i8] c"Hello, world!\00", align 1
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_asan_globals.cpp, i8* null }]
+
+; Sanitizer location descriptors:
+@.str1 = private unnamed_addr constant [22 x i8] c"/tmp/asan-globals.cpp\00", align 1
+@.asan_loc_descr = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 5, i32 5 }
+@.asan_loc_descr1 = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 7, i32 5 }
+@.asan_loc_descr2 = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 12, i32 14 }
+@.asan_loc_descr4 = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 14, i32 25 }
+
+; Check that globals were instrumented, but sanitizer location descriptors weren't:
+; CHECK: @global = global { i32, [60 x i8] } zeroinitializer, align 32
+; CHECK: @.str = internal unnamed_addr constant { [14 x i8], [50 x i8] } { [14 x i8] c"Hello, world!\00", [50 x i8] zeroinitializer }, align 32
+; CHECK: @.asan_loc_descr = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 5, i32 5 }
+
+; Check that location decriptors were passed into __asan_register_globals:
+; CHECK: i64 ptrtoint ({ [22 x i8]*, i32, i32 }* @.asan_loc_descr to i64)
+
+; Function Attrs: nounwind sanitize_address
+define internal void @__cxx_global_var_init() #0 section ".text.startup" {
+entry:
+  %0 = load i32* @global, align 4
+  store i32 %0, i32* @dyn_init_global, align 4
+  ret void
+}
+
+; Function Attrs: nounwind sanitize_address
+define void @_Z4funcv() #1 {
+entry:
+  %literal = alloca i8*, align 8
+  store i8* getelementptr inbounds ([14 x i8]* @.str, i32 0, i32 0), i8** %literal, align 8
+  ret void
+}
+
+; Function Attrs: nounwind sanitize_address
+define internal void @_GLOBAL__sub_I_asan_globals.cpp() #0 section ".text.startup" {
+entry:
+  call void @__cxx_global_var_init()
+  ret void
+}
+
+attributes #0 = { nounwind sanitize_address }
+attributes #1 = { nounwind sanitize_address "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.asan.globals = !{!0, !1, !2, !3, !4}
+!llvm.ident = !{!5}
+
+!0 = metadata !{i32* @global, { [22 x i8]*, i32, i32 }* @.asan_loc_descr, i1 false, i1 false}
+!1 = metadata !{i32* @dyn_init_global, { [22 x i8]*, i32, i32 }* @.asan_loc_descr1, i1 true, i1 false}
+!2 = metadata !{i32* @blacklisted_global, null, i1 false, i1 true}
+!3 = metadata !{i32* @_ZZ4funcvE10static_var, { [22 x i8]*, i32, i32 }* @.asan_loc_descr2, i1 false, i1 false}
+!4 = metadata !{[14 x i8]* @.str, { [22 x i8]*, i32, i32 }* @.asan_loc_descr4, i1 false, i1 false}
+!5 = metadata !{metadata !"clang version 3.5.0 (211282)"}
diff --git a/test/Instrumentation/AddressSanitizer/instrument_global.ll b/test/Instrumentation/AddressSanitizer/instrument_global.ll
index 7945e81..816ab29 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_global.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_global.ll
@@ -68,8 +68,8 @@ entry:
 }
 
 
-!llvm.asan.dynamically_initialized_globals = !{!0}
-!0 = metadata !{[10 x i32]* @GlobDy}
+!llvm.asan.globals = !{!0}
+!0 = metadata !{[10 x i32]* @GlobDy, null, i1 true, i1 false}
 
 ; CHECK-LABEL: define internal void @asan.module_ctor
 ; CHECK-NOT: ret
diff --git a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
index 1d00cfa..83ff53f 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
@@ -7,9 +7,11 @@ target triple = "x86_64-unknown-linux-gnu"
 @YYY = global i32 0, align 4           ; W/o dynamic initializer.
 ; Clang will emit the following metadata identifying @xxx as dynamically
 ; initialized.
-!0 = metadata !{i32* @xxx}
-!1 = metadata !{i32* @XXX}
-!llvm.asan.dynamically_initialized_globals = !{!0, !1}
+!0 = metadata !{i32* @xxx, null, i1 true, i1 false}
+!1 = metadata !{i32* @XXX, null, i1 true, i1 false}
+!2 = metadata !{i32* @yyy, null, i1 false, i1 false}
+!3 = metadata !{i32* @YYY, null, i1 false, i1 false}
+!llvm.asan.globals = !{!0, !1, !2, !3}
 
 define i32 @initializer() uwtable {
 entry:
@@ -23,6 +25,8 @@ entry:
   ret void
 }
 
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+
 define internal void @_GLOBAL__I_a() sanitize_address section ".text.startup" {
 entry:
   call void @__cxx_global_var_init()
diff --git a/test/Instrumentation/AddressSanitizer/lifetime.ll b/test/Instrumentation/AddressSanitizer/lifetime.ll
index 1961997..175a07d 100644
--- a/test/Instrumentation/AddressSanitizer/lifetime.ll
+++ b/test/Instrumentation/AddressSanitizer/lifetime.ll
@@ -1,5 +1,5 @@
 ; Test hanlding of llvm.lifetime intrinsics.
-; RUN: opt < %s -asan -asan-module -asan-check-lifetime -S | FileCheck %s
+; RUN: opt < %s -asan -asan-module -asan-check-lifetime -asan-use-after-return=0 -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Instrumentation/AddressSanitizer/stack-poisoning.ll b/test/Instrumentation/AddressSanitizer/stack-poisoning.ll
index 6919e53..ace12d0 100644
--- a/test/Instrumentation/AddressSanitizer/stack-poisoning.ll
+++ b/test/Instrumentation/AddressSanitizer/stack-poisoning.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -asan -asan-module -asan-use-after-return -S | FileCheck --check-prefix=CHECK-UAR %s
-; RUN: opt < %s -asan -asan-module -S | FileCheck --check-prefix=CHECK-PLAIN %s
+; RUN: opt < %s -asan -asan-module -asan-use-after-return=0 -S | FileCheck --check-prefix=CHECK-PLAIN %s
 target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/test/Instrumentation/BoundsChecking/phi.ll b/test/Instrumentation/BoundsChecking/phi.ll
index 0f9d1b0..25a5ed1 100644
--- a/test/Instrumentation/BoundsChecking/phi.ll
+++ b/test/Instrumentation/BoundsChecking/phi.ll
@@ -52,7 +52,7 @@ fn.exit:
 }
 
 
-@global_as1 = private addrspace(1) unnamed_addr constant [10 x i8] c"ola\00mundo\00", align 1
+@global_as1 = private unnamed_addr addrspace(1) constant [10 x i8] c"ola\00mundo\00", align 1
 
 define void @f1_as1(i8 addrspace(1)* nocapture %c) {
 ; CHECK: @f1_as1
diff --git a/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll b/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll
index 1a56460..f3c36b1 100644
--- a/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll
+++ b/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll
@@ -8,7 +8,15 @@ module asm ".symver f1,f@@version1"
 ; CHECK: @"dfs$f2" = alias {{.*}} @"dfs$f1"
 @f2 = alias void ()* @f1
 
+; CHECK: @"dfs$g2" = alias {{.*}} @"dfs$g1"
+@g2 = alias bitcast (void (i8*)* @g1 to void (i16*)*)
+
 ; CHECK: define void @"dfs$f1"
 define void @f1() {
   ret void
 }
+
+; CHECK: define void @"dfs$g1"
+define void @g1(i8*) {
+  ret void
+}
diff --git a/test/Instrumentation/MemorySanitizer/atomics.ll b/test/Instrumentation/MemorySanitizer/atomics.ll
index 98697d7..c8f3b88 100644
--- a/test/Instrumentation/MemorySanitizer/atomics.ll
+++ b/test/Instrumentation/MemorySanitizer/atomics.ll
@@ -37,12 +37,13 @@ entry:
 
 define i32 @Cmpxchg(i32* %p, i32 %a, i32 %b) sanitize_memory {
 entry:
-  %0 = cmpxchg i32* %p, i32 %a, i32 %b seq_cst seq_cst
+  %pair = cmpxchg i32* %p, i32 %a, i32 %b seq_cst seq_cst
+  %0 = extractvalue { i32, i1 } %pair, 0
   ret i32 %0
 }
 
 ; CHECK: @Cmpxchg
-; CHECK: store i32 0,
+; CHECK: store { i32, i1 } zeroinitializer,
 ; CHECK: icmp
 ; CHECK: br
 ; CHECK: @__msan_warning
@@ -55,12 +56,13 @@ entry:
 
 define i32 @CmpxchgMonotonic(i32* %p, i32 %a, i32 %b) sanitize_memory {
 entry:
-  %0 = cmpxchg i32* %p, i32 %a, i32 %b monotonic monotonic
+  %pair = cmpxchg i32* %p, i32 %a, i32 %b monotonic monotonic
+  %0 = extractvalue { i32, i1 } %pair, 0
   ret i32 %0
 }
 
 ; CHECK: @CmpxchgMonotonic
-; CHECK: store i32 0,
+; CHECK: store { i32, i1 } zeroinitializer,
 ; CHECK: icmp
 ; CHECK: br
 ; CHECK: @__msan_warning
diff --git a/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll b/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll
index 34988ef..beb3c5f 100644
--- a/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll
+++ b/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll
@@ -1,7 +1,10 @@
 ; Test -msan-instrumentation-with-call-threshold
+; Test that in with-calls mode there are no calls to __msan_chain_origin - they
+; are done from __msan_maybe_store_origin_*.
 
 ; RUN: opt < %s -msan -msan-check-access-address=0 -msan-instrumentation-with-call-threshold=0 -S | FileCheck %s
 ; RUN: opt < %s -msan -msan-check-access-address=0 -msan-instrumentation-with-call-threshold=0 -msan-track-origins=1 -S | FileCheck -check-prefix=CHECK -check-prefix=CHECK-ORIGINS %s
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-instrumentation-with-call-threshold=0 -msan-track-origins=2 -S | FileCheck -check-prefix=CHECK -check-prefix=CHECK-ORIGINS %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -41,7 +44,10 @@ entry:
 ; CHECK: load {{.*}} @__msan_param_tls
 ; CHECK-ORIGINS: load {{.*}} @__msan_param_origin_tls
 ; CHECK: store
+; CHECK-ORIGINS-NOT: __msan_chain_origin
 ; CHECK-ORIGINS: bitcast i64* {{.*}} to i8*
+; CHECK-ORIGINS-NOT: __msan_chain_origin
 ; CHECK-ORIGINS: call void @__msan_maybe_store_origin_8(
+; CHECK-ORIGINS-NOT: __msan_chain_origin
 ; CHECK: store i64
 ; CHECK: ret void
diff --git a/test/Instrumentation/MemorySanitizer/missing_origin.ll b/test/Instrumentation/MemorySanitizer/missing_origin.ll
new file mode 100644
index 0000000..673e853
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/missing_origin.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Test that result origin is directy propagated from the argument,
+; and is not affected by all the literal undef operands.
+; https://code.google.com/p/memory-sanitizer/issues/detail?id=56
+
+define <4 x i32> @Shuffle(<4 x i32> %x) nounwind uwtable sanitize_memory {
+entry:
+  %y = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  ret <4 x i32> %y
+}
+
+; CHECK-LABEL: @Shuffle(
+; CHECK: [[A:%.*]] = load i32* {{.*}}@__msan_param_origin_tls,
+; CHECK: store i32 [[A]], i32* @__msan_retval_origin_tls
+; CHECK: ret <4 x i32>
diff --git a/test/Instrumentation/MemorySanitizer/msan_basic.ll b/test/Instrumentation/MemorySanitizer/msan_basic.ll
index 6b71310..51693cd 100644
--- a/test/Instrumentation/MemorySanitizer/msan_basic.ll
+++ b/test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -651,7 +651,7 @@ define void @VACopy(i8* %p1, i8* %p2) nounwind uwtable sanitize_memory {
 declare void @llvm.va_start(i8*) nounwind
 
 ; Function Attrs: nounwind uwtable
-define void @VAStart(i32 %x, ...) {
+define void @VAStart(i32 %x, ...) sanitize_memory {
 entry:
   %x.addr = alloca i32, align 4
   %va = alloca [1 x %struct.__va_list_tag], align 16
@@ -683,7 +683,7 @@ entry:
 ; CHECK: ret void
 
 
-; Test that checks are omitted but shadow propagation is kept if
+; Test that checks are omitted and returned value is always initialized if
 ; sanitize_memory attribute is missing.
 
 define i32 @NoSanitizeMemory(i32 %x) uwtable {
@@ -703,9 +703,7 @@ declare void @bar()
 
 ; CHECK: @NoSanitizeMemory
 ; CHECK-NOT: @__msan_warning
-; CHECK: load i32* {{.*}} @__msan_param_tls
-; CHECK-NOT: @__msan_warning
-; CHECK: store {{.*}} @__msan_retval_tls
+; CHECK: store i32 0, {{.*}} @__msan_retval_tls
 ; CHECK-NOT: @__msan_warning
 ; CHECK: ret i32
 
@@ -745,6 +743,29 @@ declare i32 @NoSanitizeMemoryUndefHelper(i32 %x)
 ; CHECK: ret i32
 
 
+; Test PHINode instrumentation in blacklisted functions
+
+define i32 @NoSanitizeMemoryPHI(i32 %x) {
+entry:
+  %tobool = icmp ne i32 %x, 0
+  br i1 %tobool, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ undef, %cond.true ], [ undef, %cond.false ]
+  ret i32 %cond
+}
+
+; CHECK: [[A:%.*]] = phi i32 [ undef, %cond.true ], [ undef, %cond.false ]
+; CHECK: store i32 0, i32* bitcast {{.*}} @__msan_retval_tls
+; CHECK: ret i32 [[A]]
+
+
 ; Test argument shadow alignment
 
 define <2 x i64> @ArgumentShadowAlignment(i64 %a, <2 x i64> %b) sanitize_memory {
@@ -825,3 +846,17 @@ entry:
 ; CHECK: store i64 16, i64* @__msan_va_arg_overflow_size_tls
 ; CHECK: call void (i32, ...)* @VAArgStructFn
 ; CHECK: ret void
+
+declare i32 @InnerTailCall(i32 %a)
+
+define void @MismatchedReturnTypeTailCall(i32 %a) sanitize_memory {
+  %b = tail call i32 @InnerTailCall(i32 %a)
+  ret void
+}
+
+; We used to strip off the 'tail' modifier, but now that we unpoison return slot
+; shadow before the call, we don't need to anymore.
+
+; CHECK-LABEL: define void @MismatchedReturnTypeTailCall
+; CHECK: tail call i32 @InnerTailCall
+; CHECK: ret void
diff --git a/test/Instrumentation/MemorySanitizer/mul_by_constant.ll b/test/Instrumentation/MemorySanitizer/mul_by_constant.ll
new file mode 100644
index 0000000..e068f69
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/mul_by_constant.ll
@@ -0,0 +1,94 @@
+; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check instrumentation mul when one of the operands is a constant.
+
+define i64 @MulConst(i64 %x) sanitize_memory {
+entry:
+  %y = mul i64 %x, 42949672960000
+  ret i64 %y
+}
+
+; 42949672960000 = 2**32 * 10000
+; 36 trailing zero bits
+; 68719476736 = 2**36
+
+; CHECK-LABEL: @MulConst(
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls
+; CHECK: [[B:%.*]] = mul i64 [[A]], 68719476736
+; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls
+
+
+define i64 @MulZero(i64 %x) sanitize_memory {
+entry:
+  %y = mul i64 %x, 0
+  ret i64 %y
+}
+
+; CHECK-LABEL: @MulZero(
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls
+; CHECK: [[B:%.*]] = mul i64 [[A]], 0{{$}}
+; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls
+
+
+define i64 @MulNeg(i64 %x) sanitize_memory {
+entry:
+  %y = mul i64 %x, -16
+  ret i64 %y
+}
+
+; CHECK-LABEL: @MulNeg(
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls
+; CHECK: [[B:%.*]] = mul i64 [[A]], 16
+; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls
+
+
+define i64 @MulNeg2(i64 %x) sanitize_memory {
+entry:
+  %y = mul i64 %x, -48
+  ret i64 %y
+}
+
+; CHECK-LABEL: @MulNeg2(
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls
+; CHECK: [[B:%.*]] = mul i64 [[A]], 16
+; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls
+
+
+define i64 @MulOdd(i64 %x) sanitize_memory {
+entry:
+  %y = mul i64 %x, 12345
+  ret i64 %y
+}
+
+; CHECK-LABEL: @MulOdd(
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls
+; CHECK: [[B:%.*]] = mul i64 [[A]], 1
+; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls
+
+
+define i64 @MulLarge(i64 %x) sanitize_memory {
+entry:
+  %y = mul i64 %x, -9223372036854775808
+  ret i64 %y
+}
+
+; -9223372036854775808 = 0x7000000000000000
+
+; CHECK-LABEL: @MulLarge(
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls
+; CHECK: [[B:%.*]] = mul i64 [[A]], -9223372036854775808
+; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls
+
+define <4 x i32> @MulVectorConst(<4 x i32> %x) sanitize_memory {
+entry:
+  %y = mul <4 x i32> %x, <i32 3072, i32 0, i32 -16, i32 -48>
+  ret <4 x i32> %y
+}
+
+; CHECK-LABEL: @MulVectorConst(
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls
+; CHECK: [[B:%.*]] = mul <4 x i32> [[A]], <i32 1024, i32 0, i32 16, i32 16>
+; CHECK: store <4 x i32> [[B]], <4 x i32>* {{.*}} @__msan_retval_tls
diff --git a/test/Instrumentation/MemorySanitizer/store-origin.ll b/test/Instrumentation/MemorySanitizer/store-origin.ll
index 024a10a..0bd9777 100644
--- a/test/Instrumentation/MemorySanitizer/store-origin.ll
+++ b/test/Instrumentation/MemorySanitizer/store-origin.ll
@@ -20,7 +20,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata) #1
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind sanitize_memory "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/test/Instrumentation/MemorySanitizer/vector_arith.ll b/test/Instrumentation/MemorySanitizer/vector_arith.ll
new file mode 100644
index 0000000..6541a1c
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/vector_arith.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
+declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
+declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
+
+define <4 x i32> @Test_sse2_pmadd_wd(<8 x i16> %a, <8 x i16> %b) sanitize_memory {
+entry:
+  %c = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b) nounwind
+  ret <4 x i32> %c
+}
+
+; CHECK-LABEL: @Test_sse2_pmadd_wd(
+; CHECK: or <8 x i16>
+; CHECK: bitcast <8 x i16> {{.*}} to <4 x i32>
+; CHECK: icmp ne <4 x i32> {{.*}}, zeroinitializer
+; CHECK: sext <4 x i1> {{.*}} to <4 x i32>
+; CHECK: ret <4 x i32>
+
+
+define x86_mmx @Test_ssse3_pmadd_ub_sw(x86_mmx %a, x86_mmx %b) sanitize_memory {
+entry:
+  %c = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a, x86_mmx %b) nounwind
+  ret x86_mmx %c
+}
+
+; CHECK-LABEL: @Test_ssse3_pmadd_ub_sw(
+; CHECK: or i64
+; CHECK: bitcast i64 {{.*}} to <4 x i16>
+; CHECK: icmp ne <4 x i16> {{.*}}, zeroinitializer
+; CHECK: sext <4 x i1> {{.*}} to <4 x i16>
+; CHECK: bitcast <4 x i16> {{.*}} to i64
+; CHECK: ret x86_mmx
+
+
+define <2 x i64> @Test_x86_sse2_psad_bw(<16 x i8> %a, <16 x i8> %b) sanitize_memory {
+  %c = tail call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a, <16 x i8> %b)
+  ret <2 x i64> %c
+}
+
+; CHECK-LABEL: @Test_x86_sse2_psad_bw(
+; CHECK: or <16 x i8> {{.*}}, {{.*}}
+; CHECK: bitcast <16 x i8> {{.*}} to <2 x i64>
+; CHECK: icmp ne <2 x i64> {{.*}}, zeroinitializer
+; CHECK: sext <2 x i1> {{.*}} to <2 x i64>
+; CHECK: lshr <2 x i64> {{.*}}, <i64 48, i64 48>
+; CHECK: ret <2 x i64>
+
+
+define x86_mmx @Test_x86_mmx_psad_bw(x86_mmx %a, x86_mmx %b) sanitize_memory {
+entry:
+  %c = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind
+  ret x86_mmx %c
+}
+
+; CHECK-LABEL: @Test_x86_mmx_psad_bw(
+; CHECK: or i64
+; CHECK: icmp ne i64
+; CHECK: sext i1 {{.*}} to i64
+; CHECK: lshr i64 {{.*}}, 48
+; CHECK: ret x86_mmx
diff --git a/test/Instrumentation/MemorySanitizer/vector_pack.ll b/test/Instrumentation/MemorySanitizer/vector_pack.ll
new file mode 100644
index 0000000..31c0c62
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/vector_pack.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
+declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
+
+define <8 x i16> @Test_packssdw_128(<4 x i32> %a, <4 x i32> %b) sanitize_memory {
+entry:
+  %c = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b) nounwind
+  ret <8 x i16> %c
+}
+
+; CHECK-LABEL: @Test_packssdw_128(
+; CHECK-DAG: icmp ne <4 x i32> {{.*}}, zeroinitializer
+; CHECK-DAG: sext <4 x i1> {{.*}} to <4 x i32>
+; CHECK-DAG: icmp ne <4 x i32> {{.*}}, zeroinitializer
+; CHECK-DAG: sext <4 x i1> {{.*}} to <4 x i32>
+; CHECK-DAG: call <8 x i16> @llvm.x86.sse2.packssdw.128(
+; CHECK-DAG: call <8 x i16> @llvm.x86.sse2.packssdw.128(
+; CHECK: ret <8 x i16>
+
+
+define <32 x i8> @Test_avx_packuswb(<16 x i16> %a, <16 x i16> %b) sanitize_memory {
+entry:
+  %c = tail call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b) nounwind
+  ret <32 x i8> %c
+}
+
+; CHECK-LABEL: @Test_avx_packuswb(
+; CHECK-DAG: icmp ne <16 x i16> {{.*}}, zeroinitializer
+; CHECK-DAG: sext <16 x i1> {{.*}} to <16 x i16>
+; CHECK-DAG: icmp ne <16 x i16> {{.*}}, zeroinitializer
+; CHECK-DAG: sext <16 x i1> {{.*}} to <16 x i16>
+; CHECK-DAG: call <32 x i8> @llvm.x86.avx2.packsswb(
+; CHECK-DAG: call <32 x i8> @llvm.x86.avx2.packuswb(
+; CHECK: ret <32 x i8>
+
+
+define x86_mmx @Test_mmx_packuswb(x86_mmx %a, x86_mmx %b) sanitize_memory {
+entry:
+  %c = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a, x86_mmx %b) nounwind
+  ret x86_mmx %c
+}
+
+; CHECK-LABEL: @Test_mmx_packuswb(
+; CHECK-DAG: bitcast i64 {{.*}} to <4 x i16>
+; CHECK-DAG: bitcast i64 {{.*}} to <4 x i16>
+; CHECK-DAG: icmp ne <4 x i16> {{.*}}, zeroinitializer
+; CHECK-DAG: sext <4 x i1> {{.*}} to <4 x i16>
+; CHECK-DAG: icmp ne <4 x i16> {{.*}}, zeroinitializer
+; CHECK-DAG: sext <4 x i1> {{.*}} to <4 x i16>
+; CHECK-DAG: bitcast <4 x i16> {{.*}} to x86_mmx
+; CHECK-DAG: bitcast <4 x i16> {{.*}} to x86_mmx
+; CHECK-DAG: call x86_mmx @llvm.x86.mmx.packsswb({{.*}}
+; CHECK-DAG: bitcast x86_mmx {{.*}} to i64
+; CHECK-DAG: call x86_mmx @llvm.x86.mmx.packuswb({{.*}}
+; CHECK: ret x86_mmx
diff --git a/test/Instrumentation/MemorySanitizer/vector_shift.ll b/test/Instrumentation/MemorySanitizer/vector_shift.ll
index d32f51b..91e4bd5 100644
--- a/test/Instrumentation/MemorySanitizer/vector_shift.ll
+++ b/test/Instrumentation/MemorySanitizer/vector_shift.ll
@@ -13,7 +13,7 @@ declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)
 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32)
 declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32)
 
-define i64 @test_mmx(i64 %x.coerce, i64 %y.coerce) {
+define i64 @test_mmx(i64 %x.coerce, i64 %y.coerce) sanitize_memory {
 entry:
   %0 = bitcast i64 %x.coerce to <2 x i32>
   %1 = bitcast <2 x i32> %0 to x86_mmx
@@ -35,7 +35,7 @@ entry:
 ; CHECK: ret i64
 
 
-define <8 x i16> @test_sse2_scalar(<8 x i16> %x, i32 %y) {
+define <8 x i16> @test_sse2_scalar(<8 x i16> %x, i32 %y) sanitize_memory {
 entry:
   %0 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %x, i32 %y)
   ret <8 x i16> %0
@@ -51,7 +51,7 @@ entry:
 ; CHECK: ret <8 x i16>
 
 
-define <8 x i16> @test_sse2(<8 x i16> %x, <8 x i16> %y) {
+define <8 x i16> @test_sse2(<8 x i16> %x, <8 x i16> %y) sanitize_memory {
 entry:
   %0 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %x, <8 x i16> %y)
   ret <8 x i16> %0
@@ -71,7 +71,7 @@ entry:
 
 ; Test variable shift (i.e. vector by vector).
 
-define <4 x i32> @test_avx2(<4 x i32> %x, <4 x i32> %y) {
+define <4 x i32> @test_avx2(<4 x i32> %x, <4 x i32> %y) sanitize_memory {
 entry:
   %0 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %x, <4 x i32> %y)
   ret <4 x i32> %0
@@ -85,7 +85,7 @@ entry:
 ; CHECK: = tail call <4 x i32> @llvm.x86.avx2.psllv.d(
 ; CHECK: ret <4 x i32>
 
-define <8 x i32> @test_avx2_256(<8 x i32> %x, <8 x i32> %y) {
+define <8 x i32> @test_avx2_256(<8 x i32> %x, <8 x i32> %y) sanitize_memory {
 entry:
   %0 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %x, <8 x i32> %y)
   ret <8 x i32> %0
diff --git a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
index d449a97..dc6e43e 100644
--- a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
+++ b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
@@ -27,7 +27,7 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
 
 ; Check that tsan converts mem intrinsics back to function calls.
 
-define void @MemCpyTest(i8* nocapture %x, i8* nocapture %y) {
+define void @MemCpyTest(i8* nocapture %x, i8* nocapture %y) sanitize_thread {
 entry:
     tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false)
     ret void
@@ -36,7 +36,7 @@ entry:
 ; CHECK: ret void
 }
 
-define void @MemMoveTest(i8* nocapture %x, i8* nocapture %y) {
+define void @MemMoveTest(i8* nocapture %x, i8* nocapture %y) sanitize_thread {
 entry:
     tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false)
     ret void
@@ -45,7 +45,7 @@ entry:
 ; CHECK: ret void
 }
 
-define void @MemSetTest(i8* nocapture %x)  {
+define void @MemSetTest(i8* nocapture %x) sanitize_thread {
 entry:
     tail call void @llvm.memset.p0i8.i64(i8* %x, i8 77, i64 16, i32 4, i1 false)
     ret void
diff --git a/test/LTO/jump-table-type.ll b/test/LTO/jump-table-type.ll
new file mode 100644
index 0000000..a39d3e9
--- /dev/null
+++ b/test/LTO/jump-table-type.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as <%s >%t1
+; RUN: llvm-lto -o %t2 %t1 -jump-table-type=arity
+; RUN: llvm-nm %t2 | FileCheck %s
+
+; CHECK: T __llvm_jump_instr_table_0_1
+; CHECK: T __llvm_jump_instr_table_1_1
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @g(i32 %a) unnamed_addr jumptable {
+  ret i32 %a
+}
+
+define i32 @f() unnamed_addr jumptable {
+  ret i32 0
+}
+
+define i32 @main() {
+  ret i32 0
+}
+
+@llvm.used = appending global [2 x i8*]  [i8* bitcast (i32(i32)* @g to i8*),
+                                          i8* bitcast (i32()* @f to i8*)]
diff --git a/test/LTO/lit.local.cfg b/test/LTO/lit.local.cfg
index 6df0e03..afde89b 100644
--- a/test/LTO/lit.local.cfg
+++ b/test/LTO/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
   config.unsupported = True
diff --git a/test/LTO/no-undefined-puts-when-implemented.ll b/test/LTO/no-undefined-puts-when-implemented.ll
index 18f5d21..29db8a6 100644
--- a/test/LTO/no-undefined-puts-when-implemented.ll
+++ b/test/LTO/no-undefined-puts-when-implemented.ll
@@ -1,6 +1,6 @@
 ; RUN: llvm-as <%s >%t1
 ; RUN: llvm-lto -exported-symbol=_uses_puts -exported-symbol=_uses_printf -o - %t1 | \
-; RUN: llvm-nm | \
+; RUN: llvm-nm - | \
 ; RUN: FileCheck %s
 ; rdar://problem/16165191
 ; runtime library implementations should not be renamed
diff --git a/test/Linker/Inputs/PR8300.b.ll b/test/Linker/Inputs/PR8300.b.ll
index 362d309..9e538f5 100644
--- a/test/Linker/Inputs/PR8300.b.ll
+++ b/test/Linker/Inputs/PR8300.b.ll
@@ -1,7 +1,7 @@
 %foo = type { [8 x i8] }
 %bar = type { [9 x i8] }
 
-@zed = alias void (%foo*), void (%bar*)* @xyz
+@zed = alias bitcast (void (%bar*)* @xyz to void (%foo*)*)
 
 define void @xyz(%bar* %this) {
 entry:
diff --git a/test/Linker/Inputs/alias.ll b/test/Linker/Inputs/alias.ll
index b869cae..f379476 100644
--- a/test/Linker/Inputs/alias.ll
+++ b/test/Linker/Inputs/alias.ll
@@ -1,3 +1,3 @@
 @zed = global i32 42
 @foo = alias i32* @zed
-@foo2 = alias i16, i32* @zed
+@foo2 = alias bitcast (i32* @zed to i16*)
diff --git a/test/Linker/Inputs/comdat.ll b/test/Linker/Inputs/comdat.ll
new file mode 100644
index 0000000..fdcca49
--- /dev/null
+++ b/test/Linker/Inputs/comdat.ll
@@ -0,0 +1,20 @@
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+$foo = comdat largest
+@foo = global i64 43, comdat $foo
+
+define i32 @bar() comdat $foo {
+  ret i32 43
+}
+
+$qux = comdat largest
+@qux = global i32 13, comdat $qux
+@in_unselected_group = global i32 13, comdat $qux
+
+define i32 @baz() comdat $qux {
+  ret i32 13
+}
+
+$any = comdat any
+@any = global i64 7, comdat $any
diff --git a/test/Linker/Inputs/comdat2.ll b/test/Linker/Inputs/comdat2.ll
new file mode 100644
index 0000000..9e18304
--- /dev/null
+++ b/test/Linker/Inputs/comdat2.ll
@@ -0,0 +1,2 @@
+$foo = comdat largest
+@foo = global i64 43, comdat $foo
diff --git a/test/Linker/Inputs/comdat3.ll b/test/Linker/Inputs/comdat3.ll
new file mode 100644
index 0000000..06f08b9
--- /dev/null
+++ b/test/Linker/Inputs/comdat3.ll
@@ -0,0 +1,2 @@
+$foo = comdat noduplicates
+@foo = global i64 43, comdat $foo
diff --git a/test/Linker/Inputs/comdat4.ll b/test/Linker/Inputs/comdat4.ll
new file mode 100644
index 0000000..bbfe3f7
--- /dev/null
+++ b/test/Linker/Inputs/comdat4.ll
@@ -0,0 +1,5 @@
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+$foo = comdat samesize
+@foo = global i64 42, comdat $foo
diff --git a/test/Linker/Inputs/comdat5.ll b/test/Linker/Inputs/comdat5.ll
new file mode 100644
index 0000000..800af18
--- /dev/null
+++ b/test/Linker/Inputs/comdat5.ll
@@ -0,0 +1,15 @@
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+%MSRTTICompleteObjectLocator = type { i32, i32, i32, i8*, %MSRTTIClassHierarchyDescriptor* }
+%MSRTTIClassHierarchyDescriptor = type { i32, i32, i32, %MSRTTIBaseClassDescriptor** }
+%MSRTTIBaseClassDescriptor = type { i8*, i32, i32, i32, i32, i32, %MSRTTIClassHierarchyDescriptor* }
+%struct.S = type { i32 (...)** }
+
+$"\01??_7S@@6B@" = comdat largest
+
+@"\01??_R4S@@6B@" = external constant %MSRTTICompleteObjectLocator
+@some_name = private unnamed_addr constant [2 x i8*] [i8* bitcast (%MSRTTICompleteObjectLocator* @"\01??_R4S@@6B@" to i8*), i8* bitcast (void (%struct.S*, i32)* @"\01??_GS@@UAEPAXI@Z" to i8*)], comdat $"\01??_7S@@6B@"
+@"\01??_7S@@6B@" = alias getelementptr([2 x i8*]* @some_name, i32 0, i32 1)
+
+declare x86_thiscallcc void @"\01??_GS@@UAEPAXI@Z"(%struct.S*, i32) unnamed_addr
diff --git a/test/Linker/Inputs/cycle.ll b/test/Linker/Inputs/cycle.ll
deleted file mode 100644
index d0eddb6..0000000
--- a/test/Linker/Inputs/cycle.ll
+++ /dev/null
@@ -1,2 +0,0 @@
-@foo = alias i32* @bar
-@bar = weak global i32 0
diff --git a/test/Linker/alias.ll b/test/Linker/alias.ll
index 5809a15..bce51ad 100644
--- a/test/Linker/alias.ll
+++ b/test/Linker/alias.ll
@@ -5,12 +5,12 @@
 ; CHECK-DAG: @foo = alias i32* @zed
 
 @bar = alias i32* @foo
-; CHECK-DAG: @bar = alias i32* @zed
+; CHECK-DAG: @bar = alias i32* @foo
 
 @foo2 = weak global i32 0
-; CHECK-DAG: @foo2 = alias i16, i32* @zed
+; CHECK-DAG: @foo2 = alias bitcast (i32* @zed to i16*)
 
 @bar2 = alias i32* @foo2
-; CHECK-DAG: @bar2 = alias i32* @zed
+; CHECK-DAG: @bar2 = alias bitcast (i16* @foo2 to i32*)
 
 ; CHECK-DAG: @zed = global i32 42
diff --git a/test/Linker/comdat.ll b/test/Linker/comdat.ll
new file mode 100644
index 0000000..4d2aef7
--- /dev/null
+++ b/test/Linker/comdat.ll
@@ -0,0 +1,32 @@
+; RUN: llvm-link %s %p/Inputs/comdat.ll -S -o - | FileCheck %s
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+$foo = comdat largest
+@foo = global i32 42, comdat $foo
+
+define i32 @bar() comdat $foo {
+  ret i32 42
+}
+
+$qux = comdat largest
+@qux = global i64 12, comdat $qux
+
+define i32 @baz() comdat $qux {
+  ret i32 12
+}
+
+$any = comdat any
+@any = global i64 6, comdat $any
+
+; CHECK: $qux = comdat largest
+; CHECK: $foo = comdat largest
+; CHECK: $any = comdat any
+
+; CHECK: @qux = global i64 12, comdat $qux
+; CHECK: @any = global i64 6, comdat $any
+; CHECK: @foo = global i64 43, comdat $foo
+; CHECK-NOT: @in_unselected_group = global i32 13, comdat $qux
+
+; CHECK: define i32 @baz() comdat $qux
+; CHECK: define i32 @bar() comdat $foo
diff --git a/test/Linker/comdat2.ll b/test/Linker/comdat2.ll
new file mode 100644
index 0000000..60c3d7c
--- /dev/null
+++ b/test/Linker/comdat2.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-link %s %p/Inputs/comdat.ll -S -o - 2>&1 | FileCheck %s
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+$foo = comdat samesize
+@foo = global i32 42, comdat $foo
+; CHECK: Linking COMDATs named 'foo': invalid selection kinds!
diff --git a/test/Linker/comdat3.ll b/test/Linker/comdat3.ll
new file mode 100644
index 0000000..f0d9a48
--- /dev/null
+++ b/test/Linker/comdat3.ll
@@ -0,0 +1,5 @@
+; RUN: not llvm-link %s %p/Inputs/comdat2.ll -S -o - 2>&1 | FileCheck %s
+
+$foo = comdat largest
+@foo = global i32 43, comdat $foo
+; CHECK: Linking COMDATs named 'foo': can't do size dependent selection without DataLayout!
diff --git a/test/Linker/comdat4.ll b/test/Linker/comdat4.ll
new file mode 100644
index 0000000..50c1778
--- /dev/null
+++ b/test/Linker/comdat4.ll
@@ -0,0 +1,5 @@
+; RUN: not llvm-link %s %p/Inputs/comdat3.ll -S -o - 2>&1 | FileCheck %s
+
+$foo = comdat noduplicates
+@foo = global i64 43, comdat $foo
+; CHECK: Linking COMDATs named 'foo': noduplicates has been violated!
diff --git a/test/Linker/comdat5.ll b/test/Linker/comdat5.ll
new file mode 100644
index 0000000..011fb8c
--- /dev/null
+++ b/test/Linker/comdat5.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-link %s %p/Inputs/comdat4.ll -S -o - 2>&1 | FileCheck %s
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+$foo = comdat samesize
+@foo = global i32 42, comdat $foo
+; CHECK: Linking COMDATs named 'foo': SameSize violated!
diff --git a/test/Linker/comdat6.ll b/test/Linker/comdat6.ll
new file mode 100644
index 0000000..efa5dfb
--- /dev/null
+++ b/test/Linker/comdat6.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-link %s %p/Inputs/comdat5.ll -S -o - 2>&1 | FileCheck %s
+; RUN: llvm-link %p/Inputs/comdat5.ll %s -S -o - 2>&1 | FileCheck %s
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+%struct.S = type { i32 (...)** }
+
+$"\01??_7S@@6B@" = comdat largest
+@"\01??_7S@@6B@" = linkonce_odr unnamed_addr constant [1 x i8*] [i8* bitcast (void (%struct.S*, i32)* @"\01??_GS@@UAEPAXI@Z" to i8*)], comdat $"\01??_7S@@6B@"
+
+; CHECK: @"\01??_7S@@6B@" = alias getelementptr inbounds ([2 x i8*]* @some_name, i32 0, i32 1)
+
+declare x86_thiscallcc void @"\01??_GS@@UAEPAXI@Z"(%struct.S*, i32) unnamed_addr
diff --git a/test/Linker/comdat7.ll b/test/Linker/comdat7.ll
new file mode 100644
index 0000000..c3ff3f6
--- /dev/null
+++ b/test/Linker/comdat7.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-link %s %p/Inputs/comdat5.ll -S -o - 2>&1 | FileCheck %s
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+$"\01??_7S@@6B@" = comdat largest
+define void @"\01??_7S@@6B@"() {
+  ret void
+}
+; CHECK: GlobalVariable required for data dependent selection!
diff --git a/test/Linker/comdat8.ll b/test/Linker/comdat8.ll
new file mode 100644
index 0000000..21669f6
--- /dev/null
+++ b/test/Linker/comdat8.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-link %s %p/Inputs/comdat5.ll -S -o - 2>&1 | FileCheck %s
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+$"\01??_7S@@6B@" = comdat largest
+define void @some_name() {
+  ret void
+}
+@"\01??_7S@@6B@" = alias i8* inttoptr (i32 ptrtoint (void ()* @some_name to i32) to i8*)
+; CHECK: COMDAT key involves incomputable alias size.
diff --git a/test/Linker/cycle.ll b/test/Linker/cycle.ll
deleted file mode 100644
index 7d9ad2d..0000000
--- a/test/Linker/cycle.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: not llvm-link %s %S/Inputs/cycle.ll 2>&1 | FileCheck %s
-; RUN: not llvm-link %S/Inputs/cycle.ll %s 2>&1 | FileCheck %s
-
-; CHECK: Linking these modules creates an alias cycle
-
-@foo = weak global i32 0
-@bar = alias i32* @foo
diff --git a/test/Linker/unnamed-addr1-a.ll b/test/Linker/unnamed-addr1-a.ll
index adaa400..794ae98 100644
--- a/test/Linker/unnamed-addr1-a.ll
+++ b/test/Linker/unnamed-addr1-a.ll
@@ -21,6 +21,11 @@ define weak void @func-b() unnamed_addr { ret void }
 @global-f = weak global i32 42
 ; CHECK-DAG: @global-f = global i32 42
 
+@alias-a = weak global i32 42
+; CHECK-DAG: @alias-a = alias i32* @global-f
+@alias-b = weak unnamed_addr global i32 42
+; CHECK-DAG: @alias-b = unnamed_addr alias i32* @global-f
+
 declare void @func-c()
 ; CHECK-DAG: define weak void @func-c() {
 define weak void @func-d() { ret void }
@@ -38,6 +43,12 @@ define weak void @func-e() unnamed_addr { ret void }
 @global-j = weak global i32 42
 ; CHECK-DAG: @global-j = global i32 42
 
+@alias-c = weak global i32 42
+; CHECK-DAG: @alias-c = alias i32* @global-f
+@alias-d = weak unnamed_addr global i32 42
+; CHECK-DAG: @alias-d = alias i32* @global-f
+
+
 declare void @func-g()
 ; CHECK-DAG: define weak void @func-g() {
 define weak void @func-h() { ret void }
diff --git a/test/Linker/unnamed-addr1-b.ll b/test/Linker/unnamed-addr1-b.ll
index aa1507b..39a0c8b 100644
--- a/test/Linker/unnamed-addr1-b.ll
+++ b/test/Linker/unnamed-addr1-b.ll
@@ -6,6 +6,9 @@
 @global-e = unnamed_addr global i32 42
 @global-f = unnamed_addr global i32 42
 
+@alias-a =  unnamed_addr alias i32* @global-f
+@alias-b =  unnamed_addr alias i32* @global-f
+
 define weak void @func-c() unnamed_addr { ret void }
 define weak void @func-d() unnamed_addr { ret void }
 define weak void @func-e() unnamed_addr { ret void }
@@ -15,6 +18,9 @@ define weak void @func-e() unnamed_addr { ret void }
 @global-i = global i32 42
 @global-j = global i32 42
 
+@alias-c =  alias i32* @global-f
+@alias-d =  alias i32* @global-f
+
 define weak void @func-g() { ret void }
 define weak void @func-h() { ret void }
 define weak void @func-i() { ret void }
diff --git a/test/MC/AArch64/alias-logicalimm.s b/test/MC/AArch64/alias-logicalimm.s
new file mode 100644
index 0000000..28ec40b
--- /dev/null
+++ b/test/MC/AArch64/alias-logicalimm.s
@@ -0,0 +1,41 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+// CHECK: and x0, x1, #0xfffffffffffffffd
+// CHECK: and x0, x1, #0xfffffffffffffffd
+        and x0, x1, #~2
+        bic x0, x1, #2
+
+// CHECK: and w0, w1, #0xfffffffd
+// CHECK: and w0, w1, #0xfffffffd
+        and w0, w1, #~2
+        bic w0, w1, #2
+
+// CHECK: ands x0, x1, #0xfffffffffffffffd
+// CHECK: ands x0, x1, #0xfffffffffffffffd
+        ands x0, x1, #~2
+        bics x0, x1, #2
+
+// CHECK: ands w0, w1, #0xfffffffd
+// CHECK: ands w0, w1, #0xfffffffd
+        ands w0, w1, #~2
+        bics w0, w1, #2
+
+// CHECK: orr x0, x1, #0xfffffffffffffffd
+// CHECK: orr x0, x1, #0xfffffffffffffffd
+        orr x0, x1, #~2
+        orn x0, x1, #2
+
+// CHECK: orr w2, w1, #0xfffffffc
+// CHECK: orr w2, w1, #0xfffffffc
+        orr w2, w1, #~3
+        orn w2, w1, #3
+
+// CHECK: eor x0, x1, #0xfffffffffffffffd
+// CHECK: eor x0, x1, #0xfffffffffffffffd
+        eor x0, x1, #~2
+        eon x0, x1, #2
+
+// CHECK: eor w2, w1, #0xfffffffc
+// CHECK: eor w2, w1, #0xfffffffc
+        eor w2, w1, #~3
+        eon w2, w1, #3
diff --git a/test/MC/AArch64/arm64-leaf-compact-unwind.s b/test/MC/AArch64/arm64-leaf-compact-unwind.s
index d699813..27d3d51 100644
--- a/test/MC/AArch64/arm64-leaf-compact-unwind.s
+++ b/test/MC/AArch64/arm64-leaf-compact-unwind.s
@@ -23,10 +23,10 @@
 // CHECK-NEXT:   Reserved1:
 // CHECK-NEXT:   Reserved2:
 // CHECK-NEXT:   Relocations [
-// CHECK-NEXT:     0x60 0 3 0 ARM64_RELOC_UNSIGNED 0 -
-// CHECK-NEXT:     0x40 0 3 0 ARM64_RELOC_UNSIGNED 0 -
-// CHECK-NEXT:     0x20 0 3 0 ARM64_RELOC_UNSIGNED 0 -
-// CHECK-NEXT:     0x0 0 3 0 ARM64_RELOC_UNSIGNED 0 -
+// CHECK-NEXT:     0x60 0 3 0 ARM64_RELOC_UNSIGNED 0 0x1
+// CHECK-NEXT:     0x40 0 3 0 ARM64_RELOC_UNSIGNED 0 0x1
+// CHECK-NEXT:     0x20 0 3 0 ARM64_RELOC_UNSIGNED 0 0x1
+// CHECK-NEXT:     0x0 0 3 0 ARM64_RELOC_UNSIGNED 0 0x1
 // CHECK-NEXT:   ]
 // CHECK-NEXT:   SectionData (
 // CHECK-NEXT:     0000: 00000000 00000000 08000000 00000002
diff --git a/test/MC/AArch64/arm64-system-encoding.s b/test/MC/AArch64/arm64-system-encoding.s
index 9246608..87f8f8a 100644
--- a/test/MC/AArch64/arm64-system-encoding.s
+++ b/test/MC/AArch64/arm64-system-encoding.s
@@ -4,7 +4,7 @@
 foo:
 
 ;-----------------------------------------------------------------------------
-; Simple encodings (instuctions w/ no operands)
+; Simple encodings (instructions w/ no operands)
 ;-----------------------------------------------------------------------------
 
   nop
diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s
index a4a3b13..5293131 100644
--- a/test/MC/AArch64/basic-a64-diagnostics.s
+++ b/test/MC/AArch64/basic-a64-diagnostics.s
@@ -729,6 +729,27 @@
 // CHECK-ERROR-NEXT:                  ^
 
 //------------------------------------------------------------------------------
+// Logical (immediates)
+//------------------------------------------------------------------------------
+
+        and w2, w3, #4294967296
+        eor w2, w3, #4294967296
+        orr w2, w3, #4294967296
+        ands w2, w3, #4294967296
+// CHECK-ERROR: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT:         and w2, w3, #4294967296
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT:         eor w2, w3, #4294967296
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT:         orr w2, w3, #4294967296
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT:         ands w2, w3, #4294967296
+// CHECK-ERROR-NEXT:                      ^
+
+//------------------------------------------------------------------------------
 // Bitfield
 //------------------------------------------------------------------------------
 
@@ -1345,39 +1366,59 @@
 
         cset wsp, lt
         csetm sp, ge
+        cset w1, al
+        csetm x6, nv
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR-NEXT:        cset wsp, lt
 // CHECK-ERROR-NEXT:             ^
 // CHECK-ERROR-NEXT: error: invalid operand for instruction
 // CHECK-ERROR-NEXT:        csetm sp, ge
 // CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: condition codes AL and NV are invalid for this instruction
+// CHECK-ERROR-NEXT:        cset w1, al
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: condition codes AL and NV are invalid for this instruction
+// CHECK-ERROR-NEXT:        csetm x6, nv
+// CHECK-ERROR-NEXT:                    ^
 
         cinc w3, wsp, ne
         cinc sp, x9, eq
+        cinc x2, x0, nv
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR-NEXT:        cinc w3, wsp, ne
 // CHECK-ERROR-NEXT:                 ^
 // CHECK-ERROR-NEXT: error: invalid operand for instruction
 // CHECK-ERROR-NEXT:        cinc sp, x9, eq
 // CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: condition codes AL and NV are invalid for this instruction
+// CHECK-ERROR-NEXT:        cinc x2, x0, nv
+// CHECK-ERROR-NEXT:                       ^
 
         cinv w3, wsp, ne
         cinv sp, x9, eq
+        cinv w8, x7, nv
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR-NEXT:        cinv w3, wsp, ne
 // CHECK-ERROR-NEXT:                 ^
 // CHECK-ERROR-NEXT: error: invalid operand for instruction
 // CHECK-ERROR-NEXT:        cinv sp, x9, eq
 // CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: condition codes AL and NV are invalid for this instruction
+// CHECK-ERROR-NEXT:        cinv w8, x7, nv
+// CHECK-ERROR-NEXT:                       ^
 
         cneg w3, wsp, ne
         cneg sp, x9, eq
+        cneg x4, x5, al
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR-NEXT:        cneg w3, wsp, ne
 // CHECK-ERROR-NEXT:                 ^
 // CHECK-ERROR-NEXT: error: invalid operand for instruction
 // CHECK-ERROR-NEXT:        cneg sp, x9, eq
 // CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: condition codes AL and NV are invalid for this instruction
+// CHECK-ERROR-NEXT:        cneg x4, x5, al
+// CHECK-ERROR-NEXT:                       ^
 
 //------------------------------------------------------------------------------
 // Data Processing (1 source)
@@ -2944,13 +2985,17 @@
         orn wsp, w3, w5
         bics x20, sp, x9, lsr #0
         orn x2, x6, sp, lsl #3
-// CHECK-ERROR: error: invalid operand for instruction
+// FIXME: the diagnostic we get for 'orn wsp, w3, w5' is from the orn alias,
+// which is a better match than the genuine ORNWri, whereas it would be better
+// to get the ORNWri diagnostic when the alias did not match, i.e. the
+// alias' diagnostics should have a lower priority.
+// CHECK-ERROR: error: expected compatible register or logical immediate
 // CHECK-ERROR-NEXT:         orn wsp, w3, w5
-// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT:                      ^
 // CHECK-ERROR-NEXT: error: invalid operand for instruction
 // CHECK-ERROR-NEXT:         bics x20, sp, x9, lsr #0
 // CHECK-ERROR-NEXT:                   ^
-// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate
 // CHECK-ERROR-NEXT:         orn x2, x6, sp, lsl #3
 // CHECK-ERROR-NEXT:                     ^
 
diff --git a/test/MC/AArch64/basic-a64-instructions.s b/test/MC/AArch64/basic-a64-instructions.s
index a12968b..140ea33 100644
--- a/test/MC/AArch64/basic-a64-instructions.s
+++ b/test/MC/AArch64/basic-a64-instructions.s
@@ -601,9 +601,11 @@ _func:
         cmn w0, w3
         cmn wzr, w4
         cmn w5, wzr
+        cmn wsp, w6
 // CHECK: cmn      w0, w3                     // encoding: [0x1f,0x00,0x03,0x2b]
 // CHECK: cmn      wzr, w4                    // encoding: [0xff,0x03,0x04,0x2b]
 // CHECK: cmn      w5, wzr                    // encoding: [0xbf,0x00,0x1f,0x2b]
+// CHECK: cmn      wsp, w6                    // encoding: [0xff,0x43,0x26,0x2b]
 
         cmn w6, w7, lsl #0
         cmn w8, w9, lsl #15
@@ -629,9 +631,11 @@ _func:
         cmn x0, x3
         cmn xzr, x4
         cmn x5, xzr
+        cmn sp, x6
 // CHECK: cmn      x0, x3                     // encoding: [0x1f,0x00,0x03,0xab]
 // CHECK: cmn      xzr, x4                    // encoding: [0xff,0x03,0x04,0xab]
 // CHECK: cmn      x5, xzr                    // encoding: [0xbf,0x00,0x1f,0xab]
+// CHECK: cmn      sp, x6                     // encoding: [0xff,0x63,0x26,0xab]
 
         cmn x6, x7, lsl #0
         cmn x8, x9, lsl #15
@@ -657,9 +661,11 @@ _func:
         cmp w0, w3
         cmp wzr, w4
         cmp w5, wzr
+        cmp wsp, w6
 // CHECK: cmp      w0, w3                     // encoding: [0x1f,0x00,0x03,0x6b]
 // CHECK: cmp      wzr, w4                    // encoding: [0xff,0x03,0x04,0x6b]
 // CHECK: cmp      w5, wzr                    // encoding: [0xbf,0x00,0x1f,0x6b]
+// CHECK: cmp      wsp, w6                    // encoding: [0xff,0x43,0x26,0x6b]
 
         cmp w6, w7, lsl #0
         cmp w8, w9, lsl #15
@@ -685,9 +691,11 @@ _func:
         cmp x0, x3
         cmp xzr, x4
         cmp x5, xzr
+        cmp sp, x6
 // CHECK: cmp      x0, x3                     // encoding: [0x1f,0x00,0x03,0xeb]
 // CHECK: cmp      xzr, x4                    // encoding: [0xff,0x03,0x04,0xeb]
 // CHECK: cmp      x5, xzr                    // encoding: [0xbf,0x00,0x1f,0xeb]
+// CHECK: cmp      sp, x6                     // encoding: [0xff,0x63,0x26,0xeb]
 
         cmp x6, x7, lsl #0
         cmp x8, x9, lsl #15
@@ -3237,6 +3245,17 @@ _func:
 // CHECK: orr      w3, wzr, #0xf000f          // encoding: [0xe3,0x8f,0x00,0x32]
 // CHECK: orr x10, xzr, #0xaaaaaaaaaaaaaaaa // encoding: [0xea,0xf3,0x01,0xb2]
 
+        // The Imm field of logicalImm operations has to be truncated to the
+        // register width, i.e. 32 bits
+        and w2, w3, #-3
+        orr w0, w1, #~2
+        eor w16, w17, #-7
+        ands w19, w20, #~15
+// CHECK: and	w2, w3, #0xfffffffd     // encoding: [0x62,0x78,0x1e,0x12]
+// CHECK: orr	w0, w1, #0xfffffffd     // encoding: [0x20,0x78,0x1e,0x32]
+// CHECK: eor	w16, w17, #0xfffffff9   // encoding: [0x30,0x76,0x1d,0x52]
+// CHECK: ands	w19, w20, #0xfffffff0   // encoding: [0x93,0x6e,0x1c,0x72]
+
 //------------------------------------------------------------------------------
 // Logical (shifted register)
 //------------------------------------------------------------------------------
diff --git a/test/MC/AArch64/dot-req-case-insensitive.s b/test/MC/AArch64/dot-req-case-insensitive.s
new file mode 100644
index 0000000..e68b101
--- /dev/null
+++ b/test/MC/AArch64/dot-req-case-insensitive.s
@@ -0,0 +1,18 @@
+// RUN: llvm-mc -triple=arm64-eabi < %s | FileCheck %s
+_foo:
+        OBJECT .req x2
+        mov x4, OBJECT
+        mov x4, oBjEcT
+        .unreq oBJECT
+
+_foo2:
+        OBJECT .req w5
+        mov w4, OBJECT
+        .unreq OBJECT
+
+// CHECK-LABEL: _foo:
+// CHECK: mov x4, x2
+// CHECK: mov x4, x2
+
+// CHECK-LABEL: _foo2:
+// CHECK: mov w4, w5
diff --git a/test/MC/AArch64/dot-req-diagnostics.s b/test/MC/AArch64/dot-req-diagnostics.s
new file mode 100644
index 0000000..44065f8
--- /dev/null
+++ b/test/MC/AArch64/dot-req-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ERROR %s
+
+bar:
+        fred .req x5
+        fred .req x6
+// CHECK-ERROR: warning: ignoring redefinition of register alias 'fred'
+// CHECK-ERROR: fred .req x6
+// CHECK-ERROR: ^
+
+        ada  .req v2.8b
+// CHECK-ERROR: error: vector register without type specifier expected
+// CHECK-ERROR: ada  .req v2.8b
+// CHECK-ERROR:           ^
+
+        bob  .req lisa
+// CHECK-ERROR: error: register name or alias expected
+// CHECK-ERROR: bob  .req lisa
+// CHECK-ERROR:           ^
+
+        lisa .req x1, 23
+// CHECK-ERROR: error: unexpected input in .req directive
+// CHECK-ERROR: lisa .req x1, 23
+// CHECK-ERROR:             ^
+
+        mov  bob, fred
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: mov  bob, fred
+// CHECK-ERROR:      ^
+
+        .unreq 1
+// CHECK-ERROR: error: unexpected input in .unreq directive.
+// CHECK-ERROR: .unreq 1
+// CHECK-ERROR:        ^
+
+        mov  x1, fred
+// CHECK: mov x1, x5
+// CHECK-NOT: mov x1, x6
diff --git a/test/MC/AArch64/dot-req.s b/test/MC/AArch64/dot-req.s
new file mode 100644
index 0000000..947f945
--- /dev/null
+++ b/test/MC/AArch64/dot-req.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s
+
+bar:
+        fred .req x5
+        mov fred, x11
+        .unreq fred
+        fred .req w6
+        mov w1, fred
+
+        bob .req fred
+        ada .req w1
+        mov ada, bob
+        .unreq bob
+        .unreq fred
+        .unreq ada
+// CHECK: mov      x5, x11                // encoding: [0xe5,0x03,0x0b,0xaa]
+// CHECK: mov      w1, w6                 // encoding: [0xe1,0x03,0x06,0x2a]
+// CHECK: mov      w1, w6                 // encoding: [0xe1,0x03,0x06,0x2a]
+
+        bob     .req b6
+        hanah   .req h5
+        sam     .req s4
+        dora    .req d3
+        quentin .req q2
+        vesna   .req v1
+        addv bob,     v0.8b
+        mov  hanah,   v4.h[3]
+        fadd s0,      sam,     sam
+        fmov d2,      dora
+        ldr  quentin, [sp]
+        mov  v0.8b,   vesna.8b
+// CHECK: addv    b6, v0.8b               // encoding: [0x06,0xb8,0x31,0x0e]
+// CHECK: mov     h5, v4.h[3]             // encoding: [0x85,0x04,0x0e,0x5e]
+// CHECK: fadd    s0, s4, s4              // encoding: [0x80,0x28,0x24,0x1e]
+// CHECK: fmov    d2, d3                  // encoding: [0x62,0x40,0x60,0x1e]
+// CHECK: ldr      q2, [sp]               // encoding: [0xe2,0x03,0xc0,0x3d]
+// CHECK: mov             v0.8b, v1.8b    // encoding: [0x20,0x1c,0xa1,0x0e]
diff --git a/test/MC/AArch64/ldr-pseudo-obj-errors.s b/test/MC/AArch64/ldr-pseudo-obj-errors.s
new file mode 100644
index 0000000..7f1b642
--- /dev/null
+++ b/test/MC/AArch64/ldr-pseudo-obj-errors.s
@@ -0,0 +1,13 @@
+//RUN: not llvm-mc -triple=aarch64-linux -filetype=obj %s -o %t1 2> %t2
+//RUN: cat %t2 | FileCheck %s
+
+//These tests look for errors that should be reported for invalid object layout
+//with the ldr pseudo. They are tested separately from parse errors because they
+//only trigger when the file has successfully parsed and the object file is about
+//to be written out.
+
+.text
+foo:
+  ldr x0, =0x10111
+  .space 0xdeadb0
+// CHECK: LVM ERROR: fixup value out of range
diff --git a/test/MC/AArch64/ldr-pseudo.s b/test/MC/AArch64/ldr-pseudo.s
new file mode 100644
index 0000000..6c82fb9
--- /dev/null
+++ b/test/MC/AArch64/ldr-pseudo.s
@@ -0,0 +1,231 @@
+//RUN: llvm-mc  -triple=aarch64-linux-gnu %s | FileCheck %s
+
+//
+// Check that large constants are converted to ldr from constant pool
+//
+// simple test
+.section a, "ax", @progbits
+// CHECK-LABEL: f1:
+f1:
+  ldr x0, =0x1234
+// CHECK: movz    x0, #0x1234
+  ldr w1, =0x4567
+// CHECK:  movz    w1, #0x4567
+  ldr x0, =0x12340000
+// CHECK:  movz    x0, #0x1234, lsl #16
+  ldr w1, =0x45670000
+// CHECK: movz    w1, #0x4567, lsl #16
+  ldr x0, =0xabc00000000
+// CHECK: movz    x0, #0xabc, lsl #32
+  ldr x0, =0xbeef000000000000
+// CHECK: movz    x0, #0xbeef, lsl #48
+
+.section b,"ax",@progbits
+// CHECK-LABEL: f3:
+f3:
+  ldr x0, =0x10001
+// CHECK: ldr x0, .Ltmp[[TMP0:[0-9]+]]
+
+// loading multiple constants
+.section c,"ax",@progbits
+// CHECK-LABEL: f4:
+f4:
+  ldr x0, =0x10002
+// CHECK: ldr x0, .Ltmp[[TMP1:[0-9]+]]
+  adds x0, x0, #1
+  adds x0, x0, #1
+  adds x0, x0, #1
+  adds x0, x0, #1
+  ldr x0, =0x10003
+// CHECK: ldr x0, .Ltmp[[TMP2:[0-9]+]]
+  adds x0, x0, #1
+  adds x0, x0, #1
+
+// TODO: the same constants should have the same constant pool location
+.section d,"ax",@progbits
+// CHECK-LABEL: f5:
+f5:
+  ldr x0, =0x10004
+// CHECK: ldr x0, .Ltmp[[TMP3:[0-9]+]]
+  adds x0, x0, #1
+  adds x0, x0, #1
+  adds x0, x0, #1
+  adds x0, x0, #1
+  adds x0, x0, #1
+  adds x0, x0, #1
+  adds x0, x0, #1
+  ldr x0, =0x10004
+// CHECK: ldr x0, .Ltmp[[TMP4:[0-9]+]]
+  adds x0, x0, #1
+  adds x0, x0, #1
+  adds x0, x0, #1
+  adds x0, x0, #1
+  adds x0, x0, #1
+  adds x0, x0, #1
+
+// a section defined in multiple pieces should be merged and use a single constant pool
+.section e,"ax",@progbits
+// CHECK-LABEL: f6:
+f6:
+  ldr x0, =0x10006
+// CHECK: ldr x0, .Ltmp[[TMP5:[0-9]+]]
+  adds x0, x0, #1
+  adds x0, x0, #1
+  adds x0, x0, #1
+
+.section f, "ax", @progbits
+// CHECK-LABEL: f7:
+f7:
+  adds x0, x0, #1
+  adds x0, x0, #1
+  adds x0, x0, #1
+
+.section e, "ax", @progbits
+// CHECK-LABEL: f8:
+f8:
+  adds x0, x0, #1
+  ldr x0, =0x10007
+// CHECK: ldr x0, .Ltmp[[TMP6:[0-9]+]]
+  adds x0, x0, #1
+  adds x0, x0, #1
+
+//
+// Check that symbols can be loaded using ldr pseudo
+//
+
+// load an undefined symbol
+.section g,"ax",@progbits
+// CHECK-LABEL: f9:
+f9:
+  ldr x0, =foo
+// CHECK: ldr x0, .Ltmp[[TMP7:[0-9]+]]
+
+// load a symbol from another section
+.section h,"ax",@progbits
+// CHECK-LABEL: f10:
+f10:
+  ldr x0, =f5
+// CHECK: ldr x0, .Ltmp[[TMP8:[0-9]+]]
+
+// load a symbol from the same section
+.section i,"ax",@progbits
+// CHECK-LABEL: f11:
+f11:
+  ldr x0, =f12
+// CHECK: ldr x0, .Ltmp[[TMP9:[0-9]+]]
+  ldr w0,=0x3C000
+// CHECK: ldr     w0, .Ltmp[[TMP10:[0-9]+]]
+
+// CHECK-LABEL: f12:
+f12:
+  adds x0, x0, #1
+  adds x0, x0, #1
+
+.section j,"ax",@progbits
+// mix of symbols and constants
+// CHECK-LABEL: f13:
+f13:
+  adds x0, x0, #1
+  adds x0, x0, #1
+  ldr x0, =0x101
+// CHECK: movz x0, #0x101
+  adds x0, x0, #1
+  adds x0, x0, #1
+  ldr x0, =bar
+// CHECK: ldr x0, .Ltmp[[TMP11:[0-9]+]]
+  adds x0, x0, #1
+  adds x0, x0, #1
+//
+// Check for correct usage in other contexts
+//
+
+// usage in macro
+.macro useit_in_a_macro
+  ldr x0, =0x10008
+  ldr x0, =baz
+.endm
+.section k,"ax",@progbits
+// CHECK-LABEL: f14:
+f14:
+  useit_in_a_macro
+// CHECK: ldr x0, .Ltmp[[TMP12:[0-9]+]]
+// CHECK: ldr x0, .Ltmp[[TMP13:[0-9]+]]
+
+// usage with expressions
+.section l, "ax", @progbits
+// CHECK-LABEL: f15:
+f15:
+  ldr x0, =0x10001+8
+// CHECK: ldr x0, .Ltmp[[TMP14:[0-9]+]]
+  adds x0, x0, #1
+  ldr x0, =bar+4
+// CHECK: ldr x0, .Ltmp[[TMP15:[0-9]+]]
+  adds x0, x0, #1
+
+//
+// Constant Pools
+//
+// CHECK: .section b,"ax",@progbits
+// CHECK: .align 2
+// CHECK: .Ltmp[[TMP0]]
+// CHECK: .word 65537
+
+// CHECK: .section c,"ax",@progbits
+// CHECK: .align 2
+// CHECK: .Ltmp[[TMP1]]
+// CHECK: .word 65538
+// CHECK: .Ltmp[[TMP2]]
+// CHECK: .word 65539
+
+// CHECK: .section d,"ax",@progbits
+// CHECK: .align 2
+// CHECK: .Ltmp[[TMP3]]
+// CHECK: .word 65540
+// CHECK: .Ltmp[[TMP4]]
+// CHECK: .word 65540
+
+// CHECK: .section e,"ax",@progbits
+// CHECK: .align 2
+// CHECK: .Ltmp[[TMP5]]
+// CHECK: .word 65542
+// CHECK: .Ltmp[[TMP6]]
+// CHECK: .word 65543
+
+// Should not switch to section because it has no constant pool
+// CHECK-NOT: .section f,"ax",@progbits
+
+// CHECK: .section g,"ax",@progbits
+// CHECK: .align 2
+// CHECK: .Ltmp[[TMP7]]
+// CHECK: .word foo
+
+// CHECK: .section h,"ax",@progbits
+// CHECK: .align 2
+// CHECK: .Ltmp[[TMP8]]
+// CHECK: .word f5
+
+// CHECK: .section i,"ax",@progbits
+// CHECK: .align 2
+// CHECK: .Ltmp[[TMP9]]
+// CHECK: .word f12
+// CHECK: .Ltmp[[TMP10]]
+// CHECK: .word 245760
+
+// CHECK: .section j,"ax",@progbits
+// CHECK: .align 2
+// CHECK: .Ltmp[[TMP11]]
+// CHECK: .word bar
+
+// CHECK: .section k,"ax",@progbits
+// CHECK: .align 2
+// CHECK: .Ltmp[[TMP12]]
+// CHECK: .word 65544
+// CHECK: .Ltmp[[TMP13]]
+// CHECK: .word baz
+
+// CHECK: .section l,"ax",@progbits
+// CHECK: .align 2
+// CHECK: .Ltmp[[TMP14]]
+// CHECK: .word 65545
+// CHECK: .Ltmp[[TMP15]]
+// CHECK: .word bar+4
diff --git a/test/MC/AArch64/lit.local.cfg b/test/MC/AArch64/lit.local.cfg
index 1be70c0..5822b72 100644
--- a/test/MC/AArch64/lit.local.cfg
+++ b/test/MC/AArch64/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if 'AArch64' not in targets:
+if 'AArch64' not in config.root.targets:
     config.unsupported = True
diff --git a/test/MC/ARM/AlignedBundling/lit.local.cfg b/test/MC/ARM/AlignedBundling/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/MC/ARM/AlignedBundling/lit.local.cfg
+++ b/test/MC/ARM/AlignedBundling/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/ARM/Windows/multiple-text-sections.s b/test/MC/ARM/Windows/multiple-text-sections.s
new file mode 100644
index 0000000..241eee4
--- /dev/null
+++ b/test/MC/ARM/Windows/multiple-text-sections.s
@@ -0,0 +1,58 @@
+@ RUN: llvm-mc -triple thumbv7-windows-itanium -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -s - | FileCheck %s
+
+	.syntax unified
+	.text
+	.thumb
+
+	.section	.text,"xr",one_only,a
+
+	.def	 a;
+		.scl	2;
+		.type	32;
+	.endef
+a:
+	movs	r0, #65
+	bx	lr
+
+	.section	.text,"xr",one_only,b
+
+	.def	 b;
+		.scl	2;
+		.type	32;
+	.endef
+	.thumb_func
+b:
+	movs	r0, #66
+	bx	lr
+
+@ CHECK: Sections [
+@ CHECK:   Section {
+@ CHECK:     Name: .text
+@ CHECK:     Characteristics [
+@ CHECK:       IMAGE_SCN_CNT_CODE
+@ CHECK:       IMAGE_SCN_MEM_16BIT
+@ CHECK:       IMAGE_SCN_MEM_EXECUTE
+@ CHECK:       IMAGE_SCN_MEM_READ
+@ CHECK:     ]
+@ CHECK:   }
+@ CHECK:   Section {
+@ CHECK:     Name: .text
+@ CHECK:     Characteristics [
+@ CHECK:       IMAGE_SCN_CNT_CODE
+@ CHECK:       IMAGE_SCN_MEM_16BIT
+@ CHECK:       IMAGE_SCN_MEM_EXECUTE
+@ CHECK:       IMAGE_SCN_MEM_READ
+@ CHECK:     ]
+@ CHECK:   }
+@ CHECK:   Section {
+@ CHECK:     Name: .text
+@ CHECK:     Characteristics [
+@ CHECK:       IMAGE_SCN_CNT_CODE
+@ CHECK:       IMAGE_SCN_MEM_16BIT
+@ CHECK:       IMAGE_SCN_MEM_EXECUTE
+@ CHECK:       IMAGE_SCN_MEM_READ
+@ CHECK:     ]
+@ CHECK:   }
+@ CHECK: ]
+
diff --git a/test/MC/ARM/Windows/text-attributes.s b/test/MC/ARM/Windows/text-attributes.s
new file mode 100644
index 0000000..62aa028
--- /dev/null
+++ b/test/MC/ARM/Windows/text-attributes.s
@@ -0,0 +1,30 @@
+@ RUN: llvm-mc -triple thumbv7-windows-itanium -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -s - | FileCheck %s
+
+	.syntax unified
+	.thumb
+
+	.text
+
+	.def function
+		.type 32
+		.scl 2
+	.endef
+	.global function
+	.thumb_func
+function:
+	bx lr
+
+@ CHECK: Sections [
+@ CHECK:   Section {
+@ CHECK:     Name: .text
+@ CHECK:     Characteristics [
+@ CHECK:       IMAGE_SCN_ALIGN_4BYTES
+@ CHECK:       IMAGE_SCN_CNT_CODE
+@ CHECK:       IMAGE_SCN_MEM_16BIT
+@ CHECK:       IMAGE_SCN_MEM_EXECUTE
+@ CHECK:       IMAGE_SCN_MEM_PURGEABLE
+@ CHECK:       IMAGE_SCN_MEM_READ
+@ CHECK:     ]
+@ CHECK:   }
+@ CHECK: ]
diff --git a/test/MC/ARM/diagnostics.s b/test/MC/ARM/diagnostics.s
index 62d7dae..88c5fb5 100644
--- a/test/MC/ARM/diagnostics.s
+++ b/test/MC/ARM/diagnostics.s
@@ -351,6 +351,24 @@
 @ CHECK-ERRORS:         ubfxgt r4, r5, #16, #17
 @ CHECK-ERRORS:                             ^
 
+        @ Using pc for SBFX/UBFX
+        sbfx pc, r2, #1, #3
+        sbfx sp, pc, #4, #5
+        ubfx pc, r0, #0, #31
+        ubfx r14, pc, #1, #2
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS:         sbfx pc, r2, #1, #3
+@ CHECK-ERRORS:              ^
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS:         sbfx sp, pc, #4, #5
+@ CHECK-ERRORS:                  ^
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS:         ubfx pc, r0, #0, #31
+@ CHECK-ERRORS:              ^
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS:         ubfx r14, pc, #1, #2
+@ CHECK-ERRORS:                   ^
+
         @ Out of order Rt/Rt2 operands for ldrd
         ldrd  r4, r3, [r8]
         ldrd  r4, r3, [r8, #8]!
diff --git a/test/MC/ARM/dwarf-asm-multiple-sections.s b/test/MC/ARM/dwarf-asm-multiple-sections.s
new file mode 100644
index 0000000..ed1b89e
--- /dev/null
+++ b/test/MC/ARM/dwarf-asm-multiple-sections.s
@@ -0,0 +1,79 @@
+// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp
+// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s
+// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s
+// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 2 2>&1 | FileCheck -check-prefix VERSION %s
+// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 1 2>&1 | FileCheck -check-prefix DWARF1 %s
+// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 5 2>&1 | FileCheck -check-prefix DWARF5 %s
+  .section .text, "ax"
+a:
+  mov r0, r0
+
+  .section foo, "ax"
+b:
+  mov r1, r1
+
+// DWARF: .debug_abbrev contents:
+// DWARF: Abbrev table for offset: 0x00000000
+// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes
+// DWARF:         DW_AT_stmt_list DW_FORM_data4
+// DWARF:         DW_AT_ranges    DW_FORM_data4
+// DWARF:         DW_AT_name      DW_FORM_string
+// DWARF:         DW_AT_comp_dir  DW_FORM_string
+// DWARF:         DW_AT_producer  DW_FORM_string
+// DWARF:         DW_AT_language  DW_FORM_data2
+
+// DWARF: .debug_info contents:
+// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1]
+// CHECK-NOT-DWARF: DW_TAG_
+// DWARF: DW_AT_ranges [DW_FORM_data4]      (0x00000000)
+
+// DWARF: 0x{{[0-9a-f]+}}:   DW_TAG_label [2] *
+// DWARF-NEXT: DW_AT_name [DW_FORM_string]     ("a")
+
+// DWARF: 0x{{[0-9a-f]+}}:   DW_TAG_label [2] *
+// DWARF-NEXT: DW_AT_name [DW_FORM_string]     ("b")
+
+
+// DWARF: .debug_aranges contents:
+// DWARF-NEXT: Address Range Header: length = 0x00000024, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00
+// DWARF-NEXT: [0x00000000 - 0x00000004)
+// DWARF-NEXT: [0x00000000 - 0x00000004)
+
+
+// DWARF: .debug_line contents:
+// DWARF:      0x0000000000000000      9      0      1   0   0  is_stmt
+// DWARF-NEXT: 0x0000000000000004      9      0      1   0   0  is_stmt end_sequence
+// DWARF-NEXT: 0x0000000000000000     13      0      1   0   0  is_stmt
+// DWARF-NEXT: 0x0000000000000004     13      0      1   0   0  is_stmt end_sequence
+
+
+// DWARF: .debug_ranges contents:
+// DWARF: 00000000 ffffffff 00000000
+// DWARF: 00000000 00000000 00000004
+// DWARF: 00000000 ffffffff 00000000
+// DWARF: 00000000 00000000 00000004
+// DWARF: 00000000 <End of list>
+
+
+
+// RELOC: RELOCATION RECORDS FOR [.rel.debug_info]:
+// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev
+// RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line
+// RELOC-NEXT: 00000010 R_ARM_ABS32 .debug_ranges
+// RELOC-NEXT: R_ARM_ABS32 .text
+// RELOC-NEXT: R_ARM_ABS32 foo
+
+// RELOC: RELOCATION RECORDS FOR [.rel.debug_ranges]:
+// RELOC-NEXT: 00000004 R_ARM_ABS32 .text
+// RELOC-NEXT: 00000014 R_ARM_ABS32 foo
+
+// RELOC: RELOCATION RECORDS FOR [.rel.debug_aranges]:
+// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_info
+// RELOC-NEXT: 00000010 R_ARM_ABS32 .text
+// RELOC-NEXT: 00000018 R_ARM_ABS32 foo
+
+
+// VERSION: {{.*}} error: DWARF2 only supports one section per compilation unit
+
+// DWARF1: Dwarf version 1 is not supported.
+// DWARF5: Dwarf version 5 is not supported.
diff --git a/test/MC/ARM/dwarf-asm-no-code.s b/test/MC/ARM/dwarf-asm-no-code.s
new file mode 100644
index 0000000..7d06a41
--- /dev/null
+++ b/test/MC/ARM/dwarf-asm-no-code.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp
+// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s
+// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s
+
+// If there is no code in an assembly file, no debug info is produced
+
+.section .data, "aw"
+a:
+.long 42
+
+// DWARF: .debug_abbrev contents:
+// DWARF-NEXT: < EMPTY >
+
+// DWARF: .debug_info contents:
+
+// DWARF: .debug_aranges contents:
+
+// DWARF: .debug_line contents:
+
+// DWARF: .debug_ranges contents:
+
+
+// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_info]:
+
+// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]:
+
+// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_aranges]:
diff --git a/test/MC/ARM/dwarf-asm-nonstandard-section.s b/test/MC/ARM/dwarf-asm-nonstandard-section.s
new file mode 100644
index 0000000..497a39a
--- /dev/null
+++ b/test/MC/ARM/dwarf-asm-nonstandard-section.s
@@ -0,0 +1,57 @@
+// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp
+// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s
+// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s
+
+  .section foo, "ax"
+b:
+  mov r1, r1
+
+// DWARF: .debug_abbrev contents:
+// DWARF: Abbrev table for offset: 0x00000000
+// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes
+// DWARF:         DW_AT_stmt_list DW_FORM_data4
+// DWARF:         DW_AT_low_pc    DW_FORM_addr
+// DWARF:         DW_AT_high_pc   DW_FORM_addr
+// DWARF:         DW_AT_name      DW_FORM_string
+// DWARF:         DW_AT_comp_dir  DW_FORM_string
+// DWARF:         DW_AT_producer  DW_FORM_string
+// DWARF:         DW_AT_language  DW_FORM_data2
+
+// DWARF: .debug_info contents:
+// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1]
+// DWARF-NOT:         DW_TAG_
+// DWARF:               DW_AT_low_pc [DW_FORM_addr]       (0x0000000000000000)
+// DWARF:               DW_AT_high_pc [DW_FORM_addr]      (0x0000000000000004)
+
+// DWARF: 0x{{[0-9a-f]+}}:   DW_TAG_label [2] *
+// DWARF-NEXT: DW_AT_name [DW_FORM_string]     ("b")
+
+
+// DWARF: .debug_aranges contents:
+// DWARF-NEXT: Address Range Header: length = 0x0000001c, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00
+// DWARF-NEXT: [0x00000000 - 0x00000004)
+
+
+// DWARF: .debug_line contents:
+// DWARF:      0x0000000000000000      7      0      1   0   0  is_stmt
+// DWARF-NEXT: 0x0000000000000004      7      0      1   0   0  is_stmt end_sequence
+
+
+// DWARF: .debug_ranges contents:
+// DWARF-NOT: {{0-9a-f}}
+// DWARF: .debug_pubnames contents:
+
+
+
+// RELOC: RELOCATION RECORDS FOR [.rel.debug_info]:
+// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev
+// RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line
+// RELOC-NEXT: R_ARM_ABS32 foo
+// RELOC-NEXT: R_ARM_ABS32 foo
+// RELOC-NEXT: R_ARM_ABS32 foo
+
+// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]:
+
+// RELOC: RELOCATION RECORDS FOR [.rel.debug_aranges]:
+// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_info
+// RELOC-NEXT: 00000010 R_ARM_ABS32 foo
diff --git a/test/MC/ARM/dwarf-asm-single-section.s b/test/MC/ARM/dwarf-asm-single-section.s
new file mode 100644
index 0000000..c57e649
--- /dev/null
+++ b/test/MC/ARM/dwarf-asm-single-section.s
@@ -0,0 +1,56 @@
+// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp
+// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s
+// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s
+
+  .section .text, "ax"
+a:
+  mov r0, r0
+
+
+// DWARF: .debug_abbrev contents:
+// DWARF: Abbrev table for offset: 0x00000000
+// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes
+// DWARF:         DW_AT_stmt_list DW_FORM_data4
+// DWARF:         DW_AT_low_pc    DW_FORM_addr
+// DWARF:         DW_AT_high_pc   DW_FORM_addr
+// DWARF:         DW_AT_name      DW_FORM_string
+// DWARF:         DW_AT_comp_dir  DW_FORM_string
+// DWARF:         DW_AT_producer  DW_FORM_string
+// DWARF:         DW_AT_language  DW_FORM_data2
+
+// DWARF: .debug_info contents:
+// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1]
+// CHECK-NOT-DWARF: DW_TAG_
+// DWARF:               DW_AT_low_pc [DW_FORM_addr]       (0x0000000000000000)
+// DWARF:               DW_AT_high_pc [DW_FORM_addr]      (0x0000000000000004)
+
+// DWARF: 0x{{[0-9a-f]+}}:   DW_TAG_label [2] *
+// DWARF-NEXT: DW_AT_name [DW_FORM_string]     ("a")
+
+
+// DWARF: .debug_aranges contents:
+// DWARF-NEXT: Address Range Header: length = 0x0000001c, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00
+// DWARF-NEXT: [0x00000000 - 0x00000004)
+
+// DWARF: .debug_line contents:
+// DWARF:      0x0000000000000000      7      0      1   0   0 is_stmt
+// DWARF-NEXT: 0x0000000000000004      7      0      1   0   0 is_stmt end_sequence
+
+
+// DWARF: .debug_ranges contents:
+// DWARF-NOT: {{0-9a-f}}
+// DWARF: .debug_pubnames contents:
+
+
+// RELOC: RELOCATION RECORDS FOR [.rel.debug_info]:
+// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev
+// RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line
+// RELOC-NEXT: R_ARM_ABS32 .text
+// RELOC-NEXT: R_ARM_ABS32 .text
+// RELOC-NEXT: R_ARM_ABS32 .text
+
+// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]:
+
+// RELOC: RELOCATION RECORDS FOR [.rel.debug_aranges]:
+// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_info
+// RELOC-NEXT: 00000010 R_ARM_ABS32 .text
diff --git a/test/MC/ARM/gas-compl-copr-reg.s b/test/MC/ARM/gas-compl-copr-reg.s
new file mode 100644
index 0000000..ab0b023
--- /dev/null
+++ b/test/MC/ARM/gas-compl-copr-reg.s
@@ -0,0 +1,14 @@
+@ RUN: llvm-mc -triple=armv7-linux-gnueabi -show-encoding < %s | FileCheck %s
+
+@ CHECK: ldc	p12, c4, [r0, #4]       @ encoding: [0x01,0x4c,0x90,0xed]
+@ CHECK: stc	p14, c6, [r2, #-224]    @ encoding: [0x38,0x6e,0x02,0xed]
+
+        ldc p12, cr4, [r0, #4]
+        stc p14, cr6, [r2, #-224]
+@ RUN: llvm-mc -triple=armv7-linux-gnueabi -show-encoding < %s | FileCheck %s
+
+@ CHECK: ldc	p12, c4, [r0, #4]       @ encoding: [0x01,0x4c,0x90,0xed]
+@ CHECK: stc	p14, c6, [r2, #-224]    @ encoding: [0x38,0x6e,0x02,0xed]
+
+        ldc p12, cr4, [r0, #4]
+        stc p14, cr6, [r2, #-224]
diff --git a/test/MC/ARM/lit.local.cfg b/test/MC/ARM/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/MC/ARM/lit.local.cfg
+++ b/test/MC/ARM/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/ARM/macho-relocs-with-addend.s b/test/MC/ARM/macho-relocs-with-addend.s
new file mode 100644
index 0000000..fee930e
--- /dev/null
+++ b/test/MC/ARM/macho-relocs-with-addend.s
@@ -0,0 +1,34 @@
+@ RUN: llvm-mc -triple thumbv7-apple-ios7.0 -filetype=obj -o - %s | \
+@ RUN: llvm-readobj -r - | FileCheck %s
+
+        @ MachO relocations that end up expressed as internal
+        @ (scattered) still need to have the type set correctly.
+
+        .text
+        .thumb_func
+        .thumb
+        .globl _with_thumb
+_with_thumb:
+        bl _dest+10
+        blx _dest+20
+
+        .globl _with_arm
+        .arm
+_with_arm:
+        bl _dest+10
+        blx _dest+20
+        bne _dest+30
+        b _dest+40
+
+        .data
+_dest:
+        .word 42
+
+@ CHECK: Relocations [
+@ CHECK-NEXT: Section __text {
+@ CHECK-NEXT: 0x14 1 2 n/a ARM_RELOC_BR24 1 0x18
+@ CHECK-NEXT: 0x10 1 2 n/a ARM_RELOC_BR24 1 0x18
+@ CHECK-NEXT: 0xC 1 2 n/a ARM_RELOC_BR24 1 0x18
+@ CHECK-NEXT: 0x8 1 2 n/a ARM_RELOC_BR24 1 0x18
+@ CHECK-NEXT: 0x4 1 2 n/a ARM_THUMB_RELOC_BR22 1 0x18
+@ CHECK-NEXT: 0x0 1 2 n/a ARM_THUMB_RELOC_BR22 1 0x18
diff --git a/test/MC/ARM/thumb-types.s b/test/MC/ARM/thumb-types.s
index 2fd7152..b3aaf7d 100644
--- a/test/MC/ARM/thumb-types.s
+++ b/test/MC/ARM/thumb-types.s
@@ -29,6 +29,12 @@ untyped_text_label:
 explicit_function:
 	nop
 
+	.long	tls(TPOFF)
+
+	.type indirect_function,%gnu_indirect_function
+indirect_function:
+	nop
+
 	.data
 
 untyped_data_label:
@@ -38,6 +44,14 @@ untyped_data_label:
 explicit_data:
 	.long 0
 
+	.section	.tdata,"awT",%progbits
+	.type	tls,%object
+	.align	2
+tls:
+	.long	42
+	.size	tls, 4
+
+
 @ CHECK: Symbol {
 @ CHECK:   Name: arm_function
 @ CHECK:   Value: 0x6
@@ -69,6 +83,18 @@ explicit_data:
 @ CHECK: }
 
 @ CHECK: Symbol {
+@ CHECK:   Name: indirect_function
+@ CHECK:   Value: 0x13
+@ CHECK:   Type: GNU_IFunc
+@ CHECK: }
+
+@ CHECK: Symbol {
+@ CHECK:   Name: tls
+@ CHECK:   Value: 0x0
+@ CHECK:   Type: TLS
+@ CHECK: }
+
+@ CHECK: Symbol {
 @ CHECK:   Name: untyped_data_label
 @ CHECK:   Value: 0x0
 @ CHECK:   Type: None
diff --git a/test/MC/AsmParser/cfi-invalid-startproc.s b/test/MC/AsmParser/cfi-invalid-startproc.s
deleted file mode 100644
index 57ded13..0000000
--- a/test/MC/AsmParser/cfi-invalid-startproc.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple=x86_64-apple-macosx10.8 -filetype=obj -o %t %s 2>&1 | FileCheck %s
-# Check that the cfi_startproc is declared after the beginning of
-# a procedure, otherwise it will reference an invalid symbol for
-# emitting the relocation.
-# <rdar://problem/15939159>
-
-# CHECK: No symbol to start a frame
-.text
-.cfi_startproc
-.globl _someFunction
-_someFunction:
-.cfi_def_cfa_offset 16
-.cfi_offset %rbp, -16
-.cfi_def_cfa_register rbp
-  ret
-.cfi_endproc
diff --git a/test/MC/AsmParser/conditional_asm.s b/test/MC/AsmParser/conditional_asm.s
index b9bee33..ecbceb1 100644
--- a/test/MC/AsmParser/conditional_asm.s
+++ b/test/MC/AsmParser/conditional_asm.s
@@ -11,6 +11,66 @@
     .endif
 .endif
 
+# CHECK: .byte 0
+# CHECK-NOT: .byte 1
+.ifeq 32 - 32
+        .byte 0
+.else
+        .byte 1
+.endif
+
+# CHECK: .byte 0
+# CHECK: .byte 1
+# CHECK-NOT: .byte 2
+.ifge 32 - 31
+        .byte 0
+.endif
+.ifge 32 - 32
+        .byte 1
+.endif
+.ifge 32 - 33
+        .byte 2
+.endif
+
+# CHECK: .byte 0
+# CHECK-NOT: .byte 1
+# CHECK-NOT: .byte 2
+.ifgt 32 - 31
+        .byte 0
+.endif
+.ifgt 32 - 32
+        .byte 1
+.endif
+.ifgt 32 - 33
+        .byte 2
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+# CHECK: .byte 2
+.ifle 32 - 31
+        .byte 0
+.endif
+.ifle 32 - 32
+        .byte 1
+.endif
+.ifle 32 - 33
+        .byte 2
+.endif
+
+# CHECK-NOT: .byte 0
+# CHECK-NOT: .byte 1
+# CHECK: .byte 2
+.iflt 32 - 31
+        .byte 0
+.endif
+.iflt 32 - 32
+        .byte 1
+.endif
+.iflt 32 - 33
+        .byte 2
+.endif
+
 # CHECK: .byte 1
 # CHECK-NOT: .byte 0
 .ifne 32 - 32
diff --git a/test/MC/AsmParser/directive_file.s b/test/MC/AsmParser/directive_file.s
index 9b99e0f..d7290eb 100644
--- a/test/MC/AsmParser/directive_file.s
+++ b/test/MC/AsmParser/directive_file.s
@@ -1,4 +1,5 @@
 # RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+# RUN: llvm-mc -triple i386-unknown-unknown %s -filetype=null
 
         .file "hello"
         .file 1 "worl\144"   # "\144" is "d"
diff --git a/test/MC/AsmParser/directive_line.s b/test/MC/AsmParser/directive_line.s
index 94ce446..110b68a 100644
--- a/test/MC/AsmParser/directive_line.s
+++ b/test/MC/AsmParser/directive_line.s
@@ -1,4 +1,5 @@
 # RUN: llvm-mc -triple i386-unknown-unknown %s
+# RUN: llvm-mc -triple i386-unknown-unknown %s -filetype=null
 # FIXME: Actually test the output.
 
         .line
diff --git a/test/MC/AsmParser/directive_loc.s b/test/MC/AsmParser/directive_loc.s
index cda9579..404ebce 100644
--- a/test/MC/AsmParser/directive_loc.s
+++ b/test/MC/AsmParser/directive_loc.s
@@ -1,4 +1,5 @@
 # RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+# RUN: llvm-mc -triple i386-unknown-unknown %s -filetype=null
 
         .file 1 "hello"
 # CHECK: .file 1 "hello"
diff --git a/test/MC/AsmParser/directive_seh.s b/test/MC/AsmParser/directive_seh.s
index f6eb970..1821747 100644
--- a/test/MC/AsmParser/directive_seh.s
+++ b/test/MC/AsmParser/directive_seh.s
@@ -1,36 +1,25 @@
 # RUN: llvm-mc -triple x86_64-pc-win32 %s | FileCheck %s
 
-# CHECK: .seh_proc func
-# CHECK: .seh_pushframe @code
-# CHECK: .seh_stackalloc 24
-# CHECK: .seh_savereg %rbp, 16
-# CHECK: .seh_savexmm %r8, 0
-# CHECK: .seh_pushreg %rbx
-# CHECK: .seh_setframe %rbx, 0
-# CHECK: .seh_endprologue
-# CHECK: .seh_handler __C_specific_handler, @except
-# CHECK-NOT: .section{{.*}}.xdata
-# CHECK: .seh_handlerdata
-# CHECK: .text
-# CHECK: .seh_startchained
-# CHECK: .seh_endprologue
-# CHECK: .seh_endchained
-# CHECK: .seh_endproc
-
     .text
     .globl func
     .def func; .scl 2; .type 32; .endef
     .seh_proc func
+# CHECK: .seh_proc func
 func:
     .seh_pushframe @code
+# CHECK: .seh_pushframe @code
     subq $24, %rsp
     .seh_stackalloc 24
+# CHECK: .seh_stackalloc 24
     movq %rsi, 16(%rsp)
     .seh_savereg %rsi, 16
+# CHECK: .seh_savereg 6, 16
     movups %xmm8, (%rsp)
     .seh_savexmm %xmm8, 0
+# CHECK: .seh_savexmm 8, 0
     pushq %rbx
     .seh_pushreg 3
+# CHECK: .seh_pushreg 3
     mov %rsp, %rbx
     .seh_setframe 3, 0
     .seh_endprologue
@@ -41,8 +30,18 @@ func:
     .seh_startchained
     .seh_endprologue
     .seh_endchained
+# CHECK: .seh_setframe 3, 0
+# CHECK: .seh_endprologue
+# CHECK: .seh_handler __C_specific_handler, @except
+# CHECK-NOT: .section{{.*}}.xdata
+# CHECK: .seh_handlerdata
+# CHECK: .text
+# CHECK: .seh_startchained
+# CHECK: .seh_endprologue
+# CHECK: .seh_endchained
     lea (%rbx), %rsp
     pop %rbx
     addq $24, %rsp
     ret
     .seh_endproc
+# CHECK: .seh_endproc
diff --git a/test/MC/AsmParser/if-diagnostics.s b/test/MC/AsmParser/if-diagnostics.s
new file mode 100644
index 0000000..d102a56
--- /dev/null
+++ b/test/MC/AsmParser/if-diagnostics.s
@@ -0,0 +1,29 @@
+// RUN: not llvm-mc -triple i386 %s -o /dev/null 2>&1 | FileCheck %s
+
+.if
+.endif
+
+// CHECK: error: unknown token in expression
+// CHECK: .if
+// CHECK:   ^
+
+.ifeq 0, 3
+.endif
+
+// CHECK:error: unexpected token in '.if' directive
+// CHECK: .ifeq 0, 3
+// CHECK:        ^
+
+.iflt "string1"
+.endif
+
+// CHECK: error: expected absolute expression
+// CHECK: .iflt "string1"
+// CHECK:       ^
+
+.ifge test
+.endif
+
+// CHECK: error: expected absolute expression
+// CHECK: .ifge test
+// CHECK:       ^
diff --git a/test/MC/AsmParser/lit.local.cfg b/test/MC/AsmParser/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/MC/AsmParser/lit.local.cfg
+++ b/test/MC/AsmParser/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/AsmParser/vararg.s b/test/MC/AsmParser/vararg.s
index b27668e..e3236b0 100644
--- a/test/MC/AsmParser/vararg.s
+++ b/test/MC/AsmParser/vararg.s
@@ -17,6 +17,12 @@
 .endif
 .endm
 
+.macro ifcc4 arg0, arg1:vararg
+.if cc
+            movl \arg1, \arg0
+.endif
+.endm
+
 .text
 
 // CHECK: movl %esp, %ebp
@@ -25,6 +31,8 @@
 // CHECK: movl %ecx, %ebx
 // CHECK: movl %ecx, %eax
 // CHECK: movl %eax, %ecx
+// CHECK: movl %ecx, %eax
+// CHECK: movl %eax, %ecx
 .set cc,1
   ifcc  movl    %esp, %ebp
         subl $0, %esp
@@ -33,6 +41,8 @@
   ifcc2 %ecx, %ebx
   ifcc3 %ecx %eax
   ifcc3 %eax, %ecx
+  ifcc4 %eax %ecx  ## test
+  ifcc4 %ecx, %eax ## test
 
 // CHECK-NOT movl
 // CHECK: subl $1, %esp
diff --git a/test/MC/COFF/alias.s b/test/MC/COFF/alias.s
index dc4f65a..eb5398a 100644
--- a/test/MC/COFF/alias.s
+++ b/test/MC/COFF/alias.s
@@ -1,4 +1,5 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | llvm-readobj -t -r | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - \
+// RUN:   | llvm-readobj -t -r | FileCheck %s
 
 local1:
 external_aliased_to_local = local1
@@ -36,7 +37,7 @@ weak_aliased_to_external = external2
 // CHECK-NEXT:     AuxSymbolCount: 1
 // CHECK:        }
 // CHECK:        Symbol {
-// CHECK-NEXT:     Name: local1
+// CHECK:          Name: local1
 // CHECK-NEXT:     Value: 0
 // CHECK-NEXT:     Section: .text (1)
 // CHECK-NEXT:     BaseType: Null (0x0)
@@ -89,7 +90,7 @@ weak_aliased_to_external = external2
 // CHECK-NEXT:     StorageClass: WeakExternal (0x69)
 // CHECK-NEXT:     AuxSymbolCount: 1
 // CHECK-NEXT:     AuxWeakExternal {
-// CHECK-NEXT:       Linked: external2 (9)
+// CHECK-NEXT:       Linked: external2 (13)
 // CHECK-NEXT:       Search: Library (0x2)
 // CHECK-NEXT:       Unused: (00 00 00 00 00 00 00 00 00 00)
 // CHECK-NEXT:     }
diff --git a/test/MC/COFF/basic-coff-64.s b/test/MC/COFF/basic-coff-64.s
index 89d1745..38a9e57 100644
--- a/test/MC/COFF/basic-coff-64.s
+++ b/test/MC/COFF/basic-coff-64.s
@@ -25,10 +25,10 @@ _main:                                  # @main
 
 // CHECK: ImageFileHeader {
 // CHECK:   Machine: IMAGE_FILE_MACHINE_AMD64
-// CHECK:   SectionCount: 2
+// CHECK:   SectionCount: 3
 // CHECK:   TimeDateStamp: {{[0-9]+}}
 // CHECK:   PointerToSymbolTable: 0x{{[0-9A-F]+}}
-// CHECK:   SymbolCount: 6
+// CHECK:   SymbolCount: 8
 // CHECK:   OptionalHeaderSize: 0
 // CHECK:   Characteristics [ (0x0)
 // CHECK:   ]
diff --git a/test/MC/COFF/basic-coff.s b/test/MC/COFF/basic-coff.s
index 9b29970..38bfa6d 100644
--- a/test/MC/COFF/basic-coff.s
+++ b/test/MC/COFF/basic-coff.s
@@ -25,10 +25,10 @@ L_.str:                                 # @.str
 
 // CHECK: ImageFileHeader {
 // CHECK:   Machine: IMAGE_FILE_MACHINE_I386
-// CHECK:   SectionCount: 2
+// CHECK:   SectionCount: 3
 // CHECK:   TimeDateStamp: {{[0-9]+}}
 // CHECK:   PointerToSymbolTable: 0x{{[0-9A-F]+}}
-// CHECK:   SymbolCount: 6
+// CHECK:   SymbolCount: 8
 // CHECK:   OptionalHeaderSize: 0
 // CHECK:   Characteristics [ (0x0)
 // CHECK:   ]
diff --git a/test/MC/COFF/early-dce.s b/test/MC/COFF/early-dce.s
new file mode 100644
index 0000000..ec1a9bd
--- /dev/null
+++ b/test/MC/COFF/early-dce.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -triple i686-windows -g -filetype obj -o - %s \
+# RUN:   | llvm-readobj -s -t | FileCheck %s
+
+	.section .rdata
+
+	.align 8
+	.global data
+data:
+	.quad 0
+
+# CHECK: Sections [
+# CHECK:  Section {
+# CHECK:    Name: .text
+# CHECK:  }
+# CHECK: ]
+
diff --git a/test/MC/COFF/global_ctors_dtors.ll b/test/MC/COFF/global_ctors_dtors.ll
index 046e93a..ca17f24 100644
--- a/test/MC/COFF/global_ctors_dtors.ll
+++ b/test/MC/COFF/global_ctors_dtors.ll
@@ -11,9 +11,10 @@
 
 %ini = type { i32, void()*, i8* }
 
-@llvm.global_ctors = appending global [2 x %ini ] [
+@llvm.global_ctors = appending global [3 x %ini ] [
   %ini { i32 65535, void ()* @a_global_ctor, i8* null },
-  %ini { i32 65535, void ()* @b_global_ctor, i8* bitcast (i32* @b to i8*) }
+  %ini { i32 65535, void ()* @b_global_ctor, i8* bitcast (i32* @b to i8*) },
+  %ini { i32 65535, void ()* @c_global_ctor, i8* bitcast (i32* @c to i8*) }
 ]
 @llvm.global_dtors = appending global [1 x %ini ] [%ini { i32 65535, void ()* @a_global_dtor, i8* null }]
 
@@ -26,11 +27,18 @@ define void @a_global_ctor() nounwind {
 
 @b = global i32 zeroinitializer
 
+@c = available_externally dllimport global i32 zeroinitializer
+
 define void @b_global_ctor() nounwind {
   store i32 42, i32* @b
   ret void
 }
 
+define void @c_global_ctor() nounwind {
+  store i32 42, i32* @c
+  ret void
+}
+
 define void @a_global_dtor() nounwind {
   %1 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str2, i32 0, i32 0))
   ret void
@@ -43,13 +51,15 @@ define i32 @main() nounwind {
 
 ; WIN32: .section .CRT$XCU,"rd"
 ; WIN32: a_global_ctor
-; WIN32: .section .CRT$XCU,"rd",associative .bss,{{_?}}b
+; WIN32: .section .CRT$XCU,"rd",associative,{{_?}}b
 ; WIN32: b_global_ctor
+; WIN32-NOT: c_global_ctor
 ; WIN32: .section .CRT$XTX,"rd"
 ; WIN32: a_global_dtor
 ; MINGW32: .section .ctors,"wd"
 ; MINGW32: a_global_ctor
-; MINGW32: .section .ctors,"wd",associative .bss,{{_?}}b
+; MINGW32: .section .ctors,"wd",associative,{{_?}}b
 ; MINGW32: b_global_ctor
+; MINGW32-NOT: c_global_ctor
 ; MINGW32: .section .dtors,"wd"
 ; MINGW32: a_global_dtor
diff --git a/test/MC/COFF/linker-options.ll b/test/MC/COFF/linker-options.ll
index de11941..0be74e5 100755
--- a/test/MC/COFF/linker-options.ll
+++ b/test/MC/COFF/linker-options.ll
@@ -5,6 +5,7 @@
       metadata !{ metadata !"/DEFAULTLIB:msvcrt.lib" },
       metadata !{ metadata !"/DEFAULTLIB:msvcrt.lib",
                   metadata !"/DEFAULTLIB:secur32.lib" },
+      metadata !{ metadata !"/DEFAULTLIB:C:\5Cpath to\5Casan_rt.lib" },
       metadata !{ metadata !"/with spaces" } } }
 
 !llvm.module.flags = !{ !0 }
@@ -17,5 +18,6 @@ define dllexport void @foo() {
 ; CHECK: .ascii   " /DEFAULTLIB:msvcrt.lib"
 ; CHECK: .ascii   " /DEFAULTLIB:msvcrt.lib"
 ; CHECK: .ascii   " /DEFAULTLIB:secur32.lib"
+; CHECK: .ascii   " \"/DEFAULTLIB:C:\\path to\\asan_rt.lib\""
 ; CHECK: .ascii   " \"/with spaces\""
 ; CHECK: .ascii   " /EXPORT:_foo"
diff --git a/test/MC/COFF/linkonce-invalid.s b/test/MC/COFF/linkonce-invalid.s
index 90ce4a7..cc3a27c 100644
--- a/test/MC/COFF/linkonce-invalid.s
+++ b/test/MC/COFF/linkonce-invalid.s
@@ -19,21 +19,9 @@
 // CHECK: error: unexpected token in directive
 .linkonce discard foo
 
-// CHECK: error: expected associated section name
+// CHECK: error: cannot make section associative with .linkonce
 .linkonce associative
 
-// CHECK: error: cannot associate unknown section 'unknown'
-.linkonce associative unknown
-
-// CHECK: error: cannot associate a section with itself
-.linkonce associative invalid
-
-// CHECK: error: associated section must be a COMDAT section
-.linkonce associative non_comdat
-
-// CHECK: error: associated section cannot be itself associative
-.linkonce associative assoc
-
 // CHECK: error: section 'multi' is already linkonce
 .section multi
 .linkonce discard
diff --git a/test/MC/COFF/linkonce.s b/test/MC/COFF/linkonce.s
index e7b7f47..f2e3506 100644
--- a/test/MC/COFF/linkonce.s
+++ b/test/MC/COFF/linkonce.s
@@ -24,7 +24,6 @@
 .long 1
 
 .section s6
-.linkonce associative s1
 .long 1
 
 .section s7
@@ -39,11 +38,6 @@
 .linkonce discard
 .long 1
 
-// Check that valid '.section' names can be associated.
-.section multi
-.linkonce associative .foo$bar
-.long 1
-
 
 // CHECK: Sections [
 // CHECK:   Section {
@@ -79,7 +73,6 @@
 // CHECK:   Section {
 // CHECK:     Name: s6
 // CHECK:     Characteristics [
-// CHECK:       IMAGE_SCN_LNK_COMDAT
 // CHECK:     ]
 // CHECK:   }
 // CHECK:   Section {
@@ -94,86 +87,64 @@
 // CHECK:       IMAGE_SCN_LNK_COMDAT
 // CHECK:     ]
 // CHECK:   }
-// CHECK:   Section {
-// CHECK:     Name: multi
-// CHECK:     Characteristics [
-// CHECK:       IMAGE_SCN_LNK_COMDAT
-// CHECK:     ]
-// CHECK:   }
 // CHECK: ]
 // CHECK: Symbols [
 // CHECK:   Symbol {
 // CHECK:     Name: s1
-// CHECK:     Section: s1 (1)
+// CHECK:     Section: s1 (4)
 // CHECK:     AuxSectionDef {
-// CHECK:       Number: 1
+// CHECK:       Number: 4
 // CHECK:       Selection: Any (0x2)
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
 // CHECK:     Name: s2
-// CHECK:     Section: s2 (2)
+// CHECK:     Section: s2 (5)
 // CHECK:     AuxSectionDef {
-// CHECK:       Number: 2
+// CHECK:       Number: 5
 // CHECK:       Selection: NoDuplicates (0x1)
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
 // CHECK:     Name: s3
-// CHECK:     Section: s3 (3)
+// CHECK:     Section: s3 (6)
 // CHECK:     AuxSectionDef {
-// CHECK:       Number: 3
+// CHECK:       Number: 6
 // CHECK:       Selection: Any (0x2)
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
 // CHECK:     Name: s4
-// CHECK:     Section: s4 (4)
+// CHECK:     Section: s4 (7)
 // CHECK:     AuxSectionDef {
-// CHECK:       Number: 4
+// CHECK:       Number: 7
 // CHECK:       Selection: SameSize (0x3)
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
 // CHECK:     Name: s5
-// CHECK:     Section: s5 (5)
+// CHECK:     Section: s5 (8)
 // CHECK:     AuxSectionDef {
-// CHECK:       Number: 5
+// CHECK:       Number: 8
 // CHECK:       Selection: ExactMatch (0x4)
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
 // CHECK:     Name: s6
-// CHECK:     Section: s6 (6)
-// CHECK:     AuxSectionDef {
-// CHECK:       Number: 1
-// CHECK:       Selection: Associative (0x5)
-// CHECK:       AssocSection: s1
-// CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
 // CHECK:     Name: s7
-// CHECK:     Section: s7 (7)
+// CHECK:     Section: s7 (10)
 // CHECK:     AuxSectionDef {
-// CHECK:       Number: 7
+// CHECK:       Number: 10
 // CHECK:       Selection: Largest (0x6)
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
 // CHECK:     Name: s8
-// CHECK:     Section: s8 (8)
+// CHECK:     Section: s8 (11)
 // CHECK:     AuxSectionDef {
-// CHECK:       Number: 8
+// CHECK:       Number: 11
 // CHECK:       Selection: Newest (0x7)
 // CHECK:     }
 // CHECK:   }
-// CHECK:   Symbol {
-// CHECK:     Name: multi
-// CHECK:     Value: 0
-// CHECK:     Section: multi (10)
-// CHECK:     AuxSectionDef {
-// CHECK:       Number: 9
-// CHECK:       Selection: Associative (0x5)
-// CHECK:       AssocSection: .foo$bar
-// CHECK:     }
-// CHECK:   }
diff --git a/test/MC/COFF/lit.local.cfg b/test/MC/COFF/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/MC/COFF/lit.local.cfg
+++ b/test/MC/COFF/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/COFF/lset0.s b/test/MC/COFF/lset0.s
index f5020c8..7321b01 100755
--- a/test/MC/COFF/lset0.s
+++ b/test/MC/COFF/lset0.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | llvm-nm | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | llvm-nm - | FileCheck %s
 
 not_global = 123
 global = 456
diff --git a/test/MC/COFF/section-comdat-conflict.s b/test/MC/COFF/section-comdat-conflict.s
new file mode 100644
index 0000000..7ed452a
--- /dev/null
+++ b/test/MC/COFF/section-comdat-conflict.s
@@ -0,0 +1,13 @@
+// RUN: not llvm-mc -triple i386-pc-win32 -filetype=obj < %s 2>&1 |  FileCheck %s
+
+// CHECK: conflicting sections for symbol
+
+        .section .xyz
+        .global bar
+bar:
+        .long 42
+
+        .section        .abcd,"xr",discard,bar
+        .global foo
+foo:
+        .long 42
diff --git a/test/MC/COFF/section-comdat-conflict2.s b/test/MC/COFF/section-comdat-conflict2.s
new file mode 100644
index 0000000..e2dfc2d
--- /dev/null
+++ b/test/MC/COFF/section-comdat-conflict2.s
@@ -0,0 +1,6 @@
+// RUN: not llvm-mc -triple i386-pc-win32 -filetype=obj < %s 2>&1 |  FileCheck %s
+
+// CHECK: two sections have the same comdat
+
+        .section        .xyz,"xr",discard,bar
+        .section        .abcd,"xr",discard,bar
diff --git a/test/MC/COFF/section-comdat.s b/test/MC/COFF/section-comdat.s
index dd5be87..e7052d8 100644
--- a/test/MC/COFF/section-comdat.s
+++ b/test/MC/COFF/section-comdat.s
@@ -1,8 +1,7 @@
 // RUN: llvm-mc -triple i386-pc-win32 -filetype=obj %s | llvm-readobj -s -t | FileCheck %s
 // RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -t | FileCheck %s
 
-.section assocSec
-.linkonce
+.section assocSec, "dr", discard, "assocSym"
 .long 1
 
 .section secName, "dr", discard, "Symbol1"
@@ -25,7 +24,7 @@ Symbol3:
 Symbol4:
 .long 1
 
-.section SecName, "dr", associative assocSec, "Symbol5"
+.section SecName, "dr", associative, "assocSym"
 .globl Symbol5
 Symbol5:
 .long 1
@@ -40,58 +39,63 @@ Symbol6:
 Symbol7:
 .long 1
 
+.section assocSec, "dr", associative, "assocSym"
+.globl Symbol8
+Symbol8:
+.long 1
+
 // CHECK: Sections [
 // CHECK:   Section {
-// CHECK:     Number: 1
+// CHECK:     Number: 4
 // CHECK:     Name: assocSec
 // CHECK:     Characteristics [
 // CHECK:       IMAGE_SCN_LNK_COMDAT
 // CHECK:     ]
 // CHECK:   }
 // CHECK:   Section {
-// CHECK:     Number: 2
+// CHECK:     Number: 5
 // CHECK:     Name: secName
 // CHECK:     Characteristics [
 // CHECK:       IMAGE_SCN_LNK_COMDAT
 // CHECK:     ]
 // CHECK:   }
 // CHECK:   Section {
-// CHECK:     Number: 3
+// CHECK:     Number: 6
 // CHECK:     Name: secName
 // CHECK:     Characteristics [
 // CHECK:       IMAGE_SCN_LNK_COMDAT
 // CHECK:     ]
 // CHECK:   }
 // CHECK:   Section {
-// CHECK:     Number: 4
+// CHECK:     Number: 7
 // CHECK:     Name: SecName
 // CHECK:     Characteristics [
 // CHECK:       IMAGE_SCN_LNK_COMDAT
 // CHECK:     ]
 // CHECK:   }
 // CHECK:   Section {
-// CHECK:     Number: 5
+// CHECK:     Number: 8
 // CHECK:     Name: SecName
 // CHECK:     Characteristics [
 // CHECK:       IMAGE_SCN_LNK_COMDAT
 // CHECK:     ]
 // CHECK:   }
 // CHECK:   Section {
-// CHECK:     Number: 6
+// CHECK:     Number: 9
 // CHECK:     Name: SecName
 // CHECK:     Characteristics [
 // CHECK:       IMAGE_SCN_LNK_COMDAT
 // CHECK:     ]
 // CHECK:   }
 // CHECK:   Section {
-// CHECK:     Number: 7
+// CHECK:     Number: 10
 // CHECK:     Name: SecName
 // CHECK:     Characteristics [
 // CHECK:       IMAGE_SCN_LNK_COMDAT
 // CHECK:     ]
 // CHECK:   }
 // CHECK:   Section {
-// CHECK:     Number: 8
+// CHECK:     Number: 11
 // CHECK:     Name: SecName
 // CHECK:     Characteristics [
 // CHECK:       IMAGE_SCN_LNK_COMDAT
@@ -101,88 +105,104 @@ Symbol7:
 // CHECK: Symbols [
 // CHECK:   Symbol {
 // CHECK:     Name: assocSec
-// CHECK:     Section: assocSec (1)
+// CHECK:     Section: assocSec (4)
 // CHECK:     AuxSectionDef {
 // CHECK:       Selection: Any
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
+// CHECK:     Name: assocSym
+// CHECK:     Section: assocSec
+// CHECK:   }
+// CHECK:   Symbol {
 // CHECK:     Name: secName
-// CHECK:     Section: secName (2)
+// CHECK:     Section: secName (5)
 // CHECK:     AuxSectionDef {
 // CHECK:       Selection: Any
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
+// CHECK:     Name: Symbol1
+// CHECK:     Section: secName (5)
+// CHECK:   }
+// CHECK:   Symbol {
 // CHECK:     Name: secName
-// CHECK:     Section: secName (3)
+// CHECK:     Section: secName (6)
 // CHECK:     AuxSectionDef {
 // CHECK:       Selection: NoDuplicates
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
+// CHECK:     Name: Symbol2
+// CHECK:     Section: secName (6)
+// CHECK:   }
+// CHECK:   Symbol {
 // CHECK:     Name: SecName
-// CHECK:     Section: SecName (4)
+// CHECK:     Section: SecName (7)
 // CHECK:     AuxSectionDef {
 // CHECK:       Selection: SameSize
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
+// CHECK:     Name: Symbol3
+// CHECK:     Section: SecName (7)
+// CHECK:   }
+// CHECK:   Symbol {
 // CHECK:     Name: SecName
-// CHECK:     Section: SecName (5)
+// CHECK:     Section: SecName (8)
 // CHECK:     AuxSymbolCount: 1
 // CHECK:     AuxSectionDef {
 // CHECK:       Selection: ExactMatch
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
+// CHECK:     Name: Symbol4
+// CHECK:     Section: SecName (8)
+// CHECK:   }
+// CHECK:   Symbol {
 // CHECK:     Name: SecName
-// CHECK:     Section: SecName (6)
+// CHECK:     Section: SecName (9)
 // CHECK:     AuxSectionDef {
 // CHECK:       Selection: Associative
-// CHECK:       AssocSection: assocSec (1)
+// CHECK:       AssocSection: assocSec (4)
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
 // CHECK:     Name: SecName
-// CHECK:     Section: SecName (7)
+// CHECK:     Section: SecName (10)
 // CHECK:     AuxSectionDef {
 // CHECK:       Selection: Largest
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
+// CHECK:     Name: Symbol6
+// CHECK:     Section: SecName (10)
+// CHECK:   }
+// CHECK:   Symbol {
 // CHECK:     Name: SecName
-// CHECK:     Section: SecName (8)
+// CHECK:     Section: SecName (11)
 // CHECK:     AuxSectionDef {
 // CHECK:       Selection: Newest (0x7)
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
-// CHECK:     Name: Symbol1
-// CHECK:     Section: secName (2)
-// CHECK:   }
-// CHECK:   Symbol {
-// CHECK:     Name: Symbol2
-// CHECK:     Section: secName (3)
-// CHECK:   }
-// CHECK:   Symbol {
-// CHECK:     Name: Symbol3
-// CHECK:     Section: SecName (4)
+// CHECK:     Name: Symbol7
+// CHECK:     Section: SecName (11)
 // CHECK:   }
 // CHECK:   Symbol {
-// CHECK:     Name: Symbol4
-// CHECK:     Section: SecName (5)
+// CHECK:     Name: assocSec
+// CHECK:     Section: assocSec (12)
+// CHECK:     AuxSectionDef {
+// CHECK:       Selection: Associative (0x5)
+// CHECK:       AssocSection: assocSec (4)
+// CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
 // CHECK:     Name: Symbol5
-// CHECK:     Section: SecName (6)
+// CHECK:     Section: SecName (9)
 // CHECK:   }
 // CHECK:   Symbol {
-// CHECK:     Name: Symbol6
-// CHECK:     Section: SecName (7)
-// CHECK:   }
-// CHECK:   Symbol {
-// CHECK:     Name: Symbol7
-// CHECK:     Section: SecName (8)
+// CHECK:     Name: Symbol8
+// CHECK:     Section: assocSec (12)
 // CHECK:   }
 // CHECK: ]
diff --git a/test/MC/COFF/section-name-encoding.s b/test/MC/COFF/section-name-encoding.s
index 74cd490..7edd6d7 100644
--- a/test/MC/COFF/section-name-encoding.s
+++ b/test/MC/COFF/section-name-encoding.s
@@ -10,11 +10,11 @@
 // Raw encoding
 
 // CHECK:   Section {
-// CHECK:     Number: 1
+// CHECK:     Number: 4
 // CHECK:     Name: s (73 00 00 00 00 00 00 00)
 // CHECK:   }
 // CHECK:   Section {
-// CHECK:     Number: 2
+// CHECK:     Number: 5
 // CHECK:     Name: s1234567 (73 31 32 33 34 35 36 37)
 // CHECK:   }
 .section s;        .long 1
@@ -25,7 +25,7 @@
 
 // /4
 // CHECK:   Section {
-// CHECK:     Number: 3
+// CHECK:     Number: 6
 // CHECK:     Name: s12345678 (2F 34 00 00 00 00 00 00)
 // CHECK:   }
 .section s12345678; .long 1
@@ -57,7 +57,7 @@ pad_sections aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 //     "s12345678\0"     # of pad sections
 //
 // CHECK:   Section {
-// CHECK:     Number: 9
+// CHECK:     Number: 12
 // CHECK:     Name: seven_digit (2F 31 30 30 30 30 32 39)
 // CHECK:   }
 .section seven_digit; .long 1
@@ -82,7 +82,7 @@ pad_sections_ex aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 // "2F 2F 41 41 6D 4A 61 34" is "//AAmJa4", which decodes to "0 0 38 9 26 56".
 //
 // CHECK:   Section {
-// CHECK:     Number: 15
+// CHECK:     Number: 18
 // CHECK:     Name: double_slash (2F 2F 41 41 6D 4A 61 34)
 // CHECK:   }
 .section double_slash; .long 1
diff --git a/test/MC/COFF/seh-stackalloc-zero.s b/test/MC/COFF/seh-stackalloc-zero.s
new file mode 100644
index 0000000..898ac84
--- /dev/null
+++ b/test/MC/COFF/seh-stackalloc-zero.s
@@ -0,0 +1,11 @@
+// RUN: not llvm-mc -triple x86_64-pc-win32 -filetype=obj %s -o %t.o 2>&1 | FileCheck %s
+
+// CHECK: Allocation size must be non-zero!
+
+    .globl smallFunc
+    .def smallFunc; .scl 2; .type 32; .endef
+    .seh_proc smallFunc
+    .seh_stackalloc 0
+smallFunc:
+    ret
+    .seh_endproc
diff --git a/test/MC/COFF/seh.s b/test/MC/COFF/seh.s
index 72d42f4..cd884b4 100644
--- a/test/MC/COFF/seh.s
+++ b/test/MC/COFF/seh.s
@@ -35,13 +35,13 @@
 // CHECK-NEXT: ]
 
 // CHECK-NEXT: Relocations [
-// CHECK-NEXT:   Section (2) .xdata {
+// CHECK-NEXT:   Section (4) .xdata {
 // CHECK-NEXT:     0x14 IMAGE_REL_AMD64_ADDR32NB __C_specific_handler
 // CHECK-NEXT:     0x20 IMAGE_REL_AMD64_ADDR32NB func
 // CHECK-NEXT:     0x24 IMAGE_REL_AMD64_ADDR32NB func
 // CHECK-NEXT:     0x28 IMAGE_REL_AMD64_ADDR32NB .xdata
 // CHECK-NEXT:   }
-// CHECK-NEXT:   Section (3) .pdata {
+// CHECK-NEXT:   Section (5) .pdata {
 // CHECK-NEXT:     0x0 IMAGE_REL_AMD64_ADDR32NB func
 // CHECK-NEXT:     0x4 IMAGE_REL_AMD64_ADDR32NB func
 // CHECK-NEXT:     0x8 IMAGE_REL_AMD64_ADDR32NB .xdata
diff --git a/test/MC/COFF/symbol-fragment-offset-64.s b/test/MC/COFF/symbol-fragment-offset-64.s
index b824470..deac888 100644
--- a/test/MC/COFF/symbol-fragment-offset-64.s
+++ b/test/MC/COFF/symbol-fragment-offset-64.s
@@ -36,10 +36,10 @@ _main:                                  # @main
 
 // CHECK: {
 // CHECK:   Machine:                   IMAGE_FILE_MACHINE_AMD64
-// CHECK:   SectionCount:              2
+// CHECK:   SectionCount:              3
 // CHECK:   TimeDateStamp:             {{[0-9]+}}
 // CHECK:   PointerToSymbolTable:      0x{{[0-9A-F]+}}
-// CHECK:   SymbolCount:               7
+// CHECK:   SymbolCount:               9
 // CHECK:   OptionalHeaderSize:        0
 // CHECK:   Characteristics [ (0x0)
 // CHECK:   ]
diff --git a/test/MC/COFF/symbol-fragment-offset.s b/test/MC/COFF/symbol-fragment-offset.s
index 71b1703..b09c5af 100644
--- a/test/MC/COFF/symbol-fragment-offset.s
+++ b/test/MC/COFF/symbol-fragment-offset.s
@@ -36,10 +36,10 @@ L_.str2:
 
 // CHECK: {
 // CHECK:   Machine:                   IMAGE_FILE_MACHINE_I386 (0x14C)
-// CHECK:   SectionCount:              2
+// CHECK:   SectionCount:              3
 // CHECK:   TimeDateStamp:             {{[0-9]+}}
 // CHECK:   PointerToSymbolTable:      0x{{[0-9A-F]+}}
-// CHECK:   SymbolCount:               7
+// CHECK:   SymbolCount:               9
 // CHECK:   OptionalHeaderSize:        0
 // CHECK:   Characteristics [ (0x0)
 // CHECK:   ]
diff --git a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
index 397a39e..23da001 100644
--- a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
+++ b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
@@ -945,10 +945,15 @@
 # CHECK: cset    x9, pl
 # CHECK: csetm    w20, ne
 # CHECK: csetm    x30, ge
+# "cset w2, nv" and "csetm x3, al" are invalid aliases for these two
+# CHECK: csinc    w2, wzr, wzr, al
+# CHECK: csinv    x3, xzr, xzr, nv
 0xe3 0x17 0x9f 0x1a
 0xe9 0x47 0x9f 0x9a
 0xf4 0x3 0x9f 0x5a
 0xfe 0xb3 0x9f 0xda
+0xe2,0xe7,0x9f,0x1a
+0xe3,0xf3,0x9f,0xda
 
 # CHECK: cinc    w3, w5, gt
 # CHECK: cinc    wzr, w4, le
@@ -956,25 +961,35 @@
 # CHECK: cinc    x3, x5, gt
 # CHECK: cinc    xzr, x4, le
 # CHECK: cset    x9, lt
+# "cinc w5, w6, al" and "cinc x1, x2, nv" are invalid aliases for these two
+# CHECK: csinc   w5, w6, w6, nv
+# CHECK: csinc   x1, x2, x2, al
 0xa3 0xd4 0x85 0x1a
 0x9f 0xc4 0x84 0x1a
 0xe9 0xa7 0x9f 0x1a
 0xa3 0xd4 0x85 0x9a
 0x9f 0xc4 0x84 0x9a
 0xe9 0xa7 0x9f 0x9a
+0xc5,0xf4,0x86,0x1a
+0x41,0xe4,0x82,0x9a
 
 # CHECK: cinv    w3, w5, gt
 # CHECK: cinv    wzr, w4, le
-# CHECK: csetm    w9, lt
+# CHECK: csetm   w9, lt
 # CHECK: cinv    x3, x5, gt
 # CHECK: cinv    xzr, x4, le
-# CHECK: csetm    x9, lt
+# CHECK: csetm   x9, lt
+# "cinv x1, x0, nv" and "cinv w9, w8, al" are invalid aliases for these two
+# CHECK: csinv   x1, x0, x0, al
+# CHECK: csinv   w9, w8, w8, nv
 0xa3 0xd0 0x85 0x5a
 0x9f 0xc0 0x84 0x5a
 0xe9 0xa3 0x9f 0x5a
 0xa3 0xd0 0x85 0xda
 0x9f 0xc0 0x84 0xda
 0xe9 0xa3 0x9f 0xda
+0x01 0xe0 0x80 0xda
+0x09,0xf1,0x88,0x5a
 
 # CHECK: cneg     w3, w5, gt
 # CHECK: cneg     wzr, w4, le
@@ -982,12 +997,17 @@
 # CHECK: cneg     x3, x5, gt
 # CHECK: cneg     xzr, x4, le
 # CHECK: cneg     x9, xzr, lt
+# "cneg x4, x8, nv" and "cneg w5, w6, al" are invalid aliases for these two
+# CHECK: csneg    x4, x8, x8, al
+# CHECK: csinv    w9, w8, w8, nv
 0xa3 0xd4 0x85 0x5a
 0x9f 0xc4 0x84 0x5a
 0xe9 0xa7 0x9f 0x5a
 0xa3 0xd4 0x85 0xda
 0x9f 0xc4 0x84 0xda
 0xe9 0xa7 0x9f 0xda
+0x04,0xe5,0x88,0xda
+0x09,0xf1,0x88,0x5a
 
 #------------------------------------------------------------------------------
 # Data-processing (1 source)
diff --git a/test/MC/Disassembler/AArch64/lit.local.cfg b/test/MC/Disassembler/AArch64/lit.local.cfg
index 2c423d1..180bb8a 100644
--- a/test/MC/Disassembler/AArch64/lit.local.cfg
+++ b/test/MC/Disassembler/AArch64/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if 'AArch64' not in targets:
+if 'AArch64' not in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/Disassembler/ARM/hex-immediates.txt b/test/MC/Disassembler/ARM/hex-immediates.txt
index 2634d7e..875d667 100644
--- a/test/MC/Disassembler/ARM/hex-immediates.txt
+++ b/test/MC/Disassembler/ARM/hex-immediates.txt
@@ -1,5 +1,11 @@
-# RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -hdis < %s | FileCheck %s
+# RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 --disassemble --print-imm-hex < %s | FileCheck %s
 # CHECK: ldr	r4, [pc, #0x20]
 0x08 0x4c
 # CHECK: sub	sp, #0x84
 0xa1 0xb0
+# CHECK: ldr  r0, [sp, #0xb4]
+0x2d 0x98
+# CHECK: str.w  r8, [sp, #0xb4]
+0xcd 0xf8 0xb4 0x80
+# CHECK: ldr.w  r8, [sp, #0xb4]
+0xdd 0xf8 0xb4 0x80
diff --git a/test/MC/Disassembler/ARM/lit.local.cfg b/test/MC/Disassembler/ARM/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/MC/Disassembler/ARM/lit.local.cfg
+++ b/test/MC/Disassembler/ARM/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/Disassembler/Mips/lit.local.cfg b/test/MC/Disassembler/Mips/lit.local.cfg
index 1fa54b4..a3183a2 100644
--- a/test/MC/Disassembler/Mips/lit.local.cfg
+++ b/test/MC/Disassembler/Mips/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'Mips' in targets:
+if not 'Mips' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/Disassembler/Mips/mips32r6.txt b/test/MC/Disassembler/Mips/mips32r6.txt
index adbcd99..afef8ad 100644
--- a/test/MC/Disassembler/Mips/mips32r6.txt
+++ b/test/MC/Disassembler/Mips/mips32r6.txt
@@ -30,6 +30,8 @@
 0x60 0xa6 0x00 0x40 # CHECK: bnec $5, $6, 256
 0x60 0x02 0x01 0x4d # CHECK: bnezalc $2,
 0xd8 0xa0 0x46 0x90 # CHECK: beqzc $5, 72256
+0x58 0x43 0x00 0x40 # CHECK: bgec $2, $3, 256
+0x18 0x43 0x00 0x40 # CHECK: bgeuc $2, $3, 256
 0x18 0x42 0x01 0x4d # CHECK: bgezalc $2,
 0xf8 0xa0 0x46 0x90 # CHECK: bnezc $5, 72256
 0x5c 0xa5 0x00 0x40 # CHECK: bltzc $5, 256
@@ -40,44 +42,46 @@
 0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256
 0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2
 0x18 0x02 0x01 0x4d # CHECK: blezalc $2,
+0x5c 0xa6 0x00 0x40 # CHECK: bltc $5, $6, 256
+0x1c 0xa6 0x00 0x40 # CHECK: bltuc $5, $6, 256
 0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4
 0x60 0x40 0x00 0x01 # CHECK: bnvc $2, $zero, 4
 0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4
 0x20 0x00 0x00 0x01 # CHECK: bovc $zero, $zero, 4
 0x20 0x40 0x00 0x01 # CHECK: bovc $2, $zero, 4
 0x20 0x82 0x00 0x01 # CHECK: bovc $4, $2, 4
-0x46 0x84 0x18 0x80 # CHECK: cmp.f.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x80 # CHECK: cmp.f.d $f2, $f3, $f4
+0x46 0x84 0x18 0x80 # CHECK: cmp.af.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x80 # CHECK: cmp.af.d $f2, $f3, $f4
 0x46 0x84 0x18 0x81 # CHECK: cmp.un.s $f2, $f3, $f4
 0x46 0xa4 0x18 0x81 # CHECK: cmp.un.d $f2, $f3, $f4
 0x46 0x84 0x18 0x82 # CHECK: cmp.eq.s $f2, $f3, $f4
 0x46 0xa4 0x18 0x82 # CHECK: cmp.eq.d $f2, $f3, $f4
 0x46 0x84 0x18 0x83 # CHECK: cmp.ueq.s $f2, $f3, $f4
 0x46 0xa4 0x18 0x83 # CHECK: cmp.ueq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x84 # CHECK: cmp.olt.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x84 # CHECK: cmp.olt.d $f2, $f3, $f4
+0x46 0x84 0x18 0x84 # CHECK: cmp.lt.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x84 # CHECK: cmp.lt.d $f2, $f3, $f4
 0x46 0x84 0x18 0x85 # CHECK: cmp.ult.s $f2, $f3, $f4
 0x46 0xa4 0x18 0x85 # CHECK: cmp.ult.d $f2, $f3, $f4
-0x46 0x84 0x18 0x86 # CHECK: cmp.ole.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x86 # CHECK: cmp.ole.d $f2, $f3, $f4
+0x46 0x84 0x18 0x86 # CHECK: cmp.le.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x86 # CHECK: cmp.le.d $f2, $f3, $f4
 0x46 0x84 0x18 0x87 # CHECK: cmp.ule.s $f2, $f3, $f4
 0x46 0xa4 0x18 0x87 # CHECK: cmp.ule.d $f2, $f3, $f4
-0x46 0x84 0x18 0x88 # CHECK: cmp.sf.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x88 # CHECK: cmp.sf.d $f2, $f3, $f4
-0x46 0x84 0x18 0x89 # CHECK: cmp.ngle.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x89 # CHECK: cmp.ngle.d $f2, $f3, $f4
+0x46 0x84 0x18 0x88 # CHECK: cmp.saf.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x88 # CHECK: cmp.saf.d $f2, $f3, $f4
+0x46 0x84 0x18 0x89 # CHECK: cmp.sun.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x89 # CHECK: cmp.sun.d $f2, $f3, $f4
 0x46 0x84 0x18 0x8a # CHECK: cmp.seq.s $f2, $f3, $f4
 0x46 0xa4 0x18 0x8a # CHECK: cmp.seq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8b # CHECK: cmp.ngl.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8b # CHECK: cmp.ngl.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8c # CHECK: cmp.lt.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8c # CHECK: cmp.lt.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8d # CHECK: cmp.nge.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8d # CHECK: cmp.nge.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8e # CHECK: cmp.le.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8e # CHECK: cmp.le.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8f # CHECK: cmp.ngt.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8f # CHECK: cmp.ngt.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8b # CHECK: cmp.sueq.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8b # CHECK: cmp.sueq.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8c # CHECK: cmp.slt.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8c # CHECK: cmp.slt.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8d # CHECK: cmp.sult.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8d # CHECK: cmp.sult.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8e # CHECK: cmp.sle.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8e # CHECK: cmp.sle.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8f # CHECK: cmp.sule.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8f # CHECK: cmp.sule.d $f2, $f3, $f4
 0x00 0x64 0x10 0x9a # CHECK: div $2, $3, $4
 0x00 0x64 0x10 0x9b # CHECK: divu $2, $3, $4
 # 0xf8 0x05 0x01 0x00 # CHECK-TODO: jialc $5, 256
@@ -114,3 +118,10 @@
 0x46 0x20 0x20 0x9a # CHECK: rint.d $f2, $f4
 0x46 0x00 0x20 0x9b # CHECK: class.s $f2, $f4
 0x46 0x20 0x20 0x9b # CHECK: class.d $f2, $f4
+0x00 0x80 0x04 0x09 # CHECK: jr.hb $4
+0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
+0x7e 0x42 0xb3 0xb6 # CHECK: ll $2, -153($18)
+0x7e 0x6f 0xec 0x26 # CHECK: sc $15, -40($19)
+0x00 0xa0 0x58 0x51 # CHECK: clo $11, $5
+0x03 0x80 0xe8 0x50 # CHECK: clz $sp, $gp
diff --git a/test/MC/Disassembler/Mips/mips64r6.txt b/test/MC/Disassembler/Mips/mips64r6.txt
index f5bb14e..3ddef9ab 100644
--- a/test/MC/Disassembler/Mips/mips64r6.txt
+++ b/test/MC/Disassembler/Mips/mips64r6.txt
@@ -30,6 +30,8 @@
 0x60 0xa6 0x00 0x40 # CHECK: bnec $5, $6, 256
 0x60 0x02 0x01 0x4d # CHECK: bnezalc $2,
 0xd8 0xa0 0x46 0x90 # CHECK: beqzc $5, 72256
+0x58 0x43 0x00 0x40 # CHECK: bgec $2, $3, 256
+0x18 0x43 0x00 0x40 # CHECK: bgeuc $2, $3, 256
 0x18 0x42 0x01 0x4d # CHECK: bgezalc $2,
 0xf8 0xa0 0x46 0x90 # CHECK: bnezc $5, 72256
 0x5c 0xa5 0x00 0x40 # CHECK: bltzc $5, 256
@@ -40,44 +42,46 @@
 0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256
 0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2
 0x18 0x02 0x01 0x4d # CHECK: blezalc $2,
+0x5c 0xa6 0x00 0x40 # CHECK: bltc $5, $6, 256
+0x1c 0xa6 0x00 0x40 # CHECK: bltuc $5, $6, 256
 0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4
 0x60 0x40 0x00 0x01 # CHECK: bnvc $2, $zero, 4
 0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4
 0x20 0x00 0x00 0x01 # CHECK: bovc $zero, $zero, 4
 0x20 0x40 0x00 0x01 # CHECK: bovc $2, $zero, 4
 0x20 0x82 0x00 0x01 # CHECK: bovc $4, $2, 4
-0x46 0x84 0x18 0x80 # CHECK: cmp.f.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x80 # CHECK: cmp.f.d $f2, $f3, $f4
+0x46 0x84 0x18 0x80 # CHECK: cmp.af.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x80 # CHECK: cmp.af.d $f2, $f3, $f4
 0x46 0x84 0x18 0x81 # CHECK: cmp.un.s $f2, $f3, $f4
 0x46 0xa4 0x18 0x81 # CHECK: cmp.un.d $f2, $f3, $f4
 0x46 0x84 0x18 0x82 # CHECK: cmp.eq.s $f2, $f3, $f4
 0x46 0xa4 0x18 0x82 # CHECK: cmp.eq.d $f2, $f3, $f4
 0x46 0x84 0x18 0x83 # CHECK: cmp.ueq.s $f2, $f3, $f4
 0x46 0xa4 0x18 0x83 # CHECK: cmp.ueq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x84 # CHECK: cmp.olt.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x84 # CHECK: cmp.olt.d $f2, $f3, $f4
+0x46 0x84 0x18 0x84 # CHECK: cmp.lt.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x84 # CHECK: cmp.lt.d $f2, $f3, $f4
 0x46 0x84 0x18 0x85 # CHECK: cmp.ult.s $f2, $f3, $f4
 0x46 0xa4 0x18 0x85 # CHECK: cmp.ult.d $f2, $f3, $f4
-0x46 0x84 0x18 0x86 # CHECK: cmp.ole.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x86 # CHECK: cmp.ole.d $f2, $f3, $f4
+0x46 0x84 0x18 0x86 # CHECK: cmp.le.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x86 # CHECK: cmp.le.d $f2, $f3, $f4
 0x46 0x84 0x18 0x87 # CHECK: cmp.ule.s $f2, $f3, $f4
 0x46 0xa4 0x18 0x87 # CHECK: cmp.ule.d $f2, $f3, $f4
-0x46 0x84 0x18 0x88 # CHECK: cmp.sf.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x88 # CHECK: cmp.sf.d $f2, $f3, $f4
-0x46 0x84 0x18 0x89 # CHECK: cmp.ngle.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x89 # CHECK: cmp.ngle.d $f2, $f3, $f4
+0x46 0x84 0x18 0x88 # CHECK: cmp.saf.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x88 # CHECK: cmp.saf.d $f2, $f3, $f4
+0x46 0x84 0x18 0x89 # CHECK: cmp.sun.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x89 # CHECK: cmp.sun.d $f2, $f3, $f4
 0x46 0x84 0x18 0x8a # CHECK: cmp.seq.s $f2, $f3, $f4
 0x46 0xa4 0x18 0x8a # CHECK: cmp.seq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8b # CHECK: cmp.ngl.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8b # CHECK: cmp.ngl.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8c # CHECK: cmp.lt.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8c # CHECK: cmp.lt.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8d # CHECK: cmp.nge.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8d # CHECK: cmp.nge.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8e # CHECK: cmp.le.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8e # CHECK: cmp.le.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8f # CHECK: cmp.ngt.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8f # CHECK: cmp.ngt.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8b # CHECK: cmp.sueq.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8b # CHECK: cmp.sueq.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8c # CHECK: cmp.slt.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8c # CHECK: cmp.slt.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8d # CHECK: cmp.sult.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8d # CHECK: cmp.sult.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8e # CHECK: cmp.sle.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8e # CHECK: cmp.sle.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8f # CHECK: cmp.sule.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8f # CHECK: cmp.sule.d $f2, $f3, $f4
 0x7c 0x43 0x23 0x64 # CHECK: dalign $4, $2, $3, 5
 0x74 0x62 0x12 0x34 # CHECK: daui $3, $2, 4660
 0x04 0x66 0x56 0x78 # CHECK: dahi $3, 22136
@@ -99,10 +103,10 @@
 0x00 0x64 0x10 0xd8 # CHECK: muh $2, $3, $4
 0x00 0x64 0x10 0x99 # CHECK: mulu $2, $3, $4
 0x00 0x64 0x10 0xd9 # CHECK: muhu $2, $3, $4
-0x00 0x64 0x10 0xb8 # CHECK: dmul $2, $3, $4
-0x00 0x64 0x10 0xf8 # CHECK: dmuh $2, $3, $4
-0x00 0x64 0x10 0xb9 # CHECK: dmulu $2, $3, $4
-0x00 0x64 0x10 0xf9 # CHECK: dmuhu $2, $3, $4
+0x00 0x64 0x10 0x9c # CHECK: dmul $2, $3, $4
+0x00 0x64 0x10 0xdc # CHECK: dmuh $2, $3, $4
+0x00 0x64 0x10 0x9d # CHECK: dmulu $2, $3, $4
+0x00 0x64 0x10 0xdd # CHECK: dmuhu $2, $3, $4
 0x46 0x04 0x18 0x98 # CHECK: maddf.s $f2, $f3, $f4
 0x46 0x24 0x18 0x98 # CHECK: maddf.d $f2, $f3, $f4
 0x46 0x04 0x18 0x99 # CHECK: msubf.s $f2, $f3, $f4
@@ -127,3 +131,15 @@
 0x46 0x20 0x20 0x9a # CHECK: rint.d $f2, $f4
 0x46 0x00 0x20 0x9b # CHECK: class.s $f2, $f4
 0x46 0x20 0x20 0x9b # CHECK: class.d $f2, $f4
+0xec 0x58 0x3c 0x48 # CHECK: ldpc $2, 123456
+0x00 0x80 0x04 0x09 # CHECK: jr.hb $4
+0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
+0x7e 0x42 0xb3 0xb6 # CHECK: ll $2, -153($18)
+0x7f 0xe0 0x38 0x37 # CHECK: lld $zero, 112($ra)
+0x7e 0x6f 0xec 0x26 # CHECK: sc $15, -40($19)
+0x7f 0xaf 0xe6 0xa7 # CHECK: scd $15, -51($sp)
+0x00 0xa0 0x58 0x51 # CHECK: clo $11, $5
+0x03 0x80 0xe8 0x50 # CHECK: clz $sp, $gp
+0x00 0xc0 0x90 0x53 # CHECK: dclo $18, $6
+0x03 0x20 0x80 0x52 # CHECK: dclz $16, $25
diff --git a/test/MC/Disassembler/PowerPC/lit.local.cfg b/test/MC/Disassembler/PowerPC/lit.local.cfg
index 2e46300..5d33887 100644
--- a/test/MC/Disassembler/PowerPC/lit.local.cfg
+++ b/test/MC/Disassembler/PowerPC/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'PowerPC' in targets:
+if not 'PowerPC' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/Disassembler/Sparc/lit.local.cfg b/test/MC/Disassembler/Sparc/lit.local.cfg
index 4d344fa..fa6a54e 100644
--- a/test/MC/Disassembler/Sparc/lit.local.cfg
+++ b/test/MC/Disassembler/Sparc/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'Sparc' in targets:
+if not 'Sparc' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/Disassembler/SystemZ/insns.txt b/test/MC/Disassembler/SystemZ/insns.txt
index 1a5634d..54a3c5b 100644
--- a/test/MC/Disassembler/SystemZ/insns.txt
+++ b/test/MC/Disassembler/SystemZ/insns.txt
@@ -3355,6 +3355,24 @@
 # CHECK: ldxbr %f13, %f13
 0xb3 0x45 0x00 0xdd
 
+# CHECK: ldxbra	%f0, 0, %f0, 1
+0xb3 0x45 0x01 0x00
+
+# CHECK: ldxbra	%f0, 0, %f0, 15
+0xb3 0x45 0x0f 0x00
+
+# CHECK: ldxbra	%f0, 0, %f13, 1
+0xb3 0x45 0x01 0x0d
+
+# CHECK: ldxbra	%f0, 15, %f0, 1
+0xb3 0x45 0xf1 0x00
+
+# CHECK: ldxbra	%f4, 5, %f8, 9
+0xb3 0x45 0x59 0x48
+
+# CHECK: ldxbra	%f13, 0, %f0, 1
+0xb3 0x45 0x01 0xd0
+
 # CHECK: ldy %f0, -524288
 0xed 0x00 0x00 0x00 0x80 0x65
 
@@ -3400,6 +3418,24 @@
 # CHECK: ledbr %f15, %f15
 0xb3 0x44 0x00 0xff
 
+# CHECK: ledbra	%f0, 0, %f0, 1
+0xb3 0x44 0x01 0x00
+
+# CHECK: ledbra	%f0, 0, %f0, 15
+0xb3 0x44 0x0f 0x00
+
+# CHECK: ledbra	%f0, 0, %f15, 1
+0xb3 0x44 0x01 0x0f
+
+# CHECK: ledbra	%f0, 15, %f0, 1
+0xb3 0x44 0xf1 0x00
+
+# CHECK: ledbra	%f4, 5, %f6, 7
+0xb3 0x44 0x57 0x46
+
+# CHECK: ledbra	%f15, 0, %f0, 1
+0xb3 0x44 0x01 0xf0
+
 # CHECK: ler %f0, %f9
 0x38 0x09
 
@@ -3448,6 +3484,24 @@
 # CHECK: lexbr %f13, %f13
 0xb3 0x46 0x00 0xdd
 
+# CHECK: lexbra	%f0, 0, %f0, 1
+0xb3 0x46 0x01 0x00
+
+# CHECK: lexbra	%f0, 0, %f0, 15
+0xb3 0x46 0x0f 0x00
+
+# CHECK: lexbra	%f0, 0, %f13, 1
+0xb3 0x46 0x01 0x0d
+
+# CHECK: lexbra	%f0, 15, %f0, 1
+0xb3 0x46 0xf1 0x00
+
+# CHECK: lexbra	%f4, 5, %f8, 9
+0xb3 0x46 0x59 0x48
+
+# CHECK: lexbra	%f13, 0, %f0, 1
+0xb3 0x46 0x01 0xd0
+
 # CHECK: ley %f0, -524288
 0xed 0x00 0x00 0x00 0x80 0x64
 
diff --git a/test/MC/Disassembler/SystemZ/lit.local.cfg b/test/MC/Disassembler/SystemZ/lit.local.cfg
index b12af09..5c02dd3 100644
--- a/test/MC/Disassembler/SystemZ/lit.local.cfg
+++ b/test/MC/Disassembler/SystemZ/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'SystemZ' in targets:
+if not 'SystemZ' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/Disassembler/X86/avx-512.txt b/test/MC/Disassembler/X86/avx-512.txt
index e5ad2a9..b1a8aaf 100644
--- a/test/MC/Disassembler/X86/avx-512.txt
+++ b/test/MC/Disassembler/X86/avx-512.txt
@@ -39,6 +39,12 @@
 # CHECK: vgatherdpd      (%rsi,%ymm0,4), %zmm1 {%k2}
 0x62 0xf2 0xfd 0x4a 0x92 0x0c 0x86
 
+# CHECK: vpslld $16, %zmm21, %zmm22
+0x62 0xb1 0x4d 0x40 0x72 0xf5 0x10
+
+# CHECK: vpord %zmm22, %zmm21, %zmm23
+0x62 0xa1 0x55 0x40 0xeb 0xfe
+
 #####################################################
 #                MASK INSTRUCTIONS                  #
 #####################################################
diff --git a/test/MC/Disassembler/X86/hex-immediates.txt b/test/MC/Disassembler/X86/hex-immediates.txt
index 80d2448..fb76c26 100644
--- a/test/MC/Disassembler/X86/hex-immediates.txt
+++ b/test/MC/Disassembler/X86/hex-immediates.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --hdis %s -triple=x86_64-apple-darwin9 2>&1 | FileCheck %s
+# RUN: llvm-mc --print-imm-hex --disassemble %s -triple=x86_64-apple-darwin9 2>&1 | FileCheck %s
 
 # CHECK: movabsq	$0x7fffffffffffffff, %rcx
 0x48 0xb9 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0x7f
diff --git a/test/MC/Disassembler/X86/lit.local.cfg b/test/MC/Disassembler/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/MC/Disassembler/X86/lit.local.cfg
+++ b/test/MC/Disassembler/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/Disassembler/X86/moffs.txt b/test/MC/Disassembler/X86/moffs.txt
index 67d64e8..dd2664c 100644
--- a/test/MC/Disassembler/X86/moffs.txt
+++ b/test/MC/Disassembler/X86/moffs.txt
@@ -1,6 +1,6 @@
-# RUN: llvm-mc --hdis %s -triple=i686-linux-gnu-code16 | FileCheck --check-prefix=16 %s
-# RUN: llvm-mc --hdis %s -triple=i686-linux-gnu | FileCheck --check-prefix=32 %s
-# RUN: llvm-mc --hdis %s -triple=x86_64-linux-gnu | FileCheck --check-prefix=64 %s
+# RUN: llvm-mc --disassemble --print-imm-hex %s -triple=i686-linux-gnu-code16 | FileCheck --check-prefix=16 %s
+# RUN: llvm-mc --disassemble --print-imm-hex %s -triple=i686-linux-gnu | FileCheck --check-prefix=32 %s
+# RUN: llvm-mc --disassemble --print-imm-hex %s -triple=x86_64-linux-gnu | FileCheck --check-prefix=64 %s
 
 # 16: movb 0x5a5a, %al
 # 32: movb 0x5a5a5a5a, %al
diff --git a/test/MC/Disassembler/XCore/lit.local.cfg b/test/MC/Disassembler/XCore/lit.local.cfg
index 4d17d46..bb48713 100644
--- a/test/MC/Disassembler/XCore/lit.local.cfg
+++ b/test/MC/Disassembler/XCore/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'XCore' in targets:
+if not 'XCore' in config.root.targets:
     config.unsupported = True
diff --git a/test/MC/ELF/ARM/bss-non-zero-value.s b/test/MC/ELF/ARM/bss-non-zero-value.s
new file mode 100644
index 0000000..999b8b0
--- /dev/null
+++ b/test/MC/ELF/ARM/bss-non-zero-value.s
@@ -0,0 +1,9 @@
+// RUN: not llvm-mc -filetype=obj -triple arm-linux-gnu %s -o %t 2>%t.out
+// RUN: FileCheck --input-file=%t.out %s
+// CHECK: non-zero initializer found in section '.bss'
+	.bss
+	.globl	a
+	.align	2
+a:
+	.long	1
+	.size	a, 4
diff --git a/test/MC/ELF/ARM/gnu-type-hash-diagnostics.s b/test/MC/ELF/ARM/gnu-type-hash-diagnostics.s
new file mode 100644
index 0000000..eb36475
--- /dev/null
+++ b/test/MC/ELF/ARM/gnu-type-hash-diagnostics.s
@@ -0,0 +1,9 @@
+@ RUN: not llvm-mc -triple arm-elf -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+
+	.syntax unified
+
+	.type TYPE #32
+// CHECK: error: expected symbol type in directive
+// CHECK: .type TYPE #32
+// CHECK:             ^
+
diff --git a/test/MC/ELF/ARM/gnu-type-hash.s b/test/MC/ELF/ARM/gnu-type-hash.s
new file mode 100644
index 0000000..ae5c47c
--- /dev/null
+++ b/test/MC/ELF/ARM/gnu-type-hash.s
@@ -0,0 +1,16 @@
+@ RUN: llvm-mc -triple arm-elf -filetype asm -o - %s | FileCheck %s
+
+	.syntax unified
+
+	.type TYPE #STT_FUNC
+// CHECK: .type TYPE,%function
+
+	.type type #function
+// CHECK: .type type,%function
+
+	.type comma_TYPE, #STT_FUNC
+// CHECK: .type comma_TYPE,%function
+
+	.type comma_type, #function
+// CHECK: .type comma_type,%function
+
diff --git a/test/MC/ELF/ARM/lit.local.cfg b/test/MC/ELF/ARM/lit.local.cfg
new file mode 100644
index 0000000..d825cc0
--- /dev/null
+++ b/test/MC/ELF/ARM/lit.local.cfg
@@ -0,0 +1,3 @@
+# We have to reset config.unsupported here because the parent directory is
+# predicated on 'X86'.
+config.unsupported = not 'ARM' in config.root.targets
diff --git a/test/MC/ELF/cfi-adjust-cfa-offset.s b/test/MC/ELF/cfi-adjust-cfa-offset.s
index b3768cb..9d639f7 100644
--- a/test/MC/ELF/cfi-adjust-cfa-offset.s
+++ b/test/MC/ELF/cfi-adjust-cfa-offset.s
@@ -28,7 +28,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 1C000000 1C000000
 // CHECK-NEXT:       0020: 00000000 0A000000 00440E10 410E1444
 // CHECK-NEXT:       0030: 0E080000 00000000
diff --git a/test/MC/ELF/cfi-advance-loc2.s b/test/MC/ELF/cfi-advance-loc2.s
index d7a53c4..98caa01 100644
--- a/test/MC/ELF/cfi-advance-loc2.s
+++ b/test/MC/ELF/cfi-advance-loc2.s
@@ -26,7 +26,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
 // CHECK-NEXT:       0020: 00000000 01010000 00030001 0E080000
 // CHECK-NEXT:     )
diff --git a/test/MC/ELF/cfi-def-cfa-offset.s b/test/MC/ELF/cfi-def-cfa-offset.s
index eac2c73..59f7400 100644
--- a/test/MC/ELF/cfi-def-cfa-offset.s
+++ b/test/MC/ELF/cfi-def-cfa-offset.s
@@ -27,7 +27,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
 // CHECK-NEXT:       0020: 00000000 0A000000 00440E10 450E0800
 // CHECK-NEXT:     )
diff --git a/test/MC/ELF/cfi-def-cfa-register.s b/test/MC/ELF/cfi-def-cfa-register.s
index 00d8b99..178ba32 100644
--- a/test/MC/ELF/cfi-def-cfa-register.s
+++ b/test/MC/ELF/cfi-def-cfa-register.s
@@ -23,7 +23,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
 // CHECK-NEXT:       0020: 00000000 02000000 00410D06 00000000
 // CHECK-NEXT:     )
diff --git a/test/MC/ELF/cfi-def-cfa.s b/test/MC/ELF/cfi-def-cfa.s
index 36e147f..dfb0d4b 100644
--- a/test/MC/ELF/cfi-def-cfa.s
+++ b/test/MC/ELF/cfi-def-cfa.s
@@ -23,7 +23,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
 // CHECK-NEXT:       0020: 00000000 02000000 00410C07 08000000
 // CHECK-NEXT:     )
diff --git a/test/MC/ELF/cfi-escape.s b/test/MC/ELF/cfi-escape.s
index 839d671..5394ee4 100644
--- a/test/MC/ELF/cfi-escape.s
+++ b/test/MC/ELF/cfi-escape.s
@@ -24,7 +24,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
 // CHECK-NEXT:       0020: 00000000 02000000 00411507 7F000000
 // CHECK-NEXT:     )
diff --git a/test/MC/ELF/cfi-offset.s b/test/MC/ELF/cfi-offset.s
index 951a600..a65b4fc 100644
--- a/test/MC/ELF/cfi-offset.s
+++ b/test/MC/ELF/cfi-offset.s
@@ -23,7 +23,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
 // CHECK-NEXT:       0020: 00000000 02000000 00418602 00000000
 // CHECK-NEXT:     )
diff --git a/test/MC/ELF/cfi-register.s b/test/MC/ELF/cfi-register.s
index 4abbb53..9441770 100644
--- a/test/MC/ELF/cfi-register.s
+++ b/test/MC/ELF/cfi-register.s
@@ -24,7 +24,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
 // CHECK-NEXT:       0020: 00000000 02000000 00410906 00000000
 // CHECK-NEXT:     )
diff --git a/test/MC/ELF/cfi-rel-offset.s b/test/MC/ELF/cfi-rel-offset.s
index 34254c8..0dc69c8 100644
--- a/test/MC/ELF/cfi-rel-offset.s
+++ b/test/MC/ELF/cfi-rel-offset.s
@@ -31,7 +31,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 24000000 1C000000
 // CHECK-NEXT:       0020: 00000000 05000000 00410E08 410D0641
 // CHECK-NEXT:       0030: 11067F41 0E104186 02000000 00000000
diff --git a/test/MC/ELF/cfi-rel-offset2.s b/test/MC/ELF/cfi-rel-offset2.s
index 3de769f..360e7b0 100644
--- a/test/MC/ELF/cfi-rel-offset2.s
+++ b/test/MC/ELF/cfi-rel-offset2.s
@@ -23,7 +23,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
 // CHECK-NEXT:       0020: 00000000 01000000 00411106 7F000000
 // CHECK-NEXT:     )
diff --git a/test/MC/ELF/cfi-remember.s b/test/MC/ELF/cfi-remember.s
index 98c759d..3a38948 100644
--- a/test/MC/ELF/cfi-remember.s
+++ b/test/MC/ELF/cfi-remember.s
@@ -26,7 +26,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
 // CHECK-NEXT:       0020: 00000000 03000000 00410A41 0B000000
 // CHECK-NEXT:     )
diff --git a/test/MC/ELF/cfi-restore.s b/test/MC/ELF/cfi-restore.s
index d25b5ff..e225797 100644
--- a/test/MC/ELF/cfi-restore.s
+++ b/test/MC/ELF/cfi-restore.s
@@ -24,7 +24,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
 // CHECK-NEXT:       0020: 00000000 02000000 0041C600 00000000
 // CHECK-NEXT:     )
diff --git a/test/MC/ELF/cfi-same-value.s b/test/MC/ELF/cfi-same-value.s
index 9f5ae4b..2d37f4d 100644
--- a/test/MC/ELF/cfi-same-value.s
+++ b/test/MC/ELF/cfi-same-value.s
@@ -24,7 +24,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
 // CHECK-NEXT:       0020: 00000000 02000000 00410806 00000000
 // CHECK-NEXT:     )
diff --git a/test/MC/ELF/cfi-sections.s b/test/MC/ELF/cfi-sections.s
index 15a79e5..b0ba543 100644
--- a/test/MC/ELF/cfi-sections.s
+++ b/test/MC/ELF/cfi-sections.s
@@ -26,7 +26,7 @@ f2:
 // ELF_64-NEXT:     AddressAlignment: 8
 // ELF_64-NEXT:     EntrySize: 0
 // ELF_64-NEXT:     SectionData (
-// ELF_64-NEXT:       0000: 14000000 FFFFFFFF 01000178 100C0708
+// ELF_64-NEXT:       0000: 14000000 FFFFFFFF 03000178 100C0708
 // ELF_64-NEXT:       0010: 90010000 00000000 14000000 00000000
 // ELF_64-NEXT:       0020: 00000000 00000000 01000000 00000000
 // ELF_64-NEXT:       0030: 14000000 00000000 00000000 00000000
@@ -47,7 +47,7 @@ f2:
 // ELF_32-NEXT:     AddressAlignment: 4
 // ELF_32-NEXT:     EntrySize: 0
 // ELF_32-NEXT:     SectionData (
-// ELF_32-NEXT:       0000: 10000000 FFFFFFFF 0100017C 080C0404
+// ELF_32-NEXT:       0000: 10000000 FFFFFFFF 0300017C 080C0404
 // ELF_32-NEXT:       0010: 88010000 0C000000 00000000 00000000
 // ELF_32-NEXT:       0020: 01000000 0C000000 00000000 01000000
 // ELF_32-NEXT:       0030: 01000000
diff --git a/test/MC/ELF/cfi-signal-frame.s b/test/MC/ELF/cfi-signal-frame.s
index 0233119..98deb0a 100644
--- a/test/MC/ELF/cfi-signal-frame.s
+++ b/test/MC/ELF/cfi-signal-frame.s
@@ -23,10 +23,10 @@ g:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5253 00017810
+// CHECK-NEXT:       0000: 14000000 00000000 037A5253 00017810
 // CHECK-NEXT:       0010: 011B0C07 08900100 10000000 1C000000
 // CHECK-NEXT:       0020: 00000000 00000000 00000000 14000000
-// CHECK-NEXT:       0030: 00000000 017A5200 01781001 1B0C0708
+// CHECK-NEXT:       0030: 00000000 037A5200 01781001 1B0C0708
 // CHECK-NEXT:       0040: 90010000 10000000 1C000000 00000000
 // CHECK-NEXT:       0050: 00000000 00000000
 // CHECK-NEXT:     )
diff --git a/test/MC/ELF/cfi-undefined.s b/test/MC/ELF/cfi-undefined.s
index 9773a36..568b315 100644
--- a/test/MC/ELF/cfi-undefined.s
+++ b/test/MC/ELF/cfi-undefined.s
@@ -24,7 +24,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
 // CHECK-NEXT:       0020: 00000000 02000000 00410706 00000000
 // CHECK-NEXT:     )
diff --git a/test/MC/ELF/cfi-version.ll b/test/MC/ELF/cfi-version.ll
new file mode 100644
index 0000000..10daa1d
--- /dev/null
+++ b/test/MC/ELF/cfi-version.ll
@@ -0,0 +1,45 @@
+; RUN: %llc_dwarf %s -o - -dwarf-version 2 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF2
+; RUN: %llc_dwarf %s -o - -dwarf-version 3 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34
+; RUN: %llc_dwarf %s -o - -dwarf-version 4 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34
+
+; .debug_frame is not emitted for targeting Windows x64.
+; REQUIRES: debug_frame
+
+; Function Attrs: nounwind
+define i32 @foo() #0 {
+entry:
+  %call = call i32 bitcast (i32 (...)* @bar to i32 ()*)(), !dbg !12
+  %add = add nsw i32 %call, 1, !dbg !12
+  ret i32 %add, !dbg !12
+}
+
+declare i32 @bar(...) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/test.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"test.c", metadata !"/tmp"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @foo, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [foo]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/test.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!11 = metadata !{metadata !"clang version 3.5.0 "}
+!12 = metadata !{i32 2, i32 0, metadata !4, null}
+
+; DWARF2:      .debug_frame contents:
+; DWARF2:        Version:               1
+; DWARF2-NEXT:   Augmentation:
+
+; DWARF34:      .debug_frame contents:
+; DWARF34:        Version:               3
+; DWARF34-NEXT:   Augmentation:
diff --git a/test/MC/ELF/cfi-window-save.s b/test/MC/ELF/cfi-window-save.s
index c7d438a..b083901 100644
--- a/test/MC/ELF/cfi-window-save.s
+++ b/test/MC/ELF/cfi-window-save.s
@@ -26,7 +26,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
 // CHECK-NEXT:       0020: 00000000 02000000 00412D00 00000000
 // CHECK-NEXT:     )
diff --git a/test/MC/ELF/cfi-zero-addr-delta.s b/test/MC/ELF/cfi-zero-addr-delta.s
index 05cb0ae..8662839 100644
--- a/test/MC/ELF/cfi-zero-addr-delta.s
+++ b/test/MC/ELF/cfi-zero-addr-delta.s
@@ -30,7 +30,7 @@ f:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0000: 14000000 00000000 037A5200 01781001
 // CHECK-NEXT:       0010: 1B0C0708 90010000 1C000000 1C000000
 // CHECK-NEXT:       0020: 00000000 04000000 00410E10 410A0E08
 // CHECK-NEXT:       0030: 410B0000 00000000
diff --git a/test/MC/ELF/cfi.s b/test/MC/ELF/cfi.s
index fd229b6..21be615 100644
--- a/test/MC/ELF/cfi.s
+++ b/test/MC/ELF/cfi.s
@@ -234,116 +234,116 @@ f37:
 // CHECK-NEXT:     Relocations [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 14000000 00000000 017A4C52 00017810
+// CHECK-NEXT:       0000: 14000000 00000000 037A4C52 00017810
 // CHECK-NEXT:       0010: 02031B0C 07089001 14000000 1C000000
 // CHECK-NEXT:       0020: 00000000 01000000 04000000 00000000
-// CHECK-NEXT:       0030: 20000000 00000000 017A504C 52000178
+// CHECK-NEXT:       0030: 20000000 00000000 037A504C 52000178
 // CHECK-NEXT:       0040: 100B0000 00000000 00000003 1B0C0708
 // CHECK-NEXT:       0050: 90010000 14000000 28000000 00000000
 // CHECK-NEXT:       0060: 01000000 04000000 00000000 14000000
 // CHECK-NEXT:       0070: 70000000 00000000 01000000 04000000
-// CHECK-NEXT:       0080: 00000000 20000000 00000000 017A504C
+// CHECK-NEXT:       0080: 00000000 20000000 00000000 037A504C
 // CHECK-NEXT:       0090: 52000178 100B0000 00000000 00000002
 // CHECK-NEXT:       00A0: 1B0C0708 90010000 10000000 28000000
 // CHECK-NEXT:       00B0: 00000000 01000000 02000000 18000000
-// CHECK-NEXT:       00C0: 00000000 017A5052 00017810 04020000
+// CHECK-NEXT:       00C0: 00000000 037A5052 00017810 04020000
 // CHECK-NEXT:       00D0: 1B0C0708 90010000 10000000 20000000
 // CHECK-NEXT:       00E0: 00000000 01000000 00000000 18000000
-// CHECK-NEXT:       00F0: 00000000 017A5052 00017810 06030000
+// CHECK-NEXT:       00F0: 00000000 037A5052 00017810 06030000
 // CHECK-NEXT:       0100: 00001B0C 07089001 10000000 20000000
 // CHECK-NEXT:       0110: 00000000 01000000 00000000 1C000000
-// CHECK-NEXT:       0120: 00000000 017A5052 00017810 0A040000
+// CHECK-NEXT:       0120: 00000000 037A5052 00017810 0A040000
 // CHECK-NEXT:       0130: 00000000 00001B0C 07089001 10000000
 // CHECK-NEXT:       0140: 24000000 00000000 01000000 00000000
-// CHECK-NEXT:       0150: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0150: 18000000 00000000 037A5052 00017810
 // CHECK-NEXT:       0160: 040A0000 1B0C0708 90010000 10000000
 // CHECK-NEXT:       0170: 20000000 00000000 01000000 00000000
-// CHECK-NEXT:       0180: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0180: 18000000 00000000 037A5052 00017810
 // CHECK-NEXT:       0190: 060B0000 00001B0C 07089001 10000000
 // CHECK-NEXT:       01A0: 20000000 00000000 01000000 00000000
-// CHECK-NEXT:       01B0: 1C000000 00000000 017A5052 00017810
+// CHECK-NEXT:       01B0: 1C000000 00000000 037A5052 00017810
 // CHECK-NEXT:       01C0: 0A0C0000 00000000 00001B0C 07089001
 // CHECK-NEXT:       01D0: 10000000 24000000 00000000 01000000
-// CHECK-NEXT:       01E0: 00000000 1C000000 00000000 017A5052
+// CHECK-NEXT:       01E0: 00000000 1C000000 00000000 037A5052
 // CHECK-NEXT:       01F0: 00017810 0A080000 00000000 00001B0C
 // CHECK-NEXT:       0200: 07089001 10000000 24000000 00000000
 // CHECK-NEXT:       0210: 01000000 00000000 1C000000 00000000
-// CHECK-NEXT:       0220: 017A5052 00017810 0A100000 00000000
+// CHECK-NEXT:       0220: 037A5052 00017810 0A100000 00000000
 // CHECK-NEXT:       0230: 00001B0C 07089001 10000000 24000000
 // CHECK-NEXT:       0240: 00000000 01000000 00000000 18000000
-// CHECK-NEXT:       0250: 00000000 017A5052 00017810 04120000
+// CHECK-NEXT:       0250: 00000000 037A5052 00017810 04120000
 // CHECK-NEXT:       0260: 1B0C0708 90010000 10000000 20000000
 // CHECK-NEXT:       0270: 00000000 01000000 00000000 18000000
-// CHECK-NEXT:       0280: 00000000 017A5052 00017810 06130000
+// CHECK-NEXT:       0280: 00000000 037A5052 00017810 06130000
 // CHECK-NEXT:       0290: 00001B0C 07089001 10000000 20000000
 // CHECK-NEXT:       02A0: 00000000 01000000 00000000 1C000000
-// CHECK-NEXT:       02B0: 00000000 017A5052 00017810 0A140000
+// CHECK-NEXT:       02B0: 00000000 037A5052 00017810 0A140000
 // CHECK-NEXT:       02C0: 00000000 00001B0C 07089001 10000000
 // CHECK-NEXT:       02D0: 24000000 00000000 01000000 00000000
-// CHECK-NEXT:       02E0: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       02E0: 18000000 00000000 037A5052 00017810
 // CHECK-NEXT:       02F0: 041A0000 1B0C0708 90010000 10000000
 // CHECK-NEXT:       0300: 20000000 00000000 01000000 00000000
-// CHECK-NEXT:       0310: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0310: 18000000 00000000 037A5052 00017810
 // CHECK-NEXT:       0320: 061B0000 00001B0C 07089001 10000000
 // CHECK-NEXT:       0330: 20000000 00000000 01000000 00000000
-// CHECK-NEXT:       0340: 1C000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0340: 1C000000 00000000 037A5052 00017810
 // CHECK-NEXT:       0350: 0A1C0000 00000000 00001B0C 07089001
 // CHECK-NEXT:       0360: 10000000 24000000 00000000 01000000
-// CHECK-NEXT:       0370: 00000000 1C000000 00000000 017A5052
+// CHECK-NEXT:       0370: 00000000 1C000000 00000000 037A5052
 // CHECK-NEXT:       0380: 00017810 0A180000 00000000 00001B0C
 // CHECK-NEXT:       0390: 07089001 10000000 24000000 00000000
 // CHECK-NEXT:       03A0: 01000000 00000000 1C000000 00000000
-// CHECK-NEXT:       03B0: 017A5052 00017810 0A800000 00000000
+// CHECK-NEXT:       03B0: 037A5052 00017810 0A800000 00000000
 // CHECK-NEXT:       03C0: 00001B0C 07089001 10000000 24000000
 // CHECK-NEXT:       03D0: 00000000 01000000 00000000 18000000
-// CHECK-NEXT:       03E0: 00000000 017A5052 00017810 04820000
+// CHECK-NEXT:       03E0: 00000000 037A5052 00017810 04820000
 // CHECK-NEXT:       03F0: 1B0C0708 90010000 10000000 20000000
 // CHECK-NEXT:       0400: 00000000 01000000 00000000 18000000
-// CHECK-NEXT:       0410: 00000000 017A5052 00017810 06830000
+// CHECK-NEXT:       0410: 00000000 037A5052 00017810 06830000
 // CHECK-NEXT:       0420: 00001B0C 07089001 10000000 20000000
 // CHECK-NEXT:       0430: 00000000 01000000 00000000 1C000000
-// CHECK-NEXT:       0440: 00000000 017A5052 00017810 0A840000
+// CHECK-NEXT:       0440: 00000000 037A5052 00017810 0A840000
 // CHECK-NEXT:       0450: 00000000 00001B0C 07089001 10000000
 // CHECK-NEXT:       0460: 24000000 00000000 01000000 00000000
-// CHECK-NEXT:       0470: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0470: 18000000 00000000 037A5052 00017810
 // CHECK-NEXT:       0480: 048A0000 1B0C0708 90010000 10000000
 // CHECK-NEXT:       0490: 20000000 00000000 01000000 00000000
-// CHECK-NEXT:       04A0: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       04A0: 18000000 00000000 037A5052 00017810
 // CHECK-NEXT:       04B0: 068B0000 00001B0C 07089001 10000000
 // CHECK-NEXT:       04C0: 20000000 00000000 01000000 00000000
-// CHECK-NEXT:       04D0: 1C000000 00000000 017A5052 00017810
+// CHECK-NEXT:       04D0: 1C000000 00000000 037A5052 00017810
 // CHECK-NEXT:       04E0: 0A8C0000 00000000 00001B0C 07089001
 // CHECK-NEXT:       04F0: 10000000 24000000 00000000 01000000
-// CHECK-NEXT:       0500: 00000000 1C000000 00000000 017A5052
+// CHECK-NEXT:       0500: 00000000 1C000000 00000000 037A5052
 // CHECK-NEXT:       0510: 00017810 0A880000 00000000 00001B0C
 // CHECK-NEXT:       0520: 07089001 10000000 24000000 00000000
 // CHECK-NEXT:       0530: 01000000 00000000 1C000000 00000000
-// CHECK-NEXT:       0540: 017A5052 00017810 0A900000 00000000
+// CHECK-NEXT:       0540: 037A5052 00017810 0A900000 00000000
 // CHECK-NEXT:       0550: 00001B0C 07089001 10000000 24000000
 // CHECK-NEXT:       0560: 00000000 01000000 00000000 18000000
-// CHECK-NEXT:       0570: 00000000 017A5052 00017810 04920000
+// CHECK-NEXT:       0570: 00000000 037A5052 00017810 04920000
 // CHECK-NEXT:       0580: 1B0C0708 90010000 10000000 20000000
 // CHECK-NEXT:       0590: 00000000 01000000 00000000 18000000
-// CHECK-NEXT:       05A0: 00000000 017A5052 00017810 06930000
+// CHECK-NEXT:       05A0: 00000000 037A5052 00017810 06930000
 // CHECK-NEXT:       05B0: 00001B0C 07089001 10000000 20000000
 // CHECK-NEXT:       05C0: 00000000 01000000 00000000 1C000000
-// CHECK-NEXT:       05D0: 00000000 017A5052 00017810 0A940000
+// CHECK-NEXT:       05D0: 00000000 037A5052 00017810 0A940000
 // CHECK-NEXT:       05E0: 00000000 00001B0C 07089001 10000000
 // CHECK-NEXT:       05F0: 24000000 00000000 01000000 00000000
-// CHECK-NEXT:       0600: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0600: 18000000 00000000 037A5052 00017810
 // CHECK-NEXT:       0610: 049A0000 1B0C0708 90010000 10000000
 // CHECK-NEXT:       0620: 20000000 00000000 01000000 00000000
-// CHECK-NEXT:       0630: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0630: 18000000 00000000 037A5052 00017810
 // CHECK-NEXT:       0640: 069B0000 00001B0C 07089001 10000000
 // CHECK-NEXT:       0650: 20000000 00000000 01000000 00000000
-// CHECK-NEXT:       0660: 1C000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0660: 1C000000 00000000 037A5052 00017810
 // CHECK-NEXT:       0670: 0A9C0000 00000000 00001B0C 07089001
 // CHECK-NEXT:       0680: 10000000 24000000 00000000 01000000
-// CHECK-NEXT:       0690: 00000000 1C000000 00000000 017A5052
+// CHECK-NEXT:       0690: 00000000 1C000000 00000000 037A5052
 // CHECK-NEXT:       06A0: 00017810 0A980000 00000000 00001B0C
 // CHECK-NEXT:       06B0: 07089001 10000000 24000000 00000000
 // CHECK-NEXT:       06C0: 01000000 00000000 10000000 00000000
-// CHECK-NEXT:       06D0: 017A5200 01781001 1B000000 10000000
+// CHECK-NEXT:       06D0: 037A5200 01781001 1B000000 10000000
 // CHECK-NEXT:       06E0: 18000000 00000000 01000000 00000000
 // CHECK-NEXT:     )
 // CHECK-NEXT:   }
diff --git a/test/MC/ELF/gnu-type-diagnostics.s b/test/MC/ELF/gnu-type-diagnostics.s
new file mode 100644
index 0000000..df87d6d
--- /dev/null
+++ b/test/MC/ELF/gnu-type-diagnostics.s
@@ -0,0 +1,18 @@
+// RUN: not llvm-mc -triple i686-elf -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+
+	.type TYPE FUNC
+// CHECK: error: unsupported attribute in '.type' directive
+// CHECK: .type TYPE FUNC
+// CHECK:            ^
+
+	.type type stt_func
+// CHECK: error: unsupported attribute in '.type' directive
+// CHECK: .type type stt_func
+// CHECK:            ^
+
+	.type symbol 32
+// CHECK: error: expected STT_<TYPE_IN_UPPER_CASE>, '#<type>', '@<type>', '%<type>' or "<type>"
+// CHECK: .type symbol 32
+// CHECK:              ^
+
+
diff --git a/test/MC/ELF/gnu-type.s b/test/MC/ELF/gnu-type.s
new file mode 100644
index 0000000..19029e4
--- /dev/null
+++ b/test/MC/ELF/gnu-type.s
@@ -0,0 +1,38 @@
+// RUN: llvm-mc -triple i686-elf -filetype asm -o - %s | FileCheck %s
+
+	.type TYPE STT_FUNC
+// CHECK: .type TYPE,@function
+
+	.type comma_TYPE, STT_FUNC
+// CHECK: .type comma_TYPE,@function
+
+	.type at_TYPE, @STT_FUNC
+// CHECK: .type at_TYPE,@function
+
+	.type percent_TYPE, %STT_FUNC
+// CHECK: .type percent_TYPE,@function
+
+	.type string_TYPE, "STT_FUNC"
+// CHECK: .type string_TYPE,@function
+
+	.type type function
+// CHECK: .type type,@function
+
+	.type comma_type, function
+// CHECK: .type comma_type,@function
+
+	.type at_type, @function
+// CHECK: .type at_type,@function
+
+	.type percent_type, %function
+// CHECK: .type percent_type,@function
+
+	.type string_type, "function"
+// CHECK: .type string_type,@function
+
+	.type special gnu_unique_object
+// CHECK: .type special,@gnu_unique_object
+
+	.type comma_special, gnu_unique_object
+// CHECK: .type comma_special,@gnu_unique_object
+
diff --git a/test/MC/ELF/lit.local.cfg b/test/MC/ELF/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/MC/ELF/lit.local.cfg
+++ b/test/MC/ELF/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/ELF/no-reloc.s b/test/MC/ELF/no-reloc.s
new file mode 100644
index 0000000..78f1b88
--- /dev/null
+++ b/test/MC/ELF/no-reloc.s
@@ -0,0 +1,19 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck %s
+
+// CHECK: Relocations [
+// CHECK-NEXT: ]
+
+	.section	.test1_foo
+.Ltest1_1:
+.Ltest1_2 = .Ltest1_1
+	.section	.test1_bar
+	.long .Ltest1_1-.Ltest1_2
+
+
+        .section test2
+
+.Ltest2_a:
+.Ltest2_b = .Ltest2_a
+.Ltest2_c:
+.Ltest2_d = .Ltest2_c-.Ltest2_b
+	.long	.Ltest2_d
diff --git a/test/MC/ELF/pr19430.s b/test/MC/ELF/pr19430.s
new file mode 100644
index 0000000..a1e5246
--- /dev/null
+++ b/test/MC/ELF/pr19430.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple x86_64-pc-linux-gnu %s -filetype=obj -o - | llvm-readobj -r | FileCheck %s
+
+// Test that we can use .cfi_startproc without a global symbol.
+
+.text
+.space 1000
+.cfi_startproc
+ .cfi_endproc
+
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section (5) .rela.eh_frame {
+// CHECK-NEXT:     0x20 R_X86_64_PC32 .text 0x3E8
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/MachO/AArch64/lit.local.cfg b/test/MC/MachO/AArch64/lit.local.cfg
index 9a66a00..cec29af 100644
--- a/test/MC/MachO/AArch64/lit.local.cfg
+++ b/test/MC/MachO/AArch64/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
+if not 'AArch64' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/MachO/ARM/aliased-symbols.s b/test/MC/MachO/ARM/aliased-symbols.s
new file mode 100644
index 0000000..0b4463d
--- /dev/null
+++ b/test/MC/MachO/ARM/aliased-symbols.s
@@ -0,0 +1,115 @@
+// RUN: llvm-mc -triple thumbv7m-apple-darwin-eabi %s -filetype=obj -o %t
+// RUN:     llvm-readobj -symbols %t | FileCheck %s
+
+        .data
+        var1 = var2
+        .long var1
+        .long var2
+        .long var2 + 4
+defined_early:
+        .long 0
+
+        alias_to_early = defined_early
+        alias_to_late = defined_late
+
+defined_late:
+        .long 0
+
+        .global extern_test
+        extern_test = var2
+
+        alias_to_local = Ltmp0
+Ltmp0:
+
+// CHECK: Symbols [
+
+        // defined_early was defined. Actually has value 0xc.
+// CHECK: Symbol {
+// CHECK-NEXT:   Name: defined_early
+// CHECK-NEXT:   Type: Section (0xE)
+// CHECK-NEXT:   Section: __data (0x2)
+// CHECK-NEXT:   RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:   Flags [ (0x0)
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   Value: 0x[[DEFINED_EARLY:[0-9A-F]+]]
+// CHECK-NEXT: }
+
+        // alias_to_early was an alias to defined_early. But we can resolve it.
+// CHECK: Symbol {
+// CHECK-NEXT:   Name: alias_to_early
+// CHECK-NEXT:   Type: Section (0xE)
+// CHECK-NEXT:   Section: __data (0x2)
+// CHECK-NEXT:   RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:   Flags [ (0x0)
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   Value: 0x[[DEFINED_EARLY]]
+// CHECK-NEXT: }
+
+        // defined_late was defined. Just after defined_early.
+// CHECK: Symbol {
+// CHECK-NEXT:   Name: defined_late
+// CHECK-NEXT:   Type: Section (0xE)
+// CHECK-NEXT:   Section: __data (0x2)
+// CHECK-NEXT:   RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:   Flags [ (0x0)
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   Value: 0x[[DEFINED_LATE:[0-9A-F]+]]
+// CHECK-NEXT: }
+
+        // alias_to_late was an alias to defined_late. But we can resolve it.
+// CHECK: Symbol {
+// CHECK-NEXT:   Name: alias_to_late
+// CHECK-NEXT:   Type: Section (0xE)
+// CHECK-NEXT:   Section: __data (0x2)
+// CHECK-NEXT:   RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:   Flags [ (0x0)
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   Value: 0x[[DEFINED_LATE]]
+// CHECK-NEXT: }
+
+        // alias_to_local is an alias, but what it points to has no
+        // MachO representation. We must resolve it.
+// CHECK: Symbol {
+// CHECK-NEXT:   Name: alias_to_local (37)
+// CHECK-NEXT:   Type: Section (0xE)
+// CHECK-NEXT:   Section:  (0x0)
+// CHECK-NEXT:   RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:   Flags [ (0x0)
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   Value: 0x14
+// CHECK-NEXT: }
+
+        // extern_test was a pure alias to the unknown "var2".
+        // N_INDR and Extern.
+// CHECK:   Name: extern_test
+// CHECK-NEXT:   Extern
+// CHECK-NEXT:   Type: Indirect (0xA)
+// CHECK-NEXT:   Section:  (0x0)
+// CHECK-NEXT:   RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:   Flags [ (0x0)
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   Value: 0x[[VAR2_STRINGINDEX:[0-9a-f]+]]
+// CHECK-NEXT: }
+
+        // var1 was another alias to an unknown variable. Not extern this time.
+// CHECK: Symbol {
+// CHECK-NEXT:   Name: var1 (1)
+// CHECK-NEXT:   Type: Indirect (0xA)
+// CHECK-NEXT:   Section:  (0x0)
+// CHECK-NEXT:   RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:   Flags [ (0x0)
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   Value: 0x[[VAR2_STRINGINDEX]]
+// CHECK-NEXT: }
+
+        // var2 was a normal undefined (extern) symbol.
+// CHECK: Symbol {
+// CHECK-NEXT:   Name: var2
+// CHECK-NEXT:   Extern
+// CHECK-NEXT:   Type: Undef (0x0)
+// CHECK-NEXT:   Section:  (0x0)
+// CHECK-NEXT:   RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:   Flags [ (0x0)
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   Value: 0x0
+// CHECK-NEXT: }
diff --git a/test/MC/MachO/ARM/lit.local.cfg b/test/MC/MachO/ARM/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/MC/MachO/ARM/lit.local.cfg
+++ b/test/MC/MachO/ARM/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/MachO/eh-frame-reloc.s b/test/MC/MachO/eh-frame-reloc.s
index 1b1c674..e14825b 100644
--- a/test/MC/MachO/eh-frame-reloc.s
+++ b/test/MC/MachO/eh-frame-reloc.s
@@ -1,5 +1,6 @@
 // RUN: llvm-mc < %s -triple=x86_64-apple-macosx10.7 -filetype=obj | llvm-readobj -r | FileCheck %s
 // RUN: llvm-mc < %s -triple=x86_64-apple-macosx10.6 -filetype=obj | llvm-readobj -r | FileCheck %s
+// RUN: llvm-mc < %s -triple=x86_64-apple-ios7.0.0 -filetype=obj | llvm-readobj -r | FileCheck %s
 // RUN: llvm-mc < %s -triple=x86_64-apple-macosx10.5 -filetype=obj | llvm-readobj -r | FileCheck --check-prefix=OLD64 %s
 // RUN: llvm-mc < %s -triple=i686-apple-macosx10.6 -filetype=obj | llvm-readobj -r | FileCheck %s
 // RUN: llvm-mc < %s -triple=i686-apple-macosx10.5 -filetype=obj | llvm-readobj -r | FileCheck --check-prefix=OLD32 %s
@@ -16,14 +17,14 @@ _bar:
 
 // OLD32:      Relocations [
 // OLD32-NEXT:   Section __eh_frame {
-// OLD32-NEXT:     0x20 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 -
-// OLD32-NEXT:     0x0 0 2 n/a GENERIC_RELOC_PAIR 1 -
+// OLD32-NEXT:     0x20 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 0x0
+// OLD32-NEXT:     0x0 0 2 n/a GENERIC_RELOC_PAIR 1 0x20
 // OLD32-NEXT:   }
 // OLD32-NEXT: ]
 
 // OLD64:      Relocations [
 // OLD64-NEXT:   Section __eh_frame {
-// OLD64-NEXT:     0x20 0 3 1 X86_64_RELOC_SUBTRACTOR 0 _bar.eh
+// OLD64-NEXT:     0x20 0 3 0 X86_64_RELOC_SUBTRACTOR 0
 // OLD64-NEXT:     0x20 0 3 1 X86_64_RELOC_UNSIGNED 0 _bar
 // OLD64-NEXT:   }
 // OLD64-NEXT: ]
diff --git a/test/MC/MachO/eh-symbols.s b/test/MC/MachO/eh-symbols.s
deleted file mode 100644
index 6adca56..0000000
--- a/test/MC/MachO/eh-symbols.s
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: llvm-mc -filetype=obj -triple i686-apple-darwin %s  -o - | llvm-readobj -t | FileCheck %s
-
-// Make sure that the exception handling data has the same visibility as the
-// function it's generated for.
-
-	.private_extern	_main
-	.globl	_main
-_main:
-	.cfi_startproc
-	retl
-	.cfi_endproc
-
-"_-[NSString(local) isNullOrNil]":
-	.cfi_startproc
-	retl
-	.cfi_endproc
-
-// CHECK: Name: _-[NSString(local) isNullOrNil].eh
-
-// CHECK:       Name: _main
-// CHECK-NEXT:  PrivateExtern
-
-// CHECK:       Name: _main.eh
-// CHECK-NEXT:  PrivateExtern
-
diff --git a/test/MC/MachO/eh_symbol.s b/test/MC/MachO/eh_symbol.s
index 1135196..738e2b6 100644
--- a/test/MC/MachO/eh_symbol.s
+++ b/test/MC/MachO/eh_symbol.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-nm | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-nm - | FileCheck %s
 
 // test that we don't produce foo.eh symbols in a debug_frame section.
 // CHECK-NOT: _f.eh
diff --git a/test/MC/MachO/lit.local.cfg b/test/MC/MachO/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/MC/MachO/lit.local.cfg
+++ b/test/MC/MachO/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/MachO/pr19185.s b/test/MC/MachO/pr19185.s
new file mode 100644
index 0000000..fb21e51
--- /dev/null
+++ b/test/MC/MachO/pr19185.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o %t.o
+f:
+ .cfi_startproc
+ .cfi_endproc
+
+EH_frame0:
diff --git a/test/MC/MachO/variable-exprs.s b/test/MC/MachO/variable-exprs.s
index 8eeb82f..a7fa45d 100644
--- a/test/MC/MachO/variable-exprs.s
+++ b/test/MC/MachO/variable-exprs.s
@@ -202,10 +202,10 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-I386:    ),
 // CHECK-I386:     # Symbol 8
 // CHECK-I386:    (('n_strx', 1)
-// CHECK-I386:     ('n_type', 0x1)
+// CHECK-I386:     ('n_type', 0xb)
 // CHECK-I386:     ('n_sect', 0)
 // CHECK-I386:     ('n_desc', 0)
-// CHECK-I386:     ('n_value', 0)
+// CHECK-I386:     ('n_value', 4)
 // CHECK-I386:     ('_string', 'd2')
 // CHECK-I386:    ),
 // CHECK-I386:     # Symbol 9
@@ -403,10 +403,10 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 8
 // CHECK-X86_64:    (('n_strx', 1)
-// CHECK-X86_64:     ('n_type', 0x1)
+// CHECK-X86_64:     ('n_type', 0xb)
 // CHECK-X86_64:     ('n_sect', 0)
 // CHECK-X86_64:     ('n_desc', 0)
-// CHECK-X86_64:     ('n_value', 0)
+// CHECK-X86_64:     ('n_value', 4)
 // CHECK-X86_64:     ('_string', 'd2')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 9
diff --git a/test/MC/Mips/cpsetup-bad.s b/test/MC/Mips/cpsetup-bad.s
new file mode 100644
index 0000000..09252a1
--- /dev/null
+++ b/test/MC/Mips/cpsetup-bad.s
@@ -0,0 +1,14 @@
+# RUN: not llvm-mc %s -triple mips64-unknown-unknown 2>%t1
+# RUN:   FileCheck %s < %t1 -check-prefix=ASM
+
+        .text
+        .option pic2
+t1:
+        .cpsetup $bar, 8, __cerror
+# ASM: :[[@LINE-1]]:18: error: expected register containing function address
+        .cpsetup $33, 8, __cerror
+# ASM: :[[@LINE-1]]:18: error: invalid register
+        .cpsetup $31, foo, __cerror
+# ASM: :[[@LINE-1]]:23: error: expected save register or stack offset
+        .cpsetup $31, $32, __cerror
+# ASM: :[[@LINE-1]]:23: error: invalid register
diff --git a/test/MC/Mips/eh-frame.s b/test/MC/Mips/eh-frame.s
index 1671598..d6b9cf0 100644
--- a/test/MC/Mips/eh-frame.s
+++ b/test/MC/Mips/eh-frame.s
@@ -31,7 +31,7 @@ func:
 // MIPS32: 00000000
 
 // Version
-// MIPS32: 01
+// MIPS32: 03
 
 // Augmentation String
 // MIPS32: 7a5200
@@ -67,7 +67,7 @@ func:
 // MIPS32EL: 00000000
 
 // Version
-// MIPS32EL: 01
+// MIPS32EL: 03
 
 // Augmentation String
 // MIPS32EL: 7a5200
@@ -103,7 +103,7 @@ func:
 // MIPS64: 00000000
 
 // Version
-// MIPS64: 01
+// MIPS64: 03
 
 // Augmentation String
 // MIPS64: 7a5200
@@ -141,7 +141,7 @@ func:
 // MIPS64EL: 00000000
 
 // Version
-// MIPS64EL: 01
+// MIPS64EL: 03
 
 // Augmentation String
 // MIPS64EL: 7a5200
diff --git a/test/MC/Mips/elf_eflags.s b/test/MC/Mips/elf_eflags.s
index 8cf4960..36f4f9e 100644
--- a/test/MC/Mips/elf_eflags.s
+++ b/test/MC/Mips/elf_eflags.s
@@ -1,6 +1,12 @@
 # These *MUST* match the output of gas compiled with the same triple and
 # corresponding options (-mcpu=mips32 -> -mips32 for example).
 
+# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips64r6 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS64R6 %s
+# MIPSEL-MIPS64R6: Flags [ (0xA0001500)
+
+# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips64r6 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS64R6-NAN2008 %s
+# MIPSEL-MIPS64R6-NAN2008: Flags [ (0xA0001500)
+
 # RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips64r2 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS64R2 %s
 # MIPSEL-MIPS64R2: Flags [ (0x80001100)
 
@@ -13,6 +19,12 @@
 # RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips64 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS64-NAN2008 %s
 # MIPSEL-MIPS64-NAN2008: Flags [ (0x60001500)
 
+# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips32r6 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS32R6 %s
+# MIPSEL-MIPS32R6: Flags [ (0x90001400)
+
+# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips32r6 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS32R6-NAN2008 %s
+# MIPSEL-MIPS32R6-NAN2008: Flags [ (0x90001400)
+
 # RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips32r2 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS32R2 %s
 # MIPSEL-MIPS32R2: Flags [ (0x70001000)
 
@@ -55,12 +67,36 @@
 # RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64r2 -mattr=-n64,o32,+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64R2-O32-NAN2008 %s
 # MIPS64EL-MIPS64R2-O32-NAN2008: Flags [ (0x80001500)
 
+# RUN: llvm-mc -filetype=obj -triple mips64-unknown-linux -mcpu=mips5 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS5 %s
+# MIPS5: Flags [ (0x40000000)
+
+ # RUN: llvm-mc -filetype=obj -triple mips64-unknown-linux -mcpu=mips5 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS5-NAN2008 %s
+# MIPS5-NAN2008: Flags [ (0x40000400)
+
 # RUN: llvm-mc -filetype=obj -triple mips64-unknown-linux -mcpu=mips4 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS4 %s
 # MIPS4: Flags [ (0x30000000)
 
  # RUN: llvm-mc -filetype=obj -triple mips64-unknown-linux -mcpu=mips4 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS4-NAN2008 %s
 # MIPS4-NAN2008: Flags [ (0x30000400)
 
+# RUN: llvm-mc -filetype=obj -triple mips64-unknown-linux -mcpu=mips3 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS3 %s
+# MIPS3: Flags [ (0x20000000)
+
+ # RUN: llvm-mc -filetype=obj -triple mips64-unknown-linux -mcpu=mips3 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS3-NAN2008 %s
+# MIPS3-NAN2008: Flags [ (0x20000400)
+
+# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips2 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS2 %s
+# MIPSEL-MIPS2: Flags [ (0x10001000)
+
+# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips2 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS2-NAN2008 %s
+# MIPSEL-MIPS2-NAN2008: Flags [ (0x10001400)
+
+# RUN: llvm-mc -filetype=obj -triple mips-unknown-linux -mcpu=mips1 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS1 %s
+# MIPS1: Flags [ (0x1000)
+
+ # RUN: llvm-mc -filetype=obj -triple mips-unknown-linux -mcpu=mips1 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS1-NAN2008 %s
+# MIPS1-NAN2008: Flags [ (0x1400)
+
 # RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64 %s -mattr=-n64,o32 -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64-O32 %s
 # MIPS64EL-MIPS64-O32: Flags [ (0x60001100)
 
diff --git a/test/MC/Mips/lit.local.cfg b/test/MC/Mips/lit.local.cfg
index 1fa54b4..a3183a2 100644
--- a/test/MC/Mips/lit.local.cfg
+++ b/test/MC/Mips/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'Mips' in targets:
+if not 'Mips' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/Mips/mips-abi-bad.s b/test/MC/Mips/mips-abi-bad.s
new file mode 100644
index 0000000..c4653cf
--- /dev/null
+++ b/test/MC/Mips/mips-abi-bad.s
@@ -0,0 +1,20 @@
+# Error checking for malformed abi related directives
+# RUN: not llvm-mc -triple mips-unknown-unknown %s 2>&1 | FileCheck %s
+# CHECK: .text
+    .module fp=3
+# CHECK      : mips-abi-bad.s:4:16: error: unsupported option
+# CHECK-NEXT : .module fp=3
+# CHECK-NEXT :           ^
+
+    .set fp=xx,6
+# CHECK      :mips-abi-bad.s:5:15: error: unexpected token in statement
+# CHECK-NEXT :    .set fp=xx,6
+# CHECK-NEXT :              ^
+
+# CHECK       :.set mips16
+    .set mips16
+    .module fp=32
+
+# CHECK      :mips-abi-bad.s:14:13: error: .module directive must come before any code
+# CHECK-NEXT :    .module fp=32
+# CHECK-NEXT :            ^
diff --git a/test/MC/Mips/mips-data-directives.s b/test/MC/Mips/mips-data-directives.s
index 630a807..8b3e0b3 100644
--- a/test/MC/Mips/mips-data-directives.s
+++ b/test/MC/Mips/mips-data-directives.s
@@ -12,7 +12,7 @@
 
 # Checking if the data and reloations were correctly emitted
 # CHECK-OBJ:  Section {
-# CHECK-OBJ:    Name: .data (51)
+# CHECK-OBJ:    Name: .data (66)
 # CHECK-OBJ:    SectionData (
 # CHECK-OBJ:      0000: DEADC0DE DEADC0DE DEADBEEF 00000000
 # CHECK-OBJ:      0010: 00000000 00000000
@@ -20,7 +20,7 @@
 # CHECK-OBJ:  }
 
 # CHECK-OBJ:  Section {
-# CHECK-OBJ:    Name: .rel.data (47)
+# CHECK-OBJ:    Name: .rel.data (62)
 # CHECK-OBJ:    Relocations [
 # CHECK-OBJ:      0xC R_MIPS_32 .data 0x0
 # CHECK-OBJ:      0x10 R_MIPS_64 .data 0x0
diff --git a/test/MC/Mips/mips-expansions-bad.s b/test/MC/Mips/mips-expansions-bad.s
new file mode 100644
index 0000000..a137deb
--- /dev/null
+++ b/test/MC/Mips/mips-expansions-bad.s
@@ -0,0 +1,6 @@
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r2 2>%t1
+# RUN: FileCheck %s < %t1
+
+        .text
+        li $5, 0x100000000 # CHECK: :[[@LINE]]:9: error: instruction requires a CPU feature not currently enabled
+        dli $5, 1 # CHECK: :[[@LINE]]:9: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips-expansions.s b/test/MC/Mips/mips-expansions.s
index 1622965..f0a04a5 100644
--- a/test/MC/Mips/mips-expansions.s
+++ b/test/MC/Mips/mips-expansions.s
@@ -8,6 +8,8 @@
 # CHECK: addiu   $6, $zero, -2345    # encoding: [0xd7,0xf6,0x06,0x24]
 # CHECK: lui     $7, 1               # encoding: [0x01,0x00,0x07,0x3c]
 # CHECK: ori     $7, $7, 2           # encoding: [0x02,0x00,0xe7,0x34]
+# CHECK: addiu   $8, $zero, -8       # encoding: [0xf8,0xff,0x08,0x24]
+
 # CHECK: addiu   $4, $zero, 20       # encoding: [0x14,0x00,0x04,0x24]
 # CHECK: lui     $7, 1               # encoding: [0x01,0x00,0x07,0x3c]
 # CHECK: ori     $7, $7, 2           # encoding: [0x02,0x00,0xe7,0x34]
@@ -32,17 +34,28 @@
 # CHECK: addu    $1, $1, $9              # encoding: [0x21,0x08,0x29,0x00]
 # CHECK: sw      $10, 57920($1)          # encoding: [0x40,0xe2,0x2a,0xac]
 
+# CHECK: lui     $1, %hi(symbol)
+# CHECK: ldc1    $f0, %lo(symbol)($1)
+# CHECK: lui     $1, %hi(symbol)
+# CHECK: sdc1    $f0, %lo(symbol)($1)
+
     li $5,123
     li $6,-2345
     li $7,65538
+    li $8, ~7
 
     la $a0, 20
     la $7,65538
     la $a0, 20($a1)
     la $7,65538($8)
 
+    .set noat
     lw  $t2, symbol($a0)
+    .set at
     sw  $t2, symbol($t1)
 
     lw  $t2, 655483($a0)
     sw  $t2, 123456($t1)
+
+    ldc1 $f0, symbol
+    sdc1 $f0, symbol
diff --git a/test/MC/Mips/mips-noat.s b/test/MC/Mips/mips-noat.s
index b83c517..07db251 100644
--- a/test/MC/Mips/mips-noat.s
+++ b/test/MC/Mips/mips-noat.s
@@ -10,11 +10,10 @@
 test1:
         lw      $2, 65536($2)
 
-# FIXME: It would be better if the error pointed at the mnemonic instead of the newline
-# ERROR: mips-noat.s:[[@LINE+4]]:1: error: Pseudo instruction requires $at, which is not available
 test2:
         .set noat
-        lw      $2, 65536($2)
+        lw      $2, 65536($2) # ERROR: mips-noat.s:[[@LINE]]:9: error: Pseudo instruction requires $at, which is not available
+
 
 # Can we switch it back on successfully?
 # CHECK-LABEL: test3:
@@ -25,10 +24,6 @@ test3:
         .set at
         lw      $2, 65536($2)
 
-# FIXME: It would be better if the error pointed at the mnemonic instead of the newline
-# ERROR: mips-noat.s:[[@LINE+4]]:1: error: Pseudo instruction requires $at, which is not available
 test4:
         .set at=$0
-        lw      $2, 65536($2)
-
-# ERROR-NOT: error
+        lw      $2, 65536($2) # ERROR: mips-noat.s:[[@LINE]]:9: error: Pseudo instruction requires $at, which is not available
diff --git a/test/MC/Mips/mips1/invalid-mips2.s b/test/MC/Mips/mips1/invalid-mips2.s
index 6c3e80a..7db261d 100644
--- a/test/MC/Mips/mips1/invalid-mips2.s
+++ b/test/MC/Mips/mips1/invalid-mips2.s
@@ -21,3 +21,4 @@
         tnei      $t4,-29647      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         trunc.w.d $f22,$f15       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         trunc.w.s $f28,$f30       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        sync                      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips1/invalid-mips32.s b/test/MC/Mips/mips1/invalid-mips32.s
new file mode 100644
index 0000000..4ad8d63
--- /dev/null
+++ b/test/MC/Mips/mips1/invalid-mips32.s
@@ -0,0 +1,10 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+        .set noat
+
+        sync 0                    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        sync 1                    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips1/invalid-mips4.s b/test/MC/Mips/mips1/invalid-mips4.s
index 61aaf58..9f246bc 100644
--- a/test/MC/Mips/mips1/invalid-mips4.s
+++ b/test/MC/Mips/mips1/invalid-mips4.s
@@ -4,7 +4,9 @@
 # RUN:     2>%t1
 # RUN: FileCheck %s < %t1
 
-	.set noat
+        .set noat
+        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         ceil.l.d  $f1,$f3           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ceil.l.s  $f18,$f13         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ceil.w.d  $f11,$f25         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -50,15 +52,20 @@
         floor.w.s $f8,$f9           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ldxc1     $f8,$s7($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         lwxc1     $f12,$s1($s8)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f10,$fcc5    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$8,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$8,$fcc7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f10,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.d    $f6,$f10,$fcc5    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.s    $f23,$f5,$fcc6    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movn      $v1,$s1,$s0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d    $f26,$f20,$k0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s    $f12,$f0,$s7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movt.d    $f0,$f2,$fcc0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movz      $a1,$s6,$9        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d    $f12,$f29,$9      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s    $f25,$f7,$v1      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips1/invalid-mips5.s b/test/MC/Mips/mips1/invalid-mips5.s
index 1eddf02..af5b278 100644
--- a/test/MC/Mips/mips1/invalid-mips5.s
+++ b/test/MC/Mips/mips1/invalid-mips5.s
@@ -4,7 +4,9 @@
 # RUN:     2>%t1
 # RUN: FileCheck %s < %t1
 
-	.set noat
+        .set noat
+        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         ceil.l.d  $f1,$f3           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ceil.l.s  $f18,$f13         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ceil.w.d  $f11,$f25         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -49,15 +51,20 @@
         ldxc1     $f8,$s7($t3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         luxc1     $f19,$s6($s5)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         lwxc1     $f12,$s1($s8)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$a0,$fcc7     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$8,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$8,$fcc7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f10,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.d    $f6,$f10,$fcc5    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.s    $f23,$f5,$fcc6    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movn      $v1,$s1,$s0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d    $f27,$f21,$k0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s    $f12,$f0,$s7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movt.d    $f0,$f2,$fcc0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movz      $a1,$s6,$a3       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d    $f12,$f29,$a3     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s    $f25,$f7,$v1      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips1/valid.s b/test/MC/Mips/mips1/valid.s
index 473e6b9..66e11ba 100644
--- a/test/MC/Mips/mips1/valid.s
+++ b/test/MC/Mips/mips1/valid.s
@@ -9,8 +9,18 @@
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
+        addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
         and       $s7,$v0,$12
+        and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
+        bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
+        bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
         c.sf.d    $f30,$f0
@@ -36,7 +46,7 @@
         li        $zero,-29889
         lw        $8,5674($a1)
         lwc1      $f16,10225($k0)
-        lwc2      $18,-841($a2)
+        lwc2      $18,-841($a2)        # CHECK: lwc2 $18, -841($6)     # encoding: [0xc8,0xd2,0xfc,0xb7]
         lwc3      $10,-32265($k0)
         lwl       $s4,-4231($15)
         lwr       $zero,-19147($gp)
@@ -65,6 +75,7 @@
         nop
         nor       $a3,$zero,$a3
         or        $12,$s0,$sp
+        or        $2, 4                # CHECK: ori $2, $2, 4          # encoding: [0x34,0x42,0x00,0x04]
         sb        $s6,-19857($14)
         sh        $14,-6704($15)
         sll       $a3,18               # CHECK: sll $7, $7, 18         # encoding: [0x00,0x07,0x3c,0x80]
@@ -91,7 +102,7 @@
         subu      $sp,$s6,$s6
         sw        $ra,-10160($sp)
         swc1      $f6,-8465($24)
-        swc2      $25,24880($s0)
+        swc2      $25,24880($s0)       # CHECK: swc2 $25, 24880($16)   # encoding: [0xea,0x19,0x61,0x30]
         swc3      $10,-32265($k0)
         swl       $15,13694($s3)
         swr       $s1,-26590($14)
diff --git a/test/MC/Mips/mips2/invalid-mips3-wrong-error.s b/test/MC/Mips/mips2/invalid-mips3-wrong-error.s
index a3f829b..3eb4ef3 100644
--- a/test/MC/Mips/mips2/invalid-mips3-wrong-error.s
+++ b/test/MC/Mips/mips2/invalid-mips3-wrong-error.s
@@ -7,7 +7,6 @@
 
 	.set noat
         dmult     $s7,$a5           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        dsub      $a3,$s6,$a4       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         ld        $sp,-28645($s1)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         ldl       $t8,-4167($t8)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         ldr       $t2,-30358($s4)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips2/invalid-mips3.s b/test/MC/Mips/mips2/invalid-mips3.s
index ef498d7..458c416 100644
--- a/test/MC/Mips/mips2/invalid-mips3.s
+++ b/test/MC/Mips/mips2/invalid-mips3.s
@@ -38,6 +38,7 @@
         dsrl32     $s3,23            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         dsrl32     $s3,$6,23         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         dsrlv      $s3,$t2,$s4       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dsub       $a3,$s6,$a4       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         dsubu      $a1,$a1,$k0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         eret                         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         floor.l.d  $f26,$f7          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips2/invalid-mips32.s b/test/MC/Mips/mips2/invalid-mips32.s
index 2975c68..43ea345 100644
--- a/test/MC/Mips/mips2/invalid-mips32.s
+++ b/test/MC/Mips/mips2/invalid-mips32.s
@@ -1,28 +1,38 @@
 # Instructions that are invalid
 #
-# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips2 \
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips2 \
 # RUN:     2>%t1
 # RUN: FileCheck %s < %t1
 
-	.set noat
+        .set noat
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         clo       $11,$a1         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         clz       $sp,$gp         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         deret                     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         eret                      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        jr.hb     $4              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        jalr.hb   $4              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        jalr.hb   $4, $5          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         madd      $s6,$13         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         madd      $zero,$9        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         maddu     $s3,$gp         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         maddu     $24,$s2         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         mfc0      $a2,$14,1       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$8,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movz      $a1,$s6,$9      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d    $f12,$f29,$9    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -30,3 +40,5 @@
         msubu     $15,$a1         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         mtc0      $9,$29,3        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         mul       $s0,$s4,$at     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        sync      0               # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        sync      1               # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips2/invalid-mips32r2.s b/test/MC/Mips/mips2/invalid-mips32r2.s
index 37f2eed..72a570a 100644
--- a/test/MC/Mips/mips2/invalid-mips32r2.s
+++ b/test/MC/Mips/mips2/invalid-mips32r2.s
@@ -1,10 +1,12 @@
 # Instructions that are invalid
 #
-# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips2 \
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips2 \
 # RUN:     2>%t1
 # RUN: FileCheck %s < %t1
 
-	.set noat
+        .set noat
+        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         clo     $t3,$a1             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         clz     $sp,$gp             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         cvt.l.d $f24,$f15           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -24,15 +26,20 @@
         maddu   $t8,$s2             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         mfc0    $a2,$14,1           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         mfhc1   $s8,$f24            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf    $gp,$t0,$fcc7       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d  $f6,$f11,$fcc5      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s  $f23,$f5,$fcc6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf    $gp,$8,$fcc0        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf    $gp,$8,$fcc7        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d  $f6,$f11,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.d  $f6,$f11,$fcc5      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s  $f23,$f5,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.s  $f23,$f5,$fcc6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movn    $v1,$s1,$s0         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d  $f27,$f21,$k0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s  $f12,$f0,$s7        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt    $zero,$s4,$fcc5     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt    $zero,$s4,$fcc0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt    $zero,$s4,$fcc5     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movt.d  $f0,$f2,$fcc0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s  $f30,$f2,$fcc1      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s  $f30,$f2,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s  $f30,$f2,$fcc1      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movz    $a1,$s6,$t1         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d  $f12,$f29,$t1       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s  $f25,$f7,$v1        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips2/invalid-mips4.s b/test/MC/Mips/mips2/invalid-mips4.s
index e2eb672..13923f0 100644
--- a/test/MC/Mips/mips2/invalid-mips4.s
+++ b/test/MC/Mips/mips2/invalid-mips4.s
@@ -4,7 +4,9 @@
 # RUN:     2>%t1
 # RUN: FileCheck %s < %t1
 
-	.set noat
+        .set noat
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         ceil.l.d  $f1,$f3         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ceil.l.s  $f18,$f13       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         cvt.d.l   $f4,$f16        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -46,15 +48,20 @@
         floor.l.s $f12,$f5        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ldxc1     $f8,$s7($15)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         lwxc1     $f12,$s1($s8)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$8,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movz      $a1,$s6,$9      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d    $f12,$f29,$9    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips2/invalid-mips5.s b/test/MC/Mips/mips2/invalid-mips5.s
index f777ffe..8f460c7 100644
--- a/test/MC/Mips/mips2/invalid-mips5.s
+++ b/test/MC/Mips/mips2/invalid-mips5.s
@@ -4,7 +4,9 @@
 # RUN:     2>%t1
 # RUN: FileCheck %s < %t1
 
-	.set noat
+        .set noat
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         ceil.l.d  $f1,$f3         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ceil.l.s  $f18,$f13       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         cvt.d.l   $f4,$f16        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -45,15 +47,20 @@
         ldxc1     $f8,$s7($t3)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         luxc1     $f19,$s6($s5)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         lwxc1     $f12,$s1($s8)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$a0,$fcc7   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$a0,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$a0,$fcc7   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movz      $a1,$s6,$a1     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d    $f12,$f29,$a1   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips2/valid.s b/test/MC/Mips/mips2/valid.s
index e3effde..9c3706e 100644
--- a/test/MC/Mips/mips2/valid.s
+++ b/test/MC/Mips/mips2/valid.s
@@ -9,8 +9,18 @@
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
+        addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
         and       $s7,$v0,$12
+        and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
+        bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
+        bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
         c.sf.d    $f30,$f0
@@ -35,16 +45,16 @@
         lb        $24,-14515($10)
         lbu       $8,30195($v1)
         ldc1      $f11,16391($s0)
-        ldc2      $8,-21181($at)
+        ldc2      $8,-21181($at)        # CHECK: ldc2 $8, -21181($1)   # encoding: [0xd8,0x28,0xad,0x43]
         ldc3      $29,-28645($s1)
         lh        $11,-8556($s5)
         lhu       $s3,-22851($v0)
         li        $at,-29773
         li        $zero,-29889
-        ll        $v0,-7321($s2)
+        ll        $v0,-7321($s2)       # CHECK: ll $2, -7321($18)     # encoding: [0xc2,0x42,0xe3,0x67]
         lw        $8,5674($a1)
         lwc1      $f16,10225($k0)
-        lwc2      $18,-841($a2)
+        lwc2      $18,-841($a2)        # CHECK: lwc2 $18, -841($6)     # encoding: [0xc8,0xd2,0xfc,0xb7]
         lwc3      $10,-32265($k0)
         lwl       $s4,-4231($15)
         lwr       $zero,-19147($gp)
@@ -73,12 +83,13 @@
         nop
         nor       $a3,$zero,$a3
         or        $12,$s0,$sp
+        or        $2, 4                # CHECK: ori $2, $2, 4          # encoding: [0x34,0x42,0x00,0x04]
         round.w.d $f6,$f4
         round.w.s $f27,$f28
         sb        $s6,-19857($14)
-        sc        $15,18904($s3)
+        sc        $15,18904($s3)       # CHECK: sc $15, 18904($19)     # encoding: [0xe2,0x6f,0x49,0xd8]
         sdc1      $f31,30574($13)
-        sdc2      $20,23157($s2)
+        sdc2      $20,23157($s2)       # CHECK: sdc2 $20, 23157($18)   # encoding: [0xfa,0x54,0x5a,0x75]
         sdc3      $12,5835($10)
         sh        $14,-6704($15)
         sll       $a3,18               # CHECK: sll $7, $7, 18         # encoding: [0x00,0x07,0x3c,0x80]
@@ -107,10 +118,11 @@
         subu      $sp,$s6,$s6
         sw        $ra,-10160($sp)
         swc1      $f6,-8465($24)
-        swc2      $25,24880($s0)
+        swc2      $25,24880($s0)       # CHECK: swc2 $25, 24880($16)   # encoding: [0xea,0x19,0x61,0x30]
         swc3      $10,-32265($k0)
         swl       $15,13694($s3)
         swr       $s1,-26590($14)
+        sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
         teqi      $s5,-17504
         tgei      $s1,5025
         tgeiu     $sp,-28621
diff --git a/test/MC/Mips/mips3/invalid-mips32.s b/test/MC/Mips/mips3/invalid-mips32.s
new file mode 100644
index 0000000..3acd765
--- /dev/null
+++ b/test/MC/Mips/mips3/invalid-mips32.s
@@ -0,0 +1,10 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips3 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+        .set noat
+
+        sync 0                    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        sync 1                    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips3/invalid-mips4.s b/test/MC/Mips/mips3/invalid-mips4.s
index 6e15d79..9cd92d3 100644
--- a/test/MC/Mips/mips3/invalid-mips4.s
+++ b/test/MC/Mips/mips3/invalid-mips4.s
@@ -4,20 +4,27 @@
 # RUN:     2>%t1
 # RUN: FileCheck %s < %t1
 
-	.set noat
+        .set noat
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         ldxc1     $f8,$s7($15)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         lwxc1     $f12,$s1($s8)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$8,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movz      $a1,$s6,$9     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movz.d    $f12,$f29,$9   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movz      $a1,$s6,$9      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movz.d    $f12,$f29,$9    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         sdxc1     $f11,$10($14)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         swxc1     $f19,$12($k0)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips3/invalid-mips5.s b/test/MC/Mips/mips3/invalid-mips5.s
index d25621b..307eee8 100644
--- a/test/MC/Mips/mips3/invalid-mips5.s
+++ b/test/MC/Mips/mips3/invalid-mips5.s
@@ -4,19 +4,26 @@
 # RUN:     2>%t1
 # RUN: FileCheck %s < %t1
 
-	.set noat
+        .set noat
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         ldxc1     $f8,$s7($t3)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         luxc1     $f19,$s6($s5)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         lwxc1     $f12,$s1($s8)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$a4,$fcc7   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$8,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         movz      $a1,$s6,$a5     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d    $f12,$f29,$a5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips3/valid.s b/test/MC/Mips/mips3/valid.s
index 2067666..cb209fd 100644
--- a/test/MC/Mips/mips3/valid.s
+++ b/test/MC/Mips/mips3/valid.s
@@ -9,8 +9,19 @@
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
+        addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
         and       $s7,$v0,$12
+        and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
+        bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
+        bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
         c.sf.d    $f30,$f0
@@ -32,7 +43,11 @@
         cvt.w.d   $f20,$f14
         cvt.w.s   $f20,$f24
         dadd      $s3,$at,$ra
+        dadd      $sp,$s4,-27705       # CHECK: daddi $sp, $20, -27705 # encoding: [0x62,0x9d,0x93,0xc7]
+        dadd      $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddi     $sp,$s4,-27705
+        daddi     $sp,$s4,-27705       # CHECK: daddi $sp, $20, -27705 # encoding: [0x62,0x9d,0x93,0xc7]
+        daddi     $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddiu    $k0,$s6,-4586
         daddu     $s3,$at,$ra
         ddiv      $zero,$k0,$s3
@@ -64,6 +79,10 @@
         dsrl32    $s3,$6,23            # CHECK: dsrl32 $19, $6, 23          # encoding: [0x00,0x06,0x9d,0xfe]
         dsrlv     $s3,$6,$s4           # CHECK: dsrlv $19, $6, $20          # encoding: [0x02,0x86,0x98,0x16]
         dsub      $a3,$s6,$8
+        dsub      $sp,$s4,-27705       # CHECK: daddi $sp, $20, 27705  # encoding: [0x62,0x9d,0x6c,0x39]
+        dsub      $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
+        dsubi     $sp,$s4,-27705       # CHECK: daddi $sp, $20, 27705  # encoding: [0x62,0x9d,0x6c,0x39]
+        dsubi     $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
         dsubu     $a1,$a1,$k0
         ehb                            # CHECK: ehb # encoding:  [0x00,0x00,0x00,0xc0]
         eret
@@ -75,18 +94,18 @@
         lbu       $8,30195($v1)
         ld        $sp,-28645($s1)
         ldc1      $f11,16391($s0)
-        ldc2      $8,-21181($at)
+        ldc2      $8,-21181($at)        # CHECK: ldc2 $8, -21181($1)   # encoding: [0xd8,0x28,0xad,0x43]
         ldl       $24,-4167($24)
         ldr       $14,-30358($s4)
         lh        $11,-8556($s5)
         lhu       $s3,-22851($v0)
         li        $at,-29773
         li        $zero,-29889
-        ll        $v0,-7321($s2)
-        lld       $zero,-14736($ra)
+        ll        $v0,-7321($s2)       # CHECK: ll $2, -7321($18)     # encoding: [0xc2,0x42,0xe3,0x67]
+        lld       $zero,-14736($ra)    # CHECK: lld $zero, -14736($ra) # encoding: [0xd3,0xe0,0xc6,0x70]
         lw        $8,5674($a1)
         lwc1      $f16,10225($k0)
-        lwc2      $18,-841($a2)
+        lwc2      $18,-841($a2)        # CHECK: lwc2 $18, -841($6)     # encoding: [0xc8,0xd2,0xfc,0xb7]
         lwl       $s4,-4231($15)
         lwr       $zero,-19147($gp)
         lwu       $s3,-24086($v1)
@@ -117,16 +136,17 @@
         nop
         nor       $a3,$zero,$a3
         or        $12,$s0,$sp
+        or        $2, 4                # CHECK: ori $2, $2, 4          # encoding: [0x34,0x42,0x00,0x04]
         round.l.d $f12,$f1
         round.l.s $f25,$f5
         round.w.d $f6,$f4
         round.w.s $f27,$f28
         sb        $s6,-19857($14)
-        sc        $15,18904($s3)
-        scd       $15,-8243($sp)
+        sc        $15,18904($s3)       # CHECK: sc $15, 18904($19)     # encoding: [0xe2,0x6f,0x49,0xd8]
+        scd       $15,-8243($sp)       # CHECK: scd $15, -8243($sp)    # encoding: [0xf3,0xaf,0xdf,0xcd]
         sd        $12,5835($10)
         sdc1      $f31,30574($13)
-        sdc2      $20,23157($s2)
+        sdc2      $20,23157($s2)       # CHECK: sdc2 $20, 23157($18)   # encoding: [0xfa,0x54,0x5a,0x75]
         sdl       $a3,-20961($s8)
         sdr       $11,-20423($12)
         sh        $14,-6704($15)
@@ -156,9 +176,10 @@
         subu      $sp,$s6,$s6
         sw        $ra,-10160($sp)
         swc1      $f6,-8465($24)
-        swc2      $25,24880($s0)
+        swc2      $25,24880($s0)       # CHECK: swc2 $25, 24880($16)   # encoding: [0xea,0x19,0x61,0x30]
         swl       $15,13694($s3)
         swr       $s1,-26590($14)
+        sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
         teqi      $s5,-17504
         tgei      $s1,5025
         tgeiu     $sp,-28621
diff --git a/test/MC/Mips/mips32/abiflags.s b/test/MC/Mips/mips32/abiflags.s
new file mode 100644
index 0000000..896dd84
--- /dev/null
+++ b/test/MC/Mips/mips32/abiflags.s
@@ -0,0 +1,37 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+#
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -filetype=obj -o - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:     FileCheck %s -check-prefix=CHECK-OBJ
+
+# CHECK-ASM: .module fp=32
+
+# Checking if the Mips.abiflags were correctly emitted.
+# CHECK-OBJ:  Section {
+# CHECK-OBJ:    Index: 5
+# CHECK-OBJ:    Name: .MIPS.abiflags (12)
+# CHECK-OBJ:    Type:  (0x7000002A)
+# CHECK-OBJ:     Flags [ (0x2)
+# CHECK-OBJ:      SHF_ALLOC (0x2)
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    Address: 0x0
+# CHECK-OBJ:    Offset: 0x50
+# CHECK-OBJ:    Size: 24
+# CHECK-OBJ:    Link: 0
+# CHECK-OBJ:    Info: 0
+# CHECK-OBJ:    AddressAlignment: 8
+# CHECK-OBJ:    EntrySize: 0
+# CHECK-OBJ:    Relocations [
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    SectionData (
+# CHECK-OBJ:      0000: 00002001 01010001 00000000 00000000  |.. .............|
+# CHECK-OBJ:      0010: 00000000 00000000                    |........|
+# CHECK-OBJ:    )
+# CHECK-OBJ:  }
+
+        .module fp=32
+
+# FIXME: Test should include gnu_attributes directive when implemented.
+#        An explicit .gnu_attribute must be checked against the effective
+#        command line options and any inconsistencies reported via a warning.
diff --git a/test/MC/Mips/mips32/valid.s b/test/MC/Mips/mips32/valid.s
index bc29bdc..d330905 100644
--- a/test/MC/Mips/mips32/valid.s
+++ b/test/MC/Mips/mips32/valid.s
@@ -9,8 +9,21 @@
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
+        addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
         and       $s7,$v0,$12
+        and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
+        bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1f      $fcc1, 4             # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01]
+        bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1t      $fcc1, 4             # CHECK: bc1t $fcc1, 4 # encoding: [0x45,0x05,0x00,0x01]
+        bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
+        bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
         c.sf.d    $f30,$f0
@@ -18,8 +31,8 @@
         ceil.w.d  $f11,$f25
         ceil.w.s  $f6,$f20
         cfc1      $s1,$21
-        clo       $11,$a1
-        clz       $sp,$gp
+        clo       $11,$a1              # CHECK: clo $11, $5   # encoding: [0x70,0xab,0x58,0x21]
+        clz       $sp,$gp              # CHECK: clz $sp, $gp  # encoding: [0x73,0x9d,0xe8,0x20]
         ctc1      $a2,$26
         cvt.d.s   $f22,$f28
         cvt.d.w   $f26,$f11
@@ -39,15 +52,15 @@
         lb        $24,-14515($10)
         lbu       $8,30195($v1)
         ldc1      $f11,16391($s0)
-        ldc2      $8,-21181($at)
+        ldc2      $8,-21181($at)        # CHECK: ldc2 $8, -21181($1)   # encoding: [0xd8,0x28,0xad,0x43]
         lh        $11,-8556($s5)
         lhu       $s3,-22851($v0)
         li        $at,-29773
         li        $zero,-29889
-        ll        $v0,-7321($s2)
+        ll        $v0,-7321($s2)       # CHECK: ll $2, -7321($18)     # encoding: [0xc2,0x42,0xe3,0x67]
         lw        $8,5674($a1)
         lwc1      $f16,10225($k0)
-        lwc2      $18,-841($a2)
+        lwc2      $18,-841($a2)        # CHECK: lwc2 $18, -841($6)     # encoding: [0xc8,0xd2,0xfc,0xb7]
         lwl       $s4,-4231($15)
         lwr       $zero,-19147($gp)
         madd      $s6,$13
@@ -96,12 +109,16 @@
         nop
         nor       $a3,$zero,$a3
         or        $12,$s0,$sp
+        or        $2, 4                # CHECK: ori $2, $2, 4          # encoding: [0x34,0x42,0x00,0x04]
+        pref      1, 8($5)             # CHECK: pref 1, 8($5)          # encoding: [0xcc,0xa1,0x00,0x08]
         round.w.d $f6,$f4
         round.w.s $f27,$f28
         sb        $s6,-19857($14)
-        sc        $15,18904($s3)
+        sc        $15,18904($s3)       # CHECK: sc $15, 18904($19)     # encoding: [0xe2,0x6f,0x49,0xd8]
+        sdbbp                          # CHECK: sdbbp                  # encoding: [0x70,0x00,0x00,0x3f]
+        sdbbp     34                   # CHECK: sdbbp 34               # encoding: [0x70,0x00,0x08,0xbf]
         sdc1      $f31,30574($13)
-        sdc2      $20,23157($s2)
+        sdc2      $20,23157($s2)       # CHECK: sdc2 $20, 23157($18)   # encoding: [0xfa,0x54,0x5a,0x75]
         sh        $14,-6704($15)
         sll       $a3,18               # CHECK: sll $7, $7, 18         # encoding: [0x00,0x07,0x3c,0x80]
         sll       $a3,$zero,18         # CHECK: sll $7, $zero, 18      # encoding: [0x00,0x00,0x3c,0x80]
@@ -129,9 +146,11 @@
         subu      $sp,$s6,$s6
         sw        $ra,-10160($sp)
         swc1      $f6,-8465($24)
-        swc2      $25,24880($s0)
+        swc2      $25,24880($s0)       # CHECK: swc2 $25, 24880($16)   # encoding: [0xea,0x19,0x61,0x30]
         swl       $15,13694($s3)
         swr       $s1,-26590($14)
+        sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
+        sync      1                    # CHECK: sync 1                 # encoding: [0x00,0x00,0x00,0x4f]
         teqi      $s5,-17504
         tgei      $s1,5025
         tgeiu     $sp,-28621
diff --git a/test/MC/Mips/mips32r2/abiflags.s b/test/MC/Mips/mips32r2/abiflags.s
new file mode 100644
index 0000000..41a809a
--- /dev/null
+++ b/test/MC/Mips/mips32r2/abiflags.s
@@ -0,0 +1,38 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+#
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -filetype=obj -o - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:     FileCheck %s -check-prefix=CHECK-OBJ
+
+# CHECK-ASM: .module fp=32
+# CHECK-ASM: .set fp=64
+
+# Checking if the Mips.abiflags were correctly emitted.
+# CHECK-OBJ:  Section {
+# CHECK-OBJ:    Index: 5
+# CHECK-OBJ:    Name: .MIPS.abiflags (12)
+# CHECK-OBJ:    Type:  (0x7000002A)
+# CHECK-OBJ:     Flags [ (0x2)
+# CHECK-OBJ:      SHF_ALLOC (0x2)
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    Address: 0x0
+# CHECK-OBJ:    Offset: 0x50
+# CHECK-OBJ:    Size: 24
+# CHECK-OBJ:    Link: 0
+# CHECK-OBJ:    Info: 0
+# CHECK-OBJ:    AddressAlignment: 8
+# CHECK-OBJ:    EntrySize: 0
+# CHECK-OBJ:    Relocations [
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    SectionData (
+# CHECK-OBJ:      0000: 00002002 01010001 00000000 00000000  |.. .............|
+# CHECK-OBJ:      0010: 00000000 00000000                    |........|
+# CHECK-OBJ:    )
+# CHECK-OBJ:  }
+
+        .module fp=32
+        .set fp=64
+# FIXME: Test should include gnu_attributes directive when implemented.
+#        An explicit .gnu_attribute must be checked against the effective
+#        command line options and any inconsistencies reported via a warning.
diff --git a/test/MC/Mips/mips32r2/invalid.s b/test/MC/Mips/mips32r2/invalid.s
new file mode 100644
index 0000000..ebccc43
--- /dev/null
+++ b/test/MC/Mips/mips32r2/invalid.s
@@ -0,0 +1,10 @@
+# Instructions that are valid for the current ISA but should be rejected by the assembler (e.g.
+# invalid set of operands or operand's restrictions not met).
+
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -mcpu=mips32r2 2>%t1
+# RUN: FileCheck %s < %t1 -check-prefix=ASM
+
+        .text
+        .set noreorder
+        jalr.hb $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
+        jalr.hb $31, $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
diff --git a/test/MC/Mips/mips32r2/valid.s b/test/MC/Mips/mips32r2/valid.s
index 26f8b6b..631c691 100644
--- a/test/MC/Mips/mips32r2/valid.s
+++ b/test/MC/Mips/mips32r2/valid.s
@@ -9,8 +9,21 @@
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
+        addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
         and       $s7,$v0,$12
+        and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
+        bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1f      $fcc1, 4             # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01]
+        bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1t      $fcc1, 4             # CHECK: bc1t $fcc1, 4 # encoding: [0x45,0x05,0x00,0x01]
+        bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
+        bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
         c.sf.d    $f30,$f0
@@ -18,8 +31,8 @@
         ceil.w.d  $f11,$f25
         ceil.w.s  $f6,$f20
         cfc1      $s1,$21
-        clo       $11,$a1
-        clz       $sp,$gp
+        clo       $11,$a1              # CHECK: clo $11, $5   # encoding: [0x70,0xab,0x58,0x21]
+        clz       $sp,$gp              # CHECK: clz $sp, $gp  # encoding: [0x73,0x9d,0xe8,0x20]
         ctc1      $a2,$26
         cvt.d.s   $f22,$f28
         cvt.d.w   $f26,$f11
@@ -40,20 +53,23 @@
         eret
         floor.w.d $f14,$f11
         floor.w.s $f8,$f9
+        jr.hb     $4                   # CHECK: jr.hb  $4 # encoding: [0x00,0x80,0x04,0x08]
+        jalr.hb   $4                   # CHECK: jalr.hb  $4 # encoding: [0x00,0x80,0xfc,0x09]
+        jalr.hb   $4, $5               # CHECK: jalr.hb  $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
         lb        $24,-14515($10)
         lbu       $8,30195($v1)
         ldc1      $f11,16391($s0)
-        ldc2      $8,-21181($at)
+        ldc2      $8,-21181($at)        # CHECK: ldc2 $8, -21181($1)   # encoding: [0xd8,0x28,0xad,0x43]
         ldxc1     $f8,$s7($15)
         lh        $11,-8556($s5)
         lhu       $s3,-22851($v0)
         li        $at,-29773
         li        $zero,-29889
-        ll        $v0,-7321($s2)
+        ll        $v0,-7321($s2)       # CHECK: ll $2, -7321($18)     # encoding: [0xc2,0x42,0xe3,0x67]
         luxc1     $f19,$s6($s5)
         lw        $8,5674($a1)
         lwc1      $f16,10225($k0)
-        lwc2      $18,-841($a2)
+        lwc2      $18,-841($a2)        # CHECK: lwc2 $18, -841($6)     # encoding: [0xc8,0xd2,0xfc,0xb7]
         lwl       $s4,-4231($15)
         lwr       $zero,-19147($gp)
         lwxc1     $f12,$s1($s8)
@@ -113,7 +129,9 @@
         nop
         nor       $a3,$zero,$a3
         or        $12,$s0,$sp
+        or        $2, 4                # CHECK: ori $2, $2, 4           # encoding: [0x34,0x42,0x00,0x04]
         pause                          # CHECK: pause # encoding:  [0x00,0x00,0x01,0x40]
+        pref      1, 8($5)             # CHECK: pref 1, 8($5)           # encoding: [0xcc,0xa1,0x00,0x08]
         rdhwr     $sp,$11              
         rotr      $1,15                # CHECK: rotr $1, $1, 15         # encoding: [0x00,0x21,0x0b,0xc2]
         rotr      $1,$14,15            # CHECK: rotr $1, $14, 15        # encoding: [0x00,0x2e,0x0b,0xc2]
@@ -121,9 +139,11 @@
         round.w.d $f6,$f4
         round.w.s $f27,$f28
         sb        $s6,-19857($14)
-        sc        $15,18904($s3)
+        sc        $15,18904($s3)       # CHECK: sc $15, 18904($19)     # encoding: [0xe2,0x6f,0x49,0xd8]
+        sdbbp                          # CHECK: sdbbp                  # encoding: [0x70,0x00,0x00,0x3f]
+        sdbbp     34                   # CHECK: sdbbp 34               # encoding: [0x70,0x00,0x08,0xbf]
         sdc1      $f31,30574($13)
-        sdc2      $20,23157($s2)
+        sdc2      $20,23157($s2)       # CHECK: sdc2 $20, 23157($18)   # encoding: [0xfa,0x54,0x5a,0x75]
         sdxc1     $f11,$10($14)
         seb       $25,$15
         seh       $v1,$12
@@ -155,10 +175,12 @@
         suxc1     $f12,$k1($13)
         sw        $ra,-10160($sp)
         swc1      $f6,-8465($24)
-        swc2      $25,24880($s0)
+        swc2      $25,24880($s0)       # CHECK: swc2 $25, 24880($16)   # encoding: [0xea,0x19,0x61,0x30]
         swl       $15,13694($s3)
         swr       $s1,-26590($14)
         swxc1     $f19,$12($k0)
+        sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
+        sync      1                    # CHECK: sync 1                 # encoding: [0x00,0x00,0x00,0x4f]
         teqi      $s5,-17504
         tgei      $s1,5025
         tgeiu     $sp,-28621
diff --git a/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s
index aee068a..52fa5f5 100644
--- a/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s
+++ b/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s
@@ -5,6 +5,8 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
+        bc2f      4                   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2t      4                   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
         lwl       $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         lwr       $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         swl       $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips32r6/invalid-mips1.s b/test/MC/Mips/mips32r6/invalid-mips1.s
index aa7d407..44d4fbb 100644
--- a/test/MC/Mips/mips32r6/invalid-mips1.s
+++ b/test/MC/Mips/mips32r6/invalid-mips1.s
@@ -6,3 +6,19 @@
 
 	.set noat
         addi      $13,$9,26322        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.ngl.d   $f29,$f29           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.ngle.d  $f0,$f16            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.sf.d    $f30,$f0            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.sf.s    $f14,$f22           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mfhi      $s3                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mfhi      $sp                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mflo      $s1                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mthi      $s1                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mtlo      $25                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mtlo      $sp                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mult      $sp,$s4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mult      $sp,$v0             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        multu     $9,$s2              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        multu     $gp,$k0             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+#       div has been re-encoded. See valid.s
+#       divu has been re-encoded. See valid.s
diff --git a/test/MC/Mips/mips32r6/invalid-mips2.s b/test/MC/Mips/mips32r6/invalid-mips2.s
index 0638e78..bfa2c4c 100644
--- a/test/MC/Mips/mips32r6/invalid-mips2.s
+++ b/test/MC/Mips/mips32r6/invalid-mips2.s
@@ -6,9 +6,21 @@
 
 	.set noat
         addi      $13,$9,26322        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mfhi      $s3                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mfhi      $sp                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mflo      $s1                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mthi      $s1                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mtlo      $25                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mtlo      $sp                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mult      $sp,$s4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mult      $sp,$v0             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        multu     $9,$s2              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        multu     $gp,$k0             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         teqi      $s5,-17504          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tgei      $s1,5025            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tgeiu     $sp,-28621          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tlti      $14,-21059          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tltiu     $ra,-5076           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tnei      $12,-29647          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+#       div has been re-encoded. See valid.s
+#       divu has been re-encoded. See valid.s
diff --git a/test/MC/Mips/mips32r6/invalid-mips32-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips32-wrong-error.s
index e416a20..e63bdd4 100644
--- a/test/MC/Mips/mips32r6/invalid-mips32-wrong-error.s
+++ b/test/MC/Mips/mips32r6/invalid-mips32-wrong-error.s
@@ -10,6 +10,10 @@
         bc1tl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
         bc1fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
         bc1fl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2f  4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2f  $fcc0,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2t  4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2t  $fcc0,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
         bc2tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
         bc2tl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
         bc2fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/test/MC/Mips/mips32r6/invalid-mips32.s b/test/MC/Mips/mips32r6/invalid-mips32.s
new file mode 100644
index 0000000..e0889ea
--- /dev/null
+++ b/test/MC/Mips/mips32r6/invalid-mips32.s
@@ -0,0 +1,25 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+        .set noat
+        madd      $s6,$13       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        madd      $zero,$9      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        maddu     $s3,$gp       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        maddu     $24,$s2       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$8,$fcc7        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.d    $f6,$f11,$fcc5      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.s    $f23,$f5,$fcc6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movn      $v1,$s1,$s0         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movn.d    $f27,$f21,$k0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movn.s    $f12,$f0,$s7        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc5     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.d    $f0,$f2,$fcc0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc1      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movz      $a1,$s6,$9          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movz.d    $f12,$f29,$9        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movz.s    $f25,$f7,$v1        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        msub      $s7,$k1       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        msubu     $15,$a1       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r6/invalid-mips32r2.s b/test/MC/Mips/mips32r6/invalid-mips32r2.s
new file mode 100644
index 0000000..25694e3
--- /dev/null
+++ b/test/MC/Mips/mips32r6/invalid-mips32r2.s
@@ -0,0 +1,15 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+        .set noat
+        madd.d    $f18,$f19,$f26,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        madd.s    $f1,$f31,$f19,$f25  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        msub.d    $f10,$f1,$f31,$f18  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        msub.s    $f12,$f19,$f10,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        nmadd.d   $f18,$f9,$f14,$f19  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        nmadd.s   $f0,$f5,$f25,$f12   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        nmsub.d   $f30,$f8,$f16,$f30  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        nmsub.s   $f1,$f24,$f19,$f4   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r6/invalid-mips4-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips4-wrong-error.s
new file mode 100644
index 0000000..f3131a9
--- /dev/null
+++ b/test/MC/Mips/mips32r6/invalid-mips4-wrong-error.s
@@ -0,0 +1,21 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+        .set noat
+        beql $1,$2,4            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bgezall $3,8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bgezl $3,8              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bgtzl $4,16             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        blezl $3,8              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bltzall $3,8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bltzl $4,16             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bnel $1,$2,4            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        prefx 0,$2($31)         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/test/MC/Mips/mips32r6/invalid-mips4.s b/test/MC/Mips/mips32r6/invalid-mips4.s
new file mode 100644
index 0000000..8ba2ed8
--- /dev/null
+++ b/test/MC/Mips/mips32r6/invalid-mips4.s
@@ -0,0 +1,11 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+	.set noat
+        ldxc1     $f8,$s7($15)        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        lwxc1     $f12,$s1($s8)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        sdxc1     $f11,$10($14)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        swxc1     $f19,$12($k0)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r6/invalid-mips5-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips5-wrong-error.s
new file mode 100644
index 0000000..99d10c3
--- /dev/null
+++ b/test/MC/Mips/mips32r6/invalid-mips5-wrong-error.s
@@ -0,0 +1,11 @@
+# Instructions that are invalid but currently emit the wrong error message.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+	.set noat
+        bc1any2f  $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1any2t  $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1any4f  $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1any4t  $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/test/MC/Mips/mips32r6/invalid-mips5.s b/test/MC/Mips/mips32r6/invalid-mips5.s
new file mode 100644
index 0000000..63f1cca
--- /dev/null
+++ b/test/MC/Mips/mips32r6/invalid-mips5.s
@@ -0,0 +1,9 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+	.set noat
+        luxc1     $f19,$s6($s5)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        suxc1     $f12,$k1($13)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r6/invalid.s b/test/MC/Mips/mips32r6/invalid.s
new file mode 100644
index 0000000..82cb5ab
--- /dev/null
+++ b/test/MC/Mips/mips32r6/invalid.s
@@ -0,0 +1,14 @@
+# Instructions that are available for the current ISA but should be rejected by
+# the assembler (e.g. invalid set of operands or operand's restrictions not met).
+
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -mcpu=mips32r6 2>%t1
+# RUN: FileCheck %s < %t1 -check-prefix=ASM
+
+        .text
+        .set noreorder
+        .set noat
+        jalr.hb $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
+        jalr.hb $31, $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
+        ldc2    $8,-21181($at)   # ASM: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        sdc2    $20,23157($s2)   # ASM: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        swc2    $25,24880($s0)   # ASM: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r6/relocations.s b/test/MC/Mips/mips32r6/relocations.s
index 4532e42..13b3387 100644
--- a/test/MC/Mips/mips32r6/relocations.s
+++ b/test/MC/Mips/mips32r6/relocations.s
@@ -5,6 +5,9 @@
 #------------------------------------------------------------------------------
 # Check that the assembler can handle the documented syntax for fixups.
 #------------------------------------------------------------------------------
+# CHECK-FIXUP: addiupc $2, bar  # encoding: [0xec,0b01000AAA,A,A]
+# CHECK-FIXUP:                  # fixup A - offset: 0,
+# CHECK-FIXUP:                    value: bar, kind: fixup_MIPS_PC19_S2
 # CHECK-FIXUP: beqc $5, $6, bar # encoding: [0x20,0xa6,A,A]
 # CHECK-FIXUP:                  #   fixup A - offset: 0,
 # CHECK-FIXUP:                      value: bar, kind: fixup_Mips_PC16
@@ -31,20 +34,30 @@
 # CHECK-FIXUP:                              #   fixup A - offset: 0,
 # CHECK-FIXUP:                                  value: bar@PCREL_LO16,
 # CHECK-FIXUP:                                  kind: fixup_MIPS_PCLO16
+# CHECK-FIXUP: lwpc    $2, bar  # encoding: [0xec,0b01001AAA,A,A]
+# CHECK-FIXUP:                  #   fixup A - offset: 0,
+# CHECK-FIXUP:                      value: bar, kind: fixup_MIPS_PC19_S2
+# CHECK-FIXUP: lwupc   $2, bar  # encoding: [0xec,0b01010AAA,A,A]
+# CHECK-FIXUP:                  #   fixup A - offset: 0,
+# CHECK-FIXUP:                      value: bar, kind: fixup_MIPS_PC19_S2
 #------------------------------------------------------------------------------
 # Check that the appropriate relocations were created.
 #------------------------------------------------------------------------------
 # CHECK-ELF: Relocations [
-# CHECK-ELF:     0x0 R_MIPS_PC16 bar 0x0
+# CHECK-ELF:     0x0 R_MIPS_PC19_S2 bar 0x0
 # CHECK-ELF:     0x4 R_MIPS_PC16 bar 0x0
-# CHECK-ELF:     0x8 R_MIPS_PC21_S2 bar 0x0
+# CHECK-ELF:     0x8 R_MIPS_PC16 bar 0x0
 # CHECK-ELF:     0xC R_MIPS_PC21_S2 bar 0x0
-# CHECK-ELF:     0x10 R_MIPS_PC26_S2 bar 0x0
+# CHECK-ELF:     0x10 R_MIPS_PC21_S2 bar 0x0
 # CHECK-ELF:     0x14 R_MIPS_PC26_S2 bar 0x0
-# CHECK-ELF:     0x18 R_MIPS_PCHI16 bar 0x0
-# CHECK-ELF:     0x1C R_MIPS_PCLO16 bar 0x0
+# CHECK-ELF:     0x18 R_MIPS_PC26_S2 bar 0x0
+# CHECK-ELF:     0x1C R_MIPS_PCHI16 bar 0x0
+# CHECK-ELF:     0x20 R_MIPS_PCLO16 bar 0x0
+# CHECK-ELF:     0x24 R_MIPS_PC19_S2 bar 0x0
+# CHECK-ELF:     0x28 R_MIPS_PC19_S2 bar 0x0
 # CHECK-ELF: ]
 
+  addiupc   $2,bar
   beqc  $5, $6, bar
   bnec  $5, $6, bar
   beqzc $9, bar
@@ -53,3 +66,5 @@
   bc    bar
   aluipc $2, %pcrel_hi(bar)
   addiu  $2, $2, %pcrel_lo(bar)
+  lwpc      $2,bar
+  lwupc     $2,bar
diff --git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s
index 5b4b928..f23dbd7 100644
--- a/test/MC/Mips/mips32r6/valid.s
+++ b/test/MC/Mips/mips32r6/valid.s
@@ -10,15 +10,18 @@
 #   rs > rt
 # appropriately for each branch instruction
 #
-# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 | FileCheck %s
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 2> %t0 | FileCheck %s
+# RUN: FileCheck %s -check-prefix=WARNING < %t0
 
         .set noat
         # FIXME: Add the instructions carried forward from older ISA's
+        and     $2,4             # CHECK: andi $2, $2, 4      # encoding: [0x30,0x42,0x00,0x04]
         addiupc $4, 100          # CHECK: addiupc $4, 100     # encoding: [0xec,0x80,0x00,0x19]
         align   $4, $2, $3, 2    # CHECK: align $4, $2, $3, 2 # encoding: [0x7c,0x43,0x22,0xa0]
         aluipc  $3, 56           # CHECK: aluipc $3, 56       # encoding: [0xec,0x7f,0x00,0x38]
         aui     $3,$2,-23        # CHECK: aui $3, $2, -23     # encoding: [0x3c,0x62,0xff,0xe9]
         auipc   $3, -1           # CHECK: auipc $3, -1        # encoding: [0xec,0x7e,0xff,0xff]
+        bal     21100            # CHECK: bal 21100           # encoding: [0x04,0x11,0x14,0x9b]
         balc 14572256            # CHECK: balc 14572256       # encoding: [0xe8,0x37,0x96,0xb8]
         bc 14572256              # CHECK: bc 14572256         # encoding: [0xc8,0x37,0x96,0xb8]
         bc1eqz  $f0,4            # CHECK: bc1eqz $f0, 4       # encoding: [0x45,0x20,0x00,0x01]
@@ -38,6 +41,8 @@
         bnec $5, $6, 256         # CHECK: bnec $5, $6, 256    # encoding: [0x60,0xa6,0x00,0x40]
         bnezalc $2, 1332         # CHECK: bnezalc $2, 1332    # encoding: [0x60,0x02,0x01,0x4d]
         beqzc $5, 72256          # CHECK: beqzc $5, 72256     # encoding: [0xd8,0xa0,0x46,0x90]
+        bgec $2, $3, 256         # CHECK: bgec $2, $3, 256    # encoding: [0x58,0x43,0x00,0x40]
+        bgeuc $2, $3, 256        # CHECK: bgeuc $2, $3, 256   # encoding: [0x18,0x43,0x00,0x40]
         bgezalc $2, 1332         # CHECK: bgezalc $2, 1332    # encoding: [0x18,0x42,0x01,0x4d]
         bnezc $5, 72256          # CHECK: bnezc $5, 72256     # encoding: [0xf8,0xa0,0x46,0x90]
         bltzc $5, 256            # CHECK: bltzc $5, 256       # encoding: [0x5c,0xa5,0x00,0x40]
@@ -48,6 +53,8 @@
         bgtzc $5, 256            # CHECK: bgtzc $5, 256       # encoding: [0x5c,0x05,0x00,0x40]
         bitswap $4, $2           # CHECK: bitswap $4, $2      # encoding: [0x7c,0x02,0x20,0x20]
         blezalc $2, 1332         # CHECK: blezalc $2, 1332    # encoding: [0x18,0x02,0x01,0x4d]
+        bltc $5, $6, 256         # CHECK: bltc $5, $6, 256    # encoding: [0x5c,0xa6,0x00,0x40]
+        bltuc $5, $6, 256        # CHECK: bltuc $5, $6, 256   # encoding: [0x1c,0xa6,0x00,0x40]
         # bnvc requires that rs >= rt but we accept both. See also bnec
         bnvc     $0, $0, 4       # CHECK: bnvc $zero, $zero, 4 # encoding: [0x60,0x00,0x00,0x01]
         bnvc     $2, $0, 4       # CHECK: bnvc $2, $zero, 4    # encoding: [0x60,0x40,0x00,0x01]
@@ -56,47 +63,49 @@
         bovc     $0, $0, 4       # CHECK: bovc $zero, $zero, 4 # encoding: [0x20,0x00,0x00,0x01]
         bovc     $2, $0, 4       # CHECK: bovc $2, $zero, 4    # encoding: [0x20,0x40,0x00,0x01]
         bovc     $4, $2, 4       # CHECK: bovc $4, $2, 4       # encoding: [0x20,0x82,0x00,0x01]
-        cmp.f.s    $f2,$f3,$f4      # CHECK: cmp.f.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x80]
-        cmp.f.d    $f2,$f3,$f4      # CHECK: cmp.f.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x80]
+        cache      1, 8($5)         # CHECK: cache 1, 8($5)         # encoding: [0x7c,0xa1,0x04,0x25]
+        cmp.af.s   $f2,$f3,$f4      # CHECK: cmp.af.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x80]
+        cmp.af.d   $f2,$f3,$f4      # CHECK: cmp.af.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x80]
         cmp.un.s   $f2,$f3,$f4      # CHECK: cmp.un.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x81]
         cmp.un.d   $f2,$f3,$f4      # CHECK: cmp.un.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x81]
         cmp.eq.s   $f2,$f3,$f4      # CHECK: cmp.eq.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x82]
         cmp.eq.d   $f2,$f3,$f4      # CHECK: cmp.eq.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x82]
         cmp.ueq.s  $f2,$f3,$f4      # CHECK: cmp.ueq.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x83]
         cmp.ueq.d  $f2,$f3,$f4      # CHECK: cmp.ueq.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x83]
-        cmp.olt.s  $f2,$f3,$f4      # CHECK: cmp.olt.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x84]
-        cmp.olt.d  $f2,$f3,$f4      # CHECK: cmp.olt.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x84]
+        cmp.lt.s   $f2,$f3,$f4      # CHECK: cmp.lt.s  $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x84]
+        cmp.lt.d   $f2,$f3,$f4      # CHECK: cmp.lt.d  $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x84]
         cmp.ult.s  $f2,$f3,$f4      # CHECK: cmp.ult.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x85]
         cmp.ult.d  $f2,$f3,$f4      # CHECK: cmp.ult.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x85]
-        cmp.ole.s  $f2,$f3,$f4      # CHECK: cmp.ole.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x86]
-        cmp.ole.d  $f2,$f3,$f4      # CHECK: cmp.ole.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x86]
+        cmp.le.s   $f2,$f3,$f4      # CHECK: cmp.le.s  $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x86]
+        cmp.le.d   $f2,$f3,$f4      # CHECK: cmp.le.d  $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x86]
         cmp.ule.s  $f2,$f3,$f4      # CHECK: cmp.ule.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x87]
         cmp.ule.d  $f2,$f3,$f4      # CHECK: cmp.ule.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x87]
-        cmp.sf.s   $f2,$f3,$f4      # CHECK: cmp.sf.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x88]
-        cmp.sf.d   $f2,$f3,$f4      # CHECK: cmp.sf.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x88]
-        cmp.ngle.s $f2,$f3,$f4      # CHECK: cmp.ngle.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x89]
-        cmp.ngle.d $f2,$f3,$f4      # CHECK: cmp.ngle.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x89]
+        cmp.saf.s  $f2,$f3,$f4      # CHECK: cmp.saf.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x88]
+        cmp.saf.d  $f2,$f3,$f4      # CHECK: cmp.saf.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x88]
+        cmp.sun.s  $f2,$f3,$f4      # CHECK: cmp.sun.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x89]
+        cmp.sun.d  $f2,$f3,$f4      # CHECK: cmp.sun.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x89]
         cmp.seq.s  $f2,$f3,$f4      # CHECK: cmp.seq.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8a]
         cmp.seq.d  $f2,$f3,$f4      # CHECK: cmp.seq.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8a]
-        cmp.ngl.s  $f2,$f3,$f4      # CHECK: cmp.ngl.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8b]
-        cmp.ngl.d  $f2,$f3,$f4      # CHECK: cmp.ngl.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8b]
-        cmp.lt.s   $f2,$f3,$f4      # CHECK: cmp.lt.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8c]
-        cmp.lt.d   $f2,$f3,$f4      # CHECK: cmp.lt.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8c]
-        cmp.nge.s  $f2,$f3,$f4      # CHECK: cmp.nge.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8d]
-        cmp.nge.d  $f2,$f3,$f4      # CHECK: cmp.nge.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8d]
-        cmp.le.s   $f2,$f3,$f4      # CHECK: cmp.le.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8e]
-        cmp.le.d   $f2,$f3,$f4      # CHECK: cmp.le.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8e]
-        cmp.ngt.s  $f2,$f3,$f4      # CHECK: cmp.ngt.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8f]
-        cmp.ngt.d  $f2,$f3,$f4      # CHECK: cmp.ngt.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8f]
+        cmp.sueq.s $f2,$f3,$f4      # CHECK: cmp.sueq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8b]
+        cmp.sueq.d $f2,$f3,$f4      # CHECK: cmp.sueq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8b]
+        cmp.slt.s  $f2,$f3,$f4      # CHECK: cmp.slt.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8c]
+        cmp.slt.d  $f2,$f3,$f4      # CHECK: cmp.slt.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8c]
+        cmp.sult.s $f2,$f3,$f4      # CHECK: cmp.sult.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8d]
+        cmp.sult.d $f2,$f3,$f4      # CHECK: cmp.sult.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8d]
+        cmp.sle.s  $f2,$f3,$f4      # CHECK: cmp.sle.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8e]
+        cmp.sle.d  $f2,$f3,$f4      # CHECK: cmp.sle.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8e]
+        cmp.sule.s $f2,$f3,$f4      # CHECK: cmp.sule.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8f]
+        cmp.sule.d $f2,$f3,$f4      # CHECK: cmp.sule.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8f]
         div     $2,$3,$4         # CHECK: div $2, $3, $4   # encoding: [0x00,0x64,0x10,0x9a]
         divu    $2,$3,$4         # CHECK: divu $2, $3, $4  # encoding: [0x00,0x64,0x10,0x9b]
         jialc   $5, 256          # CHECK: jialc $5, 256    # encoding: [0xf8,0x05,0x01,0x00]
         jic     $5, 256          # CHECK: jic $5, 256      # encoding: [0xd8,0x05,0x01,0x00]
+        lsa     $2, $3, $4, 3    # CHECK: lsa  $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xc5]
         lwpc    $2,268           # CHECK: lwpc $2, 268     # encoding: [0xec,0x48,0x00,0x43]
         lwupc   $2,268           # CHECK: lwupc $2, 268    # encoding: [0xec,0x50,0x00,0x43]
         mod     $2,$3,$4         # CHECK: mod $2, $3, $4   # encoding: [0x00,0x64,0x10,0xda]
         modu    $2,$3,$4         # CHECK: modu $2, $3, $4  # encoding: [0x00,0x64,0x10,0xdb]
-#        mul     $2,$3,$4         # CHECK-TODO: mul $2, $3, $4   # encoding: [0x00,0x64,0x10,0x98]
+        mul     $2,$3,$4         # CHECK: mul $2, $3, $4   # encoding: [0x00,0x64,0x10,0x98]
         muh     $2,$3,$4         # CHECK: muh $2, $3, $4   # encoding: [0x00,0x64,0x10,0xd8]
         mulu    $2,$3,$4         # CHECK: mulu $2, $3, $4  # encoding: [0x00,0x64,0x10,0x99]
         muhu    $2,$3,$4         # CHECK: muhu $2, $3, $4  # encoding: [0x00,0x64,0x10,0xd9]
@@ -104,6 +113,7 @@
         maddf.d $f2,$f3,$f4      # CHECK: maddf.d $f2, $f3, $f4  # encoding: [0x46,0x24,0x18,0x98]
         msubf.s $f2,$f3,$f4      # CHECK: msubf.s $f2, $f3, $f4  # encoding: [0x46,0x04,0x18,0x99]
         msubf.d $f2,$f3,$f4      # CHECK: msubf.d $f2, $f3, $f4  # encoding: [0x46,0x24,0x18,0x99]
+        pref    1, 8($5)         # CHECK: pref 1, 8($5)          # encoding: [0x7c,0xa1,0x04,0x35]
         sel.d   $f0,$f1,$f2      # CHECK: sel.d $f0, $f1, $f2 # encoding: [0x46,0x22,0x08,0x10]
         sel.s   $f0,$f1,$f2      # CHECK: sel.s $f0, $f1, $f2 # encoding: [0x46,0x02,0x08,0x10]
         seleqz  $2,$3,$4         # CHECK: seleqz $2, $3, $4 # encoding: [0x00,0x64,0x10,0x35]
@@ -116,6 +126,7 @@
         maxa.d  $f0, $f2, $f4    # CHECK: maxa.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1f]
         mina.s  $f0, $f2, $f4    # CHECK: mina.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1e]
         mina.d  $f0, $f2, $f4    # CHECK: mina.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1e]
+        or      $2, 4            # CHECK: ori $2, $2, 4          # encoding: [0x34,0x42,0x00,0x04]
         seleqz.s $f0, $f2, $f4   # CHECK: seleqz.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x14]
         seleqz.d $f0, $f2, $f4   # CHECK: seleqz.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x14]
         selnez.s $f0, $f2, $f4   # CHECK: selnez.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x17]
@@ -124,3 +135,20 @@
         rint.d $f2, $f4          # CHECK: rint.d $f2, $f4        # encoding: [0x46,0x20,0x20,0x9a]
         class.s $f2, $f4         # CHECK: class.s $f2, $f4       # encoding: [0x46,0x00,0x20,0x9b]
         class.d $f2, $f4         # CHECK: class.d $f2, $f4       # encoding: [0x46,0x20,0x20,0x9b]
+        jr.hb   $4               # CHECK: jr.hb $4               # encoding: [0x00,0x80,0x04,0x09]
+        jalr.hb $4               # CHECK: jalr.hb $4             # encoding: [0x00,0x80,0xfc,0x09]
+        jalr.hb $4, $5           # CHECK: jalr.hb $4, $5         # encoding: [0x00,0xa0,0x24,0x09]
+        ldc2    $8, -701($at)    # CHECK: ldc2 $8, -701($1)      # encoding: [0x49,0xc8,0x0d,0x43]
+        lwc2    $18,-841($a2)    # CHECK: lwc2 $18, -841($6)     # encoding: [0x49,0x52,0x34,0xb7]
+        sdc2    $20,629($s2)     # CHECK: sdc2 $20, 629($18)     # encoding: [0x49,0xf4,0x92,0x75]
+        swc2    $25,304($s0)     # CHECK: swc2 $25, 304($16)     # encoding: [0x49,0x79,0x81,0x30]
+        ll      $v0,-153($s2)    # CHECK: ll $2, -153($18)       # encoding: [0x7e,0x42,0xb3,0xb6]
+        sc      $15,-40($s3)     # CHECK: sc $15, -40($19)       # encoding: [0x7e,0x6f,0xec,0x26]
+        clo     $11,$a1          # CHECK: clo $11, $5            # encoding: [0x00,0xa0,0x58,0x51]
+        clz     $sp,$gp          # CHECK: clz $sp, $gp           # encoding: [0x03,0x80,0xe8,0x50]
+        ssnop                    # WARNING: [[@LINE]]:9: warning: ssnop is deprecated for MIPS32r6 and is equivalent to a nop instruction
+        ssnop                    # CHECK: ssnop                  # encoding: [0x00,0x00,0x00,0x40]
+        sdbbp                    # CHECK: sdbbp                  # encoding: [0x00,0x00,0x00,0x0e]
+        sdbbp     34             # CHECK: sdbbp 34               # encoding: [0x00,0x00,0x08,0x8e]
+        sync                     # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
+        sync    1                # CHECK: sync 1                 # encoding: [0x00,0x00,0x00,0x4f]
diff --git a/test/MC/Mips/mips4/invalid-mips32.s b/test/MC/Mips/mips4/invalid-mips32.s
new file mode 100644
index 0000000..52dea02
--- /dev/null
+++ b/test/MC/Mips/mips4/invalid-mips32.s
@@ -0,0 +1,10 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips4 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+        .set noat
+
+        sync 0                    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        sync 1                    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips4/valid.s b/test/MC/Mips/mips4/valid.s
index 811584e..949b91d 100644
--- a/test/MC/Mips/mips4/valid.s
+++ b/test/MC/Mips/mips4/valid.s
@@ -9,8 +9,21 @@
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
+        addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
         and       $s7,$v0,$12
+        and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
+        bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1f      $fcc1, 4             # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01]
+        bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1t      $fcc1, 4             # CHECK: bc1t $fcc1, 4 # encoding: [0x45,0x05,0x00,0x01]
+        bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
+        bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
         c.sf.d    $f30,$f0
@@ -32,7 +45,11 @@
         cvt.w.d   $f20,$f14
         cvt.w.s   $f20,$f24
         dadd      $s3,$at,$ra
+        dadd      $sp,$s4,-27705       # CHECK: daddi $sp, $20, -27705 # encoding: [0x62,0x9d,0x93,0xc7]
+        dadd      $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddi     $sp,$s4,-27705
+        daddi     $sp,$s4,-27705       # CHECK: daddi $sp, $20, -27705 # encoding: [0x62,0x9d,0x93,0xc7]
+        daddi     $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddiu    $k0,$s6,-4586
         daddu     $s3,$at,$ra
         ddiv      $zero,$k0,$s3
@@ -64,8 +81,10 @@
         dsrl32    $s3,$6,23            # CHECK: dsrl32 $19, $6, 23          # encoding: [0x00,0x06,0x9d,0xfe]
         dsrlv     $s3,$6,$s4           # CHECK: dsrlv $19, $6, $20          # encoding: [0x02,0x86,0x98,0x16]
         dsub      $a3,$s6,$8
-        dsubu     $a1,$a1,$k0
-        dsub      $a3,$s6,$8
+        dsub      $sp,$s4,-27705       # CHECK: daddi $sp, $20, 27705  # encoding: [0x62,0x9d,0x6c,0x39]
+        dsub      $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
+        dsubi     $sp,$s4,-27705       # CHECK: daddi $sp, $20, 27705  # encoding: [0x62,0x9d,0x6c,0x39]
+        dsubi     $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
         dsubu     $a1,$a1,$k0
         ehb                            # CHECK: ehb # encoding:  [0x00,0x00,0x00,0xc0]
         eret
@@ -77,7 +96,7 @@
         lbu       $8,30195($v1)
         ld        $sp,-28645($s1)
         ldc1      $f11,16391($s0)
-        ldc2      $8,-21181($at)
+        ldc2      $8,-21181($at)        # CHECK: ldc2 $8, -21181($1)   # encoding: [0xd8,0x28,0xad,0x43]
         ldl       $24,-4167($24)
         ldr       $14,-30358($s4)
         ldxc1     $f8,$s7($15)
@@ -85,11 +104,11 @@
         lhu       $s3,-22851($v0)
         li        $at,-29773
         li        $zero,-29889
-        ll        $v0,-7321($s2)
-        lld       $zero,-14736($ra)
+        ll        $v0,-7321($s2)       # CHECK: ll $2, -7321($18)     # encoding: [0xc2,0x42,0xe3,0x67]
+        lld       $zero,-14736($ra)    # CHECK: lld $zero, -14736($ra) # encoding: [0xd3,0xe0,0xc6,0x70]
         lw        $8,5674($a1)
         lwc1      $f16,10225($k0)
-        lwc2      $18,-841($a2)
+        lwc2      $18,-841($a2)        # CHECK: lwc2 $18, -841($6)     # encoding: [0xc8,0xd2,0xfc,0xb7]
         lwl       $s4,-4231($15)
         lwr       $zero,-19147($gp)
         lwu       $s3,-24086($v1)
@@ -133,16 +152,18 @@
         nop
         nor       $a3,$zero,$a3
         or        $12,$s0,$sp
+        or        $2, 4                # CHECK: ori $2, $2, 4          # encoding: [0x34,0x42,0x00,0x04]
+        pref      1, 8($5)             # CHECK: pref 1, 8($5)          # encoding: [0xcc,0xa1,0x00,0x08]
         round.l.d $f12,$f1
         round.l.s $f25,$f5
         round.w.d $f6,$f4
         round.w.s $f27,$f28
         sb        $s6,-19857($14)
-        sc        $15,18904($s3)
-        scd       $15,-8243($sp)
+        sc        $15,18904($s3)       # CHECK: sc $15, 18904($19)     # encoding: [0xe2,0x6f,0x49,0xd8]
+        scd       $15,-8243($sp)       # CHECK: scd $15, -8243($sp)    # encoding: [0xf3,0xaf,0xdf,0xcd]
         sd        $12,5835($10)
         sdc1      $f31,30574($13)
-        sdc2      $20,23157($s2)
+        sdc2      $20,23157($s2)       # CHECK: sdc2 $20, 23157($18)   # encoding: [0xfa,0x54,0x5a,0x75]
         sdl       $a3,-20961($s8)
         sdr       $11,-20423($12)
         sdxc1     $f11,$10($14)
@@ -173,10 +194,11 @@
         subu      $sp,$s6,$s6
         sw        $ra,-10160($sp)
         swc1      $f6,-8465($24)
-        swc2      $25,24880($s0)
+        swc2      $25,24880($s0)       # CHECK: swc2 $25, 24880($16)   # encoding: [0xea,0x19,0x61,0x30]
         swl       $15,13694($s3)
         swr       $s1,-26590($14)
         swxc1     $f19,$12($k0)
+        sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
         teqi      $s5,-17504
         tgei      $s1,5025
         tgeiu     $sp,-28621
diff --git a/test/MC/Mips/mips5/invalid-mips32.s b/test/MC/Mips/mips5/invalid-mips32.s
new file mode 100644
index 0000000..2e2c8da
--- /dev/null
+++ b/test/MC/Mips/mips5/invalid-mips32.s
@@ -0,0 +1,10 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips5 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+        .set noat
+
+        sync 0                    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        sync 1                    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips5/invalid-mips64.s b/test/MC/Mips/mips5/invalid-mips64.s
index 19d64dc..0a15da8 100644
--- a/test/MC/Mips/mips5/invalid-mips64.s
+++ b/test/MC/Mips/mips5/invalid-mips64.s
@@ -10,6 +10,9 @@
         dclo      $s2,$a2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         dclz      $s0,$25     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         deret                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        jr.hb     $4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        jalr.hb   $4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        jalr.hb   $4, $5      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         madd      $s6,$13     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         madd      $zero,$9    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         maddu     $s3,$gp     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips5/valid.s b/test/MC/Mips/mips5/valid.s
index 19aad05..3afdee1 100644
--- a/test/MC/Mips/mips5/valid.s
+++ b/test/MC/Mips/mips5/valid.s
@@ -9,8 +9,21 @@
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
+        addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
         and       $s7,$v0,$12
+        and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
+        bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1f      $fcc1, 4             # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01]
+        bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1t      $fcc1, 4             # CHECK: bc1t $fcc1, 4 # encoding: [0x45,0x05,0x00,0x01]
+        bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
+        bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
         c.sf.d    $f30,$f0
@@ -32,7 +45,11 @@
         cvt.w.d   $f20,$f14
         cvt.w.s   $f20,$f24
         dadd      $s3,$at,$ra
+        dadd      $sp,$s4,-27705       # CHECK: daddi $sp, $20, -27705 # encoding: [0x62,0x9d,0x93,0xc7]
+        dadd      $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddi     $sp,$s4,-27705
+        daddi     $sp,$s4,-27705       # CHECK: daddi $sp, $20, -27705 # encoding: [0x62,0x9d,0x93,0xc7]
+        daddi     $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddiu    $k0,$s6,-4586
         daddu     $s3,$at,$ra
         ddiv      $zero,$k0,$s3
@@ -64,8 +81,10 @@
         dsrl32    $s3,$6,23            # CHECK: dsrl32 $19, $6, 23          # encoding: [0x00,0x06,0x9d,0xfe]
         dsrlv     $s3,$6,$s4           # CHECK: dsrlv $19, $6, $20          # encoding: [0x02,0x86,0x98,0x16]
         dsub      $a3,$s6,$8
-        dsubu     $a1,$a1,$k0
-        dsub      $a3,$s6,$8
+        dsub      $sp,$s4,-27705       # CHECK: daddi $sp, $20, 27705  # encoding: [0x62,0x9d,0x6c,0x39]
+        dsub      $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
+        dsubi     $sp,$s4,-27705       # CHECK: daddi $sp, $20, 27705  # encoding: [0x62,0x9d,0x6c,0x39]
+        dsubi     $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
         dsubu     $a1,$a1,$k0
         ehb                            # CHECK: ehb # encoding:  [0x00,0x00,0x00,0xc0]
         eret
@@ -77,7 +96,7 @@
         lbu       $8,30195($v1)
         ld        $sp,-28645($s1)
         ldc1      $f11,16391($s0)
-        ldc2      $8,-21181($at)
+        ldc2      $8,-21181($at)        # CHECK: ldc2 $8, -21181($1)   # encoding: [0xd8,0x28,0xad,0x43]
         ldl       $24,-4167($24)
         ldr       $14,-30358($s4)
         ldxc1     $f8,$s7($15)
@@ -85,12 +104,12 @@
         lhu       $s3,-22851($v0)
         li        $at,-29773
         li        $zero,-29889
-        ll        $v0,-7321($s2)
-        lld       $zero,-14736($ra)
+        ll        $v0,-7321($s2)       # CHECK: ll $2, -7321($18)     # encoding: [0xc2,0x42,0xe3,0x67]
+        lld       $zero,-14736($ra)    # CHECK: lld $zero, -14736($ra) # encoding: [0xd3,0xe0,0xc6,0x70]
         luxc1     $f19,$s6($s5)
         lw        $8,5674($a1)
         lwc1      $f16,10225($k0)
-        lwc2      $18,-841($a2)
+        lwc2      $18,-841($a2)        # CHECK: lwc2 $18, -841($6)     # encoding: [0xc8,0xd2,0xfc,0xb7]
         lwl       $s4,-4231($15)
         lwr       $zero,-19147($gp)
         lwu       $s3,-24086($v1)
@@ -134,16 +153,18 @@
         nop
         nor       $a3,$zero,$a3
         or        $12,$s0,$sp
+        or        $2, 4                # CHECK: ori $2, $2, 4          # encoding: [0x34,0x42,0x00,0x04]
+        pref      1, 8($5)             # CHECK: pref 1, 8($5)          # encoding: [0xcc,0xa1,0x00,0x08]
         round.l.d $f12,$f1
         round.l.s $f25,$f5
         round.w.d $f6,$f4
         round.w.s $f27,$f28
         sb        $s6,-19857($14)
-        sc        $15,18904($s3)
-        scd       $15,-8243($sp)
+        sc        $15,18904($s3)       # CHECK: sc $15, 18904($19)     # encoding: [0xe2,0x6f,0x49,0xd8]
+        scd       $15,-8243($sp)       # CHECK: scd $15, -8243($sp)    # encoding: [0xf3,0xaf,0xdf,0xcd]
         sd        $12,5835($10)
         sdc1      $f31,30574($13)
-        sdc2      $20,23157($s2)
+        sdc2      $20,23157($s2)       # CHECK: sdc2 $20, 23157($18)   # encoding: [0xfa,0x54,0x5a,0x75]
         sdl       $a3,-20961($s8)
         sdr       $11,-20423($12)
         sdxc1     $f11,$10($14)
@@ -175,10 +196,11 @@
         suxc1     $f12,$k1($13)
         sw        $ra,-10160($sp)
         swc1      $f6,-8465($24)
-        swc2      $25,24880($s0)
+        swc2      $25,24880($s0)       # CHECK: swc2 $25, 24880($16)   # encoding: [0xea,0x19,0x61,0x30]
         swl       $15,13694($s3)
         swr       $s1,-26590($14)
         swxc1     $f19,$12($k0)
+        sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
         teqi      $s5,-17504
         tgei      $s1,5025
         tgeiu     $sp,-28621
diff --git a/test/MC/Mips/mips64-expansions.s b/test/MC/Mips/mips64-expansions.s
new file mode 100644
index 0000000..0efdd2f
--- /dev/null
+++ b/test/MC/Mips/mips64-expansions.s
@@ -0,0 +1,209 @@
+# RUN: llvm-mc %s -triple=mips64el-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s
+#
+# The GNU assembler implements 'dli' and 'dla' variants on 'li' and 'la'
+# supporting double-word lengths.  Test that not only are they present, bu
+# that they also seem to handle 64-bit values.
+#
+# XXXRW: Does using powers of ten make me a bad person?
+#
+# CHECK: ori	$12, $zero, 1           # encoding: [0x01,0x00,0x0c,0x34]
+# CHECK: ori	$12, $zero, 10          # encoding: [0x0a,0x00,0x0c,0x34]
+# CHECK: ori	$12, $zero, 100         # encoding: [0x64,0x00,0x0c,0x34]
+# CHECK: ori	$12, $zero, 1000        # encoding: [0xe8,0x03,0x0c,0x34]
+# CHECK: ori	$12, $zero, 10000       # encoding: [0x10,0x27,0x0c,0x34]
+# CHECK: lui	$12, 1                  # encoding: [0x01,0x00,0x0c,0x3c]
+# CHECK: ori	$12, $12, 34464         # encoding: [0xa0,0x86,0x8c,0x35]
+# CHECK: lui	$12, 15                 # encoding: [0x0f,0x00,0x0c,0x3c]
+# CHECK: ori	$12, $12, 16960         # encoding: [0x40,0x42,0x8c,0x35]
+# CHECK: lui	$12, 152                # encoding: [0x98,0x00,0x0c,0x3c]
+# CHECK: ori	$12, $12, 38528         # encoding: [0x80,0x96,0x8c,0x35]
+# CHECK: lui	$12, 1525               # encoding: [0xf5,0x05,0x0c,0x3c]
+# CHECK: ori	$12, $12, 57600         # encoding: [0x00,0xe1,0x8c,0x35]
+# CHECK: lui	$12, 15258              # encoding: [0x9a,0x3b,0x0c,0x3c]
+# CHECK: ori	$12, $12, 51712         # encoding: [0x00,0xca,0x8c,0x35]
+# CHECK: lui	$12, 2                  # encoding: [0x02,0x00,0x0c,0x3c]
+# CHECK: ori	$12, $12, 21515         # encoding: [0x0b,0x54,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 58368         # encoding: [0x00,0xe4,0x8c,0x35]
+# CHECK: lui	$12, 23                 # encoding: [0x17,0x00,0x0c,0x3c]
+# CHECK: ori	$12, $12, 18550         # encoding: [0x76,0x48,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 59392         # encoding: [0x00,0xe8,0x8c,0x35]
+# CHECK: lui	$12, 232                # encoding: [0xe8,0x00,0x0c,0x3c]
+# CHECK: ori	$12, $12, 54437         # encoding: [0xa5,0xd4,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 4096          # encoding: [0x00,0x10,0x8c,0x35]
+# CHECK: lui	$12, 2328               # encoding: [0x18,0x09,0x0c,0x3c]
+# CHECK: ori	$12, $12, 20082         # encoding: [0x72,0x4e,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 40960         # encoding: [0x00,0xa0,0x8c,0x35]
+# CHECK: lui	$12, 23283              # encoding: [0xf3,0x5a,0x0c,0x3c]
+# CHECK: ori	$12, $12, 4218          # encoding: [0x7a,0x10,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 16384         # encoding: [0x00,0x40,0x8c,0x35]
+# CHECK: lui	$12, 3                  # encoding: [0x03,0x00,0x0c,0x3c]
+# CHECK: ori	$12, $12, 36222         # encoding: [0x7e,0x8d,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 42182         # encoding: [0xc6,0xa4,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 32768         # encoding: [0x00,0x80,0x8c,0x35]
+# CHECK: lui	$12, 35                 # encoding: [0x23,0x00,0x0c,0x3c]
+# CHECK: ori	$12, $12, 34546         # encoding: [0xf2,0x86,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 28609         # encoding: [0xc1,0x6f,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 0             # encoding: [0x00,0x00,0x8c,0x35]
+# CHECK: lui	$12, 355                # encoding: [0x63,0x01,0x0c,0x3c]
+# CHECK: ori	$12, $12, 17784         # encoding: [0x78,0x45,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 23946         # encoding: [0x8a,0x5d,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 0             # encoding: [0x00,0x00,0x8c,0x35]
+# CHECK: lui	$12, 3552               # encoding: [0xe0,0x0d,0x0c,0x3c]
+# CHECK: ori	$12, $12, 46771         # encoding: [0xb3,0xb6,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 42852         # encoding: [0x64,0xa7,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 0             # encoding: [0x00,0x00,0x8c,0x35]
+# CHECK: lui	$12, 35527              # encoding: [0xc7,0x8a,0x0c,0x3c]
+# CHECK: ori	$12, $12, 8964          # encoding: [0x04,0x23,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 35304         # encoding: [0xe8,0x89,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 0             # encoding: [0x00,0x00,0x8c,0x35]
+# CHECK: addiu	$12, $zero, -1          # encoding: [0xff,0xff,0x0c,0x24]
+# CHECK: addiu	$12, $zero, -10         # encoding: [0xf6,0xff,0x0c,0x24]
+# CHECK: addiu	$12, $zero, -100        # encoding: [0x9c,0xff,0x0c,0x24]
+# CHECK: addiu	$12, $zero, -1000       # encoding: [0x18,0xfc,0x0c,0x24]
+# CHECK: addiu	$12, $zero, -10000      # encoding: [0xf0,0xd8,0x0c,0x24]
+# CHECK: lui	$12, 65535              # encoding: [0xff,0xff,0x0c,0x3c]
+# CHECK: ori	$12, $12, 65535         # encoding: [0xff,0xff,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 65534         # encoding: [0xfe,0xff,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 31072         # encoding: [0x60,0x79,0x8c,0x35]
+# CHECK: lui	$12, 65535              # encoding: [0xff,0xff,0x0c,0x3c]
+# CHECK: ori	$12, $12, 65535         # encoding: [0xff,0xff,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 65520         # encoding: [0xf0,0xff,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 48576         # encoding: [0xc0,0xbd,0x8c,0x35]
+# CHECK: lui	$12, 65535              # encoding: [0xff,0xff,0x0c,0x3c]
+# CHECK: ori	$12, $12, 65535         # encoding: [0xff,0xff,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 65383         # encoding: [0x67,0xff,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 27008         # encoding: [0x80,0x69,0x8c,0x35]
+# CHECK: lui	$12, 65535              # encoding: [0xff,0xff,0x0c,0x3c]
+# CHECK: ori	$12, $12, 65535         # encoding: [0xff,0xff,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 64010         # encoding: [0x0a,0xfa,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 7936          # encoding: [0x00,0x1f,0x8c,0x35]
+# CHECK: lui	$12, 65535              # encoding: [0xff,0xff,0x0c,0x3c]
+# CHECK: ori	$12, $12, 65535         # encoding: [0xff,0xff,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 50277         # encoding: [0x65,0xc4,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 13824         # encoding: [0x00,0x36,0x8c,0x35]
+# CHECK: lui	$12, 65535              # encoding: [0xff,0xff,0x0c,0x3c]
+# CHECK: ori	$12, $12, 65533         # encoding: [0xfd,0xff,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 44020         # encoding: [0xf4,0xab,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 7168          # encoding: [0x00,0x1c,0x8c,0x35]
+# CHECK: lui	$12, 65535              # encoding: [0xff,0xff,0x0c,0x3c]
+# CHECK: ori	$12, $12, 65512         # encoding: [0xe8,0xff,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 46985         # encoding: [0x89,0xb7,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 6144          # encoding: [0x00,0x18,0x8c,0x35]
+# CHECK: lui	$12, 65535              # encoding: [0xff,0xff,0x0c,0x3c]
+# CHECK: ori	$12, $12, 65303         # encoding: [0x17,0xff,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 11098         # encoding: [0x5a,0x2b,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 61440         # encoding: [0x00,0xf0,0x8c,0x35]
+# CHECK: lui	$12, 65535              # encoding: [0xff,0xff,0x0c,0x3c]
+# CHECK: ori	$12, $12, 63207         # encoding: [0xe7,0xf6,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 45453         # encoding: [0x8d,0xb1,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 24576         # encoding: [0x00,0x60,0x8c,0x35]
+# CHECK: lui	$12, 65535              # encoding: [0xff,0xff,0x0c,0x3c]
+# CHECK: ori	$12, $12, 42252         # encoding: [0x0c,0xa5,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 61317         # encoding: [0x85,0xef,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 49152         # encoding: [0x00,0xc0,0x8c,0x35]
+# CHECK: lui	$12, 65532              # encoding: [0xfc,0xff,0x0c,0x3c]
+# CHECK: ori	$12, $12, 29313         # encoding: [0x81,0x72,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 23353         # encoding: [0x39,0x5b,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 32768         # encoding: [0x00,0x80,0x8c,0x35]
+# CHECK: lui	$12, 65500              # encoding: [0xdc,0xff,0x0c,0x3c]
+# CHECK: ori	$12, $12, 30989         # encoding: [0x0d,0x79,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 36927         # encoding: [0x3f,0x90,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 0             # encoding: [0x00,0x00,0x8c,0x35]
+# CHECK: lui	$12, 65180              # encoding: [0x9c,0xfe,0x0c,0x3c]
+# CHECK: ori	$12, $12, 47751         # encoding: [0x87,0xba,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 41590         # encoding: [0x76,0xa2,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 0             # encoding: [0x00,0x00,0x8c,0x35]
+# CHECK: lui	$12, 61983              # encoding: [0x1f,0xf2,0x0c,0x3c]
+# CHECK: ori	$12, $12, 18764         # encoding: [0x4c,0x49,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 22684         # encoding: [0x9c,0x58,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 0             # encoding: [0x00,0x00,0x8c,0x35]
+# CHECK: lui	$12, 30008              # encoding: [0x38,0x75,0x0c,0x3c]
+# CHECK: ori	$12, $12, 56571         # encoding: [0xfb,0xdc,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 30232         # encoding: [0x18,0x76,0x8c,0x35]
+# CHECK: dsll	$12, $12, 16            # encoding: [0x38,0x64,0x0c,0x00]
+# CHECK: ori	$12, $12, 0             # encoding: [0x00,0x00,0x8c,0x35]
+
+	dli	$t0, 1
+	dli	$t0, 10
+	dli	$t0, 100
+	dli	$t0, 1000
+	dli	$t0, 10000
+	dli	$t0, 100000
+	dli	$t0, 1000000
+	dli	$t0, 10000000
+	dli	$t0, 100000000
+	dli	$t0, 1000000000
+	dli	$t0, 10000000000
+	dli	$t0, 100000000000
+	dli	$t0, 1000000000000
+	dli	$t0, 10000000000000
+	dli	$t0, 100000000000000
+	dli	$t0, 1000000000000000
+	dli	$t0, 10000000000000000
+	dli	$t0, 100000000000000000
+	dli	$t0, 1000000000000000000
+	dli	$t0, 10000000000000000000
+	dli	$t0, -1
+	dli	$t0, -10
+	dli	$t0, -100
+	dli	$t0, -1000
+	dli	$t0, -10000
+	dli	$t0, -100000
+	dli	$t0, -1000000
+	dli	$t0, -10000000
+	dli	$t0, -100000000
+	dli	$t0, -1000000000
+	dli	$t0, -10000000000
+	dli	$t0, -100000000000
+	dli	$t0, -1000000000000
+	dli	$t0, -10000000000000
+	dli	$t0, -100000000000000
+	dli	$t0, -1000000000000000
+	dli	$t0, -10000000000000000
+	dli	$t0, -100000000000000000
+	dli	$t0, -1000000000000000000
+	dli	$t0, -10000000000000000000
diff --git a/test/MC/Mips/mips64/abiflags.s b/test/MC/Mips/mips64/abiflags.s
new file mode 100644
index 0000000..557e32a
--- /dev/null
+++ b/test/MC/Mips/mips64/abiflags.s
@@ -0,0 +1,37 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips64 | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+#
+# RUN: llvm-mc %s -arch=mips -mcpu=mips64 -filetype=obj -o - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:     FileCheck %s -check-prefix=CHECK-OBJ
+
+# CHECK-ASM: .module fp=64
+
+# Checking if the Mips.abiflags were correctly emitted.
+# CHECK-OBJ:  Section {
+# CHECK-OBJ:    Index: 5
+# CHECK-OBJ:    Name: .MIPS.abiflags (12)
+# CHECK-OBJ:    Type:  (0x7000002A)
+# CHECK-OBJ:     Flags [ (0x2)
+# CHECK-OBJ:      SHF_ALLOC (0x2)
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    Address: 0x0
+# CHECK-OBJ:    Offset: 0x50
+# CHECK-OBJ:    Size: 24
+# CHECK-OBJ:    Link: 0
+# CHECK-OBJ:    Info: 0
+# CHECK-OBJ:    AddressAlignment: 8
+# CHECK-OBJ:    EntrySize: 0
+# CHECK-OBJ:    Relocations [
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    SectionData (
+# CHECK-OBJ:      0000: 00004001 02020001 00000000 00000000  |..@.............|
+# CHECK-OBJ:      0010: 00000000 00000000                    |........|
+# CHECK-OBJ:    )
+# CHECK-OBJ:  }
+
+        .module fp=64
+
+# FIXME: Test should include gnu_attributes directive when implemented.
+#        An explicit .gnu_attribute must be checked against the effective
+#        command line options and any inconsistencies reported via a warning.
diff --git a/test/MC/Mips/mips64/valid.s b/test/MC/Mips/mips64/valid.s
index b9e1002..1bd057d 100644
--- a/test/MC/Mips/mips64/valid.s
+++ b/test/MC/Mips/mips64/valid.s
@@ -9,8 +9,21 @@
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
+        addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
         and       $s7,$v0,$12
+        and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
+        bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1f      $fcc1, 4             # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01]
+        bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1t      $fcc1, 4             # CHECK: bc1t $fcc1, 4 # encoding: [0x45,0x05,0x00,0x01]
+        bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
+        bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
         c.sf.d    $f30,$f0
@@ -20,8 +33,8 @@
         ceil.w.d  $f11,$f25
         ceil.w.s  $f6,$f20
         cfc1      $s1,$21
-        clo       $11,$a1
-        clz       $sp,$gp
+        clo       $11,$a1              # CHECK: clo $11, $5   # encoding: [0x70,0xab,0x58,0x21]
+        clz       $sp,$gp              # CHECK: clz $sp, $gp  # encoding: [0x73,0x9d,0xe8,0x20]
         ctc1      $a2,$26
         cvt.d.l   $f4,$f16
         cvt.d.s   $f22,$f28
@@ -34,11 +47,15 @@
         cvt.w.d   $f20,$f14
         cvt.w.s   $f20,$f24
         dadd      $s3,$at,$ra
+        dadd      $sp,$s4,-27705       # CHECK: daddi $sp, $20, -27705 # encoding: [0x62,0x9d,0x93,0xc7]
+        dadd      $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddi     $sp,$s4,-27705
+        daddi     $sp,$s4,-27705       # CHECK: daddi $sp, $20, -27705 # encoding: [0x62,0x9d,0x93,0xc7]
+        daddi     $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddiu    $k0,$s6,-4586
         daddu     $s3,$at,$ra
-        dclo      $s2,$a2
-        dclz      $s0,$25
+        dclo      $s2,$a2              # CHECK: dclo $18, $6   # encoding: [0x70,0xd2,0x90,0x25]
+        dclz      $s0,$25              # CHECK: dclz $16, $25  # encoding: [0x73,0x30,0x80,0x24]
         deret
         ddiv      $zero,$k0,$s3
         ddivu     $zero,$s0,$s1
@@ -69,8 +86,10 @@
         dsrl32    $s3,$6,23            # CHECK: dsrl32 $19, $6, 23          # encoding: [0x00,0x06,0x9d,0xfe]
         dsrlv     $s3,$6,$s4           # CHECK: dsrlv $19, $6, $20          # encoding: [0x02,0x86,0x98,0x16]
         dsub      $a3,$s6,$8
-        dsubu     $a1,$a1,$k0
-        dsub      $a3,$s6,$8
+        dsub      $sp,$s4,-27705       # CHECK: daddi $sp, $20, 27705  # encoding: [0x62,0x9d,0x6c,0x39]
+        dsub      $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
+        dsubi     $sp,$s4,-27705       # CHECK: daddi $sp, $20, 27705  # encoding: [0x62,0x9d,0x6c,0x39]
+        dsubi     $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
         dsubu     $a1,$a1,$k0
         ehb                            # CHECK: ehb # encoding:  [0x00,0x00,0x00,0xc0]
         eret
@@ -82,7 +101,7 @@
         lbu       $8,30195($v1)
         ld        $sp,-28645($s1)
         ldc1      $f11,16391($s0)
-        ldc2      $8,-21181($at)
+        ldc2      $8,-21181($at)        # CHECK: ldc2 $8, -21181($1)   # encoding: [0xd8,0x28,0xad,0x43]
         ldl       $24,-4167($24)
         ldr       $14,-30358($s4)
         ldxc1     $f8,$s7($15)
@@ -90,12 +109,12 @@
         lhu       $s3,-22851($v0)
         li        $at,-29773
         li        $zero,-29889
-        ll        $v0,-7321($s2)
-        lld       $zero,-14736($ra)
+        ll        $v0,-7321($s2)       # CHECK: ll $2, -7321($18)     # encoding: [0xc2,0x42,0xe3,0x67]
+        lld       $zero,-14736($ra)    # CHECK: lld $zero, -14736($ra) # encoding: [0xd3,0xe0,0xc6,0x70]
         luxc1     $f19,$s6($s5)
         lw        $8,5674($a1)
         lwc1      $f16,10225($k0)
-        lwc2      $18,-841($a2)
+        lwc2      $18,-841($a2)        # CHECK: lwc2 $18, -841($6)     # encoding: [0xc8,0xd2,0xfc,0xb7]
         lwl       $s4,-4231($15)
         lwr       $zero,-19147($gp)
         lwu       $s3,-24086($v1)
@@ -148,16 +167,20 @@
         nop
         nor       $a3,$zero,$a3
         or        $12,$s0,$sp
+        or        $2, 4                # CHECK: ori $2, $2, 4          # encoding: [0x34,0x42,0x00,0x04]
+        pref      1, 8($5)             # CHECK: pref 1, 8($5)          # encoding: [0xcc,0xa1,0x00,0x08]
         round.l.d $f12,$f1
         round.l.s $f25,$f5
         round.w.d $f6,$f4
         round.w.s $f27,$f28
         sb        $s6,-19857($14)
-        sc        $15,18904($s3)
-        scd       $15,-8243($sp)
+        sc        $15,18904($s3)       # CHECK: sc $15, 18904($19)     # encoding: [0xe2,0x6f,0x49,0xd8]
+        scd       $15,-8243($sp)       # CHECK: scd $15, -8243($sp)    # encoding: [0xf3,0xaf,0xdf,0xcd]
+        sdbbp                          # CHECK: sdbbp                  # encoding: [0x70,0x00,0x00,0x3f]
+        sdbbp     34                   # CHECK: sdbbp 34               # encoding: [0x70,0x00,0x08,0xbf]
         sd        $12,5835($10)
         sdc1      $f31,30574($13)
-        sdc2      $20,23157($s2)
+        sdc2      $20,23157($s2)       # CHECK: sdc2 $20, 23157($18)   # encoding: [0xfa,0x54,0x5a,0x75]
         sdl       $a3,-20961($s8)
         sdr       $11,-20423($12)
         sdxc1     $f11,$10($14)
@@ -189,10 +212,12 @@
         suxc1     $f12,$k1($13)
         sw        $ra,-10160($sp)
         swc1      $f6,-8465($24)
-        swc2      $25,24880($s0)
+        swc2      $25,24880($s0)       # CHECK: swc2 $25, 24880($16)   # encoding: [0xea,0x19,0x61,0x30]
         swl       $15,13694($s3)
         swr       $s1,-26590($14)
         swxc1     $f19,$12($k0)
+        sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
+        sync      1                    # CHECK: sync 1                 # encoding: [0x00,0x00,0x00,0x4f]
         teqi      $s5,-17504
         tgei      $s1,5025
         tgeiu     $sp,-28621
diff --git a/test/MC/Mips/mips64r2/abi-bad.s b/test/MC/Mips/mips64r2/abi-bad.s
new file mode 100644
index 0000000..31d13ab
--- /dev/null
+++ b/test/MC/Mips/mips64r2/abi-bad.s
@@ -0,0 +1,9 @@
+# RUN: not llvm-mc %s -triple mips-unknown-unknown -mcpu=mips64r2 2>&1 | FileCheck %s
+# CHECK: .text
+
+
+
+        .set fp=xx
+# CHECK     : error: 'set fp=xx'option requires O32 ABI
+# CHECK     : .set fp=xx
+# CHECK     :          ^
diff --git a/test/MC/Mips/mips64r2/abiflags.s b/test/MC/Mips/mips64r2/abiflags.s
new file mode 100644
index 0000000..aa76dee
--- /dev/null
+++ b/test/MC/Mips/mips64r2/abiflags.s
@@ -0,0 +1,37 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips64r2 | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+#
+# RUN: llvm-mc %s -arch=mips -mcpu=mips64r2 -filetype=obj -o - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:     FileCheck %s -check-prefix=CHECK-OBJ
+
+# CHECK-ASM: .module fp=64
+
+# Checking if the Mips.abiflags were correctly emitted.
+# CHECK-OBJ:  Section {
+# CHECK-OBJ:    Index: 5
+# CHECK-OBJ:    Name: .MIPS.abiflags (12)
+# CHECK-OBJ:    Type:  (0x7000002A)
+# CHECK-OBJ:     Flags [ (0x2)
+# CHECK-OBJ:      SHF_ALLOC (0x2)
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    Address: 0x0
+# CHECK-OBJ:    Offset: 0x50
+# CHECK-OBJ:    Size: 24
+# CHECK-OBJ:    Link: 0
+# CHECK-OBJ:    Info: 0
+# CHECK-OBJ:    AddressAlignment: 8
+# CHECK-OBJ:    EntrySize: 0
+# CHECK-OBJ:    Relocations [
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    SectionData (
+# CHECK-OBJ:      0000: 00004002 02020001 00000000 00000000  |..@.............|
+# CHECK-OBJ:      0010: 00000000 00000000                    |........|
+# CHECK-OBJ:    )
+# CHECK-OBJ:  }
+
+        .module fp=64
+
+# FIXME: Test should include gnu_attributes directive when implemented.
+#        An explicit .gnu_attribute must be checked against the effective
+#        command line options and any inconsistencies reported via a warning.
diff --git a/test/MC/Mips/mips64r2/invalid.s b/test/MC/Mips/mips64r2/invalid.s
new file mode 100644
index 0000000..f53cfff
--- /dev/null
+++ b/test/MC/Mips/mips64r2/invalid.s
@@ -0,0 +1,10 @@
+# Instructions that are valid for the current ISA but should be rejected by the assembler (e.g.
+# invalid set of operands or operand's restrictions not met).
+
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -mcpu=mips64r2 2>%t1
+# RUN: FileCheck %s < %t1 -check-prefix=ASM
+
+        .text
+        .set noreorder
+        jalr.hb $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
+        jalr.hb $31, $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
diff --git a/test/MC/Mips/mips64r2/valid.s b/test/MC/Mips/mips64r2/valid.s
index 252589d..7a2244a 100644
--- a/test/MC/Mips/mips64r2/valid.s
+++ b/test/MC/Mips/mips64r2/valid.s
@@ -9,8 +9,21 @@
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
+        addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
         and       $s7,$v0,$12
+        and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
+        bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1f      $fcc1, 4             # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01]
+        bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1t      $fcc1, 4             # CHECK: bc1t $fcc1, 4 # encoding: [0x45,0x05,0x00,0x01]
+        bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
+        bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
+        bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
         c.sf.d    $f30,$f0
@@ -20,8 +33,8 @@
         ceil.w.d  $f11,$f25
         ceil.w.s  $f6,$f20
         cfc1      $s1,$21
-        clo       $11,$a1
-        clz       $sp,$gp
+        clo       $11,$a1              # CHECK: clo $11, $5   # encoding: [0x70,0xab,0x58,0x21]
+        clz       $sp,$gp              # CHECK: clz $sp, $gp  # encoding: [0x73,0x9d,0xe8,0x20]
         ctc1      $a2,$26
         cvt.d.l   $f4,$f16
         cvt.d.s   $f22,$f28
@@ -34,11 +47,15 @@
         cvt.w.d   $f20,$f14
         cvt.w.s   $f20,$f24
         dadd      $s3,$at,$ra
+        dadd      $sp,$s4,-27705       # CHECK: daddi $sp, $20, -27705 # encoding: [0x62,0x9d,0x93,0xc7]
+        dadd      $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddi     $sp,$s4,-27705
+        daddi     $sp,$s4,-27705       # CHECK: daddi $sp, $20, -27705 # encoding: [0x62,0x9d,0x93,0xc7]
+        daddi     $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddiu    $k0,$s6,-4586
         daddu     $s3,$at,$ra
-        dclo      $s2,$a2
-        dclz      $s0,$25
+        dclo      $s2,$a2              # CHECK: dclo $18, $6   # encoding: [0x70,0xd2,0x90,0x25]
+        dclz      $s0,$25              # CHECK: dclz $16, $25  # encoding: [0x73,0x30,0x80,0x24]
         deret
         di        $s8
         ddiv      $zero,$k0,$s3
@@ -77,8 +94,12 @@
         dsrl32    $s3,$6,23            # CHECK: dsrl32 $19, $6, 23          # encoding: [0x00,0x06,0x9d,0xfe]
         dsrlv     $s3,$6,$s4           # CHECK: dsrlv $19, $6, $20          # encoding: [0x02,0x86,0x98,0x16]
         dsub      $a3,$s6,$8
-        dsubu     $a1,$a1,$k0
         dsub      $a3,$s6,$8
+        dsub      $sp,$s4,-27705       # CHECK: daddi $sp, $20, 27705  # encoding: [0x62,0x9d,0x6c,0x39]
+        dsub      $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
+        dsubi     $sp,$s4,-27705       # CHECK: daddi $sp, $20, 27705  # encoding: [0x62,0x9d,0x6c,0x39]
+        dsubi     $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
+        dsubu     $a1,$a1,$k0
         dsubu     $a1,$a1,$k0
         ehb                            # CHECK: ehb # encoding:  [0x00,0x00,0x00,0xc0]
         ei        $14
@@ -87,11 +108,14 @@
         floor.l.s $f12,$f5
         floor.w.d $f14,$f11
         floor.w.s $f8,$f9
+        jr.hb     $4                   # CHECK: jr.hb  $4 # encoding: [0x00,0x80,0x04,0x08]
+        jalr.hb   $4                   # CHECK: jalr.hb  $4 # encoding: [0x00,0x80,0xfc,0x09]
+        jalr.hb   $4, $5               # CHECK: jalr.hb  $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
         lb        $24,-14515($10)
         lbu       $8,30195($v1)
         ld        $sp,-28645($s1)
         ldc1      $f11,16391($s0)
-        ldc2      $8,-21181($at)
+        ldc2      $8,-21181($at)        # CHECK: ldc2 $8, -21181($1)   # encoding: [0xd8,0x28,0xad,0x43]
         ldl       $24,-4167($24)
         ldr       $14,-30358($s4)
         ldxc1     $f8,$s7($15)
@@ -99,12 +123,12 @@
         lhu       $s3,-22851($v0)
         li        $at,-29773
         li        $zero,-29889
-        ll        $v0,-7321($s2)
-        lld       $zero,-14736($ra)
+        ll        $v0,-7321($s2)       # CHECK: ll $2, -7321($18)     # encoding: [0xc2,0x42,0xe3,0x67]
+        lld       $zero,-14736($ra)    # CHECK: lld $zero, -14736($ra) # encoding: [0xd3,0xe0,0xc6,0x70]
         luxc1     $f19,$s6($s5)
         lw        $8,5674($a1)
         lwc1      $f16,10225($k0)
-        lwc2      $18,-841($a2)
+        lwc2      $18,-841($a2)        # CHECK: lwc2 $18, -841($6)     # encoding: [0xc8,0xd2,0xfc,0xb7]
         lwl       $s4,-4231($15)
         lwr       $zero,-19147($gp)
         lwu       $s3,-24086($v1)
@@ -163,7 +187,9 @@
         nop
         nor       $a3,$zero,$a3
         or        $12,$s0,$sp
+        or        $2, 4                # CHECK: ori $2, $2, 4           # encoding: [0x34,0x42,0x00,0x04]
         pause                          # CHECK: pause # encoding:  [0x00,0x00,0x01,0x40]
+        pref      1, 8($5)             # CHECK: pref 1, 8($5)           # encoding: [0xcc,0xa1,0x00,0x08]
         rdhwr     $sp,$11
         rotr      $1,15                # CHECK: rotr $1, $1, 15         # encoding: [0x00,0x21,0x0b,0xc2]
         rotr      $1,$14,15            # CHECK: rotr $1, $14, 15        # encoding: [0x00,0x2e,0x0b,0xc2]
@@ -173,11 +199,13 @@
         round.w.d $f6,$f4
         round.w.s $f27,$f28
         sb        $s6,-19857($14)
-        sc        $15,18904($s3)
-        scd       $15,-8243($sp)
+        sc        $15,18904($s3)       # CHECK: sc $15, 18904($19)     # encoding: [0xe2,0x6f,0x49,0xd8]
+        scd       $15,-8243($sp)       # CHECK: scd $15, -8243($sp)    # encoding: [0xf3,0xaf,0xdf,0xcd]
+        sdbbp                          # CHECK: sdbbp                  # encoding: [0x70,0x00,0x00,0x3f]
+        sdbbp     34                   # CHECK: sdbbp 34               # encoding: [0x70,0x00,0x08,0xbf]
         sd        $12,5835($10)
         sdc1      $f31,30574($13)
-        sdc2      $20,23157($s2)
+        sdc2      $20,23157($s2)       # CHECK: sdc2 $20, 23157($18)   # encoding: [0xfa,0x54,0x5a,0x75]
         sdl       $a3,-20961($s8)
         sdr       $11,-20423($12)
         sdxc1     $f11,$10($14)
@@ -211,10 +239,12 @@
         suxc1     $f12,$k1($13)
         sw        $ra,-10160($sp)
         swc1      $f6,-8465($24)
-        swc2      $25,24880($s0)
+        swc2      $25,24880($s0)       # CHECK: swc2 $25, 24880($16)   # encoding: [0xea,0x19,0x61,0x30]
         swl       $15,13694($s3)
         swr       $s1,-26590($14)
         swxc1     $f19,$12($k0)
+        sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
+        sync      1                    # CHECK: sync 1                 # encoding: [0x00,0x00,0x00,0x4f]
         teqi      $s5,-17504
         tgei      $s1,5025
         tgeiu     $sp,-28621
diff --git a/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s
index f7949bb..e914c89 100644
--- a/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s
+++ b/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s
@@ -5,6 +5,8 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
+        bc2f      4                   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2t      4                   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
         lwl       $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         lwr       $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         swl       $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips64r6/invalid-mips1.s b/test/MC/Mips/mips64r6/invalid-mips1.s
index 1225005..6efd8f4 100644
--- a/test/MC/Mips/mips64r6/invalid-mips1.s
+++ b/test/MC/Mips/mips64r6/invalid-mips1.s
@@ -6,3 +6,22 @@
 
 	.set noat
         addi      $13,$9,26322        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezal    $0, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bltzal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.ngl.d   $f29,$f29           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.ngle.d  $f0,$f16            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.sf.d    $f30,$f0            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.sf.s    $f14,$f22           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mfhi      $s3                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mfhi      $sp                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mflo      $s1                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mthi      $s1                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mtlo      $25                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mtlo      $sp                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mult      $sp,$s4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mult      $sp,$v0             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        multu     $9,$s2              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        multu     $gp,$k0             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+#       div has been re-encoded. See valid.s
+#       divu has been re-encoded. See valid.s
diff --git a/test/MC/Mips/mips64r6/invalid-mips2.s b/test/MC/Mips/mips64r6/invalid-mips2.s
index 0638e78..8a5c50c 100644
--- a/test/MC/Mips/mips64r6/invalid-mips2.s
+++ b/test/MC/Mips/mips64r6/invalid-mips2.s
@@ -6,9 +6,24 @@
 
 	.set noat
         addi      $13,$9,26322        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezal    $0, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bltzal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mfhi      $s3                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mfhi      $sp                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mflo      $s1                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mthi      $s1                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mtlo      $25                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mtlo      $sp                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mult      $sp,$s4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mult      $sp,$v0             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        multu     $9,$s2              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        multu     $gp,$k0             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         teqi      $s5,-17504          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tgei      $s1,5025            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tgeiu     $sp,-28621          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tlti      $14,-21059          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tltiu     $ra,-5076           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tnei      $12,-29647          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+#       div has been re-encoded. See valid.s
+#       divu has been re-encoded. See valid.s
diff --git a/test/MC/Mips/mips64r6/invalid-mips3.s b/test/MC/Mips/mips64r6/invalid-mips3.s
index 0638e78..322dabd 100644
--- a/test/MC/Mips/mips64r6/invalid-mips3.s
+++ b/test/MC/Mips/mips64r6/invalid-mips3.s
@@ -6,9 +6,28 @@
 
 	.set noat
         addi      $13,$9,26322        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezal    $0, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bltzal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dmult     $s7,$9              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dmultu    $a1,$a2             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mfhi      $s3                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mfhi      $sp                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mflo      $s1                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mthi      $s1                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mtlo      $25                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mtlo      $sp                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mult      $sp,$s4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mult      $sp,$v0             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        multu     $9,$s2              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        multu     $gp,$k0             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         teqi      $s5,-17504          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tgei      $s1,5025            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tgeiu     $sp,-28621          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tlti      $14,-21059          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tltiu     $ra,-5076           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tnei      $12,-29647          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+#       ddiv has been re-encoded. See valid.s
+#       ddivu has been re-encoded. See valid.s
+#       div has been re-encoded. See valid.s
+#       divu has been re-encoded. See valid.s
diff --git a/test/MC/Mips/mips64r6/invalid-mips32-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips32-wrong-error.s
new file mode 100644
index 0000000..cc85f18
--- /dev/null
+++ b/test/MC/Mips/mips64r6/invalid-mips32-wrong-error.s
@@ -0,0 +1,20 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+        .set noat
+        bc1fl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1tl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2f  $fcc0,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2f  4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2fl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2t  $fcc0,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2t  4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2tl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/test/MC/Mips/mips64r6/invalid-mips4-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips4-wrong-error.s
new file mode 100644
index 0000000..f3131a9
--- /dev/null
+++ b/test/MC/Mips/mips64r6/invalid-mips4-wrong-error.s
@@ -0,0 +1,21 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+        .set noat
+        beql $1,$2,4            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bgezall $3,8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bgezl $3,8              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bgtzl $4,16             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        blezl $3,8              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bltzall $3,8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bltzl $4,16             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bnel $1,$2,4            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        prefx 0,$2($31)         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/test/MC/Mips/mips64r6/invalid-mips4.s b/test/MC/Mips/mips64r6/invalid-mips4.s
new file mode 100644
index 0000000..706db27
--- /dev/null
+++ b/test/MC/Mips/mips64r6/invalid-mips4.s
@@ -0,0 +1,14 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+	.set noat
+        bgezal    $0, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bltzal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        ldxc1     $f8,$s7($15)        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        lwxc1     $f12,$s1($s8)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        sdxc1     $f11,$10($14)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        swxc1     $f19,$12($k0)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s
index 6b980e6..4fc94e2 100644
--- a/test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s
+++ b/test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s
@@ -1,4 +1,4 @@
-# Instructions that are invalid
+# Instructions that are invalid but currently emit the wrong error message.
 #
 # RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \
 # RUN:     2>%t1
@@ -8,6 +8,10 @@
         abs.ps          $f22,$f8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
         add.ps          $f25,$f27,$f13      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
         alnv.ps         $f12,$f18,$f30,$12  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1any2f        $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1any2t        $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1any4f        $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1any4t        $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
         c.eq.ps         $fcc5,$f0,$f9       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
         c.f.ps          $fcc6,$f11,$f11     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
         c.le.ps         $fcc1,$f7,$f20      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/test/MC/Mips/mips64r6/invalid-mips5.s b/test/MC/Mips/mips64r6/invalid-mips5.s
new file mode 100644
index 0000000..e7fd99a
--- /dev/null
+++ b/test/MC/Mips/mips64r6/invalid-mips5.s
@@ -0,0 +1,12 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+	.set noat
+        bgezal    $0, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bltzal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        luxc1     $f19,$s6($s5)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        suxc1     $f12,$k1($13)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64r6/invalid-mips64.s b/test/MC/Mips/mips64r6/invalid-mips64.s
new file mode 100644
index 0000000..51e5708
--- /dev/null
+++ b/test/MC/Mips/mips64r6/invalid-mips64.s
@@ -0,0 +1,54 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+	.set noat
+        addi      $13,$9,26322        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezal    $0, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bltzal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        daddi     $sp,$s4,-27705      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        daddi     $sp,-27705          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dadd      $sp,$s4,-27705      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dadd      $sp,-27705          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dmult     $s7,$s4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dmultu    $a1,$a2             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dsubi     $sp,$s4,-27705      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dsubi     $sp,-27705          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dsub      $sp,$s4,-27705      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dsub      $sp,-27705          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        jalx      4                   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mfhi      $s3                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mfhi      $sp                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mflo      $s1                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf      $gp,$8,$fcc7        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.d    $f6,$f11,$fcc5      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movf.s    $f23,$f5,$fcc6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movn      $v1,$s1,$s0         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movn.d    $f27,$f21,$k0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movn.s    $f12,$f0,$s7        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt      $zero,$s4,$fcc5     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.d    $f0,$f2,$fcc0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movt.s    $f30,$f2,$fcc1      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movz      $a1,$s6,$9          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movz.d    $f12,$f29,$9        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        movz.s    $f25,$f7,$v1        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mthi      $s1                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mtlo      $25                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mtlo      $sp                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mult      $sp,$s4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        mult      $sp,$v0             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        multu     $9,$s2              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        multu     $gp,$k0             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        teqi      $s5,-17504          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tgei      $s1,5025            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tgeiu     $sp,-28621          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tlti      $14,-21059          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tltiu     $ra,-5076           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tnei      $12,-29647          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+#       ddiv has been re-encoded. See valid.s
+#       ddivu has been re-encoded. See valid.s
+#       div has been re-encoded. See valid.s
+#       divu has been re-encoded. See valid.s
diff --git a/test/MC/Mips/mips64r6/invalid.s b/test/MC/Mips/mips64r6/invalid.s
new file mode 100644
index 0000000..1b01827
--- /dev/null
+++ b/test/MC/Mips/mips64r6/invalid.s
@@ -0,0 +1,12 @@
+# Instructions that are available for the current ISA but should be rejected by
+# the assembler (e.g. invalid set of operands or operand's restrictions not met).
+
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -mcpu=mips64r6 2>%t1
+# RUN: FileCheck %s < %t1 -check-prefix=ASM
+
+        .text
+        .set noreorder
+	.set noat
+        jalr.hb $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
+        jalr.hb $31, $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
+        ldc2    $8,-21181($at)   # ASM: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64r6/relocations.s b/test/MC/Mips/mips64r6/relocations.s
index db84715..651ebfb 100644
--- a/test/MC/Mips/mips64r6/relocations.s
+++ b/test/MC/Mips/mips64r6/relocations.s
@@ -5,7 +5,10 @@
 #------------------------------------------------------------------------------
 # Check that the assembler can handle the documented syntax for fixups.
 #------------------------------------------------------------------------------
-# CHECK-FIXUP: beqc $5, $6, bar # encoding: [0x20,0xa6,A,A]
+# CHECK-FIXUP: addiupc $2, bar  # encoding: [0xec,0b01000AAA,A,A]
+# CHECK-FIXUP:                  #   fixup A - offset: 0,
+# CHECK-FIXUP:                      value: bar, kind: fixup_MIPS_PC19_S2
+# CHECK-FIXUP: beqc     $5, $6, bar # encoding: [0x20,0xa6,A,A]
 # CHECK-FIXUP:                  #   fixup A - offset: 0,
 # CHECK-FIXUP:                      value: bar, kind: fixup_Mips_PC16
 # CHECK-FIXUP: bnec $5, $6, bar # encoding: [0x60,0xa6,A,A]
@@ -31,20 +34,35 @@
 # CHECK-FIXUP:                              #   fixup A - offset: 0,
 # CHECK-FIXUP:                                  value: bar@PCREL_LO16,
 # CHECK-FIXUP:                                  kind: fixup_MIPS_PCLO16
+# CHECK-FIXUP: ldpc    $2, bar  # encoding: [0xec,0b010110AA,A,A]
+# CHECK-FIXUP:                  # fixup A - offset: 0,
+# CHECK-FIXUP:                      value: bar,
+# CHECK-FIXUP:                      kind: fixup_Mips_PC18_S3
+# CHECK-FIXUP: lwpc    $2, bar  # encoding: [0xec,0b01001AAA,A,A]
+# CHECK-FIXUP:                  #   fixup A - offset: 0,
+# CHECK-FIXUP:                      value: bar, kind: fixup_MIPS_PC19_S2
+# CHECK-FIXUP: lwupc   $2, bar  # encoding: [0xec,0b01010AAA,A,A]
+# CHECK-FIXUP:                  #   fixup A - offset: 0,
+# CHECK-FIXUP:                      value: bar, kind: fixup_MIPS_PC19_S2
 #------------------------------------------------------------------------------
 # Check that the appropriate relocations were created.
 #------------------------------------------------------------------------------
 # CHECK-ELF: Relocations [
-# CHECK-ELF:     0x0 R_MIPS_PC16 bar 0x0
+# CHECK-ELF:     0x0 R_MIPS_PC19_S2 bar 0x0
 # CHECK-ELF:     0x4 R_MIPS_PC16 bar 0x0
-# CHECK-ELF:     0x8 R_MIPS_PC21_S2 bar 0x0
+# CHECK-ELF:     0x8 R_MIPS_PC16 bar 0x0
 # CHECK-ELF:     0xC R_MIPS_PC21_S2 bar 0x0
-# CHECK-ELF:     0x10 R_MIPS_PC26_S2 bar 0x0
+# CHECK-ELF:     0x10 R_MIPS_PC21_S2 bar 0x0
 # CHECK-ELF:     0x14 R_MIPS_PC26_S2 bar 0x0
-# CHECK-ELF:     0x18 R_MIPS_PCHI16 bar 0x0
-# CHECK-ELF:     0x1C R_MIPS_PCLO16 bar 0x0
+# CHECK-ELF:     0x18 R_MIPS_PC26_S2 bar 0x0
+# CHECK-ELF:     0x1C R_MIPS_PCHI16 bar 0x0
+# CHECK-ELF:     0x20 R_MIPS_PCLO16 bar 0x0
+# CHECK-ELF:     0x24 R_MIPS_PC18_S3 bar 0x0
+# CHECK-ELF:     0x28 R_MIPS_PC19_S2 bar 0x0
+# CHECK-ELF:     0x2C R_MIPS_PC19_S2 bar 0x0
 # CHECK-ELF: ]
 
+  addiupc   $2,bar
   beqc  $5, $6, bar
   bnec  $5, $6, bar
   beqzc $9, bar
@@ -53,3 +71,6 @@
   bc    bar
   aluipc $2, %pcrel_hi(bar)
   addiu  $2, $2, %pcrel_lo(bar)
+  ldpc  $2,bar
+  lwpc  $2,bar
+  lwupc $2,bar
diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s
index efdfc7f..34c1dac 100644
--- a/test/MC/Mips/mips64r6/valid.s
+++ b/test/MC/Mips/mips64r6/valid.s
@@ -10,15 +10,18 @@
 #   rs > rt
 # appropriately for each branch instruction
 #
-# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r6 | FileCheck %s
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r6 2> %t0 | FileCheck %s
+# RUN: FileCheck %s -check-prefix=WARNING < %t0
 
         .set noat
         # FIXME: Add the instructions carried forward from older ISA's
+        and     $2,4           # CHECK: andi $2, $2, 4        # encoding: [0x30,0x42,0x00,0x04]
         addiupc $4, 100          # CHECK: addiupc $4, 100     # encoding: [0xec,0x80,0x00,0x19]
         align   $4, $2, $3, 2    # CHECK: align $4, $2, $3, 2 # encoding: [0x7c,0x43,0x22,0xa0]
         aluipc  $3, 56           # CHECK: aluipc $3, 56       # encoding: [0xec,0x7f,0x00,0x38]
         aui     $3,$2,-23        # CHECK: aui $3, $2, -23     # encoding: [0x3c,0x62,0xff,0xe9]
         auipc   $3, -1           # CHECK: auipc $3, -1        # encoding: [0xec,0x7e,0xff,0xff]
+        bal     21100            # CHECK: bal 21100           # encoding: [0x04,0x11,0x14,0x9b]
         balc 14572256            # CHECK: balc 14572256       # encoding: [0xe8,0x37,0x96,0xb8]
         bc 14572256              # CHECK: bc 14572256         # encoding: [0xc8,0x37,0x96,0xb8]
         bc1eqz  $f0,4            # CHECK: bc1eqz $f0, 4       # encoding: [0x45,0x20,0x00,0x01]
@@ -38,6 +41,8 @@
         bnec $5, $6, 256         # CHECK: bnec $5, $6, 256    # encoding: [0x60,0xa6,0x00,0x40]
         bnezalc $2, 1332         # CHECK: bnezalc $2, 1332    # encoding: [0x60,0x02,0x01,0x4d]
         beqzc $5, 72256          # CHECK: beqzc $5, 72256     # encoding: [0xd8,0xa0,0x46,0x90]
+        bgec $2, $3, 256         # CHECK: bgec $2, $3, 256    # encoding: [0x58,0x43,0x00,0x40]
+        bgeuc $2, $3, 256        # CHECK: bgeuc $2, $3, 256   # encoding: [0x18,0x43,0x00,0x40]
         bgezalc $2, 1332         # CHECK: bgezalc $2, 1332    # encoding: [0x18,0x42,0x01,0x4d]
         bnezc $5, 72256          # CHECK: bnezc $5, 72256     # encoding: [0xf8,0xa0,0x46,0x90]
         bltzc $5, 256            # CHECK: bltzc $5, 256       # encoding: [0x5c,0xa5,0x00,0x40]
@@ -48,6 +53,8 @@
         bgtzc $5, 256            # CHECK: bgtzc $5, 256       # encoding: [0x5c,0x05,0x00,0x40]
         bitswap $4, $2           # CHECK: bitswap $4, $2      # encoding: [0x7c,0x02,0x20,0x20]
         blezalc $2, 1332         # CHECK: blezalc $2, 1332    # encoding: [0x18,0x02,0x01,0x4d]
+        bltc $5, $6, 256         # CHECK: bltc $5, $6, 256    # encoding: [0x5c,0xa6,0x00,0x40]
+        bltuc $5, $6, 256        # CHECK: bltuc $5, $6, 256   # encoding: [0x1c,0xa6,0x00,0x40]
         # bnvc requires that rs >= rt but we accept both. See also bnec
         bnvc     $0, $0, 4       # CHECK: bnvc $zero, $zero, 4 # encoding: [0x60,0x00,0x00,0x01]
         bnvc     $2, $0, 4       # CHECK: bnvc $2, $zero, 4    # encoding: [0x60,0x40,0x00,0x01]
@@ -56,38 +63,39 @@
         bovc     $0, $0, 4       # CHECK: bovc $zero, $zero, 4 # encoding: [0x20,0x00,0x00,0x01]
         bovc     $2, $0, 4       # CHECK: bovc $2, $zero, 4    # encoding: [0x20,0x40,0x00,0x01]
         bovc     $4, $2, 4       # CHECK: bovc $4, $2, 4      # encoding: [0x20,0x82,0x00,0x01]
-        cmp.f.s    $f2,$f3,$f4      # CHECK: cmp.f.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x80]
-        cmp.f.d    $f2,$f3,$f4      # CHECK: cmp.f.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x80]
+        cache      1, 8($5)         # CHECK: cache 1, 8($5)         # encoding: [0x7c,0xa1,0x04,0x25]
+        cmp.af.s   $f2,$f3,$f4      # CHECK: cmp.af.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x80]
+        cmp.af.d   $f2,$f3,$f4      # CHECK: cmp.af.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x80]
         cmp.un.s   $f2,$f3,$f4      # CHECK: cmp.un.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x81]
         cmp.un.d   $f2,$f3,$f4      # CHECK: cmp.un.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x81]
         cmp.eq.s   $f2,$f3,$f4      # CHECK: cmp.eq.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x82]
         cmp.eq.d   $f2,$f3,$f4      # CHECK: cmp.eq.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x82]
         cmp.ueq.s  $f2,$f3,$f4      # CHECK: cmp.ueq.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x83]
         cmp.ueq.d  $f2,$f3,$f4      # CHECK: cmp.ueq.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x83]
-        cmp.olt.s  $f2,$f3,$f4      # CHECK: cmp.olt.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x84]
-        cmp.olt.d  $f2,$f3,$f4      # CHECK: cmp.olt.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x84]
+        cmp.lt.s   $f2,$f3,$f4      # CHECK: cmp.lt.s  $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x84]
+        cmp.lt.d   $f2,$f3,$f4      # CHECK: cmp.lt.d  $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x84]
         cmp.ult.s  $f2,$f3,$f4      # CHECK: cmp.ult.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x85]
         cmp.ult.d  $f2,$f3,$f4      # CHECK: cmp.ult.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x85]
-        cmp.ole.s  $f2,$f3,$f4      # CHECK: cmp.ole.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x86]
-        cmp.ole.d  $f2,$f3,$f4      # CHECK: cmp.ole.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x86]
+        cmp.le.s   $f2,$f3,$f4      # CHECK: cmp.le.s  $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x86]
+        cmp.le.d   $f2,$f3,$f4      # CHECK: cmp.le.d  $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x86]
         cmp.ule.s  $f2,$f3,$f4      # CHECK: cmp.ule.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x87]
         cmp.ule.d  $f2,$f3,$f4      # CHECK: cmp.ule.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x87]
-        cmp.sf.s   $f2,$f3,$f4      # CHECK: cmp.sf.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x88]
-        cmp.sf.d   $f2,$f3,$f4      # CHECK: cmp.sf.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x88]
-        cmp.ngle.s $f2,$f3,$f4      # CHECK: cmp.ngle.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x89]
-        cmp.ngle.d $f2,$f3,$f4      # CHECK: cmp.ngle.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x89]
+        cmp.saf.s  $f2,$f3,$f4      # CHECK: cmp.saf.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x88]
+        cmp.saf.d  $f2,$f3,$f4      # CHECK: cmp.saf.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x88]
+        cmp.sun.s  $f2,$f3,$f4      # CHECK: cmp.sun.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x89]
+        cmp.sun.d  $f2,$f3,$f4      # CHECK: cmp.sun.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x89]
         cmp.seq.s  $f2,$f3,$f4      # CHECK: cmp.seq.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8a]
         cmp.seq.d  $f2,$f3,$f4      # CHECK: cmp.seq.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8a]
-        cmp.ngl.s  $f2,$f3,$f4      # CHECK: cmp.ngl.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8b]
-        cmp.ngl.d  $f2,$f3,$f4      # CHECK: cmp.ngl.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8b]
-        cmp.lt.s   $f2,$f3,$f4      # CHECK: cmp.lt.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8c]
-        cmp.lt.d   $f2,$f3,$f4      # CHECK: cmp.lt.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8c]
-        cmp.nge.s  $f2,$f3,$f4      # CHECK: cmp.nge.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8d]
-        cmp.nge.d  $f2,$f3,$f4      # CHECK: cmp.nge.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8d]
-        cmp.le.s   $f2,$f3,$f4      # CHECK: cmp.le.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8e]
-        cmp.le.d   $f2,$f3,$f4      # CHECK: cmp.le.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8e]
-        cmp.ngt.s  $f2,$f3,$f4      # CHECK: cmp.ngt.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8f]
-        cmp.ngt.d  $f2,$f3,$f4      # CHECK: cmp.ngt.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8f]
+        cmp.sueq.s $f2,$f3,$f4      # CHECK: cmp.sueq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8b]
+        cmp.sueq.d $f2,$f3,$f4      # CHECK: cmp.sueq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8b]
+        cmp.slt.s  $f2,$f3,$f4      # CHECK: cmp.slt.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8c]
+        cmp.slt.d  $f2,$f3,$f4      # CHECK: cmp.slt.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8c]
+        cmp.sult.s $f2,$f3,$f4      # CHECK: cmp.sult.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8d]
+        cmp.sult.d $f2,$f3,$f4      # CHECK: cmp.sult.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8d]
+        cmp.sle.s  $f2,$f3,$f4      # CHECK: cmp.sle.s $f2, $f3, $f4  # encoding: [0x46,0x84,0x18,0x8e]
+        cmp.sle.d  $f2,$f3,$f4      # CHECK: cmp.sle.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8e]
+        cmp.sule.s $f2,$f3,$f4      # CHECK: cmp.sule.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8f]
+        cmp.sule.d $f2,$f3,$f4      # CHECK: cmp.sule.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8f]
         dalign  $4,$2,$3,5       # CHECK: dalign $4, $2, $3, 5 # encoding: [0x7c,0x43,0x23,0x64]
         daui    $3,$2,0x1234     # CHECK: daui $3, $2, 4660  # encoding: [0x74,0x62,0x12,0x34]
         dahi     $3,0x5678       # CHECK: dahi $3, 22136     # encoding: [0x04,0x66,0x56,0x78]
@@ -103,20 +111,24 @@
         ddivu   $2,$3,$4         # CHECK: ddivu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9f]
         dmod    $2,$3,$4         # CHECK: dmod $2, $3, $4  # encoding: [0x00,0x64,0x10,0xde]
         dmodu   $2,$3,$4         # CHECK: dmodu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdf]
+        lsa     $2, $3, $4, 3    # CHECK: lsa  $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xc5]
+        dlsa    $2, $3, $4, 3    # CHECK: dlsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xd5]
+        ldpc    $2,123456        # CHECK: ldpc $2, 123456  # encoding: [0xec,0x58,0x3c,0x48]
         lwpc    $2,268           # CHECK: lwpc $2, 268     # encoding: [0xec,0x48,0x00,0x43]
         lwupc   $2,268           # CHECK: lwupc $2, 268    # encoding: [0xec,0x50,0x00,0x43]
-#        mul     $2,$3,$4         # CHECK-TODO: mul $2, $3, $4   # encoding: [0x00,0x64,0x10,0x98]
+        mul     $2,$3,$4         # CHECK: mul $2, $3, $4   # encoding: [0x00,0x64,0x10,0x98]
         muh     $2,$3,$4         # CHECK: muh $2, $3, $4   # encoding: [0x00,0x64,0x10,0xd8]
         mulu    $2,$3,$4         # CHECK: mulu $2, $3, $4  # encoding: [0x00,0x64,0x10,0x99]
         muhu    $2,$3,$4         # CHECK: muhu $2, $3, $4  # encoding: [0x00,0x64,0x10,0xd9]
-        dmul    $2,$3,$4         # CHECK: dmul $2, $3, $4  # encoding: [0x00,0x64,0x10,0xb8]
-        dmuh    $2,$3,$4         # CHECK: dmuh $2, $3, $4  # encoding: [0x00,0x64,0x10,0xf8]
-        dmulu   $2,$3,$4         # CHECK: dmulu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xb9]
-        dmuhu   $2,$3,$4         # CHECK: dmuhu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xf9]
+        dmul    $2,$3,$4         # CHECK: dmul $2, $3, $4  # encoding: [0x00,0x64,0x10,0x9c]
+        dmuh    $2,$3,$4         # CHECK: dmuh $2, $3, $4  # encoding: [0x00,0x64,0x10,0xdc]
+        dmulu   $2,$3,$4         # CHECK: dmulu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9d]
+        dmuhu   $2,$3,$4         # CHECK: dmuhu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdd]
         maddf.s $f2,$f3,$f4      # CHECK: maddf.s $f2, $f3, $f4  # encoding: [0x46,0x04,0x18,0x98]
         maddf.d $f2,$f3,$f4      # CHECK: maddf.d $f2, $f3, $f4  # encoding: [0x46,0x24,0x18,0x98]
         msubf.s $f2,$f3,$f4      # CHECK: msubf.s $f2, $f3, $f4  # encoding: [0x46,0x04,0x18,0x99]
         msubf.d $f2,$f3,$f4      # CHECK: msubf.d $f2, $f3, $f4  # encoding: [0x46,0x24,0x18,0x99]
+        pref    1, 8($5)         # CHECK: pref 1, 8($5)          # encoding: [0x7c,0xa1,0x04,0x35]
         sel.d   $f0,$f1,$f2      # CHECK: sel.d $f0, $f1, $f2 # encoding: [0x46,0x22,0x08,0x10]
         sel.s   $f0,$f1,$f2      # CHECK: sel.s $f0, $f1, $f2 # encoding: [0x46,0x02,0x08,0x10]
         seleqz  $2,$3,$4         # CHECK: seleqz $2, $3, $4 # encoding: [0x00,0x64,0x10,0x35]
@@ -129,6 +141,7 @@
         maxa.d  $f0, $f2, $f4    # CHECK: maxa.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1f]
         mina.s  $f0, $f2, $f4    # CHECK: mina.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1e]
         mina.d  $f0, $f2, $f4    # CHECK: mina.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1e]
+        or      $2, 4            # CHECK: ori $2, $2, 4          # encoding: [0x34,0x42,0x00,0x04]
         seleqz.s $f0, $f2, $f4   # CHECK: seleqz.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x14]
         seleqz.d $f0, $f2, $f4   # CHECK: seleqz.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x14]
         selnez.s $f0, $f2, $f4   # CHECK: selnez.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x17]
@@ -137,3 +150,24 @@
         rint.d $f2, $f4          # CHECK: rint.d $f2, $f4        # encoding: [0x46,0x20,0x20,0x9a]
         class.s $f2, $f4         # CHECK: class.s $f2, $f4       # encoding: [0x46,0x00,0x20,0x9b]
         class.d $f2, $f4         # CHECK: class.d $f2, $f4       # encoding: [0x46,0x20,0x20,0x9b]
+        jr.hb   $4               # CHECK: jr.hb $4               # encoding: [0x00,0x80,0x04,0x09]
+        jalr.hb $4               # CHECK: jalr.hb $4             # encoding: [0x00,0x80,0xfc,0x09]
+        jalr.hb $4, $5           # CHECK: jalr.hb $4, $5         # encoding: [0x00,0xa0,0x24,0x09]
+        ldc2    $8, -701($at)    # CHECK: ldc2 $8, -701($1)      # encoding: [0x49,0xc8,0x0d,0x43]
+        lwc2    $18,-841($a2)    # CHECK: lwc2 $18, -841($6)     # encoding: [0x49,0x52,0x34,0xb7]
+        sdc2    $20,629($s2)     # CHECK: sdc2 $20, 629($18)     # encoding: [0x49,0xf4,0x92,0x75]
+        swc2    $25,304($s0)     # CHECK: swc2 $25, 304($16)     # encoding: [0x49,0x79,0x81,0x30]
+        ll      $v0,-153($s2)    # CHECK: ll $2, -153($18)       # encoding: [0x7e,0x42,0xb3,0xb6]
+        lld     $zero,112($ra)   # CHECK: lld $zero, 112($ra)    # encoding: [0x7f,0xe0,0x38,0x37]
+        sc      $15,-40($s3)     # CHECK: sc $15, -40($19)       # encoding: [0x7e,0x6f,0xec,0x26]
+        scd     $15,-51($sp)     # CHECK: scd $15, -51($sp)      # encoding: [0x7f,0xaf,0xe6,0xa7]
+        clo     $11,$a1          # CHECK: clo $11, $5            # encoding: [0x00,0xa0,0x58,0x51]
+        clz     $sp,$gp          # CHECK: clz $sp, $gp           # encoding: [0x03,0x80,0xe8,0x50]
+        dclo    $s2,$a2          # CHECK: dclo $18, $6           # encoding: [0x00,0xc0,0x90,0x53]
+        dclz    $s0,$25          # CHECK: dclz $16, $25          # encoding: [0x03,0x20,0x80,0x52]
+        ssnop                    # WARNING: [[@LINE]]:9: warning: ssnop is deprecated for MIPS64r6 and is equivalent to a nop instruction
+        ssnop                    # CHECK: ssnop                  # encoding: [0x00,0x00,0x00,0x40]
+        sdbbp                    # CHECK: sdbbp                  # encoding: [0x00,0x00,0x00,0x0e]
+        sdbbp     34             # CHECK: sdbbp 34               # encoding: [0x00,0x00,0x08,0x8e]
+        sync                     # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
+        sync    1                # CHECK: sync 1                 # encoding: [0x00,0x00,0x00,0x4f]
diff --git a/test/MC/Mips/mips_abi_flags_xx.s b/test/MC/Mips/mips_abi_flags_xx.s
new file mode 100644
index 0000000..1d65e99
--- /dev/null
+++ b/test/MC/Mips/mips_abi_flags_xx.s
@@ -0,0 +1,37 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+#
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -filetype=obj -o - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:     FileCheck %s -check-prefix=CHECK-OBJ
+
+# CHECK-ASM: .module fp=xx
+
+# Checking if the Mips.abiflags were correctly emitted.
+# CHECK-OBJ:  Section {
+# CHECK-OBJ:    Index: 5
+# CHECK-OBJ:    Name: .MIPS.abiflags (12)
+# CHECK-OBJ:    Type:  (0x7000002A)
+# CHECK-OBJ:     Flags [ (0x2)
+# CHECK-OBJ:      SHF_ALLOC (0x2)
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    Address: 0x0
+# CHECK-OBJ:    Offset: 0x50
+# CHECK-OBJ:    Size: 24
+# CHECK-OBJ:    Link: 0
+# CHECK-OBJ:    Info: 0
+# CHECK-OBJ:    AddressAlignment: 8
+# CHECK-OBJ:    EntrySize: 0
+# CHECK-OBJ:    Relocations [
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    SectionData (
+# CHECK-OBJ:      0000: 00002001 01010005 00000000 00000000  |.. .............|
+# CHECK-OBJ:      0010: 00000000 00000000                    |........|
+# CHECK-OBJ:    )
+# CHECK-OBJ:  }
+
+        .module fp=xx
+
+# FIXME: Test should include gnu_attributes directive when implemented.
+#        An explicit .gnu_attribute must be checked against the effective
+#        command line options and any inconsistencies reported via a warning.
diff --git a/test/MC/Mips/mips_abi_flags_xx_set.s b/test/MC/Mips/mips_abi_flags_xx_set.s
new file mode 100644
index 0000000..56f19d3
--- /dev/null
+++ b/test/MC/Mips/mips_abi_flags_xx_set.s
@@ -0,0 +1,38 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+#
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -filetype=obj -o - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:     FileCheck %s -check-prefix=CHECK-OBJ
+
+# CHECK-ASM: .module fp=xx
+# CHECK-ASM: .set    fp=64
+
+# Checking if the Mips.abiflags were correctly emitted.
+# CHECK-OBJ:  Section {
+# CHECK-OBJ:    Index: 5
+# CHECK-OBJ:    Name: .MIPS.abiflags (12)
+# CHECK-OBJ:    Type:  (0x7000002A)
+# CHECK-OBJ:     Flags [ (0x2)
+# CHECK-OBJ:      SHF_ALLOC (0x2)
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    Address: 0x0
+# CHECK-OBJ:    Offset: 0x50
+# CHECK-OBJ:    Size: 24
+# CHECK-OBJ:    Link: 0
+# CHECK-OBJ:    Info: 0
+# CHECK-OBJ:    AddressAlignment: 8
+# CHECK-OBJ:    EntrySize: 0
+# CHECK-OBJ:    Relocations [
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    SectionData (
+# CHECK-OBJ:      0000: 00002001 01010005 00000000 00000000  |.. .............|
+# CHECK-OBJ:      0010: 00000000 00000000                    |........|
+# CHECK-OBJ:    )
+# CHECK-OBJ:  }
+
+        .module fp=xx
+        .set    fp=64
+# FIXME: Test should include gnu_attributes directive when implemented.
+#        An explicit .gnu_attribute must be checked against the effective
+#        command line options and any inconsistencies reported via a warning.
diff --git a/test/MC/Mips/msa/abiflags.s b/test/MC/Mips/msa/abiflags.s
new file mode 100644
index 0000000..83b83cc
--- /dev/null
+++ b/test/MC/Mips/msa/abiflags.s
@@ -0,0 +1,38 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+#
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:     FileCheck %s -check-prefix=CHECK-OBJ
+
+# CHECK-ASM: .module fp=32
+# CHECK-ASM: .set fp=64
+
+# Checking if the Mips.abiflags were correctly emitted.
+# CHECK-OBJ:  Section {
+# CHECK-OBJ:    Index: 5
+# CHECK-OBJ:    Name: .MIPS.abiflags (12)
+# CHECK-OBJ:    Type:  (0x7000002A)
+# CHECK-OBJ:     Flags [ (0x2)
+# CHECK-OBJ:      SHF_ALLOC (0x2)
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    Address: 0x0
+# CHECK-OBJ:    Offset: 0x50
+# CHECK-OBJ:    Size: 24
+# CHECK-OBJ:    Link: 0
+# CHECK-OBJ:    Info: 0
+# CHECK-OBJ:    AddressAlignment: 8
+# CHECK-OBJ:    EntrySize: 0
+# CHECK-OBJ:    Relocations [
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    SectionData (
+# CHECK-OBJ:      0000: 00002002 01030001 00000000 00000200  |.. .............|
+# CHECK-OBJ:      0010: 00000000 00000000                    |........|
+# CHECK-OBJ:    )
+# CHECK-OBJ:  }
+
+        .module fp=32
+        .set fp=64
+# FIXME: Test should include gnu_attributes directive when implemented.
+#        An explicit .gnu_attribute must be checked against the effective
+#        command line options and any inconsistencies reported via a warning.
diff --git a/test/MC/Mips/nacl-mask.s b/test/MC/Mips/nacl-mask.s
index 8205835..22286ac 100644
--- a/test/MC/Mips/nacl-mask.s
+++ b/test/MC/Mips/nacl-mask.s
@@ -283,3 +283,37 @@ test5:
 # CHECK-NEXT:        and     $25, $25, $14
 # CHECK-NEXT:        jalr    $25
 # CHECK-NEXT:        addiu   $4, $zero, 5
+
+
+
+# Test that we can put non-dangerous loads and stores in branch delay slot.
+
+	.align	4
+test6:
+	.set	noreorder
+
+        jal func1
+        sw      $4, 0($sp)
+
+        bal func2
+        lw      $5, 0($t8)
+
+        jalr $t9
+        sw      $sp, 0($sp)
+
+# CHECK-LABEL:   test6:
+
+# CHECK-NEXT:        nop
+# CHECK-NEXT:        nop
+# CHECK-NEXT:        jal
+# CHECK-NEXT:        sw      $4, 0($sp)
+
+# CHECK-NEXT:        nop
+# CHECK-NEXT:        nop
+# CHECK-NEXT:        bal
+# CHECK-NEXT:        lw      $5, 0($24)
+
+# CHECK-NEXT:        nop
+# CHECK-NEXT:        and     $25, $25, $14
+# CHECK-NEXT:        jalr
+# CHECK-NEXT:        sw      $sp, 0($sp)
diff --git a/test/MC/Mips/nooddspreg-cmdarg.s b/test/MC/Mips/nooddspreg-cmdarg.s
new file mode 100644
index 0000000..826db12
--- /dev/null
+++ b/test/MC/Mips/nooddspreg-cmdarg.s
@@ -0,0 +1,43 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64,+nooddspreg | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+#
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64,+nooddspreg -filetype=obj -o - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:     FileCheck %s -check-prefix=CHECK-OBJ
+
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips64 -mattr=-n64,+n32,+nooddspreg 2> %t0
+# RUN: FileCheck %s -check-prefix=INVALID < %t0
+#
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips64 -mattr=+nooddspreg 2> %t0
+# RUN: FileCheck %s -check-prefix=INVALID < %t0
+#
+# CHECK-ASM-NOT: .module nooddspreg
+
+# Checking if the Mips.abiflags were correctly emitted.
+# CHECK-OBJ:  Section {
+# CHECK-OBJ:    Index: 5
+# CHECK-OBJ:    Name: .MIPS.abiflags (12)
+# CHECK-OBJ:    Type:  (0x7000002A)
+# CHECK-OBJ:     Flags [ (0x2)
+# CHECK-OBJ:      SHF_ALLOC (0x2)
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    Address: 0x0
+# CHECK-OBJ:    Offset: 0x50
+# CHECK-OBJ:    Size: 24
+# CHECK-OBJ:    Link: 0
+# CHECK-OBJ:    Info: 0
+# CHECK-OBJ:    AddressAlignment: 8
+# CHECK-OBJ:    EntrySize: 0
+# CHECK-OBJ:    Relocations [
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    SectionData (
+# CHECK-OBJ:      0000: 00002001 01020007 00000000 00000000  |.. .............|
+# CHECK-OBJ:      0010: 00000000 00000000                    |........|
+# CHECK-OBJ:    )
+# CHECK-OBJ:  }
+
+# INVALID: ERROR: -mno-odd-spreg requires the O32 ABI
+
+# FIXME: Test should include gnu_attributes directive when implemented.
+#        An explicit .gnu_attribute must be checked against the effective
+#        command line options and any inconsistencies reported via a warning.
diff --git a/test/MC/Mips/nooddspreg-error.s b/test/MC/Mips/nooddspreg-error.s
new file mode 100644
index 0000000..b4aabbe
--- /dev/null
+++ b/test/MC/Mips/nooddspreg-error.s
@@ -0,0 +1,14 @@
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64 2> %t0 | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+# RUN: FileCheck %s -check-prefix=CHECK-ERROR < %t0
+#
+        .module nooddspreg
+# CHECK-ASM: .module nooddspreg
+
+        add.s $f1, $f2, $f5
+# CHECK-ERROR: :[[@LINE-1]]:15: error: -mno-odd-spreg prohibits the use of odd FPU registers
+# CHECK-ERROR: :[[@LINE-2]]:25: error: -mno-odd-spreg prohibits the use of odd FPU registers
+
+# FIXME: Test should include gnu_attributes directive when implemented.
+#        An explicit .gnu_attribute must be checked against the effective
+#        command line options and any inconsistencies reported via a warning.
diff --git a/test/MC/Mips/nooddspreg.s b/test/MC/Mips/nooddspreg.s
new file mode 100644
index 0000000..5a283f5
--- /dev/null
+++ b/test/MC/Mips/nooddspreg.s
@@ -0,0 +1,45 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64 | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+#
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64 -filetype=obj -o - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:     FileCheck %s -check-prefix=CHECK-OBJ
+
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips64 -mattr=-n64,n32 2> %t1
+# RUN: FileCheck %s -check-prefix=INVALID < %t1
+#
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips64 2> %t2
+# RUN: FileCheck %s -check-prefix=INVALID < %t2
+#
+# CHECK-ASM: .module nooddspreg
+
+# Checking if the Mips.abiflags were correctly emitted.
+# CHECK-OBJ:  Section {
+# CHECK-OBJ:    Index: 5
+# CHECK-OBJ:    Name: .MIPS.abiflags (12)
+# CHECK-OBJ:    Type:  (0x7000002A)
+# CHECK-OBJ:     Flags [ (0x2)
+# CHECK-OBJ:      SHF_ALLOC (0x2)
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    Address: 0x0
+# CHECK-OBJ:    Offset: 0x50
+# CHECK-OBJ:    Size: 24
+# CHECK-OBJ:    Link: 0
+# CHECK-OBJ:    Info: 0
+# CHECK-OBJ:    AddressAlignment: 8
+# CHECK-OBJ:    EntrySize: 0
+# CHECK-OBJ:    Relocations [
+# CHECK-OBJ:    ]
+# CHECK-OBJ:    SectionData (
+# CHECK-OBJ:      0000: 00002001 01020007 00000000 00000000  |.. .............|
+# CHECK-OBJ:      0010: 00000000 00000000                    |........|
+# CHECK-OBJ:    )
+# CHECK-OBJ:  }
+
+# INVALID: '.module nooddspreg' requires the O32 ABI
+
+        .module nooddspreg
+
+# FIXME: Test should include gnu_attributes directive when implemented.
+#        An explicit .gnu_attribute must be checked against the effective
+#        command line options and any inconsistencies reported via a warning.
diff --git a/test/MC/Mips/oddspreg.s b/test/MC/Mips/oddspreg.s
new file mode 100644
index 0000000..f5aa9c0
--- /dev/null
+++ b/test/MC/Mips/oddspreg.s
@@ -0,0 +1,56 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64 | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+#
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64 -filetype=obj -o - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:     FileCheck %s -check-prefix=CHECK-OBJ-ALL -check-prefix=CHECK-OBJ-O32
+#
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64 -mattr=-n64,+n32 | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+#
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64 -mattr=-n64,+n32 -filetype=obj -o - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:     FileCheck %s -check-prefix=CHECK-OBJ-ALL -check-prefix=CHECK-OBJ-N32
+
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64 | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+#
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64 -filetype=obj -o - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:     FileCheck %s -check-prefix=CHECK-OBJ-ALL -check-prefix=CHECK-OBJ-N64
+
+# CHECK-ASM: .module oddspreg
+
+# Checking if the Mips.abiflags were correctly emitted.
+# CHECK-OBJ-ALL:  Section {
+# CHECK-OBJ-ALL:    Index: 5
+# CHECK-OBJ-ALL:    Name: .MIPS.abiflags ({{[0-9]+}})
+# CHECK-OBJ-ALL:    Type:  (0x7000002A)
+# CHECK-OBJ-ALL:     Flags [ (0x2)
+# CHECK-OBJ-ALL:      SHF_ALLOC (0x2)
+# CHECK-OBJ-ALL:    ]
+# CHECK-OBJ-ALL:    Address: 0x0
+# CHECK-OBJ-ALL:    Offset: 0x{{[0-9A-F]+}}
+# CHECK-OBJ-ALL:    Size: 24
+# CHECK-OBJ-ALL:    Link: 0
+# CHECK-OBJ-ALL:    Info: 0
+# CHECK-OBJ-ALL:    AddressAlignment: 8
+# CHECK-OBJ-ALL:    EntrySize: 0
+# CHECK-OBJ-ALL:    Relocations [
+# CHECK-OBJ-ALL:    ]
+# CHECK-OBJ-ALL:    SectionData (
+# CHECK-OBJ-O32:      0000: 00002001 01020006 00000000 00000000  |.. .............|
+# CHECK-OBJ-O32:      0010: 00000001 00000000                    |........|
+# CHECK-OBJ-N32:      0000: 00004001 02020001 00000000 00000000  |..@.............|
+# CHECK-OBJ-N32:      0010: 00000001 00000000                    |........|
+# CHECK-OBJ-N64:      0000: 00004001 02020001 00000000 00000000  |..@.............|
+# CHECK-OBJ-N64:      0010: 00000001 00000000                    |........|
+# CHECK-OBJ-ALL:    )
+# CHECK-OBJ-ALL:  }
+
+        .module oddspreg
+        add.s $f3, $f1, $f5
+
+# FIXME: Test should include gnu_attributes directive when implemented.
+#        An explicit .gnu_attribute must be checked against the effective
+#        command line options and any inconsistencies reported via a warning.
diff --git a/test/MC/PowerPC/lit.local.cfg b/test/MC/PowerPC/lit.local.cfg
index 193ebeb..0913324 100644
--- a/test/MC/PowerPC/lit.local.cfg
+++ b/test/MC/PowerPC/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'PowerPC' in targets:
+if not 'PowerPC' in config.root.targets:
     config.unsupported = True
diff --git a/test/MC/PowerPC/ppc64-initial-cfa.s b/test/MC/PowerPC/ppc64-initial-cfa.s
index ca97e1b..d0bc6b3 100644
--- a/test/MC/PowerPC/ppc64-initial-cfa.s
+++ b/test/MC/PowerPC/ppc64-initial-cfa.s
@@ -28,8 +28,8 @@ _proc:
 # STATIC-NEXT:   Relocations [
 # STATIC-NEXT:   ]
 # STATIC-NEXT:   SectionData (
-# STATIC-BE-NEXT:  0000: 00000010 00000000 017A5200 04784101
-# STATIC-LE-NEXT:  0000: 10000000 00000000 017A5200 04784101
+# STATIC-BE-NEXT:  0000: 00000010 00000000 037A5200 04784101
+# STATIC-LE-NEXT:  0000: 10000000 00000000 037A5200 04784101
 # STATIC-BE-NEXT:  0010: 1B0C0100 00000010 00000018 00000000
 # STATIC-LE-NEXT:  0010: 1B0C0100 10000000 18000000 00000000
 # STATIC-BE-NEXT:  0020: 00000004 00000000
@@ -69,8 +69,8 @@ _proc:
 # PIC-NEXT:   Relocations [
 # PIC-NEXT:   ]
 # PIC-NEXT:   SectionData (
-# PIC-BE-NEXT:  0000: 00000010 00000000 017A5200 04784101
-# PIC-LE-NEXT:  0000: 10000000 00000000 017A5200 04784101
+# PIC-BE-NEXT:  0000: 00000010 00000000 037A5200 04784101
+# PIC-LE-NEXT:  0000: 10000000 00000000 037A5200 04784101
 # PIC-BE-NEXT:  0010: 1B0C0100 00000010 00000018 00000000
 # PIC-LE-NEXT:  0010: 1B0C0100 10000000 18000000 00000000
 # PIC-BE-NEXT:  0020: 00000004 00000000
diff --git a/test/MC/Sparc/lit.local.cfg b/test/MC/Sparc/lit.local.cfg
index 4d344fa..fa6a54e 100644
--- a/test/MC/Sparc/lit.local.cfg
+++ b/test/MC/Sparc/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'Sparc' in targets:
+if not 'Sparc' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/SystemZ/insn-bad-z196.s b/test/MC/SystemZ/insn-bad-z196.s
index da23a4b..47dbe08 100644
--- a/test/MC/SystemZ/insn-bad-z196.s
+++ b/test/MC/SystemZ/insn-bad-z196.s
@@ -411,6 +411,60 @@
 	lbh	%r0, 524288
 
 #CHECK: error: invalid operand
+#CHECK: ldxbra	%f0, 0, %f0, -1
+#CHECK: error: invalid operand
+#CHECK: ldxbra	%f0, 0, %f0, 16
+#CHECK: error: invalid operand
+#CHECK: ldxbra	%f0, -1, %f0, 0
+#CHECK: error: invalid operand
+#CHECK: ldxbra	%f0, 16, %f0, 0
+#CHECK: error: invalid register pair
+#CHECK: ldxbra	%f0, 0, %f2, 0
+#CHECK: error: invalid register pair
+#CHECK: ldxbra	%f2, 0, %f0, 0
+
+	ldxbra	%f0, 0, %f0, -1
+	ldxbra	%f0, 0, %f0, 16
+	ldxbra	%f0, -1, %f0, 0
+	ldxbra	%f0, 16, %f0, 0
+	ldxbra	%f0, 0, %f2, 0
+	ldxbra	%f2, 0, %f0, 0
+
+#CHECK: error: invalid operand
+#CHECK: ledbra	%f0, 0, %f0, -1
+#CHECK: error: invalid operand
+#CHECK: ledbra	%f0, 0, %f0, 16
+#CHECK: error: invalid operand
+#CHECK: ledbra	%f0, -1, %f0, 0
+#CHECK: error: invalid operand
+#CHECK: ledbra	%f0, 16, %f0, 0
+
+	ledbra	%f0, 0, %f0, -1
+	ledbra	%f0, 0, %f0, 16
+	ledbra	%f0, -1, %f0, 0
+	ledbra	%f0, 16, %f0, 0
+
+#CHECK: error: invalid operand
+#CHECK: lexbra	%f0, 0, %f0, -1
+#CHECK: error: invalid operand
+#CHECK: lexbra	%f0, 0, %f0, 16
+#CHECK: error: invalid operand
+#CHECK: lexbra	%f0, -1, %f0, 0
+#CHECK: error: invalid operand
+#CHECK: lexbra	%f0, 16, %f0, 0
+#CHECK: error: invalid register pair
+#CHECK: lexbra	%f0, 0, %f2, 0
+#CHECK: error: invalid register pair
+#CHECK: lexbra	%f2, 0, %f0, 0
+
+	lexbra	%f0, 0, %f0, -1
+	lexbra	%f0, 0, %f0, 16
+	lexbra	%f0, -1, %f0, 0
+	lexbra	%f0, 16, %f0, 0
+	lexbra	%f0, 0, %f2, 0
+	lexbra	%f2, 0, %f0, 0
+
+#CHECK: error: invalid operand
 #CHECK: lfh	%r0, -524289
 #CHECK: error: invalid operand
 #CHECK: lfh	%r0, 524288
diff --git a/test/MC/SystemZ/insn-bad.s b/test/MC/SystemZ/insn-bad.s
index 8004168..a08cb34 100644
--- a/test/MC/SystemZ/insn-bad.s
+++ b/test/MC/SystemZ/insn-bad.s
@@ -1560,6 +1560,11 @@
 	ldxbr	%f0, %f2
 	ldxbr	%f2, %f0
 
+#CHECK: error: {{(instruction requires: fp-extension)?}}
+#CHECK: ldxbra	%f0, 0, %f0, 0
+
+	ldxbra	%f0, 0, %f0, 0
+
 #CHECK: error: invalid operand
 #CHECK: ldy	%f0, -524289
 #CHECK: error: invalid operand
@@ -1576,6 +1581,11 @@
 	le	%f0, -1
 	le	%f0, 4096
 
+#CHECK: error: {{(instruction requires: fp-extension)?}}
+#CHECK: ledbra	%f0, 0, %f0, 0
+
+	ledbra	%f0, 0, %f0, 0
+
 #CHECK: error: invalid register pair
 #CHECK: lexbr	%f0, %f2
 #CHECK: error: invalid register pair
@@ -1584,6 +1594,11 @@
 	lexbr	%f0, %f2
 	lexbr	%f2, %f0
 
+#CHECK: error: {{(instruction requires: fp-extension)?}}
+#CHECK: lexbra	%f0, 0, %f0, 0
+
+	lexbra	%f0, 0, %f0, 0
+
 #CHECK: error: invalid operand
 #CHECK: ley	%f0, -524289
 #CHECK: error: invalid operand
diff --git a/test/MC/SystemZ/insn-good-z196.s b/test/MC/SystemZ/insn-good-z196.s
index 834bdad..db5ecdd 100644
--- a/test/MC/SystemZ/insn-good-z196.s
+++ b/test/MC/SystemZ/insn-good-z196.s
@@ -675,6 +675,48 @@
 	lbh	%r0, 524287(%r15,%r1)
 	lbh	%r15, 0
 
+#CHECK: ldxbra	%f0, 0, %f0, 0          # encoding: [0xb3,0x45,0x00,0x00]
+#CHECK: ldxbra	%f0, 0, %f0, 15         # encoding: [0xb3,0x45,0x0f,0x00]
+#CHECK: ldxbra	%f0, 0, %f13, 0         # encoding: [0xb3,0x45,0x00,0x0d]
+#CHECK: ldxbra	%f0, 15, %f0, 0         # encoding: [0xb3,0x45,0xf0,0x00]
+#CHECK: ldxbra	%f4, 5, %f8, 9          # encoding: [0xb3,0x45,0x59,0x48]
+#CHECK: ldxbra	%f13, 0, %f0, 0         # encoding: [0xb3,0x45,0x00,0xd0]
+
+	ldxbra	%f0, 0, %f0, 0
+	ldxbra	%f0, 0, %f0, 15
+	ldxbra	%f0, 0, %f13, 0
+	ldxbra	%f0, 15, %f0, 0
+	ldxbra	%f4, 5, %f8, 9
+	ldxbra	%f13, 0, %f0, 0
+
+#CHECK: ledbra	%f0, 0, %f0, 0          # encoding: [0xb3,0x44,0x00,0x00]
+#CHECK: ledbra	%f0, 0, %f0, 15         # encoding: [0xb3,0x44,0x0f,0x00]
+#CHECK: ledbra	%f0, 0, %f15, 0         # encoding: [0xb3,0x44,0x00,0x0f]
+#CHECK: ledbra	%f0, 15, %f0, 0         # encoding: [0xb3,0x44,0xf0,0x00]
+#CHECK: ledbra	%f4, 5, %f6, 7          # encoding: [0xb3,0x44,0x57,0x46]
+#CHECK: ledbra	%f15, 0, %f0, 0         # encoding: [0xb3,0x44,0x00,0xf0]
+
+	ledbra	%f0, 0, %f0, 0
+	ledbra	%f0, 0, %f0, 15
+	ledbra	%f0, 0, %f15, 0
+	ledbra	%f0, 15, %f0, 0
+	ledbra	%f4, 5, %f6, 7
+	ledbra	%f15, 0, %f0, 0
+
+#CHECK: lexbra	%f0, 0, %f0, 0          # encoding: [0xb3,0x46,0x00,0x00]
+#CHECK: lexbra	%f0, 0, %f0, 15         # encoding: [0xb3,0x46,0x0f,0x00]
+#CHECK: lexbra	%f0, 0, %f13, 0         # encoding: [0xb3,0x46,0x00,0x0d]
+#CHECK: lexbra	%f0, 15, %f0, 0         # encoding: [0xb3,0x46,0xf0,0x00]
+#CHECK: lexbra	%f4, 5, %f8, 9          # encoding: [0xb3,0x46,0x59,0x48]
+#CHECK: lexbra	%f13, 0, %f0, 0         # encoding: [0xb3,0x46,0x00,0xd0]
+
+	lexbra	%f0, 0, %f0, 0
+	lexbra	%f0, 0, %f0, 15
+	lexbra	%f0, 0, %f13, 0
+	lexbra	%f0, 15, %f0, 0
+	lexbra	%f4, 5, %f8, 9
+	lexbra	%f13, 0, %f0, 0
+
 #CHECK: lfh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0xca]
 #CHECK: lfh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xca]
 #CHECK: lfh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0xca]
diff --git a/test/MC/SystemZ/lit.local.cfg b/test/MC/SystemZ/lit.local.cfg
index b12af09..5c02dd3 100644
--- a/test/MC/SystemZ/lit.local.cfg
+++ b/test/MC/SystemZ/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'SystemZ' in targets:
+if not 'SystemZ' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/X86/AlignedBundling/lit.local.cfg b/test/MC/X86/AlignedBundling/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/MC/X86/AlignedBundling/lit.local.cfg
+++ b/test/MC/X86/AlignedBundling/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s
index 2915b7a..187b512 100644
--- a/test/MC/X86/avx512-encodings.s
+++ b/test/MC/X86/avx512-encodings.s
@@ -1,4 +1,5 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl --show-encoding %s | FileCheck %s
+// RUN: not llvm-mc -triple x86_64-unknown-unknown -mcpu=knl --show-encoding %s 2> %t.err | FileCheck %s
+// RUN: FileCheck --check-prefix=ERR < %t.err %s
 
 // CHECK: vaddpd %zmm6, %zmm27, %zmm8
 // CHECK:  encoding: [0x62,0x71,0xa5,0x40,0x58,0xc6]
@@ -3128,6 +3129,14 @@ vpsrad 512(%rdi, %rsi, 4), %zmm12, %zmm25
 // CHECK: encoding: [0x62,0xf2,0x7d,0xc9,0x58,0xc8]
 vpbroadcastd  %xmm0, %zmm1 {%k1} {z}
 
+// CHECK: vbroadcasti32x4 {{.*}} {%k7} {z}
+// CHECK: encoding: [0x67,0x62,0xf2,0x7d,0xcf,0x5a,0x52,0x02]
+vbroadcasti32x4 0x20(%edx), %zmm2 {%k7} {z}
+
+// CHECK: vbroadcasti64x4 {{.*}} %zmm22
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x5b,0x72,0x02]
+vbroadcasti64x4 0x40(%rdx), %zmm22
+
 // CHECK: vmovdqu64 {{.*}} {%k3}
 // CHECK: encoding: [0x62,0xf1,0xfe,0x4b,0x7f,0x07]
 vmovdqu64 %zmm0, (%rdi) {%k3}
@@ -3151,3 +3160,62 @@ vaddpd 512(%rdi, %rsi, 8) {1to8}, %zmm20, %zmm30
 // CHECK: vaddps {{.*}}{1to16}
 // CHECK: encoding: [0x62,0x61,0x5c,0x50,0x58,0xb4,0xf7,0x00,0x02,0x00,0x00]
 vaddps 512(%rdi, %rsi, 8) {1to16}, %zmm20, %zmm30
+
+// CHECK: vmovntdqa
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x2a,0xab,0x78,0x56,0x34,0x12]
+vmovntdqa 0x12345678(%rbx), %zmm13
+
+// CHECK: vmovntdqa
+// CHECK: encoding: [0x62,0xc2,0x7d,0x48,0x2a,0x14,0x56]
+vmovntdqa (%r14,%rdx,2), %zmm18
+
+// CHECK: vmovntdqa
+// CHECK: encoding: [0x62,0xc2,0x7d,0x48,0x2a,0x7c,0x14,0x02]
+vmovntdqa 128(%r12,%rdx), %zmm23
+
+// CHECK: vmovntdq
+// CHECK: encoding: [0x62,0x21,0x7d,0x48,0xe7,0x24,0xa9]
+vmovntdq %zmm28, (%rcx,%r13,4)
+
+// CHECK: vmovntpd
+// CHECK: encoding: [0x62,0xf1,0xfd,0x48,0x2b,0xb2,0x04,0x00,0x00,0x00]
+vmovntpd %zmm6, 4(%rdx)
+
+// CHECK: vmovntps
+// CHECK: encoding: [0x62,0x51,0x7c,0x48,0x2b,0x5c,0x8d,0x00]
+vmovntps %zmm11, (%r13,%rcx,4)
+
+// CHECK: vcmpps $14
+// CHECK: encoding: [0x62,0xb1,0x54,0x48,0xc2,0xd1,0x0e]
+vcmpgtps %zmm17, %zmm5, %k2
+
+// CHECK: vcmppd $13
+// CHECK: encoding: [0x62,0xd1,0xf5,0x40,0xc2,0x76,0x02,0x0d]
+vcmpgepd 0x80(%r14), %zmm17, %k6
+
+// CHECK: vpcmpd $1,
+// CHECK: encoding: [0x62,0x93,0x45,0x4c,0x1f,0xe8,0x01]
+vpcmpd $1, %zmm24, %zmm7, %k5{%k4}
+
+// CHECK: vpcmpuq $2,
+// CHECK: encoding: [0x62,0xf3,0xf5,0x47,0x1e,0x72,0x01,0x02]
+vpcmpuq $2, 0x40(%rdx), %zmm17, %k6{%k7}
+
+// ERR: invalid operand for instruction
+vpcmpd $1, %zmm24, %zmm7, %k5{%k0}
+
+// CHECK: vpermi2d
+// CHECK: encoding: [0x62,0x42,0x6d,0x4b,0x76,0xd6]
+vpermi2d %zmm14, %zmm2, %zmm26 {%k3}
+
+// CHECK: vpermt2pd
+// CHECK: encoding: [0x62,0xf2,0xcd,0xc6,0x7f,0xf3]
+vpermt2pd %zmm3, %zmm22, %zmm6 {%k6} {z}
+
+// CHECK: vpermi2q
+// CHECK: encoding: [0x62,0x62,0xed,0x4b,0x76,0x54,0x58,0x02]
+vpermi2q 0x80(%rax,%rbx,2), %zmm2, %zmm26 {%k3}
+
+// CHECK: vpermt2d
+// CHECK: encoding: [0x62,0x32,0x4d,0xc2,0x7e,0x24,0xad,0x05,0x00,0x00,0x00]	
+vpermt2d 5(,%r13,4), %zmm22, %zmm12 {%k2} {z}
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
index 540282a..7968918 100644
--- a/test/MC/X86/intel-syntax.s
+++ b/test/MC/X86/intel-syntax.s
@@ -599,3 +599,11 @@ fxrstor64 opaque ptr [rax]
 // CHECK: movq _g0+8, %rcx
 mov rbx, qword ptr [_g0]
 mov rcx, qword ptr [_g0 + 8]
+
+"?half@?0??bar@@YAXXZ@4NA":
+	.quad   4602678819172646912
+
+fadd   "?half@?0??bar@@YAXXZ@4NA"
+fadd   "?half@?0??bar@@YAXXZ@4NA"@IMGREL
+// CHECK: fadds   "?half@?0??bar@@YAXXZ@4NA"
+// CHECK: fadds   "?half@?0??bar@@YAXXZ@4NA"@IMGREL32
diff --git a/test/MC/X86/lit.local.cfg b/test/MC/X86/lit.local.cfg
index 19840aa..c8625f4 100644
--- a/test/MC/X86/lit.local.cfg
+++ b/test/MC/X86/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
diff --git a/test/MC/X86/no-elf-compact-unwind.s b/test/MC/X86/no-elf-compact-unwind.s
index 017c52a..4e9236b 100644
--- a/test/MC/X86/no-elf-compact-unwind.s
+++ b/test/MC/X86/no-elf-compact-unwind.s
@@ -1,4 +1,5 @@
 // RUN: llvm-mc < %s -filetype=obj -triple x86_64-apple-macosx10.8.0 | llvm-readobj -s | FileCheck -check-prefix=MACHO %s
+// RUN: llvm-mc < %s -filetype=obj -triple x86_64-apple-ios7.0.0 | llvm-readobj -s | FileCheck -check-prefix=MACHO %s
 // RUN: llvm-mc < %s -filetype=obj -triple x86_64-unknown-linux | llvm-readobj -s | FileCheck -check-prefix=ELF %s
 
 	.globl	__Z3barv
diff --git a/test/MC/X86/x86_long_nop.s b/test/MC/X86/x86_long_nop.s
index ac1bc08..eee840c 100644
--- a/test/MC/X86/x86_long_nop.s
+++ b/test/MC/X86/x86_long_nop.s
@@ -2,6 +2,7 @@
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-apple-darwin10.0 %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-apple-darwin8 %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=slm %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=SLM %s
 
 # Ensure alignment directives also emit sequences of 15-byte NOPs on processors
 # capable of using long NOPs.
@@ -13,3 +14,12 @@ inc %eax
 # CHECK-NEXT: 10:  nop
 # CHECK-NEXT: 1f:  nop
 # CHECK-NEXT: 20:  inc
+
+# On Silvermont we emit only 7 byte NOPs since longer NOPs are not profitable
+# SLM: 0:  inc
+# SLM-NEXT: 1:  nop
+# SLM-NEXT: 8:  nop
+# SLM-NEXT: f:  nop
+# SLM-NEXT: 16:  nop
+# SLM-NEXT: 1d:  nop
+# SLM-NEXT: 20:  inc
diff --git a/test/MC/X86/x86_nop.s b/test/MC/X86/x86_nop.s
index 059f591..572487b 100644
--- a/test/MC/X86/x86_nop.s
+++ b/test/MC/X86/x86_nop.s
@@ -14,6 +14,7 @@
 # RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=c3 %s | llvm-objdump -d - | FileCheck %s
 # RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=c3-2 %s | llvm-objdump -d - | FileCheck %s
 # RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=core2 %s | llvm-objdump -d - | FileCheck --check-prefix=NOPL %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=slm %s | llvm-objdump -d - | FileCheck --check-prefix=NOPL %s
 
 
 inc %eax
diff --git a/test/Makefile b/test/Makefile
index dc99fe1..c78c256 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -61,6 +61,15 @@ clang-tools-site-cfg: FORCE
 extra-site-cfgs:: clang-tools-site-cfg
 endif
 
+ifeq ($(shell test -f $(PROJ_OBJ_DIR)/../tools/lld/Makefile && echo OK), OK)
+LIT_ALL_TESTSUITES += $(PROJ_OBJ_DIR)/../tools/lld/test
+
+# Force creation of lld's lit.site.cfg.
+lld-site-cfg: FORCE
+	$(MAKE) -C $(PROJ_OBJ_DIR)/../tools/lld/test lit.site.cfg Unit/lit.site.cfg
+extra-site-cfgs:: lld-site-cfg
+endif
+
 ifeq ($(shell test -f $(PROJ_OBJ_DIR)/../tools/polly/Makefile && echo OK), OK)
 LIT_ALL_TESTSUITES += $(PROJ_OBJ_DIR)/../tools/polly/test
 
diff --git a/test/Object/ARM/lit.local.cfg b/test/Object/ARM/lit.local.cfg
index 5fc35d8..236e1d3 100644
--- a/test/Object/ARM/lit.local.cfg
+++ b/test/Object/ARM/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
     config.unsupported = True
diff --git a/test/Object/Inputs/corrupt-archive.a b/test/Object/Inputs/corrupt-archive.a
new file mode 100644
index 0000000..f8940ff
--- /dev/null
+++ b/test/Object/Inputs/corrupt-archive.a
diff --git a/test/Object/Inputs/darwin-m-test1.mach0-armv7 b/test/Object/Inputs/darwin-m-test1.mach0-armv7
new file mode 100644
index 0000000..2ce3a18
--- /dev/null
+++ b/test/Object/Inputs/darwin-m-test1.mach0-armv7
diff --git a/test/Object/Inputs/darwin-m-test2.macho-i386 b/test/Object/Inputs/darwin-m-test2.macho-i386
new file mode 100644
index 0000000..dc0e865
--- /dev/null
+++ b/test/Object/Inputs/darwin-m-test2.macho-i386
diff --git a/test/Object/Inputs/darwin-m-test3.macho-x86-64 b/test/Object/Inputs/darwin-m-test3.macho-x86-64
new file mode 100755
index 0000000..18960c4
--- /dev/null
+++ b/test/Object/Inputs/darwin-m-test3.macho-x86-64
diff --git a/test/Object/Inputs/hello-world.macho-x86_64 b/test/Object/Inputs/hello-world.macho-x86_64
new file mode 100755
index 0000000..d004bed
--- /dev/null
+++ b/test/Object/Inputs/hello-world.macho-x86_64
diff --git a/test/Object/Inputs/macho-archive-x86_64.a b/test/Object/Inputs/macho-archive-x86_64.a
new file mode 100644
index 0000000..9979ba9
--- /dev/null
+++ b/test/Object/Inputs/macho-archive-x86_64.a
diff --git a/test/Object/Inputs/no-sections.elf-x86-64 b/test/Object/Inputs/no-sections.elf-x86-64
new file mode 100755
index 0000000..9b8ca2e
--- /dev/null
+++ b/test/Object/Inputs/no-sections.elf-x86-64
diff --git a/test/Object/Inputs/program-headers.mips64 b/test/Object/Inputs/program-headers.mips64
new file mode 100644
index 0000000..ad21c7d
--- /dev/null
+++ b/test/Object/Inputs/program-headers.mips64
diff --git a/test/Object/Inputs/trivial.ll b/test/Object/Inputs/trivial.ll
index 25ece76..463442e 100644
--- a/test/Object/Inputs/trivial.ll
+++ b/test/Object/Inputs/trivial.ll
@@ -10,3 +10,7 @@ entry:
 declare i32 @puts(i8* nocapture) nounwind
 
 declare void @SomeOtherFunction(...)
+
+@var = global i32 0
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @var to i8*)], section "llvm.metadata"
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* null, i8* null }]
diff --git a/test/Object/Mips/lit.local.cfg b/test/Object/Mips/lit.local.cfg
index 88262fb..7d12f7a 100644
--- a/test/Object/Mips/lit.local.cfg
+++ b/test/Object/Mips/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'Mips' in targets:
+if not 'Mips' in config.root.targets:
     config.unsupported = True
diff --git a/test/Object/X86/archive-ir-asm.ll b/test/Object/X86/archive-ir-asm.ll
new file mode 100644
index 0000000..560ac17
--- /dev/null
+++ b/test/Object/X86/archive-ir-asm.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-as %s -o=%t1
+; RUN: rm -f %t2
+; RUN: llvm-ar rcs %t2 %t1
+; RUN: llvm-nm -M %t2 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".global global_asm_sym"
+module asm "global_asm_sym:"
+module asm "local_asm_sym:"
+module asm ".long undef_asm_sym"
+
+; CHECK: Archive map
+; CHECK-NEXT: global_asm_sym in archive-ir-asm.ll
+
+; CHECK: archive-ir-asm.ll
+; CHECK-NEXT:         T global_asm_sym
+; CHECK-NEXT:         t local_asm_sym
+; CHECK-NEXT:         U undef_asm_sym
diff --git a/test/Object/X86/lit.local.cfg b/test/Object/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Object/X86/lit.local.cfg
+++ b/test/Object/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Object/nm-ir.ll b/test/Object/X86/nm-ir.ll
index ddf4125..6bb7e23 100644
--- a/test/Object/nm-ir.ll
+++ b/test/Object/X86/nm-ir.ll
@@ -10,6 +10,17 @@
 ; CHECK-NEXT: d g2
 ; CHECK-NEXT: C g3
 ; CHECK-NOT: g4
+; CHECK-NEXT: T global_asm_sym
+; CHECK-NEXT: t local_asm_sym
+; CHECK-NEXT: U undef_asm_sy
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".global global_asm_sym"
+module asm "global_asm_sym:"
+module asm "local_asm_sym:"
+module asm ".long undef_asm_sym"
 
 @g1 = global i32 42
 @g2 = internal global i32 42
diff --git a/test/Object/archive-long-index.test b/test/Object/archive-long-index.test
index f2f4df6..6feb69e 100644
--- a/test/Object/archive-long-index.test
+++ b/test/Object/archive-long-index.test
@@ -1,7 +1,7 @@
 #
 # Check if the index is appearing properly in the output file
 #
-RUN: llvm-nm -s %p/Inputs/liblong_filenames.a | FileCheck -check-prefix=CHECKIDX %s
+RUN: llvm-nm -M %p/Inputs/liblong_filenames.a | FileCheck -check-prefix=CHECKIDX %s
 
 CHECKIDX: Archive map
 CHECKIDX: abcdefghijklmnopqrstuvwxyz12345678 in 1.o
diff --git a/test/Object/archive-symtab.test b/test/Object/archive-symtab.test
index 88c9c98..0899828 100644
--- a/test/Object/archive-symtab.test
+++ b/test/Object/archive-symtab.test
@@ -1,6 +1,6 @@
 RUN: rm -f %t.a
 RUN: llvm-ar rcs %t.a %p/Inputs/trivial-object-test.elf-x86-64 %p/Inputs/trivial-object-test2.elf-x86-64
-RUN: llvm-nm -s %t.a | FileCheck %s
+RUN: llvm-nm -M %t.a | FileCheck %s
 
 CHECK: Archive map
 CHECK-NEXT: main in trivial-object-test.elf-x86-64
@@ -12,24 +12,25 @@ CHECK: trivial-object-test.elf-x86-64:
 CHECK-NEXT:                  U SomeOtherFunction
 CHECK-NEXT: 0000000000000000 T main
 CHECK-NEXT:                  U puts
-CHECK-NEXT: trivial-object-test2.elf-x86-64:
+
+CHECK: trivial-object-test2.elf-x86-64:
 CHECK-NEXT: 0000000000000000 t bar
 CHECK-NEXT: 0000000000000006 T foo
 CHECK-NEXT: 0000000000000016 T main
 
 RUN: rm -f %t.a
 RUN: llvm-ar rcS %t.a %p/Inputs/trivial-object-test.elf-x86-64 %p/Inputs/trivial-object-test2.elf-x86-64
-RUN: llvm-nm -s %t.a | FileCheck %s --check-prefix=NOMAP
+RUN: llvm-nm -M %t.a | FileCheck %s --check-prefix=NOMAP
 
 NOMAP-NOT: Archive map
 
 RUN: llvm-ar s %t.a
-RUN: llvm-nm -s %t.a | FileCheck %s
+RUN: llvm-nm -M %t.a | FileCheck %s
 
 check that the archive does have a corrupt symbol table.
 RUN: rm -f %t.a
 RUN: cp %p/Inputs/archive-test.a-corrupt-symbol-table %t.a
-RUN: llvm-nm -s %t.a | FileCheck %s --check-prefix=CORRUPT
+RUN: llvm-nm -M %t.a | FileCheck %s --check-prefix=CORRUPT
 
 CORRUPT: Archive map
 CORRUPT-NEXT: mbin in trivial-object-test.elf-x86-64
@@ -40,20 +41,27 @@ CORRUPT: trivial-object-test.elf-x86-64:
 CORRUPT-NEXT:                  U SomeOtherFunction
 CORRUPT-NEXT: 0000000000000000 T main
 CORRUPT-NEXT:                  U puts
-CORRUPT-NEXT: trivial-object-test2.elf-x86-64:
+
+CORRUPT: trivial-object-test2.elf-x86-64:
 CORRUPT-NEXT: 0000000000000000 t bar
 CORRUPT-NEXT: 0000000000000006 T foo
 CORRUPT-NEXT: 0000000000000016 T main
 
 check that the we *don't* update the symbol table.
 RUN: llvm-ar s %t.a
-RUN: llvm-nm -s %t.a | FileCheck %s --check-prefix=CORRUPT
+RUN: llvm-nm -M %t.a | FileCheck %s --check-prefix=CORRUPT
 
 repeate the test with llvm-ranlib
 
 RUN: rm -f %t.a
 RUN: llvm-ar rcS %t.a %p/Inputs/trivial-object-test.elf-x86-64 %p/Inputs/trivial-object-test2.elf-x86-64
-RUN: llvm-nm -s %t.a | FileCheck %s --check-prefix=NOMAP
+RUN: llvm-nm -M %t.a | FileCheck %s --check-prefix=NOMAP
 
 RUN: llvm-ranlib %t.a
-RUN: llvm-nm -s %t.a | FileCheck %s
+RUN: llvm-nm -M %t.a | FileCheck %s
+
+RUN: llvm-nm -M %p/Inputs/macho-archive-x86_64.a | FileCheck %s --check-prefix=BSD-MachO
+
+BSD-MachO: Archive map
+BSD-MachO: _bar in bar.o
+BSD-MachO: _foo in foo.o
diff --git a/test/Object/coff-archive-short.test b/test/Object/coff-archive-short.test
index fa531b3..2aee956 100644
--- a/test/Object/coff-archive-short.test
+++ b/test/Object/coff-archive-short.test
@@ -5,7 +5,7 @@
 # than 15 characters, thus, unlike coff_archive.lib, it has no string
 # table as the third member.
 #
-RUN: llvm-nm --numeric-sort -s %p/Inputs/coff_archive_short.lib | FileCheck -check-prefix=CHECKIDX %s
+RUN: llvm-nm --numeric-sort -M %p/Inputs/coff_archive_short.lib | FileCheck -check-prefix=CHECKIDX %s
 
 CHECKIDX: Archive map
 CHECKIDX: _shortfn1 in short1.obj
diff --git a/test/Object/coff-archive.test b/test/Object/coff-archive.test
index 768fe1c..3b0aa0c 100644
--- a/test/Object/coff-archive.test
+++ b/test/Object/coff-archive.test
@@ -1,7 +1,7 @@
 #
 # Check if the index is appearing properly in the output file 
 #
-RUN: llvm-nm --numeric-sort -s %p/Inputs/coff_archive.lib | FileCheck -check-prefix=CHECKIDX %s
+RUN: llvm-nm --numeric-sort -M %p/Inputs/coff_archive.lib | FileCheck -check-prefix=CHECKIDX %s
 
 CHECKIDX: Archive map
 CHECKIDX: ??0invalid_argument@std@@QAE@PBD@Z in Debug\mymath.obj
diff --git a/test/Object/directory.ll b/test/Object/directory.ll
index 48eefcb..c4b0bbf 100644
--- a/test/Object/directory.ll
+++ b/test/Object/directory.ll
@@ -1,6 +1,6 @@
 ;RUN: rm -f %T/test.a
 ;RUN: not llvm-ar r %T/test.a . 2>&1 | FileCheck %s
-;CHECK: .: Is a directory
+;CHECK: .: {{I|i}}s a directory
 
 ;RUN: rm -f %T/test.a
 ;RUN: touch %T/a-very-long-file-name
diff --git a/test/Object/mangle-ir.ll b/test/Object/mangle-ir.ll
index 725d788..5b3cd09 100644
--- a/test/Object/mangle-ir.ll
+++ b/test/Object/mangle-ir.ll
@@ -2,7 +2,13 @@
 
 target datalayout = "m:o"
 
+; CHECK-NOT: memcpy
 ; CHECK: T _f
+; CHECK-NOT: memcpy
+
 define void @f() {
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* null, i64 0, i32 1, i1 false)
   ret void
 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
diff --git a/test/Object/nm-archive.test b/test/Object/nm-archive.test
index fbbf051..7dbc22a 100644
--- a/test/Object/nm-archive.test
+++ b/test/Object/nm-archive.test
@@ -18,6 +18,7 @@ RUN: llvm-nm %t2 | FileCheck %s -check-prefix BITCODE
 BITCODE:          U SomeOtherFunction
 BITCODE-NEXT:          T main
 BITCODE-NEXT:          U puts
+BITCODE-NEXT:          D var
 
 
 Test we don't error with an archive with no symtab.
@@ -29,7 +30,13 @@ RUN: llvm-nm %p/Inputs/archive-test.a-gnu-minimal
 
 
 And don't crash when asked to print a non-existing symtab.
-RUN: llvm-nm -s %p/Inputs/archive-test.a-gnu-minimal
+RUN: llvm-nm -M %p/Inputs/archive-test.a-gnu-minimal
 
 Don't reject an empty archive.
 RUN: llvm-nm %p/Inputs/archive-test.a-empty
+
+This archive has an unaligned member and a unknown format member.
+GNU AR is able to parse the unaligned member and warns about the member with
+the unknown format. We should probably simply warn on both. For now just check
+that we don't produce an error.
+RUN: llvm-nm %p/Inputs/corrupt-archive.a
diff --git a/test/Object/nm-darwin-m.test b/test/Object/nm-darwin-m.test
new file mode 100644
index 0000000..5bb19dc
--- /dev/null
+++ b/test/Object/nm-darwin-m.test
@@ -0,0 +1,53 @@
+RUN: llvm-nm -format darwin %p/Inputs/darwin-m-test1.mach0-armv7 \
+RUN:         | FileCheck %s -check-prefix test1
+RUN: llvm-nm -format darwin %p/Inputs/darwin-m-test2.macho-i386 \
+RUN:         | FileCheck %s -check-prefix test2
+RUN: llvm-nm -m %p/Inputs/darwin-m-test3.macho-x86-64 \
+RUN:         | FileCheck %s -check-prefix test3
+
+# This is testing that the various bits in the n_desc feild are correct
+test1: 00000001 (absolute) non-external _a
+test1: 00000008 (common) (alignment 2^2) external _c
+test1: 0000000a (__DATA,__data) non-external [no dead strip] _d
+test1: 00000004 (__TEXT,__text) non-external [alt entry] _e
+test1: 00000000 (__TEXT,__text) non-external [symbol resolver] _r
+test1: 00000008 (__TEXT,__text) non-external [Thumb] _t
+
+# This is testing that an N_INDR symbol gets its alias name, the "(for ...)"
+test2:          (undefined) external __i
+test2:          (indirect) external _i (for __i)
+
+# This is testing is using darwin-m-test3.macho-x86-64 that is linked with
+# dylibs that have the follow set of -install_names:
+#	Foo.framework/Foo 
+#	/System/Library/Frameworks/FooPath.framework/FooPath 
+#	FooSuffix.framework/FooSuffix_debug
+#	/System/Library/Frameworks/FooPathSuffix.framework/FooPathSuffix_profile
+#	FooVers.framework/Versions/A/FooVers
+#	/System/Library/Frameworks/FooPathVers.framework/Versions/B/FooPathVers
+#	libx.dylib
+#	libxSuffix_profile.dylib
+#	/usr/local/lib/libxPathSuffix_debug.dylib
+#	libATS.A_profile.dylib
+#	/usr/lib/libPathATS.A_profile.dylib
+#	QT.A.qtx
+#	/lib/QTPath.qtx
+#	/usr/lib/libSystem.B.dylib
+# to test that MachOObjectFile::guessLibraryShortName() is correctly parsing 
+# them into their short names.
+test3: 0000000100000000 (__TEXT,__text) [referenced dynamically] external __mh_execute_header
+test3:                  (undefined) external _atsPathVersSuffix (from libPathATS)
+test3:                  (undefined) external _atsVersSuffix (from libATS)
+test3:                  (undefined) external _foo (from Foo)
+test3:                  (undefined) external _fooPath (from FooPath)
+test3:                  (undefined) external _fooPathSuffix (from FooPathSuffix)
+test3:                  (undefined) external _fooPathVers (from FooPathVers)
+test3:                  (undefined) external _fooSuffix (from FooSuffix)
+test3:                  (undefined) external _fooVers (from FooVers)
+test3: 0000000100000e60 (__TEXT,__text) external _main
+test3:                  (undefined) external _qt (from QT)
+test3:                  (undefined) external _qtPath (from QTPath)
+test3:                  (undefined) external _x (from libx)
+test3:                  (undefined) external _xPathSuffix (from libxPathSuffix)
+test3:                  (undefined) external _xSuffix (from libxSuffix)
+test3:                  (undefined) external dyld_stub_binder (from libSystem)
diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test
index 20ac662..656d6b0 100644
--- a/test/Object/nm-trivial-object.test
+++ b/test/Object/nm-trivial-object.test
@@ -1,6 +1,6 @@
-RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-nm \
+RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-nm - \
 RUN:         | FileCheck %s -check-prefix COFF
-RUN: yaml2obj %p/Inputs/COFF/x86-64.yaml | llvm-nm \
+RUN: yaml2obj %p/Inputs/COFF/x86-64.yaml | llvm-nm - \
 RUN:         | FileCheck %s -check-prefix COFF
 RUN: llvm-nm %p/Inputs/trivial-object-test.elf-i386 \
 RUN:         | FileCheck %s -check-prefix ELF
@@ -12,14 +12,27 @@ RUN: llvm-nm %p/Inputs/absolute.elf-x86-64 \
 RUN:         | FileCheck %s -check-prefix ABSOLUTE-ELF64
 RUN: llvm-nm %p/Inputs/trivial-object-test.macho-i386 \
 RUN:         | FileCheck %s -check-prefix macho
+RUN: llvm-nm -U %p/Inputs/trivial-object-test.macho-i386 \
+RUN:         | FileCheck %s -check-prefix macho-U
 RUN: llvm-nm %p/Inputs/trivial-object-test.macho-x86-64 \
 RUN:         | FileCheck %s -check-prefix macho64
+RUN: llvm-nm %p/Inputs/macho-text-data-bss.macho-x86_64 \
+RUN:         | FileCheck %s -check-prefix macho-tdb
+RUN: llvm-nm -j %p/Inputs/macho-text-data-bss.macho-x86_64 \
+RUN:         | FileCheck %s -check-prefix macho-j
+RUN: llvm-nm -r %p/Inputs/macho-text-data-bss.macho-x86_64 \
+RUN:         | FileCheck %s -check-prefix macho-r
 RUN: llvm-nm %p/Inputs/common.coff-i386 \
 RUN:         | FileCheck %s -check-prefix COFF-COMMON
 RUN: llvm-nm %p/Inputs/relocatable-with-section-address.elf-x86-64 \
 RUN:         | FileCheck %s -check-prefix ELF-SEC-ADDR64
 RUN: llvm-nm %p/Inputs/thumb-symbols.elf.arm \
 RUN:         | FileCheck %s -check-prefix ELF-THUMB
+RUN: mkdir -p %t
+RUN: cd %t
+RUN: cp %p/Inputs/trivial-object-test.macho-i386 a.out
+RUN: llvm-nm | FileCheck %s -check-prefix A-OUT
+REQUIRES: shell
 
 
 COFF: 00000000 d .data
@@ -59,11 +72,32 @@ macho:          U _SomeOtherFunction
 macho: 00000000 T _main
 macho:          U _puts
 
+macho-U-NOT:          U _SomeOtherFunction
+macho-U: 00000000 T _main
+macho-U-NOT:          U _puts
+
 macho64: 0000000000000028 s L_.str
 macho64:                  U _SomeOtherFunction
 macho64: 0000000000000000 T _main
 macho64:                  U _puts
 
+macho-tdb: 0000000000000030 s EH_frame0
+macho-tdb: 0000000000000070 b _b
+macho-tdb: 000000000000000c D _d
+macho-tdb: 0000000000000000 T _t
+macho-tdb: 0000000000000048 S _t.eh
+
+macho-j: EH_frame0
+macho-j: _b
+macho-j: _d
+macho-j: _t
+macho-j: _t.eh
+
+macho-r: 0000000000000048 S _t.eh
+macho-r-NEXT: 0000000000000000 T _t
+macho-r-NEXT: 000000000000000c D _d
+macho-r-NEXT: 0000000000000070 b _b
+macho-r-NEXT: 0000000000000030 s EH_frame0
 
 Test that nm uses addresses even with ELF .o files.
 ELF-SEC-ADDR64:      0000000000000058 D a
@@ -76,3 +110,7 @@ ELF-SEC-ADDR64-NEXT: 0000000000000060 D p
 Test that we drop the thumb bit only from function addresses.
 ELF-THUMB: 00000000 t f
 ELF-THUMB: 00000003 t g
+
+A-OUT:          U _SomeOtherFunction
+A-OUT: 00000000 T _main
+A-OUT:          U _puts
diff --git a/test/Object/nm-universal-binary.test b/test/Object/nm-universal-binary.test
index c20c733..889377b 100644
--- a/test/Object/nm-universal-binary.test
+++ b/test/Object/nm-universal-binary.test
@@ -1,19 +1,31 @@
-RUN: llvm-nm %p/Inputs/macho-universal.x86_64.i386 \
+RUN: llvm-nm -arch all %p/Inputs/macho-universal.x86_64.i386 \
 RUN:         | FileCheck %s -check-prefix CHECK-OBJ
-RUN: llvm-nm %p/Inputs/macho-universal-archive.x86_64.i386 \
+RUN: llvm-nm -arch x86_64 %p/Inputs/macho-universal.x86_64.i386 \
+RUN:         | FileCheck %s -check-prefix CHECK-OBJ-x86_64
+RUN: llvm-nm -arch all %p/Inputs/macho-universal-archive.x86_64.i386 \
 RUN:         | FileCheck %s -check-prefix CHECK-AR
+RUN: llvm-nm -arch i386 %p/Inputs/macho-universal-archive.x86_64.i386 \
+RUN:         | FileCheck %s -check-prefix CHECK-AR-i386
 
-CHECK-OBJ: macho-universal.x86_64.i386:x86_64
+CHECK-OBJ: macho-universal.x86_64.i386 (for architecture x86_64):
 CHECK-OBJ: 0000000100000f60 T _main
-CHECK-OBJ: macho-universal.x86_64.i386:i386
+CHECK-OBJ: macho-universal.x86_64.i386 (for architecture i386):
 CHECK-OBJ: 00001fa0 T _main
 
-CHECK-AR: macho-universal-archive.x86_64.i386:x86_64:hello.o:
+CHECK-OBJ-x86_64: 0000000100000000 T __mh_execute_header
+CHECK-OBJ-x86_64: 0000000100000f60 T _main
+CHECK-OBJ-x86_64:                  U dyld_stub_binder
+
+CHECK-AR: macho-universal-archive.x86_64.i386(hello.o) (for architecture x86_64):
 CHECK-AR: 0000000000000068 s EH_frame0
 CHECK-AR: 000000000000003b s L_.str
 CHECK-AR: 0000000000000000 T _main
 CHECK-AR: 0000000000000080 S _main.eh
 CHECK-AR:                  U _printf
-CHECK-AR: macho-universal-archive.x86_64.i386:i386:foo.o:
-CHECK-AR: 00000008 S _bar
+CHECK-AR: macho-universal-archive.x86_64.i386(foo.o) (for architecture i386):
+CHECK-AR: 00000008 D _bar
 CHECK-AR: 00000000 T _foo
+
+CHECK-AR-i386: macho-universal-archive.x86_64.i386(foo.o):
+CHECK-AR-i386: 00000008 D _bar
+CHECK-AR-i386: 00000000 T _foo
diff --git a/test/Object/obj2yaml.test b/test/Object/obj2yaml.test
index 1c15263..98b40d5 100644
--- a/test/Object/obj2yaml.test
+++ b/test/Object/obj2yaml.test
@@ -201,8 +201,8 @@ ELF-MIPSEL-NEXT:     Content:         0000023C00004224E8FFBD271400BFAF1000B0AF21
 ELF-MIPSEL-NEXT:   - Name:            .rel.text
 ELF-MIPSEL-NEXT:     Type:            SHT_REL
 ELF-MIPSEL-NEXT:     Link:            .symtab
-ELF-MIPSEL-NEXT:     Info:            .text
 ELF-MIPSEL-NEXT:     AddressAlign:    0x0000000000000004
+ELF-MIPSEL-NEXT:     Info:            .text
 ELF-MIPSEL-NEXT:     Relocations:
 ELF-MIPSEL-NEXT:       - Offset:          0
 ELF-MIPSEL-NEXT:         Symbol:          _gp_disp
@@ -300,8 +300,8 @@ ELF-MIPS64EL-NEXT:     Content:         '00000000000000000000000000000000'
 ELF-MIPS64EL-NEXT:   - Name:            .rela.data
 ELF-MIPS64EL-NEXT:     Type:            SHT_RELA
 ELF-MIPS64EL-NEXT:     Link:            .symtab
-ELF-MIPS64EL-NEXT:     Info:            .data
 ELF-MIPS64EL-NEXT:     AddressAlign:    0x0000000000000008
+ELF-MIPS64EL-NEXT:     Info:            .data
 ELF-MIPS64EL-NEXT:     Relocations:
 ELF-MIPS64EL-NEXT:       - Offset:          0
 ELF-MIPS64EL-NEXT:         Symbol:          zed
@@ -370,8 +370,8 @@ ELF-X86-64-NEXT:   - Name:            .rela.text
 ELF-X86-64-NEXT:     Type:            SHT_RELA
 ELF-X86-64-NEXT:     Address:         0x0000000000000038
 ELF-X86-64-NEXT:     Link:            .symtab
-ELF-X86-64-NEXT:     Info:            .text
 ELF-X86-64-NEXT:     AddressAlign:    0x0000000000000008
+ELF-X86-64-NEXT:     Info:            .text
 ELF-X86-64-NEXT:     Relocations:
 ELF-X86-64-NEXT:       - Offset:          0x000000000000000D
 ELF-X86-64-NEXT:         Symbol:          .rodata.str1.1
diff --git a/test/Object/objdump-no-sectionheaders.test b/test/Object/objdump-no-sectionheaders.test
new file mode 100644
index 0000000..5130100
--- /dev/null
+++ b/test/Object/objdump-no-sectionheaders.test
@@ -0,0 +1,6 @@
+; RUN: llvm-objdump -h %p/Inputs/no-sections.elf-x86-64 \
+; RUN:              | FileCheck %s
+
+; CHECK: Sections:
+; CHECK: Idx Name          Size      Address          Type
+; CHECK-NOT: {{.}}
diff --git a/test/Object/simple-archive.test b/test/Object/simple-archive.test
index 3e6760e..085a91e 100644
--- a/test/Object/simple-archive.test
+++ b/test/Object/simple-archive.test
@@ -1,7 +1,7 @@
 #
 # Check if the index is appearing properly in the output file
 #
-RUN: llvm-nm -s %p/Inputs/libsimple_archive.a | FileCheck -check-prefix=CHECKIDX %s
+RUN: llvm-nm -M %p/Inputs/libsimple_archive.a | FileCheck -check-prefix=CHECKIDX %s
 
 CHECKIDX: Archive map
 CHECKIDX: abcdefghijklmnopqrstuvwxyz12345678 in 1.o
diff --git a/test/Object/size-trivial-macho.test b/test/Object/size-trivial-macho.test
index 6ecdf5c..a6d3d1c 100644
--- a/test/Object/size-trivial-macho.test
+++ b/test/Object/size-trivial-macho.test
@@ -2,6 +2,22 @@ RUN: llvm-size -A %p/Inputs/macho-text-data-bss.macho-x86_64 \
 RUN:         | FileCheck %s -check-prefix A
 RUN: llvm-size -B %p/Inputs/macho-text-data-bss.macho-x86_64 \
 RUN:         | FileCheck %s -check-prefix B
+RUN: llvm-size -format darwin %p/Inputs/macho-text-data-bss.macho-x86_64 \
+RUN:         | FileCheck %s -check-prefix m
+RUN: llvm-size %p/Inputs/macho-archive-x86_64.a \
+RUN:         | FileCheck %s -check-prefix AR
+RUN: llvm-size -format darwin %p/Inputs/macho-archive-x86_64.a \
+RUN:         | FileCheck %s -check-prefix mAR
+RUN: llvm-size -m -x -l %p/Inputs/hello-world.macho-x86_64 \
+RUN:         | FileCheck %s -check-prefix mxl
+RUN: llvm-size -arch all %p/Inputs/macho-universal.x86_64.i386 \
+RUN:         | FileCheck %s -check-prefix u
+RUN: llvm-size -arch i386 %p/Inputs/macho-universal.x86_64.i386 \
+RUN:         | FileCheck %s -check-prefix u-i386
+RUN: llvm-size -arch all %p/Inputs/macho-universal-archive.x86_64.i386 \
+RUN:         | FileCheck %s -check-prefix uAR
+RUN: llvm-size -arch x86_64 %p/Inputs/macho-universal-archive.x86_64.i386 \
+RUN:         | FileCheck %s -check-prefix uAR-x86_64
 
 A: section              size   addr
 A: __text                 12      0
@@ -11,5 +27,63 @@ A: __compact_unwind       32     16
 A: __eh_frame             64     48
 A: Total                 116
 
-B:   text    data     bss     dec     hex filename
-B:     12     100       4     116      74 
+B:	__TEXT	__DATA	__OBJC	others	dec	hex
+B:	76	8	0	32	116	74	
+
+m: Segment : 116
+m: 	Section (__TEXT, __text): 12
+m: 	Section (__DATA, __data): 4
+m: 	Section (__DATA, __bss): 4
+m: 	Section (__LD, __compact_unwind): 32
+m: 	Section (__TEXT, __eh_frame): 64
+m: 	total 116
+m: total 116
+
+AR: __TEXT	__DATA	__OBJC	others	dec	hex
+AR: 70	0	0	32	102	66	{{.*}}/macho-archive-x86_64.a(foo.o)
+AR: 0	4	0	0	4	4	{{.*}}/macho-archive-x86_64.a(bar.o)
+
+mAR: {{.*}}/macho-archive-x86_64.a(foo.o):
+mAR: Segment : 104
+mAR: 	Section (__TEXT, __text): 6
+mAR: 	Section (__LD, __compact_unwind): 32
+mAR: 	Section (__TEXT, __eh_frame): 64
+mAR: 	total 102
+mAR: total 104
+mAR: {{.*}}/macho-archive-x86_64.a(bar.o):
+mAR: Segment : 4
+mAR: 	Section (__TEXT, __text): 0
+mAR: 	Section (__DATA, __data): 4
+mAR: 	total 4
+mAR: total 4
+
+
+mxl: Segment __PAGEZERO: 0x100000000 (vmaddr 0x0 fileoff 0)
+mxl: Segment __TEXT: 0x1000 (vmaddr 0x100000000 fileoff 0)
+mxl: 	Section __text: 0x3b (addr 0x100000f30 offset 3888)
+mxl: 	Section __stubs: 0x6 (addr 0x100000f6c offset 3948)
+mxl: 	Section __stub_helper: 0x1a (addr 0x100000f74 offset 3956)
+mxl: 	Section __cstring: 0xd (addr 0x100000f8e offset 3982)
+mxl: 	Section __unwind_info: 0x48 (addr 0x100000f9b offset 3995)
+mxl: 	Section __eh_frame: 0x18 (addr 0x100000fe8 offset 4072)
+mxl: 	total 0xc8
+mxl: Segment __DATA: 0x1000 (vmaddr 0x100001000 fileoff 4096)
+mxl: 	Section __nl_symbol_ptr: 0x10 (addr 0x100001000 offset 4096)
+mxl: 	Section __la_symbol_ptr: 0x8 (addr 0x100001010 offset 4112)
+mxl: 	total 0x18
+mxl: Segment __LINKEDIT: 0x1000 (vmaddr 0x100002000 fileoff 8192)
+mxl: total 0x100003000
+
+u: __TEXT	__DATA	__OBJC	others	dec	hex
+u: 4096	0	0	4294971392	4294975488	100002000	{{.*}}/macho-universal.x86_64.i386 (for architecture x86_64)
+u: 4096	0	0	8192	12288	3000	{{.*}}/macho-universal.x86_64.i386 (for architecture i386)
+
+u-i386: __TEXT	__DATA	__OBJC	others	dec	hex
+u-i386: 4096	0	0	8192	12288	3000	
+
+uAR: __TEXT	__DATA	__OBJC	others	dec	hex
+uAR: 136	0	0	32	168	a8	{{.*}}/macho-universal-archive.x86_64.i386(hello.o) (for architecture x86_64)
+uAR: 5	4	0	0	9	9	{{.*}}/macho-universal-archive.x86_64.i386(foo.o) (for architecture i386)
+
+uAR-x86_64: __TEXT	__DATA	__OBJC	others	dec	hex
+uAR-x86_64: 136	0	0	32	168	a8	{{.*}}/macho-universal-archive.x86_64.i386(hello.o)
diff --git a/test/Object/yaml2obj-coff-multi-doc.test b/test/Object/yaml2obj-coff-multi-doc.test
new file mode 100644
index 0000000..1cf7203
--- /dev/null
+++ b/test/Object/yaml2obj-coff-multi-doc.test
@@ -0,0 +1,91 @@
+# RUN: yaml2obj -format=coff -docnum=1 %s \
+# RUN:   | llvm-readobj -symbols - | FileCheck -check-prefix=DOC1 %s
+# RUN: yaml2obj -format=coff -docnum=2 %s \
+# RUN:   | llvm-readobj -symbols - | FileCheck -check-prefix=DOC2 %s
+# RUN: not yaml2obj -format=coff -docnum=3 %s 2>&1 \
+# RUN:   | FileCheck -check-prefix=DOC3 %s
+
+# DOC1: Name: _sym1
+# DOC2: Name: _sym2
+# DOC3: yaml2obj: Cannot find the 3rd document
+
+---
+header:
+  Machine: IMAGE_FILE_MACHINE_I386
+  Characteristics: [ IMAGE_FILE_DEBUG_STRIPPED ]
+
+sections:
+  - Name: .text
+    Alignment: 16
+    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE,
+                       IMAGE_SCN_MEM_READ ]
+    SectionData: "00000000"
+
+symbols:
+  - Name: .text
+    Value: 0
+    SectionNumber: 1
+    SimpleType: IMAGE_SYM_TYPE_NULL
+    ComplexType: IMAGE_SYM_DTYPE_NULL
+    StorageClass: IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          36
+      NumberOfRelocations: 3
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          1
+
+  - Name: _main
+    Value: 0
+    SectionNumber: 1
+    SimpleType: IMAGE_SYM_TYPE_NULL
+    ComplexType: IMAGE_SYM_DTYPE_FUNCTION
+    StorageClass: IMAGE_SYM_CLASS_EXTERNAL
+
+  - Name: _sym1
+    Value: 0
+    SectionNumber: 0
+    SimpleType: IMAGE_SYM_TYPE_NULL
+    ComplexType: IMAGE_SYM_DTYPE_NULL
+    StorageClass: IMAGE_SYM_CLASS_EXTERNAL
+
+---
+header:
+  Machine: IMAGE_FILE_MACHINE_I386
+  Characteristics: [ IMAGE_FILE_DEBUG_STRIPPED ]
+
+sections:
+  - Name: .text
+    Alignment: 16
+    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE,
+                       IMAGE_SCN_MEM_READ ]
+    SectionData: "00000000"
+
+symbols:
+  - Name: .text
+    Value: 0
+    SectionNumber: 1
+    SimpleType: IMAGE_SYM_TYPE_NULL
+    ComplexType: IMAGE_SYM_DTYPE_NULL
+    StorageClass: IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          36
+      NumberOfRelocations: 3
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          1
+
+  - Name: _main
+    Value: 0
+    SectionNumber: 1
+    SimpleType: IMAGE_SYM_TYPE_NULL
+    ComplexType: IMAGE_SYM_DTYPE_FUNCTION
+    StorageClass: IMAGE_SYM_CLASS_EXTERNAL
+
+  - Name: _sym2
+    Value: 0
+    SectionNumber: 0
+    SimpleType: IMAGE_SYM_TYPE_NULL
+    ComplexType: IMAGE_SYM_DTYPE_NULL
+    StorageClass: IMAGE_SYM_CLASS_EXTERNAL
+...
diff --git a/test/Object/yaml2obj-elf-multi-doc.test b/test/Object/yaml2obj-elf-multi-doc.test
new file mode 100644
index 0000000..c51f803
--- /dev/null
+++ b/test/Object/yaml2obj-elf-multi-doc.test
@@ -0,0 +1,56 @@
+# RUN: yaml2obj -format=elf -docnum=1 %s \
+# RUN:   | llvm-readobj -symbols - | FileCheck -check-prefix=DOC1 %s
+# RUN: yaml2obj -format=elf -docnum=2 %s \
+# RUN:   | llvm-readobj -symbols - | FileCheck -check-prefix=DOC2 %s
+# RUN: not yaml2obj -format=elf -docnum=3 %s 2>&1 \
+# RUN:   | FileCheck -check-prefix=DOC3 %s
+
+# DOC1: Name: T1 (1)
+# DOC2: Name: T2 (1)
+# DOC3: yaml2obj: Cannot find the 3rd document
+
+--- !ELF
+FileHeader: !FileHeader
+  Class: ELFCLASS32
+  Data: ELFDATA2LSB
+  Type: ET_REL
+  Machine: EM_MIPS
+  Flags: [EF_MIPS_CPIC]
+
+Sections:
+- Name: .text
+  Type: SHT_PROGBITS
+  Content:  "0000000000000000"
+  AddressAlign: 16
+  Flags: [SHF_EXECINSTR, SHF_ALLOC]
+
+Symbols:
+  Global:
+    - Name: T1
+      Section: .text
+      Type: STT_FUNC
+      Value: 0x0
+      Size: 8
+
+--- !ELF
+FileHeader: !FileHeader
+  Class: ELFCLASS32
+  Data: ELFDATA2LSB
+  Type: ET_REL
+  Machine: EM_MIPS
+
+Sections:
+- Name: .text
+  Type: SHT_PROGBITS
+  Content:  "00000000"
+  AddressAlign: 16
+  Flags: [SHF_EXECINSTR, SHF_ALLOC]
+
+Symbols:
+  Global:
+    - Name: T2
+      Section: .text
+      Type: STT_FUNC
+      Value: 0x0
+      Size: 4
+...
diff --git a/test/Object/yaml2obj-elf-symbol-visibility.yaml b/test/Object/yaml2obj-elf-symbol-visibility.yaml
new file mode 100644
index 0000000..113354a
--- /dev/null
+++ b/test/Object/yaml2obj-elf-symbol-visibility.yaml
@@ -0,0 +1,126 @@
+# RUN: yaml2obj -format=elf %s | llvm-readobj -symbols - | \
+# RUN:   FileCheck --check-prefix OBJ %s
+# RUN: yaml2obj -format=elf %s | obj2yaml - | FileCheck --check-prefix YAML %s
+
+# OBJ:      Symbol {
+# OBJ:        Name: default1 (36)
+# OBJ-NEXT:   Value: 0x0
+# OBJ-NEXT:   Size: 4
+# OBJ-NEXT:   Binding: Global (0x1)
+# OBJ-NEXT:   Type: Object (0x1)
+# OBJ-NEXT:   Other: 0
+# OBJ-NEXT:   Section: .data (0x1)
+# OBJ-NEXT: }
+# OBJ-NEXT: Symbol {
+# OBJ-NEXT:   Name: default2 (27)
+# OBJ-NEXT:   Value: 0x4
+# OBJ-NEXT:   Size: 4
+# OBJ-NEXT:   Binding: Global (0x1)
+# OBJ-NEXT:   Type: Object (0x1)
+# OBJ-NEXT:   Other: 0
+# OBJ-NEXT:   Section: .data (0x1)
+# OBJ-NEXT: }
+# OBJ-NEXT: Symbol {
+# OBJ-NEXT:   Name: internal (8)
+# OBJ-NEXT:   Value: 0x8
+# OBJ-NEXT:   Size: 4
+# OBJ-NEXT:   Binding: Global (0x1)
+# OBJ-NEXT:   Type: Object (0x1)
+# OBJ-NEXT:   Other: 1
+# OBJ-NEXT:   Section: .data (0x1)
+# OBJ-NEXT: }
+# OBJ-NEXT: Symbol {
+# OBJ-NEXT:   Name: hidden (1)
+# OBJ-NEXT:   Value: 0xC
+# OBJ-NEXT:   Size: 4
+# OBJ-NEXT:   Binding: Global (0x1)
+# OBJ-NEXT:   Type: Object (0x1)
+# OBJ-NEXT:   Other: 2
+# OBJ-NEXT:   Section: .data (0x1)
+# OBJ-NEXT: }
+# OBJ-NEXT: Symbol {
+# OBJ-NEXT:   Name: protected (17)
+# OBJ-NEXT:   Value: 0x10
+# OBJ-NEXT:   Size: 4
+# OBJ-NEXT:   Binding: Global (0x1)
+# OBJ-NEXT:   Type: Object (0x1)
+# OBJ-NEXT:   Other: 3
+# OBJ-NEXT:   Section: .data (0x1)
+# OBJ-NEXT: }
+
+# YAML:      Symbols:
+# YAML-NEXT:   Global:
+# YAML-NEXT:     - Name:            default1
+# YAML-NEXT:       Type:            STT_OBJECT
+# YAML-NEXT:       Section:         .data
+# YAML-NEXT:       Size:            0x0000000000000004
+# YAML-NEXT:     - Name:            default2
+# YAML-NEXT:       Type:            STT_OBJECT
+# YAML-NEXT:       Section:         .data
+# YAML-NEXT:       Value:           0x0000000000000004
+# YAML-NEXT:       Size:            0x0000000000000004
+# YAML-NEXT:     - Name:            internal
+# YAML-NEXT:       Type:            STT_OBJECT
+# YAML-NEXT:       Section:         .data
+# YAML-NEXT:       Value:           0x0000000000000008
+# YAML-NEXT:       Size:            0x0000000000000004
+# YAML-NEXT:       Visibility:      STV_INTERNAL
+# YAML-NEXT:     - Name:            hidden
+# YAML-NEXT:       Type:            STT_OBJECT
+# YAML-NEXT:       Section:         .data
+# YAML-NEXT:       Value:           0x000000000000000C
+# YAML-NEXT:       Size:            0x0000000000000004
+# YAML-NEXT:       Visibility:      STV_HIDDEN
+# YAML-NEXT:     - Name:            protected
+# YAML-NEXT:       Type:            STT_OBJECT
+# YAML-NEXT:       Section:         .data
+# YAML-NEXT:       Value:           0x0000000000000010
+# YAML-NEXT:       Size:            0x0000000000000004
+# YAML-NEXT:       Visibility:      STV_PROTECTED
+
+---
+FileHeader:
+  Class:           ELFCLASS32
+  Data:            ELFDATA2LSB
+  Type:            ET_REL
+  Machine:         EM_MIPS
+  Flags:           [ EF_MIPS_ABI_O32, EF_MIPS_ARCH_32 ]
+
+Sections:
+  - Name:            .data
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_WRITE ]
+    AddressAlign:    0x04
+    Size:            0x14
+
+Symbols:
+  Global:
+    - Name:            default1
+      Type:            STT_OBJECT
+      Visibility:      STV_DEFAULT
+      Section:         .data
+      Value:           0x00
+      Size:            0x04
+    - Name:            default2
+      Type:            STT_OBJECT
+      Section:         .data
+      Value:           0x04
+      Size:            0x04
+    - Name:            internal
+      Type:            STT_OBJECT
+      Visibility:      STV_INTERNAL
+      Section:         .data
+      Value:           0x08
+      Size:            0x04
+    - Name:            hidden
+      Type:            STT_OBJECT
+      Visibility:      STV_HIDDEN
+      Section:         .data
+      Value:           0x0C
+      Size:            0x04
+    - Name:            protected
+      Type:            STT_OBJECT
+      Visibility:      STV_PROTECTED
+      Section:         .data
+      Value:           0x10
+      Size:            0x04
diff --git a/test/Other/X86/lit.local.cfg b/test/Other/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Other/X86/lit.local.cfg
+++ b/test/Other/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Other/constant-fold-gep.ll b/test/Other/constant-fold-gep.ll
index aed4145..3874898 100644
--- a/test/Other/constant-fold-gep.ll
+++ b/test/Other/constant-fold-gep.ll
@@ -457,7 +457,7 @@ define i8* @different_addrspace() nounwind noinline {
   %p = getelementptr inbounds i8* addrspacecast ([4 x i8] addrspace(12)* @p12 to i8*),
                                   i32 2
   ret i8* %p
-; OPT: ret i8* getelementptr (i8* addrspacecast ([4 x i8] addrspace(12)* @p12 to i8*), i32 2)
+; OPT: ret i8* getelementptr (i8* addrspacecast (i8 addrspace(12)* getelementptr inbounds ([4 x i8] addrspace(12)* @p12, i32 0, i32 0) to i8*), i32 2)
 }
 
 define i8* @same_addrspace() nounwind noinline {
@@ -467,4 +467,21 @@ define i8* @same_addrspace() nounwind noinline {
 ; OPT: ret i8* getelementptr inbounds ([4 x i8]* @p0, i32 0, i32 2)
 }
 
+@gv1 = internal global i32 1
+@gv2 = internal global [1 x i32] [ i32 2 ]
+@gv3 = internal global [1 x i32] [ i32 2 ]
+
+; Handled by TI-independent constant folder
+define i1 @gv_gep_vs_gv() {
+  ret i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @gv2, i32 0, i32 0), i32* @gv1)
+}
+; PLAIN: gv_gep_vs_gv
+; PLAIN: ret i1 false
+
+define i1 @gv_gep_vs_gv_gep() {
+  ret i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @gv2, i32 0, i32 0), i32* getelementptr inbounds ([1 x i32]* @gv3, i32 0, i32 0))
+}
+; PLAIN: gv_gep_vs_gv_gep
+; PLAIN: ret i1 false
+
 ; CHECK: attributes #0 = { nounwind }
diff --git a/test/Other/llvm-nm-without-aliases.ll b/test/Other/llvm-nm-without-aliases.ll
index 9d9408c..6ef72c7 100644
--- a/test/Other/llvm-nm-without-aliases.ll
+++ b/test/Other/llvm-nm-without-aliases.ll
@@ -1,6 +1,6 @@
 ; RUN: llvm-as < %s > %t
-; RUN: llvm-nm -without-aliases < %t | FileCheck %s
-; RUN: llvm-nm < %t | FileCheck --check-prefix=WITH %s
+; RUN: llvm-nm -without-aliases - < %t | FileCheck %s
+; RUN: llvm-nm - < %t | FileCheck --check-prefix=WITH %s
 
 ; CHECK-NOT: T a0bar
 ; CHECK-NOT: T a0foo
diff --git a/test/TableGen/ForeachLoop.td b/test/TableGen/ForeachLoop.td
index 4aacc74..25208fa 100644
--- a/test/TableGen/ForeachLoop.td
+++ b/test/TableGen/ForeachLoop.td
@@ -51,8 +51,10 @@ foreach i = [0, 1, 2, 3, 4, 5, 6, 7] in
 // CHECK: string Name = "R7";
 // CHECK: int Index = 7;
 
-foreach i = {0-3,9-7} in
+foreach i = {0-3,9-7} in {
   def S#i : Register<"Q"#i, i>;
+  def : Register<"T"#i, i>;
+}
 
 // CHECK: def S0
 // CHECK: def S1
@@ -61,3 +63,25 @@ foreach i = {0-3,9-7} in
 // CHECK: def S7
 // CHECK: def S8
 // CHECK: def S9
+
+// CHECK: def
+// CHECK: string Name = "T0";
+
+// CHECK: def
+// CHECK: string Name = "T1";
+
+// CHECK: def
+// CHECK: string Name = "T2";
+
+// CHECK: def
+// CHECK: string Name = "T3";
+
+// CHECK: def
+// CHECK: string Name = "T9";
+
+// CHECK: def
+// CHECK: string Name = "T8";
+
+// CHECK: def
+// CHECK: string Name = "T7";
+
diff --git a/test/TableGen/if-empty-list-arg.td b/test/TableGen/if-empty-list-arg.td
new file mode 100644
index 0000000..39edf58
--- /dev/null
+++ b/test/TableGen/if-empty-list-arg.td
@@ -0,0 +1,7 @@
+// RUN: llvm-tblgen %s
+// XFAIL: vg_leak
+
+class C<bit cond> {
+  list<int> X = !if(cond, [1, 2, 3], []);
+  list<int> Y = !if(cond, [], [4, 5, 6]);
+}
diff --git a/test/Transforms/ArgumentPromotion/basictest.ll b/test/Transforms/ArgumentPromotion/basictest.ll
index d3d21fc..8f78b98 100644
--- a/test/Transforms/ArgumentPromotion/basictest.ll
+++ b/test/Transforms/ArgumentPromotion/basictest.ll
@@ -1,23 +1,29 @@
-; RUN: opt < %s -basicaa -argpromotion -mem2reg -S | not grep alloca
+; RUN: opt < %s -basicaa -argpromotion -mem2reg -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
 define internal i32 @test(i32* %X, i32* %Y) {
-        %A = load i32* %X               ; <i32> [#uses=1]
-        %B = load i32* %Y               ; <i32> [#uses=1]
-        %C = add i32 %A, %B             ; <i32> [#uses=1]
-        ret i32 %C
+; CHECK-LABEL: define internal i32 @test(i32 %X.val, i32 %Y.val)
+  %A = load i32* %X
+  %B = load i32* %Y
+  %C = add i32 %A, %B
+  ret i32 %C
 }
 
 define internal i32 @caller(i32* %B) {
-        %A = alloca i32         ; <i32*> [#uses=2]
-        store i32 1, i32* %A
-        %C = call i32 @test( i32* %A, i32* %B )         ; <i32> [#uses=1]
-        ret i32 %C
+; CHECK-LABEL: define internal i32 @caller(i32 %B.val1)
+  %A = alloca i32
+  store i32 1, i32* %A
+  %C = call i32 @test(i32* %A, i32* %B)
+; CHECK: call i32 @test(i32 1, i32 %B.val1)
+  ret i32 %C
 }
 
 define i32 @callercaller() {
-        %B = alloca i32         ; <i32*> [#uses=2]
-        store i32 2, i32* %B
-        %X = call i32 @caller( i32* %B )                ; <i32> [#uses=1]
-        ret i32 %X
+; CHECK-LABEL: define i32 @callercaller()
+  %B = alloca i32
+  store i32 2, i32* %B
+  %X = call i32 @caller(i32* %B)
+; CHECK: call i32 @caller(i32 2)
+  ret i32 %X
 }
 
diff --git a/test/Transforms/ArgumentPromotion/byval-2.ll b/test/Transforms/ArgumentPromotion/byval-2.ll
index 368c689..b412f5e 100644
--- a/test/Transforms/ArgumentPromotion/byval-2.ll
+++ b/test/Transforms/ArgumentPromotion/byval-2.ll
@@ -1,26 +1,31 @@
-; RUN: opt < %s -argpromotion -S | grep -F "i32* byval" | count 2
-; Argpromote + scalarrepl should change this to passing the two integers by value.
+; RUN: opt < %s -argpromotion -S | FileCheck %s
 
-	%struct.ss = type { i32, i64 }
+; Arg promotion eliminates the struct argument.
+; FIXME: Should it eliminate the i32* argument?
+
+%struct.ss = type { i32, i64 }
 
 define internal void @f(%struct.ss* byval  %b, i32* byval %X) nounwind  {
+; CHECK-LABEL: define internal void @f(i32 %b.0, i64 %b.1, i32* byval %X)
 entry:
-	%tmp = getelementptr %struct.ss* %b, i32 0, i32 0
-	%tmp1 = load i32* %tmp, align 4
-	%tmp2 = add i32 %tmp1, 1	
-	store i32 %tmp2, i32* %tmp, align 4
+  %tmp = getelementptr %struct.ss* %b, i32 0, i32 0
+  %tmp1 = load i32* %tmp, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp, align 4
 
-	store i32 0, i32* %X
-	ret void
+  store i32 0, i32* %X
+  ret void
 }
 
 define i32 @test(i32* %X) {
+; CHECK-LABEL: define i32 @test
 entry:
-	%S = alloca %struct.ss		; <%struct.ss*> [#uses=4]
-	%tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 1, i32* %tmp1, align 8
-	%tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1		; <i64*> [#uses=1]
-	store i64 2, i64* %tmp4, align 4
-	call void @f( %struct.ss* byval %S, i32* byval %X) 
-	ret i32 0
+  %S = alloca %struct.ss
+  %tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0
+  store i32 1, i32* %tmp1, align 8
+  %tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1
+  store i64 2, i64* %tmp4, align 4
+  call void @f( %struct.ss* byval %S, i32* byval %X)
+; CHECK: call void @f(i32 %{{.*}}, i64 %{{.*}}, i32* byval %{{.*}})
+  ret i32 0
 }
diff --git a/test/Transforms/ArgumentPromotion/byval.ll b/test/Transforms/ArgumentPromotion/byval.ll
index 44b26fc..27305e9 100644
--- a/test/Transforms/ArgumentPromotion/byval.ll
+++ b/test/Transforms/ArgumentPromotion/byval.ll
@@ -1,25 +1,28 @@
-; RUN: opt < %s -argpromotion -scalarrepl -S | not grep load
+; RUN: opt < %s -argpromotion -S | FileCheck %s
+
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-; Argpromote + scalarrepl should change this to passing the two integers by value.
 
-	%struct.ss = type { i32, i64 }
+%struct.ss = type { i32, i64 }
 
 define internal void @f(%struct.ss* byval  %b) nounwind  {
+; CHECK-LABEL: define internal void @f(i32 %b.0, i64 %b.1)
 entry:
-	%tmp = getelementptr %struct.ss* %b, i32 0, i32 0		; <i32*> [#uses=2]
-	%tmp1 = load i32* %tmp, align 4		; <i32> [#uses=1]
-	%tmp2 = add i32 %tmp1, 1		; <i32> [#uses=1]
-	store i32 %tmp2, i32* %tmp, align 4
-	ret void
+  %tmp = getelementptr %struct.ss* %b, i32 0, i32 0		; <i32*> [#uses=2]
+  %tmp1 = load i32* %tmp, align 4		; <i32> [#uses=1]
+  %tmp2 = add i32 %tmp1, 1		; <i32> [#uses=1]
+  store i32 %tmp2, i32* %tmp, align 4
+  ret void
 }
 
 define i32 @main() nounwind  {
+; CHECK-LABEL: define i32 @main
 entry:
-	%S = alloca %struct.ss		; <%struct.ss*> [#uses=4]
-	%tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 1, i32* %tmp1, align 8
-	%tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1		; <i64*> [#uses=1]
-	store i64 2, i64* %tmp4, align 4
-	call void @f( %struct.ss* byval  %S ) nounwind 
-	ret i32 0
+  %S = alloca %struct.ss		; <%struct.ss*> [#uses=4]
+  %tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0		; <i32*> [#uses=1]
+  store i32 1, i32* %tmp1, align 8
+  %tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1		; <i64*> [#uses=1]
+  store i64 2, i64* %tmp4, align 4
+  call void @f( %struct.ss* byval  %S ) nounwind 
+; CHECK: call void @f(i32 %{{.*}}, i64 %{{.*}})
+  ret i32 0
 }
diff --git a/test/Transforms/ArgumentPromotion/dbg.ll b/test/Transforms/ArgumentPromotion/dbg.ll
new file mode 100644
index 0000000..70503af
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/dbg.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -argpromotion -S | FileCheck %s
+; CHECK: call void @test(), !dbg [[DBG_LOC:![0-9]]]
+; CHECK: [[TEST_FN:.*]] = {{.*}} void ()* @test
+; CHECK: [[DBG_LOC]] = metadata !{i32 8, i32 0, metadata [[TEST_FN]], null}
+
+define internal void @test(i32* %X) {
+  ret void
+}
+
+define void @caller() {
+  call void @test(i32* null), !dbg !1
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!llvm.dbg.cu = !{!3}
+
+!0 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!1 = metadata !{i32 8, i32 0, metadata !2, null}
+!2 = metadata !{i32 786478, null, null, metadata !"test", metadata !"test", metadata !"", i32 3, null, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @test, null, null, null, i32 3}
+!3 = metadata !{i32 786449, null, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, null, null, metadata !4, null, null, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/pr20038/reduce/<stdin>] [DW_LANG_C_plus_plus]
+!4 = metadata !{metadata !2}
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
index ac9fc1f..6a93016 100644
--- a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
+++ b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
@@ -80,8 +80,8 @@ define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) {
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
-; CHECK: [[NEWVAL_TMP:%.*]] = xor i16 %nandend, -1
-; CHECK: [[NEWVAL:%.*]] = and i16 [[OLDVAL]], [[NEWVAL_TMP]]
+; CHECK: [[NEWVAL_TMP:%.*]] = and i16 [[OLDVAL]], %nandend
+; CHECK: [[NEWVAL:%.*]] = xor i16 [[NEWVAL_TMP]], -1
 ; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
@@ -229,22 +229,28 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
 ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[BARRIER:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[TRY_STORE]]:
 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
-; CHECK: [[BARRIER]]:
+; CHECK: [[SUCCESS_BB]]:
 ; CHECK: fence seq_cst
 ; CHECK: br label %[[DONE:.*]]
 
+; CHECK: [[FAILURE_BB]]:
+; CHECK: fence seq_cst
+; CHECK: br label %[[DONE]]
+
 ; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i8 [[OLDVAL]]
 
-  %old = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
+  %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pairold, 0
   ret i8 %old
 }
 
@@ -257,22 +263,28 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
 ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[TRY_STORE]]:
 ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
-; CHECK: [[BARRIER]]:
+; CHECK: [[SUCCESS_BB]]:
 ; CHECK: fence seq_cst
 ; CHECK: br label %[[DONE:.*]]
 
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE]]
+
 ; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i16 [[OLDVAL]]
 
-  %old = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
+  %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
+  %old = extractvalue { i16, i1 } %pairold, 0
   ret i16 %old
 }
 
@@ -284,21 +296,27 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[TRY_STORE]]:
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
-; CHECK: [[BARRIER]]:
+; CHECK: [[SUCCESS_BB]]:
 ; CHECK: fence acquire
 ; CHECK: br label %[[DONE:.*]]
 
+; CHECK: [[FAILURE_BB]]:
+; CHECK: fence acquire
+; CHECK: br label %[[DONE]]
+
 ; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i32 [[OLDVAL]]
 
-  %old = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
+  %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
+  %old = extractvalue { i32, i1 } %pairold, 0
   ret i32 %old
 }
 
@@ -317,7 +335,7 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[TRY_STORE]]:
 ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
@@ -325,16 +343,22 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
-; CHECK: [[BARRIER]]:
+; CHECK: [[SUCCESS_BB]]:
 ; CHECK-NOT: fence
 ; CHECK: br label %[[DONE:.*]]
 
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE]]
+
 ; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i64 [[OLDVAL]]
 
-  %old = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
+  %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
+  %old = extractvalue { i64, i1 } %pairold, 0
   ret i64 %old
 }
 \ No newline at end of file
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
index bec5bef..8092c10 100644
--- a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
+++ b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
@@ -91,22 +91,28 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr)
 ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[BARRIER:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[TRY_STORE]]:
 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
-; CHECK: [[BARRIER]]:
-; CHECK-NOT: fence
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence_cst
 ; CHECK: br label %[[DONE:.*]]
 
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence_cst
+; CHECK: br label %[[DONE]]
+
 ; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i8 [[OLDVAL]]
 
-  %old = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
+  %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pairold, 0
   ret i8 %old
 }
 
@@ -119,22 +125,28 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
 ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[TRY_STORE]]:
 ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
-; CHECK: [[BARRIER]]:
+; CHECK: [[SUCCESS_BB]]:
 ; CHECK-NOT: fence
 ; CHECK: br label %[[DONE:.*]]
 
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE]]
+
 ; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i16 [[OLDVAL]]
 
-  %old = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
+  %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
+  %old = extractvalue { i16, i1 } %pairold, 0
   ret i16 %old
 }
 
@@ -146,21 +158,27 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[TRY_STORE]]:
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
-; CHECK: [[BARRIER]]:
-; CHECK-NOT: fence
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence_cst
 ; CHECK: br label %[[DONE:.*]]
 
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence_cst
+; CHECK: br label %[[DONE]]
+
 ; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i32 [[OLDVAL]]
 
-  %old = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
+  %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
+  %old = extractvalue { i32, i1 } %pairold, 0
   ret i32 %old
 }
 
@@ -179,7 +197,7 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[TRY_STORE]]:
 ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
@@ -187,16 +205,22 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
-; CHECK: [[BARRIER]]:
-; CHECK-NOT: fence
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence_cst
 ; CHECK: br label %[[DONE:.*]]
 
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence_cst
+; CHECK: br label %[[DONE]]
+
 ; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i64 [[OLDVAL]]
 
-  %old = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
+  %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
+  %old = extractvalue { i64, i1 } %pairold, 0
   ret i64 %old
 }
 \ No newline at end of file
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll b/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll
new file mode 100644
index 0000000..07a4a7f
--- /dev/null
+++ b/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll
@@ -0,0 +1,97 @@
+; RUN: opt -atomic-ll-sc -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
+
+define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: @test_cmpxchg_seq_cst
+; CHECK:     fence release
+; CHECK:     br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK:     br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK:     fence seq_cst
+; CHECK:     br label %[[END:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK:     fence seq_cst
+; CHECK:     br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK:     ret i32 [[LOADED]]
+
+  %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+  %oldval = extractvalue { i32, i1 } %pair, 0
+  ret i32 %oldval
+}
+
+define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: @test_cmpxchg_weak_fail
+; CHECK:     fence release
+; CHECK:     br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK:     br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK:     fence seq_cst
+; CHECK:     br label %[[END:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK:     br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK:     ret i1 [[SUCCESS]]
+
+  %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+  %oldval = extractvalue { i32, i1 } %pair, 1
+  ret i1 %oldval
+}
+
+define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: @test_cmpxchg_monotonic
+; CHECK-NOT: fence
+; CHECK:     br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK:     br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence
+; CHECK:     br label %[[END:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK:     br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK:     ret i32 [[LOADED]]
+
+  %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new monotonic monotonic
+  %oldval = extractvalue { i32, i1 } %pair, 0
+  ret i32 %oldval
+}
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg b/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg
+++ b/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/BBVectorize/lit.local.cfg b/test/Transforms/BBVectorize/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/BBVectorize/lit.local.cfg
+++ b/test/Transforms/BBVectorize/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/CodeGenPrepare/X86/lit.local.cfg b/test/Transforms/CodeGenPrepare/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/CodeGenPrepare/X86/lit.local.cfg
+++ b/test/Transforms/CodeGenPrepare/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg b/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg
index c420349..7184443 100644
--- a/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg
+++ b/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
+if not 'AArch64' in config.root.targets:
     config.unsupported = True
diff --git a/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg b/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg
index 2e46300..5d33887 100644
--- a/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg
+++ b/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'PowerPC' in targets:
+if not 'PowerPC' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/ConstantHoisting/X86/large-immediate.ll b/test/Transforms/ConstantHoisting/X86/large-immediate.ll
index e0af9c9..b8c04f3 100644
--- a/test/Transforms/ConstantHoisting/X86/large-immediate.ll
+++ b/test/Transforms/ConstantHoisting/X86/large-immediate.ll
@@ -25,3 +25,12 @@ define i196 @test3(i196 %a) nounwind {
   %2 = mul i196 %1, 2
   ret i196 %2
 }
+
+; Check that we don't hoist immediates with small values.
+define i96 @test4(i96 %a) nounwind {
+; CHECK-LABEL: test4
+; CHECK-NOT: %const = bitcast i96 2 to i96
+  %1 = mul i96 %a, 2
+  %2 = add i96 %1, 2
+  ret i96 %2
+}
diff --git a/test/Transforms/ConstantHoisting/X86/lit.local.cfg b/test/Transforms/ConstantHoisting/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/ConstantHoisting/X86/lit.local.cfg
+++ b/test/Transforms/ConstantHoisting/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/ConstantHoisting/X86/stackmap.ll b/test/Transforms/ConstantHoisting/X86/stackmap.ll
index cef022e..9df4417 100644
--- a/test/Transforms/ConstantHoisting/X86/stackmap.ll
+++ b/test/Transforms/ConstantHoisting/X86/stackmap.ll
@@ -6,11 +6,11 @@ target triple = "x86_64-apple-macosx10.9.0"
 ; Test if the 3rd argument of a stackmap is hoisted.
 define i128 @test1(i128 %a) {
 ; CHECK-LABEL:  @test1
-; CHECK:        %const = bitcast i128 13464618275673403322 to i128
+; CHECK:        %const = bitcast i128 134646182756734033220 to i128
 ; CHECK:        tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 %const)
 entry:
-  %0 = add i128 %a, 13464618275673403322
-  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 13464618275673403322)
+  %0 = add i128 %a, 134646182756734033220
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 134646182756734033220)
   ret i128 %0
 }
 
diff --git a/test/Transforms/FunctionAttrs/nocapture.ll b/test/Transforms/FunctionAttrs/nocapture.ll
index d2460c0..d3842c8 100644
--- a/test/Transforms/FunctionAttrs/nocapture.ll
+++ b/test/Transforms/FunctionAttrs/nocapture.ll
@@ -68,7 +68,7 @@ define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind {
 	ret i1* %lookup
 }
 
-; CHECK: define i1 @c7(i32* readnone %q, i32 %bitno)
+; CHECK: define i1 @c7(i32* readonly %q, i32 %bitno)
 define i1 @c7(i32* %q, i32 %bitno) {
 	%ptr = call i1* @lookup_bit(i32* %q, i32 %bitno)
 	%val = load i1* %ptr
diff --git a/test/Transforms/FunctionAttrs/readattrs.ll b/test/Transforms/FunctionAttrs/readattrs.ll
index 7ae38bb..b4e904c 100644
--- a/test/Transforms/FunctionAttrs/readattrs.ll
+++ b/test/Transforms/FunctionAttrs/readattrs.ll
@@ -51,3 +51,17 @@ define void @test6_2(i8** %p, i8* %q) {
 define void @test7_1(i32* inalloca %a) {
   ret void
 }
+
+; CHECK: define i32* @test8_1(i32* readnone %p)
+define i32* @test8_1(i32* %p) {
+entry:
+  ret i32* %p
+}
+
+; CHECK: define void @test8_2(i32* %p)
+define void @test8_2(i32* %p) {
+entry:
+  %call = call i32* @test8_1(i32* %p)
+  store i32 10, i32* %call, align 4
+  ret void
+}
diff --git a/test/Transforms/GCOVProfiling/global-ctor.ll b/test/Transforms/GCOVProfiling/global-ctor.ll
new file mode 100644
index 0000000..722a096
--- /dev/null
+++ b/test/Transforms/GCOVProfiling/global-ctor.ll
@@ -0,0 +1,58 @@
+; RUN: echo '!16 = metadata !{metadata !"%T/global-ctor.ll", metadata !0}' > %t1
+; RUN: cat %s %t1 > %t2
+; RUN: opt -insert-gcov-profiling -disable-output < %t2
+; RUN: not grep '_GLOBAL__sub_I_global-ctor' %T/global-ctor.gcno
+; RUN: rm %T/global-ctor.gcno
+
+; REQUIRES: shell
+
+@x = global i32 0, align 4
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_global-ctor.ll, i8* null }]
+
+; Function Attrs: nounwind
+define internal void @__cxx_global_var_init() #0 section ".text.startup" {
+entry:
+  br label %0
+
+; <label>:0                                       ; preds = %entry
+  %call = call i32 @_Z1fv(), !dbg !13
+  store i32 %call, i32* @x, align 4, !dbg !13
+  ret void, !dbg !13
+}
+
+declare i32 @_Z1fv() #1
+
+; Function Attrs: nounwind
+define internal void @_GLOBAL__sub_I_global-ctor.ll() #0 section ".text.startup" {
+entry:
+  br label %0
+
+; <label>:0                                       ; preds = %entry
+  call void @__cxx_global_var_init(), !dbg !14
+  ret void, !dbg !14
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+!llvm.gcov = !{!16}
+!llvm.ident = !{!12}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (trunk 210217)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [/home/nlewycky/<stdin>] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"<stdin>", metadata !"/home/nlewycky"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !8}
+!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"__cxx_global_var_init", metadata !"__cxx_global_var_init", metadata !"", i32 2, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @__cxx_global_var_init, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [local] [def] [__cxx_global_var_init]
+!5 = metadata !{metadata !"global-ctor.ll", metadata !"/home/nlewycky"}
+!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/home/nlewycky/global-ctor.ll]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"", metadata !"", metadata !"_GLOBAL__sub_I_global-ctor.ll", i32 0, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 64, i1 false, void ()* @_GLOBAL__sub_I_global-ctor.ll, null, null, metadata !2, i32 0} ; [ DW_TAG_subprogram ] [line 0] [local] [def]
+!9 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/nlewycky/<stdin>]
+!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!11 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!12 = metadata !{metadata !"clang version 3.5.0 (trunk 210217)"}
+!13 = metadata !{i32 2, i32 0, metadata !4, null}
+!14 = metadata !{i32 0, i32 0, metadata !15, null}
+!15 = metadata !{i32 786443, metadata !5, metadata !8} ; [ DW_TAG_lexical_block ] [/home/nlewycky/global-ctor.ll]
diff --git a/test/Transforms/GCOVProfiling/linezero.ll b/test/Transforms/GCOVProfiling/linezero.ll
new file mode 100644
index 0000000..e2f8324
--- /dev/null
+++ b/test/Transforms/GCOVProfiling/linezero.ll
@@ -0,0 +1,143 @@
+; RUN: sed -e 's@PATTERN@\%T@g' < %s > %t1
+; RUN: opt -insert-gcov-profiling -disable-output < %t1
+; RUN: rm %T/linezero.gcno %t1
+; REQUIRES: shell
+
+; This is a crash test.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.vector = type { i8 }
+
+; Function Attrs: nounwind
+define i32 @_Z4testv() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %__range = alloca %struct.vector*, align 8
+  %ref.tmp = alloca %struct.vector, align 1
+  %undef.agg.tmp = alloca %struct.vector, align 1
+  %__begin = alloca i8*, align 8
+  %__end = alloca i8*, align 8
+  %spec = alloca i8, align 1
+  call void @llvm.dbg.declare(metadata !{%struct.vector** %__range}, metadata !27), !dbg !30
+  br label %0
+
+; <label>:0                                       ; preds = %entry
+  call void @_Z13TagFieldSpecsv(), !dbg !31
+  store %struct.vector* %ref.tmp, %struct.vector** %__range, align 8, !dbg !31
+  call void @llvm.dbg.declare(metadata !{i8** %__begin}, metadata !32), !dbg !30
+  %1 = load %struct.vector** %__range, align 8, !dbg !31
+  %call = call i8* @_ZN6vector5beginEv(%struct.vector* %1), !dbg !31
+  store i8* %call, i8** %__begin, align 8, !dbg !31
+  call void @llvm.dbg.declare(metadata !{i8** %__end}, metadata !33), !dbg !30
+  %2 = load %struct.vector** %__range, align 8, !dbg !31
+  %call1 = call i8* @_ZN6vector3endEv(%struct.vector* %2), !dbg !31
+  store i8* %call1, i8** %__end, align 8, !dbg !31
+  br label %for.cond, !dbg !31
+
+for.cond:                                         ; preds = %for.inc, %0
+  %3 = load i8** %__begin, align 8, !dbg !34
+  %4 = load i8** %__end, align 8, !dbg !34
+  %cmp = icmp ne i8* %3, %4, !dbg !34
+  br i1 %cmp, label %for.body, label %for.end, !dbg !34
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.dbg.declare(metadata !{i8* %spec}, metadata !37), !dbg !31
+  %5 = load i8** %__begin, align 8, !dbg !38
+  %6 = load i8* %5, align 1, !dbg !38
+  store i8 %6, i8* %spec, align 1, !dbg !38
+  br label %for.inc, !dbg !38
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i8** %__begin, align 8, !dbg !40
+  %incdec.ptr = getelementptr inbounds i8* %7, i32 1, !dbg !40
+  store i8* %incdec.ptr, i8** %__begin, align 8, !dbg !40
+  br label %for.cond, !dbg !40
+
+for.end:                                          ; preds = %for.cond
+  call void @llvm.trap(), !dbg !42
+  unreachable, !dbg !42
+
+return:                                           ; No predecessors!
+  %8 = load i32* %retval, !dbg !44
+  ret i32 %8, !dbg !44
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+declare void @_Z13TagFieldSpecsv() #2
+
+declare i8* @_ZN6vector5beginEv(%struct.vector*) #2
+
+declare i8* @_ZN6vector3endEv(%struct.vector*) #2
+
+; Function Attrs: noreturn nounwind
+declare void @llvm.trap() #3
+
+; Function Attrs: nounwind
+define void @_Z2f1v() #0 {
+entry:
+  br label %0
+
+; <label>:0                                       ; preds = %entry
+  ret void, !dbg !45
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { noreturn nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!23, !24}
+!llvm.gcov = !{!25}
+!llvm.ident = !{!26}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (trunk 209871)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !14, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [<stdin>] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"<stdin>", metadata !"PATTERN"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !5, null, metadata !"vector", i32 21, i64 8, i64 8, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS6vector"} ; [ DW_TAG_structure_type ] [vector] [line 21, size 8, align 8, offset 0] [def] [from ]
+!5 = metadata !{metadata !"linezero.cc", metadata !"PATTERN"}
+!6 = metadata !{metadata !7, metadata !13}
+!7 = metadata !{i32 786478, metadata !5, metadata !"_ZTS6vector", metadata !"begin", metadata !"begin", metadata !"_ZN6vector5beginEv", i32 25, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 25} ; [ DW_TAG_subprogram ] [line 25] [begin]
+!8 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = metadata !{metadata !10, metadata !12}
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!11 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS6vector"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS6vector]
+!13 = metadata !{i32 786478, metadata !5, metadata !"_ZTS6vector", metadata !"end", metadata !"end", metadata !"_ZN6vector3endEv", i32 26, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 26} ; [ DW_TAG_subprogram ] [line 26] [end]
+!14 = metadata !{metadata !15, metadata !20}
+!15 = metadata !{i32 786478, metadata !5, metadata !16, metadata !"test", metadata !"test", metadata !"_Z4testv", i32 50, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z4testv, null, null, metadata !2, i32 50} ; [ DW_TAG_subprogram ] [line 50] [def] [test]
+!16 = metadata !{i32 786473, metadata !5}         ; [ DW_TAG_file_type ] [./linezero.cc]
+!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = metadata !{metadata !19}
+!19 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!20 = metadata !{i32 786478, metadata !5, metadata !16, metadata !"f1", metadata !"f1", metadata !"_Z2f1v", i32 54, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z2f1v, null, null, metadata !2, i32 54} ; [ DW_TAG_subprogram ] [line 54] [def] [f1]
+!21 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!22 = metadata !{null}
+!23 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!24 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!25 = metadata !{metadata !"PATTERN/linezero.o", metadata !0}
+!26 = metadata !{metadata !"clang version 3.5.0 (trunk 209871)"}
+!27 = metadata !{i32 786688, metadata !28, metadata !"__range", null, i32 0, metadata !29, i32 64, i32 0} ; [ DW_TAG_auto_variable ] [__range] [line 0]
+!28 = metadata !{i32 786443, metadata !5, metadata !15, i32 51, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!29 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS6vector"} ; [ DW_TAG_rvalue_reference_type ] [line 0, size 0, align 0, offset 0] [from _ZTS6vector]
+!30 = metadata !{i32 0, i32 0, metadata !28, null}
+!31 = metadata !{i32 51, i32 0, metadata !28, null}
+!32 = metadata !{i32 786688, metadata !28, metadata !"__begin", null, i32 0, metadata !10, i32 64, i32 0} ; [ DW_TAG_auto_variable ] [__begin] [line 0]
+!33 = metadata !{i32 786688, metadata !28, metadata !"__end", null, i32 0, metadata !10, i32 64, i32 0} ; [ DW_TAG_auto_variable ] [__end] [line 0]
+!34 = metadata !{i32 51, i32 0, metadata !35, null}
+!35 = metadata !{i32 786443, metadata !5, metadata !36, i32 51, i32 0, i32 5, i32 5} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!36 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 1, i32 1} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!37 = metadata !{i32 786688, metadata !28, metadata !"spec", metadata !16, i32 51, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [spec] [line 51]
+!38 = metadata !{i32 51, i32 0, metadata !39, null}
+!39 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 2, i32 2} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!40 = metadata !{i32 51, i32 0, metadata !41, null}
+!41 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 4, i32 4} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!42 = metadata !{i32 51, i32 0, metadata !43, null}
+!43 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 3, i32 3} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!44 = metadata !{i32 52, i32 0, metadata !15, null}
+!45 = metadata !{i32 54, i32 0, metadata !20, null}
diff --git a/test/Transforms/GVN/calloc-load-removal.ll b/test/Transforms/GVN/calloc-load-removal.ll
new file mode 100644
index 0000000..2dde5b7
--- /dev/null
+++ b/test/Transforms/GVN/calloc-load-removal.ll
@@ -0,0 +1,25 @@
+; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
+; RUN: opt -S -basicaa -gvn -disable-simplify-libcalls < %s | FileCheck %s -check-prefix=CHECK_NO_LIBCALLS
+; Check that loads from calloc are recognized as being zero.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define i32 @test1() {
+  %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+  %2 = bitcast i8* %1 to i32*
+  ; This load is trivially constant zero
+  %3 = load i32* %2, align 4
+  ret i32 %3
+
+; CHECK-LABEL: @test1(
+; CHECK-NOT: %3 = load i32* %2, align 4
+; CHECK: ret i32 0
+
+; CHECK_NO_LIBCALLS-LABEL: @test1(
+; CHECK_NO_LIBCALLS: load
+; CHECK_NO_LIBCALLS: ret i32 %
+
+}
+
+declare noalias i8* @calloc(i64, i64)
diff --git a/test/Transforms/GVN/invariant-load.ll b/test/Transforms/GVN/invariant-load.ll
new file mode 100644
index 0000000..80e2226
--- /dev/null
+++ b/test/Transforms/GVN/invariant-load.ll
@@ -0,0 +1,31 @@
+; Test if the !invariant.load metadata is maintained by GVN.
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+
+define i32 @test1(i32* nocapture %p, i8* nocapture %q) {
+; CHECK-LABEL: test1
+; CHECK: %x = load i32* %p, align 4, !invariant.load !0
+; CHECK-NOT: %y = load
+entry:
+  %x = load i32* %p, align 4, !invariant.load !0
+  %conv = trunc i32 %x to i8
+  store i8 %conv, i8* %q, align 1
+  %y = load i32* %p, align 4, !invariant.load !0
+  %add = add i32 %y, 1
+  ret i32 %add
+}
+
+define i32 @test2(i32* nocapture %p, i8* nocapture %q) {
+; CHECK-LABEL: test2
+; CHECK-NOT: !invariant.load
+; CHECK-NOT: %y = load
+entry:
+  %x = load i32* %p, align 4
+  %conv = trunc i32 %x to i8
+  store i8 %conv, i8* %q, align 1
+  %y = load i32* %p, align 4, !invariant.load !0
+  %add = add i32 %y, 1
+  ret i32 %add
+}
+
+!0 = metadata !{ }
+
diff --git a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
index 4b96799..0bdced5 100644
--- a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
+++ b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
@@ -11,8 +11,8 @@
 @L1 = alias i32* @A
 ; CHECK: @L1 = alias i32* @A
 
-@L2 = alias internal i32* @A
-; DEAD-NOT: @L2
+@L2 = alias internal i32* @L1
+; CHECK: @L2 = alias internal i32* @L1
 
-@L3 = alias i32* @A
-; CHECK: @L3 = alias i32* @A
+@L3 = alias i32* @L2
+; CHECK: @L3 = alias i32* @L2
diff --git a/test/Transforms/GlobalMerge/AArch64/lit.local.cfg b/test/Transforms/GlobalMerge/AArch64/lit.local.cfg
deleted file mode 100644
index 9a66a00..0000000
--- a/test/Transforms/GlobalMerge/AArch64/lit.local.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
-    config.unsupported = True
-
diff --git a/test/Transforms/GlobalMerge/ARM/lit.local.cfg b/test/Transforms/GlobalMerge/ARM/lit.local.cfg
deleted file mode 100644
index 8a3ba96..0000000
--- a/test/Transforms/GlobalMerge/ARM/lit.local.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
-    config.unsupported = True
-
diff --git a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
index 03d6ee4..d6a565a 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
@@ -2,7 +2,7 @@
 
 @g = global i32 0
 
-@a = alias i8, i32* @g
+@a = alias bitcast (i32* @g to i8*)
 
 define void @f() {
 	%tmp = load i8* @a
diff --git a/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll b/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll
index 62f75e1..930a96e 100644
--- a/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll
+++ b/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll
@@ -1,11 +1,23 @@
-; RUN: opt < %s -globalopt -S | grep internal | count 2
+; RUN: opt < %s -globalopt -S | FileCheck %s
 
 global i32 0
-define i32* @1() {
+; CHECK-DAG: @0 = internal global i32 0
+
+private global i32 0
+; CHECK-DAG: @1 = private global i32 0
+
+define i32* @2() {
 	ret i32* @0
 }
+; CHECK-DAG: define internal fastcc i32* @2()
+
 define i32* @f() {
 entry:
-	call i32* @1()
+	call i32* @2()
 	ret i32* %0
 }
+
+define i32* @g() {
+entry:
+	ret i32* @1
+}
diff --git a/test/Transforms/GlobalOpt/alias-resolve.ll b/test/Transforms/GlobalOpt/alias-resolve.ll
index bd07b31..9d70c70 100644
--- a/test/Transforms/GlobalOpt/alias-resolve.ll
+++ b/test/Transforms/GlobalOpt/alias-resolve.ll
@@ -1,9 +1,9 @@
 ; RUN: opt < %s -globalopt -S | FileCheck %s
 
-@foo1 = alias void ()* @bar2
+@foo1 = alias void ()* @foo2
 ; CHECK: @foo1 = alias void ()* @bar2
 
-@foo2 = alias void()* @bar2
+@foo2 = alias void()* @bar1
 ; CHECK: @foo2 = alias void ()* @bar2
 
 @bar1  = alias void ()* @bar2
@@ -12,6 +12,10 @@
 @weak1 = alias weak void ()* @bar2
 ; CHECK: @weak1 = alias weak void ()* @bar2
 
+@bar4 = private unnamed_addr constant [2 x i8*] zeroinitializer
+@foo4 = unnamed_addr alias linkonce_odr getelementptr inbounds ([2 x i8*]* @bar4, i32 0, i32 1)
+; CHECK: @foo4 = unnamed_addr alias linkonce_odr getelementptr inbounds ([2 x i8*]* @bar4, i32 0, i32 1)
+
 define void @bar2() {
   ret void
 }
diff --git a/test/Transforms/GlobalOpt/constantfold-initializers.ll b/test/Transforms/GlobalOpt/constantfold-initializers.ll
index ce6e2c4..4a25d66 100644
--- a/test/Transforms/GlobalOpt/constantfold-initializers.ll
+++ b/test/Transforms/GlobalOpt/constantfold-initializers.ll
@@ -50,7 +50,41 @@ entry:
   ret void
 }
 
+; PR19955
+
+@dllimportptr = global i32* null, align 4
+; CHECK: @dllimportptr = global i32* null, align 4
+@dllimportvar = external dllimport global i32
+define internal void @test3() {
+entry:
+  store i32* @dllimportvar, i32** @dllimportptr, align 4
+  ret void
+}
+
+@dllexportptr = global i32* null, align 4
+; CHECK: @dllexportptr = global i32* @dllexportvar, align 4
+@dllexportvar = dllexport global i32 0, align 4
+; CHECK: @dllexportvar = dllexport global i32 20, align 4
+define internal void @test4() {
+entry:
+  store i32 20, i32* @dllexportvar, align 4
+  store i32* @dllexportvar, i32** @dllexportptr, align 4
+  ret void
+}
+
+@threadlocalptr = global i32* null, align 4
+; CHECK: @threadlocalptr = global i32* null, align 4
+@threadlocalvar = external thread_local global i32
+define internal void @test5() {
+entry:
+  store i32* @threadlocalvar, i32** @threadlocalptr, align 4
+  ret void
+}
+
 @llvm.global_ctors = appending constant
-  [2 x { i32, void ()* }]
+  [5 x { i32, void ()* }]
   [{ i32, void ()* } { i32 65535, void ()* @test1 },
-   { i32, void ()* } { i32 65535, void ()* @test2 }]
+   { i32, void ()* } { i32 65535, void ()* @test2 },
+   { i32, void ()* } { i32 65535, void ()* @test3 },
+   { i32, void ()* } { i32 65535, void ()* @test4 },
+   { i32, void ()* } { i32 65535, void ()* @test5 }]
diff --git a/test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll b/test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll
new file mode 100644
index 0000000..2c738de
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -loop-unswitch -instcombine -indvars < %s | FileCheck %s
+
+; This used to crash in SCEVExpander when there were congruent phis with and
+; undef incoming value from the loop header. The -loop-unswitch -instcombine is
+; necessary to create just this pattern, which is essentially a nop and gets
+; folded away aggressively if spelled out in IR directly.
+; PR 20093
+
+@c = external global i32**, align 8
+
+define void @test1() {
+entry:
+  br i1 undef, label %for.end12, label %for.cond.preheader
+
+for.cond.preheader:                               ; preds = %entry
+  %0 = load i32*** @c, align 8
+  %1 = load i32** %0, align 8
+  %2 = load i32* %1, align 4
+  br label %for.body
+
+for.body:                                         ; preds = %for.cond.backedge, %for.body9.us, %for.cond.preheader
+  %3 = phi i32* [ %1, %for.cond.preheader ], [ %3, %for.cond.backedge ], [ %6, %for.body9.us ]
+  %4 = phi i32 [ %2, %for.cond.preheader ], [ undef, %for.cond.backedge ], [ %7, %for.body9.us ]
+  %i.024 = phi i32 [ 0, %for.cond.preheader ], [ %inc, %for.cond.backedge ], [ 0, %for.body9.us ]
+  %tobool1 = icmp eq i32 %4, 0
+  br i1 %tobool1, label %if.end, label %for.cond.backedge
+
+if.end:                                           ; preds = %for.body
+  %5 = load i32* %3, align 4
+  %tobool4 = icmp eq i32 %5, 0
+  br i1 %tobool4, label %for.cond3, label %for.body9.preheader
+
+for.body9.preheader:                              ; preds = %if.end
+  %tobool8 = icmp eq i32 %i.024, 1
+  br i1 %tobool8, label %for.body9.us, label %for.body9
+
+for.body9.us:                                     ; preds = %for.body9.preheader
+  %6 = load i32** undef, align 8
+  %7 = load i32* %6, align 4
+  br label %for.body
+
+for.cond3:                                        ; preds = %for.cond3, %if.end
+  br label %for.cond3
+
+for.body9:                                        ; preds = %for.body9, %for.body9.preheader
+  br label %for.body9
+
+for.cond.backedge:                                ; preds = %for.body
+  %inc = add nsw i32 %i.024, 1
+  br i1 false, label %for.body, label %for.end12
+
+for.end12:                                        ; preds = %for.cond.backedge, %entry
+  ret void
+
+; CHECK-LABEL: @test1
+; CHECK-NOT: phi
+}
diff --git a/test/Transforms/Inline/blockaddress.ll b/test/Transforms/Inline/blockaddress.ll
index 4206312..8eb3072 100644
--- a/test/Transforms/Inline/blockaddress.ll
+++ b/test/Transforms/Inline/blockaddress.ll
@@ -1,8 +1,9 @@
 ; RUN: opt -inline -S < %s | FileCheck %s
 ; PR10162
 
-; Make sure the blockaddress is mapped correctly when doit is inlined
-; CHECK: store i8* blockaddress(@f, %here.i), i8** @ptr1, align 8
+; Make sure doit is not inlined since the blockaddress is taken
+; which could be unsafe
+; CHECK: store i8* blockaddress(@doit, %here), i8** %pptr, align 8
 
 @i = global i32 1, align 4
 @ptr1 = common global i8* null, align 8
diff --git a/test/Transforms/Inline/debug-invoke.ll b/test/Transforms/Inline/debug-invoke.ll
new file mode 100644
index 0000000..41d6074
--- /dev/null
+++ b/test/Transforms/Inline/debug-invoke.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -always-inline -S | FileCheck %s
+
+; Test that the debug location is preserved when rewriting an inlined call as an invoke
+
+; CHECK: invoke void @test()
+; CHECK-NEXT: to label {{.*}} unwind label {{.*}}, !dbg [[INL_LOC:!.*]]
+; CHECK: [[EMPTY:.*]] = metadata !{}
+; CHECK: [[INL_LOC]] = metadata !{i32 1, i32 0, metadata [[EMPTY]], metadata [[INL_AT:.*]]}
+; CHECK: [[INL_AT]] = metadata !{i32 2, i32 0, metadata [[EMPTY]], null}
+
+declare void @test()
+declare i32 @__gxx_personality_v0(...)
+
+attributes #0 = { alwaysinline }
+define void @inl() #0 {
+  call void @test(), !dbg !3
+  ret void
+}
+
+define void @caller() {
+  invoke void @inl()
+    to label %cont unwind label %lpad, !dbg !4
+
+cont:
+  ret void
+
+lpad:
+  landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+    cleanup
+  ret void
+}
+
+!llvm.module.flags = !{!1}
+!1 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!2 = metadata !{}
+!3 = metadata !{i32 1, i32 0, metadata !2, null}
+!4 = metadata !{i32 2, i32 0, metadata !2, null}
diff --git a/test/Transforms/Inline/null-function.ll b/test/Transforms/Inline/null-function.ll
new file mode 100644
index 0000000..2aecfa8
--- /dev/null
+++ b/test/Transforms/Inline/null-function.ll
@@ -0,0 +1,9 @@
+; RUN: opt -print-before=always-inline -always-inline < %s -o /dev/null 2>&1 | FileCheck %s
+
+define i32 @main() #0 {
+entry:
+  ret i32 0
+}
+
+; CHECK: *** IR Dump Before Inliner for always_inline functions ***
+; CHECK: Printing <null> Function
diff --git a/test/Transforms/InstCombine/2010-03-03-ExtElim.ll b/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
index b1384ec..e0def99 100644
--- a/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
+++ b/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
@@ -22,11 +22,11 @@ define i1 @PR6486() nounwind {
 define i1 @PR16462_1() nounwind {
 ; CHECK-LABEL: @PR16462_1(
   ret i1 icmp sgt (i32 sext (i16 trunc (i32 select (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @a, i32 0, i32 0), i32* @d), i32 0, i32 1) to i16) to i32), i32 65535)
-; CHECK: ret i1 icmp sgt (i32 sext (i16 trunc (i32 select (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @a, i32 0, i32 0), i32* @d), i32 0, i32 1) to i16) to i32), i32 65535)
+; CHECK: ret i1 false
 }
 
 define i1 @PR16462_2() nounwind {
 ; CHECK-LABEL: @PR16462_2(
   ret i1 icmp sgt (i32 sext (i16 trunc (i32 select (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @a, i32 0, i32 0), i32* @d), i32 0, i32 1) to i16) to i32), i32 42)
-; CHECK: ret i1 icmp sgt (i16 trunc (i32 select (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @a, i32 0, i32 0), i32* @d), i32 0, i32 1) to i16), i16 42)
+; CHECK: ret i1 false
 }
diff --git a/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
index 4d185bf..ac9c555 100644
--- a/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
+++ b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 ; CHECK: addrspacecast
 
-@base = internal addrspace(3) unnamed_addr global [16 x i32] zeroinitializer, align 16
+@base = internal unnamed_addr addrspace(3) global [16 x i32] zeroinitializer, align 16
 declare void @foo(i32*)
 
 define void @test() nounwind {
diff --git a/test/Transforms/InstCombine/AddOverFlow.ll b/test/Transforms/InstCombine/AddOverFlow.ll
new file mode 100644
index 0000000..8f3d429
--- /dev/null
+++ b/test/Transforms/InstCombine/AddOverFlow.ll
@@ -0,0 +1,118 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @oppositesign
+; CHECK: add nsw i16 %a, %b
+define i16 @oppositesign(i16 %x, i16 %y) {
+; %a is negative, %b is positive
+  %a = or i16 %x, 32768
+  %b = and i16 %y, 32767
+  %c = add i16 %a, %b
+  ret i16 %c
+}
+
+define i16 @zero_sign_bit(i16 %a) {
+; CHECK-LABEL: @zero_sign_bit(
+; CHECK-NEXT: and
+; CHECK-NEXT: add nuw
+; CHECK-NEXT: ret
+  %1 = and i16 %a, 32767
+  %2 = add i16 %1, 512
+  ret i16 %2
+}
+
+define i16 @zero_sign_bit2(i16 %a, i16 %b) {
+; CHECK-LABEL: @zero_sign_bit2(
+; CHECK-NEXT: and
+; CHECK-NEXT: and
+; CHECK-NEXT: add nuw
+; CHECK-NEXT: ret
+  %1 = and i16 %a, 32767
+  %2 = and i16 %b, 32767
+  %3 = add i16 %1, %2
+  ret i16 %3
+}
+
+declare i16 @bounded(i16 %input);
+declare i32 @__gxx_personality_v0(...);
+!0 = metadata !{i16 0, i16 32768} ; [0, 32767]
+!1 = metadata !{i16 0, i16 32769} ; [0, 32768]
+
+define i16 @add_bounded_values(i16 %a, i16 %b) {
+; CHECK-LABEL: @add_bounded_values(
+entry:
+  %c = call i16 @bounded(i16 %a), !range !0
+  %d = invoke i16 @bounded(i16 %b) to label %cont unwind label %lpad, !range !0
+cont:
+; %c and %d are in [0, 32767]. Therefore, %c + %d doesn't unsigned overflow.
+  %e = add i16 %c, %d
+; CHECK: add nuw i16 %c, %d
+  ret i16 %e
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  ret i16 42
+}
+
+define i16 @add_bounded_values_2(i16 %a, i16 %b) {
+; CHECK-LABEL: @add_bounded_values_2(
+entry:
+  %c = call i16 @bounded(i16 %a), !range !1
+  %d = invoke i16 @bounded(i16 %b) to label %cont unwind label %lpad, !range !1
+cont:
+; Similar to add_bounded_values, but %c and %d are in [0, 32768]. Therefore,
+; %c + %d may unsigned overflow and we cannot add NUW.
+  %e = add i16 %c, %d
+; CHECK: add i16 %c, %d
+  ret i16 %e
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  ret i16 42
+}
+
+; CHECK-LABEL: @ripple_nsw1
+; CHECK: add nsw i16 %a, %b
+define i16 @ripple_nsw1(i16 %x, i16 %y) {
+; %a has at most one bit set
+  %a = and i16 %y, 1
+
+; %b has a 0 bit other than the sign bit
+  %b = and i16 %x, 49151
+
+  %c = add i16 %a, %b
+  ret i16 %c
+}
+
+; Like the previous test, but flip %a and %b
+; CHECK-LABEL: @ripple_nsw2
+; CHECK: add nsw i16 %b, %a
+define i16 @ripple_nsw2(i16 %x, i16 %y) {
+  %a = and i16 %y, 1
+  %b = and i16 %x, 49151
+  %c = add i16 %b, %a
+  ret i16 %c
+}
+
+; CHECK-LABEL: @ripple_no_nsw1
+; CHECK: add i32 %a, %x
+define i32 @ripple_no_nsw1(i32 %x, i32 %y) {
+; We know nothing about %x
+  %a = and i32 %y, 1
+  %b = add i32 %a, %x
+  ret i32 %b
+}
+
+; CHECK-LABEL: @ripple_no_nsw2
+; CHECK: add nuw i16 %a, %b
+define i16 @ripple_no_nsw2(i16 %x, i16 %y) {
+; %a has at most one bit set
+  %a = and i16 %y, 1
+
+; %b has a 0 bit, but it is the sign bit
+  %b = and i16 %x, 32767
+
+  %c = add i16 %a, %b
+  ret i16 %c
+}
diff --git a/test/Transforms/InstCombine/abs_abs.ll b/test/Transforms/InstCombine/abs_abs.ll
new file mode 100644
index 0000000..de10fd1
--- /dev/null
+++ b/test/Transforms/InstCombine/abs_abs.ll
@@ -0,0 +1,961 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @abs_abs_x01(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x01(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x02(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x02(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x03(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x03(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x04(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x04(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x05(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x05(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x06(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x06(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x07(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x07(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x08(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x08(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x09(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x09(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x10(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x10(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x11(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x11(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x12(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x12(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x13(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x13(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x14(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x14(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x15(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x15(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x16(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x16(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x01(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x01(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x02(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x02(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x03(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x03(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x04(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x04(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x05(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x05(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x06(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x06(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x07(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x07(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x08(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x08(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x09(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x09(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x10(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x10(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x11(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x11(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x12(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x12(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x13(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x13(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x14(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x14(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x15(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x15(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x16(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x16(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x01(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x01(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x02(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x02(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x03(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x03(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x04(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x04(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x05(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x05(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x06(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x06(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x07(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x07(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x08(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x08(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x09(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x09(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x10(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x10(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x11(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x11(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x12(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x12(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x13(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x13(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x14(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x14(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x15(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x15(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x16(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x16(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x01(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x01(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x02(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x02(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x03(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x03(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x04(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x04(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x05(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x05(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x06(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x06(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x07(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x07(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x08(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x08(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x09(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x09(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x10(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x10(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x11(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x11(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x12(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x12(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x13(i32 %x) {
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x13(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x14(i32 %x) {
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x14(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x15(i32 %x) {
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x15(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x16(i32 %x) {
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x16(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+\ No newline at end of file
diff --git a/test/Transforms/InstCombine/add-shrink.ll b/test/Transforms/InstCombine/add-shrink.ll
index 3edb392..67a990f 100644
--- a/test/Transforms/InstCombine/add-shrink.ll
+++ b/test/Transforms/InstCombine/add-shrink.ll
@@ -1,9 +1,11 @@
-; RUN: opt < %s -instcombine -S | grep "add nsw i32"
-; RUN: opt < %s -instcombine -S | grep sext | count 1
-
-; Should only have one sext and the add should be i32 instead of i64.
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
+; CHECK-LABEL: define i64 @test
 define i64 @test1(i32 %A) {
+; CHECK: %[[ADD:.*]] = add nsw i32 %B, %C
+; CHECK: %F = sext i32 %[[ADD]] to i64
+; CHECK: ret i64 %F
+
 	%B = ashr i32 %A, 7		; <i32> [#uses=1]
 	%C = ashr i32 %A, 9		; <i32> [#uses=1]
 	%D = sext i32 %B to i64		; <i64> [#uses=1]
diff --git a/test/Transforms/InstCombine/add-sitofp.ll b/test/Transforms/InstCombine/add-sitofp.ll
index 40edf71..3b5485e 100644
--- a/test/Transforms/InstCombine/add-sitofp.ll
+++ b/test/Transforms/InstCombine/add-sitofp.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep "add nsw i32"
+; RUN: opt < %s -instcombine -S | grep "add nuw nsw i32"
 
 define double @x(i32 %a, i32 %b) nounwind {
   %m = lshr i32 %a, 24
diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll
index 67d560e..d7eac4b 100644
--- a/test/Transforms/InstCombine/add2.ll
+++ b/test/Transforms/InstCombine/add2.ll
@@ -76,3 +76,240 @@ define <2 x i64> @test8(<2 x i64> %A) {
 ; CHECK-NEXT: %add = sub <2 x i64> <i64 1, i64 2>, %A
 ; CHECK-NEXT: ret <2 x i64> %add
 }
+
+define i16 @test9(i16 %a) {
+       %b = mul i16 %a, 2
+       %c = mul i16 %a, 32767
+       %d = add i16 %b, %c
+       ret i16 %d
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:  %d = mul i16 %a, -32767
+; CHECK-NEXT:  ret i16 %d
+}
+
+; y + (~((x >> 3) & 0x55555555) + 1) -> y - ((x >> 3) & 0x55555555)
+define i32 @test10(i32 %x, i32 %y) {
+  %shr = ashr i32 %x, 3
+  %shr.not = or i32 %shr, -1431655766
+  %neg = xor i32 %shr.not, 1431655765
+  %add = add i32 %y, 1
+  %add1 = add i32 %add, %neg
+  ret i32 %add1
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[SHR:%[a-z0-9]+]] = ashr i32 %x, 3
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHR]], 1431655765
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; y + (~(x & 0x55555555) + 1) -> y - (x & 0x55555555)
+define i32 @test11(i32 %x, i32 %y) {
+  %x.not = or i32 %x, -1431655766
+  %neg = xor i32 %x.not, 1431655765
+  %add = add i32 %y, 1
+  %add1 = add i32 %add, %neg
+  ret i32 %add1
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 1431655765
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; (y + 1) + ~(x & 0x55555555) -> y - (x & 0x55555555)
+define i32 @test12(i32 %x, i32 %y) {
+  %add = add nsw i32 %y, 1
+  %x.not = or i32 %x, -1431655766
+  %neg = xor i32 %x.not, 1431655765
+  %add1 = add nsw i32 %add, %neg
+  ret i32 %add1
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 1431655765
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; y + (~(x & 0x55555556) + 1) -> y - (x & 0x55555556)
+define i32 @test13(i32 %x, i32 %y) {
+  %x.not = or i32 %x, -1431655767
+  %neg = xor i32 %x.not, 1431655766
+  %add = add i32 %y, 1
+  %add1 = add i32 %add, %neg
+  ret i32 %add1
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 1431655766
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; (y + 1) + ~(x & 0x55555556) -> y - (x & 0x55555556)
+define i32 @test14(i32 %x, i32 %y) {
+  %add = add nsw i32 %y, 1
+  %x.not = or i32 %x, -1431655767
+  %neg = xor i32 %x.not, 1431655766
+  %add1 = add nsw i32 %add, %neg
+  ret i32 %add1
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 1431655766
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; y + (~(x | 0x55555556) + 1) -> y - (x | 0x55555556)
+define i32 @test15(i32 %x, i32 %y) {
+  %x.not = and i32 %x, -1431655767
+  %neg = xor i32 %x.not, -1431655767
+  %add = add i32 %y, 1
+  %add1 = add i32 %add, %neg
+  ret i32 %add1
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = or i32 %x, 1431655766
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; (y + 1) + ~(x | 0x55555556) -> y - (x | 0x555555556)
+define i32 @test16(i32 %x, i32 %y) {
+  %add = add nsw i32 %y, 1
+  %x.not = and i32 %x, -1431655767
+  %neg = xor i32 %x.not, -1431655767
+  %add1 = add nsw i32 %add, %neg
+  ret i32 %add1
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = or i32 %x, 1431655766
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; y + (~(x | 0x55555555) + 1) -> y - (x | 0x55555555)
+define i32 @test17(i32 %x, i32 %y) {
+  %x.not = and i32 %x, -1431655766
+  %add2 = xor i32 %x.not, -1431655765
+  %add1 = add nsw i32 %add2, %y
+  ret i32 %add1
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = or i32 %x, 1431655765
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; (y + 1) + ~(x | 0x55555555) -> y - (x | 0x55555555)
+define i32 @test18(i32 %x, i32 %y) {
+  %add = add nsw i32 %y, 1
+  %x.not = and i32 %x, -1431655766
+  %neg = xor i32 %x.not, -1431655766
+  %add1 = add nsw i32 %add, %neg
+  ret i32 %add1
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = or i32 %x, 1431655765
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+define i16 @add_nsw_mul_nsw(i16 %x) {
+ %add1 = add nsw i16 %x, %x
+ %add2 = add nsw i16 %add1, %x
+ ret i16 %add2
+; CHECK-LABEL: @add_nsw_mul_nsw(
+; CHECK-NEXT: %add2 = mul nsw i16 %x, 3
+; CHECK-NEXT: ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_1(i16 %x) {
+ %mul1 = mul nsw i16 %x, 8
+ %add2 = add nsw i16 %x, %mul1
+ ret i16 %add2
+; CHECK-LABEL: @mul_add_to_mul_1(
+; CHECK-NEXT: %add2 = mul nsw i16 %x, 9
+; CHECK-NEXT: ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_2(i16 %x) {
+ %mul1 = mul nsw i16 %x, 8
+ %add2 = add nsw i16 %mul1, %x
+ ret i16 %add2
+; CHECK-LABEL: @mul_add_to_mul_2(
+; CHECK-NEXT: %add2 = mul nsw i16 %x, 9
+; CHECK-NEXT: ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_3(i16 %a) {
+ %mul1 = mul i16 %a, 2
+ %mul2 = mul i16 %a, 3
+ %add = add nsw i16 %mul1, %mul2
+ ret i16 %add
+; CHECK-LABEL: @mul_add_to_mul_3(
+; CHECK-NEXT: %add = mul i16 %a, 5
+; CHECK-NEXT: ret i16 %add
+}
+
+define i16 @mul_add_to_mul_4(i16 %a) {
+ %mul1 = mul nsw i16 %a, 2
+ %mul2 = mul nsw i16 %a, 7
+ %add = add nsw i16 %mul1, %mul2
+ ret i16 %add
+; CHECK-LABEL: @mul_add_to_mul_4(
+; CHECK-NEXT: %add = mul nsw i16 %a, 9
+; CHECK-NEXT: ret i16 %add
+}
+
+define i16 @mul_add_to_mul_5(i16 %a) {
+ %mul1 = mul nsw i16 %a, 3
+ %mul2 = mul nsw i16 %a, 7
+ %add = add nsw i16 %mul1, %mul2
+ ret i16 %add
+; CHECK-LABEL: @mul_add_to_mul_5(
+; CHECK-NEXT: %add = mul nsw i16 %a, 10
+; CHECK-NEXT: ret i16 %add
+}
+
+define i32 @mul_add_to_mul_6(i32 %x, i32 %y) {
+  %mul1 = mul nsw i32 %x, %y
+  %mul2 = mul nsw i32 %mul1, 5
+  %add = add nsw i32 %mul1, %mul2
+  ret i32 %add
+; CHECK-LABEL: @mul_add_to_mul_6(
+; CHECK-NEXT: %mul1 = mul nsw i32 %x, %y
+; CHECK-NEXT: %add = mul nsw i32 %mul1, 6
+; CHECK-NEXT: ret i32 %add
+}
+
+; This test and the next test verify that when a range metadata is attached to
+; llvm.cttz, ValueTracking correctly intersects the range specified by the
+; metadata and the range implied by the intrinsic.
+;
+; In this test, the range specified by the metadata is more strict. Therefore,
+; ValueTracking uses that range.
+define i16 @add_cttz(i16 %a) {
+; CHECK-LABEL: @add_cttz(
+  ; llvm.cttz.i16(..., /*is_zero_undefined=*/true) implies the value returned
+  ; is in [0, 16). The range metadata indicates the value returned is in [0, 8).
+  ; Intersecting these ranges, we know the value returned is in [0, 8).
+  ; Therefore, InstCombine will transform
+  ;     add %cttz, 1111 1111 1111 1000 ; decimal -8
+  ; to
+  ;     or  %cttz, 1111 1111 1111 1000
+  %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true), !range !0
+  %b = add i16 %cttz, -8
+; CHECK: or i16 %cttz, -8
+  ret i16 %b
+}
+declare i16 @llvm.cttz.i16(i16, i1)
+!0 = metadata !{i16 0, i16 8}
+
+; Similar to @add_cttz, but in this test, the range implied by the
+; intrinsic is more strict. Therefore, ValueTracking uses that range.
+define i16 @add_cttz_2(i16 %a) {
+; CHECK-LABEL: @add_cttz_2(
+  ; llvm.cttz.i16(..., /*is_zero_undefined=*/true) implies the value returned
+  ; is in [0, 16). The range metadata indicates the value returned is in
+  ; [0, 32). Intersecting these ranges, we know the value returned is in
+  ; [0, 16). Therefore, InstCombine will transform
+  ;     add %cttz, 1111 1111 1111 0000 ; decimal -16
+  ; to
+  ;     or  %cttz, 1111 1111 1111 0000
+  %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true), !range !1
+  %b = add i16 %cttz, -16
+; CHECK: or i16 %cttz, -16
+  ret i16 %b
+}
+!1 = metadata !{i16 0, i16 32}
diff --git a/test/Transforms/InstCombine/addrspacecast.ll b/test/Transforms/InstCombine/addrspacecast.ll
index d908b55..c168436 100644
--- a/test/Transforms/InstCombine/addrspacecast.ll
+++ b/test/Transforms/InstCombine/addrspacecast.ll
@@ -28,13 +28,91 @@ define <4 x i32*> @combine_redundant_addrspacecast_vector(<4 x i32 addrspace(1)*
 
 define float* @combine_redundant_addrspacecast_types(i32 addrspace(1)* %x) nounwind {
 ; CHECK-LABEL: @combine_redundant_addrspacecast_types(
-; CHECK: addrspacecast i32 addrspace(1)* %x to float*
+; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float*
 ; CHECK-NEXT: ret
   %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)*
   %z = addrspacecast i32 addrspace(3)* %y to float*
   ret float* %z
 }
 
+define <4 x float*> @combine_redundant_addrspacecast_types_vector(<4 x i32 addrspace(1)*> %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast_types_vector(
+; CHECK-NEXT: bitcast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(1)*>
+; CHECK-NEXT: addrspacecast <4 x float addrspace(1)*> %1 to <4 x float*>
+; CHECK-NEXT: ret
+  %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32 addrspace(3)*>
+  %z = addrspacecast <4 x i32 addrspace(3)*> %y to <4 x float*>
+  ret <4 x float*> %z
+}
+
+define float addrspace(2)* @combine_addrspacecast_bitcast_1(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_bitcast_1(
+; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float addrspace(2)*
+; CHECK-NEXT: ret
+  %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)*
+  %z = bitcast i32 addrspace(2)* %y to float addrspace(2)*
+  ret float addrspace(2)* %z
+}
+
+define i32 addrspace(2)* @combine_addrspacecast_bitcast_2(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_bitcast_2(
+; CHECK: addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)*
+; CHECK-NEXT: ret
+  %y = addrspacecast i32 addrspace(1)* %x to float addrspace(2)*
+  %z = bitcast float addrspace(2)* %y to i32 addrspace(2)*
+  ret i32 addrspace(2)* %z
+}
+
+define i32 addrspace(2)* @combine_bitcast_addrspacecast_1(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_bitcast_addrspacecast_1(
+; CHECK: addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)*
+; CHECK-NEXT: ret
+  %y = bitcast i32 addrspace(1)* %x to i8 addrspace(1)*
+  %z = addrspacecast i8 addrspace(1)* %y to i32 addrspace(2)*
+  ret i32 addrspace(2)* %z
+}
+
+define float addrspace(2)* @combine_bitcast_addrspacecast_2(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_bitcast_addrspacecast_2(
+; CHECK: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK: addrspacecast float addrspace(1)* %1 to float addrspace(2)*
+; CHECK-NEXT: ret
+  %y = bitcast i32 addrspace(1)* %x to i8 addrspace(1)*
+  %z = addrspacecast i8 addrspace(1)* %y to float addrspace(2)*
+  ret float addrspace(2)* %z
+}
+
+define float addrspace(2)* @combine_addrspacecast_types(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_types(
+; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float addrspace(2)*
+; CHECK-NEXT: ret
+  %y = addrspacecast i32 addrspace(1)* %x to float addrspace(2)*
+  ret float addrspace(2)* %y
+}
+
+define <4 x float addrspace(2)*> @combine_addrspacecast_types_vector(<4 x i32 addrspace(1)*> %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_types_vector(
+; CHECK-NEXT: bitcast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(1)*>
+; CHECK-NEXT: addrspacecast <4 x float addrspace(1)*> %1 to <4 x float addrspace(2)*>
+; CHECK-NEXT: ret
+  %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(2)*>
+  ret <4 x float addrspace(2)*> %y
+}
+
+define i32 @canonicalize_addrspacecast([16 x i32] addrspace(1)* %arr) {
+; CHECK-LABEL: @canonicalize_addrspacecast(
+; CHECK-NEXT: getelementptr inbounds [16 x i32] addrspace(1)* %arr, i32 0, i32 0
+; CHECK-NEXT: addrspacecast i32 addrspace(1)* %{{[a-zA-Z0-9]+}} to i32*
+; CHECK-NEXT: load i32*
+; CHECK-NEXT: ret i32
+  %p = addrspacecast [16 x i32] addrspace(1)* %arr to i32*
+  %v = load i32* %p
+  ret i32 %v
+}
+
 @const_array = addrspace(2) constant [60 x i8] [i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
                                                 i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
                                                 i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
diff --git a/test/Transforms/InstCombine/align-2d-gep.ll b/test/Transforms/InstCombine/align-2d-gep.ll
index 5bca46d..f6a8776 100644
--- a/test/Transforms/InstCombine/align-2d-gep.ll
+++ b/test/Transforms/InstCombine/align-2d-gep.ll
@@ -31,7 +31,7 @@ bb1:
   store <2 x double><double 0.0, double 0.0>, <2 x double>* %r, align 8
 
   %indvar.next = add i64 %j, 2
-  %exitcond = icmp eq i64 %indvar.next, 557
+  %exitcond = icmp eq i64 %indvar.next, 556
   br i1 %exitcond, label %bb11, label %bb1
 
 bb11:
diff --git a/test/Transforms/InstCombine/bitcast-alias-function.ll b/test/Transforms/InstCombine/bitcast-alias-function.ll
index 284960b..a6b56f9 100644
--- a/test/Transforms/InstCombine/bitcast-alias-function.ll
+++ b/test/Transforms/InstCombine/bitcast-alias-function.ll
@@ -6,46 +6,46 @@ target datalayout = "e-p:32:32:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16
 ; Cases that should be bitcast
 
 ; Test cast between scalars with same bit sizes
-@alias_i32_to_f32 = alias float (float), i32 (i32)* @func_i32
+@alias_i32_to_f32 = alias bitcast (i32 (i32)* @func_i32 to float (float)*)
 
 ; Test cast between vectors with same number of elements and bit sizes
-@alias_v2i32_to_v2f32 = alias <2 x float> (<2 x float>), <2 x i32> (<2 x i32>)* @func_v2i32
+@alias_v2i32_to_v2f32 = alias bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <2 x float> (<2 x float>)*)
 
 ; Test cast from vector to scalar with same number of bits
-@alias_v2f32_to_i64 = alias <2 x float> (<2 x float>), i64 (i64)* @func_i64
+@alias_v2f32_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <2 x float> (<2 x float>)*)
 
 ; Test cast from scalar to vector with same number of bits
-@alias_i64_to_v2f32 = alias  i64 (i64), <2 x float> (<2 x float>)* @func_v2f32
+@alias_i64_to_v2f32 = alias bitcast (<2 x float> (<2 x float>)* @func_v2f32 to i64 (i64)*)
 
 ; Test cast between vectors of pointers
-@alias_v2i32p_to_v2i64p = alias <2 x i64*> (<2 x i64*>), <2 x i32*> (<2 x i32*>)* @func_v2i32p
+@alias_v2i32p_to_v2i64p = alias bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to <2 x i64*> (<2 x i64*>)*)
 
 
 ; Cases that should be invalid and unchanged
 
 ; Test cast between scalars with different bit sizes
-@alias_i64_to_f32 = alias float (float), i64 (i64)* @func_i64
+@alias_i64_to_f32 = alias bitcast (i64 (i64)* @func_i64 to float (float)*)
 
 ; Test cast between vectors with different bit sizes but the
 ; same number of elements
-@alias_v2i64_to_v2f32 = alias <2 x float> (<2 x float>), <2 x i64> (<2 x i64>)* @func_v2i64
+@alias_v2i64_to_v2f32 = alias bitcast (<2 x i64> (<2 x i64>)* @func_v2i64 to <2 x float> (<2 x float>)*)
 
 ; Test cast between vectors with same number of bits and different
 ; numbers of elements
-@alias_v2i32_to_v4f32 = alias  <4 x float> (<4 x float>), <2 x i32> (<2 x i32>)* @func_v2i32
+@alias_v2i32_to_v4f32 = alias bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <4 x float> (<4 x float>)*)
 
 ; Test cast between scalar and vector with different number of bits
-@alias_i64_to_v4f32 = alias i64 (i64), <4 x float> (<4 x float>)* @func_v4f32
+@alias_i64_to_v4f32 = alias bitcast (<4 x float> (<4 x float>)* @func_v4f32 to i64 (i64)*)
 
 ; Test cast between vector and scalar with different number of bits
-@alias_v4f32_to_i64 = alias <4 x float> (<4 x float>), i64 (i64)* @func_i64
+@alias_v4f32_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <4 x float> (<4 x float>)*)
 
 ; Test cast from scalar to vector of pointers with same number of bits
 ; We don't know the pointer size at this point, so this can't be done
-@alias_i64_to_v2i32p = alias  i64 (i64), <2 x i32*> (<2 x i32*>)* @func_v2i32p
+@alias_i64_to_v2i32p = alias bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to i64 (i64)*)
 
 ; Test cast between vector of pointers and scalar with different number of bits
-@alias_v4i32p_to_i64 = alias <4 x i32*> (<4 x i32*>), i64 (i64)* @func_i64
+@alias_v4i32p_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <4 x i32*> (<4 x i32*>)*)
 
 
 
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 4fab92f..0cbfbb0 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -370,7 +370,7 @@ define zeroext i64 @test43(i8 zeroext %on_off) nounwind readonly {
 	ret i64 %C  ;; Should be (add (zext i8 -> i64), -1)
 ; CHECK-LABEL: @test43(
 ; CHECK-NEXT: %A = zext i8 %on_off to i64
-; CHECK-NEXT: %B = add i64 %A, -1
+; CHECK-NEXT: %B = add nsw i64 %A, -1
 ; CHECK-NEXT: ret i64 %B
 }
 
diff --git a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
index 9f21d54..7fac78a 100644
--- a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
+++ b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -230,3 +230,13 @@ define i32 @constant_through_array_as_ptrs() {
   %b = load i32 addrspace(1)* %a, align 4
   ret i32 %b
 }
+
+@shared_mem = external addrspace(3) global [0 x i8]
+
+define float @canonicalize_addrspacecast(i32 %i) {
+; CHECK-LABEL: @canonicalize_addrspacecast
+; CHECK-NEXT: getelementptr inbounds float* addrspacecast (float addrspace(3)* bitcast ([0 x i8] addrspace(3)* @shared_mem to float addrspace(3)*) to float*), i32 %i
+  %p = getelementptr inbounds float* addrspacecast ([0 x i8] addrspace(3)* @shared_mem to float*), i32 %i
+  %v = load float* %p
+  ret float %v
+}
diff --git a/test/Transforms/InstCombine/descale-zero.ll b/test/Transforms/InstCombine/descale-zero.ll
new file mode 100644
index 0000000..7990fdb
--- /dev/null
+++ b/test/Transforms/InstCombine/descale-zero.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+define internal i8* @descale_zero() {
+entry:
+; CHECK: load i16** inttoptr (i64 48 to i16**), align 16
+; CHECK-NEXT: bitcast i16*
+; CHECK-NEXT: ret i8*
+  %i16_ptr = load i16** inttoptr (i64 48 to i16**), align 16
+  %num = load i64* inttoptr (i64 64 to i64*), align 64
+  %num_times_2 = shl i64 %num, 1
+  %num_times_2_plus_4 = add i64 %num_times_2, 4
+  %i8_ptr = bitcast i16* %i16_ptr to i8*
+  %i8_ptr_num_times_2_plus_4 = getelementptr i8* %i8_ptr, i64 %num_times_2_plus_4
+  %num_times_neg2 = mul i64 %num, -2
+  %num_times_neg2_minus_4 = add i64 %num_times_neg2, -4
+  %addr = getelementptr i8* %i8_ptr_num_times_2_plus_4, i64 %num_times_neg2_minus_4
+  ret i8* %addr
+}
diff --git a/test/Transforms/InstSimplify/2010-12-20-Distribute.ll b/test/Transforms/InstCombine/distribute.ll
index 9ea0a5e..e6360f8 100644
--- a/test/Transforms/InstSimplify/2010-12-20-Distribute.ll
+++ b/test/Transforms/InstCombine/distribute.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instsimplify -S | FileCheck %s
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i32 @factorize(i32 %x, i32 %y) {
 ; CHECK-LABEL: @factorize(
@@ -28,27 +28,32 @@ define i32 @factorize3(i32 %x, i32 %a, i32 %b) {
   %r = or i32 %x, %b
   %z = and i32 %l, %r
   ret i32 %z
-; CHECK: ret i32 %r
+; CHECK: %z = or i32 %b, %x
+; CHECK: ret i32 %z
 }
 
 define i32 @factorize4(i32 %x, i32 %y) {
 ; CHECK-LABEL: @factorize4(
+; ((Y << 1) * X) - (X * Y) -> (X * (Y * 2 - Y)) -> (X * Y)
   %sh = shl i32 %y, 1
   %ml = mul i32 %sh, %x
   %mr = mul i32 %x, %y
   %s = sub i32 %ml, %mr
   ret i32 %s
-; CHECK: ret i32 %mr
+; CHECK: %s = mul i32 %y, %x
+; CHECK: ret i32 %s
 }
 
 define i32 @factorize5(i32 %x, i32 %y) {
 ; CHECK-LABEL: @factorize5(
+; ((Y * 2) * X) - (X * Y) -> (X * Y)
   %sh = mul i32 %y, 2
   %ml = mul i32 %sh, %x
   %mr = mul i32 %x, %y
   %s = sub i32 %ml, %mr
   ret i32 %s
-; CHECK: ret i32 %mr
+; CHECK: %s = mul i32 %y, %x
+; CHECK: ret i32 %s
 }
 
 define i32 @expand(i32 %x) {
@@ -58,5 +63,6 @@ define i32 @expand(i32 %x) {
   %b = or i32 %a, 2
   %c = and i32 %b, 1
   ret i32 %c
+; CHECK: %a = and i32 %x, 1
 ; CHECK: ret i32 %a
 }
diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll
index 1dec11d..c8763dc 100644
--- a/test/Transforms/InstCombine/ffs-1.ll
+++ b/test/Transforms/InstCombine/ffs-1.ll
@@ -103,7 +103,7 @@ define i32 @test_simplify13(i32 %x) {
 ; CHECK-LABEL: @test_simplify13(
   %ret = call i32 @ffs(i32 %x)
 ; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
-; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add i32 [[CTTZ]], 1
+; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
 ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
 ; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
   ret i32 %ret
@@ -114,7 +114,7 @@ define i32 @test_simplify14(i32 %x) {
 ; CHECK-LINUX-LABEL: @test_simplify14(
   %ret = call i32 @ffsl(i32 %x)
 ; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
-; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add i32 [[CTTZ]], 1
+; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
 ; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
 ; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
   ret i32 %ret
@@ -125,7 +125,7 @@ define i32 @test_simplify15(i64 %x) {
 ; CHECK-LINUX-LABEL: @test_simplify15(
   %ret = call i32 @ffsll(i64 %x)
 ; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 false)
-; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add i64 [[CTTZ]], 1
+; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i64 [[CTTZ]], 1
 ; CHECK-LINUX-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32
 ; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0
 ; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[TRUNC]], i32 0
diff --git a/test/Transforms/InstCombine/gepphigep.ll b/test/Transforms/InstCombine/gepphigep.ll
new file mode 100644
index 0000000..9aab609
--- /dev/null
+++ b/test/Transforms/InstCombine/gepphigep.ll
@@ -0,0 +1,56 @@
+; RUN: opt -instcombine -S  < %s | FileCheck %s
+
+%struct1 = type { %struct2*, i32, i32, i32 }
+%struct2 = type { i32, i32 }
+
+define i32 @test1(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) {
+bb:
+  %tmp = getelementptr inbounds %struct1* %dm, i64 0, i32 0
+  %tmp1 = load %struct2** %tmp, align 8
+  br i1 %tmp4, label %bb1, label %bb2
+
+bb1:
+  %tmp10 = getelementptr inbounds %struct2* %tmp1, i64 %tmp9
+  %tmp11 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 0
+  store i32 0, i32* %tmp11, align 4
+  br label %bb3
+
+bb2:
+  %tmp20 = getelementptr inbounds %struct2* %tmp1, i64 %tmp19
+  %tmp21 = getelementptr inbounds %struct2* %tmp20, i64 0, i32 0
+  store i32 0, i32* %tmp21, align 4
+  br label %bb3
+
+bb3:
+  %phi = phi %struct2* [ %tmp10, %bb1 ], [ %tmp20, %bb2 ]
+  %tmp24 = getelementptr inbounds %struct2* %phi, i64 0, i32 1
+  %tmp25 = load i32* %tmp24, align 4
+  ret i32 %tmp25
+
+; CHECK-LABEL: @test1(
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 0
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp19, i32 0
+; CHECK: %[[PHI:[0-9A-Za-z]+]] = phi i64 [ %tmp9, %bb1 ], [ %tmp19, %bb2 ]
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %[[PHI]], i32 1
+
+}
+
+define i32 @test2(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) {
+bb:
+  %tmp = getelementptr inbounds %struct1* %dm, i64 0, i32 0
+  %tmp1 = load %struct2** %tmp, align 8
+  %tmp10 = getelementptr inbounds %struct2* %tmp1, i64 %tmp9
+  %tmp11 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 0
+  store i32 0, i32* %tmp11, align 4
+  %tmp20 = getelementptr inbounds %struct2* %tmp1, i64 %tmp19
+  %tmp21 = getelementptr inbounds %struct2* %tmp20, i64 0, i32 0
+  store i32 0, i32* %tmp21, align 4
+  %tmp24 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 1
+  %tmp25 = load i32* %tmp24, align 4
+  ret i32 %tmp25
+
+; CHECK-LABEL: @test2(
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 0
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp19, i32 0
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 1
+}
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index ef0cb29..3240c6d 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -732,7 +732,8 @@ define i64 @test_gep_bitcast_array_same_size_element([100 x double]* %arr, i64 %
 define i64 @test_gep_bitcast_array_same_size_element_addrspacecast([100 x double]* %arr, i64 %N) {
 ; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_addrspacecast(
 ; CHECK: getelementptr [100 x double]* %arr, i64 0, i64 %V
-; CHECK-NEXT: %t = addrspacecast double*
+; CHECK-NEXT: bitcast double*
+; CHECK-NEXT: %t = addrspacecast i64*
 ; CHECK: load i64 addrspace(3)* %t
   %cast = addrspacecast [100 x double]* %arr to i64 addrspace(3)*
   %V = mul i64 %N, 8
@@ -802,10 +803,22 @@ define i16 @test41([3 x i32] addrspace(1)* %array) {
 ; CHECK-NEXT: ret i16 8
 }
 
-define i32 addrspace(1)* @ascast_0_gep([128 x i32]* %p) nounwind {
+define i32 addrspace(1)* @ascast_0_gep(i32* %p) nounwind {
 ; CHECK-LABEL: @ascast_0_gep(
 ; CHECK-NOT: getelementptr
 ; CHECK: ret
+  %gep = getelementptr i32* %p, i32 0
+  %x = addrspacecast i32* %gep to i32 addrspace(1)*
+  ret i32 addrspace(1)* %x
+}
+
+; Do not merge the GEP and the addrspacecast, because it would undo the
+; addrspacecast canonicalization.
+define i32 addrspace(1)* @ascast_0_0_gep([128 x i32]* %p) nounwind {
+; CHECK-LABEL: @ascast_0_0_gep(
+; CHECK-NEXT: getelementptr [128 x i32]
+; CHECK-NEXT: addrspacecast i32*
+; CHECK-NEXT: ret i32 addrspace(1)*
   %gep = getelementptr [128 x i32]* %p, i32 0, i32 0
   %x = addrspacecast i32* %gep to i32 addrspace(1)*
   ret i32 addrspace(1)* %x
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index f45897c..26e144f 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -1,7 +1,6 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-target datalayout =
-"e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 define i32 @test1(i32 %X) {
 entry:
@@ -166,6 +165,14 @@ define i1 @test17(i32 %x) nounwind {
 ; CHECK-NEXT: %cmp = icmp ne i32 %x, 3
 }
 
+define i1 @test17a(i32 %x) nounwind {
+  %shl = shl i32 1, %x
+  %and = and i32 %shl, 7
+  %cmp = icmp eq i32 %and, 0
+  ret i1 %cmp
+; CHECK-LABEL: @test17a(
+; CHECK-NEXT: %cmp = icmp ugt i32 %x, 2
+}
 
 define i1 @test18(i32 %x) nounwind {
   %sh = lshr i32 8, %x
@@ -194,6 +201,15 @@ define i1 @test20(i32 %x) nounwind {
 ; CHECK-NEXT: %cmp = icmp eq i32 %x, 3
 }
 
+define i1 @test20a(i32 %x) nounwind {
+  %shl = shl i32 1, %x
+  %and = and i32 %shl, 7
+  %cmp = icmp ne i32 %and, 0
+  ret i1 %cmp
+; CHECK-LABEL: @test20a(
+; CHECK-NEXT: %cmp = icmp ult i32 %x, 3
+}
+
 define i1 @test21(i8 %x, i8 %y) {
 ; CHECK-LABEL: @test21(
 ; CHECK-NOT: or i8
@@ -657,6 +673,49 @@ define i1 @test60_as1(i8 addrspace(1)* %foo, i64 %i, i64 %j) {
 ; CHECK-NEXT: ret i1
 }
 
+; Same as test60, but look through an addrspacecast instead of a
+; bitcast. This uses the same sized addrspace.
+define i1 @test60_addrspacecast(i8* %foo, i64 %i, i64 %j) {
+  %bit = addrspacecast i8* %foo to i32 addrspace(3)*
+  %gep1 = getelementptr inbounds i32 addrspace(3)* %bit, i64 %i
+  %gep2 = getelementptr inbounds i8* %foo, i64 %j
+  %cast1 = addrspacecast i32 addrspace(3)* %gep1 to i8*
+  %cmp = icmp ult i8* %cast1, %gep2
+  ret i1 %cmp
+; CHECK-LABEL: @test60_addrspacecast(
+; CHECK-NEXT: %gep1.idx = shl nuw i64 %i, 2
+; CHECK-NEXT: icmp slt i64 %gep1.idx, %j
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test60_addrspacecast_smaller(i8* %foo, i16 %i, i64 %j) {
+  %bit = addrspacecast i8* %foo to i32 addrspace(1)*
+  %gep1 = getelementptr inbounds i32 addrspace(1)* %bit, i16 %i
+  %gep2 = getelementptr inbounds i8* %foo, i64 %j
+  %cast1 = addrspacecast i32 addrspace(1)* %gep1 to i8*
+  %cmp = icmp ult i8* %cast1, %gep2
+  ret i1 %cmp
+; CHECK-LABEL: @test60_addrspacecast_smaller(
+; CHECK-NEXT: %gep1.idx = shl nuw i16 %i, 2
+; CHECK-NEXT: trunc i64 %j to i16
+; CHECK-NEXT: icmp sgt i16 %1, %gep1.idx
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test60_addrspacecast_larger(i8 addrspace(1)* %foo, i32 %i, i16 %j) {
+  %bit = addrspacecast i8 addrspace(1)* %foo to i32 addrspace(2)*
+  %gep1 = getelementptr inbounds i32 addrspace(2)* %bit, i32 %i
+  %gep2 = getelementptr inbounds i8 addrspace(1)* %foo, i16 %j
+  %cast1 = addrspacecast i32 addrspace(2)* %gep1 to i8 addrspace(1)*
+  %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+  ret i1 %cmp
+; CHECK-LABEL: @test60_addrspacecast_larger(
+; CHECK-NEXT:  %gep1.idx = shl nuw i32 %i, 2
+; CHECK-NEXT:  trunc i32 %gep1.idx to i16
+; CHECK-NEXT:  icmp slt i16 %1, %j
+; CHECK-NEXT:  ret i1
+}
+
 define i1 @test61(i8* %foo, i64 %i, i64 %j) {
   %bit = bitcast i8* %foo to i32*
   %gep1 = getelementptr i32* %bit, i64 %i
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 91c4470..9b58d93 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -3,6 +3,7 @@
 %overflow.result = type {i8, i1}
 
 declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8)
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
 declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8)
 declare double @llvm.powi.f64(double, i32) nounwind readonly
 declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
@@ -89,6 +90,18 @@ define i8 @uaddtest7(i8 %A, i8 %B) {
 ; CHECK-NEXT: ret i8 %z
 }
 
+; PR20194
+define { i32, i1 } @saddtest1(i8 %a, i8 %b) {
+  %A = sext i8 %a to i32
+  %B = sext i8 %b to i32
+  %x = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %A, i32 %B)
+  ret { i32, i1 } %x
+; CHECK-LABEL: @saddtest1
+; CHECK: %x = add nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret { i32, i1 } %1
+}
+
 
 define i8 @umultest1(i8 %A, i1* %overflowPtr) {
   %x = call %overflow.result @llvm.umul.with.overflow.i8(i8 0, i8 %A)
diff --git a/test/Transforms/InstCombine/memcpy-from-global.ll b/test/Transforms/InstCombine/memcpy-from-global.ll
index b5a0ab8..3bc1d36 100644
--- a/test/Transforms/InstCombine/memcpy-from-global.ll
+++ b/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -78,7 +78,8 @@ define void @test2_addrspacecast() {
 ; %A alloca is deleted
 ; This doesn't exactly match what test2 does, because folding the type
 ; cast into the alloca doesn't work for the addrspacecast yet.
-; CHECK-NEXT: alloca %T
+; CHECK-NEXT: alloca [124 x i8]
+; CHECK-NEXT: getelementptr
 ; CHECK-NEXT: addrspacecast
 
 ; use @G instead of %A
diff --git a/test/Transforms/InstCombine/overflow-mul.ll b/test/Transforms/InstCombine/overflow-mul.ll
index 04019ae..cbb2f5f 100644
--- a/test/Transforms/InstCombine/overflow-mul.ll
+++ b/test/Transforms/InstCombine/overflow-mul.ll
@@ -162,3 +162,14 @@ entry:
   ret i32 %retval
 }
 
+define <4 x i32> @pr20113(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: @pr20113
+; CHECK-NOT: mul.with.overflow
+; CHECK: ret
+  %vmovl.i.i726 = zext <4 x i16> %a to <4 x i32>
+  %vmovl.i.i712 = zext <4 x i16> %b to <4 x i32>
+  %mul.i703 = mul <4 x i32> %vmovl.i.i712, %vmovl.i.i726
+  %tmp = icmp sge <4 x i32> %mul.i703, zeroinitializer
+  %vcgez.i = sext <4 x i1> %tmp to <4 x i32>
+  ret <4 x i32> %vcgez.i
+}
diff --git a/test/Transforms/InstCombine/pr20059.ll b/test/Transforms/InstCombine/pr20059.ll
new file mode 100644
index 0000000..0ef3159
--- /dev/null
+++ b/test/Transforms/InstCombine/pr20059.ll
@@ -0,0 +1,16 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed
+; for an srem operation. This is not a valid optimization because it may cause a trap
+; on div-by-zero.
+
+; CHECK-LABEL: @do_not_reorder
+; CHECK: %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: %retval = srem <4 x i32> %splat1, %splat2
+define <4 x i32> @do_not_reorder(<4 x i32> %p1, <4 x i32> %p2) {
+  %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
+  %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
+  %retval = srem <4 x i32> %splat1, %splat2
+  ret <4 x i32> %retval
+}
diff --git a/test/Transforms/InstCombine/pr20079.ll b/test/Transforms/InstCombine/pr20079.ll
new file mode 100644
index 0000000..ce9c4de
--- /dev/null
+++ b/test/Transforms/InstCombine/pr20079.ll
@@ -0,0 +1,9 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+@b = internal global [1 x i32] zeroinitializer, align 4
+@c = internal global i32 0, align 4
+
+; CHECK-LABEL: @fn1
+; CHECK-NEXT: ret i32 0
+define i32 @fn1(i32 %a) {
+  ret i32 0
+}
diff --git a/test/Transforms/InstCombine/r600-intrinsics.ll b/test/Transforms/InstCombine/r600-intrinsics.ll
new file mode 100644
index 0000000..1db6b0d
--- /dev/null
+++ b/test/Transforms/InstCombine/r600-intrinsics.ll
@@ -0,0 +1,47 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
+declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_1
+; CHECK-NEXT: ret float 1.000000e+00
+define float @test_constant_fold_rcp_f32_1() nounwind {
+  %val = call float @llvm.AMDGPU.rcp.f32(float 1.0) nounwind readnone
+  ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_1
+; CHECK-NEXT:  ret double 1.000000e+00
+define double @test_constant_fold_rcp_f64_1() nounwind {
+  %val = call double @llvm.AMDGPU.rcp.f64(double 1.0) nounwind readnone
+  ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_half
+; CHECK-NEXT: ret float 2.000000e+00
+define float @test_constant_fold_rcp_f32_half() nounwind {
+  %val = call float @llvm.AMDGPU.rcp.f32(float 0.5) nounwind readnone
+  ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_half
+; CHECK-NEXT:  ret double 2.000000e+00
+define double @test_constant_fold_rcp_f64_half() nounwind {
+  %val = call double @llvm.AMDGPU.rcp.f64(double 0.5) nounwind readnone
+  ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_43
+; CHECK-NEXT: call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01)
+define float @test_constant_fold_rcp_f32_43() nounwind {
+ %val = call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_43
+; CHECK-NEXT: call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01)
+define double @test_constant_fold_rcp_f64_43() nounwind {
+  %val = call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) nounwind readnone
+  ret double %val
+}
+
diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll
index 9f07702..0595a67 100644
--- a/test/Transforms/InstCombine/rem.ll
+++ b/test/Transforms/InstCombine/rem.ll
@@ -127,7 +127,7 @@ define i64 @test14(i64 %x, i32 %y) {
 ; CHECK-LABEL: @test14(
 ; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, %y
 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SHL]] to i64
-; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ZEXT]], -1
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[ZEXT]], -1
 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[ADD]], %x
 ; CHECK-NEXT: ret i64 [[AND]]
 	%shl = shl i32 1, %y
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 2213be1..d625f3b 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -281,7 +281,7 @@ define i32 @test15i(i32 %X) {
 ; CHECK-NEXT: %t1 = shl i32 %X, 8
 ; CHECK-NEXT: %1 = and i32 %t1, 512
 ; CHECK-NEXT: %2 = xor i32 %1, 512
-; CHECK-NEXT: %3 = add i32 %2, 577
+; CHECK-NEXT: %3 = add nuw nsw i32 %2, 577
 ; CHECK-NEXT: ret i32 %3
 }
 
@@ -294,7 +294,7 @@ define i32 @test15j(i32 %X) {
 ; CHECK-LABEL: @test15j(
 ; CHECK-NEXT: %t1 = shl i32 %X, 8
 ; CHECK-NEXT: %1 = and i32 %t1, 512
-; CHECK-NEXT: %2 = add i32 %1, 577
+; CHECK-NEXT: %2 = add nuw nsw i32 %1, 577
 ; CHECK-NEXT: ret i32 %2
 }
 
@@ -521,7 +521,7 @@ define i32 @test35(i32 %x) {
 ; CHECK-LABEL: @test35(
 ; CHECK: ashr i32 %x, 31
 ; CHECK: and i32 {{.*}}, 40
-; CHECK: add i32 {{.*}}, 60
+; CHECK: add nuw nsw i32 {{.*}}, 60
 ; CHECK: ret
 }
 
@@ -532,7 +532,7 @@ define i32 @test36(i32 %x) {
 ; CHECK-LABEL: @test36(
 ; CHECK: ashr i32 %x, 31
 ; CHECK: and i32 {{.*}}, -40
-; CHECK: add i32 {{.*}}, 100
+; CHECK: add nsw i32 {{.*}}, 100
 ; CHECK: ret
 }
 
@@ -996,6 +996,111 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_vector_of_2s(i32 %x, <2 x i32> %y) {
   ret <2 x i32> %select
 }
 
+; CHECK-LABEL: @select_icmp_and_8_eq_0_or_8(
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %x, 8
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_and_8_eq_0_or_8(i32 %x) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %or = or i32 %x, 8
+  %or.x = select i1 %cmp, i32 %or, i32 %x
+  ret i32 %or.x
+}
+
+; CHECK-LABEL: @select_icmp_and_8_ne_0_xor_8(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, -9
+; CHECK-NEXT: ret i32 [[AND]]
+define i32 @select_icmp_and_8_ne_0_xor_8(i32 %x) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %xor = xor i32 %x, 8
+  %x.xor = select i1 %cmp, i32 %x, i32 %xor
+  ret i32 %x.xor
+}
+
+; CHECK-LABEL: @select_icmp_and_8_eq_0_xor_8(
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %x, 8
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_and_8_eq_0_xor_8(i32 %x) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %xor = xor i32 %x, 8
+  %xor.x = select i1 %cmp, i32 %xor, i32 %x
+  ret i32 %xor.x
+}
+
+; CHECK-LABEL: @select_icmp_and_8_ne_0_and_not_8(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, -9
+; CHECK-NEXT: ret i32 [[AND]]
+define i32 @select_icmp_and_8_ne_0_and_not_8(i32 %x) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %and1 = and i32 %x, -9
+  %x.and1 = select i1 %cmp, i32 %x, i32 %and1
+  ret i32 %x.and1
+}
+
+; CHECK-LABEL: @select_icmp_and_8_eq_0_and_not_8(
+; CHECK-NEXT: ret i32 %x
+define i32 @select_icmp_and_8_eq_0_and_not_8(i32 %x) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %and1 = and i32 %x, -9
+  %and1.x = select i1 %cmp, i32 %and1, i32 %x
+  ret i32 %and1.x
+}
+
+; CHECK-LABEL: @select_icmp_x_and_8_eq_0_y_xor_8(
+; CHECK: select i1 %cmp, i64 %y, i64 %xor
+define i64 @select_icmp_x_and_8_eq_0_y_xor_8(i32 %x, i64 %y) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %xor = xor i64 %y, 8
+  %y.xor = select i1 %cmp, i64 %y, i64 %xor
+  ret i64 %y.xor
+}
+
+; CHECK-LABEL: @select_icmp_x_and_8_eq_0_y_and_not_8(
+; CHECK: select i1 %cmp, i64 %y, i64 %and1
+define i64 @select_icmp_x_and_8_eq_0_y_and_not_8(i32 %x, i64 %y) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %and1 = and i64 %y, -9
+  %y.and1 = select i1 %cmp, i64 %y, i64 %and1
+  ret i64 %y.and1
+}
+
+; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_xor_8(
+; CHECK: select i1 %cmp, i64 %xor, i64 %y
+define i64 @select_icmp_x_and_8_ne_0_y_xor_8(i32 %x, i64 %y) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %xor = xor i64 %y, 8
+  %xor.y = select i1 %cmp, i64 %xor, i64 %y
+  ret i64 %xor.y
+}
+
+; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_and_not_8(
+; CHECK: select i1 %cmp, i64 %and1, i64 %y
+define i64 @select_icmp_x_and_8_ne_0_y_and_not_8(i32 %x, i64 %y) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %and1 = and i64 %y, -9
+  %and1.y = select i1 %cmp, i64 %and1, i64 %y
+  ret i64 %and1.y
+}
+
+; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_or_8(
+; CHECK: xor i64 %1, 8
+; CHECK: or i64 %2, %y
+define i64 @select_icmp_x_and_8_ne_0_y_or_8(i32 %x, i64 %y) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %or = or i64 %y, 8
+  %or.y = select i1 %cmp, i64 %or, i64 %y
+  ret i64 %or.y
+}
+
 define i32 @test65(i64 %x) {
   %1 = and i64 %x, 16
   %2 = icmp ne i64 %1, 0
@@ -1130,4 +1235,4 @@ define i32 @test75(i32 %x) {
 ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %x, 68
 ; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 68, i32 %x
 ; CHECK-NEXT: ret i32 [[SEL]]
-}
-\ No newline at end of file
+}
diff --git a/test/Transforms/InstCombine/sext.ll b/test/Transforms/InstCombine/sext.ll
index b8dfe22..f04afcc 100644
--- a/test/Transforms/InstCombine/sext.ll
+++ b/test/Transforms/InstCombine/sext.ll
@@ -145,7 +145,7 @@ define i32 @test13(i32 %x) nounwind {
 ; CHECK-LABEL: @test13(
 ; CHECK-NEXT: %and = lshr i32 %x, 3
 ; CHECK-NEXT: %1 = and i32 %and, 1
-; CHECK-NEXT: %sext = add i32 %1, -1
+; CHECK-NEXT: %sext = add nsw i32 %1, -1
 ; CHECK-NEXT: ret i32 %sext
 }
 
@@ -157,7 +157,7 @@ define i32 @test14(i16 %x) nounwind {
 ; CHECK-LABEL: @test14(
 ; CHECK-NEXT: %and = lshr i16 %x, 4
 ; CHECK-NEXT: %1 = and i16 %and, 1
-; CHECK-NEXT: %sext = add i16 %1, -1
+; CHECK-NEXT: %sext = add nsw i16 %1, -1
 ; CHECK-NEXT: %ext = sext i16 %sext to i32
 ; CHECK-NEXT: ret i32 %ext
 }
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 41d803c8..67b7c49 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -444,3 +444,23 @@ define <2 x i64> @test36(<2 x i64> %A) {
 ; CHECK-NEXT: %sub = mul <2 x i64> %A, <i64 7, i64 15>
 ; CHECK-NEXT: ret <2 x i64> %sub
 }
+
+define <2 x i32> @test37(<2 x i32> %A) {
+  %div = sdiv <2 x i32> %A, <i32 -2147483648, i32 -2147483648>
+  %sub = sub nsw <2 x i32> zeroinitializer, %div
+  ret <2 x i32> %sub
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: [[ICMP:%.*]] = icmp eq <2 x i32> %A, <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[ICMP]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[SEXT]]
+}
+
+define i32 @test38(i32 %A) {
+  %div = sdiv i32 %A, -2147483648
+  %sub = sub nsw i32 0, %div
+  ret i32 %sub
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 %A, -2147483648
+; CHECK-NEXT: [[SEXT:%.*]] = sext i1 [[ICMP]] to i32
+; CHECK-NEXT: ret i32 [[SEXT]]
+}
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index fc0f8bd..eb4e9d6 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -405,3 +405,12 @@ define i32 @pr19737(<4 x i32> %in0) {
   %rv = extractelement <4 x i32> %and.i, i32 0
   ret i32 %rv
 }
+
+define <4 x i32> @pr20114(<4 x i32> %__mask) {
+; CHECK-LABEL: @pr20114
+; CHECK: shufflevector
+; CHECK: and
+  %mask01.i = shufflevector <4 x i32> %__mask, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+  %masked_new.i.i.i = and <4 x i32> bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64)> to <4 x i32>), %mask01.i
+  ret <4 x i32> %masked_new.i.i.i
+}
diff --git a/test/Transforms/InstCombine/zext-bool-add-sub.ll b/test/Transforms/InstCombine/zext-bool-add-sub.ll
index d7f338b..6fa4d70 100644
--- a/test/Transforms/InstCombine/zext-bool-add-sub.ll
+++ b/test/Transforms/InstCombine/zext-bool-add-sub.ll
@@ -6,7 +6,7 @@ entry:
 ; CHECK-LABEL: @a(
 ; CHECK: [[TMP1:%.*]] = sext i1 %y to i32
 ; CHECK: [[TMP2:%.*]] = select i1 %x, i32 2, i32 1
-; CHECK-NEXT: add i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: add nsw i32 [[TMP2]], [[TMP1]]
   %conv = zext i1 %x to i32
   %conv3 = zext i1 %y to i32
   %conv3.neg = sub i32 0, %conv3
diff --git a/test/Transforms/InstSimplify/apint-or.ll b/test/Transforms/InstSimplify/apint-or.ll
new file mode 100644
index 0000000..5d314db
--- /dev/null
+++ b/test/Transforms/InstSimplify/apint-or.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -instsimplify -S | not grep or
+
+; Test the case where integer BitWidth <= 64 && BitWidth % 2 != 0.
+define i39 @test1(i39 %V, i39 %M) {
+    ;; If we have: ((V + N) & C1) | (V & C2)
+    ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+    ;; replace with V+N.
+    %C1 = xor i39 274877906943, -1 ;; C2 = 274877906943
+    %N = and i39 %M, 274877906944
+    %A = add i39 %V, %N
+    %B = and i39 %A, %C1
+    %D = and i39 %V, 274877906943
+    %R = or i39 %B, %D
+    ret i39 %R
+; CHECK-LABEL @test1
+; CHECK-NEXT: and {{.*}}, -274877906944
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+; Test the case where Integer BitWidth > 64 && BitWidth <= 1024. 
+define i399 @test2(i399 %V, i399 %M) {
+    ;; If we have: ((V + N) & C1) | (V & C2)
+    ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+    ;; replace with V+N.
+    %C1 = xor i399 274877906943, -1 ;; C2 = 274877906943
+    %N = and i399 %M, 18446742974197923840
+    %A = add i399 %V, %N
+    %B = and i399 %A, %C1
+    %D = and i399 %V, 274877906943
+    %R = or i399 %B, %D
+    ret i399 %R
+; CHECK-LABEL @test2
+; CHECK-NEXT: and {{.*}}, 18446742974197923840
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 105e244..7d0cd9c 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -883,3 +883,33 @@ define i1 @returns_nonnull() {
 ; CHECK: ret i1 false
 }
 
+; If a bit is known to be zero for A and known to be one for B,
+; then A and B cannot be equal.
+define i1 @icmp_eq_const(i32 %a) nounwind {
+  %b = mul nsw i32 %a, -2
+  %c = icmp eq i32 %b, 1
+  ret i1 %c
+
+; CHECK-LABEL: @icmp_eq_const
+; CHECK-NEXT: ret i1 false 
+}
+
+define i1 @icmp_ne_const(i32 %a) nounwind {
+  %b = mul nsw i32 %a, -2
+  %c = icmp ne i32 %b, 1
+  ret i1 %c
+
+; CHECK-LABEL: @icmp_ne_const
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @icmp_sdiv_int_min(i32 %a) {
+  %div = sdiv i32 -2147483648, %a
+  %cmp = icmp ne i32 %div, -1073741824
+  ret i1 %cmp
+
+; CHECK-LABEL: @icmp_sdiv_int_min
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 -2147483648, %a
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[DIV]], -1073741824
+; CHECK-NEXT: ret i1 [[CMP]]
+}
diff --git a/test/Transforms/JumpThreading/pr15851_hang.ll b/test/Transforms/JumpThreading/pr15851_hang.ll
new file mode 100644
index 0000000..0484bc9
--- /dev/null
+++ b/test/Transforms/JumpThreading/pr15851_hang.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -jump-threading < %s | FileCheck %s
+
+; CHECK-LABEL: @f(
+; CHECK-LABEL: entry
+; CHECK: ret void
+; CHECK-NOT: for.cond1
+; CHECK-NOT: for.body
+
+define void @f() {
+entry:
+  ret void
+
+for.cond1:
+  %i.025 = phi i32 [ %inc, %for.body ], [ %inc, %for.body ], [ 1, %for.cond1 ]
+  %cmp = icmp slt i32 %i.025, 2
+  br i1 %cmp, label %for.body, label %for.cond1
+
+for.body:
+  %inc = add nsw i32 %i.025, 0
+  %a = icmp ugt i32 %inc, 2
+  br i1 %a, label %for.cond1, label %for.cond1
+}
diff --git a/test/Transforms/JumpThreading/select.ll b/test/Transforms/JumpThreading/select.ll
index 201e604..545e86c 100644
--- a/test/Transforms/JumpThreading/select.ll
+++ b/test/Transforms/JumpThreading/select.ll
@@ -127,7 +127,7 @@ L4:
 ; CHECK: test_switch_default
 ; CHECK: entry:
 ; CHECK: load
-; CHECK: switch
+; CHECK: icmp
 ; CHECK: [[THREADED:[A-Za-z.0-9]+]]:
 ; CHECK: store
 ; CHECK: br
diff --git a/test/Transforms/LICM/extra-copies.ll b/test/Transforms/LICM/extra-copies.ll
new file mode 100644
index 0000000..ef52f9f
--- /dev/null
+++ b/test/Transforms/LICM/extra-copies.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -licm -S | FileCheck %s
+; PR19835
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @f(i32 %x) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %storemerge4 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %mul = mul nsw i32 %x, %x
+  %add2 = add nsw i32 %mul, %x
+  %mul3 = add nsw i32 %add2, %mul
+  %inc = add nsw i32 %storemerge4, 1
+  %cmp = icmp slt i32 %inc, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %a9.0.lcssa = phi i32 [ %mul3, %for.body ]
+  ret i32 %a9.0.lcssa
+}
+
+; Test that there is exactly one copy of mul nsw i32 %x, %x in the exit block.
+; CHECK: define i32 @f(i32 [[X:%.*]])
+; CHECK: for.end:
+; CHECK-NOT: mul nsw i32 [[X]], [[X]]
+; CHECK: mul nsw i32 [[X]], [[X]]
+; CHECK-NOT: mul nsw i32 [[X]], [[X]]
diff --git a/test/Transforms/LICM/hoist-bitcast-load.ll b/test/Transforms/LICM/hoist-bitcast-load.ll
new file mode 100644
index 0000000..639dca5
--- /dev/null
+++ b/test/Transforms/LICM/hoist-bitcast-load.ll
@@ -0,0 +1,201 @@
+; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure the basic alloca pointer hoisting works:
+; CHECK-LABEL: @test1
+; CHECK: load i32* %c, align 4
+; CHECK: for.body:
+
+; Function Attrs: nounwind uwtable
+define void @test1(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  %c = alloca i32
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %1 = load i32* %c, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %2 = load i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %2, %1
+  store i32 %mul, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+; Make sure the basic alloca pointer hoisting works through a bitcast to a
+; pointer to a smaller type:
+; CHECK-LABEL: @test2
+; CHECK: load i32* %c, align 4
+; CHECK: for.body:
+
+; Function Attrs: nounwind uwtable
+define void @test2(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  %ca = alloca i64
+  %c = bitcast i64* %ca to i32*
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %1 = load i32* %c, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %2 = load i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %2, %1
+  store i32 %mul, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+; Make sure the basic alloca pointer hoisting works through a bitcast to a
+; pointer to a smaller type (where the bitcast also needs to be hoisted):
+; CHECK-LABEL: @test3
+; CHECK: load i32* %c, align 4
+; CHECK: for.body:
+
+; Function Attrs: nounwind uwtable
+define void @test3(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  %ca = alloca i64
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %c = bitcast i64* %ca to i32*
+  %1 = load i32* %c, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %2 = load i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %2, %1
+  store i32 %mul, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+; Make sure the basic alloca pointer hoisting does not happen through a bitcast
+; to a pointer to a larger type:
+; CHECK-LABEL: @test4
+; CHECK: for.body:
+; CHECK: load i32* %c, align 4
+
+; Function Attrs: nounwind uwtable
+define void @test4(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  %ca = alloca i16
+  %c = bitcast i16* %ca to i32*
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %1 = load i32* %c, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %2 = load i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %2, %1
+  store i32 %mul, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+; Don't crash on bitcasts to unsized types.
+; CHECK-LABEL: @test5
+; CHECK: for.body:
+; CHECK: load i32* %c, align 4
+
+%atype = type opaque
+
+; Function Attrs: nounwind uwtable
+define void @test5(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  %ca = alloca i16
+  %cab = bitcast i16* %ca to %atype*
+  %c = bitcast %atype* %cab to i32*
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %1 = load i32* %c, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %2 = load i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %2, %1
+  store i32 %mul, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+attributes #0 = { nounwind uwtable }
+
diff --git a/test/Transforms/LoadCombine/load-combine.ll b/test/Transforms/LoadCombine/load-combine.ll
new file mode 100644
index 0000000..c4d9241
--- /dev/null
+++ b/test/Transforms/LoadCombine/load-combine.ll
@@ -0,0 +1,190 @@
+; RUN: opt < %s -load-combine -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Combine read from char* idiom.
+define i64 @LoadU64_x64_0(i64* %pData) {
+  %1 = bitcast i64* %pData to i8*
+  %2 = load i8* %1, align 1
+  %3 = zext i8 %2 to i64
+  %4 = shl nuw i64 %3, 56
+  %5 = getelementptr inbounds i8* %1, i64 1
+  %6 = load i8* %5, align 1
+  %7 = zext i8 %6 to i64
+  %8 = shl nuw nsw i64 %7, 48
+  %9 = or i64 %8, %4
+  %10 = getelementptr inbounds i8* %1, i64 2
+  %11 = load i8* %10, align 1
+  %12 = zext i8 %11 to i64
+  %13 = shl nuw nsw i64 %12, 40
+  %14 = or i64 %9, %13
+  %15 = getelementptr inbounds i8* %1, i64 3
+  %16 = load i8* %15, align 1
+  %17 = zext i8 %16 to i64
+  %18 = shl nuw nsw i64 %17, 32
+  %19 = or i64 %14, %18
+  %20 = getelementptr inbounds i8* %1, i64 4
+  %21 = load i8* %20, align 1
+  %22 = zext i8 %21 to i64
+  %23 = shl nuw nsw i64 %22, 24
+  %24 = or i64 %19, %23
+  %25 = getelementptr inbounds i8* %1, i64 5
+  %26 = load i8* %25, align 1
+  %27 = zext i8 %26 to i64
+  %28 = shl nuw nsw i64 %27, 16
+  %29 = or i64 %24, %28
+  %30 = getelementptr inbounds i8* %1, i64 6
+  %31 = load i8* %30, align 1
+  %32 = zext i8 %31 to i64
+  %33 = shl nuw nsw i64 %32, 8
+  %34 = or i64 %29, %33
+  %35 = getelementptr inbounds i8* %1, i64 7
+  %36 = load i8* %35, align 1
+  %37 = zext i8 %36 to i64
+  %38 = or i64 %34, %37
+  ret i64 %38
+; CHECK-LABEL: @LoadU64_x64_0(
+; CHECK: load i64* %{{.*}}, align 1
+; CHECK-NOT: load
+}
+
+; Combine simple adjacent loads.
+define i32 @"2xi16_i32"(i16* %x) {
+  %1 = load i16* %x, align 2
+  %2 = getelementptr inbounds i16* %x, i64 1
+  %3 = load i16* %2, align 2
+  %4 = zext i16 %3 to i32
+  %5 = shl nuw i32 %4, 16
+  %6 = zext i16 %1 to i32
+  %7 = or i32 %5, %6
+  ret i32 %7
+; CHECK-LABEL: @"2xi16_i32"(
+; CHECK: load i32* %{{.*}}, align 2
+; CHECK-NOT: load
+}
+
+; Don't combine loads across stores.
+define i32 @"2xi16_i32_store"(i16* %x, i16* %y) {
+  %1 = load i16* %x, align 2
+  store i16 0, i16* %y, align 2
+  %2 = getelementptr inbounds i16* %x, i64 1
+  %3 = load i16* %2, align 2
+  %4 = zext i16 %3 to i32
+  %5 = shl nuw i32 %4, 16
+  %6 = zext i16 %1 to i32
+  %7 = or i32 %5, %6
+  ret i32 %7
+; CHECK-LABEL: @"2xi16_i32_store"(
+; CHECK: load i16* %{{.*}}, align 2
+; CHECK: store
+; CHECK: load i16* %{{.*}}, align 2
+}
+
+; Don't combine loads with a gap.
+define i32 @"2xi16_i32_gap"(i16* %x) {
+  %1 = load i16* %x, align 2
+  %2 = getelementptr inbounds i16* %x, i64 2
+  %3 = load i16* %2, align 2
+  %4 = zext i16 %3 to i32
+  %5 = shl nuw i32 %4, 16
+  %6 = zext i16 %1 to i32
+  %7 = or i32 %5, %6
+  ret i32 %7
+; CHECK-LABEL: @"2xi16_i32_gap"(
+; CHECK: load i16* %{{.*}}, align 2
+; CHECK: load i16* %{{.*}}, align 2
+}
+
+; Combine out of order loads.
+define i32 @"2xi16_i32_order"(i16* %x) {
+  %1 = getelementptr inbounds i16* %x, i64 1
+  %2 = load i16* %1, align 2
+  %3 = zext i16 %2 to i32
+  %4 = load i16* %x, align 2
+  %5 = shl nuw i32 %3, 16
+  %6 = zext i16 %4 to i32
+  %7 = or i32 %5, %6
+  ret i32 %7
+; CHECK-LABEL: @"2xi16_i32_order"(
+; CHECK: load i32* %{{.*}}, align 2
+; CHECK-NOT: load
+}
+
+; Overlapping loads.
+define i32 @"2xi16_i32_overlap"(i8* %x) {
+  %1 = bitcast i8* %x to i16*
+  %2 = load i16* %1, align 2
+  %3 = getelementptr inbounds i8* %x, i64 1
+  %4 = bitcast i8* %3 to i16*
+  %5 = load i16* %4, align 2
+  %6 = zext i16 %5 to i32
+  %7 = shl nuw i32 %6, 16
+  %8 = zext i16 %2 to i32
+  %9 = or i32 %7, %8
+  ret i32 %9
+; CHECK-LABEL: @"2xi16_i32_overlap"(
+; CHECK: load i16* %{{.*}}, align 2
+; CHECK: load i16* %{{.*}}, align 2
+}
+
+; Combine valid alignments.
+define i64 @"2xi16_i64_align"(i8* %x) {
+  %1 = bitcast i8* %x to i32*
+  %2 = load i32* %1, align 4
+  %3 = getelementptr inbounds i8* %x, i64 4
+  %4 = bitcast i8* %3 to i16*
+  %5 = load i16* %4, align 2
+  %6 = getelementptr inbounds i8* %x, i64 6
+  %7 = bitcast i8* %6 to i16*
+  %8 = load i16* %7, align 2
+  %9 = zext i16 %8 to i64
+  %10 = shl nuw i64 %9, 48
+  %11 = zext i16 %5 to i64
+  %12 = shl nuw nsw i64 %11, 32
+  %13 = zext i32 %2 to i64
+  %14 = or i64 %12, %13
+  %15 = or i64 %14, %10
+  ret i64 %15
+; CHECK-LABEL: @"2xi16_i64_align"(
+; CHECK: load i64* %{{.*}}, align 4
+}
+
+; Non power of two.
+define i64 @"2xi16_i64_npo2"(i8* %x) {
+  %1 = load i8* %x, align 1
+  %2 = zext i8 %1 to i64
+  %3 = getelementptr inbounds i8* %x, i64 1
+  %4 = load i8* %3, align 1
+  %5 = zext i8 %4 to i64
+  %6 = shl nuw nsw i64 %5, 8
+  %7 = or i64 %6, %2
+  %8 = getelementptr inbounds i8* %x, i64 2
+  %9 = load i8* %8, align 1
+  %10 = zext i8 %9 to i64
+  %11 = shl nuw nsw i64 %10, 16
+  %12 = or i64 %11, %7
+  %13 = getelementptr inbounds i8* %x, i64 3
+  %14 = load i8* %13, align 1
+  %15 = zext i8 %14 to i64
+  %16 = shl nuw nsw i64 %15, 24
+  %17 = or i64 %16, %12
+  %18 = getelementptr inbounds i8* %x, i64 4
+  %19 = load i8* %18, align 1
+  %20 = zext i8 %19 to i64
+  %21 = shl nuw nsw i64 %20, 32
+  %22 = or i64 %21, %17
+  %23 = getelementptr inbounds i8* %x, i64 5
+  %24 = load i8* %23, align 1
+  %25 = zext i8 %24 to i64
+  %26 = shl nuw nsw i64 %25, 40
+  %27 = or i64 %26, %22
+  %28 = getelementptr inbounds i8* %x, i64 6
+  %29 = load i8* %28, align 1
+  %30 = zext i8 %29 to i64
+  %31 = shl nuw nsw i64 %30, 48
+  %32 = or i64 %31, %27
+  ret i64 %32
+; CHECK-LABEL: @"2xi16_i64_npo2"(
+; CHECK: load i32* %{{.*}}, align 1
+}
diff --git a/test/Transforms/LoopIdiom/X86/lit.local.cfg b/test/Transforms/LoopIdiom/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/LoopIdiom/X86/lit.local.cfg
+++ b/test/Transforms/LoopIdiom/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg b/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg
index 6642d28..675f48e 100644
--- a/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg
+++ b/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg
@@ -1,5 +1,4 @@
 config.suffixes = ['.ll']
 
-targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
+if not 'AArch64' in config.root.targets:
     config.unsupported = True
diff --git a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
index 756ea82..1d56dde 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
+; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - -arm-atomic-cfg-tidy=0 | FileCheck %s
 ;
 ; LSR should only check for valid address modes when the IV user is a
 ; memory address.
diff --git a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
+++ b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
+++ b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg b/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
index 2e46300..5d33887 100644
--- a/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
+++ b/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'PowerPC' in targets:
+if not 'PowerPC' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/LoopUnroll/X86/lit.local.cfg b/test/Transforms/LoopUnroll/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/LoopUnroll/X86/lit.local.cfg
+++ b/test/Transforms/LoopUnroll/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/LoopUnroll/pr18861.ll b/test/Transforms/LoopUnroll/pr18861.ll
new file mode 100644
index 0000000..62f2610
--- /dev/null
+++ b/test/Transforms/LoopUnroll/pr18861.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -loop-unroll -indvars -disable-output
+
+@b = external global i32, align 4
+
+; Function Attrs: nounwind uwtable
+define void @fn1() #0 {
+entry:
+  br label %for.cond1thread-pre-split
+
+for.cond1thread-pre-split:                        ; preds = %for.inc8, %entry
+  %storemerge1 = phi i32 [ 0, %entry ], [ %inc9, %for.inc8 ]
+  br i1 undef, label %for.inc8, label %for.cond2.preheader.lr.ph
+
+for.cond2.preheader.lr.ph:                        ; preds = %for.cond1thread-pre-split
+  br label %for.cond2.preheader
+
+for.cond2.preheader:                              ; preds = %for.inc5, %for.cond2.preheader.lr.ph
+  br label %for.cond2
+
+for.cond2:                                        ; preds = %for.body3, %for.cond2.preheader
+  %storemerge = phi i32 [ %add, %for.body3 ], [ 0, %for.cond2.preheader ]
+  %cmp = icmp slt i32 %storemerge, 1
+  br i1 %cmp, label %for.body3, label %for.inc5
+
+for.body3:                                        ; preds = %for.cond2
+  %tobool4 = icmp eq i32 %storemerge, 0
+  %add = add nsw i32 %storemerge, 1
+  br i1 %tobool4, label %for.cond2, label %if.then
+
+if.then:                                          ; preds = %for.body3
+  store i32 %storemerge1, i32* @b, align 4
+  ret void
+
+for.inc5:                                         ; preds = %for.cond2
+  br i1 undef, label %for.cond1.for.inc8_crit_edge, label %for.cond2.preheader
+
+for.cond1.for.inc8_crit_edge:                     ; preds = %for.inc5
+  br label %for.inc8
+
+for.inc8:                                         ; preds = %for.cond1.for.inc8_crit_edge, %for.cond1thread-pre-split
+  %inc9 = add nsw i32 %storemerge1, 1
+  br label %for.cond1thread-pre-split
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll
index d8bbea9..a14087d 100644
--- a/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -2,6 +2,12 @@
 
 ; Tests for unrolling loops with run-time trip counts
 
+; CHECK: %xtraiter = and i32 %n
+; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; CHECK: %lcmp.overflow = icmp eq i32 %n, 0
+; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
+; CHECK: br i1 %lcmp.or, label %unr.cmp
+
 ; CHECK: unr.cmp{{.*}}:
 ; CHECK: for.body.unr{{.*}}:
 ; CHECK: for.body:
diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll
new file mode 100644
index 0000000..5e45a2d
--- /dev/null
+++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll
@@ -0,0 +1,285 @@
+; RUN: opt < %s -loop-unroll -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; loop4 contains a small loop which should be completely unrolled by
+; the default unrolling heuristics.  It serves as a control for the
+; unroll(disable) pragma test loop4_with_disable.
+;
+; CHECK-LABEL: @loop4(
+; CHECK-NOT: br i1
+define void @loop4(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; #pragma clang loop unroll(disable)
+;
+; CHECK-LABEL: @loop4_with_disable(
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @loop4_with_disable(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!1 = metadata !{metadata !1, metadata !2}
+!2 = metadata !{metadata !"llvm.loop.unroll.enable", i1 false}
+
+; loop64 has a high enough count that it should *not* be unrolled by
+; the default unrolling heuristic.  It serves as the control for the
+; unroll(enable) pragma test loop64_with_.* tests below.
+;
+; CHECK-LABEL: @loop64(
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @loop64(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; #pragma clang loop unroll(enable)
+; Loop should be fully unrolled.
+;
+; CHECK-LABEL: @loop64_with_enable(
+; CHECK-NOT: br i1
+define void @loop64_with_enable(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!3 = metadata !{metadata !3, metadata !4}
+!4 = metadata !{metadata !"llvm.loop.unroll.enable", i1 true}
+
+; #pragma clang loop unroll_count(4)
+; Loop should be unrolled 4 times.
+;
+; CHECK-LABEL: @loop64_with_count4(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @loop64_with_count4(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!5 = metadata !{metadata !5, metadata !6}
+!6 = metadata !{metadata !"llvm.loop.unroll.count", i32 4}
+
+
+; #pragma clang loop unroll_count(enable) unroll_count(4)
+; Loop should be unrolled 4 times.
+;
+; CHECK-LABEL: @loop64_with_enable_and_count4(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @loop64_with_enable_and_count4(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!7 = metadata !{metadata !7, metadata !6, metadata !4}
+
+; #pragma clang loop unroll_count(enable)
+; Full unrolling is requested, but loop has a dynamic trip count so
+; no unrolling should occur.
+;
+; CHECK-LABEL: @dynamic_loop_with_enable(
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @dynamic_loop_with_enable(i32* nocapture %a, i32 %b) {
+entry:
+  %cmp3 = icmp sgt i32 %b, 0
+  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %b
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+!8 = metadata !{metadata !8, metadata !4}
+
+; #pragma clang loop unroll_count(4)
+; Loop has a dynamic trip count.  Unrolling should occur, but no
+; conditional branches can be removed.
+;
+; CHECK-LABEL: @dynamic_loop_with_count4(
+; CHECK-NOT: store
+; CHECK: br i1
+; CHECK: store
+; CHECK: br i1
+; CHECK: store
+; CHECK: br i1
+; CHECK: store
+; CHECK: br i1
+; CHECK: store
+; CHECK: br i1
+; CHECK-NOT: br i1
+define void @dynamic_loop_with_count4(i32* nocapture %a, i32 %b) {
+entry:
+  %cmp3 = icmp sgt i32 %b, 0
+  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %b
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+!9 = metadata !{metadata !9, metadata !6}
+
+; #pragma clang loop unroll_count(1)
+; Loop should not be unrolled
+;
+; CHECK-LABEL: @unroll_1(
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @unroll_1(i32* nocapture %a, i32 %b) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!10 = metadata !{metadata !10, metadata !11}
+!11 = metadata !{metadata !"llvm.loop.unroll.count", i32 1}
+
+; #pragma clang loop unroll(enable)
+; Loop has very high loop count (1 million) and full unrolling was requested.
+; Loop should unrolled up to the pragma threshold, but not completely.
+;
+; CHECK-LABEL: @unroll_1M(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: br i1
+define void @unroll_1M(i32* nocapture %a, i32 %b) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!12 = metadata !{metadata !12, metadata !4}
diff --git a/test/Transforms/LoopVectorize/AArch64/lit.local.cfg b/test/Transforms/LoopVectorize/AArch64/lit.local.cfg
index f1d1f88..937cffb 100644
--- a/test/Transforms/LoopVectorize/AArch64/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/AArch64/lit.local.cfg
@@ -1,6 +1,5 @@
 config.suffixes = ['.ll']
 
-targets = set(config.root.targets_to_build.split())
-if not 'ARM64' in targets:
+if not 'AArch64' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/LoopVectorize/ARM/lit.local.cfg b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/Transforms/LoopVectorize/ARM/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg b/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg
index 2e46300..5d33887 100644
--- a/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'PowerPC' in targets:
+if not 'PowerPC' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
index faed77d..fce3b70 100644
--- a/test/Transforms/LoopVectorize/X86/already-vectorized.ll
+++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
@@ -40,7 +40,7 @@ for.end:                                          ; preds = %for.body
 
 ; Now, we check for the Hint metadata
 ; CHECK: [[vect]] = metadata !{metadata [[vect]], metadata [[width:![0-9]+]], metadata [[unroll:![0-9]+]]}
-; CHECK: [[width]] = metadata !{metadata !"llvm.vectorizer.width", i32 1}
-; CHECK: [[unroll]] = metadata !{metadata !"llvm.vectorizer.unroll", i32 1}
+; CHECK: [[width]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
+; CHECK: [[unroll]] = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1}
 ; CHECK: [[scalar]] = metadata !{metadata [[scalar]], metadata [[width]], metadata [[unroll]]}
 
diff --git a/test/Transforms/LoopVectorize/X86/avx512.ll b/test/Transforms/LoopVectorize/X86/avx512.ll
new file mode 100644
index 0000000..a220866
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/avx512.ll
@@ -0,0 +1,35 @@
+; RUN: opt -mattr=+avx512f --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Verify that we generate 512-bit wide vectors for a basic integer memset
+; loop.
+
+; CHECK-LABEL: f:
+; CHECK: vmovdqu32 %zmm{{.}}, (
+; CHECK-NOT: %ymm
+
+define void @f(i32* %a, i32 %n) {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  store i32 %n, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/lit.local.cfg b/test/Transforms/LoopVectorize/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/LoopVectorize/X86/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
index 9e4e989..8e0ca41 100644
--- a/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -9,8 +9,9 @@
 ; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2
 ; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
 
-; This file tests the llvm.vectorizer.pragma forcing vectorization even when
-; optimization levels are too low, or when vectorization is disabled.
+; This file tests the llvm.loop.vectorize.enable metadata forcing
+; vectorization even when optimization levels are too low, or when
+; vectorization is disabled.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -170,6 +171,6 @@ for.end:                                          ; preds = %for.body
 }
 
 !0 = metadata !{metadata !0, metadata !1}
-!1 = metadata !{metadata !"llvm.vectorizer.enable", i1 1}
+!1 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 1}
 !2 = metadata !{metadata !2, metadata !3}
-!3 = metadata !{metadata !"llvm.vectorizer.enable", i1 0}
+!3 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 0}
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
index 84ffb27..074313b 100644
--- a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
+++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
@@ -53,7 +53,7 @@ for.end:
 }
 
 !1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
+!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
 
 ;
 ; This method will not be vectorized, as scalar cost is lower than any of vector costs.
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
index 1b979e5..97c31a1 100644
--- a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
+++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
@@ -44,7 +44,7 @@ for.end:
 }
 
 !1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
+!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
 
 ;
 ; This loop will not be vectorized as the trip count is below the threshold.
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
new file mode 100644
index 0000000..6cdd29b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -0,0 +1,160 @@
+; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+
+; C/C++ code for tests
+; void test(int *A, int Length) {
+; #pragma clang loop vectorize(enable) interleave(enable)
+;   for (int i = 0; i < Length; i++) {
+;     A[i] = i;
+;     if (A[i] > Length)
+;       break;
+;   }
+; }
+
+; void test_disabled(int *A, int Length) {
+; #pragma clang loop vectorize(disable) interleave(disable)
+;   for (int i = 0; i < Length; i++)
+;     A[i] = i;
+; }
+
+; void test_array_bounds(int *A, int *B, int Length) {
+; #pragma clang loop vectorize(enable)
+;   for (int i = 0; i < Length; i++)
+;     A[i] = A[B[i]];
+; }
+
+; File, line, and column should match those specified in the metadata
+; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
+; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization was not specified
+; CHECK: remark: source.cpp:13:5: loop not vectorized: vector width and interleave count are explicitly set to 1
+; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds
+; CHECK: remark: source.cpp:19:5: loop not vectorized: vectorization is explicitly enabled
+
+; CHECK: _Z4testPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+; CHECK: _Z13test_disabledPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+; CHECK: _Z17test_array_boundsPiS_i
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind optsize ssp uwtable
+define void @_Z4testPii(i32* nocapture %A, i32 %Length) #0 {
+entry:
+  %cmp10 = icmp sgt i32 %Length, 0, !dbg !12
+  br i1 %cmp10, label %for.body, label %for.end, !dbg !12, !llvm.loop !14
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !16
+  %0 = trunc i64 %indvars.iv to i32, !dbg !16
+  store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18
+  %cmp3 = icmp sle i32 %0, %Length, !dbg !22
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !12
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, %Length, !dbg !12
+  %or.cond = and i1 %cmp3, %cmp, !dbg !22
+  br i1 %or.cond, label %for.body, label %for.end, !dbg !22
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void, !dbg !24
+}
+
+; Function Attrs: nounwind optsize ssp uwtable
+define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 {
+entry:
+  %cmp4 = icmp sgt i32 %Length, 0, !dbg !25
+  br i1 %cmp4, label %for.body, label %for.end, !dbg !25, !llvm.loop !27
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !30
+  %0 = trunc i64 %indvars.iv to i32, !dbg !30
+  store i32 %0, i32* %arrayidx, align 4, !dbg !30, !tbaa !18
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !25
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !25
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !25
+  br i1 %exitcond, label %for.end, label %for.body, !dbg !25, !llvm.loop !27
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void, !dbg !31
+}
+
+; Function Attrs: nounwind optsize ssp uwtable
+define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 {
+entry:
+  %cmp9 = icmp sgt i32 %Length, 0, !dbg !32
+  br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32, !llvm.loop !34
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body, !dbg !35
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv, !dbg !35
+  %0 = load i32* %arrayidx, align 4, !dbg !35, !tbaa !18
+  %idxprom1 = sext i32 %0 to i64, !dbg !35
+  %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1, !dbg !35
+  %1 = load i32* %arrayidx2, align 4, !dbg !35, !tbaa !18
+  %arrayidx4 = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !35
+  store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !32
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !32, !llvm.loop !34
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void, !dbg !36
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
+!1 = metadata !{metadata !"source.cpp", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !7, metadata !8}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 1}
+!5 = metadata !{i32 786473, metadata !1}
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
+!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_disabled", metadata !"test_disabled", metadata !"", i32 10, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z13test_disabledPii, null, null, metadata !2, i32 10}
+!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_array_bounds", metadata !"test_array_bounds", metadata !"", i32 16, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, metadata !2, i32 16}
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!11 = metadata !{metadata !"clang version 3.5.0"}
+!12 = metadata !{i32 3, i32 8, metadata !13, null}
+!13 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
+!14 = metadata !{metadata !14, metadata !15, metadata !15}
+!15 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!16 = metadata !{i32 4, i32 5, metadata !17, null}
+!17 = metadata !{i32 786443, metadata !1, metadata !13, i32 3, i32 36, i32 0, i32 1}
+!18 = metadata !{metadata !19, metadata !19, i64 0}
+!19 = metadata !{metadata !"int", metadata !20, i64 0}
+!20 = metadata !{metadata !"omnipotent char", metadata !21, i64 0}
+!21 = metadata !{metadata !"Simple C/C++ TBAA"}
+!22 = metadata !{i32 5, i32 9, metadata !23, null}
+!23 = metadata !{i32 786443, metadata !1, metadata !17, i32 5, i32 9, i32 0, i32 2}
+!24 = metadata !{i32 8, i32 1, metadata !4, null}
+!25 = metadata !{i32 12, i32 8, metadata !26, null}
+!26 = metadata !{i32 786443, metadata !1, metadata !7, i32 12, i32 3, i32 0, i32 3}
+!27 = metadata !{metadata !27, metadata !28, metadata !29}
+!28 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1}
+!29 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
+!30 = metadata !{i32 13, i32 5, metadata !26, null}
+!31 = metadata !{i32 14, i32 1, metadata !7, null}
+!32 = metadata !{i32 18, i32 8, metadata !33, null}
+!33 = metadata !{i32 786443, metadata !1, metadata !8, i32 18, i32 3, i32 0, i32 4}
+!34 = metadata !{metadata !34, metadata !15}
+!35 = metadata !{i32 19, i32 5, metadata !33, null}
+!36 = metadata !{i32 20, i32 1, metadata !8, null}
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
index 685d034..f683447 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -1,8 +1,17 @@
 ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=VECTORIZED %s
-; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='.*vectorize.*' 2>&1 | FileCheck -check-prefix=UNROLLED %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
 
-; VECTORIZED: remark: {{.*}}.c:17:8: vectorized loop (vectorization factor: 4, unrolling interleave factor: 1)
-; UNROLLED: remark: {{.*}}.c:17:8: unrolled with interleaving factor 4 (vectorization not beneficial)
+; This code has all the !dbg annotations needed to track source line information,
+; but is missing the llvm.dbg.cu annotation. This prevents code generation from
+; emitting debug info in the final output.
+; RUN: llc -mtriple x86_64-pc-linux-gnu %s -o - | FileCheck -check-prefix=DEBUG-OUTPUT %s
+; DEBUG-OUTPUT-NOT: .loc
+; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
+
+; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization factor: 4, unrolling interleave factor: 1)
+; UNROLLED: remark: vectorization-remarks.c:17:8: unrolled with interleaving factor 4 (vectorization not beneficial)
+; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vector width and interleave count are explicitly set to 1
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
@@ -37,11 +46,9 @@ for.end:                                          ; preds = %for.body
 
 declare void @ibar(i32*) #1
 
-!llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [./vectorization-remarks.c] [DW_LANG_C99]
 !1 = metadata !{metadata !"vectorization-remarks.c", metadata !"."}
 !2 = metadata !{}
 !3 = metadata !{metadata !4}
diff --git a/test/Transforms/LoopVectorize/XCore/lit.local.cfg b/test/Transforms/LoopVectorize/XCore/lit.local.cfg
index 4d17d46..bb48713 100644
--- a/test/Transforms/LoopVectorize/XCore/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/XCore/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'XCore' in targets:
+if not 'XCore' in config.root.targets:
     config.unsupported = True
diff --git a/test/Transforms/LoopVectorize/control-flow.ll b/test/Transforms/LoopVectorize/control-flow.ll
new file mode 100644
index 0000000..e4ba77f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/control-flow.ll
@@ -0,0 +1,78 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+
+; C/C++ code for control flow test
+; int test(int *A, int Length) {
+;   for (int i = 0; i < Length; i++) {
+;     if (A[i] > 10.0) goto end;
+;     A[i] = 0;
+;   }
+; end:
+;   return 0;
+; }
+
+; CHECK: remark: source.cpp:5:9: loop not vectorized: loop control flow is not understood by vectorizer
+; CHECK: remark: source.cpp:5:9: loop not vectorized: vectorization was not specified
+
+; CHECK: _Z4testPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind optsize ssp uwtable
+define i32 @_Z4testPii(i32* nocapture %A, i32 %Length) #0 {
+entry:
+  %cmp8 = icmp sgt i32 %Length, 0, !dbg !10
+  br i1 %cmp8, label %for.body.preheader, label %end, !dbg !10
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body, !dbg !12
+
+for.body:                                         ; preds = %for.body.preheader, %if.else
+  %indvars.iv = phi i64 [ %indvars.iv.next, %if.else ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !12
+  %0 = load i32* %arrayidx, align 4, !dbg !12, !tbaa !15
+  %cmp1 = icmp sgt i32 %0, 10, !dbg !12
+  br i1 %cmp1, label %end.loopexit, label %if.else, !dbg !12
+
+if.else:                                          ; preds = %for.body
+  store i32 0, i32* %arrayidx, align 4, !dbg !19, !tbaa !15
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+  %1 = trunc i64 %indvars.iv.next to i32, !dbg !10
+  %cmp = icmp slt i32 %1, %Length, !dbg !10
+  br i1 %cmp, label %for.body, label %end.loopexit, !dbg !10
+
+end.loopexit:                                     ; preds = %if.else, %for.body
+  br label %end
+
+end:                                              ; preds = %end.loopexit, %entry
+  ret i32 0, !dbg !20
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
+!1 = metadata !{metadata !"source.cpp", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 2}
+!5 = metadata !{i32 786473, metadata !1}
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!9 = metadata !{metadata !"clang version 3.5.0"}
+!10 = metadata !{i32 3, i32 8, metadata !11, null}
+!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
+!12 = metadata !{i32 5, i32 9, metadata !13, null}
+!13 = metadata !{i32 786443, metadata !1, metadata !14, i32 5, i32 9, i32 0, i32 2}
+!14 = metadata !{i32 786443, metadata !1, metadata !11, i32 4, i32 3, i32 0, i32 1}
+!15 = metadata !{metadata !16, metadata !16, i64 0}
+!16 = metadata !{metadata !"int", metadata !17, i64 0}
+!17 = metadata !{metadata !"omnipotent char", metadata !18, i64 0}
+!18 = metadata !{metadata !"Simple C/C++ TBAA"}
+!19 = metadata !{i32 8, i32 7, metadata !13, null}
+!20 = metadata !{i32 12, i32 3, metadata !4, null}
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
index dbe0243..6e3e8ed 100644
--- a/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -156,7 +156,7 @@ for.body:
   br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end
 
 cond.false:
-  %cond.1 = or i32 %inc3, sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c) to i32))
+  %cond.1 = or i32 %inc3, sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 1), i32** @c) to i32))
   br label %cond.end
 
 cond.end:
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
index ad2c663..7dabcb2 100644
--- a/test/Transforms/LoopVectorize/induction.ll
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -108,3 +108,64 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
 ; <label>:5                                       ; preds = %1
   ret i32 %2
 }
+
+; This loop has a backedge taken count of i32_max. We need to check for this
+; condition and branch directly to the scalar loop.
+
+; CHECK-LABEL: max_i32_backedgetaken
+; CHECK:  %backedge.overflow = icmp eq i32 -1, -1
+; CHECK:  br i1 %backedge.overflow, label %scalar.ph, label %overflow.checked
+
+; CHECK: scalar.ph:
+; CHECK:  %bc.resume.val = phi i32 [ %resume.val, %middle.block ], [ 0, %0 ]
+; CHECK:  %bc.merge.rdx = phi i32 [ 1, %0 ], [ %5, %middle.block ]
+
+define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
+
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
+  %b.0 = phi i32 [ 0, %0 ], [ %3, %1 ]
+  %2 = and i32 %a.0, 4
+  %3 = add i32 %b.0, -1
+  %4 = icmp eq i32 %3, 0
+  br i1 %4, label %5, label %1
+
+; <label>:5                                       ; preds = %1
+  ret i32 %2
+}
+
+; When generating the overflow check we must sure that the induction start value
+; is defined before the branch to the scalar preheader.
+
+; CHECK-LABEL: testoverflowcheck
+; CHECK: entry
+; CHECK: %[[LOAD:.*]] = load i8
+; CHECK: %[[VAL:.*]] =  zext i8 %[[LOAD]] to i32
+; CHECK: br
+
+; CHECK: scalar.ph
+; CHECK: phi i32 [ %{{.*}}, %middle.block ], [ %[[VAL]], %entry ]
+
+@e = global i8 1, align 1
+@d = common global i32 0, align 4
+@c = common global i32 0, align 4
+define i32 @testoverflowcheck() {
+entry:
+  %.pr.i = load i8* @e, align 1
+  %0 = load i32* @d, align 4
+  %c.promoted.i = load i32* @c, align 4
+  br label %cond.end.i
+
+cond.end.i:
+  %inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ]
+  %and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ]
+  %and.i = and i32 %0, %and3.i
+  %inc.i = add i8 %inc4.i, 1
+  %tobool.i = icmp eq i8 %inc.i, 0
+  br i1 %tobool.i, label %loopexit, label %cond.end.i
+
+loopexit:
+  ret i32 %and.i
+}
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
index c3d570c..7dfaf03 100644
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -1090,3 +1090,105 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
+declare double @llvm.powi.f64(double %Val, i32 %power) nounwind readnone
+
+;CHECK-LABEL: @powi_f64(
+;CHECK: llvm.powi.v4f64
+;CHECK: ret void
+define void @powi_f64(i32 %n, double* noalias %y, double* noalias %x, i32 %P) nounwind uwtable {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8
+  %call = tail call double @llvm.powi.f64(double %0, i32  %P) nounwind readnone
+  %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx4, align 8
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+;CHECK-LABEL: @powi_f64_neg(
+;CHECK-NOT: llvm.powi.v4f64
+;CHECK: ret void
+define void @powi_f64_neg(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8
+  %1 = trunc i64 %indvars.iv to i32
+  %call = tail call double @llvm.powi.f64(double %0, i32  %1) nounwind readnone
+  %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx4, align 8
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare i64  @llvm.cttz.i64 (i64, i1) nounwind readnone
+
+;CHECK-LABEL: @cttz_f64(
+;CHECK: llvm.cttz.v4i64
+;CHECK: ret void
+define void @cttz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
+  %0 = load i64* %arrayidx, align 8
+  %call = tail call i64 @llvm.cttz.i64(i64 %0, i1 true) nounwind readnone
+  %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
+  store i64 %call, i64* %arrayidx4, align 8
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare i64  @llvm.ctlz.i64 (i64, i1) nounwind readnone
+
+;CHECK-LABEL: @ctlz_f64(
+;CHECK: llvm.ctlz.v4i64
+;CHECK: ret void
+define void @ctlz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
+  %0 = load i64* %arrayidx, align 8
+  %call = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true) nounwind readnone
+  %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
+  store i64 %call, i64* %arrayidx4, align 8
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/metadata-unroll.ll b/test/Transforms/LoopVectorize/metadata-unroll.ll
index 7f10372..2fcc53a 100644
--- a/test/Transforms/LoopVectorize/metadata-unroll.ll
+++ b/test/Transforms/LoopVectorize/metadata-unroll.ll
@@ -38,4 +38,4 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp {
 }
 
 !0 = metadata !{metadata !0, metadata !1}
-!1 = metadata !{metadata !"llvm.vectorizer.unroll", i32 2}
+!1 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 2}
diff --git a/test/Transforms/LoopVectorize/metadata-width.ll b/test/Transforms/LoopVectorize/metadata-width.ll
index 1960c0b..87de655 100644
--- a/test/Transforms/LoopVectorize/metadata-width.ll
+++ b/test/Transforms/LoopVectorize/metadata-width.ll
@@ -28,4 +28,4 @@ for.end:                                          ; preds = %for.body, %entry
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
 !0 = metadata !{metadata !0, metadata !1}
-!1 = metadata !{metadata !"llvm.vectorizer.width", i32 8}
+!1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 8}
diff --git a/test/Transforms/LoopVectorize/no_switch.ll b/test/Transforms/LoopVectorize/no_switch.ll
new file mode 100644
index 0000000..52b4285
--- /dev/null
+++ b/test/Transforms/LoopVectorize/no_switch.ll
@@ -0,0 +1,85 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+
+; CHECK: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
+; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization is explicitly enabled with width 4
+
+; CHECK: _Z11test_switchPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind optsize ssp uwtable
+define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 {
+entry:
+  %cmp18 = icmp sgt i32 %Length, 0, !dbg !10
+  br i1 %cmp18, label %for.body.preheader, label %for.end, !dbg !10, !llvm.loop !12
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body, !dbg !14
+
+for.body:                                         ; preds = %for.body.preheader, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !14
+  %0 = load i32* %arrayidx, align 4, !dbg !14, !tbaa !16
+  switch i32 %0, label %for.inc [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb3
+  ], !dbg !14
+
+sw.bb:                                            ; preds = %for.body
+  %1 = trunc i64 %indvars.iv to i32, !dbg !20
+  %mul = shl nsw i32 %1, 1, !dbg !20
+  br label %for.inc, !dbg !22
+
+sw.bb3:                                           ; preds = %for.body
+  %2 = trunc i64 %indvars.iv to i32, !dbg !23
+  store i32 %2, i32* %arrayidx, align 4, !dbg !23, !tbaa !16
+  br label %for.inc, !dbg !23
+
+for.inc:                                          ; preds = %sw.bb3, %for.body, %sw.bb
+  %storemerge = phi i32 [ %mul, %sw.bb ], [ 0, %for.body ], [ 0, %sw.bb3 ]
+  store i32 %storemerge, i32* %arrayidx, align 4, !dbg !20, !tbaa !16
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !10
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !10, !llvm.loop !12
+
+for.end.loopexit:                                 ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void, !dbg !24
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
+!1 = metadata !{metadata !"source.cpp", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_switch", metadata !"test_switch", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z11test_switchPii, null, null, metadata !2, i32 1}
+!5 = metadata !{i32 786473, metadata !1}
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!9 = metadata !{metadata !"clang version 3.5.0"}
+!10 = metadata !{i32 3, i32 8, metadata !11, null}
+!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
+!12 = metadata !{metadata !12, metadata !13, metadata !13}
+!13 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!14 = metadata !{i32 4, i32 5, metadata !15, null}
+!15 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 36, i32 0, i32 1}
+!16 = metadata !{metadata !17, metadata !17, i64 0}
+!17 = metadata !{metadata !"int", metadata !18, i64 0}
+!18 = metadata !{metadata !"omnipotent char", metadata !19, i64 0}
+!19 = metadata !{metadata !"Simple C/C++ TBAA"}
+!20 = metadata !{i32 6, i32 7, metadata !21, null}
+!21 = metadata !{i32 786443, metadata !1, metadata !15, i32 4, i32 18, i32 0, i32 2}
+!22 = metadata !{i32 7, i32 5, metadata !21, null}
+!23 = metadata !{i32 9, i32 7, metadata !21, null}
+!24 = metadata !{i32 14, i32 1, metadata !4, null}
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
index e7b1e2a..01e28bc 100644
--- a/test/Transforms/LoopVectorize/runtime-check-readonly.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
@@ -5,6 +5,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 ;CHECK-LABEL: @add_ints(
 ;CHECK: br
+;CHECK: br
 ;CHECK: getelementptr
 ;CHECK-NEXT: getelementptr
 ;CHECK-DAG: icmp uge
diff --git a/test/Transforms/LoopVectorize/vect.omp.persistence.ll b/test/Transforms/LoopVectorize/vect.omp.persistence.ll
index dc3df7a..f646567 100644
--- a/test/Transforms/LoopVectorize/vect.omp.persistence.ll
+++ b/test/Transforms/LoopVectorize/vect.omp.persistence.ll
@@ -18,7 +18,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ;
 ; Test #1
 ;
-; Ensure that "llvm.vectorizer.enable" metadata was not lost prior to LoopVectorize pass.
+; Ensure that "llvm.loop.vectorize.enable" metadata was not lost prior to LoopVectorize pass.
 ; In past LoopRotate was clearing that metadata.
 ;
 ; The source C code is:
@@ -62,12 +62,12 @@ for.end:
 }
 
 !1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
+!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
 
 ;
 ; Test #2
 ;
-; Ensure that "llvm.vectorizer.enable" metadata was not lost even
+; Ensure that "llvm.loop.vectorize.enable" metadata was not lost even
 ; if loop was not rotated (see http://reviews.llvm.org/D3348#comment-4).
 ;
 define i32 @nonrotated(i32 %a) {
@@ -85,4 +85,4 @@ return:
 }
 
 !3 = metadata !{metadata !3, metadata !4}
-!4 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
+!4 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll
index 7800469..47de13d 100644
--- a/test/Transforms/LoopVectorize/vectorize-once.ll
+++ b/test/Transforms/LoopVectorize/vectorize-once.ll
@@ -69,9 +69,9 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
 attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
 
 ; CHECK: !0 = metadata !{metadata !0, metadata !1, metadata !2}
-; CHECK: !1 = metadata !{metadata !"llvm.vectorizer.width", i32 1}
-; CHECK: !2 = metadata !{metadata !"llvm.vectorizer.unroll", i32 1}
+; CHECK: !1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
+; CHECK: !2 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1}
 ; CHECK: !3 = metadata !{metadata !3, metadata !1, metadata !2}
 
 !0 = metadata !{metadata !0, metadata !1}
-!1 = metadata !{metadata !"llvm.vectorizer.width", i32 1}
+!1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
diff --git a/test/Transforms/LowerAtomic/atomic-swap.ll b/test/Transforms/LowerAtomic/atomic-swap.ll
index c319834..cb11241 100644
--- a/test/Transforms/LowerAtomic/atomic-swap.ll
+++ b/test/Transforms/LowerAtomic/atomic-swap.ll
@@ -3,15 +3,20 @@
 define i8 @cmpswap() {
 ; CHECK-LABEL: @cmpswap(
   %i = alloca i8
-  %j = cmpxchg i8* %i, i8 0, i8 42 monotonic monotonic
-; CHECK: [[INST:%[a-z0-9]+]] = load
-; CHECK-NEXT: icmp
-; CHECK-NEXT: select
-; CHECK-NEXT: store
+  %pair = cmpxchg i8* %i, i8 0, i8 42 monotonic monotonic
+  %j = extractvalue { i8, i1 } %pair, 0
+; CHECK: [[OLDVAL:%[a-z0-9]+]] = load i8* [[ADDR:%[a-z0-9]+]]
+; CHECK-NEXT: [[SAME:%[a-z0-9]+]] = icmp eq i8 [[OLDVAL]], 0
+; CHECK-NEXT: [[TO_STORE:%[a-z0-9]+]] = select i1 [[SAME]], i8 42, i8 [[OLDVAL]]
+; CHECK-NEXT: store i8 [[TO_STORE]], i8* [[ADDR]]
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = insertvalue { i8, i1 } undef, i8 [[OLDVAL]], 0
+; CHECK-NEXT: [[RES:%[a-z0-9]+]] = insertvalue { i8, i1 } [[TMP]], i1 [[SAME]], 1
+; CHECK-NEXT: [[VAL:%[a-z0-9]+]] = extractvalue { i8, i1 } [[RES]], 0
   ret i8 %j
-; CHECK: ret i8 [[INST]]
+; CHECK: ret i8 [[VAL]]
 }
 
+
 define i8 @swap() {
 ; CHECK-LABEL: @swap(
   %i = alloca i8
diff --git a/test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll b/test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll
new file mode 100644
index 0000000..3673c04
--- /dev/null
+++ b/test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -lowerswitch -S | FileCheck %s
+; CHECK-NOT: icmp eq i32 %0, 1
+
+define i32 @foo(i32 %a) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %a.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  %0 = load i32* %a.addr, align 4
+  switch i32 %0, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+  ]
+
+sw.bb:
+  ret i32 12
+
+sw.bb1:
+  ret i32 4
+
+sw.bb2:
+  ret i32 2
+
+sw.default:
+  ret i32 9
+}
diff --git a/test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll b/test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll
new file mode 100644
index 0000000..0f73721
--- /dev/null
+++ b/test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -lowerswitch -S | FileCheck %s
+; CHECK-NOT: {{.*}}icmp eq{{.*}}
+;
+;int foo(int a) {
+;
+;  switch (a) {
+;  case 0:
+;    return 10;
+;  case 1:
+;    return 3;
+;  default:
+;    __builtin_unreachable();
+;  }
+;
+;}
+
+define i32 @foo(i32 %a) nounwind ssp uwtable {
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  store i32 %a, i32* %2, align 4
+  %3 = load i32* %2, align 4
+  switch i32 %3, label %6 [
+    i32 0, label %4
+    i32 1, label %5
+  ]
+
+; <label>:4 
+  store i32 10, i32* %1
+  br label %7
+
+; <label>:5
+  store i32 3, i32* %1
+  br label %7
+
+; <label>:6
+  unreachable
+
+; <label>:7
+  %8 = load i32* %1
+  ret i32 %8
+}
diff --git a/test/Transforms/LowerSwitch/feature.ll b/test/Transforms/LowerSwitch/feature.ll
index e85f03e..09d25f0 100644
--- a/test/Transforms/LowerSwitch/feature.ll
+++ b/test/Transforms/LowerSwitch/feature.ll
@@ -3,93 +3,57 @@
 ; We have switch on input.
 ; On output we should got binary comparison tree. Check that all is fine.
 
-;CHECK:      entry:
-;CHECK-NEXT:   br label %NodeBlock37
+;CHECK:     entry:
+;CHECK-NEXT:  br label %NodeBlock19
 
-;CHECK:      NodeBlock37:                                      ; preds = %entry
-;CHECK-NEXT:   %Pivot38 = icmp slt i32 %tmp158, 10
-;CHECK-NEXT:   br i1 %Pivot38, label %NodeBlock13, label %NodeBlock35
+;CHECK:     NodeBlock19:                                      ; preds = %entry
+;CHECK-NEXT:  %Pivot20 = icmp slt i32 %tmp158, 10
+;CHECK-NEXT:  br i1 %Pivot20, label %NodeBlock5, label %NodeBlock17
 
-;CHECK:      NodeBlock35:                                      ; preds = %NodeBlock37
-;CHECK-NEXT:   %Pivot36 = icmp slt i32 %tmp158, 13
-;CHECK-NEXT:   br i1 %Pivot36, label %NodeBlock23, label %NodeBlock33
+;CHECK:     NodeBlock17:                                      ; preds = %NodeBlock19
+;CHECK-NEXT:  %Pivot18 = icmp slt i32 %tmp158, 13
+;CHECK-NEXT:  br i1 %Pivot18, label %NodeBlock9, label %NodeBlock15
 
-;CHECK:      NodeBlock33:                                      ; preds = %NodeBlock35
-;CHECK-NEXT:   %Pivot34 = icmp slt i32 %tmp158, 14
-;CHECK-NEXT:   br i1 %Pivot34, label %LeafBlock25, label %NodeBlock31
+;CHECK:     NodeBlock15:                                      ; preds = %NodeBlock17
+;CHECK-NEXT:  %Pivot16 = icmp slt i32 %tmp158, 14
+;CHECK-NEXT:  br i1 %Pivot16, label %bb330, label %NodeBlock13
 
-;CHECK:      NodeBlock31:                                      ; preds = %NodeBlock33
-;CHECK-NEXT:   %Pivot32 = icmp slt i32 %tmp158, 15
-;CHECK-NEXT:   br i1 %Pivot32, label %LeafBlock27, label %LeafBlock29
+;CHECK:     NodeBlock13:                                      ; preds = %NodeBlock15
+;CHECK-NEXT:  %Pivot14 = icmp slt i32 %tmp158, 15
+;CHECK-NEXT:  br i1 %Pivot14, label %bb332, label %LeafBlock11
 
-;CHECK:      LeafBlock29:                                      ; preds = %NodeBlock31
-;CHECK-NEXT:   %SwitchLeaf30 = icmp eq i32 %tmp158, 15
-;CHECK-NEXT:   br i1 %SwitchLeaf30, label %bb334, label %NewDefault
+;CHECK:     LeafBlock11:                                      ; preds = %NodeBlock13
+;CHECK-NEXT:  %SwitchLeaf12 = icmp eq i32 %tmp158, 15
+;CHECK-NEXT:  br i1 %SwitchLeaf12, label %bb334, label %NewDefault
 
-;CHECK:      LeafBlock27:                                      ; preds = %NodeBlock31
-;CHECK-NEXT:   %SwitchLeaf28 = icmp eq i32 %tmp158, 14
-;CHECK-NEXT:   br i1 %SwitchLeaf28, label %bb332, label %NewDefault
+;CHECK:     NodeBlock9:                                       ; preds = %NodeBlock17
+;CHECK-NEXT:  %Pivot10 = icmp slt i32 %tmp158, 11
+;CHECK-NEXT:  br i1 %Pivot10, label %bb324, label %NodeBlock7
 
-;CHECK:      LeafBlock25:                                      ; preds = %NodeBlock33
-;CHECK-NEXT:   %SwitchLeaf26 = icmp eq i32 %tmp158, 13
-;CHECK-NEXT:   br i1 %SwitchLeaf26, label %bb330, label %NewDefault
+;CHECK:     NodeBlock7:                                       ; preds = %NodeBlock9
+;CHECK-NEXT:  %Pivot8 = icmp slt i32 %tmp158, 12
+;CHECK-NEXT:  br i1 %Pivot8, label %bb326, label %bb328
 
-;CHECK:      NodeBlock23:                                      ; preds = %NodeBlock35
-;CHECK-NEXT:   %Pivot24 = icmp slt i32 %tmp158, 11
-;CHECK-NEXT:   br i1 %Pivot24, label %LeafBlock15, label %NodeBlock21
+;CHECK:     NodeBlock5:                                       ; preds = %NodeBlock19
+;CHECK-NEXT:  %Pivot6 = icmp slt i32 %tmp158, 7
+;CHECK-NEXT:  br i1 %Pivot6, label %NodeBlock, label %NodeBlock3
 
-;CHECK:      NodeBlock21:                                      ; preds = %NodeBlock23
-;CHECK-NEXT:   %Pivot22 = icmp slt i32 %tmp158, 12
-;CHECK-NEXT:   br i1 %Pivot22, label %LeafBlock17, label %LeafBlock19
+;CHECK:     NodeBlock3:                                       ; preds = %NodeBlock5
+;CHECK-NEXT:  %Pivot4 = icmp slt i32 %tmp158, 8
+;CHECK-NEXT:  br i1 %Pivot4, label %bb, label %NodeBlock1
 
-;CHECK:      LeafBlock19:                                      ; preds = %NodeBlock21
-;CHECK-NEXT:   %SwitchLeaf20 = icmp eq i32 %tmp158, 12
-;CHECK-NEXT:   br i1 %SwitchLeaf20, label %bb328, label %NewDefault
+;CHECK:     NodeBlock1:                                       ; preds = %NodeBlock3
+;CHECK-NEXT:  %Pivot2 = icmp slt i32 %tmp158, 9
+;CHECK-NEXT:  br i1 %Pivot2, label %bb338, label %bb322
 
-;CHECK:      LeafBlock17:                                      ; preds = %NodeBlock21
-;CHECK-NEXT:   %SwitchLeaf18 = icmp eq i32 %tmp158, 11
-;CHECK-NEXT:   br i1 %SwitchLeaf18, label %bb326, label %NewDefault
+;CHECK:     NodeBlock:                                        ; preds = %NodeBlock5
+;CHECK-NEXT:  %Pivot = icmp slt i32 %tmp158, 0
+;CHECK-NEXT:  br i1 %Pivot, label %LeafBlock, label %bb338
 
-;CHECK:      LeafBlock15:                                      ; preds = %NodeBlock23
-;CHECK-NEXT:   %SwitchLeaf16 = icmp eq i32 %tmp158, 10
-;CHECK-NEXT:   br i1 %SwitchLeaf16, label %bb324, label %NewDefault
-
-;CHECK:      NodeBlock13:                                      ; preds = %NodeBlock37
-;CHECK-NEXT:   %Pivot14 = icmp slt i32 %tmp158, 7
-;CHECK-NEXT:   br i1 %Pivot14, label %NodeBlock, label %NodeBlock11
-
-;CHECK:      NodeBlock11:                                      ; preds = %NodeBlock13
-;CHECK-NEXT:   %Pivot12 = icmp slt i32 %tmp158, 8
-;CHECK-NEXT:   br i1 %Pivot12, label %LeafBlock3, label %NodeBlock9
-
-;CHECK:      NodeBlock9:                                       ; preds = %NodeBlock11
-;CHECK-NEXT:   %Pivot10 = icmp slt i32 %tmp158, 9
-;CHECK-NEXT:   br i1 %Pivot10, label %LeafBlock5, label %LeafBlock7
-
-;CHECK:      LeafBlock7:                                       ; preds = %NodeBlock9
-;CHECK-NEXT:   %SwitchLeaf8 = icmp eq i32 %tmp158, 9
-;CHECK-NEXT:   br i1 %SwitchLeaf8, label %bb322, label %NewDefault
-
-;CHECK:      LeafBlock5:                                       ; preds = %NodeBlock9
-;CHECK-NEXT:   %SwitchLeaf6 = icmp eq i32 %tmp158, 8
-;CHECK-NEXT:   br i1 %SwitchLeaf6, label %bb338, label %NewDefault
-
-;CHECK:      LeafBlock3:                                       ; preds = %NodeBlock11
-;CHECK-NEXT:   %SwitchLeaf4 = icmp eq i32 %tmp158, 7
-;CHECK-NEXT:   br i1 %SwitchLeaf4, label %bb, label %NewDefault
-
-;CHECK:      NodeBlock:                                        ; preds = %NodeBlock13
-;CHECK-NEXT:   %Pivot = icmp slt i32 %tmp158, 0
-;CHECK-NEXT:   br i1 %Pivot, label %LeafBlock, label %LeafBlock1
-
-;CHECK:      LeafBlock1:                                       ; preds = %NodeBlock
-;CHECK-NEXT:   %SwitchLeaf2 = icmp ule i32 %tmp158, 6
-;CHECK-NEXT:   br i1 %SwitchLeaf2, label %bb338, label %NewDefault
-
-;CHECK:      LeafBlock:                                        ; preds = %NodeBlock
-;CHECK-NEXT:   %tmp158.off = add i32 %tmp158, 6
-;CHECK-NEXT:   %SwitchLeaf = icmp ule i32 %tmp158.off, 4
-;CHECK-NEXT:   br i1 %SwitchLeaf, label %bb338, label %NewDefault
+;CHECK:     LeafBlock:                                        ; preds = %NodeBlock
+;CHECK-NEXT:  %tmp158.off = add i32 %tmp158, 6
+;CHECK-NEXT:  %SwitchLeaf = icmp ule i32 %tmp158.off, 4
+;CHECK-NEXT:  br i1 %SwitchLeaf, label %bb338, label %NewDefault
 
 define i32 @main(i32 %tmp158) {
 entry:
diff --git a/test/Transforms/MergeFunc/functions.ll b/test/Transforms/MergeFunc/functions.ll
new file mode 100644
index 0000000..006fdf5
--- /dev/null
+++ b/test/Transforms/MergeFunc/functions.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; Be sure we don't merge cross-referenced functions of same type.
+
+; CHECK-LABEL: @left
+; CHECK-LABEL: entry-block
+; CHECK-LABEL: call void @right(i64 %p)
+define void @left(i64 %p) {
+entry-block:
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  ret void
+}
+
+; CHECK-LABEL: @right
+; CHECK-LABEL: entry-block
+; CHECK-LABEL: call void @left(i64 %p)
+define void @right(i64 %p) {
+entry-block:
+  call void @left(i64 %p)
+  call void @left(i64 %p)
+  call void @left(i64 %p)
+  call void @left(i64 %p)
+  ret void
+}
diff --git a/test/Transforms/MergeFunc/ranges.ll b/test/Transforms/MergeFunc/ranges.ll
new file mode 100644
index 0000000..e25ff1d
--- /dev/null
+++ b/test/Transforms/MergeFunc/ranges.ll
@@ -0,0 +1,43 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+define i1 @cmp_with_range(i8*, i8*) {
+  %v1 = load i8* %0, !range !0
+  %v2 = load i8* %1, !range !0
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_no_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_no_range
+; CHECK-NEXT  %v1 = load i8* %0
+; CHECK-NEXT  %v2 = load i8* %1
+; CHECK-NEXT  %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT  ret i1 %out
+  %v1 = load i8* %0
+  %v2 = load i8* %1
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_different_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_different_range
+; CHECK-NEXT:  %v1 = load i8* %0, !range !1
+; CHECK-NEXT:  %v2 = load i8* %1, !range !1
+; CHECK-NEXT:  %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT:  ret i1 %out
+  %v1 = load i8* %0, !range !1
+  %v2 = load i8* %1, !range !1
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_with_same_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_with_same_range
+; CHECK: tail call i1 @cmp_with_range
+  %v1 = load i8* %0, !range !0
+  %v2 = load i8* %1, !range !0
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+!0 = metadata !{i8 0, i8 2}
+!1 = metadata !{i8 5, i8 7}
diff --git a/test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll b/test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll
index 5780990..2430035 100644
--- a/test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll
+++ b/test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll
@@ -1,9 +1,10 @@
-; RUN: opt < %s -reassociate -instcombine -constprop -dce -S | not grep add
+; RUN: opt < %s -reassociate -S | FileCheck %s
 
-define i32 @test(i32 %A) {
-	%X = add i32 %A, 1		; <i32> [#uses=1]
-	%Y = add i32 %A, 1		; <i32> [#uses=1]
-	%r = sub i32 %X, %Y		; <i32> [#uses=1]
-	ret i32 %r
+define i32 @test1(i32 %A) {
+; CHECK-LABEL: test1
+; CHECK: ret i32 0
+  %X = add i32 %A, 1
+  %Y = add i32 %A, 1
+  %r = sub i32 %X, %Y
+  ret i32 %r
 }
-
diff --git a/test/Transforms/Reassociate/2002-05-15-MissedTree.ll b/test/Transforms/Reassociate/2002-05-15-MissedTree.ll
index e8bccbd..5f3c920 100644
--- a/test/Transforms/Reassociate/2002-05-15-MissedTree.ll
+++ b/test/Transforms/Reassociate/2002-05-15-MissedTree.ll
@@ -1,9 +1,11 @@
-; RUN: opt < %s -reassociate -instcombine -constprop -die -S | not grep 5
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
 
-define i32 @test(i32 %A, i32 %B) {
-	%W = add i32 %B, -5		; <i32> [#uses=1]
-	%Y = add i32 %A, 5		; <i32> [#uses=1]
-	%Z = add i32 %W, %Y		; <i32> [#uses=1]
+define i32 @test1(i32 %A, i32 %B) {
+; CHECK-LABEL: test1
+; CHECK: %Z = add i32 %B, %A
+; CHECK: ret i32 %Z
+	%W = add i32 %B, -5
+	%Y = add i32 %A, 5
+	%Z = add i32 %W, %Y
 	ret i32 %Z
 }
-
diff --git a/test/Transforms/Reassociate/2002-05-15-SubReassociate.ll b/test/Transforms/Reassociate/2002-05-15-SubReassociate.ll
index c18af5e..29c178f 100644
--- a/test/Transforms/Reassociate/2002-05-15-SubReassociate.ll
+++ b/test/Transforms/Reassociate/2002-05-15-SubReassociate.ll
@@ -1,12 +1,30 @@
+; RUN: opt < %s -reassociate -constprop -instcombine -dce -S | FileCheck %s
+
 ; With sub reassociation, constant folding can eliminate all of the constants.
-;
-; RUN: opt < %s -reassociate -constprop -instcombine -dce -S | not grep add
+define i32 @test1(i32 %A, i32 %B) {
+; CHECK-LABEL: test1
+; CHECK-NEXT: %Z = sub i32 %A, %B
+; CHECK-NEXT: ret i32 %Z
 
-define i32 @test(i32 %A, i32 %B) {
-	%W = add i32 5, %B		; <i32> [#uses=1]
-	%X = add i32 -7, %A		; <i32> [#uses=1]
-	%Y = sub i32 %X, %W		; <i32> [#uses=1]
-	%Z = add i32 %Y, 12		; <i32> [#uses=1]
-	ret i32 %Z
+  %W = add i32 5, %B
+  %X = add i32 -7, %A
+  %Y = sub i32 %X, %W
+  %Z = add i32 %Y, 12
+  ret i32 %Z
 }
+ 
+; With sub reassociation, constant folding can eliminate the two 12 constants.
+define i32 @test2(i32 %A, i32 %B, i32 %C, i32 %D) {
+; CHECK-LABEL: test2
+; CHECK-NEXT: %sum = add i32 %B, %A
+; CHECK-NEXT: %sum1 = add i32 %sum, %C
+; CHECK-NEXT: %Q = sub i32 %D, %sum1
+; CHECK-NEXT: ret i32 %Q
 
+  %M = add i32 %A, 12
+  %N = add i32 %M, %B
+  %O = add i32 %N, %C
+  %P = sub i32 %D, %O
+  %Q = add i32 %P, 12
+  ret i32 %Q
+}
diff --git a/test/Transforms/Reassociate/2002-05-15-SubReassociate2.ll b/test/Transforms/Reassociate/2002-05-15-SubReassociate2.ll
deleted file mode 100644
index 5848821..0000000
--- a/test/Transforms/Reassociate/2002-05-15-SubReassociate2.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; With sub reassociation, constant folding can eliminate the two 12 constants.
-;
-; RUN: opt < %s -reassociate -constprop -dce -S | not grep 12
-
-define i32 @test(i32 %A, i32 %B, i32 %C, i32 %D) {
-	%M = add i32 %A, 12		; <i32> [#uses=1]
-	%N = add i32 %M, %B		; <i32> [#uses=1]
-	%O = add i32 %N, %C		; <i32> [#uses=1]
-	%P = sub i32 %D, %O		; <i32> [#uses=1]
-	%Q = add i32 %P, 12		; <i32> [#uses=1]
-	ret i32 %Q
-}
-
diff --git a/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll b/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll
index f66148b..f6cef35 100644
--- a/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll
+++ b/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll
@@ -1,23 +1,24 @@
-; RUN: opt < %s -reassociate -instcombine -S |\
-; RUN:   grep "ret i32 0"
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
 
-define i32 @f(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
-	%tmp.2 = add i32 %a4, %a3		; <i32> [#uses=1]
-	%tmp.4 = add i32 %tmp.2, %a2		; <i32> [#uses=1]
-	%tmp.6 = add i32 %tmp.4, %a1		; <i32> [#uses=1]
-	%tmp.8 = add i32 %tmp.6, %a0		; <i32> [#uses=1]
-	%tmp.11 = add i32 %a3, %a2		; <i32> [#uses=1]
-	%tmp.13 = add i32 %tmp.11, %a1		; <i32> [#uses=1]
-	%tmp.15 = add i32 %tmp.13, %a0		; <i32> [#uses=1]
-	%tmp.18 = add i32 %a2, %a1		; <i32> [#uses=1]
-	%tmp.20 = add i32 %tmp.18, %a0		; <i32> [#uses=1]
-	%tmp.23 = add i32 %a1, %a0		; <i32> [#uses=1]
-	%tmp.26 = sub i32 %tmp.8, %tmp.15		; <i32> [#uses=1]
-	%tmp.28 = add i32 %tmp.26, %tmp.20		; <i32> [#uses=1]
-	%tmp.30 = sub i32 %tmp.28, %tmp.23		; <i32> [#uses=1]
-	%tmp.32 = sub i32 %tmp.30, %a4		; <i32> [#uses=1]
-	%tmp.34 = sub i32 %tmp.32, %a2		; <i32> [#uses=2]
-	%T = mul i32 %tmp.34, %tmp.34		; <i32> [#uses=1]
-	ret i32 %T
-}
+define i32 @f1(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: f1
+; CHECK-NEXT: ret i32 0
 
+  %tmp.2 = add i32 %a4, %a3
+  %tmp.4 = add i32 %tmp.2, %a2
+  %tmp.6 = add i32 %tmp.4, %a1
+  %tmp.8 = add i32 %tmp.6, %a0
+  %tmp.11 = add i32 %a3, %a2
+  %tmp.13 = add i32 %tmp.11, %a1
+  %tmp.15 = add i32 %tmp.13, %a0
+  %tmp.18 = add i32 %a2, %a1
+  %tmp.20 = add i32 %tmp.18, %a0
+  %tmp.23 = add i32 %a1, %a0
+  %tmp.26 = sub i32 %tmp.8, %tmp.15
+  %tmp.28 = add i32 %tmp.26, %tmp.20
+  %tmp.30 = sub i32 %tmp.28, %tmp.23
+  %tmp.32 = sub i32 %tmp.30, %a4
+  %tmp.34 = sub i32 %tmp.32, %a2
+  %T = mul i32 %tmp.34, %tmp.34
+  ret i32 %T
+}
diff --git a/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll b/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
index 384cbc9..f783955 100644
--- a/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
@@ -1,8 +1,12 @@
-; RUN: opt < %s -reassociate -disable-output
+; RUN: opt < %s -reassociate -S | FileCheck %s
 
-define void @foo() {
-	%tmp162 = fsub <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>> [#uses=1]
-	%tmp164 = fmul <4 x float> zeroinitializer, %tmp162		; <<4 x float>> [#uses=0]
-	ret void
-}
+define <4 x float> @test1() {
+; CHECK-LABEL: test1
+; CHECK-NEXT: %tmp1 = fsub <4 x float> zeroinitializer, zeroinitializer
+; CHECK-NEXT: %tmp2 = fmul <4 x float> zeroinitializer, %tmp1
+; CHECK-NEXT: ret <4 x float> %tmp2
 
+  %tmp1 = fsub <4 x float> zeroinitializer, zeroinitializer
+  %tmp2 = fmul <4 x float> zeroinitializer, %tmp1
+  ret <4 x float> %tmp2
+}
diff --git a/test/Transforms/Reassociate/basictest.ll b/test/Transforms/Reassociate/basictest.ll
index fda0ca6..d70bfcb 100644
--- a/test/Transforms/Reassociate/basictest.ll
+++ b/test/Transforms/Reassociate/basictest.ll
@@ -1,46 +1,47 @@
-; With reassociation, constant folding can eliminate the 12 and -12 constants.
-;
-; RUN: opt < %s -reassociate  -gvn -instcombine -S | FileCheck %s
+; RUN: opt < %s -reassociate -gvn -instcombine -S | FileCheck %s
 
 define i32 @test1(i32 %arg) {
-	%tmp1 = sub i32 -12, %arg
-	%tmp2 = add i32 %tmp1, 12
-	ret i32 %tmp2
-; CHECK-LABEL: @test1(
+  %tmp1 = sub i32 -12, %arg
+  %tmp2 = add i32 %tmp1, 12
+  ret i32 %tmp2
+
+; CHECK-LABEL: @test1
 ; CHECK-NEXT: sub i32 0, %arg
 ; CHECK-NEXT: ret i32
 }
 
 define i32 @test2(i32 %reg109, i32 %reg1111) {
-	%reg115 = add i32 %reg109, -30		; <i32> [#uses=1]
-	%reg116 = add i32 %reg115, %reg1111		; <i32> [#uses=1]
-	%reg117 = add i32 %reg116, 30		; <i32> [#uses=1]
-	ret i32 %reg117
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: add i32 %reg1111, %reg109
-; CHECK-NEXT: ret i32
+  %reg115 = add i32 %reg109, -30
+  %reg116 = add i32 %reg115, %reg1111
+  %reg117 = add i32 %reg116, 30
+  ret i32 %reg117
+
+; CHECK-LABEL: @test2
+; CHECK-NEXT: %reg117 = add i32 %reg1111, %reg109
+; CHECK-NEXT: ret i32 %reg117
 }
 
-@e = external global i32		; <i32*> [#uses=3]
-@a = external global i32		; <i32*> [#uses=3]
-@b = external global i32		; <i32*> [#uses=3]
-@c = external global i32		; <i32*> [#uses=3]
-@f = external global i32		; <i32*> [#uses=3]
+@e = external global i32
+@a = external global i32
+@b = external global i32
+@c = external global i32
+@f = external global i32
 
 define void @test3() {
-	%A = load i32* @a		; <i32> [#uses=2]
-	%B = load i32* @b		; <i32> [#uses=2]
-	%C = load i32* @c		; <i32> [#uses=2]
-	%t1 = add i32 %A, %B		; <i32> [#uses=1]
-	%t2 = add i32 %t1, %C		; <i32> [#uses=1]
-	%t3 = add i32 %C, %A		; <i32> [#uses=1]
-	%t4 = add i32 %t3, %B		; <i32> [#uses=1]
-	; e = (a+b)+c;
-        store i32 %t2, i32* @e
-        ; f = (a+c)+b
-	store i32 %t4, i32* @f
-	ret void
-; CHECK-LABEL: @test3(
+  %A = load i32* @a
+  %B = load i32* @b
+  %C = load i32* @c
+  %t1 = add i32 %A, %B
+  %t2 = add i32 %t1, %C
+  %t3 = add i32 %C, %A
+  %t4 = add i32 %t3, %B
+  ; e = (a+b)+c;
+  store i32 %t2, i32* @e
+  ; f = (a+c)+b
+  store i32 %t4, i32* @f
+  ret void
+
+; CHECK-LABEL: @test3
 ; CHECK: add i32
 ; CHECK: add i32
 ; CHECK-NOT: add i32
@@ -48,19 +49,20 @@ define void @test3() {
 }
 
 define void @test4() {
-	%A = load i32* @a		; <i32> [#uses=2]
-	%B = load i32* @b		; <i32> [#uses=2]
-	%C = load i32* @c		; <i32> [#uses=2]
-	%t1 = add i32 %A, %B		; <i32> [#uses=1]
-	%t2 = add i32 %t1, %C		; <i32> [#uses=1]
-	%t3 = add i32 %C, %A		; <i32> [#uses=1]
-	%t4 = add i32 %t3, %B		; <i32> [#uses=1]
-	; e = c+(a+b)
-        store i32 %t2, i32* @e
-        ; f = (c+a)+b
-	store i32 %t4, i32* @f
-	ret void
-; CHECK-LABEL: @test4(
+  %A = load i32* @a
+  %B = load i32* @b
+  %C = load i32* @c
+  %t1 = add i32 %A, %B
+  %t2 = add i32 %t1, %C
+  %t3 = add i32 %C, %A
+  %t4 = add i32 %t3, %B
+  ; e = c+(a+b)
+  store i32 %t2, i32* @e
+  ; f = (c+a)+b
+  store i32 %t4, i32* @f
+  ret void
+
+; CHECK-LABEL: @test4
 ; CHECK: add i32
 ; CHECK: add i32
 ; CHECK-NOT: add i32
@@ -68,19 +70,20 @@ define void @test4() {
 }
 
 define void @test5() {
-	%A = load i32* @a		; <i32> [#uses=2]
-	%B = load i32* @b		; <i32> [#uses=2]
-	%C = load i32* @c		; <i32> [#uses=2]
-	%t1 = add i32 %B, %A		; <i32> [#uses=1]
-	%t2 = add i32 %t1, %C		; <i32> [#uses=1]
-	%t3 = add i32 %C, %A		; <i32> [#uses=1]
-	%t4 = add i32 %t3, %B		; <i32> [#uses=1]
-	; e = c+(b+a)
-        store i32 %t2, i32* @e
-        ; f = (c+a)+b
-	store i32 %t4, i32* @f
-	ret void
-; CHECK-LABEL: @test5(
+  %A = load i32* @a
+  %B = load i32* @b
+  %C = load i32* @c
+  %t1 = add i32 %B, %A
+  %t2 = add i32 %t1, %C
+  %t3 = add i32 %C, %A
+  %t4 = add i32 %t3, %B
+  ; e = c+(b+a)
+  store i32 %t2, i32* @e
+  ; f = (c+a)+b
+  store i32 %t4, i32* @f
+  ret void
+
+; CHECK-LABEL: @test5
 ; CHECK: add i32
 ; CHECK: add i32
 ; CHECK-NOT: add i32
@@ -88,60 +91,61 @@ define void @test5() {
 }
 
 define i32 @test6() {
-	%tmp.0 = load i32* @a
-	%tmp.1 = load i32* @b
-        ; (a+b)
-	%tmp.2 = add i32 %tmp.0, %tmp.1
-	%tmp.4 = load i32* @c
-	; (a+b)+c
-        %tmp.5 = add i32 %tmp.2, %tmp.4
-	; (a+c)
-        %tmp.8 = add i32 %tmp.0, %tmp.4
-	; (a+c)+b
-        %tmp.11 = add i32 %tmp.8, %tmp.1
-	; X ^ X = 0
-        %RV = xor i32 %tmp.5, %tmp.11
-	ret i32 %RV
-; CHECK-LABEL: @test6(
+  %tmp.0 = load i32* @a
+  %tmp.1 = load i32* @b
+  ; (a+b)
+  %tmp.2 = add i32 %tmp.0, %tmp.1
+  %tmp.4 = load i32* @c
+  ; (a+b)+c
+  %tmp.5 = add i32 %tmp.2, %tmp.4
+  ; (a+c)
+  %tmp.8 = add i32 %tmp.0, %tmp.4
+  ; (a+c)+b
+  %tmp.11 = add i32 %tmp.8, %tmp.1
+  ; X ^ X = 0
+  %RV = xor i32 %tmp.5, %tmp.11
+  ret i32 %RV
+
+; CHECK-LABEL: @test6
 ; CHECK: ret i32 0
 }
 
 ; This should be one add and two multiplies.
 define i32 @test7(i32 %A, i32 %B, i32 %C) {
- ; A*A*B + A*C*A
-	%aa = mul i32 %A, %A
-	%aab = mul i32 %aa, %B
-	%ac = mul i32 %A, %C
-	%aac = mul i32 %ac, %A
-	%r = add i32 %aab, %aac
-	ret i32 %r
-; CHECK-LABEL: @test7(
+  ; A*A*B + A*C*A
+  %aa = mul i32 %A, %A
+  %aab = mul i32 %aa, %B
+  %ac = mul i32 %A, %C
+  %aac = mul i32 %ac, %A
+  %r = add i32 %aab, %aac
+  ret i32 %r
+
+; CHECK-LABEL: @test7
 ; CHECK-NEXT: add i32 %C, %B
 ; CHECK-NEXT: mul i32 
 ; CHECK-NEXT: mul i32 
 ; CHECK-NEXT: ret i32 
 }
 
-
 define i32 @test8(i32 %X, i32 %Y, i32 %Z) {
-	%A = sub i32 0, %X
-	%B = mul i32 %A, %Y
-        ; (-X)*Y + Z -> Z-X*Y
-	%C = add i32 %B, %Z
-	ret i32 %C
-; CHECK-LABEL: @test8(
+  %A = sub i32 0, %X
+  %B = mul i32 %A, %Y
+  ; (-X)*Y + Z -> Z-X*Y
+  %C = add i32 %B, %Z
+  ret i32 %C
+
+; CHECK-LABEL: @test8
 ; CHECK-NEXT: %A = mul i32 %Y, %X
 ; CHECK-NEXT: %C = sub i32 %Z, %A
 ; CHECK-NEXT: ret i32 %C
 }
 
-
 ; PR5458
 define i32 @test9(i32 %X) {
   %Y = mul i32 %X, 47
   %Z = add i32 %Y, %Y
   ret i32 %Z
-; CHECK-LABEL: @test9(
+; CHECK-LABEL: @test9
 ; CHECK-NEXT: mul i32 %X, 94
 ; CHECK-NEXT: ret i32
 }
@@ -150,7 +154,7 @@ define i32 @test10(i32 %X) {
   %Y = add i32 %X ,%X
   %Z = add i32 %Y, %X
   ret i32 %Z
-; CHECK-LABEL: @test10(
+; CHECK-LABEL: @test10
 ; CHECK-NEXT: mul i32 %X, 3
 ; CHECK-NEXT: ret i32
 }
@@ -160,7 +164,7 @@ define i32 @test11(i32 %W) {
   %Y = add i32 %X ,%X
   %Z = add i32 %Y, %X
   ret i32 %Z
-; CHECK-LABEL: @test11(
+; CHECK-LABEL: @test11
 ; CHECK-NEXT: mul i32 %W, 381
 ; CHECK-NEXT: ret i32
 }
@@ -169,11 +173,10 @@ define i32 @test12(i32 %X) {
   %A = sub i32 1, %X
   %B = sub i32 2, %X
   %C = sub i32 3, %X
-
   %Y = add i32 %A ,%B
   %Z = add i32 %Y, %C
   ret i32 %Z
-; CHECK-LABEL: @test12(
+; CHECK-LABEL: @test12
 ; CHECK-NEXT: mul i32 %X, -3
 ; CHECK-NEXT: add i32{{.*}}, 6
 ; CHECK-NEXT: ret i32
@@ -185,7 +188,7 @@ define i32 @test13(i32 %X1, i32 %X2, i32 %X3) {
   %C = mul i32 %X1, %X3  ; X1*X3
   %D = add i32 %B, %C    ; -X1*X2 + X1*X3 -> X1*(X3-X2)
   ret i32 %D
-; CHECK-LABEL: @test13(
+; CHECK-LABEL: @test13
 ; CHECK-NEXT: sub i32 %X3, %X2
 ; CHECK-NEXT: mul i32 {{.*}}, %X1
 ; CHECK-NEXT: ret i32
@@ -197,9 +200,10 @@ define i32 @test14(i32 %X1, i32 %X2) {
   %C = mul i32 %X2, -47  ; X2*-47
   %D = add i32 %B, %C    ; X1*47 + X2*-47 -> 47*(X1-X2)
   ret i32 %D
-; CHECK-LABEL: @test14(
+
+; CHECK-LABEL: @test14
 ; CHECK-NEXT: sub i32 %X1, %X2
-; CHECK-NEXT: mul i32 {{.*}}, 47
+; CHECK-NEXT: mul i32 %tmp, 47
 ; CHECK-NEXT: ret i32
 }
 
@@ -210,7 +214,6 @@ define i32 @test15(i32 %X1, i32 %X2, i32 %X3) {
   %C = and i1 %A, %B
   %D = select i1 %C, i32 %X1, i32 0
   ret i32 %D
-; CHECK-LABEL: @test15(
+; CHECK-LABEL: @test15
 ; CHECK: and i1 %A, %B
 }
-
diff --git a/test/Transforms/Reassociate/fp-commute.ll b/test/Transforms/Reassociate/fp-commute.ll
index 025689b..eac5b59 100644
--- a/test/Transforms/Reassociate/fp-commute.ll
+++ b/test/Transforms/Reassociate/fp-commute.ll
@@ -1,18 +1,19 @@
 ; RUN: opt -reassociate -S < %s | FileCheck %s
 
-target triple = "armv7-apple-ios"
-
 declare void @use(float)
 
-; CHECK: test
-define void @test(float %x, float %y) {
-entry:
+define void @test1(float %x, float %y) {
+; CHECK-LABEL: test1
 ; CHECK: fmul float %x, %y
 ; CHECK: fmul float %x, %y
-  %0 = fmul float %x, %y
-  %1 = fmul float %y, %x
-  %2 = fsub float %0, %1
-  call void @use(float %0)
-  call void @use(float %2)
+; CHECK: fsub float %1, %2
+; CHECK: call void @use(float %{{.*}})
+; CHECK: call void @use(float %{{.*}})
+
+  %1 = fmul float %x, %y
+  %2 = fmul float %y, %x
+  %3 = fsub float %1, %2
+  call void @use(float %1)
+  call void @use(float %3)
   ret void
 }
diff --git a/test/Transforms/Reassociate/inverses.ll b/test/Transforms/Reassociate/inverses.ll
index afe076c..8500cd8 100644
--- a/test/Transforms/Reassociate/inverses.ll
+++ b/test/Transforms/Reassociate/inverses.ll
@@ -32,3 +32,15 @@ define i32 @test3(i32 %b, i32 %a) {
 ; CHECK: %tmp.5 = add i32 %b, 1234
 ; CHECK: ret i32 %tmp.5
 }
+
+define i32 @test4(i32 %b, i32 %a) {
+        %tmp.1 = add i32 %a, 1234
+        %tmp.2 = add i32 %b, %tmp.1
+        %tmp.4 = xor i32 %a, -1
+        ; (b+(a+1234))+~a -> b+1233
+        %tmp.5 = add i32 %tmp.2, %tmp.4
+        ret i32 %tmp.5
+; CHECK-LABEL: @test4(
+; CHECK: %tmp.5 = add i32 %b, 1233
+; CHECK: ret i32 %tmp.5
+}
diff --git a/test/Transforms/Reassociate/looptest.ll b/test/Transforms/Reassociate/looptest.ll
index 91723bc..aad3b20 100644
--- a/test/Transforms/Reassociate/looptest.ll
+++ b/test/Transforms/Reassociate/looptest.ll
@@ -18,6 +18,7 @@
 
 declare i32 @printf(i8*, ...)
 
+; FIXME: No longer works.
 define void @test(i32 %Num, i32* %Array) {
 bb0:
 	%cond221 = icmp eq i32 0, %Num		; <i1> [#uses=3]
diff --git a/test/Transforms/Reassociate/mightymul.ll b/test/Transforms/Reassociate/mightymul.ll
index cfbc485..ae915da 100644
--- a/test/Transforms/Reassociate/mightymul.ll
+++ b/test/Transforms/Reassociate/mightymul.ll
@@ -1,7 +1,7 @@
-; RUN: opt < %s -reassociate
+; RUN: opt < %s -reassociate -disable-output
 ; PR13021
 
-define i32 @foo(i32 %x) {
+define i32 @test1(i32 %x) {
   %t0 = mul i32 %x, %x
   %t1 = mul i32 %t0, %t0
   %t2 = mul i32 %t1, %t1
diff --git a/test/Transforms/Reassociate/multistep.ll b/test/Transforms/Reassociate/multistep.ll
index d794647..12eaeee 100644
--- a/test/Transforms/Reassociate/multistep.ll
+++ b/test/Transforms/Reassociate/multistep.ll
@@ -28,4 +28,3 @@ define i64 @multistep2(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-NEXT: ret
   ret i64 %t3
 }
-
diff --git a/test/Transforms/Reassociate/negation.ll b/test/Transforms/Reassociate/negation.ll
index 6a3dfd3..12d2c86 100644
--- a/test/Transforms/Reassociate/negation.ll
+++ b/test/Transforms/Reassociate/negation.ll
@@ -1,21 +1,31 @@
-; RUN: opt < %s -reassociate -instcombine -S | not grep sub
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
 
 ; Test that we can turn things like X*-(Y*Z) -> X*-1*Y*Z.
 
 define i32 @test1(i32 %a, i32 %b, i32 %z) {
-	%c = sub i32 0, %z		; <i32> [#uses=1]
-	%d = mul i32 %a, %b		; <i32> [#uses=1]
-	%e = mul i32 %c, %d		; <i32> [#uses=1]
-	%f = mul i32 %e, 12345		; <i32> [#uses=1]
-	%g = sub i32 0, %f		; <i32> [#uses=1]
-	ret i32 %g
+; CHECK-LABEL: test1
+; CHECK-NEXT: %e = mul i32 %a, 12345
+; CHECK-NEXT: %f = mul i32 %e, %b
+; CHECK-NEXT: %g = mul i32 %f, %z
+; CHECK-NEXT: ret i32 %g
+
+  %c = sub i32 0, %z
+  %d = mul i32 %a, %b
+  %e = mul i32 %c, %d
+  %f = mul i32 %e, 12345
+  %g = sub i32 0, %f
+  ret i32 %g
 }
 
 define i32 @test2(i32 %a, i32 %b, i32 %z) {
-	%d = mul i32 %z, 40		; <i32> [#uses=1]
-	%c = sub i32 0, %d		; <i32> [#uses=1]
-	%e = mul i32 %a, %c		; <i32> [#uses=1]
-	%f = sub i32 0, %e		; <i32> [#uses=1]
-	ret i32 %f
-}
+; CHECK-LABEL: test2
+; CHECK-NEXT: %e = mul i32 %a, 40
+; CHECK-NEXT: %f = mul i32 %e, %z
+; CHECK-NEXT: ret i32 %f
 
+  %d = mul i32 %z, 40
+  %c = sub i32 0, %d
+  %e = mul i32 %a, %c
+  %f = sub i32 0, %e
+  ret i32 %f
+}
diff --git a/test/Transforms/Reassociate/otherops.ll b/test/Transforms/Reassociate/otherops.ll
index d68d008..7718881 100644
--- a/test/Transforms/Reassociate/otherops.ll
+++ b/test/Transforms/Reassociate/otherops.ll
@@ -1,28 +1,42 @@
 ; Reassociation should apply to Add, Mul, And, Or, & Xor
 ;
-; RUN: opt < %s -reassociate -constprop -instcombine -die -S | not grep 12
+; RUN: opt < %s -reassociate -constprop -instcombine -die -S | FileCheck %s
 
 define i32 @test_mul(i32 %arg) {
-	%tmp1 = mul i32 12, %arg		; <i32> [#uses=1]
-	%tmp2 = mul i32 %tmp1, 12		; <i32> [#uses=1]
-	ret i32 %tmp2
+; CHECK-LABEL: test_mul
+; CHECK-NEXT: %tmp2 = mul i32 %arg, 144
+; CHECK-NEXT: ret i32 %tmp2
+
+  %tmp1 = mul i32 12, %arg
+  %tmp2 = mul i32 %tmp1, 12
+  ret i32 %tmp2
 }
 
 define i32 @test_and(i32 %arg) {
-	%tmp1 = and i32 14, %arg		; <i32> [#uses=1]
-	%tmp2 = and i32 %tmp1, 14		; <i32> [#uses=1]
-	ret i32 %tmp2
+; CHECK-LABEL: test_and
+; CHECK-NEXT: %tmp2 = and i32 %arg, 14
+; CHECK-NEXT: ret i32 %tmp2
+
+  %tmp1 = and i32 14, %arg
+  %tmp2 = and i32 %tmp1, 14
+  ret i32 %tmp2
 }
 
 define i32 @test_or(i32 %arg) {
-	%tmp1 = or i32 14, %arg		; <i32> [#uses=1]
-	%tmp2 = or i32 %tmp1, 14		; <i32> [#uses=1]
-	ret i32 %tmp2
+; CHECK-LABEL: test_or
+; CHECK-NEXT: %tmp2 = or i32 %arg, 14
+; CHECK-NEXT: ret i32 %tmp2
+
+  %tmp1 = or i32 14, %arg
+  %tmp2 = or i32 %tmp1, 14
+  ret i32 %tmp2
 }
 
 define i32 @test_xor(i32 %arg) {
-	%tmp1 = xor i32 12, %arg		; <i32> [#uses=1]
-	%tmp2 = xor i32 %tmp1, 12		; <i32> [#uses=1]
-	ret i32 %tmp2
-}
+; CHECK-LABEL: test_xor
+; CHECK-NEXT: ret i32 %arg
 
+  %tmp1 = xor i32 12, %arg
+  %tmp2 = xor i32 %tmp1, 12
+  ret i32 %tmp2
+}
diff --git a/test/Transforms/Reassociate/shift-factor.ll b/test/Transforms/Reassociate/shift-factor.ll
index 73af5e5..8fbf1b9 100644
--- a/test/Transforms/Reassociate/shift-factor.ll
+++ b/test/Transforms/Reassociate/shift-factor.ll
@@ -1,12 +1,14 @@
 ; There should be exactly one shift and one add left.
-; RUN: opt < %s -reassociate -instcombine -S > %t
-; RUN: grep shl %t | count 1
-; RUN: grep add %t | count 1
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
 
-define i32 @test(i32 %X, i32 %Y) {
-	%tmp.2 = shl i32 %X, 1		; <i32> [#uses=1]
-	%tmp.6 = shl i32 %Y, 1		; <i32> [#uses=1]
-	%tmp.4 = add i32 %tmp.6, %tmp.2		; <i32> [#uses=1]
-	ret i32 %tmp.4
-}
+define i32 @test1(i32 %X, i32 %Y) {
+; CHECK-LABEL: test1
+; CHECK-NEXT: %tmp = add i32 %Y, %X
+; CHECK-NEXT: %tmp1 = shl i32 %tmp, 1
+; CHECK-NEXT: ret i32 %tmp1
 
+  %tmp.2 = shl i32 %X, 1
+  %tmp.6 = shl i32 %Y, 1
+  %tmp.4 = add i32 %tmp.6, %tmp.2
+  ret i32 %tmp.4
+}
diff --git a/test/Transforms/Reassociate/subtest.ll b/test/Transforms/Reassociate/subtest.ll
index 4c63d12..e6263d8 100644
--- a/test/Transforms/Reassociate/subtest.ll
+++ b/test/Transforms/Reassociate/subtest.ll
@@ -1,11 +1,26 @@
-; With sub reassociation, constant folding can eliminate the 12 and -12 constants.
-;
-; RUN: opt < %s -reassociate -instcombine -S | not grep 12
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
 
-define i32 @test(i32 %A, i32 %B) {
-	%X = add i32 -12, %A		; <i32> [#uses=1]
-	%Y = sub i32 %X, %B		; <i32> [#uses=1]
-	%Z = add i32 %Y, 12		; <i32> [#uses=1]
-	ret i32 %Z
+; With sub reassociation, constant folding can eliminate the 12 and -12 constants.
+define i32 @test1(i32 %A, i32 %B) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT: %Z = sub i32 %A, %B
+; CHECK-NEXT: ret i32 %Z
+  %X = add i32 -12, %A
+  %Y = sub i32 %X, %B
+  %Z = add i32 %Y, 12
+  ret i32 %Z
 }
 
+; PR2047
+; With sub reassociation, constant folding can eliminate the uses of %a.
+define i32 @test2(i32 %a, i32 %b, i32 %c) nounwind  {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: %sum = add i32 %c, %b
+; CHECK-NEXT: %tmp7 = sub i32 0, %sum
+; CHECK-NEXT: ret i32 %tmp7
+
+  %tmp3 = sub i32 %a, %b
+  %tmp5 = sub i32 %tmp3, %c
+  %tmp7 = sub i32 %tmp5, %a
+  ret i32 %tmp7
+}
diff --git a/test/Transforms/Reassociate/subtest2.ll b/test/Transforms/Reassociate/subtest2.ll
deleted file mode 100644
index 0513c5f..0000000
--- a/test/Transforms/Reassociate/subtest2.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; With sub reassociation, constant folding can eliminate the uses of %a.
-;
-; RUN: opt < %s -reassociate -instcombine -S | grep %a | count 1
-; PR2047
-
-define i32 @test(i32 %a, i32 %b, i32 %c) nounwind  {
-entry:
-	%tmp3 = sub i32 %a, %b		; <i32> [#uses=1]
-	%tmp5 = sub i32 %tmp3, %c		; <i32> [#uses=1]
-	%tmp7 = sub i32 %tmp5, %a		; <i32> [#uses=1]
-	ret i32 %tmp7
-}
-
diff --git a/test/Transforms/SCCP/atomic.ll b/test/Transforms/SCCP/atomic.ll
new file mode 100644
index 0000000..60d4896
--- /dev/null
+++ b/test/Transforms/SCCP/atomic.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -sccp -S | FileCheck %s
+
+define i1 @test_cmpxchg(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: @test_cmpxchg
+; CHECK: cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+  %val = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+  %res = extractvalue { i32, i1 } %val, 1
+  ret i1 %res
+}
diff --git a/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg b/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg
index c420349..7184443 100644
--- a/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg
+++ b/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
+if not 'AArch64' in config.root.targets:
     config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/ARM/lit.local.cfg b/test/Transforms/SLPVectorizer/ARM/lit.local.cfg
index 5fc35d8..236e1d3 100644
--- a/test/Transforms/SLPVectorizer/ARM/lit.local.cfg
+++ b/test/Transforms/SLPVectorizer/ARM/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
     config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/R600/lit.local.cfg b/test/Transforms/SLPVectorizer/R600/lit.local.cfg
index 9e0ab99..4086e8d 100644
--- a/test/Transforms/SLPVectorizer/R600/lit.local.cfg
+++ b/test/Transforms/SLPVectorizer/R600/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'R600' in targets:
+if not 'R600' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/SLPVectorizer/X86/addsub.ll b/test/Transforms/SLPVectorizer/X86/addsub.ll
new file mode 100644
index 0000000..8303bc8
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/addsub.ll
@@ -0,0 +1,181 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = common global [4 x i32] zeroinitializer, align 16
+@c = common global [4 x i32] zeroinitializer, align 16
+@d = common global [4 x i32] zeroinitializer, align 16
+@e = common global [4 x i32] zeroinitializer, align 16
+@a = common global [4 x i32] zeroinitializer, align 16
+@fb = common global [4 x float] zeroinitializer, align 16
+@fc = common global [4 x float] zeroinitializer, align 16
+@fa = common global [4 x float] zeroinitializer, align 16
+
+; CHECK-LABEL: @addsub
+; CHECK: %5 = add <4 x i32> %3, %4
+; CHECK: %6 = add <4 x i32> %2, %5
+; CHECK: %7 = sub <4 x i32> %2, %5
+; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+
+; Function Attrs: nounwind uwtable
+define void @addsub() #0 {
+entry:
+  %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4
+  %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4
+  %add = add nsw i32 %0, %1
+  %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4
+  %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4
+  %add1 = add nsw i32 %2, %3
+  %add2 = add nsw i32 %add, %add1
+  store i32 %add2, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4
+  %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4
+  %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4
+  %add3 = add nsw i32 %4, %5
+  %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4
+  %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4
+  %add4 = add nsw i32 %6, %7
+  %sub = sub nsw i32 %add3, %add4
+  store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4
+  %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4
+  %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4
+  %add5 = add nsw i32 %8, %9
+  %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4
+  %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4
+  %add6 = add nsw i32 %10, %11
+  %add7 = add nsw i32 %add5, %add6
+  store i32 %add7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4
+  %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4
+  %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4
+  %add8 = add nsw i32 %12, %13
+  %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4
+  %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4
+  %add9 = add nsw i32 %14, %15
+  %sub10 = sub nsw i32 %add8, %add9
+  store i32 %sub10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4
+  ret void
+}
+
+; CHECK-LABEL: @subadd
+; CHECK:  %5 = add <4 x i32> %3, %4
+; CHECK:  %6 = sub <4 x i32> %2, %5
+; CHECK:  %7 = add <4 x i32> %2, %5
+; CHECK:  %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+
+; Function Attrs: nounwind uwtable
+define void @subadd() #0 {
+entry:
+  %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4
+  %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4
+  %add = add nsw i32 %0, %1
+  %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4
+  %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4
+  %add1 = add nsw i32 %2, %3
+  %sub = sub nsw i32 %add, %add1
+  store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4
+  %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4
+  %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4
+  %add2 = add nsw i32 %4, %5
+  %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4
+  %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4
+  %add3 = add nsw i32 %6, %7
+  %add4 = add nsw i32 %add2, %add3
+  store i32 %add4, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4
+  %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4
+  %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4
+  %add5 = add nsw i32 %8, %9
+  %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4
+  %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4
+  %add6 = add nsw i32 %10, %11
+  %sub7 = sub nsw i32 %add5, %add6
+  store i32 %sub7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4
+  %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4
+  %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4
+  %add8 = add nsw i32 %12, %13
+  %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4
+  %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4
+  %add9 = add nsw i32 %14, %15
+  %add10 = add nsw i32 %add8, %add9
+  store i32 %add10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4
+  ret void
+}
+
+; CHECK-LABEL: @faddfsub
+; CHECK: %2 = fadd <4 x float> %0, %1
+; CHECK: %3 = fsub <4 x float> %0, %1
+; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; Function Attrs: nounwind uwtable
+define void @faddfsub() #0 {
+entry:
+  %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+  %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
+  %add = fadd float %0, %1
+  store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
+  %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+  %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
+  %sub = fsub float %2, %3
+  store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
+  %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+  %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
+  %add1 = fadd float %4, %5
+  store float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
+  %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+  %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
+  %sub2 = fsub float %6, %7
+  store float %sub2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
+  ret void
+}
+
+; CHECK-LABEL: @fsubfadd
+; CHECK: %2 = fsub <4 x float> %0, %1
+; CHECK: %3 = fadd <4 x float> %0, %1
+; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; Function Attrs: nounwind uwtable
+define void @fsubfadd() #0 {
+entry:
+  %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+  %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
+  %sub = fsub float %0, %1
+  store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
+  %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+  %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
+  %add = fadd float %2, %3
+  store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
+  %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+  %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
+  %sub1 = fsub float %4, %5
+  store float %sub1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
+  %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+  %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
+  %add2 = fadd float %6, %7
+  store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
+  ret void
+}
+
+; CHECK-LABEL: @No_faddfsub
+; CHECK-NOT: fadd <4 x float>
+; CHECK-NOT: fsub <4 x float>
+; CHECK-NOT: shufflevector
+; Function Attrs: nounwind uwtable
+define void @No_faddfsub() #0 {
+entry:
+  %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+  %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
+  %add = fadd float %0, %1
+  store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
+  %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+  %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
+  %add1 = fadd float %2, %3
+  store float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
+  %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+  %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
+  %add2 = fadd float %4, %5
+  store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
+  %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+  %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
+  %sub = fsub float %6, %7
+  store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/Transforms/SLPVectorizer/X86/gep.ll b/test/Transforms/SLPVectorizer/X86/gep.ll
new file mode 100644
index 0000000..9e105ec
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/gep.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S |FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Test if SLP can handle GEP expressions.
+; The test perform the following action:
+;   x->first  = y->first  + 16
+;   x->second = y->second + 16
+
+; CHECK-LABEL: foo1
+; CHECK: <2 x i32*>
+define void @foo1 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y) {
+  %1 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
+  %2 = load i32** %1, align 8
+  %3 = getelementptr inbounds i32* %2, i64 16
+  %4 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
+  store i32* %3, i32** %4, align 8
+  %5 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 1
+  %6 = load i32** %5, align 8
+  %7 = getelementptr inbounds i32* %6, i64 16
+  %8 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 1
+  store i32* %7, i32** %8, align 8
+  ret void
+}
+
+; Test that we don't vectorize GEP expressions if indexes are not constants.
+; We can't produce an efficient code in that case.
+; CHECK-LABEL: foo2
+; CHECK-NOT: <2 x i32*>
+define void @foo2 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y, i32 %i) {
+  %1 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
+  %2 = load i32** %1, align 8
+  %3 = getelementptr inbounds i32* %2, i32 %i
+  %4 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
+  store i32* %3, i32** %4, align 8
+  %5 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 1
+  %6 = load i32** %5, align 8
+  %7 = getelementptr inbounds i32* %6, i32 %i
+  %8 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 1
+  store i32* %7, i32** %8, align 8
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/intrinsic.ll b/test/Transforms/SLPVectorizer/X86/intrinsic.ll
index 30c5093..937252f 100644
--- a/test/Transforms/SLPVectorizer/X86/intrinsic.ll
+++ b/test/Transforms/SLPVectorizer/X86/intrinsic.ll
@@ -117,3 +117,270 @@ entry:
 ; CHECK: store <4 x i32>
 ; CHECK: ret
 }
+
+declare i32 @llvm.ctlz.i32(i32,i1) nounwind readnone
+
+define void @vec_ctlz_i32(i32* %a, i32* %b, i32* %c, i1) {
+entry:
+  %i0 = load i32* %a, align 4
+  %i1 = load i32* %b, align 4
+  %add1 = add i32 %i0, %i1
+  %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
+
+  %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+  %i2 = load i32* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+  %i3 = load i32* %arrayidx3, align 4
+  %add2 = add i32 %i2, %i3
+  %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 true) nounwind readnone
+
+  %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+  %i4 = load i32* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+  %i5 = load i32* %arrayidx5, align 4
+  %add3 = add i32 %i4, %i5
+  %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
+
+  %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+  %i6 = load i32* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+  %i7 = load i32* %arrayidx7, align 4
+  %add4 = add i32 %i6, %i7
+  %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 true) nounwind readnone
+
+  store i32 %call1, i32* %c, align 4
+  %arrayidx8 = getelementptr inbounds i32* %c, i32 1
+  store i32 %call2, i32* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32* %c, i32 2
+  store i32 %call3, i32* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32* %c, i32 3
+  store i32 %call4, i32* %arrayidx10, align 4
+  ret void
+
+; CHECK-LABEL: @vec_ctlz_i32(
+; CHECK: load <4 x i32>
+; CHECK: load <4 x i32>
+; CHECK: call <4 x i32> @llvm.ctlz.v4i32
+; CHECK: store <4 x i32>
+; CHECK: ret
+}
+
+define void @vec_ctlz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
+entry:
+  %i0 = load i32* %a, align 4
+  %i1 = load i32* %b, align 4
+  %add1 = add i32 %i0, %i1
+  %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
+
+  %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+  %i2 = load i32* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+  %i3 = load i32* %arrayidx3, align 4
+  %add2 = add i32 %i2, %i3
+  %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 false) nounwind readnone
+
+  %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+  %i4 = load i32* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+  %i5 = load i32* %arrayidx5, align 4
+  %add3 = add i32 %i4, %i5
+  %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
+
+  %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+  %i6 = load i32* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+  %i7 = load i32* %arrayidx7, align 4
+  %add4 = add i32 %i6, %i7
+  %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 false) nounwind readnone
+
+  store i32 %call1, i32* %c, align 4
+  %arrayidx8 = getelementptr inbounds i32* %c, i32 1
+  store i32 %call2, i32* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32* %c, i32 2
+  store i32 %call3, i32* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32* %c, i32 3
+  store i32 %call4, i32* %arrayidx10, align 4
+  ret void
+
+; CHECK-LABEL: @vec_ctlz_i32_neg(
+; CHECK-NOT: call <4 x i32> @llvm.ctlz.v4i32
+
+}
+
+
+declare i32 @llvm.cttz.i32(i32,i1) nounwind readnone
+
+define void @vec_cttz_i32(i32* %a, i32* %b, i32* %c, i1) {
+entry:
+  %i0 = load i32* %a, align 4
+  %i1 = load i32* %b, align 4
+  %add1 = add i32 %i0, %i1
+  %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
+
+  %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+  %i2 = load i32* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+  %i3 = load i32* %arrayidx3, align 4
+  %add2 = add i32 %i2, %i3
+  %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 true) nounwind readnone
+
+  %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+  %i4 = load i32* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+  %i5 = load i32* %arrayidx5, align 4
+  %add3 = add i32 %i4, %i5
+  %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
+
+  %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+  %i6 = load i32* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+  %i7 = load i32* %arrayidx7, align 4
+  %add4 = add i32 %i6, %i7
+  %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 true) nounwind readnone
+
+  store i32 %call1, i32* %c, align 4
+  %arrayidx8 = getelementptr inbounds i32* %c, i32 1
+  store i32 %call2, i32* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32* %c, i32 2
+  store i32 %call3, i32* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32* %c, i32 3
+  store i32 %call4, i32* %arrayidx10, align 4
+  ret void
+
+; CHECK-LABEL: @vec_cttz_i32(
+; CHECK: load <4 x i32>
+; CHECK: load <4 x i32>
+; CHECK: call <4 x i32> @llvm.cttz.v4i32
+; CHECK: store <4 x i32>
+; CHECK: ret
+}
+
+define void @vec_cttz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
+entry:
+  %i0 = load i32* %a, align 4
+  %i1 = load i32* %b, align 4
+  %add1 = add i32 %i0, %i1
+  %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
+
+  %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+  %i2 = load i32* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+  %i3 = load i32* %arrayidx3, align 4
+  %add2 = add i32 %i2, %i3
+  %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 false) nounwind readnone
+
+  %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+  %i4 = load i32* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+  %i5 = load i32* %arrayidx5, align 4
+  %add3 = add i32 %i4, %i5
+  %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
+
+  %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+  %i6 = load i32* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+  %i7 = load i32* %arrayidx7, align 4
+  %add4 = add i32 %i6, %i7
+  %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 false) nounwind readnone
+
+  store i32 %call1, i32* %c, align 4
+  %arrayidx8 = getelementptr inbounds i32* %c, i32 1
+  store i32 %call2, i32* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32* %c, i32 2
+  store i32 %call3, i32* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32* %c, i32 3
+  store i32 %call4, i32* %arrayidx10, align 4
+  ret void
+
+; CHECK-LABEL: @vec_cttz_i32_neg(
+; CHECK-NOT: call <4 x i32> @llvm.cttz.v4i32
+}
+
+
+declare float @llvm.powi.f32(float, i32)
+define void @vec_powi_f32(float* %a, float* %b, float* %c, i32 %P) {
+entry:
+  %i0 = load float* %a, align 4
+  %i1 = load float* %b, align 4
+  %add1 = fadd float %i0, %i1
+  %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
+
+  %arrayidx2 = getelementptr inbounds float* %a, i32 1
+  %i2 = load float* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float* %b, i32 1
+  %i3 = load float* %arrayidx3, align 4
+  %add2 = fadd float %i2, %i3
+  %call2 = tail call float @llvm.powi.f32(float %add2,i32 %P) nounwind readnone
+
+  %arrayidx4 = getelementptr inbounds float* %a, i32 2
+  %i4 = load float* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds float* %b, i32 2
+  %i5 = load float* %arrayidx5, align 4
+  %add3 = fadd float %i4, %i5
+  %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
+
+  %arrayidx6 = getelementptr inbounds float* %a, i32 3
+  %i6 = load float* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds float* %b, i32 3
+  %i7 = load float* %arrayidx7, align 4
+  %add4 = fadd float %i6, %i7
+  %call4 = tail call float @llvm.powi.f32(float %add4,i32 %P) nounwind readnone
+
+  store float %call1, float* %c, align 4
+  %arrayidx8 = getelementptr inbounds float* %c, i32 1
+  store float %call2, float* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds float* %c, i32 2
+  store float %call3, float* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds float* %c, i32 3
+  store float %call4, float* %arrayidx10, align 4
+  ret void
+
+; CHECK-LABEL: @vec_powi_f32(
+; CHECK: load <4 x float>
+; CHECK: load <4 x float>
+; CHECK: call <4 x float> @llvm.powi.v4f32
+; CHECK: store <4 x float>
+; CHECK: ret
+}
+
+
+define void @vec_powi_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) {
+entry:
+  %i0 = load float* %a, align 4
+  %i1 = load float* %b, align 4
+  %add1 = fadd float %i0, %i1
+  %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
+
+  %arrayidx2 = getelementptr inbounds float* %a, i32 1
+  %i2 = load float* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float* %b, i32 1
+  %i3 = load float* %arrayidx3, align 4
+  %add2 = fadd float %i2, %i3
+  %call2 = tail call float @llvm.powi.f32(float %add2,i32 %Q) nounwind readnone
+
+  %arrayidx4 = getelementptr inbounds float* %a, i32 2
+  %i4 = load float* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds float* %b, i32 2
+  %i5 = load float* %arrayidx5, align 4
+  %add3 = fadd float %i4, %i5
+  %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
+
+  %arrayidx6 = getelementptr inbounds float* %a, i32 3
+  %i6 = load float* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds float* %b, i32 3
+  %i7 = load float* %arrayidx7, align 4
+  %add4 = fadd float %i6, %i7
+  %call4 = tail call float @llvm.powi.f32(float %add4,i32 %Q) nounwind readnone
+
+  store float %call1, float* %c, align 4
+  %arrayidx8 = getelementptr inbounds float* %c, i32 1
+  store float %call2, float* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds float* %c, i32 2
+  store float %call3, float* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds float* %c, i32 3
+  store float %call4, float* %arrayidx10, align 4
+  ret void
+
+; CHECK-LABEL: @vec_powi_f32_neg(
+; CHECK-NOT: call <4 x float> @llvm.powi.v4f32
+}
diff --git a/test/Transforms/SLPVectorizer/X86/lit.local.cfg b/test/Transforms/SLPVectorizer/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/SLPVectorizer/X86/lit.local.cfg
+++ b/test/Transforms/SLPVectorizer/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/SLPVectorizer/X86/pr19657.ll b/test/Transforms/SLPVectorizer/X86/pr19657.ll
new file mode 100644
index 0000000..9352308
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/pr19657.ll
@@ -0,0 +1,73 @@
+; RUN: opt < %s -O1 -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK: load <2 x double>*
+;CHECK: fadd <2 x double>
+;CHECK: store <2 x double>
+
+; Function Attrs: nounwind uwtable
+define void @foo(double* %x) #0 {
+  %1 = alloca double*, align 8
+  store double* %x, double** %1, align 8
+  %2 = load double** %1, align 8
+  %3 = getelementptr inbounds double* %2, i64 0
+  %4 = load double* %3, align 8
+  %5 = load double** %1, align 8
+  %6 = getelementptr inbounds double* %5, i64 0
+  %7 = load double* %6, align 8
+  %8 = fadd double %4, %7
+  %9 = load double** %1, align 8
+  %10 = getelementptr inbounds double* %9, i64 0
+  %11 = load double* %10, align 8
+  %12 = fadd double %8, %11
+  %13 = load double** %1, align 8
+  %14 = getelementptr inbounds double* %13, i64 0
+  store double %12, double* %14, align 8
+  %15 = load double** %1, align 8
+  %16 = getelementptr inbounds double* %15, i64 1
+  %17 = load double* %16, align 8
+  %18 = load double** %1, align 8
+  %19 = getelementptr inbounds double* %18, i64 1
+  %20 = load double* %19, align 8
+  %21 = fadd double %17, %20
+  %22 = load double** %1, align 8
+  %23 = getelementptr inbounds double* %22, i64 1
+  %24 = load double* %23, align 8
+  %25 = fadd double %21, %24
+  %26 = load double** %1, align 8
+  %27 = getelementptr inbounds double* %26, i64 1
+  store double %25, double* %27, align 8
+  %28 = load double** %1, align 8
+  %29 = getelementptr inbounds double* %28, i64 2
+  %30 = load double* %29, align 8
+  %31 = load double** %1, align 8
+  %32 = getelementptr inbounds double* %31, i64 2
+  %33 = load double* %32, align 8
+  %34 = fadd double %30, %33
+  %35 = load double** %1, align 8
+  %36 = getelementptr inbounds double* %35, i64 2
+  %37 = load double* %36, align 8
+  %38 = fadd double %34, %37
+  %39 = load double** %1, align 8
+  %40 = getelementptr inbounds double* %39, i64 2
+  store double %38, double* %40, align 8
+  %41 = load double** %1, align 8
+  %42 = getelementptr inbounds double* %41, i64 3
+  %43 = load double* %42, align 8
+  %44 = load double** %1, align 8
+  %45 = getelementptr inbounds double* %44, i64 3
+  %46 = load double* %45, align 8
+  %47 = fadd double %43, %46
+  %48 = load double** %1, align 8
+  %49 = getelementptr inbounds double* %48, i64 3
+  %50 = load double* %49, align 8
+  %51 = fadd double %47, %50
+  %52 = load double** %1, align 8
+  %53 = getelementptr inbounds double* %52, i64 3
+  store double %51, double* %53, align 8
+  ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/SLPVectorizer/XCore/lit.local.cfg b/test/Transforms/SLPVectorizer/XCore/lit.local.cfg
index 4d17d46..bb48713 100644
--- a/test/Transforms/SLPVectorizer/XCore/lit.local.cfg
+++ b/test/Transforms/SLPVectorizer/XCore/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'XCore' in targets:
+if not 'XCore' in config.root.targets:
     config.unsupported = True
diff --git a/test/Transforms/SROA/slice-order-independence.ll b/test/Transforms/SROA/slice-order-independence.ll
new file mode 100644
index 0000000..364ef85
--- /dev/null
+++ b/test/Transforms/SROA/slice-order-independence.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+; Check that the chosen type for a split is independent from the order of
+; slices even in case of types that are skipped because their width is not a
+; byte width multiple
+define void @skipped_inttype_first({ i16*, i32 }*) {
+; CHECK-LABEL: @skipped_inttype_first
+; CHECK: alloca i8*
+  %arg = alloca { i16*, i32 }, align 8
+  %2 = bitcast { i16*, i32 }* %0 to i8*
+  %3 = bitcast { i16*, i32 }* %arg to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
+  %b = getelementptr inbounds { i16*, i32 }* %arg, i64 0, i32 0
+  %pb0 = bitcast i16** %b to i63*
+  %b0 = load i63* %pb0
+  %pb1 = bitcast i16** %b to i8**
+  %b1 = load i8** %pb1
+  ret void
+}
+
+define void @skipped_inttype_last({ i16*, i32 }*) {
+; CHECK-LABEL: @skipped_inttype_last
+; CHECK: alloca i8*
+  %arg = alloca { i16*, i32 }, align 8
+  %2 = bitcast { i16*, i32 }* %0 to i8*
+  %3 = bitcast { i16*, i32 }* %arg to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
+  %b = getelementptr inbounds { i16*, i32 }* %arg, i64 0, i32 0
+  %pb1 = bitcast i16** %b to i8**
+  %b1 = load i8** %pb1
+  %pb0 = bitcast i16** %b to i63*
+  %b0 = load i63* %pb0
+  ret void
+}
diff --git a/test/Transforms/SROA/slice-width.ll b/test/Transforms/SROA/slice-width.ll
new file mode 100644
index 0000000..179780b
--- /dev/null
+++ b/test/Transforms/SROA/slice-width.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define void @no_split_on_non_byte_width(i32) {
+; This tests that allocas are not split into slices that are not byte width multiple
+  %arg = alloca i32 , align 8
+  store i32 %0, i32* %arg
+  br label %load_i32
+
+load_i32:
+; CHECK-LABEL: load_i32:
+; CHECK-NOT: bitcast {{.*}} to i1
+; CHECK-NOT: zext i1
+  %r0 = load i32* %arg
+  br label %load_i1
+
+load_i1:
+; CHECK-LABEL: load_i1:
+; CHECK: bitcast {{.*}} to i1
+  %p1 = bitcast i32* %arg to i1*
+  %t1 = load i1* %p1
+  ret void
+}
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg
index 40532cd..a5e90f8 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'NVPTX' in targets:
+if not 'NVPTX' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index 850fc4c..c07440c 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX
 ; RUN: opt < %s -S -separate-const-offset-from-gep -gvn -dce | FileCheck %s --check-prefix=IR
 
@@ -20,6 +19,90 @@ target triple = "nvptx64-unknown-unknown"
 
 define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
 .preheader:
+  %0 = sext i32 %y to i64
+  %1 = sext i32 %x to i64
+  %2 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
+  %3 = addrspacecast float addrspace(3)* %2 to float*
+  %4 = load float* %3, align 4
+  %5 = fadd float %4, 0.000000e+00
+  %6 = add i32 %y, 1
+  %7 = sext i32 %6 to i64
+  %8 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %7
+  %9 = addrspacecast float addrspace(3)* %8 to float*
+  %10 = load float* %9, align 4
+  %11 = fadd float %5, %10
+  %12 = add i32 %x, 1
+  %13 = sext i32 %12 to i64
+  %14 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %0
+  %15 = addrspacecast float addrspace(3)* %14 to float*
+  %16 = load float* %15, align 4
+  %17 = fadd float %11, %16
+  %18 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %7
+  %19 = addrspacecast float addrspace(3)* %18 to float*
+  %20 = load float* %19, align 4
+  %21 = fadd float %17, %20
+  store float %21, float* %output, align 4
+  ret void
+}
+; PTX-LABEL: sum_of_array(
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rl|r)[0-9]+]]{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+
+; IR-LABEL: @sum_of_array(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 33
+
+; @sum_of_array2 is very similar to @sum_of_array. The only difference is in
+; the order of "sext" and "add" when computing the array indices. @sum_of_array
+; computes add before sext, e.g., array[sext(x + 1)][sext(y + 1)], while
+; @sum_of_array2 computes sext before add,
+; e.g., array[sext(x) + 1][sext(y) + 1]. SeparateConstOffsetFromGEP should be
+; able to extract constant offsets from both forms.
+define void @sum_of_array2(i32 %x, i32 %y, float* nocapture %output) {
+.preheader:
+  %0 = sext i32 %y to i64
+  %1 = sext i32 %x to i64
+  %2 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
+  %3 = addrspacecast float addrspace(3)* %2 to float*
+  %4 = load float* %3, align 4
+  %5 = fadd float %4, 0.000000e+00
+  %6 = add i64 %0, 1
+  %7 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %6
+  %8 = addrspacecast float addrspace(3)* %7 to float*
+  %9 = load float* %8, align 4
+  %10 = fadd float %5, %9
+  %11 = add i64 %1, 1
+  %12 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %0
+  %13 = addrspacecast float addrspace(3)* %12 to float*
+  %14 = load float* %13, align 4
+  %15 = fadd float %10, %14
+  %16 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %6
+  %17 = addrspacecast float addrspace(3)* %16 to float*
+  %18 = load float* %17, align 4
+  %19 = fadd float %15, %18
+  store float %19, float* %output, align 4
+  ret void
+}
+; PTX-LABEL: sum_of_array2(
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rl|r)[0-9]+]]{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+
+; IR-LABEL: @sum_of_array2(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 33
+
+; Similar to @sum_of_array3, but extends array indices using zext instead of
+; sext. e.g., array[zext(x + 1)][zext(y + 1)].
+define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {
+.preheader:
   %0 = zext i32 %y to i64
   %1 = zext i32 %x to i64
   %2 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
@@ -45,15 +128,14 @@ define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
   store float %21, float* %output, align 4
   ret void
 }
-
-; PTX-LABEL: sum_of_array(
+; PTX-LABEL: sum_of_array3(
 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rl|r)[0-9]+]]{{\]}}
 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
 
-; IR-LABEL: @sum_of_array(
-; IR: [[BASE_PTR:%[0-9]+]] = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i32 %x, i32 %y
+; IR-LABEL: @sum_of_array3(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
 ; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 1
 ; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 32
 ; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 33
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index 2e50f5f..ed40c7e 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -23,71 +23,94 @@ entry:
   %p = getelementptr inbounds [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1
   ret double* %p
 }
-; CHECK-LABEL: @struct
-; CHECK: getelementptr [1024 x %struct.S]* @struct_array, i64 0, i32 %i, i32 1
+; CHECK-LABEL: @struct(
+; CHECK: getelementptr [1024 x %struct.S]* @struct_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1
 
-; We should be able to trace into sext/zext if it's directly used as a GEP
-; index.
-define float* @sext_zext(i32 %i, i32 %j) {
+; We should be able to trace into sext(a + b) if a + b is non-negative
+; (e.g., used as an index of an inbounds GEP) and one of a and b is
+; non-negative.
+define float* @sext_add(i32 %i, i32 %j) {
 entry:
-  %i1 = add i32 %i, 1
-  %j2 = add i32 %j, 2
-  %i1.ext = sext i32 %i1 to i64
-  %j2.ext = zext i32 %j2 to i64
-  %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i1.ext, i64 %j2.ext
+  %0 = add i32 %i, 1
+  %1 = sext i32 %0 to i64  ; inbound sext(i + 1) = sext(i) + 1
+  %2 = add i32 %j, -2
+  ; However, inbound sext(j + -2) != sext(j) + -2, e.g., j = INT_MIN
+  %3 = sext i32 %2 to i64
+  %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %1, i64 %3
   ret float* %p
 }
-; CHECK-LABEL: @sext_zext
-; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i32 %i, i32 %j
-; CHECK: getelementptr float* %{{[0-9]+}}, i64 34
+; CHECK-LABEL: @sext_add(
+; CHECK-NOT: = add
+; CHECK: add i32 %j, -2
+; CHECK: sext
+; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float* %{{[a-zA-Z0-9]+}}, i64 32
 
 ; We should be able to trace into sext/zext if it can be distributed to both
 ; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b)
+;
+; This test verifies we can transform
+;   gep base, a + sext(b +nsw 1), c + zext(d +nuw 1)
+; to
+;   gep base, a + sext(b), c + zext(d); gep ..., 1 * 32 + 1
 define float* @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) {
   %b1 = add nsw i32 %b, 1
   %b2 = sext i32 %b1 to i64
-  %i = add i64 %a, %b2
+  %i = add i64 %a, %b2       ; i = a + sext(b +nsw 1)
   %d1 = add nuw i32 %d, 1
   %d2 = zext i32 %d1 to i64
-  %j = add i64 %c, %d2
+  %j = add i64 %c, %d2       ; j = c + zext(d +nuw 1)
   %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %j
   ret float* %p
 }
-; CHECK-LABEL: @ext_add_no_overflow
-; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}
+; CHECK-LABEL: @ext_add_no_overflow(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
 ; CHECK: getelementptr float* [[BASE_PTR]], i64 33
 
-; Similar to @ext_add_no_overflow, we should be able to trace into sext/zext if
-; its operand is an "or" instruction.
-define float* @ext_or(i64 %a, i32 %b) {
+; Verifies we handle nested sext/zext correctly.
+define void @sext_zext(i32 %a, i32 %b, float** %out1, float** %out2) {
+entry:
+  %0 = add nsw nuw i32 %a, 1
+  %1 = sext i32 %0 to i48
+  %2 = zext i48 %1 to i64    ; zext(sext(a +nsw nuw 1)) = zext(sext(a)) + 1
+  %3 = add nsw i32 %b, 2
+  %4 = sext i32 %3 to i48
+  %5 = zext i48 %4 to i64    ; zext(sext(b +nsw 2)) != zext(sext(b)) + 2
+  %p1 = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %2, i64 %5
+  store float* %p1, float** %out1
+  %6 = add nuw i32 %a, 3
+  %7 = zext i32 %6 to i48
+  %8 = sext i48 %7 to i64 ; sext(zext(a +nuw 3)) = zext(a +nuw 3) = zext(a) + 3
+  %9 = add nsw i32 %b, 4
+  %10 = zext i32 %9 to i48
+  %11 = sext i48 %10 to i64  ; sext(zext(b +nsw 4)) != zext(b) + 4
+  %p2 = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %8, i64 %11
+  store float* %p2, float** %out2
+  ret void
+}
+; CHECK-LABEL: @sext_zext(
+; CHECK: [[BASE_PTR_1:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float* [[BASE_PTR_1]], i64 32
+; CHECK: [[BASE_PTR_2:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float* [[BASE_PTR_2]], i64 96
+
+; Similar to @ext_add_no_overflow, we should be able to trace into s/zext if
+; its operand is an OR and the two operands of the OR have no common bits.
+define float* @sext_or(i64 %a, i32 %b) {
 entry:
   %b1 = shl i32 %b, 2
-  %b2 = or i32 %b1, 1
-  %b3 = or i32 %b1, 2
-  %b2.ext = sext i32 %b2 to i64
+  %b2 = or i32 %b1, 1 ; (b << 2) and 1 have no common bits
+  %b3 = or i32 %b1, 4 ; (b << 2) and 4 may have common bits
+  %b2.ext = zext i32 %b2 to i64
   %b3.ext = sext i32 %b3 to i64
   %i = add i64 %a, %b2.ext
   %j = add i64 %a, %b3.ext
   %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %j
   ret float* %p
 }
-; CHECK-LABEL: @ext_or
-; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}
-; CHECK: getelementptr float* [[BASE_PTR]], i64 34
-
-; We should treat "or" with no common bits (%k) as "add", and leave "or" with
-; potentially common bits (%l) as is.
-define float* @or(i64 %i) {
-entry:
-  %j = shl i64 %i, 2
-  %k = or i64 %j, 3 ; no common bits
-  %l = or i64 %j, 4 ; potentially common bits
-  %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %k, i64 %l
-  ret float* %p
-}
-; CHECK-LABEL: @or
-; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %j, i64 %l
-; CHECK: getelementptr float* [[BASE_PTR]], i64 96
+; CHECK-LABEL: @sext_or(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float* [[BASE_PTR]], i64 32
 
 ; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b +
 ; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't
@@ -100,11 +123,28 @@ entry:
   store i64 %b5, i64* %out
   ret float* %p
 }
-; CHECK-LABEL: @expr
-; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %0, i64 0
+; CHECK-LABEL: @expr(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 0
 ; CHECK: getelementptr float* [[BASE_PTR]], i64 160
 ; CHECK: store i64 %b5, i64* %out
 
+; d + sext(a +nsw (b +nsw (c +nsw 8))) => (d + sext(a) + sext(b) + sext(c)) + 8
+define float* @sext_expr(i32 %a, i32 %b, i32 %c, i64 %d) {
+entry:
+  %0 = add nsw i32 %c, 8
+  %1 = add nsw i32 %b, %0
+  %2 = add nsw i32 %a, %1
+  %3 = sext i32 %2 to i64
+  %i = add i64 %d, %3
+  %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %i
+  ret float* %p
+}
+; CHECK-LABEL: @sext_expr(
+; CHECK: sext i32
+; CHECK: sext i32
+; CHECK: sext i32
+; CHECK: getelementptr float* %{{[a-zA-Z0-9]+}}, i64 8
+
 ; Verifies we handle "sub" correctly.
 define float* @sub(i64 %i, i64 %j) {
   %i2 = sub i64 %i, 5 ; i - 5
@@ -112,9 +152,9 @@ define float* @sub(i64 %i, i64 %j) {
   %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i2, i64 %j2
   ret float* %p
 }
-; CHECK-LABEL: @sub
-; CHECK: %[[j2:[0-9]+]] = sub i64 0, %j
-; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]]
+; CHECK-LABEL: @sub(
+; CHECK: %[[j2:[a-zA-Z0-9]+]] = sub i64 0, %j
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]]
 ; CHECK: getelementptr float* [[BASE_PTR]], i64 -155
 
 %struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed
@@ -130,8 +170,92 @@ entry:
   %arrayidx3 = getelementptr inbounds [1024 x %struct.Packed]* %s, i64 0, i64 %idxprom2, i32 1, i64 %idxprom
   ret i64* %arrayidx3
 }
-; CHECK-LABEL: @packed_struct
-; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [1024 x %struct.Packed]* %s, i64 0, i32 %i, i32 1, i32 %j
-; CHECK: [[CASTED_PTR:%[0-9]+]] = bitcast i64* [[BASE_PTR]] to i8*
+; CHECK-LABEL: @packed_struct(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [1024 x %struct.Packed]* %s, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: [[CASTED_PTR:%[a-zA-Z0-9]+]] = bitcast i64* [[BASE_PTR]] to i8*
 ; CHECK: %uglygep = getelementptr i8* [[CASTED_PTR]], i64 100
 ; CHECK: bitcast i8* %uglygep to i64*
+
+; We shouldn't be able to extract the 8 from "zext(a +nuw (b + 8))",
+; because "zext(b + 8) != zext(b) + 8"
+define float* @zext_expr(i32 %a, i32 %b) {
+entry:
+  %0 = add i32 %b, 8
+  %1 = add nuw i32 %a, %0
+  %i = zext i32 %1 to i64
+  %p = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %i
+  ret float* %p
+}
+; CHECK-LABEL: zext_expr(
+; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %i
+
+; Per http://llvm.org/docs/LangRef.html#id181, the indices of a off-bound gep
+; should be considered sign-extended to the pointer size. Therefore,
+;   gep base, (add i32 a, b) != gep (gep base, i32 a), i32 b
+; because
+;   sext(a + b) != sext(a) + sext(b)
+;
+; This test verifies we do not illegitimately extract the 8 from
+;   gep base, (i32 a + 8)
+define float* @i32_add(i32 %a) {
+entry:
+  %i = add i32 %a, 8
+  %p = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i32 %i
+  ret float* %p
+}
+; CHECK-LABEL: @i32_add(
+; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %{{[a-zA-Z0-9]+}}
+; CHECK-NOT: getelementptr
+
+; Verifies that we compute the correct constant offset when the index is
+; sign-extended and then zero-extended. The old version of our code failed to
+; handle this case because it simply computed the constant offset as the
+; sign-extended value of the constant part of the GEP index.
+define float* @apint(i1 %a) {
+entry:
+  %0 = add nsw nuw i1 %a, 1
+  %1 = sext i1 %0 to i4
+  %2 = zext i4 %1 to i64         ; zext (sext i1 1 to i4) to i64 = 15
+  %p = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %2
+  ret float* %p
+}
+; CHECK-LABEL: @apint(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float* [[BASE_PTR]], i64 15
+
+; Do not trace into binary operators other than ADD, SUB, and OR.
+define float* @and(i64 %a) {
+entry:
+  %0 = shl i64 %a, 2
+  %1 = and i64 %0, 1
+  %p = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %1
+  ret float* %p
+}
+; CHECK-LABEL: @and(
+; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array
+; CHECK-NOT: getelementptr
+
+; if zext(a + b) <= max signed value of typeof(a + b), then we can prove
+; a + b >= 0 and zext(a + b) == sext(a + b). If we can prove further a or b is
+; non-negative, we have zext(a + b) == sext(a) + sext(b).
+define float* @inbounds_zext_add(i32 %i, i4 %j) {
+entry:
+  %0 = add i32 %i, 1
+  %1 = zext i32 %0 to i64
+  ; Because zext(i + 1) is an index of an in bounds GEP based on
+  ; float_2d_array, zext(i + 1) <= sizeof(float_2d_array) = 4096.
+  ; Furthermore, since typeof(i + 1) is i32 and 4096 < 2^31, we are sure the
+  ; sign bit of i + 1 is 0. This implies zext(i + 1) = sext(i + 1).
+  %2 = add i4 %j, 2
+  %3 = zext i4 %2 to i64
+  ; In this case, typeof(j + 2) is i4, so zext(j + 2) <= 4096 does not imply
+  ; the sign bit of j + 2 is 0.
+  %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %1, i64 %3
+  ret float* %p
+}
+; CHECK-LABEL: @inbounds_zext_add(
+; CHECK-NOT: add
+; CHECK: add i4 %j, 2
+; CHECK: sext
+; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float* %{{[a-zA-Z0-9]+}}, i64 32
diff --git a/test/Transforms/SimplifyCFG/PR17073.ll b/test/Transforms/SimplifyCFG/PR17073.ll
new file mode 100644
index 0000000..8dc9fb2
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/PR17073.ll
@@ -0,0 +1,73 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; In PR17073 ( http://llvm.org/pr17073 ), we illegally hoisted an operation that can trap.
+; The first test confirms that we don't do that when the trapping op is reached by the current BB (block1).
+; The second test confirms that we don't do that when the trapping op is reached by the previous BB (entry).
+; The third test confirms that we can still do this optimization for an operation (add) that doesn't trap.
+; The tests must be complicated enough to prevent previous SimplifyCFG actions from optimizing away
+; the instructions that we're checking for.
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.9.0"
+
+@a = common global i32 0, align 4
+@b = common global i8 0, align 1
+
+; CHECK-LABEL: can_trap1 
+; CHECK-NOT: or i1 %tobool, icmp eq (i32* bitcast (i8* @b to i32*), i32* @a)
+; CHECK-NOT: select i1 %tobool, i32* null, i32* select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a) 
+define i32* @can_trap1() {
+entry:
+  %0 = load i32* @a, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %exit, label %block1
+
+block1:
+  br i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a), label %exit, label %block2
+
+block2:
+  br label %exit
+
+exit:
+  %storemerge = phi i32* [ null, %entry ],[ null, %block2 ], [ select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), %block1 ]
+  ret i32* %storemerge
+}
+
+; CHECK-LABEL: can_trap2 
+; CHECK-NOT: or i1 %tobool, icmp eq (i32* bitcast (i8* @b to i32*), i32* @a)
+; CHECK-NOT: select i1 %tobool, i32* select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), i32* null
+define i32* @can_trap2() {
+entry:
+  %0 = load i32* @a, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %exit, label %block1
+
+block1:
+  br i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a), label %exit, label %block2
+
+block2:
+  br label %exit
+
+exit:
+  %storemerge = phi i32* [ select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), %entry ],[ null, %block2 ], [ null, %block1 ]
+  ret i32* %storemerge
+}
+
+; CHECK-LABEL: cannot_trap 
+; CHECK: select i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a), i32* select (i1 icmp eq (i64 add (i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64), i64 2), i64 0), i32* null, i32* @a), i32* null
+define i32* @cannot_trap() {
+entry:
+  %0 = load i32* @a, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %exit, label %block1
+
+block1:
+  br i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a), label %exit, label %block2
+
+block2:
+  br label %exit
+
+exit:
+  %storemerge = phi i32* [ null, %entry ],[ null, %block2 ], [ select (i1 icmp eq (i64 add (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), %block1 ]
+  ret i32* %storemerge
+}
diff --git a/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg b/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
index 4d344fa..fa6a54e 100644
--- a/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
+++ b/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'Sparc' in targets:
+if not 'Sparc' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/SimplifyCFG/X86/lit.local.cfg b/test/Transforms/SimplifyCFG/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/SimplifyCFG/X86/lit.local.cfg
+++ b/test/Transforms/SimplifyCFG/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 81079b1..51ced40 100644
--- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -918,3 +918,58 @@ return:
 ; CHECK: switch i32
 ; CHECK-NOT: @switch.table
 }
+
+; Don't build tables for switches with TLS variables.
+@tls_a = thread_local global i32 0
+@tls_b = thread_local global i32 0
+@tls_c = thread_local global i32 0
+@tls_d = thread_local global i32 0
+define i32* @tls(i32 %x) {
+entry:
+  switch i32 %x, label %sw.default [
+    i32 0, label %return
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+  ]
+sw.bb1:
+  br label %return
+sw.bb2:
+  br label %return
+sw.default:
+  br label %return
+return:
+  %retval.0 = phi i32* [ @tls_d, %sw.default ], [ @tls_c, %sw.bb2 ], [ @tls_b, %sw.bb1 ], [ @tls_a, %entry ]
+  ret i32* %retval.0
+; CHECK-LABEL: @tls(
+; CHECK: switch i32
+; CHECK-NOT: @switch.table
+}
+
+; Don't build tables for switches with dllimport variables.
+@dllimport_a = external dllimport global [3x i32]
+@dllimport_b = external dllimport global [3x i32]
+@dllimport_c = external dllimport global [3x i32]
+@dllimport_d = external dllimport global [3x i32]
+define i32* @dllimport(i32 %x) {
+entry:
+  switch i32 %x, label %sw.default [
+    i32 0, label %return
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+  ]
+sw.bb1:
+  br label %return
+sw.bb2:
+  br label %return
+sw.default:
+  br label %return
+return:
+  %retval.0 = phi i32* [ getelementptr inbounds ([3 x i32]* @dllimport_d, i32 0, i32 0), %sw.default ],
+                       [ getelementptr inbounds ([3 x i32]* @dllimport_c, i32 0, i32 0), %sw.bb2 ],
+                       [ getelementptr inbounds ([3 x i32]* @dllimport_b, i32 0, i32 0), %sw.bb1 ],
+                       [ getelementptr inbounds ([3 x i32]* @dllimport_a, i32 0, i32 0), %entry ]
+  ret i32* %retval.0
+; CHECK-LABEL: @dllimport(
+; CHECK: switch i32
+; CHECK-NOT: @switch.table
+}
diff --git a/test/Transforms/SimplifyCFG/speculate-vector-ops.ll b/test/Transforms/SimplifyCFG/speculate-vector-ops.ll
new file mode 100644
index 0000000..91972eb
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/speculate-vector-ops.ll
@@ -0,0 +1,60 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+
+define i32 @speculate_vector_extract(i32 %d, <4 x i32> %v) #0 {
+; CHECK-LABEL: @speculate_vector_extract(
+; CHECK-NOT: br
+entry:
+  %conv = insertelement <4 x i32> undef, i32 %d, i32 0
+  %conv2 = insertelement <4 x i32> %conv, i32 %d, i32 1
+  %conv3 = insertelement <4 x i32> %conv2, i32 %d, i32 2
+  %conv4 = insertelement <4 x i32> %conv3, i32 %d, i32 3
+  %tmp6 = add nsw <4 x i32> %conv4, <i32 0, i32 -1, i32 -2, i32 -3>
+  %cmp = icmp eq <4 x i32> %tmp6, zeroinitializer
+  %cmp.ext = sext <4 x i1> %cmp to <4 x i32>
+  %tmp8 = extractelement <4 x i32> %cmp.ext, i32 0
+  %tobool = icmp eq i32 %tmp8, 0
+  br i1 %tobool, label %cond.else, label %cond.then
+
+return:                                           ; preds = %cond.end28
+  ret i32 %cond32
+
+cond.then:                                        ; preds = %entry
+  %tmp10 = extractelement <4 x i32> %v, i32 0
+  br label %cond.end
+
+cond.else:                                        ; preds = %entry
+  %tmp12 = extractelement <4 x i32> %v, i32 3
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.else, %cond.then
+  %cond = phi i32 [ %tmp10, %cond.then ], [ %tmp12, %cond.else ]
+  %tmp14 = extractelement <4 x i32> %cmp.ext, i32 1
+  %tobool15 = icmp eq i32 %tmp14, 0
+  br i1 %tobool15, label %cond.else17, label %cond.then16
+
+cond.then16:                                      ; preds = %cond.end
+  %tmp20 = extractelement <4 x i32> %v, i32 1
+  br label %cond.end18
+
+cond.else17:                                      ; preds = %cond.end
+  br label %cond.end18
+
+cond.end18:                                       ; preds = %cond.else17, %cond.then16
+  %cond22 = phi i32 [ %tmp20, %cond.then16 ], [ %cond, %cond.else17 ]
+  %tmp24 = extractelement <4 x i32> %cmp.ext, i32 2
+  %tobool25 = icmp eq i32 %tmp24, 0
+  br i1 %tobool25, label %cond.else27, label %cond.then26
+
+cond.then26:                                      ; preds = %cond.end18
+  %tmp30 = extractelement <4 x i32> %v, i32 2
+  br label %cond.end28
+
+cond.else27:                                      ; preds = %cond.end18
+  br label %cond.end28
+
+cond.end28:                                       ; preds = %cond.else27, %cond.then26
+  %cond32 = phi i32 [ %tmp30, %cond.then26 ], [ %cond22, %cond.else27 ]
+  br label %return
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/TailDup/X86/lit.local.cfg b/test/Transforms/TailDup/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/TailDup/X86/lit.local.cfg
+++ b/test/Transforms/TailDup/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/Transforms/TailDup/lit.local.cfg b/test/Transforms/TailDup/lit.local.cfg
index 19840aa..c8625f4 100644
--- a/test/Transforms/TailDup/lit.local.cfg
+++ b/test/Transforms/TailDup/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
diff --git a/test/Unit/lit.cfg b/test/Unit/lit.cfg
index 04e8830..e481dcc 100644
--- a/test/Unit/lit.cfg
+++ b/test/Unit/lit.cfg
@@ -35,6 +35,11 @@ for symbolizer in ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']:
     if symbolizer in os.environ:
         config.environment[symbolizer] = os.environ[symbolizer]
 
+# Win32 seeks DLLs along %PATH%.
+if sys.platform in ['win32', 'cygwin'] and os.path.isdir(config.shlibdir):
+    config.environment['PATH'] = os.path.pathsep.join((
+            config.shlibdir, config.environment['PATH']))
+
 ###
 
 # Check that the object root is known.
diff --git a/test/Verifier/alias.ll b/test/Verifier/alias.ll
index e3636bc..ff02a37 100644
--- a/test/Verifier/alias.ll
+++ b/test/Verifier/alias.ll
@@ -10,3 +10,18 @@ declare void @f()
 @ga = alias i32* @g
 ; CHECK: Alias must point to a definition
 ; CHECK-NEXT: @ga
+
+
+@test2_a = alias i32* @test2_b
+@test2_b = alias i32* @test2_a
+; CHECK:      Aliases cannot form a cycle
+; CHECK-NEXT: i32* @test2_a
+; CHECK-NEXT: Aliases cannot form a cycle
+; CHECK-NEXT: i32* @test2_b
+
+
+@test3_a = global i32 42
+@test3_b = alias weak i32* @test3_a
+@test3_c = alias i32* @test3_b
+; CHECK: Alias cannot point to a weak alias
+; CHECK-NEXT: i32* @test3_c
diff --git a/test/Verifier/bitcast-alias-address-space.ll b/test/Verifier/bitcast-alias-address-space.ll
new file mode 100644
index 0000000..d9794d9
--- /dev/null
+++ b/test/Verifier/bitcast-alias-address-space.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: error: invalid cast opcode for cast from 'i32 addrspace(2)*' to 'i32 addrspace(1)*'
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-p2:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n8:16:32"
+
+
+@data = addrspace(2) global i32 27
+
+@illegal_alias_data = alias bitcast (i32 addrspace(2)* @data to i32 addrspace(1)*)
diff --git a/test/Verifier/comdat.ll b/test/Verifier/comdat.ll
new file mode 100644
index 0000000..ca47429
--- /dev/null
+++ b/test/Verifier/comdat.ll
@@ -0,0 +1,5 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+$v = comdat any
+@v = common global i32 0, comdat $v
+; CHECK: 'common' global may not be in a Comdat!
diff --git a/test/Verifier/comdat2.ll b/test/Verifier/comdat2.ll
new file mode 100644
index 0000000..23b6cee
--- /dev/null
+++ b/test/Verifier/comdat2.ll
@@ -0,0 +1,5 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+$v = comdat any
+@v = private global i32 0, comdat $v
+; CHECK: comdat global value has local linkage
diff --git a/test/Verifier/jumptable.ll b/test/Verifier/jumptable.ll
new file mode 100644
index 0000000..5f4cd3f
--- /dev/null
+++ b/test/Verifier/jumptable.ll
@@ -0,0 +1,9 @@
+; RUN: not llc <%s 2>&1 | FileCheck %s
+
+define i32 @f() jumptable {
+  ret i32 0
+}
+
+; CHECK: Attribute 'jumptable' requires 'unnamed_addr'
+; CHECK: i32 ()* @f
+; CHECK: LLVM ERROR: Broken function found, compilation aborted!
diff --git a/test/Verifier/range-1.ll b/test/Verifier/range-1.ll
index b6a75d1..f15ca3f 100644
--- a/test/Verifier/range-1.ll
+++ b/test/Verifier/range-1.ll
@@ -6,7 +6,7 @@ entry:
   ret void
 }
 !0 = metadata !{i8 0, i8 1}
-; CHECK: Ranges are only for loads!
+; CHECK: Ranges are only for loads, calls and invokes!
 ; CHECK-NEXT: store i8 0, i8* %x, align 1, !range !0
 
 define i8 @f2(i8* %x) {
diff --git a/test/Verifier/range-2.ll b/test/Verifier/range-2.ll
index 8d85d19..1d2e057 100644
--- a/test/Verifier/range-2.ll
+++ b/test/Verifier/range-2.ll
@@ -34,3 +34,33 @@ entry:
   ret i8 %y
 }
 !4 = metadata !{i8 -1, i8 0, i8 1, i8 -2}
+
+; We can annotate the range of the return value of a CALL.
+define void @call_all(i8* %x) {
+entry:
+  %v1 = call i8 @f1(i8* %x), !range !0
+  %v2 = call i8 @f2(i8* %x), !range !1
+  %v3 = call i8 @f3(i8* %x), !range !2
+  %v4 = call i8 @f4(i8* %x), !range !3
+  %v5 = call i8 @f5(i8* %x), !range !4
+  ret void
+}
+
+; We can annotate the range of the return value of an INVOKE.
+define void @invoke_all(i8* %x) {
+entry:
+  %v1 = invoke i8 @f1(i8* %x) to label %cont unwind label %lpad, !range !0
+  %v2 = invoke i8 @f2(i8* %x) to label %cont unwind label %lpad, !range !1
+  %v3 = invoke i8 @f3(i8* %x) to label %cont unwind label %lpad, !range !2
+  %v4 = invoke i8 @f4(i8* %x) to label %cont unwind label %lpad, !range !3
+  %v5 = invoke i8 @f5(i8* %x) to label %cont unwind label %lpad, !range !4
+
+cont:
+  ret void
+
+lpad:
+  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  ret void
+}
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/lit.cfg b/test/lit.cfg
index 2815a61..664d55f 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -243,7 +243,6 @@ for pattern in [r"\bbugpoint\b(?!-)",
                 r"\bmacho-dump\b",
                 NOJUNK + r"\bopt\b",
                 r"\bFileCheck\b",
-                r"\bFileUpdate\b",
                 r"\bobj2yaml\b",
                 r"\byaml2obj\b",
                 # Handle these specially as they are strings searched
@@ -265,6 +264,10 @@ for pattern in [r"\bbugpoint\b(?!-)",
         tool_path = llvm_tools_dir + '/' + tool_name
     config.substitutions.append((pattern, tool_pipe + tool_path))
 
+### Targets
+
+config.targets = frozenset(config.targets_to_build.split())
+
 ### Features
 
 # Shell execution
@@ -332,6 +335,10 @@ if 'darwin' == sys.platform:
         config.available_features.add('fma3')
     sysctl_cmd.wait()
 
+# .debug_frame is not emitted for targeting Windows x64.
+if not re.match(r'^x86_64.*-(mingw32|win32)', config.target_triple):
+    config.available_features.add('debug_frame')
+
 # Check if we should use gmalloc.
 use_gmalloc_str = lit_config.params.get('use_gmalloc', None)
 if use_gmalloc_str is not None:
diff --git a/test/tools/llvm-cov/copy_block_helper.m b/test/tools/llvm-cov/copy_block_helper.m
index 1859b88..64973f1 100644
--- a/test/tools/llvm-cov/copy_block_helper.m
+++ b/test/tools/llvm-cov/copy_block_helper.m
@@ -29,4 +29,4 @@ void test(id x) { // GCOV: -:    [[@LINE]]:void test
 int main(int argc, const char *argv[]) { test(0); }
 
 // llvm-cov doesn't work on big endian yet
-// XFAIL: powerpc64, s390x, mips-, mips64-, sparc
+// XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
diff --git a/test/tools/llvm-cov/llvm-cov.test b/test/tools/llvm-cov/llvm-cov.test
index 2345f8d..0d3eb6b 100644
--- a/test/tools/llvm-cov/llvm-cov.test
+++ b/test/tools/llvm-cov/llvm-cov.test
@@ -102,12 +102,12 @@ RUN: diff -aub test_no_gcda.cpp.gcov test.cpp.gcov
 RUN: diff -aub test_no_gcda.h.gcov test.h.gcov
 
 # Invalid gcno file.
-RUN: not llvm-cov test.c -gcno=test_read_fail.gcno
+RUN: llvm-cov test.c -gcno=test_read_fail.gcno
 
 # Bad file checksum on gcda.
-RUN: not llvm-cov test.c -gcda=test_file_checksum_fail.gcda
+RUN: llvm-cov test.c -gcda=test_file_checksum_fail.gcda
 
 # Bad function checksum on gcda
-RUN: not llvm-cov test.c -gcda=test_func_checksum_fail.gcda
+RUN: llvm-cov test.c -gcda=test_func_checksum_fail.gcda
 
-XFAIL: powerpc64, s390x, mips-, mips64-, sparc
+XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
diff --git a/test/tools/llvm-cov/range_based_for.cpp b/test/tools/llvm-cov/range_based_for.cpp
index 61f60f6..3fdb244 100644
--- a/test/tools/llvm-cov/range_based_for.cpp
+++ b/test/tools/llvm-cov/range_based_for.cpp
@@ -26,4 +26,4 @@ int main(int argc, const char *argv[]) { // GCOV: 1:    [[@LINE]]:int main(
 }                                        // GCOV: -:    [[@LINE]]:}
 
 // llvm-cov doesn't work on big endian yet
-// XFAIL: powerpc64, s390x, mips-, mips64-, sparc
+// XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
diff --git a/test/tools/llvm-objdump/lit.local.cfg b/test/tools/llvm-objdump/lit.local.cfg
index 19840aa..c8625f4 100644
--- a/test/tools/llvm-objdump/lit.local.cfg
+++ b/test/tools/llvm-objdump/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
     config.unsupported = True
diff --git a/test/tools/llvm-readobj/ARM/lit.local.cfg b/test/tools/llvm-readobj/ARM/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/tools/llvm-readobj/ARM/lit.local.cfg
+++ b/test/tools/llvm-readobj/ARM/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
     config.unsupported = True
 
diff --git a/test/tools/llvm-readobj/Inputs/got-empty.exe.mipsel b/test/tools/llvm-readobj/Inputs/got-empty.exe.mipsel
new file mode 100755
index 0000000..b578745
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/got-empty.exe.mipsel
diff --git a/test/tools/llvm-readobj/Inputs/got-tls.so.elf-mips64el b/test/tools/llvm-readobj/Inputs/got-tls.so.elf-mips64el
new file mode 100755
index 0000000..3afc567
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/got-tls.so.elf-mips64el
diff --git a/test/tools/llvm-readobj/mips-got.test b/test/tools/llvm-readobj/mips-got.test
new file mode 100644
index 0000000..2021587
--- /dev/null
+++ b/test/tools/llvm-readobj/mips-got.test
@@ -0,0 +1,331 @@
+RUN: llvm-readobj -mips-plt-got %p/Inputs/relocs.obj.elf-mips | \
+RUN:   FileCheck %s -check-prefix GOT-OBJ
+RUN: llvm-readobj -mips-plt-got %p/Inputs/dynamic-table-exe.mips | \
+RUN:   FileCheck %s -check-prefix GOT-EXE
+RUN: llvm-readobj -mips-plt-got %p/Inputs/dynamic-table-so.mips | \
+RUN:   FileCheck %s -check-prefix GOT-SO
+RUN: llvm-readobj -mips-plt-got %p/Inputs/got-tls.so.elf-mips64el | \
+RUN:   FileCheck %s -check-prefix GOT-TLS
+RUN: llvm-readobj -mips-plt-got %p/Inputs/got-empty.exe.mipsel | \
+RUN:   FileCheck %s -check-prefix GOT-EMPTY
+
+GOT-OBJ: Cannot find PLTGOT dynamic table tag.
+
+GOT-EXE:      Primary GOT {
+GOT-EXE-NEXT:   Canonical gp value: 0x418880
+GOT-EXE-NEXT:   Reserved entries [
+GOT-EXE-NEXT:     Entry {
+GOT-EXE-NEXT:       Address: 0x410890
+GOT-EXE-NEXT:       Access: -32752
+GOT-EXE-NEXT:       Initial: 0x0
+GOT-EXE-NEXT:       Purpose: Lazy resolver
+GOT-EXE-NEXT:     }
+GOT-EXE-NEXT:     Entry {
+GOT-EXE-NEXT:       Address: 0x410894
+GOT-EXE-NEXT:       Access: -32748
+GOT-EXE-NEXT:       Initial: 0x80000000
+GOT-EXE-NEXT:       Purpose: Module pointer (GNU extension)
+GOT-EXE-NEXT:     }
+GOT-EXE-NEXT:   ]
+GOT-EXE-NEXT:   Local entries [
+GOT-EXE-NEXT:     Entry {
+GOT-EXE-NEXT:       Address: 0x410898
+GOT-EXE-NEXT:       Access: -32744
+GOT-EXE-NEXT:       Initial: 0x400418
+GOT-EXE-NEXT:     }
+GOT-EXE-NEXT:     Entry {
+GOT-EXE-NEXT:       Address: 0x41089C
+GOT-EXE-NEXT:       Access: -32740
+GOT-EXE-NEXT:       Initial: 0x410840
+GOT-EXE-NEXT:     }
+GOT-EXE-NEXT:     Entry {
+GOT-EXE-NEXT:       Address: 0x4108A0
+GOT-EXE-NEXT:       Access: -32736
+GOT-EXE-NEXT:       Initial: 0x0
+GOT-EXE-NEXT:     }
+GOT-EXE-NEXT:   ]
+GOT-EXE-NEXT:   Global entries [
+GOT-EXE-NEXT:     Entry {
+GOT-EXE-NEXT:       Address: 0x4108A4
+GOT-EXE-NEXT:       Access: -32732
+GOT-EXE-NEXT:       Initial: 0x0
+GOT-EXE-NEXT:       Value: 0x0
+GOT-EXE-NEXT:       Type: Function (0x2)
+GOT-EXE-NEXT:       Section: Undefined (0x0)
+GOT-EXE-NEXT:       Name: __gmon_start__@ (1)
+GOT-EXE-NEXT:     }
+GOT-EXE-NEXT:   ]
+GOT-EXE-NEXT:   Number of TLS and multi-GOT entries: 0
+GOT-EXE-NEXT: }
+
+GOT-SO:      Primary GOT {
+GOT-SO-NEXT:   Canonical gp value: 0x188D0
+GOT-SO-NEXT:   Reserved entries [
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x108E0
+GOT-SO-NEXT:       Access: -32752
+GOT-SO-NEXT:       Initial: 0x0
+GOT-SO-NEXT:       Purpose: Lazy resolver
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x108E4
+GOT-SO-NEXT:       Access: -32748
+GOT-SO-NEXT:       Initial: 0x80000000
+GOT-SO-NEXT:       Purpose: Module pointer (GNU extension)
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:   ]
+GOT-SO-NEXT:   Local entries [
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x108E8
+GOT-SO-NEXT:       Access: -32744
+GOT-SO-NEXT:       Initial: 0x108E0
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x108EC
+GOT-SO-NEXT:       Access: -32740
+GOT-SO-NEXT:       Initial: 0x10000
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x108F0
+GOT-SO-NEXT:       Access: -32736
+GOT-SO-NEXT:       Initial: 0x10920
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x108F4
+GOT-SO-NEXT:       Access: -32732
+GOT-SO-NEXT:       Initial: 0x108CC
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x108F8
+GOT-SO-NEXT:       Access: -32728
+GOT-SO-NEXT:       Initial: 0x0
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x108FC
+GOT-SO-NEXT:       Access: -32724
+GOT-SO-NEXT:       Initial: 0x0
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x10900
+GOT-SO-NEXT:       Access: -32720
+GOT-SO-NEXT:       Initial: 0x0
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x10904
+GOT-SO-NEXT:       Access: -32716
+GOT-SO-NEXT:       Initial: 0x0
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:   ]
+GOT-SO-NEXT:   Global entries [
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x10908
+GOT-SO-NEXT:       Access: -32712
+GOT-SO-NEXT:       Initial: 0x0
+GOT-SO-NEXT:       Value: 0x0
+GOT-SO-NEXT:       Type: None (0x0)
+GOT-SO-NEXT:       Section: Undefined (0x0)
+GOT-SO-NEXT:       Name: _ITM_registerTMCloneTable@ (87)
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x1090C
+GOT-SO-NEXT:       Access: -32708
+GOT-SO-NEXT:       Initial: 0x0
+GOT-SO-NEXT:       Value: 0x0
+GOT-SO-NEXT:       Type: None (0x0)
+GOT-SO-NEXT:       Section: Undefined (0x0)
+GOT-SO-NEXT:       Name: _Jv_RegisterClasses@ (128)
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x10910
+GOT-SO-NEXT:       Access: -32704
+GOT-SO-NEXT:       Initial: 0x0
+GOT-SO-NEXT:       Value: 0x0
+GOT-SO-NEXT:       Type: Function (0x2)
+GOT-SO-NEXT:       Section: Undefined (0x0)
+GOT-SO-NEXT:       Name: __gmon_start__@ (23)
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x10914
+GOT-SO-NEXT:       Access: -32700
+GOT-SO-NEXT:       Initial: 0x840
+GOT-SO-NEXT:       Value: 0x840
+GOT-SO-NEXT:       Type: Function (0x2)
+GOT-SO-NEXT:       Section: Undefined (0x0)
+GOT-SO-NEXT:       Name: puts@GLIBC_2.0 (162)
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x10918
+GOT-SO-NEXT:       Access: -32696
+GOT-SO-NEXT:       Initial: 0x0
+GOT-SO-NEXT:       Value: 0x0
+GOT-SO-NEXT:       Type: None (0x0)
+GOT-SO-NEXT:       Section: Undefined (0x0)
+GOT-SO-NEXT:       Name: _ITM_deregisterTMCloneTable@ (59)
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:     Entry {
+GOT-SO-NEXT:       Address: 0x1091C
+GOT-SO-NEXT:       Access: -32692
+GOT-SO-NEXT:       Initial: 0x0
+GOT-SO-NEXT:       Value: 0x0
+GOT-SO-NEXT:       Type: Function (0x2)
+GOT-SO-NEXT:       Section: Undefined (0x0)
+GOT-SO-NEXT:       Name: __cxa_finalize@GLIBC_2.2 (113)
+GOT-SO-NEXT:     }
+GOT-SO-NEXT:   ]
+GOT-SO-NEXT:   Number of TLS and multi-GOT entries: 0
+GOT-SO-NEXT: }
+
+GOT-TLS:      Primary GOT {
+GOT-TLS-NEXT:   Canonical gp value: 0x18BF0
+GOT-TLS-NEXT:   Reserved entries [
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C00
+GOT-TLS-NEXT:       Access: -32752
+GOT-TLS-NEXT:       Initial: 0x0
+GOT-TLS-NEXT:       Purpose: Lazy resolver
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C08
+GOT-TLS-NEXT:       Access: -32744
+GOT-TLS-NEXT:       Initial: 0x8000000000000000
+GOT-TLS-NEXT:       Purpose: Module pointer (GNU extension)
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:   ]
+GOT-TLS-NEXT:   Local entries [
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C10
+GOT-TLS-NEXT:       Access: -32736
+GOT-TLS-NEXT:       Initial: 0x10000
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C18
+GOT-TLS-NEXT:       Access: -32728
+GOT-TLS-NEXT:       Initial: 0x10C00
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C20
+GOT-TLS-NEXT:       Access: -32720
+GOT-TLS-NEXT:       Initial: 0x10CB8
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C28
+GOT-TLS-NEXT:       Access: -32712
+GOT-TLS-NEXT:       Initial: 0x10BF0
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C30
+GOT-TLS-NEXT:       Access: -32704
+GOT-TLS-NEXT:       Initial: 0x0
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C38
+GOT-TLS-NEXT:       Access: -32696
+GOT-TLS-NEXT:       Initial: 0x948
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C40
+GOT-TLS-NEXT:       Access: -32688
+GOT-TLS-NEXT:       Initial: 0xA20
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C48
+GOT-TLS-NEXT:       Access: -32680
+GOT-TLS-NEXT:       Initial: 0xAF0
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C50
+GOT-TLS-NEXT:       Access: -32672
+GOT-TLS-NEXT:       Initial: 0x0
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C58
+GOT-TLS-NEXT:       Access: -32664
+GOT-TLS-NEXT:       Initial: 0x0
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C60
+GOT-TLS-NEXT:       Access: -32656
+GOT-TLS-NEXT:       Initial: 0x0
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:   ]
+GOT-TLS-NEXT:   Global entries [
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C68
+GOT-TLS-NEXT:       Access: -32648
+GOT-TLS-NEXT:       Initial: 0x0
+GOT-TLS-NEXT:       Value: 0x0
+GOT-TLS-NEXT:       Type: None (0x0)
+GOT-TLS-NEXT:       Section: Undefined (0x0)
+GOT-TLS-NEXT:       Name: _ITM_registerTMCloneTable@ (78)
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C70
+GOT-TLS-NEXT:       Access: -32640
+GOT-TLS-NEXT:       Initial: 0x0
+GOT-TLS-NEXT:       Value: 0x0
+GOT-TLS-NEXT:       Type: None (0x0)
+GOT-TLS-NEXT:       Section: Undefined (0x0)
+GOT-TLS-NEXT:       Name: _Jv_RegisterClasses@ (119)
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C78
+GOT-TLS-NEXT:       Access: -32632
+GOT-TLS-NEXT:       Initial: 0x0
+GOT-TLS-NEXT:       Value: 0x0
+GOT-TLS-NEXT:       Type: Function (0x2)
+GOT-TLS-NEXT:       Section: Undefined (0x0)
+GOT-TLS-NEXT:       Name: __gmon_start__@ (23)
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C80
+GOT-TLS-NEXT:       Access: -32624
+GOT-TLS-NEXT:       Initial: 0xB60
+GOT-TLS-NEXT:       Value: 0xB60
+GOT-TLS-NEXT:       Type: Function (0x2)
+GOT-TLS-NEXT:       Section: Undefined (0x0)
+GOT-TLS-NEXT:       Name: __tls_get_addr@GLIBC_2.3 (150)
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C88
+GOT-TLS-NEXT:       Access: -32616
+GOT-TLS-NEXT:       Initial: 0x0
+GOT-TLS-NEXT:       Value: 0x0
+GOT-TLS-NEXT:       Type: None (0x0)
+GOT-TLS-NEXT:       Section: Undefined (0x0)
+GOT-TLS-NEXT:       Name: _ITM_deregisterTMCloneTable@ (50)
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:     Entry {
+GOT-TLS-NEXT:       Address: 0x10C90
+GOT-TLS-NEXT:       Access: -32608
+GOT-TLS-NEXT:       Initial: 0x0
+GOT-TLS-NEXT:       Value: 0x0
+GOT-TLS-NEXT:       Type: Function (0x2)
+GOT-TLS-NEXT:       Section: Undefined (0x0)
+GOT-TLS-NEXT:       Name: __cxa_finalize@GLIBC_2.2 (104)
+GOT-TLS-NEXT:     }
+GOT-TLS-NEXT:   ]
+GOT-TLS-NEXT:   Number of TLS and multi-GOT entries: 4
+GOT-TLS-NEXT: }
+
+GOT-EMPTY:      Primary GOT {
+GOT-EMPTY-NEXT:   Canonical gp value: 0x409FF0
+GOT-EMPTY-NEXT:   Reserved entries [
+GOT-EMPTY-NEXT:     Entry {
+GOT-EMPTY-NEXT:       Address: 0x402000
+GOT-EMPTY-NEXT:       Access: -32752
+GOT-EMPTY-NEXT:       Initial: 0x0
+GOT-EMPTY-NEXT:       Purpose: Lazy resolver
+GOT-EMPTY-NEXT:     }
+GOT-EMPTY-NEXT:     Entry {
+GOT-EMPTY-NEXT:       Address: 0x402004
+GOT-EMPTY-NEXT:       Access: -32748
+GOT-EMPTY-NEXT:       Initial: 0x80000000
+GOT-EMPTY-NEXT:       Purpose: Module pointer (GNU extension)
+GOT-EMPTY-NEXT:     }
+GOT-EMPTY-NEXT:   ]
+GOT-EMPTY-NEXT:   Local entries [
+GOT-EMPTY-NEXT:   ]
+GOT-EMPTY-NEXT:   Global entries [
+GOT-EMPTY-NEXT:   ]
+GOT-EMPTY-NEXT:   Number of TLS and multi-GOT entries: 2
+GOT-EMPTY-NEXT: }
diff --git a/test/tools/llvm-readobj/program-headers.test b/test/tools/llvm-readobj/program-headers.test
index 7c22f2b..f014c03 100644
--- a/test/tools/llvm-readobj/program-headers.test
+++ b/test/tools/llvm-readobj/program-headers.test
@@ -4,6 +4,8 @@ RUN: llvm-readobj -program-headers %p/../../Object/Inputs/program-headers.elf-x8
 RUN:     | FileCheck %s -check-prefix ELF-X86-64
 RUN: llvm-readobj -program-headers %p/../../Object/Inputs/program-headers.mips \
 RUN:     | FileCheck %s -check-prefix ELF-MIPS
+RUN: llvm-readobj -program-headers %p/../../Object/Inputs/program-headers.mips64 \
+RUN:     | FileCheck %s -check-prefix ELF-MIPS64
 
 ELF-I386:      ProgramHeaders [
 ELF-I386-NEXT:   ProgramHeader {
@@ -75,7 +77,11 @@ ELF-X86-64-NEXT:     Alignment: 8
 ELF-X86-64-NEXT:   }
 ELF-X86-64-NEXT: ]
 
-ELF-MIPS:      ProgramHeaders [
+ELF-MIPS:      Format: ELF32-mips
+ELF-MIPS-NEXT: Arch: mips
+ELF-MIPS-NEXT: AddressSize: 32bit
+ELF-MIPS-NEXT: LoadName:
+ELF-MIPS-NEXT: ProgramHeaders [
 ELF-MIPS-NEXT:   ProgramHeader {
 ELF-MIPS-NEXT:     Type: PT_MIPS_REGINFO (0x70000000)
 ELF-MIPS-NEXT:     Offset: 0x74
@@ -102,3 +108,23 @@ ELF-MIPS-NEXT:     ]
 ELF-MIPS-NEXT:     Alignment: 65536
 ELF-MIPS-NEXT:   }
 ELF-MIPS-NEXT: ]
+
+ELF-MIPS64:      Format: ELF64-mips
+ELF-MIPS64-NEXT: Arch: mips64
+ELF-MIPS64-NEXT: AddressSize: 64bit
+ELF-MIPS64-NEXT: LoadName:
+ELF-MIPS64-NEXT: ProgramHeaders [
+ELF-MIPS64-NEXT:   ProgramHeader {
+ELF-MIPS64-NEXT:     Type: PT_LOAD (0x1)
+ELF-MIPS64-NEXT:     Offset: 0x0
+ELF-MIPS64-NEXT:     VirtualAddress: 0x120000000
+ELF-MIPS64-NEXT:     PhysicalAddress: 0x120000000
+ELF-MIPS64-NEXT:     FileSize: 136
+ELF-MIPS64-NEXT:     MemSize: 136
+ELF-MIPS64-NEXT:     Flags [ (0x5)
+ELF-MIPS64-NEXT:       PF_R (0x4)
+ELF-MIPS64-NEXT:       PF_X (0x1)
+ELF-MIPS64-NEXT:     ]
+ELF-MIPS64-NEXT:     Alignment: 65536
+ELF-MIPS64-NEXT:   }
+ELF-MIPS64-NEXT: ]
diff --git a/test/tools/llvm-readobj/relocations.test b/test/tools/llvm-readobj/relocations.test
index 3a87ff5..864ded3 100644
--- a/test/tools/llvm-readobj/relocations.test
+++ b/test/tools/llvm-readobj/relocations.test
@@ -34,8 +34,8 @@ MACHO-I386:      Relocations [
 MACHO-I386-NEXT:   Section __text {
 MACHO-I386-NEXT:     0x18 1 2 1 GENERIC_RELOC_VANILLA 0 _SomeOtherFunction
 MACHO-I386-NEXT:     0x13 1 2 1 GENERIC_RELOC_VANILLA 0 _puts
-MACHO-I386-NEXT:     0xB 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 -
-MACHO-I386-NEXT:     0x0 0 2 n/a GENERIC_RELOC_PAIR 1 -
+MACHO-I386-NEXT:     0xB 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 0x22
+MACHO-I386-NEXT:     0x0 0 2 n/a GENERIC_RELOC_PAIR 1 0x8
 MACHO-I386-NEXT:   }
 MACHO-I386-NEXT: ]
 
@@ -49,17 +49,17 @@ MACHO-X86-64-NEXT:]
 
 MACHO-PPC: Relocations [
 MACHO-PPC-NEXT:   Section __text {
-MACHO-PPC-NEXT:     0x24 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 -
-MACHO-PPC-NEXT:     0x0 0 2 n/a PPC_RELOC_PAIR 1 -
-MACHO-PPC-NEXT:     0x1C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 -
-MACHO-PPC-NEXT:     0x58 0 2 n/a PPC_RELOC_PAIR 1 -
-MACHO-PPC-NEXT:     0x18 1 2 0 PPC_RELOC_BR24 0 -
+MACHO-PPC-NEXT:     0x24 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 0x64
+MACHO-PPC-NEXT:     0x0 0 2 n/a PPC_RELOC_PAIR 1 0xC
+MACHO-PPC-NEXT:     0x1C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 0x64
+MACHO-PPC-NEXT:     0x58 0 2 n/a PPC_RELOC_PAIR 1 0xC
+MACHO-PPC-NEXT:     0x18 1 2 0 PPC_RELOC_BR24 0 0x2
 MACHO-PPC-NEXT:   }
 MACHO-PPC-NEXT:   Section __picsymbolstub1 {
-MACHO-PPC-NEXT:     0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 -
-MACHO-PPC-NEXT:     0x0 0 2 n/a PPC_RELOC_PAIR 1 -
-MACHO-PPC-NEXT:     0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 -
-MACHO-PPC-NEXT:     0x20 0 2 n/a PPC_RELOC_PAIR 1 -
+MACHO-PPC-NEXT:     0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 0x68
+MACHO-PPC-NEXT:     0x0 0 2 n/a PPC_RELOC_PAIR 1 0x48
+MACHO-PPC-NEXT:     0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 0x68
+MACHO-PPC-NEXT:     0x20 0 2 n/a PPC_RELOC_PAIR 1 0x48
 MACHO-PPC-NEXT:   }
 MACHO-PPC-NEXT:   Section __la_symbol_ptr {
 MACHO-PPC-NEXT:     0x0 0 2 1 PPC_RELOC_VANILLA 0 dyld_stub_binding_helper
@@ -68,17 +68,17 @@ MACHO-PPC-NEXT: ]
 
 MACHO-PPC64: Relocations [
 MACHO-PPC64-NEXT:   Section __text {
-MACHO-PPC64-NEXT:     0x24 0 2 n/a 1 -
-MACHO-PPC64-NEXT:     0x0 0 2 n/a 1 -
-MACHO-PPC64-NEXT:     0x1C 0 2 n/a 1 -
-MACHO-PPC64-NEXT:     0x58 0 2 n/a 1 -
-MACHO-PPC64-NEXT:     0x18 1 2 0 0 -
+MACHO-PPC64-NEXT:     0x24 0 2 n/a 1 0x64
+MACHO-PPC64-NEXT:     0x0 0 2 n/a 1 0xC
+MACHO-PPC64-NEXT:     0x1C 0 2 n/a 1 0x64
+MACHO-PPC64-NEXT:     0x58 0 2 n/a 1 0xC
+MACHO-PPC64-NEXT:     0x18 1 2 0 0 0x2
 MACHO-PPC64-NEXT:   }
 MACHO-PPC64-NEXT:   Section __picsymbolstub1 {
-MACHO-PPC64-NEXT:     0x14 0 2 n/a 1 -
-MACHO-PPC64-NEXT:     0x0 0 2 n/a 1 -
-MACHO-PPC64-NEXT:     0xC 0 2 n/a 1 -
-MACHO-PPC64-NEXT:     0x24 0 2 n/a 1 -
+MACHO-PPC64-NEXT:     0x14 0 2 n/a 1 0x6C
+MACHO-PPC64-NEXT:     0x0 0 2 n/a 1 0x48
+MACHO-PPC64-NEXT:     0xC 0 2 n/a 1 0x6C
+MACHO-PPC64-NEXT:     0x24 0 2 n/a 1 0x48
 MACHO-PPC64-NEXT:   }
 MACHO-PPC64-NEXT:   Section __la_symbol_ptr {
 MACHO-PPC64-NEXT:     0x0 0 3 1 0 dyld_stub_binding_helper
@@ -94,7 +94,7 @@ MACHO-ARM-NEXT:        PCRel: 0
 MACHO-ARM-NEXT:        Length: 2
 MACHO-ARM-NEXT:        Extern: N/A
 MACHO-ARM-NEXT:        Type: ARM_RELOC_SECTDIFF (2)
-MACHO-ARM-NEXT:        Symbol: -
+MACHO-ARM-NEXT:        Symbol: 0x40
 MACHO-ARM-NEXT:        Scattered: 1
 MACHO-ARM-NEXT:      }
 MACHO-ARM-NEXT:      Relocation {
@@ -103,7 +103,7 @@ MACHO-ARM-NEXT:        PCRel: 0
 MACHO-ARM-NEXT:        Length: 2
 MACHO-ARM-NEXT:        Extern: N/A
 MACHO-ARM-NEXT:        Type: ARM_RELOC_PAIR (1)
-MACHO-ARM-NEXT:        Symbol: -
+MACHO-ARM-NEXT:        Symbol: 0x28
 MACHO-ARM-NEXT:        Scattered: 1
 MACHO-ARM-NEXT:      }
 MACHO-ARM-NEXT:      Relocation {
@@ -130,7 +130,7 @@ MACHO-ARM-NEXT:        PCRel: 0
 MACHO-ARM-NEXT:        Length: 1
 MACHO-ARM-NEXT:        Extern: 0
 MACHO-ARM-NEXT:        Type: ARM_RELOC_PAIR (1)
-MACHO-ARM-NEXT:        Symbol: -
+MACHO-ARM-NEXT:        Symbol: 0xFFFFFF
 MACHO-ARM-NEXT:        Scattered: 0
 MACHO-ARM-NEXT:      }
 MACHO-ARM-NEXT:      Relocation {
@@ -148,7 +148,7 @@ MACHO-ARM-NEXT:        PCRel: 0
 MACHO-ARM-NEXT:        Length: 0
 MACHO-ARM-NEXT:        Extern: 0
 MACHO-ARM-NEXT:        Type: ARM_RELOC_PAIR (1)
-MACHO-ARM-NEXT:        Symbol: -
+MACHO-ARM-NEXT:        Symbol: 0xFFFFFF
 MACHO-ARM-NEXT:        Scattered: 0
 MACHO-ARM-NEXT:      }
 MACHO-ARM-NEXT:      Relocation {
@@ -157,7 +157,7 @@ MACHO-ARM-NEXT:        PCRel: 0
 MACHO-ARM-NEXT:        Length: 2
 MACHO-ARM-NEXT:        Extern: N/A
 MACHO-ARM-NEXT:        Type: ARM_RELOC_SECTDIFF (2)
-MACHO-ARM-NEXT:        Symbol: -
+MACHO-ARM-NEXT:        Symbol: 0x44
 MACHO-ARM-NEXT:        Scattered: 1
 MACHO-ARM-NEXT:      }
 MACHO-ARM-NEXT:      Relocation {
@@ -166,7 +166,7 @@ MACHO-ARM-NEXT:        PCRel: 0
 MACHO-ARM-NEXT:        Length: 2
 MACHO-ARM-NEXT:        Extern: N/A
 MACHO-ARM-NEXT:        Type: ARM_RELOC_PAIR (1)
-MACHO-ARM-NEXT:        Symbol: -
+MACHO-ARM-NEXT:        Symbol: 0x4
 MACHO-ARM-NEXT:        Scattered: 1
 MACHO-ARM-NEXT:      }
 MACHO-ARM-NEXT:    }
diff --git a/test/tools/llvm-readobj/sections-ext.test b/test/tools/llvm-readobj/sections-ext.test
index 0f7ce26..972d8e6 100644
--- a/test/tools/llvm-readobj/sections-ext.test
+++ b/test/tools/llvm-readobj/sections-ext.test
@@ -183,8 +183,8 @@ MACHO-I386-NEXT:     Reserved2: 0x0
 MACHO-I386-NEXT:     Relocations [
 MACHO-I386-NEXT:       0x18 1 2 1 GENERIC_RELOC_VANILLA 0 _SomeOtherFunction
 MACHO-I386-NEXT:       0x13 1 2 1 GENERIC_RELOC_VANILLA 0 _puts
-MACHO-I386-NEXT:       0xB 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 -
-MACHO-I386-NEXT:       0x0 0 2 n/a GENERIC_RELOC_PAIR 1 -
+MACHO-I386-NEXT:       0xB 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 0x22
+MACHO-I386-NEXT:       0x0 0 2 n/a GENERIC_RELOC_PAIR 1 0x8
 MACHO-I386-NEXT:     ]
 MACHO-I386-NEXT:     Symbols [
 MACHO-I386-NEXT:       Symbol {
@@ -299,11 +299,11 @@ MACHO-PPC-NEXT:     ]
 MACHO-PPC-NEXT:     Reserved1: 0x0
 MACHO-PPC-NEXT:     Reserved2: 0x0
 MACHO-PPC-NEXT:     Relocations [
-MACHO-PPC-NEXT:       0x24 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 -
-MACHO-PPC-NEXT:       0x0 0 2 n/a PPC_RELOC_PAIR 1 -
-MACHO-PPC-NEXT:       0x1C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 -
-MACHO-PPC-NEXT:       0x58 0 2 n/a PPC_RELOC_PAIR 1 -
-MACHO-PPC-NEXT:       0x18 1 2 0 PPC_RELOC_BR24 0 -
+MACHO-PPC-NEXT:       0x24 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 0x64
+MACHO-PPC-NEXT:       0x0 0 2 n/a PPC_RELOC_PAIR 1 0xC
+MACHO-PPC-NEXT:       0x1C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 0x64
+MACHO-PPC-NEXT:       0x58 0 2 n/a PPC_RELOC_PAIR 1 0xC
+MACHO-PPC-NEXT:       0x18 1 2 0 PPC_RELOC_BR24 0 0x2
 MACHO-PPC-NEXT:     ]
 MACHO-PPC-NEXT:     Symbols [
 MACHO-PPC-NEXT:       Symbol {
@@ -342,10 +342,10 @@ MACHO-PPC-NEXT:     ]
 MACHO-PPC-NEXT:     Reserved1: 0x0
 MACHO-PPC-NEXT:     Reserved2: 0x20
 MACHO-PPC-NEXT:     Relocations [
-MACHO-PPC-NEXT:       0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 -
-MACHO-PPC-NEXT:       0x0 0 2 n/a PPC_RELOC_PAIR 1 -
-MACHO-PPC-NEXT:       0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 -
-MACHO-PPC-NEXT:       0x20 0 2 n/a PPC_RELOC_PAIR 1 -
+MACHO-PPC-NEXT:       0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 0x68
+MACHO-PPC-NEXT:       0x0 0 2 n/a PPC_RELOC_PAIR 1 0x48
+MACHO-PPC-NEXT:       0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 0x68
+MACHO-PPC-NEXT:       0x20 0 2 n/a PPC_RELOC_PAIR 1 0x48
 MACHO-PPC-NEXT:     ]
 MACHO-PPC-NEXT:     Symbols [
 MACHO-PPC-NEXT:     ]
@@ -456,11 +456,11 @@ MACHO-PPC64-NEXT:     ]
 MACHO-PPC64-NEXT:     Reserved1: 0x0
 MACHO-PPC64-NEXT:     Reserved2: 0x0
 MACHO-PPC64-NEXT:     Relocations [
-MACHO-PPC64-NEXT:       0x24 0 2 n/a 1 -
-MACHO-PPC64-NEXT:       0x0 0 2 n/a 1 -
-MACHO-PPC64-NEXT:       0x1C 0 2 n/a 1 -
-MACHO-PPC64-NEXT:       0x58 0 2 n/a 1 -
-MACHO-PPC64-NEXT:       0x18 1 2 0 0 -
+MACHO-PPC64-NEXT:       0x24 0 2 n/a 1 0x64
+MACHO-PPC64-NEXT:       0x0 0 2 n/a 1 0xC
+MACHO-PPC64-NEXT:       0x1C 0 2 n/a 1 0x64
+MACHO-PPC64-NEXT:       0x58 0 2 n/a 1 0xC
+MACHO-PPC64-NEXT:       0x18 1 2 0 0 0x2
 MACHO-PPC64-NEXT:     ]
 MACHO-PPC64-NEXT:     Symbols [
 MACHO-PPC64-NEXT:       Symbol {
@@ -499,10 +499,10 @@ MACHO-PPC64-NEXT:     ]
 MACHO-PPC64-NEXT:     Reserved1: 0x0
 MACHO-PPC64-NEXT:     Reserved2: 0x20
 MACHO-PPC64-NEXT:     Relocations [
-MACHO-PPC64-NEXT:       0x14 0 2 n/a 1 -
-MACHO-PPC64-NEXT:       0x0 0 2 n/a 1 -
-MACHO-PPC64-NEXT:       0xC 0 2 n/a 1 -
-MACHO-PPC64-NEXT:       0x24 0 2 n/a 1 -
+MACHO-PPC64-NEXT:       0x14 0 2 n/a 1 0x6C
+MACHO-PPC64-NEXT:       0x0 0 2 n/a 1 0x48
+MACHO-PPC64-NEXT:       0xC 0 2 n/a 1 0x6C
+MACHO-PPC64-NEXT:       0x24 0 2 n/a 1 0x48
 MACHO-PPC64-NEXT:     ]
 MACHO-PPC64-NEXT:     Symbols [
 MACHO-PPC64-NEXT:     ]
@@ -618,7 +618,7 @@ MACHO-ARM-NEXT:         PCRel: 0
 MACHO-ARM-NEXT:         Length: 2
 MACHO-ARM-NEXT:         Extern: N/A
 MACHO-ARM-NEXT:         Type: ARM_RELOC_SECTDIFF (2)
-MACHO-ARM-NEXT:         Symbol: -
+MACHO-ARM-NEXT:         Symbol: 0x40
 MACHO-ARM-NEXT:         Scattered: 1
 MACHO-ARM-NEXT:       }
 MACHO-ARM-NEXT:       Relocation {
@@ -627,7 +627,7 @@ MACHO-ARM-NEXT:         PCRel: 0
 MACHO-ARM-NEXT:         Length: 2
 MACHO-ARM-NEXT:         Extern: N/A
 MACHO-ARM-NEXT:         Type: ARM_RELOC_PAIR (1)
-MACHO-ARM-NEXT:         Symbol: -
+MACHO-ARM-NEXT:         Symbol: 0x28
 MACHO-ARM-NEXT:         Scattered: 1
 MACHO-ARM-NEXT:       }
 MACHO-ARM-NEXT:       Relocation {
@@ -654,7 +654,7 @@ MACHO-ARM-NEXT:         PCRel: 0
 MACHO-ARM-NEXT:         Length: 1
 MACHO-ARM-NEXT:         Extern: 0
 MACHO-ARM-NEXT:         Type: ARM_RELOC_PAIR (1)
-MACHO-ARM-NEXT:         Symbol: -
+MACHO-ARM-NEXT:         Symbol: 0xFFFFFF
 MACHO-ARM-NEXT:         Scattered: 0
 MACHO-ARM-NEXT:       }
 MACHO-ARM-NEXT:       Relocation {
@@ -672,7 +672,7 @@ MACHO-ARM-NEXT:         PCRel: 0
 MACHO-ARM-NEXT:         Length: 0
 MACHO-ARM-NEXT:         Extern: 0
 MACHO-ARM-NEXT:         Type: ARM_RELOC_PAIR (1)
-MACHO-ARM-NEXT:         Symbol: -
+MACHO-ARM-NEXT:         Symbol: 0xFFFFFF
 MACHO-ARM-NEXT:         Scattered: 0
 MACHO-ARM-NEXT:       }
 MACHO-ARM-NEXT:       Relocation {
@@ -681,7 +681,7 @@ MACHO-ARM-NEXT:         PCRel: 0
 MACHO-ARM-NEXT:         Length: 2
 MACHO-ARM-NEXT:         Extern: N/A
 MACHO-ARM-NEXT:         Type: ARM_RELOC_SECTDIFF (2)
-MACHO-ARM-NEXT:         Symbol: -
+MACHO-ARM-NEXT:         Symbol: 0x44
 MACHO-ARM-NEXT:         Scattered: 1
 MACHO-ARM-NEXT:       }
 MACHO-ARM-NEXT:       Relocation {
@@ -690,7 +690,7 @@ MACHO-ARM-NEXT:         PCRel: 0
 MACHO-ARM-NEXT:         Length: 2
 MACHO-ARM-NEXT:         Extern: N/A
 MACHO-ARM-NEXT:         Type: ARM_RELOC_PAIR (1)
-MACHO-ARM-NEXT:         Symbol: -
+MACHO-ARM-NEXT:         Symbol: 0x4
 MACHO-ARM-NEXT:         Scattered: 1
 MACHO-ARM-NEXT:       }
 MACHO-ARM-NEXT:     ]
author	Stephen Hines <srhines@google.com>	2014-07-21 00:45:20 -0700
committer	Stephen Hines <srhines@google.com>	2014-07-21 00:45:20 -0700
commit	c6a4f5e819217e1e12c458aed8e7b122e23a3a58 (patch)
tree	81b7dd2bb4370a392f31d332a566c903b5744764 /test
parent	19c6fbb3e8aaf74093afa08013134b61fa08f245 (diff)
download	external_llvm-c6a4f5e819217e1e12c458aed8e7b122e23a3a58.zip external_llvm-c6a4f5e819217e1e12c458aed8e7b122e23a3a58.tar.gz external_llvm-c6a4f5e819217e1e12c458aed8e7b122e23a3a58.tar.bz2