author     Stephen Hines <srhines@google.com>  2014-07-21 00:45:20 -0700
committer  Stephen Hines <srhines@google.com>  2014-07-21 00:45:20 -0700
commit     c6a4f5e819217e1e12c458aed8e7b122e23a3a58 (patch)
tree       81b7dd2bb4370a392f31d332a566c903b5744764 /test/Transforms
parent     19c6fbb3e8aaf74093afa08013134b61fa08f245 (diff)
download   external_llvm-c6a4f5e819217e1e12c458aed8e7b122e23a3a58.zip
           external_llvm-c6a4f5e819217e1e12c458aed8e7b122e23a3a58.tar.gz
           external_llvm-c6a4f5e819217e1e12c458aed8e7b122e23a3a58.tar.bz2
Update LLVM for rebase to r212749.
Includes a cherry-pick of:
r212948 - fixes a small issue with atomic calls

Change-Id: Ib97bd980b59f18142a69506400911a6009d9df18
Diffstat (limited to 'test/Transforms')
-rw-r--r--  test/Transforms/ArgumentPromotion/basictest.ll  32
-rw-r--r--  test/Transforms/ArgumentPromotion/byval-2.ll  37
-rw-r--r--  test/Transforms/ArgumentPromotion/byval.ll  33
-rw-r--r--  test/Transforms/ArgumentPromotion/dbg.ll  22
-rw-r--r--  test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll  68
-rw-r--r--  test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll  70
-rw-r--r--  test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll  97
-rw-r--r--  test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg  3
-rw-r--r--  test/Transforms/BBVectorize/lit.local.cfg  3
-rw-r--r--  test/Transforms/CodeGenPrepare/X86/lit.local.cfg  3
-rw-r--r--  test/Transforms/ConstantHoisting/AArch64/lit.local.cfg  3
-rw-r--r--  test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg  3
-rw-r--r--  test/Transforms/ConstantHoisting/X86/large-immediate.ll  9
-rw-r--r--  test/Transforms/ConstantHoisting/X86/lit.local.cfg  3
-rw-r--r--  test/Transforms/ConstantHoisting/X86/stackmap.ll  6
-rw-r--r--  test/Transforms/FunctionAttrs/nocapture.ll  2
-rw-r--r--  test/Transforms/FunctionAttrs/readattrs.ll  14
-rw-r--r--  test/Transforms/GCOVProfiling/global-ctor.ll  58
-rw-r--r--  test/Transforms/GCOVProfiling/linezero.ll  143
-rw-r--r--  test/Transforms/GVN/calloc-load-removal.ll  25
-rw-r--r--  test/Transforms/GVN/invariant-load.ll  31
-rw-r--r--  test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll  8
-rw-r--r--  test/Transforms/GlobalMerge/AArch64/arm64.ll  88
-rw-r--r--  test/Transforms/GlobalMerge/AArch64/lit.local.cfg  4
-rw-r--r--  test/Transforms/GlobalMerge/ARM/arm.ll  85
-rw-r--r--  test/Transforms/GlobalMerge/ARM/lit.local.cfg  4
-rw-r--r--  test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll  2
-rw-r--r--  test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll  18
-rw-r--r--  test/Transforms/GlobalOpt/alias-resolve.ll  8
-rw-r--r--  test/Transforms/GlobalOpt/constantfold-initializers.ll  38
-rw-r--r--  test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll  57
-rw-r--r--  test/Transforms/Inline/blockaddress.ll  5
-rw-r--r--  test/Transforms/Inline/debug-invoke.ll  37
-rw-r--r--  test/Transforms/Inline/null-function.ll  9
-rw-r--r--  test/Transforms/InstCombine/2010-03-03-ExtElim.ll  4
-rw-r--r--  test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll  2
-rw-r--r--  test/Transforms/InstCombine/AddOverFlow.ll  118
-rw-r--r--  test/Transforms/InstCombine/abs_abs.ll  961
-rw-r--r--  test/Transforms/InstCombine/add-shrink.ll  10
-rw-r--r--  test/Transforms/InstCombine/add-sitofp.ll  2
-rw-r--r--  test/Transforms/InstCombine/add2.ll  237
-rw-r--r--  test/Transforms/InstCombine/addrspacecast.ll  80
-rw-r--r--  test/Transforms/InstCombine/align-2d-gep.ll  2
-rw-r--r--  test/Transforms/InstCombine/bitcast-alias-function.ll  24
-rw-r--r--  test/Transforms/InstCombine/cast.ll  2
-rw-r--r--  test/Transforms/InstCombine/constant-fold-address-space-pointer.ll  10
-rw-r--r--  test/Transforms/InstCombine/descale-zero.ll  21
-rw-r--r--  test/Transforms/InstCombine/distribute.ll (renamed from test/Transforms/InstSimplify/2010-12-20-Distribute.ll)  14
-rw-r--r--  test/Transforms/InstCombine/ffs-1.ll  6
-rw-r--r--  test/Transforms/InstCombine/gepphigep.ll  56
-rw-r--r--  test/Transforms/InstCombine/getelementptr.ll  17
-rw-r--r--  test/Transforms/InstCombine/icmp.ll  63
-rw-r--r--  test/Transforms/InstCombine/intrinsics.ll  13
-rw-r--r--  test/Transforms/InstCombine/memcpy-from-global.ll  3
-rw-r--r--  test/Transforms/InstCombine/overflow-mul.ll  11
-rw-r--r--  test/Transforms/InstCombine/pr20059.ll  16
-rw-r--r--  test/Transforms/InstCombine/pr20079.ll  9
-rw-r--r--  test/Transforms/InstCombine/r600-intrinsics.ll  47
-rw-r--r--  test/Transforms/InstCombine/rem.ll  2
-rw-r--r--  test/Transforms/InstCombine/select.ll  115
-rw-r--r--  test/Transforms/InstCombine/sext.ll  4
-rw-r--r--  test/Transforms/InstCombine/sub.ll  20
-rw-r--r--  test/Transforms/InstCombine/vec_shuffle.ll  9
-rw-r--r--  test/Transforms/InstCombine/zext-bool-add-sub.ll  2
-rw-r--r--  test/Transforms/InstSimplify/apint-or.ll  37
-rw-r--r--  test/Transforms/InstSimplify/compare.ll  30
-rw-r--r--  test/Transforms/JumpThreading/pr15851_hang.ll  22
-rw-r--r--  test/Transforms/JumpThreading/select.ll  2
-rw-r--r--  test/Transforms/LICM/extra-copies.ll  29
-rw-r--r--  test/Transforms/LICM/hoist-bitcast-load.ll  201
-rw-r--r--  test/Transforms/LoadCombine/load-combine.ll  190
-rw-r--r--  test/Transforms/LoopIdiom/X86/lit.local.cfg  3
-rw-r--r--  test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg  3
-rw-r--r--  test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll  2
-rw-r--r--  test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg  3
-rw-r--r--  test/Transforms/LoopStrengthReduce/X86/lit.local.cfg  3
-rw-r--r--  test/Transforms/LoopUnroll/PowerPC/lit.local.cfg  3
-rw-r--r--  test/Transforms/LoopUnroll/X86/lit.local.cfg  3
-rw-r--r--  test/Transforms/LoopUnroll/pr18861.ll  43
-rw-r--r--  test/Transforms/LoopUnroll/runtime-loop.ll  6
-rw-r--r--  test/Transforms/LoopUnroll/unroll-pragmas.ll  285
-rw-r--r--  test/Transforms/LoopVectorize/AArch64/lit.local.cfg  3
-rw-r--r--  test/Transforms/LoopVectorize/ARM/lit.local.cfg  3
-rw-r--r--  test/Transforms/LoopVectorize/PowerPC/lit.local.cfg  3
-rw-r--r--  test/Transforms/LoopVectorize/X86/already-vectorized.ll  4
-rw-r--r--  test/Transforms/LoopVectorize/X86/avx512.ll  35
-rw-r--r--  test/Transforms/LoopVectorize/X86/lit.local.cfg  3
-rw-r--r--  test/Transforms/LoopVectorize/X86/metadata-enable.ll  9
-rw-r--r--  test/Transforms/LoopVectorize/X86/vect.omp.force.ll  2
-rw-r--r--  test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll  2
-rw-r--r--  test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll  160
-rw-r--r--  test/Transforms/LoopVectorize/X86/vectorization-remarks.ll  17
-rw-r--r--  test/Transforms/LoopVectorize/XCore/lit.local.cfg  3
-rw-r--r--  test/Transforms/LoopVectorize/control-flow.ll  78
-rw-r--r--  test/Transforms/LoopVectorize/if-conversion.ll  2
-rw-r--r--  test/Transforms/LoopVectorize/induction.ll  61
-rw-r--r--  test/Transforms/LoopVectorize/intrinsic.ll  102
-rw-r--r--  test/Transforms/LoopVectorize/metadata-unroll.ll  2
-rw-r--r--  test/Transforms/LoopVectorize/metadata-width.ll  2
-rw-r--r--  test/Transforms/LoopVectorize/no_switch.ll  85
-rw-r--r--  test/Transforms/LoopVectorize/runtime-check-readonly.ll  1
-rw-r--r--  test/Transforms/LoopVectorize/vect.omp.persistence.ll  8
-rw-r--r--  test/Transforms/LoopVectorize/vectorize-once.ll  6
-rw-r--r--  test/Transforms/LowerAtomic/atomic-swap.ll  17
-rw-r--r--  test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll  27
-rw-r--r--  test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll  41
-rw-r--r--  test/Transforms/LowerSwitch/feature.ll  114
-rw-r--r--  test/Transforms/MergeFunc/functions.ll  27
-rw-r--r--  test/Transforms/MergeFunc/ranges.ll  43
-rw-r--r--  test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll  15
-rw-r--r--  test/Transforms/Reassociate/2002-05-15-MissedTree.ll  14
-rw-r--r--  test/Transforms/Reassociate/2002-05-15-SubReassociate.ll  34
-rw-r--r--  test/Transforms/Reassociate/2002-05-15-SubReassociate2.ll  13
-rw-r--r--  test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll  43
-rw-r--r--  test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll  16
-rw-r--r--  test/Transforms/Reassociate/basictest.ll  201
-rw-r--r--  test/Transforms/Reassociate/fp-commute.ll  21
-rw-r--r--  test/Transforms/Reassociate/inverses.ll  12
-rw-r--r--  test/Transforms/Reassociate/looptest.ll  1
-rw-r--r--  test/Transforms/Reassociate/mightymul.ll  4
-rw-r--r--  test/Transforms/Reassociate/multistep.ll  1
-rw-r--r--  test/Transforms/Reassociate/negation.ll  36
-rw-r--r--  test/Transforms/Reassociate/otherops.ll  42
-rw-r--r--  test/Transforms/Reassociate/shift-factor.ll  20
-rw-r--r--  test/Transforms/Reassociate/subtest.ll  31
-rw-r--r--  test/Transforms/Reassociate/subtest2.ll  13
-rw-r--r--  test/Transforms/SCCP/atomic.ll  9
-rw-r--r--  test/Transforms/SLPVectorizer/AArch64/lit.local.cfg  3
-rw-r--r--  test/Transforms/SLPVectorizer/ARM/lit.local.cfg  3
-rw-r--r--  test/Transforms/SLPVectorizer/R600/lit.local.cfg  3
-rw-r--r--  test/Transforms/SLPVectorizer/X86/addsub.ll  181
-rw-r--r--  test/Transforms/SLPVectorizer/X86/gep.ll  41
-rw-r--r--  test/Transforms/SLPVectorizer/X86/intrinsic.ll  267
-rw-r--r--  test/Transforms/SLPVectorizer/X86/lit.local.cfg  3
-rw-r--r--  test/Transforms/SLPVectorizer/X86/pr19657.ll  73
-rw-r--r--  test/Transforms/SLPVectorizer/XCore/lit.local.cfg  3
-rw-r--r--  test/Transforms/SROA/slice-order-independence.ll  37
-rw-r--r--  test/Transforms/SROA/slice-width.ll  25
-rw-r--r--  test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg  3
-rw-r--r--  test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll  92
-rw-r--r--  test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll  220
-rw-r--r--  test/Transforms/SimplifyCFG/PR17073.ll  73
-rw-r--r--  test/Transforms/SimplifyCFG/SPARC/lit.local.cfg  3
-rw-r--r--  test/Transforms/SimplifyCFG/X86/lit.local.cfg  3
-rw-r--r--  test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll  55
-rw-r--r--  test/Transforms/SimplifyCFG/speculate-vector-ops.ll  60
-rw-r--r--  test/Transforms/TailDup/X86/lit.local.cfg  3
-rw-r--r--  test/Transforms/TailDup/lit.local.cfg  3
148 files changed, 5538 insertions, 775 deletions
diff --git a/test/Transforms/ArgumentPromotion/basictest.ll b/test/Transforms/ArgumentPromotion/basictest.ll
index d3d21fc..8f78b98 100644
--- a/test/Transforms/ArgumentPromotion/basictest.ll
+++ b/test/Transforms/ArgumentPromotion/basictest.ll
@@ -1,23 +1,29 @@
-; RUN: opt < %s -basicaa -argpromotion -mem2reg -S | not grep alloca
+; RUN: opt < %s -basicaa -argpromotion -mem2reg -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
define internal i32 @test(i32* %X, i32* %Y) {
- %A = load i32* %X ; <i32> [#uses=1]
- %B = load i32* %Y ; <i32> [#uses=1]
- %C = add i32 %A, %B ; <i32> [#uses=1]
- ret i32 %C
+; CHECK-LABEL: define internal i32 @test(i32 %X.val, i32 %Y.val)
+ %A = load i32* %X
+ %B = load i32* %Y
+ %C = add i32 %A, %B
+ ret i32 %C
}
define internal i32 @caller(i32* %B) {
- %A = alloca i32 ; <i32*> [#uses=2]
- store i32 1, i32* %A
- %C = call i32 @test( i32* %A, i32* %B ) ; <i32> [#uses=1]
- ret i32 %C
+; CHECK-LABEL: define internal i32 @caller(i32 %B.val1)
+ %A = alloca i32
+ store i32 1, i32* %A
+ %C = call i32 @test(i32* %A, i32* %B)
+; CHECK: call i32 @test(i32 1, i32 %B.val1)
+ ret i32 %C
}
define i32 @callercaller() {
- %B = alloca i32 ; <i32*> [#uses=2]
- store i32 2, i32* %B
- %X = call i32 @caller( i32* %B ) ; <i32> [#uses=1]
- ret i32 %X
+; CHECK-LABEL: define i32 @callercaller()
+ %B = alloca i32
+ store i32 2, i32* %B
+ %X = call i32 @caller(i32* %B)
+; CHECK: call i32 @caller(i32 2)
+ ret i32 %X
}
diff --git a/test/Transforms/ArgumentPromotion/byval-2.ll b/test/Transforms/ArgumentPromotion/byval-2.ll
index 368c689..b412f5e 100644
--- a/test/Transforms/ArgumentPromotion/byval-2.ll
+++ b/test/Transforms/ArgumentPromotion/byval-2.ll
@@ -1,26 +1,31 @@
-; RUN: opt < %s -argpromotion -S | grep -F "i32* byval" | count 2
-; Argpromote + scalarrepl should change this to passing the two integers by value.
+; RUN: opt < %s -argpromotion -S | FileCheck %s
- %struct.ss = type { i32, i64 }
+; Arg promotion eliminates the struct argument.
+; FIXME: Should it eliminate the i32* argument?
+
+%struct.ss = type { i32, i64 }
define internal void @f(%struct.ss* byval %b, i32* byval %X) nounwind {
+; CHECK-LABEL: define internal void @f(i32 %b.0, i64 %b.1, i32* byval %X)
entry:
- %tmp = getelementptr %struct.ss* %b, i32 0, i32 0
- %tmp1 = load i32* %tmp, align 4
- %tmp2 = add i32 %tmp1, 1
- store i32 %tmp2, i32* %tmp, align 4
+ %tmp = getelementptr %struct.ss* %b, i32 0, i32 0
+ %tmp1 = load i32* %tmp, align 4
+ %tmp2 = add i32 %tmp1, 1
+ store i32 %tmp2, i32* %tmp, align 4
- store i32 0, i32* %X
- ret void
+ store i32 0, i32* %X
+ ret void
}
define i32 @test(i32* %X) {
+; CHECK-LABEL: define i32 @test
entry:
- %S = alloca %struct.ss ; <%struct.ss*> [#uses=4]
- %tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %tmp1, align 8
- %tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1 ; <i64*> [#uses=1]
- store i64 2, i64* %tmp4, align 4
- call void @f( %struct.ss* byval %S, i32* byval %X)
- ret i32 0
+ %S = alloca %struct.ss
+ %tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0
+ store i32 1, i32* %tmp1, align 8
+ %tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1
+ store i64 2, i64* %tmp4, align 4
+ call void @f( %struct.ss* byval %S, i32* byval %X)
+; CHECK: call void @f(i32 %{{.*}}, i64 %{{.*}}, i32* byval %{{.*}})
+ ret i32 0
}
diff --git a/test/Transforms/ArgumentPromotion/byval.ll b/test/Transforms/ArgumentPromotion/byval.ll
index 44b26fc..27305e9 100644
--- a/test/Transforms/ArgumentPromotion/byval.ll
+++ b/test/Transforms/ArgumentPromotion/byval.ll
@@ -1,25 +1,28 @@
-; RUN: opt < %s -argpromotion -scalarrepl -S | not grep load
+; RUN: opt < %s -argpromotion -S | FileCheck %s
+
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-; Argpromote + scalarrepl should change this to passing the two integers by value.
- %struct.ss = type { i32, i64 }
+%struct.ss = type { i32, i64 }
define internal void @f(%struct.ss* byval %b) nounwind {
+; CHECK-LABEL: define internal void @f(i32 %b.0, i64 %b.1)
entry:
- %tmp = getelementptr %struct.ss* %b, i32 0, i32 0 ; <i32*> [#uses=2]
- %tmp1 = load i32* %tmp, align 4 ; <i32> [#uses=1]
- %tmp2 = add i32 %tmp1, 1 ; <i32> [#uses=1]
- store i32 %tmp2, i32* %tmp, align 4
- ret void
+ %tmp = getelementptr %struct.ss* %b, i32 0, i32 0 ; <i32*> [#uses=2]
+ %tmp1 = load i32* %tmp, align 4 ; <i32> [#uses=1]
+ %tmp2 = add i32 %tmp1, 1 ; <i32> [#uses=1]
+ store i32 %tmp2, i32* %tmp, align 4
+ ret void
}
define i32 @main() nounwind {
+; CHECK-LABEL: define i32 @main
entry:
- %S = alloca %struct.ss ; <%struct.ss*> [#uses=4]
- %tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %tmp1, align 8
- %tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1 ; <i64*> [#uses=1]
- store i64 2, i64* %tmp4, align 4
- call void @f( %struct.ss* byval %S ) nounwind
- ret i32 0
+ %S = alloca %struct.ss ; <%struct.ss*> [#uses=4]
+ %tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0 ; <i32*> [#uses=1]
+ store i32 1, i32* %tmp1, align 8
+ %tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1 ; <i64*> [#uses=1]
+ store i64 2, i64* %tmp4, align 4
+ call void @f( %struct.ss* byval %S ) nounwind
+; CHECK: call void @f(i32 %{{.*}}, i64 %{{.*}})
+ ret i32 0
}
diff --git a/test/Transforms/ArgumentPromotion/dbg.ll b/test/Transforms/ArgumentPromotion/dbg.ll
new file mode 100644
index 0000000..70503af
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/dbg.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -argpromotion -S | FileCheck %s
+; CHECK: call void @test(), !dbg [[DBG_LOC:![0-9]]]
+; CHECK: [[TEST_FN:.*]] = {{.*}} void ()* @test
+; CHECK: [[DBG_LOC]] = metadata !{i32 8, i32 0, metadata [[TEST_FN]], null}
+
+define internal void @test(i32* %X) {
+ ret void
+}
+
+define void @caller() {
+ call void @test(i32* null), !dbg !1
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!llvm.dbg.cu = !{!3}
+
+!0 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!1 = metadata !{i32 8, i32 0, metadata !2, null}
+!2 = metadata !{i32 786478, null, null, metadata !"test", metadata !"test", metadata !"", i32 3, null, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @test, null, null, null, i32 3}
+!3 = metadata !{i32 786449, null, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, null, null, metadata !4, null, null, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/pr20038/reduce/<stdin>] [DW_LANG_C_plus_plus]
+!4 = metadata !{metadata !2}
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
index ac9fc1f..6a93016 100644
--- a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
+++ b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
@@ -80,8 +80,8 @@ define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) {
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
-; CHECK: [[NEWVAL_TMP:%.*]] = xor i16 %nandend, -1
-; CHECK: [[NEWVAL:%.*]] = and i16 [[OLDVAL]], [[NEWVAL_TMP]]
+; CHECK: [[NEWVAL_TMP:%.*]] = and i16 [[OLDVAL]], %nandend
+; CHECK: [[NEWVAL:%.*]] = xor i16 [[NEWVAL_TMP]], -1
; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
@@ -229,22 +229,28 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[BARRIER:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
+; CHECK: [[SUCCESS_BB]]:
; CHECK: fence seq_cst
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK: fence seq_cst
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i8 [[OLDVAL]]
- %old = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
+ %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
+ %old = extractvalue { i8, i1 } %pairold, 0
ret i8 %old
}
@@ -257,22 +263,28 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
+; CHECK: [[SUCCESS_BB]]:
; CHECK: fence seq_cst
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i16 [[OLDVAL]]
- %old = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
+ %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
+ %old = extractvalue { i16, i1 } %pairold, 0
ret i16 %old
}
@@ -284,21 +296,27 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
+; CHECK: [[SUCCESS_BB]]:
; CHECK: fence acquire
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK: fence acquire
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i32 [[OLDVAL]]
- %old = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
+ %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
+ %old = extractvalue { i32, i1 } %pairold, 0
ret i32 %old
}
@@ -317,7 +335,7 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
@@ -325,16 +343,22 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
+; CHECK: [[SUCCESS_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i64 [[OLDVAL]]
- %old = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
+ %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
+ %old = extractvalue { i64, i1 } %pairold, 0
ret i64 %old
}
\ No newline at end of file
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
index bec5bef..8092c10 100644
--- a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
+++ b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
@@ -91,22 +91,28 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[BARRIER:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
-; CHECK-NOT: fence
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence_cst
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i8 [[OLDVAL]]
- %old = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
+ %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
+ %old = extractvalue { i8, i1 } %pairold, 0
ret i8 %old
}
@@ -119,22 +125,28 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
+; CHECK: [[SUCCESS_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i16 [[OLDVAL]]
- %old = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
+ %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
+ %old = extractvalue { i16, i1 } %pairold, 0
ret i16 %old
}
@@ -146,21 +158,27 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
-; CHECK-NOT: fence
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence_cst
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i32 [[OLDVAL]]
- %old = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
+ %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
+ %old = extractvalue { i32, i1 } %pairold, 0
ret i32 %old
}
@@ -179,7 +197,7 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
@@ -187,16 +205,22 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-; CHECK: [[BARRIER]]:
-; CHECK-NOT: fence
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence_cst
+; CHECK: br label %[[DONE]]
+
; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i64 [[OLDVAL]]
- %old = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
+ %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
+ %old = extractvalue { i64, i1 } %pairold, 0
ret i64 %old
}
\ No newline at end of file
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll b/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll
new file mode 100644
index 0000000..07a4a7f
--- /dev/null
+++ b/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll
@@ -0,0 +1,97 @@
+; RUN: opt -atomic-ll-sc -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
+
+define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: @test_cmpxchg_seq_cst
+; CHECK: fence release
+; CHECK: br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK: fence seq_cst
+; CHECK: br label %[[END:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK: fence seq_cst
+; CHECK: br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i32 [[LOADED]]
+
+ %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+ %oldval = extractvalue { i32, i1 } %pair, 0
+ ret i32 %oldval
+}
+
+define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: @test_cmpxchg_weak_fail
+; CHECK: fence release
+; CHECK: br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK: fence seq_cst
+; CHECK: br label %[[END:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i1 [[SUCCESS]]
+
+ %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+ %oldval = extractvalue { i32, i1 } %pair, 1
+ ret i1 %oldval
+}
+
+define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: @test_cmpxchg_monotonic
+; CHECK-NOT: fence
+; CHECK: br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[END:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i32 [[LOADED]]
+
+ %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new monotonic monotonic
+ %oldval = extractvalue { i32, i1 } %pair, 0
+ ret i32 %oldval
+}
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg b/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg
+++ b/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/BBVectorize/lit.local.cfg b/test/Transforms/BBVectorize/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/BBVectorize/lit.local.cfg
+++ b/test/Transforms/BBVectorize/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/CodeGenPrepare/X86/lit.local.cfg b/test/Transforms/CodeGenPrepare/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/CodeGenPrepare/X86/lit.local.cfg
+++ b/test/Transforms/CodeGenPrepare/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg b/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg
index c420349..7184443 100644
--- a/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg
+++ b/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
+if not 'AArch64' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg b/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg
index 2e46300..5d33887 100644
--- a/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg
+++ b/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'PowerPC' in targets:
+if not 'PowerPC' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/ConstantHoisting/X86/large-immediate.ll b/test/Transforms/ConstantHoisting/X86/large-immediate.ll
index e0af9c9..b8c04f3 100644
--- a/test/Transforms/ConstantHoisting/X86/large-immediate.ll
+++ b/test/Transforms/ConstantHoisting/X86/large-immediate.ll
@@ -25,3 +25,12 @@ define i196 @test3(i196 %a) nounwind {
%2 = mul i196 %1, 2
ret i196 %2
}
+
+; Check that we don't hoist immediates with small values.
+define i96 @test4(i96 %a) nounwind {
+; CHECK-LABEL: test4
+; CHECK-NOT: %const = bitcast i96 2 to i96
+ %1 = mul i96 %a, 2
+ %2 = add i96 %1, 2
+ ret i96 %2
+}
diff --git a/test/Transforms/ConstantHoisting/X86/lit.local.cfg b/test/Transforms/ConstantHoisting/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/ConstantHoisting/X86/lit.local.cfg
+++ b/test/Transforms/ConstantHoisting/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/ConstantHoisting/X86/stackmap.ll b/test/Transforms/ConstantHoisting/X86/stackmap.ll
index cef022e..9df4417 100644
--- a/test/Transforms/ConstantHoisting/X86/stackmap.ll
+++ b/test/Transforms/ConstantHoisting/X86/stackmap.ll
@@ -6,11 +6,11 @@ target triple = "x86_64-apple-macosx10.9.0"
; Test if the 3rd argument of a stackmap is hoisted.
define i128 @test1(i128 %a) {
; CHECK-LABEL: @test1
-; CHECK: %const = bitcast i128 13464618275673403322 to i128
+; CHECK: %const = bitcast i128 134646182756734033220 to i128
; CHECK: tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 %const)
entry:
- %0 = add i128 %a, 13464618275673403322
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 13464618275673403322)
+ %0 = add i128 %a, 134646182756734033220
+ tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 134646182756734033220)
ret i128 %0
}
diff --git a/test/Transforms/FunctionAttrs/nocapture.ll b/test/Transforms/FunctionAttrs/nocapture.ll
index d2460c0..d3842c8 100644
--- a/test/Transforms/FunctionAttrs/nocapture.ll
+++ b/test/Transforms/FunctionAttrs/nocapture.ll
@@ -68,7 +68,7 @@ define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind {
ret i1* %lookup
}
-; CHECK: define i1 @c7(i32* readnone %q, i32 %bitno)
+; CHECK: define i1 @c7(i32* readonly %q, i32 %bitno)
define i1 @c7(i32* %q, i32 %bitno) {
%ptr = call i1* @lookup_bit(i32* %q, i32 %bitno)
%val = load i1* %ptr
diff --git a/test/Transforms/FunctionAttrs/readattrs.ll b/test/Transforms/FunctionAttrs/readattrs.ll
index 7ae38bb..b4e904c 100644
--- a/test/Transforms/FunctionAttrs/readattrs.ll
+++ b/test/Transforms/FunctionAttrs/readattrs.ll
@@ -51,3 +51,17 @@ define void @test6_2(i8** %p, i8* %q) {
define void @test7_1(i32* inalloca %a) {
ret void
}
+
+; CHECK: define i32* @test8_1(i32* readnone %p)
+define i32* @test8_1(i32* %p) {
+entry:
+ ret i32* %p
+}
+
+; CHECK: define void @test8_2(i32* %p)
+define void @test8_2(i32* %p) {
+entry:
+ %call = call i32* @test8_1(i32* %p)
+ store i32 10, i32* %call, align 4
+ ret void
+}
diff --git a/test/Transforms/GCOVProfiling/global-ctor.ll b/test/Transforms/GCOVProfiling/global-ctor.ll
new file mode 100644
index 0000000..722a096
--- /dev/null
+++ b/test/Transforms/GCOVProfiling/global-ctor.ll
@@ -0,0 +1,58 @@
+; RUN: echo '!16 = metadata !{metadata !"%T/global-ctor.ll", metadata !0}' > %t1
+; RUN: cat %s %t1 > %t2
+; RUN: opt -insert-gcov-profiling -disable-output < %t2
+; RUN: not grep '_GLOBAL__sub_I_global-ctor' %T/global-ctor.gcno
+; RUN: rm %T/global-ctor.gcno
+
+; REQUIRES: shell
+
+@x = global i32 0, align 4
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_global-ctor.ll, i8* null }]
+
+; Function Attrs: nounwind
+define internal void @__cxx_global_var_init() #0 section ".text.startup" {
+entry:
+ br label %0
+
+; <label>:0 ; preds = %entry
+ %call = call i32 @_Z1fv(), !dbg !13
+ store i32 %call, i32* @x, align 4, !dbg !13
+ ret void, !dbg !13
+}
+
+declare i32 @_Z1fv() #1
+
+; Function Attrs: nounwind
+define internal void @_GLOBAL__sub_I_global-ctor.ll() #0 section ".text.startup" {
+entry:
+ br label %0
+
+; <label>:0 ; preds = %entry
+ call void @__cxx_global_var_init(), !dbg !14
+ ret void, !dbg !14
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+!llvm.gcov = !{!16}
+!llvm.ident = !{!12}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (trunk 210217)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [/home/nlewycky/<stdin>] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"<stdin>", metadata !"/home/nlewycky"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !8}
+!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"__cxx_global_var_init", metadata !"__cxx_global_var_init", metadata !"", i32 2, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @__cxx_global_var_init, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [local] [def] [__cxx_global_var_init]
+!5 = metadata !{metadata !"global-ctor.ll", metadata !"/home/nlewycky"}
+!6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [/home/nlewycky/global-ctor.ll]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"", metadata !"", metadata !"_GLOBAL__sub_I_global-ctor.ll", i32 0, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 64, i1 false, void ()* @_GLOBAL__sub_I_global-ctor.ll, null, null, metadata !2, i32 0} ; [ DW_TAG_subprogram ] [line 0] [local] [def]
+!9 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/home/nlewycky/<stdin>]
+!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!11 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!12 = metadata !{metadata !"clang version 3.5.0 (trunk 210217)"}
+!13 = metadata !{i32 2, i32 0, metadata !4, null}
+!14 = metadata !{i32 0, i32 0, metadata !15, null}
+!15 = metadata !{i32 786443, metadata !5, metadata !8} ; [ DW_TAG_lexical_block ] [/home/nlewycky/global-ctor.ll]
diff --git a/test/Transforms/GCOVProfiling/linezero.ll b/test/Transforms/GCOVProfiling/linezero.ll
new file mode 100644
index 0000000..e2f8324
--- /dev/null
+++ b/test/Transforms/GCOVProfiling/linezero.ll
@@ -0,0 +1,143 @@
+; RUN: sed -e 's@PATTERN@\%T@g' < %s > %t1
+; RUN: opt -insert-gcov-profiling -disable-output < %t1
+; RUN: rm %T/linezero.gcno %t1
+; REQUIRES: shell
+
+; This is a crash test.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.vector = type { i8 }
+
+; Function Attrs: nounwind
+define i32 @_Z4testv() #0 {
+entry:
+ %retval = alloca i32, align 4
+ %__range = alloca %struct.vector*, align 8
+ %ref.tmp = alloca %struct.vector, align 1
+ %undef.agg.tmp = alloca %struct.vector, align 1
+ %__begin = alloca i8*, align 8
+ %__end = alloca i8*, align 8
+ %spec = alloca i8, align 1
+ call void @llvm.dbg.declare(metadata !{%struct.vector** %__range}, metadata !27), !dbg !30
+ br label %0
+
+; <label>:0 ; preds = %entry
+ call void @_Z13TagFieldSpecsv(), !dbg !31
+ store %struct.vector* %ref.tmp, %struct.vector** %__range, align 8, !dbg !31
+ call void @llvm.dbg.declare(metadata !{i8** %__begin}, metadata !32), !dbg !30
+ %1 = load %struct.vector** %__range, align 8, !dbg !31
+ %call = call i8* @_ZN6vector5beginEv(%struct.vector* %1), !dbg !31
+ store i8* %call, i8** %__begin, align 8, !dbg !31
+ call void @llvm.dbg.declare(metadata !{i8** %__end}, metadata !33), !dbg !30
+ %2 = load %struct.vector** %__range, align 8, !dbg !31
+ %call1 = call i8* @_ZN6vector3endEv(%struct.vector* %2), !dbg !31
+ store i8* %call1, i8** %__end, align 8, !dbg !31
+ br label %for.cond, !dbg !31
+
+for.cond: ; preds = %for.inc, %0
+ %3 = load i8** %__begin, align 8, !dbg !34
+ %4 = load i8** %__end, align 8, !dbg !34
+ %cmp = icmp ne i8* %3, %4, !dbg !34
+ br i1 %cmp, label %for.body, label %for.end, !dbg !34
+
+for.body: ; preds = %for.cond
+ call void @llvm.dbg.declare(metadata !{i8* %spec}, metadata !37), !dbg !31
+ %5 = load i8** %__begin, align 8, !dbg !38
+ %6 = load i8* %5, align 1, !dbg !38
+ store i8 %6, i8* %spec, align 1, !dbg !38
+ br label %for.inc, !dbg !38
+
+for.inc: ; preds = %for.body
+ %7 = load i8** %__begin, align 8, !dbg !40
+ %incdec.ptr = getelementptr inbounds i8* %7, i32 1, !dbg !40
+ store i8* %incdec.ptr, i8** %__begin, align 8, !dbg !40
+ br label %for.cond, !dbg !40
+
+for.end: ; preds = %for.cond
+ call void @llvm.trap(), !dbg !42
+ unreachable, !dbg !42
+
+return: ; No predecessors!
+ %8 = load i32* %retval, !dbg !44
+ ret i32 %8, !dbg !44
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+declare void @_Z13TagFieldSpecsv() #2
+
+declare i8* @_ZN6vector5beginEv(%struct.vector*) #2
+
+declare i8* @_ZN6vector3endEv(%struct.vector*) #2
+
+; Function Attrs: noreturn nounwind
+declare void @llvm.trap() #3
+
+; Function Attrs: nounwind
+define void @_Z2f1v() #0 {
+entry:
+ br label %0
+
+; <label>:0 ; preds = %entry
+ ret void, !dbg !45
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { noreturn nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!23, !24}
+!llvm.gcov = !{!25}
+!llvm.ident = !{!26}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (trunk 209871)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !14, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [<stdin>] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"<stdin>", metadata !"PATTERN"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !5, null, metadata !"vector", i32 21, i64 8, i64 8, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS6vector"} ; [ DW_TAG_structure_type ] [vector] [line 21, size 8, align 8, offset 0] [def] [from ]
+!5 = metadata !{metadata !"linezero.cc", metadata !"PATTERN"}
+!6 = metadata !{metadata !7, metadata !13}
+!7 = metadata !{i32 786478, metadata !5, metadata !"_ZTS6vector", metadata !"begin", metadata !"begin", metadata !"_ZN6vector5beginEv", i32 25, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 25} ; [ DW_TAG_subprogram ] [line 25] [begin]
+!8 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = metadata !{metadata !10, metadata !12}
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!11 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS6vector"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS6vector]
+!13 = metadata !{i32 786478, metadata !5, metadata !"_ZTS6vector", metadata !"end", metadata !"end", metadata !"_ZN6vector3endEv", i32 26, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 26} ; [ DW_TAG_subprogram ] [line 26] [end]
+!14 = metadata !{metadata !15, metadata !20}
+!15 = metadata !{i32 786478, metadata !5, metadata !16, metadata !"test", metadata !"test", metadata !"_Z4testv", i32 50, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z4testv, null, null, metadata !2, i32 50} ; [ DW_TAG_subprogram ] [line 50] [def] [test]
+!16 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [./linezero.cc]
+!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = metadata !{metadata !19}
+!19 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!20 = metadata !{i32 786478, metadata !5, metadata !16, metadata !"f1", metadata !"f1", metadata !"_Z2f1v", i32 54, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z2f1v, null, null, metadata !2, i32 54} ; [ DW_TAG_subprogram ] [line 54] [def] [f1]
+!21 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!22 = metadata !{null}
+!23 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!24 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!25 = metadata !{metadata !"PATTERN/linezero.o", metadata !0}
+!26 = metadata !{metadata !"clang version 3.5.0 (trunk 209871)"}
+!27 = metadata !{i32 786688, metadata !28, metadata !"__range", null, i32 0, metadata !29, i32 64, i32 0} ; [ DW_TAG_auto_variable ] [__range] [line 0]
+!28 = metadata !{i32 786443, metadata !5, metadata !15, i32 51, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!29 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS6vector"} ; [ DW_TAG_rvalue_reference_type ] [line 0, size 0, align 0, offset 0] [from _ZTS6vector]
+!30 = metadata !{i32 0, i32 0, metadata !28, null}
+!31 = metadata !{i32 51, i32 0, metadata !28, null}
+!32 = metadata !{i32 786688, metadata !28, metadata !"__begin", null, i32 0, metadata !10, i32 64, i32 0} ; [ DW_TAG_auto_variable ] [__begin] [line 0]
+!33 = metadata !{i32 786688, metadata !28, metadata !"__end", null, i32 0, metadata !10, i32 64, i32 0} ; [ DW_TAG_auto_variable ] [__end] [line 0]
+!34 = metadata !{i32 51, i32 0, metadata !35, null}
+!35 = metadata !{i32 786443, metadata !5, metadata !36, i32 51, i32 0, i32 5, i32 5} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!36 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 1, i32 1} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!37 = metadata !{i32 786688, metadata !28, metadata !"spec", metadata !16, i32 51, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [spec] [line 51]
+!38 = metadata !{i32 51, i32 0, metadata !39, null}
+!39 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 2, i32 2} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!40 = metadata !{i32 51, i32 0, metadata !41, null}
+!41 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 4, i32 4} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!42 = metadata !{i32 51, i32 0, metadata !43, null}
+!43 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 3, i32 3} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!44 = metadata !{i32 52, i32 0, metadata !15, null}
+!45 = metadata !{i32 54, i32 0, metadata !20, null}
diff --git a/test/Transforms/GVN/calloc-load-removal.ll b/test/Transforms/GVN/calloc-load-removal.ll
new file mode 100644
index 0000000..2dde5b7
--- /dev/null
+++ b/test/Transforms/GVN/calloc-load-removal.ll
@@ -0,0 +1,25 @@
+; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
+; RUN: opt -S -basicaa -gvn -disable-simplify-libcalls < %s | FileCheck %s -check-prefix=CHECK_NO_LIBCALLS
+; Check that loads from calloc are recognized as being zero.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define i32 @test1() {
+ %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+ %2 = bitcast i8* %1 to i32*
+ ; This load is trivially constant zero
+ %3 = load i32* %2, align 4
+ ret i32 %3
+
+; CHECK-LABEL: @test1(
+; CHECK-NOT: %3 = load i32* %2, align 4
+; CHECK: ret i32 0
+
+; CHECK_NO_LIBCALLS-LABEL: @test1(
+; CHECK_NO_LIBCALLS: load
+; CHECK_NO_LIBCALLS: ret i32 %
+
+}
+
+declare noalias i8* @calloc(i64, i64)
diff --git a/test/Transforms/GVN/invariant-load.ll b/test/Transforms/GVN/invariant-load.ll
new file mode 100644
index 0000000..80e2226
--- /dev/null
+++ b/test/Transforms/GVN/invariant-load.ll
@@ -0,0 +1,31 @@
+; Test if the !invariant.load metadata is maintained by GVN.
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+
+define i32 @test1(i32* nocapture %p, i8* nocapture %q) {
+; CHECK-LABEL: test1
+; CHECK: %x = load i32* %p, align 4, !invariant.load !0
+; CHECK-NOT: %y = load
+entry:
+ %x = load i32* %p, align 4, !invariant.load !0
+ %conv = trunc i32 %x to i8
+ store i8 %conv, i8* %q, align 1
+ %y = load i32* %p, align 4, !invariant.load !0
+ %add = add i32 %y, 1
+ ret i32 %add
+}
+
+define i32 @test2(i32* nocapture %p, i8* nocapture %q) {
+; CHECK-LABEL: test2
+; CHECK-NOT: !invariant.load
+; CHECK-NOT: %y = load
+entry:
+ %x = load i32* %p, align 4
+ %conv = trunc i32 %x to i8
+ store i8 %conv, i8* %q, align 1
+ %y = load i32* %p, align 4, !invariant.load !0
+ %add = add i32 %y, 1
+ ret i32 %add
+}
+
+!0 = metadata !{ }
+
diff --git a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
index 4b96799..0bdced5 100644
--- a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
+++ b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
@@ -11,8 +11,8 @@
@L1 = alias i32* @A
; CHECK: @L1 = alias i32* @A
-@L2 = alias internal i32* @A
-; DEAD-NOT: @L2
+@L2 = alias internal i32* @L1
+; CHECK: @L2 = alias internal i32* @L1
-@L3 = alias i32* @A
-; CHECK: @L3 = alias i32* @A
+@L3 = alias i32* @L2
+; CHECK: @L3 = alias i32* @L2
diff --git a/test/Transforms/GlobalMerge/AArch64/arm64.ll b/test/Transforms/GlobalMerge/AArch64/arm64.ll
deleted file mode 100644
index eea474a..0000000
--- a/test/Transforms/GlobalMerge/AArch64/arm64.ll
+++ /dev/null
@@ -1,88 +0,0 @@
-; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O0 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O0 -o - -global-merge=true | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s
-; RUN: llc %s -O1 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - -global-merge=true | FileCheck -check-prefix=MERGE %s
-
-; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
-; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4
-; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
-
-; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
-; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2
-; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2
-; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2
-; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
-target triple = "arm64-apple-ios7.0.0"
-
-@bar = internal global [5 x i32] zeroinitializer, align 4
-@baz = internal global [5 x i32] zeroinitializer, align 4
-@foo = internal global [5 x i32] zeroinitializer, align 4
-
-; Function Attrs: nounwind ssp
-define internal void @initialize() #0 {
- %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4
- %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4
- %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4
- %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4
- %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4
- %6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4
- %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4
- %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4
- %9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4
- %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4
- ret void
-}
-
-declare i32 @calc(...)
-
-; Function Attrs: nounwind ssp
-define internal void @calculate() #0 {
- %1 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4
- %2 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4
- %3 = mul nsw i32 %2, %1
- store i32 %3, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0), align 4
- %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4
- %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4
- %6 = mul nsw i32 %5, %4
- store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 1), align 4
- %7 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4
- %8 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4
- %9 = mul nsw i32 %8, %7
- store i32 %9, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 2), align 4
- %10 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4
- %11 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4
- %12 = mul nsw i32 %11, %10
- store i32 %12, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 3), align 4
- %13 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4
- %14 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4
- %15 = mul nsw i32 %14, %13
- store i32 %15, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 4), align 4
- ret void
-}
-
-; Function Attrs: nounwind readnone ssp
-define internal i32* @returnFoo() #1 {
- ret i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0)
-}
-
-attributes #0 = { nounwind ssp }
-attributes #1 = { nounwind readnone ssp }
-attributes #2 = { nounwind }
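For orientation: the deleted test above exercised the GlobalMerge pass, which folds the three internal 20-byte arrays @bar, @baz and @foo into a single 60-byte symbol, which is what the .zerofill __DATA,__bss,__MergedGlobals,60,4 check asserts. Below is a minimal sketch of the intended effect in IR form; the merged-global name, struct layout and @returnFooSketch function are illustrative only and are not part of the commit.

; Before merging: three separate internal arrays, each emitted as its own
; 20-byte zerofill (as in the NO-MERGE checks above).
; After GlobalMerge, conceptually:
@_MergedGlobals = internal global { [5 x i32], [5 x i32], [5 x i32] } zeroinitializer, align 4

; Old references become constant offsets into the merged symbol, e.g. the
; address of @foo (the third member):
define i32* @returnFooSketch() {
  ret i32* getelementptr inbounds ({ [5 x i32], [5 x i32], [5 x i32] }* @_MergedGlobals, i64 0, i32 2, i64 0)
}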
diff --git a/test/Transforms/GlobalMerge/AArch64/lit.local.cfg b/test/Transforms/GlobalMerge/AArch64/lit.local.cfg
deleted file mode 100644
index 9a66a00..0000000
--- a/test/Transforms/GlobalMerge/AArch64/lit.local.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
- config.unsupported = True
-
diff --git a/test/Transforms/GlobalMerge/ARM/arm.ll b/test/Transforms/GlobalMerge/ARM/arm.ll
deleted file mode 100644
index 8c77de6..0000000
--- a/test/Transforms/GlobalMerge/ARM/arm.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O0 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O0 -o - -global-merge=true | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s
-; RUN: llc %s -O1 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - -global-merge=true | FileCheck -check-prefix=MERGE %s
-
-; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
-; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4
-; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
-
-; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
-; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2
-; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2
-; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2
-; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
-
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
-target triple = "thumbv7-apple-ios3.0.0"
-
-@bar = internal global [5 x i32] zeroinitializer, align 4
-@baz = internal global [5 x i32] zeroinitializer, align 4
-@foo = internal global [5 x i32] zeroinitializer, align 4
-
-; Function Attrs: nounwind ssp
-define internal void @initialize() #0 {
- %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 0), align 4, !tbaa !1
- %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 0), align 4, !tbaa !1
- %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 1), align 4, !tbaa !1
- %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 1), align 4, !tbaa !1
- %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 2), align 4, !tbaa !1
- %6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 2), align 4, !tbaa !1
- %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 3), align 4, !tbaa !1
- %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 3), align 4, !tbaa !1
- %9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
- %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
- ret void
-}
-
-declare i32 @calc(...) #1
-
-; Function Attrs: nounwind ssp
-define internal void @calculate() #0 {
- %1 = load <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4
- %2 = load <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4
- %3 = mul <4 x i32> %2, %1
- store <4 x i32> %3, <4 x i32>* bitcast ([5 x i32]* @foo to <4 x i32>*), align 4
- %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
- %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
- %6 = mul nsw i32 %5, %4
- store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1
- ret void
-}
-
-; Function Attrs: nounwind readnone ssp
-define internal i32* @returnFoo() #2 {
- ret i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 0)
-}
-
-attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind }
-
-!llvm.ident = !{!0}
-
-!0 = metadata !{metadata !"LLVM version 3.4 "}
-!1 = metadata !{metadata !2, metadata !2, i64 0}
-!2 = metadata !{metadata !"int", metadata !3, i64 0}
-!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
-!4 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/GlobalMerge/ARM/lit.local.cfg b/test/Transforms/GlobalMerge/ARM/lit.local.cfg
deleted file mode 100644
index 8a3ba96..0000000
--- a/test/Transforms/GlobalMerge/ARM/lit.local.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
- config.unsupported = True
-
diff --git a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
index 03d6ee4..d6a565a 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
@@ -2,7 +2,7 @@
@g = global i32 0
-@a = alias i8, i32* @g
+@a = alias bitcast (i32* @g to i8*)
define void @f() {
%tmp = load i8* @a
diff --git a/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll b/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll
index 62f75e1..930a96e 100644
--- a/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll
+++ b/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll
@@ -1,11 +1,23 @@
-; RUN: opt < %s -globalopt -S | grep internal | count 2
+; RUN: opt < %s -globalopt -S | FileCheck %s
global i32 0
-define i32* @1() {
+; CHECK-DAG: @0 = internal global i32 0
+
+private global i32 0
+; CHECK-DAG: @1 = private global i32 0
+
+define i32* @2() {
ret i32* @0
}
+; CHECK-DAG: define internal fastcc i32* @2()
+
define i32* @f() {
entry:
- call i32* @1()
+ call i32* @2()
ret i32* %0
}
+
+define i32* @g() {
+entry:
+ ret i32* @1
+}
diff --git a/test/Transforms/GlobalOpt/alias-resolve.ll b/test/Transforms/GlobalOpt/alias-resolve.ll
index bd07b31..9d70c70 100644
--- a/test/Transforms/GlobalOpt/alias-resolve.ll
+++ b/test/Transforms/GlobalOpt/alias-resolve.ll
@@ -1,9 +1,9 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
-@foo1 = alias void ()* @bar2
+@foo1 = alias void ()* @foo2
; CHECK: @foo1 = alias void ()* @bar2
-@foo2 = alias void()* @bar2
+@foo2 = alias void()* @bar1
; CHECK: @foo2 = alias void ()* @bar2
@bar1 = alias void ()* @bar2
@@ -12,6 +12,10 @@
@weak1 = alias weak void ()* @bar2
; CHECK: @weak1 = alias weak void ()* @bar2
+@bar4 = private unnamed_addr constant [2 x i8*] zeroinitializer
+@foo4 = unnamed_addr alias linkonce_odr getelementptr inbounds ([2 x i8*]* @bar4, i32 0, i32 1)
+; CHECK: @foo4 = unnamed_addr alias linkonce_odr getelementptr inbounds ([2 x i8*]* @bar4, i32 0, i32 1)
+
define void @bar2() {
ret void
}
diff --git a/test/Transforms/GlobalOpt/constantfold-initializers.ll b/test/Transforms/GlobalOpt/constantfold-initializers.ll
index ce6e2c4..4a25d66 100644
--- a/test/Transforms/GlobalOpt/constantfold-initializers.ll
+++ b/test/Transforms/GlobalOpt/constantfold-initializers.ll
@@ -50,7 +50,41 @@ entry:
ret void
}
+; PR19955
+
+@dllimportptr = global i32* null, align 4
+; CHECK: @dllimportptr = global i32* null, align 4
+@dllimportvar = external dllimport global i32
+define internal void @test3() {
+entry:
+ store i32* @dllimportvar, i32** @dllimportptr, align 4
+ ret void
+}
+
+@dllexportptr = global i32* null, align 4
+; CHECK: @dllexportptr = global i32* @dllexportvar, align 4
+@dllexportvar = dllexport global i32 0, align 4
+; CHECK: @dllexportvar = dllexport global i32 20, align 4
+define internal void @test4() {
+entry:
+ store i32 20, i32* @dllexportvar, align 4
+ store i32* @dllexportvar, i32** @dllexportptr, align 4
+ ret void
+}
+
+@threadlocalptr = global i32* null, align 4
+; CHECK: @threadlocalptr = global i32* null, align 4
+@threadlocalvar = external thread_local global i32
+define internal void @test5() {
+entry:
+ store i32* @threadlocalvar, i32** @threadlocalptr, align 4
+ ret void
+}
+
@llvm.global_ctors = appending constant
- [2 x { i32, void ()* }]
+ [5 x { i32, void ()* }]
[{ i32, void ()* } { i32 65535, void ()* @test1 },
- { i32, void ()* } { i32 65535, void ()* @test2 }]
+ { i32, void ()* } { i32 65535, void ()* @test2 },
+ { i32, void ()* } { i32 65535, void ()* @test3 },
+ { i32, void ()* } { i32 65535, void ()* @test4 },
+ { i32, void ()* } { i32 65535, void ()* @test5 }]
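The three new cases here hinge on which addresses are link-time constants: a dllexport global defined in this module has a fixed address, so the store into @dllexportptr can be folded into its initializer, while a dllimport global is reached through the import table and a thread_local global through a per-thread base, so those stores must survive. A minimal standalone sketch of the non-foldable case; the names @p, @imp and @ctor_sketch are illustrative only.

@p = global i32* null, align 4
@imp = external dllimport global i32

define internal void @ctor_sketch() {
entry:
  ; @imp is resolved via the import table at load time, so its address is
  ; not a link-time constant and this store may not be folded into @p.
  store i32* @imp, i32** @p, align 4
  ret void
}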
diff --git a/test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll b/test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll
new file mode 100644
index 0000000..2c738de
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -loop-unswitch -instcombine -indvars < %s | FileCheck %s
+
+; This used to crash in SCEVExpander when there were congruent phis with an
+; undef incoming value from the loop header. The -loop-unswitch -instcombine is
+; necessary to create just this pattern, which is essentially a nop and gets
+; folded away aggressively if spelled out in IR directly.
+; PR 20093
+
+@c = external global i32**, align 8
+
+define void @test1() {
+entry:
+ br i1 undef, label %for.end12, label %for.cond.preheader
+
+for.cond.preheader: ; preds = %entry
+ %0 = load i32*** @c, align 8
+ %1 = load i32** %0, align 8
+ %2 = load i32* %1, align 4
+ br label %for.body
+
+for.body: ; preds = %for.cond.backedge, %for.body9.us, %for.cond.preheader
+ %3 = phi i32* [ %1, %for.cond.preheader ], [ %3, %for.cond.backedge ], [ %6, %for.body9.us ]
+ %4 = phi i32 [ %2, %for.cond.preheader ], [ undef, %for.cond.backedge ], [ %7, %for.body9.us ]
+ %i.024 = phi i32 [ 0, %for.cond.preheader ], [ %inc, %for.cond.backedge ], [ 0, %for.body9.us ]
+ %tobool1 = icmp eq i32 %4, 0
+ br i1 %tobool1, label %if.end, label %for.cond.backedge
+
+if.end: ; preds = %for.body
+ %5 = load i32* %3, align 4
+ %tobool4 = icmp eq i32 %5, 0
+ br i1 %tobool4, label %for.cond3, label %for.body9.preheader
+
+for.body9.preheader: ; preds = %if.end
+ %tobool8 = icmp eq i32 %i.024, 1
+ br i1 %tobool8, label %for.body9.us, label %for.body9
+
+for.body9.us: ; preds = %for.body9.preheader
+ %6 = load i32** undef, align 8
+ %7 = load i32* %6, align 4
+ br label %for.body
+
+for.cond3: ; preds = %for.cond3, %if.end
+ br label %for.cond3
+
+for.body9: ; preds = %for.body9, %for.body9.preheader
+ br label %for.body9
+
+for.cond.backedge: ; preds = %for.body
+ %inc = add nsw i32 %i.024, 1
+ br i1 false, label %for.body, label %for.end12
+
+for.end12: ; preds = %for.cond.backedge, %entry
+ ret void
+
+; CHECK-LABEL: @test1
+; CHECK-NOT: phi
+}
diff --git a/test/Transforms/Inline/blockaddress.ll b/test/Transforms/Inline/blockaddress.ll
index 4206312..8eb3072 100644
--- a/test/Transforms/Inline/blockaddress.ll
+++ b/test/Transforms/Inline/blockaddress.ll
@@ -1,8 +1,9 @@
; RUN: opt -inline -S < %s | FileCheck %s
; PR10162
-; Make sure the blockaddress is mapped correctly when doit is inlined
-; CHECK: store i8* blockaddress(@f, %here.i), i8** @ptr1, align 8
+; Make sure doit is not inlined since the blockaddress is taken
+; which could be unsafe
+; CHECK: store i8* blockaddress(@doit, %here), i8** %pptr, align 8
@i = global i32 1, align 4
@ptr1 = common global i8* null, align 8
diff --git a/test/Transforms/Inline/debug-invoke.ll b/test/Transforms/Inline/debug-invoke.ll
new file mode 100644
index 0000000..41d6074
--- /dev/null
+++ b/test/Transforms/Inline/debug-invoke.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -always-inline -S | FileCheck %s
+
+; Test that the debug location is preserved when rewriting an inlined call as an invoke
+
+; CHECK: invoke void @test()
+; CHECK-NEXT: to label {{.*}} unwind label {{.*}}, !dbg [[INL_LOC:!.*]]
+; CHECK: [[EMPTY:.*]] = metadata !{}
+; CHECK: [[INL_LOC]] = metadata !{i32 1, i32 0, metadata [[EMPTY]], metadata [[INL_AT:.*]]}
+; CHECK: [[INL_AT]] = metadata !{i32 2, i32 0, metadata [[EMPTY]], null}
+
+declare void @test()
+declare i32 @__gxx_personality_v0(...)
+
+attributes #0 = { alwaysinline }
+define void @inl() #0 {
+ call void @test(), !dbg !3
+ ret void
+}
+
+define void @caller() {
+ invoke void @inl()
+ to label %cont unwind label %lpad, !dbg !4
+
+cont:
+ ret void
+
+lpad:
+ landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ ret void
+}
+
+!llvm.module.flags = !{!1}
+!1 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!2 = metadata !{}
+!3 = metadata !{i32 1, i32 0, metadata !2, null}
+!4 = metadata !{i32 2, i32 0, metadata !2, null}
diff --git a/test/Transforms/Inline/null-function.ll b/test/Transforms/Inline/null-function.ll
new file mode 100644
index 0000000..2aecfa8
--- /dev/null
+++ b/test/Transforms/Inline/null-function.ll
@@ -0,0 +1,9 @@
+; RUN: opt -print-before=always-inline -always-inline < %s -o /dev/null 2>&1 | FileCheck %s
+
+define i32 @main() #0 {
+entry:
+ ret i32 0
+}
+
+; CHECK: *** IR Dump Before Inliner for always_inline functions ***
+; CHECK: Printing <null> Function
diff --git a/test/Transforms/InstCombine/2010-03-03-ExtElim.ll b/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
index b1384ec..e0def99 100644
--- a/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
+++ b/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
@@ -22,11 +22,11 @@ define i1 @PR6486() nounwind {
define i1 @PR16462_1() nounwind {
; CHECK-LABEL: @PR16462_1(
ret i1 icmp sgt (i32 sext (i16 trunc (i32 select (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @a, i32 0, i32 0), i32* @d), i32 0, i32 1) to i16) to i32), i32 65535)
-; CHECK: ret i1 icmp sgt (i32 sext (i16 trunc (i32 select (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @a, i32 0, i32 0), i32* @d), i32 0, i32 1) to i16) to i32), i32 65535)
+; CHECK: ret i1 false
}
define i1 @PR16462_2() nounwind {
; CHECK-LABEL: @PR16462_2(
ret i1 icmp sgt (i32 sext (i16 trunc (i32 select (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @a, i32 0, i32 0), i32* @d), i32 0, i32 1) to i16) to i32), i32 42)
-; CHECK: ret i1 icmp sgt (i16 trunc (i32 select (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @a, i32 0, i32 0), i32* @d), i32 0, i32 1) to i16), i16 42)
+; CHECK: ret i1 false
}
diff --git a/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
index 4d185bf..ac9c555 100644
--- a/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
+++ b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
; CHECK: addrspacecast
-@base = internal addrspace(3) unnamed_addr global [16 x i32] zeroinitializer, align 16
+@base = internal unnamed_addr addrspace(3) global [16 x i32] zeroinitializer, align 16
declare void @foo(i32*)
define void @test() nounwind {
diff --git a/test/Transforms/InstCombine/AddOverFlow.ll b/test/Transforms/InstCombine/AddOverFlow.ll
new file mode 100644
index 0000000..8f3d429
--- /dev/null
+++ b/test/Transforms/InstCombine/AddOverFlow.ll
@@ -0,0 +1,118 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @oppositesign
+; CHECK: add nsw i16 %a, %b
+define i16 @oppositesign(i16 %x, i16 %y) {
+; %a is negative, %b is positive
+ %a = or i16 %x, 32768
+ %b = and i16 %y, 32767
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+define i16 @zero_sign_bit(i16 %a) {
+; CHECK-LABEL: @zero_sign_bit(
+; CHECK-NEXT: and
+; CHECK-NEXT: add nuw
+; CHECK-NEXT: ret
+ %1 = and i16 %a, 32767
+ %2 = add i16 %1, 512
+ ret i16 %2
+}
+
+define i16 @zero_sign_bit2(i16 %a, i16 %b) {
+; CHECK-LABEL: @zero_sign_bit2(
+; CHECK-NEXT: and
+; CHECK-NEXT: and
+; CHECK-NEXT: add nuw
+; CHECK-NEXT: ret
+ %1 = and i16 %a, 32767
+ %2 = and i16 %b, 32767
+ %3 = add i16 %1, %2
+ ret i16 %3
+}
+
+declare i16 @bounded(i16 %input);
+declare i32 @__gxx_personality_v0(...);
+!0 = metadata !{i16 0, i16 32768} ; [0, 32767]
+!1 = metadata !{i16 0, i16 32769} ; [0, 32768]
+
+define i16 @add_bounded_values(i16 %a, i16 %b) {
+; CHECK-LABEL: @add_bounded_values(
+entry:
+ %c = call i16 @bounded(i16 %a), !range !0
+ %d = invoke i16 @bounded(i16 %b) to label %cont unwind label %lpad, !range !0
+cont:
+; %c and %d are in [0, 32767]. Therefore, %c + %d doesn't unsigned overflow.
+ %e = add i16 %c, %d
+; CHECK: add nuw i16 %c, %d
+ ret i16 %e
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ filter [0 x i8*] zeroinitializer
+ ret i16 42
+}
+
+define i16 @add_bounded_values_2(i16 %a, i16 %b) {
+; CHECK-LABEL: @add_bounded_values_2(
+entry:
+ %c = call i16 @bounded(i16 %a), !range !1
+ %d = invoke i16 @bounded(i16 %b) to label %cont unwind label %lpad, !range !1
+cont:
+; Similar to add_bounded_values, but %c and %d are in [0, 32768]. Therefore,
+; %c + %d may unsigned overflow and we cannot add NUW.
+ %e = add i16 %c, %d
+; CHECK: add i16 %c, %d
+ ret i16 %e
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ filter [0 x i8*] zeroinitializer
+ ret i16 42
+}
+
+; CHECK-LABEL: @ripple_nsw1
+; CHECK: add nsw i16 %a, %b
+define i16 @ripple_nsw1(i16 %x, i16 %y) {
+; %a has at most one bit set
+ %a = and i16 %y, 1
+
+; %b has a 0 bit other than the sign bit
+ %b = and i16 %x, 49151
+
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+; Like the previous test, but flip %a and %b
+; CHECK-LABEL: @ripple_nsw2
+; CHECK: add nsw i16 %b, %a
+define i16 @ripple_nsw2(i16 %x, i16 %y) {
+ %a = and i16 %y, 1
+ %b = and i16 %x, 49151
+ %c = add i16 %b, %a
+ ret i16 %c
+}
+
+; CHECK-LABEL: @ripple_no_nsw1
+; CHECK: add i32 %a, %x
+define i32 @ripple_no_nsw1(i32 %x, i32 %y) {
+; We know nothing about %x
+ %a = and i32 %y, 1
+ %b = add i32 %a, %x
+ ret i32 %b
+}
+
+; CHECK-LABEL: @ripple_no_nsw2
+; CHECK: add nuw i16 %a, %b
+define i16 @ripple_no_nsw2(i16 %x, i16 %y) {
+; %a has at most one bit set
+ %a = and i16 %y, 1
+
+; %b has a 0 bit, but it is the sign bit
+ %b = and i16 %x, 32767
+
+ %c = add i16 %a, %b
+ ret i16 %c
+}
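These cases rely on known-bits and !range reasoning to prove that an add cannot wrap before tagging it nsw or nuw. As a rough worked check of the range-metadata pair: two values in [0, 32767] sum to at most 65534, which still fits in an unsigned i16, so nuw is safe, whereas two values in [0, 32768] can reach 65536 and wrap, so no flag may be added. A minimal standalone sketch of the same idea, with an illustrative function name that is not part of the test:

define i16 @nuw_from_clear_sign_bits(i16 %x, i16 %y) {
  ; Both operands have a clear sign bit, so the unsigned sum is at most
  ; 32767 + 32767 = 65534 and cannot wrap; -instcombine can mark the add nuw.
  %a = and i16 %x, 32767
  %b = and i16 %y, 32767
  %c = add i16 %a, %b
  ret i16 %c
}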
diff --git a/test/Transforms/InstCombine/abs_abs.ll b/test/Transforms/InstCombine/abs_abs.ll
new file mode 100644
index 0000000..de10fd1
--- /dev/null
+++ b/test/Transforms/InstCombine/abs_abs.ll
@@ -0,0 +1,961 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @abs_abs_x01(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x01(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x02(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x02(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x03(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x03(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x04(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x04(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x05(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x05(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x06(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x06(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x07(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x07(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x08(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x08(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x09(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x09(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x10(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x10(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x11(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x11(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x12(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x12(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x13(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x13(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x14(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x14(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x15(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x15(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_abs_x16(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_abs_x16(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x01(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x01(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x02(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x02(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x03(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x03(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x04(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x04(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x05(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x05(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x06(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x06(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x07(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x07(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x08(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x08(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x09(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x09(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x10(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x10(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x11(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x11(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x12(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x12(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x13(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x13(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x14(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x14(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x15(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x15(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_nabs_x16(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_nabs_x16(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x01(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x01(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x02(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x02(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x03(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x03(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x04(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x04(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x05(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x05(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x06(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x06(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x07(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x07(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x08(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x08(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x09(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x09(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x10(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x10(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x11(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x11(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x12(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x12(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x13(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x13(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x14(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x14(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x15(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x15(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @abs_nabs_x16(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @abs_nabs_x16(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x01(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x01(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x02(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x02(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x03(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x03(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x04(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x04(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x05(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x05(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x06(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x06(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x07(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x07(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x08(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x08(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x09(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x09(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x10(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x10(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x11(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x11(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x12(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x12(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x13(i32 %x) {
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x13(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x14(i32 %x) {
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x14(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[NEG]], i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x15(i32 %x) {
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x15(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+define i32 @nabs_abs_x16(i32 %x) {
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+; CHECK-LABEL: @nabs_abs_x16(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 %x, i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[SEL]]
+}
\ No newline at end of file
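All of the cases above reduce to one observation: the compare/negate/select idiom for abs (and for negative abs) is idempotent, so feeding its result through the same idiom a second time changes nothing, and the CHECK lines only expect the first icmp/sub/select triple to remain. A minimal sketch of the canonical idiom, with an illustrative function name:

define i32 @abs_idiom_sketch(i32 %x) {
  ; abs(x): keep x when it is non-negative, otherwise use 0 - x.
  %cmp = icmp sgt i32 %x, -1
  %neg = sub nsw i32 0, %x
  %abs = select i1 %cmp, i32 %x, i32 %neg
  ; Applying the same idiom to %abs again would be a no-op, which is the
  ; folding these tests pin down.
  ret i32 %abs
}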
diff --git a/test/Transforms/InstCombine/add-shrink.ll b/test/Transforms/InstCombine/add-shrink.ll
index 3edb392..67a990f 100644
--- a/test/Transforms/InstCombine/add-shrink.ll
+++ b/test/Transforms/InstCombine/add-shrink.ll
@@ -1,9 +1,11 @@
-; RUN: opt < %s -instcombine -S | grep "add nsw i32"
-; RUN: opt < %s -instcombine -S | grep sext | count 1
-
-; Should only have one sext and the add should be i32 instead of i64.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK-LABEL: define i64 @test
define i64 @test1(i32 %A) {
+; CHECK: %[[ADD:.*]] = add nsw i32 %B, %C
+; CHECK: %F = sext i32 %[[ADD]] to i64
+; CHECK: ret i64 %F
+
%B = ashr i32 %A, 7 ; <i32> [#uses=1]
%C = ashr i32 %A, 9 ; <i32> [#uses=1]
%D = sext i32 %B to i64 ; <i64> [#uses=1]
diff --git a/test/Transforms/InstCombine/add-sitofp.ll b/test/Transforms/InstCombine/add-sitofp.ll
index 40edf71..3b5485e 100644
--- a/test/Transforms/InstCombine/add-sitofp.ll
+++ b/test/Transforms/InstCombine/add-sitofp.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep "add nsw i32"
+; RUN: opt < %s -instcombine -S | grep "add nuw nsw i32"
define double @x(i32 %a, i32 %b) nounwind {
%m = lshr i32 %a, 24
diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll
index 67d560e..d7eac4b 100644
--- a/test/Transforms/InstCombine/add2.ll
+++ b/test/Transforms/InstCombine/add2.ll
@@ -76,3 +76,240 @@ define <2 x i64> @test8(<2 x i64> %A) {
; CHECK-NEXT: %add = sub <2 x i64> <i64 1, i64 2>, %A
; CHECK-NEXT: ret <2 x i64> %add
}
+
+define i16 @test9(i16 %a) {
+ %b = mul i16 %a, 2
+ %c = mul i16 %a, 32767
+ %d = add i16 %b, %c
+ ret i16 %d
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: %d = mul i16 %a, -32767
+; CHECK-NEXT: ret i16 %d
+}
+
+; y + (~((x >> 3) & 0x55555555) + 1) -> y - ((x >> 3) & 0x55555555)
+define i32 @test10(i32 %x, i32 %y) {
+ %shr = ashr i32 %x, 3
+ %shr.not = or i32 %shr, -1431655766
+ %neg = xor i32 %shr.not, 1431655765
+ %add = add i32 %y, 1
+ %add1 = add i32 %add, %neg
+ ret i32 %add1
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[SHR:%[a-z0-9]+]] = ashr i32 %x, 3
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHR]], 1431655765
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; y + (~(x & 0x55555555) + 1) -> y - (x & 0x55555555)
+define i32 @test11(i32 %x, i32 %y) {
+ %x.not = or i32 %x, -1431655766
+ %neg = xor i32 %x.not, 1431655765
+ %add = add i32 %y, 1
+ %add1 = add i32 %add, %neg
+ ret i32 %add1
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 1431655765
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; (y + 1) + ~(x & 0x55555555) -> y - (x & 0x55555555)
+define i32 @test12(i32 %x, i32 %y) {
+ %add = add nsw i32 %y, 1
+ %x.not = or i32 %x, -1431655766
+ %neg = xor i32 %x.not, 1431655765
+ %add1 = add nsw i32 %add, %neg
+ ret i32 %add1
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 1431655765
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; y + (~(x & 0x55555556) + 1) -> y - (x & 0x55555556)
+define i32 @test13(i32 %x, i32 %y) {
+ %x.not = or i32 %x, -1431655767
+ %neg = xor i32 %x.not, 1431655766
+ %add = add i32 %y, 1
+ %add1 = add i32 %add, %neg
+ ret i32 %add1
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 1431655766
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; (y + 1) + ~(x & 0x55555556) -> y - (x & 0x55555556)
+define i32 @test14(i32 %x, i32 %y) {
+ %add = add nsw i32 %y, 1
+ %x.not = or i32 %x, -1431655767
+ %neg = xor i32 %x.not, 1431655766
+ %add1 = add nsw i32 %add, %neg
+ ret i32 %add1
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 1431655766
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; y + (~(x | 0x55555556) + 1) -> y - (x | 0x55555556)
+define i32 @test15(i32 %x, i32 %y) {
+ %x.not = and i32 %x, -1431655767
+ %neg = xor i32 %x.not, -1431655767
+ %add = add i32 %y, 1
+ %add1 = add i32 %add, %neg
+ ret i32 %add1
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = or i32 %x, 1431655766
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; (y + 1) + ~(x | 0x55555556) -> y - (x | 0x55555556)
+define i32 @test16(i32 %x, i32 %y) {
+ %add = add nsw i32 %y, 1
+ %x.not = and i32 %x, -1431655767
+ %neg = xor i32 %x.not, -1431655767
+ %add1 = add nsw i32 %add, %neg
+ ret i32 %add1
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = or i32 %x, 1431655766
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; y + (~(x | 0x55555555) + 1) -> y - (x | 0x55555555)
+define i32 @test17(i32 %x, i32 %y) {
+ %x.not = and i32 %x, -1431655766
+ %add2 = xor i32 %x.not, -1431655765
+ %add1 = add nsw i32 %add2, %y
+ ret i32 %add1
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = or i32 %x, 1431655765
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+; (y + 1) + ~(x | 0x55555555) -> y - (x | 0x55555555)
+define i32 @test18(i32 %x, i32 %y) {
+ %add = add nsw i32 %y, 1
+ %x.not = and i32 %x, -1431655766
+ %neg = xor i32 %x.not, -1431655766
+ %add1 = add nsw i32 %add, %neg
+ ret i32 %add1
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = or i32 %x, 1431655765
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]]
+; CHECK-NEXT: ret i32 [[SUB]]
+}
+
+define i16 @add_nsw_mul_nsw(i16 %x) {
+ %add1 = add nsw i16 %x, %x
+ %add2 = add nsw i16 %add1, %x
+ ret i16 %add2
+; CHECK-LABEL: @add_nsw_mul_nsw(
+; CHECK-NEXT: %add2 = mul nsw i16 %x, 3
+; CHECK-NEXT: ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_1(i16 %x) {
+ %mul1 = mul nsw i16 %x, 8
+ %add2 = add nsw i16 %x, %mul1
+ ret i16 %add2
+; CHECK-LABEL: @mul_add_to_mul_1(
+; CHECK-NEXT: %add2 = mul nsw i16 %x, 9
+; CHECK-NEXT: ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_2(i16 %x) {
+ %mul1 = mul nsw i16 %x, 8
+ %add2 = add nsw i16 %mul1, %x
+ ret i16 %add2
+; CHECK-LABEL: @mul_add_to_mul_2(
+; CHECK-NEXT: %add2 = mul nsw i16 %x, 9
+; CHECK-NEXT: ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_3(i16 %a) {
+ %mul1 = mul i16 %a, 2
+ %mul2 = mul i16 %a, 3
+ %add = add nsw i16 %mul1, %mul2
+ ret i16 %add
+; CHECK-LABEL: @mul_add_to_mul_3(
+; CHECK-NEXT: %add = mul i16 %a, 5
+; CHECK-NEXT: ret i16 %add
+}
+
+define i16 @mul_add_to_mul_4(i16 %a) {
+ %mul1 = mul nsw i16 %a, 2
+ %mul2 = mul nsw i16 %a, 7
+ %add = add nsw i16 %mul1, %mul2
+ ret i16 %add
+; CHECK-LABEL: @mul_add_to_mul_4(
+; CHECK-NEXT: %add = mul nsw i16 %a, 9
+; CHECK-NEXT: ret i16 %add
+}
+
+define i16 @mul_add_to_mul_5(i16 %a) {
+ %mul1 = mul nsw i16 %a, 3
+ %mul2 = mul nsw i16 %a, 7
+ %add = add nsw i16 %mul1, %mul2
+ ret i16 %add
+; CHECK-LABEL: @mul_add_to_mul_5(
+; CHECK-NEXT: %add = mul nsw i16 %a, 10
+; CHECK-NEXT: ret i16 %add
+}
+
+define i32 @mul_add_to_mul_6(i32 %x, i32 %y) {
+ %mul1 = mul nsw i32 %x, %y
+ %mul2 = mul nsw i32 %mul1, 5
+ %add = add nsw i32 %mul1, %mul2
+ ret i32 %add
+; CHECK-LABEL: @mul_add_to_mul_6(
+; CHECK-NEXT: %mul1 = mul nsw i32 %x, %y
+; CHECK-NEXT: %add = mul nsw i32 %mul1, 6
+; CHECK-NEXT: ret i32 %add
+}
+
+; This test and the next test verify that when a range metadata is attached to
+; llvm.cttz, ValueTracking correctly intersects the range specified by the
+; metadata and the range implied by the intrinsic.
+;
+; In this test, the range specified by the metadata is more strict. Therefore,
+; ValueTracking uses that range.
+define i16 @add_cttz(i16 %a) {
+; CHECK-LABEL: @add_cttz(
+ ; llvm.cttz.i16(..., /*is_zero_undefined=*/true) implies the value returned
+ ; is in [0, 16). The range metadata indicates the value returned is in [0, 8).
+ ; Intersecting these ranges, we know the value returned is in [0, 8).
+ ; Therefore, InstCombine will transform
+ ; add %cttz, 1111 1111 1111 1000 ; decimal -8
+ ; to
+ ; or %cttz, 1111 1111 1111 1000
+ %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true), !range !0
+ %b = add i16 %cttz, -8
+; CHECK: or i16 %cttz, -8
+ ret i16 %b
+}
+declare i16 @llvm.cttz.i16(i16, i1)
+!0 = metadata !{i16 0, i16 8}
+
+; Similar to @add_cttz, but in this test, the range implied by the
+; intrinsic is more strict. Therefore, ValueTracking uses that range.
+define i16 @add_cttz_2(i16 %a) {
+; CHECK-LABEL: @add_cttz_2(
+ ; llvm.cttz.i16(..., /*is_zero_undefined=*/true) implies the value returned
+ ; is in [0, 16). The range metadata indicates the value returned is in
+ ; [0, 32). Intersecting these ranges, we know the value returned is in
+ ; [0, 16). Therefore, InstCombine will transform
+ ; add %cttz, 1111 1111 1111 0000 ; decimal -16
+ ; to
+ ; or %cttz, 1111 1111 1111 0000
+ %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true), !range !1
+ %b = add i16 %cttz, -16
+; CHECK: or i16 %cttz, -16
+ ret i16 %b
+}
+!1 = metadata !{i16 0, i16 32}
diff --git a/test/Transforms/InstCombine/addrspacecast.ll b/test/Transforms/InstCombine/addrspacecast.ll
index d908b55..c168436 100644
--- a/test/Transforms/InstCombine/addrspacecast.ll
+++ b/test/Transforms/InstCombine/addrspacecast.ll
@@ -28,13 +28,91 @@ define <4 x i32*> @combine_redundant_addrspacecast_vector(<4 x i32 addrspace(1)*
define float* @combine_redundant_addrspacecast_types(i32 addrspace(1)* %x) nounwind {
; CHECK-LABEL: @combine_redundant_addrspacecast_types(
-; CHECK: addrspacecast i32 addrspace(1)* %x to float*
+; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float*
; CHECK-NEXT: ret
%y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)*
%z = addrspacecast i32 addrspace(3)* %y to float*
ret float* %z
}
+define <4 x float*> @combine_redundant_addrspacecast_types_vector(<4 x i32 addrspace(1)*> %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast_types_vector(
+; CHECK-NEXT: bitcast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(1)*>
+; CHECK-NEXT: addrspacecast <4 x float addrspace(1)*> %1 to <4 x float*>
+; CHECK-NEXT: ret
+ %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32 addrspace(3)*>
+ %z = addrspacecast <4 x i32 addrspace(3)*> %y to <4 x float*>
+ ret <4 x float*> %z
+}
+
+define float addrspace(2)* @combine_addrspacecast_bitcast_1(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_bitcast_1(
+; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float addrspace(2)*
+; CHECK-NEXT: ret
+ %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)*
+ %z = bitcast i32 addrspace(2)* %y to float addrspace(2)*
+ ret float addrspace(2)* %z
+}
+
+define i32 addrspace(2)* @combine_addrspacecast_bitcast_2(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_bitcast_2(
+; CHECK: addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)*
+; CHECK-NEXT: ret
+ %y = addrspacecast i32 addrspace(1)* %x to float addrspace(2)*
+ %z = bitcast float addrspace(2)* %y to i32 addrspace(2)*
+ ret i32 addrspace(2)* %z
+}
+
+define i32 addrspace(2)* @combine_bitcast_addrspacecast_1(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_bitcast_addrspacecast_1(
+; CHECK: addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)*
+; CHECK-NEXT: ret
+ %y = bitcast i32 addrspace(1)* %x to i8 addrspace(1)*
+ %z = addrspacecast i8 addrspace(1)* %y to i32 addrspace(2)*
+ ret i32 addrspace(2)* %z
+}
+
+define float addrspace(2)* @combine_bitcast_addrspacecast_2(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_bitcast_addrspacecast_2(
+; CHECK: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK: addrspacecast float addrspace(1)* %1 to float addrspace(2)*
+; CHECK-NEXT: ret
+ %y = bitcast i32 addrspace(1)* %x to i8 addrspace(1)*
+ %z = addrspacecast i8 addrspace(1)* %y to float addrspace(2)*
+ ret float addrspace(2)* %z
+}
+
+define float addrspace(2)* @combine_addrspacecast_types(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_types(
+; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float addrspace(2)*
+; CHECK-NEXT: ret
+ %y = addrspacecast i32 addrspace(1)* %x to float addrspace(2)*
+ ret float addrspace(2)* %y
+}
+
+define <4 x float addrspace(2)*> @combine_addrspacecast_types_vector(<4 x i32 addrspace(1)*> %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_types_vector(
+; CHECK-NEXT: bitcast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(1)*>
+; CHECK-NEXT: addrspacecast <4 x float addrspace(1)*> %1 to <4 x float addrspace(2)*>
+; CHECK-NEXT: ret
+ %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(2)*>
+ ret <4 x float addrspace(2)*> %y
+}
+
+define i32 @canonicalize_addrspacecast([16 x i32] addrspace(1)* %arr) {
+; CHECK-LABEL: @canonicalize_addrspacecast(
+; CHECK-NEXT: getelementptr inbounds [16 x i32] addrspace(1)* %arr, i32 0, i32 0
+; CHECK-NEXT: addrspacecast i32 addrspace(1)* %{{[a-zA-Z0-9]+}} to i32*
+; CHECK-NEXT: load i32*
+; CHECK-NEXT: ret i32
+ %p = addrspacecast [16 x i32] addrspace(1)* %arr to i32*
+ %v = load i32* %p
+ ret i32 %v
+}
+
@const_array = addrspace(2) constant [60 x i8] [i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
diff --git a/test/Transforms/InstCombine/align-2d-gep.ll b/test/Transforms/InstCombine/align-2d-gep.ll
index 5bca46d..f6a8776 100644
--- a/test/Transforms/InstCombine/align-2d-gep.ll
+++ b/test/Transforms/InstCombine/align-2d-gep.ll
@@ -31,7 +31,7 @@ bb1:
store <2 x double><double 0.0, double 0.0>, <2 x double>* %r, align 8
%indvar.next = add i64 %j, 2
- %exitcond = icmp eq i64 %indvar.next, 557
+ %exitcond = icmp eq i64 %indvar.next, 556
br i1 %exitcond, label %bb11, label %bb1
bb11:
diff --git a/test/Transforms/InstCombine/bitcast-alias-function.ll b/test/Transforms/InstCombine/bitcast-alias-function.ll
index 284960b..a6b56f9 100644
--- a/test/Transforms/InstCombine/bitcast-alias-function.ll
+++ b/test/Transforms/InstCombine/bitcast-alias-function.ll
@@ -6,46 +6,46 @@ target datalayout = "e-p:32:32:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16
; Cases that should be bitcast
; Test cast between scalars with same bit sizes
-@alias_i32_to_f32 = alias float (float), i32 (i32)* @func_i32
+@alias_i32_to_f32 = alias bitcast (i32 (i32)* @func_i32 to float (float)*)
; Test cast between vectors with same number of elements and bit sizes
-@alias_v2i32_to_v2f32 = alias <2 x float> (<2 x float>), <2 x i32> (<2 x i32>)* @func_v2i32
+@alias_v2i32_to_v2f32 = alias bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <2 x float> (<2 x float>)*)
; Test cast from vector to scalar with same number of bits
-@alias_v2f32_to_i64 = alias <2 x float> (<2 x float>), i64 (i64)* @func_i64
+@alias_v2f32_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <2 x float> (<2 x float>)*)
; Test cast from scalar to vector with same number of bits
-@alias_i64_to_v2f32 = alias i64 (i64), <2 x float> (<2 x float>)* @func_v2f32
+@alias_i64_to_v2f32 = alias bitcast (<2 x float> (<2 x float>)* @func_v2f32 to i64 (i64)*)
; Test cast between vectors of pointers
-@alias_v2i32p_to_v2i64p = alias <2 x i64*> (<2 x i64*>), <2 x i32*> (<2 x i32*>)* @func_v2i32p
+@alias_v2i32p_to_v2i64p = alias bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to <2 x i64*> (<2 x i64*>)*)
; Cases that should be invalid and unchanged
; Test cast between scalars with different bit sizes
-@alias_i64_to_f32 = alias float (float), i64 (i64)* @func_i64
+@alias_i64_to_f32 = alias bitcast (i64 (i64)* @func_i64 to float (float)*)
; Test cast between vectors with different bit sizes but the
; same number of elements
-@alias_v2i64_to_v2f32 = alias <2 x float> (<2 x float>), <2 x i64> (<2 x i64>)* @func_v2i64
+@alias_v2i64_to_v2f32 = alias bitcast (<2 x i64> (<2 x i64>)* @func_v2i64 to <2 x float> (<2 x float>)*)
; Test cast between vectors with same number of bits and different
; numbers of elements
-@alias_v2i32_to_v4f32 = alias <4 x float> (<4 x float>), <2 x i32> (<2 x i32>)* @func_v2i32
+@alias_v2i32_to_v4f32 = alias bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <4 x float> (<4 x float>)*)
; Test cast between scalar and vector with different number of bits
-@alias_i64_to_v4f32 = alias i64 (i64), <4 x float> (<4 x float>)* @func_v4f32
+@alias_i64_to_v4f32 = alias bitcast (<4 x float> (<4 x float>)* @func_v4f32 to i64 (i64)*)
; Test cast between vector and scalar with different number of bits
-@alias_v4f32_to_i64 = alias <4 x float> (<4 x float>), i64 (i64)* @func_i64
+@alias_v4f32_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <4 x float> (<4 x float>)*)
; Test cast from scalar to vector of pointers with same number of bits
; We don't know the pointer size at this point, so this can't be done
-@alias_i64_to_v2i32p = alias i64 (i64), <2 x i32*> (<2 x i32*>)* @func_v2i32p
+@alias_i64_to_v2i32p = alias bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to i64 (i64)*)
; Test cast between vector of pointers and scalar with different number of bits
-@alias_v4i32p_to_i64 = alias <4 x i32*> (<4 x i32*>), i64 (i64)* @func_i64
+@alias_v4i32p_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <4 x i32*> (<4 x i32*>)*)
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 4fab92f..0cbfbb0 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -370,7 +370,7 @@ define zeroext i64 @test43(i8 zeroext %on_off) nounwind readonly {
ret i64 %C ;; Should be (add (zext i8 -> i64), -1)
; CHECK-LABEL: @test43(
; CHECK-NEXT: %A = zext i8 %on_off to i64
-; CHECK-NEXT: %B = add i64 %A, -1
+; CHECK-NEXT: %B = add nsw i64 %A, -1
; CHECK-NEXT: ret i64 %B
}
diff --git a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
index 9f21d54..7fac78a 100644
--- a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
+++ b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -230,3 +230,13 @@ define i32 @constant_through_array_as_ptrs() {
%b = load i32 addrspace(1)* %a, align 4
ret i32 %b
}
+
+@shared_mem = external addrspace(3) global [0 x i8]
+
+define float @canonicalize_addrspacecast(i32 %i) {
+; CHECK-LABEL: @canonicalize_addrspacecast
+; CHECK-NEXT: getelementptr inbounds float* addrspacecast (float addrspace(3)* bitcast ([0 x i8] addrspace(3)* @shared_mem to float addrspace(3)*) to float*), i32 %i
+ %p = getelementptr inbounds float* addrspacecast ([0 x i8] addrspace(3)* @shared_mem to float*), i32 %i
+ %v = load float* %p
+ ret float %v
+}
diff --git a/test/Transforms/InstCombine/descale-zero.ll b/test/Transforms/InstCombine/descale-zero.ll
new file mode 100644
index 0000000..7990fdb
--- /dev/null
+++ b/test/Transforms/InstCombine/descale-zero.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+define internal i8* @descale_zero() {
+entry:
+; CHECK: load i16** inttoptr (i64 48 to i16**), align 16
+; CHECK-NEXT: bitcast i16*
+; CHECK-NEXT: ret i8*
+ %i16_ptr = load i16** inttoptr (i64 48 to i16**), align 16
+ %num = load i64* inttoptr (i64 64 to i64*), align 64
+ %num_times_2 = shl i64 %num, 1
+ %num_times_2_plus_4 = add i64 %num_times_2, 4
+ %i8_ptr = bitcast i16* %i16_ptr to i8*
+ %i8_ptr_num_times_2_plus_4 = getelementptr i8* %i8_ptr, i64 %num_times_2_plus_4
+ %num_times_neg2 = mul i64 %num, -2
+ %num_times_neg2_minus_4 = add i64 %num_times_neg2, -4
+ %addr = getelementptr i8* %i8_ptr_num_times_2_plus_4, i64 %num_times_neg2_minus_4
+ ret i8* %addr
+}
diff --git a/test/Transforms/InstSimplify/2010-12-20-Distribute.ll b/test/Transforms/InstCombine/distribute.ll
index 9ea0a5e..e6360f8 100644
--- a/test/Transforms/InstSimplify/2010-12-20-Distribute.ll
+++ b/test/Transforms/InstCombine/distribute.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instsimplify -S | FileCheck %s
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i32 @factorize(i32 %x, i32 %y) {
; CHECK-LABEL: @factorize(
@@ -28,27 +28,32 @@ define i32 @factorize3(i32 %x, i32 %a, i32 %b) {
%r = or i32 %x, %b
%z = and i32 %l, %r
ret i32 %z
-; CHECK: ret i32 %r
+; CHECK: %z = or i32 %b, %x
+; CHECK: ret i32 %z
}
define i32 @factorize4(i32 %x, i32 %y) {
; CHECK-LABEL: @factorize4(
+; ((Y << 1) * X) - (X * Y) -> (X * (Y * 2 - Y)) -> (X * Y)
%sh = shl i32 %y, 1
%ml = mul i32 %sh, %x
%mr = mul i32 %x, %y
%s = sub i32 %ml, %mr
ret i32 %s
-; CHECK: ret i32 %mr
+; CHECK: %s = mul i32 %y, %x
+; CHECK: ret i32 %s
}
define i32 @factorize5(i32 %x, i32 %y) {
; CHECK-LABEL: @factorize5(
+; ((Y * 2) * X) - (X * Y) -> (X * Y)
%sh = mul i32 %y, 2
%ml = mul i32 %sh, %x
%mr = mul i32 %x, %y
%s = sub i32 %ml, %mr
ret i32 %s
-; CHECK: ret i32 %mr
+; CHECK: %s = mul i32 %y, %x
+; CHECK: ret i32 %s
}
define i32 @expand(i32 %x) {
@@ -58,5 +63,6 @@ define i32 @expand(i32 %x) {
%b = or i32 %a, 2
%c = and i32 %b, 1
ret i32 %c
+; CHECK: %a = and i32 %x, 1
; CHECK: ret i32 %a
}
diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll
index 1dec11d..c8763dc 100644
--- a/test/Transforms/InstCombine/ffs-1.ll
+++ b/test/Transforms/InstCombine/ffs-1.ll
@@ -103,7 +103,7 @@ define i32 @test_simplify13(i32 %x) {
; CHECK-LABEL: @test_simplify13(
%ret = call i32 @ffs(i32 %x)
; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
-; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add i32 [[CTTZ]], 1
+; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
ret i32 %ret
@@ -114,7 +114,7 @@ define i32 @test_simplify14(i32 %x) {
; CHECK-LINUX-LABEL: @test_simplify14(
%ret = call i32 @ffsl(i32 %x)
; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
-; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add i32 [[CTTZ]], 1
+; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
ret i32 %ret
@@ -125,7 +125,7 @@ define i32 @test_simplify15(i64 %x) {
; CHECK-LINUX-LABEL: @test_simplify15(
%ret = call i32 @ffsll(i64 %x)
; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 false)
-; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add i64 [[CTTZ]], 1
+; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i64 [[CTTZ]], 1
; CHECK-LINUX-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32
; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0
; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[TRUNC]], i32 0
diff --git a/test/Transforms/InstCombine/gepphigep.ll b/test/Transforms/InstCombine/gepphigep.ll
new file mode 100644
index 0000000..9aab609
--- /dev/null
+++ b/test/Transforms/InstCombine/gepphigep.ll
@@ -0,0 +1,56 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+%struct1 = type { %struct2*, i32, i32, i32 }
+%struct2 = type { i32, i32 }
+
+define i32 @test1(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) {
+bb:
+ %tmp = getelementptr inbounds %struct1* %dm, i64 0, i32 0
+ %tmp1 = load %struct2** %tmp, align 8
+ br i1 %tmp4, label %bb1, label %bb2
+
+bb1:
+ %tmp10 = getelementptr inbounds %struct2* %tmp1, i64 %tmp9
+ %tmp11 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 0
+ store i32 0, i32* %tmp11, align 4
+ br label %bb3
+
+bb2:
+ %tmp20 = getelementptr inbounds %struct2* %tmp1, i64 %tmp19
+ %tmp21 = getelementptr inbounds %struct2* %tmp20, i64 0, i32 0
+ store i32 0, i32* %tmp21, align 4
+ br label %bb3
+
+bb3:
+ %phi = phi %struct2* [ %tmp10, %bb1 ], [ %tmp20, %bb2 ]
+ %tmp24 = getelementptr inbounds %struct2* %phi, i64 0, i32 1
+ %tmp25 = load i32* %tmp24, align 4
+ ret i32 %tmp25
+
+; CHECK-LABEL: @test1(
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 0
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp19, i32 0
+; CHECK: %[[PHI:[0-9A-Za-z]+]] = phi i64 [ %tmp9, %bb1 ], [ %tmp19, %bb2 ]
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %[[PHI]], i32 1
+
+}
+
+define i32 @test2(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) {
+bb:
+ %tmp = getelementptr inbounds %struct1* %dm, i64 0, i32 0
+ %tmp1 = load %struct2** %tmp, align 8
+ %tmp10 = getelementptr inbounds %struct2* %tmp1, i64 %tmp9
+ %tmp11 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 0
+ store i32 0, i32* %tmp11, align 4
+ %tmp20 = getelementptr inbounds %struct2* %tmp1, i64 %tmp19
+ %tmp21 = getelementptr inbounds %struct2* %tmp20, i64 0, i32 0
+ store i32 0, i32* %tmp21, align 4
+ %tmp24 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 1
+ %tmp25 = load i32* %tmp24, align 4
+ ret i32 %tmp25
+
+; CHECK-LABEL: @test2(
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 0
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp19, i32 0
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 1
+}
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index ef0cb29..3240c6d 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -732,7 +732,8 @@ define i64 @test_gep_bitcast_array_same_size_element([100 x double]* %arr, i64 %
define i64 @test_gep_bitcast_array_same_size_element_addrspacecast([100 x double]* %arr, i64 %N) {
; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_addrspacecast(
; CHECK: getelementptr [100 x double]* %arr, i64 0, i64 %V
-; CHECK-NEXT: %t = addrspacecast double*
+; CHECK-NEXT: bitcast double*
+; CHECK-NEXT: %t = addrspacecast i64*
; CHECK: load i64 addrspace(3)* %t
%cast = addrspacecast [100 x double]* %arr to i64 addrspace(3)*
%V = mul i64 %N, 8
@@ -802,10 +803,22 @@ define i16 @test41([3 x i32] addrspace(1)* %array) {
; CHECK-NEXT: ret i16 8
}
-define i32 addrspace(1)* @ascast_0_gep([128 x i32]* %p) nounwind {
+define i32 addrspace(1)* @ascast_0_gep(i32* %p) nounwind {
; CHECK-LABEL: @ascast_0_gep(
; CHECK-NOT: getelementptr
; CHECK: ret
+ %gep = getelementptr i32* %p, i32 0
+ %x = addrspacecast i32* %gep to i32 addrspace(1)*
+ ret i32 addrspace(1)* %x
+}
+
+; Do not merge the GEP and the addrspacecast, because it would undo the
+; addrspacecast canonicalization.
+define i32 addrspace(1)* @ascast_0_0_gep([128 x i32]* %p) nounwind {
+; CHECK-LABEL: @ascast_0_0_gep(
+; CHECK-NEXT: getelementptr [128 x i32]
+; CHECK-NEXT: addrspacecast i32*
+; CHECK-NEXT: ret i32 addrspace(1)*
%gep = getelementptr [128 x i32]* %p, i32 0, i32 0
%x = addrspacecast i32* %gep to i32 addrspace(1)*
ret i32 addrspace(1)* %x
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index f45897c..26e144f 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -1,7 +1,6 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout =
-"e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
define i32 @test1(i32 %X) {
entry:
@@ -166,6 +165,14 @@ define i1 @test17(i32 %x) nounwind {
; CHECK-NEXT: %cmp = icmp ne i32 %x, 3
}
+define i1 @test17a(i32 %x) nounwind {
+ %shl = shl i32 1, %x
+ %and = and i32 %shl, 7
+ %cmp = icmp eq i32 %and, 0
+ ret i1 %cmp
+; CHECK-LABEL: @test17a(
+; CHECK-NEXT: %cmp = icmp ugt i32 %x, 2
+}
define i1 @test18(i32 %x) nounwind {
%sh = lshr i32 8, %x
@@ -194,6 +201,15 @@ define i1 @test20(i32 %x) nounwind {
; CHECK-NEXT: %cmp = icmp eq i32 %x, 3
}
+define i1 @test20a(i32 %x) nounwind {
+ %shl = shl i32 1, %x
+ %and = and i32 %shl, 7
+ %cmp = icmp ne i32 %and, 0
+ ret i1 %cmp
+; CHECK-LABEL: @test20a(
+; CHECK-NEXT: %cmp = icmp ult i32 %x, 3
+}
+
define i1 @test21(i8 %x, i8 %y) {
; CHECK-LABEL: @test21(
; CHECK-NOT: or i8
@@ -657,6 +673,49 @@ define i1 @test60_as1(i8 addrspace(1)* %foo, i64 %i, i64 %j) {
; CHECK-NEXT: ret i1
}
+; Same as test60, but look through an addrspacecast instead of a
+; bitcast. This uses the same sized addrspace.
+define i1 @test60_addrspacecast(i8* %foo, i64 %i, i64 %j) {
+ %bit = addrspacecast i8* %foo to i32 addrspace(3)*
+ %gep1 = getelementptr inbounds i32 addrspace(3)* %bit, i64 %i
+ %gep2 = getelementptr inbounds i8* %foo, i64 %j
+ %cast1 = addrspacecast i32 addrspace(3)* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ ret i1 %cmp
+; CHECK-LABEL: @test60_addrspacecast(
+; CHECK-NEXT: %gep1.idx = shl nuw i64 %i, 2
+; CHECK-NEXT: icmp slt i64 %gep1.idx, %j
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test60_addrspacecast_smaller(i8* %foo, i16 %i, i64 %j) {
+ %bit = addrspacecast i8* %foo to i32 addrspace(1)*
+ %gep1 = getelementptr inbounds i32 addrspace(1)* %bit, i16 %i
+ %gep2 = getelementptr inbounds i8* %foo, i64 %j
+ %cast1 = addrspacecast i32 addrspace(1)* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ ret i1 %cmp
+; CHECK-LABEL: @test60_addrspacecast_smaller(
+; CHECK-NEXT: %gep1.idx = shl nuw i16 %i, 2
+; CHECK-NEXT: trunc i64 %j to i16
+; CHECK-NEXT: icmp sgt i16 %1, %gep1.idx
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test60_addrspacecast_larger(i8 addrspace(1)* %foo, i32 %i, i16 %j) {
+ %bit = addrspacecast i8 addrspace(1)* %foo to i32 addrspace(2)*
+ %gep1 = getelementptr inbounds i32 addrspace(2)* %bit, i32 %i
+ %gep2 = getelementptr inbounds i8 addrspace(1)* %foo, i16 %j
+ %cast1 = addrspacecast i32 addrspace(2)* %gep1 to i8 addrspace(1)*
+ %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+ ret i1 %cmp
+; CHECK-LABEL: @test60_addrspacecast_larger(
+; CHECK-NEXT: %gep1.idx = shl nuw i32 %i, 2
+; CHECK-NEXT: trunc i32 %gep1.idx to i16
+; CHECK-NEXT: icmp slt i16 %1, %j
+; CHECK-NEXT: ret i1
+}
+
define i1 @test61(i8* %foo, i64 %i, i64 %j) {
%bit = bitcast i8* %foo to i32*
%gep1 = getelementptr i32* %bit, i64 %i
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 91c4470..9b58d93 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -3,6 +3,7 @@
%overflow.result = type {i8, i1}
declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8)
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8)
declare double @llvm.powi.f64(double, i32) nounwind readonly
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
@@ -89,6 +90,18 @@ define i8 @uaddtest7(i8 %A, i8 %B) {
; CHECK-NEXT: ret i8 %z
}
+; PR20194
+define { i32, i1 } @saddtest1(i8 %a, i8 %b) {
+ %A = sext i8 %a to i32
+ %B = sext i8 %b to i32
+ %x = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %A, i32 %B)
+ ret { i32, i1 } %x
+; CHECK-LABEL: @saddtest1
+; CHECK: %x = add nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret { i32, i1 } %1
+}
+
define i8 @umultest1(i8 %A, i1* %overflowPtr) {
%x = call %overflow.result @llvm.umul.with.overflow.i8(i8 0, i8 %A)
diff --git a/test/Transforms/InstCombine/memcpy-from-global.ll b/test/Transforms/InstCombine/memcpy-from-global.ll
index b5a0ab8..3bc1d36 100644
--- a/test/Transforms/InstCombine/memcpy-from-global.ll
+++ b/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -78,7 +78,8 @@ define void @test2_addrspacecast() {
; %A alloca is deleted
; This doesn't exactly match what test2 does, because folding the type
; cast into the alloca doesn't work for the addrspacecast yet.
-; CHECK-NEXT: alloca %T
+; CHECK-NEXT: alloca [124 x i8]
+; CHECK-NEXT: getelementptr
; CHECK-NEXT: addrspacecast
; use @G instead of %A
diff --git a/test/Transforms/InstCombine/overflow-mul.ll b/test/Transforms/InstCombine/overflow-mul.ll
index 04019ae..cbb2f5f 100644
--- a/test/Transforms/InstCombine/overflow-mul.ll
+++ b/test/Transforms/InstCombine/overflow-mul.ll
@@ -162,3 +162,14 @@ entry:
ret i32 %retval
}
+define <4 x i32> @pr20113(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: @pr20113
+; CHECK-NOT: mul.with.overflow
+; CHECK: ret
+ %vmovl.i.i726 = zext <4 x i16> %a to <4 x i32>
+ %vmovl.i.i712 = zext <4 x i16> %b to <4 x i32>
+ %mul.i703 = mul <4 x i32> %vmovl.i.i712, %vmovl.i.i726
+ %tmp = icmp sge <4 x i32> %mul.i703, zeroinitializer
+ %vcgez.i = sext <4 x i1> %tmp to <4 x i32>
+ ret <4 x i32> %vcgez.i
+}
diff --git a/test/Transforms/InstCombine/pr20059.ll b/test/Transforms/InstCombine/pr20059.ll
new file mode 100644
index 0000000..0ef3159
--- /dev/null
+++ b/test/Transforms/InstCombine/pr20059.ll
@@ -0,0 +1,16 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed
+; for an srem operation. This is not a valid optimization because it may cause a trap
+; on div-by-zero.
+
+; CHECK-LABEL: @do_not_reorder
+; CHECK: %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: %retval = srem <4 x i32> %splat1, %splat2
+define <4 x i32> @do_not_reorder(<4 x i32> %p1, <4 x i32> %p2) {
+ %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
+ %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
+ %retval = srem <4 x i32> %splat1, %splat2
+ ret <4 x i32> %retval
+}
diff --git a/test/Transforms/InstCombine/pr20079.ll b/test/Transforms/InstCombine/pr20079.ll
new file mode 100644
index 0000000..ce9c4de
--- /dev/null
+++ b/test/Transforms/InstCombine/pr20079.ll
@@ -0,0 +1,9 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+@b = internal global [1 x i32] zeroinitializer, align 4
+@c = internal global i32 0, align 4
+
+; CHECK-LABEL: @fn1
+; CHECK-NEXT: ret i32 0
+define i32 @fn1(i32 %a) {
+ ret i32 0
+}
diff --git a/test/Transforms/InstCombine/r600-intrinsics.ll b/test/Transforms/InstCombine/r600-intrinsics.ll
new file mode 100644
index 0000000..1db6b0d
--- /dev/null
+++ b/test/Transforms/InstCombine/r600-intrinsics.ll
@@ -0,0 +1,47 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
+declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_1
+; CHECK-NEXT: ret float 1.000000e+00
+define float @test_constant_fold_rcp_f32_1() nounwind {
+ %val = call float @llvm.AMDGPU.rcp.f32(float 1.0) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_1
+; CHECK-NEXT: ret double 1.000000e+00
+define double @test_constant_fold_rcp_f64_1() nounwind {
+ %val = call double @llvm.AMDGPU.rcp.f64(double 1.0) nounwind readnone
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_half
+; CHECK-NEXT: ret float 2.000000e+00
+define float @test_constant_fold_rcp_f32_half() nounwind {
+ %val = call float @llvm.AMDGPU.rcp.f32(float 0.5) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_half
+; CHECK-NEXT: ret double 2.000000e+00
+define double @test_constant_fold_rcp_f64_half() nounwind {
+ %val = call double @llvm.AMDGPU.rcp.f64(double 0.5) nounwind readnone
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_43
+; CHECK-NEXT: call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01)
+define float @test_constant_fold_rcp_f32_43() nounwind {
+ %val = call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_43
+; CHECK-NEXT: call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01)
+define double @test_constant_fold_rcp_f64_43() nounwind {
+ %val = call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) nounwind readnone
+ ret double %val
+}
+
diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll
index 9f07702..0595a67 100644
--- a/test/Transforms/InstCombine/rem.ll
+++ b/test/Transforms/InstCombine/rem.ll
@@ -127,7 +127,7 @@ define i64 @test14(i64 %x, i32 %y) {
; CHECK-LABEL: @test14(
; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, %y
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SHL]] to i64
-; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ZEXT]], -1
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[ZEXT]], -1
; CHECK-NEXT: [[AND:%.*]] = and i64 [[ADD]], %x
; CHECK-NEXT: ret i64 [[AND]]
%shl = shl i32 1, %y
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 2213be1..d625f3b 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -281,7 +281,7 @@ define i32 @test15i(i32 %X) {
; CHECK-NEXT: %t1 = shl i32 %X, 8
; CHECK-NEXT: %1 = and i32 %t1, 512
; CHECK-NEXT: %2 = xor i32 %1, 512
-; CHECK-NEXT: %3 = add i32 %2, 577
+; CHECK-NEXT: %3 = add nuw nsw i32 %2, 577
; CHECK-NEXT: ret i32 %3
}
@@ -294,7 +294,7 @@ define i32 @test15j(i32 %X) {
; CHECK-LABEL: @test15j(
; CHECK-NEXT: %t1 = shl i32 %X, 8
; CHECK-NEXT: %1 = and i32 %t1, 512
-; CHECK-NEXT: %2 = add i32 %1, 577
+; CHECK-NEXT: %2 = add nuw nsw i32 %1, 577
; CHECK-NEXT: ret i32 %2
}
@@ -521,7 +521,7 @@ define i32 @test35(i32 %x) {
; CHECK-LABEL: @test35(
; CHECK: ashr i32 %x, 31
; CHECK: and i32 {{.*}}, 40
-; CHECK: add i32 {{.*}}, 60
+; CHECK: add nuw nsw i32 {{.*}}, 60
; CHECK: ret
}
@@ -532,7 +532,7 @@ define i32 @test36(i32 %x) {
; CHECK-LABEL: @test36(
; CHECK: ashr i32 %x, 31
; CHECK: and i32 {{.*}}, -40
-; CHECK: add i32 {{.*}}, 100
+; CHECK: add nsw i32 {{.*}}, 100
; CHECK: ret
}
@@ -996,6 +996,111 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_vector_of_2s(i32 %x, <2 x i32> %y) {
ret <2 x i32> %select
}
+; CHECK-LABEL: @select_icmp_and_8_eq_0_or_8(
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %x, 8
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_and_8_eq_0_or_8(i32 %x) {
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %x, 8
+ %or.x = select i1 %cmp, i32 %or, i32 %x
+ ret i32 %or.x
+}
+
+; CHECK-LABEL: @select_icmp_and_8_ne_0_xor_8(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, -9
+; CHECK-NEXT: ret i32 [[AND]]
+define i32 @select_icmp_and_8_ne_0_xor_8(i32 %x) {
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %x, 8
+ %x.xor = select i1 %cmp, i32 %x, i32 %xor
+ ret i32 %x.xor
+}
+
+; CHECK-LABEL: @select_icmp_and_8_eq_0_xor_8(
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %x, 8
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_and_8_eq_0_xor_8(i32 %x) {
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %x, 8
+ %xor.x = select i1 %cmp, i32 %xor, i32 %x
+ ret i32 %xor.x
+}
+
+; CHECK-LABEL: @select_icmp_and_8_ne_0_and_not_8(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, -9
+; CHECK-NEXT: ret i32 [[AND]]
+define i32 @select_icmp_and_8_ne_0_and_not_8(i32 %x) {
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %and1 = and i32 %x, -9
+ %x.and1 = select i1 %cmp, i32 %x, i32 %and1
+ ret i32 %x.and1
+}
+
+; CHECK-LABEL: @select_icmp_and_8_eq_0_and_not_8(
+; CHECK-NEXT: ret i32 %x
+define i32 @select_icmp_and_8_eq_0_and_not_8(i32 %x) {
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %and1 = and i32 %x, -9
+ %and1.x = select i1 %cmp, i32 %and1, i32 %x
+ ret i32 %and1.x
+}
+
+; CHECK-LABEL: @select_icmp_x_and_8_eq_0_y_xor_8(
+; CHECK: select i1 %cmp, i64 %y, i64 %xor
+define i64 @select_icmp_x_and_8_eq_0_y_xor_8(i32 %x, i64 %y) {
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i64 %y, 8
+ %y.xor = select i1 %cmp, i64 %y, i64 %xor
+ ret i64 %y.xor
+}
+
+; CHECK-LABEL: @select_icmp_x_and_8_eq_0_y_and_not_8(
+; CHECK: select i1 %cmp, i64 %y, i64 %and1
+define i64 @select_icmp_x_and_8_eq_0_y_and_not_8(i32 %x, i64 %y) {
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %and1 = and i64 %y, -9
+ %y.and1 = select i1 %cmp, i64 %y, i64 %and1
+ ret i64 %y.and1
+}
+
+; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_xor_8(
+; CHECK: select i1 %cmp, i64 %xor, i64 %y
+define i64 @select_icmp_x_and_8_ne_0_y_xor_8(i32 %x, i64 %y) {
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i64 %y, 8
+ %xor.y = select i1 %cmp, i64 %xor, i64 %y
+ ret i64 %xor.y
+}
+
+; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_and_not_8(
+; CHECK: select i1 %cmp, i64 %and1, i64 %y
+define i64 @select_icmp_x_and_8_ne_0_y_and_not_8(i32 %x, i64 %y) {
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %and1 = and i64 %y, -9
+ %and1.y = select i1 %cmp, i64 %and1, i64 %y
+ ret i64 %and1.y
+}
+
+; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_or_8(
+; CHECK: xor i64 %1, 8
+; CHECK: or i64 %2, %y
+define i64 @select_icmp_x_and_8_ne_0_y_or_8(i32 %x, i64 %y) {
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %or = or i64 %y, 8
+ %or.y = select i1 %cmp, i64 %or, i64 %y
+ ret i64 %or.y
+}
+
define i32 @test65(i64 %x) {
%1 = and i64 %x, 16
%2 = icmp ne i64 %1, 0
@@ -1130,4 +1235,4 @@ define i32 @test75(i32 %x) {
; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %x, 68
; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 68, i32 %x
; CHECK-NEXT: ret i32 [[SEL]]
-}
\ No newline at end of file
+}
diff --git a/test/Transforms/InstCombine/sext.ll b/test/Transforms/InstCombine/sext.ll
index b8dfe22..f04afcc 100644
--- a/test/Transforms/InstCombine/sext.ll
+++ b/test/Transforms/InstCombine/sext.ll
@@ -145,7 +145,7 @@ define i32 @test13(i32 %x) nounwind {
; CHECK-LABEL: @test13(
; CHECK-NEXT: %and = lshr i32 %x, 3
; CHECK-NEXT: %1 = and i32 %and, 1
-; CHECK-NEXT: %sext = add i32 %1, -1
+; CHECK-NEXT: %sext = add nsw i32 %1, -1
; CHECK-NEXT: ret i32 %sext
}
@@ -157,7 +157,7 @@ define i32 @test14(i16 %x) nounwind {
; CHECK-LABEL: @test14(
; CHECK-NEXT: %and = lshr i16 %x, 4
; CHECK-NEXT: %1 = and i16 %and, 1
-; CHECK-NEXT: %sext = add i16 %1, -1
+; CHECK-NEXT: %sext = add nsw i16 %1, -1
; CHECK-NEXT: %ext = sext i16 %sext to i32
; CHECK-NEXT: ret i32 %ext
}
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 41d803c8..67b7c49 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -444,3 +444,23 @@ define <2 x i64> @test36(<2 x i64> %A) {
; CHECK-NEXT: %sub = mul <2 x i64> %A, <i64 7, i64 15>
; CHECK-NEXT: ret <2 x i64> %sub
}
+
+define <2 x i32> @test37(<2 x i32> %A) {
+ %div = sdiv <2 x i32> %A, <i32 -2147483648, i32 -2147483648>
+ %sub = sub nsw <2 x i32> zeroinitializer, %div
+ ret <2 x i32> %sub
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: [[ICMP:%.*]] = icmp eq <2 x i32> %A, <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[ICMP]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[SEXT]]
+}
+
+define i32 @test38(i32 %A) {
+ %div = sdiv i32 %A, -2147483648
+ %sub = sub nsw i32 0, %div
+ ret i32 %sub
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 %A, -2147483648
+; CHECK-NEXT: [[SEXT:%.*]] = sext i1 [[ICMP]] to i32
+; CHECK-NEXT: ret i32 [[SEXT]]
+}
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index fc0f8bd..eb4e9d6 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -405,3 +405,12 @@ define i32 @pr19737(<4 x i32> %in0) {
%rv = extractelement <4 x i32> %and.i, i32 0
ret i32 %rv
}
+
+define <4 x i32> @pr20114(<4 x i32> %__mask) {
+; CHECK-LABEL: @pr20114
+; CHECK: shufflevector
+; CHECK: and
+ %mask01.i = shufflevector <4 x i32> %__mask, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+ %masked_new.i.i.i = and <4 x i32> bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64)> to <4 x i32>), %mask01.i
+ ret <4 x i32> %masked_new.i.i.i
+}
diff --git a/test/Transforms/InstCombine/zext-bool-add-sub.ll b/test/Transforms/InstCombine/zext-bool-add-sub.ll
index d7f338b..6fa4d70 100644
--- a/test/Transforms/InstCombine/zext-bool-add-sub.ll
+++ b/test/Transforms/InstCombine/zext-bool-add-sub.ll
@@ -6,7 +6,7 @@ entry:
; CHECK-LABEL: @a(
; CHECK: [[TMP1:%.*]] = sext i1 %y to i32
; CHECK: [[TMP2:%.*]] = select i1 %x, i32 2, i32 1
-; CHECK-NEXT: add i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: add nsw i32 [[TMP2]], [[TMP1]]
%conv = zext i1 %x to i32
%conv3 = zext i1 %y to i32
%conv3.neg = sub i32 0, %conv3
diff --git a/test/Transforms/InstSimplify/apint-or.ll b/test/Transforms/InstSimplify/apint-or.ll
new file mode 100644
index 0000000..5d314db
--- /dev/null
+++ b/test/Transforms/InstSimplify/apint-or.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -instsimplify -S | not grep or
+
+; Test the case where integer BitWidth <= 64 && BitWidth % 2 != 0.
+define i39 @test1(i39 %V, i39 %M) {
+ ;; If we have: ((V + N) & C1) | (V & C2)
+ ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ ;; replace with V+N.
+ %C1 = xor i39 274877906943, -1 ;; C2 = 274877906943
+ %N = and i39 %M, 274877906944
+ %A = add i39 %V, %N
+ %B = and i39 %A, %C1
+ %D = and i39 %V, 274877906943
+ %R = or i39 %B, %D
+ ret i39 %R
+; CHECK-LABEL: @test1
+; CHECK-NEXT: and {{.*}}, -274877906944
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+; Test the case where Integer BitWidth > 64 && BitWidth <= 1024.
+define i399 @test2(i399 %V, i399 %M) {
+ ;; If we have: ((V + N) & C1) | (V & C2)
+ ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ ;; replace with V+N.
+ %C1 = xor i399 274877906943, -1 ;; C2 = 274877906943
+ %N = and i399 %M, 18446742974197923840
+ %A = add i399 %V, %N
+ %B = and i399 %A, %C1
+ %D = and i399 %V, 274877906943
+ %R = or i399 %B, %D
+ ret i399 %R
+; CHECK-LABEL: @test2
+; CHECK-NEXT: and {{.*}}, 18446742974197923840
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 105e244..7d0cd9c 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -883,3 +883,33 @@ define i1 @returns_nonnull() {
; CHECK: ret i1 false
}
+; If a bit is known to be zero for A and known to be one for B,
+; then A and B cannot be equal.
+define i1 @icmp_eq_const(i32 %a) nounwind {
+ %b = mul nsw i32 %a, -2
+ %c = icmp eq i32 %b, 1
+ ret i1 %c
+
+; CHECK-LABEL: @icmp_eq_const
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @icmp_ne_const(i32 %a) nounwind {
+ %b = mul nsw i32 %a, -2
+ %c = icmp ne i32 %b, 1
+ ret i1 %c
+
+; CHECK-LABEL: @icmp_ne_const
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @icmp_sdiv_int_min(i32 %a) {
+ %div = sdiv i32 -2147483648, %a
+ %cmp = icmp ne i32 %div, -1073741824
+ ret i1 %cmp
+
+; CHECK-LABEL: @icmp_sdiv_int_min
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 -2147483648, %a
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[DIV]], -1073741824
+; CHECK-NEXT: ret i1 [[CMP]]
+}
diff --git a/test/Transforms/JumpThreading/pr15851_hang.ll b/test/Transforms/JumpThreading/pr15851_hang.ll
new file mode 100644
index 0000000..0484bc9
--- /dev/null
+++ b/test/Transforms/JumpThreading/pr15851_hang.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -jump-threading < %s | FileCheck %s
+
+; CHECK-LABEL: @f(
+; CHECK-LABEL: entry
+; CHECK: ret void
+; CHECK-NOT: for.cond1
+; CHECK-NOT: for.body
+
+define void @f() {
+entry:
+ ret void
+
+for.cond1:
+ %i.025 = phi i32 [ %inc, %for.body ], [ %inc, %for.body ], [ 1, %for.cond1 ]
+ %cmp = icmp slt i32 %i.025, 2
+ br i1 %cmp, label %for.body, label %for.cond1
+
+for.body:
+ %inc = add nsw i32 %i.025, 0
+ %a = icmp ugt i32 %inc, 2
+ br i1 %a, label %for.cond1, label %for.cond1
+}
diff --git a/test/Transforms/JumpThreading/select.ll b/test/Transforms/JumpThreading/select.ll
index 201e604..545e86c 100644
--- a/test/Transforms/JumpThreading/select.ll
+++ b/test/Transforms/JumpThreading/select.ll
@@ -127,7 +127,7 @@ L4:
; CHECK: test_switch_default
; CHECK: entry:
; CHECK: load
-; CHECK: switch
+; CHECK: icmp
; CHECK: [[THREADED:[A-Za-z.0-9]+]]:
; CHECK: store
; CHECK: br
diff --git a/test/Transforms/LICM/extra-copies.ll b/test/Transforms/LICM/extra-copies.ll
new file mode 100644
index 0000000..ef52f9f
--- /dev/null
+++ b/test/Transforms/LICM/extra-copies.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -licm -S | FileCheck %s
+; PR19835
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @f(i32 %x) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %storemerge4 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %mul = mul nsw i32 %x, %x
+ %add2 = add nsw i32 %mul, %x
+ %mul3 = add nsw i32 %add2, %mul
+ %inc = add nsw i32 %storemerge4, 1
+ %cmp = icmp slt i32 %inc, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ %a9.0.lcssa = phi i32 [ %mul3, %for.body ]
+ ret i32 %a9.0.lcssa
+}
+
+; Test that there is exactly one copy of mul nsw i32 %x, %x in the exit block.
+; CHECK: define i32 @f(i32 [[X:%.*]])
+; CHECK: for.end:
+; CHECK-NOT: mul nsw i32 [[X]], [[X]]
+; CHECK: mul nsw i32 [[X]], [[X]]
+; CHECK-NOT: mul nsw i32 [[X]], [[X]]
diff --git a/test/Transforms/LICM/hoist-bitcast-load.ll b/test/Transforms/LICM/hoist-bitcast-load.ll
new file mode 100644
index 0000000..639dca5
--- /dev/null
+++ b/test/Transforms/LICM/hoist-bitcast-load.ll
@@ -0,0 +1,201 @@
+; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure the basic alloca pointer hoisting works:
+; CHECK-LABEL: @test1
+; CHECK: load i32* %c, align 4
+; CHECK: for.body:
+
+; Function Attrs: nounwind uwtable
+define void @test1(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ %c = alloca i32
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %1 = load i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; Make sure the basic alloca pointer hoisting works through a bitcast to a
+; pointer to a smaller type:
+; CHECK-LABEL: @test2
+; CHECK: load i32* %c, align 4
+; CHECK: for.body:
+
+; Function Attrs: nounwind uwtable
+define void @test2(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ %ca = alloca i64
+ %c = bitcast i64* %ca to i32*
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %1 = load i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; Make sure the basic alloca pointer hoisting works through a bitcast to a
+; pointer to a smaller type (where the bitcast also needs to be hoisted):
+; CHECK-LABEL: @test3
+; CHECK: load i32* %c, align 4
+; CHECK: for.body:
+
+; Function Attrs: nounwind uwtable
+define void @test3(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ %ca = alloca i64
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %c = bitcast i64* %ca to i32*
+ %1 = load i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; Make sure the basic alloca pointer hoisting does not happen through a bitcast
+; to a pointer to a larger type:
+; CHECK-LABEL: @test4
+; CHECK: for.body:
+; CHECK: load i32* %c, align 4
+
+; Function Attrs: nounwind uwtable
+define void @test4(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ %ca = alloca i16
+ %c = bitcast i16* %ca to i32*
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %1 = load i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; Don't crash on bitcasts to unsized types.
+; CHECK-LABEL: @test5
+; CHECK: for.body:
+; CHECK: load i32* %c, align 4
+
+%atype = type opaque
+
+; Function Attrs: nounwind uwtable
+define void @test5(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ %ca = alloca i16
+ %cab = bitcast i16* %ca to %atype*
+ %c = bitcast %atype* %cab to i32*
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %1 = load i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+attributes #0 = { nounwind uwtable }
+
diff --git a/test/Transforms/LoadCombine/load-combine.ll b/test/Transforms/LoadCombine/load-combine.ll
new file mode 100644
index 0000000..c4d9241
--- /dev/null
+++ b/test/Transforms/LoadCombine/load-combine.ll
@@ -0,0 +1,190 @@
+; RUN: opt < %s -load-combine -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Combine read from char* idiom.
+define i64 @LoadU64_x64_0(i64* %pData) {
+ %1 = bitcast i64* %pData to i8*
+ %2 = load i8* %1, align 1
+ %3 = zext i8 %2 to i64
+ %4 = shl nuw i64 %3, 56
+ %5 = getelementptr inbounds i8* %1, i64 1
+ %6 = load i8* %5, align 1
+ %7 = zext i8 %6 to i64
+ %8 = shl nuw nsw i64 %7, 48
+ %9 = or i64 %8, %4
+ %10 = getelementptr inbounds i8* %1, i64 2
+ %11 = load i8* %10, align 1
+ %12 = zext i8 %11 to i64
+ %13 = shl nuw nsw i64 %12, 40
+ %14 = or i64 %9, %13
+ %15 = getelementptr inbounds i8* %1, i64 3
+ %16 = load i8* %15, align 1
+ %17 = zext i8 %16 to i64
+ %18 = shl nuw nsw i64 %17, 32
+ %19 = or i64 %14, %18
+ %20 = getelementptr inbounds i8* %1, i64 4
+ %21 = load i8* %20, align 1
+ %22 = zext i8 %21 to i64
+ %23 = shl nuw nsw i64 %22, 24
+ %24 = or i64 %19, %23
+ %25 = getelementptr inbounds i8* %1, i64 5
+ %26 = load i8* %25, align 1
+ %27 = zext i8 %26 to i64
+ %28 = shl nuw nsw i64 %27, 16
+ %29 = or i64 %24, %28
+ %30 = getelementptr inbounds i8* %1, i64 6
+ %31 = load i8* %30, align 1
+ %32 = zext i8 %31 to i64
+ %33 = shl nuw nsw i64 %32, 8
+ %34 = or i64 %29, %33
+ %35 = getelementptr inbounds i8* %1, i64 7
+ %36 = load i8* %35, align 1
+ %37 = zext i8 %36 to i64
+ %38 = or i64 %34, %37
+ ret i64 %38
+; CHECK-LABEL: @LoadU64_x64_0(
+; CHECK: load i64* %{{.*}}, align 1
+; CHECK-NOT: load
+}
+
+; Combine simple adjacent loads.
+define i32 @"2xi16_i32"(i16* %x) {
+ %1 = load i16* %x, align 2
+ %2 = getelementptr inbounds i16* %x, i64 1
+ %3 = load i16* %2, align 2
+ %4 = zext i16 %3 to i32
+ %5 = shl nuw i32 %4, 16
+ %6 = zext i16 %1 to i32
+ %7 = or i32 %5, %6
+ ret i32 %7
+; CHECK-LABEL: @"2xi16_i32"(
+; CHECK: load i32* %{{.*}}, align 2
+; CHECK-NOT: load
+}
+
+; Don't combine loads across stores.
+define i32 @"2xi16_i32_store"(i16* %x, i16* %y) {
+ %1 = load i16* %x, align 2
+ store i16 0, i16* %y, align 2
+ %2 = getelementptr inbounds i16* %x, i64 1
+ %3 = load i16* %2, align 2
+ %4 = zext i16 %3 to i32
+ %5 = shl nuw i32 %4, 16
+ %6 = zext i16 %1 to i32
+ %7 = or i32 %5, %6
+ ret i32 %7
+; CHECK-LABEL: @"2xi16_i32_store"(
+; CHECK: load i16* %{{.*}}, align 2
+; CHECK: store
+; CHECK: load i16* %{{.*}}, align 2
+}
+
+; Don't combine loads with a gap.
+define i32 @"2xi16_i32_gap"(i16* %x) {
+ %1 = load i16* %x, align 2
+ %2 = getelementptr inbounds i16* %x, i64 2
+ %3 = load i16* %2, align 2
+ %4 = zext i16 %3 to i32
+ %5 = shl nuw i32 %4, 16
+ %6 = zext i16 %1 to i32
+ %7 = or i32 %5, %6
+ ret i32 %7
+; CHECK-LABEL: @"2xi16_i32_gap"(
+; CHECK: load i16* %{{.*}}, align 2
+; CHECK: load i16* %{{.*}}, align 2
+}
+
+; Combine out of order loads.
+define i32 @"2xi16_i32_order"(i16* %x) {
+ %1 = getelementptr inbounds i16* %x, i64 1
+ %2 = load i16* %1, align 2
+ %3 = zext i16 %2 to i32
+ %4 = load i16* %x, align 2
+ %5 = shl nuw i32 %3, 16
+ %6 = zext i16 %4 to i32
+ %7 = or i32 %5, %6
+ ret i32 %7
+; CHECK-LABEL: @"2xi16_i32_order"(
+; CHECK: load i32* %{{.*}}, align 2
+; CHECK-NOT: load
+}
+
+; Overlapping loads.
+define i32 @"2xi16_i32_overlap"(i8* %x) {
+ %1 = bitcast i8* %x to i16*
+ %2 = load i16* %1, align 2
+ %3 = getelementptr inbounds i8* %x, i64 1
+ %4 = bitcast i8* %3 to i16*
+ %5 = load i16* %4, align 2
+ %6 = zext i16 %5 to i32
+ %7 = shl nuw i32 %6, 16
+ %8 = zext i16 %2 to i32
+ %9 = or i32 %7, %8
+ ret i32 %9
+; CHECK-LABEL: @"2xi16_i32_overlap"(
+; CHECK: load i16* %{{.*}}, align 2
+; CHECK: load i16* %{{.*}}, align 2
+}
+
+; Combine valid alignments.
+define i64 @"2xi16_i64_align"(i8* %x) {
+ %1 = bitcast i8* %x to i32*
+ %2 = load i32* %1, align 4
+ %3 = getelementptr inbounds i8* %x, i64 4
+ %4 = bitcast i8* %3 to i16*
+ %5 = load i16* %4, align 2
+ %6 = getelementptr inbounds i8* %x, i64 6
+ %7 = bitcast i8* %6 to i16*
+ %8 = load i16* %7, align 2
+ %9 = zext i16 %8 to i64
+ %10 = shl nuw i64 %9, 48
+ %11 = zext i16 %5 to i64
+ %12 = shl nuw nsw i64 %11, 32
+ %13 = zext i32 %2 to i64
+ %14 = or i64 %12, %13
+ %15 = or i64 %14, %10
+ ret i64 %15
+; CHECK-LABEL: @"2xi16_i64_align"(
+; CHECK: load i64* %{{.*}}, align 4
+}
+
+; Non-power-of-two total size: only a power-of-two-sized prefix is combined.
+define i64 @"2xi16_i64_npo2"(i8* %x) {
+ %1 = load i8* %x, align 1
+ %2 = zext i8 %1 to i64
+ %3 = getelementptr inbounds i8* %x, i64 1
+ %4 = load i8* %3, align 1
+ %5 = zext i8 %4 to i64
+ %6 = shl nuw nsw i64 %5, 8
+ %7 = or i64 %6, %2
+ %8 = getelementptr inbounds i8* %x, i64 2
+ %9 = load i8* %8, align 1
+ %10 = zext i8 %9 to i64
+ %11 = shl nuw nsw i64 %10, 16
+ %12 = or i64 %11, %7
+ %13 = getelementptr inbounds i8* %x, i64 3
+ %14 = load i8* %13, align 1
+ %15 = zext i8 %14 to i64
+ %16 = shl nuw nsw i64 %15, 24
+ %17 = or i64 %16, %12
+ %18 = getelementptr inbounds i8* %x, i64 4
+ %19 = load i8* %18, align 1
+ %20 = zext i8 %19 to i64
+ %21 = shl nuw nsw i64 %20, 32
+ %22 = or i64 %21, %17
+ %23 = getelementptr inbounds i8* %x, i64 5
+ %24 = load i8* %23, align 1
+ %25 = zext i8 %24 to i64
+ %26 = shl nuw nsw i64 %25, 40
+ %27 = or i64 %26, %22
+ %28 = getelementptr inbounds i8* %x, i64 6
+ %29 = load i8* %28, align 1
+ %30 = zext i8 %29 to i64
+ %31 = shl nuw nsw i64 %30, 48
+ %32 = or i64 %31, %27
+ ret i64 %32
+; CHECK-LABEL: @"2xi16_i64_npo2"(
+; CHECK: load i32* %{{.*}}, align 1
+}
diff --git a/test/Transforms/LoopIdiom/X86/lit.local.cfg b/test/Transforms/LoopIdiom/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/LoopIdiom/X86/lit.local.cfg
+++ b/test/Transforms/LoopIdiom/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg b/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg
index 6642d28..675f48e 100644
--- a/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg
+++ b/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg
@@ -1,5 +1,4 @@
config.suffixes = ['.ll']
-targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
+if not 'AArch64' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
index 756ea82..1d56dde 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
+; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - -arm-atomic-cfg-tidy=0 | FileCheck %s
;
; LSR should only check for valid address modes when the IV user is a
; memory address.
diff --git a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
+++ b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
+++ b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg b/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
index 2e46300..5d33887 100644
--- a/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
+++ b/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'PowerPC' in targets:
+if not 'PowerPC' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/LoopUnroll/X86/lit.local.cfg b/test/Transforms/LoopUnroll/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/LoopUnroll/X86/lit.local.cfg
+++ b/test/Transforms/LoopUnroll/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/LoopUnroll/pr18861.ll b/test/Transforms/LoopUnroll/pr18861.ll
new file mode 100644
index 0000000..62f2610
--- /dev/null
+++ b/test/Transforms/LoopUnroll/pr18861.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -loop-unroll -indvars -disable-output
+
+@b = external global i32, align 4
+
+; Function Attrs: nounwind uwtable
+define void @fn1() #0 {
+entry:
+ br label %for.cond1thread-pre-split
+
+for.cond1thread-pre-split: ; preds = %for.inc8, %entry
+ %storemerge1 = phi i32 [ 0, %entry ], [ %inc9, %for.inc8 ]
+ br i1 undef, label %for.inc8, label %for.cond2.preheader.lr.ph
+
+for.cond2.preheader.lr.ph: ; preds = %for.cond1thread-pre-split
+ br label %for.cond2.preheader
+
+for.cond2.preheader: ; preds = %for.inc5, %for.cond2.preheader.lr.ph
+ br label %for.cond2
+
+for.cond2: ; preds = %for.body3, %for.cond2.preheader
+ %storemerge = phi i32 [ %add, %for.body3 ], [ 0, %for.cond2.preheader ]
+ %cmp = icmp slt i32 %storemerge, 1
+ br i1 %cmp, label %for.body3, label %for.inc5
+
+for.body3: ; preds = %for.cond2
+ %tobool4 = icmp eq i32 %storemerge, 0
+ %add = add nsw i32 %storemerge, 1
+ br i1 %tobool4, label %for.cond2, label %if.then
+
+if.then: ; preds = %for.body3
+ store i32 %storemerge1, i32* @b, align 4
+ ret void
+
+for.inc5: ; preds = %for.cond2
+ br i1 undef, label %for.cond1.for.inc8_crit_edge, label %for.cond2.preheader
+
+for.cond1.for.inc8_crit_edge: ; preds = %for.inc5
+ br label %for.inc8
+
+for.inc8: ; preds = %for.cond1.for.inc8_crit_edge, %for.cond1thread-pre-split
+ %inc9 = add nsw i32 %storemerge1, 1
+ br label %for.cond1thread-pre-split
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll
index d8bbea9..a14087d 100644
--- a/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -2,6 +2,12 @@
; Tests for unrolling loops with run-time trip counts
+; CHECK: %xtraiter = and i32 %n
+; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; CHECK: %lcmp.overflow = icmp eq i32 %n, 0
+; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
+; CHECK: br i1 %lcmp.or, label %unr.cmp
+
; CHECK: unr.cmp{{.*}}:
; CHECK: for.body.unr{{.*}}:
; CHECK: for.body:
diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll
new file mode 100644
index 0000000..5e45a2d
--- /dev/null
+++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll
@@ -0,0 +1,285 @@
+; RUN: opt < %s -loop-unroll -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; loop4 contains a small loop which should be completely unrolled by
+; the default unrolling heuristics. It serves as a control for the
+; unroll(disable) pragma test loop4_with_disable.
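+; A hedged C sketch of the kind of source these tests correspond to (the
+; names and exact source are assumptions, not taken from the test):
+;   void loop4(int *a) {
+;     for (int i = 0; i < 4; ++i)
+;       a[i] += 1;
+;   }
+; loop4_with_disable is the same loop written under
+;   #pragma clang loop unroll(disable)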
+;
+; CHECK-LABEL: @loop4(
+; CHECK-NOT: br i1
+define void @loop4(i32* nocapture %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 4
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; #pragma clang loop unroll(disable)
+;
+; CHECK-LABEL: @loop4_with_disable(
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @loop4_with_disable(i32* nocapture %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 4
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end: ; preds = %for.body
+ ret void
+}
+!1 = metadata !{metadata !1, metadata !2}
+!2 = metadata !{metadata !"llvm.loop.unroll.enable", i1 false}
+
+; loop64 has a high enough trip count that it should *not* be unrolled by
+; the default unrolling heuristic. It serves as the control for the
+; loop64_with_.* pragma tests below.
+;
+; CHECK-LABEL: @loop64(
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @loop64(i32* nocapture %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; #pragma clang loop unroll(enable)
+; Loop should be fully unrolled.
+;
+; CHECK-LABEL: @loop64_with_enable(
+; CHECK-NOT: br i1
+define void @loop64_with_enable(i32* nocapture %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3
+
+for.end: ; preds = %for.body
+ ret void
+}
+!3 = metadata !{metadata !3, metadata !4}
+!4 = metadata !{metadata !"llvm.loop.unroll.enable", i1 true}
+
+; #pragma clang loop unroll_count(4)
+; Loop should be unrolled 4 times.
+;
+; CHECK-LABEL: @loop64_with_count4(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @loop64_with_count4(i32* nocapture %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5
+
+for.end: ; preds = %for.body
+ ret void
+}
+!5 = metadata !{metadata !5, metadata !6}
+!6 = metadata !{metadata !"llvm.loop.unroll.count", i32 4}
+
+
+; #pragma clang loop unroll(enable) unroll_count(4)
+; Loop should be unrolled 4 times.
+;
+; CHECK-LABEL: @loop64_with_enable_and_count4(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @loop64_with_enable_and_count4(i32* nocapture %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7
+
+for.end: ; preds = %for.body
+ ret void
+}
+!7 = metadata !{metadata !7, metadata !6, metadata !4}
+
+; #pragma clang loop unroll(enable)
+; Full unrolling is requested, but the loop has a dynamic trip count so
+; no unrolling should occur.
+;
+; CHECK-LABEL: @dynamic_loop_with_enable(
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @dynamic_loop_with_enable(i32* nocapture %a, i32 %b) {
+entry:
+ %cmp3 = icmp sgt i32 %b, 0
+ br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %b
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+!8 = metadata !{metadata !8, metadata !4}
+
+; #pragma clang loop unroll_count(4)
+; Loop has a dynamic trip count. Unrolling should occur, but no
+; conditional branches can be removed.
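+; (Since the trip count is unknown, the unroller cannot prove it is a
+; multiple of 4, so the exit test after each of the four copies is kept.)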
+;
+; CHECK-LABEL: @dynamic_loop_with_count4(
+; CHECK-NOT: store
+; CHECK: br i1
+; CHECK: store
+; CHECK: br i1
+; CHECK: store
+; CHECK: br i1
+; CHECK: store
+; CHECK: br i1
+; CHECK: store
+; CHECK: br i1
+; CHECK-NOT: br i1
+define void @dynamic_loop_with_count4(i32* nocapture %a, i32 %b) {
+entry:
+ %cmp3 = icmp sgt i32 %b, 0
+ br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %b
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+!9 = metadata !{metadata !9, metadata !6}
+
+; #pragma clang loop unroll_count(1)
+; Loop should not be unrolled.
+;
+; CHECK-LABEL: @unroll_1(
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @unroll_1(i32* nocapture %a, i32 %b) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 4
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10
+
+for.end: ; preds = %for.body
+ ret void
+}
+!10 = metadata !{metadata !10, metadata !11}
+!11 = metadata !{metadata !"llvm.loop.unroll.count", i32 1}
+
+; #pragma clang loop unroll(enable)
+; Loop has a very high trip count (1 million) and full unrolling was requested.
+; Loop should be unrolled up to the pragma threshold, but not completely.
+;
+; CHECK-LABEL: @unroll_1M(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: br i1
+define void @unroll_1M(i32* nocapture %a, i32 %b) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1000000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12
+
+for.end: ; preds = %for.body
+ ret void
+}
+!12 = metadata !{metadata !12, metadata !4}
diff --git a/test/Transforms/LoopVectorize/AArch64/lit.local.cfg b/test/Transforms/LoopVectorize/AArch64/lit.local.cfg
index f1d1f88..937cffb 100644
--- a/test/Transforms/LoopVectorize/AArch64/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/AArch64/lit.local.cfg
@@ -1,6 +1,5 @@
config.suffixes = ['.ll']
-targets = set(config.root.targets_to_build.split())
-if not 'ARM64' in targets:
+if not 'AArch64' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/LoopVectorize/ARM/lit.local.cfg b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
index 8a3ba96..98c6700 100644
--- a/test/Transforms/LoopVectorize/ARM/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg b/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg
index 2e46300..5d33887 100644
--- a/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'PowerPC' in targets:
+if not 'PowerPC' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
index faed77d..fce3b70 100644
--- a/test/Transforms/LoopVectorize/X86/already-vectorized.ll
+++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
@@ -40,7 +40,7 @@ for.end: ; preds = %for.body
; Now, we check for the Hint metadata
; CHECK: [[vect]] = metadata !{metadata [[vect]], metadata [[width:![0-9]+]], metadata [[unroll:![0-9]+]]}
-; CHECK: [[width]] = metadata !{metadata !"llvm.vectorizer.width", i32 1}
-; CHECK: [[unroll]] = metadata !{metadata !"llvm.vectorizer.unroll", i32 1}
+; CHECK: [[width]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
+; CHECK: [[unroll]] = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1}
; CHECK: [[scalar]] = metadata !{metadata [[scalar]], metadata [[width]], metadata [[unroll]]}
diff --git a/test/Transforms/LoopVectorize/X86/avx512.ll b/test/Transforms/LoopVectorize/X86/avx512.ll
new file mode 100644
index 0000000..a220866
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/avx512.ll
@@ -0,0 +1,35 @@
+; RUN: opt -mattr=+avx512f --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Verify that we generate 512-bit wide vectors for a basic integer memset
+; loop.
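+; A roughly equivalent C loop (an assumed sketch, not the original source):
+;   void f(int *a, int n) {
+;     for (int i = 0; i < n; ++i)
+;       a[i] = n;
+;   }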
+
+; CHECK-LABEL: f:
+; CHECK: vmovdqu32 %zmm{{.}}, (
+; CHECK-NOT: %ymm
+
+define void @f(i32* %a, i32 %n) {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ store i32 %n, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/lit.local.cfg b/test/Transforms/LoopVectorize/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/LoopVectorize/X86/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
index 9e4e989..8e0ca41 100644
--- a/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -9,8 +9,9 @@
; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2
; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
-; This file tests the llvm.vectorizer.pragma forcing vectorization even when
-; optimization levels are too low, or when vectorization is disabled.
+; This file tests the llvm.loop.vectorize.enable metadata forcing
+; vectorization even when optimization levels are too low, or when
+; vectorization is disabled.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -170,6 +171,6 @@ for.end: ; preds = %for.body
}
!0 = metadata !{metadata !0, metadata !1}
-!1 = metadata !{metadata !"llvm.vectorizer.enable", i1 1}
+!1 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 1}
!2 = metadata !{metadata !2, metadata !3}
-!3 = metadata !{metadata !"llvm.vectorizer.enable", i1 0}
+!3 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 0}
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
index 84ffb27..074313b 100644
--- a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
+++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
@@ -53,7 +53,7 @@ for.end:
}
!1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
+!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
;
; This method will not be vectorized, as scalar cost is lower than any of vector costs.
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
index 1b979e5..97c31a1 100644
--- a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
+++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
@@ -44,7 +44,7 @@ for.end:
}
!1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
+!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
;
; This loop will not be vectorized as the trip count is below the threshold.
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
new file mode 100644
index 0000000..6cdd29b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -0,0 +1,160 @@
+; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+
+; C/C++ code for tests
+; void test(int *A, int Length) {
+; #pragma clang loop vectorize(enable) interleave(enable)
+; for (int i = 0; i < Length; i++) {
+; A[i] = i;
+; if (A[i] > Length)
+; break;
+; }
+; }
+
+; void test_disabled(int *A, int Length) {
+; #pragma clang loop vectorize(disable) interleave(disable)
+; for (int i = 0; i < Length; i++)
+; A[i] = i;
+; }
+
+; void test_array_bounds(int *A, int *B, int Length) {
+; #pragma clang loop vectorize(enable)
+; for (int i = 0; i < Length; i++)
+; A[i] = A[B[i]];
+; }
+
+; File, line, and column should match those specified in the metadata
+; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
+; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization was not specified
+; CHECK: remark: source.cpp:13:5: loop not vectorized: vector width and interleave count are explicitly set to 1
+; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds
+; CHECK: remark: source.cpp:19:5: loop not vectorized: vectorization is explicitly enabled
+
+; CHECK: _Z4testPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+; CHECK: _Z13test_disabledPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+; CHECK: _Z17test_array_boundsPiS_i
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind optsize ssp uwtable
+define void @_Z4testPii(i32* nocapture %A, i32 %Length) #0 {
+entry:
+ %cmp10 = icmp sgt i32 %Length, 0, !dbg !12
+ br i1 %cmp10, label %for.body, label %for.end, !dbg !12, !llvm.loop !14
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !16
+ %0 = trunc i64 %indvars.iv to i32, !dbg !16
+ store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18
+ %cmp3 = icmp sle i32 %0, %Length, !dbg !22
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !12
+ %1 = trunc i64 %indvars.iv.next to i32
+ %cmp = icmp slt i32 %1, %Length, !dbg !12
+ %or.cond = and i1 %cmp3, %cmp, !dbg !22
+ br i1 %or.cond, label %for.body, label %for.end, !dbg !22
+
+for.end: ; preds = %for.body, %entry
+ ret void, !dbg !24
+}
+
+; Function Attrs: nounwind optsize ssp uwtable
+define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 {
+entry:
+ %cmp4 = icmp sgt i32 %Length, 0, !dbg !25
+ br i1 %cmp4, label %for.body, label %for.end, !dbg !25, !llvm.loop !27
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !30
+ %0 = trunc i64 %indvars.iv to i32, !dbg !30
+ store i32 %0, i32* %arrayidx, align 4, !dbg !30, !tbaa !18
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !25
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !25
+ %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !25
+ br i1 %exitcond, label %for.end, label %for.body, !dbg !25, !llvm.loop !27
+
+for.end: ; preds = %for.body, %entry
+ ret void, !dbg !31
+}
+
+; Function Attrs: nounwind optsize ssp uwtable
+define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 {
+entry:
+ %cmp9 = icmp sgt i32 %Length, 0, !dbg !32
+ br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32, !llvm.loop !34
+
+for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !35
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv, !dbg !35
+ %0 = load i32* %arrayidx, align 4, !dbg !35, !tbaa !18
+ %idxprom1 = sext i32 %0 to i64, !dbg !35
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1, !dbg !35
+ %1 = load i32* %arrayidx2, align 4, !dbg !35, !tbaa !18
+ %arrayidx4 = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !35
+ store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32
+ %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !32
+ br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !32, !llvm.loop !34
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void, !dbg !36
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
+!1 = metadata !{metadata !"source.cpp", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !7, metadata !8}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 1}
+!5 = metadata !{i32 786473, metadata !1}
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
+!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_disabled", metadata !"test_disabled", metadata !"", i32 10, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z13test_disabledPii, null, null, metadata !2, i32 10}
+!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_array_bounds", metadata !"test_array_bounds", metadata !"", i32 16, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, metadata !2, i32 16}
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!11 = metadata !{metadata !"clang version 3.5.0"}
+!12 = metadata !{i32 3, i32 8, metadata !13, null}
+!13 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
+!14 = metadata !{metadata !14, metadata !15, metadata !15}
+!15 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!16 = metadata !{i32 4, i32 5, metadata !17, null}
+!17 = metadata !{i32 786443, metadata !1, metadata !13, i32 3, i32 36, i32 0, i32 1}
+!18 = metadata !{metadata !19, metadata !19, i64 0}
+!19 = metadata !{metadata !"int", metadata !20, i64 0}
+!20 = metadata !{metadata !"omnipotent char", metadata !21, i64 0}
+!21 = metadata !{metadata !"Simple C/C++ TBAA"}
+!22 = metadata !{i32 5, i32 9, metadata !23, null}
+!23 = metadata !{i32 786443, metadata !1, metadata !17, i32 5, i32 9, i32 0, i32 2}
+!24 = metadata !{i32 8, i32 1, metadata !4, null}
+!25 = metadata !{i32 12, i32 8, metadata !26, null}
+!26 = metadata !{i32 786443, metadata !1, metadata !7, i32 12, i32 3, i32 0, i32 3}
+!27 = metadata !{metadata !27, metadata !28, metadata !29}
+!28 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1}
+!29 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
+!30 = metadata !{i32 13, i32 5, metadata !26, null}
+!31 = metadata !{i32 14, i32 1, metadata !7, null}
+!32 = metadata !{i32 18, i32 8, metadata !33, null}
+!33 = metadata !{i32 786443, metadata !1, metadata !8, i32 18, i32 3, i32 0, i32 4}
+!34 = metadata !{metadata !34, metadata !15}
+!35 = metadata !{i32 19, i32 5, metadata !33, null}
+!36 = metadata !{i32 20, i32 1, metadata !8, null}
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
index 685d034..f683447 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -1,8 +1,17 @@
; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=VECTORIZED %s
-; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='.*vectorize.*' 2>&1 | FileCheck -check-prefix=UNROLLED %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
-; VECTORIZED: remark: {{.*}}.c:17:8: vectorized loop (vectorization factor: 4, unrolling interleave factor: 1)
-; UNROLLED: remark: {{.*}}.c:17:8: unrolled with interleaving factor 4 (vectorization not beneficial)
+; This code has all the !dbg annotations needed to track source line information,
+; but is missing the llvm.dbg.cu annotation. This prevents code generation from
+; emitting debug info in the final output.
+; RUN: llc -mtriple x86_64-pc-linux-gnu %s -o - | FileCheck -check-prefix=DEBUG-OUTPUT %s
+; DEBUG-OUTPUT-NOT: .loc
+; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
+
+; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization factor: 4, unrolling interleave factor: 1)
+; UNROLLED: remark: vectorization-remarks.c:17:8: unrolled with interleaving factor 4 (vectorization not beneficial)
+; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vector width and interleave count are explicitly set to 1
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -37,11 +46,9 @@ for.end: ; preds = %for.body
declare void @ibar(i32*) #1
-!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [./vectorization-remarks.c] [DW_LANG_C99]
!1 = metadata !{metadata !"vectorization-remarks.c", metadata !"."}
!2 = metadata !{}
!3 = metadata !{metadata !4}
diff --git a/test/Transforms/LoopVectorize/XCore/lit.local.cfg b/test/Transforms/LoopVectorize/XCore/lit.local.cfg
index 4d17d46..bb48713 100644
--- a/test/Transforms/LoopVectorize/XCore/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/XCore/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'XCore' in targets:
+if not 'XCore' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/LoopVectorize/control-flow.ll b/test/Transforms/LoopVectorize/control-flow.ll
new file mode 100644
index 0000000..e4ba77f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/control-flow.ll
@@ -0,0 +1,78 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+
+; C/C++ code for control flow test
+; int test(int *A, int Length) {
+; for (int i = 0; i < Length; i++) {
+; if (A[i] > 10.0) goto end;
+; A[i] = 0;
+; }
+; end:
+; return 0;
+; }
+
+; CHECK: remark: source.cpp:5:9: loop not vectorized: loop control flow is not understood by vectorizer
+; CHECK: remark: source.cpp:5:9: loop not vectorized: vectorization was not specified
+
+; CHECK: _Z4testPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind optsize ssp uwtable
+define i32 @_Z4testPii(i32* nocapture %A, i32 %Length) #0 {
+entry:
+ %cmp8 = icmp sgt i32 %Length, 0, !dbg !10
+ br i1 %cmp8, label %for.body.preheader, label %end, !dbg !10
+
+for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !12
+
+for.body: ; preds = %for.body.preheader, %if.else
+ %indvars.iv = phi i64 [ %indvars.iv.next, %if.else ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !12
+ %0 = load i32* %arrayidx, align 4, !dbg !12, !tbaa !15
+ %cmp1 = icmp sgt i32 %0, 10, !dbg !12
+ br i1 %cmp1, label %end.loopexit, label %if.else, !dbg !12
+
+if.else: ; preds = %for.body
+ store i32 0, i32* %arrayidx, align 4, !dbg !19, !tbaa !15
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+ %1 = trunc i64 %indvars.iv.next to i32, !dbg !10
+ %cmp = icmp slt i32 %1, %Length, !dbg !10
+ br i1 %cmp, label %for.body, label %end.loopexit, !dbg !10
+
+end.loopexit: ; preds = %if.else, %for.body
+ br label %end
+
+end: ; preds = %end.loopexit, %entry
+ ret i32 0, !dbg !20
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
+!1 = metadata !{metadata !"source.cpp", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 2}
+!5 = metadata !{i32 786473, metadata !1}
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!9 = metadata !{metadata !"clang version 3.5.0"}
+!10 = metadata !{i32 3, i32 8, metadata !11, null}
+!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
+!12 = metadata !{i32 5, i32 9, metadata !13, null}
+!13 = metadata !{i32 786443, metadata !1, metadata !14, i32 5, i32 9, i32 0, i32 2}
+!14 = metadata !{i32 786443, metadata !1, metadata !11, i32 4, i32 3, i32 0, i32 1}
+!15 = metadata !{metadata !16, metadata !16, i64 0}
+!16 = metadata !{metadata !"int", metadata !17, i64 0}
+!17 = metadata !{metadata !"omnipotent char", metadata !18, i64 0}
+!18 = metadata !{metadata !"Simple C/C++ TBAA"}
+!19 = metadata !{i32 8, i32 7, metadata !13, null}
+!20 = metadata !{i32 12, i32 3, metadata !4, null}
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
index dbe0243..6e3e8ed 100644
--- a/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -156,7 +156,7 @@ for.body:
br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end
cond.false:
- %cond.1 = or i32 %inc3, sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c) to i32))
+ %cond.1 = or i32 %inc3, sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 1), i32** @c) to i32))
br label %cond.end
cond.end:
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
index ad2c663..7dabcb2 100644
--- a/test/Transforms/LoopVectorize/induction.ll
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -108,3 +108,64 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
; <label>:5 ; preds = %1
ret i32 %2
}
+
+; This loop has a backedge-taken count of i32_max. We need to check for this
+; condition and branch directly to the scalar loop.
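+; (Trip count = backedge-taken count + 1; with a backedge-taken count of
+; 0xFFFFFFFF that i32 addition wraps to 0, so the vector trip count would be
+; computed incorrectly unless we detect the overflow and fall back to the
+; scalar loop.)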
+
+; CHECK-LABEL: max_i32_backedgetaken
+; CHECK: %backedge.overflow = icmp eq i32 -1, -1
+; CHECK: br i1 %backedge.overflow, label %scalar.ph, label %overflow.checked
+
+; CHECK: scalar.ph:
+; CHECK: %bc.resume.val = phi i32 [ %resume.val, %middle.block ], [ 0, %0 ]
+; CHECK: %bc.merge.rdx = phi i32 [ 1, %0 ], [ %5, %middle.block ]
+
+define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
+
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
+ %b.0 = phi i32 [ 0, %0 ], [ %3, %1 ]
+ %2 = and i32 %a.0, 4
+ %3 = add i32 %b.0, -1
+ %4 = icmp eq i32 %3, 0
+ br i1 %4, label %5, label %1
+
+; <label>:5 ; preds = %1
+ ret i32 %2
+}
+
+; When generating the overflow check we must make sure that the induction start value
+; is defined before the branch to the scalar preheader.
+
+; CHECK-LABEL: testoverflowcheck
+; CHECK: entry
+; CHECK: %[[LOAD:.*]] = load i8
+; CHECK: %[[VAL:.*]] = zext i8 %[[LOAD]] to i32
+; CHECK: br
+
+; CHECK: scalar.ph
+; CHECK: phi i32 [ %{{.*}}, %middle.block ], [ %[[VAL]], %entry ]
+
+@e = global i8 1, align 1
+@d = common global i32 0, align 4
+@c = common global i32 0, align 4
+define i32 @testoverflowcheck() {
+entry:
+ %.pr.i = load i8* @e, align 1
+ %0 = load i32* @d, align 4
+ %c.promoted.i = load i32* @c, align 4
+ br label %cond.end.i
+
+cond.end.i:
+ %inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ]
+ %and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ]
+ %and.i = and i32 %0, %and3.i
+ %inc.i = add i8 %inc4.i, 1
+ %tobool.i = icmp eq i8 %inc.i, 0
+ br i1 %tobool.i, label %loopexit, label %cond.end.i
+
+loopexit:
+ ret i32 %and.i
+}
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
index c3d570c..7dfaf03 100644
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -1090,3 +1090,105 @@ for.end: ; preds = %for.body
ret void
}
+declare double @llvm.powi.f64(double %Val, i32 %power) nounwind readnone
+
+;CHECK-LABEL: @powi_f64(
+;CHECK: llvm.powi.v4f64
+;CHECK: ret void
+define void @powi_f64(i32 %n, double* noalias %y, double* noalias %x, i32 %P) nounwind uwtable {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8
+ %call = tail call double @llvm.powi.f64(double %0, i32 %P) nounwind readnone
+ %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx4, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;CHECK-LABEL: @powi_f64_neg(
+;CHECK-NOT: llvm.powi.v4f64
+;CHECK: ret void
+define void @powi_f64_neg(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8
+ %1 = trunc i64 %indvars.iv to i32
+ %call = tail call double @llvm.powi.f64(double %0, i32 %1) nounwind readnone
+ %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx4, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i64 @llvm.cttz.i64 (i64, i1) nounwind readnone
+
+;CHECK-LABEL: @cttz_f64(
+;CHECK: llvm.cttz.v4i64
+;CHECK: ret void
+define void @cttz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
+ %0 = load i64* %arrayidx, align 8
+ %call = tail call i64 @llvm.cttz.i64(i64 %0, i1 true) nounwind readnone
+ %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
+ store i64 %call, i64* %arrayidx4, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i64 @llvm.ctlz.i64 (i64, i1) nounwind readnone
+
+;CHECK-LABEL: @ctlz_f64(
+;CHECK: llvm.ctlz.v4i64
+;CHECK: ret void
+define void @ctlz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
+ %0 = load i64* %arrayidx, align 8
+ %call = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true) nounwind readnone
+ %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
+ store i64 %call, i64* %arrayidx4, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/metadata-unroll.ll b/test/Transforms/LoopVectorize/metadata-unroll.ll
index 7f10372..2fcc53a 100644
--- a/test/Transforms/LoopVectorize/metadata-unroll.ll
+++ b/test/Transforms/LoopVectorize/metadata-unroll.ll
@@ -38,4 +38,4 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp {
}
!0 = metadata !{metadata !0, metadata !1}
-!1 = metadata !{metadata !"llvm.vectorizer.unroll", i32 2}
+!1 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 2}
diff --git a/test/Transforms/LoopVectorize/metadata-width.ll b/test/Transforms/LoopVectorize/metadata-width.ll
index 1960c0b..87de655 100644
--- a/test/Transforms/LoopVectorize/metadata-width.ll
+++ b/test/Transforms/LoopVectorize/metadata-width.ll
@@ -28,4 +28,4 @@ for.end: ; preds = %for.body, %entry
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
!0 = metadata !{metadata !0, metadata !1}
-!1 = metadata !{metadata !"llvm.vectorizer.width", i32 8}
+!1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 8}
diff --git a/test/Transforms/LoopVectorize/no_switch.ll b/test/Transforms/LoopVectorize/no_switch.ll
new file mode 100644
index 0000000..52b4285
--- /dev/null
+++ b/test/Transforms/LoopVectorize/no_switch.ll
@@ -0,0 +1,85 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+
+; CHECK: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
+; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization is explicitly enabled with width 4
+
+; CHECK: _Z11test_switchPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind optsize ssp uwtable
+define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 {
+entry:
+ %cmp18 = icmp sgt i32 %Length, 0, !dbg !10
+ br i1 %cmp18, label %for.body.preheader, label %for.end, !dbg !10, !llvm.loop !12
+
+for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !14
+
+for.body: ; preds = %for.body.preheader, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !14
+ %0 = load i32* %arrayidx, align 4, !dbg !14, !tbaa !16
+ switch i32 %0, label %for.inc [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb3
+ ], !dbg !14
+
+sw.bb: ; preds = %for.body
+ %1 = trunc i64 %indvars.iv to i32, !dbg !20
+ %mul = shl nsw i32 %1, 1, !dbg !20
+ br label %for.inc, !dbg !22
+
+sw.bb3: ; preds = %for.body
+ %2 = trunc i64 %indvars.iv to i32, !dbg !23
+ store i32 %2, i32* %arrayidx, align 4, !dbg !23, !tbaa !16
+ br label %for.inc, !dbg !23
+
+for.inc: ; preds = %sw.bb3, %for.body, %sw.bb
+ %storemerge = phi i32 [ %mul, %sw.bb ], [ 0, %for.body ], [ 0, %sw.bb3 ]
+ store i32 %storemerge, i32* %arrayidx, align 4, !dbg !20, !tbaa !16
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
+ %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !10
+ br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !10, !llvm.loop !12
+
+for.end.loopexit: ; preds = %for.inc
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void, !dbg !24
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
+!1 = metadata !{metadata !"source.cpp", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_switch", metadata !"test_switch", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z11test_switchPii, null, null, metadata !2, i32 1}
+!5 = metadata !{i32 786473, metadata !1}
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!9 = metadata !{metadata !"clang version 3.5.0"}
+!10 = metadata !{i32 3, i32 8, metadata !11, null}
+!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
+!12 = metadata !{metadata !12, metadata !13, metadata !13}
+!13 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!14 = metadata !{i32 4, i32 5, metadata !15, null}
+!15 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 36, i32 0, i32 1}
+!16 = metadata !{metadata !17, metadata !17, i64 0}
+!17 = metadata !{metadata !"int", metadata !18, i64 0}
+!18 = metadata !{metadata !"omnipotent char", metadata !19, i64 0}
+!19 = metadata !{metadata !"Simple C/C++ TBAA"}
+!20 = metadata !{i32 6, i32 7, metadata !21, null}
+!21 = metadata !{i32 786443, metadata !1, metadata !15, i32 4, i32 18, i32 0, i32 2}
+!22 = metadata !{i32 7, i32 5, metadata !21, null}
+!23 = metadata !{i32 9, i32 7, metadata !21, null}
+!24 = metadata !{i32 14, i32 1, metadata !4, null}
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
index e7b1e2a..01e28bc 100644
--- a/test/Transforms/LoopVectorize/runtime-check-readonly.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
@@ -5,6 +5,7 @@ target triple = "x86_64-apple-macosx10.8.0"
;CHECK-LABEL: @add_ints(
;CHECK: br
+;CHECK: br
;CHECK: getelementptr
;CHECK-NEXT: getelementptr
;CHECK-DAG: icmp uge
diff --git a/test/Transforms/LoopVectorize/vect.omp.persistence.ll b/test/Transforms/LoopVectorize/vect.omp.persistence.ll
index dc3df7a..f646567 100644
--- a/test/Transforms/LoopVectorize/vect.omp.persistence.ll
+++ b/test/Transforms/LoopVectorize/vect.omp.persistence.ll
@@ -18,7 +18,7 @@ target triple = "x86_64-unknown-linux-gnu"
;
; Test #1
;
-; Ensure that "llvm.vectorizer.enable" metadata was not lost prior to LoopVectorize pass.
+; Ensure that "llvm.loop.vectorize.enable" metadata was not lost prior to LoopVectorize pass.
; In the past LoopRotate was clearing that metadata.
;
; The source C code is:
@@ -62,12 +62,12 @@ for.end:
}
!1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
+!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
;
; Test #2
;
-; Ensure that "llvm.vectorizer.enable" metadata was not lost even
+; Ensure that "llvm.loop.vectorize.enable" metadata was not lost even
; if loop was not rotated (see http://reviews.llvm.org/D3348#comment-4).
;
define i32 @nonrotated(i32 %a) {
@@ -85,4 +85,4 @@ return:
}
!3 = metadata !{metadata !3, metadata !4}
-!4 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
+!4 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll
index 7800469..47de13d 100644
--- a/test/Transforms/LoopVectorize/vectorize-once.ll
+++ b/test/Transforms/LoopVectorize/vectorize-once.ll
@@ -69,9 +69,9 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %for.body.i, %entry
attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
; CHECK: !0 = metadata !{metadata !0, metadata !1, metadata !2}
-; CHECK: !1 = metadata !{metadata !"llvm.vectorizer.width", i32 1}
-; CHECK: !2 = metadata !{metadata !"llvm.vectorizer.unroll", i32 1}
+; CHECK: !1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
+; CHECK: !2 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1}
; CHECK: !3 = metadata !{metadata !3, metadata !1, metadata !2}
!0 = metadata !{metadata !0, metadata !1}
-!1 = metadata !{metadata !"llvm.vectorizer.width", i32 1}
+!1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
diff --git a/test/Transforms/LowerAtomic/atomic-swap.ll b/test/Transforms/LowerAtomic/atomic-swap.ll
index c319834..cb11241 100644
--- a/test/Transforms/LowerAtomic/atomic-swap.ll
+++ b/test/Transforms/LowerAtomic/atomic-swap.ll
@@ -3,15 +3,20 @@
define i8 @cmpswap() {
; CHECK-LABEL: @cmpswap(
%i = alloca i8
- %j = cmpxchg i8* %i, i8 0, i8 42 monotonic monotonic
-; CHECK: [[INST:%[a-z0-9]+]] = load
-; CHECK-NEXT: icmp
-; CHECK-NEXT: select
-; CHECK-NEXT: store
+ %pair = cmpxchg i8* %i, i8 0, i8 42 monotonic monotonic
+ %j = extractvalue { i8, i1 } %pair, 0
+; CHECK: [[OLDVAL:%[a-z0-9]+]] = load i8* [[ADDR:%[a-z0-9]+]]
+; CHECK-NEXT: [[SAME:%[a-z0-9]+]] = icmp eq i8 [[OLDVAL]], 0
+; CHECK-NEXT: [[TO_STORE:%[a-z0-9]+]] = select i1 [[SAME]], i8 42, i8 [[OLDVAL]]
+; CHECK-NEXT: store i8 [[TO_STORE]], i8* [[ADDR]]
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = insertvalue { i8, i1 } undef, i8 [[OLDVAL]], 0
+; CHECK-NEXT: [[RES:%[a-z0-9]+]] = insertvalue { i8, i1 } [[TMP]], i1 [[SAME]], 1
+; CHECK-NEXT: [[VAL:%[a-z0-9]+]] = extractvalue { i8, i1 } [[RES]], 0
ret i8 %j
-; CHECK: ret i8 [[INST]]
+; CHECK: ret i8 [[VAL]]
}
+
define i8 @swap() {
; CHECK-LABEL: @swap(
%i = alloca i8
diff --git a/test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll b/test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll
new file mode 100644
index 0000000..3673c04
--- /dev/null
+++ b/test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -lowerswitch -S | FileCheck %s
+; CHECK-NOT: icmp eq i32 %0, 1
+
+define i32 @foo(i32 %a) #0 {
+entry:
+ %retval = alloca i32, align 4
+ %a.addr = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ %0 = load i32* %a.addr, align 4
+ switch i32 %0, label %sw.default [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ ]
+
+sw.bb:
+ ret i32 12
+
+sw.bb1:
+ ret i32 4
+
+sw.bb2:
+ ret i32 2
+
+sw.default:
+ ret i32 9
+}
diff --git a/test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll b/test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll
new file mode 100644
index 0000000..0f73721
--- /dev/null
+++ b/test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -lowerswitch -S | FileCheck %s
+; CHECK-NOT: {{.*}}icmp eq{{.*}}
+;
+;int foo(int a) {
+;
+; switch (a) {
+; case 0:
+; return 10;
+; case 1:
+; return 3;
+; default:
+; __builtin_unreachable();
+; }
+;
+;}
+
+define i32 @foo(i32 %a) nounwind ssp uwtable {
+ %1 = alloca i32, align 4
+ %2 = alloca i32, align 4
+ store i32 %a, i32* %2, align 4
+ %3 = load i32* %2, align 4
+ switch i32 %3, label %6 [
+ i32 0, label %4
+ i32 1, label %5
+ ]
+
+; <label>:4
+ store i32 10, i32* %1
+ br label %7
+
+; <label>:5
+ store i32 3, i32* %1
+ br label %7
+
+; <label>:6
+ unreachable
+
+; <label>:7
+ %8 = load i32* %1
+ ret i32 %8
+}
diff --git a/test/Transforms/LowerSwitch/feature.ll b/test/Transforms/LowerSwitch/feature.ll
index e85f03e..09d25f0 100644
--- a/test/Transforms/LowerSwitch/feature.ll
+++ b/test/Transforms/LowerSwitch/feature.ll
@@ -3,93 +3,57 @@
; We have a switch on input.
; On output we should get a binary comparison tree. Check that all is fine.
-;CHECK: entry:
-;CHECK-NEXT: br label %NodeBlock37
+;CHECK: entry:
+;CHECK-NEXT: br label %NodeBlock19
-;CHECK: NodeBlock37: ; preds = %entry
-;CHECK-NEXT: %Pivot38 = icmp slt i32 %tmp158, 10
-;CHECK-NEXT: br i1 %Pivot38, label %NodeBlock13, label %NodeBlock35
+;CHECK: NodeBlock19: ; preds = %entry
+;CHECK-NEXT: %Pivot20 = icmp slt i32 %tmp158, 10
+;CHECK-NEXT: br i1 %Pivot20, label %NodeBlock5, label %NodeBlock17
-;CHECK: NodeBlock35: ; preds = %NodeBlock37
-;CHECK-NEXT: %Pivot36 = icmp slt i32 %tmp158, 13
-;CHECK-NEXT: br i1 %Pivot36, label %NodeBlock23, label %NodeBlock33
+;CHECK: NodeBlock17: ; preds = %NodeBlock19
+;CHECK-NEXT: %Pivot18 = icmp slt i32 %tmp158, 13
+;CHECK-NEXT: br i1 %Pivot18, label %NodeBlock9, label %NodeBlock15
-;CHECK: NodeBlock33: ; preds = %NodeBlock35
-;CHECK-NEXT: %Pivot34 = icmp slt i32 %tmp158, 14
-;CHECK-NEXT: br i1 %Pivot34, label %LeafBlock25, label %NodeBlock31
+;CHECK: NodeBlock15: ; preds = %NodeBlock17
+;CHECK-NEXT: %Pivot16 = icmp slt i32 %tmp158, 14
+;CHECK-NEXT: br i1 %Pivot16, label %bb330, label %NodeBlock13
-;CHECK: NodeBlock31: ; preds = %NodeBlock33
-;CHECK-NEXT: %Pivot32 = icmp slt i32 %tmp158, 15
-;CHECK-NEXT: br i1 %Pivot32, label %LeafBlock27, label %LeafBlock29
+;CHECK: NodeBlock13: ; preds = %NodeBlock15
+;CHECK-NEXT: %Pivot14 = icmp slt i32 %tmp158, 15
+;CHECK-NEXT: br i1 %Pivot14, label %bb332, label %LeafBlock11
-;CHECK: LeafBlock29: ; preds = %NodeBlock31
-;CHECK-NEXT: %SwitchLeaf30 = icmp eq i32 %tmp158, 15
-;CHECK-NEXT: br i1 %SwitchLeaf30, label %bb334, label %NewDefault
+;CHECK: LeafBlock11: ; preds = %NodeBlock13
+;CHECK-NEXT: %SwitchLeaf12 = icmp eq i32 %tmp158, 15
+;CHECK-NEXT: br i1 %SwitchLeaf12, label %bb334, label %NewDefault
-;CHECK: LeafBlock27: ; preds = %NodeBlock31
-;CHECK-NEXT: %SwitchLeaf28 = icmp eq i32 %tmp158, 14
-;CHECK-NEXT: br i1 %SwitchLeaf28, label %bb332, label %NewDefault
+;CHECK: NodeBlock9: ; preds = %NodeBlock17
+;CHECK-NEXT: %Pivot10 = icmp slt i32 %tmp158, 11
+;CHECK-NEXT: br i1 %Pivot10, label %bb324, label %NodeBlock7
-;CHECK: LeafBlock25: ; preds = %NodeBlock33
-;CHECK-NEXT: %SwitchLeaf26 = icmp eq i32 %tmp158, 13
-;CHECK-NEXT: br i1 %SwitchLeaf26, label %bb330, label %NewDefault
+;CHECK: NodeBlock7: ; preds = %NodeBlock9
+;CHECK-NEXT: %Pivot8 = icmp slt i32 %tmp158, 12
+;CHECK-NEXT: br i1 %Pivot8, label %bb326, label %bb328
-;CHECK: NodeBlock23: ; preds = %NodeBlock35
-;CHECK-NEXT: %Pivot24 = icmp slt i32 %tmp158, 11
-;CHECK-NEXT: br i1 %Pivot24, label %LeafBlock15, label %NodeBlock21
+;CHECK: NodeBlock5: ; preds = %NodeBlock19
+;CHECK-NEXT: %Pivot6 = icmp slt i32 %tmp158, 7
+;CHECK-NEXT: br i1 %Pivot6, label %NodeBlock, label %NodeBlock3
-;CHECK: NodeBlock21: ; preds = %NodeBlock23
-;CHECK-NEXT: %Pivot22 = icmp slt i32 %tmp158, 12
-;CHECK-NEXT: br i1 %Pivot22, label %LeafBlock17, label %LeafBlock19
+;CHECK: NodeBlock3: ; preds = %NodeBlock5
+;CHECK-NEXT: %Pivot4 = icmp slt i32 %tmp158, 8
+;CHECK-NEXT: br i1 %Pivot4, label %bb, label %NodeBlock1
-;CHECK: LeafBlock19: ; preds = %NodeBlock21
-;CHECK-NEXT: %SwitchLeaf20 = icmp eq i32 %tmp158, 12
-;CHECK-NEXT: br i1 %SwitchLeaf20, label %bb328, label %NewDefault
+;CHECK: NodeBlock1: ; preds = %NodeBlock3
+;CHECK-NEXT: %Pivot2 = icmp slt i32 %tmp158, 9
+;CHECK-NEXT: br i1 %Pivot2, label %bb338, label %bb322
-;CHECK: LeafBlock17: ; preds = %NodeBlock21
-;CHECK-NEXT: %SwitchLeaf18 = icmp eq i32 %tmp158, 11
-;CHECK-NEXT: br i1 %SwitchLeaf18, label %bb326, label %NewDefault
+;CHECK: NodeBlock: ; preds = %NodeBlock5
+;CHECK-NEXT: %Pivot = icmp slt i32 %tmp158, 0
+;CHECK-NEXT: br i1 %Pivot, label %LeafBlock, label %bb338
-;CHECK: LeafBlock15: ; preds = %NodeBlock23
-;CHECK-NEXT: %SwitchLeaf16 = icmp eq i32 %tmp158, 10
-;CHECK-NEXT: br i1 %SwitchLeaf16, label %bb324, label %NewDefault
-
-;CHECK: NodeBlock13: ; preds = %NodeBlock37
-;CHECK-NEXT: %Pivot14 = icmp slt i32 %tmp158, 7
-;CHECK-NEXT: br i1 %Pivot14, label %NodeBlock, label %NodeBlock11
-
-;CHECK: NodeBlock11: ; preds = %NodeBlock13
-;CHECK-NEXT: %Pivot12 = icmp slt i32 %tmp158, 8
-;CHECK-NEXT: br i1 %Pivot12, label %LeafBlock3, label %NodeBlock9
-
-;CHECK: NodeBlock9: ; preds = %NodeBlock11
-;CHECK-NEXT: %Pivot10 = icmp slt i32 %tmp158, 9
-;CHECK-NEXT: br i1 %Pivot10, label %LeafBlock5, label %LeafBlock7
-
-;CHECK: LeafBlock7: ; preds = %NodeBlock9
-;CHECK-NEXT: %SwitchLeaf8 = icmp eq i32 %tmp158, 9
-;CHECK-NEXT: br i1 %SwitchLeaf8, label %bb322, label %NewDefault
-
-;CHECK: LeafBlock5: ; preds = %NodeBlock9
-;CHECK-NEXT: %SwitchLeaf6 = icmp eq i32 %tmp158, 8
-;CHECK-NEXT: br i1 %SwitchLeaf6, label %bb338, label %NewDefault
-
-;CHECK: LeafBlock3: ; preds = %NodeBlock11
-;CHECK-NEXT: %SwitchLeaf4 = icmp eq i32 %tmp158, 7
-;CHECK-NEXT: br i1 %SwitchLeaf4, label %bb, label %NewDefault
-
-;CHECK: NodeBlock: ; preds = %NodeBlock13
-;CHECK-NEXT: %Pivot = icmp slt i32 %tmp158, 0
-;CHECK-NEXT: br i1 %Pivot, label %LeafBlock, label %LeafBlock1
-
-;CHECK: LeafBlock1: ; preds = %NodeBlock
-;CHECK-NEXT: %SwitchLeaf2 = icmp ule i32 %tmp158, 6
-;CHECK-NEXT: br i1 %SwitchLeaf2, label %bb338, label %NewDefault
-
-;CHECK: LeafBlock: ; preds = %NodeBlock
-;CHECK-NEXT: %tmp158.off = add i32 %tmp158, 6
-;CHECK-NEXT: %SwitchLeaf = icmp ule i32 %tmp158.off, 4
-;CHECK-NEXT: br i1 %SwitchLeaf, label %bb338, label %NewDefault
+;CHECK: LeafBlock: ; preds = %NodeBlock
+;CHECK-NEXT: %tmp158.off = add i32 %tmp158, 6
+;CHECK-NEXT: %SwitchLeaf = icmp ule i32 %tmp158.off, 4
+;CHECK-NEXT: br i1 %SwitchLeaf, label %bb338, label %NewDefault
define i32 @main(i32 %tmp158) {
entry:
diff --git a/test/Transforms/MergeFunc/functions.ll b/test/Transforms/MergeFunc/functions.ll
new file mode 100644
index 0000000..006fdf5
--- /dev/null
+++ b/test/Transforms/MergeFunc/functions.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; Be sure we don't merge cross-referenced functions of the same type.
+
+; CHECK-LABEL: @left
+; CHECK-LABEL: entry-block
+; CHECK-LABEL: call void @right(i64 %p)
+define void @left(i64 %p) {
+entry-block:
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ ret void
+}
+
+; CHECK-LABEL: @right
+; CHECK-LABEL: entry-block
+; CHECK-LABEL: call void @left(i64 %p)
+define void @right(i64 %p) {
+entry-block:
+ call void @left(i64 %p)
+ call void @left(i64 %p)
+ call void @left(i64 %p)
+ call void @left(i64 %p)
+ ret void
+}
diff --git a/test/Transforms/MergeFunc/ranges.ll b/test/Transforms/MergeFunc/ranges.ll
new file mode 100644
index 0000000..e25ff1d
--- /dev/null
+++ b/test/Transforms/MergeFunc/ranges.ll
@@ -0,0 +1,43 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+define i1 @cmp_with_range(i8*, i8*) {
+ %v1 = load i8* %0, !range !0
+ %v2 = load i8* %1, !range !0
+ %out = icmp eq i8 %v1, %v2
+ ret i1 %out
+}
+
+define i1 @cmp_no_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_no_range
+; CHECK-NEXT: %v1 = load i8* %0
+; CHECK-NEXT: %v2 = load i8* %1
+; CHECK-NEXT: %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT: ret i1 %out
+ %v1 = load i8* %0
+ %v2 = load i8* %1
+ %out = icmp eq i8 %v1, %v2
+ ret i1 %out
+}
+
+define i1 @cmp_different_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_different_range
+; CHECK-NEXT: %v1 = load i8* %0, !range !1
+; CHECK-NEXT: %v2 = load i8* %1, !range !1
+; CHECK-NEXT: %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT: ret i1 %out
+ %v1 = load i8* %0, !range !1
+ %v2 = load i8* %1, !range !1
+ %out = icmp eq i8 %v1, %v2
+ ret i1 %out
+}
+
+define i1 @cmp_with_same_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_with_same_range
+; CHECK: tail call i1 @cmp_with_range
+ %v1 = load i8* %0, !range !0
+ %v2 = load i8* %1, !range !0
+ %out = icmp eq i8 %v1, %v2
+ ret i1 %out
+}
+
+!0 = metadata !{i8 0, i8 2}
+!1 = metadata !{i8 5, i8 7}
diff --git a/test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll b/test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll
index 5780990..2430035 100644
--- a/test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll
+++ b/test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll
@@ -1,9 +1,10 @@
-; RUN: opt < %s -reassociate -instcombine -constprop -dce -S | not grep add
+; RUN: opt < %s -reassociate -S | FileCheck %s
-define i32 @test(i32 %A) {
- %X = add i32 %A, 1 ; <i32> [#uses=1]
- %Y = add i32 %A, 1 ; <i32> [#uses=1]
- %r = sub i32 %X, %Y ; <i32> [#uses=1]
- ret i32 %r
+define i32 @test1(i32 %A) {
+; CHECK-LABEL: test1
+; CHECK: ret i32 0
+ %X = add i32 %A, 1
+ %Y = add i32 %A, 1
+ %r = sub i32 %X, %Y
+ ret i32 %r
}
-
diff --git a/test/Transforms/Reassociate/2002-05-15-MissedTree.ll b/test/Transforms/Reassociate/2002-05-15-MissedTree.ll
index e8bccbd..5f3c920 100644
--- a/test/Transforms/Reassociate/2002-05-15-MissedTree.ll
+++ b/test/Transforms/Reassociate/2002-05-15-MissedTree.ll
@@ -1,9 +1,11 @@
-; RUN: opt < %s -reassociate -instcombine -constprop -die -S | not grep 5
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
-define i32 @test(i32 %A, i32 %B) {
- %W = add i32 %B, -5 ; <i32> [#uses=1]
- %Y = add i32 %A, 5 ; <i32> [#uses=1]
- %Z = add i32 %W, %Y ; <i32> [#uses=1]
+define i32 @test1(i32 %A, i32 %B) {
+; CHECK-LABEL: test1
+; CHECK: %Z = add i32 %B, %A
+; CHECK: ret i32 %Z
+ %W = add i32 %B, -5
+ %Y = add i32 %A, 5
+ %Z = add i32 %W, %Y
ret i32 %Z
}
-
diff --git a/test/Transforms/Reassociate/2002-05-15-SubReassociate.ll b/test/Transforms/Reassociate/2002-05-15-SubReassociate.ll
index c18af5e..29c178f 100644
--- a/test/Transforms/Reassociate/2002-05-15-SubReassociate.ll
+++ b/test/Transforms/Reassociate/2002-05-15-SubReassociate.ll
@@ -1,12 +1,30 @@
+; RUN: opt < %s -reassociate -constprop -instcombine -dce -S | FileCheck %s
+
; With sub reassociation, constant folding can eliminate all of the constants.
-;
-; RUN: opt < %s -reassociate -constprop -instcombine -dce -S | not grep add
+define i32 @test1(i32 %A, i32 %B) {
+; CHECK-LABEL: test1
+; CHECK-NEXT: %Z = sub i32 %A, %B
+; CHECK-NEXT: ret i32 %Z
-define i32 @test(i32 %A, i32 %B) {
- %W = add i32 5, %B ; <i32> [#uses=1]
- %X = add i32 -7, %A ; <i32> [#uses=1]
- %Y = sub i32 %X, %W ; <i32> [#uses=1]
- %Z = add i32 %Y, 12 ; <i32> [#uses=1]
- ret i32 %Z
+ %W = add i32 5, %B
+ %X = add i32 -7, %A
+ %Y = sub i32 %X, %W
+ %Z = add i32 %Y, 12
+ ret i32 %Z
}
+
+; With sub reassociation, constant folding can eliminate the two 12 constants.
+define i32 @test2(i32 %A, i32 %B, i32 %C, i32 %D) {
+; CHECK-LABEL: test2
+; CHECK-NEXT: %sum = add i32 %B, %A
+; CHECK-NEXT: %sum1 = add i32 %sum, %C
+; CHECK-NEXT: %Q = sub i32 %D, %sum1
+; CHECK-NEXT: ret i32 %Q
+ %M = add i32 %A, 12
+ %N = add i32 %M, %B
+ %O = add i32 %N, %C
+ %P = sub i32 %D, %O
+ %Q = add i32 %P, 12
+ ret i32 %Q
+}
diff --git a/test/Transforms/Reassociate/2002-05-15-SubReassociate2.ll b/test/Transforms/Reassociate/2002-05-15-SubReassociate2.ll
deleted file mode 100644
index 5848821..0000000
--- a/test/Transforms/Reassociate/2002-05-15-SubReassociate2.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; With sub reassociation, constant folding can eliminate the two 12 constants.
-;
-; RUN: opt < %s -reassociate -constprop -dce -S | not grep 12
-
-define i32 @test(i32 %A, i32 %B, i32 %C, i32 %D) {
- %M = add i32 %A, 12 ; <i32> [#uses=1]
- %N = add i32 %M, %B ; <i32> [#uses=1]
- %O = add i32 %N, %C ; <i32> [#uses=1]
- %P = sub i32 %D, %O ; <i32> [#uses=1]
- %Q = add i32 %P, 12 ; <i32> [#uses=1]
- ret i32 %Q
-}
-
diff --git a/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll b/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll
index f66148b..f6cef35 100644
--- a/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll
+++ b/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll
@@ -1,23 +1,24 @@
-; RUN: opt < %s -reassociate -instcombine -S |\
-; RUN: grep "ret i32 0"
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
-define i32 @f(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
- %tmp.2 = add i32 %a4, %a3 ; <i32> [#uses=1]
- %tmp.4 = add i32 %tmp.2, %a2 ; <i32> [#uses=1]
- %tmp.6 = add i32 %tmp.4, %a1 ; <i32> [#uses=1]
- %tmp.8 = add i32 %tmp.6, %a0 ; <i32> [#uses=1]
- %tmp.11 = add i32 %a3, %a2 ; <i32> [#uses=1]
- %tmp.13 = add i32 %tmp.11, %a1 ; <i32> [#uses=1]
- %tmp.15 = add i32 %tmp.13, %a0 ; <i32> [#uses=1]
- %tmp.18 = add i32 %a2, %a1 ; <i32> [#uses=1]
- %tmp.20 = add i32 %tmp.18, %a0 ; <i32> [#uses=1]
- %tmp.23 = add i32 %a1, %a0 ; <i32> [#uses=1]
- %tmp.26 = sub i32 %tmp.8, %tmp.15 ; <i32> [#uses=1]
- %tmp.28 = add i32 %tmp.26, %tmp.20 ; <i32> [#uses=1]
- %tmp.30 = sub i32 %tmp.28, %tmp.23 ; <i32> [#uses=1]
- %tmp.32 = sub i32 %tmp.30, %a4 ; <i32> [#uses=1]
- %tmp.34 = sub i32 %tmp.32, %a2 ; <i32> [#uses=2]
- %T = mul i32 %tmp.34, %tmp.34 ; <i32> [#uses=1]
- ret i32 %T
-}
+define i32 @f1(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: f1
+; CHECK-NEXT: ret i32 0
+ %tmp.2 = add i32 %a4, %a3
+ %tmp.4 = add i32 %tmp.2, %a2
+ %tmp.6 = add i32 %tmp.4, %a1
+ %tmp.8 = add i32 %tmp.6, %a0
+ %tmp.11 = add i32 %a3, %a2
+ %tmp.13 = add i32 %tmp.11, %a1
+ %tmp.15 = add i32 %tmp.13, %a0
+ %tmp.18 = add i32 %a2, %a1
+ %tmp.20 = add i32 %tmp.18, %a0
+ %tmp.23 = add i32 %a1, %a0
+ %tmp.26 = sub i32 %tmp.8, %tmp.15
+ %tmp.28 = add i32 %tmp.26, %tmp.20
+ %tmp.30 = sub i32 %tmp.28, %tmp.23
+ %tmp.32 = sub i32 %tmp.30, %a4
+ %tmp.34 = sub i32 %tmp.32, %a2
+ %T = mul i32 %tmp.34, %tmp.34
+ ret i32 %T
+}
diff --git a/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll b/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
index 384cbc9..f783955 100644
--- a/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
@@ -1,8 +1,12 @@
-; RUN: opt < %s -reassociate -disable-output
+; RUN: opt < %s -reassociate -S | FileCheck %s
-define void @foo() {
- %tmp162 = fsub <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>> [#uses=1]
- %tmp164 = fmul <4 x float> zeroinitializer, %tmp162 ; <<4 x float>> [#uses=0]
- ret void
-}
+define <4 x float> @test1() {
+; CHECK-LABEL: test1
+; CHECK-NEXT: %tmp1 = fsub <4 x float> zeroinitializer, zeroinitializer
+; CHECK-NEXT: %tmp2 = fmul <4 x float> zeroinitializer, %tmp1
+; CHECK-NEXT: ret <4 x float> %tmp2
+ %tmp1 = fsub <4 x float> zeroinitializer, zeroinitializer
+ %tmp2 = fmul <4 x float> zeroinitializer, %tmp1
+ ret <4 x float> %tmp2
+}
diff --git a/test/Transforms/Reassociate/basictest.ll b/test/Transforms/Reassociate/basictest.ll
index fda0ca6..d70bfcb 100644
--- a/test/Transforms/Reassociate/basictest.ll
+++ b/test/Transforms/Reassociate/basictest.ll
@@ -1,46 +1,47 @@
-; With reassociation, constant folding can eliminate the 12 and -12 constants.
-;
-; RUN: opt < %s -reassociate -gvn -instcombine -S | FileCheck %s
+; RUN: opt < %s -reassociate -gvn -instcombine -S | FileCheck %s
define i32 @test1(i32 %arg) {
- %tmp1 = sub i32 -12, %arg
- %tmp2 = add i32 %tmp1, 12
- ret i32 %tmp2
-; CHECK-LABEL: @test1(
+ %tmp1 = sub i32 -12, %arg
+ %tmp2 = add i32 %tmp1, 12
+ ret i32 %tmp2
+
+; CHECK-LABEL: @test1
; CHECK-NEXT: sub i32 0, %arg
; CHECK-NEXT: ret i32
}
define i32 @test2(i32 %reg109, i32 %reg1111) {
- %reg115 = add i32 %reg109, -30 ; <i32> [#uses=1]
- %reg116 = add i32 %reg115, %reg1111 ; <i32> [#uses=1]
- %reg117 = add i32 %reg116, 30 ; <i32> [#uses=1]
- ret i32 %reg117
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: add i32 %reg1111, %reg109
-; CHECK-NEXT: ret i32
+ %reg115 = add i32 %reg109, -30
+ %reg116 = add i32 %reg115, %reg1111
+ %reg117 = add i32 %reg116, 30
+ ret i32 %reg117
+
+; CHECK-LABEL: @test2
+; CHECK-NEXT: %reg117 = add i32 %reg1111, %reg109
+; CHECK-NEXT: ret i32 %reg117
}
-@e = external global i32 ; <i32*> [#uses=3]
-@a = external global i32 ; <i32*> [#uses=3]
-@b = external global i32 ; <i32*> [#uses=3]
-@c = external global i32 ; <i32*> [#uses=3]
-@f = external global i32 ; <i32*> [#uses=3]
+@e = external global i32
+@a = external global i32
+@b = external global i32
+@c = external global i32
+@f = external global i32
define void @test3() {
- %A = load i32* @a ; <i32> [#uses=2]
- %B = load i32* @b ; <i32> [#uses=2]
- %C = load i32* @c ; <i32> [#uses=2]
- %t1 = add i32 %A, %B ; <i32> [#uses=1]
- %t2 = add i32 %t1, %C ; <i32> [#uses=1]
- %t3 = add i32 %C, %A ; <i32> [#uses=1]
- %t4 = add i32 %t3, %B ; <i32> [#uses=1]
- ; e = (a+b)+c;
- store i32 %t2, i32* @e
- ; f = (a+c)+b
- store i32 %t4, i32* @f
- ret void
-; CHECK-LABEL: @test3(
+ %A = load i32* @a
+ %B = load i32* @b
+ %C = load i32* @c
+ %t1 = add i32 %A, %B
+ %t2 = add i32 %t1, %C
+ %t3 = add i32 %C, %A
+ %t4 = add i32 %t3, %B
+ ; e = (a+b)+c;
+ store i32 %t2, i32* @e
+ ; f = (a+c)+b
+ store i32 %t4, i32* @f
+ ret void
+
+; CHECK-LABEL: @test3
; CHECK: add i32
; CHECK: add i32
; CHECK-NOT: add i32
@@ -48,19 +49,20 @@ define void @test3() {
}
define void @test4() {
- %A = load i32* @a ; <i32> [#uses=2]
- %B = load i32* @b ; <i32> [#uses=2]
- %C = load i32* @c ; <i32> [#uses=2]
- %t1 = add i32 %A, %B ; <i32> [#uses=1]
- %t2 = add i32 %t1, %C ; <i32> [#uses=1]
- %t3 = add i32 %C, %A ; <i32> [#uses=1]
- %t4 = add i32 %t3, %B ; <i32> [#uses=1]
- ; e = c+(a+b)
- store i32 %t2, i32* @e
- ; f = (c+a)+b
- store i32 %t4, i32* @f
- ret void
-; CHECK-LABEL: @test4(
+ %A = load i32* @a
+ %B = load i32* @b
+ %C = load i32* @c
+ %t1 = add i32 %A, %B
+ %t2 = add i32 %t1, %C
+ %t3 = add i32 %C, %A
+ %t4 = add i32 %t3, %B
+ ; e = c+(a+b)
+ store i32 %t2, i32* @e
+ ; f = (c+a)+b
+ store i32 %t4, i32* @f
+ ret void
+
+; CHECK-LABEL: @test4
; CHECK: add i32
; CHECK: add i32
; CHECK-NOT: add i32
@@ -68,19 +70,20 @@ define void @test4() {
}
define void @test5() {
- %A = load i32* @a ; <i32> [#uses=2]
- %B = load i32* @b ; <i32> [#uses=2]
- %C = load i32* @c ; <i32> [#uses=2]
- %t1 = add i32 %B, %A ; <i32> [#uses=1]
- %t2 = add i32 %t1, %C ; <i32> [#uses=1]
- %t3 = add i32 %C, %A ; <i32> [#uses=1]
- %t4 = add i32 %t3, %B ; <i32> [#uses=1]
- ; e = c+(b+a)
- store i32 %t2, i32* @e
- ; f = (c+a)+b
- store i32 %t4, i32* @f
- ret void
-; CHECK-LABEL: @test5(
+ %A = load i32* @a
+ %B = load i32* @b
+ %C = load i32* @c
+ %t1 = add i32 %B, %A
+ %t2 = add i32 %t1, %C
+ %t3 = add i32 %C, %A
+ %t4 = add i32 %t3, %B
+ ; e = c+(b+a)
+ store i32 %t2, i32* @e
+ ; f = (c+a)+b
+ store i32 %t4, i32* @f
+ ret void
+
+; CHECK-LABEL: @test5
; CHECK: add i32
; CHECK: add i32
; CHECK-NOT: add i32
@@ -88,60 +91,61 @@ define void @test5() {
}
define i32 @test6() {
- %tmp.0 = load i32* @a
- %tmp.1 = load i32* @b
- ; (a+b)
- %tmp.2 = add i32 %tmp.0, %tmp.1
- %tmp.4 = load i32* @c
- ; (a+b)+c
- %tmp.5 = add i32 %tmp.2, %tmp.4
- ; (a+c)
- %tmp.8 = add i32 %tmp.0, %tmp.4
- ; (a+c)+b
- %tmp.11 = add i32 %tmp.8, %tmp.1
- ; X ^ X = 0
- %RV = xor i32 %tmp.5, %tmp.11
- ret i32 %RV
-; CHECK-LABEL: @test6(
+ %tmp.0 = load i32* @a
+ %tmp.1 = load i32* @b
+ ; (a+b)
+ %tmp.2 = add i32 %tmp.0, %tmp.1
+ %tmp.4 = load i32* @c
+ ; (a+b)+c
+ %tmp.5 = add i32 %tmp.2, %tmp.4
+ ; (a+c)
+ %tmp.8 = add i32 %tmp.0, %tmp.4
+ ; (a+c)+b
+ %tmp.11 = add i32 %tmp.8, %tmp.1
+ ; X ^ X = 0
+ %RV = xor i32 %tmp.5, %tmp.11
+ ret i32 %RV
+
+; CHECK-LABEL: @test6
; CHECK: ret i32 0
}
; This should be one add and two multiplies.
define i32 @test7(i32 %A, i32 %B, i32 %C) {
- ; A*A*B + A*C*A
- %aa = mul i32 %A, %A
- %aab = mul i32 %aa, %B
- %ac = mul i32 %A, %C
- %aac = mul i32 %ac, %A
- %r = add i32 %aab, %aac
- ret i32 %r
-; CHECK-LABEL: @test7(
+ ; A*A*B + A*C*A
+ %aa = mul i32 %A, %A
+ %aab = mul i32 %aa, %B
+ %ac = mul i32 %A, %C
+ %aac = mul i32 %ac, %A
+ %r = add i32 %aab, %aac
+ ret i32 %r
+
+; CHECK-LABEL: @test7
; CHECK-NEXT: add i32 %C, %B
; CHECK-NEXT: mul i32
; CHECK-NEXT: mul i32
; CHECK-NEXT: ret i32
}
-
define i32 @test8(i32 %X, i32 %Y, i32 %Z) {
- %A = sub i32 0, %X
- %B = mul i32 %A, %Y
- ; (-X)*Y + Z -> Z-X*Y
- %C = add i32 %B, %Z
- ret i32 %C
-; CHECK-LABEL: @test8(
+ %A = sub i32 0, %X
+ %B = mul i32 %A, %Y
+ ; (-X)*Y + Z -> Z-X*Y
+ %C = add i32 %B, %Z
+ ret i32 %C
+
+; CHECK-LABEL: @test8
; CHECK-NEXT: %A = mul i32 %Y, %X
; CHECK-NEXT: %C = sub i32 %Z, %A
; CHECK-NEXT: ret i32 %C
}
-
; PR5458
define i32 @test9(i32 %X) {
%Y = mul i32 %X, 47
%Z = add i32 %Y, %Y
ret i32 %Z
-; CHECK-LABEL: @test9(
+; CHECK-LABEL: @test9
; CHECK-NEXT: mul i32 %X, 94
; CHECK-NEXT: ret i32
}
@@ -150,7 +154,7 @@ define i32 @test10(i32 %X) {
%Y = add i32 %X ,%X
%Z = add i32 %Y, %X
ret i32 %Z
-; CHECK-LABEL: @test10(
+; CHECK-LABEL: @test10
; CHECK-NEXT: mul i32 %X, 3
; CHECK-NEXT: ret i32
}
@@ -160,7 +164,7 @@ define i32 @test11(i32 %W) {
%Y = add i32 %X ,%X
%Z = add i32 %Y, %X
ret i32 %Z
-; CHECK-LABEL: @test11(
+; CHECK-LABEL: @test11
; CHECK-NEXT: mul i32 %W, 381
; CHECK-NEXT: ret i32
}
@@ -169,11 +173,10 @@ define i32 @test12(i32 %X) {
%A = sub i32 1, %X
%B = sub i32 2, %X
%C = sub i32 3, %X
-
%Y = add i32 %A ,%B
%Z = add i32 %Y, %C
ret i32 %Z
-; CHECK-LABEL: @test12(
+; CHECK-LABEL: @test12
; CHECK-NEXT: mul i32 %X, -3
; CHECK-NEXT: add i32{{.*}}, 6
; CHECK-NEXT: ret i32
@@ -185,7 +188,7 @@ define i32 @test13(i32 %X1, i32 %X2, i32 %X3) {
%C = mul i32 %X1, %X3 ; X1*X3
%D = add i32 %B, %C ; -X1*X2 + X1*X3 -> X1*(X3-X2)
ret i32 %D
-; CHECK-LABEL: @test13(
+; CHECK-LABEL: @test13
; CHECK-NEXT: sub i32 %X3, %X2
; CHECK-NEXT: mul i32 {{.*}}, %X1
; CHECK-NEXT: ret i32
@@ -197,9 +200,10 @@ define i32 @test14(i32 %X1, i32 %X2) {
%C = mul i32 %X2, -47 ; X2*-47
%D = add i32 %B, %C ; X1*47 + X2*-47 -> 47*(X1-X2)
ret i32 %D
-; CHECK-LABEL: @test14(
+
+; CHECK-LABEL: @test14
; CHECK-NEXT: sub i32 %X1, %X2
-; CHECK-NEXT: mul i32 {{.*}}, 47
+; CHECK-NEXT: mul i32 %tmp, 47
; CHECK-NEXT: ret i32
}
@@ -210,7 +214,6 @@ define i32 @test15(i32 %X1, i32 %X2, i32 %X3) {
%C = and i1 %A, %B
%D = select i1 %C, i32 %X1, i32 0
ret i32 %D
-; CHECK-LABEL: @test15(
+; CHECK-LABEL: @test15
; CHECK: and i1 %A, %B
}
-
diff --git a/test/Transforms/Reassociate/fp-commute.ll b/test/Transforms/Reassociate/fp-commute.ll
index 025689b..eac5b59 100644
--- a/test/Transforms/Reassociate/fp-commute.ll
+++ b/test/Transforms/Reassociate/fp-commute.ll
@@ -1,18 +1,19 @@
; RUN: opt -reassociate -S < %s | FileCheck %s
-target triple = "armv7-apple-ios"
-
declare void @use(float)
-; CHECK: test
-define void @test(float %x, float %y) {
-entry:
+define void @test1(float %x, float %y) {
+; CHECK-LABEL: test1
; CHECK: fmul float %x, %y
; CHECK: fmul float %x, %y
- %0 = fmul float %x, %y
- %1 = fmul float %y, %x
- %2 = fsub float %0, %1
- call void @use(float %0)
- call void @use(float %2)
+; CHECK: fsub float %1, %2
+; CHECK: call void @use(float %{{.*}})
+; CHECK: call void @use(float %{{.*}})
+
+ %1 = fmul float %x, %y
+ %2 = fmul float %y, %x
+ %3 = fsub float %1, %2
+ call void @use(float %1)
+ call void @use(float %3)
ret void
}
diff --git a/test/Transforms/Reassociate/inverses.ll b/test/Transforms/Reassociate/inverses.ll
index afe076c..8500cd8 100644
--- a/test/Transforms/Reassociate/inverses.ll
+++ b/test/Transforms/Reassociate/inverses.ll
@@ -32,3 +32,15 @@ define i32 @test3(i32 %b, i32 %a) {
; CHECK: %tmp.5 = add i32 %b, 1234
; CHECK: ret i32 %tmp.5
}
+
+define i32 @test4(i32 %b, i32 %a) {
+ %tmp.1 = add i32 %a, 1234
+ %tmp.2 = add i32 %b, %tmp.1
+ %tmp.4 = xor i32 %a, -1
+ ; (b+(a+1234))+~a -> b+1233
+ %tmp.5 = add i32 %tmp.2, %tmp.4
+ ret i32 %tmp.5
+; CHECK-LABEL: @test4(
+; CHECK: %tmp.5 = add i32 %b, 1233
+; CHECK: ret i32 %tmp.5
+}
diff --git a/test/Transforms/Reassociate/looptest.ll b/test/Transforms/Reassociate/looptest.ll
index 91723bc..aad3b20 100644
--- a/test/Transforms/Reassociate/looptest.ll
+++ b/test/Transforms/Reassociate/looptest.ll
@@ -18,6 +18,7 @@
declare i32 @printf(i8*, ...)
+; FIXME: No longer works.
define void @test(i32 %Num, i32* %Array) {
bb0:
%cond221 = icmp eq i32 0, %Num ; <i1> [#uses=3]
diff --git a/test/Transforms/Reassociate/mightymul.ll b/test/Transforms/Reassociate/mightymul.ll
index cfbc485..ae915da 100644
--- a/test/Transforms/Reassociate/mightymul.ll
+++ b/test/Transforms/Reassociate/mightymul.ll
@@ -1,7 +1,7 @@
-; RUN: opt < %s -reassociate
+; RUN: opt < %s -reassociate -disable-output
; PR13021
-define i32 @foo(i32 %x) {
+define i32 @test1(i32 %x) {
%t0 = mul i32 %x, %x
%t1 = mul i32 %t0, %t0
%t2 = mul i32 %t1, %t1
diff --git a/test/Transforms/Reassociate/multistep.ll b/test/Transforms/Reassociate/multistep.ll
index d794647..12eaeee 100644
--- a/test/Transforms/Reassociate/multistep.ll
+++ b/test/Transforms/Reassociate/multistep.ll
@@ -28,4 +28,3 @@ define i64 @multistep2(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-NEXT: ret
ret i64 %t3
}
-
diff --git a/test/Transforms/Reassociate/negation.ll b/test/Transforms/Reassociate/negation.ll
index 6a3dfd3..12d2c86 100644
--- a/test/Transforms/Reassociate/negation.ll
+++ b/test/Transforms/Reassociate/negation.ll
@@ -1,21 +1,31 @@
-; RUN: opt < %s -reassociate -instcombine -S | not grep sub
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
; Test that we can turn things like X*-(Y*Z) -> X*-1*Y*Z.
define i32 @test1(i32 %a, i32 %b, i32 %z) {
- %c = sub i32 0, %z ; <i32> [#uses=1]
- %d = mul i32 %a, %b ; <i32> [#uses=1]
- %e = mul i32 %c, %d ; <i32> [#uses=1]
- %f = mul i32 %e, 12345 ; <i32> [#uses=1]
- %g = sub i32 0, %f ; <i32> [#uses=1]
- ret i32 %g
+; CHECK-LABEL: test1
+; CHECK-NEXT: %e = mul i32 %a, 12345
+; CHECK-NEXT: %f = mul i32 %e, %b
+; CHECK-NEXT: %g = mul i32 %f, %z
+; CHECK-NEXT: ret i32 %g
+
+ %c = sub i32 0, %z
+ %d = mul i32 %a, %b
+ %e = mul i32 %c, %d
+ %f = mul i32 %e, 12345
+ %g = sub i32 0, %f
+ ret i32 %g
}
define i32 @test2(i32 %a, i32 %b, i32 %z) {
- %d = mul i32 %z, 40 ; <i32> [#uses=1]
- %c = sub i32 0, %d ; <i32> [#uses=1]
- %e = mul i32 %a, %c ; <i32> [#uses=1]
- %f = sub i32 0, %e ; <i32> [#uses=1]
- ret i32 %f
-}
+; CHECK-LABEL: test2
+; CHECK-NEXT: %e = mul i32 %a, 40
+; CHECK-NEXT: %f = mul i32 %e, %z
+; CHECK-NEXT: ret i32 %f
+ %d = mul i32 %z, 40
+ %c = sub i32 0, %d
+ %e = mul i32 %a, %c
+ %f = sub i32 0, %e
+ ret i32 %f
+}
diff --git a/test/Transforms/Reassociate/otherops.ll b/test/Transforms/Reassociate/otherops.ll
index d68d008..7718881 100644
--- a/test/Transforms/Reassociate/otherops.ll
+++ b/test/Transforms/Reassociate/otherops.ll
@@ -1,28 +1,42 @@
; Reassociation should apply to Add, Mul, And, Or, & Xor
;
-; RUN: opt < %s -reassociate -constprop -instcombine -die -S | not grep 12
+; RUN: opt < %s -reassociate -constprop -instcombine -die -S | FileCheck %s
define i32 @test_mul(i32 %arg) {
- %tmp1 = mul i32 12, %arg ; <i32> [#uses=1]
- %tmp2 = mul i32 %tmp1, 12 ; <i32> [#uses=1]
- ret i32 %tmp2
+; CHECK-LABEL: test_mul
+; CHECK-NEXT: %tmp2 = mul i32 %arg, 144
+; CHECK-NEXT: ret i32 %tmp2
+
+ %tmp1 = mul i32 12, %arg
+ %tmp2 = mul i32 %tmp1, 12
+ ret i32 %tmp2
}
define i32 @test_and(i32 %arg) {
- %tmp1 = and i32 14, %arg ; <i32> [#uses=1]
- %tmp2 = and i32 %tmp1, 14 ; <i32> [#uses=1]
- ret i32 %tmp2
+; CHECK-LABEL: test_and
+; CHECK-NEXT: %tmp2 = and i32 %arg, 14
+; CHECK-NEXT: ret i32 %tmp2
+
+ %tmp1 = and i32 14, %arg
+ %tmp2 = and i32 %tmp1, 14
+ ret i32 %tmp2
}
define i32 @test_or(i32 %arg) {
- %tmp1 = or i32 14, %arg ; <i32> [#uses=1]
- %tmp2 = or i32 %tmp1, 14 ; <i32> [#uses=1]
- ret i32 %tmp2
+; CHECK-LABEL: test_or
+; CHECK-NEXT: %tmp2 = or i32 %arg, 14
+; CHECK-NEXT: ret i32 %tmp2
+
+ %tmp1 = or i32 14, %arg
+ %tmp2 = or i32 %tmp1, 14
+ ret i32 %tmp2
}
define i32 @test_xor(i32 %arg) {
- %tmp1 = xor i32 12, %arg ; <i32> [#uses=1]
- %tmp2 = xor i32 %tmp1, 12 ; <i32> [#uses=1]
- ret i32 %tmp2
-}
+; CHECK-LABEL: test_xor
+; CHECK-NEXT: ret i32 %arg
+ %tmp1 = xor i32 12, %arg
+ %tmp2 = xor i32 %tmp1, 12
+ ret i32 %tmp2
+}
diff --git a/test/Transforms/Reassociate/shift-factor.ll b/test/Transforms/Reassociate/shift-factor.ll
index 73af5e5..8fbf1b9 100644
--- a/test/Transforms/Reassociate/shift-factor.ll
+++ b/test/Transforms/Reassociate/shift-factor.ll
@@ -1,12 +1,14 @@
; There should be exactly one shift and one add left.
-; RUN: opt < %s -reassociate -instcombine -S > %t
-; RUN: grep shl %t | count 1
-; RUN: grep add %t | count 1
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
-define i32 @test(i32 %X, i32 %Y) {
- %tmp.2 = shl i32 %X, 1 ; <i32> [#uses=1]
- %tmp.6 = shl i32 %Y, 1 ; <i32> [#uses=1]
- %tmp.4 = add i32 %tmp.6, %tmp.2 ; <i32> [#uses=1]
- ret i32 %tmp.4
-}
+define i32 @test1(i32 %X, i32 %Y) {
+; CHECK-LABEL: test1
+; CHECK-NEXT: %tmp = add i32 %Y, %X
+; CHECK-NEXT: %tmp1 = shl i32 %tmp, 1
+; CHECK-NEXT: ret i32 %tmp1
+ %tmp.2 = shl i32 %X, 1
+ %tmp.6 = shl i32 %Y, 1
+ %tmp.4 = add i32 %tmp.6, %tmp.2
+ ret i32 %tmp.4
+}
diff --git a/test/Transforms/Reassociate/subtest.ll b/test/Transforms/Reassociate/subtest.ll
index 4c63d12..e6263d8 100644
--- a/test/Transforms/Reassociate/subtest.ll
+++ b/test/Transforms/Reassociate/subtest.ll
@@ -1,11 +1,26 @@
-; With sub reassociation, constant folding can eliminate the 12 and -12 constants.
-;
-; RUN: opt < %s -reassociate -instcombine -S | not grep 12
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
-define i32 @test(i32 %A, i32 %B) {
- %X = add i32 -12, %A ; <i32> [#uses=1]
- %Y = sub i32 %X, %B ; <i32> [#uses=1]
- %Z = add i32 %Y, 12 ; <i32> [#uses=1]
- ret i32 %Z
+; With sub reassociation, constant folding can eliminate the 12 and -12 constants.
+define i32 @test1(i32 %A, i32 %B) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT: %Z = sub i32 %A, %B
+; CHECK-NEXT: ret i32 %Z
+ %X = add i32 -12, %A
+ %Y = sub i32 %X, %B
+ %Z = add i32 %Y, 12
+ ret i32 %Z
}
+; PR2047
+; With sub reassociation, constant folding can eliminate the uses of %a.
+define i32 @test2(i32 %a, i32 %b, i32 %c) nounwind {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: %sum = add i32 %c, %b
+; CHECK-NEXT: %tmp7 = sub i32 0, %sum
+; CHECK-NEXT: ret i32 %tmp7
+
+ %tmp3 = sub i32 %a, %b
+ %tmp5 = sub i32 %tmp3, %c
+ %tmp7 = sub i32 %tmp5, %a
+ ret i32 %tmp7
+}
diff --git a/test/Transforms/Reassociate/subtest2.ll b/test/Transforms/Reassociate/subtest2.ll
deleted file mode 100644
index 0513c5f..0000000
--- a/test/Transforms/Reassociate/subtest2.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; With sub reassociation, constant folding can eliminate the uses of %a.
-;
-; RUN: opt < %s -reassociate -instcombine -S | grep %a | count 1
-; PR2047
-
-define i32 @test(i32 %a, i32 %b, i32 %c) nounwind {
-entry:
- %tmp3 = sub i32 %a, %b ; <i32> [#uses=1]
- %tmp5 = sub i32 %tmp3, %c ; <i32> [#uses=1]
- %tmp7 = sub i32 %tmp5, %a ; <i32> [#uses=1]
- ret i32 %tmp7
-}
-
diff --git a/test/Transforms/SCCP/atomic.ll b/test/Transforms/SCCP/atomic.ll
new file mode 100644
index 0000000..60d4896
--- /dev/null
+++ b/test/Transforms/SCCP/atomic.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -sccp -S | FileCheck %s
+
+define i1 @test_cmpxchg(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: @test_cmpxchg
+; CHECK: cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+ %val = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+ %res = extractvalue { i32, i1 } %val, 1
+ ret i1 %res
+}
diff --git a/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg b/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg
index c420349..7184443 100644
--- a/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg
+++ b/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
+if not 'AArch64' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/ARM/lit.local.cfg b/test/Transforms/SLPVectorizer/ARM/lit.local.cfg
index 5fc35d8..236e1d3 100644
--- a/test/Transforms/SLPVectorizer/ARM/lit.local.cfg
+++ b/test/Transforms/SLPVectorizer/ARM/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM' in targets:
+if not 'ARM' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/R600/lit.local.cfg b/test/Transforms/SLPVectorizer/R600/lit.local.cfg
index 9e0ab99..4086e8d 100644
--- a/test/Transforms/SLPVectorizer/R600/lit.local.cfg
+++ b/test/Transforms/SLPVectorizer/R600/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'R600' in targets:
+if not 'R600' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/X86/addsub.ll b/test/Transforms/SLPVectorizer/X86/addsub.ll
new file mode 100644
index 0000000..8303bc8
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/addsub.ll
@@ -0,0 +1,181 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = common global [4 x i32] zeroinitializer, align 16
+@c = common global [4 x i32] zeroinitializer, align 16
+@d = common global [4 x i32] zeroinitializer, align 16
+@e = common global [4 x i32] zeroinitializer, align 16
+@a = common global [4 x i32] zeroinitializer, align 16
+@fb = common global [4 x float] zeroinitializer, align 16
+@fc = common global [4 x float] zeroinitializer, align 16
+@fa = common global [4 x float] zeroinitializer, align 16
+
+; CHECK-LABEL: @addsub
+; CHECK: %5 = add <4 x i32> %3, %4
+; CHECK: %6 = add <4 x i32> %2, %5
+; CHECK: %7 = sub <4 x i32> %2, %5
+; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+
+; Function Attrs: nounwind uwtable
+define void @addsub() #0 {
+entry:
+ %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4
+ %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4
+ %add = add nsw i32 %0, %1
+ %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4
+ %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4
+ %add1 = add nsw i32 %2, %3
+ %add2 = add nsw i32 %add, %add1
+ store i32 %add2, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4
+ %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4
+ %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4
+ %add3 = add nsw i32 %4, %5
+ %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4
+ %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4
+ %add4 = add nsw i32 %6, %7
+ %sub = sub nsw i32 %add3, %add4
+ store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4
+ %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4
+ %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4
+ %add5 = add nsw i32 %8, %9
+ %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4
+ %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4
+ %add6 = add nsw i32 %10, %11
+ %add7 = add nsw i32 %add5, %add6
+ store i32 %add7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4
+ %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4
+ %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4
+ %add8 = add nsw i32 %12, %13
+ %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4
+ %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4
+ %add9 = add nsw i32 %14, %15
+ %sub10 = sub nsw i32 %add8, %add9
+ store i32 %sub10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4
+ ret void
+}
+
+; CHECK-LABEL: @subadd
+; CHECK: %5 = add <4 x i32> %3, %4
+; CHECK: %6 = sub <4 x i32> %2, %5
+; CHECK: %7 = add <4 x i32> %2, %5
+; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+
+; Function Attrs: nounwind uwtable
+define void @subadd() #0 {
+entry:
+ %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4
+ %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4
+ %add = add nsw i32 %0, %1
+ %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4
+ %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4
+ %add1 = add nsw i32 %2, %3
+ %sub = sub nsw i32 %add, %add1
+ store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4
+ %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4
+ %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4
+ %add2 = add nsw i32 %4, %5
+ %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4
+ %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4
+ %add3 = add nsw i32 %6, %7
+ %add4 = add nsw i32 %add2, %add3
+ store i32 %add4, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4
+ %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4
+ %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4
+ %add5 = add nsw i32 %8, %9
+ %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4
+ %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4
+ %add6 = add nsw i32 %10, %11
+ %sub7 = sub nsw i32 %add5, %add6
+ store i32 %sub7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4
+ %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4
+ %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4
+ %add8 = add nsw i32 %12, %13
+ %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4
+ %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4
+ %add9 = add nsw i32 %14, %15
+ %add10 = add nsw i32 %add8, %add9
+ store i32 %add10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4
+ ret void
+}
+
+; CHECK-LABEL: @faddfsub
+; CHECK: %2 = fadd <4 x float> %0, %1
+; CHECK: %3 = fsub <4 x float> %0, %1
+; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; Function Attrs: nounwind uwtable
+define void @faddfsub() #0 {
+entry:
+ %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+ %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
+ %add = fadd float %0, %1
+ store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
+ %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+ %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
+ %sub = fsub float %2, %3
+ store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
+ %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+ %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
+ %add1 = fadd float %4, %5
+ store float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
+ %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+ %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
+ %sub2 = fsub float %6, %7
+ store float %sub2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
+ ret void
+}
+
+; CHECK-LABEL: @fsubfadd
+; CHECK: %2 = fsub <4 x float> %0, %1
+; CHECK: %3 = fadd <4 x float> %0, %1
+; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; Function Attrs: nounwind uwtable
+define void @fsubfadd() #0 {
+entry:
+ %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+ %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
+ %sub = fsub float %0, %1
+ store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
+ %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+ %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
+ %add = fadd float %2, %3
+ store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
+ %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+ %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
+ %sub1 = fsub float %4, %5
+ store float %sub1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
+ %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+ %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
+ %add2 = fadd float %6, %7
+ store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
+ ret void
+}
+
+; CHECK-LABEL: @No_faddfsub
+; CHECK-NOT: fadd <4 x float>
+; CHECK-NOT: fsub <4 x float>
+; CHECK-NOT: shufflevector
+; Function Attrs: nounwind uwtable
+define void @No_faddfsub() #0 {
+entry:
+ %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+ %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
+ %add = fadd float %0, %1
+ store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
+ %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+ %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
+ %add1 = fadd float %2, %3
+ store float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
+ %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+ %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
+ %add2 = fadd float %4, %5
+ store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
+ %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+ %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
+ %sub = fsub float %6, %7
+ store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/Transforms/SLPVectorizer/X86/gep.ll b/test/Transforms/SLPVectorizer/X86/gep.ll
new file mode 100644
index 0000000..9e105ec
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/gep.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S |FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Test if SLP can handle GEP expressions.
+; The test perform the following action:
+; x->first = y->first + 16
+; x->second = y->second + 16
+
+; CHECK-LABEL: foo1
+; CHECK: <2 x i32*>
+define void @foo1 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y) {
+ %1 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
+ %2 = load i32** %1, align 8
+ %3 = getelementptr inbounds i32* %2, i64 16
+ %4 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
+ store i32* %3, i32** %4, align 8
+ %5 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 1
+ %6 = load i32** %5, align 8
+ %7 = getelementptr inbounds i32* %6, i64 16
+ %8 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 1
+ store i32* %7, i32** %8, align 8
+ ret void
+}
+
+; Test that we don't vectorize GEP expressions if the indexes are not constants.
+; We can't produce efficient code in that case.
+; CHECK-LABEL: foo2
+; CHECK-NOT: <2 x i32*>
+define void @foo2 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y, i32 %i) {
+ %1 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
+ %2 = load i32** %1, align 8
+ %3 = getelementptr inbounds i32* %2, i32 %i
+ %4 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
+ store i32* %3, i32** %4, align 8
+ %5 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 1
+ %6 = load i32** %5, align 8
+ %7 = getelementptr inbounds i32* %6, i32 %i
+ %8 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 1
+ store i32* %7, i32** %8, align 8
+ ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/intrinsic.ll b/test/Transforms/SLPVectorizer/X86/intrinsic.ll
index 30c5093..937252f 100644
--- a/test/Transforms/SLPVectorizer/X86/intrinsic.ll
+++ b/test/Transforms/SLPVectorizer/X86/intrinsic.ll
@@ -117,3 +117,270 @@ entry:
; CHECK: store <4 x i32>
; CHECK: ret
}
+
+declare i32 @llvm.ctlz.i32(i32,i1) nounwind readnone
+
+define void @vec_ctlz_i32(i32* %a, i32* %b, i32* %c, i1) {
+entry:
+ %i0 = load i32* %a, align 4
+ %i1 = load i32* %b, align 4
+ %add1 = add i32 %i0, %i1
+ %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
+
+ %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+ %i2 = load i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+ %i3 = load i32* %arrayidx3, align 4
+ %add2 = add i32 %i2, %i3
+ %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 true) nounwind readnone
+
+ %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+ %i4 = load i32* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+ %i5 = load i32* %arrayidx5, align 4
+ %add3 = add i32 %i4, %i5
+ %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
+
+ %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+ %i6 = load i32* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+ %i7 = load i32* %arrayidx7, align 4
+ %add4 = add i32 %i6, %i7
+ %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 true) nounwind readnone
+
+ store i32 %call1, i32* %c, align 4
+ %arrayidx8 = getelementptr inbounds i32* %c, i32 1
+ store i32 %call2, i32* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds i32* %c, i32 2
+ store i32 %call3, i32* %arrayidx9, align 4
+ %arrayidx10 = getelementptr inbounds i32* %c, i32 3
+ store i32 %call4, i32* %arrayidx10, align 4
+ ret void
+
+; CHECK-LABEL: @vec_ctlz_i32(
+; CHECK: load <4 x i32>
+; CHECK: load <4 x i32>
+; CHECK: call <4 x i32> @llvm.ctlz.v4i32
+; CHECK: store <4 x i32>
+; CHECK: ret
+}
+
+define void @vec_ctlz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
+entry:
+ %i0 = load i32* %a, align 4
+ %i1 = load i32* %b, align 4
+ %add1 = add i32 %i0, %i1
+ %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
+
+ %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+ %i2 = load i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+ %i3 = load i32* %arrayidx3, align 4
+ %add2 = add i32 %i2, %i3
+ %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 false) nounwind readnone
+
+ %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+ %i4 = load i32* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+ %i5 = load i32* %arrayidx5, align 4
+ %add3 = add i32 %i4, %i5
+ %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
+
+ %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+ %i6 = load i32* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+ %i7 = load i32* %arrayidx7, align 4
+ %add4 = add i32 %i6, %i7
+ %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 false) nounwind readnone
+
+ store i32 %call1, i32* %c, align 4
+ %arrayidx8 = getelementptr inbounds i32* %c, i32 1
+ store i32 %call2, i32* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds i32* %c, i32 2
+ store i32 %call3, i32* %arrayidx9, align 4
+ %arrayidx10 = getelementptr inbounds i32* %c, i32 3
+ store i32 %call4, i32* %arrayidx10, align 4
+ ret void
+
+; CHECK-LABEL: @vec_ctlz_i32_neg(
+; CHECK-NOT: call <4 x i32> @llvm.ctlz.v4i32
+
+}
+
+
+declare i32 @llvm.cttz.i32(i32,i1) nounwind readnone
+
+define void @vec_cttz_i32(i32* %a, i32* %b, i32* %c, i1) {
+entry:
+ %i0 = load i32* %a, align 4
+ %i1 = load i32* %b, align 4
+ %add1 = add i32 %i0, %i1
+ %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
+
+ %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+ %i2 = load i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+ %i3 = load i32* %arrayidx3, align 4
+ %add2 = add i32 %i2, %i3
+ %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 true) nounwind readnone
+
+ %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+ %i4 = load i32* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+ %i5 = load i32* %arrayidx5, align 4
+ %add3 = add i32 %i4, %i5
+ %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
+
+ %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+ %i6 = load i32* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+ %i7 = load i32* %arrayidx7, align 4
+ %add4 = add i32 %i6, %i7
+ %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 true) nounwind readnone
+
+ store i32 %call1, i32* %c, align 4
+ %arrayidx8 = getelementptr inbounds i32* %c, i32 1
+ store i32 %call2, i32* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds i32* %c, i32 2
+ store i32 %call3, i32* %arrayidx9, align 4
+ %arrayidx10 = getelementptr inbounds i32* %c, i32 3
+ store i32 %call4, i32* %arrayidx10, align 4
+ ret void
+
+; CHECK-LABEL: @vec_cttz_i32(
+; CHECK: load <4 x i32>
+; CHECK: load <4 x i32>
+; CHECK: call <4 x i32> @llvm.cttz.v4i32
+; CHECK: store <4 x i32>
+; CHECK: ret
+}
+
+define void @vec_cttz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
+entry:
+ %i0 = load i32* %a, align 4
+ %i1 = load i32* %b, align 4
+ %add1 = add i32 %i0, %i1
+ %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
+
+ %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+ %i2 = load i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+ %i3 = load i32* %arrayidx3, align 4
+ %add2 = add i32 %i2, %i3
+ %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 false) nounwind readnone
+
+ %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+ %i4 = load i32* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+ %i5 = load i32* %arrayidx5, align 4
+ %add3 = add i32 %i4, %i5
+ %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
+
+ %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+ %i6 = load i32* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+ %i7 = load i32* %arrayidx7, align 4
+ %add4 = add i32 %i6, %i7
+ %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 false) nounwind readnone
+
+ store i32 %call1, i32* %c, align 4
+ %arrayidx8 = getelementptr inbounds i32* %c, i32 1
+ store i32 %call2, i32* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds i32* %c, i32 2
+ store i32 %call3, i32* %arrayidx9, align 4
+ %arrayidx10 = getelementptr inbounds i32* %c, i32 3
+ store i32 %call4, i32* %arrayidx10, align 4
+ ret void
+
+; CHECK-LABEL: @vec_cttz_i32_neg(
+; CHECK-NOT: call <4 x i32> @llvm.cttz.v4i32
+}
+
+
+declare float @llvm.powi.f32(float, i32)
+define void @vec_powi_f32(float* %a, float* %b, float* %c, i32 %P) {
+entry:
+ %i0 = load float* %a, align 4
+ %i1 = load float* %b, align 4
+ %add1 = fadd float %i0, %i1
+ %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
+
+ %arrayidx2 = getelementptr inbounds float* %a, i32 1
+ %i2 = load float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds float* %b, i32 1
+ %i3 = load float* %arrayidx3, align 4
+ %add2 = fadd float %i2, %i3
+ %call2 = tail call float @llvm.powi.f32(float %add2,i32 %P) nounwind readnone
+
+ %arrayidx4 = getelementptr inbounds float* %a, i32 2
+ %i4 = load float* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds float* %b, i32 2
+ %i5 = load float* %arrayidx5, align 4
+ %add3 = fadd float %i4, %i5
+ %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
+
+ %arrayidx6 = getelementptr inbounds float* %a, i32 3
+ %i6 = load float* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds float* %b, i32 3
+ %i7 = load float* %arrayidx7, align 4
+ %add4 = fadd float %i6, %i7
+ %call4 = tail call float @llvm.powi.f32(float %add4,i32 %P) nounwind readnone
+
+ store float %call1, float* %c, align 4
+ %arrayidx8 = getelementptr inbounds float* %c, i32 1
+ store float %call2, float* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds float* %c, i32 2
+ store float %call3, float* %arrayidx9, align 4
+ %arrayidx10 = getelementptr inbounds float* %c, i32 3
+ store float %call4, float* %arrayidx10, align 4
+ ret void
+
+; CHECK-LABEL: @vec_powi_f32(
+; CHECK: load <4 x float>
+; CHECK: load <4 x float>
+; CHECK: call <4 x float> @llvm.powi.v4f32
+; CHECK: store <4 x float>
+; CHECK: ret
+}
+
+
+define void @vec_powi_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) {
+entry:
+ %i0 = load float* %a, align 4
+ %i1 = load float* %b, align 4
+ %add1 = fadd float %i0, %i1
+ %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
+
+ %arrayidx2 = getelementptr inbounds float* %a, i32 1
+ %i2 = load float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds float* %b, i32 1
+ %i3 = load float* %arrayidx3, align 4
+ %add2 = fadd float %i2, %i3
+ %call2 = tail call float @llvm.powi.f32(float %add2,i32 %Q) nounwind readnone
+
+ %arrayidx4 = getelementptr inbounds float* %a, i32 2
+ %i4 = load float* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds float* %b, i32 2
+ %i5 = load float* %arrayidx5, align 4
+ %add3 = fadd float %i4, %i5
+ %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
+
+ %arrayidx6 = getelementptr inbounds float* %a, i32 3
+ %i6 = load float* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds float* %b, i32 3
+ %i7 = load float* %arrayidx7, align 4
+ %add4 = fadd float %i6, %i7
+ %call4 = tail call float @llvm.powi.f32(float %add4,i32 %Q) nounwind readnone
+
+ store float %call1, float* %c, align 4
+ %arrayidx8 = getelementptr inbounds float* %c, i32 1
+ store float %call2, float* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds float* %c, i32 2
+ store float %call3, float* %arrayidx9, align 4
+ %arrayidx10 = getelementptr inbounds float* %c, i32 3
+ store float %call4, float* %arrayidx10, align 4
+ ret void
+
+; CHECK-LABEL: @vec_powi_f32_neg(
+; CHECK-NOT: call <4 x float> @llvm.powi.v4f32
+}
diff --git a/test/Transforms/SLPVectorizer/X86/lit.local.cfg b/test/Transforms/SLPVectorizer/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/SLPVectorizer/X86/lit.local.cfg
+++ b/test/Transforms/SLPVectorizer/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/X86/pr19657.ll b/test/Transforms/SLPVectorizer/X86/pr19657.ll
new file mode 100644
index 0000000..9352308
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/pr19657.ll
@@ -0,0 +1,73 @@
+; RUN: opt < %s -O1 -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK: load <2 x double>*
+;CHECK: fadd <2 x double>
+;CHECK: store <2 x double>
+
+; Function Attrs: nounwind uwtable
+define void @foo(double* %x) #0 {
+ %1 = alloca double*, align 8
+ store double* %x, double** %1, align 8
+ %2 = load double** %1, align 8
+ %3 = getelementptr inbounds double* %2, i64 0
+ %4 = load double* %3, align 8
+ %5 = load double** %1, align 8
+ %6 = getelementptr inbounds double* %5, i64 0
+ %7 = load double* %6, align 8
+ %8 = fadd double %4, %7
+ %9 = load double** %1, align 8
+ %10 = getelementptr inbounds double* %9, i64 0
+ %11 = load double* %10, align 8
+ %12 = fadd double %8, %11
+ %13 = load double** %1, align 8
+ %14 = getelementptr inbounds double* %13, i64 0
+ store double %12, double* %14, align 8
+ %15 = load double** %1, align 8
+ %16 = getelementptr inbounds double* %15, i64 1
+ %17 = load double* %16, align 8
+ %18 = load double** %1, align 8
+ %19 = getelementptr inbounds double* %18, i64 1
+ %20 = load double* %19, align 8
+ %21 = fadd double %17, %20
+ %22 = load double** %1, align 8
+ %23 = getelementptr inbounds double* %22, i64 1
+ %24 = load double* %23, align 8
+ %25 = fadd double %21, %24
+ %26 = load double** %1, align 8
+ %27 = getelementptr inbounds double* %26, i64 1
+ store double %25, double* %27, align 8
+ %28 = load double** %1, align 8
+ %29 = getelementptr inbounds double* %28, i64 2
+ %30 = load double* %29, align 8
+ %31 = load double** %1, align 8
+ %32 = getelementptr inbounds double* %31, i64 2
+ %33 = load double* %32, align 8
+ %34 = fadd double %30, %33
+ %35 = load double** %1, align 8
+ %36 = getelementptr inbounds double* %35, i64 2
+ %37 = load double* %36, align 8
+ %38 = fadd double %34, %37
+ %39 = load double** %1, align 8
+ %40 = getelementptr inbounds double* %39, i64 2
+ store double %38, double* %40, align 8
+ %41 = load double** %1, align 8
+ %42 = getelementptr inbounds double* %41, i64 3
+ %43 = load double* %42, align 8
+ %44 = load double** %1, align 8
+ %45 = getelementptr inbounds double* %44, i64 3
+ %46 = load double* %45, align 8
+ %47 = fadd double %43, %46
+ %48 = load double** %1, align 8
+ %49 = getelementptr inbounds double* %48, i64 3
+ %50 = load double* %49, align 8
+ %51 = fadd double %47, %50
+ %52 = load double** %1, align 8
+ %53 = getelementptr inbounds double* %52, i64 3
+ store double %51, double* %53, align 8
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/SLPVectorizer/XCore/lit.local.cfg b/test/Transforms/SLPVectorizer/XCore/lit.local.cfg
index 4d17d46..bb48713 100644
--- a/test/Transforms/SLPVectorizer/XCore/lit.local.cfg
+++ b/test/Transforms/SLPVectorizer/XCore/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'XCore' in targets:
+if not 'XCore' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/SROA/slice-order-independence.ll b/test/Transforms/SROA/slice-order-independence.ll
new file mode 100644
index 0000000..364ef85
--- /dev/null
+++ b/test/Transforms/SROA/slice-order-independence.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+; Check that the type chosen for a split is independent of the order of the
+; slices, even in the case of types that are skipped because their width is
+; not a multiple of the byte width.
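+; (i63 is 63 bits wide, not a multiple of 8, so SROA should skip it when
+; choosing the slice type; whichever of the i63 or i8* loads it visits first,
+; both functions below are expected to end up with the "alloca i8*" the CHECK
+; lines require.)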
+define void @skipped_inttype_first({ i16*, i32 }*) {
+; CHECK-LABEL: @skipped_inttype_first
+; CHECK: alloca i8*
+ %arg = alloca { i16*, i32 }, align 8
+ %2 = bitcast { i16*, i32 }* %0 to i8*
+ %3 = bitcast { i16*, i32 }* %arg to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
+ %b = getelementptr inbounds { i16*, i32 }* %arg, i64 0, i32 0
+ %pb0 = bitcast i16** %b to i63*
+ %b0 = load i63* %pb0
+ %pb1 = bitcast i16** %b to i8**
+ %b1 = load i8** %pb1
+ ret void
+}
+
+define void @skipped_inttype_last({ i16*, i32 }*) {
+; CHECK-LABEL: @skipped_inttype_last
+; CHECK: alloca i8*
+ %arg = alloca { i16*, i32 }, align 8
+ %2 = bitcast { i16*, i32 }* %0 to i8*
+ %3 = bitcast { i16*, i32 }* %arg to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
+ %b = getelementptr inbounds { i16*, i32 }* %arg, i64 0, i32 0
+ %pb1 = bitcast i16** %b to i8**
+ %b1 = load i8** %pb1
+ %pb0 = bitcast i16** %b to i63*
+ %b0 = load i63* %pb0
+ ret void
+}
diff --git a/test/Transforms/SROA/slice-width.ll b/test/Transforms/SROA/slice-width.ll
new file mode 100644
index 0000000..179780b
--- /dev/null
+++ b/test/Transforms/SROA/slice-width.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define void @no_split_on_non_byte_width(i32) {
+; This tests that allocas are not split into slices whose width is not a byte-width multiple.
+ %arg = alloca i32 , align 8
+ store i32 %0, i32* %arg
+ br label %load_i32
+
+load_i32:
+; CHECK-LABEL: load_i32:
+; CHECK-NOT: bitcast {{.*}} to i1
+; CHECK-NOT: zext i1
+ %r0 = load i32* %arg
+ br label %load_i1
+
+load_i1:
+; CHECK-LABEL: load_i1:
+; CHECK: bitcast {{.*}} to i1
+ %p1 = bitcast i32* %arg to i1*
+ %t1 = load i1* %p1
+ ret void
+}
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg
index 40532cd..a5e90f8 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'NVPTX' in targets:
+if not 'NVPTX' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index 850fc4c..c07440c 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX
; RUN: opt < %s -S -separate-const-offset-from-gep -gvn -dce | FileCheck %s --check-prefix=IR
@@ -20,6 +19,90 @@ target triple = "nvptx64-unknown-unknown"
define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
.preheader:
+ %0 = sext i32 %y to i64
+ %1 = sext i32 %x to i64
+ %2 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
+ %3 = addrspacecast float addrspace(3)* %2 to float*
+ %4 = load float* %3, align 4
+ %5 = fadd float %4, 0.000000e+00
+ %6 = add i32 %y, 1
+ %7 = sext i32 %6 to i64
+ %8 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %7
+ %9 = addrspacecast float addrspace(3)* %8 to float*
+ %10 = load float* %9, align 4
+ %11 = fadd float %5, %10
+ %12 = add i32 %x, 1
+ %13 = sext i32 %12 to i64
+ %14 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %0
+ %15 = addrspacecast float addrspace(3)* %14 to float*
+ %16 = load float* %15, align 4
+ %17 = fadd float %11, %16
+ %18 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %7
+ %19 = addrspacecast float addrspace(3)* %18 to float*
+ %20 = load float* %19, align 4
+ %21 = fadd float %17, %20
+ store float %21, float* %output, align 4
+ ret void
+}
+; PTX-LABEL: sum_of_array(
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rl|r)[0-9]+]]{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+
+; IR-LABEL: @sum_of_array(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 33
+
+; @sum_of_array2 is very similar to @sum_of_array. The only difference is in
+; the order of "sext" and "add" when computing the array indices. @sum_of_array
+; computes add before sext, e.g., array[sext(x + 1)][sext(y + 1)], while
+; @sum_of_array2 computes sext before add,
+; e.g., array[sext(x) + 1][sext(y) + 1]. SeparateConstOffsetFromGEP should be
+; able to extract constant offsets from both forms.
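+; Either way the extracted constants are the same: the +1 on the outer index
+; is worth one 32-float row and the +1 on the inner index one float, so the IR
+; checks below expect element offsets 1, 32, and 33 from the common base GEP.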
+define void @sum_of_array2(i32 %x, i32 %y, float* nocapture %output) {
+.preheader:
+ %0 = sext i32 %y to i64
+ %1 = sext i32 %x to i64
+ %2 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
+ %3 = addrspacecast float addrspace(3)* %2 to float*
+ %4 = load float* %3, align 4
+ %5 = fadd float %4, 0.000000e+00
+ %6 = add i64 %0, 1
+ %7 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %6
+ %8 = addrspacecast float addrspace(3)* %7 to float*
+ %9 = load float* %8, align 4
+ %10 = fadd float %5, %9
+ %11 = add i64 %1, 1
+ %12 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %0
+ %13 = addrspacecast float addrspace(3)* %12 to float*
+ %14 = load float* %13, align 4
+ %15 = fadd float %10, %14
+ %16 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %6
+ %17 = addrspacecast float addrspace(3)* %16 to float*
+ %18 = load float* %17, align 4
+ %19 = fadd float %15, %18
+ store float %19, float* %output, align 4
+ ret void
+}
+; PTX-LABEL: sum_of_array2(
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rl|r)[0-9]+]]{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+
+; IR-LABEL: @sum_of_array2(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 33
+
+; Similar to @sum_of_array, but extends the array indices using zext instead
+; of sext, e.g., array[zext(x + 1)][zext(y + 1)].
+define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {
+.preheader:
%0 = zext i32 %y to i64
%1 = zext i32 %x to i64
%2 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
@@ -45,15 +128,14 @@ define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
store float %21, float* %output, align 4
ret void
}
-
-; PTX-LABEL: sum_of_array(
+; PTX-LABEL: sum_of_array3(
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rl|r)[0-9]+]]{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
-; IR-LABEL: @sum_of_array(
-; IR: [[BASE_PTR:%[0-9]+]] = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i32 %x, i32 %y
+; IR-LABEL: @sum_of_array3(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 1
; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 32
; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 33
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index 2e50f5f..ed40c7e 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -23,71 +23,94 @@ entry:
%p = getelementptr inbounds [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1
ret double* %p
}
-; CHECK-LABEL: @struct
-; CHECK: getelementptr [1024 x %struct.S]* @struct_array, i64 0, i32 %i, i32 1
+; CHECK-LABEL: @struct(
+; CHECK: getelementptr [1024 x %struct.S]* @struct_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1
-; We should be able to trace into sext/zext if it's directly used as a GEP
-; index.
-define float* @sext_zext(i32 %i, i32 %j) {
+; We should be able to trace into sext(a + b) if a + b is non-negative
+; (e.g., used as an index of an inbounds GEP) and one of a and b is
+; non-negative.
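+; For illustration: i + 1 feeds an inbounds GEP, so it must be non-negative
+; and sext(i + 1) = sext(i) + 1, letting the +1 become the 32-float row offset
+; checked below. The same does not hold for j + -2: with i32 j = INT_MIN,
+; j + -2 wraps to 2147483646 while sext(j) + -2 = -2147483650, so that -2 has
+; to stay in the index.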
+define float* @sext_add(i32 %i, i32 %j) {
entry:
- %i1 = add i32 %i, 1
- %j2 = add i32 %j, 2
- %i1.ext = sext i32 %i1 to i64
- %j2.ext = zext i32 %j2 to i64
- %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i1.ext, i64 %j2.ext
+ %0 = add i32 %i, 1
+ %1 = sext i32 %0 to i64 ; inbound sext(i + 1) = sext(i) + 1
+ %2 = add i32 %j, -2
+ ; However, inbound sext(j + -2) != sext(j) + -2, e.g., j = INT_MIN
+ %3 = sext i32 %2 to i64
+ %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %1, i64 %3
ret float* %p
}
-; CHECK-LABEL: @sext_zext
-; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i32 %i, i32 %j
-; CHECK: getelementptr float* %{{[0-9]+}}, i64 34
+; CHECK-LABEL: @sext_add(
+; CHECK-NOT: = add
+; CHECK: add i32 %j, -2
+; CHECK: sext
+; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float* %{{[a-zA-Z0-9]+}}, i64 32
; We should be able to trace into sext/zext if it can be distributed to both
; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b)
+;
+; This test verifies we can transform
+; gep base, a + sext(b +nsw 1), c + zext(d +nuw 1)
+; to
+; gep base, a + sext(b), c + zext(d); gep ..., 1 * 32 + 1
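+; The two peeled-off constants amount to 1 * 32 + 1 = 33 elements, which is
+; the "i64 33" offset expected from the split base GEP below.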
define float* @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) {
%b1 = add nsw i32 %b, 1
%b2 = sext i32 %b1 to i64
- %i = add i64 %a, %b2
+ %i = add i64 %a, %b2 ; i = a + sext(b +nsw 1)
%d1 = add nuw i32 %d, 1
%d2 = zext i32 %d1 to i64
- %j = add i64 %c, %d2
+ %j = add i64 %c, %d2 ; j = c + zext(d +nuw 1)
%p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %j
ret float* %p
}
-; CHECK-LABEL: @ext_add_no_overflow
-; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}
+; CHECK-LABEL: @ext_add_no_overflow(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; CHECK: getelementptr float* [[BASE_PTR]], i64 33
-; Similar to @ext_add_no_overflow, we should be able to trace into sext/zext if
-; its operand is an "or" instruction.
-define float* @ext_or(i64 %a, i32 %b) {
+; Verifies we handle nested sext/zext correctly.
+define void @sext_zext(i32 %a, i32 %b, float** %out1, float** %out2) {
+entry:
+ %0 = add nsw nuw i32 %a, 1
+ %1 = sext i32 %0 to i48
+ %2 = zext i48 %1 to i64 ; zext(sext(a +nsw nuw 1)) = zext(sext(a)) + 1
+ %3 = add nsw i32 %b, 2
+ %4 = sext i32 %3 to i48
+ %5 = zext i48 %4 to i64 ; zext(sext(b +nsw 2)) != zext(sext(b)) + 2
+ %p1 = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %2, i64 %5
+ store float* %p1, float** %out1
+ %6 = add nuw i32 %a, 3
+ %7 = zext i32 %6 to i48
+ %8 = sext i48 %7 to i64 ; sext(zext(a +nuw 3)) = zext(a +nuw 3) = zext(a) + 3
+ %9 = add nsw i32 %b, 4
+ %10 = zext i32 %9 to i48
+ %11 = sext i48 %10 to i64 ; sext(zext(b +nsw 4)) != zext(b) + 4
+ %p2 = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %8, i64 %11
+ store float* %p2, float** %out2
+ ret void
+}
+; CHECK-LABEL: @sext_zext(
+; CHECK: [[BASE_PTR_1:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float* [[BASE_PTR_1]], i64 32
+; CHECK: [[BASE_PTR_2:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float* [[BASE_PTR_2]], i64 96
+
+; Similar to @ext_add_no_overflow, we should be able to trace into sext/zext
+; if its operand is an OR and the two operands of the OR have no common bits.
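+; Since (b << 2) has its low two bits clear, or-ing it with 1 acts like adding
+; 1, so the row index contributes the 32-float offset checked below; or-ing
+; with 4 may overlap bit 2 of (b << 2), so the column index is left alone.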
+define float* @sext_or(i64 %a, i32 %b) {
entry:
%b1 = shl i32 %b, 2
- %b2 = or i32 %b1, 1
- %b3 = or i32 %b1, 2
- %b2.ext = sext i32 %b2 to i64
+ %b2 = or i32 %b1, 1 ; (b << 2) and 1 have no common bits
+ %b3 = or i32 %b1, 4 ; (b << 2) and 4 may have common bits
+ %b2.ext = zext i32 %b2 to i64
%b3.ext = sext i32 %b3 to i64
%i = add i64 %a, %b2.ext
%j = add i64 %a, %b3.ext
%p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %j
ret float* %p
}
-; CHECK-LABEL: @ext_or
-; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}
-; CHECK: getelementptr float* [[BASE_PTR]], i64 34
-
-; We should treat "or" with no common bits (%k) as "add", and leave "or" with
-; potentially common bits (%l) as is.
-define float* @or(i64 %i) {
-entry:
- %j = shl i64 %i, 2
- %k = or i64 %j, 3 ; no common bits
- %l = or i64 %j, 4 ; potentially common bits
- %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %k, i64 %l
- ret float* %p
-}
-; CHECK-LABEL: @or
-; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %j, i64 %l
-; CHECK: getelementptr float* [[BASE_PTR]], i64 96
+; CHECK-LABEL: @sext_or(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float* [[BASE_PTR]], i64 32
; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b +
; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't
@@ -100,11 +123,28 @@ entry:
store i64 %b5, i64* %out
ret float* %p
}
-; CHECK-LABEL: @expr
-; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %0, i64 0
+; CHECK-LABEL: @expr(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 0
; CHECK: getelementptr float* [[BASE_PTR]], i64 160
; CHECK: store i64 %b5, i64* %out
+; d + sext(a +nsw (b +nsw (c +nsw 8))) => (d + sext(a) + sext(b) + sext(c)) + 8
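+; Every add in the chain below is nsw, so sext distributes over each of them
+; and the innermost 8 can be peeled off as the trailing "i64 8" element offset
+; in the CHECK lines.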
+define float* @sext_expr(i32 %a, i32 %b, i32 %c, i64 %d) {
+entry:
+ %0 = add nsw i32 %c, 8
+ %1 = add nsw i32 %b, %0
+ %2 = add nsw i32 %a, %1
+ %3 = sext i32 %2 to i64
+ %i = add i64 %d, %3
+ %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %i
+ ret float* %p
+}
+; CHECK-LABEL: @sext_expr(
+; CHECK: sext i32
+; CHECK: sext i32
+; CHECK: sext i32
+; CHECK: getelementptr float* %{{[a-zA-Z0-9]+}}, i64 8
+
; Verifies we handle "sub" correctly.
define float* @sub(i64 %i, i64 %j) {
%i2 = sub i64 %i, 5 ; i - 5
@@ -112,9 +152,9 @@ define float* @sub(i64 %i, i64 %j) {
%p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i2, i64 %j2
ret float* %p
}
-; CHECK-LABEL: @sub
-; CHECK: %[[j2:[0-9]+]] = sub i64 0, %j
-; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]]
+; CHECK-LABEL: @sub(
+; CHECK: %[[j2:[a-zA-Z0-9]+]] = sub i64 0, %j
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]]
; CHECK: getelementptr float* [[BASE_PTR]], i64 -155
%struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed
@@ -130,8 +170,92 @@ entry:
%arrayidx3 = getelementptr inbounds [1024 x %struct.Packed]* %s, i64 0, i64 %idxprom2, i32 1, i64 %idxprom
ret i64* %arrayidx3
}
-; CHECK-LABEL: @packed_struct
-; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [1024 x %struct.Packed]* %s, i64 0, i32 %i, i32 1, i32 %j
-; CHECK: [[CASTED_PTR:%[0-9]+]] = bitcast i64* [[BASE_PTR]] to i8*
+; CHECK-LABEL: @packed_struct(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [1024 x %struct.Packed]* %s, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: [[CASTED_PTR:%[a-zA-Z0-9]+]] = bitcast i64* [[BASE_PTR]] to i8*
; CHECK: %uglygep = getelementptr i8* [[CASTED_PTR]], i64 100
; CHECK: bitcast i8* %uglygep to i64*
+
+; We shouldn't be able to extract the 8 from "zext(a +nuw (b + 8))",
+; because "zext(b + 8) != zext(b) + 8"
+define float* @zext_expr(i32 %a, i32 %b) {
+entry:
+ %0 = add i32 %b, 8
+ %1 = add nuw i32 %a, %0
+ %i = zext i32 %1 to i64
+ %p = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %i
+ ret float* %p
+}
+; CHECK-LABEL: @zext_expr(
+; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %i
+
+; Per http://llvm.org/docs/LangRef.html#id181, the indices of an off-bound gep
+; should be considered sign-extended to the pointer size. Therefore,
+; gep base, (add i32 a, b) != gep (gep base, i32 a), i32 b
+; because
+; sext(a + b) != sext(a) + sext(b)
+;
+; This test verifies we do not illegitimately extract the 8 from
+; gep base, (i32 a + 8)
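+; For example, with i32 a = 2147483640, a + 8 wraps to -2147483648, so
+; sext(a + 8) = -2147483648 while sext(a) + 8 = 2147483648.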
+define float* @i32_add(i32 %a) {
+entry:
+ %i = add i32 %a, 8
+ %p = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i32 %i
+ ret float* %p
+}
+; CHECK-LABEL: @i32_add(
+; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %{{[a-zA-Z0-9]+}}
+; CHECK-NOT: getelementptr
+
+; Verifies that we compute the correct constant offset when the index is
+; sign-extended and then zero-extended. The old version of our code failed to
+; handle this case because it simply computed the constant offset as the
+; sign-extended value of the constant part of the GEP index.
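+; Concretely, sext i1 1 to i4 gives -1 (0b1111) and zext i4 -1 to i64 gives
+; 15, so the correct extracted offset is 15; naively sign-extending the i1
+; constant 1 straight to i64 would give -1 instead.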
+define float* @apint(i1 %a) {
+entry:
+ %0 = add nsw nuw i1 %a, 1
+ %1 = sext i1 %0 to i4
+ %2 = zext i4 %1 to i64 ; zext (sext i1 1 to i4) to i64 = 15
+ %p = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %2
+ ret float* %p
+}
+; CHECK-LABEL: @apint(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float* [[BASE_PTR]], i64 15
+
+; Do not trace into binary operators other than ADD, SUB, and OR.
+define float* @and(i64 %a) {
+entry:
+ %0 = shl i64 %a, 2
+ %1 = and i64 %0, 1
+ %p = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %1
+ ret float* %p
+}
+; CHECK-LABEL: @and(
+; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array
+; CHECK-NOT: getelementptr
+
+; If zext(a + b) <= the max signed value of typeof(a + b), then we can prove
+; that a + b >= 0 and zext(a + b) == sext(a + b). If we can further prove that
+; a or b is non-negative, we have zext(a + b) == sext(a) + sext(b).
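+; In the function below that argument works for the i32 index (the inbounds
+; bound of 4096 is well below 2^31), so its +1 is extracted as a 32-float row
+; offset, but not for the i4 index (4096 is not below 2^3 = 8), so its +2 is
+; left in place; see the CHECK lines after the function.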
+define float* @inbounds_zext_add(i32 %i, i4 %j) {
+entry:
+ %0 = add i32 %i, 1
+ %1 = zext i32 %0 to i64
+ ; Because zext(i + 1) is an index of an inbounds GEP based on
+ ; float_2d_array, zext(i + 1) <= sizeof(float_2d_array) = 4096.
+ ; Furthermore, since typeof(i + 1) is i32 and 4096 < 2^31, we are sure the
+ ; sign bit of i + 1 is 0. This implies zext(i + 1) = sext(i + 1).
+ %2 = add i4 %j, 2
+ %3 = zext i4 %2 to i64
+ ; In this case, typeof(j + 2) is i4, so zext(j + 2) <= 4096 does not imply
+ ; the sign bit of j + 2 is 0.
+ %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %1, i64 %3
+ ret float* %p
+}
+; CHECK-LABEL: @inbounds_zext_add(
+; CHECK-NOT: add
+; CHECK: add i4 %j, 2
+; CHECK: sext
+; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float* %{{[a-zA-Z0-9]+}}, i64 32
diff --git a/test/Transforms/SimplifyCFG/PR17073.ll b/test/Transforms/SimplifyCFG/PR17073.ll
new file mode 100644
index 0000000..8dc9fb2
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/PR17073.ll
@@ -0,0 +1,73 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; In PR17073 ( http://llvm.org/pr17073 ), we illegally hoisted an operation that can trap.
+; The first test confirms that we don't do that when the trapping op is reached by the current BB (block1).
+; The second test confirms that we don't do that when the trapping op is reached by the previous BB (entry).
+; The third test confirms that we can still do this optimization for an operation (add) that doesn't trap.
+; The tests must be complicated enough to prevent previous SimplifyCFG actions from optimizing away
+; the instructions that we're checking for.
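+; The potentially trapping operation in the first two tests is the constant
+; expression "urem (i64 2, ...)": when the pointer comparison is false its
+; zext'ed divisor is 0, so speculating the select above the guarding branch
+; could introduce a division by zero. The third test uses "add" in the same
+; position, which cannot trap, so forming the select is safe.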
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.9.0"
+
+@a = common global i32 0, align 4
+@b = common global i8 0, align 1
+
+; CHECK-LABEL: can_trap1
+; CHECK-NOT: or i1 %tobool, icmp eq (i32* bitcast (i8* @b to i32*), i32* @a)
+; CHECK-NOT: select i1 %tobool, i32* null, i32* select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a)
+define i32* @can_trap1() {
+entry:
+ %0 = load i32* @a, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %exit, label %block1
+
+block1:
+ br i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a), label %exit, label %block2
+
+block2:
+ br label %exit
+
+exit:
+ %storemerge = phi i32* [ null, %entry ],[ null, %block2 ], [ select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), %block1 ]
+ ret i32* %storemerge
+}
+
+; CHECK-LABEL: can_trap2
+; CHECK-NOT: or i1 %tobool, icmp eq (i32* bitcast (i8* @b to i32*), i32* @a)
+; CHECK-NOT: select i1 %tobool, i32* select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), i32* null
+define i32* @can_trap2() {
+entry:
+ %0 = load i32* @a, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %exit, label %block1
+
+block1:
+ br i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a), label %exit, label %block2
+
+block2:
+ br label %exit
+
+exit:
+ %storemerge = phi i32* [ select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), %entry ],[ null, %block2 ], [ null, %block1 ]
+ ret i32* %storemerge
+}
+
+; CHECK-LABEL: cannot_trap
+; CHECK: select i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a), i32* select (i1 icmp eq (i64 add (i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64), i64 2), i64 0), i32* null, i32* @a), i32* null
+define i32* @cannot_trap() {
+entry:
+ %0 = load i32* @a, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %exit, label %block1
+
+block1:
+ br i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a), label %exit, label %block2
+
+block2:
+ br label %exit
+
+exit:
+ %storemerge = phi i32* [ null, %entry ],[ null, %block2 ], [ select (i1 icmp eq (i64 add (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), %block1 ]
+ ret i32* %storemerge
+}
diff --git a/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg b/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
index 4d344fa..fa6a54e 100644
--- a/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
+++ b/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'Sparc' in targets:
+if not 'Sparc' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/SimplifyCFG/X86/lit.local.cfg b/test/Transforms/SimplifyCFG/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/SimplifyCFG/X86/lit.local.cfg
+++ b/test/Transforms/SimplifyCFG/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 81079b1..51ced40 100644
--- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -918,3 +918,58 @@ return:
; CHECK: switch i32
; CHECK-NOT: @switch.table
}
+
+; Don't build tables for switches with TLS variables.
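+; (The address of a thread_local global is resolved per thread at run time, so
+; it cannot be baked into a constant @switch.table initializer.)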
+@tls_a = thread_local global i32 0
+@tls_b = thread_local global i32 0
+@tls_c = thread_local global i32 0
+@tls_d = thread_local global i32 0
+define i32* @tls(i32 %x) {
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %return
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ ]
+sw.bb1:
+ br label %return
+sw.bb2:
+ br label %return
+sw.default:
+ br label %return
+return:
+ %retval.0 = phi i32* [ @tls_d, %sw.default ], [ @tls_c, %sw.bb2 ], [ @tls_b, %sw.bb1 ], [ @tls_a, %entry ]
+ ret i32* %retval.0
+; CHECK-LABEL: @tls(
+; CHECK: switch i32
+; CHECK-NOT: @switch.table
+}
+
+; Don't build tables for switches with dllimport variables.
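+; (A dllimport global's address is only known via the import address table at
+; run time, so it likewise cannot appear in a constant table initializer.)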
+@dllimport_a = external dllimport global [3x i32]
+@dllimport_b = external dllimport global [3x i32]
+@dllimport_c = external dllimport global [3x i32]
+@dllimport_d = external dllimport global [3x i32]
+define i32* @dllimport(i32 %x) {
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %return
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ ]
+sw.bb1:
+ br label %return
+sw.bb2:
+ br label %return
+sw.default:
+ br label %return
+return:
+ %retval.0 = phi i32* [ getelementptr inbounds ([3 x i32]* @dllimport_d, i32 0, i32 0), %sw.default ],
+ [ getelementptr inbounds ([3 x i32]* @dllimport_c, i32 0, i32 0), %sw.bb2 ],
+ [ getelementptr inbounds ([3 x i32]* @dllimport_b, i32 0, i32 0), %sw.bb1 ],
+ [ getelementptr inbounds ([3 x i32]* @dllimport_a, i32 0, i32 0), %entry ]
+ ret i32* %retval.0
+; CHECK-LABEL: @dllimport(
+; CHECK: switch i32
+; CHECK-NOT: @switch.table
+}
diff --git a/test/Transforms/SimplifyCFG/speculate-vector-ops.ll b/test/Transforms/SimplifyCFG/speculate-vector-ops.ll
new file mode 100644
index 0000000..91972eb
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/speculate-vector-ops.ll
@@ -0,0 +1,60 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+
+define i32 @speculate_vector_extract(i32 %d, <4 x i32> %v) #0 {
+; CHECK-LABEL: @speculate_vector_extract(
+; CHECK-NOT: br
+entry:
+ %conv = insertelement <4 x i32> undef, i32 %d, i32 0
+ %conv2 = insertelement <4 x i32> %conv, i32 %d, i32 1
+ %conv3 = insertelement <4 x i32> %conv2, i32 %d, i32 2
+ %conv4 = insertelement <4 x i32> %conv3, i32 %d, i32 3
+ %tmp6 = add nsw <4 x i32> %conv4, <i32 0, i32 -1, i32 -2, i32 -3>
+ %cmp = icmp eq <4 x i32> %tmp6, zeroinitializer
+ %cmp.ext = sext <4 x i1> %cmp to <4 x i32>
+ %tmp8 = extractelement <4 x i32> %cmp.ext, i32 0
+ %tobool = icmp eq i32 %tmp8, 0
+ br i1 %tobool, label %cond.else, label %cond.then
+
+return: ; preds = %cond.end28
+ ret i32 %cond32
+
+cond.then: ; preds = %entry
+ %tmp10 = extractelement <4 x i32> %v, i32 0
+ br label %cond.end
+
+cond.else: ; preds = %entry
+ %tmp12 = extractelement <4 x i32> %v, i32 3
+ br label %cond.end
+
+cond.end: ; preds = %cond.else, %cond.then
+ %cond = phi i32 [ %tmp10, %cond.then ], [ %tmp12, %cond.else ]
+ %tmp14 = extractelement <4 x i32> %cmp.ext, i32 1
+ %tobool15 = icmp eq i32 %tmp14, 0
+ br i1 %tobool15, label %cond.else17, label %cond.then16
+
+cond.then16: ; preds = %cond.end
+ %tmp20 = extractelement <4 x i32> %v, i32 1
+ br label %cond.end18
+
+cond.else17: ; preds = %cond.end
+ br label %cond.end18
+
+cond.end18: ; preds = %cond.else17, %cond.then16
+ %cond22 = phi i32 [ %tmp20, %cond.then16 ], [ %cond, %cond.else17 ]
+ %tmp24 = extractelement <4 x i32> %cmp.ext, i32 2
+ %tobool25 = icmp eq i32 %tmp24, 0
+ br i1 %tobool25, label %cond.else27, label %cond.then26
+
+cond.then26: ; preds = %cond.end18
+ %tmp30 = extractelement <4 x i32> %v, i32 2
+ br label %cond.end28
+
+cond.else27: ; preds = %cond.end18
+ br label %cond.end28
+
+cond.end28: ; preds = %cond.else27, %cond.then26
+ %cond32 = phi i32 [ %tmp30, %cond.then26 ], [ %cond22, %cond.else27 ]
+ br label %return
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/TailDup/X86/lit.local.cfg b/test/Transforms/TailDup/X86/lit.local.cfg
index ba763cf..e71f3cc 100644
--- a/test/Transforms/TailDup/X86/lit.local.cfg
+++ b/test/Transforms/TailDup/X86/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
config.unsupported = True
diff --git a/test/Transforms/TailDup/lit.local.cfg b/test/Transforms/TailDup/lit.local.cfg
index 19840aa..c8625f4 100644
--- a/test/Transforms/TailDup/lit.local.cfg
+++ b/test/Transforms/TailDup/lit.local.cfg
@@ -1,3 +1,2 @@
-targets = set(config.root.targets_to_build.split())
-if not 'X86' in targets:
+if not 'X86' in config.root.targets:
config.unsupported = True