74 files changed, 3786 insertions, 446 deletions
diff --git a/test/Transforms/InstCombine/2008-05-23-CompareFold.ll b/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
index acb259b..b729677 100644
--- a/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
+++ b/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
@@ -1,5 +1,8 @@
-; RUN: opt < %s -instcombine -S | grep "ret i1 false"
+; RUN: opt -instcombine -S < %s | FileCheck %s
 ; PR2359
+
+; CHECK-LABEL: @f(
+; CHECK: ret i1 false
 define i1 @f(i8* %x) {
 entry:
        %tmp462 = load i8* %x, align 1          ; <i8> [#uses=1]
diff --git a/test/Transforms/InstCombine/2008-11-08-FCmp.ll b/test/Transforms/InstCombine/2008-11-08-FCmp.ll
index f33a1f5..f1af7ce 100644
--- a/test/Transforms/InstCombine/2008-11-08-FCmp.ll
+++ b/test/Transforms/InstCombine/2008-11-08-FCmp.ll
@@ -4,6 +4,7 @@
 ; When inst combining an FCMP with the LHS coming from a uitofp instruction, we
 ; can't lower it to signed ICMP instructions.
 
+; CHECK-LABEL: @test1(
 define i1 @test1(i32 %val) {
   %1 = uitofp i32 %val to double
   %2 = fcmp ole double %1, 0.000000e+00
@@ -11,6 +12,7 @@ define i1 @test1(i32 %val) {
   ret i1 %2
 }
 
+; CHECK-LABEL: @test2(
 define i1 @test2(i32 %val) {
   %1 = uitofp i32 %val to double
   %2 = fcmp olt double %1, 0.000000e+00
@@ -18,6 +20,7 @@ define i1 @test2(i32 %val) {
 ; CHECK: ret i1 false
 }
 
+; CHECK-LABEL: @test3(
 define i1 @test3(i32 %val) {
   %1 = uitofp i32 %val to double
   %2 = fcmp oge double %1, 0.000000e+00
@@ -25,6 +28,7 @@ define i1 @test3(i32 %val) {
 ; CHECK: ret i1 true
 }
 
+; CHECK-LABEL: @test4(
 define i1 @test4(i32 %val) {
   %1 = uitofp i32 %val to double
   %2 = fcmp ogt double %1, 0.000000e+00
@@ -32,6 +36,7 @@ define i1 @test4(i32 %val) {
   ret i1 %2
 }
 
+; CHECK-LABEL: @test5(
 define i1 @test5(i32 %val) {
   %1 = uitofp i32 %val to double
   %2 = fcmp ogt double %1, -4.400000e+00
@@ -39,6 +44,7 @@ define i1 @test5(i32 %val) {
 ; CHECK: ret i1 true
 }
 
+; CHECK-LABEL: @test6(
 define i1 @test6(i32 %val) {
   %1 = uitofp i32 %val to double
   %2 = fcmp olt double %1, -4.400000e+00
@@ -48,6 +54,7 @@ define i1 @test6(i32 %val) {
 
 ; Check that optimizing unsigned >= comparisons correctly distinguishes
 ; positive and negative constants.  <rdar://problem/12029145>
+; CHECK-LABEL: @test7(
 define i1 @test7(i32 %val) {
   %1 = uitofp i32 %val to double
   %2 = fcmp oge double %1, 3.200000e+00
diff --git a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
index 895b260..c8f0351 100644
--- a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
+++ b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
@@ -50,7 +50,8 @@ define void @fu2(i32 %parm) nounwind ssp {
   %7 = add  i32 %6, 2048
 ; CHECK: alloca i8
   %8 = alloca i8, i32 %7
-; CHECK-NEXT: bitcast i8*
+; CHECK-NEXT: bitcast double**
+; CHECK-NEXT: store i8*
   %9 = bitcast i8* %8 to double*
   store double* %9, double** %ptr, align 4
   br label %10
diff --git a/test/Transforms/InstCombine/AddOverFlow.ll b/test/Transforms/InstCombine/AddOverFlow.ll
index 8f3d429..bebfd62 100644
--- a/test/Transforms/InstCombine/AddOverFlow.ll
+++ b/test/Transforms/InstCombine/AddOverFlow.ll
@@ -36,8 +36,8 @@ define i16 @zero_sign_bit2(i16 %a, i16 %b) {
 
 declare i16 @bounded(i16 %input);
 declare i32 @__gxx_personality_v0(...);
-!0 = metadata !{i16 0, i16 32768} ; [0, 32767]
-!1 = metadata !{i16 0, i16 32769} ; [0, 32768]
+!0 = !{i16 0, i16 32768} ; [0, 32767]
+!1 = !{i16 0, i16 32769} ; [0, 32768]
 
 define i16 @add_bounded_values(i16 %a, i16 %b) {
 ; CHECK-LABEL: @add_bounded_values(
diff --git a/test/Transforms/InstCombine/LandingPadClauses.ll b/test/Transforms/InstCombine/LandingPadClauses.ll
index 10af4bc..0d42f7c 100644
--- a/test/Transforms/InstCombine/LandingPadClauses.ll
+++ b/test/Transforms/InstCombine/LandingPadClauses.ll
@@ -7,6 +7,7 @@
 declare i32 @generic_personality(i32, i64, i8*, i8*)
 declare i32 @__gxx_personality_v0(i32, i64, i8*, i8*)
 declare i32 @__objc_personality_v0(i32, i64, i8*, i8*)
+declare i32 @__C_specific_handler(...)
 
 declare void @bar()
 
@@ -231,3 +232,54 @@ lpad.d:
 ; CHECK-NEXT: null
 ; CHECK-NEXT: unreachable
 }
+
+define void @foo_seh() {
+; CHECK-LABEL: @foo_seh(
+  invoke void @bar()
+    to label %cont.a unwind label %lpad.a
+cont.a:
+  invoke void @bar()
+    to label %cont.b unwind label %lpad.b
+cont.b:
+  invoke void @bar()
+    to label %cont.c unwind label %lpad.c
+cont.c:
+  invoke void @bar()
+    to label %cont.d unwind label %lpad.d
+cont.d:
+  ret void
+
+lpad.a:
+  %a = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+          catch i32* null
+          catch i32* @T1
+  unreachable
+; CHECK: %a = landingpad
+; CHECK-NEXT: null
+; CHECK-NEXT: unreachable
+
+lpad.b:
+  %b = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+          filter [1 x i32*] zeroinitializer
+  unreachable
+; CHECK: %b = landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: unreachable
+
+lpad.c:
+  %c = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+          filter [2 x i32*] [i32* @T1, i32* null]
+  unreachable
+; CHECK: %c = landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: unreachable
+
+lpad.d:
+  %d = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+          cleanup
+          catch i32* null
+  unreachable
+; CHECK: %d = landingpad
+; CHECK-NEXT: null
+; CHECK-NEXT: unreachable
+}
diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll
index a166e5f..fbbba59 100644
--- a/test/Transforms/InstCombine/add2.ll
+++ b/test/Transforms/InstCombine/add2.ll
@@ -219,7 +219,7 @@ define i16 @mul_add_to_mul_1(i16 %x) {
  %add2 = add nsw i16 %x, %mul1
  ret i16 %add2
 ; CHECK-LABEL: @mul_add_to_mul_1(
-; CHECK-NEXT: %add2 = mul i16 %x, 9
+; CHECK-NEXT: %add2 = mul nsw i16 %x, 9
 ; CHECK-NEXT: ret i16 %add2
 }
 
@@ -228,7 +228,7 @@ define i16 @mul_add_to_mul_2(i16 %x) {
  %add2 = add nsw i16 %mul1, %x
  ret i16 %add2
 ; CHECK-LABEL: @mul_add_to_mul_2(
-; CHECK-NEXT: %add2 = mul i16 %x, 9
+; CHECK-NEXT: %add2 = mul nsw i16 %x, 9
 ; CHECK-NEXT: ret i16 %add2
 }
 
@@ -248,7 +248,7 @@ define i16 @mul_add_to_mul_4(i16 %a) {
  %add = add nsw i16 %mul1, %mul2
  ret i16 %add
 ; CHECK-LABEL: @mul_add_to_mul_4(
-; CHECK-NEXT: %add = mul i16 %a, 9
+; CHECK-NEXT: %add = mul nsw i16 %a, 9
 ; CHECK-NEXT: ret i16 %add
 }
 
@@ -294,7 +294,7 @@ define i16 @add_cttz(i16 %a) {
   ret i16 %b
 }
 declare i16 @llvm.cttz.i16(i16, i1)
-!0 = metadata !{i16 0, i16 8}
+!0 = !{i16 0, i16 8}
 
 ; Similar to @add_cttz, but in this test, the range implied by the
 ; intrinsic is more strict. Therefore, ValueTracking uses that range.
@@ -312,7 +312,7 @@ define i16 @add_cttz_2(i16 %a) {
 ; CHECK: or i16 %cttz, -16
   ret i16 %b
 }
-!1 = metadata !{i16 0, i16 32}
+!1 = !{i16 0, i16 32}
 
 define i32 @add_or_and(i32 %x, i32 %y) {
   %or = or i32 %x, %y
diff --git a/test/Transforms/InstCombine/addnegneg.ll b/test/Transforms/InstCombine/addnegneg.ll
index ad8791d..90f6baf 100644
--- a/test/Transforms/InstCombine/addnegneg.ll
+++ b/test/Transforms/InstCombine/addnegneg.ll
@@ -9,4 +9,3 @@ entry:
 	%sub6 = add i32 %sub4, %d		; <i32> [#uses=1]
 	ret i32 %sub6
 }
-
diff --git a/test/Transforms/InstCombine/alias-recursion.ll b/test/Transforms/InstCombine/alias-recursion.ll
new file mode 100644
index 0000000..fa63726
--- /dev/null
+++ b/test/Transforms/InstCombine/alias-recursion.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%class.A = type { i32 (...)** }
+
+@0 = constant [1 x i8*] zeroinitializer
+
+@vtbl = alias getelementptr inbounds ([1 x i8*]* @0, i32 0, i32 0)
+
+define i32 (%class.A*)* @test() {
+; CHECK-LABEL: test
+entry:
+  br i1 undef, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.body, %entry
+  br i1 undef, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  %A = phi i32 (%class.A*)** [ bitcast (i8** @vtbl to i32 (%class.A*)**), %for.body ], [ null, %entry ]
+  %B = load i32 (%class.A*)** %A
+  ret i32 (%class.A*)* %B
+}
diff --git a/test/Transforms/InstCombine/aligned-altivec.ll b/test/Transforms/InstCombine/aligned-altivec.ll
new file mode 100644
index 0000000..6ac2691
--- /dev/null
+++ b/test/Transforms/InstCombine/aligned-altivec.ll
@@ -0,0 +1,131 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <4 x i32> @llvm.ppc.altivec.lvx(i8*) #1
+
+define <4 x i32> @test1(<4 x i32>* %h) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  %vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
+
+; CHECK-LABEL: @test1
+; CHECK: @llvm.ppc.altivec.lvx
+; CHECK: ret <4 x i32>
+
+  %v0 = load <4 x i32>* %h, align 8
+  %a = add <4 x i32> %v0, %vl
+  ret <4 x i32> %a
+}
+
+define <4 x i32> @test1a(<4 x i32>* align 16 %h) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  %vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
+
+; CHECK-LABEL: @test1a
+; CHECK-NOT: @llvm.ppc.altivec.lvx
+; CHECK: ret <4 x i32>
+
+  %v0 = load <4 x i32>* %h, align 8
+  %a = add <4 x i32> %v0, %vl
+  ret <4 x i32> %a
+}
+
+declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*) #0
+
+define <4 x i32> @test2(<4 x i32>* %h, <4 x i32> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
+
+  %v0 = load <4 x i32>* %h, align 8
+  ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2
+; CHECK: @llvm.ppc.altivec.stvx
+; CHECK: ret <4 x i32>
+}
+
+define <4 x i32> @test2a(<4 x i32>* align 16 %h, <4 x i32> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
+
+  %v0 = load <4 x i32>* %h, align 8
+  ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2
+; CHECK-NOT: @llvm.ppc.altivec.stvx
+; CHECK: ret <4 x i32>
+}
+
+declare <4 x i32> @llvm.ppc.altivec.lvxl(i8*) #1
+
+define <4 x i32> @test1l(<4 x i32>* %h) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  %vl = call <4 x i32> @llvm.ppc.altivec.lvxl(i8* %hv)
+
+; CHECK-LABEL: @test1l
+; CHECK: @llvm.ppc.altivec.lvxl
+; CHECK: ret <4 x i32>
+
+  %v0 = load <4 x i32>* %h, align 8
+  %a = add <4 x i32> %v0, %vl
+  ret <4 x i32> %a
+}
+
+define <4 x i32> @test1la(<4 x i32>* align 16 %h) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  %vl = call <4 x i32> @llvm.ppc.altivec.lvxl(i8* %hv)
+
+; CHECK-LABEL: @test1la
+; CHECK-NOT: @llvm.ppc.altivec.lvxl
+; CHECK: ret <4 x i32>
+
+  %v0 = load <4 x i32>* %h, align 8
+  %a = add <4 x i32> %v0, %vl
+  ret <4 x i32> %a
+}
+
+declare void @llvm.ppc.altivec.stvxl(<4 x i32>, i8*) #0
+
+define <4 x i32> @test2l(<4 x i32>* %h, <4 x i32> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, i8* %hv)
+
+  %v0 = load <4 x i32>* %h, align 8
+  ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2l
+; CHECK: @llvm.ppc.altivec.stvxl
+; CHECK: ret <4 x i32>
+}
+
+define <4 x i32> @test2la(<4 x i32>* align 16 %h, <4 x i32> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, i8* %hv)
+
+  %v0 = load <4 x i32>* %h, align 8
+  ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2l
+; CHECK-NOT: @llvm.ppc.altivec.stvxl
+; CHECK: ret <4 x i32>
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+
diff --git a/test/Transforms/InstCombine/aligned-qpx.ll b/test/Transforms/InstCombine/aligned-qpx.ll
new file mode 100644
index 0000000..c8a1f6f
--- /dev/null
+++ b/test/Transforms/InstCombine/aligned-qpx.ll
@@ -0,0 +1,162 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <4 x double> @llvm.ppc.qpx.qvlfs(i8*) #1
+
+define <4 x double> @test1(<4 x float>* %h) #0 {
+entry:
+  %h1 = getelementptr <4 x float>* %h, i64 1
+  %hv = bitcast <4 x float>* %h1 to i8*
+  %vl = call <4 x double> @llvm.ppc.qpx.qvlfs(i8* %hv)
+
+; CHECK-LABEL: @test1
+; CHECK: @llvm.ppc.qpx.qvlfs
+; CHECK: ret <4 x double>
+
+  %v0 = load <4 x float>* %h, align 8
+  %v0e = fpext <4 x float> %v0 to <4 x double>
+  %a = fadd <4 x double> %v0e, %vl
+  ret <4 x double> %a
+}
+
+define <4 x double> @test1a(<4 x float>* align 16 %h) #0 {
+entry:
+  %h1 = getelementptr <4 x float>* %h, i64 1
+  %hv = bitcast <4 x float>* %h1 to i8*
+  %vl = call <4 x double> @llvm.ppc.qpx.qvlfs(i8* %hv)
+
+; CHECK-LABEL: @test1a
+; CHECK-NOT: @llvm.ppc.qpx.qvlfs
+; CHECK: ret <4 x double>
+
+  %v0 = load <4 x float>* %h, align 8
+  %v0e = fpext <4 x float> %v0 to <4 x double>
+  %a = fadd <4 x double> %v0e, %vl
+  ret <4 x double> %a
+}
+
+declare void @llvm.ppc.qpx.qvstfs(<4 x double>, i8*) #0
+
+define <4 x float> @test2(<4 x float>* %h, <4 x double> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x float>* %h, i64 1
+  %hv = bitcast <4 x float>* %h1 to i8*
+  call void @llvm.ppc.qpx.qvstfs(<4 x double> %d, i8* %hv)
+
+  %v0 = load <4 x float>* %h, align 8
+  ret <4 x float> %v0
+
+; CHECK-LABEL: @test2
+; CHECK: @llvm.ppc.qpx.qvstfs
+; CHECK: ret <4 x float>
+}
+
+define <4 x float> @test2a(<4 x float>* align 16 %h, <4 x double> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x float>* %h, i64 1
+  %hv = bitcast <4 x float>* %h1 to i8*
+  call void @llvm.ppc.qpx.qvstfs(<4 x double> %d, i8* %hv)
+
+  %v0 = load <4 x float>* %h, align 8
+  ret <4 x float> %v0
+
+; CHECK-LABEL: @test2
+; CHECK-NOT: @llvm.ppc.qpx.qvstfs
+; CHECK: ret <4 x float>
+}
+
+declare <4 x double> @llvm.ppc.qpx.qvlfd(i8*) #1
+
+define <4 x double> @test1l(<4 x double>* %h) #0 {
+entry:
+  %h1 = getelementptr <4 x double>* %h, i64 1
+  %hv = bitcast <4 x double>* %h1 to i8*
+  %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
+
+; CHECK-LABEL: @test1l
+; CHECK: @llvm.ppc.qpx.qvlfd
+; CHECK: ret <4 x double>
+
+  %v0 = load <4 x double>* %h, align 8
+  %a = fadd <4 x double> %v0, %vl
+  ret <4 x double> %a
+}
+
+define <4 x double> @test1ln(<4 x double>* align 16 %h) #0 {
+entry:
+  %h1 = getelementptr <4 x double>* %h, i64 1
+  %hv = bitcast <4 x double>* %h1 to i8*
+  %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
+
+; CHECK-LABEL: @test1ln
+; CHECK: @llvm.ppc.qpx.qvlfd
+; CHECK: ret <4 x double>
+
+  %v0 = load <4 x double>* %h, align 8
+  %a = fadd <4 x double> %v0, %vl
+  ret <4 x double> %a
+}
+
+define <4 x double> @test1la(<4 x double>* align 32 %h) #0 {
+entry:
+  %h1 = getelementptr <4 x double>* %h, i64 1
+  %hv = bitcast <4 x double>* %h1 to i8*
+  %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
+
+; CHECK-LABEL: @test1la
+; CHECK-NOT: @llvm.ppc.qpx.qvlfd
+; CHECK: ret <4 x double>
+
+  %v0 = load <4 x double>* %h, align 8
+  %a = fadd <4 x double> %v0, %vl
+  ret <4 x double> %a
+}
+
+declare void @llvm.ppc.qpx.qvstfd(<4 x double>, i8*) #0
+
+define <4 x double> @test2l(<4 x double>* %h, <4 x double> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x double>* %h, i64 1
+  %hv = bitcast <4 x double>* %h1 to i8*
+  call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
+
+  %v0 = load <4 x double>* %h, align 8
+  ret <4 x double> %v0
+
+; CHECK-LABEL: @test2l
+; CHECK: @llvm.ppc.qpx.qvstfd
+; CHECK: ret <4 x double>
+}
+
+define <4 x double> @test2ln(<4 x double>* align 16 %h, <4 x double> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x double>* %h, i64 1
+  %hv = bitcast <4 x double>* %h1 to i8*
+  call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
+
+  %v0 = load <4 x double>* %h, align 8
+  ret <4 x double> %v0
+
+; CHECK-LABEL: @test2ln
+; CHECK: @llvm.ppc.qpx.qvstfd
+; CHECK: ret <4 x double>
+}
+
+define <4 x double> @test2la(<4 x double>* align 32 %h, <4 x double> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x double>* %h, i64 1
+  %hv = bitcast <4 x double>* %h1 to i8*
+  call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
+
+  %v0 = load <4 x double>* %h, align 8
+  ret <4 x double> %v0
+
+; CHECK-LABEL: @test2l
+; CHECK-NOT: @llvm.ppc.qpx.qvstfd
+; CHECK: ret <4 x double>
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+
diff --git a/test/Transforms/InstCombine/and-compare.ll b/test/Transforms/InstCombine/and-compare.ll
index c30a245..037641b 100644
--- a/test/Transforms/InstCombine/and-compare.ll
+++ b/test/Transforms/InstCombine/and-compare.ll
@@ -1,11 +1,15 @@
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    grep and | count 1
+; RUN: FileCheck %s
 
 ; Should be optimized to one and.
 define i1 @test1(i32 %a, i32 %b) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: %1 = xor i32 %a, %b
+; CHECK-NEXT: %2 = and i32 %1, 65280
+; CHECK-NEXT: %tmp = icmp ne i32 %2, 0
+; CHECK-NEXT: ret i1 %tmp
         %tmp1 = and i32 %a, 65280               ; <i32> [#uses=1]
         %tmp3 = and i32 %b, 65280               ; <i32> [#uses=1]
         %tmp = icmp ne i32 %tmp1, %tmp3         ; <i1> [#uses=1]
         ret i1 %tmp
 }
-
diff --git a/test/Transforms/InstCombine/and-xor-merge.ll b/test/Transforms/InstCombine/and-xor-merge.ll
index e432a9a..b9a6a53 100644
--- a/test/Transforms/InstCombine/and-xor-merge.ll
+++ b/test/Transforms/InstCombine/and-xor-merge.ll
@@ -1,8 +1,11 @@
-; RUN: opt < %s -instcombine -S | grep and | count 1
-; RUN: opt < %s -instcombine -S | grep xor | count 2
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 ; (x&z) ^ (y&z) -> (x^y)&z
 define i32 @test1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: %tmp61 = xor i32 %x, %y
+; CHECK-NEXT: %tmp7 = and i32 %tmp61, %z
+; CHECK-NEXT: ret i32 %tmp7
         %tmp3 = and i32 %z, %x
         %tmp6 = and i32 %z, %y
         %tmp7 = xor i32 %tmp3, %tmp6
@@ -11,9 +14,11 @@ define i32 @test1(i32 %x, i32 %y, i32 %z) {
 
 ; (x & y) ^ (x|y) -> x^y
 define i32 @test2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: %tmp7 = xor i32 %y, %x
+; CHECK-NEXT: ret i32 %tmp7
         %tmp3 = and i32 %y, %x
         %tmp6 = or i32 %y, %x
         %tmp7 = xor i32 %tmp3, %tmp6
         ret i32 %tmp7
 }
-
diff --git a/test/Transforms/InstCombine/apint-call-cast-target.ll b/test/Transforms/InstCombine/apint-call-cast-target.ll
index 4e98f9b..f3a66c3 100644
--- a/test/Transforms/InstCombine/apint-call-cast-target.ll
+++ b/test/Transforms/InstCombine/apint-call-cast-target.ll
@@ -5,15 +5,18 @@ target triple = "i686-pc-linux-gnu"
 
 define i32 @main() {
 ; CHECK-LABEL: @main(
-; CHECK: call i32 bitcast
+; CHECK: %[[call:.*]] = call i7* @ctime(i999* null)
+; CHECK: %[[cast:.*]] = ptrtoint i7* %[[call]] to i32
+; CHECK: ret i32 %[[cast]]
 entry:
 	%tmp = call i32 bitcast (i7* (i999*)* @ctime to i32 (i99*)*)( i99* null )
 	ret i32 %tmp
 }
 
 define i7* @ctime(i999*) {
-; CHECK-LABEL: @ctime(
-; CHECK: call i7* bitcast
+; CHECK-LABEL: define i7* @ctime(
+; CHECK: %[[call:.*]] = call i32 @main()
+; CHECK: %[[cast:.*]] = inttoptr i32 %[[call]] to i7*
 entry:
 	%tmp = call i7* bitcast (i32 ()* @main to i7* ()*)( )
 	ret i7* %tmp
diff --git a/test/Transforms/InstCombine/bitcast-alias-function.ll b/test/Transforms/InstCombine/bitcast-alias-function.ll
index bc36b25..cfec092 100644
--- a/test/Transforms/InstCombine/bitcast-alias-function.ll
+++ b/test/Transforms/InstCombine/bitcast-alias-function.ll
@@ -94,7 +94,8 @@ entry:
 ; CHECK: load i32*
 ; CHECK-NOT: fptoui
 ; CHECK-NOT: uitofp
-; CHECK: bitcast i32 %call to float
+; CHECK: bitcast float* %dest to i32*
+; CHECK: store i32
   %tmp = load float* %source, align 8
   %call = call float @alias_i32_to_f32(float %tmp) nounwind
   store float %call, float* %dest, align 8
@@ -109,7 +110,8 @@ entry:
 ; CHECK: load <2 x i32>*
 ; CHECK-NOT: fptoui
 ; CHECK-NOT: uitofp
-; CHECK: bitcast <2 x i32> %call to <2 x float>
+; CHECK: bitcast <2 x float>* %dest to <2 x i32>*
+; CHECK: store <2 x i32>
   %tmp = load <2 x float>* %source, align 8
   %call = call <2 x float> @alias_v2i32_to_v2f32(<2 x float> %tmp) nounwind
   store <2 x float> %call, <2 x float>* %dest, align 8
@@ -123,7 +125,8 @@ entry:
 ; CHECK: bitcast <2 x float>* %source to i64*
 ; CHECK: load i64*
 ; CHECK: %call = call i64 @func_i64
-; CHECK: bitcast i64 %call to <2 x float>
+; CHECK: bitcast <2 x float>* %dest to i64*
+; CHECK: store i64
   %tmp = load <2 x float>* %source, align 8
   %call = call <2 x float> @alias_v2f32_to_i64(<2 x float> %tmp) nounwind
   store <2 x float> %call, <2 x float>* %dest, align 8
@@ -136,7 +139,8 @@ entry:
 ; CHECK: bitcast i64* %source to <2 x float>*
 ; CHECK: load <2 x float>*
 ; CHECK: call <2 x float> @func_v2f32
-; CHECK: bitcast <2 x float> %call to i64
+; CHECK: bitcast i64* %dest to <2 x float>*
+; CHECK: store <2 x float>
   %tmp = load i64* %source, align 8
   %call = call i64 @alias_i64_to_v2f32(i64 %tmp) nounwind
   store i64 %call, i64* %dest, align 8
@@ -149,7 +153,8 @@ entry:
 ; CHECK: bitcast <2 x i64*>* %source to <2 x i32*>*
 ; CHECK: load <2 x i32*>*
 ; CHECK: call <2 x i32*> @func_v2i32p
-; CHECK: bitcast <2 x i32*> %call to <2 x i64*>
+; CHECK: bitcast <2 x i64*>* %dest to <2 x i32*>*
+; CHECK: store <2 x i32*>
   %tmp = load <2 x i64*>* %source, align 8
   %call = call <2 x i64*> @alias_v2i32p_to_v2i64p(<2 x i64*> %tmp) nounwind
   store <2 x i64*> %call, <2 x i64*>* %dest, align 8
diff --git a/test/Transforms/InstCombine/bitcast-store.ll b/test/Transforms/InstCombine/bitcast-store.ll
index e46b5c8..ea4d680 100644
--- a/test/Transforms/InstCombine/bitcast-store.ll
+++ b/test/Transforms/InstCombine/bitcast-store.ll
@@ -10,11 +10,11 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 @G = external constant [5 x i8*]
 
 ; CHECK-LABEL: @foo
-; CHECK: store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 16, !tag !0
-define void @foo(%struct.A* %a) nounwind {
+; CHECK: store i32 %x, i32* %{{.*}}, align 16, !noalias !0
+define void @foo(i32 %x, float* %p) nounwind {
 entry:
-  %0 = bitcast %struct.A* %a to i8***
-  store i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2), i8*** %0, align 16, !tag !0
+  %x.cast = bitcast i32 %x to float
+  store float %x.cast, float* %p, align 16, !noalias !0
   ret void
 }
 
@@ -32,4 +32,4 @@ entry:
   ret void
 }
 
-!0 = metadata !{metadata !"hello"}
+!0 = !{!0}
diff --git a/test/Transforms/InstCombine/bswap-fold.ll b/test/Transforms/InstCombine/bswap-fold.ll
index 442ce58..63b0775 100644
--- a/test/Transforms/InstCombine/bswap-fold.ll
+++ b/test/Transforms/InstCombine/bswap-fold.ll
@@ -1,63 +1,79 @@
-; RUN: opt < %s -instcombine -S | not grep call.*bswap
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i1 @test1(i16 %tmp2) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT:  %tmp = icmp eq i16 %tmp2, 256
+; CHECK-NEXT:  ret i1 %tmp
         %tmp10 = call i16 @llvm.bswap.i16( i16 %tmp2 )
         %tmp = icmp eq i16 %tmp10, 1
         ret i1 %tmp
 }
 
 define i1 @test2(i32 %tmp) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT:  %tmp.upgrd.1 = icmp eq i32 %tmp, 16777216
+; CHECK-NEXT:  ret i1 %tmp.upgrd.1
         %tmp34 = tail call i32 @llvm.bswap.i32( i32 %tmp )
         %tmp.upgrd.1 = icmp eq i32 %tmp34, 1
         ret i1 %tmp.upgrd.1
 }
 
-declare i32 @llvm.bswap.i32(i32)
-
 define i1 @test3(i64 %tmp) {
+; CHECK-LABEL: @test3
+; CHECK-NEXT:  %tmp.upgrd.2 = icmp eq i64 %tmp, 72057594037927936
+; CHECK-NEXT:  ret i1 %tmp.upgrd.2
         %tmp34 = tail call i64 @llvm.bswap.i64( i64 %tmp )
         %tmp.upgrd.2 = icmp eq i64 %tmp34, 1
         ret i1 %tmp.upgrd.2
 }
 
-declare i64 @llvm.bswap.i64(i64)
-
-declare i16 @llvm.bswap.i16(i16)
-
 ; rdar://5992453
 ; A & 255
 define i32 @test4(i32 %a) nounwind  {
-entry:
-	%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )	
+; CHECK-LABEL: @test4
+; CHECK-NEXT:  %tmp2 = and i32 %a, 255
+; CHECK-NEXT:  ret i32 %tmp2
+	%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
 	%tmp4 = lshr i32 %tmp2, 24
 	ret i32 %tmp4
 }
 
 ; A
-define i32 @test5(i32 %a) nounwind  {
-entry:
+define i32 @test5(i32 %a) nounwind {
+; CHECK-LABEL: @test5
+; CHECK-NEXT:  ret i32 %a
 	%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
 	%tmp4 = tail call i32 @llvm.bswap.i32( i32 %tmp2 )
 	ret i32 %tmp4
 }
 
 ; a >> 24
-define i32 @test6(i32 %a) nounwind  {
-entry:
-	%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )	
+define i32 @test6(i32 %a) nounwind {
+; CHECK-LABEL: @test6
+; CHECK-NEXT:  %tmp2 = lshr i32 %a, 24
+; CHECK-NEXT   ret i32 %tmp4
+	%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
 	%tmp4 = and i32 %tmp2, 255
 	ret i32 %tmp4
 }
 
 ; PR5284
 define i16 @test7(i32 %A) {
-  %B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind 
+; CHECK-LABEL: @test7
+; CHECK-NEXT:  %1 = lshr i32 %A, 16
+; CHECK-NEXT:  %D = trunc i32 %1 to i16
+; CHECK-NEXT   ret i16 %D
+  %B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind
   %C = trunc i32 %B to i16
   %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
   ret i16 %D
 }
 
 define i16 @test8(i64 %A) {
+; CHECK-LABEL: @test8
+; CHECK-NEXT:  %1 = lshr i64 %A, 48
+; CHECK-NEXT:  %D = trunc i64 %1 to i16
+; CHECK-NEXT   ret i16 %D
   %B = tail call i64 @llvm.bswap.i64(i64 %A) nounwind 
   %C = trunc i64 %B to i16
   %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
@@ -66,6 +82,144 @@ define i16 @test8(i64 %A) {
 
 ; Misc: Fold bswap(undef) to undef.
 define i64 @foo() {
+; CHECK-LABEL: @foo
+; CHECK-NEXT: ret i64 undef
   %a = call i64 @llvm.bswap.i64(i64 undef)
   ret i64 %a
 }
+
+; PR15782
+; Fold: OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
+; Fold: OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
+define i16 @bs_and16i(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_and16i
+; CHECK-NEXT:  %1 = and i16 %a, 4391
+; CHECK-NEXT:  %2 = call i16 @llvm.bswap.i16(i16 %1)
+; CHECK-NEXT:  ret i16 %2
+  %1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %2 = and i16 %1, 10001
+  ret i16 %2
+}
+
+define i16 @bs_and16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_and16
+; CHECK-NEXT:  %1 = and i16 %a, %b
+; CHECK-NEXT:  %2 = call i16 @llvm.bswap.i16(i16 %1)
+; CHECK-NEXT:  ret i16 %2
+  %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %tmp3 = and i16 %tmp1, %tmp2
+  ret i16 %tmp3
+}
+
+define i16 @bs_or16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_or16
+; CHECK-NEXT:  %1 = or i16 %a, %b
+; CHECK-NEXT:  %2 = call i16 @llvm.bswap.i16(i16 %1)
+; CHECK-NEXT:  ret i16 %2
+  %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %tmp3 = or i16 %tmp1, %tmp2
+  ret i16 %tmp3
+}
+
+define i16 @bs_xor16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_xor16
+; CHECK-NEXT:  %1 = xor i16 %a, %b
+; CHECK-NEXT:  %2 = call i16 @llvm.bswap.i16(i16 %1)
+; CHECK-NEXT:  ret i16 %2
+  %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %tmp3 = xor i16 %tmp1, %tmp2
+  ret i16 %tmp3
+}
+
+define i32 @bs_and32i(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_and32i
+; CHECK-NEXT:  %1 = and i32 %a, -1585053440
+; CHECK-NEXT:  %2 = call i32 @llvm.bswap.i32(i32 %1)
+; CHECK-NEXT:  ret i32 %2
+  %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %tmp2 = and i32 %tmp1, 100001
+  ret i32 %tmp2
+}
+
+define i32 @bs_and32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_and32
+; CHECK-NEXT:  %1 = and i32 %a, %b
+; CHECK-NEXT:  %2 = call i32 @llvm.bswap.i32(i32 %1)
+; CHECK-NEXT:  ret i32 %2
+  %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %tmp3 = and i32 %tmp1, %tmp2
+  ret i32 %tmp3
+}
+
+define i32 @bs_or32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_or32
+; CHECK-NEXT:  %1 = or i32 %a, %b
+; CHECK-NEXT:  %2 = call i32 @llvm.bswap.i32(i32 %1)
+; CHECK-NEXT:  ret i32 %2
+  %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %tmp3 = or i32 %tmp1, %tmp2
+  ret i32 %tmp3
+}
+
+define i32 @bs_xor32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_xor32
+; CHECK-NEXT:  %1 = xor i32 %a, %b
+; CHECK-NEXT:  %2 = call i32 @llvm.bswap.i32(i32 %1)
+; CHECK-NEXT:  ret i32 %2
+  %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %tmp3 = xor i32 %tmp1, %tmp2
+  ret i32 %tmp3
+}
+
+define i64 @bs_and64i(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64i
+; CHECK-NEXT:  %1 = and i64 %a, 129085117527228416
+; CHECK-NEXT:  %2 = call i64 @llvm.bswap.i64(i64 %1)
+; CHECK-NEXT:  ret i64 %2
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = and i64 %tmp1, 1000000001
+  ret i64 %tmp2
+}
+
+define i64 @bs_and64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64
+; CHECK-NEXT:  %1 = and i64 %a, %b
+; CHECK-NEXT:  %2 = call i64 @llvm.bswap.i64(i64 %1)
+; CHECK-NEXT:  ret i64 %2
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = and i64 %tmp1, %tmp2
+  ret i64 %tmp3
+}
+
+define i64 @bs_or64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_or64
+; CHECK-NEXT:  %1 = or i64 %a, %b
+; CHECK-NEXT:  %2 = call i64 @llvm.bswap.i64(i64 %1)
+; CHECK-NEXT:  ret i64 %2
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = or i64 %tmp1, %tmp2
+  ret i64 %tmp3
+}
+
+define i64 @bs_xor64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_xor64
+; CHECK-NEXT:  %1 = xor i64 %a, %b
+; CHECK-NEXT:  %2 = call i64 @llvm.bswap.i64(i64 %1)
+; CHECK-NEXT:  ret i64 %2
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = xor i64 %tmp1, %tmp2
+  ret i64 %tmp3
+}
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
diff --git a/test/Transforms/InstCombine/call-cast-target.ll b/test/Transforms/InstCombine/call-cast-target.ll
index 1af3317..4a5c949 100644
--- a/test/Transforms/InstCombine/call-cast-target.ll
+++ b/test/Transforms/InstCombine/call-cast-target.ll
@@ -5,7 +5,9 @@ target triple = "i686-pc-linux-gnu"
 
 define i32 @main() {
 ; CHECK-LABEL: @main
-; CHECK: call i32 bitcast
+; CHECK: %[[call:.*]] = call i8* @ctime(i32* null)
+; CHECK: %[[cast:.*]] = ptrtoint i8* %[[call]] to i32
+; CHECK: ret i32 %[[cast]]
 entry:
   %tmp = call i32 bitcast (i8* (i32*)* @ctime to i32 (i32*)*)( i32* null )          ; <i32> [#uses=1]
   ret i32 %tmp
@@ -25,3 +27,48 @@ entry:
   %0 = call { i8 } bitcast ({ i8 } (i32*)* @foo to { i8 } (i16*)*)(i16* null)
   ret void
 }
+
+declare i32 @fn1(i32)
+
+define i32 @test1(i32* %a) {
+; CHECK-LABEL: @test1
+; CHECK:      %[[cast:.*]] = ptrtoint i32* %a to i32
+; CHECK-NEXT: %[[call:.*]] = tail call i32 @fn1(i32 %[[cast]])
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+  %call = tail call i32 bitcast (i32 (i32)* @fn1 to i32 (i32*)*)(i32* %a)
+  ret i32 %call
+}
+
+declare i32 @fn2(i16)
+
+define i32 @test2(i32* %a) {
+; CHECK-LABEL: @test2
+; CHECK:      %[[call:.*]] = tail call i32 bitcast (i32 (i16)* @fn2 to i32 (i32*)*)(i32* %a)
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+  %call = tail call i32 bitcast (i32 (i16)* @fn2 to i32 (i32*)*)(i32* %a)
+  ret i32 %call
+}
+
+declare i32 @fn3(i64)
+
+define i32 @test3(i32* %a) {
+; CHECK-LABEL: @test3
+; CHECK:      %[[call:.*]] = tail call i32 bitcast (i32 (i64)* @fn3 to i32 (i32*)*)(i32* %a)
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+  %call = tail call i32 bitcast (i32 (i64)* @fn3 to i32 (i32*)*)(i32* %a)
+  ret i32 %call
+}
+
+declare i32 @fn4(i32) "thunk"
+
+define i32 @test4(i32* %a) {
+; CHECK-LABEL: @test4
+; CHECK:      %[[call:.*]] = tail call i32 bitcast (i32 (i32)* @fn4 to i32 (i32*)*)(i32* %a)
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+  %call = tail call i32 bitcast (i32 (i32)* @fn4 to i32 (i32*)*)(i32* %a)
+  ret i32 %call
+}
diff --git a/test/Transforms/InstCombine/canonicalize_branch.ll b/test/Transforms/InstCombine/canonicalize_branch.ll
index b62b143..29fd51a 100644
--- a/test/Transforms/InstCombine/canonicalize_branch.ll
+++ b/test/Transforms/InstCombine/canonicalize_branch.ll
@@ -57,10 +57,10 @@ F:
         ret i32 123
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 1, i32 2}
-!1 = metadata !{metadata !"branch_weights", i32 3, i32 4}
-!2 = metadata !{metadata !"branch_weights", i32 5, i32 6}
-!3 = metadata !{metadata !"branch_weights", i32 7, i32 8}
+!0 = !{!"branch_weights", i32 1, i32 2}
+!1 = !{!"branch_weights", i32 3, i32 4}
+!2 = !{!"branch_weights", i32 5, i32 6}
+!3 = !{!"branch_weights", i32 7, i32 8}
 ; Base case shouldn't change.
 ; CHECK: !0 = {{.*}} i32 1, i32 2}
 ; Ensure that the branch metadata is reversed to match the reversals above.
diff --git a/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll b/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
new file mode 100644
index 0000000..551d0ef
--- /dev/null
+++ b/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
@@ -0,0 +1,454 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_0_uitofp(
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_n0_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+define i1 @i32_cast_cmp_oeq_int_n0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_0_sitofp(
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_n0_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp oeq
+define i1 @i32_cast_cmp_oeq_int_n0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_one_int_0_uitofp(
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_one_int_0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp one float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_one_int_n0_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp one
+define i1 @i32_cast_cmp_one_int_n0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp one float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_one_int_0_sitofp(
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_one_int_0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp one float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_one_int_n0_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp one
+define i1 @i32_cast_cmp_one_int_n0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp one float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ueq_int_0_uitofp(
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_ueq_int_0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp ueq float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ueq_int_n0_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp ueq
+define i1 @i32_cast_cmp_ueq_int_n0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp ueq float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ueq_int_0_sitofp(
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_ueq_int_0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp ueq float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ueq_int_n0_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp ueq
+define i1 @i32_cast_cmp_ueq_int_n0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp ueq float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_une_int_0_uitofp(
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_une_int_0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp une float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_une_int_n0_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp une
+define i1 @i32_cast_cmp_une_int_n0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp une float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_une_int_0_sitofp(
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_une_int_0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp une float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_une_int_n0_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp une
+define i1 @i32_cast_cmp_une_int_n0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp une float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ogt_int_0_uitofp(
+; CHECK: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_ogt_int_0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp ogt float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ogt_int_n0_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp ogt
+define i1 @i32_cast_cmp_ogt_int_n0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp ogt float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ogt_int_0_sitofp(
+; CHECK: icmp sgt i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_ogt_int_0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp ogt float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ogt_int_n0_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp ogt
+define i1 @i32_cast_cmp_ogt_int_n0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp ogt float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ole_int_0_uitofp(
+; CHECK: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_ole_int_0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp ole float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ole_int_0_sitofp(
+; CHECK: icmp slt i32 %i, 1
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_ole_int_0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp ole float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_olt_int_0_uitofp(
+; CHECK: ret i1 false
+define i1 @i32_cast_cmp_olt_int_0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp olt float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_olt_int_0_sitofp(
+; CHECK: icmp slt i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_olt_int_0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp olt float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i64_cast_cmp_oeq_int_0_uitofp(
+; CHECK-NEXT: icmp eq i64 %i, 0
+; CHECK-NEXT: ret
+define i1 @i64_cast_cmp_oeq_int_0_uitofp(i64 %i) {
+  %f = uitofp i64 %i to float
+  %cmp = fcmp oeq float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i64_cast_cmp_oeq_int_0_sitofp(
+; CHECK-NEXT: icmp eq i64 %i, 0
+; CHECK-NEXT: ret
+define i1 @i64_cast_cmp_oeq_int_0_sitofp(i64 %i) {
+  %f = sitofp i64 %i to float
+  %cmp = fcmp oeq float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i64_cast_cmp_oeq_int_0_uitofp_half(
+; CHECK-NEXT: icmp eq i64 %i, 0
+; CHECK-NEXT: ret
+define i1 @i64_cast_cmp_oeq_int_0_uitofp_half(i64 %i) {
+  %f = uitofp i64 %i to half
+  %cmp = fcmp oeq half %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i64_cast_cmp_oeq_int_0_sitofp_half(
+; CHECK-NEXT: icmp eq i64 %i, 0
+; CHECK-NEXT: ret
+define i1 @i64_cast_cmp_oeq_int_0_sitofp_half(i64 %i) {
+  %f = sitofp i64 %i to half
+  %cmp = fcmp oeq half %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_0_uitofp_ppcf128(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_0_uitofp_ppcf128(i32 %i) {
+  %f = uitofp i32 %i to ppc_fp128
+  %cmp = fcmp oeq ppc_fp128 %f, 0xM00000000000000000000000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24max_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+
+; XCHECK: icmp eq i32 %i, 16777215
+; XCHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i24max_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x416FFFFFE0000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24max_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp oeq
+
+; XCHECK: icmp eq i32 %i, 16777215
+; XCHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i24max_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x416FFFFFE0000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24maxp1_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+
+; XCHECK: icmp eq i32 %i, 16777216
+; XCHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i24maxp1_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x4170000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24maxp1_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp oeq
+
+; XCHECK: icmp eq i32 %i, 16777216
+; XCHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i24maxp1_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x4170000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32umax_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i32umax_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x41F0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32umax_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i32umax_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x41F0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32imin_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i32imin_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0xC1E0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32imin_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i32imin_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0xC1E0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32imax_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i32imax_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x41E0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32imax_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i32imax_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x41E0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_negi32umax_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_negi32umax_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0xC1F0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_negi32umax_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_negi32umax_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0xC1F0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_half_uitofp(
+; CHECK: ret i1 false
+define i1 @i32_cast_cmp_oeq_half_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0.5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_half_sitofp(
+; CHECK: ret i1 false
+define i1 @i32_cast_cmp_oeq_half_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0.5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_one_half_uitofp(
+; CHECK: ret i1 true
+define i1 @i32_cast_cmp_one_half_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp one float %f, 0.5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_one_half_sitofp(
+; CHECK: ret i1 true
+define i1 @i32_cast_cmp_one_half_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp one float %f, 0.5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ueq_half_uitofp(
+; CHECK: ret i1 false
+define i1 @i32_cast_cmp_ueq_half_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp ueq float %f, 0.5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ueq_half_sitofp(
+; CHECK: ret i1 false
+define i1 @i32_cast_cmp_ueq_half_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp ueq float %f, 0.5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_une_half_uitofp(
+; CHECK: ret i1 true
+define i1 @i32_cast_cmp_une_half_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp une float %f, 0.5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_une_half_sitofp(
+; CHECK: ret i1 true
+define i1 @i32_cast_cmp_une_half_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp une float %f, 0.5
+  ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 578b16d..aac7a53 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -99,6 +99,26 @@ define void @test11(i32* %P) {
 ; CHECK: ret void
 }
 
+declare i32 @__gxx_personality_v0(...)
+define void @test_invoke_vararg_cast(i32* %a, i32* %b) {
+entry:
+  %0 = bitcast i32* %b to i8*
+  %1 = bitcast i32* %a to i64*
+  invoke void (i32, ...)* @varargs(i32 1, i8* %0, i64* %1)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %entry
+  ret void
+
+lpad:                                             ; preds = %entry
+  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  ret void
+; CHECK-LABEL: test_invoke_vararg_cast
+; CHECK-LABEL: entry:
+; CHECK: invoke void (i32, ...)* @varargs(i32 1, i32* %b, i32* %a)
+}
+
 define i8* @test13(i64 %A) {
         %c = getelementptr [0 x i8]* bitcast ([32832 x i8]* @inbuf to [0 x i8]*), i64 0, i64 %A             ; <i8*> [#uses=1]
         ret i8* %c
@@ -199,15 +219,6 @@ define i1 @test24(i1 %C) {
 ; CHECK: ret i1 true
 }
 
-define void @test25(i32** %P) {
-        %c = bitcast i32** %P to float**                ; <float**> [#uses=1]
-        ;; Fold cast into null
-        store float* null, float** %c
-        ret void
-; CHECK: store i32* null, i32** %P
-; CHECK: ret void
-}
-
 define i32 @test26(float %F) {
         ;; no need to cast from float->double.
         %c = fpext float %F to double           ; <double> [#uses=1]
diff --git a/test/Transforms/InstCombine/cast_ptr.ll b/test/Transforms/InstCombine/cast_ptr.ll
index 23006a8..cc7a2bf 100644
--- a/test/Transforms/InstCombine/cast_ptr.ll
+++ b/test/Transforms/InstCombine/cast_ptr.ll
@@ -3,6 +3,8 @@
 
 target datalayout = "p:32:32-p1:32:32-p2:16:16"
 
+@global = global i8 0
+
 ; This shouldn't convert to getelementptr because the relationship
 ; between the arithmetic and the layout of allocated memory is
 ; entirely unknown.
@@ -47,10 +49,29 @@ define i1 @test2_as2_larger(i8 addrspace(2)* %a, i8 addrspace(2)* %b) {
   ret i1 %r
 }
 
+; These casts should not be folded away.
+; CHECK-LABEL: @test2_diff_as
+; CHECK: icmp sge i32 %i0, %i1
+define i1 @test2_diff_as(i8* %p, i8 addrspace(1)* %q) {
+  %i0 = ptrtoint i8* %p to i32
+  %i1 = ptrtoint i8 addrspace(1)* %q to i32
+  %r0 = icmp sge i32 %i0, %i1
+  ret i1 %r0
+}
+
+; These casts should not be folded away.
+; CHECK-LABEL: @test2_diff_as_global
+; CHECK: icmp sge i32 %i1
+define i1 @test2_diff_as_global(i8 addrspace(1)* %q) {
+  %i0 = ptrtoint i8* @global to i32
+  %i1 = ptrtoint i8 addrspace(1)* %q to i32
+  %r0 = icmp sge i32 %i1, %i0
+  ret i1 %r0
+}
+
 ; These casts should also be folded away.
 ; CHECK-LABEL: @test3(
 ; CHECK: icmp eq i8* %a, @global
-@global = global i8 0
 define i1 @test3(i8* %a) {
         %tmpa = ptrtoint i8* %a to i32
         %r = icmp eq i32 %tmpa, ptrtoint (i8* @global to i32)
diff --git a/test/Transforms/InstCombine/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll
index 309843f..1946576 100644
--- a/test/Transforms/InstCombine/debug-line.ll
+++ b/test/Transforms/InstCombine/debug-line.ll
@@ -15,14 +15,14 @@ declare i32 @printf(i8*, ...)
 !llvm.module.flags = !{!10}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{metadata !"0x2e\00foo\00foo\00\004\000\001\000\006\000\000\000", metadata !8, metadata !1, metadata !3, null, void ()* @foo, null, null, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{metadata !"0x29", metadata !8} ; [ DW_TAG_file_type ]
-!2 = metadata !{metadata !"0x11\0012\00clang\001\00\000\00\000", metadata !8, metadata !4, metadata !4, metadata !9, null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !8, metadata !1, null, metadata !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null}
-!5 = metadata !{i32 5, i32 2, metadata !6, null}
-!6 = metadata !{metadata !"0xb\004\0012\000", metadata !8, metadata !0} ; [ DW_TAG_lexical_block ]
-!7 = metadata !{i32 6, i32 1, metadata !6, null}
-!8 = metadata !{metadata !"m.c", metadata !"/private/tmp"}
-!9 = metadata !{metadata !0}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
+!0 = !{!"0x2e\00foo\00foo\00\004\000\001\000\006\000\000\000", !8, !1, !3, null, void ()* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !8} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang\001\00\000\00\000", !8, !4, !4, !9, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !8, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!5 = !MDLocation(line: 5, column: 2, scope: !6)
+!6 = !{!"0xb\004\0012\000", !8, !0} ; [ DW_TAG_lexical_block ]
+!7 = !MDLocation(line: 6, column: 1, scope: !6)
+!8 = !{!"m.c", !"/private/tmp"}
+!9 = !{!0}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index a7a491e..ae72f70 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -14,11 +14,11 @@ entry:
   store i8* %__dest, i8** %__dest.addr, align 8
 ; CHECK-NOT: call void @llvm.dbg.declare
 ; CHECK: call void @llvm.dbg.value
-  call void @llvm.dbg.declare(metadata !{i8** %__dest.addr}, metadata !0, metadata !{}), !dbg !16
+  call void @llvm.dbg.declare(metadata i8** %__dest.addr, metadata !0, metadata !{}), !dbg !16
   store i32 %__val, i32* %__val.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %__val.addr}, metadata !7, metadata !{}), !dbg !18
+  call void @llvm.dbg.declare(metadata i32* %__val.addr, metadata !7, metadata !{}), !dbg !18
   store i64 %__len, i64* %__len.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i64* %__len.addr}, metadata !9, metadata !{}), !dbg !20
+  call void @llvm.dbg.declare(metadata i64* %__len.addr, metadata !9, metadata !{}), !dbg !20
   %tmp = load i8** %__dest.addr, align 8, !dbg !21
   %tmp1 = load i32* %__val.addr, align 4, !dbg !21
   %tmp2 = load i64* %__len.addr, align 8, !dbg !21
@@ -31,29 +31,29 @@ entry:
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!30}
 
-!0 = metadata !{metadata !"0x101\00__dest\0016777294\000", metadata !1, metadata !2, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{metadata !"0x2e\00foobar\00foobar\00\0079\001\001\000\006\00256\001\0079", metadata !27, metadata !2, metadata !4, null, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25} ; [ DW_TAG_subprogram ] [line 79] [local] [def] [foobar]
-!2 = metadata !{metadata !"0x29", metadata !27} ; [ DW_TAG_file_type ]
-!3 = metadata !{metadata !"0x11\0012\00clang version 3.0 (trunk 127710)\001\00\000\00\000", metadata !28, metadata !29, metadata !29, metadata !24, null, null} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !27, metadata !2, null, metadata !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", null, metadata !3, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{metadata !"0x101\00__val\0033554510\000", metadata !1, metadata !2, metadata !8} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, metadata !3} ; [ DW_TAG_base_type ]
-!9 = metadata !{metadata !"0x101\00__len\0050331726\000", metadata !1, metadata !2, metadata !10} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{metadata !"0x16\00size_t\0080\000\000\000\000", metadata !27, metadata !3, metadata !11} ; [ DW_TAG_typedef ]
-!11 = metadata !{metadata !"0x16\00__darwin_size_t\0090\000\000\000\000", metadata !27, metadata !3, metadata !12} ; [ DW_TAG_typedef ]
-!12 = metadata !{metadata !"0x24\00long unsigned int\000\0064\0064\000\000\007", null, metadata !3} ; [ DW_TAG_base_type ]
-!16 = metadata !{i32 78, i32 28, metadata !1, null}
-!18 = metadata !{i32 78, i32 40, metadata !1, null}
-!20 = metadata !{i32 78, i32 54, metadata !1, null}
-!21 = metadata !{i32 80, i32 3, metadata !22, null}
-!22 = metadata !{metadata !"0xb\0080\003\007", metadata !27, metadata !23} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{metadata !"0xb\0079\001\006", metadata !27, metadata !1} ; [ DW_TAG_lexical_block ]
-!24 = metadata !{metadata !1}
-!25 = metadata !{metadata !0, metadata !7, metadata !9}
-!26 = metadata !{metadata !"0x29", metadata !28} ; [ DW_TAG_file_type ]
-!27 = metadata !{metadata !"string.h", metadata !"Game"}
-!28 = metadata !{metadata !"bits.c", metadata !"Game"}
-!29 = metadata !{i32 0}
-!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
+!0 = !{!"0x101\00__dest\0016777294\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00foobar\00foobar\00\0079\001\001\000\006\00256\001\0079", !27, !2, !4, null, i8* (i8*, i32, i64)* @foobar, null, null, !25} ; [ DW_TAG_subprogram ] [line 79] [local] [def] [foobar]
+!2 = !{!"0x29", !27} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\0012\00clang version 3.0 (trunk 127710)\001\00\000\00\000", !28, !29, !29, !24, null, null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !27, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!6}
+!6 = !{!"0xf\00\000\0064\0064\000\000", null, !3, null} ; [ DW_TAG_pointer_type ]
+!7 = !{!"0x101\00__val\0033554510\000", !1, !2, !8} ; [ DW_TAG_arg_variable ]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !3} ; [ DW_TAG_base_type ]
+!9 = !{!"0x101\00__len\0050331726\000", !1, !2, !10} ; [ DW_TAG_arg_variable ]
+!10 = !{!"0x16\00size_t\0080\000\000\000\000", !27, !3, !11} ; [ DW_TAG_typedef ]
+!11 = !{!"0x16\00__darwin_size_t\0090\000\000\000\000", !27, !3, !12} ; [ DW_TAG_typedef ]
+!12 = !{!"0x24\00long unsigned int\000\0064\0064\000\000\007", null, !3} ; [ DW_TAG_base_type ]
+!16 = !MDLocation(line: 78, column: 28, scope: !1)
+!18 = !MDLocation(line: 78, column: 40, scope: !1)
+!20 = !MDLocation(line: 78, column: 54, scope: !1)
+!21 = !MDLocation(line: 80, column: 3, scope: !22)
+!22 = !{!"0xb\0080\003\007", !27, !23} ; [ DW_TAG_lexical_block ]
+!23 = !{!"0xb\0079\001\006", !27, !1} ; [ DW_TAG_lexical_block ]
+!24 = !{!1}
+!25 = !{!0, !7, !9}
+!26 = !{!"0x29", !28} ; [ DW_TAG_file_type ]
+!27 = !{!"string.h", !"Game"}
+!28 = !{!"bits.c", !"Game"}
+!29 = !{i32 0}
+!30 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index 2841043..e0ff07b 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -217,7 +217,7 @@ define i32 @test25(i32 %a) {
   %div = sdiv i32 %shl, 2
   ret i32 %div
 ; CHECK-LABEL: @test25(
-; CHECK-NEXT: %div = shl i32 %a, 1
+; CHECK-NEXT: %div = shl nsw i32 %a, 1
 ; CHECK-NEXT: ret i32 %div
 }
 
@@ -226,7 +226,7 @@ define i32 @test26(i32 %a) {
   %div = sdiv i32 %mul, 3
   ret i32 %div
 ; CHECK-LABEL: @test26(
-; CHECK-NEXT: %div = shl i32 %a, 2
+; CHECK-NEXT: %div = shl nsw i32 %a, 2
 ; CHECK-NEXT: ret i32 %div
 }
 
@@ -286,3 +286,42 @@ define i32 @test32(i32 %a, i32 %b) {
 ; CHECK-NEXT: %[[div:.*]] = udiv i32 %a, %[[shr]]
 ; CHECK-NEXT: ret i32
 }
+
+define <2 x i64> @test33(<2 x i64> %x) nounwind {
+  %shr = lshr exact <2 x i64> %x, <i64 5, i64 5>
+  %div = udiv exact <2 x i64> %shr, <i64 6, i64 6>
+  ret <2 x i64> %div
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: udiv exact <2 x i64> %x, <i64 192, i64 192>
+; CHECK-NEXT: ret <2 x i64>
+}
+
+define <2 x i64> @test34(<2 x i64> %x) nounwind {
+  %neg = sub nsw <2 x i64> zeroinitializer, %x
+  %div = sdiv exact <2 x i64> %neg, <i64 3, i64 4>
+  ret <2 x i64> %div
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: sdiv exact <2 x i64> %x, <i64 -3, i64 -4>
+; CHECK-NEXT: ret <2 x i64>
+}
+
+define i32 @test35(i32 %A) {
+  %and = and i32 %A, 2147483647
+  %mul = sdiv exact i32 %and, 2147483647
+  ret i32 %mul
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: %[[and:.*]]  = and i32 %A, 2147483647
+; CHECK-NEXT: %[[udiv:.*]] = udiv exact i32 %[[and]], 2147483647
+; CHECK-NEXT: ret i32 %[[udiv]]
+}
+
+define i32 @test36(i32 %A) {
+  %and = and i32 %A, 2147483647
+  %shl = shl nsw i32 1, %A
+  %mul = sdiv exact i32 %and, %shl
+  ret i32 %mul
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: %[[and:.*]] = and i32 %A, 2147483647
+; CHECK-NEXT: %[[shr:.*]] = lshr exact i32 %[[and]], %A
+; CHECK-NEXT: ret i32 %[[shr]]
+}
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index b0ec895..c6081c3 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -93,7 +93,7 @@ define float @fold9(float %f1, float %f2) {
   ret float %t3
 
 ; CHECK-LABEL: @fold9(
-; CHECK: fsub fast float 0.000000e+00, %f2
+; CHECK: fsub fast float -0.000000e+00, %f2
 }
 
 ; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
@@ -322,6 +322,14 @@ define float @fneg1(float %f1, float %f2) {
 ; CHECK: fmul float %f1, %f2
 }
 
+define float @fneg2(float %x) {
+  %sub = fsub nsz float 0.0, %x
+  ret float %sub
+; CHECK-LABEL: @fneg2(
+; CHECK-NEXT: fsub nsz float -0.000000e+00, %x
+; CHECK-NEXT: ret float 
+}
+
 ; =========================================================================
 ;
 ;   Testing-cases about div
diff --git a/test/Transforms/InstCombine/fcmp.ll b/test/Transforms/InstCombine/fcmp.ll
index afc6782..ee39d10 100644
--- a/test/Transforms/InstCombine/fcmp.ll
+++ b/test/Transforms/InstCombine/fcmp.ll
@@ -1,5 +1,7 @@
 ; RUN: opt -S -instcombine < %s | FileCheck %s
 
+declare double @llvm.fabs.f64(double) nounwind readnone
+
 define i1 @test1(float %x, float %y) nounwind {
   %ext1 = fpext float %x to double
   %ext2 = fpext float %y to double
@@ -81,6 +83,16 @@ define i32 @test9(double %a) nounwind {
 ; CHECK: ret i32 0
 }
 
+define i32 @test9_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp olt double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test9_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: ret i32 0
+}
+
 define i32 @test10(double %a) nounwind {
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp ole double %call, 0.000000e+00
@@ -91,6 +103,16 @@ define i32 @test10(double %a) nounwind {
 ; CHECK: fcmp oeq double %a, 0.000000e+00
 }
 
+define i32 @test10_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp ole double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test10_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: fcmp oeq double %a, 0.000000e+00
+}
+
 define i32 @test11(double %a) nounwind {
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp ogt double %call, 0.000000e+00
@@ -101,6 +123,16 @@ define i32 @test11(double %a) nounwind {
 ; CHECK: fcmp one double %a, 0.000000e+00
 }
 
+define i32 @test11_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp ogt double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test11_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: fcmp one double %a, 0.000000e+00
+}
+
 define i32 @test12(double %a) nounwind {
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp oge double %call, 0.000000e+00
@@ -111,6 +143,16 @@ define i32 @test12(double %a) nounwind {
 ; CHECK: fcmp ord double %a, 0.000000e+00
 }
 
+define i32 @test12_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp oge double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test12_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: fcmp ord double %a, 0.000000e+00
+}
+
 define i32 @test13(double %a) nounwind {
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp une double %call, 0.000000e+00
@@ -121,6 +163,16 @@ define i32 @test13(double %a) nounwind {
 ; CHECK: fcmp une double %a, 0.000000e+00
 }
 
+define i32 @test13_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp une double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test13_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: fcmp une double %a, 0.000000e+00
+}
+
 define i32 @test14(double %a) nounwind {
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp oeq double %call, 0.000000e+00
@@ -131,6 +183,16 @@ define i32 @test14(double %a) nounwind {
 ; CHECK: fcmp oeq double %a, 0.000000e+00
 }
 
+define i32 @test14_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp oeq double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test14_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: fcmp oeq double %a, 0.000000e+00
+}
+
 define i32 @test15(double %a) nounwind {
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp one double %call, 0.000000e+00
@@ -141,6 +203,16 @@ define i32 @test15(double %a) nounwind {
 ; CHECK: fcmp one double %a, 0.000000e+00
 }
 
+define i32 @test15_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp one double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test15_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: fcmp one double %a, 0.000000e+00
+}
+
 define i32 @test16(double %a) nounwind {
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp ueq double %call, 0.000000e+00
@@ -151,6 +223,16 @@ define i32 @test16(double %a) nounwind {
 ; CHECK: fcmp ueq double %a, 0.000000e+00
 }
 
+define i32 @test16_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp ueq double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test16_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: fcmp ueq double %a, 0.000000e+00
+}
+
 ; Don't crash.
 define i32 @test17(double %a, double (double)* %p) nounwind {
   %call = tail call double %p(double %a) nounwind
diff --git a/test/Transforms/InstCombine/float-shrink-compare.ll b/test/Transforms/InstCombine/float-shrink-compare.ll
index e500467..a08f953 100644
--- a/test/Transforms/InstCombine/float-shrink-compare.ll
+++ b/test/Transforms/InstCombine/float-shrink-compare.ll
@@ -222,8 +222,46 @@ define i32 @test18(float %x, float %y, float %z) nounwind uwtable {
 ; CHECK-NEXT: fcmp oeq float %fmaxf, %z
 }
 
+define i32 @test19(float %x, float %y, float %z) nounwind uwtable {
+  %1 = fpext float %x to double
+  %2 = fpext float %y to double
+  %3 = call double @copysign(double %1, double %2) nounwind
+  %4 = fpext float %z to double
+  %5 = fcmp oeq double %3, %4
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: %copysignf = call float @copysignf(float %x, float %y)
+; CHECK-NEXT: fcmp oeq float %copysignf, %z
+}
+
+define i32 @test20(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %y to double
+  %2 = fpext float %x to double
+  %3 = call double @fmin(double 1.000000e+00, double %2) nounwind
+  %4 = fcmp oeq double %1, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: %fminf = call float @fminf(float 1.000000e+00, float %x)
+; CHECK-NEXT: fcmp oeq float %fminf, %y
+}
+
+define i32 @test21(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %y to double
+  %2 = fpext float %x to double
+  %3 = call double @fmin(double 1.300000e+00, double %2) nounwind
+  %4 = fcmp oeq double %1, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; should not be changed to fminf as the constant would loose precision
+; CHECK-LABEL: @test21(
+; CHECK: %3 = call double @fmin(double 1.300000e+00, double %2)
+}
+
 declare double @fabs(double) nounwind readnone
 declare double @ceil(double) nounwind readnone
+declare double @copysign(double, double) nounwind readnone
 declare double @floor(double) nounwind readnone
 declare double @nearbyint(double) nounwind readnone
 declare double @rint(double) nounwind readnone
diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll
index ac03402..8319624 100644
--- a/test/Transforms/InstCombine/fpcast.ll
+++ b/test/Transforms/InstCombine/fpcast.ll
@@ -73,3 +73,15 @@ define float @test7(double %V) {
 ; CHECK-NEXT: %[[trunc:.*]] = fptrunc double %frem to float
 ; CHECK-NEXT: ret float %trunc
 }
+
+define float @test8(float %V) {
+  %fext = fpext float %V to double
+  %frem = frem double %fext, 1.000000e-01
+  %trunc = fptrunc double %frem to float
+  ret float %trunc
+; CHECK-LABEL: @test8
+; CHECK-NEXT: %[[fext:.*]]  = fpext float %V to double
+; CHECK-NEXT: %[[frem:.*]]  = frem double %fext, 1.000000e-01
+; CHECK-NEXT: %[[trunc:.*]] = fptrunc double %frem to float
+; CHECK-NEXT: ret float %trunc
+}
diff --git a/test/Transforms/InstCombine/gc.relocate.ll b/test/Transforms/InstCombine/gc.relocate.ll
new file mode 100644
index 0000000..d10ef5f
--- /dev/null
+++ b/test/Transforms/InstCombine/gc.relocate.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -datalayout -instcombine -S | FileCheck %s
+
+; Uses InstCombine with DataLayout to propagate dereferenceable
+; attribute via gc.relocate: if the derived ptr is dereferenceable(N),
+; then the return attribute of gc.relocate is dereferenceable(N).
+
+declare zeroext i1 @return_i1()
+declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32)
+
+define i32 addrspace(1)* @deref(i32 addrspace(1)* dereferenceable(8) %dparam) {
+; Checks that a dereferenceabler pointer
+; CHECK-LABEL: @deref
+; CHECK: call dereferenceable(8)
+entry:
+    %load = load i32 addrspace(1)* %dparam
+    %tok = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
+    %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %tok, i32 4, i32 4)
+    ret i32 addrspace(1)* %relocate
+}
+\ No newline at end of file
diff --git a/test/Transforms/InstCombine/gep-sext.ll b/test/Transforms/InstCombine/gep-sext.ll
new file mode 100644
index 0000000..3d23dab
--- /dev/null
+++ b/test/Transforms/InstCombine/gep-sext.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-win32"
+
+declare void @use(i32) readonly
+
+; We prefer to canonicalize the machine width gep indices early
+define void @test(i32* %p, i32 %index) {
+; CHECK-LABEL: @test
+; CHECK-NEXT: %1 = sext i32 %index to i64
+; CHECK-NEXT: %addr = getelementptr i32* %p, i64 %1
+  %addr = getelementptr i32* %p, i32 %index
+  %val = load i32* %addr
+  call void @use(i32 %val)
+  ret void
+}
+; If they've already been canonicalized via zext, that's fine
+define void @test2(i32* %p, i32 %index) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: %i = zext i32 %index to i64
+; CHECK-NEXT: %addr = getelementptr i32* %p, i64 %i
+  %i = zext i32 %index to i64
+  %addr = getelementptr i32* %p, i64 %i
+  %val = load i32* %addr
+  call void @use(i32 %val)
+  ret void
+}
+; If we can use a zext, we prefer that.  This requires
+; knowing that the index is positive.
+define void @test3(i32* %p, i32 %index) {
+; CHECK-LABEL: @test3
+; CHECK:   zext
+; CHECK-NOT: sext
+  %addr_begin = getelementptr i32* %p, i64 40
+  %addr_fixed = getelementptr i32* %addr_begin, i64 48
+  %val_fixed = load i32* %addr_fixed, !range !0
+  %addr = getelementptr i32* %addr_begin, i32 %val_fixed
+  %val = load i32* %addr
+  call void @use(i32 %val)
+  ret void
+}
+; Replace sext with zext where possible
+define void @test4(i32* %p, i32 %index) {
+; CHECK-LABEL: @test4
+; CHECK:   zext
+; CHECK-NOT: sext
+  %addr_begin = getelementptr i32* %p, i64 40
+  %addr_fixed = getelementptr i32* %addr_begin, i64 48
+  %val_fixed = load i32* %addr_fixed, !range !0
+  %i = sext i32 %val_fixed to i64
+  %addr = getelementptr i32* %addr_begin, i64 %i
+  %val = load i32* %addr
+  call void @use(i32 %val)
+  ret void
+}
+
+;;  !range !0
+!0 = !{i32 0, i32 2147483647}
+
+
+
diff --git a/test/Transforms/InstCombine/gepphigep.ll b/test/Transforms/InstCombine/gepphigep.ll
index 9aab609..86295e4 100644
--- a/test/Transforms/InstCombine/gepphigep.ll
+++ b/test/Transforms/InstCombine/gepphigep.ll
@@ -2,6 +2,8 @@
 
 %struct1 = type { %struct2*, i32, i32, i32 }
 %struct2 = type { i32, i32 }
+%struct3 = type { i32, %struct4, %struct4 }
+%struct4 = type { %struct2, %struct2 }
 
 define i32 @test1(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) {
 bb:
@@ -54,3 +56,45 @@ bb:
 ; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp19, i32 0
 ; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 1
 }
+
+; Check that instcombine doesn't insert GEPs before landingpad.
+
+define i32 @test3(%struct3* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19, i64 %tmp20, i64 %tmp21) {
+bb:
+  %tmp = getelementptr inbounds %struct3* %dm, i64 0
+  br i1 %tmp4, label %bb1, label %bb2
+
+bb1:
+  %tmp1 = getelementptr inbounds %struct3* %tmp, i64 %tmp19, i32 1
+  %tmp11 = getelementptr inbounds %struct4* %tmp1, i64 0, i32 0, i32 0
+  store i32 0, i32* %tmp11, align 4
+  br label %bb3
+
+bb2:
+  %tmp2 = getelementptr inbounds %struct3* %tmp, i64 %tmp20, i32 1
+  %tmp12 = getelementptr inbounds %struct4* %tmp2, i64 0, i32 0, i32 1
+  store i32 0, i32* %tmp12, align 4
+  br label %bb3
+
+bb3:
+  %phi = phi %struct4* [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
+  %tmp22 = invoke i32 @foo1(i32 11) to label %bb4 unwind label %bb5
+
+bb4:
+  ret i32 0
+
+bb5:
+  %tmp27 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) catch i8* bitcast (i8** @_ZTIi to i8*)
+  %tmp34 = getelementptr inbounds %struct4* %phi, i64 %tmp21, i32 1
+  %tmp35 = getelementptr inbounds %struct2* %tmp34, i64 0, i32 1
+  %tmp25 = load i32* %tmp35, align 4
+  ret i32 %tmp25
+
+; CHECK-LABEL: @test3(
+; CHECK: bb5:
+; CHECK-NEXT: {{.*}}landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+}
+
+@_ZTIi = external constant i8*
+declare i32 @__gxx_personality_v0(...)
+declare i32 @foo1(i32)
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index bb46662..94cc180 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -602,8 +602,8 @@ entry:
 	%C = load i8** %B, align 8
 	ret i8* %C
 ; CHECK-LABEL: @test34(
-; CHECK: %V.c = inttoptr i64 %V to i8*
-; CHECK: ret i8* %V.c
+; CHECK: %[[C:.*]] = inttoptr i64 %V to i8*
+; CHECK: ret i8* %[[C]]
 }
 
 %t0 = type { i8*, [19 x i8] }
diff --git a/test/Transforms/InstCombine/icmp-range.ll b/test/Transforms/InstCombine/icmp-range.ll
index 97d231f..0911ab0 100644
--- a/test/Transforms/InstCombine/icmp-range.ll
+++ b/test/Transforms/InstCombine/icmp-range.ll
@@ -55,7 +55,7 @@ define i1 @test_nonzero6(i8* %argw) {
 }
 
 
-!0 = metadata !{i32 1, i32 6} 
-!1 = metadata !{i32 0, i32 6} 
-!2 = metadata !{i8 0, i8 1} 
-!3 = metadata !{i8 0, i8 6} 
+!0 = !{i32 1, i32 6} 
+!1 = !{i32 0, i32 6} 
+!2 = !{i8 0, i8 1} 
+!3 = !{i8 0, i8 6} 
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 279d86d..64741c5 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -1522,3 +1522,54 @@ define zeroext i1 @icmp_cmpxchg_strong(i32* %sc, i32 %old_val, i32 %new_val) {
   %icmp = icmp eq i32 %xtrc, %old_val
   ret i1 %icmp
 }
+
+; CHECK-LABEL: @f1
+; CHECK-NEXT: %[[cmp:.*]] = icmp sge i64 %a, %b
+; CHECK-NEXT: ret i1 %[[cmp]]
+define i1 @f1(i64 %a, i64 %b) {
+  %t = sub nsw i64 %a, %b
+  %v = icmp sge i64 %t, 0
+  ret i1 %v
+}
+
+; CHECK-LABEL: @f2
+; CHECK-NEXT: %[[cmp:.*]] = icmp sgt i64 %a, %b
+; CHECK-NEXT: ret i1 %[[cmp]]
+define i1 @f2(i64 %a, i64 %b) {
+  %t = sub nsw i64 %a, %b
+  %v = icmp sgt i64 %t, 0
+  ret i1 %v
+}
+
+; CHECK-LABEL: @f3
+; CHECK-NEXT: %[[cmp:.*]] = icmp slt i64 %a, %b
+; CHECK-NEXT: ret i1 %[[cmp]]
+define i1 @f3(i64 %a, i64 %b) {
+  %t = sub nsw i64 %a, %b
+  %v = icmp slt i64 %t, 0
+  ret i1 %v
+}
+
+; CHECK-LABEL: @f4
+; CHECK-NEXT: %[[cmp:.*]] = icmp sle i64 %a, %b
+; CHECK-NEXT: ret i1 %[[cmp]]
+define i1 @f4(i64 %a, i64 %b) {
+  %t = sub nsw i64 %a, %b
+  %v = icmp sle i64 %t, 0
+  ret i1 %v
+}
+
+; CHECK-LABEL: @f5
+; CHECK: %[[cmp:.*]] = icmp slt i32 %[[sub:.*]], 0
+; CHECK: %[[neg:.*]] = sub nsw i32 0, %[[sub]]
+; CHECK: %[[sel:.*]] = select i1 %[[cmp]], i32 %[[neg]], i32 %[[sub]]
+; CHECK: ret i32 %[[sel]]
+define i32 @f5(i8 %a, i8 %b) {
+  %conv = zext i8 %a to i32
+  %conv3 = zext i8 %b to i32
+  %sub = sub nsw i32 %conv, %conv3
+  %cmp4 = icmp slt i32 %sub, 0
+  %sub7 = sub nsw i32 0, %sub
+  %sub7.sub = select i1 %cmp4, i32 %sub7, i32 %sub
+  ret i32 %sub7.sub
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 9b58d93..2791adf 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -1,10 +1,17 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
 %overflow.result = type {i8, i1}
+%ov.result.32 = type { i32, i1 }
+
 
-declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8)
-declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
-declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8)
+declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8) nounwind readnone
+declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8) nounwind readnone
+declare %ov.result.32 @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
 declare double @llvm.powi.f64(double, i32) nounwind readonly
 declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
@@ -91,17 +98,92 @@ define i8 @uaddtest7(i8 %A, i8 %B) {
 }
 
 ; PR20194
-define { i32, i1 } @saddtest1(i8 %a, i8 %b) {
+define %ov.result.32 @saddtest_nsw(i8 %a, i8 %b) {
   %A = sext i8 %a to i32
   %B = sext i8 %b to i32
-  %x = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %A, i32 %B)
-  ret { i32, i1 } %x
-; CHECK-LABEL: @saddtest1
+  %x = call %ov.result.32 @llvm.sadd.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @saddtest_nsw
 ; CHECK: %x = add nsw i32 %A, %B
-; CHECK-NEXT: %1 = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 %x, 0
-; CHECK-NEXT:  ret { i32, i1 } %1
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret %ov.result.32 %1
 }
 
+define %ov.result.32 @uaddtest_nuw(i32 %a, i32 %b) {
+  %A = and i32 %a, 2147483647
+  %B = and i32 %b, 2147483647
+  %x = call %ov.result.32 @llvm.uadd.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @uaddtest_nuw
+; CHECK: %x = add nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret %ov.result.32 %1
+}
+
+define %ov.result.32 @ssubtest_nsw(i8 %a, i8 %b) {
+  %A = sext i8 %a to i32
+  %B = sext i8 %b to i32
+  %x = call %ov.result.32 @llvm.ssub.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @ssubtest_nsw
+; CHECK: %x = sub nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret %ov.result.32 %1
+}
+
+define %ov.result.32 @usubtest_nuw(i32 %a, i32 %b) {
+  %A = or i32 %a, 2147483648
+  %B = and i32 %b, 2147483647
+  %x = call %ov.result.32 @llvm.usub.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @usubtest_nuw
+; CHECK: %x = sub nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest1_nsw(i32 %a, i32 %b) {
+  %A = and i32 %a, 4095 ; 0xfff
+  %B = and i32 %b, 524287; 0x7ffff
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest1_nsw
+; CHECK: %x = mul nuw nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest2_nsw(i32 %a, i32 %b) {
+  %A = ashr i32 %a, 16
+  %B = ashr i32 %b, 16
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest2_nsw
+; CHECK: %x = mul nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest3_sw(i32 %a, i32 %b) {
+  %A = ashr i32 %a, 16
+  %B = ashr i32 %b, 15
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest3_sw
+; CHECK: %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+; CHECK-NEXT:  ret %ov.result.32 %x
+}
+
+define %ov.result.32 @umultest_nuw(i32 %a, i32 %b) {
+  %A = and i32 %a, 65535 ; 0xffff
+  %B = and i32 %b, 65535 ; 0xffff
+  %x = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @umultest_nuw
+; CHECK: %x = mul nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret %ov.result.32 %1
+}
 
 define i8 @umultest1(i8 %A, i1* %overflowPtr) {
   %x = call %overflow.result @llvm.umul.with.overflow.i8(i8 0, i8 %A)
@@ -125,9 +207,6 @@ define i8 @umultest2(i8 %A, i1* %overflowPtr) {
 ; CHECK-NEXT: ret i8 %A
 }
 
-%ov.result.32 = type { i32, i1 }
-declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
-
 define i32 @umultest3(i32 %n) nounwind {
   %shr = lshr i32 %n, 2
   %mul = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %shr, i32 3)
@@ -152,6 +231,19 @@ define i32 @umultest4(i32 %n) nounwind {
 ; CHECK: umul.with.overflow
 }
 
+define %ov.result.32 @umultest5(i32 %x, i32 %y) nounwind {
+  %or_x = or i32 %x, 2147483648
+  %or_y = or i32 %y, 2147483648
+  %mul = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %or_x, i32 %or_y)
+  ret %ov.result.32 %mul
+; CHECK-LABEL: @umultest5(
+; CHECK-NEXT: %[[or_x:.*]] = or i32 %x, -2147483648
+; CHECK-NEXT: %[[or_y:.*]] = or i32 %y, -2147483648
+; CHECK-NEXT: %[[mul:.*]] = mul i32 %[[or_x]], %[[or_y]]
+; CHECK-NEXT: %[[ret:.*]] = insertvalue %ov.result.32 { i32 undef, i1 true }, i32 %[[mul]], 0
+; CHECK-NEXT: ret %ov.result.32 %[[ret]]
+}
+
 define void @powi(double %V, double *%P) {
 entry:
   %A = tail call double @llvm.powi.f64(double %V, i32 -1) nounwind
@@ -257,7 +349,8 @@ define i32 @ctlz_select(i32 %Value) nounwind {
   ret i32 %s
 
 ; CHECK-LABEL: @ctlz_select(
-; CHECK: select i1 %tobool, i32 %ctlz, i32 32
+; CHECK-NEXT: call i32 @llvm.ctlz.i32(i32 %Value, i1 false)
+; CHECK-NEXT: ret i32
 }
 
 define i32 @cttz_select(i32 %Value) nounwind {
@@ -267,5 +360,6 @@ define i32 @cttz_select(i32 %Value) nounwind {
   ret i32 %s
 
 ; CHECK-LABEL: @cttz_select(
-; CHECK: select i1 %tobool, i32 %cttz, i32 32
+; CHECK-NEXT: call i32 @llvm.cttz.i32(i32 %Value, i1 false)
+; CHECK-NEXT: ret i32
 }
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index 9810026..40673a7 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -230,7 +230,7 @@ define i1 @test10_struct(i32 %x) {
 ; NODL: getelementptr inbounds %Foo* @GS, i32 %x, i32 0
 
 ; P32-LABEL: @test10_struct(
-; P32: getelementptr inbounds %Foo* @GS, i32 %x, i32 0
+; P32: ret i1 false
   %p = getelementptr inbounds %Foo* @GS, i32 %x, i32 0
   %q = load i32* %p
   %r = icmp eq i32 %q, 9
@@ -256,8 +256,7 @@ define i1 @test10_struct_i16(i16 %x){
 ; NODL: getelementptr inbounds %Foo* @GS, i16 %x, i32 0
 
 ; P32-LABEL: @test10_struct_i16(
-; P32: %1 = sext i16 %x to i32
-; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0
+; P32: ret i1 false
   %p = getelementptr inbounds %Foo* @GS, i16 %x, i32 0
   %q = load i32* %p
   %r = icmp eq i32 %q, 0
@@ -271,8 +270,7 @@ define i1 @test10_struct_i64(i64 %x){
 ; NODL: getelementptr inbounds %Foo* @GS, i64 %x, i32 0
 
 ; P32-LABEL: @test10_struct_i64(
-; P32: %1 = trunc i64 %x to i32
-; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0
+; P32: ret i1 false
   %p = getelementptr inbounds %Foo* @GS, i64 %x, i32 0
   %q = load i32* %p
   %r = icmp eq i32 %q, 0
diff --git a/test/Transforms/InstCombine/load.ll b/test/Transforms/InstCombine/load.ll
index b4b7558..624083b 100644
--- a/test/Transforms/InstCombine/load.ll
+++ b/test/Transforms/InstCombine/load.ll
@@ -1,8 +1,9 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
 
 ; This test makes sure that these instructions are properly eliminated.
 
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
 
 @X = constant i32 42		; <i32*> [#uses=2]
 @X2 = constant i32 47		; <i32*> [#uses=1]
@@ -150,3 +151,53 @@ define i8 @test15(i8 %x, i32 %y) {
   %r = load i8* %g.i8
   ret i8 %r
 }
+
+define void @test16(i8* %x, i8* %a, i8* %b, i8* %c) {
+; Check that we canonicalize loads which are only stored to use integer types
+; when there is a valid integer type.
+; CHECK-LABEL: @test16(
+; CHECK: %[[L1:.*]] = load i32*
+; CHECK-NOT: load
+; CHECK: store i32 %[[L1]], i32*
+; CHECK: store i32 %[[L1]], i32*
+; CHECK-NOT: store
+; CHECK: %[[L1:.*]] = load i32*
+; CHECK-NOT: load
+; CHECK: store i32 %[[L1]], i32*
+; CHECK: store i32 %[[L1]], i32*
+; CHECK-NOT: store
+; CHECK: ret
+
+entry:
+  %x.cast = bitcast i8* %x to float*
+  %a.cast = bitcast i8* %a to float*
+  %b.cast = bitcast i8* %b to float*
+  %c.cast = bitcast i8* %c to i32*
+
+  %x1 = load float* %x.cast
+  store float %x1, float* %a.cast
+  store float %x1, float* %b.cast
+
+  %x2 = load float* %x.cast
+  store float %x2, float* %b.cast
+  %x2.cast = bitcast float %x2 to i32
+  store i32 %x2.cast, i32* %c.cast
+
+  ret void
+}
+
+define void @test17(i8** %x, i8 %y) {
+; Check that in cases similar to @test16 we don't try to rewrite a load when
+; its only use is a store but it is used as the pointer to that store rather
+; than the value.
+;
+; CHECK-LABEL: @test17(
+; CHECK: %[[L:.*]] = load i8**
+; CHECK: store i8 %y, i8* %[[L]]
+
+entry:
+  %x.load = load i8** %x
+  store i8 %y, i8* %x.load
+
+  ret void
+}
diff --git a/test/Transforms/InstCombine/loadstore-metadata.ll b/test/Transforms/InstCombine/loadstore-metadata.ll
index 863edae..be55fa6 100644
--- a/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -1,5 +1,7 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
 define i32 @test_load_cast_combine_tbaa(float* %ptr) {
 ; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.
 ; CHECK-LABEL: @test_load_cast_combine_tbaa(
@@ -78,9 +80,34 @@ exit:
   ret void
 }
 
-!0 = metadata !{ metadata !1, metadata !1, i64 0 }
-!1 = metadata !{ metadata !1 }
-!2 = metadata !{ metadata !2, metadata !1 }
-!3 = metadata !{ }
-!4 = metadata !{ i32 1 }
-!5 = metadata !{ i32 0, i32 42 }
+define void @test_load_cast_combine_nonnull(float** %ptr) {
+; We can't preserve nonnull metadata when converting a load of a pointer to
+; a load of an integer. Instead, we translate it to range metadata.
+; FIXME: We should also transform range metadata back into nonnull metadata.
+; FIXME: This test is very fragile. If any LABEL lines are added after
+; this point, the test will fail, because this test depends on a metadata tuple,
+; which is always emitted at the end of the file. At some point, we should
+; consider an option to the IR printer to emit MD tuples after the function
+; that first uses them--this will allow us to refer to them like this and not
+; have the tests break. For now, this function must always come last in this
+; file, and no LABEL lines are to be added after this point.
+;
+; CHECK-LABEL: @test_load_cast_combine_nonnull(
+; CHECK: %[[V:.*]] = load i64* %{{.*}}, !range ![[MD:[0-9]+]]
+; CHECK-NOT: !nonnull
+; CHECK: store i64 %[[V]], i64*
+entry:
+  %p = load float** %ptr, !nonnull !3
+  %gep = getelementptr float** %ptr, i32 42
+  store float* %p, float** %gep
+  ret void
+}
+
+; This is the metadata tuple that we reference above:
+; CHECK: ![[MD]] = !{i64 1, i64 0}
+!0 = !{ !1, !1, i64 0 }
+!1 = !{ !1 }
+!2 = !{ !2, !1 }
+!3 = !{ }
+!4 = !{ i32 1 }
+!5 = !{ i32 0, i32 42 }
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index ed25e4e..765c8c3 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -146,17 +146,36 @@ lpad.i:                                           ; preds = %entry
 }
 
 declare i8* @_Znwm(i64) nobuiltin
-declare void @_ZdlPvm(i8*, i64) nobuiltin
 declare i8* @_Znwj(i32) nobuiltin
-declare void @_ZdlPvj(i8*, i32) nobuiltin
 declare i8* @_Znam(i64) nobuiltin
-declare void @_ZdaPvm(i8*, i64) nobuiltin
 declare i8* @_Znaj(i32) nobuiltin
-declare void @_ZdaPvj(i8*, i32) nobuiltin
+declare void @_ZdlPv(i8*) nobuiltin
+declare void @_ZdaPv(i8*) nobuiltin
+
+define linkonce void @_ZdlPvm(i8* %p, i64) nobuiltin {
+  call void @_ZdlPv(i8* %p)
+  ret void
+}
+define linkonce void @_ZdlPvj(i8* %p, i32) nobuiltin {
+  call void @_ZdlPv(i8* %p)
+  ret void
+}
+define linkonce void @_ZdaPvm(i8* %p, i64) nobuiltin {
+  call void @_ZdaPv(i8* %p)
+  ret void
+}
+define linkonce void @_ZdaPvj(i8* %p, i32) nobuiltin {
+  call void @_ZdaPv(i8* %p)
+  ret void
+}
 
 ; CHECK-LABEL: @test8(
 define void @test8() {
   ; CHECK-NOT: call
+  %nw = call i8* @_Znwm(i64 32) builtin
+  call void @_ZdlPv(i8* %nw) builtin
+  %na = call i8* @_Znam(i64 32) builtin
+  call void @_ZdaPv(i8* %na) builtin
   %nwm = call i8* @_Znwm(i64 32) builtin
   call void @_ZdlPvm(i8* %nwm, i64 32) builtin
   %nwj = call i8* @_Znwj(i32 32) builtin
diff --git a/test/Transforms/InstCombine/max-of-nots.ll b/test/Transforms/InstCombine/max-of-nots.ll
new file mode 100644
index 0000000..41e3038
--- /dev/null
+++ b/test/Transforms/InstCombine/max-of-nots.ll
@@ -0,0 +1,68 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define i32 @compute_min_2(i32 %x, i32 %y) {
+; CHECK-LABEL: compute_min_2
+ entry:
+  %not_x = sub i32 -1, %x
+  %not_y = sub i32 -1, %y
+  %cmp = icmp sgt i32 %not_x, %not_y
+  %not_min = select i1 %cmp, i32 %not_x, i32 %not_y
+  %min = sub i32 -1, %not_min
+  ret i32 %min
+
+; CHECK: %0 = icmp slt i32 %x, %y
+; CHECK-NEXT: %1 = select i1 %0, i32 %x, i32 %y
+; CHECK-NEXT: ret i32 %1
+}
+
+define i32 @compute_min_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: compute_min_3
+ entry:
+  %not_x = sub i32 -1, %x
+  %not_y = sub i32 -1, %y
+  %not_z = sub i32 -1, %z
+  %cmp_1 = icmp sgt i32 %not_x, %not_y
+  %not_min_1 = select i1 %cmp_1, i32 %not_x, i32 %not_y
+  %cmp_2 = icmp sgt i32 %not_min_1, %not_z
+  %not_min_2 = select i1 %cmp_2, i32 %not_min_1, i32 %not_z
+  %min = sub i32 -1, %not_min_2
+  ret i32 %min
+
+; CHECK: %0 = icmp slt i32 %x, %y
+; CHECK-NEXT: %1 = select i1 %0, i32 %x, i32 %y
+; CHECK-NEXT: %2 = icmp slt i32 %1, %z
+; CHECK-NEXT: %3 = select i1 %2, i32 %1, i32 %z
+; CHECK-NEXT: ret i32 %3
+}
+
+define i32 @compute_min_arithmetic(i32 %x, i32 %y) {
+; CHECK-LABEL: compute_min_arithmetic
+ entry:
+  %not_value = sub i32 3, %x
+  %not_y = sub i32 -1, %y
+  %cmp = icmp sgt i32 %not_value, %not_y
+  %not_min = select i1 %cmp, i32 %not_value, i32 %not_y
+  ret i32 %not_min
+
+; CHECK: %0 = add i32 %x, -4
+; CHECK-NEXT: %1 = icmp slt i32 %0, %y
+; CHECK-NEXT: %2 = select i1 %1, i32 %0, i32 %y
+; CHECK-NEXT: %3 = xor i32 %2, -1
+; CHECK-NEXT: ret i32 %3
+}
+
+declare void @fake_use(i32)
+
+define i32 @compute_min_pessimization(i32 %x, i32 %y) {
+; CHECK-LABEL: compute_min_pessimization
+ entry:
+  %not_value = sub i32 3, %x
+  call void @fake_use(i32 %not_value)
+  %not_y = sub i32 -1, %y
+  %cmp = icmp sgt i32 %not_value, %not_y
+; CHECK: %not_value = sub i32 3, %x
+; CHECK: %cmp = icmp sgt i32 %not_value, %not_y
+  %not_min = select i1 %cmp, i32 %not_value, i32 %not_y
+  %min = sub i32 -1, %not_min
+  ret i32 %min
+}
diff --git a/test/Transforms/InstCombine/mem-gep-zidx.ll b/test/Transforms/InstCombine/mem-gep-zidx.ll
new file mode 100644
index 0000000..9141d99
--- /dev/null
+++ b/test/Transforms/InstCombine/mem-gep-zidx.ll
@@ -0,0 +1,48 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
+@f.b = private unnamed_addr constant [1 x i32] [i32 55], align 4
+
+define signext i32 @test1(i32 signext %x) #0 {
+entry:
+  %idxprom = sext i32 %x to i64
+  %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @test1
+; CHECK: ret i32 12
+}
+
+declare void @foo(i64* %p)
+define void @test2(i32 signext %x, i64 %v) #0 {
+entry:
+  %p = alloca i64
+  %idxprom = sext i32 %x to i64
+  %arrayidx = getelementptr inbounds i64* %p, i64 %idxprom
+  store i64 %v, i64* %arrayidx
+  call void @foo(i64* %p)
+  ret void
+
+; CHECK-LABEL: @test2
+; CHECK: %p = alloca i64
+; CHECK: store i64 %v, i64* %p
+; CHECK: ret void
+}
+
+define signext i32 @test3(i32 signext %x, i1 %y) #0 {
+entry:
+  %idxprom = sext i32 %x to i64
+  %p = select i1 %y, [1 x i32]* @f.a, [1 x i32]* @f.b
+  %arrayidx = getelementptr inbounds [1 x i32]* %p, i64 0, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @test3
+; CHECK: getelementptr inbounds [1 x i32]* %p, i64 0, i64 0
+}
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/Transforms/InstCombine/memcpy_chk-1.ll b/test/Transforms/InstCombine/memcpy_chk-1.ll
index 9216ae7..ddaaf82 100644
--- a/test/Transforms/InstCombine/memcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/memcpy_chk-1.ll
@@ -15,46 +15,63 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; Check cases where dstlen >= len.
 
-define void @test_simplify1() {
+define i8* @test_simplify1() {
 ; CHECK-LABEL: @test_simplify1(
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T2* @t2 to i8*
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64
-  call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
-  ret void
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T2* @t2 to i8*), i64 1824, i32 4, i1 false)
+; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
+  %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+  ret i8* %ret
 }
 
-define void @test_simplify2() {
+define i8* @test_simplify2() {
 ; CHECK-LABEL: @test_simplify2(
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T3* @t3 to i8*
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64
-  call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
-  ret void
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T3* @t3 to i8*), i64 1824, i32 4, i1 false)
+; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
+  %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
+  ret i8* %ret
 }
 
 ; Check cases where dstlen < len.
 
-define void @test_no_simplify1() {
+define i8* @test_no_simplify1() {
 ; CHECK-LABEL: @test_no_simplify1(
   %dst = bitcast %struct.T3* @t3 to i8*
   %src = bitcast %struct.T1* @t1 to i8*
 
-; CHECK-NEXT: call i8* @__memcpy_chk
-  call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 2848, i64 1824)
-  ret void
+; CHECK-NEXT: %ret = call i8* @__memcpy_chk(i8* bitcast (%struct.T3* @t3 to i8*), i8* bitcast (%struct.T1* @t1 to i8*), i64 2848, i64 1824)
+; CHECK-NEXT: ret i8* %ret
+  %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 2848, i64 1824)
+  ret i8* %ret
 }
 
-define void @test_no_simplify2() {
+define i8* @test_no_simplify2() {
 ; CHECK-LABEL: @test_no_simplify2(
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T2* @t2 to i8*
 
-; CHECK-NEXT: call i8* @__memcpy_chk
-  call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1024, i64 0)
-  ret void
+; CHECK-NEXT: %ret = call i8* @__memcpy_chk(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T2* @t2 to i8*), i64 1024, i64 0)
+; CHECK-NEXT: ret i8* %ret
+  %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1024, i64 0)
+  ret i8* %ret
+}
+
+define i8* @test_simplify_return_indcall(i8* ()* %alloc) {
+; CHECK-LABEL: @test_simplify_return_indcall(
+  %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: %dst = call i8* %alloc()
+  %dst = call i8* %alloc()
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64
+  %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+; CHECK-NEXT: ret i8* %dst
+  ret i8* %ret
 }
 
 declare i8* @__memcpy_chk(i8*, i8*, i64, i64)
diff --git a/test/Transforms/InstCombine/memmove_chk-1.ll b/test/Transforms/InstCombine/memmove_chk-1.ll
index 6d93bbb..e4e1f6e 100644
--- a/test/Transforms/InstCombine/memmove_chk-1.ll
+++ b/test/Transforms/InstCombine/memmove_chk-1.ll
@@ -15,46 +15,50 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; Check cases where dstlen >= len.
 
-define void @test_simplify1() {
+define i8* @test_simplify1() {
 ; CHECK-LABEL: @test_simplify1(
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T2* @t2 to i8*
 
-; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64
-  call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
-  ret void
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T2* @t2 to i8*), i64 1824, i32 4, i1 false)
+; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
+  %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+  ret i8* %ret
 }
 
-define void @test_simplify2() {
+define i8* @test_simplify2() {
 ; CHECK-LABEL: @test_simplify2(
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T3* @t3 to i8*
 
-; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64
-  call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
-  ret void
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T3* @t3 to i8*), i64 1824, i32 4, i1 false)
+; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
+  %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
+  ret i8* %ret
 }
 
 ; Check cases where dstlen < len.
 
-define void @test_no_simplify1() {
+define i8* @test_no_simplify1() {
 ; CHECK-LABEL: @test_no_simplify1(
   %dst = bitcast %struct.T3* @t3 to i8*
   %src = bitcast %struct.T1* @t1 to i8*
 
-; CHECK-NEXT: call i8* @__memmove_chk
-  call i8* @__memmove_chk(i8* %dst, i8* %src, i64 2848, i64 1824)
-  ret void
+; CHECK-NEXT: %ret = call i8* @__memmove_chk(i8* bitcast (%struct.T3* @t3 to i8*), i8* bitcast (%struct.T1* @t1 to i8*), i64 2848, i64 1824)
+; CHECK-NEXT: ret i8* %ret
+  %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 2848, i64 1824)
+  ret i8* %ret
 }
 
-define void @test_no_simplify2() {
+define i8* @test_no_simplify2() {
 ; CHECK-LABEL: @test_no_simplify2(
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T2* @t2 to i8*
 
-; CHECK-NEXT: call i8* @__memmove_chk
-  call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1024, i64 0)
-  ret void
+; CHECK-NEXT: %ret = call i8* @__memmove_chk(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T2* @t2 to i8*), i64 1024, i64 0)
+; CHECK-NEXT: ret i8* %ret
+  %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1024, i64 0)
+  ret i8* %ret
 }
 
 declare i8* @__memmove_chk(i8*, i8*, i64, i64)
diff --git a/test/Transforms/InstCombine/memset_chk-1.ll b/test/Transforms/InstCombine/memset_chk-1.ll
index 47cc7db..27f7293 100644
--- a/test/Transforms/InstCombine/memset_chk-1.ll
+++ b/test/Transforms/InstCombine/memset_chk-1.ll
@@ -11,51 +11,56 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; Check cases where dstlen >= len.
 
-define void @test_simplify1() {
+define i8* @test_simplify1() {
 ; CHECK-LABEL: @test_simplify1(
   %dst = bitcast %struct.T* @t to i8*
 
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64
-  call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
-  ret void
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i32 4, i1 false)
+; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*)
+  %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
+  ret i8* %ret
 }
 
-define void @test_simplify2() {
+define i8* @test_simplify2() {
 ; CHECK-LABEL: @test_simplify2(
   %dst = bitcast %struct.T* @t to i8*
 
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64
-  call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 3648)
-  ret void
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i32 4, i1 false)
+; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*)
+  %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 3648)
+  ret i8* %ret
 }
 
-define void @test_simplify3() {
+define i8* @test_simplify3() {
 ; CHECK-LABEL: @test_simplify3(
   %dst = bitcast %struct.T* @t to i8*
 
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64
-  call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 -1)
-  ret void
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i32 4, i1 false)
+; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*)
+  %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 -1)
+  ret i8* %ret
 }
 
 ; Check cases where dstlen < len.
 
-define void @test_no_simplify1() {
+define i8* @test_no_simplify1() {
 ; CHECK-LABEL: @test_no_simplify1(
   %dst = bitcast %struct.T* @t to i8*
 
-; CHECK-NEXT: call i8* @__memset_chk
-  call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 400)
-  ret void
+; CHECK-NEXT: %ret = call i8* @__memset_chk(i8* bitcast (%struct.T* @t to i8*), i32 0, i64 1824, i64 400)
+; CHECK-NEXT: ret i8* %ret
+  %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 400)
+  ret i8* %ret
 }
 
-define void @test_no_simplify2() {
+define i8* @test_no_simplify2() {
 ; CHECK-LABEL: @test_no_simplify2(
   %dst = bitcast %struct.T* @t to i8*
 
-; CHECK-NEXT: call i8* @__memset_chk
-  call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 0)
-  ret void
+; CHECK-NEXT: %ret = call i8* @__memset_chk(i8* bitcast (%struct.T* @t to i8*), i32 0, i64 1824, i64 0)
+; CHECK-NEXT: ret i8* %ret
+  %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 0)
+  ret i8* %ret
 }
 
 declare i8* @__memset_chk(i8*, i32, i64, i64)
diff --git a/test/Transforms/InstCombine/minnum.ll b/test/Transforms/InstCombine/minnum.ll
index 57d6e16..f7494e7 100644
--- a/test/Transforms/InstCombine/minnum.ll
+++ b/test/Transforms/InstCombine/minnum.ll
@@ -7,7 +7,7 @@ declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #0
 declare double @llvm.minnum.f64(double, double) #0
 declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
 
-declare float @llvm.fmax.f32(float, float) #0
+declare float @llvm.maxnum.f32(float, float) #0
 
 ; CHECK-LABEL: @constant_fold_minnum_f32
 ; CHECK-NEXT: ret float 1.000000e+00
@@ -206,23 +206,23 @@ define float @minnum4(float %x, float %y, float %z, float %w) #0 {
   ret float %c
 }
 
-; CHECK-LABEL: @minnum_x_fmax_x_y
-; CHECK-NEXT: call float @llvm.fmax.f32
+; CHECK-LABEL: @minnum_x_maxnum_x_y
+; CHECK-NEXT: call float @llvm.maxnum.f32
 ; CHECK-NEXT: call float @llvm.minnum.f32
 ; CHECK-NEXT: ret float
-define float @minnum_x_fmax_x_y(float %x, float %y) #0 {
-  %a = call float @llvm.fmax.f32(float %x, float %y) #0
+define float @minnum_x_maxnum_x_y(float %x, float %y) #0 {
+  %a = call float @llvm.maxnum.f32(float %x, float %y) #0
   %b = call float @llvm.minnum.f32(float %x, float %a) #0
   ret float %b
 }
 
-; CHECK-LABEL: @fmax_x_minnum_x_y
+; CHECK-LABEL: @maxnum_x_minnum_x_y
 ; CHECK-NEXT: call float @llvm.minnum.f32
-; CHECK-NEXT: call float @llvm.fmax.f32
+; CHECK-NEXT: call float @llvm.maxnum.f32
 ; CHECK-NEXT: ret float
-define float @fmax_x_minnum_x_y(float %x, float %y) #0 {
+define float @maxnum_x_minnum_x_y(float %x, float %y) #0 {
   %a = call float @llvm.minnum.f32(float %x, float %y) #0
-  %b = call float @llvm.fmax.f32(float %x, float %a) #0
+  %b = call float @llvm.maxnum.f32(float %x, float %a) #0
   ret float %b
 }
 
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index d19bedc..4d1e6c7 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -197,3 +197,94 @@ define <2 x i1> @test21(<2 x i1> %A, <2 x i1> %B) {
         ret <2 x i1> %C
 ; CHECK: %C = and <2 x i1> %A, %B
 }
+
+define i32 @test22(i32 %A) {
+; CHECK-LABEL: @test22(
+        %B = mul nsw i32 %A, -1
+        ret i32 %B
+; CHECK: sub nsw i32 0, %A
+}
+
+define i32 @test23(i32 %A) {
+; CHECK-LABEL: @test23(
+        %B = shl nuw i32 %A, 1
+        %C = mul nuw i32 %B, 3
+        ret i32 %C
+; CHECK: mul nuw i32 %A, 6
+}
+
+define i32 @test24(i32 %A) {
+; CHECK-LABEL: @test24(
+        %B = shl nsw i32 %A, 1
+        %C = mul nsw i32 %B, 3
+        ret i32 %C
+; CHECK: mul nsw i32 %A, 6
+}
+
+define i32 @test25(i32 %A, i32 %B) {
+; CHECK-LABEL: @test25(
+        %C = sub nsw i32 0, %A
+        %D = sub nsw i32 0, %B
+        %E = mul nsw i32 %C, %D
+        ret i32 %E
+; CHECK: mul nsw i32 %A, %B
+}
+
+define i32 @test26(i32 %A, i32 %B) {
+; CHECK-LABEL: @test26(
+        %C = shl nsw i32 1, %B
+        %D = mul nsw i32 %A, %C
+        ret i32 %D
+; CHECK: shl nsw i32 %A, %B
+}
+
+define i32 @test27(i32 %A, i32 %B) {
+; CHECK-LABEL: @test27(
+        %C = shl i32 1, %B
+        %D = mul nuw i32 %A, %C
+        ret i32 %D
+; CHECK: shl nuw i32 %A, %B
+}
+
+define i32 @test28(i32 %A) {
+; CHECK-LABEL: @test28(
+        %B = shl i32 1, %A
+        %C = mul nsw i32 %B, %B
+        ret i32 %C
+; CHECK:      %[[shl1:.*]] = shl i32 1, %A
+; CHECK-NEXT: %[[shl2:.*]] = shl i32 %[[shl1]], %A
+; CHECK-NEXT: ret i32 %[[shl2]]
+}
+
+define i64 @test29(i31 %A, i31 %B) {
+; CHECK-LABEL: @test29(
+        %C = sext i31 %A to i64
+        %D = sext i31 %B to i64
+        %E = mul i64 %C, %D
+        ret i64 %E
+; CHECK:      %[[sext1:.*]] = sext i31 %A to i64
+; CHECK-NEXT: %[[sext2:.*]] = sext i31 %B to i64
+; CHECK-NEXT: %[[mul:.*]] = mul nsw i64 %[[sext1]], %[[sext2]]
+; CHECK-NEXT: ret i64 %[[mul]]
+}
+
+define i64 @test30(i32 %A, i32 %B) {
+; CHECK-LABEL: @test30(
+        %C = zext i32 %A to i64
+        %D = zext i32 %B to i64
+        %E = mul i64 %C, %D
+        ret i64 %E
+; CHECK:      %[[zext1:.*]] = zext i32 %A to i64
+; CHECK-NEXT: %[[zext2:.*]] = zext i32 %B to i64
+; CHECK-NEXT: %[[mul:.*]] = mul nuw i64 %[[zext1]], %[[zext2]]
+; CHECK-NEXT: ret i64 %[[mul]]
+}
+
+@PR22087 = external global i32
+define i32 @test31(i32 %V) {
+; CHECK-LABEL: @test31
+  %mul = mul i32 %V, shl (i32 1, i32 zext (i1 icmp ne (i32* inttoptr (i64 1 to i32*), i32* @PR22087) to i32))
+  ret i32 %mul
+; CHECK:      %[[mul:.*]] = shl i32 %V, zext (i1 icmp ne (i32* inttoptr (i64 1 to i32*), i32* @PR22087) to i32)
+; CHECK-NEXT: ret i32 %[[mul]]
+}
diff --git a/test/Transforms/InstCombine/narrow-switch.ll b/test/Transforms/InstCombine/narrow-switch.ll
index 7646189..f3f19ba 100644
--- a/test/Transforms/InstCombine/narrow-switch.ll
+++ b/test/Transforms/InstCombine/narrow-switch.ll
@@ -91,3 +91,33 @@ return:
   %retval.0 = phi i32 [ 24, %sw.default ], [ 123, %sw.bb2 ], [ 213, %sw.bb1 ], [ 231, %entry ]
   ret i32 %retval.0
 }
+
+; Make sure to avoid assertion crashes and use the type before
+; truncation to generate the sub constant expressions that leads
+; to the recomputed condition.
+;
+; CHECK-LABEL: @trunc64to59
+; CHECK: switch i59
+; CHECK: i59 0, label
+; CHECK: i59 18717182647723699, label
+
+define void @trunc64to59(i64 %a) {
+entry:
+  %tmp0 = and i64 %a, 15
+  %tmp1 = mul i64 %tmp0, -6425668444178048401
+  %tmp2 = add i64 %tmp1, 5170979678563097242
+  %tmp3 = mul i64 %tmp2, 1627972535142754813
+  switch i64 %tmp3, label %sw.default [
+    i64 847514119312061490, label %sw.bb1
+    i64 866231301959785189, label %sw.bb2
+  ]
+
+sw.bb1:
+  br label %sw.default
+
+sw.bb2:
+  br label %sw.default
+
+sw.default:
+  ret void
+}
diff --git a/test/Transforms/InstCombine/not-fcmp.ll b/test/Transforms/InstCombine/not-fcmp.ll
index ad01a6b..9718e0b 100644
--- a/test/Transforms/InstCombine/not-fcmp.ll
+++ b/test/Transforms/InstCombine/not-fcmp.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep "fcmp uge"
+; RUN: opt < %s -instcombine -S | FileCheck %s
 ; PR1570
 
 define i1 @f(float %X, float %Y) {
@@ -6,5 +6,8 @@ entry:
         %tmp3 = fcmp olt float %X, %Y           ; <i1> [#uses=1]
         %toBoolnot5 = xor i1 %tmp3, true                ; <i1> [#uses=1]
         ret i1 %toBoolnot5
+; CHECK-LABEL: @f(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %toBoolnot5 = fcmp uge float %X, %Y
+; CHECK-NEXT: ret i1 %toBoolnot5
 }
-
diff --git a/test/Transforms/InstCombine/not.ll b/test/Transforms/InstCombine/not.ll
index 4a8825b..9d59edd 100644
--- a/test/Transforms/InstCombine/not.ll
+++ b/test/Transforms/InstCombine/not.ll
@@ -1,7 +1,8 @@
 ; This test makes sure that these instructions are properly eliminated.
 ;
 
-; RUN: opt < %s -instcombine -S | not grep xor
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK-NOT: xor
 
 define i32 @test1(i32 %A) {
         %B = xor i32 %A, -1             ; <i32> [#uses=1]
@@ -52,3 +53,8 @@ entry:
 	ret i8 %retval67
 }
 
+define <2 x i1> @test7(<2 x i32> %A, <2 x i32> %B) {
+        %cond = icmp sle <2 x i32> %A, %B
+        %Ret = xor <2 x i1> %cond, <i1 true, i1 true>
+        ret <2 x i1> %Ret
+}
diff --git a/test/Transforms/InstCombine/or-xor.ll b/test/Transforms/InstCombine/or-xor.ll
index 670e3e0..546b777 100644
--- a/test/Transforms/InstCombine/or-xor.ll
+++ b/test/Transforms/InstCombine/or-xor.ll
@@ -103,81 +103,75 @@ define i32 @test10(i32 %A, i32 %B) {
 ; CHECK-NEXT: ret i32 -1
 }
 
-define i32 @test11(i32 %A, i32 %B) {
-  %xor1 = xor i32 %B, %A
-  %not = xor i32 %A, -1
-  %xor2 = xor i32 %not, %B
-  %or = or i32 %xor1, %xor2
-  ret i32 %or
-; CHECK-LABEL: @test11(
-; CHECK-NEXT: ret i32 -1
-}
-
 ; (x | y) & ((~x) ^ y) -> (x & y)
-define i32 @test12(i32 %x, i32 %y) {
+define i32 @test11(i32 %x, i32 %y) {
  %or = or i32 %x, %y
  %neg = xor i32 %x, -1
  %xor = xor i32 %neg, %y
  %and = and i32 %or, %xor
  ret i32 %and
-; CHECK-LABEL: @test12(
+; CHECK-LABEL: @test11(
 ; CHECK-NEXT: %and = and i32 %x, %y
 ; CHECK-NEXT: ret i32 %and
 }
 
 ; ((~x) ^ y) & (x | y) -> (x & y)
-define i32 @test13(i32 %x, i32 %y) {
+define i32 @test12(i32 %x, i32 %y) {
  %neg = xor i32 %x, -1
  %xor = xor i32 %neg, %y
  %or = or i32 %x, %y
  %and = and i32 %xor, %or
  ret i32 %and
-; CHECK-LABEL: @test13(
+; CHECK-LABEL: @test12(
 ; CHECK-NEXT: %and = and i32 %x, %y
 ; CHECK-NEXT: ret i32 %and
 }
 
 ; ((x | y) ^ (x ^ y)) -> (x & y)
-define i32 @test15(i32 %x, i32 %y) {
+define i32 @test13(i32 %x, i32 %y) {
   %1 = xor i32 %y, %x
   %2 = or i32 %y, %x
   %3 = xor i32 %2, %1
   ret i32 %3
-; CHECK-LABEL: @test15(
+; CHECK-LABEL: @test13(
 ; CHECK-NEXT: %1 = and i32 %y, %x
 ; CHECK-NEXT: ret i32 %1
 }
 
 ; ((x | ~y) ^ (~x | y)) -> x ^ y
-define i32 @test16(i32 %x, i32 %y) {
+define i32 @test14(i32 %x, i32 %y) {
   %noty = xor i32 %y, -1
   %notx = xor i32 %x, -1
   %or1 = or i32 %x, %noty
   %or2 = or i32 %notx, %y
   %xor = xor i32 %or1, %or2
   ret i32 %xor
-; CHECK-LABEL: @test16(
+; CHECK-LABEL: @test14(
 ; CHECK-NEXT: %xor = xor i32 %x, %y
 ; CHECK-NEXT: ret i32 %xor
 }
 
 ; ((x & ~y) ^ (~x & y)) -> x ^ y
-define i32 @test17(i32 %x, i32 %y) {
+define i32 @test15(i32 %x, i32 %y) {
   %noty = xor i32 %y, -1
   %notx = xor i32 %x, -1
   %and1 = and i32 %x, %noty
   %and2 = and i32 %notx, %y
   %xor = xor i32 %and1, %and2
   ret i32 %xor
-; CHECK-LABEL: @test17(
+; CHECK-LABEL: @test15(
 ; CHECK-NEXT: %xor = xor i32 %x, %y
 ; CHECK-NEXT: ret i32 %xor
 }
 
-define i32 @test18(i32 %a, i32 %b) {
+define i32 @test16(i32 %a, i32 %b) {
   %or = xor i32 %a, %b
   %and1 = and i32 %or, 1
   %and2 = and i32 %b, -2
   %xor = or i32 %and1, %and2
   ret i32 %xor
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: %1 = and i32 %a, 1
+; CHECK-NEXT: %xor = xor i32 %1, %b
+; CHECK-NEXT: ret i32 %xor
 }
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index 23dad21..f604baf 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -506,3 +506,13 @@ define i1 @test47(i8 signext %c)  {
 ; CHECK-NEXT:  add i8 %1, -65
 ; CHECK-NEXT:  icmp ult i8 %2, 27
 }
+
+define i1 @test48(i64 %x, i1 %b) {
+  %1 = icmp ult i64 %x, 2305843009213693952
+  %2 = icmp ugt i64 %x, 2305843009213693951
+  %.b = or i1 %2, %b
+  %3 = or i1 %1, %.b
+  ret i1 %3
+; CHECK-LABEL: @test48(
+; CHECK-NEXT:  ret i1 true
+}
diff --git a/test/Transforms/InstCombine/pr12251.ll b/test/Transforms/InstCombine/pr12251.ll
index 74a41eb..8c382bb 100644
--- a/test/Transforms/InstCombine/pr12251.ll
+++ b/test/Transforms/InstCombine/pr12251.ll
@@ -12,4 +12,4 @@ entry:
 ; CHECK-NEXT: %tobool = icmp ne i8 %a, 0
 ; CHECK-NEXT: ret i1 %tobool
 
-!0 = metadata !{i8 0, i8 2}
+!0 = !{i8 0, i8 2}
diff --git a/test/Transforms/InstCombine/pr12338.ll b/test/Transforms/InstCombine/pr12338.ll
index 614387a..7e0bf59 100644
--- a/test/Transforms/InstCombine/pr12338.ll
+++ b/test/Transforms/InstCombine/pr12338.ll
@@ -4,6 +4,7 @@ define void @entry() nounwind {
 entry:
   br label %for.cond
 
+; CHECK: br label %for.cond
 for.cond:
   %local = phi <1 x i32> [ <i32 0>, %entry ], [ %phi2, %cond.end47 ]
   %phi3 = sub <1 x i32> zeroinitializer, %local
@@ -18,7 +19,6 @@ cond.end:
 
 cond.end47:
   %sum = add <1 x i32> %cond, <i32 92>
-; CHECK: sub <1 x i32> <i32 -92>, %cond
   %phi2 = sub <1 x i32> zeroinitializer, %sum
   br label %for.cond
 }
diff --git a/test/Transforms/InstCombine/pr21199.ll b/test/Transforms/InstCombine/pr21199.ll
new file mode 100644
index 0000000..e6599fb
--- /dev/null
+++ b/test/Transforms/InstCombine/pr21199.ll
@@ -0,0 +1,25 @@
+; do not replace a 'select' with 'or' in 'select - cmp - br' sequence
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @f(i32)
+
+define void @test(i32 %len) {
+entry:
+  %cmp = icmp ult i32 %len, 8
+  %cond = select i1 %cmp, i32 %len, i32 8
+  %cmp11 = icmp ult i32 0, %cond
+  br i1 %cmp11, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  tail call void @f(i32 %cond)
+  %inc = add i32 %i.02, 1
+  %cmp1 = icmp ult i32 %inc, %cond
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK: select
+}
diff --git a/test/Transforms/InstCombine/pr21210.ll b/test/Transforms/InstCombine/pr21210.ll
new file mode 100644
index 0000000..1db8794
--- /dev/null
+++ b/test/Transforms/InstCombine/pr21210.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -instcombine -S | FileCheck %s
+; Checks that the select-icmp optimization is safe in two cases
+declare void @foo(i32)
+declare i32 @bar(i32)
+
+; don't replace 'cond' by 'len' in the home block ('bb') that
+; contains the select
+define void @test1(i32 %len) {
+entry:
+  br label %bb
+
+bb:
+  %cmp = icmp ult i32 %len, 8
+  %cond = select i1 %cmp, i32 %len, i32 8
+  call void @foo(i32 %cond)
+  %cmp11 = icmp eq i32 %cond, 8
+  br i1 %cmp11, label %for.end, label %bb
+
+for.end:
+  ret void
+; CHECK: select
+; CHECK: icmp eq i32 %cond, 8
+}
+
+; don't replace 'cond' by 'len' in a block ('b1') that dominates all uses
+; of the select outside the home block ('bb'), but can be reached from the home
+; block on another path ('bb -> b0 -> b1')
+define void @test2(i32 %len) {
+entry:
+  %0 = call i32 @bar(i32 %len);
+  %cmp = icmp ult i32 %len, 4
+  br i1 %cmp, label %bb, label %b1
+bb:
+  %cond = select i1 %cmp, i32 %len, i32 8
+  %cmp11 = icmp eq i32 %cond, 8
+  br i1 %cmp11, label %b0, label %b1
+
+b0:
+  call void @foo(i32 %len)
+  br label %b1
+
+b1:
+; CHECK: phi i32 [ %cond, %bb ], [ undef, %b0 ], [ %0, %entry ]
+  %1 = phi i32 [ %cond, %bb ], [ undef, %b0 ], [ %0, %entry ]
+  br label %ret
+
+ret:
+  call void @foo(i32 %1)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/pr21651.ll b/test/Transforms/InstCombine/pr21651.ll
new file mode 100644
index 0000000..914785f
--- /dev/null
+++ b/test/Transforms/InstCombine/pr21651.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @PR21651() {
+  switch i2 0, label %out [
+    i2 0, label %out
+    i2 1, label %out
+  ]
+
+out:
+  ret void
+}
+
+; CHECK-LABEL: define void @PR21651(
+; CHECK:   switch i2 0, label %out [
+; CHECK:     i2 0, label %out
+; CHECK:     i2 1, label %out
+; CHECK:   ]
+; CHECK: out:                                              ; preds = %0, %0, %0
+; CHECK:   ret void
+; CHECK: }
diff --git a/test/Transforms/InstCombine/pr21891.ll b/test/Transforms/InstCombine/pr21891.ll
new file mode 100644
index 0000000..8194976
--- /dev/null
+++ b/test/Transforms/InstCombine/pr21891.ll
@@ -0,0 +1,18 @@
+; RUN: opt %s -instcombine
+
+define i32 @f(i32 %theNumber) {
+entry:
+  %cmp = icmp sgt i32 %theNumber, -1
+  call void @llvm.assume(i1 %cmp)
+  br i1 true, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %shl = shl nuw i32 %theNumber, 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %phi = phi i32 [ %shl, %if.then ], [ undef, %entry ]
+  ret i32 %phi
+}
+
+declare void @llvm.assume(i1)
diff --git a/test/Transforms/InstCombine/range-check.ll b/test/Transforms/InstCombine/range-check.ll
new file mode 100644
index 0000000..35f11dd
--- /dev/null
+++ b/test/Transforms/InstCombine/range-check.ll
@@ -0,0 +1,159 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check simplification of
+; (icmp sgt x, -1) & (icmp sgt/sge n, x) --> icmp ugt/uge n, x
+
+; CHECK-LABEL: define i1 @test_and1
+; CHECK: [[R:%[0-9]+]] = icmp ugt i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_and1(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp sge i32 %x, 0
+  %b = icmp slt i32 %x, %nn
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_and2
+; CHECK: [[R:%[0-9]+]] = icmp uge i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_and2(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp sgt i32 %x, -1
+  %b = icmp sle i32 %x, %nn
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_and3
+; CHECK: [[R:%[0-9]+]] = icmp ugt i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_and3(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp sgt i32 %nn, %x
+  %b = icmp sge i32 %x, 0
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_and4
+; CHECK: [[R:%[0-9]+]] = icmp uge i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_and4(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp sge i32 %nn, %x
+  %b = icmp sge i32 %x, 0
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_or1
+; CHECK: [[R:%[0-9]+]] = icmp ule i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_or1(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp slt i32 %x, 0
+  %b = icmp sge i32 %x, %nn
+  %c = or i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_or2
+; CHECK: [[R:%[0-9]+]] = icmp ult i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_or2(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp sle i32 %x, -1
+  %b = icmp sgt i32 %x, %nn
+  %c = or i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_or3
+; CHECK: [[R:%[0-9]+]] = icmp ule i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_or3(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp sle i32 %nn, %x
+  %b = icmp slt i32 %x, 0
+  %c = or i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_or4
+; CHECK: [[R:%[0-9]+]] = icmp ult i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_or4(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp slt i32 %nn, %x
+  %b = icmp slt i32 %x, 0
+  %c = or i1 %a, %b
+  ret i1 %c
+}
+
+; Negative tests
+
+; CHECK-LABEL: define i1 @negative1
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = and i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative1(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp slt i32 %x, %nn
+  %b = icmp sgt i32 %x, 0      ; should be: icmp sge
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @negative2
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = and i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative2(i32 %x, i32 %n) {
+  %a = icmp slt i32 %x, %n     ; n can be negative
+  %b = icmp sge i32 %x, 0
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @negative3
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = and i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative3(i32 %x, i32 %y, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp slt i32 %x, %nn
+  %b = icmp sge i32 %y, 0      ; should compare %x and not %y
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @negative4
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = and i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative4(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp ne i32 %x, %nn     ; should be: icmp slt/sle
+  %b = icmp sge i32 %x, 0
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @negative5
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = or i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative5(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp slt i32 %x, %nn
+  %b = icmp sge i32 %x, 0
+  %c = or i1 %a, %b            ; should be: and
+  ret i1 %c
+}
+
diff --git a/test/Transforms/InstCombine/select-cmp-br.ll b/test/Transforms/InstCombine/select-cmp-br.ll
new file mode 100644
index 0000000..f10d587
--- /dev/null
+++ b/test/Transforms/InstCombine/select-cmp-br.ll
@@ -0,0 +1,155 @@
+; Replace a 'select' with 'or' in 'select - cmp [eq|ne] - br' sequence
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+%C = type <{ %struct.S }>
+%struct.S = type { i64*, i32, i32 }
+
+declare void @bar(%struct.S *) #1
+declare void @foobar()
+
+define void @test1(%C*) {
+entry:
+  %1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
+  %m = load i64** %1, align 8
+  %2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
+  %n = load i64** %2, align 8
+  %3 = getelementptr inbounds i64* %m, i64 9
+  %4 = bitcast i64* %3 to i64 (%C*)**
+  %5 = load i64 (%C*)** %4, align 8
+  %6 = icmp eq i64* %m, %n
+  %7 = select i1 %6, %C* %0, %C* null
+  %8 = icmp eq %C* %7, null
+  br i1 %8, label %12, label %10
+
+; <label>:9                                       ; preds = %10, %12
+  ret void
+
+; <label>:10                                      ; preds = %entry
+  %11 = getelementptr inbounds %C* %7, i64 0, i32 0
+  tail call void @bar(%struct.S* %11)
+  br label %9
+
+; <label>:12                                      ; preds = %entry
+  %13 = tail call i64 %5(%C* %0)
+  br label %9
+; CHECK-LABEL: @test1(
+; CHECK-NOT: select
+; CHECK: or
+; CHECK-NOT: select
+}
+
+define void @test2(%C*) {
+entry:
+  %1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
+  %m = load i64** %1, align 8
+  %2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
+  %n = load i64** %2, align 8
+  %3 = getelementptr inbounds i64* %m, i64 9
+  %4 = bitcast i64* %3 to i64 (%C*)**
+  %5 = load i64 (%C*)** %4, align 8
+  %6 = icmp eq i64* %m, %n
+  %7 = select i1 %6, %C* null, %C* %0
+  %8 = icmp eq %C* %7, null
+  br i1 %8, label %12, label %10
+
+; <label>:9                                       ; preds = %10, %12
+  ret void
+
+; <label>:10                                      ; preds = %entry
+  %11 = getelementptr inbounds %C* %7, i64 0, i32 0
+  tail call void @bar(%struct.S* %11)
+  br label %9
+
+; <label>:12                                      ; preds = %entry
+  %13 = tail call i64 %5(%C* %0)
+  br label %9
+; CHECK-LABEL: @test2(
+; CHECK-NOT: select
+; CHECK: or
+; CHECK-NOT: select
+}
+
+define void @test3(%C*) {
+entry:
+  %1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
+  %m = load i64** %1, align 8
+  %2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
+  %n = load i64** %2, align 8
+  %3 = getelementptr inbounds i64* %m, i64 9
+  %4 = bitcast i64* %3 to i64 (%C*)**
+  %5 = load i64 (%C*)** %4, align 8
+  %6 = icmp eq i64* %m, %n
+  %7 = select i1 %6, %C* %0, %C* null
+  %8 = icmp ne %C* %7, null
+  br i1 %8, label %10, label %12
+
+; <label>:9                                       ; preds = %10, %12
+  ret void
+
+; <label>:10                                      ; preds = %entry
+  %11 = getelementptr inbounds %C* %7, i64 0, i32 0
+  tail call void @bar(%struct.S* %11)
+  br label %9
+
+; <label>:12                                      ; preds = %entry
+  %13 = tail call i64 %5(%C* %0)
+  br label %9
+; CHECK-LABEL: @test3(
+; CHECK-NOT: select
+; CHECK: or
+; CHECK-NOT: select
+}
+
+define void @test4(%C*) {
+entry:
+  %1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
+  %m = load i64** %1, align 8
+  %2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
+  %n = load i64** %2, align 8
+  %3 = getelementptr inbounds i64* %m, i64 9
+  %4 = bitcast i64* %3 to i64 (%C*)**
+  %5 = load i64 (%C*)** %4, align 8
+  %6 = icmp eq i64* %m, %n
+  %7 = select i1 %6, %C* null, %C* %0
+  %8 = icmp ne %C* %7, null
+  br i1 %8, label %10, label %12
+
+; <label>:9                                       ; preds = %10, %12
+  ret void
+
+; <label>:10                                      ; preds = %entry
+  %11 = getelementptr inbounds %C* %7, i64 0, i32 0
+  tail call void @bar(%struct.S* %11)
+  br label %9
+
+; <label>:12                                      ; preds = %entry
+  %13 = tail call i64 %5(%C* %0)
+  br label %9
+; CHECK-LABEL: @test4(
+; CHECK-NOT: select
+; CHECK: or
+; CHECK-NOT: select
+}
+
+define void @test5(%C*, i1) {
+entry:
+  %2 = select i1 %1, %C* null, %C* %0
+  %3 = icmp ne %C* %2, null
+  br i1 %3, label %5, label %7
+
+; <label>:4                                       ; preds = %10, %12
+  ret void
+
+; <label>:5                                      ; preds = %entry
+  %6 = getelementptr inbounds %C* %2, i64 0, i32 0
+  tail call void @bar(%struct.S* %6)
+  br label %4
+
+; <label>:7                                      ; preds = %entry
+  tail call void @foobar()
+  br label %4
+; CHECK-LABEL: @test5(
+; CHECK-NOT: select
+; CHECK: or
+; CHECK-NOT: select
+}
diff --git a/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll b/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
new file mode 100644
index 0000000..894bf6d
--- /dev/null
+++ b/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
@@ -0,0 +1,327 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; This test is to verify that the instruction combiner is able to fold
+; a cttz/ctlz followed by a icmp + select into a single cttz/ctlz with
+; the 'is_zero_undef' flag cleared.
+
+define i16 @test1(i16 %x) {
+; CHECK-LABEL: @test1(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false)
+; CHECK-NEXT: ret i16 [[VAR]]
+entry:
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true)
+  %tobool = icmp ne i16 %x, 0
+  %cond = select i1 %tobool, i16 %0, i16 16
+  ret i16 %cond
+}
+
+define i32 @test2(i32 %x) {
+; CHECK-LABEL: @test2(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; CHECK-NEXT: ret i32 [[VAR]]
+entry:
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %tobool = icmp ne i32 %x, 0
+  %cond = select i1 %tobool, i32 %0, i32 32
+  ret i32 %cond
+}
+
+define i64 @test3(i64 %x) {
+; CHECK-LABEL: @test3(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; CHECK-NEXT: ret i64 [[VAR]]
+entry:
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %tobool = icmp ne i64 %x, 0
+  %cond = select i1 %tobool, i64 %0, i64 64
+  ret i64 %cond
+}
+
+define i16 @test4(i16 %x) {
+; CHECK-LABEL: @test4(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false)
+; CHECK-NEXT: ret i16 [[VAR]]
+entry:
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true)
+  %tobool = icmp eq i16 %x, 0
+  %cond = select i1 %tobool, i16 16, i16 %0
+  ret i16 %cond
+}
+
+define i32 @test5(i32 %x) {
+; CHECK-LABEL: @test5(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; CHECK-NEXT: ret i32 [[VAR]]
+entry:
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %tobool = icmp eq i32 %x, 0
+  %cond = select i1 %tobool, i32 32, i32 %0
+  ret i32 %cond
+}
+
+define i64 @test6(i64 %x) {
+; CHECK-LABEL: @test6(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; CHECK-NEXT: ret i64 [[VAR]]
+entry:
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %tobool = icmp eq i64 %x, 0
+  %cond = select i1 %tobool, i64 64, i64 %0
+  ret i64 %cond
+}
+
+define i16 @test1b(i16 %x) {
+; CHECK-LABEL: @test1b(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false)
+; CHECK-NEXT: ret i16 [[VAR]]
+entry:
+  %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true)
+  %tobool = icmp ne i16 %x, 0
+  %cond = select i1 %tobool, i16 %0, i16 16
+  ret i16 %cond
+}
+
+define i32 @test2b(i32 %x) {
+; CHECK-LABEL: @test2b(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: ret i32 [[VAR]]
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %tobool = icmp ne i32 %x, 0
+  %cond = select i1 %tobool, i32 %0, i32 32
+  ret i32 %cond
+}
+
+define i64 @test3b(i64 %x) {
+; CHECK-LABEL: @test3b(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; CHECK-NEXT: ret i64 [[VAR]]
+entry:
+  %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %tobool = icmp ne i64 %x, 0
+  %cond = select i1 %tobool, i64 %0, i64 64
+  ret i64 %cond
+}
+
+define i16 @test4b(i16 %x) {
+; CHECK-LABEL: @test4b(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false)
+; CHECK-NEXT: ret i16 [[VAR]]
+entry:
+  %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true)
+  %tobool = icmp eq i16 %x, 0
+  %cond = select i1 %tobool, i16 16, i16 %0
+  ret i16 %cond
+}
+
+define i32 @test5b(i32 %x) {
+; CHECK-LABEL: @test5b(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: ret i32 [[VAR]]
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %tobool = icmp eq i32 %x, 0
+  %cond = select i1 %tobool, i32 32, i32 %0
+  ret i32 %cond
+}
+
+define i64 @test6b(i64 %x) {
+; CHECK-LABEL: @test6b(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; CHECK-NEXT: ret i64 [[VAR]]
+entry:
+  %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %tobool = icmp eq i64 %x, 0
+  %cond = select i1 %tobool, i64 64, i64 %0
+  ret i64 %cond
+}
+
+define i32 @test1c(i16 %x) {
+; CHECK-LABEL: @test1c(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i32
+; CHECK-NEXT: ret i32 [[VAR2]]
+entry:
+  %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true)
+  %cast2 = zext i16 %0 to i32
+  %tobool = icmp ne i16 %x, 0
+  %cond = select i1 %tobool, i32 %cast2, i32 16
+  ret i32 %cond
+}
+
+define i64 @test2c(i16 %x) {
+; CHECK-LABEL: @test2c(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i64
+; CHECK-NEXT: ret i64 [[VAR2]]
+entry:
+  %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true)
+  %conv = zext i16 %0 to i64
+  %tobool = icmp ne i16 %x, 0
+  %cond = select i1 %tobool, i64 %conv, i64 16
+  ret i64 %cond
+}
+
+define i64 @test3c(i32 %x) {
+; CHECK-LABEL: @test3c(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i32 [[VAR1]] to i64
+; CHECK-NEXT: ret i64 [[VAR2]]
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %conv = zext i32 %0 to i64
+  %tobool = icmp ne i32 %x, 0
+  %cond = select i1 %tobool, i64 %conv, i64 32
+  ret i64 %cond
+}
+
+define i32 @test4c(i16 %x) {
+; CHECK-LABEL: @test4c(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i32
+; CHECK-NEXT: ret i32 [[VAR2]]
+entry:
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true)
+  %cast = zext i16 %0 to i32
+  %tobool = icmp ne i16 %x, 0
+  %cond = select i1 %tobool, i32 %cast, i32 16
+  ret i32 %cond
+}
+
+define i64 @test5c(i16 %x) {
+; CHECK-LABEL: @test5c(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i64
+; CHECK-NEXT: ret i64 [[VAR2]]
+entry:
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true)
+  %cast = zext i16 %0 to i64
+  %tobool = icmp ne i16 %x, 0
+  %cond = select i1 %tobool, i64 %cast, i64 16
+  ret i64 %cond
+}
+
+define i64 @test6c(i32 %x) {
+; CHECK-LABEL: @test6c(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i32 [[VAR1]] to i64
+; CHECK-NEXT: ret i64 [[VAR2]]
+entry:
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %cast = zext i32 %0 to i64
+  %tobool = icmp ne i32 %x, 0
+  %cond = select i1 %tobool, i64 %cast, i64 32
+  ret i64 %cond
+}
+
+define i16 @test1d(i64 %x) {
+; CHECK-LABEL: @test1d(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i16
+; CHECK-NEXT: ret i16 [[VAR2]]
+entry:
+  %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %conv = trunc i64 %0 to i16
+  %tobool = icmp ne i64 %x, 0
+  %cond = select i1 %tobool, i16 %conv, i16 64
+  ret i16 %cond
+}
+
+define i32 @test2d(i64 %x) {
+; CHECK-LABEL: @test2d(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i32
+; CHECK-NEXT: ret i32 [[VAR2]]
+entry:
+  %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i32
+  %tobool = icmp ne i64 %x, 0
+  %cond = select i1 %tobool, i32 %cast, i32 64
+  ret i32 %cond
+}
+
+define i16 @test3d(i32 %x) {
+; CHECK-LABEL: @test3d(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16
+; CHECK-NEXT: ret i16 [[VAR2]]
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %cast = trunc i32 %0 to i16
+  %tobool = icmp ne i32 %x, 0
+  %cond = select i1 %tobool, i16 %cast, i16 32
+  ret i16 %cond
+}
+
+define i16 @test4d(i64 %x) {
+; CHECK-LABEL: @test4d(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i16
+; CHECK-NEXT: ret i16 [[VAR2]]
+entry:
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i16
+  %tobool = icmp ne i64 %x, 0
+  %cond = select i1 %tobool, i16 %cast, i16 64
+  ret i16 %cond
+}
+
+define i32 @test5d(i64 %x) {
+; CHECK-LABEL: @test5d(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i32
+; CHECK-NEXT: ret i32 [[VAR2]]
+entry:
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i32
+  %tobool = icmp ne i64 %x, 0
+  %cond = select i1 %tobool, i32 %cast, i32 64
+  ret i32 %cond
+}
+
+define i16 @test6d(i32 %x) {
+; CHECK-LABEL: @test6d(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16
+; CHECK-NEXT: ret i16 [[VAR2]]
+entry:
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %cast = trunc i32 %0 to i16
+  %tobool = icmp ne i32 %x, 0
+  %cond = select i1 %tobool, i16 %cast, i16 32
+  ret i16 %cond
+}
+
+define i64 @select_bug1(i32 %x) {
+; CHECK-LABEL: @select_bug1(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i32 [[VAR1]] to i64
+; CHECK-NEXT: ret i64 [[VAR2]]
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+  %conv = zext i32 %0 to i64
+  %tobool = icmp ne i32 %x, 0
+  %cond = select i1 %tobool, i64 %conv, i64 32
+  ret i64 %cond
+}
+
+define i16 @select_bug2(i32 %x) {
+; CHECK-LABEL: @select_bug2(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16
+; CHECK-NEXT: ret i16 [[VAR2]]
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+  %conv = trunc i32 %0 to i16
+  %tobool = icmp ne i32 %x, 0
+  %cond = select i1 %tobool, i16 %conv, i16 32
+  ret i16 %cond
+}
+
+
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 6cf9f0f..a6a7aa9 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -308,6 +308,26 @@ define i32 @test16(i1 %C, i32* %P) {
 ; CHECK: ret i32 %V
 }
 
+;; It may be legal to load from a null address in a non-zero address space
+define i32 @test16_neg(i1 %C, i32 addrspace(1)* %P) {
+        %P2 = select i1 %C, i32 addrspace(1)* %P, i32 addrspace(1)* null
+        %V = load i32 addrspace(1)* %P2
+        ret i32 %V
+; CHECK-LABEL: @test16_neg
+; CHECK-NEXT: %P2 = select i1 %C, i32 addrspace(1)* %P, i32 addrspace(1)* null
+; CHECK-NEXT: %V = load i32 addrspace(1)* %P2
+; CHECK: ret i32 %V
+}
+define i32 @test16_neg2(i1 %C, i32 addrspace(1)* %P) {
+        %P2 = select i1 %C, i32 addrspace(1)* null, i32 addrspace(1)* %P
+        %V = load i32 addrspace(1)* %P2
+        ret i32 %V
+; CHECK-LABEL: @test16_neg2
+; CHECK-NEXT: %P2 = select i1 %C, i32 addrspace(1)* null, i32 addrspace(1)* %P
+; CHECK-NEXT: %V = load i32 addrspace(1)* %P2
+; CHECK: ret i32 %V
+}
+
 define i1 @test17(i32* %X, i1 %C) {
         %R = select i1 %C, i32* %X, i32* null           
         %RV = icmp eq i32* %R, null             
@@ -997,17 +1017,6 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_vector_of_2s(i32 %x, <2 x i32> %y) {
   ret <2 x i32> %select
 }
 
-; CHECK-LABEL: @select_icmp_and_8_eq_0_or_8(
-; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %x, 8
-; CHECK-NEXT: ret i32 [[OR]]
-define i32 @select_icmp_and_8_eq_0_or_8(i32 %x) {
-  %and = and i32 %x, 8
-  %cmp = icmp eq i32 %and, 0
-  %or = or i32 %x, 8
-  %or.x = select i1 %cmp, i32 %or, i32 %x
-  ret i32 %or.x
-}
-
 ; CHECK-LABEL: @select_icmp_and_8_ne_0_xor_8(
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, -9
 ; CHECK-NEXT: ret i32 [[AND]]
@@ -1030,27 +1039,6 @@ define i32 @select_icmp_and_8_eq_0_xor_8(i32 %x) {
   ret i32 %xor.x
 }
 
-; CHECK-LABEL: @select_icmp_and_8_ne_0_and_not_8(
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, -9
-; CHECK-NEXT: ret i32 [[AND]]
-define i32 @select_icmp_and_8_ne_0_and_not_8(i32 %x) {
-  %and = and i32 %x, 8
-  %cmp = icmp eq i32 %and, 0
-  %and1 = and i32 %x, -9
-  %x.and1 = select i1 %cmp, i32 %x, i32 %and1
-  ret i32 %x.and1
-}
-
-; CHECK-LABEL: @select_icmp_and_8_eq_0_and_not_8(
-; CHECK-NEXT: ret i32 %x
-define i32 @select_icmp_and_8_eq_0_and_not_8(i32 %x) {
-  %and = and i32 %x, 8
-  %cmp = icmp eq i32 %and, 0
-  %and1 = and i32 %x, -9
-  %and1.x = select i1 %cmp, i32 %and1, i32 %x
-  ret i32 %and1.x
-}
-
 ; CHECK-LABEL: @select_icmp_x_and_8_eq_0_y_xor_8(
 ; CHECK: select i1 %cmp, i64 %y, i64 %xor
 define i64 @select_icmp_x_and_8_eq_0_y_xor_8(i32 %x, i64 %y) {
@@ -1061,16 +1049,6 @@ define i64 @select_icmp_x_and_8_eq_0_y_xor_8(i32 %x, i64 %y) {
   ret i64 %y.xor
 }
 
-; CHECK-LABEL: @select_icmp_x_and_8_eq_0_y_and_not_8(
-; CHECK: select i1 %cmp, i64 %y, i64 %and1
-define i64 @select_icmp_x_and_8_eq_0_y_and_not_8(i32 %x, i64 %y) {
-  %and = and i32 %x, 8
-  %cmp = icmp eq i32 %and, 0
-  %and1 = and i64 %y, -9
-  %y.and1 = select i1 %cmp, i64 %y, i64 %and1
-  ret i64 %y.and1
-}
-
 ; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_xor_8(
 ; CHECK: select i1 %cmp, i64 %xor, i64 %y
 define i64 @select_icmp_x_and_8_ne_0_y_xor_8(i32 %x, i64 %y) {
@@ -1081,16 +1059,6 @@ define i64 @select_icmp_x_and_8_ne_0_y_xor_8(i32 %x, i64 %y) {
   ret i64 %xor.y
 }
 
-; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_and_not_8(
-; CHECK: select i1 %cmp, i64 %and1, i64 %y
-define i64 @select_icmp_x_and_8_ne_0_y_and_not_8(i32 %x, i64 %y) {
-  %and = and i32 %x, 8
-  %cmp = icmp eq i32 %and, 0
-  %and1 = and i64 %y, -9
-  %and1.y = select i1 %cmp, i64 %and1, i64 %y
-  ret i64 %and1.y
-}
-
 ; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_or_8(
 ; CHECK: xor i64 %1, 8
 ; CHECK: or i64 %2, %y
@@ -1102,6 +1070,39 @@ define i64 @select_icmp_x_and_8_ne_0_y_or_8(i32 %x, i64 %y) {
   ret i64 %or.y
 }
 
+; CHECK-LABEL: @select_icmp_and_2147483648_ne_0_xor_2147483648(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 2147483647
+; CHECK-NEXT: ret i32 [[AND]]
+define i32 @select_icmp_and_2147483648_ne_0_xor_2147483648(i32 %x) {
+  %and = and i32 %x, 2147483648
+  %cmp = icmp eq i32 %and, 0
+  %xor = xor i32 %x, 2147483648
+  %x.xor = select i1 %cmp, i32 %x, i32 %xor
+  ret i32 %x.xor
+}
+
+; CHECK-LABEL: @select_icmp_and_2147483648_eq_0_xor_2147483648(
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %x, -2147483648
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_and_2147483648_eq_0_xor_2147483648(i32 %x) {
+  %and = and i32 %x, 2147483648
+  %cmp = icmp eq i32 %and, 0
+  %xor = xor i32 %x, 2147483648
+  %xor.x = select i1 %cmp, i32 %xor, i32 %x
+  ret i32 %xor.x
+}
+
+; CHECK-LABEL: @select_icmp_x_and_2147483648_ne_0_or_2147483648(
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %x, -2147483648
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_x_and_2147483648_ne_0_or_2147483648(i32 %x) {
+  %and = and i32 %x, 2147483648
+  %cmp = icmp eq i32 %and, 0
+  %or = or i32 %x, 2147483648
+  %or.x = select i1 %cmp, i32 %or, i32 %x
+  ret i32 %or.x
+}
+
 define i32 @test65(i64 %x) {
   %1 = and i64 %x, 16
   %2 = icmp ne i64 %1, 0
@@ -1256,7 +1257,7 @@ define i32 @test76(i1 %flag, i32* %x) {
   ret i32 %v
 }
 
-declare void @scribble_on_memory(i32*)
+declare void @scribble_on_i32(i32*)
 
 define i32 @test77(i1 %flag, i32* %x) {
 ; The load here must not be speculated around the select. One side of the
@@ -1264,13 +1265,13 @@ define i32 @test77(i1 %flag, i32* %x) {
 ; load does.
 ; CHECK-LABEL: @test77(
 ; CHECK: %[[A:.*]] = alloca i32, align 1
-; CHECK: call void @scribble_on_memory(i32* %[[A]])
+; CHECK: call void @scribble_on_i32(i32* %[[A]])
 ; CHECK: store i32 0, i32* %x
 ; CHECK: %[[P:.*]] = select i1 %flag, i32* %[[A]], i32* %x
 ; CHECK: load i32* %[[P]]
 
   %under_aligned = alloca i32, align 1
-  call void @scribble_on_memory(i32* %under_aligned)
+  call void @scribble_on_i32(i32* %under_aligned)
   store i32 0, i32* %x
   %p = select i1 %flag, i32* %under_aligned, i32* %x
   %v = load i32* %p
@@ -1327,8 +1328,8 @@ define i32 @test80(i1 %flag) {
 entry:
   %x = alloca i32
   %y = alloca i32
-  call void @scribble_on_memory(i32* %x)
-  call void @scribble_on_memory(i32* %y)
+  call void @scribble_on_i32(i32* %x)
+  call void @scribble_on_i32(i32* %y)
   %tmp = load i32* %x
   store i32 %tmp, i32* %y
   %p = select i1 %flag, i32* %x, i32* %y
@@ -1351,8 +1352,8 @@ entry:
   %y = alloca i32
   %x1 = bitcast float* %x to i32*
   %y1 = bitcast i32* %y to float*
-  call void @scribble_on_memory(i32* %x1)
-  call void @scribble_on_memory(i32* %y)
+  call void @scribble_on_i32(i32* %x1)
+  call void @scribble_on_i32(i32* %y)
   %tmp = load i32* %x1
   store i32 %tmp, i32* %y
   %p = select i1 %flag, float* %x, float* %y1
@@ -1377,11 +1378,145 @@ entry:
   %y = alloca i32
   %x1 = bitcast float* %x to i32*
   %y1 = bitcast i32* %y to float*
-  call void @scribble_on_memory(i32* %x1)
-  call void @scribble_on_memory(i32* %y)
+  call void @scribble_on_i32(i32* %x1)
+  call void @scribble_on_i32(i32* %y)
   %tmp = load float* %x
   store float %tmp, float* %y1
   %p = select i1 %flag, i32* %x1, i32* %y
   %v = load i32* %p
   ret i32 %v
 }
+
+declare void @scribble_on_i64(i64*)
+declare void @scribble_on_i128(i128*)
+
+define i8* @test83(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers and requires inttoptr casts.
+; CHECK-LABEL: @test83(
+; CHECK:         %[[X:.*]] = alloca i8*
+; CHECK-NEXT:    %[[Y:.*]] = alloca i8*
+; CHECK-DAG:     %[[X2:.*]] = bitcast i8** %[[X]] to i64*
+; CHECK-DAG:     %[[Y2:.*]] = bitcast i8** %[[Y]] to i64*
+; CHECK:         %[[V:.*]] = load i64* %[[X2]]
+; CHECK-NEXT:    store i64 %[[V]], i64* %[[Y2]]
+; CHECK-NEXT:    %[[C:.*]] = inttoptr i64 %[[V]] to i8*
+; CHECK-NEXT:    ret i8* %[[S]]
+entry:
+  %x = alloca i8*
+  %y = alloca i64
+  %x1 = bitcast i8** %x to i64*
+  %y1 = bitcast i64* %y to i8**
+  call void @scribble_on_i64(i64* %x1)
+  call void @scribble_on_i64(i64* %y)
+  %tmp = load i64* %x1
+  store i64 %tmp, i64* %y
+  %p = select i1 %flag, i8** %x, i8** %y1
+  %v = load i8** %p
+  ret i8* %v
+}
+
+define i64 @test84(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers and requires a ptrtoint cast.
+; CHECK-LABEL: @test84(
+; CHECK:         %[[X:.*]] = alloca i8*
+; CHECK-NEXT:    %[[Y:.*]] = alloca i8*
+; CHECK:         %[[V:.*]] = load i8** %[[X]]
+; CHECK-NEXT:    store i8* %[[V]], i8** %[[Y]]
+; CHECK-NEXT:    %[[C:.*]] = ptrtoint i8* %[[V]] to i64
+; CHECK-NEXT:    ret i64 %[[C]]
+entry:
+  %x = alloca i8*
+  %y = alloca i64
+  %x1 = bitcast i8** %x to i64*
+  %y1 = bitcast i64* %y to i8**
+  call void @scribble_on_i64(i64* %x1)
+  call void @scribble_on_i64(i64* %y)
+  %tmp = load i8** %x
+  store i8* %tmp, i8** %y1
+  %p = select i1 %flag, i64* %x1, i64* %y
+  %v = load i64* %p
+  ret i64 %v
+}
+
+define i8* @test85(i1 %flag) {
+; Test that we can't speculate the load around the select. The load of the
+; pointer doesn't load all of the stored integer bits. We could fix this, but it
+; would require endianness checks and other nastiness.
+; CHECK-LABEL: @test85(
+; CHECK:         %[[T:.*]] = load i128*
+; CHECK-NEXT:    store i128 %[[T]], i128*
+; CHECK-NEXT:    %[[X:.*]] = load i8**
+; CHECK-NEXT:    %[[Y:.*]] = load i8**
+; CHECK-NEXT:    %[[V:.*]] = select i1 %flag, i8* %[[X]], i8* %[[Y]]
+; CHECK-NEXT:    ret i8* %[[V]]
+entry:
+  %x = alloca [2 x i8*]
+  %y = alloca i128
+  %x1 = bitcast [2 x i8*]* %x to i8**
+  %x2 = bitcast i8** %x1 to i128*
+  %y1 = bitcast i128* %y to i8**
+  call void @scribble_on_i128(i128* %x2)
+  call void @scribble_on_i128(i128* %y)
+  %tmp = load i128* %x2
+  store i128 %tmp, i128* %y
+  %p = select i1 %flag, i8** %x1, i8** %y1
+  %v = load i8** %p
+  ret i8* %v
+}
+
+define i128 @test86(i1 %flag) {
+; Test that we can't speculate the load around the select when the integer size
+; is larger than the pointer size. The store of the pointer doesn't store to all
+; the bits of the integer.
+;
+; CHECK-LABEL: @test86(
+; CHECK:         %[[T:.*]] = load i8**
+; CHECK-NEXT:    store i8* %[[T]], i8**
+; CHECK-NEXT:    %[[X:.*]] = load i128*
+; CHECK-NEXT:    %[[Y:.*]] = load i128*
+; CHECK-NEXT:    %[[V:.*]] = select i1 %flag, i128 %[[X]], i128 %[[Y]]
+; CHECK-NEXT:    ret i128 %[[V]]
+entry:
+  %x = alloca [2 x i8*]
+  %y = alloca i128
+  %x1 = bitcast [2 x i8*]* %x to i8**
+  %x2 = bitcast i8** %x1 to i128*
+  %y1 = bitcast i128* %y to i8**
+  call void @scribble_on_i128(i128* %x2)
+  call void @scribble_on_i128(i128* %y)
+  %tmp = load i8** %x1
+  store i8* %tmp, i8** %y1
+  %p = select i1 %flag, i128* %x2, i128* %y
+  %v = load i128* %p
+  ret i128 %v
+}
+
+define i32 @test_select_select0(i32 %a, i32 %r0, i32 %r1, i32 %v1, i32 %v2) {
+  ; CHECK-LABEL: @test_select_select0(
+  ; CHECK: %[[C0:.*]] = icmp sge i32 %a, %v1
+  ; CHECK-NEXT: %[[C1:.*]] = icmp slt i32 %a, %v2
+  ; CHECK-NEXT: %[[C:.*]] = and i1 %[[C1]], %[[C0]]
+  ; CHECK-NEXT: %[[SEL:.*]] = select i1 %[[C]], i32 %r0, i32 %r1
+  ; CHECK-NEXT: ret i32 %[[SEL]]
+  %c0 = icmp sge i32 %a, %v1
+  %s0 = select i1 %c0, i32 %r0, i32 %r1
+  %c1 = icmp slt i32 %a, %v2
+  %s1 = select i1 %c1, i32 %s0, i32 %r1
+  ret i32 %s1
+}
+
+define i32 @test_select_select1(i32 %a, i32 %r0, i32 %r1, i32 %v1, i32 %v2) {
+  ; CHECK-LABEL: @test_select_select1(
+  ; CHECK: %[[C0:.*]] = icmp sge i32 %a, %v1
+  ; CHECK-NEXT: %[[C1:.*]] = icmp slt i32 %a, %v2
+  ; CHECK-NEXT: %[[C:.*]] = or i1 %[[C1]], %[[C0]]
+  ; CHECK-NEXT: %[[SEL:.*]] = select i1 %[[C]], i32 %r0, i32 %r1
+  ; CHECK-NEXT: ret i32 %[[SEL]]
+  %c0 = icmp sge i32 %a, %v1
+  %s0 = select i1 %c0, i32 %r0, i32 %r1
+  %c1 = icmp slt i32 %a, %v2
+  %s1 = select i1 %c1, i32 %r0, i32 %s0
+  ret i32 %s1
+}
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 5586bb6..0b5b5de 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -57,7 +57,7 @@ define <4 x i32> @test5_zero_vector(<4 x i32> %A) {
 define <4 x i32> @test5_non_splat_vector(<4 x i32> %A) {
 ; CHECK-LABEL: @test5_non_splat_vector(
 ; CHECK-NOT: ret <4 x i32> undef
-  %B = shl <4 x i32> %A, <i32 32, i32 1, i32 2, i32 3>
+  %B = lshr <4 x i32> %A, <i32 32, i32 1, i32 2, i32 3>
   ret <4 x i32> %B
 }
 
@@ -84,14 +84,14 @@ define <4 x i32> @test5a_non_splat_vector(<4 x i32> %A) {
 
 define i32 @test5b() {
 ; CHECK-LABEL: @test5b(
-; CHECK: ret i32 -1
+; CHECK: ret i32 0
         %B = ashr i32 undef, 2  ;; top two bits must be equal, so not undef
         ret i32 %B
 }
 
 define i32 @test5b2(i32 %A) {
 ; CHECK-LABEL: @test5b2(
-; CHECK: ret i32 -1
+; CHECK: ret i32 0
         %B = ashr i32 undef, %A  ;; top %A bits must be equal, so not undef
         ret i32 %B
 }
@@ -738,67 +738,49 @@ define i32 @test56(i32 %x) {
 
 
 define i32 @test57(i32 %x) {
-  %shr = lshr i32 %x, 1
-  %shl = shl i32 %shr, 4
-  %and = and i32 %shl, 16
-  ret i32 %and
-; CHECK-LABEL: @test57(
-; CHECK: shl i32 %x, 3
-}
-
-define i32 @test58(i32 %x) {
-  %shr = lshr i32 %x, 1
-  %shl = shl i32 %shr, 4
-  %or = or i32 %shl, 8
-  ret i32 %or
-; CHECK-LABEL: @test58(
-; CHECK: shl i32 %x, 3
-}
-
-define i32 @test59(i32 %x) {
   %shr = ashr i32 %x, 1
   %shl = shl i32 %shr, 4
   %or = or i32 %shl, 7
   ret i32 %or
-; CHECK-LABEL: @test59(
+; CHECK-LABEL: @test57(
 ; CHECK: %shl = shl i32 %shr1, 4
 }
 
 
-define i32 @test60(i32 %x) {
+define i32 @test58(i32 %x) {
   %shr = ashr i32 %x, 4
   %shl = shl i32 %shr, 1
   %or = or i32 %shl, 1
   ret i32 %or
-; CHECK-LABEL: @test60(
+; CHECK-LABEL: @test58(
 ; CHECK: ashr i32 %x, 3
 }
 
 
-define i32 @test61(i32 %x) {
+define i32 @test59(i32 %x) {
   %shr = ashr i32 %x, 4
   %shl = shl i32 %shr, 1
   %or = or i32 %shl, 2
   ret i32 %or
-; CHECK-LABEL: @test61(
+; CHECK-LABEL: @test59(
 ; CHECK: ashr i32 %x, 4
 }
 
 ; propagate "exact" trait
-define i32 @test62(i32 %x) {
+define i32 @test60(i32 %x) {
   %shr = ashr exact i32 %x, 4
   %shl = shl i32 %shr, 1
   %or = or i32 %shl, 1
   ret i32 %or
-; CHECK-LABEL: @test62(
+; CHECK-LABEL: @test60(
 ; CHECK: ashr exact i32 %x, 3
 }
 
 ; PR17026
-; CHECK-LABEL: @test63(
+; CHECK-LABEL: @test61(
 ; CHECK-NOT: sh
 ; CHECK: ret
-define void @test63(i128 %arg) {
+define void @test61(i128 %arg) {
 bb:
   br i1 undef, label %bb1, label %bb12
 
@@ -830,29 +812,29 @@ bb12:                                             ; preds = %bb11, %bb8, %bb
   ret void
 }
 
-define i32 @test64(i32 %a) {
-; CHECK-LABEL: @test64(
+define i32 @test62(i32 %a) {
+; CHECK-LABEL: @test62(
 ; CHECK-NEXT: ret i32 undef
   %b = ashr i32 %a, 32  ; shift all bits out
   ret i32 %b
 }
 
-define <4 x i32> @test64_splat_vector(<4 x i32> %a) {
-; CHECK-LABEL: @test64_splat_vector
+define <4 x i32> @test62_splat_vector(<4 x i32> %a) {
+; CHECK-LABEL: @test62_splat_vector
 ; CHECK-NEXT: ret <4 x i32> undef
   %b = ashr <4 x i32> %a, <i32 32, i32 32, i32 32, i32 32>  ; shift all bits out
   ret <4 x i32> %b
 }
 
-define <4 x i32> @test64_non_splat_vector(<4 x i32> %a) {
-; CHECK-LABEL: @test64_non_splat_vector
+define <4 x i32> @test62_non_splat_vector(<4 x i32> %a) {
+; CHECK-LABEL: @test62_non_splat_vector
 ; CHECK-NOT: ret <4 x i32> undef
   %b = ashr <4 x i32> %a, <i32 32, i32 0, i32 1, i32 2>  ; shift all bits out
   ret <4 x i32> %b
 }
 
-define <2 x i65> @test_65(<2 x i64> %t) {
-; CHECK-LABEL: @test_65
+define <2 x i65> @test_63(<2 x i64> %t) {
+; CHECK-LABEL: @test_63
   %a = zext <2 x i64> %t to <2 x i65>
   %sext = shl <2 x i65> %a, <i65 33, i65 33>
   %b = ashr <2 x i65> %sext, <i65 33, i65 33>
diff --git a/test/Transforms/InstCombine/signext.ll b/test/Transforms/InstCombine/signext.ll
index d700497..3a714d7 100644
--- a/test/Transforms/InstCombine/signext.ll
+++ b/test/Transforms/InstCombine/signext.ll
@@ -34,54 +34,45 @@ define i32 @test3(i16 %P) {
 ; CHECK: ret i32 %tmp.5
 }
 
-define i32 @test4(i16 %P) {
-        %tmp.1 = zext i16 %P to i32             ; <i32> [#uses=1]
-        %tmp.4 = xor i32 %tmp.1, 32768          ; <i32> [#uses=1]
-        %tmp.5 = add i32 %tmp.4, -32768         ; <i32> [#uses=1]
-        ret i32 %tmp.5
-; CHECK-LABEL: @test4(
-; CHECK: %tmp.5 = sext i16 %P to i32
-; CHECK: ret i32 %tmp.5
-}
-
-define i32 @test5(i32 %x) {
+define i32 @test4(i32 %x) {
         %tmp.1 = and i32 %x, 255                ; <i32> [#uses=1]
         %tmp.2 = xor i32 %tmp.1, 128            ; <i32> [#uses=1]
         %tmp.3 = add i32 %tmp.2, -128           ; <i32> [#uses=1]
         ret i32 %tmp.3
-; CHECK-LABEL: @test5(
+; CHECK-LABEL: @test4(
 ; CHECK: %sext = shl i32 %x, 24
 ; CHECK: %tmp.3 = ashr exact i32 %sext, 24
 ; CHECK: ret i32 %tmp.3
 }
 
-define i32 @test6(i32 %x) {
+define i32 @test5(i32 %x) {
         %tmp.2 = shl i32 %x, 16         ; <i32> [#uses=1]
         %tmp.4 = ashr i32 %tmp.2, 16            ; <i32> [#uses=1]
         ret i32 %tmp.4
-; CHECK-LABEL: @test6(
+; CHECK-LABEL: @test5(
 ; CHECK: %tmp.2 = shl i32 %x, 16
 ; CHECK: %tmp.4 = ashr exact i32 %tmp.2, 16
 ; CHECK: ret i32 %tmp.4
 }
 
-define i32 @test7(i16 %P) {
+define i32 @test6(i16 %P) {
   %tmp.1 = zext i16 %P to i32                     ; <i32> [#uses=1]
   %sext1 = shl i32 %tmp.1, 16                     ; <i32> [#uses=1]
   %tmp.5 = ashr i32 %sext1, 16                    ; <i32> [#uses=1]
   ret i32 %tmp.5
-; CHECK-LABEL: @test7(
+; CHECK-LABEL: @test6(
 ; CHECK: %tmp.5 = sext i16 %P to i32
 ; CHECK: ret i32 %tmp.5
 }
 
-define i32 @test8(i32 %x) nounwind readnone {
+define i32 @test7(i32 %x) nounwind readnone {
 entry:
   %shr = lshr i32 %x, 5                           ; <i32> [#uses=1]
   %xor = xor i32 %shr, 67108864                   ; <i32> [#uses=1]
   %sub = add i32 %xor, -67108864                  ; <i32> [#uses=1]
   ret i32 %sub
-; CHECK-LABEL: @test8(
+; CHECK-LABEL: @test7(
 ; CHECK: %sub = ashr i32 %x, 5
 ; CHECK: ret i32 %sub
 }
+
diff --git a/test/Transforms/InstCombine/sitofp.ll b/test/Transforms/InstCombine/sitofp.ll
index bd31b89..8209778 100644
--- a/test/Transforms/InstCombine/sitofp.ll
+++ b/test/Transforms/InstCombine/sitofp.ll
@@ -1,28 +1,40 @@
-; RUN: opt < %s -instcombine -S | not grep itofp
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
+; CHECK-LABEL: test1
+; CHECK: ret i1 true
 define i1 @test1(i8 %A) {
   %B = sitofp i8 %A to double
   %C = fcmp ult double %B, 128.0
-  ret i1 %C  ;  True!
+  ret i1 %C
 }
+
+; CHECK-LABEL: test2
+; CHECK: ret i1 true
 define i1 @test2(i8 %A) {
   %B = sitofp i8 %A to double
   %C = fcmp ugt double %B, -128.1
-  ret i1 %C  ;  True!
+  ret i1 %C
 }
 
+; CHECK-LABEL: test3
+; CHECK: ret i1 true
 define i1 @test3(i8 %A) {
   %B = sitofp i8 %A to double
   %C = fcmp ule double %B, 127.0
-  ret i1 %C  ;  true!
+  ret i1 %C
 }
 
+; CHECK-LABEL: test4
+; CHECK: icmp ne i8 %A, 127
+; CHECK-NEXT: ret i1
 define i1 @test4(i8 %A) {
   %B = sitofp i8 %A to double
   %C = fcmp ult double %B, 127.0
-  ret i1 %C  ;  A != 127
+  ret i1 %C
 }
 
+; CHECK-LABEL: test5
+; CHECK: ret i32
 define i32 @test5(i32 %A) {
   %B = sitofp i32 %A to double
   %C = fptosi double %B to i32
@@ -31,25 +43,142 @@ define i32 @test5(i32 %A) {
   ret i32 %E
 }
 
+; CHECK-LABEL: test6
+; CHECK: and i32 %A, 39
+; CHECK-NEXT: ret i32
 define i32 @test6(i32 %A) {
-	%B = and i32 %A, 7		; <i32> [#uses=1]
-	%C = and i32 %A, 32		; <i32> [#uses=1]
-	%D = sitofp i32 %B to double		; <double> [#uses=1]
-	%E = sitofp i32 %C to double		; <double> [#uses=1]
-	%F = fadd double %D, %E		; <double> [#uses=1]
-	%G = fptosi double %F to i32		; <i32> [#uses=1]
-	ret i32 %G
+  %B = and i32 %A, 7
+  %C = and i32 %A, 32
+  %D = sitofp i32 %B to double
+  %E = sitofp i32 %C to double
+  %F = fadd double %D, %E
+  %G = fptosi double %F to i32
+  ret i32 %G
+}
+
+; CHECK-LABEL: test7
+; CHECK: ret i32
+define i32 @test7(i32 %A) nounwind {
+  %B = sitofp i32 %A to double
+  %C = fptoui double %B to i32
+  ret i32 %C
+}
+
+; CHECK-LABEL: test8
+; CHECK: ret i32
+define i32 @test8(i32 %A) nounwind {
+  %B = uitofp i32 %A to double
+  %C = fptosi double %B to i32
+  ret i32 %C
+}
+
+; CHECK-LABEL: test9
+; CHECK: zext i8
+; CHECK-NEXT: ret i32
+define i32 @test9(i8 %A) nounwind {
+  %B = sitofp i8 %A to float
+  %C = fptoui float %B to i32
+  ret i32 %C
+}
+
+; CHECK-LABEL: test10
+; CHECK: sext i8
+; CHECK-NEXT: ret i32
+define i32 @test10(i8 %A) nounwind {
+  %B = sitofp i8 %A to float
+  %C = fptosi float %B to i32
+  ret i32 %C
+}
+
+; If the input value is outside of the range of the output cast, it's
+; undefined behavior, so we can assume it fits.
+; CHECK-LABEL: test11
+; CHECK: trunc
+; CHECK-NEXT: ret i8
+define i8 @test11(i32 %A) nounwind {
+  %B = sitofp i32 %A to float
+  %C = fptosi float %B to i8
+  ret i8 %C
+}
+
+; If the input value is negative, it'll be outside the range of the
+; output cast, and thus undefined behavior.
+; CHECK-LABEL: test12
+; CHECK: zext i8
+; CHECK-NEXT: ret i32
+define i32 @test12(i8 %A) nounwind {
+  %B = sitofp i8 %A to float
+  %C = fptoui float %B to i32
+  ret i32 %C
+}
+
+; This can't fold because the 25-bit input doesn't fit in the mantissa.
+; CHECK-LABEL: test13
+; CHECK: uitofp
+; CHECK-NEXT: fptoui
+define i32 @test13(i25 %A) nounwind {
+  %B = uitofp i25 %A to float
+  %C = fptoui float %B to i32
+  ret i32 %C
+}
+
+; But this one can.
+; CHECK-LABEL: test14
+; CHECK: zext i24
+; CHECK-NEXT: ret i32
+define i32 @test14(i24 %A) nounwind {
+  %B = uitofp i24 %A to float
+  %C = fptoui float %B to i32
+  ret i32 %C
+}
+
+; And this one can too.
+; CHECK-LABEL: test15
+; CHECK: trunc i32
+; CHECK-NEXT: ret i24
+define i24 @test15(i32 %A) nounwind {
+  %B = uitofp i32 %A to float
+  %C = fptoui float %B to i24
+  ret i24 %C
+}
+
+; This can fold because the 25-bit input is signed and we disard the sign bit.
+; CHECK-LABEL: test16
+; CHECK: zext
+define i32 @test16(i25 %A) nounwind {
+ %B = sitofp i25 %A to float
+ %C = fptoui float %B to i32
+ ret i32 %C
+}
+
+; This can't fold because the 26-bit input won't fit the mantissa
+; even after disarding the signed bit.
+; CHECK-LABEL: test17
+; CHECK: sitofp
+; CHECK-NEXT: fptoui
+define i32 @test17(i26 %A) nounwind {
+ %B = sitofp i26 %A to float
+ %C = fptoui float %B to i32
+ ret i32 %C
 }
 
-define i32 @test7(i32 %a) nounwind {
-	%b = sitofp i32 %a to double		; <double> [#uses=1]
-	%c = fptoui double %b to i32		; <i32> [#uses=1]
-	ret i32 %c
+; This can fold because the 54-bit output is signed and we disard the sign bit.
+; CHECK-LABEL: test18
+; CHECK: trunc
+define i54 @test18(i64 %A) nounwind {
+ %B = sitofp i64 %A to double
+ %C = fptosi double %B to i54
+ ret i54 %C
 }
 
-define i32 @test8(i32 %a) nounwind {
-	%b = uitofp i32 %a to double		; <double> [#uses=1]
-	%c = fptosi double %b to i32		; <i32> [#uses=1]
-	ret i32 %c
+; This can't fold because the 55-bit output won't fit the mantissa
+; even after disarding the sign bit.
+; CHECK-LABEL: test19
+; CHECK: sitofp
+; CHECK-NEXT: fptosi
+define i55 @test19(i64 %A) nounwind {
+ %B = sitofp i64 %A to double
+ %C = fptosi double %B to i55
+ ret i55 %C
 }
 
diff --git a/test/Transforms/InstCombine/statepoint.ll b/test/Transforms/InstCombine/statepoint.ll
new file mode 100644
index 0000000..bee219d
--- /dev/null
+++ b/test/Transforms/InstCombine/statepoint.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; These tests check the optimizations specific to
+; pointers being relocated at a statepoint.
+
+
+declare void @func()
+
+define i1 @test_negative(i32 addrspace(1)* %p) {
+entry:
+  %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
+  %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 4, i32 4)
+  %cmp = icmp eq i32 addrspace(1)* %pnew, null
+  ret i1 %cmp
+; CHECK-LABEL: test_negative
+; CHECK: %pnew = call i32 addrspace(1)*
+; CHECK: ret i1 %cmp
+}
+
+define i1 @test_nonnull(i32 addrspace(1)* nonnull %p) {
+entry:
+  %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
+  %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 4, i32 4)
+  %cmp = icmp eq i32 addrspace(1)* %pnew, null
+  ret i1 %cmp
+; CHECK-LABEL: test_nonnull
+; CHECK: ret i1 false
+}
+
+define i1 @test_null() {
+entry:
+  %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* null)
+  %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 4, i32 4)
+  %cmp = icmp eq i32 addrspace(1)* %pnew, null
+  ret i1 %cmp
+; CHECK-LABEL: test_null
+; CHECK-NOT: %pnew
+; CHECK: ret i1 true
+}
+
+define i1 @test_undef() {
+entry:
+  %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* undef)
+  %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 4, i32 4)
+  %cmp = icmp eq i32 addrspace(1)* %pnew, null
+  ret i1 %cmp
+; CHECK-LABEL: test_undef
+; CHECK-NOT: %pnew
+; CHECK: ret i1 undef
+}
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll
index b64c800..0bb1759 100644
--- a/test/Transforms/InstCombine/store.ll
+++ b/test/Transforms/InstCombine/store.ll
@@ -113,8 +113,8 @@ for.end:                                          ; preds = %for.cond
 ; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0
 }
 
-!0 = metadata !{metadata !4, metadata !4, i64 0}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"float", metadata !1}
-!4 = metadata !{metadata !"int", metadata !1}
+!0 = !{!4, !4, i64 0}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"float", !1}
+!4 = !{!"int", !1}
diff --git a/test/Transforms/InstCombine/stpcpy_chk-1.ll b/test/Transforms/InstCombine/stpcpy_chk-1.ll
index 8a02529..393c5d9 100644
--- a/test/Transforms/InstCombine/stpcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/stpcpy_chk-1.ll
@@ -11,46 +11,50 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 
 ; Check cases where slen >= strlen (src).
 
-define void @test_simplify1() {
+define i8* @test_simplify1() {
 ; CHECK-LABEL: @test_simplify1(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
-  call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 60)
-  ret void
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11)
+  %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 60)
+  ret i8* %ret
 }
 
-define void @test_simplify2() {
+define i8* @test_simplify2() {
 ; CHECK-LABEL: @test_simplify2(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
-  call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 12)
-  ret void
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11)
+  %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 12)
+  ret i8* %ret
 }
 
-define void @test_simplify3() {
+define i8* @test_simplify3() {
 ; CHECK-LABEL: @test_simplify3(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
-  call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
-  ret void
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11)
+  %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret i8* %ret
 }
 
 ; Check cases where there are no string constants.
 
-define void @test_simplify4() {
+define i8* @test_simplify4() {
 ; CHECK-LABEL: @test_simplify4(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
 
-; CHECK-NEXT: call i8* @stpcpy
-  call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
-  ret void
+; CHECK-NEXT: %stpcpy = call i8* @stpcpy(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8]* @b, i32 0, i32 0))
+; CHECK-NEXT: ret i8* %stpcpy
+  %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret i8* %ret
 }
 
 ; Check case where the string length is not constant.
@@ -60,10 +64,11 @@ define i8* @test_simplify5() {
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK: @__memcpy_chk
+; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
+; CHECK-NEXT: %1 = call i8* @__memcpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 %len)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11)
   %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 %len)
-; CHECK: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11)
   ret i8* %ret
 }
 
@@ -73,8 +78,9 @@ define i8* @test_simplify6() {
 ; CHECK-LABEL: @test_simplify6(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
 
-; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen
-; CHECK-NEXT: getelementptr inbounds [60 x i8]* @a, i32 0, i32 [[LEN]]
+; CHECK-NEXT: %strlen = call i32 @strlen(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0))
+; CHECK-NEXT: %1 = getelementptr inbounds [60 x i8]* @a, i32 0, i32 %strlen
+; CHECK-NEXT: ret i8* %1
   %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %dst, i32 %len)
   ret i8* %ret
@@ -82,14 +88,15 @@ define i8* @test_simplify6() {
 
 ; Check case where slen < strlen (src).
 
-define void @test_no_simplify1() {
+define i8* @test_no_simplify1() {
 ; CHECK-LABEL: @test_no_simplify1(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
 
-; CHECK-NEXT: call i8* @__stpcpy_chk
-  call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 8)
-  ret void
+; CHECK-NEXT: %ret = call i8* @__stpcpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8]* @b, i32 0, i32 0), i32 8)
+; CHECK-NEXT: ret i8* %ret
+  %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 8)
+  ret i8* %ret
 }
 
 declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind
diff --git a/test/Transforms/InstCombine/strcpy_chk-1.ll b/test/Transforms/InstCombine/strcpy_chk-1.ll
index 8e7fec7..e3f163f 100644
--- a/test/Transforms/InstCombine/strcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -11,59 +11,65 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 
 ; Check cases where slen >= strlen (src).
 
-define void @test_simplify1() {
+define i8* @test_simplify1() {
 ; CHECK-LABEL: @test_simplify1(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
-  call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
-  ret void
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0)
+  %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
+  ret i8* %ret
 }
 
-define void @test_simplify2() {
+define i8* @test_simplify2() {
 ; CHECK-LABEL: @test_simplify2(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
-  call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 12)
-  ret void
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0)
+  %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 12)
+  ret i8* %ret
 }
 
-define void @test_simplify3() {
+define i8* @test_simplify3() {
 ; CHECK-LABEL: @test_simplify3(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
-  call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
-  ret void
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0)
+  %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret i8* %ret
 }
 
 ; Check cases where there are no string constants.
 
-define void @test_simplify4() {
+define i8* @test_simplify4() {
 ; CHECK-LABEL: @test_simplify4(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
 
-; CHECK-NEXT: call i8* @strcpy
-  call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
-  ret void
+; CHECK-NEXT: %strcpy = call i8* @strcpy(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8]* @b, i32 0, i32 0))
+; CHECK-NEXT: ret i8* %strcpy
+  %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret i8* %ret
 }
 
 ; Check case where the string length is not constant.
 
-define void @test_simplify5() {
+define i8* @test_simplify5() {
 ; CHECK-LABEL: @test_simplify5(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK: @__memcpy_chk
+; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
+; CHECK-NEXT: %1 = call i8* @__memcpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 %len)
+; CHECK-NEXT: ret i8* %1
   %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
-  call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len)
-  ret void
+  %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len)
+  ret i8* %ret
 }
 
 ; Check case where the source and destination are the same.
@@ -72,7 +78,9 @@ define i8* @test_simplify6() {
 ; CHECK-LABEL: @test_simplify6(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
 
-; CHECK: getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0)
+; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
+; CHECK-NEXT: %ret = call i8* @__strcpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i32 %len)
+; CHECK-NEXT: ret i8* %ret
   %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   %ret = call i8* @__strcpy_chk(i8* %dst, i8* %dst, i32 %len)
   ret i8* %ret
@@ -80,14 +88,15 @@ define i8* @test_simplify6() {
 
 ; Check case where slen < strlen (src).
 
-define void @test_no_simplify1() {
+define i8* @test_no_simplify1() {
 ; CHECK-LABEL: @test_no_simplify1(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
 
-; CHECK-NEXT: call i8* @__strcpy_chk
-  call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8)
-  ret void
+; CHECK-NEXT: %ret = call i8* @__strcpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8]* @b, i32 0, i32 0), i32 8)
+; CHECK-NEXT: ret i8* %ret
+  %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8)
+  ret i8* %ret
 }
 
 declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
diff --git a/test/Transforms/InstCombine/strncpy_chk-1.ll b/test/Transforms/InstCombine/strncpy_chk-1.ll
index 90b4173..9242a8a 100644
--- a/test/Transforms/InstCombine/strncpy_chk-1.ll
+++ b/test/Transforms/InstCombine/strncpy_chk-1.ll
@@ -11,56 +11,61 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 
 ; Check cases where dstlen >= len
 
-define void @test_simplify1() {
+define i8* @test_simplify1() {
 ; CHECK-LABEL: @test_simplify1(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
-  call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60)
-  ret void
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0)
+  %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60)
+  ret i8* %ret
 }
 
-define void @test_simplify2() {
+define i8* @test_simplify2() {
 ; CHECK-LABEL: @test_simplify2(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
-  call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 12)
-  ret void
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0)
+  %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 12)
+  ret i8* %ret
 }
 
-define void @test_simplify3() {
+define i8* @test_simplify3() {
 ; CHECK-LABEL: @test_simplify3(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
 
-; CHECK-NEXT: call i8* @strncpy
-  call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60)
-  ret void
+; CHECK-NEXT: %strncpy = call i8* @strncpy(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8]* @b, i32 0, i32 0), i32 12)
+; CHECK-NEXT: ret i8* %strncpy
+  %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60)
+  ret i8* %ret
 }
 
 ; Check cases where dstlen < len
 
-define void @test_no_simplify1() {
+define i8* @test_no_simplify1() {
 ; CHECK-LABEL: @test_no_simplify1(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call i8* @__strncpy_chk
-  call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 4)
-  ret void
+; CHECK-NEXT: %ret = call i8* @__strncpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 8, i32 4)
+; CHECK-NEXT: ret i8* %ret
+  %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 4)
+  ret i8* %ret
 }
 
-define void @test_no_simplify2() {
+define i8* @test_no_simplify2() {
 ; CHECK-LABEL: @test_no_simplify2(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
 
-; CHECK-NEXT: call i8* @__strncpy_chk
-  call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 0)
-  ret void
+; CHECK-NEXT: %ret = call i8* @__strncpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8]* @b, i32 0, i32 0), i32 8, i32 0)
+; CHECK-NEXT: ret i8* %ret
+  %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 0)
+  ret i8* %ret
 }
 
 declare i8* @__strncpy_chk(i8*, i8*, i32, i32)
diff --git a/test/Transforms/InstCombine/struct-assign-tbaa.ll b/test/Transforms/InstCombine/struct-assign-tbaa.ll
index c80e31a..e949640 100644
--- a/test/Transforms/InstCombine/struct-assign-tbaa.ll
+++ b/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -10,8 +10,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 %struct.test1 = type { float }
 
 ; CHECK: @test
-; CHECK: %2 = load float* %0, align 4, !tbaa !0
-; CHECK: store float %2, float* %1, align 4, !tbaa !0
+; CHECK: %[[LOAD:.*]] = load i32* %{{.*}}, align 4, !tbaa !0
+; CHECK: store i32 %[[LOAD:.*]], i32* %{{.*}}, align 4, !tbaa !0
 ; CHECK: ret
 define void @test1(%struct.test1* nocapture %a, %struct.test1* nocapture %b) {
 entry:
@@ -35,12 +35,12 @@ define i32 (i8*, i32*, double*)*** @test2() {
   ret i32 (i8*, i32*, double*)*** %tmp2
 }
 
-; CHECK: !0 = metadata !{metadata !1, metadata !1, i64 0}
-; CHECK: !1 = metadata !{metadata !"float", metadata !2}
+; CHECK: !0 = !{!1, !1, i64 0}
+; CHECK: !1 = !{!"float", !2}
 
-!0 = metadata !{metadata !"Simple C/C++ TBAA"}
-!1 = metadata !{metadata !"omnipotent char", metadata !0}
-!2 = metadata !{metadata !5, metadata !5, i64 0}
-!3 = metadata !{i64 0, i64 4, metadata !2}
-!4 = metadata !{i64 0, i64 8, null}
-!5 = metadata !{metadata !"float", metadata !0}
+!0 = !{!"Simple C/C++ TBAA"}
+!1 = !{!"omnipotent char", !0}
+!2 = !{!5, !5, i64 0}
+!3 = !{i64 0, i64 4, !2}
+!4 = !{i64 0, i64 8, null}
+!5 = !{!"float", !0}
diff --git a/test/Transforms/InstCombine/type_pun.ll b/test/Transforms/InstCombine/type_pun.ll
new file mode 100644
index 0000000..33143ef
--- /dev/null
+++ b/test/Transforms/InstCombine/type_pun.ll
@@ -0,0 +1,137 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Ensure that type punning using a union of vector and same-sized array
+; generates an extract instead of a shuffle with an uncommon vector size:
+;
+;   typedef uint32_t v4i32 __attribute__((vector_size(16)));
+;   union { v4i32 v; uint32_t a[4]; };
+;
+; This cleans up behind SROA, which inserts the uncommon vector size when
+; cleaning up the alloca/store/GEP/load.
+
+
+; Extracting the zeroth element in an i32 array.
+define i32 @type_pun_zeroth(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_zeroth(
+; CHECK-NEXT: %[[BC:.*]] = bitcast <16 x i8> %in to <4 x i32>
+; CHECK-NEXT: %[[EXT:.*]] = extractelement <4 x i32> %[[BC]], i32 0
+; CHECK-NEXT: ret i32 %[[EXT]]
+  %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %1 = bitcast <4 x i8> %sroa to i32
+  ret i32 %1
+}
+
+; Extracting the first element in an i32 array.
+define i32 @type_pun_first(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_first(
+; CHECK-NEXT: %[[BC:.*]] = bitcast <16 x i8> %in to <4 x i32>
+; CHECK-NEXT: %[[EXT:.*]] = extractelement <4 x i32> %[[BC]], i32 1
+; CHECK-NEXT: ret i32 %[[EXT]]
+  %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %1 = bitcast <4 x i8> %sroa to i32
+  ret i32 %1
+}
+
+; Extracting an i32 that isn't aligned to any natural boundary.
+define i32 @type_pun_misaligned(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_misaligned(
+; CHECK-NEXT: %[[SHUF:.*]] = shufflevector <16 x i8> %in, <16 x i8> undef, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: %[[BC:.*]] = bitcast <16 x i8> %[[SHUF]] to <4 x i32>
+; CHECK-NEXT: %[[EXT:.*]] = extractelement <4 x i32> %[[BC]], i32 0
+; CHECK-NEXT: ret i32 %[[EXT]]
+  %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+  %1 = bitcast <4 x i8> %sroa to i32
+  ret i32 %1
+}
+
+; Type punning to an array of pointers.
+define i32* @type_pun_pointer(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_pointer(
+; CHECK-NEXT: %[[BC:.*]] = bitcast <16 x i8> %in to <4 x i32>
+; CHECK-NEXT: %[[EXT:.*]] = extractelement <4 x i32> %[[BC]], i32 0
+; CHECK-NEXT: %[[I2P:.*]] = inttoptr i32 %[[EXT]] to i32*
+; CHECK-NEXT: ret i32* %[[I2P]]
+  %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %1 = bitcast <4 x i8> %sroa to i32
+  %2 = inttoptr i32 %1 to i32*
+  ret i32* %2
+}
+
+; Type punning to an array of 32-bit floating-point values.
+define float @type_pun_float(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_float(
+; CHECK-NEXT: %[[BC:.*]] = bitcast <16 x i8> %in to <4 x float>
+; CHECK-NEXT: %[[EXT:.*]] = extractelement <4 x float> %[[BC]], i32 0
+; CHECK-NEXT: ret float %[[EXT]]
+  %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %1 = bitcast <4 x i8> %sroa to float
+  ret float %1
+}
+
+; Type punning to an array of 64-bit floating-point values.
+define double @type_pun_double(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_double(
+; CHECK-NEXT: %[[BC:.*]] = bitcast <16 x i8> %in to <2 x double>
+; CHECK-NEXT: %[[EXT:.*]] = extractelement <2 x double> %[[BC]], i32 0
+; CHECK-NEXT: ret double %[[EXT]]
+  %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %1 = bitcast <8 x i8> %sroa to double
+  ret double %1
+}
+
+; Type punning to same-size floating-point and integer values.
+; Verify that multiple uses with different bitcast types are properly handled.
+define { float, i32 } @type_pun_float_i32(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_float_i32(
+; CHECK-NEXT: %[[BCI:.*]] = bitcast <16 x i8> %in to <4 x i32>
+; CHECK-NEXT: %[[EXTI:.*]] = extractelement <4 x i32> %[[BCI]], i32 0
+; CHECK-NEXT: %[[BCF:.*]] = bitcast <16 x i8> %in to <4 x float>
+; CHECK-NEXT: %[[EXTF:.*]] = extractelement <4 x float> %[[BCF]], i32 0
+; CHECK-NEXT: %1 = insertvalue { float, i32 } undef, float %[[EXTF]], 0
+; CHECK-NEXT: %2 = insertvalue { float, i32 } %1, i32 %[[EXTI]], 1
+; CHECK-NEXT: ret { float, i32 } %2
+  %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %f = bitcast <4 x i8> %sroa to float
+  %i = bitcast <4 x i8> %sroa to i32
+  %1 = insertvalue { float, i32 } undef, float %f, 0
+  %2 = insertvalue { float, i32 } %1, i32 %i, 1
+  ret { float, i32 } %2
+}
+
+; Type punning two i32 values, with control flow.
+; Verify that the bitcast is shared and dominates usage.
+define i32 @type_pun_i32_ctrl(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_i32_ctrl(
+entry: ; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[BC:.*]] = bitcast <16 x i8> %in to <4 x i32>
+; CHECK-NEXT: br
+  %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  br i1 undef, label %left, label %right
+left: ; CHECK: left:
+; CHECK-NEXT: %[[EXTL:.*]] = extractelement <4 x i32> %[[BC]], i32 0
+; CHECK-NEXT: br
+  %lhs = bitcast <4 x i8> %sroa to i32
+  br label %tail
+right: ; CHECK: right:
+; CHECK-NEXT: %[[EXTR:.*]] = extractelement <4 x i32> %[[BC]], i32 0
+; CHECK-NEXT: br
+  %rhs = bitcast <4 x i8> %sroa to i32
+  br label %tail
+tail: ; CHECK: tail:
+; CHECK-NEXT: %i = phi i32 [ %[[EXTL]], %left ], [ %[[EXTR]], %right ]
+; CHECK-NEXT: ret i32 %i
+  %i = phi i32 [ %lhs, %left ], [ %rhs, %right ]
+  ret i32 %i
+}
+
+; Extracting a type that won't fit in a vector isn't handled. The function
+; should stay the same.
+define i40 @type_pun_unhandled(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_unhandled(
+; CHECK-NEXT: %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <5 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT: %1 = bitcast <5 x i8> %sroa to i40
+; CHECK-NEXT: ret i40 %1
+  %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <5 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8>
+  %1 = bitcast <5 x i8> %sroa to i40
+  ret i40 %1
+}
diff --git a/test/Transforms/InstCombine/unordered-fcmp-select.ll b/test/Transforms/InstCombine/unordered-fcmp-select.ll
new file mode 100644
index 0000000..0eb7290
--- /dev/null
+++ b/test/Transforms/InstCombine/unordered-fcmp-select.ll
@@ -0,0 +1,125 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; CHECK-LABEL: @select_max_ugt(
+; CHECK: %cmp.inv = fcmp ole float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_max_ugt(float %a, float %b) {
+  %cmp = fcmp ugt float %a, %b
+  %sel = select i1 %cmp, float %a, float %b
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_max_uge(
+; CHECK: %cmp.inv = fcmp olt float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_max_uge(float %a, float %b) {
+  %cmp = fcmp uge float %a, %b
+  %sel = select i1 %cmp, float %a, float %b
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_min_ugt(
+; CHECK: %cmp.inv = fcmp ole float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %a, float %b
+; CHECK-NEXT: ret float %sel
+define float @select_min_ugt(float %a, float %b) {
+  %cmp = fcmp ugt float %a, %b
+  %sel = select i1 %cmp, float %b, float %a
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_min_uge(
+; CHECK: %cmp.inv = fcmp olt float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %a, float %b
+; CHECK-NEXT: ret float %sel
+define float @select_min_uge(float %a, float %b) {
+  %cmp = fcmp uge float %a, %b
+  %sel = select i1 %cmp, float %b, float %a
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_max_ult(
+; CHECK: %cmp.inv = fcmp oge float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %a, float %b
+; CHECK-NEXT: ret float %sel
+define float @select_max_ult(float %a, float %b) {
+  %cmp = fcmp ult float %a, %b
+  %sel = select i1 %cmp, float %b, float %a
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_max_ule(
+; CHECK: %cmp.inv = fcmp ogt float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %a, float %b
+; CHECK: ret float %sel
+define float @select_max_ule(float %a, float %b) {
+  %cmp = fcmp ule float %a, %b
+  %sel = select i1 %cmp, float %b, float %a
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_min_ult(
+; CHECK: %cmp.inv = fcmp oge float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_min_ult(float %a, float %b) {
+  %cmp = fcmp ult float %a, %b
+  %sel = select i1 %cmp, float %a, float %b
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_min_ule(
+; CHECK: %cmp.inv = fcmp ogt float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_min_ule(float %a, float %b) {
+  %cmp = fcmp ule float %a, %b
+  %sel = select i1 %cmp, float %a, float %b
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_fcmp_une(
+; CHECK:  %cmp.inv = fcmp oeq float %a, %b
+; CHECK-NEXT:  %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_fcmp_une(float %a, float %b) {
+  %cmp = fcmp une float %a, %b
+  %sel = select i1 %cmp, float %a, float %b
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_fcmp_ueq
+; CHECK:  %cmp.inv = fcmp one float %a, %b
+; CHECK-NEXT:  %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_fcmp_ueq(float %a, float %b) {
+  %cmp = fcmp ueq float %a, %b
+  %sel = select i1 %cmp, float %a, float %b
+  ret float %sel
+}
+
+declare void @foo(i1)
+
+; CHECK-LABEL: @select_max_ugt_2_use_cmp(
+; CHECK: fcmp ugt
+; CHECK-NOT: fcmp
+; CHECK: ret
+define float @select_max_ugt_2_use_cmp(float %a, float %b) {
+  %cmp = fcmp ugt float %a, %b
+  call void @foo(i1 %cmp)
+  %sel = select i1 %cmp, float %a, float %b
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_min_uge_2_use_cmp(
+; CHECK: fcmp uge
+; CHECK-NOT: fcmp
+; CHECK: ret
+define float @select_min_uge_2_use_cmp(float %a, float %b) {
+  %cmp = fcmp uge float %a, %b
+  call void @foo(i1 %cmp)
+  %sel = select i1 %cmp, float %b, float %a
+  ret float %sel
+}
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 41d2b29..00a029a 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -303,6 +303,33 @@ define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i) {
   ret <2 x i64> %2
 }
 
+; CHECK: define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: ret <2 x i64> %i
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
+  ret <2 x i64> %1
+}
+
+; CHECK: define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: ret <2 x i64> undef
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
+  ret <2 x i64> %1
+}
+
+; CHECK: define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: ret <2 x i64> undef
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
+  ret <2 x i64> %1
+}
+
+; CHECK: define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: ret <2 x i64> undef
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
+  ret <2 x i64> %1
+}
 
 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
 declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
diff --git a/test/Transforms/InstCombine/xor.ll b/test/Transforms/InstCombine/xor.ll
index 3722697..c8debcb 100644
--- a/test/Transforms/InstCombine/xor.ll
+++ b/test/Transforms/InstCombine/xor.ll
@@ -1,50 +1,70 @@
 ; This test makes sure that these instructions are properly eliminated.
 ;
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    not grep "xor "
+; RUN:    FileCheck %s
 ; END.
 @G1 = global i32 0		; <i32*> [#uses=1]
 @G2 = global i32 0		; <i32*> [#uses=1]
 
 define i1 @test0(i1 %A) {
+; CHECK-LABEL: @test0(
+; CHECK-NEXT: ret i1 %A
 	%B = xor i1 %A, false		; <i1> [#uses=1]
 	ret i1 %B
 }
 
 define i32 @test1(i32 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 %A
 	%B = xor i32 %A, 0		; <i32> [#uses=1]
 	ret i32 %B
 }
 
 define i1 @test2(i1 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i1 false
 	%B = xor i1 %A, %A		; <i1> [#uses=1]
 	ret i1 %B
 }
 
 define i32 @test3(i32 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i32 0
 	%B = xor i32 %A, %A		; <i32> [#uses=1]
 	ret i32 %B
 }
 
 define i32 @test4(i32 %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: ret i32 -1
 	%NotA = xor i32 -1, %A		; <i32> [#uses=1]
 	%B = xor i32 %A, %NotA		; <i32> [#uses=1]
 	ret i32 %B
 }
 
 define i32 @test5(i32 %A) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: %1 = and i32 %A, -124
+; CHECK-NEXT: ret i32 %1
 	%t1 = or i32 %A, 123		; <i32> [#uses=1]
 	%r = xor i32 %t1, 123		; <i32> [#uses=1]
 	ret i32 %r
 }
 
 define i8 @test6(i8 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret i8 %A
 	%B = xor i8 %A, 17		; <i8> [#uses=1]
 	%C = xor i8 %B, 17		; <i8> [#uses=1]
 	ret i8 %C
 }
 
 define i32 @test7(i32 %A, i32 %B) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: %A1 = and i32 %A, 7
+; CHECK-NEXT: %B1 = and i32 %B, 128
+; CHECK-NEXT: %C11 = or i32 %A1, %B1
+; CHECK-NEXT: ret i32 %C11
 	%A1 = and i32 %A, 7		; <i32> [#uses=1]
 	%B1 = and i32 %B, 128		; <i32> [#uses=1]
 	%C1 = xor i32 %A1, %B1		; <i32> [#uses=1]
@@ -52,6 +72,8 @@ define i32 @test7(i32 %A, i32 %B) {
 }
 
 define i8 @test8(i1 %c) {
+; CHECK-LABEL: @test8(
+; CHECK: br i1 %c, label %False, label %True
 	%d = xor i1 %c, true		; <i1> [#uses=1]
 	br i1 %d, label %True, label %False
 
@@ -63,30 +85,47 @@ False:		; preds = %0
 }
 
 define i1 @test9(i8 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: %C = icmp eq i8 %A, 89
+; CHECK-NEXT: ret i1 %C
 	%B = xor i8 %A, 123		; <i8> [#uses=1]
 	%C = icmp eq i8 %B, 34		; <i1> [#uses=1]
 	ret i1 %C
 }
 
 define i8 @test10(i8 %A) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: %B = and i8 %A, 3
+; CHECK-NEXT: %C1 = or i8 %B, 4
+; CHECK-NEXT: ret i8 %C1
 	%B = and i8 %A, 3		; <i8> [#uses=1]
 	%C = xor i8 %B, 4		; <i8> [#uses=1]
 	ret i8 %C
 }
 
 define i8 @test11(i8 %A) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: %B = and i8 %A, -13
+; CHECK-NEXT: %1 = or i8 %B, 8
+; CHECK-NEXT: ret i8 %1
 	%B = or i8 %A, 12		; <i8> [#uses=1]
 	%C = xor i8 %B, 4		; <i8> [#uses=1]
 	ret i8 %C
 }
 
 define i1 @test12(i8 %A) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: %c = icmp ne i8 %A, 4
+; CHECK-NEXT: ret i1 %c
 	%B = xor i8 %A, 4		; <i8> [#uses=1]
 	%c = icmp ne i8 %B, 0		; <i1> [#uses=1]
 	ret i1 %c
 }
 
 define i1 @test13(i8 %A, i8 %B) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: %1 = icmp ne i8 %A, %B
+; CHECK-NEXT: ret i1 %1
 	%C = icmp ult i8 %A, %B		; <i1> [#uses=1]
 	%D = icmp ugt i8 %A, %B		; <i1> [#uses=1]
 	%E = xor i1 %C, %D		; <i1> [#uses=1]
@@ -94,6 +133,8 @@ define i1 @test13(i8 %A, i8 %B) {
 }
 
 define i1 @test14(i8 %A, i8 %B) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: ret i1 true
 	%C = icmp eq i8 %A, %B		; <i1> [#uses=1]
 	%D = icmp ne i8 %B, %A		; <i1> [#uses=1]
 	%E = xor i1 %C, %D		; <i1> [#uses=1]
@@ -101,36 +142,54 @@ define i1 @test14(i8 %A, i8 %B) {
 }
 
 define i32 @test15(i32 %A) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: %C = sub i32 0, %A
+; CHECK-NEXT: ret i32 %C
 	%B = add i32 %A, -1		; <i32> [#uses=1]
 	%C = xor i32 %B, -1		; <i32> [#uses=1]
 	ret i32 %C
 }
 
 define i32 @test16(i32 %A) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: %C = sub i32 -124, %A
+; CHECK-NEXT: ret i32 %C
 	%B = add i32 %A, 123		; <i32> [#uses=1]
 	%C = xor i32 %B, -1		; <i32> [#uses=1]
 	ret i32 %C
 }
 
 define i32 @test17(i32 %A) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: %C = add i32 %A, -124
+; CHECK-NEXT: ret i32 %C
 	%B = sub i32 123, %A		; <i32> [#uses=1]
 	%C = xor i32 %B, -1		; <i32> [#uses=1]
 	ret i32 %C
 }
 
 define i32 @test18(i32 %A) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: %C = add i32 %A, 124
+; CHECK-NEXT: ret i32 %C
 	%B = xor i32 %A, -1		; <i32> [#uses=1]
 	%C = sub i32 123, %B		; <i32> [#uses=1]
 	ret i32 %C
 }
 
 define i32 @test19(i32 %A, i32 %B) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: ret i32 %B
 	%C = xor i32 %A, %B		; <i32> [#uses=1]
 	%D = xor i32 %C, %A		; <i32> [#uses=1]
 	ret i32 %D
 }
 
 define void @test20(i32 %A, i32 %B) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: store i32 %B, i32* @G1
+; CHECK-NEXT: store i32 %A, i32* @G2
+; CHECK-NEXT: ret void
 	%tmp.2 = xor i32 %B, %A		; <i32> [#uses=2]
 	%tmp.5 = xor i32 %tmp.2, %B		; <i32> [#uses=2]
 	%tmp.8 = xor i32 %tmp.5, %tmp.2		; <i32> [#uses=1]
@@ -140,12 +199,18 @@ define void @test20(i32 %A, i32 %B) {
 }
 
 define i32 @test21(i1 %C, i32 %A, i32 %B) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: %D = select i1 %C, i32 %B, i32 %A
+; CHECK-NEXT: ret i32 %D
 	%C2 = xor i1 %C, true		; <i1> [#uses=1]
 	%D = select i1 %C2, i32 %A, i32 %B		; <i32> [#uses=1]
 	ret i32 %D
 }
 
 define i32 @test22(i1 %X) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: %1 = zext i1 %X to i32
+; CHECK-NEXT: ret i32 %1
 	%Y = xor i1 %X, true		; <i1> [#uses=1]
 	%Z = zext i1 %Y to i32		; <i32> [#uses=1]
 	%Q = xor i32 %Z, 1		; <i32> [#uses=1]
@@ -153,18 +218,27 @@ define i32 @test22(i1 %X) {
 }
 
 define i1 @test23(i32 %a, i32 %b) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: %tmp.4 = icmp eq i32 %b, 0
+; CHECK-NEXT: ret i1 %tmp.4
 	%tmp.2 = xor i32 %b, %a		; <i32> [#uses=1]
 	%tmp.4 = icmp eq i32 %tmp.2, %a		; <i1> [#uses=1]
 	ret i1 %tmp.4
 }
 
 define i1 @test24(i32 %c, i32 %d) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: %tmp.4 = icmp ne i32 %d, 0
+; CHECK-NEXT: ret i1 %tmp.4
 	%tmp.2 = xor i32 %d, %c		; <i32> [#uses=1]
 	%tmp.4 = icmp ne i32 %tmp.2, %c		; <i1> [#uses=1]
 	ret i1 %tmp.4
 }
 
 define i32 @test25(i32 %g, i32 %h) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: %tmp4 = and i32 %h, %g
+; CHECK-NEXT: ret i32 %tmp4
 	%h2 = xor i32 %h, -1		; <i32> [#uses=1]
 	%tmp2 = and i32 %h2, %g		; <i32> [#uses=1]
 	%tmp4 = xor i32 %tmp2, %g		; <i32> [#uses=1]
@@ -172,6 +246,9 @@ define i32 @test25(i32 %g, i32 %h) {
 }
 
 define i32 @test26(i32 %a, i32 %b) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: %tmp4 = and i32 %a, %b
+; CHECK-NEXT: ret i32 %tmp4
 	%b2 = xor i32 %b, -1		; <i32> [#uses=1]
 	%tmp2 = xor i32 %a, %b2		; <i32> [#uses=1]
 	%tmp4 = and i32 %tmp2, %a		; <i32> [#uses=1]
@@ -179,6 +256,10 @@ define i32 @test26(i32 %a, i32 %b) {
 }
 
 define i32 @test27(i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: %tmp = icmp eq i32 %b, %c
+; CHECK-NEXT: %tmp6 = zext i1 %tmp to i32
+; CHECK-NEXT: ret i32 %tmp6
 	%tmp2 = xor i32 %d, %b		; <i32> [#uses=1]
 	%tmp5 = xor i32 %d, %c		; <i32> [#uses=1]
 	%tmp = icmp eq i32 %tmp2, %tmp5		; <i1> [#uses=1]
@@ -187,6 +268,9 @@ define i32 @test27(i32 %b, i32 %c, i32 %d) {
 }
 
 define i32 @test28(i32 %indvar) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: %tmp214 = add i32 %indvar, 1
+; CHECK-NEXT: ret i32 %tmp214
 	%tmp7 = add i32 %indvar, -2147483647		; <i32> [#uses=1]
 	%tmp214 = xor i32 %tmp7, -2147483648		; <i32> [#uses=1]
 	ret i32 %tmp214