Diffstat (limited to 'test/Transforms/InstCombine')
-rw-r--r--  test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll  |   2
-rw-r--r--  test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll  |   2
-rw-r--r--  test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll  |   2
-rw-r--r--  test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll  |  14
-rw-r--r--  test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll  |   2
-rw-r--r--  test/Transforms/InstCombine/add2.ll  |  46
-rw-r--r--  test/Transforms/InstCombine/add4.ll  | 102
-rw-r--r--  test/Transforms/InstCombine/align-attr.ll  |  15
-rw-r--r--  test/Transforms/InstCombine/and2.ll  |  25
-rw-r--r--  test/Transforms/InstCombine/apint-sub.ll  |   6
-rw-r--r--  test/Transforms/InstCombine/ashr-nop.ll  |   8
-rw-r--r--  test/Transforms/InstCombine/assume-loop-align.ll  |  47
-rw-r--r--  test/Transforms/InstCombine/assume-redundant.ll  |  55
-rw-r--r--  test/Transforms/InstCombine/assume.ll  | 265
-rw-r--r--  test/Transforms/InstCombine/assume2.ll  | 174
-rw-r--r--  test/Transforms/InstCombine/atomic.ll  |   8
-rw-r--r--  test/Transforms/InstCombine/bitcast-alias-function.ll  |  15
-rw-r--r--  test/Transforms/InstCombine/cast.ll  |  99
-rw-r--r--  test/Transforms/InstCombine/constant-fold-address-space-pointer.ll  |   7
-rw-r--r--  test/Transforms/InstCombine/constant-fold-alias.ll  |  40
-rw-r--r--  test/Transforms/InstCombine/constant-fold-math.ll  |   9
-rw-r--r--  test/Transforms/InstCombine/debug-line.ll  |  12
-rw-r--r--  test/Transforms/InstCombine/debuginfo.ll  |  40
-rw-r--r--  test/Transforms/InstCombine/descale-zero.ll  |   3
-rw-r--r--  test/Transforms/InstCombine/devirt.ll  |  39
-rw-r--r--  test/Transforms/InstCombine/div.ll  | 117
-rw-r--r--  test/Transforms/InstCombine/double-float-shrink-1.ll  | 227
-rw-r--r--  test/Transforms/InstCombine/fabs.ll  | 100
-rw-r--r--  test/Transforms/InstCombine/fast-math.ll  | 170
-rw-r--r--  test/Transforms/InstCombine/fmul.ll  |  29
-rw-r--r--  test/Transforms/InstCombine/fold-phi.ll  |  36
-rw-r--r--  test/Transforms/InstCombine/fpcast.ll  |  20
-rw-r--r--  test/Transforms/InstCombine/getelementptr.ll  |  85
-rw-r--r--  test/Transforms/InstCombine/icmp-logical.ll  |  20
-rw-r--r--  test/Transforms/InstCombine/icmp-range.ll  |  61
-rw-r--r--  test/Transforms/InstCombine/icmp-shr.ll  | 378
-rw-r--r--  test/Transforms/InstCombine/icmp.ll  | 162
-rw-r--r--  test/Transforms/InstCombine/load-addrspace-cast.ll  |  12
-rw-r--r--  test/Transforms/InstCombine/load.ll  |  76
-rw-r--r--  test/Transforms/InstCombine/loadstore-alignment.ll  | 114
-rw-r--r--  test/Transforms/InstCombine/loadstore-metadata.ll  |  86
-rw-r--r--  test/Transforms/InstCombine/malloc-free-delete.ll  |  23
-rw-r--r--  test/Transforms/InstCombine/maxnum.ll  | 222
-rw-r--r--  test/Transforms/InstCombine/memcmp-1.ll  |   2
-rw-r--r--  test/Transforms/InstCombine/minnum.ll  | 244
-rw-r--r--  test/Transforms/InstCombine/narrow-switch.ll  |  93
-rw-r--r--  test/Transforms/InstCombine/no_cgscc_assert.ll  |  19
-rw-r--r--  test/Transforms/InstCombine/objsize-address-space.ll  |   2
-rw-r--r--  test/Transforms/InstCombine/objsize.ll  |   4
-rw-r--r--  test/Transforms/InstCombine/or-xor.ll  |  89
-rw-r--r--  test/Transforms/InstCombine/or.ll  |  98
-rw-r--r--  test/Transforms/InstCombine/overflow-mul.ll  |  13
-rw-r--r--  test/Transforms/InstCombine/pr12338.ll  |   2
-rw-r--r--  test/Transforms/InstCombine/select.ll  | 175
-rw-r--r--  test/Transforms/InstCombine/strcmp-1.ll  |   2
-rw-r--r--  test/Transforms/InstCombine/strncmp-1.ll  |   4
-rw-r--r--  test/Transforms/InstCombine/sub-xor.ll  |   2
-rw-r--r--  test/Transforms/InstCombine/sub.ll  |  90
-rw-r--r--  test/Transforms/InstCombine/vsx-unaligned.ll  |  44
-rw-r--r--  test/Transforms/InstCombine/xor2.ll  |  90
60 files changed, 3481 insertions, 467 deletions
diff --git a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
index 7f9bd9e..6259893 100644
--- a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
+++ b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -instcombine -S | grep icmp
; PR1646
-@__gthrw_pthread_cancel = alias weak i32 (i32)* @pthread_cancel ; <i32 (i32)*> [#uses=1]
+@__gthrw_pthread_cancel = weak alias i32 (i32)* @pthread_cancel ; <i32 (i32)*> [#uses=1]
@__gthread_active_ptr.5335 = internal constant i8* bitcast (i32 (i32)* @__gthrw_pthread_cancel to i8*) ; <i8**> [#uses=1]
define weak i32 @pthread_cancel(i32) {
ret i32 0
diff --git a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
index c7cef75..3793a86 100644
--- a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
+++ b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -instcombine -S | grep icmp
; PR1678
-@A = alias weak void ()* @B ; <void ()*> [#uses=1]
+@A = weak alias void ()* @B ; <void ()*> [#uses=1]
define weak void @B() {
ret void
diff --git a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
index fe935f9..656fb34 100644
--- a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
+++ b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -instcombine -S | not grep call
-; RUN: opt < %s -std-compile-opts -S | not grep xyz
+; RUN: opt < %s -O3 -S | not grep xyz
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@.str = internal constant [4 x i8] c"xyz\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll b/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll
deleted file mode 100644
index 917d3d9..0000000
--- a/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep "ret i.* 0" | count 2
-; PR2048
-
-define i32 @i(i32 %a) {
- %tmp1 = sdiv i32 %a, -1431655765
- %tmp2 = sdiv i32 %tmp1, 3
- ret i32 %tmp2
-}
-
-define i8 @j(i8 %a) {
- %tmp1 = sdiv i8 %a, 64
- %tmp2 = sdiv i8 %tmp1, 3
- ret i8 %tmp2
-}
diff --git a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
index a75a465..895b260 100644
--- a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
+++ b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
@@ -15,7 +15,7 @@ define void @fu1(i32 %parm) nounwind ssp {
; <label>:4 ; preds = %0
%5 = load i32* %1, align 4
- %6 = mul nsw i32 %5, 8
+ %6 = shl nsw i32 %5, 3
; With "nsw", the alloca and its bitcast can be fused:
%7 = add nsw i32 %6, 2048
; CHECK: alloca double
diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll
index d7eac4b..a166e5f 100644
--- a/test/Transforms/InstCombine/add2.ll
+++ b/test/Transforms/InstCombine/add2.ll
@@ -219,7 +219,7 @@ define i16 @mul_add_to_mul_1(i16 %x) {
%add2 = add nsw i16 %x, %mul1
ret i16 %add2
; CHECK-LABEL: @mul_add_to_mul_1(
-; CHECK-NEXT: %add2 = mul nsw i16 %x, 9
+; CHECK-NEXT: %add2 = mul i16 %x, 9
; CHECK-NEXT: ret i16 %add2
}
@@ -228,7 +228,7 @@ define i16 @mul_add_to_mul_2(i16 %x) {
%add2 = add nsw i16 %mul1, %x
ret i16 %add2
; CHECK-LABEL: @mul_add_to_mul_2(
-; CHECK-NEXT: %add2 = mul nsw i16 %x, 9
+; CHECK-NEXT: %add2 = mul i16 %x, 9
; CHECK-NEXT: ret i16 %add2
}
@@ -248,7 +248,7 @@ define i16 @mul_add_to_mul_4(i16 %a) {
%add = add nsw i16 %mul1, %mul2
ret i16 %add
; CHECK-LABEL: @mul_add_to_mul_4(
-; CHECK-NEXT: %add = mul nsw i16 %a, 9
+; CHECK-NEXT: %add = mul i16 %a, 9
; CHECK-NEXT: ret i16 %add
}
@@ -313,3 +313,43 @@ define i16 @add_cttz_2(i16 %a) {
ret i16 %b
}
!1 = metadata !{i16 0, i16 32}
+
+define i32 @add_or_and(i32 %x, i32 %y) {
+ %or = or i32 %x, %y
+ %and = and i32 %x, %y
+ %add = add i32 %or, %and
+ ret i32 %add
+; CHECK-LABEL: @add_or_and(
+; CHECK-NEXT: add i32 %x, %y
+; CHECK-NEXT: ret i32
+}
+
+define i32 @add_nsw_or_and(i32 %x, i32 %y) {
+ %or = or i32 %x, %y
+ %and = and i32 %x, %y
+ %add = add nsw i32 %or, %and
+ ret i32 %add
+; CHECK-LABEL: @add_nsw_or_and(
+; CHECK-NEXT: add nsw i32 %x, %y
+; CHECK-NEXT: ret i32
+}
+
+define i32 @add_nuw_or_and(i32 %x, i32 %y) {
+ %or = or i32 %x, %y
+ %and = and i32 %x, %y
+ %add = add nuw i32 %or, %and
+ ret i32 %add
+; CHECK-LABEL: @add_nuw_or_and(
+; CHECK-NEXT: add nuw i32 %x, %y
+; CHECK-NEXT: ret i32
+}
+
+define i32 @add_nuw_nsw_or_and(i32 %x, i32 %y) {
+ %or = or i32 %x, %y
+ %and = and i32 %x, %y
+ %add = add nsw nuw i32 %or, %and
+ ret i32 %add
+; CHECK-LABEL: @add_nuw_nsw_or_and(
+; CHECK-NEXT: add nuw nsw i32 %x, %y
+; CHECK-NEXT: ret i32
+}
diff --git a/test/Transforms/InstCombine/add4.ll b/test/Transforms/InstCombine/add4.ll
deleted file mode 100644
index f9b7e3b..0000000
--- a/test/Transforms/InstCombine/add4.ll
+++ /dev/null
@@ -1,102 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-define float @test1(float %A, float %B, i1 %C) {
-EntryBlock:
- ;; A*(1 - uitofp i1 C) -> select C, 0, A
- %cf = uitofp i1 %C to float
- %mc = fsub float 1.000000e+00, %cf
- %p1 = fmul fast float %A, %mc
- ret float %p1
-; CHECK-LABEL: @test1(
-; CHECK: select i1 %C, float -0.000000e+00, float %A
-}
-
-define float @test2(float %A, float %B, i1 %C) {
-EntryBlock:
- ;; B*(uitofp i1 C) -> select C, B, 0
- %cf = uitofp i1 %C to float
- %p2 = fmul fast float %B, %cf
- ret float %p2
-; CHECK-LABEL: @test2(
-; CHECK: select i1 %C, float %B, float -0.000000e+00
-}
-
-define float @test3(float %A, float %B, i1 %C) {
-EntryBlock:
- ;; select C, 0, B + select C, A, 0 -> select C, A, B
- %cf = uitofp i1 %C to float
- %s1 = select i1 %C, float 0.000000e+00, float %B
- %s2 = select i1 %C, float %A, float 0.000000e+00
- %sum = fadd fast float %s1, %s2
- ret float %sum
-; CHECK-LABEL: @test3(
-; CHECK: select i1 %C, float %A, float %B
-}
-
-define float @test4(float %A, float %B, i1 %C) {
-EntryBlock:
- ;; B*(uitofp i1 C) + A*(1 - uitofp i1 C) -> select C, A, B
- %cf = uitofp i1 %C to float
- %mc = fsub fast float 1.000000e+00, %cf
- %p1 = fmul fast float %A, %mc
- %p2 = fmul fast float %B, %cf
- %s1 = fadd fast float %p2, %p1
- ret float %s1
-; CHECK-LABEL: @test4(
-; CHECK: select i1 %C, float %B, float %A
-}
-
-define float @test5(float %A, float %B, i1 %C) {
-EntryBlock:
- ;; A*(1 - uitofp i1 C) + B*(uitofp i1 C) -> select C, A, B
- %cf = uitofp i1 %C to float
- %mc = fsub fast float 1.000000e+00, %cf
- %p1 = fmul fast float %A, %mc
- %p2 = fmul fast float %B, %cf
- %s1 = fadd fast float %p1, %p2
- ret float %s1
-; CHECK-LABEL: @test5(
-; CHECK: select i1 %C, float %B, float %A
-}
-
-; PR15952
-define float @test6(float %A, float %B, i32 %C) {
- %cf = uitofp i32 %C to float
- %mc = fsub float 1.000000e+00, %cf
- %p1 = fmul fast float %A, %mc
- ret float %p1
-; CHECK-LABEL: @test6(
-; CHECK: uitofp
-}
-
-define float @test7(float %A, float %B, i32 %C) {
- %cf = uitofp i32 %C to float
- %p2 = fmul fast float %B, %cf
- ret float %p2
-; CHECK-LABEL: @test7(
-; CHECK: uitofp
-}
-
-define <4 x float> @test8(<4 x float> %A, <4 x float> %B, <4 x i1> %C) {
- ;; B*(uitofp i1 C) + A*(1 - uitofp i1 C) -> select C, A, B
- %cf = uitofp <4 x i1> %C to <4 x float>
- %mc = fsub fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %cf
- %p1 = fmul fast <4 x float> %A, %mc
- %p2 = fmul fast <4 x float> %B, %cf
- %s1 = fadd fast <4 x float> %p2, %p1
- ret <4 x float> %s1
-; CHECK-LABEL: @test8(
-; CHECK: select <4 x i1> %C, <4 x float> %B, <4 x float> %A
-}
-
-define <4 x float> @test9(<4 x float> %A, <4 x float> %B, <4 x i1> %C) {
- ;; A*(1 - uitofp i1 C) + B*(uitofp i1 C) -> select C, A, B
- %cf = uitofp <4 x i1> %C to <4 x float>
- %mc = fsub fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %cf
- %p1 = fmul fast <4 x float> %A, %mc
- %p2 = fmul fast <4 x float> %B, %cf
- %s1 = fadd fast <4 x float> %p1, %p2
- ret <4 x float> %s1
-; CHECK-LABEL: @test9
-; CHECK: select <4 x i1> %C, <4 x float> %B, <4 x float> %A
-}
diff --git a/test/Transforms/InstCombine/align-attr.ll b/test/Transforms/InstCombine/align-attr.ll
new file mode 100644
index 0000000..9f366bf
--- /dev/null
+++ b/test/Transforms/InstCombine/align-attr.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @foo1(i32* align 32 %a) #0 {
+entry:
+ %0 = load i32* %a, align 4
+ ret i32 %0
+
+; CHECK-LABEL: @foo1
+; CHECK-DAG: load i32* %a, align 32
+; CHECK: ret i32
+}
+
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index e88fd59..96b535d 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -45,7 +45,7 @@ define <4 x i32> @test5(<4 x i32> %A) {
; Check that we combine "if x!=0 && x!=-1" into "if x+1u>1"
define i32 @test6(i64 %x) nounwind {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
; CHECK-NEXT: add i64 %x, 1
; CHECK-NEXT: icmp ugt i64 %x.off, 1
%cmp1 = icmp ne i64 %x, -1
@@ -54,3 +54,26 @@ define i32 @test6(i64 %x) nounwind {
%land.ext = zext i1 %.cmp1 to i32
ret i32 %land.ext
}
+
+define i1 @test7(i32 %i, i1 %b) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %i, 0
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP]], %b
+; CHECK-NEXT: ret i1 [[AND]]
+ %cmp1 = icmp slt i32 %i, 1
+ %cmp2 = icmp sgt i32 %i, -1
+ %and1 = and i1 %cmp1, %b
+ %and2 = and i1 %and1, %cmp2
+ ret i1 %and2
+}
+
+define i1 @test8(i32 %i) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[DEC:%.*]] = add i32 %i, -1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[DEC]], 13
+; CHECK-NEXT: ret i1 [[CMP]]
+ %cmp1 = icmp ne i32 %i, 0
+ %cmp2 = icmp ult i32 %i, 14
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
diff --git a/test/Transforms/InstCombine/apint-sub.ll b/test/Transforms/InstCombine/apint-sub.ll
index df8ec52..3b69c17 100644
--- a/test/Transforms/InstCombine/apint-sub.ll
+++ b/test/Transforms/InstCombine/apint-sub.ll
@@ -95,12 +95,6 @@ define i1024 @test14(i1024 %A) {
ret i1024 %D
}
-define i14 @test15(i14 %A, i14 %B) {
- %C = sub i14 0, %A ; <i14> [#uses=1]
- %D = srem i14 %B, %C ; <i14> [#uses=1]
- ret i14 %D
-}
-
define i51 @test16(i51 %A) {
%X = sdiv i51 %A, 1123 ; <i51> [#uses=1]
%Y = sub i51 0, %X ; <i51> [#uses=1]
diff --git a/test/Transforms/InstCombine/ashr-nop.ll b/test/Transforms/InstCombine/ashr-nop.ll
deleted file mode 100644
index 870ede3..0000000
--- a/test/Transforms/InstCombine/ashr-nop.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep ashr
-
-define i32 @foo(i32 %x) {
- %o = and i32 %x, 1
- %n = add i32 %o, -1
- %t = ashr i32 %n, 17
- ret i32 %t
-}
diff --git a/test/Transforms/InstCombine/assume-loop-align.ll b/test/Transforms/InstCombine/assume-loop-align.ll
new file mode 100644
index 0000000..19190de
--- /dev/null
+++ b/test/Transforms/InstCombine/assume-loop-align.ll
@@ -0,0 +1,47 @@
+; RUN: opt -domtree -instcombine -loops -S < %s | FileCheck %s
+; Note: The -loops above can be anything that requires the domtree, and is
+; necessary to work around a pass-manager bug.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @foo(i32* %a, i32* %b) #0 {
+entry:
+ %ptrint = ptrtoint i32* %a to i64
+ %maskedptr = and i64 %ptrint, 63
+ %maskcond = icmp eq i64 %maskedptr, 0
+ tail call void @llvm.assume(i1 %maskcond)
+ %ptrint1 = ptrtoint i32* %b to i64
+ %maskedptr2 = and i64 %ptrint1, 63
+ %maskcond3 = icmp eq i64 %maskedptr2, 0
+ tail call void @llvm.assume(i1 %maskcond3)
+ br label %for.body
+
+; CHECK-LABEL: @foo
+; CHECK: load i32* {{.*}} align 64
+; CHECK: store i32 {{.*}} align 64
+; CHECK: ret
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, 1
+ %arrayidx5 = getelementptr inbounds i32* %a, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx5, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 16
+ %1 = trunc i64 %indvars.iv.next to i32
+ %cmp = icmp slt i32 %1, 1648
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/assume-redundant.ll b/test/Transforms/InstCombine/assume-redundant.ll
new file mode 100644
index 0000000..81fe094
--- /dev/null
+++ b/test/Transforms/InstCombine/assume-redundant.ll
@@ -0,0 +1,55 @@
+; RUN: opt -domtree -instcombine -loops -S < %s | FileCheck %s
+; Note: The -loops above can be anything that requires the domtree, and is
+; necessary to work around a pass-manager bug.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.s = type { double* }
+
+; Function Attrs: nounwind uwtable
+define void @_Z3fooR1s(%struct.s* nocapture readonly dereferenceable(8) %x) #0 {
+
+; CHECK-LABEL: @_Z3fooR1s
+; CHECK: call void @llvm.assume
+; CHECK-NOT: call void @llvm.assume
+
+entry:
+ %a = getelementptr inbounds %struct.s* %x, i64 0, i32 0
+ %0 = load double** %a, align 8
+ %ptrint = ptrtoint double* %0 to i64
+ %maskedptr = and i64 %ptrint, 31
+ %maskcond = icmp eq i64 %maskedptr, 0
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.1, %for.body ]
+ tail call void @llvm.assume(i1 %maskcond)
+ %arrayidx = getelementptr inbounds double* %0, i64 %indvars.iv
+ %1 = load double* %arrayidx, align 16
+ %add = fadd double %1, 1.000000e+00
+ tail call void @llvm.assume(i1 %maskcond)
+ %mul = fmul double %add, 2.000000e+00
+ store double %mul, double* %arrayidx, align 16
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ tail call void @llvm.assume(i1 %maskcond)
+ %arrayidx.1 = getelementptr inbounds double* %0, i64 %indvars.iv.next
+ %2 = load double* %arrayidx.1, align 8
+ %add.1 = fadd double %2, 1.000000e+00
+ tail call void @llvm.assume(i1 %maskcond)
+ %mul.1 = fmul double %add.1, 2.000000e+00
+ store double %mul.1, double* %arrayidx.1, align 8
+ %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
+ %exitcond.1 = icmp eq i64 %indvars.iv.next, 1599
+ br i1 %exitcond.1, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/assume.ll b/test/Transforms/InstCombine/assume.ll
new file mode 100644
index 0000000..7e45c04
--- /dev/null
+++ b/test/Transforms/InstCombine/assume.ll
@@ -0,0 +1,265 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @foo1(i32* %a) #0 {
+entry:
+ %0 = load i32* %a, align 4
+
+; Check that the alignment has been upgraded and that the assume has not
+; been removed:
+; CHECK-LABEL: @foo1
+; CHECK-DAG: load i32* %a, align 32
+; CHECK-DAG: call void @llvm.assume
+; CHECK: ret i32
+
+ %ptrint = ptrtoint i32* %a to i64
+ %maskedptr = and i64 %ptrint, 31
+ %maskcond = icmp eq i64 %maskedptr, 0
+ tail call void @llvm.assume(i1 %maskcond)
+
+ ret i32 %0
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @foo2(i32* %a) #0 {
+entry:
+; Same check as in @foo1, but make sure it works if the assume is first too.
+; CHECK-LABEL: @foo2
+; CHECK-DAG: load i32* %a, align 32
+; CHECK-DAG: call void @llvm.assume
+; CHECK: ret i32
+
+ %ptrint = ptrtoint i32* %a to i64
+ %maskedptr = and i64 %ptrint, 31
+ %maskcond = icmp eq i64 %maskedptr, 0
+ tail call void @llvm.assume(i1 %maskcond)
+
+ %0 = load i32* %a, align 4
+ ret i32 %0
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+define i32 @simple(i32 %a) #1 {
+entry:
+
+; CHECK-LABEL: @simple
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 4
+
+ %cmp = icmp eq i32 %a, 4
+ tail call void @llvm.assume(i1 %cmp)
+ ret i32 %a
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @can1(i1 %a, i1 %b, i1 %c) {
+entry:
+ %and1 = and i1 %a, %b
+ %and = and i1 %and1, %c
+ tail call void @llvm.assume(i1 %and)
+
+; CHECK-LABEL: @can1
+; CHECK: call void @llvm.assume(i1 %a)
+; CHECK: call void @llvm.assume(i1 %b)
+; CHECK: call void @llvm.assume(i1 %c)
+; CHECK: ret i32
+
+ ret i32 5
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @can2(i1 %a, i1 %b, i1 %c) {
+entry:
+ %v = or i1 %a, %b
+ %w = xor i1 %v, 1
+ tail call void @llvm.assume(i1 %w)
+
+; CHECK-LABEL: @can2
+; CHECK: %[[V1:[^ ]+]] = xor i1 %a, true
+; CHECK: call void @llvm.assume(i1 %[[V1]])
+; CHECK: %[[V2:[^ ]+]] = xor i1 %b, true
+; CHECK: call void @llvm.assume(i1 %[[V2]])
+; CHECK: ret i32
+
+ ret i32 5
+}
+
+define i32 @bar1(i32 %a) #0 {
+entry:
+ %and1 = and i32 %a, 3
+
+; CHECK-LABEL: @bar1
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+ %and = and i32 %a, 7
+ %cmp = icmp eq i32 %and, 1
+ tail call void @llvm.assume(i1 %cmp)
+
+ ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar2(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @bar2
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+ %and = and i32 %a, 7
+ %cmp = icmp eq i32 %and, 1
+ tail call void @llvm.assume(i1 %cmp)
+
+ %and1 = and i32 %a, 3
+ ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar3(i32 %a, i1 %x, i1 %y) #0 {
+entry:
+ %and1 = and i32 %a, 3
+
+; Don't be fooled by other assumes around.
+; CHECK-LABEL: @bar3
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+ tail call void @llvm.assume(i1 %x)
+
+ %and = and i32 %a, 7
+ %cmp = icmp eq i32 %and, 1
+ tail call void @llvm.assume(i1 %cmp)
+
+ tail call void @llvm.assume(i1 %y)
+
+ ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar4(i32 %a, i32 %b) {
+entry:
+ %and1 = and i32 %b, 3
+
+; CHECK-LABEL: @bar4
+; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+ %and = and i32 %a, 7
+ %cmp = icmp eq i32 %and, 1
+ tail call void @llvm.assume(i1 %cmp)
+
+ %cmp2 = icmp eq i32 %a, %b
+ tail call void @llvm.assume(i1 %cmp2)
+
+ ret i32 %and1
+}
+
+define i32 @icmp1(i32 %a) #0 {
+entry:
+ %cmp = icmp sgt i32 %a, 5
+ tail call void @llvm.assume(i1 %cmp)
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+
+; CHECK-LABEL: @icmp1
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @icmp2(i32 %a) #0 {
+entry:
+ %cmp = icmp sgt i32 %a, 5
+ tail call void @llvm.assume(i1 %cmp)
+ %0 = zext i1 %cmp to i32
+ %lnot.ext = xor i32 %0, 1
+ ret i32 %lnot.ext
+
+; CHECK-LABEL: @icmp2
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 0
+}
+
+declare void @escape(i32* %a)
+
+; Do we canonicalize a nonnull assumption on a load into
+; metadata form?
+define i1 @nonnull1(i32** %a) {
+entry:
+ %load = load i32** %a
+ %cmp = icmp ne i32* %load, null
+ tail call void @llvm.assume(i1 %cmp)
+ tail call void @escape(i32* %load)
+ %rval = icmp eq i32* %load, null
+ ret i1 %rval
+
+; CHECK-LABEL: @nonnull1
+; CHECK: !nonnull
+; CHECK-NOT: call void @llvm.assume
+; CHECK: ret i1 false
+}
+
+; Make sure the above canonicalization applies only
+; to pointer types. Doing otherwise would be illegal.
+define i1 @nonnull2(i32* %a) {
+entry:
+ %load = load i32* %a
+ %cmp = icmp ne i32 %load, 0
+ tail call void @llvm.assume(i1 %cmp)
+ %rval = icmp eq i32 %load, 0
+ ret i1 %rval
+
+; CHECK-LABEL: @nonnull2
+; CHECK-NOT: !nonnull
+; CHECK: call void @llvm.assume
+}
+
+; Make sure the above canonicalization does not trigger
+; if the assume is control dependent on something else
+define i1 @nonnull3(i32** %a, i1 %control) {
+entry:
+ %load = load i32** %a
+ %cmp = icmp ne i32* %load, null
+ br i1 %control, label %taken, label %not_taken
+taken:
+ tail call void @llvm.assume(i1 %cmp)
+ %rval = icmp eq i32* %load, null
+ ret i1 %rval
+not_taken:
+ ret i1 true
+
+; CHECK-LABEL: @nonnull3
+; CHECK-NOT: !nonnull
+; CHECK: call void @llvm.assume
+}
+
+; Make sure the above canonicalization does not trigger
+; if the path from the load to the assume is potentially
+; interrupted by an exception being thrown
+define i1 @nonnull4(i32** %a) {
+entry:
+ %load = load i32** %a
+ ;; This call may throw!
+ tail call void @escape(i32* %load)
+ %cmp = icmp ne i32* %load, null
+ tail call void @llvm.assume(i1 %cmp)
+ %rval = icmp eq i32* %load, null
+ ret i1 %rval
+
+; CHECK-LABEL: @nonnull4
+; CHECK-NOT: !nonnull
+; CHECK: call void @llvm.assume
+}
+
+
+
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/assume2.ll b/test/Transforms/InstCombine/assume2.ll
new file mode 100644
index 0000000..c41bbaa
--- /dev/null
+++ b/test/Transforms/InstCombine/assume2.ll
@@ -0,0 +1,174 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @test1(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test1
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 5
+
+ %and = and i32 %a, 15
+ %cmp = icmp eq i32 %and, 5
+ tail call void @llvm.assume(i1 %cmp)
+
+ %and1 = and i32 %a, 7
+ ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test2(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test2
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 2
+
+ %and = and i32 %a, 15
+ %nand = xor i32 %and, -1
+ %cmp = icmp eq i32 %nand, 4294967285
+ tail call void @llvm.assume(i1 %cmp)
+
+ %and1 = and i32 %a, 7
+ ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test3(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test3
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 5
+
+ %v = or i32 %a, 4294967280
+ %cmp = icmp eq i32 %v, 4294967285
+ tail call void @llvm.assume(i1 %cmp)
+
+ %and1 = and i32 %a, 7
+ ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test4(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test4
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 2
+
+ %v = or i32 %a, 4294967280
+ %nv = xor i32 %v, -1
+ %cmp = icmp eq i32 %nv, 5
+ tail call void @llvm.assume(i1 %cmp)
+
+ %and1 = and i32 %a, 7
+ ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test5(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test5
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 4
+
+ %v = xor i32 %a, 1
+ %cmp = icmp eq i32 %v, 5
+ tail call void @llvm.assume(i1 %cmp)
+
+ %and1 = and i32 %a, 7
+ ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test6(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test6
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 5
+
+ %v = shl i32 %a, 2
+ %cmp = icmp eq i32 %v, 20
+ tail call void @llvm.assume(i1 %cmp)
+
+ %and1 = and i32 %a, 63
+ ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test7(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test7
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 20
+
+ %v = lshr i32 %a, 2
+ %cmp = icmp eq i32 %v, 5
+ tail call void @llvm.assume(i1 %cmp)
+
+ %and1 = and i32 %a, 252
+ ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test8(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test8
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 20
+
+ %v = lshr i32 %a, 2
+ %cmp = icmp eq i32 %v, 5
+ tail call void @llvm.assume(i1 %cmp)
+
+ %and1 = and i32 %a, 252
+ ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test9(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test9
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 0
+
+ %cmp = icmp sgt i32 %a, 5
+ tail call void @llvm.assume(i1 %cmp)
+
+ %and1 = and i32 %a, 2147483648
+ ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test10(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test10
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 -2147483648
+
+ %cmp = icmp sle i32 %a, -2
+ tail call void @llvm.assume(i1 %cmp)
+
+ %and1 = and i32 %a, 2147483648
+ ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test11(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test11
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 0
+
+ %cmp = icmp ule i32 %a, 256
+ tail call void @llvm.assume(i1 %cmp)
+
+ %and1 = and i32 %a, 3072
+ ret i32 %and1
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/atomic.ll b/test/Transforms/InstCombine/atomic.ll
index ccee874..98cecef 100644
--- a/test/Transforms/InstCombine/atomic.ll
+++ b/test/Transforms/InstCombine/atomic.ll
@@ -5,14 +5,6 @@ target triple = "x86_64-apple-macosx10.7.0"
; Check transforms involving atomic operations
-define i32* @test1(i8** %p) {
-; CHECK-LABEL: define i32* @test1(
-; CHECK: load atomic i8** %p monotonic, align 8
- %c = bitcast i8** %p to i32**
- %r = load atomic i32** %c monotonic, align 8
- ret i32* %r
-}
-
define i32 @test2(i32* %p) {
; CHECK-LABEL: define i32 @test2(
; CHECK: %x = load atomic i32* %p seq_cst, align 4
diff --git a/test/Transforms/InstCombine/bitcast-alias-function.ll b/test/Transforms/InstCombine/bitcast-alias-function.ll
index a6b56f9..bc36b25 100644
--- a/test/Transforms/InstCombine/bitcast-alias-function.ll
+++ b/test/Transforms/InstCombine/bitcast-alias-function.ll
@@ -90,7 +90,8 @@ entry:
define void @bitcast_alias_scalar(float* noalias %source, float* noalias %dest) nounwind {
entry:
; CHECK-LABEL: @bitcast_alias_scalar
-; CHECK: bitcast float %tmp to i32
+; CHECK: bitcast float* %source to i32*
+; CHECK: load i32*
; CHECK-NOT: fptoui
; CHECK-NOT: uitofp
; CHECK: bitcast i32 %call to float
@@ -104,7 +105,8 @@ entry:
define void @bitcast_alias_vector(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
entry:
; CHECK-LABEL: @bitcast_alias_vector
-; CHECK: bitcast <2 x float> %tmp to <2 x i32>
+; CHECK: bitcast <2 x float>* %source to <2 x i32>*
+; CHECK: load <2 x i32>*
; CHECK-NOT: fptoui
; CHECK-NOT: uitofp
; CHECK: bitcast <2 x i32> %call to <2 x float>
@@ -118,7 +120,8 @@ entry:
define void @bitcast_alias_vector_scalar_same_size(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
entry:
; CHECK-LABEL: @bitcast_alias_vector_scalar_same_size
-; CHECK: bitcast <2 x float> %tmp to i64
+; CHECK: bitcast <2 x float>* %source to i64*
+; CHECK: load i64*
; CHECK: %call = call i64 @func_i64
; CHECK: bitcast i64 %call to <2 x float>
%tmp = load <2 x float>* %source, align 8
@@ -130,7 +133,8 @@ entry:
define void @bitcast_alias_scalar_vector_same_size(i64* noalias %source, i64* noalias %dest) nounwind {
entry:
; CHECK-LABEL: @bitcast_alias_scalar_vector_same_size
-; CHECK: bitcast i64 %tmp to <2 x float>
+; CHECK: bitcast i64* %source to <2 x float>*
+; CHECK: load <2 x float>*
; CHECK: call <2 x float> @func_v2f32
; CHECK: bitcast <2 x float> %call to i64
%tmp = load i64* %source, align 8
@@ -142,7 +146,8 @@ entry:
define void @bitcast_alias_vector_ptrs_same_size(<2 x i64*>* noalias %source, <2 x i64*>* noalias %dest) nounwind {
entry:
; CHECK-LABEL: @bitcast_alias_vector_ptrs_same_size
-; CHECK: bitcast <2 x i64*> %tmp to <2 x i32*>
+; CHECK: bitcast <2 x i64*>* %source to <2 x i32*>*
+; CHECK: load <2 x i32*>*
; CHECK: call <2 x i32*> @func_v2i32p
; CHECK: bitcast <2 x i32*> %call to <2 x i64*>
%tmp = load <2 x i64*>* %source, align 8
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 0cbfbb0..578b16d 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -354,6 +354,24 @@ define i32* @test41(i32* %tmp1) {
; CHECK: ret i32* %tmp1
}
+define i32 addrspace(1)* @test41_addrspacecast_smaller(i32* %tmp1) {
+ %tmp64 = addrspacecast i32* %tmp1 to { i32 } addrspace(1)*
+ %tmp65 = getelementptr { i32 } addrspace(1)* %tmp64, i32 0, i32 0
+ ret i32 addrspace(1)* %tmp65
+; CHECK-LABEL: @test41_addrspacecast_smaller(
+; CHECK: addrspacecast i32* %tmp1 to i32 addrspace(1)*
+; CHECK-NEXT: ret i32 addrspace(1)*
+}
+
+define i32* @test41_addrspacecast_larger(i32 addrspace(1)* %tmp1) {
+ %tmp64 = addrspacecast i32 addrspace(1)* %tmp1 to { i32 }*
+ %tmp65 = getelementptr { i32 }* %tmp64, i32 0, i32 0
+ ret i32* %tmp65
+; CHECK-LABEL: @test41_addrspacecast_larger(
+; CHECK: addrspacecast i32 addrspace(1)* %tmp1 to i32*
+; CHECK-NEXT: ret i32*
+}
+
define i32 @test42(i32 %X) {
%Y = trunc i32 %X to i8 ; <i8> [#uses=1]
%Z = zext i8 %Y to i32 ; <i32> [#uses=1]
@@ -792,7 +810,7 @@ define double @test71(double *%p, i64 %i) {
define double @test72(double *%p, i32 %i) {
; CHECK-LABEL: @test72(
- %so = mul nsw i32 %i, 8
+ %so = shl nsw i32 %i, 3
%o = sext i32 %so to i64
; CHECK-NEXT: sext i32 %i to i64
%q = bitcast double* %p to i8*
@@ -807,7 +825,7 @@ define double @test72(double *%p, i32 %i) {
define double @test73(double *%p, i128 %i) {
; CHECK-LABEL: @test73(
- %lo = mul nsw i128 %i, 8
+ %lo = shl nsw i128 %i, 3
%o = trunc i128 %lo to i64
; CHECK-NEXT: trunc i128 %i to i64
%q = bitcast double* %p to i8*
@@ -919,7 +937,7 @@ define %s @test79(%s *%p, i64 %i, i32 %j) {
define double @test80([100 x double]* %p, i32 %i) {
; CHECK-LABEL: @test80(
- %tmp = mul nsw i32 %i, 8
+ %tmp = shl nsw i32 %i, 3
; CHECK-NEXT: sext i32 %i to i64
%q = bitcast [100 x double]* %p to i8*
%pp = getelementptr i8* %q, i32 %tmp
@@ -936,7 +954,7 @@ define double @test80_addrspacecast([100 x double] addrspace(1)* %p, i32 %i) {
; CHECK-NEXT: getelementptr [100 x double] addrspace(1)* %p
; CHECK-NEXT: load double addrspace(1)*
; CHECK-NEXT: ret double
- %tmp = mul nsw i32 %i, 8
+ %tmp = shl nsw i32 %i, 3
%q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)*
%pp = getelementptr i8 addrspace(2)* %q, i32 %tmp
%r = addrspacecast i8 addrspace(2)* %pp to double addrspace(1)*
@@ -950,7 +968,7 @@ define double @test80_addrspacecast_2([100 x double] addrspace(1)* %p, i32 %i) {
; CHECK-NEXT: addrspacecast double addrspace(1)*
; CHECK-NEXT: load double addrspace(3)*
; CHECK-NEXT: ret double
- %tmp = mul nsw i32 %i, 8
+ %tmp = shl nsw i32 %i, 3
%q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)*
%pp = getelementptr i8 addrspace(2)* %q, i32 %tmp
%r = addrspacecast i8 addrspace(2)* %pp to double addrspace(3)*
@@ -960,7 +978,7 @@ define double @test80_addrspacecast_2([100 x double] addrspace(1)* %p, i32 %i) {
define double @test80_as1([100 x double] addrspace(1)* %p, i16 %i) {
; CHECK-LABEL: @test80_as1(
- %tmp = mul nsw i16 %i, 8
+ %tmp = shl nsw i16 %i, 3
; CHECK-NEXT: sext i16 %i to i32
%q = bitcast [100 x double] addrspace(1)* %p to i8 addrspace(1)*
%pp = getelementptr i8 addrspace(1)* %q, i16 %tmp
@@ -1004,7 +1022,74 @@ define i64 @test83(i16 %a, i64 %k) {
ret i64 %sh_prom1
; CHECK-LABEL: @test83(
-; CHECK: %sub = add nsw i64 %k, 4294967295
+; CHECK: %sub = add i64 %k, 4294967295
; CHECK: %sh_prom = trunc i64 %sub to i32
; CHECK: %shl = shl i32 %conv, %sh_prom
}
+
+define i8 @test84(i32 %a) {
+ %add = add nsw i32 %a, -16777216
+ %shr = lshr exact i32 %add, 23
+ %trunc = trunc i32 %shr to i8
+ ret i8 %trunc
+
+; CHECK-LABEL: @test84(
+; CHECK: [[ADD:%.*]] = add i32 %a, 2130706432
+; CHECK: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23
+; CHECK: [[CST:%.*]] = trunc i32 [[SHR]] to i8
+}
+
+define i8 @test85(i32 %a) {
+ %add = add nuw i32 %a, -16777216
+ %shr = lshr exact i32 %add, 23
+ %trunc = trunc i32 %shr to i8
+ ret i8 %trunc
+
+; CHECK-LABEL: @test85(
+; CHECK: [[ADD:%.*]] = add i32 %a, 2130706432
+; CHECK: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23
+; CHECK: [[CST:%.*]] = trunc i32 [[SHR]] to i8
+}
+
+; Overflow on a float to int or int to float conversion is undefined (PR21130).
+
+define i8 @overflow_fptosi() {
+ %i = fptosi double 1.56e+02 to i8
+ ret i8 %i
+; CHECK-LABEL: @overflow_fptosi(
+; CHECK-NEXT: ret i8 undef
+}
+
+define i8 @overflow_fptoui() {
+ %i = fptoui double 2.56e+02 to i8
+ ret i8 %i
+; CHECK-LABEL: @overflow_fptoui(
+; CHECK-NEXT: ret i8 undef
+}
+
+; The maximum float is approximately 2 ** 128 which is 3.4E38.
+; The constant below is 4E38. Use a 130 bit integer to hold that
+; number; 129-bits for the value + 1 bit for the sign.
+define float @overflow_uitofp() {
+ %i = uitofp i130 400000000000000000000000000000000000000 to float
+ ret float %i
+; CHECK-LABEL: @overflow_uitofp(
+; CHECK-NEXT: ret float undef
+}
+
+define float @overflow_sitofp() {
+ %i = sitofp i130 400000000000000000000000000000000000000 to float
+ ret float %i
+; CHECK-LABEL: @overflow_sitofp(
+; CHECK-NEXT: ret float undef
+}
+
+define i32 @PR21388(i32* %v) {
+ %icmp = icmp slt i32* %v, null
+ %sext = sext i1 %icmp to i32
+ ret i32 %sext
+; CHECK-LABEL: @PR21388(
+; CHECK-NEXT: %[[icmp:.*]] = icmp slt i32* %v, null
+; CHECK-NEXT: %[[sext:.*]] = sext i1 %[[icmp]] to i32
+; CHECK-NEXT: ret i32 %[[sext]]
+}
diff --git a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
index 7fac78a..bb61f02 100644
--- a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
+++ b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -161,12 +161,11 @@ define i32 @constant_fold_bitcast_itof_load() {
ret i32 %a
}
-define <4 x i32> @constant_fold_bitcast_vector_as() {
+define <4 x float> @constant_fold_bitcast_vector_as() {
; CHECK-LABEL: @constant_fold_bitcast_vector_as(
; CHECK: load <4 x float> addrspace(3)* @g_v4f_as3, align 16
-; CHECK: bitcast <4 x float> %1 to <4 x i32>
- %a = load <4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*), align 4
- ret <4 x i32> %a
+ %a = load <4 x float> addrspace(3)* bitcast (<4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*) to <4 x float> addrspace(3)*), align 4
+ ret <4 x float> %a
}
@i32_array_as3 = addrspace(3) global [10 x i32] zeroinitializer
diff --git a/test/Transforms/InstCombine/constant-fold-alias.ll b/test/Transforms/InstCombine/constant-fold-alias.ll
new file mode 100644
index 0000000..13da0f4
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-fold-alias.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S < %s -instcombine | FileCheck %s
+
+target datalayout = "e-p1:16:16-p2:32:32-p3:64:64"
+
+@G1 = global i32 42, align 1
+@G2 = global i32 42
+@G3 = global [4 x i8] zeroinitializer, align 1
+
+@A1 = alias bitcast (i8* getelementptr inbounds ([4 x i8]* @G3, i32 0, i32 2) to i32*)
+@A2 = alias inttoptr (i64 and (i64 ptrtoint (i8* getelementptr inbounds ([4 x i8]* @G3, i32 0, i32 3) to i64), i64 -4) to i32*)
+
+define i64 @f1() {
+; This cannot be constant folded because G1 is underaligned.
+; CHECK-LABEL: @f1(
+; CHECK: ret i64 and
+ ret i64 and (i64 ptrtoint (i32* @G1 to i64), i64 1)
+}
+
+define i64 @f2() {
+; The preferred alignment for G2 allows this one to fold to zero.
+; CHECK-LABEL: @f2(
+; CHECK: ret i64 0
+ ret i64 and (i64 ptrtoint (i32* @G2 to i64), i64 1)
+}
+
+define i64 @g1() {
+; This cannot be constant folded because A1 aliases G3 which is underaligned.
+; CHECK-LABEL: @g1(
+; CHECK: ret i64 and
+ ret i64 and (i64 ptrtoint (i32* @A1 to i64), i64 1)
+}
+
+define i64 @g2() {
+; While A2 also aliases G3 which is underaligned, the math of A2 forces a
+; certain alignment allowing this to fold to zero.
+; CHECK-LABEL: @g2(
+; CHECK: ret i64 0
+ ret i64 and (i64 ptrtoint (i32* @A2 to i64), i64 1)
+}
+
diff --git a/test/Transforms/InstCombine/constant-fold-math.ll b/test/Transforms/InstCombine/constant-fold-math.ll
index 14377df..ce8d337 100644
--- a/test/Transforms/InstCombine/constant-fold-math.ll
+++ b/test/Transforms/InstCombine/constant-fold-math.ll
@@ -7,6 +7,7 @@ declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
declare double @llvm.fma.f64(double, double, double) #0
declare double @llvm.fmuladd.f64(double, double, double) #0
+declare double @llvm.sqrt.f64(double) #0
; CHECK-LABEL: @constant_fold_fma_f32
@@ -44,4 +45,12 @@ define double @constant_fold_fmuladd_f64() #0 {
ret double %x
}
+; The sqrt intrinsic is undefined for negative inputs besides -0.0.
+; CHECK-LABEL: @bad_sqrt
+; CHECK-NEXT: ret double undef
+define double @bad_sqrt() {
+ %x = call double @llvm.sqrt.f64(double -2.000000e+00)
+ ret double %x
+}
+
attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll
index 2e3785f..309843f 100644
--- a/test/Transforms/InstCombine/debug-line.ll
+++ b/test/Transforms/InstCombine/debug-line.ll
@@ -15,14 +15,14 @@ declare i32 @printf(i8*, ...)
!llvm.module.flags = !{!10}
!llvm.dbg.sp = !{!0}
-!0 = metadata !{i32 589870, metadata !8, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !8, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x2e\00foo\00foo\00\004\000\001\000\006\000\000\000", metadata !8, metadata !1, metadata !3, null, void ()* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{metadata !"0x29", metadata !8} ; [ DW_TAG_file_type ]
+!2 = metadata !{metadata !"0x11\0012\00clang\001\00\000\00\000", metadata !8, metadata !4, metadata !4, metadata !9, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !8, metadata !1, null, metadata !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!4 = metadata !{null}
!5 = metadata !{i32 5, i32 2, metadata !6, null}
-!6 = metadata !{i32 589835, metadata !8, metadata !0, i32 4, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
+!6 = metadata !{metadata !"0xb\004\0012\000", metadata !8, metadata !0} ; [ DW_TAG_lexical_block ]
!7 = metadata !{i32 6, i32 1, metadata !6, null}
!8 = metadata !{metadata !"m.c", metadata !"/private/tmp"}
!9 = metadata !{metadata !0}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index 75082dc..a7a491e 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone
@@ -14,11 +14,11 @@ entry:
store i8* %__dest, i8** %__dest.addr, align 8
; CHECK-NOT: call void @llvm.dbg.declare
; CHECK: call void @llvm.dbg.value
- call void @llvm.dbg.declare(metadata !{i8** %__dest.addr}, metadata !0), !dbg !16
+ call void @llvm.dbg.declare(metadata !{i8** %__dest.addr}, metadata !0, metadata !{}), !dbg !16
store i32 %__val, i32* %__val.addr, align 4
- call void @llvm.dbg.declare(metadata !{i32* %__val.addr}, metadata !7), !dbg !18
+ call void @llvm.dbg.declare(metadata !{i32* %__val.addr}, metadata !7, metadata !{}), !dbg !18
store i64 %__len, i64* %__len.addr, align 8
- call void @llvm.dbg.declare(metadata !{i64* %__len.addr}, metadata !9), !dbg !20
+ call void @llvm.dbg.declare(metadata !{i64* %__len.addr}, metadata !9, metadata !{}), !dbg !20
%tmp = load i8** %__dest.addr, align 8, !dbg !21
%tmp1 = load i32* %__val.addr, align 4, !dbg !21
%tmp2 = load i64* %__len.addr, align 8, !dbg !21
@@ -31,29 +31,29 @@ entry:
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!30}
-!0 = metadata !{i32 786689, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !27, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ] [line 79] [local] [def] [foobar]
-!2 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.0 (trunk 127710)", i1 true, metadata !"", i32 0, metadata !29, metadata !29, metadata !24, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !27, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x101\00__dest\0016777294\000", metadata !1, metadata !2, metadata !6} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{metadata !"0x2e\00foobar\00foobar\00\0079\001\001\000\006\00256\001\0079", metadata !27, metadata !2, metadata !4, null, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25} ; [ DW_TAG_subprogram ] [line 79] [local] [def] [foobar]
+!2 = metadata !{metadata !"0x29", metadata !27} ; [ DW_TAG_file_type ]
+!3 = metadata !{metadata !"0x11\0012\00clang version 3.0 (trunk 127710)\001\00\000\00\000", metadata !28, metadata !29, metadata !29, metadata !24, null, null} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !27, metadata !2, null, metadata !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!5 = metadata !{metadata !6}
-!6 = metadata !{i32 786447, null, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 786689, metadata !1, metadata !"__val", metadata !2, i32 33554510, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 786468, null, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786689, metadata !1, metadata !"__len", metadata !2, i32 50331726, metadata !10, i32 0, null} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 589846, metadata !27, metadata !3, metadata !"size_t", i32 80, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_typedef ]
-!11 = metadata !{i32 589846, metadata !27, metadata !3, metadata !"__darwin_size_t", i32 90, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
-!12 = metadata !{i32 786468, null, metadata !3, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!6 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", null, metadata !3, null} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{metadata !"0x101\00__val\0033554510\000", metadata !1, metadata !2, metadata !8} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, metadata !3} ; [ DW_TAG_base_type ]
+!9 = metadata !{metadata !"0x101\00__len\0050331726\000", metadata !1, metadata !2, metadata !10} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{metadata !"0x16\00size_t\0080\000\000\000\000", metadata !27, metadata !3, metadata !11} ; [ DW_TAG_typedef ]
+!11 = metadata !{metadata !"0x16\00__darwin_size_t\0090\000\000\000\000", metadata !27, metadata !3, metadata !12} ; [ DW_TAG_typedef ]
+!12 = metadata !{metadata !"0x24\00long unsigned int\000\0064\0064\000\000\007", null, metadata !3} ; [ DW_TAG_base_type ]
!16 = metadata !{i32 78, i32 28, metadata !1, null}
!18 = metadata !{i32 78, i32 40, metadata !1, null}
!20 = metadata !{i32 78, i32 54, metadata !1, null}
!21 = metadata !{i32 80, i32 3, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !27, metadata !23, i32 80, i32 3, i32 7} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{i32 786443, metadata !27, metadata !1, i32 79, i32 1, i32 6} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{metadata !"0xb\0080\003\007", metadata !27, metadata !23} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{metadata !"0xb\0079\001\006", metadata !27, metadata !1} ; [ DW_TAG_lexical_block ]
!24 = metadata !{metadata !1}
!25 = metadata !{metadata !0, metadata !7, metadata !9}
-!26 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
+!26 = metadata !{metadata !"0x29", metadata !28} ; [ DW_TAG_file_type ]
!27 = metadata !{metadata !"string.h", metadata !"Game"}
!28 = metadata !{metadata !"bits.c", metadata !"Game"}
!29 = metadata !{i32 0}
-!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/InstCombine/descale-zero.ll b/test/Transforms/InstCombine/descale-zero.ll
index 7990fdb..4656837 100644
--- a/test/Transforms/InstCombine/descale-zero.ll
+++ b/test/Transforms/InstCombine/descale-zero.ll
@@ -5,8 +5,7 @@ target triple = "x86_64-apple-macosx10.10.0"
define internal i8* @descale_zero() {
entry:
-; CHECK: load i16** inttoptr (i64 48 to i16**), align 16
-; CHECK-NEXT: bitcast i16*
+; CHECK: load i8** inttoptr (i64 48 to i8**), align 16
; CHECK-NEXT: ret i8*
%i16_ptr = load i16** inttoptr (i64 48 to i16**), align 16
%num = load i64* inttoptr (i64 64 to i64*), align 64
diff --git a/test/Transforms/InstCombine/devirt.ll b/test/Transforms/InstCombine/devirt.ll
deleted file mode 100644
index 9c7cf5d..0000000
--- a/test/Transforms/InstCombine/devirt.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt -instcombine -S < %s | FileCheck %s
-
-; CHECK-NOT: getelementptr
-; CHECK-NOT: ptrtoint
-; CHECK: bitcast i8*
-%struct.S = type { i32 (...)** }
-
-@_ZL1p = internal constant { i64, i64 } { i64 1, i64 0 }, align 8
-
-define void @_Z1g1S(%struct.S* %s) nounwind {
-entry:
- %tmp = load { i64, i64 }* @_ZL1p, align 8
- %memptr.adj = extractvalue { i64, i64 } %tmp, 1
- %0 = bitcast %struct.S* %s to i8*
- %1 = getelementptr inbounds i8* %0, i64 %memptr.adj
- %this.adjusted = bitcast i8* %1 to %struct.S*
- %memptr.ptr = extractvalue { i64, i64 } %tmp, 0
- %2 = and i64 %memptr.ptr, 1
- %memptr.isvirtual = icmp ne i64 %2, 0
- br i1 %memptr.isvirtual, label %memptr.virtual, label %memptr.nonvirtual
-
-memptr.virtual: ; preds = %entry
- %3 = bitcast %struct.S* %this.adjusted to i8**
- %memptr.vtable = load i8** %3
- %4 = sub i64 %memptr.ptr, 1
- %5 = getelementptr i8* %memptr.vtable, i64 %4
- %6 = bitcast i8* %5 to void (%struct.S*)**
- %memptr.virtualfn = load void (%struct.S*)** %6
- br label %memptr.end
-
-memptr.nonvirtual: ; preds = %entry
- %memptr.nonvirtualfn = inttoptr i64 %memptr.ptr to void (%struct.S*)*
- br label %memptr.end
-
-memptr.end: ; preds = %memptr.nonvirtual, %memptr.virtual
- %7 = phi void (%struct.S*)* [ %memptr.virtualfn, %memptr.virtual ], [ %memptr.nonvirtualfn, %memptr.nonvirtual ]
- call void %7(%struct.S* %this.adjusted)
- ret void
-}
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index 9c7ba9b..2841043 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -132,11 +132,11 @@ define i32 @test15(i32 %a, i32 %b) nounwind {
}
define <2 x i64> @test16(<2 x i64> %x) nounwind {
- %shr = lshr <2 x i64> %x, <i64 3, i64 5>
- %div = udiv <2 x i64> %shr, <i64 4, i64 6>
+ %shr = lshr <2 x i64> %x, <i64 5, i64 5>
+ %div = udiv <2 x i64> %shr, <i64 6, i64 6>
ret <2 x i64> %div
; CHECK-LABEL: @test16(
-; CHECK-NEXT: udiv <2 x i64> %x, <i64 32, i64 192>
+; CHECK-NEXT: udiv <2 x i64> %x, <i64 192, i64 192>
; CHECK-NEXT: ret <2 x i64>
}
@@ -175,3 +175,114 @@ define i32 @test20(i32 %x) {
; CHECK-NEXT: select i1 %{{.*}}, i32 %x, i32 {{.*}}
; CHECK-NEXT: ret i32
}
+
+define i32 @test21(i32 %a) {
+ %shl = shl nsw i32 %a, 2
+ %div = sdiv i32 %shl, 12
+ ret i32 %div
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: %div = sdiv i32 %a, 3
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test22(i32 %a) {
+ %mul = mul nsw i32 %a, 3
+ %div = sdiv i32 %mul, 12
+ ret i32 %div
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: %div = sdiv i32 %a, 4
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test23(i32 %a) {
+ %shl = shl nuw i32 %a, 2
+ %div = udiv i32 %shl, 12
+ ret i32 %div
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: %div = udiv i32 %a, 3
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test24(i32 %a) {
+ %mul = mul nuw i32 %a, 3
+ %div = udiv i32 %mul, 12
+ ret i32 %div
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: %div = lshr i32 %a, 2
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test25(i32 %a) {
+ %shl = shl nsw i32 %a, 2
+ %div = sdiv i32 %shl, 2
+ ret i32 %div
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: %div = shl i32 %a, 1
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test26(i32 %a) {
+ %mul = mul nsw i32 %a, 12
+ %div = sdiv i32 %mul, 3
+ ret i32 %div
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: %div = shl i32 %a, 2
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test27(i32 %a) {
+ %shl = shl nuw i32 %a, 2
+ %div = udiv i32 %shl, 2
+ ret i32 %div
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: %div = shl nuw i32 %a, 1
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test28(i32 %a) {
+ %mul = mul nuw i32 %a, 36
+ %div = udiv i32 %mul, 3
+ ret i32 %div
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: %div = mul nuw i32 %a, 12
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test29(i32 %a) {
+ %mul = shl nsw i32 %a, 31
+ %div = sdiv i32 %mul, -2147483648
+ ret i32 %div
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: %[[and:.*]] = and i32 %a, 1
+; CHECK-NEXT: ret i32 %[[and]]
+}
+
+define i32 @test30(i32 %a) {
+ %mul = shl nuw i32 %a, 31
+ %div = udiv i32 %mul, -2147483648
+ ret i32 %div
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: ret i32 %a
+}
+
+define <2 x i32> @test31(<2 x i32> %x) {
+ %shr = lshr <2 x i32> %x, <i32 31, i32 31>
+ %div = udiv <2 x i32> %shr, <i32 2147483647, i32 2147483647>
+ ret <2 x i32> %div
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: %[[shr:.*]] = lshr <2 x i32> %x, <i32 31, i32 31>
+; CHECK-NEXT: udiv <2 x i32> %[[shr]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: ret <2 x i32>
+}
+
+define i32 @test32(i32 %a, i32 %b) {
+ %shl = shl i32 2, %b
+ %div = lshr i32 %shl, 2
+ %div2 = udiv i32 %a, %div
+ ret i32 %div2
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: %[[shl:.*]] = shl i32 2, %b
+; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[shl]], 2
+; CHECK-NEXT: %[[div:.*]] = udiv i32 %a, %[[shr]]
+; CHECK-NEXT: ret i32
+}
diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
index d958470..63a02bb 100644
--- a/test/Transforms/InstCombine/double-float-shrink-1.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -1,349 +1,366 @@
-; RUN: opt < %s -instcombine -enable-double-float-shrink -S | FileCheck %s
+; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-define float @acos_test(float %f) nounwind readnone {
-; CHECK: acos_test
+; Check for and against shrinkage when using the
+; unsafe-fp-math function attribute on a math lib
+; function. This optimization may be overridden by
+; the -enable-double-float-shrink option.
+; PR17850: http://llvm.org/bugs/show_bug.cgi?id=17850
+
+define float @acos_test(float %f) {
%conv = fpext float %f to double
%call = call double @acos(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: acos_test
; CHECK: call float @acosf(float %f)
}
-define double @acos_test2(float %f) nounwind readnone {
-; CHECK: acos_test2
+define double @acos_test2(float %f) {
%conv = fpext float %f to double
%call = call double @acos(double %conv)
ret double %call
+; CHECK-LABEL: acos_test2
; CHECK: call double @acos(double %conv)
}
-define float @acosh_test(float %f) nounwind readnone {
-; CHECK: acosh_test
+define float @acosh_test(float %f) {
%conv = fpext float %f to double
%call = call double @acosh(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: acosh_test
; CHECK: call float @acoshf(float %f)
}
-define double @acosh_test2(float %f) nounwind readnone {
-; CHECK: acosh_test2
+define double @acosh_test2(float %f) {
%conv = fpext float %f to double
%call = call double @acosh(double %conv)
ret double %call
+; CHECK-LABEL: acosh_test2
; CHECK: call double @acosh(double %conv)
}
-define float @asin_test(float %f) nounwind readnone {
-; CHECK: asin_test
+define float @asin_test(float %f) {
%conv = fpext float %f to double
%call = call double @asin(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: asin_test
; CHECK: call float @asinf(float %f)
}
-define double @asin_test2(float %f) nounwind readnone {
-; CHECK: asin_test2
+define double @asin_test2(float %f) {
%conv = fpext float %f to double
%call = call double @asin(double %conv)
ret double %call
+; CHECK-LABEL: asin_test2
; CHECK: call double @asin(double %conv)
}
-define float @asinh_test(float %f) nounwind readnone {
-; CHECK: asinh_test
+define float @asinh_test(float %f) {
%conv = fpext float %f to double
%call = call double @asinh(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: asinh_test
; CHECK: call float @asinhf(float %f)
}
-define double @asinh_test2(float %f) nounwind readnone {
-; CHECK: asinh_test2
+define double @asinh_test2(float %f) {
%conv = fpext float %f to double
%call = call double @asinh(double %conv)
ret double %call
+; CHECK-LABEL: asinh_test2
; CHECK: call double @asinh(double %conv)
}
-define float @atan_test(float %f) nounwind readnone {
-; CHECK: atan_test
+define float @atan_test(float %f) {
%conv = fpext float %f to double
%call = call double @atan(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: atan_test
; CHECK: call float @atanf(float %f)
}
-define double @atan_test2(float %f) nounwind readnone {
-; CHECK: atan_test2
+define double @atan_test2(float %f) {
%conv = fpext float %f to double
%call = call double @atan(double %conv)
ret double %call
+; CHECK-LABEL: atan_test2
; CHECK: call double @atan(double %conv)
}
-define float @atanh_test(float %f) nounwind readnone {
-; CHECK: atanh_test
+define float @atanh_test(float %f) {
%conv = fpext float %f to double
%call = call double @atanh(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: atanh_test
; CHECK: call float @atanhf(float %f)
}
-define double @atanh_test2(float %f) nounwind readnone {
-; CHECK: atanh_test2
+define double @atanh_test2(float %f) {
%conv = fpext float %f to double
%call = call double @atanh(double %conv)
ret double %call
+; CHECK-LABEL: atanh_test2
; CHECK: call double @atanh(double %conv)
}
-define float @cbrt_test(float %f) nounwind readnone {
-; CHECK: cbrt_test
+define float @cbrt_test(float %f) {
%conv = fpext float %f to double
%call = call double @cbrt(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: cbrt_test
; CHECK: call float @cbrtf(float %f)
}
-define double @cbrt_test2(float %f) nounwind readnone {
-; CHECK: cbrt_test2
+define double @cbrt_test2(float %f) {
%conv = fpext float %f to double
%call = call double @cbrt(double %conv)
ret double %call
+; CHECK-LABEL: cbrt_test2
; CHECK: call double @cbrt(double %conv)
}
-define float @exp_test(float %f) nounwind readnone {
-; CHECK: exp_test
+define float @exp_test(float %f) {
%conv = fpext float %f to double
%call = call double @exp(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: exp_test
; CHECK: call float @expf(float %f)
}
-define double @exp_test2(float %f) nounwind readnone {
-; CHECK: exp_test2
+define double @exp_test2(float %f) {
%conv = fpext float %f to double
%call = call double @exp(double %conv)
ret double %call
+; CHECK-LABEL: exp_test2
; CHECK: call double @exp(double %conv)
}
-define float @expm1_test(float %f) nounwind readnone {
-; CHECK: expm1_test
+define float @expm1_test(float %f) {
%conv = fpext float %f to double
%call = call double @expm1(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: expm1_test
; CHECK: call float @expm1f(float %f)
}
-define double @expm1_test2(float %f) nounwind readnone {
-; CHECK: expm1_test2
+define double @expm1_test2(float %f) {
%conv = fpext float %f to double
%call = call double @expm1(double %conv)
ret double %call
+; CHECK-LABEL: expm1_test2
; CHECK: call double @expm1(double %conv)
}
-define float @exp10_test(float %f) nounwind readnone {
-; CHECK: exp10_test
+define float @exp10_test(float %f) {
%conv = fpext float %f to double
%call = call double @exp10(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
-; FIXME: Re-enable this when Linux allows transforming this again, or when we
-; can use builtin attributes to test the transform regardless of OS.
-; DISABLED-CHECK: call float @exp10f(float %f)
+; CHECK-LABEL: exp10_test
; CHECK: call double @exp10(double %conv)
}
-define double @exp10_test2(float %f) nounwind readnone {
-; CHECK: exp10_test2
+define double @exp10_test2(float %f) {
%conv = fpext float %f to double
%call = call double @exp10(double %conv)
ret double %call
+; CHECK-LABEL: exp10_test2
; CHECK: call double @exp10(double %conv)
}
-define float @log_test(float %f) nounwind readnone {
-; CHECK: log_test
+define float @log_test(float %f) {
%conv = fpext float %f to double
%call = call double @log(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: log_test
; CHECK: call float @logf(float %f)
}
-define double @log_test2(float %f) nounwind readnone {
-; CHECK: log_test2
+define double @log_test2(float %f) {
%conv = fpext float %f to double
%call = call double @log(double %conv)
ret double %call
+; CHECK-LABEL: log_test2
; CHECK: call double @log(double %conv)
}
-define float @log10_test(float %f) nounwind readnone {
-; CHECK: log10_test
+define float @log10_test(float %f) {
%conv = fpext float %f to double
%call = call double @log10(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: log10_test
; CHECK: call float @log10f(float %f)
}
-define double @log10_test2(float %f) nounwind readnone {
-; CHECK: log10_test2
+define double @log10_test2(float %f) {
%conv = fpext float %f to double
%call = call double @log10(double %conv)
ret double %call
+; CHECK-LABEL: log10_test2
; CHECK: call double @log10(double %conv)
}
-define float @log1p_test(float %f) nounwind readnone {
-; CHECK: log1p_test
+define float @log1p_test(float %f) {
%conv = fpext float %f to double
%call = call double @log1p(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: log1p_test
; CHECK: call float @log1pf(float %f)
}
-define double @log1p_test2(float %f) nounwind readnone {
-; CHECK: log1p_test2
+define double @log1p_test2(float %f) {
%conv = fpext float %f to double
%call = call double @log1p(double %conv)
ret double %call
+; CHECK-LABEL: log1p_test2
; CHECK: call double @log1p(double %conv)
}
-define float @log2_test(float %f) nounwind readnone {
-; CHECK: log2_test
+define float @log2_test(float %f) {
%conv = fpext float %f to double
%call = call double @log2(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: log2_test
; CHECK: call float @log2f(float %f)
}
-define double @log2_test2(float %f) nounwind readnone {
-; CHECK: log2_test2
+define double @log2_test2(float %f) {
%conv = fpext float %f to double
%call = call double @log2(double %conv)
ret double %call
+; CHECK-LABEL: log2_test2
; CHECK: call double @log2(double %conv)
}
-define float @logb_test(float %f) nounwind readnone {
-; CHECK: logb_test
+define float @logb_test(float %f) {
%conv = fpext float %f to double
%call = call double @logb(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: logb_test
; CHECK: call float @logbf(float %f)
}
-define double @logb_test2(float %f) nounwind readnone {
-; CHECK: logb_test2
+define double @logb_test2(float %f) {
%conv = fpext float %f to double
%call = call double @logb(double %conv)
ret double %call
+; CHECK-LABEL: logb_test2
; CHECK: call double @logb(double %conv)
}
-define float @sin_test(float %f) nounwind readnone {
-; CHECK: sin_test
+define float @sin_test(float %f) {
%conv = fpext float %f to double
%call = call double @sin(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: sin_test
; CHECK: call float @sinf(float %f)
}
-define double @sin_test2(float %f) nounwind readnone {
-; CHECK: sin_test2
+define double @sin_test2(float %f) {
%conv = fpext float %f to double
%call = call double @sin(double %conv)
ret double %call
+; CHECK-LABEL: sin_test2
; CHECK: call double @sin(double %conv)
}
-define float @sqrt_test(float %f) nounwind readnone {
-; CHECK: sqrt_test
+define float @sqrt_test(float %f) {
%conv = fpext float %f to double
%call = call double @sqrt(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: sqrt_test
; CHECK: call float @sqrtf(float %f)
}
-define float @sqrt_int_test(float %f) nounwind readnone {
-; CHECK: sqrt_int_test
+define double @sqrt_test2(float %f) {
+ %conv = fpext float %f to double
+ %call = call double @sqrt(double %conv)
+ ret double %call
+; CHECK-LABEL: sqrt_test2
+; CHECK: call double @sqrt(double %conv)
+}
+
+define float @sqrt_int_test(float %f) {
%conv = fpext float %f to double
%call = call double @llvm.sqrt.f64(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: sqrt_int_test
; CHECK: call float @llvm.sqrt.f32(float %f)
}
-define double @sqrt_test2(float %f) nounwind readnone {
-; CHECK: sqrt_test2
+define double @sqrt_int_test2(float %f) {
%conv = fpext float %f to double
- %call = call double @sqrt(double %conv)
+ %call = call double @llvm.sqrt.f64(double %conv)
ret double %call
-; CHECK: call double @sqrt(double %conv)
+; CHECK-LABEL: sqrt_int_test2
+; CHECK: call double @llvm.sqrt.f64(double %conv)
}
-define float @tan_test(float %f) nounwind readnone {
-; CHECK: tan_test
+
+define float @tan_test(float %f) {
%conv = fpext float %f to double
%call = call double @tan(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: tan_test
; CHECK: call float @tanf(float %f)
}
-define double @tan_test2(float %f) nounwind readnone {
-; CHECK: tan_test2
+define double @tan_test2(float %f) {
%conv = fpext float %f to double
%call = call double @tan(double %conv)
ret double %call
+; CHECK-LABEL: tan_test2
; CHECK: call double @tan(double %conv)
}
-define float @tanh_test(float %f) nounwind readnone {
-; CHECK: tanh_test
+define float @tanh_test(float %f) {
%conv = fpext float %f to double
%call = call double @tanh(double %conv)
%conv1 = fptrunc double %call to float
ret float %conv1
+; CHECK-LABEL: tanh_test
; CHECK: call float @tanhf(float %f)
}
-define double @tanh_test2(float %f) nounwind readnone {
-; CHECK: tanh_test2
+define double @tanh_test2(float %f) {
%conv = fpext float %f to double
%call = call double @tanh(double %conv)
ret double %call
+; CHECK-LABEL: tanh_test2
; CHECK: call double @tanh(double %conv)
}
-declare double @tanh(double) nounwind readnone
-declare double @tan(double) nounwind readnone
-declare double @sqrt(double) nounwind readnone
-declare double @sin(double) nounwind readnone
-declare double @log2(double) nounwind readnone
-declare double @log1p(double) nounwind readnone
-declare double @log10(double) nounwind readnone
-declare double @log(double) nounwind readnone
-declare double @logb(double) nounwind readnone
-declare double @exp10(double) nounwind readnone
-declare double @expm1(double) nounwind readnone
-declare double @exp(double) nounwind readnone
-declare double @cbrt(double) nounwind readnone
-declare double @atanh(double) nounwind readnone
-declare double @atan(double) nounwind readnone
-declare double @acos(double) nounwind readnone
-declare double @acosh(double) nounwind readnone
-declare double @asin(double) nounwind readnone
-declare double @asinh(double) nounwind readnone
-
-declare double @llvm.sqrt.f64(double) nounwind readnone
+declare double @tanh(double) #1
+declare double @tan(double) #1
+
+; sqrt is a special case: the shrinking optimization
+; is valid even without unsafe-fp-math.
+declare double @sqrt(double)
+declare double @llvm.sqrt.f64(double)
+
+declare double @sin(double) #1
+declare double @log2(double) #1
+declare double @log1p(double) #1
+declare double @log10(double) #1
+declare double @log(double) #1
+declare double @logb(double) #1
+declare double @exp10(double) #1
+declare double @expm1(double) #1
+declare double @exp(double) #1
+declare double @cbrt(double) #1
+declare double @atanh(double) #1
+declare double @atan(double) #1
+declare double @acos(double) #1
+declare double @acosh(double) #1
+declare double @asin(double) #1
+declare double @asinh(double) #1
+
+attributes #1 = { "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/fabs.ll b/test/Transforms/InstCombine/fabs.ll
new file mode 100644
index 0000000..0479549
--- /dev/null
+++ b/test/Transforms/InstCombine/fabs.ll
@@ -0,0 +1,100 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Make sure all library calls are eliminated when the input is known positive.
+
+declare float @fabsf(float)
+declare double @fabs(double)
+declare fp128 @fabsl(fp128)
+
+define float @square_fabs_call_f32(float %x) {
+ %mul = fmul float %x, %x
+ %fabsf = tail call float @fabsf(float %mul)
+ ret float %fabsf
+
+; CHECK-LABEL: square_fabs_call_f32(
+; CHECK-NEXT: %mul = fmul float %x, %x
+; CHECK-NEXT: ret float %mul
+}
+
+define double @square_fabs_call_f64(double %x) {
+ %mul = fmul double %x, %x
+ %fabs = tail call double @fabs(double %mul)
+ ret double %fabs
+
+; CHECK-LABEL: square_fabs_call_f64(
+; CHECK-NEXT: %mul = fmul double %x, %x
+; CHECK-NEXT: ret double %mul
+}
+
+define fp128 @square_fabs_call_f128(fp128 %x) {
+ %mul = fmul fp128 %x, %x
+ %fabsl = tail call fp128 @fabsl(fp128 %mul)
+ ret fp128 %fabsl
+
+; CHECK-LABEL: square_fabs_call_f128(
+; CHECK-NEXT: %mul = fmul fp128 %x, %x
+; CHECK-NEXT: ret fp128 %mul
+}
+
+; Make sure all intrinsic calls are eliminated when the input is known positive.
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128 @llvm.fabs.f128(fp128)
+
+define float @square_fabs_intrinsic_f32(float %x) {
+ %mul = fmul float %x, %x
+ %fabsf = tail call float @llvm.fabs.f32(float %mul)
+ ret float %fabsf
+
+; CHECK-LABEL: square_fabs_intrinsic_f32(
+; CHECK-NEXT: %mul = fmul float %x, %x
+; CHECK-NEXT: ret float %mul
+}
+
+define double @square_fabs_intrinsic_f64(double %x) {
+ %mul = fmul double %x, %x
+ %fabs = tail call double @llvm.fabs.f64(double %mul)
+ ret double %fabs
+
+; CHECK-LABEL: square_fabs_intrinsic_f64(
+; CHECK-NEXT: %mul = fmul double %x, %x
+; CHECK-NEXT: ret double %mul
+}
+
+define fp128 @square_fabs_intrinsic_f128(fp128 %x) {
+ %mul = fmul fp128 %x, %x
+ %fabsl = tail call fp128 @llvm.fabs.f128(fp128 %mul)
+ ret fp128 %fabsl
+
+; CHECK-LABEL: square_fabs_intrinsic_f128(
+; CHECK-NEXT: %mul = fmul fp128 %x, %x
+; CHECK-NEXT: ret fp128 %mul
+}
+
+; Shrinking a library call to a smaller type should not be inhibited by nor inhibit the square optimization.
+
+define float @square_fabs_shrink_call1(float %x) {
+ %ext = fpext float %x to double
+ %sq = fmul double %ext, %ext
+ %fabs = call double @fabs(double %sq)
+ %trunc = fptrunc double %fabs to float
+ ret float %trunc
+
+; CHECK-LABEL: square_fabs_shrink_call1(
+; CHECK-NEXT: %trunc = fmul float %x, %x
+; CHECK-NEXT: ret float %trunc
+}
+
+define float @square_fabs_shrink_call2(float %x) {
+ %sq = fmul float %x, %x
+ %ext = fpext float %sq to double
+ %fabs = call double @fabs(double %ext)
+ %trunc = fptrunc double %fabs to float
+ ret float %trunc
+
+; CHECK-LABEL: square_fabs_shrink_call2(
+; CHECK-NEXT: %sq = fmul float %x, %x
+; CHECK-NEXT: ret float %sq
+}
+
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index 2ee4b0f..b0ec895 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -530,3 +530,173 @@ define float @fact_div6(float %x) {
; CHECK: fact_div6
; CHECK: %t3 = fsub fast float %t1, %t2
}
+
+; =========================================================================
+;
+; Test-cases for square root
+;
+; =========================================================================
+
+; A squared factor fed into a square root intrinsic should be hoisted out
+; as a fabs() value.
+; We have to rely on a function-level attribute to enable this optimization
+; because intrinsics don't currently have access to IR-level fast-math
+; flags. If that changes, we can relax the requirement on all of these
+; tests to just specify 'fast' on the sqrt.
+
+attributes #0 = { "unsafe-fp-math" = "true" }
+
+declare double @llvm.sqrt.f64(double)
+
+define double @sqrt_intrinsic_arg_squared(double %x) #0 {
+ %mul = fmul fast double %x, %x
+ %sqrt = call double @llvm.sqrt.f64(double %mul)
+ ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_arg_squared(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: ret double %fabs
+}
+
+; Check all 6 combinations of a 3-way multiplication tree where
+; one factor is repeated.
+
+define double @sqrt_intrinsic_three_args1(double %x, double %y) #0 {
+ %mul = fmul fast double %y, %x
+ %mul2 = fmul fast double %mul, %x
+ %sqrt = call double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args1(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args2(double %x, double %y) #0 {
+ %mul = fmul fast double %x, %y
+ %mul2 = fmul fast double %mul, %x
+ %sqrt = call double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args2(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args3(double %x, double %y) #0 {
+ %mul = fmul fast double %x, %x
+ %mul2 = fmul fast double %mul, %y
+ %sqrt = call double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args3(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args4(double %x, double %y) #0 {
+ %mul = fmul fast double %y, %x
+ %mul2 = fmul fast double %x, %mul
+ %sqrt = call double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args4(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args5(double %x, double %y) #0 {
+ %mul = fmul fast double %x, %y
+ %mul2 = fmul fast double %x, %mul
+ %sqrt = call double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args5(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args6(double %x, double %y) #0 {
+ %mul = fmul fast double %x, %x
+ %mul2 = fmul fast double %y, %mul
+ %sqrt = call double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args6(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_arg_4th(double %x) #0 {
+ %mul = fmul fast double %x, %x
+ %mul2 = fmul fast double %mul, %mul
+ %sqrt = call double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_arg_4th(
+; CHECK-NEXT: %mul = fmul fast double %x, %x
+; CHECK-NEXT: ret double %mul
+}
+
+define double @sqrt_intrinsic_arg_5th(double %x) #0 {
+ %mul = fmul fast double %x, %x
+ %mul2 = fmul fast double %mul, %x
+ %mul3 = fmul fast double %mul2, %mul
+ %sqrt = call double @llvm.sqrt.f64(double %mul3)
+ ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_arg_5th(
+; CHECK-NEXT: %mul = fmul fast double %x, %x
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %x)
+; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+; Check that square root calls have the same behavior.
+
+declare float @sqrtf(float)
+declare double @sqrt(double)
+declare fp128 @sqrtl(fp128)
+
+define float @sqrt_call_squared_f32(float %x) #0 {
+ %mul = fmul fast float %x, %x
+ %sqrt = call float @sqrtf(float %mul)
+ ret float %sqrt
+
+; CHECK-LABEL: sqrt_call_squared_f32(
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: ret float %fabs
+}
+
+define double @sqrt_call_squared_f64(double %x) #0 {
+ %mul = fmul fast double %x, %x
+ %sqrt = call double @sqrt(double %mul)
+ ret double %sqrt
+
+; CHECK-LABEL: sqrt_call_squared_f64(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: ret double %fabs
+}
+
+define fp128 @sqrt_call_squared_f128(fp128 %x) #0 {
+ %mul = fmul fast fp128 %x, %x
+ %sqrt = call fp128 @sqrtl(fp128 %mul)
+ ret fp128 %sqrt
+
+; CHECK-LABEL: sqrt_call_squared_f128(
+; CHECK-NEXT: %fabs = call fp128 @llvm.fabs.f128(fp128 %x)
+; CHECK-NEXT: ret fp128 %fabs
+}
+
diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll
index 18cbf9d..a776765 100644
--- a/test/Transforms/InstCombine/fmul.ll
+++ b/test/Transforms/InstCombine/fmul.ll
@@ -123,3 +123,32 @@ define float @test11(float %x, float %y) {
; CHECK-NOT: fadd float
; CHECK: fadd fast float
}
+
+; PR21126: http://llvm.org/bugs/show_bug.cgi?id=21126
+; With unsafe/fast math, sqrt(X) * sqrt(X) is just X.
+declare double @llvm.sqrt.f64(double)
+
+define double @sqrt_squared1(double %f) {
+ %sqrt = call double @llvm.sqrt.f64(double %f)
+ %mul = fmul fast double %sqrt, %sqrt
+ ret double %mul
+; CHECK-LABEL: @sqrt_squared1(
+; CHECK-NEXT: ret double %f
+}
+
+; With unsafe/fast math, sqrt(X) * sqrt(X) is just X,
+; but make sure another use of the sqrt is intact.
+; Note that the remaining fmul is altered but is not 'fast'
+; itself because it was not marked 'fast' originally.
+; Thus, we have an overall fast result, but no more indication of
+; 'fast'ness in the code.
+define double @sqrt_squared2(double %f) {
+ %sqrt = call double @llvm.sqrt.f64(double %f)
+ %mul1 = fmul fast double %sqrt, %sqrt
+ %mul2 = fmul double %mul1, %sqrt
+ ret double %mul2
+; CHECK-LABEL: @sqrt_squared2(
+; CHECK-NEXT: %sqrt = call double @llvm.sqrt.f64(double %f)
+; CHECK-NEXT: %mul2 = fmul double %sqrt, %f
+; CHECK-NEXT: ret double %mul2
+}
diff --git a/test/Transforms/InstCombine/fold-phi.ll b/test/Transforms/InstCombine/fold-phi.ll
index bd01d58..c6bb1b3 100644
--- a/test/Transforms/InstCombine/fold-phi.ll
+++ b/test/Transforms/InstCombine/fold-phi.ll
@@ -17,23 +17,23 @@ end:
ret float %add5
}
-; CHECK: fold_phi
-define float @fold_phi(float %a) nounwind {
+; CHECK-LABEL: @pr21377(
+define void @pr21377(i32) {
entry:
- br label %for.body
-
-for.body:
-; CHECK: phi float
-; CHECK-NEXT: br i1 undef
- %sum.057 = phi float [ 0.000000e+00, %entry ], [ %add5, %bb0 ]
- %add5 = fadd float %sum.057, 1.0 ;; Should be moved to the latch!
- br i1 undef, label %bb0, label %end
-
-; CHECK: bb0:
-bb0:
-; CHECK: fadd float
- br label %for.body
-
-end:
- ret float %add5
+ br label %while.body
+
+while.body: ; preds = %if.end, %entry
+ %phi1 = phi i64 [ undef, %entry ], [ %or2, %if.end ]
+ %zext = zext i32 %0 to i64
+ br i1 undef, label %if.end, label %if.else
+
+if.else: ; preds = %while.body
+ %or1 = or i64 %phi1, %zext
+ %and = and i64 %or1, 4294967295
+ br label %if.end
+
+if.end: ; preds = %if.else, %while.body
+ %phi2 = phi i64 [ %and, %if.else ], [ undef, %while.body ]
+ %or2 = or i64 %phi2, %zext
+ br label %while.body
}
diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll
index 9be66fd..ac03402 100644
--- a/test/Transforms/InstCombine/fpcast.ll
+++ b/test/Transforms/InstCombine/fpcast.ll
@@ -53,3 +53,23 @@ define half @test5(float %a, float %b, float %c) {
}
declare float @llvm.fabs.f32(float) nounwind readonly
+
+define <1 x float> @test6(<1 x double> %V) {
+ %frem = frem <1 x double> %V, %V
+ %trunc = fptrunc <1 x double> %frem to <1 x float>
+ ret <1 x float> %trunc
+; CHECK-LABEL: @test6
+; CHECK-NEXT: %[[frem:.*]] = frem <1 x double> %V, %V
+; CHECK-NEXT: %[[trunc:.*]] = fptrunc <1 x double> %[[frem]] to <1 x float>
+; CHECK-NEXT: ret <1 x float> %trunc
+}
+
+define float @test7(double %V) {
+ %frem = frem double %V, 1.000000e+00
+ %trunc = fptrunc double %frem to float
+ ret float %trunc
+; CHECK-LABEL: @test7
+; CHECK-NEXT: %[[frem:.*]] = frem double %V, 1.000000e+00
+; CHECK-NEXT: %[[trunc:.*]] = fptrunc double %frem to float
+; CHECK-NEXT: ret float %trunc
+}
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index 3240c6d..bb46662 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -6,6 +6,7 @@ target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
%pair = type { i32, i32 }
%struct.B = type { double }
%struct.A = type { %struct.B, i32, i32 }
+%struct.C = type { [7 x i8] }
@Global = constant [10 x i8] c"helloworld"
@@ -580,6 +581,16 @@ define i32 addrspace(1)* @test33_array_struct_as1([10 x %struct.Key] addrspace(1
ret i32 addrspace(1)* %C
}
+define i32 addrspace(1)* @test33_addrspacecast(%struct.Key* %A) {
+; CHECK-LABEL: @test33_addrspacecast(
+; CHECK: %C = getelementptr %struct.Key* %A, i64 0, i32 0, i32 1
+; CHECK-NEXT: addrspacecast i32* %C to i32 addrspace(1)*
+; CHECK-NEXT: ret
+ %B = addrspacecast %struct.Key* %A to %struct.anon addrspace(1)*
+ %C = getelementptr %struct.anon addrspace(1)* %B, i32 0, i32 2
+ ret i32 addrspace(1)* %C
+}
+
%T2 = type { i8*, i8 }
define i8* @test34(i8* %Val, i64 %V) nounwind {
entry:
@@ -692,7 +703,7 @@ define void @test39(%struct.ham* %arg, i8 %arg1) nounwind {
; CHECK-LABEL: @test39(
; CHECK: getelementptr inbounds %struct.ham* %arg, i64 0, i32 2
-; CHECK: getelementptr inbounds i8* %tmp3, i64 -8
+; CHECK: getelementptr inbounds i8* %{{.+}}, i64 -8
}
define i1 @pr16483([1 x i8]* %a, [1 x i8]* %b) {
@@ -803,6 +814,78 @@ define i16 @test41([3 x i32] addrspace(1)* %array) {
; CHECK-NEXT: ret i16 8
}
+define i8* @test42(i8* %c1, i8* %c2) {
+ %ptrtoint = ptrtoint i8* %c1 to i64
+ %sub = sub i64 0, %ptrtoint
+ %gep = getelementptr inbounds i8* %c2, i64 %sub
+ ret i8* %gep
+
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[PTRTOINT1:%.*]] = ptrtoint i8* %c1 to i64
+; CHECK-NEXT: [[PTRTOINT2:%.*]] = ptrtoint i8* %c2 to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[PTRTOINT2]], [[PTRTOINT1]]
+; CHECK-NEXT: [[INTTOPTR:%.*]] = inttoptr i64 [[SUB]] to i8*
+; CHECK-NEXT: ret i8* [[INTTOPTR]]
+}
+
+define i16* @test43(i16* %c1, i16* %c2) {
+ %ptrtoint = ptrtoint i16* %c1 to i64
+ %sub = sub i64 0, %ptrtoint
+ %shr = ashr i64 %sub, 1
+ %gep = getelementptr inbounds i16* %c2, i64 %shr
+ ret i16* %gep
+
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[PTRTOINT1:%.*]] = ptrtoint i16* %c1 to i64
+; CHECK-NEXT: [[PTRTOINT2:%.*]] = ptrtoint i16* %c2 to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[PTRTOINT2]], [[PTRTOINT1]]
+; CHECK-NEXT: [[INTTOPTR:%.*]] = inttoptr i64 [[SUB]] to i16*
+; CHECK-NEXT: ret i16* [[INTTOPTR]]
+}
+
+define %struct.C* @test44(%struct.C* %c1, %struct.C* %c2) {
+ %ptrtoint = ptrtoint %struct.C* %c1 to i64
+ %sub = sub i64 0, %ptrtoint
+ %shr = sdiv i64 %sub, 7
+ %gep = getelementptr inbounds %struct.C* %c2, i64 %shr
+ ret %struct.C* %gep
+
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[PTRTOINT1:%.*]] = ptrtoint %struct.C* %c1 to i64
+; CHECK-NEXT: [[PTRTOINT2:%.*]] = ptrtoint %struct.C* %c2 to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[PTRTOINT2]], [[PTRTOINT1]]
+; CHECK-NEXT: [[INTTOPTR:%.*]] = inttoptr i64 [[SUB]] to %struct.C*
+; CHECK-NEXT: ret %struct.C* [[INTTOPTR]]
+}
+
+define %struct.C* @test45(%struct.C* %c1, %struct.C** %c2) {
+ %ptrtoint1 = ptrtoint %struct.C* %c1 to i64
+ %ptrtoint2 = ptrtoint %struct.C** %c2 to i64
+ %sub = sub i64 %ptrtoint2, %ptrtoint1 ; C2 - C1
+ %shr = sdiv i64 %sub, 7
+ %gep = getelementptr inbounds %struct.C* %c1, i64 %shr ; C1 + (C2 - C1)
+ ret %struct.C* %gep
+
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[BITCAST:%.*]] = bitcast %struct.C** %c2 to %struct.C*
+; CHECK-NEXT: ret %struct.C* [[BITCAST]]
+}
+
+define %struct.C* @test46(%struct.C* %c1, %struct.C* %c2, i64 %N) {
+ %ptrtoint = ptrtoint %struct.C* %c1 to i64
+ %sub = sub i64 0, %ptrtoint
+ %sdiv = sdiv i64 %sub, %N
+ %gep = getelementptr inbounds %struct.C* %c2, i64 %sdiv
+ ret %struct.C* %gep
+
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: [[PTRTOINT:%.*]] = ptrtoint %struct.C* %c1 to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 0, [[PTRTOINT]]
+; CHECK-NEXT: [[SDIV:%.*]] = sdiv i64 [[SUB]], %N
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds %struct.C* %c2, i64 %sdiv
+; CHECK-NEXT: ret %struct.C* [[GEP]]
+}
+
define i32 addrspace(1)* @ascast_0_gep(i32* %p) nounwind {
; CHECK-LABEL: @ascast_0_gep(
; CHECK-NOT: getelementptr
diff --git a/test/Transforms/InstCombine/icmp-logical.ll b/test/Transforms/InstCombine/icmp-logical.ll
index d5d8cbc..faae201 100644
--- a/test/Transforms/InstCombine/icmp-logical.ll
+++ b/test/Transforms/InstCombine/icmp-logical.ll
@@ -150,3 +150,23 @@ define i1 @nomask_rhs(i32 %in) {
%val = or i1 %tst1, %tst2
ret i1 %val
}
+
+define i1 @fold_mask_cmps_to_false(i32 %x) {
+; CHECK-LABEL: @fold_mask_cmps_to_false
+; CHECK: ret i1 false
+ %1 = and i32 %x, 2147483647
+ %2 = icmp eq i32 %1, 0
+ %3 = icmp eq i32 %x, 2147483647
+ %4 = and i1 %3, %2
+ ret i1 %4
+}
+
+define i1 @fold_mask_cmps_to_true(i32 %x) {
+; CHECK-LABEL: @fold_mask_cmps_to_true
+; CHECK: ret i1 true
+ %1 = and i32 %x, 2147483647
+ %2 = icmp ne i32 %1, 0
+ %3 = icmp ne i32 %x, 2147483647
+ %4 = or i1 %3, %2
+ ret i1 %4
+}
diff --git a/test/Transforms/InstCombine/icmp-range.ll b/test/Transforms/InstCombine/icmp-range.ll
new file mode 100644
index 0000000..97d231f
--- /dev/null
+++ b/test/Transforms/InstCombine/icmp-range.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; These should be InstSimplify checks, but most of the code
+; is currently only in InstCombine. TODO: move supporting code
+
+; Definitely out of range
+define i1 @test_nonzero(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero
+; CHECK: ret i1 true
+ %val = load i32* %arg, !range !0
+ %rval = icmp ne i32 %val, 0
+ ret i1 %rval
+}
+define i1 @test_nonzero2(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero2
+; CHECK: ret i1 false
+ %val = load i32* %arg, !range !0
+ %rval = icmp eq i32 %val, 0
+ ret i1 %rval
+}
+
+; Potentially in range
+define i1 @test_nonzero3(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero3
+; Check that this does not trigger - it wouldn't be legal
+; CHECK: icmp
+ %val = load i32* %arg, !range !1
+ %rval = icmp ne i32 %val, 0
+ ret i1 %rval
+}
+
+; Definitely in range
+define i1 @test_nonzero4(i8* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero4
+; CHECK: ret i1 false
+ %val = load i8* %arg, !range !2
+ %rval = icmp ne i8 %val, 0
+ ret i1 %rval
+}
+
+define i1 @test_nonzero5(i8* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero5
+; CHECK: ret i1 false
+ %val = load i8* %arg, !range !2
+ %rval = icmp ugt i8 %val, 0
+ ret i1 %rval
+}
+
+; Cheaper checks (most values in range meet requirements)
+define i1 @test_nonzero6(i8* %argw) {
+; CHECK-LABEL: test_nonzero6
+; CHECK: icmp ne i8 %val, 0
+ %val = load i8* %argw, !range !3
+ %rval = icmp sgt i8 %val, 0
+ ret i1 %rval
+}
+
+
+!0 = metadata !{i32 1, i32 6}
+!1 = metadata !{i32 0, i32 6}
+!2 = metadata !{i8 0, i8 1}
+!3 = metadata !{i8 0, i8 6}
diff --git a/test/Transforms/InstCombine/icmp-shr.ll b/test/Transforms/InstCombine/icmp-shr.ll
new file mode 100644
index 0000000..52414b9
--- /dev/null
+++ b/test/Transforms/InstCombine/icmp-shr.ll
@@ -0,0 +1,378 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; CHECK-LABEL: @lshr_eq_msb_low_last_zero
+; CHECK-NEXT: icmp ugt i8 %a, 6
+define i1 @lshr_eq_msb_low_last_zero(i8 %a) {
+ %shr = lshr i8 127, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_eq_msb_low_second_zero
+; CHECK-NEXT: icmp ugt i8 %a, 6
+define i1 @ashr_eq_msb_low_second_zero(i8 %a) {
+ %shr = ashr i8 127, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_ne_msb_low_last_zero
+; CHECK-NEXT: icmp ult i8 %a, 7
+define i1 @lshr_ne_msb_low_last_zero(i8 %a) {
+ %shr = lshr i8 127, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_ne_msb_low_second_zero
+; CHECK-NEXT: icmp ult i8 %a, 7
+define i1 @ashr_ne_msb_low_second_zero(i8 %a) {
+ %shr = ashr i8 127, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_eq_both_equal
+; CHECK-NEXT: icmp eq i8 %a, 0
+define i1 @ashr_eq_both_equal(i8 %a) {
+ %shr = ashr i8 128, %a
+ %cmp = icmp eq i8 %shr, 128
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_ne_both_equal
+; CHECK-NEXT: icmp ne i8 %a, 0
+define i1 @ashr_ne_both_equal(i8 %a) {
+ %shr = ashr i8 128, %a
+ %cmp = icmp ne i8 %shr, 128
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_eq_both_equal
+; CHECK-NEXT: icmp eq i8 %a, 0
+define i1 @lshr_eq_both_equal(i8 %a) {
+ %shr = lshr i8 127, %a
+ %cmp = icmp eq i8 %shr, 127
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_ne_both_equal
+; CHECK-NEXT: icmp ne i8 %a, 0
+define i1 @lshr_ne_both_equal(i8 %a) {
+ %shr = lshr i8 127, %a
+ %cmp = icmp ne i8 %shr, 127
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_both_equal
+; CHECK-NEXT: icmp eq i8 %a, 0
+define i1 @exact_ashr_eq_both_equal(i8 %a) {
+ %shr = ashr exact i8 128, %a
+ %cmp = icmp eq i8 %shr, 128
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_both_equal
+; CHECK-NEXT: icmp ne i8 %a, 0
+define i1 @exact_ashr_ne_both_equal(i8 %a) {
+ %shr = ashr exact i8 128, %a
+ %cmp = icmp ne i8 %shr, 128
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_both_equal
+; CHECK-NEXT: icmp eq i8 %a, 0
+define i1 @exact_lshr_eq_both_equal(i8 %a) {
+ %shr = lshr exact i8 126, %a
+ %cmp = icmp eq i8 %shr, 126
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_both_equal
+; CHECK-NEXT: icmp ne i8 %a, 0
+define i1 @exact_lshr_ne_both_equal(i8 %a) {
+ %shr = lshr exact i8 126, %a
+ %cmp = icmp ne i8 %shr, 126
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_opposite_msb
+; CHECK-NEXT: icmp eq i8 %a, 7
+define i1 @exact_lshr_eq_opposite_msb(i8 %a) {
+ %shr = lshr exact i8 -128, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_eq_opposite_msb
+; CHECK-NEXT: icmp eq i8 %a, 7
+define i1 @lshr_eq_opposite_msb(i8 %a) {
+ %shr = lshr i8 -128, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_opposite_msb
+; CHECK-NEXT: icmp ne i8 %a, 7
+define i1 @exact_lshr_ne_opposite_msb(i8 %a) {
+ %shr = lshr exact i8 -128, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_ne_opposite_msb
+; CHECK-NEXT: icmp ne i8 %a, 7
+define i1 @lshr_ne_opposite_msb(i8 %a) {
+ %shr = lshr i8 -128, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq
+; CHECK-NEXT: icmp eq i8 %a, 7
+define i1 @exact_ashr_eq(i8 %a) {
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp eq i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne
+; CHECK-NEXT: icmp ne i8 %a, 7
+define i1 @exact_ashr_ne(i8 %a) {
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp ne i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq
+; CHECK-NEXT: icmp eq i8 %a, 2
+define i1 @exact_lshr_eq(i8 %a) {
+ %shr = lshr exact i8 4, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne
+; CHECK-NEXT: icmp ne i8 %a, 2
+define i1 @exact_lshr_ne(i8 %a) {
+ %shr = lshr exact i8 4, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq
+; CHECK-NEXT: icmp eq i8 %a, 7
+define i1 @nonexact_ashr_eq(i8 %a) {
+ %shr = ashr i8 -128, %a
+ %cmp = icmp eq i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne
+; CHECK-NEXT: icmp ne i8 %a, 7
+define i1 @nonexact_ashr_ne(i8 %a) {
+ %shr = ashr i8 -128, %a
+ %cmp = icmp ne i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq
+; CHECK-NEXT: icmp eq i8 %a, 2
+define i1 @nonexact_lshr_eq(i8 %a) {
+ %shr = lshr i8 4, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne
+; CHECK-NEXT: icmp ne i8 %a, 2
+define i1 @nonexact_lshr_ne(i8 %a) {
+ %shr = lshr i8 4, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_exactdiv
+; CHECK-NEXT: icmp eq i8 %a, 4
+define i1 @exact_lshr_eq_exactdiv(i8 %a) {
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp eq i8 %shr, 5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_exactdiv
+; CHECK-NEXT: icmp ne i8 %a, 4
+define i1 @exact_lshr_ne_exactdiv(i8 %a) {
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp ne i8 %shr, 5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_exactdiv
+; CHECK-NEXT: icmp eq i8 %a, 4
+define i1 @nonexact_lshr_eq_exactdiv(i8 %a) {
+ %shr = lshr i8 80, %a
+ %cmp = icmp eq i8 %shr, 5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_exactdiv
+; CHECK-NEXT: icmp ne i8 %a, 4
+define i1 @nonexact_lshr_ne_exactdiv(i8 %a) {
+ %shr = lshr i8 80, %a
+ %cmp = icmp ne i8 %shr, 5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_exactdiv
+; CHECK-NEXT: icmp eq i8 %a, 4
+define i1 @exact_ashr_eq_exactdiv(i8 %a) {
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp eq i8 %shr, -5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_exactdiv
+; CHECK-NEXT: icmp ne i8 %a, 4
+define i1 @exact_ashr_ne_exactdiv(i8 %a) {
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp ne i8 %shr, -5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_exactdiv
+; CHECK-NEXT: icmp eq i8 %a, 4
+define i1 @nonexact_ashr_eq_exactdiv(i8 %a) {
+ %shr = ashr i8 -80, %a
+ %cmp = icmp eq i8 %shr, -5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_exactdiv
+; CHECK-NEXT: icmp ne i8 %a, 4
+define i1 @nonexact_ashr_ne_exactdiv(i8 %a) {
+ %shr = ashr i8 -80, %a
+ %cmp = icmp ne i8 %shr, -5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_noexactdiv
+; CHECK-NEXT: ret i1 false
+define i1 @exact_lshr_eq_noexactdiv(i8 %a) {
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp eq i8 %shr, 31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_noexactdiv
+; CHECK-NEXT: ret i1 true
+define i1 @exact_lshr_ne_noexactdiv(i8 %a) {
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp ne i8 %shr, 31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_noexactdiv
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_lshr_eq_noexactdiv(i8 %a) {
+ %shr = lshr i8 80, %a
+ %cmp = icmp eq i8 %shr, 31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_noexactdiv
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_lshr_ne_noexactdiv(i8 %a) {
+ %shr = lshr i8 80, %a
+ %cmp = icmp ne i8 %shr, 31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_noexactdiv
+; CHECK-NEXT: ret i1 false
+define i1 @exact_ashr_eq_noexactdiv(i8 %a) {
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp eq i8 %shr, -31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_noexactdiv
+; CHECK-NEXT: ret i1 true
+define i1 @exact_ashr_ne_noexactdiv(i8 %a) {
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp ne i8 %shr, -31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_noexactdiv
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_ashr_eq_noexactdiv(i8 %a) {
+ %shr = ashr i8 -80, %a
+ %cmp = icmp eq i8 %shr, -31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_noexactdiv
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_ashr_ne_noexactdiv(i8 %a) {
+ %shr = ashr i8 -80, %a
+ %cmp = icmp ne i8 %shr, -31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_noexactlog
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_lshr_eq_noexactlog(i8 %a) {
+ %shr = lshr i8 90, %a
+ %cmp = icmp eq i8 %shr, 30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_noexactlog
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_lshr_ne_noexactlog(i8 %a) {
+ %shr = lshr i8 90, %a
+ %cmp = icmp ne i8 %shr, 30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_noexactlog
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_ashr_eq_noexactlog(i8 %a) {
+ %shr = ashr i8 -90, %a
+ %cmp = icmp eq i8 %shr, -30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_noexactlog
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_ashr_ne_noexactlog(i8 %a) {
+ %shr = ashr i8 -90, %a
+ %cmp = icmp ne i8 %shr, -30
+ ret i1 %cmp
+}
+
+; Don't try to fold the entire body of function @PR20945 into a
+; single `ret i1 true` statement.
+; If %B is equal to 1, then this function would return false.
+; As a consequence, the instruction combiner is not allowed to fold %cmp
+; to 'true'. Instead, it should replace %cmp with a simpler comparison
+; between %B and 1.
+
+; CHECK-LABEL: @PR20945(
+; CHECK: icmp ne i32 %B, 1
+define i1 @PR20945(i32 %B) {
+ %shr = ashr i32 -9, %B
+ %cmp = icmp ne i32 %shr, -5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @PR21222
+; CHECK: icmp eq i32 %B, 6
+define i1 @PR21222(i32 %B) {
+ %shr = ashr i32 -93, %B
+ %cmp = icmp eq i32 %shr, -2
+ ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 26e144f..279d86d 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -1148,22 +1148,6 @@ define i1 @icmp_shl_1_V_eq_32(i32 %V) {
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl_1_V_eq_31(
-; CHECK-NEXT: ret i1 false
-define i1 @icmp_shl_1_V_eq_31(i32 %V) {
- %shl = shl i32 1, %V
- %cmp = icmp eq i32 %shl, 31
- ret i1 %cmp
-}
-
-; CHECK-LABEL: @icmp_shl_1_V_ne_31(
-; CHECK-NEXT: ret i1 true
-define i1 @icmp_shl_1_V_ne_31(i32 %V) {
- %shl = shl i32 1, %V
- %cmp = icmp ne i32 %shl, 31
- ret i1 %cmp
-}
-
; CHECK-LABEL: @icmp_shl_1_V_ult_30(
; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %V, 5
; CHECK-NEXT: ret i1 [[CMP]]
@@ -1209,22 +1193,6 @@ define i1 @icmp_shl_1_V_uge_2147483648(i32 %V) {
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl_1_V_ugt_2147483648(
-; CHECK-NEXT: ret i1 false
-define i1 @icmp_shl_1_V_ugt_2147483648(i32 %V) {
- %shl = shl i32 1, %V
- %cmp = icmp ugt i32 %shl, 2147483648
- ret i1 %cmp
-}
-
-; CHECK-LABEL: @icmp_shl_1_V_ule_2147483648(
-; CHECK-NEXT: ret i1 true
-define i1 @icmp_shl_1_V_ule_2147483648(i32 %V) {
- %shl = shl i32 1, %V
- %cmp = icmp ule i32 %shl, 2147483648
- ret i1 %cmp
-}
-
; CHECK-LABEL: @icmp_shl_1_V_ult_2147483648(
; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %V, 31
; CHECK-NEXT: ret i1 [[CMP]]
@@ -1424,3 +1392,133 @@ define i1 @icmp_neg_cst_slt(i32 %a) {
%2 = icmp slt i32 %1, -10
ret i1 %2
}
+
+; CHECK-LABEL: @icmp_and_or_lshr
+; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl nuw i32 1, %y
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[SHL]], 1
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[OR]], %x
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_and_or_lshr(i32 %x, i32 %y) {
+ %shf = lshr i32 %x, %y
+ %or = or i32 %shf, %x
+ %and = and i32 %or, 1
+ %ret = icmp ne i32 %and, 0
+ ret i1 %ret
+}
+
+; CHECK-LABEL: @icmp_and_or_lshr_cst
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 3
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_and_or_lshr_cst(i32 %x) {
+ %shf = lshr i32 %x, 1
+ %or = or i32 %shf, %x
+ %and = and i32 %or, 1
+ %ret = icmp ne i32 %and, 0
+ ret i1 %ret
+}
+
+; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_2
+; CHECK-NEXT: %cmp = icmp ugt i32 %a, 29
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_zero_ap2_non_zero_2(i32 %a) {
+ %shl = shl i32 4, %a
+ %cmp = icmp eq i32 %shl, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_4
+; CHECK-NEXT: %cmp = icmp ugt i32 %a, 30
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_zero_ap2_non_zero_4(i32 %a) {
+ %shl = shl i32 -2, %a
+ %cmp = icmp eq i32 %shl, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_both_positive
+; CHECK-NEXT: %cmp = icmp eq i32 %a, 0
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_non_zero_ap2_non_zero_both_positive(i32 %a) {
+ %shl = shl i32 50, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_both_negative
+; CHECK-NEXT: %cmp = icmp eq i32 %a, 0
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_non_zero_ap2_non_zero_both_negative(i32 %a) {
+ %shl = shl i32 -50, %a
+ %cmp = icmp eq i32 %shl, -50
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_1
+; CHECK-NEXT: ret i1 false
+define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_1(i32 %a) {
+ %shl = shl i32 50, %a
+ %cmp = icmp eq i32 %shl, 25
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_2
+; CHECK-NEXT: %cmp = icmp eq i32 %a, 1
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_2(i32 %a) {
+ %shl = shl i32 25, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_3
+; CHECK-NEXT: ret i1 false
+define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_3(i32 %a) {
+ %shl = shl i32 26, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_sgt_zero_add_nsw
+; CHECK-NEXT: icmp sgt i32 %a, -1
+define i1 @icmp_sgt_zero_add_nsw(i32 %a) {
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sgt i32 %add, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_sge_zero_add_nsw
+; CHECK-NEXT: icmp sgt i32 %a, -2
+define i1 @icmp_sge_zero_add_nsw(i32 %a) {
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sge i32 %add, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_slt_zero_add_nsw
+; CHECK-NEXT: icmp slt i32 %a, -1
+define i1 @icmp_slt_zero_add_nsw(i32 %a) {
+ %add = add nsw i32 %a, 1
+ %cmp = icmp slt i32 %add, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_sle_zero_add_nsw
+; CHECK-NEXT: icmp slt i32 %a, 0
+define i1 @icmp_sle_zero_add_nsw(i32 %a) {
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sle i32 %add, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_cmpxchg_strong
+; CHECK-NEXT: %[[xchg:.*]] = cmpxchg i32* %sc, i32 %old_val, i32 %new_val seq_cst seq_cst
+; CHECK-NEXT: %[[icmp:.*]] = extractvalue { i32, i1 } %[[xchg]], 1
+; CHECK-NEXT: ret i1 %[[icmp]]
+define zeroext i1 @icmp_cmpxchg_strong(i32* %sc, i32 %old_val, i32 %new_val) {
+ %xchg = cmpxchg i32* %sc, i32 %old_val, i32 %new_val seq_cst seq_cst
+ %xtrc = extractvalue { i32, i1 } %xchg, 0
+ %icmp = icmp eq i32 %xtrc, %old_val
+ ret i1 %icmp
+}
diff --git a/test/Transforms/InstCombine/load-addrspace-cast.ll b/test/Transforms/InstCombine/load-addrspace-cast.ll
deleted file mode 100644
index fd6339c..0000000
--- a/test/Transforms/InstCombine/load-addrspace-cast.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt -instcombine -S < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-n8:16:32:64"
-
-define i32* @pointer_to_addrspace_pointer(i32 addrspace(1)** %x) nounwind {
-; CHECK-LABEL: @pointer_to_addrspace_pointer(
-; CHECK: load
-; CHECK: addrspacecast
- %y = bitcast i32 addrspace(1)** %x to i32**
- %z = load i32** %y
- ret i32* %z
-}
-
diff --git a/test/Transforms/InstCombine/load.ll b/test/Transforms/InstCombine/load.ll
index d11e08e..b4b7558 100644
--- a/test/Transforms/InstCombine/load.ll
+++ b/test/Transforms/InstCombine/load.ll
@@ -1,6 +1,8 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
; This test makes sure that these instructions are properly eliminated.
-;
-; RUN: opt < %s -instcombine -S | not grep load
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@X = constant i32 42 ; <i32*> [#uses=2]
@X2 = constant i32 47 ; <i32*> [#uses=1]
@@ -10,47 +12,63 @@
@GLOBAL = internal constant [4 x i32] zeroinitializer
+; CHECK-LABEL: @test1(
+; CHECK-NOT: load
define i32 @test1() {
%B = load i32* @X ; <i32> [#uses=1]
ret i32 %B
}
+; CHECK-LABEL: @test2(
+; CHECK-NOT: load
define float @test2() {
%A = getelementptr [2 x { i32, float }]* @Y, i64 0, i64 1, i32 1 ; <float*> [#uses=1]
%B = load float* %A ; <float> [#uses=1]
ret float %B
}
+; CHECK-LABEL: @test3(
+; CHECK-NOT: load
define i32 @test3() {
%A = getelementptr [2 x { i32, float }]* @Y, i64 0, i64 0, i32 0 ; <i32*> [#uses=1]
%B = load i32* %A ; <i32> [#uses=1]
ret i32 %B
}
+; CHECK-LABEL: @test4(
+; CHECK-NOT: load
define i32 @test4() {
%A = getelementptr [2 x { i32, float }]* @Z, i64 0, i64 1, i32 0 ; <i32*> [#uses=1]
%B = load i32* %A ; <i32> [#uses=1]
ret i32 %B
}
+; CHECK-LABEL: @test5(
+; CHECK-NOT: load
define i32 @test5(i1 %C) {
%Y = select i1 %C, i32* @X, i32* @X2 ; <i32*> [#uses=1]
%Z = load i32* %Y ; <i32> [#uses=1]
ret i32 %Z
}
+; CHECK-LABEL: @test7(
+; CHECK-NOT: load
define i32 @test7(i32 %X) {
%V = getelementptr i32* null, i32 %X ; <i32*> [#uses=1]
%R = load i32* %V ; <i32> [#uses=1]
ret i32 %R
}
+; CHECK-LABEL: @test8(
+; CHECK-NOT: load
define i32 @test8(i32* %P) {
store i32 1, i32* %P
%X = load i32* %P ; <i32> [#uses=1]
ret i32 %X
}
+; CHECK-LABEL: @test9(
+; CHECK-NOT: load
define i32 @test9(i32* %P) {
%X = load i32* %P ; <i32> [#uses=1]
%Y = load i32* %P ; <i32> [#uses=1]
@@ -58,6 +76,8 @@ define i32 @test9(i32* %P) {
ret i32 %Z
}
+; CHECK-LABEL: @test10(
+; CHECK-NOT: load
define i32 @test10(i1 %C.upgrd.1, i32* %P, i32* %Q) {
br i1 %C.upgrd.1, label %T, label %F
T: ; preds = %0
@@ -72,6 +92,8 @@ C: ; preds = %F, %T
ret i32 %V
}
+; CHECK-LABEL: @test11(
+; CHECK-NOT: load
define double @test11(double* %p) {
%t0 = getelementptr double* %p, i32 1
store double 2.0, double* %t0
@@ -80,19 +102,51 @@ define double @test11(double* %p) {
ret double %x
}
+; CHECK-LABEL: @test12(
+; CHECK-NOT: load
define i32 @test12(i32* %P) {
- %A = alloca i32
- store i32 123, i32* %A
- ; Cast the result of the load not the source
- %Q = bitcast i32* %A to i32*
- %V = load i32* %Q
- ret i32 %V
+ %A = alloca i32
+ store i32 123, i32* %A
+ ; Cast the result of the load not the source
+ %Q = bitcast i32* %A to i32*
+ %V = load i32* %Q
+ ret i32 %V
}
+; CHECK-LABEL: @test13(
+; CHECK-NOT: load
define <16 x i8> @test13(<2 x i64> %x) {
-entry:
- %tmp = load <16 x i8> * bitcast ([4 x i32]* @GLOBAL to <16 x i8>*)
- ret <16 x i8> %tmp
+ %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*)
+ ret <16 x i8> %tmp
}
+define i8 @test14(i8 %x, i32 %y) {
+; This test must not have the store of %x forwarded to the load -- there is an
+; intervening store if %y. However, the intervening store occurs with a different
+; type and size and to a different pointer value. This is ensuring that none of
+; those confuse the analysis into thinking that the second store does not alias
+; the first.
+; CHECK-LABEL: @test14(
+; CHECK: %[[R:.*]] = load i8*
+; CHECK-NEXT: ret i8 %[[R]]
+ %a = alloca i32
+ %a.i8 = bitcast i32* %a to i8*
+ store i8 %x, i8* %a.i8
+ store i32 %y, i32* %a
+ %r = load i8* %a.i8
+ ret i8 %r
+}
+@test15_global = external global i32
+
+define i8 @test15(i8 %x, i32 %y) {
+; Same test as @test14 essentially, but using a global instead of an alloca.
+; CHECK-LABEL: @test15(
+; CHECK: %[[R:.*]] = load i8*
+; CHECK-NEXT: ret i8 %[[R]]
+ %g.i8 = bitcast i32* @test15_global to i8*
+ store i8 %x, i8* %g.i8
+ store i32 %y, i32* @test15_global
+ %r = load i8* %g.i8
+ ret i8 %r
+}
diff --git a/test/Transforms/InstCombine/loadstore-alignment.ll b/test/Transforms/InstCombine/loadstore-alignment.ll
index 2263cb2..e90bdb7 100644
--- a/test/Transforms/InstCombine/loadstore-alignment.ll
+++ b/test/Transforms/InstCombine/loadstore-alignment.ll
@@ -1,67 +1,117 @@
-; RUN: opt < %s -instcombine -S | grep ", align 16" | count 14
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-p1:64:64:64-p2:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@x = external global <2 x i64>, align 16
@xx = external global [13 x <2 x i64>], align 16
+@x.as2 = external addrspace(2) global <2 x i64>, align 16
+
+; CHECK-LABEL: @static_hem(
+; CHECK: , align 16
define <2 x i64> @static_hem() {
- %t = getelementptr <2 x i64>* @x, i32 7
- %tmp1 = load <2 x i64>* %t, align 1
- ret <2 x i64> %tmp1
+ %t = getelementptr <2 x i64>* @x, i32 7
+ %tmp1 = load <2 x i64>* %t, align 1
+ ret <2 x i64> %tmp1
+}
+
+; CHECK-LABEL: @static_hem_addrspacecast(
+; CHECK: , align 16
+define <2 x i64> @static_hem_addrspacecast() {
+ %t = getelementptr <2 x i64>* @x, i32 7
+ %t.asc = addrspacecast <2 x i64>* %t to <2 x i64> addrspace(1)*
+ %tmp1 = load <2 x i64> addrspace(1)* %t.asc, align 1
+ ret <2 x i64> %tmp1
+}
+
+; CHECK-LABEL: @static_hem_addrspacecast_smaller_ptr(
+; CHECK: , align 16
+define <2 x i64> @static_hem_addrspacecast_smaller_ptr() {
+ %t = getelementptr <2 x i64>* @x, i32 7
+ %t.asc = addrspacecast <2 x i64>* %t to <2 x i64> addrspace(2)*
+ %tmp1 = load <2 x i64> addrspace(2)* %t.asc, align 1
+ ret <2 x i64> %tmp1
+}
+
+; CHECK-LABEL: @static_hem_addrspacecast_larger_ptr(
+; CHECK: , align 16
+define <2 x i64> @static_hem_addrspacecast_larger_ptr() {
+ %t = getelementptr <2 x i64> addrspace(2)* @x.as2, i32 7
+ %t.asc = addrspacecast <2 x i64> addrspace(2)* %t to <2 x i64> addrspace(1)*
+ %tmp1 = load <2 x i64> addrspace(1)* %t.asc, align 1
+ ret <2 x i64> %tmp1
}
+; CHECK-LABEL: @hem(
+; CHECK: , align 16
define <2 x i64> @hem(i32 %i) {
- %t = getelementptr <2 x i64>* @x, i32 %i
- %tmp1 = load <2 x i64>* %t, align 1
- ret <2 x i64> %tmp1
+ %t = getelementptr <2 x i64>* @x, i32 %i
+ %tmp1 = load <2 x i64>* %t, align 1
+ ret <2 x i64> %tmp1
}
+; CHECK-LABEL: @hem_2d(
+; CHECK: , align 16
define <2 x i64> @hem_2d(i32 %i, i32 %j) {
- %t = getelementptr [13 x <2 x i64>]* @xx, i32 %i, i32 %j
- %tmp1 = load <2 x i64>* %t, align 1
- ret <2 x i64> %tmp1
+ %t = getelementptr [13 x <2 x i64>]* @xx, i32 %i, i32 %j
+ %tmp1 = load <2 x i64>* %t, align 1
+ ret <2 x i64> %tmp1
}
+; CHECK-LABEL: @foo(
+; CHECK: , align 16
define <2 x i64> @foo() {
- %tmp1 = load <2 x i64>* @x, align 1
- ret <2 x i64> %tmp1
+ %tmp1 = load <2 x i64>* @x, align 1
+ ret <2 x i64> %tmp1
}
+; CHECK-LABEL: @bar(
+; CHECK: , align 16
+; CHECK: , align 16
define <2 x i64> @bar() {
- %t = alloca <2 x i64>
- call void @kip(<2 x i64>* %t)
- %tmp1 = load <2 x i64>* %t, align 1
- ret <2 x i64> %tmp1
+ %t = alloca <2 x i64>
+ call void @kip(<2 x i64>* %t)
+ %tmp1 = load <2 x i64>* %t, align 1
+ ret <2 x i64> %tmp1
}
+; CHECK-LABEL: @static_hem_store(
+; CHECK: , align 16
define void @static_hem_store(<2 x i64> %y) {
- %t = getelementptr <2 x i64>* @x, i32 7
- store <2 x i64> %y, <2 x i64>* %t, align 1
- ret void
+ %t = getelementptr <2 x i64>* @x, i32 7
+ store <2 x i64> %y, <2 x i64>* %t, align 1
+ ret void
}
+; CHECK-LABEL: @hem_store(
+; CHECK: , align 16
define void @hem_store(i32 %i, <2 x i64> %y) {
- %t = getelementptr <2 x i64>* @x, i32 %i
- store <2 x i64> %y, <2 x i64>* %t, align 1
- ret void
+ %t = getelementptr <2 x i64>* @x, i32 %i
+ store <2 x i64> %y, <2 x i64>* %t, align 1
+ ret void
}
+; CHECK-LABEL: @hem_2d_store(
+; CHECK: , align 16
define void @hem_2d_store(i32 %i, i32 %j, <2 x i64> %y) {
- %t = getelementptr [13 x <2 x i64>]* @xx, i32 %i, i32 %j
- store <2 x i64> %y, <2 x i64>* %t, align 1
- ret void
+ %t = getelementptr [13 x <2 x i64>]* @xx, i32 %i, i32 %j
+ store <2 x i64> %y, <2 x i64>* %t, align 1
+ ret void
}
+; CHECK-LABEL: @foo_store(
+; CHECK: , align 16
define void @foo_store(<2 x i64> %y) {
- store <2 x i64> %y, <2 x i64>* @x, align 1
- ret void
+ store <2 x i64> %y, <2 x i64>* @x, align 1
+ ret void
}
+; CHECK-LABEL: @bar_store(
+; CHECK: , align 16
define void @bar_store(<2 x i64> %y) {
- %t = alloca <2 x i64>
- call void @kip(<2 x i64>* %t)
- store <2 x i64> %y, <2 x i64>* %t, align 1
- ret void
+ %t = alloca <2 x i64>
+ call void @kip(<2 x i64>* %t)
+ store <2 x i64> %y, <2 x i64>* %t, align 1
+ ret void
}
declare void @kip(<2 x i64>* %t)
diff --git a/test/Transforms/InstCombine/loadstore-metadata.ll b/test/Transforms/InstCombine/loadstore-metadata.ll
new file mode 100644
index 0000000..863edae
--- /dev/null
+++ b/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -0,0 +1,86 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define i32 @test_load_cast_combine_tbaa(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.
+; CHECK-LABEL: @test_load_cast_combine_tbaa(
+; CHECK: load i32* %{{.*}}, !tbaa !0
+entry:
+ %l = load float* %ptr, !tbaa !0
+ %c = bitcast float %l to i32
+ ret i32 %c
+}
+
+define i32 @test_load_cast_combine_noalias(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves no-alias metadata.
+; CHECK-LABEL: @test_load_cast_combine_noalias(
+; CHECK: load i32* %{{.*}}, !alias.scope !2, !noalias !1
+entry:
+ %l = load float* %ptr, !alias.scope !2, !noalias !1
+ %c = bitcast float %l to i32
+ ret i32 %c
+}
+
+define float @test_load_cast_combine_range(i32* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) drops range metadata. It
+; would be nice to preserve or update it somehow but this is hard when moving
+; between types.
+; CHECK-LABEL: @test_load_cast_combine_range(
+; CHECK: load float* %{{.*}}
+; CHECK-NOT: !range
+; CHECK: ret float
+entry:
+ %l = load i32* %ptr, !range !5
+ %c = bitcast i32 %l to float
+ ret float %c
+}
+
+define i32 @test_load_cast_combine_invariant(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves invariant metadata.
+; CHECK-LABEL: @test_load_cast_combine_invariant(
+; CHECK: load i32* %{{.*}}, !invariant.load !3
+entry:
+ %l = load float* %ptr, !invariant.load !3
+ %c = bitcast float %l to i32
+ ret i32 %c
+}
+
+define i32 @test_load_cast_combine_nontemporal(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves nontemporal
+; metadata.
+; CHECK-LABEL: @test_load_cast_combine_nontemporal(
+; CHECK: load i32* %{{.*}}, !nontemporal !4
+entry:
+ %l = load float* %ptr, !nontemporal !4
+ %c = bitcast float %l to i32
+ ret i32 %c
+}
+
+define void @test_load_cast_combine_loop(float* %src, i32* %dst, i32 %n) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves loop access
+; metadata.
+; CHECK-LABEL: @test_load_cast_combine_loop(
+; CHECK: load i32* %{{.*}}, !llvm.mem.parallel_loop_access !1
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %src.gep = getelementptr inbounds float* %src, i32 %i
+ %dst.gep = getelementptr inbounds i32* %dst, i32 %i
+ %l = load float* %src.gep, !llvm.mem.parallel_loop_access !1
+ %c = bitcast float %l to i32
+ store i32 %c, i32* %dst.gep
+ %i.next = add i32 %i, 1
+ %cmp = icmp slt i32 %i.next, %n
+ br i1 %cmp, label %loop, label %exit, !llvm.loop !1
+
+exit:
+ ret void
+}
+
+!0 = metadata !{ metadata !1, metadata !1, i64 0 }
+!1 = metadata !{ metadata !1 }
+!2 = metadata !{ metadata !2, metadata !1 }
+!3 = metadata !{ }
+!4 = metadata !{ i32 1 }
+!5 = metadata !{ i32 0, i32 42 }
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index 2085206..ed25e4e 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -144,3 +144,26 @@ lpad.i: ; preds = %entry
call void @_ZdlPvRKSt9nothrow_t(i8* %call.i, i8* %nt) builtin nounwind
resume { i8*, i32 } %0
}
+
+declare i8* @_Znwm(i64) nobuiltin
+declare void @_ZdlPvm(i8*, i64) nobuiltin
+declare i8* @_Znwj(i32) nobuiltin
+declare void @_ZdlPvj(i8*, i32) nobuiltin
+declare i8* @_Znam(i64) nobuiltin
+declare void @_ZdaPvm(i8*, i64) nobuiltin
+declare i8* @_Znaj(i32) nobuiltin
+declare void @_ZdaPvj(i8*, i32) nobuiltin
+
+; CHECK-LABEL: @test8(
+define void @test8() {
+ ; CHECK-NOT: call
+ %nwm = call i8* @_Znwm(i64 32) builtin
+ call void @_ZdlPvm(i8* %nwm, i64 32) builtin
+ %nwj = call i8* @_Znwj(i32 32) builtin
+ call void @_ZdlPvj(i8* %nwj, i32 32) builtin
+ %nam = call i8* @_Znam(i64 32) builtin
+ call void @_ZdaPvm(i8* %nam, i64 32) builtin
+ %naj = call i8* @_Znaj(i32 32) builtin
+ call void @_ZdaPvj(i8* %naj, i32 32) builtin
+ ret void
+}
diff --git a/test/Transforms/InstCombine/maxnum.ll b/test/Transforms/InstCombine/maxnum.ll
new file mode 100644
index 0000000..585d9f4
--- /dev/null
+++ b/test/Transforms/InstCombine/maxnum.ll
@@ -0,0 +1,222 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.maxnum.f32(float, float) #0
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0
+
+declare double @llvm.maxnum.f64(double, double) #0
+declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) #0
+
+; CHECK-LABEL: @constant_fold_maxnum_f32
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_maxnum_f32() #0 {
+ %x = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_inv
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_maxnum_f32_inv() #0 {
+ %x = call float @llvm.maxnum.f32(float 2.0, float 1.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_nan0
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_maxnum_f32_nan0() #0 {
+ %x = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 2.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_nan1
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_maxnum_f32_nan1() #0 {
+ %x = call float @llvm.maxnum.f32(float 2.0, float 0x7FF8000000000000) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_nan_nan
+; CHECK-NEXT: ret float 0x7FF8000000000000
+define float @constant_fold_maxnum_f32_nan_nan() #0 {
+ %x = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_p0_p0
+; CHECK-NEXT: ret float 0.000000e+00
+define float @constant_fold_maxnum_f32_p0_p0() #0 {
+ %x = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_p0_n0
+; CHECK-NEXT: ret float 0.000000e+00
+define float @constant_fold_maxnum_f32_p0_n0() #0 {
+ %x = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_n0_p0
+; CHECK-NEXT: ret float -0.000000e+00
+define float @constant_fold_maxnum_f32_n0_p0() #0 {
+ %x = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_n0_n0
+; CHECK-NEXT: ret float -0.000000e+00
+define float @constant_fold_maxnum_f32_n0_n0() #0 {
+ %x = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_v4f32
+; CHECK-NEXT: ret <4 x float> <float 2.000000e+00, float 8.000000e+00, float 1.000000e+01, float 9.000000e+00>
+define <4 x float> @constant_fold_maxnum_v4f32() #0 {
+ %x = call <4 x float> @llvm.maxnum.v4f32(<4 x float> <float 1.0, float 8.0, float 3.0, float 9.0>, <4 x float> <float 2.0, float 2.0, float 10.0, float 5.0>)
+ ret <4 x float> %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f64
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_maxnum_f64() #0 {
+ %x = call double @llvm.maxnum.f64(double 1.0, double 2.0) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f64_nan0
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_maxnum_f64_nan0() #0 {
+ %x = call double @llvm.maxnum.f64(double 0x7FF8000000000000, double 2.0) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f64_nan1
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_maxnum_f64_nan1() #0 {
+ %x = call double @llvm.maxnum.f64(double 2.0, double 0x7FF8000000000000) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f64_nan_nan
+; CHECK-NEXT: ret double 0x7FF8000000000000
+define double @constant_fold_maxnum_f64_nan_nan() #0 {
+ %x = call double @llvm.maxnum.f64(double 0x7FF8000000000000, double 0x7FF8000000000000) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @canonicalize_constant_maxnum_f32
+; CHECK: call float @llvm.maxnum.f32(float %x, float 1.000000e+00)
+define float @canonicalize_constant_maxnum_f32(float %x) #0 {
+ %y = call float @llvm.maxnum.f32(float 1.0, float %x) #0
+ ret float %y
+}
+
+; CHECK-LABEL: @noop_maxnum_f32
+; CHECK-NEXT: ret float %x
+define float @noop_maxnum_f32(float %x) #0 {
+ %y = call float @llvm.maxnum.f32(float %x, float %x) #0
+ ret float %y
+}
+
+; CHECK-LABEL: @maxnum_f32_nan_val
+; CHECK-NEXT: ret float %x
+define float @maxnum_f32_nan_val(float %x) #0 {
+ %y = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float %x) #0
+ ret float %y
+}
+
+; CHECK-LABEL: @maxnum_f32_val_nan
+; CHECK-NEXT: ret float %x
+define float @maxnum_f32_val_nan(float %x) #0 {
+ %y = call float @llvm.maxnum.f32(float %x, float 0x7FF8000000000000) #0
+ ret float %y
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_undef_undef
+; CHECK-NEXT: ret float undef
+define float @fold_maxnum_f32_undef_undef(float %x) nounwind {
+ %val = call float @llvm.maxnum.f32(float undef, float undef) #0
+ ret float %val
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_val_undef
+; CHECK-NEXT: ret float %x
+define float @fold_maxnum_f32_val_undef(float %x) nounwind {
+ %val = call float @llvm.maxnum.f32(float %x, float undef) #0
+ ret float %val
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_undef_val
+; CHECK-NEXT: ret float %x
+define float @fold_maxnum_f32_undef_val(float %x) nounwind {
+ %val = call float @llvm.maxnum.f32(float undef, float %x) #0
+ ret float %val
+}
+
+; CHECK-LABEL: @maxnum_x_maxnum_x_y
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: ret float
+define float @maxnum_x_maxnum_x_y(float %x, float %y) #0 {
+ %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+ %b = call float @llvm.maxnum.f32(float %x, float %a) #0
+ ret float %b
+}
+
+; CHECK-LABEL: @maxnum_y_maxnum_x_y
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: ret float
+define float @maxnum_y_maxnum_x_y(float %x, float %y) #0 {
+ %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+ %b = call float @llvm.maxnum.f32(float %y, float %a) #0
+ ret float %b
+}
+
+; CHECK-LABEL: @maxnum_z_maxnum_x_y
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %z, float %a)
+; CHECK-NEXT: ret float
+define float @maxnum_z_maxnum_x_y(float %x, float %y, float %z) #0 {
+ %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+ %b = call float @llvm.maxnum.f32(float %z, float %a) #0
+ ret float %b
+}
+
+; CHECK-LABEL: @maxnum_maxnum_x_y_z
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %a, float %z)
+; CHECK-NEXT: ret float
+define float @maxnum_maxnum_x_y_z(float %x, float %y, float %z) #0 {
+ %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+ %b = call float @llvm.maxnum.f32(float %a, float %z) #0
+ ret float %b
+}
+
+; CHECK-LABEL: @maxnum4
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %z, float %w)
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %a, float %b)
+; CHECK-NEXT: ret float
+define float @maxnum4(float %x, float %y, float %z, float %w) #0 {
+ %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+ %b = call float @llvm.maxnum.f32(float %z, float %w) #0
+ %c = call float @llvm.maxnum.f32(float %a, float %b) #0
+ ret float %c
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_inf_val
+; CHECK-NEXT: ret float 0x7FF0000000000000
+define float @fold_maxnum_f32_inf_val(float %x) nounwind {
+ %val = call float @llvm.maxnum.f32(float 0x7FF0000000000000, float %x) #0
+ ret float %val
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_neginf_val
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float 0xFFF0000000000000)
+; CHECK-NEXT: ret float
+define float @fold_maxnum_f32_neginf_val(float %x) nounwind {
+ %val = call float @llvm.maxnum.f32(float 0xFFF0000000000000, float %x) #0
+ ret float %val
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/memcmp-1.ll b/test/Transforms/InstCombine/memcmp-1.ll
index 65349c6..d960693 100644
--- a/test/Transforms/InstCombine/memcmp-1.ll
+++ b/test/Transforms/InstCombine/memcmp-1.ll
@@ -37,7 +37,7 @@ define i32 @test_simplify3(i8* %mem1, i8* %mem2) {
; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32
; CHECK: [[LOAD2:%[a-z]+]] = load i8* %mem2, align 1
; CHECK: [[ZEXT2:%[a-z]+]] = zext i8 [[LOAD2]] to i32
-; CHECK: [[RET:%[a-z]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
+; CHECK: [[RET:%[a-z]+]] = sub nsw i32 [[ZEXT1]], [[ZEXT2]]
ret i32 %ret
; CHECK: ret i32 [[RET]]
}
diff --git a/test/Transforms/InstCombine/minnum.ll b/test/Transforms/InstCombine/minnum.ll
new file mode 100644
index 0000000..57d6e16
--- /dev/null
+++ b/test/Transforms/InstCombine/minnum.ll
@@ -0,0 +1,244 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.minnum.f32(float, float) #0
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #0
+
+declare double @llvm.minnum.f64(double, double) #0
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
+
+declare float @llvm.fmax.f32(float, float) #0
+
+; CHECK-LABEL: @constant_fold_minnum_f32
+; CHECK-NEXT: ret float 1.000000e+00
+define float @constant_fold_minnum_f32() #0 {
+ %x = call float @llvm.minnum.f32(float 1.0, float 2.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_inv
+; CHECK-NEXT: ret float 1.000000e+00
+define float @constant_fold_minnum_f32_inv() #0 {
+ %x = call float @llvm.minnum.f32(float 2.0, float 1.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_nan0
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_minnum_f32_nan0() #0 {
+ %x = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 2.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_nan1
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_minnum_f32_nan1() #0 {
+ %x = call float @llvm.minnum.f32(float 2.0, float 0x7FF8000000000000) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_nan_nan
+; CHECK-NEXT: ret float 0x7FF8000000000000
+define float @constant_fold_minnum_f32_nan_nan() #0 {
+ %x = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_p0_p0
+; CHECK-NEXT: ret float 0.000000e+00
+define float @constant_fold_minnum_f32_p0_p0() #0 {
+ %x = call float @llvm.minnum.f32(float 0.0, float 0.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_p0_n0
+; CHECK-NEXT: ret float 0.000000e+00
+define float @constant_fold_minnum_f32_p0_n0() #0 {
+ %x = call float @llvm.minnum.f32(float 0.0, float -0.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_n0_p0
+; CHECK-NEXT: ret float -0.000000e+00
+define float @constant_fold_minnum_f32_n0_p0() #0 {
+ %x = call float @llvm.minnum.f32(float -0.0, float 0.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_n0_n0
+; CHECK-NEXT: ret float -0.000000e+00
+define float @constant_fold_minnum_f32_n0_n0() #0 {
+ %x = call float @llvm.minnum.f32(float -0.0, float -0.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_v4f32
+; CHECK-NEXT: ret <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 5.000000e+00>
+define <4 x float> @constant_fold_minnum_v4f32() #0 {
+ %x = call <4 x float> @llvm.minnum.v4f32(<4 x float> <float 1.0, float 8.0, float 3.0, float 9.0>, <4 x float> <float 2.0, float 2.0, float 10.0, float 5.0>)
+ ret <4 x float> %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f64
+; CHECK-NEXT: ret double 1.000000e+00
+define double @constant_fold_minnum_f64() #0 {
+ %x = call double @llvm.minnum.f64(double 1.0, double 2.0) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f64_nan0
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_minnum_f64_nan0() #0 {
+ %x = call double @llvm.minnum.f64(double 0x7FF8000000000000, double 2.0) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f64_nan1
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_minnum_f64_nan1() #0 {
+ %x = call double @llvm.minnum.f64(double 2.0, double 0x7FF8000000000000) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f64_nan_nan
+; CHECK-NEXT: ret double 0x7FF8000000000000
+define double @constant_fold_minnum_f64_nan_nan() #0 {
+ %x = call double @llvm.minnum.f64(double 0x7FF8000000000000, double 0x7FF8000000000000) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @canonicalize_constant_minnum_f32
+; CHECK: call float @llvm.minnum.f32(float %x, float 1.000000e+00)
+define float @canonicalize_constant_minnum_f32(float %x) #0 {
+ %y = call float @llvm.minnum.f32(float 1.0, float %x) #0
+ ret float %y
+}
+
+; CHECK-LABEL: @noop_minnum_f32
+; CHECK-NEXT: ret float %x
+define float @noop_minnum_f32(float %x) #0 {
+ %y = call float @llvm.minnum.f32(float %x, float %x) #0
+ ret float %y
+}
+
+; CHECK-LABEL: @minnum_f32_nan_val
+; CHECK-NEXT: ret float %x
+define float @minnum_f32_nan_val(float %x) #0 {
+ %y = call float @llvm.minnum.f32(float 0x7FF8000000000000, float %x) #0
+ ret float %y
+}
+
+; CHECK-LABEL: @minnum_f32_val_nan
+; CHECK-NEXT: ret float %x
+define float @minnum_f32_val_nan(float %x) #0 {
+ %y = call float @llvm.minnum.f32(float %x, float 0x7FF8000000000000) #0
+ ret float %y
+}
+
+; CHECK-LABEL: @fold_minnum_f32_undef_undef
+; CHECK-NEXT: ret float undef
+define float @fold_minnum_f32_undef_undef(float %x) nounwind {
+ %val = call float @llvm.minnum.f32(float undef, float undef) #0
+ ret float %val
+}
+
+; CHECK-LABEL: @fold_minnum_f32_val_undef
+; CHECK-NEXT: ret float %x
+define float @fold_minnum_f32_val_undef(float %x) nounwind {
+ %val = call float @llvm.minnum.f32(float %x, float undef) #0
+ ret float %val
+}
+
+; CHECK-LABEL: @fold_minnum_f32_undef_val
+; CHECK-NEXT: ret float %x
+define float @fold_minnum_f32_undef_val(float %x) nounwind {
+ %val = call float @llvm.minnum.f32(float undef, float %x) #0
+ ret float %val
+}
+
+; CHECK-LABEL: @minnum_x_minnum_x_y
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: ret float
+define float @minnum_x_minnum_x_y(float %x, float %y) #0 {
+ %a = call float @llvm.minnum.f32(float %x, float %y) #0
+ %b = call float @llvm.minnum.f32(float %x, float %a) #0
+ ret float %b
+}
+
+; CHECK-LABEL: @minnum_y_minnum_x_y
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: ret float
+define float @minnum_y_minnum_x_y(float %x, float %y) #0 {
+ %a = call float @llvm.minnum.f32(float %x, float %y) #0
+ %b = call float @llvm.minnum.f32(float %y, float %a) #0
+ ret float %b
+}
+
+; CHECK-LABEL: @minnum_z_minnum_x_y
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.minnum.f32(float %z, float %a)
+; CHECK-NEXT: ret float
+define float @minnum_z_minnum_x_y(float %x, float %y, float %z) #0 {
+ %a = call float @llvm.minnum.f32(float %x, float %y) #0
+ %b = call float @llvm.minnum.f32(float %z, float %a) #0
+ ret float %b
+}
+
+; CHECK-LABEL: @minnum_minnum_x_y_z
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.minnum.f32(float %a, float %z)
+; CHECK-NEXT: ret float
+define float @minnum_minnum_x_y_z(float %x, float %y, float %z) #0 {
+ %a = call float @llvm.minnum.f32(float %x, float %y) #0
+ %b = call float @llvm.minnum.f32(float %a, float %z) #0
+ ret float %b
+}
+
+; CHECK-LABEL: @minnum4
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.minnum.f32(float %z, float %w)
+; CHECK-NEXT: call float @llvm.minnum.f32(float %a, float %b)
+; CHECK-NEXT: ret float
+define float @minnum4(float %x, float %y, float %z, float %w) #0 {
+ %a = call float @llvm.minnum.f32(float %x, float %y) #0
+ %b = call float @llvm.minnum.f32(float %z, float %w) #0
+ %c = call float @llvm.minnum.f32(float %a, float %b) #0
+ ret float %c
+}
+
+; CHECK-LABEL: @minnum_x_fmax_x_y
+; CHECK-NEXT: call float @llvm.fmax.f32
+; CHECK-NEXT: call float @llvm.minnum.f32
+; CHECK-NEXT: ret float
+define float @minnum_x_fmax_x_y(float %x, float %y) #0 {
+ %a = call float @llvm.fmax.f32(float %x, float %y) #0
+ %b = call float @llvm.minnum.f32(float %x, float %a) #0
+ ret float %b
+}
+
+; CHECK-LABEL: @fmax_x_minnum_x_y
+; CHECK-NEXT: call float @llvm.minnum.f32
+; CHECK-NEXT: call float @llvm.fmax.f32
+; CHECK-NEXT: ret float
+define float @fmax_x_minnum_x_y(float %x, float %y) #0 {
+ %a = call float @llvm.minnum.f32(float %x, float %y) #0
+ %b = call float @llvm.fmax.f32(float %x, float %a) #0
+ ret float %b
+}
+
+; CHECK-LABEL: @fold_minnum_f32_inf_val
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float 0x7FF0000000000000)
+; CHECK-NEXT: ret float
+define float @fold_minnum_f32_inf_val(float %x) nounwind {
+ %val = call float @llvm.minnum.f32(float 0x7FF0000000000000, float %x) #0
+ ret float %val
+}
+
+; CHECK-LABEL: @fold_minnum_f32_minf_val
+; CHECK-NEXT: ret float 0xFFF0000000000000
+define float @fold_minnum_f32_minf_val(float %x) nounwind {
+ %val = call float @llvm.minnum.f32(float 0xFFF0000000000000, float %x) #0
+ ret float %val
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/narrow-switch.ll b/test/Transforms/InstCombine/narrow-switch.ll
new file mode 100644
index 0000000..7646189
--- /dev/null
+++ b/test/Transforms/InstCombine/narrow-switch.ll
@@ -0,0 +1,93 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+
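+; The 'and' clears the upper 32 bits of the condition, so the switch can be
+; narrowed to i32.
+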
+; CHECK-LABEL: define i32 @positive1
+; CHECK: switch i32
+; CHECK: i32 10, label
+; CHECK: i32 100, label
+; CHECK: i32 1001, label
+
+define i32 @positive1(i64 %a) {
+entry:
+ %and = and i64 %a, 4294967295
+ switch i64 %and, label %sw.default [
+ i64 10, label %return
+ i64 100, label %sw.bb1
+ i64 1001, label %sw.bb2
+ ]
+
+sw.bb1:
+ br label %return
+
+sw.bb2:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ 24, %sw.default ], [ 123, %sw.bb2 ], [ 213, %sw.bb1 ], [ 231, %entry ]
+ ret i32 %retval.0
+}
+
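+; The 'or' sets the upper 32 bits of the condition, so the switch can still be
+; narrowed to i32 with sign-extended case values.
+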
+; CHECK-LABEL: define i32 @negative1
+; CHECK: switch i32
+; CHECK: i32 -10, label
+; CHECK: i32 -100, label
+; CHECK: i32 -1001, label
+
+define i32 @negative1(i64 %a) {
+entry:
+ %or = or i64 %a, -4294967296
+ switch i64 %or, label %sw.default [
+ i64 -10, label %return
+ i64 -100, label %sw.bb1
+ i64 -1001, label %sw.bb2
+ ]
+
+sw.bb1:
+ br label %return
+
+sw.bb2:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ 24, %sw.default ], [ 123, %sw.bb2 ], [ 213, %sw.bb1 ], [ 231, %entry ]
+ ret i32 %retval.0
+}
+
+; Make sure truncating a constant integer wider than 64 bits doesn't trigger
+; an assertion.
+
+; CHECK-LABEL: define i32 @trunc72to68
+; CHECK: switch i68
+; CHECK: i68 10, label
+; CHECK: i68 100, label
+; CHECK: i68 1001, label
+
+define i32 @trunc72to68(i72 %a) {
+entry:
+ %and = and i72 %a, 295147905179352825855
+ switch i72 %and, label %sw.default [
+ i72 10, label %return
+ i72 100, label %sw.bb1
+ i72 1001, label %sw.bb2
+ ]
+
+sw.bb1:
+ br label %return
+
+sw.bb2:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ 24, %sw.default ], [ 123, %sw.bb2 ], [ 213, %sw.bb1 ], [ 231, %entry ]
+ ret i32 %retval.0
+}
diff --git a/test/Transforms/InstCombine/no_cgscc_assert.ll b/test/Transforms/InstCombine/no_cgscc_assert.ll
new file mode 100644
index 0000000..cec5297
--- /dev/null
+++ b/test/Transforms/InstCombine/no_cgscc_assert.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -inline -instcombine -S | FileCheck %s
+
+; PR21403: http://llvm.org/bugs/show_bug.cgi?id=21403
+; When the call to sqrtf is replaced by an intrinsic call to fabs,
+; it should not trigger an assertion in the CGSCC pass manager.
+
+define float @bar(float %f) #0 {
+ %mul = fmul fast float %f, %f
+ %call1 = call float @sqrtf(float %mul) #0
+ ret float %call1
+
+; CHECK-LABEL: @bar(
+; CHECK-NEXT: call float @llvm.fabs.f32
+; CHECK-NEXT: ret float
+}
+
+declare float @sqrtf(float) #0
+
+attributes #0 = { readnone "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/objsize-address-space.ll b/test/Transforms/InstCombine/objsize-address-space.ll
index 9cb6884..a971c91 100644
--- a/test/Transforms/InstCombine/objsize-address-space.ll
+++ b/test/Transforms/InstCombine/objsize-address-space.ll
@@ -32,7 +32,7 @@ define i16 @foo_as3_i16() nounwind {
ret i16 %1
}
-@a_alias = alias weak [60 x i8] addrspace(3)* @a_as3
+@a_alias = weak alias [60 x i8] addrspace(3)* @a_as3
define i32 @foo_alias() nounwind {
%1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_alias, i32 0, i32 0), i1 false)
ret i32 %1
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 6459032..1285b1c 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -256,7 +256,7 @@ return:
ret i32 7
}
-@globalalias = alias internal [60 x i8]* @a
+@globalalias = internal alias [60 x i8]* @a
; CHECK-LABEL: @test18(
; CHECK-NEXT: ret i32 60
@@ -266,7 +266,7 @@ define i32 @test18() {
ret i32 %1
}
-@globalalias2 = alias weak [60 x i8]* @a
+@globalalias2 = weak alias [60 x i8]* @a
; CHECK-LABEL: @test19(
; CHECK: llvm.objectsize
diff --git a/test/Transforms/InstCombine/or-xor.ll b/test/Transforms/InstCombine/or-xor.ll
index cec36f1..670e3e0 100644
--- a/test/Transforms/InstCombine/or-xor.ll
+++ b/test/Transforms/InstCombine/or-xor.ll
@@ -92,3 +92,92 @@ define i32 @test9(i32 %x, i32 %y) nounwind {
; CHECK-NEXT: %z = or i32 %y.not, %x
; CHECK-NEXT: ret i32 %z
}
+
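+; (B ^ A) | ((~A) ^ B) -> -1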
+define i32 @test10(i32 %A, i32 %B) {
+ %xor1 = xor i32 %B, %A
+ %not = xor i32 %A, -1
+ %xor2 = xor i32 %not, %B
+ %or = or i32 %xor1, %xor2
+ ret i32 %or
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: ret i32 -1
+}
+
+define i32 @test11(i32 %A, i32 %B) {
+ %xor1 = xor i32 %B, %A
+ %not = xor i32 %A, -1
+ %xor2 = xor i32 %not, %B
+ %or = or i32 %xor1, %xor2
+ ret i32 %or
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: ret i32 -1
+}
+
+; (x | y) & ((~x) ^ y) -> (x & y)
+define i32 @test12(i32 %x, i32 %y) {
+ %or = or i32 %x, %y
+ %neg = xor i32 %x, -1
+ %xor = xor i32 %neg, %y
+ %and = and i32 %or, %xor
+ ret i32 %and
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: %and = and i32 %x, %y
+; CHECK-NEXT: ret i32 %and
+}
+
+; ((~x) ^ y) & (x | y) -> (x & y)
+define i32 @test13(i32 %x, i32 %y) {
+ %neg = xor i32 %x, -1
+ %xor = xor i32 %neg, %y
+ %or = or i32 %x, %y
+ %and = and i32 %xor, %or
+ ret i32 %and
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: %and = and i32 %x, %y
+; CHECK-NEXT: ret i32 %and
+}
+
+; ((x | y) ^ (x ^ y)) -> (x & y)
+define i32 @test15(i32 %x, i32 %y) {
+ %1 = xor i32 %y, %x
+ %2 = or i32 %y, %x
+ %3 = xor i32 %2, %1
+ ret i32 %3
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: %1 = and i32 %y, %x
+; CHECK-NEXT: ret i32 %1
+}
+
+; ((x | ~y) ^ (~x | y)) -> x ^ y
+define i32 @test16(i32 %x, i32 %y) {
+ %noty = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %or1 = or i32 %x, %noty
+ %or2 = or i32 %notx, %y
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: %xor = xor i32 %x, %y
+; CHECK-NEXT: ret i32 %xor
+}
+
+; ((x & ~y) ^ (~x & y)) -> x ^ y
+define i32 @test17(i32 %x, i32 %y) {
+ %noty = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %and1 = and i32 %x, %noty
+ %and2 = and i32 %notx, %y
+ %xor = xor i32 %and1, %and2
+ ret i32 %xor
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: %xor = xor i32 %x, %y
+; CHECK-NEXT: ret i32 %xor
+}
+
+define i32 @test18(i32 %a, i32 %b) {
+ %or = xor i32 %a, %b
+ %and1 = and i32 %or, 1
+ %and2 = and i32 %b, -2
+ %xor = or i32 %and1, %and2
+ ret i32 %xor
+}
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index 1cd897e..23dad21 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -408,3 +408,101 @@ define i32 @test38(i32* %xp, i32 %y) {
%or = or i32 %x, %sext
ret i32 %or
}
+
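+; ((~a) & b) | a -> a | b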
+define i32 @test39(i32 %a, i32 %b) {
+; CHECK-LABEL: test39(
+; CHECK-NEXT: %or = or i32 %a, %b
+ %xor = xor i32 %a, -1
+ %and = and i32 %xor, %b
+ %or = or i32 %and, %a
+ ret i32 %or
+}
+
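+; (a & b) | (~a) -> (~a) | b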
+define i32 @test40(i32 %a, i32 %b) {
+; CHECK-LABEL: test40(
+; CHECK-NEXT: %1 = xor i32 %a, -1
+; CHECK-NEXT: %or = or i32 %1, %b
+ %and = and i32 %a, %b
+ %xor = xor i32 %a, -1
+ %or = or i32 %and, %xor
+ ret i32 %or
+}
+
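+; (a & b) | ((~a) ^ b) -> (~a) ^ b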
+define i32 @test41(i32 %a, i32 %b) {
+; CHECK-LABEL: test41(
+; CHECK-NEXT: %1 = xor i32 %a, -1
+; CHECK-NEXT: %or = xor i32 %1, %b
+ %and = and i32 %a, %b
+ %nega = xor i32 %a, -1
+ %xor = xor i32 %nega, %b
+ %or = or i32 %and, %xor
+ ret i32 %or
+}
+
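+; ((~a) ^ b) | (a & b) -> (~a) ^ b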
+define i32 @test42(i32 %a, i32 %b) {
+; CHECK-LABEL: test42(
+; CHECK-NEXT: %1 = xor i32 %a, -1
+; CHECK-NEXT: %or = xor i32 %1, %b
+ %nega = xor i32 %a, -1
+ %xor = xor i32 %nega, %b
+ %and = and i32 %a, %b
+ %or = or i32 %xor, %and
+ ret i32 %or
+}
+
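+; (a & (~b)) | (a ^ b) -> a ^ b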
+define i32 @test43(i32 %a, i32 %b) {
+; CHECK-LABEL: test43(
+; CHECK-NEXT: %or = xor i32 %a, %b
+ %neg = xor i32 %b, -1
+ %and = and i32 %a, %neg
+ %xor = xor i32 %a, %b
+ %or = or i32 %and, %xor
+ ret i32 %or
+}
+
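+; (a ^ b) | (a & (~b)) -> a ^ b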
+define i32 @test44(i32 %a, i32 %b) {
+; CHECK-LABEL: test44(
+; CHECK-NEXT: %or = xor i32 %a, %b
+ %xor = xor i32 %a, %b
+ %neg = xor i32 %b, -1
+ %and = and i32 %a, %neg
+ %or = or i32 %xor, %and
+ ret i32 %or
+}
+
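+; (x & (y | z)) | y -> (x & z) | y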
+define i32 @test45(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: test45(
+; CHECK-NEXT: %1 = and i32 %x, %z
+; CHECK-NEXT: %or1 = or i32 %1, %y
+; CHECK-NEXT: ret i32 %or1
+ %or = or i32 %y, %z
+ %and = and i32 %x, %or
+ %or1 = or i32 %and, %y
+ ret i32 %or1
+}
+
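+; The 'a'-'z' and 'A'-'Z' range checks merge into a single check once the
+; case bit (0x20) is cleared.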
+define i1 @test46(i8 signext %c) {
+ %c.off = add i8 %c, -97
+ %cmp1 = icmp ult i8 %c.off, 26
+ %c.off17 = add i8 %c, -65
+ %cmp2 = icmp ult i8 %c.off17, 26
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: and i8 %c, -33
+; CHECK-NEXT: add i8 %1, -65
+; CHECK-NEXT: icmp ult i8 %2, 26
+}
+
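+; Same as test46 but with ule; the inclusive bound becomes icmp ult with 27.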
+define i1 @test47(i8 signext %c) {
+ %c.off = add i8 %c, -65
+ %cmp1 = icmp ule i8 %c.off, 26
+ %c.off17 = add i8 %c, -97
+ %cmp2 = icmp ule i8 %c.off17, 26
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: and i8 %c, -33
+; CHECK-NEXT: add i8 %1, -65
+; CHECK-NEXT: icmp ult i8 %2, 27
+}
diff --git a/test/Transforms/InstCombine/overflow-mul.ll b/test/Transforms/InstCombine/overflow-mul.ll
index cbb2f5f..6d8d40b 100644
--- a/test/Transforms/InstCombine/overflow-mul.ll
+++ b/test/Transforms/InstCombine/overflow-mul.ll
@@ -173,3 +173,16 @@ define <4 x i32> @pr20113(<4 x i16> %a, <4 x i16> %b) {
%vcgez.i = sext <4 x i1> %tmp to <4 x i32>
ret <4 x i32> %vcgez.i
}
+
+@pr21445_data = external global i32
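+; The overflow check is still converted to @llvm.umul.with.overflow.i8 when
+; one multiplicand is a zext'd constant expression (PR21445).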
+define i1 @pr21445(i8 %a) {
+; CHECK-LABEL: @pr21445(
+; CHECK-NEXT: %[[umul:.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 ptrtoint (i32* @pr21445_data to i8))
+; CHECK-NEXT: %[[cmp:.*]] = extractvalue { i8, i1 } %[[umul]], 1
+; CHECK-NEXT: ret i1 %[[cmp]]
+ %ext = zext i8 %a to i32
+ %mul = mul i32 %ext, zext (i8 ptrtoint (i32* @pr21445_data to i8) to i32)
+ %and = and i32 %mul, 255
+ %cmp = icmp ne i32 %mul, %and
+ ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/pr12338.ll b/test/Transforms/InstCombine/pr12338.ll
index d34600f..614387a 100644
--- a/test/Transforms/InstCombine/pr12338.ll
+++ b/test/Transforms/InstCombine/pr12338.ll
@@ -6,7 +6,6 @@ entry:
for.cond:
%local = phi <1 x i32> [ <i32 0>, %entry ], [ %phi2, %cond.end47 ]
-; CHECK: sub <1 x i32> <i32 92>, %local
%phi3 = sub <1 x i32> zeroinitializer, %local
br label %cond.end
@@ -19,6 +18,7 @@ cond.end:
cond.end47:
%sum = add <1 x i32> %cond, <i32 92>
+; CHECK: sub <1 x i32> <i32 -92>, %cond
%phi2 = sub <1 x i32> zeroinitializer, %sum
br label %for.cond
}
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index d625f3b..6cf9f0f 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -1,7 +1,8 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
; This test makes sure that these instructions are properly eliminated.
; PR1822
-; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
define i32 @test1(i32 %A, i32 %B) {
%C = select i1 false, i32 %A, i32 %B
@@ -916,9 +917,9 @@ define i32 @select_icmp_eq_and_4096_0_or_4096(i32 %x, i32 %y) {
}
; CHECK-LABEL: @select_icmp_eq_0_and_1_or_1(
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i64 %x, 1
-; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = trunc i64 [[AND]] to i32
-; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[XOR]], %y
+; CHECK-NEXT: [[TRUNC:%.+]] = trunc i64 %x to i32
+; CHECK-NEXT: [[AND:%.+]] = and i32 [[TRUNC]], 1
+; CHECK-NEXT: [[OR:%.+]] = or i32 [[XOR]], %y
; CHECK-NEXT: ret i32 [[OR]]
define i32 @select_icmp_eq_0_and_1_or_1(i64 %x, i32 %y) {
%and = and i64 %x, 1
@@ -957,11 +958,11 @@ define i32 @select_icmp_ne_0_and_32_or_4096(i32 %x, i32 %y) {
}
; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_or_8(
-; CHECK-NEXT: [[LSHR:%[a-z0-9]+]] = lshr i32 %x, 27
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[LSHR]], 8
-; CHECK-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i32 [[AND]] to i8
-; CHECK-NEXT: [[XOR:%[a-z0-9]+]] = xor i8 [[TRUNC]], 8
-; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i8 [[XOR]], %y
+; CHECK-NEXT: [[LSHR:%.+]] = lshr i32 %x, 27
+; CHECK-NEXT: [[TRUNC:%.+]] = trunc i32 [[LSHR]] to i8
+; CHECK-NEXT: [[AND:%.+]] = and i8 [[TRUNC]], 8
+; CHECK-NEXT: [[XOR:%.+]] = xor i8 [[AND]], 8
+; CHECK-NEXT: [[OR:%.+]] = or i8 [[XOR]], %y
; CHECK-NEXT: ret i8 [[OR]]
define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) {
%and = and i32 %x, 1073741824
@@ -1108,10 +1109,11 @@ define i32 @test65(i64 %x) {
ret i32 %3
; CHECK-LABEL: @test65(
-; CHECK: and i64 %x, 16
-; CHECK: trunc i64 %1 to i32
-; CHECK: lshr exact i32 %2, 3
-; CHECK: xor i32 %3, 42
+; CHECK: %[[TRUNC:.*]] = trunc i64 %x to i32
+; CHECK: %[[LSHR:.*]] = lshr i32 %[[TRUNC]], 3
+; CHECK: %[[AND:.*]] = and i32 %[[LSHR]], 2
+; CHECK: %[[XOR:.*]] = xor i32 %[[AND]], 42
+; CHECK: ret i32 %[[XOR]]
}
define i32 @test66(i64 %x) {
@@ -1236,3 +1238,150 @@ define i32 @test75(i32 %x) {
; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 68, i32 %x
; CHECK-NEXT: ret i32 [[SEL]]
}
+
+@under_aligned = external global i32, align 1
+
+define i32 @test76(i1 %flag, i32* %x) {
+; The load here must not be speculated around the select. One side of the
+; select is trivially dereferenceable but may have a lower alignment than the
+; load does.
+; CHECK-LABEL: @test76(
+; CHECK: store i32 0, i32* %x
+; CHECK: %[[P:.*]] = select i1 %flag, i32* @under_aligned, i32* %x
+; CHECK: load i32* %[[P]]
+
+ store i32 0, i32* %x
+ %p = select i1 %flag, i32* @under_aligned, i32* %x
+ %v = load i32* %p
+ ret i32 %v
+}
+
+declare void @scribble_on_memory(i32*)
+
+define i32 @test77(i1 %flag, i32* %x) {
+; The load here must not be speculated around the select. One side of the
+; select is trivially dereferenceable but may have a lower alignment than the
+; load does.
+; CHECK-LABEL: @test77(
+; CHECK: %[[A:.*]] = alloca i32, align 1
+; CHECK: call void @scribble_on_memory(i32* %[[A]])
+; CHECK: store i32 0, i32* %x
+; CHECK: %[[P:.*]] = select i1 %flag, i32* %[[A]], i32* %x
+; CHECK: load i32* %[[P]]
+
+ %under_aligned = alloca i32, align 1
+ call void @scribble_on_memory(i32* %under_aligned)
+ store i32 0, i32* %x
+ %p = select i1 %flag, i32* %under_aligned, i32* %x
+ %v = load i32* %p
+ ret i32 %v
+}
+
+define i32 @test78(i1 %flag, i32* %x, i32* %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; CHECK-LABEL: @test78(
+; CHECK: %[[V1:.*]] = load i32* %x
+; CHECK-NEXT: %[[V2:.*]] = load i32* %y
+; CHECK-NEXT: %[[S:.*]] = select i1 %flag, i32 %[[V1]], i32 %[[V2]]
+; CHECK-NEXT: ret i32 %[[S]]
+entry:
+ store i32 0, i32* %x
+ store i32 0, i32* %y
+ ; Block forwarding by storing to %z which could alias either %x or %y.
+ store i32 42, i32* %z
+ %p = select i1 %flag, i32* %x, i32* %y
+ %v = load i32* %p
+ ret i32 %v
+}
+
+define float @test79(i1 %flag, float* %x, i32* %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; CHECK-LABEL: @test79(
+; CHECK: %[[V1:.*]] = load float* %x
+; CHECK-NEXT: %[[V2:.*]] = load float* %y
+; CHECK-NEXT: %[[S:.*]] = select i1 %flag, float %[[V1]], float %[[V2]]
+; CHECK-NEXT: ret float %[[S]]
+entry:
+ %x1 = bitcast float* %x to i32*
+ %y1 = bitcast i32* %y to float*
+ store i32 0, i32* %x1
+ store i32 0, i32* %y
+ ; Block forwarding by storing to %z which could alias either %x or %y.
+ store i32 42, i32* %z
+ %p = select i1 %flag, float* %x, float* %y1
+ %v = load float* %p
+ ret float %v
+}
+
+define i32 @test80(i1 %flag) {
+; Test that when we speculate the loads around the select they fold through
+; load->load folding and load->store folding.
+; CHECK-LABEL: @test80(
+; CHECK: %[[X:.*]] = alloca i32
+; CHECK-NEXT: %[[Y:.*]] = alloca i32
+; CHECK: %[[V:.*]] = load i32* %[[X]]
+; CHECK-NEXT: store i32 %[[V]], i32* %[[Y]]
+; CHECK-NEXT: ret i32 %[[V]]
+entry:
+ %x = alloca i32
+ %y = alloca i32
+ call void @scribble_on_memory(i32* %x)
+ call void @scribble_on_memory(i32* %y)
+ %tmp = load i32* %x
+ store i32 %tmp, i32* %y
+ %p = select i1 %flag, i32* %x, i32* %y
+ %v = load i32* %p
+ ret i32 %v
+}
+
+define float @test81(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers.
+; CHECK-LABEL: @test81(
+; CHECK: %[[X:.*]] = alloca i32
+; CHECK-NEXT: %[[Y:.*]] = alloca i32
+; CHECK: %[[V:.*]] = load i32* %[[X]]
+; CHECK-NEXT: store i32 %[[V]], i32* %[[Y]]
+; CHECK-NEXT: %[[C:.*]] = bitcast i32 %[[V]] to float
+; CHECK-NEXT: ret float %[[C]]
+entry:
+ %x = alloca float
+ %y = alloca i32
+ %x1 = bitcast float* %x to i32*
+ %y1 = bitcast i32* %y to float*
+ call void @scribble_on_memory(i32* %x1)
+ call void @scribble_on_memory(i32* %y)
+ %tmp = load i32* %x1
+ store i32 %tmp, i32* %y
+ %p = select i1 %flag, float* %x, float* %y1
+ %v = load float* %p
+ ret float %v
+}
+
+define i32 @test82(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers.
+; CHECK-LABEL: @test82(
+; CHECK: %[[X:.*]] = alloca float
+; CHECK-NEXT: %[[Y:.*]] = alloca i32
+; CHECK-NEXT: %[[X1:.*]] = bitcast float* %[[X]] to i32*
+; CHECK-NEXT: %[[Y1:.*]] = bitcast i32* %[[Y]] to float*
+; CHECK: %[[V:.*]] = load float* %[[X]]
+; CHECK-NEXT: store float %[[V]], float* %[[Y1]]
+; CHECK-NEXT: %[[C:.*]] = bitcast float %[[V]] to i32
+; CHECK-NEXT: ret i32 %[[C]]
+entry:
+ %x = alloca float
+ %y = alloca i32
+ %x1 = bitcast float* %x to i32*
+ %y1 = bitcast i32* %y to float*
+ call void @scribble_on_memory(i32* %x1)
+ call void @scribble_on_memory(i32* %y)
+ %tmp = load float* %x
+ store float %tmp, float* %y1
+ %p = select i1 %flag, i32* %x1, i32* %y
+ %v = load i32* %p
+ ret i32 %v
+}
diff --git a/test/Transforms/InstCombine/strcmp-1.ll b/test/Transforms/InstCombine/strcmp-1.ll
index fc58ffc..9bbd7db 100644
--- a/test/Transforms/InstCombine/strcmp-1.ll
+++ b/test/Transforms/InstCombine/strcmp-1.ll
@@ -15,7 +15,7 @@ define i32 @test1(i8* %str2) {
; CHECK-LABEL: @test1(
; CHECK: %strcmpload = load i8* %str
; CHECK: %1 = zext i8 %strcmpload to i32
-; CHECK: %2 = sub i32 0, %1
+; CHECK: %2 = sub nsw i32 0, %1
; CHECK: ret i32 %2
%str1 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
diff --git a/test/Transforms/InstCombine/strncmp-1.ll b/test/Transforms/InstCombine/strncmp-1.ll
index df30dd1..49b0955 100644
--- a/test/Transforms/InstCombine/strncmp-1.ll
+++ b/test/Transforms/InstCombine/strncmp-1.ll
@@ -15,7 +15,7 @@ define i32 @test1(i8* %str2) {
; CHECK-LABEL: @test1(
; CHECK: %strcmpload = load i8* %str
; CHECK: %1 = zext i8 %strcmpload to i32
-; CHECK: %2 = sub i32 0, %1
+; CHECK: %2 = sub nsw i32 0, %1
; CHECK: ret i32 %2
%str1 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
@@ -73,7 +73,7 @@ define i32 @test6(i8* %str1, i8* %str2) {
; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32
; CHECK: [[LOAD2:%[a-z]+]] = load i8* %str2, align 1
; CHECK: [[ZEXT2:%[a-z]+]] = zext i8 [[LOAD2]] to i32
-; CHECK: [[RET:%[a-z]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
+; CHECK: [[RET:%[a-z]+]] = sub nsw i32 [[ZEXT1]], [[ZEXT2]]
; CHECK: ret i32 [[RET]]
%temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 1)
diff --git a/test/Transforms/InstCombine/sub-xor.ll b/test/Transforms/InstCombine/sub-xor.ll
index e7aff00..3a24074 100644
--- a/test/Transforms/InstCombine/sub-xor.ll
+++ b/test/Transforms/InstCombine/sub-xor.ll
@@ -32,7 +32,7 @@ define i32 @test3(i32 %x) nounwind {
; CHECK-LABEL: @test3(
; CHECK-NEXT: and i32 %x, 31
-; CHECK-NEXT: sub i32 73, %and
+; CHECK-NEXT: sub nsw i32 73, %and
; CHECK-NEXT: ret
}
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 67b7c49..0e421f7 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -142,8 +142,9 @@ define i32 @test15(i32 %A, i32 %B) {
%D = srem i32 %B, %C
ret i32 %D
; CHECK-LABEL: @test15(
-; CHECK: %D = srem i32 %B, %A
-; CHECK: ret i32 %D
+; CHECK: %[[sub:.*]] = sub i32 0, %A
+; CHECK-NEXT: %[[rem:.*]] = srem i32 %B, %[[sub]]
+; CHECK: ret i32 %[[rem]]
}
define i32 @test16(i32 %A) {
@@ -464,3 +465,88 @@ define i32 @test38(i32 %A) {
; CHECK-NEXT: [[SEXT:%.*]] = sext i1 [[ICMP]] to i32
; CHECK-NEXT: ret i32 [[SEXT]]
}
+
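+; X - (0 - A) -> X + A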
+define i32 @test39(i32 %A, i32 %x) {
+ %B = sub i32 0, %A
+ %C = sub nsw i32 %x, %B
+ ret i32 %C
+; CHECK-LABEL: @test39(
+; CHECK: %C = add i32 %x, %A
+; CHECK: ret i32 %C
+}
+
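+; Both operands are ashr by 1, so the sub cannot overflow and becomes sub nsw.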
+define i16 @test40(i16 %a, i16 %b) {
+ %ashr = ashr i16 %a, 1
+ %ashr1 = ashr i16 %b, 1
+ %sub = sub i16 %ashr, %ashr1
+ ret i16 %sub
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i16 %a, 1
+; CHECK-NEXT: [[ASHR1:%.*]] = ashr i16 %b, 1
+; CHECK-NEXT: [[RET:%.*]] = sub nsw i16 [[ASHR]], [[ASHR1]]
+; CHECK: ret i16 [[RET]]
+}
+
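+; Both operands are sext from i16, so the i32 sub cannot overflow and becomes
+; sub nsw.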
+define i32 @test41(i16 %a, i16 %b) {
+ %conv = sext i16 %a to i32
+ %conv1 = sext i16 %b to i32
+ %sub = sub i32 %conv, %conv1
+ ret i32 %sub
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: [[SEXT:%.*]] = sext i16 %a to i32
+; CHECK-NEXT: [[SEXT1:%.*]] = sext i16 %b to i32
+; CHECK-NEXT: [[RET:%.*]] = sub nsw i32 [[SEXT]], [[SEXT1]]
+; CHECK: ret i32 [[RET]]
+}
+
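+; Both operands are masked to [0, 7], so the i4 sub cannot overflow and
+; becomes sub nsw.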
+define i4 @test42(i4 %x, i4 %y) {
+ %a = and i4 %y, 7
+ %b = and i4 %x, 7
+ %c = sub i4 %a, %b
+ ret i4 %c
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[AND:%.*]] = and i4 %y, 7
+; CHECK-NEXT: [[AND1:%.*]] = and i4 %x, 7
+; CHECK-NEXT: [[RET:%.*]] = sub nsw i4 [[AND]], [[AND1]]
+; CHECK: ret i4 [[RET]]
+}
+
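+; The minuend is at least 8 and the subtrahend at most 7 (unsigned), so the
+; sub becomes sub nuw.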
+define i4 @test43(i4 %x, i4 %y) {
+ %a = or i4 %x, -8
+ %b = and i4 %y, 7
+ %c = sub i4 %a, %b
+ ret i4 %c
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[OR:%.*]] = or i4 %x, -8
+; CHECK-NEXT: [[AND:%.*]] = and i4 %y, 7
+; CHECK-NEXT: [[RET:%.*]] = sub nuw i4 [[OR]], [[AND]]
+; CHECK: ret i4 [[RET]]
+}
+
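+; sub nsw X, 32768 -> add nsw X, -32768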
+define i32 @test44(i32 %x) {
+ %sub = sub nsw i32 %x, 32768
+ ret i32 %sub
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 %x, -32768
+; CHECK: ret i32 [[ADD]]
+}
+
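+; (x | y) - (x ^ y) -> x & y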
+define i32 @test45(i32 %x, i32 %y) {
+ %or = or i32 %x, %y
+ %xor = xor i32 %x, %y
+ %sub = sub i32 %or, %xor
+ ret i32 %sub
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: %sub = and i32 %x, %y
+; CHECK: ret i32 %sub
+}
+
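+; (x | y) - x -> y & (~x)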
+define i32 @test46(i32 %x, i32 %y) {
+ %or = or i32 %x, %y
+ %sub = sub i32 %or, %x
+ ret i32 %sub
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: %x.not = xor i32 %x, -1
+; CHECK-NEXT: %sub = and i32 %y, %x.not
+; CHECK: ret i32 %sub
+}
diff --git a/test/Transforms/InstCombine/vsx-unaligned.ll b/test/Transforms/InstCombine/vsx-unaligned.ll
new file mode 100644
index 0000000..26e0426
--- /dev/null
+++ b/test/Transforms/InstCombine/vsx-unaligned.ll
@@ -0,0 +1,44 @@
+; Verify that we can create unaligned loads and stores from VSX intrinsics.
+
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target triple = "powerpc64-unknown-linux-gnu"
+
+@vf = common global <4 x float> zeroinitializer, align 1
+@res_vf = common global <4 x float> zeroinitializer, align 1
+@vd = common global <2 x double> zeroinitializer, align 1
+@res_vd = common global <2 x double> zeroinitializer, align 1
+
+define void @test1() {
+entry:
+ %t1 = alloca <4 x float>*, align 8
+ %t2 = alloca <2 x double>*, align 8
+ store <4 x float>* @vf, <4 x float>** %t1, align 8
+ %0 = load <4 x float>** %t1, align 8
+ %1 = bitcast <4 x float>* %0 to i8*
+ %2 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %1)
+ store <4 x float>* @res_vf, <4 x float>** %t1, align 8
+ %3 = load <4 x float>** %t1, align 8
+ %4 = bitcast <4 x float>* %3 to i8*
+ call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %2, i8* %4)
+ store <2 x double>* @vd, <2 x double>** %t2, align 8
+ %5 = load <2 x double>** %t2, align 8
+ %6 = bitcast <2 x double>* %5 to i8*
+ %7 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %6)
+ store <2 x double>* @res_vd, <2 x double>** %t2, align 8
+ %8 = load <2 x double>** %t2, align 8
+ %9 = bitcast <2 x double>* %8 to i8*
+ call void @llvm.ppc.vsx.stxvd2x(<2 x double> %7, i8* %9)
+ ret void
+}
+
+; CHECK-LABEL: @test1
+; CHECK: %0 = load <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 1
+; CHECK: store <4 x i32> %0, <4 x i32>* bitcast (<4 x float>* @res_vf to <4 x i32>*), align 1
+; CHECK: %1 = load <2 x double>* @vd, align 1
+; CHECK: store <2 x double> %1, <2 x double>* @res_vd, align 1
+
+declare <4 x i32> @llvm.ppc.vsx.lxvw4x(i8*)
+declare void @llvm.ppc.vsx.stxvw4x(<4 x i32>, i8*)
+declare <2 x double> @llvm.ppc.vsx.lxvd2x(i8*)
+declare void @llvm.ppc.vsx.stxvd2x(<2 x double>, i8*)
diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll
index d153e03..797c8f3 100644
--- a/test/Transforms/InstCombine/xor2.ll
+++ b/test/Transforms/InstCombine/xor2.ll
@@ -82,3 +82,93 @@ define i32 @test6(i32 %x) {
; CHECK: lshr i32 %x, 16
; CHECK: ret
}
+
+
+; (A | B) ^ (~A) -> (A | ~B)
+define i32 @test7(i32 %a, i32 %b) {
+ %or = or i32 %a, %b
+ %neg = xor i32 %a, -1
+ %xor = xor i32 %or, %neg
+ ret i32 %xor
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: %[[b_not:.*]] = xor i32 %b, -1
+; CHECK-NEXT: %[[or:.*]] = or i32 %a, %[[b_not]]
+; CHECK-NEXT: ret i32 %[[or]]
+}
+
+; (~A) ^ (A | B) -> (A | ~B)
+define i32 @test8(i32 %a, i32 %b) {
+ %neg = xor i32 %a, -1
+ %or = or i32 %a, %b
+ %xor = xor i32 %neg, %or
+ ret i32 %xor
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: %[[b_not:.*]] = xor i32 %b, -1
+; CHECK-NEXT: %[[or:.*]] = or i32 %a, %[[b_not]]
+; CHECK-NEXT: ret i32 %[[or]]
+}
+
+; (A & B) ^ (A ^ B) -> (A | B)
+define i32 @test9(i32 %b, i32 %c) {
+ %and = and i32 %b, %c
+ %xor = xor i32 %b, %c
+ %xor2 = xor i32 %and, %xor
+ ret i32 %xor2
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: %xor2 = or i32 %b, %c
+}
+
+; (A ^ B) ^ (A & B) -> (A | B)
+define i32 @test10(i32 %b, i32 %c) {
+ %xor = xor i32 %b, %c
+ %and = and i32 %b, %c
+ %xor2 = xor i32 %xor, %and
+ ret i32 %xor2
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: %xor2 = or i32 %b, %c
+}
+
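+; (B ^ A) & ((~A) ^ B) -> 0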
+define i32 @test11(i32 %A, i32 %B) {
+ %xor1 = xor i32 %B, %A
+ %not = xor i32 %A, -1
+ %xor2 = xor i32 %not, %B
+ %and = and i32 %xor1, %xor2
+ ret i32 %and
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: ret i32 0
+}
+
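+; (a & (~b)) ^ (~a) -> ~(a & b)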
+define i32 @test12(i32 %a, i32 %b) {
+ %negb = xor i32 %b, -1
+ %and = and i32 %a, %negb
+ %nega = xor i32 %a, -1
+ %xor = xor i32 %and, %nega
+ ret i32 %xor
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: %1 = and i32 %a, %b
+; CHECK-NEXT: %xor = xor i32 %1, -1
+}
+
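+; (~a) ^ (a & (~b)) -> ~(a & b)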
+define i32 @test13(i32 %a, i32 %b) {
+ %nega = xor i32 %a, -1
+ %negb = xor i32 %b, -1
+ %and = and i32 %a, %negb
+ %xor = xor i32 %nega, %and
+ ret i32 %xor
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: %1 = and i32 %a, %b
+; CHECK-NEXT: %xor = xor i32 %1, -1
+}
+
+; (A ^ C) ^ (A | B) -> ((~A) & B) ^ C
+define i32 @test14(i32 %a, i32 %b, i32 %c) {
+ %neg = xor i32 %a, %c
+ %or = or i32 %a, %b
+ %xor = xor i32 %neg, %or
+ ret i32 %xor
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: %[[not:.*]] = xor i32 %a, -1
+; CHECK-NEXT: %[[and:.*]] = and i32 %[[not]], %b
+; CHECK-NEXT: %[[xor:.*]] = xor i32 %[[and]], %c
+; CHECK-NEXT: ret i32 %[[xor]]
+}