67 files changed, 2583 insertions, 234 deletions
diff --git a/test/Transforms/InstCombine/2002-05-14-SubFailure.ll b/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
index d2b2b00..854ec60 100644
--- a/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
+++ b/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
@@ -1,7 +1,8 @@
 ; Instcombine was missing a test that caused it to make illegal transformations
 ; sometimes.  In this case, it transforms the sub into an add:
-; RUN: opt < %s -instcombine -S | grep sub
-;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: sub
+
 define i32 @test(i32 %i, i32 %j) {
         %A = mul i32 %i, %j
         %B = sub i32 2, %A
diff --git a/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll b/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
index 22574f7..49e55c6 100644
--- a/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
+++ b/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
@@ -1,4 +1,6 @@
-; RUN: opt < %s -instcombine -S | not grep add
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK-NOT: add
 
 define i32 @test(i32 %A) {
         %A.neg = sub i32 0, %A          ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll b/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
index c02d33c..bb9a818 100644
--- a/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
+++ b/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
@@ -1,6 +1,7 @@
-; This testcase can be simplified by "realizing" that alloca can never return 
+; This testcase can be simplified by "realizing" that alloca can never return
 ; null.
-; RUN: opt < %s -instcombine -simplifycfg -S | not grep br
+; RUN: opt < %s -instcombine -simplifycfg -S | FileCheck %s
+; CHECK-NOT: br
 
 declare i32 @bitmap_clear(...)
 
diff --git a/test/Transforms/InstCombine/2006-10-20-mask.ll b/test/Transforms/InstCombine/2006-10-20-mask.ll
index 0aaa5e8..e9797ae 100644
--- a/test/Transforms/InstCombine/2006-10-20-mask.ll
+++ b/test/Transforms/InstCombine/2006-10-20-mask.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN:    grep and
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: and
 
 define i64 @foo(i64 %tmp, i64 %tmp2) {
         %tmp.upgrd.1 = trunc i64 %tmp to i32            ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
index d3ba1e2..8ab50e2 100644
--- a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
+++ b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@@ -1,5 +1,6 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN:   grep mul | count 2
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: mul
+; CHECK: mul
 
 define <4 x float> @test(<4 x float> %V) {
         %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >                ; <<4 x float>> [#uses=1]
diff --git a/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll b/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
index 2665791..272753c 100644
--- a/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
+++ b/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep select
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: select
 ; END.
 
 target datalayout = "e-p:32:32"
diff --git a/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll b/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
index c161bcc..6b4e89d 100644
--- a/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
+++ b/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep select
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: select
 
 define double @fold(i1 %a, double %b) {
 %s = select i1 %a, double 0., double 1.
diff --git a/test/Transforms/InstCombine/2008-02-13-MulURem.ll b/test/Transforms/InstCombine/2008-02-13-MulURem.ll
index a88c510..d85ef97 100644
--- a/test/Transforms/InstCombine/2008-02-13-MulURem.ll
+++ b/test/Transforms/InstCombine/2008-02-13-MulURem.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -instcombine -S | grep rem
+; RUN: opt < %s -instcombine -S | FileCheck %s
 ; PR1933
 
+; CHECK: rem
+
 define i32 @fold(i32 %a) {
   %s = mul i32 %a, 3
   %c = urem i32 %s, 3
diff --git a/test/Transforms/InstCombine/2008-05-31-AddBool.ll b/test/Transforms/InstCombine/2008-05-31-AddBool.ll
index ed20690..31b1719 100644
--- a/test/Transforms/InstCombine/2008-05-31-AddBool.ll
+++ b/test/Transforms/InstCombine/2008-05-31-AddBool.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -instcombine -S | grep "xor"
+; RUN: opt < %s -instcombine -S | FileCheck %s
 ; PR2389
 
+; CHECK: xor
+
 define i1 @test(i1 %a, i1 %b) {
   %A = add i1 %a, %b
   ret i1 %A
diff --git a/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll b/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
index 949fc59..e354311 100644
--- a/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
+++ b/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
@@ -5,7 +5,7 @@ target triple = "i386-apple-darwin9.6"
 
 define i32 @test(i32* %P) nounwind {
 entry:
-  %Q = bitcast i32* %P to i32 addrspace(1)*
+  %Q = addrspacecast i32* %P to i32 addrspace(1)*
   store i32 0, i32 addrspace(1)* %Q, align 4
   ret i32 0
 }
diff --git a/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
index 6f3df5b..4d185bf 100644
--- a/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
+++ b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
@@ -1,10 +1,10 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; CHECK: bitcast
+; CHECK: addrspacecast
 
 @base = internal addrspace(3) unnamed_addr global [16 x i32] zeroinitializer, align 16
 declare void @foo(i32*)
 
 define void @test() nounwind {
-  call void @foo(i32* getelementptr (i32* bitcast ([16 x i32] addrspace(3)* @base to i32*), i64 2147483647)) nounwind
+  call void @foo(i32* getelementptr (i32* addrspacecast ([16 x i32] addrspace(3)* @base to i32*), i64 2147483647)) nounwind
   ret void
 }
diff --git a/test/Transforms/InstCombine/addrspacecast.ll b/test/Transforms/InstCombine/addrspacecast.ll
new file mode 100644
index 0000000..d908b55
--- /dev/null
+++ b/test/Transforms/InstCombine/addrspacecast.ll
@@ -0,0 +1,69 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:32:32:32-p2:16:16:16-n8:16:32:64"
+
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8*, i8 addrspace(1)*, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p2i8.i32(i8*, i8 addrspace(2)*, i32, i32, i1) nounwind
+
+
+define i32* @combine_redundant_addrspacecast(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast(
+; CHECK: addrspacecast i32 addrspace(1)* %x to i32*
+; CHECK-NEXT: ret
+  %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)*
+  %z = addrspacecast i32 addrspace(3)* %y to i32*
+  ret i32* %z
+}
+
+define <4 x i32*> @combine_redundant_addrspacecast_vector(<4 x i32 addrspace(1)*> %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast_vector(
+; CHECK: addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32*>
+; CHECK-NEXT: ret
+  %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32 addrspace(3)*>
+  %z = addrspacecast <4 x i32 addrspace(3)*> %y to <4 x i32*>
+  ret <4 x i32*> %z
+}
+
+define float* @combine_redundant_addrspacecast_types(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast_types(
+; CHECK: addrspacecast i32 addrspace(1)* %x to float*
+; CHECK-NEXT: ret
+  %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)*
+  %z = addrspacecast i32 addrspace(3)* %y to float*
+  ret float* %z
+}
+
+@const_array = addrspace(2) constant [60 x i8] [i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22 ]
+
+declare void @foo(i8*) nounwind
+
+; A copy from a constant addrspacecast'ed global
+; CHECK-LABEL: @memcpy_addrspacecast(
+; CHECK-NOT:  call void @llvm.memcpy
+define i32 @memcpy_addrspacecast() nounwind {
+entry:
+  %alloca = alloca i8, i32 48
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %alloca, i8 addrspace(1)* addrspacecast (i8 addrspace(2)* getelementptr inbounds ([60 x i8] addrspace(2)* @const_array, i16 0, i16 4) to i8 addrspace(1)*), i32 48, i32 4, i1 false) nounwind
+  br label %loop.body
+
+loop.body:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop.body ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.inc, %loop.body]
+  %ptr = getelementptr i8* %alloca, i32 %i
+  %load = load i8* %ptr
+  %ext = zext i8 %load to i32
+  %sum.inc = add i32 %sum, %ext
+  %i.inc = add i32 %i, 1
+  %cmp = icmp ne i32 %i, 48
+  br i1 %cmp, label %loop.body, label %end
+
+end:
+  ret i32 %sum.inc
+}
+
diff --git a/test/Transforms/InstCombine/align-addr.ll b/test/Transforms/InstCombine/align-addr.ll
index e33ee9f..4d22c2c 100644
--- a/test/Transforms/InstCombine/align-addr.ll
+++ b/test/Transforms/InstCombine/align-addr.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 ; Instcombine should be able to prove vector alignment in the
 ; presence of a few mild address computation tricks.
@@ -47,6 +47,27 @@ entry:
 	ret <16 x i8> %tmp
 }
 
+@GLOBAL_as1 = internal addrspace(1) global [4 x i32] zeroinitializer
+
+define <16 x i8> @test1_as1(<2 x i64> %x) {
+; CHECK-LABEL: @test1_as1(
+; CHECK: tmp = load
+; CHECK: GLOBAL_as1{{.*}}align 16
+  %tmp = load <16 x i8> addrspace(1)* bitcast ([4 x i32] addrspace(1)* @GLOBAL_as1 to <16 x i8> addrspace(1)*), align 1
+  ret <16 x i8> %tmp
+}
+
+@GLOBAL_as1_gep = internal addrspace(1) global [8 x i32] zeroinitializer
+
+define <16 x i8> @test1_as1_gep(<2 x i64> %x) {
+; CHECK-LABEL: @test1_as1_gep(
+; CHECK: tmp = load
+; CHECK: GLOBAL_as1_gep{{.*}}align 16
+  %tmp = load <16 x i8> addrspace(1)* bitcast (i32 addrspace(1)* getelementptr ([8 x i32] addrspace(1)* @GLOBAL_as1_gep, i16 0, i16 4) to <16 x i8> addrspace(1)*), align 1
+  ret <16 x i8> %tmp
+}
+
+
 ; When a load or store lacks an explicit alignment, add one.
 
 ; CHECK-LABEL: @test2(
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index 9a80ad9..ae1cfa1 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -1,7 +1,7 @@
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+; RUN: opt < %s -instcombine -S -default-data-layout="E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s
+; RUN: opt < %s -instcombine -S -default-data-layout="E-p:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s -check-prefix=P32
+; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=NODL
 
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; END.
 
 declare void @use(...)
 
@@ -110,3 +110,22 @@ entry:
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+
+; Check that the GEP indices use the pointer size, or 64 if unknown
+define void @test8() {
+; CHECK-LABEL: @test8(
+; CHECK: alloca [100 x i32]
+; CHECK: getelementptr inbounds [100 x i32]* %x1, i64 0, i64 0
+
+; P32-LABEL: @test8(
+; P32: alloca [100 x i32]
+; P32: getelementptr inbounds [100 x i32]* %x1, i32 0, i32 0
+
+; NODL-LABEL: @test8(
+; NODL: alloca [100 x i32]
+; NODL: getelementptr inbounds [100 x i32]* %x1, i64 0, i64 0
+  %x = alloca i32, i32 100
+  call void (...)* @use(i32* %x)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index 504391a..e88fd59 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -42,3 +42,15 @@ define <4 x i32> @test5(<4 x i32> %A) {
   %2 = and <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %1
   ret <4 x i32> %2
 }
+
+; Check that we combine "if x!=0 && x!=-1" into "if x+1u>1"
+define i32 @test6(i64 %x) nounwind {
+; CHECK: @test6
+; CHECK-NEXT: add i64 %x, 1
+; CHECK-NEXT: icmp ugt i64 %x.off, 1
+  %cmp1 = icmp ne i64 %x, -1
+  %not.cmp = icmp ne i64 %x, 0
+  %.cmp1 = and i1 %cmp1, %not.cmp
+  %land.ext = zext i1 %.cmp1 to i32
+  ret i32 %land.ext
+}
diff --git a/test/Transforms/InstCombine/apint-select.ll b/test/Transforms/InstCombine/apint-select.ll
index f2ea601..cf24a44 100644
--- a/test/Transforms/InstCombine/apint-select.ll
+++ b/test/Transforms/InstCombine/apint-select.ll
@@ -1,6 +1,7 @@
 ; This test makes sure that these instructions are properly eliminated.
 
-; RUN: opt < %s -instcombine -S | not grep select
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK-NOT: select
 
 
 define i41 @test1(i1 %C) {
@@ -37,7 +38,7 @@ define i41 @test5(i41 %X) {
 
 define i1023 @test6(i1023 %X) {
     ;; ((X & 27) ? 27 : 0)
-    %Y = and i1023 %X, 64 
+    %Y = and i1023 %X, 64
     %t = icmp ne i1023 %Y, 0
     %V = select i1 %t, i1023 64, i1023 0
     ret i1023 %V
diff --git a/test/Transforms/InstCombine/bitcast-bigendian.ll b/test/Transforms/InstCombine/bitcast-bigendian.ll
index 28b0e9a..ed812e1 100644
--- a/test/Transforms/InstCombine/bitcast-bigendian.ll
+++ b/test/Transforms/InstCombine/bitcast-bigendian.ll
@@ -48,3 +48,44 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
 ; CHECK-NEXT:  ret float %add
 }
 
+define <2 x i32> @test4(i32 %A, i32 %B){
+  %tmp38 = zext i32 %A to i64
+  %tmp32 = zext i32 %B to i64
+  %tmp33 = shl i64 %tmp32, 32
+  %ins35 = or i64 %tmp33, %tmp38
+  %tmp43 = bitcast i64 %ins35 to <2 x i32>
+  ret <2 x i32> %tmp43
+  ; CHECK-LABEL: @test4(
+  ; CHECK-NEXT: insertelement <2 x i32> undef, i32 %B, i32 0
+  ; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %A, i32 1
+  ; CHECK-NEXT: ret <2 x i32>
+
+}
+
+define <2 x float> @test5(float %A, float %B) {
+  %tmp37 = bitcast float %A to i32
+  %tmp38 = zext i32 %tmp37 to i64
+  %tmp31 = bitcast float %B to i32
+  %tmp32 = zext i32 %tmp31 to i64
+  %tmp33 = shl i64 %tmp32, 32
+  %ins35 = or i64 %tmp33, %tmp38
+  %tmp43 = bitcast i64 %ins35 to <2 x float>
+  ret <2 x float> %tmp43
+  ; CHECK-LABEL: @test5(
+  ; CHECK-NEXT: insertelement <2 x float> undef, float %B, i32 0
+  ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %A, i32 1
+  ; CHECK-NEXT: ret <2 x float>
+}
+
+define <2 x float> @test6(float %A){
+  %tmp23 = bitcast float %A to i32              ; <i32> [#uses=1]
+  %tmp24 = zext i32 %tmp23 to i64                 ; <i64> [#uses=1]
+  %tmp25 = shl i64 %tmp24, 32                     ; <i64> [#uses=1]
+  %mask20 = or i64 %tmp25, 1109917696             ; <i64> [#uses=1]
+  %tmp35 = bitcast i64 %mask20 to <2 x float>     ; <<2 x float>> [#uses=1]
+  ret <2 x float> %tmp35
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0
+; CHECK-NEXT: insertelement <2 x float> {{.*}}, float 4.200000e+01, i32 1
+; CHECK: ret
+}
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 4ef8790..c7a520b 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -144,3 +144,13 @@ define <2 x i16> @BitcastInsert(i32 %a) {
 ; CHECK-LABEL: @BitcastInsert(
 ; CHECK: bitcast i32 %a to <2 x i16>
 }
+
+; PR17293
+define <2 x i64> @test7(<2 x i8*>* %arg) nounwind {
+  %cast = bitcast <2 x i8*>* %arg to <2 x i64>*
+  %load = load <2 x i64>* %cast, align 16
+  ret <2 x i64> %load
+; CHECK: @test7
+; CHECK: bitcast
+; CHECK: load
+}
diff --git a/test/Transforms/InstCombine/call.ll b/test/Transforms/InstCombine/call.ll
index 55833fb..e68c0ad 100644
--- a/test/Transforms/InstCombine/call.ll
+++ b/test/Transforms/InstCombine/call.ll
@@ -1,7 +1,7 @@
 ; Ignore stderr, we expect warnings there
 ; RUN: opt < %s -instcombine 2> /dev/null -S | FileCheck %s
 
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+target datalayout = "E-p:64:64:64-p1:16:16:16-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 ; Simple case, argument translatable without changing the value
 declare void @test1a(i8*)
@@ -15,6 +15,28 @@ define void @test1(i32* %A) {
   ret void
 }
 
+
+; Should not do because of change in address space of the parameter
+define void @test1_as1_illegal(i32 addrspace(1)* %A) {
+; CHECK-LABEL: @test1_as1_illegal(
+; CHECK: call void bitcast
+  call void bitcast (void (i8*)* @test1a to void (i32 addrspace(1)*)*)(i32 addrspace(1)* %A)
+  ret void
+}
+
+; Test1, but the argument has a different sized address-space
+declare void @test1a_as1(i8 addrspace(1)*)
+
+; This one is OK to perform
+define void @test1_as1(i32 addrspace(1)* %A) {
+; CHECK-LABEL: @test1_as1(
+; CHECK: %1 = bitcast i32 addrspace(1)* %A to i8 addrspace(1)*
+; CHECK: call void @test1a_as1(i8 addrspace(1)* %1)
+; CHECK: ret void
+  call void bitcast (void (i8 addrspace(1)*)* @test1a_as1 to void (i32 addrspace(1)*)*)(i32 addrspace(1)* %A )
+  ret void
+}
+
 ; More complex case, translate argument because of resolution.  This is safe
 ; because we have the body of the function
 define void @test2a(i8 %A) {
@@ -135,3 +157,122 @@ entry:
 ; CHECK: call i8* bitcast
 }
 
+
+; Parameter that's a vector of pointers
+declare void @test10a(<2 x i8*>)
+
+define void @test10(<2 x i32*> %A) {
+; CHECK-LABEL: @test10(
+; CHECK: %1 = bitcast <2 x i32*> %A to <2 x i8*>
+; CHECK: call void @test10a(<2 x i8*> %1)
+; CHECK: ret void
+  call void bitcast (void (<2 x i8*>)* @test10a to void (<2 x i32*>)*)(<2 x i32*> %A)
+  ret void
+}
+
+; Don't transform because different address spaces
+declare void @test10a_mixed_as(<2 x i8 addrspace(1)*>)
+
+define void @test10_mixed_as(<2 x i8*> %A) {
+; CHECK-LABEL: @test10_mixed_as(
+; CHECK: call void bitcast
+  call void bitcast (void (<2 x i8 addrspace(1)*>)* @test10a_mixed_as to void (<2 x i8*>)*)(<2 x i8*> %A)
+  ret void
+}
+
+; Return type that's a pointer
+define i8* @test11a() {
+  ret i8* zeroinitializer
+}
+
+define i32* @test11() {
+; CHECK-LABEL: @test11(
+; CHECK: %X = call i8* @test11a()
+; CHECK: %1 = bitcast i8* %X to i32*
+  %X = call i32* bitcast (i8* ()* @test11a to i32* ()*)()
+  ret i32* %X
+}
+
+; Return type that's a pointer with a different address space
+define i8 addrspace(1)* @test11a_mixed_as() {
+  ret i8 addrspace(1)* zeroinitializer
+}
+
+define i8* @test11_mixed_as() {
+; CHECK-LABEL: @test11_mixed_as(
+; CHECK: call i8* bitcast
+  %X = call i8* bitcast (i8 addrspace(1)* ()* @test11a_mixed_as to i8* ()*)()
+  ret i8* %X
+}
+
+; Return type that's a vector of pointers
+define <2 x i8*> @test12a() {
+  ret <2 x i8*> zeroinitializer
+}
+
+define <2 x i32*> @test12() {
+; CHECK-LABEL: @test12(
+; CHECK: %X = call <2 x i8*> @test12a()
+; CHECK: %1 = bitcast <2 x i8*> %X to <2 x i32*>
+  %X = call <2 x i32*> bitcast (<2 x i8*> ()* @test12a to <2 x i32*> ()*)()
+  ret <2 x i32*> %X
+}
+
+define <2 x i8 addrspace(1)*> @test12a_mixed_as() {
+  ret <2 x i8 addrspace(1)*> zeroinitializer
+}
+
+define <2 x i8*> @test12_mixed_as() {
+; CHECK-LABEL: @test12_mixed_as(
+; CHECK: call <2 x i8*> bitcast
+  %X = call <2 x i8*> bitcast (<2 x i8 addrspace(1)*> ()* @test12a_mixed_as to <2 x i8*> ()*)()
+  ret <2 x i8*> %X
+}
+
+
+; Mix parameter that's a vector of integers and pointers of the same size
+declare void @test13a(<2 x i64>)
+
+define void @test13(<2 x i32*> %A) {
+; CHECK-LABEL: @test13(
+; CHECK: call void bitcast
+  call void bitcast (void (<2 x i64>)* @test13a to void (<2 x i32*>)*)(<2 x i32*> %A)
+  ret void
+}
+
+; Mix parameter that's a vector of integers and pointers of the same
+; size, but the other way around
+declare void @test14a(<2 x i8*>)
+
+define void @test14(<2 x i64> %A) {
+; CHECK-LABEL: @test14(
+; CHECK: call void bitcast
+  call void bitcast (void (<2 x i8*>)* @test14a to void (<2 x i64>)*)(<2 x i64> %A)
+  ret void
+}
+
+
+; Return type that's a vector
+define <2 x i16> @test15a() {
+  ret <2 x i16> zeroinitializer
+}
+
+define i32 @test15() {
+; CHECK-LABEL: @test15(
+; CHECK: %X = call <2 x i16> @test15a()
+; CHECK: %1 = bitcast <2 x i16> %X to i32
+  %X = call i32 bitcast (<2 x i16> ()* @test15a to i32 ()*)( )
+  ret i32 %X
+}
+
+define i32 @test16a() {
+  ret i32 0
+}
+
+define <2 x i16> @test16() {
+; CHECK-LABEL: @test16(
+; CHECK: %X = call i32 @test16a()
+; CHECK: %1 = bitcast i32 %X to <2 x i16>
+  %X = call <2 x i16> bitcast (i32 ()* @test16a to <2 x i16> ()*)( )
+  ret <2 x i16> %X
+}
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 52ea7b9..cac0ec1 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -1,6 +1,6 @@
 ; Tests to make sure elimination of casts is working correctly
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64"
+target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64"
 
 @inbuf = external global [32832 x i8]           ; <[32832 x i8]*> [#uses=1]
 
@@ -708,6 +708,19 @@ define %s @test68(%s *%p, i64 %i) {
 ; CHECK-NEXT: ret %s
 }
 
+define %s @test68_as1(%s addrspace(1)* %p, i32 %i) {
+; CHECK-LABEL: @test68_as1(
+  %o = mul i32 %i, 12
+  %q = bitcast %s addrspace(1)* %p to i8 addrspace(1)*
+  %pp = getelementptr inbounds i8 addrspace(1)* %q, i32 %o
+; CHECK-NEXT: getelementptr %s addrspace(1)*
+  %r = bitcast i8 addrspace(1)* %pp to %s addrspace(1)*
+  %l = load %s addrspace(1)* %r
+; CHECK-NEXT: load %s addrspace(1)*
+  ret %s %l
+; CHECK-NEXT: ret %s
+}
+
 define double @test69(double *%p, i64 %i) {
 ; CHECK-LABEL: @test69(
   %o = shl nsw i64 %i, 3
@@ -890,6 +903,20 @@ define double @test80([100 x double]* %p, i32 %i) {
 ; CHECK-NEXT: ret double
 }
 
+define double @test80_as1([100 x double] addrspace(1)* %p, i16 %i) {
+; CHECK-LABEL: @test80_as1(
+  %tmp = mul nsw i16 %i, 8
+; CHECK-NEXT: sext i16 %i to i32
+  %q = bitcast [100 x double] addrspace(1)* %p to i8 addrspace(1)*
+  %pp = getelementptr i8 addrspace(1)* %q, i16 %tmp
+; CHECK-NEXT: getelementptr [100 x double] addrspace(1)*
+  %r = bitcast i8 addrspace(1)* %pp to double addrspace(1)*
+  %l = load double addrspace(1)* %r
+; CHECK-NEXT: load double addrspace(1)*
+  ret double %l
+; CHECK-NEXT: ret double
+}
+
 define double @test81(double *%p, float %f) {
   %i = fptosi float %f to i64
   %q = bitcast double* %p to i8*
diff --git a/test/Transforms/InstCombine/cast_ptr.ll b/test/Transforms/InstCombine/cast_ptr.ll
index 7910ea3..23006a8 100644
--- a/test/Transforms/InstCombine/cast_ptr.ll
+++ b/test/Transforms/InstCombine/cast_ptr.ll
@@ -1,7 +1,7 @@
 ; Tests to make sure elimination of casts is working correctly
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-target datalayout = "p:32:32"
+target datalayout = "p:32:32-p1:32:32-p2:16:16"
 
 ; This shouldn't convert to getelementptr because the relationship
 ; between the arithmetic and the layout of allocated memory is
@@ -27,6 +27,26 @@ define i1 @test2(i8* %a, i8* %b) {
         ret i1 %r
 }
 
+; These casts should be folded away.
+; CHECK-LABEL: @test2_as2_same_int(
+; CHECK: icmp eq i8 addrspace(2)* %a, %b
+define i1 @test2_as2_same_int(i8 addrspace(2)* %a, i8 addrspace(2)* %b) {
+  %tmpa = ptrtoint i8 addrspace(2)* %a to i16
+  %tmpb = ptrtoint i8 addrspace(2)* %b to i16
+  %r = icmp eq i16 %tmpa, %tmpb
+  ret i1 %r
+}
+
+; These casts should be folded away.
+; CHECK-LABEL: @test2_as2_larger(
+; CHECK: icmp eq i8 addrspace(2)* %a, %b
+define i1 @test2_as2_larger(i8 addrspace(2)* %a, i8 addrspace(2)* %b) {
+  %tmpa = ptrtoint i8 addrspace(2)* %a to i32
+  %tmpb = ptrtoint i8 addrspace(2)* %b to i32
+  %r = icmp eq i32 %tmpa, %tmpb
+  ret i1 %r
+}
+
 ; These casts should also be folded away.
 ; CHECK-LABEL: @test3(
 ; CHECK: icmp eq i8* %a, @global
@@ -43,11 +63,20 @@ define i1 @test4(i32 %A) {
   ret i1 %C
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT: %C = icmp eq i32 %A, 0
-; CHECK-NEXT: ret i1 %C 
+; CHECK-NEXT: ret i1 %C
 }
 
+define i1 @test4_as2(i16 %A) {
+; CHECK-LABEL: @test4_as2(
+; CHECK-NEXT: %C = icmp eq i16 %A, 0
+; CHECK-NEXT: ret i1 %C
+  %B = inttoptr i16 %A to i8 addrspace(2)*
+  %C = icmp eq i8 addrspace(2)* %B, null
+  ret i1 %C
+}
 
-; Pulling the cast out of the load allows us to eliminate the load, and then 
+
+; Pulling the cast out of the load allows us to eliminate the load, and then
 ; the whole array.
 
         %op = type { float }
@@ -69,11 +98,11 @@ define %unop* @test5(%op* %O) {
 ; InstCombine can not 'load (cast P)' -> cast (load P)' if the cast changes
 ; the address space.
 
-define i8 @test6(i8 addrspace(1)* %source) {                                                                                        
-entry: 
-  %arrayidx223 = bitcast i8 addrspace(1)* %source to i8*
+define i8 @test6(i8 addrspace(1)* %source) {
+entry:
+  %arrayidx223 = addrspacecast i8 addrspace(1)* %source to i8*
   %tmp4 = load i8* %arrayidx223
   ret i8 %tmp4
 ; CHECK-LABEL: @test6(
 ; CHECK: load i8* %arrayidx223
-} 
+}
diff --git a/test/Transforms/InstCombine/compare-signs.ll b/test/Transforms/InstCombine/compare-signs.ll
index cdf95ab..62cd5b3 100644
--- a/test/Transforms/InstCombine/compare-signs.ll
+++ b/test/Transforms/InstCombine/compare-signs.ll
@@ -24,9 +24,9 @@
 define i32 @test3(i32 %a, i32 %b) nounwind readnone {
 ; CHECK-LABEL: @test3(
 entry:
-; CHECK: xor i32 %a, %b
-; CHECK: lshr i32 %0, 31
-; CHECK: xor i32 %1, 1
+; CHECK: [[XOR1:%.*]] = xor i32 %a, %b
+; CHECK: [[SHIFT:%.*]] = lshr i32 [[XOR1]], 31
+; CHECK: [[XOR2:%.*]] = xor i32 [[SHIFT]], 1
         %0 = lshr i32 %a, 31            ; <i32> [#uses=1]
         %1 = lshr i32 %b, 31            ; <i32> [#uses=1]
         %2 = icmp eq i32 %0, %1         ; <i1> [#uses=1]
@@ -34,7 +34,7 @@ entry:
         ret i32 %3
 ; CHECK-NOT: icmp
 ; CHECK-NOT: zext
-; CHECK: ret i32 %2
+; CHECK: ret i32 [[XOR2]]
 }
 
 ; Variation on @test3: checking the 2nd bit in a situation where the 5th bit
diff --git a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
new file mode 100644
index 0000000..9f21d54
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -0,0 +1,232 @@
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-p4:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+@g = addrspace(3) global i32 89
+
+@const_zero_i8_as1 = addrspace(1) constant i8 0
+@const_zero_i32_as1 = addrspace(1) constant i32 0
+
+@const_zero_i8_as2 = addrspace(2) constant i8 0
+@const_zero_i32_as2 = addrspace(2) constant i32 0
+
+@const_zero_i8_as3 = addrspace(3) constant i8 0
+@const_zero_i32_as3 = addrspace(3) constant i32 0
+
+; Test constant folding of inttoptr (ptrtoint constantexpr)
+; The intermediate integer size is the same as the pointer size
+define i32 addrspace(3)* @test_constant_fold_inttoptr_as_pointer_same_size() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_same_size(
+; CHECK-NEXT: ret i32 addrspace(3)* @const_zero_i32_as3
+  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i32
+  %y = inttoptr i32 %x to i32 addrspace(3)*
+  ret i32 addrspace(3)* %y
+}
+
+; The intermediate integer size is larger than the pointer size
+define i32 addrspace(2)* @test_constant_fold_inttoptr_as_pointer_smaller() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller(
+; CHECK-NEXT: ret i32 addrspace(2)* @const_zero_i32_as2
+  %x = ptrtoint i32 addrspace(2)* @const_zero_i32_as2 to i16
+  %y = inttoptr i16 %x to i32 addrspace(2)*
+  ret i32 addrspace(2)* %y
+}
+
+; Different address spaces that are the same size, but they are
+; different so nothing should happen
+define i32 addrspace(4)* @test_constant_fold_inttoptr_as_pointer_smaller_different_as() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller_different_as(
+; CHECK-NEXT: ret i32 addrspace(4)* inttoptr (i16 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i16) to i32 addrspace(4)*)
+  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i16
+  %y = inttoptr i16 %x to i32 addrspace(4)*
+  ret i32 addrspace(4)* %y
+}
+
+; Make sure we don't introduce a bitcast between different sized
+; address spaces when folding this
+define i32 addrspace(2)* @test_constant_fold_inttoptr_as_pointer_smaller_different_size_as() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller_different_size_as(
+; CHECK-NEXT: ret i32 addrspace(2)* inttoptr (i32 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i32) to i32 addrspace(2)*)
+  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i32
+  %y = inttoptr i32 %x to i32 addrspace(2)*
+  ret i32 addrspace(2)* %y
+}
+
+; The intermediate integer size is too small, nothing should happen
+define i32 addrspace(3)* @test_constant_fold_inttoptr_as_pointer_larger() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_larger(
+; CHECK-NEXT: ret i32 addrspace(3)* inttoptr (i8 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i8) to i32 addrspace(3)*)
+  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i8
+  %y = inttoptr i8 %x to i32 addrspace(3)*
+  ret i32 addrspace(3)* %y
+}
+
+define i8 @const_fold_ptrtoint() {
+; CHECK-LABEL: @const_fold_ptrtoint(
+; CHECK-NEXT: ret i8 4
+  ret i8 ptrtoint (i32 addrspace(2)* inttoptr (i4 4 to i32 addrspace(2)*) to i8)
+}
+
+; Test that mask happens when the destination pointer is smaller than
+; the original
+define i8 @const_fold_ptrtoint_mask() {
+; CHECK-LABEL: @const_fold_ptrtoint_mask(
+; CHECK-NEXT: ret i8 1
+  ret i8 ptrtoint (i32 addrspace(3)* inttoptr (i32 257 to i32 addrspace(3)*) to i8)
+}
+
+; Address space 0 is too small for the correct mask, should mask with
+; 64-bits instead of 32
+define i64 @const_fold_ptrtoint_mask_small_as0() {
+; CHECK-LABEL: @const_fold_ptrtoint_mask_small_as0(
+; CHECK: ret i64 -1
+  ret i64 ptrtoint (i32 addrspace(1)* inttoptr (i128 -1 to i32 addrspace(1)*) to i64)
+}
+
+define i32 addrspace(3)* @const_inttoptr() {
+; CHECK-LABEL: @const_inttoptr(
+; CHECK-NEXT: ret i32 addrspace(3)* inttoptr (i16 4 to i32 addrspace(3)*)
+  %p = inttoptr i16 4 to i32 addrspace(3)*
+  ret i32 addrspace(3)* %p
+}
+
+define i16 @const_ptrtoint() {
+; CHECK-LABEL: @const_ptrtoint(
+; CHECK-NEXT: ret i16 ptrtoint (i32 addrspace(3)* @g to i16)
+  %i = ptrtoint i32 addrspace(3)* @g to i16
+  ret i16 %i
+}
+
+define i16 @const_inttoptr_ptrtoint() {
+; CHECK-LABEL: @const_inttoptr_ptrtoint(
+; CHECK-NEXT: ret i16 9
+  ret i16 ptrtoint (i32 addrspace(3)* inttoptr (i16 9 to i32 addrspace(3)*) to i16)
+}
+
+define i1 @constant_fold_cmp_constantexpr_inttoptr() {
+; CHECK-LABEL: @constant_fold_cmp_constantexpr_inttoptr(
+; CHECK-NEXT: ret i1 true
+  %x = icmp eq i32 addrspace(3)* inttoptr (i16 0 to i32 addrspace(3)*), null
+  ret i1 %x
+}
+
+define i1 @constant_fold_inttoptr_null(i16 %i) {
+; CHECK-LABEL: @constant_fold_inttoptr_null(
+; CHECK-NEXT: ret i1 false
+  %x = icmp eq i32 addrspace(3)* inttoptr (i16 99 to i32 addrspace(3)*), inttoptr (i16 0 to i32 addrspace(3)*)
+  ret i1 %x
+}
+
+define i1 @constant_fold_ptrtoint_null() {
+; CHECK-LABEL: @constant_fold_ptrtoint_null(
+; CHECK-NEXT: ret i1 false
+  %x = icmp eq i16 ptrtoint (i32 addrspace(3)* @g to i16), ptrtoint (i32 addrspace(3)* null to i16)
+  ret i1 %x
+}
+
+define i1 @constant_fold_ptrtoint_null_2() {
+; CHECK-LABEL: @constant_fold_ptrtoint_null_2(
+; CHECK-NEXT: ret i1 false
+  %x = icmp eq i16 ptrtoint (i32 addrspace(3)* null to i16), ptrtoint (i32 addrspace(3)* @g to i16)
+  ret i1 %x
+}
+
+define i1 @constant_fold_ptrtoint() {
+; CHECK-LABEL: @constant_fold_ptrtoint(
+; CHECK-NEXT: ret i1 true
+  %x = icmp eq i16 ptrtoint (i32 addrspace(3)* @g to i16), ptrtoint (i32 addrspace(3)* @g to i16)
+  ret i1 %x
+}
+
+define i1 @constant_fold_inttoptr() {
+; CHECK-LABEL: @constant_fold_inttoptr(
+; CHECK-NEXT: ret i1 false
+  %x = icmp eq i32 addrspace(3)* inttoptr (i16 99 to i32 addrspace(3)*), inttoptr (i16 27 to i32 addrspace(3)*)
+  ret i1 %x
+}
+
+@g_float_as3 = addrspace(3) global float zeroinitializer
+@g_v4f_as3 = addrspace(3) global <4 x float> zeroinitializer
+
+define float @constant_fold_bitcast_ftoi_load() {
+; CHECK-LABEL: @constant_fold_bitcast_ftoi_load(
+; CHECK: load float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4
+  %a = load float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4
+  ret float %a
+}
+
+define i32 @constant_fold_bitcast_itof_load() {
+; CHECK-LABEL: @constant_fold_bitcast_itof_load(
+; CHECK: load i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4
+  %a = load i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4
+  ret i32 %a
+}
+
+define <4 x i32> @constant_fold_bitcast_vector_as() {
+; CHECK-LABEL: @constant_fold_bitcast_vector_as(
+; CHECK: load <4 x float> addrspace(3)* @g_v4f_as3, align 16
+; CHECK: bitcast <4 x float> %1 to <4 x i32>
+  %a = load <4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*), align 4
+  ret <4 x i32> %a
+}
+
+@i32_array_as3 = addrspace(3) global [10 x i32] zeroinitializer
+
+define i32 @test_cast_gep_small_indices_as() {
+; CHECK-LABEL: @test_cast_gep_small_indices_as(
+; CHECK: load i32 addrspace(3)* getelementptr inbounds ([10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16
+   %p = getelementptr [10 x i32] addrspace(3)* @i32_array_as3, i7 0, i7 0
+   %x = load i32 addrspace(3)* %p, align 4
+   ret i32 %x
+}
+
+%struct.foo = type { float, float, [4 x i32], i32 addrspace(3)* }
+
+@constant_fold_global_ptr = addrspace(3) global %struct.foo {
+  float 0.0,
+  float 0.0,
+  [4 x i32] zeroinitializer,
+  i32 addrspace(3)* getelementptr ([10 x i32] addrspace(3)* @i32_array_as3, i64 0, i64 0)
+}
+
+define i32 @test_cast_gep_large_indices_as() {
+; CHECK-LABEL: @test_cast_gep_large_indices_as(
+; CHECK: load i32 addrspace(3)* getelementptr inbounds ([10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16
+   %p = getelementptr [10 x i32] addrspace(3)* @i32_array_as3, i64 0, i64 0
+   %x = load i32 addrspace(3)* %p, align 4
+   ret i32 %x
+}
+
+define i32 @test_constant_cast_gep_struct_indices_as() {
+; CHECK-LABEL: @test_constant_cast_gep_struct_indices_as(
+; CHECK: load i32 addrspace(3)* getelementptr inbounds (%struct.foo addrspace(3)* @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 8
+  %x = getelementptr %struct.foo addrspace(3)* @constant_fold_global_ptr, i18 0, i32 2, i12 2
+  %y = load i32 addrspace(3)* %x, align 4
+  ret i32 %y
+}
+
+@constant_data_as3 = addrspace(3) constant [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5]
+
+define i32 @test_read_data_from_global_as3() {
+; CHECK-LABEL: @test_read_data_from_global_as3(
+; CHECK-NEXT: ret i32 2
+  %x = getelementptr [5 x i32] addrspace(3)* @constant_data_as3, i32 0, i32 1
+  %y = load i32 addrspace(3)* %x, align 4
+  ret i32 %y
+}
+
+@a = addrspace(1) constant i32 9
+@b = addrspace(1) constant i32 23
+@c = addrspace(1) constant i32 34
+@d = addrspace(1) constant i32 99
+
+@ptr_array = addrspace(2) constant [4 x i32 addrspace(1)*] [ i32 addrspace(1)* @a, i32 addrspace(1)* @b, i32 addrspace(1)* @c, i32 addrspace(1)* @d]
+@indirect = addrspace(0) constant i32 addrspace(1)* addrspace(2)* getelementptr inbounds ([4 x i32 addrspace(1)*] addrspace(2)* @ptr_array, i1 0, i32 2)
+
+define i32 @constant_through_array_as_ptrs() {
+; CHECK-LABEL: @constant_through_array_as_ptrs(
+; CHECK-NEXT: ret i32 34
+  %p = load i32 addrspace(1)* addrspace(2)* addrspace(0)* @indirect, align 4
+  %a = load i32 addrspace(1)* addrspace(2)* %p, align 4
+  %b = load i32 addrspace(1)* %a, align 4
+  ret i32 %b
+}
diff --git a/test/Transforms/InstCombine/constant-fold-gep.ll b/test/Transforms/InstCombine/constant-fold-gep.ll
index 9f82e66..5fb5602 100644
--- a/test/Transforms/InstCombine/constant-fold-gep.ll
+++ b/test/Transforms/InstCombine/constant-fold-gep.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target datalayout = "E-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
 ; Constant folding should fix notionally out-of-bounds indices
 ; and add inbounds keywords.
@@ -72,3 +72,21 @@ entry:
   ret i64 %E
   ; CHECK: ret i64 1000
 }
+
+@X_as1 = addrspace(1) global [1000 x i8] zeroinitializer, align 16
+
+define i16 @test2_as1() {
+; CHECK-LABEL: @test2_as1(
+  ; CHECK: ret i16 1000
+
+entry:
+  %A = bitcast i8 addrspace(1)* getelementptr inbounds ([1000 x i8] addrspace(1)* @X_as1, i64 1, i64 0) to i8 addrspace(1)*
+  %B = bitcast i8 addrspace(1)* getelementptr inbounds ([1000 x i8] addrspace(1)* @X_as1, i64 0, i64 0) to i8 addrspace(1)*
+
+  %B2 = ptrtoint i8 addrspace(1)* %B to i16
+  %C = sub i16 0, %B2
+  %D = getelementptr i8 addrspace(1)* %A, i16 %C
+  %E = ptrtoint i8 addrspace(1)* %D to i16
+
+  ret i16 %E
+}
diff --git a/test/Transforms/InstCombine/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll
index a76c353..2e3785f 100644
--- a/test/Transforms/InstCombine/debug-line.ll
+++ b/test/Transforms/InstCombine/debug-line.ll
@@ -12,15 +12,17 @@ define void @foo() nounwind ssp {
 declare i32 @printf(i8*, ...)
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!10}
 !llvm.dbg.sp = !{!0}
 
 !0 = metadata !{i32 589870, metadata !8, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ]
 !2 = metadata !{i32 589841, metadata !8, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 5, i32 2, metadata !6, null}
 !6 = metadata !{i32 589835, metadata !8, metadata !0, i32 4, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
 !7 = metadata !{i32 6, i32 1, metadata !6, null}
 !8 = metadata !{metadata !"m.c", metadata !"/private/tmp"}
 !9 = metadata !{metadata !0}
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index 2f080bf..75082dc 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -2,7 +2,7 @@
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
-declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone
 
 declare i8* @foo(i8*, i32, i64, i64) nounwind
 
@@ -23,18 +23,19 @@ entry:
   %tmp1 = load i32* %__val.addr, align 4, !dbg !21
   %tmp2 = load i64* %__len.addr, align 8, !dbg !21
   %tmp3 = load i8** %__dest.addr, align 8, !dbg !21
-  %0 = call i64 @llvm.objectsize.i64(i8* %tmp3, i1 false), !dbg !21
+  %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp3, i1 false), !dbg !21
   %call = call i8* @foo(i8* %tmp, i32 %tmp1, i64 %tmp2, i64 %0), !dbg !21
   ret i8* %call, !dbg !21
 }
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!30}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !27, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !27, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ] [line 79] [local] [def] [foobar]
 !2 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.0 (trunk 127710)", i1 true, metadata !"", i32 0, metadata !29, metadata !29, metadata !24, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !27, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{i32 786453, metadata !27, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6}
 !6 = metadata !{i32 786447, null, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
 !7 = metadata !{i32 786689, metadata !1, metadata !"__val", metadata !2, i32 33554510, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
@@ -55,3 +56,4 @@ entry:
 !27 = metadata !{metadata !"string.h", metadata !"Game"}
 !28 = metadata !{metadata !"bits.c", metadata !"Game"}
 !29 = metadata !{i32 0}
+!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
index e5448ee..5cacb59 100644
--- a/test/Transforms/InstCombine/double-float-shrink-1.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -263,6 +263,7 @@ define double @sin_test2(float %f) nounwind readnone {
    ret double %call
 ; CHECK: call double @sin(double %conv)
 }
+
 define float @sqrt_test(float %f) nounwind readnone {
 ; CHECK: sqrt_test
    %conv = fpext float %f to double
@@ -272,6 +273,15 @@ define float @sqrt_test(float %f) nounwind readnone {
 ; CHECK: call float @sqrtf(float %f)
 }
 
+define float @sqrt_int_test(float %f) nounwind readnone {
+; CHECK: sqrt_int_test
+   %conv = fpext float %f to double
+   %call = call double @llvm.sqrt.f64(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
+; CHECK: call float @llvm.sqrt.f32(float %f)
+}
+
 define double @sqrt_test2(float %f) nounwind readnone {
 ; CHECK: sqrt_test2
    %conv = fpext float %f to double
@@ -331,3 +341,6 @@ declare double @acos(double) nounwind readnone
 declare double @acosh(double) nounwind readnone
 declare double @asin(double) nounwind readnone
 declare double @asinh(double) nounwind readnone
+
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
diff --git a/test/Transforms/InstCombine/enforce-known-alignment.ll b/test/Transforms/InstCombine/enforce-known-alignment.ll
index 6645d99..46bb605 100644
--- a/test/Transforms/InstCombine/enforce-known-alignment.ll
+++ b/test/Transforms/InstCombine/enforce-known-alignment.ll
@@ -1,8 +1,12 @@
-; RUN: opt < %s -instcombine -S | grep alloca | grep "align 16"
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+; RUN: opt  -instcombine -S %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.6"
 
 define void @foo(i32) {
+; CHECK-LABEL: @foo(
+; CHECK: alloca
+; CHECK: align 16
 	%2 = alloca [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>], align 16		; <[3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>]*> [#uses=1]
 	%3 = getelementptr [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>]* %2, i32 0, i32 0		; <<{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>*> [#uses=1]
 	%4 = getelementptr <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>* %3, i32 0, i32 0		; <{ { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } }*> [#uses=1]
@@ -11,8 +15,24 @@ define void @foo(i32) {
 	%7 = getelementptr { [8 x i16] }* %6, i32 0, i32 0		; <[8 x i16]*> [#uses=1]
 	%8 = getelementptr [8 x i16]* %7, i32 0, i32 0		; <i16*> [#uses=1]
 	store i16 0, i16* %8, align 16
-        call void @bar(i16* %8)
+    call void @bar(i16* %8)
 	ret void
 }
 
 declare void @bar(i16*)
+
+define void @foo_as1(i32 %a, [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>] addrspace(1)* %b) {
+; CHECK-LABEL: @foo_as1(
+; CHECK: align 16
+  %1 = getelementptr [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>] addrspace(1)* %b, i32 0, i32 0        ; <<{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>*> [#uses=1]
+  %2 = getelementptr <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }> addrspace(1)* %1, i32 0, i32 0      ; <{ { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } }*> [#uses=1]
+  %3 = getelementptr { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } addrspace(1)* %2, i32 0, i32 0        ; <{ [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 }*> [#uses=1]
+  %4 = bitcast { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } addrspace(1)* %3 to { [8 x i16] } addrspace(1)*     ; <{ [8 x i16] }*> [#uses=1]
+  %5 = getelementptr { [8 x i16] } addrspace(1)* %4, i32 0, i32 0     ; <[8 x i16]*> [#uses=1]
+  %6 = getelementptr [8 x i16] addrspace(1)* %5, i32 0, i32 0     ; <i16*> [#uses=1]
+  store i16 0, i16 addrspace(1)* %6, align 16
+  call void @bar_as1(i16 addrspace(1)* %6)
+  ret void
+}
+
+declare void @bar_as1(i16 addrspace(1)*)
diff --git a/test/Transforms/InstCombine/err-rep-cold.ll b/test/Transforms/InstCombine/err-rep-cold.ll
new file mode 100644
index 0000000..0cbafc4
--- /dev/null
+++ b/test/Transforms/InstCombine/err-rep-cold.ll
@@ -0,0 +1,77 @@
+; Test the static branch probability heuristics for error-reporting functions.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+
+@stdout = external global %struct._IO_FILE*
+@stderr = external global %struct._IO_FILE*
+@.str = private unnamed_addr constant [13 x i8] c"an error: %d\00", align 1
+@.str1 = private unnamed_addr constant [9 x i8] c"an error\00", align 1
+
+define i32 @test1(i32 %a) #0 {
+; CHECK-LABEL: @test1
+entry:
+  %cmp = icmp sgt i32 %a, 8
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                                          ; preds = %entry
+  %0 = load %struct._IO_FILE** @stderr, align 8
+  %call = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8]* @.str, i64 0, i64 0), i32 %a) #1
+  br label %return
+
+; CHECK: %call = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8]* @.str, i64 0, i64 0), i32 %a) #[[AT1:[0-9]+]]
+
+return:                                           ; preds = %entry, %if.then
+  %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
+  ret i32 %retval.0
+}
+
+declare i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture readonly, ...) #1
+
+define i32 @test2(i32 %a) #0 {
+; CHECK-LABEL: @test2
+entry:
+  %cmp = icmp sgt i32 %a, 8
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                                          ; preds = %entry
+  %0 = load %struct._IO_FILE** @stderr, align 8
+  %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0)
+  br label %return
+
+; CHECK: tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) #[[AT2:[0-9]+]]
+
+return:                                           ; preds = %entry, %if.then
+  %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
+  ret i32 %retval.0
+}
+
+declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #1
+
+define i32 @test3(i32 %a) #0 {
+; CHECK-LABEL: @test3
+entry:
+  %cmp = icmp sgt i32 %a, 8
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                                          ; preds = %entry
+  %0 = load %struct._IO_FILE** @stdout, align 8
+  %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0)
+  br label %return
+
+; CHECK-NOT: tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) #[[AT2]]
+
+return:                                           ; preds = %entry, %if.then
+  %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
+  ret i32 %retval.0
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
+; CHECK: attributes #[[AT1]] = { cold nounwind }
+; CHECK: attributes #[[AT2]] = { cold }
+
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index a9a7015..d8ba2a5 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -202,6 +202,18 @@ define float @fmul2(float %f1) {
 ; CHECK: fdiv fast float 1.200000e+07, %f1
 }
 
+; X/C1 * C2 => X * (C2/C1) is disabled if X/C1 has multiple uses
+@fmul2_external = external global float
+define float @fmul2_disable(float %f1) {
+  %div = fdiv fast float 1.000000e+00, %f1 
+  store float %div, float* @fmul2_external
+  %mul = fmul fast float %div, 2.000000e+00
+  ret float %mul
+; CHECK-LABEL: @fmul2_disable
+; CHECK: store
+; CHECK: fmul fast
+}
+
 ; X/C1 * C2 => X * (C2/C1) (if C2/C1 is normal Fp)
 define float @fmul3(float %f1, float %f2) {
   %t1 = fdiv float %f1, 2.0e+3
diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll
index 8f0b38f..1dec11d 100644
--- a/test/Transforms/InstCombine/ffs-1.ll
+++ b/test/Transforms/InstCombine/ffs-1.ll
@@ -1,7 +1,7 @@
 ; Test that the ffs* library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=LINUX
+; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=CHECK-LINUX
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll
index cf57bed..402ee52 100644
--- a/test/Transforms/InstCombine/fmul.ll
+++ b/test/Transforms/InstCombine/fmul.ll
@@ -70,3 +70,26 @@ define float @test7(float %x, float %y) {
 ; CHECK-LABEL: @test7(
 ; CHECK: fsub float -0.000000e+00, %x
 }
+
+; Don't crash when attempting to cast a constant FMul to an instruction.
+define void @test8(i32* %inout) {
+entry:
+  %0 = load i32* %inout, align 4
+  %conv = uitofp i32 %0 to float
+  %vecinit = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, float %conv, i32 3
+  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vecinit
+  %1 = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %mul = fmul <4 x float> zeroinitializer, %1
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %local_var_7.0 = phi <4 x float> [ %mul, %entry ], [ %2, %for.body ]
+  br i1 undef, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = insertelement <4 x float> %local_var_7.0, float 0.000000e+00, i32 2
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
diff --git a/test/Transforms/InstCombine/fold-vector-select.ll b/test/Transforms/InstCombine/fold-vector-select.ll
index 2cb970b..b58d9dc 100644
--- a/test/Transforms/InstCombine/fold-vector-select.ll
+++ b/test/Transforms/InstCombine/fold-vector-select.ll
@@ -1,4 +1,6 @@
-; RUN: opt < %s -instcombine -S | not grep select
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK-NOT: select
 
 define void @foo(<4 x i32> *%A, <4 x i32> *%B, <4 x i32> *%C, <4 x i32> *%D,
                  <4 x i32> *%E, <4 x i32> *%F, <4 x i32> *%G, <4 x i32> *%H,
diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll
index 09f0532..05d1b48 100644
--- a/test/Transforms/InstCombine/fpcast.ll
+++ b/test/Transforms/InstCombine/fpcast.ll
@@ -31,4 +31,16 @@ define half @test4(float %a) {
   ret half %c
 }
 
+; CHECK: test5
+define half @test5(float %a, float %b, float %c) {
+; CHECK: fcmp ogt
+; CHECK: fptrunc
+; CHECK: select
+; CHECK: half 0xH3C00
+  %d = fcmp ogt float %a, %b
+  %e = select i1 %d, float %c, float 1.0
+  %f = fptrunc float %e to half
+  ret half %f
+}
+
 declare float @llvm.fabs.f32(float) nounwind readonly
diff --git a/test/Transforms/InstCombine/fprintf-1.ll b/test/Transforms/InstCombine/fprintf-1.ll
index 1b7c104..3f6a314 100644
--- a/test/Transforms/InstCombine/fprintf-1.ll
+++ b/test/Transforms/InstCombine/fprintf-1.ll
@@ -1,7 +1,7 @@
 ; Test that the fprintf library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
diff --git a/test/Transforms/InstCombine/gep-addrspace.ll b/test/Transforms/InstCombine/gep-addrspace.ll
index dfe12db..24c355d 100644
--- a/test/Transforms/InstCombine/gep-addrspace.ll
+++ b/test/Transforms/InstCombine/gep-addrspace.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-pc-win32"
 define void @func(%myStruct addrspace(1)* nocapture %p) nounwind {
 ST:
   %A = getelementptr inbounds %myStruct addrspace(1)* %p, i64 0
-  %B = bitcast %myStruct addrspace(1)* %A to %myStruct*
+  %B = addrspacecast %myStruct addrspace(1)* %A to %myStruct*
   %C = getelementptr inbounds %myStruct* %B, i32 0, i32 1
   %D = getelementptr inbounds [3 x float]* %C, i32 0, i32 2
   %E = load float* %D, align 4
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index 90f144a..c29a7dc 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -1,6 +1,7 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-target datalayout = "e-p:64:64"
+target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32"
+
 %intstruct = type { i32 }
 %pair = type { i32, i32 }
 %struct.B = type { double }
@@ -8,15 +9,23 @@ target datalayout = "e-p:64:64"
 
 
 @Global = constant [10 x i8] c"helloworld"
+@Global_as1 = addrspace(1) constant [10 x i8] c"helloworld"
 
 ; Test noop elimination
 define i32* @test1(i32* %I) {
-        %A = getelementptr i32* %I, i64 0 
+        %A = getelementptr i32* %I, i64 0
         ret i32* %A
 ; CHECK-LABEL: @test1(
 ; CHECK: ret i32* %I
 }
 
+define i32 addrspace(1)* @test1_as1(i32 addrspace(1)* %I) {
+  %A = getelementptr i32 addrspace(1)* %I, i64 0
+  ret i32 addrspace(1)* %A
+; CHECK-LABEL: @test1_as1(
+; CHECK: ret i32 addrspace(1)* %I
+}
+
 ; Test noop elimination
 define i32* @test2(i32* %I) {
         %A = getelementptr i32* %I
@@ -36,7 +45,7 @@ define i32* @test3(i32* %I) {
 
 ; Test that two getelementptr insts fold
 define i32* @test4({ i32 }* %I) {
-        %A = getelementptr { i32 }* %I, i64 1 
+        %A = getelementptr { i32 }* %I, i64 1
         %B = getelementptr { i32 }* %A, i64 0, i32 0
         ret i32* %B
 ; CHECK-LABEL: @test4(
@@ -45,17 +54,53 @@ define i32* @test4({ i32 }* %I) {
 
 define void @test5(i8 %B) {
         ; This should be turned into a constexpr instead of being an instruction
-        %A = getelementptr [10 x i8]* @Global, i64 0, i64 4 
+        %A = getelementptr [10 x i8]* @Global, i64 0, i64 4
         store i8 %B, i8* %A
         ret void
 ; CHECK-LABEL: @test5(
 ; CHECK: store i8 %B, i8* getelementptr inbounds ([10 x i8]* @Global, i64 0, i64 4)
 }
 
+define void @test5_as1(i8 %B) {
+        ; This should be turned into a constexpr instead of being an instruction
+        %A = getelementptr [10 x i8] addrspace(1)* @Global_as1, i16 0, i16 4
+        store i8 %B, i8 addrspace(1)* %A
+        ret void
+; CHECK-LABEL: @test5_as1(
+; CHECK: store i8 %B, i8 addrspace(1)* getelementptr inbounds ([10 x i8] addrspace(1)* @Global_as1, i16 0, i16 4)
+}
+
+%as1_ptr_struct = type { i32 addrspace(1)* }
+%as2_ptr_struct = type { i32 addrspace(2)* }
+
+@global_as2 = addrspace(2) global i32 zeroinitializer
+@global_as1_as2_ptr = addrspace(1) global %as2_ptr_struct { i32 addrspace(2)* @global_as2 }
+
+; This should be turned into a constexpr instead of being an instruction
+define void @test_evaluate_gep_nested_as_ptrs(i32 addrspace(2)* %B) {
+; CHECK-LABEL: @test_evaluate_gep_nested_as_ptrs(
+; CHECK-NEXT: store i32 addrspace(2)* %B, i32 addrspace(2)* addrspace(1)* getelementptr inbounds (%as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i16 0, i32 0), align 8
+; CHECK-NEXT: ret void
+  %A = getelementptr %as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i16 0, i32 0
+  store i32 addrspace(2)* %B, i32 addrspace(2)* addrspace(1)* %A
+  ret void
+}
+
+@arst = addrspace(1) global [4 x i8 addrspace(2)*] zeroinitializer
+
+define void @test_evaluate_gep_as_ptrs_array(i8 addrspace(2)* %B) {
+; CHECK-LABEL: @test_evaluate_gep_as_ptrs_array(
+; CHECK-NEXT: store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* getelementptr inbounds ([4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2), align 4
+
+; CHECK-NEXT: ret void
+  %A = getelementptr [4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2
+  store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* %A
+  ret void
+}
 
 define i32* @test7(i32* %I, i64 %C, i64 %D) {
-        %A = getelementptr i32* %I, i64 %C 
-        %B = getelementptr i32* %A, i64 %D 
+        %A = getelementptr i32* %I, i64 %C
+        %B = getelementptr i32* %A, i64 %D
         ret i32* %B
 ; CHECK-LABEL: @test7(
 ; CHECK: %A.sum = add i64 %C, %D
@@ -64,8 +109,8 @@ define i32* @test7(i32* %I, i64 %C, i64 %D) {
 
 define i8* @test8([10 x i32]* %X) {
         ;; Fold into the cast.
-        %A = getelementptr [10 x i32]* %X, i64 0, i64 0 
-        %B = bitcast i32* %A to i8*     
+        %A = getelementptr [10 x i32]* %X, i64 0, i64 0
+        %B = bitcast i32* %A to i8*
         ret i8* %B
 ; CHECK-LABEL: @test8(
 ; CHECK: bitcast [10 x i32]* %X to i8*
@@ -73,7 +118,7 @@ define i8* @test8([10 x i32]* %X) {
 
 define i32 @test9() {
         %A = getelementptr { i32, double }* null, i32 0, i32 1
-        %B = ptrtoint double* %A to i32        
+        %B = ptrtoint double* %A to i32
         ret i32 %B
 ; CHECK-LABEL: @test9(
 ; CHECK: ret i32 8
@@ -83,15 +128,15 @@ define i1 @test10({ i32, i32 }* %x, { i32, i32 }* %y) {
         %tmp.1 = getelementptr { i32, i32 }* %x, i32 0, i32 1
         %tmp.3 = getelementptr { i32, i32 }* %y, i32 0, i32 1
         ;; seteq x, y
-        %tmp.4 = icmp eq i32* %tmp.1, %tmp.3       
+        %tmp.4 = icmp eq i32* %tmp.1, %tmp.3
         ret i1 %tmp.4
 ; CHECK-LABEL: @test10(
 ; CHECK: icmp eq { i32, i32 }* %x, %y
 }
 
 define i1 @test11({ i32, i32 }* %X) {
-        %P = getelementptr { i32, i32 }* %X, i32 0, i32 0 
-        %Q = icmp eq i32* %P, null             
+        %P = getelementptr { i32, i32 }* %X, i32 0, i32 0
+        %Q = icmp eq i32* %P, null
         ret i1 %Q
 ; CHECK-LABEL: @test11(
 ; CHECK: icmp eq { i32, i32 }* %X, null
@@ -105,11 +150,11 @@ entry:
   store i32 10, i32* %g3, align 4
 
   %g4 = getelementptr %struct.A* %a, i32 0, i32 0
-  
+
   %new_a = bitcast %struct.B* %g4 to %struct.A*
 
-  %g5 = getelementptr %struct.A* %new_a, i32 0, i32 1	
-  %a_a = load i32* %g5, align 4	
+  %g5 = getelementptr %struct.A* %new_a, i32 0, i32 1
+  %a_a = load i32* %g5, align 4
   ret i32 %a_a
 ; CHECK-LABEL:      @test12(
 ; CHECK:      getelementptr %struct.A* %a, i64 0, i32 1
@@ -129,8 +174,68 @@ define i1 @test13(i64 %X, %S* %P) {
 ; CHECK:    %C = icmp eq i64 %X, -1
 }
 
-
-@G = external global [3 x i8]      
+define <2 x i1> @test13_vector(<2 x i64> %X, <2 x %S*> %P) nounwind {
+; CHECK-LABEL: @test13_vector(
+; CHECK-NEXT: shl nuw <2 x i64> %X, <i64 2, i64 2>
+; CHECK-NEXT: add <2 x i64> %A.idx, <i64 4, i64 4>
+; CHECK-NEXT: icmp eq <2 x i64> %A.offs, zeroinitializer
+  %A = getelementptr inbounds <2 x %S*> %P, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 1>, <2 x i64> %X
+  %B = getelementptr inbounds <2 x %S*> %P, <2 x i64> <i64 0, i64 0>, <2 x i32> <i32 0, i32 0>
+  %C = icmp eq <2 x i32*> %A, %B
+  ret <2 x i1> %C
+}
+
+define i1 @test13_as1(i16 %X, %S addrspace(1)* %P) {
+; CHECK-LABEL: @test13_as1(
+; CHECK-NEXT:  %C = icmp eq i16 %X, -1
+; CHECK-NEXT: ret i1 %C
+  %A = getelementptr inbounds %S addrspace(1)* %P, i16 0, i32 1, i16 %X
+  %B = getelementptr inbounds %S addrspace(1)* %P, i16 0, i32 0
+  %C = icmp eq i32 addrspace(1)* %A, %B
+  ret i1 %C
+}
+
+define <2 x i1> @test13_vector_as1(<2 x i16> %X, <2 x %S addrspace(1)*> %P) {
+; CHECK-LABEL: @test13_vector_as1(
+; CHECK-NEXT: shl nuw <2 x i16> %X, <i16 2, i16 2>
+; CHECK-NEXT: add <2 x i16> %A.idx, <i16 4, i16 4>
+; CHECK-NEXT: icmp eq <2 x i16> %A.offs, zeroinitializer
+; CHECK-NEXT: ret <2 x i1>
+  %A = getelementptr inbounds <2 x %S addrspace(1)*> %P, <2 x i16> <i16 0, i16 0>, <2 x i32> <i32 1, i32 1>, <2 x i16> %X
+  %B = getelementptr inbounds <2 x %S addrspace(1)*> %P, <2 x i16> <i16 0, i16 0>, <2 x i32> <i32 0, i32 0>
+  %C = icmp eq <2 x i32 addrspace(1)*> %A, %B
+  ret <2 x i1> %C
+}
+
+define i1 @test13_i32(i32 %X, %S* %P) {
+; CHECK-LABEL: @test13_i32(
+; CHECK: %C = icmp eq i32 %X, -1
+  %A = getelementptr inbounds %S* %P, i32 0, i32 1, i32 %X
+  %B = getelementptr inbounds %S* %P, i32 0, i32 0
+  %C = icmp eq i32* %A, %B
+  ret i1 %C
+}
+
+define i1 @test13_i16(i16 %X, %S* %P) {
+; CHECK-LABEL: @test13_i16(
+; CHECK: %C = icmp eq i16 %X, -1
+  %A = getelementptr inbounds %S* %P, i16 0, i32 1, i16 %X
+  %B = getelementptr inbounds %S* %P, i16 0, i32 0
+  %C = icmp eq i32* %A, %B
+  ret i1 %C
+}
+
+define i1 @test13_i128(i128 %X, %S* %P) {
+; CHECK-LABEL: @test13_i128(
+; CHECK: %C = icmp eq i64 %1, -1
+  %A = getelementptr inbounds %S* %P, i128 0, i32 1, i128 %X
+  %B = getelementptr inbounds %S* %P, i128 0, i32 0
+  %C = icmp eq i32* %A, %B
+  ret i1 %C
+}
+
+
+@G = external global [3 x i8]
 define i8* @test14(i32 %Idx) {
         %idx = zext i32 %Idx to i64
         %tmp = getelementptr i8* getelementptr ([3 x i8]* @G, i32 0, i32 0), i64 %idx
@@ -151,7 +256,7 @@ define i32 *@test15(i64 %X) {
 
 
 define i32* @test16(i32* %X, i32 %Idx) {
-        %R = getelementptr i32* %X, i32 %Idx       
+        %R = getelementptr i32* %X, i32 %Idx
         ret i32* %R
 ; CHECK-LABEL: @test16(
 ; CHECK: sext i32 %Idx to i64
@@ -164,7 +269,7 @@ define i1 @test17(i16* %P, i32 %I, i32 %J) {
         %C = icmp ult i16* %X, %Y
         ret i1 %C
 ; CHECK-LABEL: @test17(
-; CHECK: %C = icmp slt i32 %I, %J 
+; CHECK: %C = icmp slt i32 %I, %J
 }
 
 define i1 @test18(i16* %P, i32 %I) {
@@ -175,6 +280,55 @@ define i1 @test18(i16* %P, i32 %I) {
 ; CHECK: %C = icmp slt i32 %I, 0
 }
 
+; Larger than the pointer size for a non-zero address space
+define i1 @test18_as1(i16 addrspace(1)* %P, i32 %I) {
+; CHECK-LABEL: @test18_as1(
+; CHECK-NEXT: %1 = trunc i32 %I to i16
+; CHECK-NEXT: %C = icmp slt i16 %1, 0
+; CHECK-NEXT: ret i1 %C
+  %X = getelementptr inbounds i16 addrspace(1)* %P, i32 %I
+  %C = icmp ult i16 addrspace(1)* %X, %P
+  ret i1 %C
+}
+
+; Smaller than the pointer size for a non-zero address space
+define i1 @test18_as1_i32(i16 addrspace(1)* %P, i32 %I) {
+; CHECK-LABEL: @test18_as1_i32(
+; CHECK-NEXT: %1 = trunc i32 %I to i16
+; CHECK-NEXT: %C = icmp slt i16 %1, 0
+; CHECK-NEXT: ret i1 %C
+  %X = getelementptr inbounds i16 addrspace(1)* %P, i32 %I
+  %C = icmp ult i16 addrspace(1)* %X, %P
+  ret i1 %C
+}
+
+; Smaller than pointer size
+define i1 @test18_i16(i16* %P, i16 %I) {
+; CHECK-LABEL: @test18_i16(
+; CHECK: %C = icmp slt i16 %I, 0
+  %X = getelementptr inbounds i16* %P, i16 %I
+  %C = icmp ult i16* %X, %P
+  ret i1 %C
+}
+
+; Same as pointer size
+define i1 @test18_i64(i16* %P, i64 %I) {
+; CHECK-LABEL: @test18_i64(
+; CHECK: %C = icmp slt i64 %I, 0
+  %X = getelementptr inbounds i16* %P, i64 %I
+  %C = icmp ult i16* %X, %P
+  ret i1 %C
+}
+
+; Larger than the pointer size
+define i1 @test18_i128(i16* %P, i128 %I) {
+; CHECK-LABEL: @test18_i128(
+; CHECK: %C = icmp slt i64 %1, 0
+  %X = getelementptr inbounds i16* %P, i128 %I
+  %C = icmp ult i16* %X, %P
+  ret i1 %C
+}
+
 define i32 @test19(i32* %P, i32 %A, i32 %B) {
         %tmp.4 = getelementptr inbounds i32* %P, i32 %A
         %tmp.9 = getelementptr inbounds i32* %P, i32 %B
@@ -194,6 +348,15 @@ define i32 @test20(i32* %P, i32 %A, i32 %B) {
 ; CHECK: icmp eq i32 %A, 0
 }
 
+define i32 @test20_as1(i32 addrspace(1)* %P, i32 %A, i32 %B) {
+  %tmp.4 = getelementptr inbounds i32 addrspace(1)* %P, i32 %A
+  %tmp.6 = icmp eq i32 addrspace(1)* %tmp.4, %P
+  %tmp.7 = zext i1 %tmp.6 to i32
+  ret i32 %tmp.7
+; CHECK-LABEL: @test20_as1(
+; CHECK: icmp eq i16 %1, 0
+}
+
 
 define i32 @test21() {
         %pbob1 = alloca %intstruct
@@ -210,8 +373,8 @@ define i32 @test21() {
 @B = global i32 2               ; <i32*> [#uses=1]
 
 define i1 @test22() {
-        %C = icmp ult i32* getelementptr (i32* @A, i64 1), 
-                           getelementptr (i32* @B, i64 2) 
+        %C = icmp ult i32* getelementptr (i32* @A, i64 1),
+                           getelementptr (i32* @B, i64 2)
         ret i1 %C
 ; CHECK-LABEL: @test22(
 ; CHECK: icmp ult (i32* getelementptr inbounds (i32* @A, i64 1), i32* getelementptr (i32* @B, i64 2))
@@ -262,15 +425,15 @@ define i1 @test26(i8* %arr) {
 
 define i32 @test27(%struct.compat_siginfo* %to, %struct.siginfo_t* %from) {
 entry:
-	%from_addr = alloca %struct.siginfo_t*	
-	%tmp344 = load %struct.siginfo_t** %from_addr, align 8	
+	%from_addr = alloca %struct.siginfo_t*
+	%tmp344 = load %struct.siginfo_t** %from_addr, align 8
 	%tmp345 = getelementptr %struct.siginfo_t* %tmp344, i32 0, i32 3
 	%tmp346 = getelementptr { { i32, i32, [0 x i8], %struct.sigval_t, i32 }, [88 x i8] }* %tmp345, i32 0, i32 0
-	%tmp346347 = bitcast { i32, i32, [0 x i8], %struct.sigval_t, i32 }* %tmp346 to { i32, i32, %struct.sigval_t }*	
+	%tmp346347 = bitcast { i32, i32, [0 x i8], %struct.sigval_t, i32 }* %tmp346 to { i32, i32, %struct.sigval_t }*
 	%tmp348 = getelementptr { i32, i32, %struct.sigval_t }* %tmp346347, i32 0, i32 2
 	%tmp349 = getelementptr %struct.sigval_t* %tmp348, i32 0, i32 0
 	%tmp349350 = bitcast i8** %tmp349 to i32*
-	%tmp351 = load i32* %tmp349350, align 8	
+	%tmp351 = load i32* %tmp349350, align 8
 	%tmp360 = call i32 asm sideeffect "...",
         "=r,ir,*m,i,0,~{dirflag},~{fpsr},~{flags}"( i32 %tmp351,
          %struct.__large_struct* null, i32 -14, i32 0 )
@@ -280,28 +443,28 @@ entry:
 
 ; PR1978
 	%struct.x = type <{ i8 }>
-@.str = internal constant [6 x i8] c"Main!\00"	
-@.str1 = internal constant [12 x i8] c"destroy %p\0A\00"	
+@.str = internal constant [6 x i8] c"Main!\00"
+@.str1 = internal constant [12 x i8] c"destroy %p\0A\00"
 
 define i32 @test28() nounwind  {
 entry:
 	%orientations = alloca [1 x [1 x %struct.x]]
-	%tmp3 = call i32 @puts( i8* getelementptr ([6 x i8]* @.str, i32 0, i32 0) ) nounwind 
+	%tmp3 = call i32 @puts( i8* getelementptr ([6 x i8]* @.str, i32 0, i32 0) ) nounwind
 	%tmp45 = getelementptr inbounds [1 x [1 x %struct.x]]* %orientations, i32 1, i32 0, i32 0
 	%orientations62 = getelementptr [1 x [1 x %struct.x]]* %orientations, i32 0, i32 0, i32 0
 	br label %bb10
 
 bb10:
 	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb10 ]
-	%tmp.0.reg2mem.0.rec = mul i32 %indvar, -1	
-	%tmp12.rec = add i32 %tmp.0.reg2mem.0.rec, -1	
+	%tmp.0.reg2mem.0.rec = mul i32 %indvar, -1
+	%tmp12.rec = add i32 %tmp.0.reg2mem.0.rec, -1
 	%tmp12 = getelementptr inbounds %struct.x* %tmp45, i32 %tmp12.rec
 	%tmp16 = call i32 (i8*, ...)* @printf( i8* getelementptr ([12 x i8]* @.str1, i32 0, i32 0), %struct.x* %tmp12 ) nounwind
 	%tmp84 = icmp eq %struct.x* %tmp12, %orientations62
 	%indvar.next = add i32 %indvar, 1
 	br i1 %tmp84, label %bb17, label %bb10
 
-bb17:	
+bb17:
 	ret i32 0
 ; CHECK-LABEL: @test28(
 ; CHECK: icmp eq i32 %indvar, 0
@@ -318,7 +481,7 @@ declare i32 @printf(i8*, ...)
 	%T = type <{ i64, i64, i64 }>
 define i32 @test29(i8* %start, i32 %X) nounwind {
 entry:
-	%tmp3 = load i64* null		
+	%tmp3 = load i64* null
 	%add.ptr = getelementptr i8* %start, i64 %tmp3
 	%tmp158 = load i32* null
 	%add.ptr159 = getelementptr %T* null, i32 %tmp158
@@ -356,7 +519,7 @@ declare void @test30f(i32*)
 define i1 @test31(i32* %A) {
         %B = getelementptr i32* %A, i32 1
         %C = getelementptr i32* %A, i64 1
-        %V = icmp eq i32* %B, %C 
+        %V = icmp eq i32* %B, %C
         ret i1 %V
 ; CHECK-LABEL: @test31(
 ; CHECK: ret i1 true
@@ -372,7 +535,7 @@ define i8* @test32(i8* %v) {
 	%D = getelementptr { [16 x i8] }* %C, i32 0, i32 0, i32 8
 	%E = bitcast i8* %D to i8**
 	store i8* %v, i8** %E
-	%F = getelementptr [4 x i8*]* %A, i32 0, i32 2	
+	%F = getelementptr [4 x i8*]* %A, i32 0, i32 2
 	%G = load i8** %F
 	ret i8* %G
 ; CHECK-LABEL: @test32(
@@ -384,23 +547,46 @@ define i8* @test32(i8* %v) {
 %struct.Key = type { { i32, i32 } }
 %struct.anon = type <{ i8, [3 x i8], i32 }>
 
-define i32 *@test33(%struct.Key *%A) {
-	%B = bitcast %struct.Key* %A to %struct.anon*
-        %C = getelementptr %struct.anon* %B, i32 0, i32 2 
-	ret i32 *%C
+define i32* @test33(%struct.Key* %A) {
 ; CHECK-LABEL: @test33(
 ; CHECK: getelementptr %struct.Key* %A, i64 0, i32 0, i32 1
+  %B = bitcast %struct.Key* %A to %struct.anon*
+  %C = getelementptr %struct.anon* %B, i32 0, i32 2
+  ret i32* %C
 }
 
+define i32 addrspace(1)* @test33_as1(%struct.Key addrspace(1)* %A) {
+; CHECK-LABEL: @test33_as1(
+; CHECK: getelementptr %struct.Key addrspace(1)* %A, i16 0, i32 0, i32 1
+  %B = bitcast %struct.Key addrspace(1)* %A to %struct.anon addrspace(1)*
+  %C = getelementptr %struct.anon addrspace(1)* %B, i32 0, i32 2
+  ret i32 addrspace(1)* %C
+}
 
+define i32 addrspace(1)* @test33_array_as1([10 x i32] addrspace(1)* %A) {
+; CHECK-LABEL: @test33_array_as1(
+; CHECK: getelementptr [10 x i32] addrspace(1)* %A, i16 0, i16 2
+  %B = bitcast [10 x i32] addrspace(1)* %A to [5 x i32] addrspace(1)*
+  %C = getelementptr [5 x i32] addrspace(1)* %B, i32 0, i32 2
+  ret i32 addrspace(1)* %C
+}
+
+; Make sure the GEP indices use the right pointer sized integer
+define i32 addrspace(1)* @test33_array_struct_as1([10 x %struct.Key] addrspace(1)* %A) {
+; CHECK-LABEL: @test33_array_struct_as1(
+; CHECK: getelementptr [10 x %struct.Key] addrspace(1)* %A, i16 0, i16 1, i32 0, i32 0
+  %B = bitcast [10 x %struct.Key] addrspace(1)* %A to [20 x i32] addrspace(1)*
+  %C = getelementptr [20 x i32] addrspace(1)* %B, i32 0, i32 2
+  ret i32 addrspace(1)* %C
+}
 
 	%T2 = type { i8*, i8 }
 define i8* @test34(i8* %Val, i64 %V) nounwind {
 entry:
-	%A = alloca %T2, align 8	
+	%A = alloca %T2, align 8
 	%mrv_gep = bitcast %T2* %A to i64*
 	%B = getelementptr %T2* %A, i64 0, i32 0
-        
+
       	store i64 %V, i64* %mrv_gep
 	%C = load i8** %B, align 8
 	ret i8* %C
@@ -519,4 +705,88 @@ define i1 @pr16483([1 x i8]* %a, [1 x i8]* %b) {
 ; CHECK-NEXT: icmp ult  [1 x i8]* %a, %b
 }
 
+define i8 @test_gep_bitcast_as1(i32 addrspace(1)* %arr, i16 %N) {
+; CHECK-LABEL: @test_gep_bitcast_as1(
+; CHECK: getelementptr i32 addrspace(1)* %arr, i16 %N
+; CHECK: bitcast
+  %cast = bitcast i32 addrspace(1)* %arr to i8 addrspace(1)*
+  %V = mul i16 %N, 4
+  %t = getelementptr i8 addrspace(1)* %cast, i16 %V
+  %x = load i8 addrspace(1)* %t
+  ret i8 %x
+}
+
+; The element size of the array matches the element size of the pointer
+define i64 @test_gep_bitcast_array_same_size_element([100 x double]* %arr, i64 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_same_size_element(
+; CHECK: getelementptr [100 x double]* %arr, i64 0, i64 %V
+; CHECK: bitcast
+  %cast = bitcast [100 x double]* %arr to i64*
+  %V = mul i64 %N, 8
+  %t = getelementptr i64* %cast, i64 %V
+  %x = load i64* %t
+  ret i64 %x
+}
+
+; The element size of the array is different the element size of the pointer
+define i8 @test_gep_bitcast_array_different_size_element([100 x double]* %arr, i64 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_different_size_element(
+; CHECK: getelementptr [100 x double]* %arr, i64 0, i64 %N
+; CHECK: bitcast
+  %cast = bitcast [100 x double]* %arr to i8*
+  %V = mul i64 %N, 8
+  %t = getelementptr i8* %cast, i64 %V
+  %x = load i8* %t
+  ret i8 %x
+}
+
+define i64 @test_gep_bitcast_array_same_size_element_as1([100 x double] addrspace(1)* %arr, i16 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_as1(
+; CHECK: getelementptr [100 x double] addrspace(1)* %arr, i16 0, i16 %V
+; CHECK: bitcast
+  %cast = bitcast [100 x double] addrspace(1)* %arr to i64 addrspace(1)*
+  %V = mul i16 %N, 8
+  %t = getelementptr i64 addrspace(1)* %cast, i16 %V
+  %x = load i64 addrspace(1)* %t
+  ret i64 %x
+}
+
+define i8 @test_gep_bitcast_array_different_size_element_as1([100 x double] addrspace(1)* %arr, i16 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_different_size_element_as1(
+; CHECK: getelementptr [100 x double] addrspace(1)* %arr, i16 0, i16 %N
+; CHECK: bitcast
+  %cast = bitcast [100 x double] addrspace(1)* %arr to i8 addrspace(1)*
+  %V = mul i16 %N, 8
+  %t = getelementptr i8 addrspace(1)* %cast, i16 %V
+  %x = load i8 addrspace(1)* %t
+  ret i8 %x
+}
+
+define i64 @test40() {
+  %array = alloca [3 x i32], align 4
+  %gep = getelementptr inbounds [3 x i32]* %array, i64 0, i64 2
+  %gepi8 = bitcast i32* %gep to i8*
+  %p = ptrtoint [3 x i32]* %array to i64
+  %np = sub i64 0, %p
+  %gep2 = getelementptr i8* %gepi8, i64 %np
+  %ret = ptrtoint i8* %gep2 to i64
+  ret i64 %ret
+
+; CHECK-LABEL: @test40
+; CHECK-NEXT: ret i64 8
+}
+
+define i16 @test41([3 x i32] addrspace(1)* %array) {
+  %gep = getelementptr inbounds [3 x i32] addrspace(1)* %array, i16 0, i16 2
+  %gepi8 = bitcast i32 addrspace(1)* %gep to i8 addrspace(1)*
+  %p = ptrtoint [3 x i32] addrspace(1)* %array to i16
+  %np = sub i16 0, %p
+  %gep2 = getelementptr i8 addrspace(1)* %gepi8, i16 %np
+  %ret = ptrtoint i8 addrspace(1)* %gep2 to i16
+  ret i16 %ret
+
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: ret i16 8
+}
+
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/icmp-logical.ll b/test/Transforms/InstCombine/icmp-logical.ll
new file mode 100644
index 0000000..d5d8cbc
--- /dev/null
+++ b/test/Transforms/InstCombine/icmp-logical.ll
@@ -0,0 +1,152 @@
+; RUN: opt -instcombine -S -o - %s | FileCheck %s
+
+define i1 @masked_and_notallzeroes(i32 %A) {
+; CHECK-LABEL: @masked_and_notallzeroes
+; CHECK: [[MASK:%.*]] = and i32 %A, 7
+; CHECK: icmp ne i32 [[MASK]], 0
+; CHECK-NOT: and i32 %A, 39
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp ne i32 %mask1, 0
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp ne i32 %mask2, 0
+
+  %res = and i1 %tst1, %tst2
+  ret i1 %res
+}
+
+define i1 @masked_or_allzeroes(i32 %A) {
+; CHECK-LABEL: @masked_or_allzeroes
+; CHECK: [[MASK:%.*]] = and i32 %A, 7
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK-NOT: and i32 %A, 39
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp eq i32 %mask1, 0
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp eq i32 %mask2, 0
+
+  %res = or i1 %tst1, %tst2
+  ret i1 %res
+}
+
+define i1 @masked_and_notallones(i32 %A) {
+; CHECK-LABEL: @masked_and_notallones
+; CHECK: [[MASK:%.*]] = and i32 %A, 7
+; CHECK: icmp ne i32 [[MASK]], 7
+; CHECK-NOT: and i32 %A, 39
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp ne i32 %mask1, 7
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp ne i32 %mask2, 39
+
+  %res = and i1 %tst1, %tst2
+  ret i1 %res
+}
+
+define i1 @masked_or_allones(i32 %A) {
+; CHECK-LABEL: @masked_or_allones
+; CHECK: [[MASK:%.*]] = and i32 %A, 7
+; CHECK: icmp eq i32 [[MASK]], 7
+; CHECK-NOT: and i32 %A, 39
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp eq i32 %mask1, 7
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp eq i32 %mask2, 39
+
+  %res = or i1 %tst1, %tst2
+  ret i1 %res
+}
+
+define i1 @masked_and_notA(i32 %A) {
+; CHECK-LABEL: @masked_and_notA
+; CHECK: [[MASK:%.*]] = and i32 %A, 39
+; CHECK: icmp ne i32 [[MASK]], %A
+; CHECK-NOT: and i32 %A, 7
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp ne i32 %mask1, %A
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp ne i32 %mask2, %A
+
+  %res = and i1 %tst1, %tst2
+  ret i1 %res
+}
+
+define i1 @masked_or_A(i32 %A) {
+; CHECK-LABEL: @masked_or_A
+; CHECK: [[MASK:%.*]] = and i32 %A, 39
+; CHECK: icmp eq i32 [[MASK]], %A
+; CHECK-NOT: and i32 %A, 7
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp eq i32 %mask1, %A
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp eq i32 %mask2, %A
+
+  %res = or i1 %tst1, %tst2
+  ret i1 %res
+}
+
+define i1 @masked_or_allzeroes_notoptimised(i32 %A) {
+; CHECK-LABEL: @masked_or_allzeroes_notoptimised
+; CHECK: [[MASK:%.*]] = and i32 %A, 15
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK: [[MASK:%.*]] = and i32 %A, 39
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 15
+  %tst1 = icmp eq i32 %mask1, 0
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp eq i32 %mask2, 0
+
+  %res = or i1 %tst1, %tst2
+  ret i1 %res
+}
+
+define i1 @nomask_lhs(i32 %in) {
+; CHECK-LABEL: @nomask_lhs
+; CHECK: [[MASK:%.*]] = and i32 %in, 1
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK-NOT: icmp
+; CHECK: ret i1
+  %tst1 = icmp eq i32 %in, 0
+
+  %masked = and i32 %in, 1
+  %tst2 = icmp eq i32 %masked, 0
+
+  %val = or i1 %tst1, %tst2
+  ret i1 %val
+}
+
+
+define i1 @nomask_rhs(i32 %in) {
+; CHECK-LABEL: @nomask_rhs
+; CHECK: [[MASK:%.*]] = and i32 %in, 1
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK-NOT: icmp
+; CHECK: ret i1
+  %masked = and i32 %in, 1
+  %tst1 = icmp eq i32 %masked, 0
+
+  %tst2 = icmp eq i32 %in, 0
+
+  %val = or i1 %tst1, %tst2
+  ret i1 %val
+}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index dfeac67..12a4744 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 target datalayout =
-"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+"e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 define i32 @test1(i32 %X) {
 entry:
@@ -79,7 +79,7 @@ entry:
 
 define i1 @test8(i32 %x){
 entry:
-  %a = add i32 %x, -1 
+  %a = add i32 %x, -1
   %b = icmp eq i32 %a, %x
   ret i1 %b
 ; CHECK-LABEL: @test8(
@@ -89,7 +89,7 @@ entry:
 define i1 @test9(i32 %x)  {
 entry:
   %a = add i32 %x, -2
-  %b = icmp ugt i32 %x, %a 
+  %b = icmp ugt i32 %x, %a
   ret i1 %b
 ; CHECK-LABEL: @test9(
 ; CHECK: icmp ugt i32 %x, 1
@@ -98,10 +98,9 @@ entry:
 
 define i1 @test10(i32 %x){
 entry:
-  %a = add i32 %x, -1      
-  %b = icmp slt i32 %a, %x 
+  %a = add i32 %x, -1
+  %b = icmp slt i32 %a, %x
   ret i1 %b
-  
 ; CHECK-LABEL: @test10(
 ; CHECK: %b = icmp ne i32 %x, -2147483648
 ; CHECK: ret i1 %b
@@ -234,6 +233,18 @@ define i1 @test24(i64 %i) {
   ret i1 %cmp
 }
 
+@X_as1 = addrspace(1) global [1000 x i32] zeroinitializer
+
+; CHECK: @test24_as1
+; CHECK: trunc i64 %i to i16
+; CHECK: %cmp = icmp eq i16 %1, 1000
+; CHECK: ret i1 %cmp
+define i1 @test24_as1(i64 %i) {
+  %p1 = getelementptr inbounds i32 addrspace(1)* getelementptr inbounds ([1000 x i32] addrspace(1)* @X_as1, i64 0, i64 0), i64 %i
+  %cmp = icmp eq i32 addrspace(1)* %p1, getelementptr inbounds ([1000 x i32] addrspace(1)* @X_as1, i64 1, i64 0)
+  ret i1 %cmp
+}
+
 ; CHECK-LABEL: @test25(
 ; X + Z > Y + Z -> X > Y if there is no overflow.
 ; CHECK: %c = icmp sgt i32 %x, %y
@@ -473,7 +484,7 @@ define <2 x i1> @test49(<2 x i32> %tmp3) {
 entry:
   %tmp11 = and <2 x i32> %tmp3, <i32 3, i32 3>
   %cmp = icmp ult <2 x i32> %tmp11, <i32 4, i32 4>
-  ret <2 x i1> %cmp  
+  ret <2 x i1> %cmp
 }
 
 ; PR9343 #7
@@ -512,12 +523,12 @@ define i1 @test52(i32 %x1) nounwind {
 
 ; PR9838
 ; CHECK-LABEL: @test53(
-; CHECK-NEXT: ashr exact
-; CHECK-NEXT: ashr
+; CHECK-NEXT: sdiv exact
+; CHECK-NEXT: sdiv
 ; CHECK-NEXT: icmp
 define i1 @test53(i32 %a, i32 %b) nounwind {
- %x = ashr exact i32 %a, 30
- %y = ashr i32 %b, 30
+ %x = sdiv exact i32 %a, 30
+ %y = sdiv i32 %b, 30
  %z = icmp eq i32 %x, %y
  ret i1 %z
 }
@@ -603,6 +614,21 @@ define i1 @test59(i8* %foo) {
 ; CHECK: ret i1 true
 }
 
+define i1 @test59_as1(i8 addrspace(1)* %foo) {
+  %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+  %gep1 = getelementptr inbounds i32 addrspace(1)* %bit, i64 2
+  %gep2 = getelementptr inbounds i8 addrspace(1)* %foo, i64 10
+  %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+  %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+  %use = ptrtoint i8 addrspace(1)* %cast1 to i64
+  %call = call i32 @test58_d(i64 %use) nounwind
+  ret i1 %cmp
+; CHECK: @test59_as1
+; CHECK: %[[GEP:.+]] = getelementptr inbounds i8 addrspace(1)* %foo, i16 8
+; CHECK: ptrtoint i8 addrspace(1)* %[[GEP]] to i16
+; CHECK: ret i1 true
+}
+
 define i1 @test60(i8* %foo, i64 %i, i64 %j) {
   %bit = bitcast i8* %foo to i32*
   %gep1 = getelementptr inbounds i32* %bit, i64 %i
@@ -616,6 +642,21 @@ define i1 @test60(i8* %foo, i64 %i, i64 %j) {
 ; CHECK-NEXT: ret i1
 }
 
+define i1 @test60_as1(i8 addrspace(1)* %foo, i64 %i, i64 %j) {
+  %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+  %gep1 = getelementptr inbounds i32 addrspace(1)* %bit, i64 %i
+  %gep2 = getelementptr inbounds i8 addrspace(1)* %foo, i64 %j
+  %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+  %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+  ret i1 %cmp
+; CHECK: @test60_as1
+; CHECK: trunc i64 %i to i16
+; CHECK: trunc i64 %j to i16
+; CHECK: %gep1.idx = shl nuw i16 %{{.+}}, 2
+; CHECK-NEXT: icmp sgt i16 %{{.+}}, %gep1.idx
+; CHECK-NEXT: ret i1
+}
+
 define i1 @test61(i8* %foo, i64 %i, i64 %j) {
   %bit = bitcast i8* %foo to i32*
   %gep1 = getelementptr i32* %bit, i64 %i
@@ -629,6 +670,19 @@ define i1 @test61(i8* %foo, i64 %i, i64 %j) {
 ; CHECK-NEXT: ret i1
 }
 
+define i1 @test61_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
+  %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+  %gep1 = getelementptr i32 addrspace(1)* %bit, i16 %i
+  %gep2 = getelementptr i8 addrspace(1)* %foo, i16 %j
+  %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+  %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+  ret i1 %cmp
+; Don't transform non-inbounds GEPs.
+; CHECK: @test61_as1
+; CHECK: icmp ult i8 addrspace(1)* %cast1, %gep2
+; CHECK-NEXT: ret i1
+}
+
 define i1 @test62(i8* %a) {
   %arrayidx1 = getelementptr inbounds i8* %a, i64 1
   %arrayidx2 = getelementptr inbounds i8* %a, i64 10
@@ -638,6 +692,15 @@ define i1 @test62(i8* %a) {
 ; CHECK-NEXT: ret i1 true
 }
 
+define i1 @test62_as1(i8 addrspace(1)* %a) {
+; CHECK-LABEL: @test62_as1(
+; CHECK-NEXT: ret i1 true
+  %arrayidx1 = getelementptr inbounds i8 addrspace(1)* %a, i64 1
+  %arrayidx2 = getelementptr inbounds i8 addrspace(1)* %a, i64 10
+  %cmp = icmp slt i8 addrspace(1)* %arrayidx1, %arrayidx2
+  ret i1 %cmp
+}
+
 define i1 @test63(i8 %a, i32 %b) nounwind {
   %z = zext i8 %a to i32
   %t = and i32 %b, 255
@@ -999,6 +1062,15 @@ define i1 @test71(i8* %x) {
   ret i1 %c
 }
 
+define i1 @test71_as1(i8 addrspace(1)* %x) {
+; CHECK-LABEL: @test71_as1(
+; CHECK-NEXT: ret i1 false
+  %a = getelementptr i8 addrspace(1)* %x, i64 8
+  %b = getelementptr inbounds i8 addrspace(1)* %x, i64 8
+  %c = icmp ugt i8 addrspace(1)* %a, %b
+  ret i1 %c
+}
+
 ; CHECK-LABEL: @icmp_shl_1_V_ult_32(
 ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %V, 5
 ; CHECK-NEXT: ret i1 [[CMP]]
@@ -1199,3 +1271,88 @@ define i1 @icmp_sub_-1_X_uge_4(i32 %X) {
   %cmp = icmp uge i32 %sub, 4
   ret i1 %cmp
 }
+
+; CHECK-LABEL: @icmp_swap_operands_for_cse
+; CHECK: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, %Y
+; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
+; CHECK: ret i1
+define i1 @icmp_swap_operands_for_cse(i32 %X, i32 %Y) {
+entry:
+  %sub = sub i32 %X, %Y
+  %cmp = icmp ugt i32 %Y, %X
+  br i1 %cmp, label %true, label %false
+true:
+  %restrue = trunc i32 %sub to i1
+  br label %end
+false:
+  %shift = lshr i32 %sub, 4
+  %resfalse = trunc i32 %shift to i1
+  br label %end
+end:
+  %res = phi i1 [%restrue, %true], [%resfalse, %false]
+  ret i1 %res
+}
+
+; CHECK-LABEL: @icmp_swap_operands_for_cse2
+; CHECK: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, %Y
+; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
+; CHECK: ret i1
+define i1 @icmp_swap_operands_for_cse2(i32 %X, i32 %Y) {
+entry:
+  %cmp = icmp ugt i32 %Y, %X
+  br i1 %cmp, label %true, label %false
+true:
+  %sub = sub i32 %X, %Y
+  %sub1 = sub i32 %X, %Y
+  %add = add i32 %sub, %sub1
+  %restrue = trunc i32 %add to i1
+  br label %end
+false:
+  %sub2 = sub i32 %Y, %X
+  %resfalse = trunc i32 %sub2 to i1
+  br label %end
+end:
+  %res = phi i1 [%restrue, %true], [%resfalse, %false]
+  ret i1 %res
+}
+
+; CHECK-LABEL: @icmp_do_not_swap_operands_for_cse
+; CHECK: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %Y, %X
+; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
+; CHECK: ret i1
+define i1 @icmp_do_not_swap_operands_for_cse(i32 %X, i32 %Y) {
+entry:
+  %cmp = icmp ugt i32 %Y, %X
+  br i1 %cmp, label %true, label %false
+true:
+  %sub = sub i32 %X, %Y
+  %restrue = trunc i32 %sub to i1
+  br label %end
+false:
+  %sub2 = sub i32 %Y, %X
+  %resfalse = trunc i32 %sub2 to i1
+  br label %end
+end:
+  %res = phi i1 [%restrue, %true], [%resfalse, %false]
+  ret i1 %res
+}
+
+; CHECK-LABEL: @icmp_lshr_lshr_eq
+; CHECK: %z.unshifted = xor i32 %a, %b
+; CHECK: %z = icmp ult i32 %z.unshifted, 1073741824
+define i1 @icmp_lshr_lshr_eq(i32 %a, i32 %b) nounwind {
+ %x = lshr i32 %a, 30
+ %y = lshr i32 %b, 30
+ %z = icmp eq i32 %x, %y
+ ret i1 %z
+}
+
+; CHECK-LABEL: @icmp_ashr_ashr_ne
+; CHECK: %z.unshifted = xor i32 %a, %b
+; CHECK: %z = icmp ugt i32 %z.unshifted, 255
+define i1 @icmp_ashr_ashr_ne(i32 %a, i32 %b) nounwind {
+ %x = ashr i32 %a, 8
+ %y = ashr i32 %b, 8
+ %z = icmp ne i32 %x, %y
+ ret i1 %z
+}
diff --git a/test/Transforms/InstCombine/lit.local.cfg b/test/Transforms/InstCombine/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/InstCombine/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index 95dc48c..9810026 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -1,18 +1,75 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck -check-prefix=NODL %s
+; RUN: opt -instcombine -S -default-data-layout="p:32:32:32-p1:16:16:16-n8:16:32:64" < %s | FileCheck -check-prefix=P32 %s
 
-@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, 
+@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
                                      i16 73, i16 82, i16 69, i16 68, i16 0]
+
+@G16_as1 = internal addrspace(1) constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
+                                                      i16 73, i16 82, i16 69, i16 68, i16 0]
+
 @GD = internal constant [6 x double]
    [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0]
 
+%Foo = type { i32, i32, i32, i32 }
+
+@GS = internal constant %Foo { i32 1, i32 4, i32 9, i32 14 }
+
+@GStructArr = internal constant [4 x %Foo] [ %Foo { i32 1, i32 4, i32 9, i32 14 },
+                                             %Foo { i32 5, i32 4, i32 6, i32 11 },
+                                             %Foo { i32 6, i32 5, i32 9, i32 20 },
+                                             %Foo { i32 12, i32 3, i32 9, i32 8 } ]
+
+
 define i1 @test1(i32 %X) {
   %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
   %Q = load i16* %P
   %R = icmp eq i16 %Q, 0
   ret i1 %R
-; CHECK-LABEL: @test1(
-; CHECK-NEXT: %R = icmp eq i32 %X, 9
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test1(
+; NODL-NEXT: %R = icmp eq i32 %X, 9
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test1(
+; P32-NEXT: %R = icmp eq i32 %X, 9
+; P32-NEXT: ret i1 %R
+}
+
+define i1 @test1_noinbounds(i32 %X) {
+  %P = getelementptr [10 x i16]* @G16, i32 0, i32 %X
+  %Q = load i16* %P
+  %R = icmp eq i16 %Q, 0
+  ret i1 %R
+; NODL-LABEL: @test1_noinbounds(
+; NODL-NEXT: %P = getelementptr [10 x i16]* @G16, i32 0, i32 %X
+
+; P32-LABEL: @test1_noinbounds(
+; P32-NEXT: %R = icmp eq i32 %X, 9
+; P32-NEXT: ret i1 %R
+}
+
+define i1 @test1_noinbounds_i64(i64 %X) {
+  %P = getelementptr [10 x i16]* @G16, i64 0, i64 %X
+  %Q = load i16* %P
+  %R = icmp eq i16 %Q, 0
+  ret i1 %R
+; NODL-LABEL: @test1_noinbounds_i64(
+; NODL-NEXT: %P = getelementptr [10 x i16]* @G16, i64 0, i64 %X
+
+; P32-LABEL: @test1_noinbounds_i64(
+; P32: %R = icmp eq i32 %1, 9
+; P32-NEXT: ret i1 %R
+}
+
+define i1 @test1_noinbounds_as1(i32 %x) {
+  %p = getelementptr [10 x i16] addrspace(1)* @G16_as1, i16 0, i32 %x
+  %q = load i16 addrspace(1)* %p
+  %r = icmp eq i16 %q, 0
+  ret i1 %r
+
+; P32-LABEL: @test1_noinbounds_as1(
+; P32-NEXT: trunc i32 %x to i16
+; P32-NEXT: %r = icmp eq i16 %1, 9
+; P32-NEXT: ret i1 %r
 }
 
 define i1 @test2(i32 %X) {
@@ -20,9 +77,9 @@ define i1 @test2(i32 %X) {
   %Q = load i16* %P
   %R = icmp slt i16 %Q, 85
   ret i1 %R
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: %R = icmp ne i32 %X, 4
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test2(
+; NODL-NEXT: %R = icmp ne i32 %X, 4
+; NODL-NEXT: ret i1 %R
 }
 
 define i1 @test3(i32 %X) {
@@ -30,9 +87,14 @@ define i1 @test3(i32 %X) {
   %Q = load double* %P
   %R = fcmp oeq double %Q, 1.0
   ret i1 %R
-; CHECK-LABEL: @test3(
-; CHECK-NEXT: %R = icmp eq i32 %X, 1
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test3(
+; NODL-NEXT: %R = icmp eq i32 %X, 1
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test3(
+; P32-NEXT: %R = icmp eq i32 %X, 1
+; P32-NEXT: ret i1 %R
+
 }
 
 define i1 @test4(i32 %X) {
@@ -40,11 +102,17 @@ define i1 @test4(i32 %X) {
   %Q = load i16* %P
   %R = icmp sle i16 %Q, 73
   ret i1 %R
-; CHECK-LABEL: @test4(
-; CHECK-NEXT: lshr i32 933, %X
-; CHECK-NEXT: and i32 {{.*}}, 1
-; CHECK-NEXT: %R = icmp ne i32 {{.*}}, 0
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test4(
+; NODL-NEXT: lshr i32 933, %X
+; NODL-NEXT: and i32 {{.*}}, 1
+; NODL-NEXT: %R = icmp ne i32 {{.*}}, 0
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test4(
+; P32-NEXT: lshr i32 933, %X
+; P32-NEXT: and i32 {{.*}}, 1
+; P32-NEXT: %R = icmp ne i32 {{.*}}, 0
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test4_i16(i16 %X) {
@@ -52,11 +120,19 @@ define i1 @test4_i16(i16 %X) {
   %Q = load i16* %P
   %R = icmp sle i16 %Q, 73
   ret i1 %R
-; CHECK-LABEL: @test4_i16(
-; CHECK-NEXT: lshr i16 933, %X
-; CHECK-NEXT: and i16 {{.*}}, 1
-; CHECK-NEXT: %R = icmp ne i16 {{.*}}, 0
-; CHECK-NEXT: ret i1 %R
+
+; NODL-LABEL: @test4_i16(
+; NODL-NEXT: lshr i16 933, %X
+; NODL-NEXT: and i16 {{.*}}, 1
+; NODL-NEXT: %R = icmp ne i16 {{.*}}, 0
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test4_i16(
+; P32-NEXT: sext i16 %X to i32
+; P32-NEXT: lshr i32 933, %1
+; P32-NEXT: and i32 {{.*}}, 1
+; P32-NEXT: %R = icmp ne i32 {{.*}}, 0
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test5(i32 %X) {
@@ -64,11 +140,17 @@ define i1 @test5(i32 %X) {
   %Q = load i16* %P
   %R = icmp eq i16 %Q, 69
   ret i1 %R
-; CHECK-LABEL: @test5(
-; CHECK-NEXT: icmp eq i32 %X, 2
-; CHECK-NEXT: icmp eq i32 %X, 7
-; CHECK-NEXT: %R = or i1
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test5(
+; NODL-NEXT: icmp eq i32 %X, 2
+; NODL-NEXT: icmp eq i32 %X, 7
+; NODL-NEXT: %R = or i1
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test5(
+; P32-NEXT: icmp eq i32 %X, 2
+; P32-NEXT: icmp eq i32 %X, 7
+; P32-NEXT: %R = or i1
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test6(i32 %X) {
@@ -76,10 +158,15 @@ define i1 @test6(i32 %X) {
   %Q = load double* %P
   %R = fcmp ogt double %Q, 0.0
   ret i1 %R
-; CHECK-LABEL: @test6(
-; CHECK-NEXT: add i32 %X, -1
-; CHECK-NEXT: %R = icmp ult i32 {{.*}}, 3
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test6(
+; NODL-NEXT: add i32 %X, -1
+; NODL-NEXT: %R = icmp ult i32 {{.*}}, 3
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test6(
+; P32-NEXT: add i32 %X, -1
+; P32-NEXT: %R = icmp ult i32 {{.*}}, 3
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test7(i32 %X) {
@@ -87,10 +174,15 @@ define i1 @test7(i32 %X) {
   %Q = load double* %P
   %R = fcmp olt double %Q, 0.0
   ret i1 %R
-; CHECK-LABEL: @test7(
-; CHECK-NEXT: add i32 %X, -1
-; CHECK-NEXT: %R = icmp ugt i32 {{.*}}, 2
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test7(
+; NODL-NEXT: add i32 %X, -1
+; NODL-NEXT: %R = icmp ugt i32 {{.*}}, 2
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test7(
+; P32-NEXT: add i32 %X, -1
+; P32-NEXT: %R = icmp ugt i32 {{.*}}, 2
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test8(i32 %X) {
@@ -99,10 +191,15 @@ define i1 @test8(i32 %X) {
   %R = and i16 %Q, 3
   %S = icmp eq i16 %R, 0
   ret i1 %S
-; CHECK-LABEL: @test8(
-; CHECK-NEXT: and i32 %X, -2
-; CHECK-NEXT: icmp eq i32 {{.*}}, 8
-; CHECK-NEXT: ret i1
+; NODL-LABEL: @test8(
+; NODL-NEXT: and i32 %X, -2
+; NODL-NEXT: icmp eq i32 {{.*}}, 8
+; NODL-NEXT: ret i1
+
+; P32-LABEL: @test8(
+; P32-NEXT: and i32 %X, -2
+; P32-NEXT: icmp eq i32 {{.*}}, 8
+; P32-NEXT: ret i1
 }
 
 @GA = internal constant [4 x { i32, i32 } ] [
@@ -117,8 +214,161 @@ define i1 @test9(i32 %X) {
   %Q = load i32* %P
   %R = icmp eq i32 %Q, 1
   ret i1 %R
-; CHECK-LABEL: @test9(
-; CHECK-NEXT: add i32 %X, -1
-; CHECK-NEXT: icmp ult i32 {{.*}}, 2
-; CHECK-NEXT: ret i1
+; NODL-LABEL: @test9(
+; NODL-NEXT: add i32 %X, -1
+; NODL-NEXT: icmp ult i32 {{.*}}, 2
+; NODL-NEXT: ret i1
+
+; P32-LABEL: @test9(
+; P32-NEXT: add i32 %X, -1
+; P32-NEXT: icmp ult i32 {{.*}}, 2
+; P32-NEXT: ret i1
+}
+
+define i1 @test10_struct(i32 %x) {
+; NODL-LABEL: @test10_struct(
+; NODL: getelementptr inbounds %Foo* @GS, i32 %x, i32 0
+
+; P32-LABEL: @test10_struct(
+; P32: getelementptr inbounds %Foo* @GS, i32 %x, i32 0
+  %p = getelementptr inbounds %Foo* @GS, i32 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_noinbounds(i32 %x) {
+; NODL-LABEL: @test10_struct_noinbounds(
+; NODL: getelementptr %Foo* @GS, i32 %x, i32 0
+
+; P32-LABEL: @test10_struct_noinbounds(
+; P32: getelementptr %Foo* @GS, i32 %x, i32 0
+  %p = getelementptr %Foo* @GS, i32 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+; Test that the GEP indices are converted before we ever get here
+; Index < ptr size
+define i1 @test10_struct_i16(i16 %x){
+; NODL-LABEL: @test10_struct_i16(
+; NODL: getelementptr inbounds %Foo* @GS, i16 %x, i32 0
+
+; P32-LABEL: @test10_struct_i16(
+; P32: %1 = sext i16 %x to i32
+; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0
+  %p = getelementptr inbounds %Foo* @GS, i16 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 0
+  ret i1 %r
+}
+
+; Test that the GEP indices are converted before we ever get here
+; Index > ptr size
+define i1 @test10_struct_i64(i64 %x){
+; NODL-LABEL: @test10_struct_i64(
+; NODL: getelementptr inbounds %Foo* @GS, i64 %x, i32 0
+
+; P32-LABEL: @test10_struct_i64(
+; P32: %1 = trunc i64 %x to i32
+; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0
+  %p = getelementptr inbounds %Foo* @GS, i64 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 0
+  ret i1 %r
+}
+
+define i1 @test10_struct_noinbounds_i16(i16 %x) {
+; NODL-LABEL: @test10_struct_noinbounds_i16(
+; NODL: getelementptr %Foo* @GS, i16 %x, i32 0
+
+; P32-LABEL: @test10_struct_noinbounds_i16(
+; P32: %1 = sext i16 %x to i32
+; P32: getelementptr %Foo* @GS, i32 %1, i32 0
+  %p = getelementptr %Foo* @GS, i16 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 0
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr(i32 %x) {
+; NODL-LABEL: @test10_struct_arr(
+; NODL-NEXT: %r = icmp ne i32 %x, 1
+; NODL-NEXT: ret i1 %r
+
+; P32-LABEL: @test10_struct_arr(
+; P32-NEXT: %r = icmp ne i32 %x, 1
+; P32-NEXT: ret i1 %r
+  %p = getelementptr inbounds [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_noinbounds(i32 %x) {
+; NODL-LABEL: @test10_struct_arr_noinbounds(
+; NODL-NEXT  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+
+; P32-LABEL: @test10_struct_arr_noinbounds(
+; P32-NEXT  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_i16(i16 %x) {
+; NODL-LABEL: @test10_struct_arr_i16(
+; NODL-NEXT: %r = icmp ne i16 %x, 1
+; NODL-NEXT: ret i1 %r
+
+; P32-LABEL: @test10_struct_arr_i16(
+; P32-NEXT: %r = icmp ne i16 %x, 1
+; P32-NEXT: ret i1 %r
+  %p = getelementptr inbounds [4 x %Foo]* @GStructArr, i16 0, i16 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_i64(i64 %x) {
+; NODL-LABEL: @test10_struct_arr_i64(
+; NODL-NEXT: %r = icmp ne i64 %x, 1
+; NODL-NEXT: ret i1 %r
+
+; P32-LABEL: @test10_struct_arr_i64(
+; P32-NEXT: trunc i64 %x to i32
+; P32-NEXT: %r = icmp ne i32 %1, 1
+; P32-NEXT: ret i1 %r
+  %p = getelementptr inbounds [4 x %Foo]* @GStructArr, i64 0, i64 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
+; NODL-LABEL: @test10_struct_arr_noinbounds_i16(
+; NODL-NEXT:  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2
+
+; P32-LABEL: @test10_struct_arr_noinbounds_i16(
+; P32-NEXT: %r = icmp ne i16 %x, 1
+  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
+; FIXME: Should be no trunc?
+; NODL-LABEL: @test10_struct_arr_noinbounds_i64(
+; NODL-NEXT:  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2
+
+; P32-LABEL: @test10_struct_arr_noinbounds_i64(
+; P32: %r = icmp ne i32 %1, 1
+; P32-NEXT: ret i1 %r
+  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
 }
diff --git a/test/Transforms/InstCombine/multi-size-address-space-pointer.ll b/test/Transforms/InstCombine/multi-size-address-space-pointer.ll
new file mode 100644
index 0000000..2d88bed
--- /dev/null
+++ b/test/Transforms/InstCombine/multi-size-address-space-pointer.ll
@@ -0,0 +1,112 @@
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-p4:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+
+define i32 @test_as0(i32 addrspace(0)* %a) {
+; CHECK-LABEL: @test_as0(
+; CHECK: %arrayidx = getelementptr i32* %a, i32 1
+  %arrayidx = getelementptr i32 addrspace(0)* %a, i64 1
+  %y = load i32 addrspace(0)* %arrayidx, align 4
+  ret i32 %y
+}
+
+define i32 @test_as1(i32 addrspace(1)* %a) {
+; CHECK-LABEL: @test_as1(
+; CHECK: %arrayidx = getelementptr i32 addrspace(1)* %a, i64 1
+  %arrayidx = getelementptr i32 addrspace(1)* %a, i32 1
+  %y = load i32 addrspace(1)* %arrayidx, align 4
+  ret i32 %y
+}
+
+define i32 @test_as2(i32 addrspace(2)* %a) {
+; CHECK-LABEL: @test_as2(
+; CHECK: %arrayidx = getelementptr i32 addrspace(2)* %a, i8 1
+  %arrayidx = getelementptr i32 addrspace(2)* %a, i32 1
+  %y = load i32 addrspace(2)* %arrayidx, align 4
+  ret i32 %y
+}
+
+define i32 @test_as3(i32 addrspace(3)* %a) {
+; CHECK-LABEL: @test_as3(
+; CHECK: %arrayidx = getelementptr i32 addrspace(3)* %a, i16 1
+  %arrayidx = getelementptr i32 addrspace(3)* %a, i32 1
+  %y = load i32 addrspace(3)* %arrayidx, align 4
+  ret i32 %y
+}
+
+define i32 @test_combine_ptrtoint(i32 addrspace(2)* %a) {
+; CHECK-LABEL: @test_combine_ptrtoint(
+; CHECK-NEXT: %y = load i32 addrspace(2)* %a
+; CHECK-NEXT: ret i32 %y
+  %cast = ptrtoint i32 addrspace(2)* %a to i8
+  %castback = inttoptr i8 %cast to i32 addrspace(2)*
+  %y = load i32 addrspace(2)* %castback, align 4
+  ret i32 %y
+}
+
+define i8 @test_combine_inttoptr(i8 %a) {
+; CHECK-LABEL: @test_combine_inttoptr(
+; CHECK-NEXT: ret i8 %a
+  %cast = inttoptr i8 %a to i32 addrspace(2)*
+  %castback = ptrtoint i32 addrspace(2)* %cast to i8
+  ret i8 %castback
+}
+
+define i32 @test_combine_vector_ptrtoint(<2 x i32 addrspace(2)*> %a) {
+; CHECK-LABEL: @test_combine_vector_ptrtoint(
+; CHECK-NEXT: %p = extractelement <2 x i32 addrspace(2)*> %a, i32 0
+; CHECK-NEXT: %y = load i32 addrspace(2)* %p, align 4
+; CHECK-NEXT: ret i32 %y
+  %cast = ptrtoint <2 x i32 addrspace(2)*> %a to <2 x i8>
+  %castback = inttoptr <2 x i8> %cast to <2 x i32 addrspace(2)*>
+  %p = extractelement <2 x i32 addrspace(2)*> %castback, i32 0
+  %y = load i32 addrspace(2)* %p, align 4
+  ret i32 %y
+}
+
+define <2 x i8> @test_combine_vector_inttoptr(<2 x i8> %a) {
+; CHECK-LABEL: @test_combine_vector_inttoptr(
+; CHECK-NEXT: ret <2 x i8> %a
+  %cast = inttoptr <2 x i8> %a to <2 x i32 addrspace(2)*>
+  %castback = ptrtoint <2 x i32 addrspace(2)*> %cast to <2 x i8>
+  ret <2 x i8> %castback
+}
+
+; Check that the GEP index is changed to the address space integer type (i64 -> i8)
+define i32 addrspace(2)* @shrink_gep_constant_index_64_as2(i32 addrspace(2)* %p) {
+; CHECK-LABEL: @shrink_gep_constant_index_64_as2(
+; CHECK-NEXT: getelementptr i32 addrspace(2)* %p, i8 1
+  %ret = getelementptr i32 addrspace(2)* %p, i64 1
+  ret i32 addrspace(2)* %ret
+}
+
+define i32 addrspace(2)* @shrink_gep_constant_index_32_as2(i32 addrspace(2)* %p) {
+; CHECK-LABEL: @shrink_gep_constant_index_32_as2(
+; CHECK-NEXT: getelementptr i32 addrspace(2)* %p, i8 1
+  %ret = getelementptr i32 addrspace(2)* %p, i32 1
+  ret i32 addrspace(2)* %ret
+}
+
+define i32 addrspace(3)* @shrink_gep_constant_index_64_as3(i32 addrspace(3)* %p) {
+; CHECK-LABEL: @shrink_gep_constant_index_64_as3(
+; CHECK-NEXT: getelementptr i32 addrspace(3)* %p, i16 1
+  %ret = getelementptr i32 addrspace(3)* %p, i64 1
+  ret i32 addrspace(3)* %ret
+}
+
+define i32 addrspace(2)* @shrink_gep_variable_index_64_as2(i32 addrspace(2)* %p, i64 %idx) {
+; CHECK-LABEL: @shrink_gep_variable_index_64_as2(
+; CHECK-NEXT: %1 = trunc i64 %idx to i8
+; CHECK-NEXT: getelementptr i32 addrspace(2)* %p, i8 %1
+  %ret = getelementptr i32 addrspace(2)* %p, i64 %idx
+  ret i32 addrspace(2)* %ret
+}
+
+define i32 addrspace(1)* @grow_gep_variable_index_8_as1(i32 addrspace(1)* %p, i8 %idx) {
+; CHECK-LABEL: @grow_gep_variable_index_8_as1(
+; CHECK-NEXT: %1 = sext i8 %idx to i64
+; CHECK-NEXT: getelementptr i32 addrspace(1)* %p, i64 %1
+  %ret = getelementptr i32 addrspace(1)* %p, i8 %idx
+  ret i32 addrspace(1)* %ret
+}
+
diff --git a/test/Transforms/InstCombine/objsize-address-space.ll b/test/Transforms/InstCombine/objsize-address-space.ll
new file mode 100644
index 0000000..9cb6884
--- /dev/null
+++ b/test/Transforms/InstCombine/objsize-address-space.ll
@@ -0,0 +1,80 @@
+; RUN: opt -S -instcombine -o - %s | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p2i8(i8 addrspace(2)*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)*, i1) nounwind readonly
+declare i16 @llvm.objectsize.i16.p3i8(i8 addrspace(3)*, i1) nounwind readonly
+
+@array_as2 = private addrspace(2) global [60 x i8] zeroinitializer, align 4
+
+@array_as1_pointers = private global [10 x i32 addrspace(1)*] zeroinitializer, align 4
+@array_as2_pointers = private global [24 x i32 addrspace(2)*] zeroinitializer, align 4
+@array_as3_pointers = private global [42 x i32 addrspace(3)*] zeroinitializer, align 4
+
+@array_as2_as1_pointer_pointers = private global [16 x i32 addrspace(2)* addrspace(1)*] zeroinitializer, align 4
+
+
+@a_as3 = private addrspace(3) global [60 x i8] zeroinitializer, align 1
+
+define i32 @foo_as3() nounwind {
+; CHECK-LABEL: @foo_as3(
+; CHECK-NEXT: ret i32 60
+  %1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_as3, i32 0, i32 0), i1 false)
+  ret i32 %1
+}
+
+define i16 @foo_as3_i16() nounwind {
+; CHECK-LABEL: @foo_as3_i16(
+; CHECK-NEXT: ret i16 60
+  %1 = call i16 @llvm.objectsize.i16.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_as3, i32 0, i32 0), i1 false)
+  ret i16 %1
+}
+
+@a_alias = alias weak [60 x i8] addrspace(3)* @a_as3
+define i32 @foo_alias() nounwind {
+  %1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_alias, i32 0, i32 0), i1 false)
+  ret i32 %1
+}
+
+define i32 @array_as2_size() {
+; CHECK-LABEL: @array_as2_size(
+; CHECK-NEXT: ret i32 60
+  %bc = bitcast [60 x i8] addrspace(2)* @array_as2 to i8 addrspace(2)*
+  %1 = call i32 @llvm.objectsize.i32.p2i8(i8 addrspace(2)* %bc, i1 false)
+  ret i32 %1
+}
+
+define i32 @pointer_array_as1() {
+; CHECK-LABEL: @pointer_array_as1(
+; CHECK-NEXT: ret i32 80
+  %bc = addrspacecast [10 x i32 addrspace(1)*]* @array_as1_pointers to i8 addrspace(1)*
+  %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* %bc, i1 false)
+  ret i32 %1
+}
+
+define i32 @pointer_array_as2() {
+; CHECK-LABEL: @pointer_array_as2(
+; CHECK-NEXT: ret i32 24
+  %bc = bitcast [24 x i32 addrspace(2)*]* @array_as2_pointers to i8*
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
+  ret i32 %1
+}
+
+define i32 @pointer_array_as3() {
+; CHECK-LABEL: @pointer_array_as3(
+; CHECK-NEXT: ret i32 84
+  %bc = bitcast [42 x i32 addrspace(3)*]* @array_as3_pointers to i8*
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
+  ret i32 %1
+}
+
+define i32 @pointer_pointer_array_as2_as1() {
+; CHECK-LABEL: @pointer_pointer_array_as2_as1(
+; CHECK-NEXT: ret i32 128
+  %bc = bitcast [16 x i32 addrspace(2)* addrspace(1)*]* @array_as2_as1_pointer_pointers to i8*
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
+  ret i32 %1
+}
+
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index b5351e9..6459032 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -5,11 +5,10 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 
 @a = private global [60 x i8] zeroinitializer, align 1 ; <[60 x i8]*>
 @.str = private constant [8 x i8] c"abcdefg\00"   ; <[8 x i8]*>
-
 define i32 @foo() nounwind {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT: ret i32 60
-  %1 = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
   ret i32 %1
 }
 
@@ -17,7 +16,7 @@ define i8* @bar() nounwind {
 ; CHECK-LABEL: @bar(
 entry:
   %retval = alloca i8*
-  %0 = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
+  %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
   %cmp = icmp ne i32 %0, -1
 ; CHECK: br i1 true
   br i1 %cmp, label %cond.true, label %cond.false
@@ -34,7 +33,7 @@ cond.false:
 define i32 @f() nounwind {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT: ret i32 0
-  %1 = call i32 @llvm.objectsize.i32(i8* getelementptr ([60 x i8]* @a, i32 1, i32 0), i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr ([60 x i8]* @a, i32 1, i32 0), i1 false)
   ret i32 %1
 }
 
@@ -43,16 +42,16 @@ define i32 @f() nounwind {
 define i1 @baz() nounwind {
 ; CHECK-LABEL: @baz(
 ; CHECK-NEXT: objectsize
-  %1 = tail call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 0), i1 false)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 0), i1 false)
   %2 = icmp eq i32 %1, -1
   ret i1 %2
 }
 
 define void @test1(i8* %q, i32 %x) nounwind noinline {
 ; CHECK-LABEL: @test1(
-; CHECK: objectsize.i32
+; CHECK: objectsize.i32.p0i8
 entry:
-  %0 = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 10), i1 false) ; <i64> [#uses=1]
+  %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 10), i1 false) ; <i64> [#uses=1]
   %1 = icmp eq i32 %0, -1                         ; <i1> [#uses=1]
   br i1 %1, label %"47", label %"46"
 
@@ -68,7 +67,7 @@ entry:
 define i32 @test2() nounwind {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT: ret i32 34
-  %1 = call i32 @llvm.objectsize.i32(i8* getelementptr (i8* bitcast ([9 x i32]* @.str5 to i8*), i32 2), i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr (i8* bitcast ([9 x i32]* @.str5 to i8*), i32 2), i1 false)
   ret i32 %1
 }
 
@@ -77,7 +76,7 @@ define i32 @test2() nounwind {
 
 declare i8* @__memcpy_chk(i8*, i8*, i32, i32) nounwind
 
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
 
 declare i8* @__inline_memcpy_chk(i8*, i8*, i32) nounwind inlinehint
 
@@ -89,7 +88,7 @@ entry:
 bb11:
   %0 = getelementptr inbounds float* getelementptr inbounds ([480 x float]* @array, i32 0, i32 128), i32 -127 ; <float*> [#uses=1]
   %1 = bitcast float* %0 to i8*                   ; <i8*> [#uses=1]
-  %2 = call i32 @llvm.objectsize.i32(i8* %1, i1 false) ; <i32> [#uses=1]
+  %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false) ; <i32> [#uses=1]
   %3 = call i8* @__memcpy_chk(i8* undef, i8* undef, i32 512, i32 %2) nounwind ; <i8*> [#uses=0]
 ; CHECK: unreachable
   unreachable
@@ -111,7 +110,7 @@ define i32 @test4(i8** %esc) nounwind ssp {
 entry:
   %0 = alloca %struct.data, align 8
   %1 = bitcast %struct.data* %0 to i8*
-  %2 = call i32 @llvm.objectsize.i32(i8* %1, i1 false) nounwind
+  %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false) nounwind
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @llvm.memset.p0i8.i32(i8* %1, i8 0, i32 1824, i32 8, i1 false)
   %3 = call i8* @__memset_chk(i8* %1, i32 0, i32 1824, i32 %2) nounwind
@@ -126,7 +125,7 @@ define i8* @test5(i32 %n) nounwind ssp {
 ; CHECK-LABEL: @test5(
 entry:
   %0 = tail call noalias i8* @malloc(i32 20) nounwind
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %0, i1 false)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false)
   %2 = load i8** @s, align 8
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 10, i32 1, i1 false)
@@ -138,7 +137,7 @@ define void @test6(i32 %n) nounwind ssp {
 ; CHECK-LABEL: @test6(
 entry:
   %0 = tail call noalias i8* @malloc(i32 20) nounwind
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %0, i1 false)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false)
   %2 = load i8** @s, align 8
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @__memcpy_chk(i8* %0, i8* %1, i32 30, i32 20)
@@ -155,7 +154,7 @@ define i32 @test7(i8** %esc) {
   %alloc = call noalias i8* @malloc(i32 48) nounwind
   store i8* %alloc, i8** %esc
   %gep = getelementptr inbounds i8* %alloc, i32 16
-  %objsize = call i32 @llvm.objectsize.i32(i8* %gep, i1 false) nounwind readonly
+  %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false) nounwind readonly
 ; CHECK: ret i32 32
   ret i32 %objsize
 }
@@ -167,7 +166,7 @@ define i32 @test8(i8** %esc) {
   %alloc = call noalias i8* @calloc(i32 5, i32 7) nounwind
   store i8* %alloc, i8** %esc
   %gep = getelementptr inbounds i8* %alloc, i32 5
-  %objsize = call i32 @llvm.objectsize.i32(i8* %gep, i1 false) nounwind readonly
+  %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false) nounwind readonly
 ; CHECK: ret i32 30
   ret i32 %objsize
 }
@@ -179,7 +178,7 @@ declare noalias i8* @strndup(i8* nocapture, i32) nounwind
 define i32 @test9(i8** %esc) {
   %call = tail call i8* @strdup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0)) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 8
   ret i32 %1
 }
@@ -188,7 +187,7 @@ define i32 @test9(i8** %esc) {
 define i32 @test10(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 3) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 4
   ret i32 %1
 }
@@ -197,7 +196,7 @@ define i32 @test10(i8** %esc) {
 define i32 @test11(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 7) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 8
   ret i32 %1
 }
@@ -206,7 +205,7 @@ define i32 @test11(i8** %esc) {
 define i32 @test12(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 8) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 8
   ret i32 %1
 }
@@ -215,7 +214,7 @@ define i32 @test12(i8** %esc) {
 define i32 @test13(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 57) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 8
   ret i32 %1
 }
@@ -229,8 +228,8 @@ entry:
 xpto:
   %select = select i1 %bool, i8* %select, i8* %a
   %select2 = select i1 %bool, i8* %a, i8* %select2
-  %0 = tail call i32 @llvm.objectsize.i32(i8* %select, i1 true)
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %select2, i1 true)
+  %0 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select2, i1 true)
   %2 = add i32 %0, %1
 ; CHECK: ret i32 undef
   ret i32 %2
@@ -249,7 +248,7 @@ entry:
 xpto:
   %gep2 = getelementptr i8* %gep, i32 1
   %gep = getelementptr i8* %gep2, i32 1
-  %o = call i32 @llvm.objectsize.i32(i8* %gep, i1 true)
+  %o = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 true)
 ; CHECK: ret i32 undef
   ret i32 %o
 
@@ -263,7 +262,7 @@ return:
 ; CHECK-NEXT: ret i32 60
 define i32 @test18() {
   %bc = bitcast [60 x i8]* @globalalias to i8*
-  %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
   ret i32 %1
 }
 
@@ -273,6 +272,7 @@ define i32 @test18() {
 ; CHECK: llvm.objectsize
 define i32 @test19() {
   %bc = bitcast [60 x i8]* @globalalias2 to i8*
-  %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
   ret i32 %1
 }
+
diff --git a/test/Transforms/InstCombine/onehot_merge.ll b/test/Transforms/InstCombine/onehot_merge.ll
new file mode 100644
index 0000000..51f955c
--- /dev/null
+++ b/test/Transforms/InstCombine/onehot_merge.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;CHECK: @and_consts
+;CHECK: and i32 %k, 12
+;CHECK: icmp ne i32 %0, 12
+;CHECK: ret
+define i1 @and_consts(i32 %k, i32 %c1, i32 %c2) {
+bb:
+  %tmp1 = and i32 4, %k
+  %tmp2 = icmp eq i32 %tmp1, 0
+  %tmp5 = and i32 8, %k
+  %tmp6 = icmp eq i32 %tmp5, 0
+  %or = or i1 %tmp2, %tmp6
+  ret i1 %or
+}
+
+;CHECK: @foo1_and
+;CHECK:  shl i32 1, %c1
+;CHECK-NEXT:  shl i32 1, %c2
+;CHECK-NEXT:  or i32
+;CHECK-NEXT:  and i32
+;CHECK-NEXT:  icmp ne i32 %1, %0
+;CHECK: ret
+define i1 @foo1_and(i32 %k, i32 %c1, i32 %c2) {
+bb:
+  %tmp = shl i32 1, %c1
+  %tmp4 = shl i32 1, %c2
+  %tmp1 = and i32 %tmp, %k
+  %tmp2 = icmp eq i32 %tmp1, 0
+  %tmp5 = and i32 %tmp4, %k
+  %tmp6 = icmp eq i32 %tmp5, 0
+  %or = or i1 %tmp2, %tmp6
+  ret i1 %or
+}
+
diff --git a/test/Transforms/InstCombine/phi-select-constexpr.ll b/test/Transforms/InstCombine/phi-select-constexpr.ll
new file mode 100644
index 0000000..054e069
--- /dev/null
+++ b/test/Transforms/InstCombine/phi-select-constexpr.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+@A = extern_weak global i32, align 4
+@B = extern_weak global i32, align 4
+
+define i32 @foo(i1 %which) {
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+; CHECK-LABEL: final:
+; CHECK: phi i32 [ 1, %entry ], [ select (i1 icmp eq (i32* @A, i32* @B), i32 2, i32 1), %delay ]
+final:
+  %use2 = phi i1 [ false, %entry ], [ icmp eq (i32* @A, i32* @B), %delay ]
+  %value = select i1 %use2, i32 2, i32 1
+  ret i32 %value
+}
+
diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll
index 0fdafeb..9f1d073 100644
--- a/test/Transforms/InstCombine/pow-1.ll
+++ b/test/Transforms/InstCombine/pow-1.ll
@@ -151,4 +151,17 @@ define double @test_simplify16(double %x) {
 ; CHECK-NEXT: ret double [[RECIPROCAL]]
 }
 
+declare double @llvm.pow.f64(double %Val, double %Power)
+define double @test_simplify17(double %x) {
+; CHECK-LABEL: @test_simplify17(
+  %retval = call double @llvm.pow.f64(double %x, double 0.5)
+; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]]
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]]
+; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
+; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
+  ret double %retval
+; CHECK-NEXT: ret double [[SELECT]]
+}
+
 ; CHECK: attributes [[NUW_RO]] = { nounwind readonly }
+
diff --git a/test/Transforms/InstCombine/pow-3.ll b/test/Transforms/InstCombine/pow-3.ll
new file mode 100644
index 0000000..1c5cf91
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-3.ll
@@ -0,0 +1,12 @@
+; Test that the pow won't get simplified to sqrt(fabs) when they are not available.
+;
+; RUN: opt < %s -disable-simplify-libcalls -instcombine -S | FileCheck %s
+
+declare double @llvm.pow.f64(double %Val, double %Power)
+
+define double @test_simplify_unavailable(double %x) {
+; CHECK-LABEL: @test_simplify_unavailable(
+  %retval = call double @llvm.pow.f64(double %x, double 0.5)
+; CHECK-NEXT: call double @llvm.pow.f64(double %x, double 5.000000e-01)
+  ret double %retval
+}
diff --git a/test/Transforms/InstCombine/pr17827.ll b/test/Transforms/InstCombine/pr17827.ll
new file mode 100644
index 0000000..a8b5926
--- /dev/null
+++ b/test/Transforms/InstCombine/pr17827.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; With left shift, the comparison should not be modified.
+; CHECK-LABEL: @test_shift_and_cmp_not_changed1(
+; CHECK: icmp slt i8 %andp, 32
+define i1 @test_shift_and_cmp_not_changed1(i8 %p) #0 {
+entry:
+  %shlp = shl i8 %p, 5
+  %andp = and i8 %shlp, -64
+  %cmp = icmp slt i8 %andp, 32
+  ret i1 %cmp
+}
+
+; With arithmetic right shift, the comparison should not be modified.
+; CHECK-LABEL: @test_shift_and_cmp_not_changed2(
+; CHECK: icmp slt i8 %andp, 32
+define i1 @test_shift_and_cmp_not_changed2(i8 %p) #0 {
+entry:
+  %shlp = ashr i8 %p, 5
+  %andp = and i8 %shlp, -64
+  %cmp = icmp slt i8 %andp, 32
+  ret i1 %cmp
+}
+
+; This should simplify functionally to the left shift case.
+; The extra input parameter should be optimized away.
+; CHECK-LABEL: @test_shift_and_cmp_changed1(
+; CHECK:  %andp = shl i8 %p, 5
+; CHECK-NEXT: %shl = and i8 %andp, -64
+; CHECK-NEXT:  %cmp = icmp slt i8 %shl, 32
+define i1 @test_shift_and_cmp_changed1(i8 %p, i8 %q) #0 {
+entry:
+  %andp = and i8 %p, 6
+  %andq = and i8 %q, 8
+  %or = or i8 %andq, %andp
+  %shl = shl i8 %or, 5
+  %ashr = ashr i8 %shl, 5
+  %cmp = icmp slt i8 %ashr, 1
+  ret i1 %cmp
+}
+
+; Unsigned compare allows a transformation to compare against 0.
+; CHECK-LABEL: @test_shift_and_cmp_changed2(
+; CHECK: icmp eq i8 %andp, 0
+define i1 @test_shift_and_cmp_changed2(i8 %p) #0 {
+entry:
+  %shlp = shl i8 %p, 5
+  %andp = and i8 %shlp, -64
+  %cmp = icmp ult i8 %andp, 32
+  ret i1 %cmp
+}
+
+; nsw on the shift should not affect the comparison.
+; CHECK-LABEL: @test_shift_and_cmp_changed3(
+; CHECK: icmp slt i8 %andp, 32
+define i1 @test_shift_and_cmp_changed3(i8 %p) #0 {
+entry:
+  %shlp = shl nsw i8 %p, 5
+  %andp = and i8 %shlp, -64
+  %cmp = icmp slt i8 %andp, 32
+  ret i1 %cmp
+}
+
+; Logical shift right allows a return true because the 'and' guarantees no bits are set.
+; CHECK-LABEL: @test_shift_and_cmp_changed4(
+; CHECK: ret i1 true
+define i1 @test_shift_and_cmp_changed4(i8 %p) #0 {
+entry:
+  %shlp = lshr i8 %p, 5
+  %andp = and i8 %shlp, -64
+  %cmp = icmp slt i8 %andp, 32
+  ret i1 %cmp
+}
+
diff --git a/test/Transforms/InstCombine/printf-1.ll b/test/Transforms/InstCombine/printf-1.ll
index 59d0f16..c98ddd5 100644
--- a/test/Transforms/InstCombine/printf-1.ll
+++ b/test/Transforms/InstCombine/printf-1.ll
@@ -1,7 +1,7 @@
 ; Test that the printf library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
diff --git a/test/Transforms/InstCombine/select-2.ll b/test/Transforms/InstCombine/select-2.ll
index a76addc..5b9deb4 100644
--- a/test/Transforms/InstCombine/select-2.ll
+++ b/test/Transforms/InstCombine/select-2.ll
@@ -1,4 +1,7 @@
-; RUN: opt < %s -instcombine -S | grep select | count 2
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK: select
+; CHECK: select
 
 ; Make sure instcombine don't fold select into operands. We don't want to emit
 ; select of two integers unless it's selecting 0 / 1.
diff --git a/test/Transforms/InstCombine/select-extractelement.ll b/test/Transforms/InstCombine/select-extractelement.ll
new file mode 100644
index 0000000..e7ea851
--- /dev/null
+++ b/test/Transforms/InstCombine/select-extractelement.ll
@@ -0,0 +1,102 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare void @v4float_user(<4 x float>) #0
+
+
+
+define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
+; CHECK-LABEL: @extract_one_select(
+; CHECK-NOT: select i1 {{.*}}, <4 x float>
+  %cmp = icmp ne i32 %c, 0
+  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
+  %extract = extractelement <4 x float> %sel, i32 2
+  ret float %extract
+}
+
+; Multiple extractelements
+define <2 x float> @extract_two_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
+; CHECK-LABEL: @extract_two_select(
+; CHECK: select i1 {{.*}}, <4 x float>
+  %cmp = icmp ne i32 %c, 0
+  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
+  %extract1 = extractelement <4 x float> %sel, i32 1
+  %extract2 = extractelement <4 x float> %sel, i32 2
+  %build1 = insertelement <2 x float> undef, float %extract1, i32 0
+  %build2 = insertelement <2 x float> %build1, float %extract2, i32 1
+  ret <2 x float> %build2
+}
+
+; Select has an extra non-extractelement user, don't change it
+define float @extract_one_select_user(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
+; CHECK-LABEL: @extract_one_select_user(
+; CHECK: select i1 {{.*}}, <4 x float>
+  %cmp = icmp ne i32 %c, 0
+  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
+  %extract = extractelement <4 x float> %sel, i32 2
+  call void @v4float_user(<4 x float> %sel)
+  ret float %extract
+}
+
+define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @extract_one_vselect_user(
+; CHECK: select <4 x i1> {{.*}}, <4 x float>
+  %cmp = icmp ne <4 x i32> %c, zeroinitializer
+  %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
+  %extract = extractelement <4 x float> %sel, i32 2
+  call void @v4float_user(<4 x float> %sel)
+  ret float %extract
+}
+
+; Extract from a vector select
+define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @extract_one_vselect(
+; CHECK-NOT: select <4 x i1>
+  %cmp = icmp ne <4 x i32> %c, zeroinitializer
+  %select = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
+  %extract = extractelement <4 x float> %select, i32 0
+  ret float %extract
+}
+
+; Multiple extractelements from a vector select
+define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @extract_two_vselect(
+; CHECK-NOT: select i1 {{.*}}, <4 x float>
+  %cmp = icmp ne <4 x i32> %c, zeroinitializer
+  %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
+  %extract1 = extractelement <4 x float> %sel, i32 1
+  %extract2 = extractelement <4 x float> %sel, i32 2
+  %build1 = insertelement <2 x float> undef, float %extract1, i32 0
+  %build2 = insertelement <2 x float> %build1, float %extract2, i32 1
+  ret <2 x float> %build2
+}
+
+; All the vector selects should be decomposed into scalar selects
+; Test multiple extractelements
+define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_vector_select(
+; CHECK-NOT: select i1 {{.*}}, <4 x float>
+entry:
+  %0 = extractelement <4 x i32> %c, i32 0
+  %tobool = icmp ne i32 %0, 0
+  %a.sink = select i1 %tobool, <4 x float> %a, <4 x float> %b
+  %1 = extractelement <4 x float> %a.sink, i32 0
+  %2 = insertelement <4 x float> undef, float %1, i32 0
+  %3 = extractelement <4 x i32> %c, i32 1
+  %tobool1 = icmp ne i32 %3, 0
+  %a.sink1 = select i1 %tobool1, <4 x float> %a, <4 x float> %b
+  %4 = extractelement <4 x float> %a.sink1, i32 1
+  %5 = insertelement <4 x float> %2, float %4, i32 1
+  %6 = extractelement <4 x i32> %c, i32 2
+  %tobool6 = icmp ne i32 %6, 0
+  %a.sink2 = select i1 %tobool6, <4 x float> %a, <4 x float> %b
+  %7 = extractelement <4 x float> %a.sink2, i32 2
+  %8 = insertelement <4 x float> %5, float %7, i32 2
+  %9 = extractelement <4 x i32> %c, i32 3
+  %tobool11 = icmp ne i32 %9, 0
+  %a.sink3 = select i1 %tobool11, <4 x float> %a, <4 x float> %b
+  %10 = extractelement <4 x float> %a.sink3, i32 3
+  %11 = insertelement <4 x float> %8, float %10, i32 3
+  ret <4 x float> %11
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index c7809f7..1458bde 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -986,6 +986,16 @@ define i32 @select_icmp_ne_0_and_8_or_1073741824(i8 %x, i32 %y) {
   ret i32 %select
 }
 
+; We can't combine here, because the cmp is scalar and the or vector.
+; Just make sure we don't assert.
+define <2 x i32> @select_icmp_eq_and_1_0_or_vector_of_2s(i32 %x, <2 x i32> %y) {
+  %and = and i32 %x, 1
+  %cmp = icmp eq i32 %and, 0
+  %or = or <2 x i32> %y, <i32 2, i32 2>
+  %select = select i1 %cmp, <2 x i32> %y, <2 x i32> %or
+  ret <2 x i32> %select
+}
+
 define i32 @test65(i64 %x) {
   %1 = and i64 %x, 16
   %2 = icmp ne i64 %1, 0
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 0bdab13..b1082f0 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -744,3 +744,39 @@ define i32 @test62(i32 %x) {
 ; CHECK-LABEL: @test62(
 ; CHECK: ashr exact i32 %x, 3
 }
+
+; PR17026
+; CHECK-LABEL: @test63(
+; CHECK-NOT: sh
+; CHECK: ret
+define void @test63(i128 %arg) {
+bb:
+  br i1 undef, label %bb1, label %bb12
+
+bb1:                                              ; preds = %bb11, %bb
+  br label %bb2
+
+bb2:                                              ; preds = %bb7, %bb1
+  br i1 undef, label %bb3, label %bb7
+
+bb3:                                              ; preds = %bb2
+  %tmp = lshr i128 %arg, 36893488147419103232
+  %tmp4 = shl i128 %tmp, 0
+  %tmp5 = or i128 %tmp4, undef
+  %tmp6 = trunc i128 %tmp5 to i16
+  br label %bb8
+
+bb7:                                              ; preds = %bb2
+  br i1 undef, label %bb8, label %bb2
+
+bb8:                                              ; preds = %bb7, %bb3
+  %tmp9 = phi i16 [ %tmp6, %bb3 ], [ undef, %bb7 ]
+  %tmp10 = icmp eq i16 %tmp9, 0
+  br i1 %tmp10, label %bb11, label %bb12
+
+bb11:                                             ; preds = %bb8
+  br i1 undef, label %bb1, label %bb12
+
+bb12:                                             ; preds = %bb11, %bb8, %bb
+  ret void
+}
diff --git a/test/Transforms/InstCombine/sincospi.ll b/test/Transforms/InstCombine/sincospi.ll
new file mode 100644
index 0000000..0d1a602
--- /dev/null
+++ b/test/Transforms/InstCombine/sincospi.ll
@@ -0,0 +1,91 @@
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefix=CHECK-FLOAT-IN-VEC
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefix=CHECK-NO-SINCOS
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefix=CHECK-NO-SINCOS
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-none-linux-gnu | FileCheck %s --check-prefix=CHECK-NO-SINCOS
+
+
+attributes #0 = { readnone nounwind }
+
+declare float @__sinpif(float %x) #0
+declare float @__cospif(float %x) #0 
+
+declare double @__sinpi(double %x) #0
+declare double @__cospi(double %x) #0 
+
+@var32 = global float 0.0
+@var64 = global double 0.0
+
+define float @test_instbased_f32() {
+       %val = load float* @var32
+       %sin = call float @__sinpif(float %val) #0
+       %cos = call float @__cospif(float %val) #0
+       %res = fadd float %sin, %cos
+       ret float %res
+; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load float* @var32
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float [[VAL]])
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
+
+; CHECK: [[VAL:%[a-z0-9]+]] = load float* @var32
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float [[VAL]])
+; CHECK: extractvalue { float, float } [[SINCOS]], 0
+; CHECK: extractvalue { float, float } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call float @__sinpif
+; CHECK-NO-SINCOS: call float @__cospif
+}
+
+define float @test_constant_f32() {
+       %sin = call float @__sinpif(float 1.0) #0
+       %cos = call float @__cospif(float 1.0) #0
+       %res = fadd float %sin, %cos
+       ret float %res
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float 1.000000e+00)
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
+
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float 1.000000e+00)
+; CHECK: extractvalue { float, float } [[SINCOS]], 0
+; CHECK: extractvalue { float, float } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call float @__sinpif
+; CHECK-NO-SINCOS: call float @__cospif
+}
+
+define double @test_instbased_f64() {
+       %val = load double* @var64
+       %sin = call double @__sinpi(double %val) #0
+       %cos = call double @__cospi(double %val) #0
+       %res = fadd double %sin, %cos
+       ret double %res
+; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load double* @var64
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]])
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK: [[VAL:%[a-z0-9]+]] = load double* @var64
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]])
+; CHECK: extractvalue { double, double } [[SINCOS]], 0
+; CHECK: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call double @__sinpi
+; CHECK-NO-SINCOS: call double @__cospi
+}
+
+define double @test_constant_f64() {
+       %sin = call double @__sinpi(double 1.0) #0
+       %cos = call double @__cospi(double 1.0) #0
+       %res = fadd double %sin, %cos
+       ret double %res
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00)
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00)
+; CHECK: extractvalue { double, double } [[SINCOS]], 0
+; CHECK: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call double @__sinpi
+; CHECK-NO-SINCOS: call double @__cospi
+}
diff --git a/test/Transforms/InstCombine/sprintf-1.ll b/test/Transforms/InstCombine/sprintf-1.ll
index 6d0ab13..78dd7aa 100644
--- a/test/Transforms/InstCombine/sprintf-1.ll
+++ b/test/Transforms/InstCombine/sprintf-1.ll
@@ -1,7 +1,7 @@
 ; Test that the sprintf library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll
index 9b666b9..b64c800 100644
--- a/test/Transforms/InstCombine/store.ll
+++ b/test/Transforms/InstCombine/store.ll
@@ -113,7 +113,8 @@ for.end:                                          ; preds = %for.cond
 ; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
+!0 = metadata !{metadata !4, metadata !4, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA"}
 !3 = metadata !{metadata !"float", metadata !1}
+!4 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/InstCombine/stpcpy_chk-1.ll b/test/Transforms/InstCombine/stpcpy_chk-1.ll
index a6d5585..8a02529 100644
--- a/test/Transforms/InstCombine/stpcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/stpcpy_chk-1.ll
@@ -61,7 +61,7 @@ define i8* @test_simplify5() {
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
 ; CHECK: @__memcpy_chk
-  %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 %len)
 ; CHECK: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11)
   ret i8* %ret
@@ -75,7 +75,7 @@ define i8* @test_simplify6() {
 
 ; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen
 ; CHECK-NEXT: getelementptr inbounds [60 x i8]* @a, i32 0, i32 [[LEN]]
-  %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %dst, i32 %len)
   ret i8* %ret
 }
@@ -93,4 +93,4 @@ define void @test_no_simplify1() {
 }
 
 declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
diff --git a/test/Transforms/InstCombine/strchr-1.ll b/test/Transforms/InstCombine/strchr-1.ll
index 5efab9e..d2c9894 100644
--- a/test/Transforms/InstCombine/strchr-1.ll
+++ b/test/Transforms/InstCombine/strchr-1.ll
@@ -52,3 +52,14 @@ define void @test_simplify4(i32 %chr) {
   store i8* %dst, i8** @chp
   ret void
 }
+
+define void @test_simplify5() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+  %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8* @strchr(i8* %src, i32 65280)
+  store i8* %dst, i8** @chp
+  ret void
+}
diff --git a/test/Transforms/InstCombine/strcpy_chk-1.ll b/test/Transforms/InstCombine/strcpy_chk-1.ll
index 5b98cf8..8e7fec7 100644
--- a/test/Transforms/InstCombine/strcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -61,7 +61,7 @@ define void @test_simplify5() {
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
 ; CHECK: @__memcpy_chk
-  %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len)
   ret void
 }
@@ -73,7 +73,7 @@ define i8* @test_simplify6() {
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
 
 ; CHECK: getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0)
-  %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   %ret = call i8* @__strcpy_chk(i8* %dst, i8* %dst, i32 %len)
   ret i8* %ret
 }
@@ -91,4 +91,4 @@ define void @test_no_simplify1() {
 }
 
 declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
diff --git a/test/Transforms/InstCombine/strrchr-1.ll b/test/Transforms/InstCombine/strrchr-1.ll
index a0bdb22..4615f5f 100644
--- a/test/Transforms/InstCombine/strrchr-1.ll
+++ b/test/Transforms/InstCombine/strrchr-1.ll
@@ -42,6 +42,17 @@ define void @test_simplify3() {
   ret void
 }
 
+define void @test_simplify4() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+  %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8* @strrchr(i8* %src, i32 65280)
+  store i8* %dst, i8** @chp
+  ret void
+}
+
 define void @test_nosimplify1(i32 %chr) {
 ; CHECK-LABEL: @test_nosimplify1(
 ; CHECK: call i8* @strrchr
diff --git a/test/Transforms/InstCombine/struct-assign-tbaa.ll b/test/Transforms/InstCombine/struct-assign-tbaa.ll
index d7a26fa..c80e31a 100644
--- a/test/Transforms/InstCombine/struct-assign-tbaa.ll
+++ b/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -35,10 +35,12 @@ define i32 (i8*, i32*, double*)*** @test2() {
   ret i32 (i8*, i32*, double*)*** %tmp2
 }
 
-; CHECK: !0 = metadata !{metadata !"float", metadata !1}
+; CHECK: !0 = metadata !{metadata !1, metadata !1, i64 0}
+; CHECK: !1 = metadata !{metadata !"float", metadata !2}
 
 !0 = metadata !{metadata !"Simple C/C++ TBAA"}
 !1 = metadata !{metadata !"omnipotent char", metadata !0}
-!2 = metadata !{metadata !"float", metadata !0}
+!2 = metadata !{metadata !5, metadata !5, i64 0}
 !3 = metadata !{i64 0, i64 4, metadata !2}
 !4 = metadata !{i64 0, i64 8, null}
+!5 = metadata !{metadata !"float", metadata !0}
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 5449656..36c523b 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -1,34 +1,34 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
 ; Optimize subtracts.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i32 @test1(i32 %A) {
-	%B = sub i32 %A, %A	
+	%B = sub i32 %A, %A
 	ret i32 %B
 ; CHECK-LABEL: @test1(
 ; CHECK: ret i32 0
 }
 
 define i32 @test2(i32 %A) {
-	%B = sub i32 %A, 0	
+	%B = sub i32 %A, 0
 	ret i32 %B
 ; CHECK-LABEL: @test2(
 ; CHECK: ret i32 %A
 }
 
 define i32 @test3(i32 %A) {
-	%B = sub i32 0, %A	
-	%C = sub i32 0, %B	
+	%B = sub i32 0, %A
+	%C = sub i32 0, %B
 	ret i32 %C
 ; CHECK-LABEL: @test3(
 ; CHECK: ret i32 %A
 }
 
 define i32 @test4(i32 %A, i32 %x) {
-	%B = sub i32 0, %A	
-	%C = sub i32 %x, %B	
+	%B = sub i32 0, %A
+	%C = sub i32 %x, %B
 	ret i32 %C
 ; CHECK-LABEL: @test4(
 ; CHECK: %C = add i32 %x, %A
@@ -36,8 +36,8 @@ define i32 @test4(i32 %A, i32 %x) {
 }
 
 define i32 @test5(i32 %A, i32 %B, i32 %C) {
-	%D = sub i32 %B, %C	
-	%E = sub i32 %A, %D	
+	%D = sub i32 %B, %C
+	%E = sub i32 %A, %D
 	ret i32 %E
 ; CHECK-LABEL: @test5(
 ; CHECK: %D1 = sub i32 %C, %B
@@ -46,17 +46,17 @@ define i32 @test5(i32 %A, i32 %B, i32 %C) {
 }
 
 define i32 @test6(i32 %A, i32 %B) {
-	%C = and i32 %A, %B	
-	%D = sub i32 %A, %C	
+	%C = and i32 %A, %B
+	%D = sub i32 %A, %C
 	ret i32 %D
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT: xor i32 %B, -1
-; CHECK-NEXT: %D = and i32 
+; CHECK-NEXT: %D = and i32
 ; CHECK-NEXT: ret i32 %D
 }
 
 define i32 @test7(i32 %A) {
-	%B = sub i32 -1, %A	
+	%B = sub i32 -1, %A
 	ret i32 %B
 ; CHECK-LABEL: @test7(
 ; CHECK: %B = xor i32 %A, -1
@@ -64,8 +64,8 @@ define i32 @test7(i32 %A) {
 }
 
 define i32 @test8(i32 %A) {
-	%B = mul i32 9, %A	
-	%C = sub i32 %B, %A	
+	%B = mul i32 9, %A
+	%C = sub i32 %B, %A
 	ret i32 %C
 ; CHECK-LABEL: @test8(
 ; CHECK: %C = shl i32 %A, 3
@@ -73,8 +73,8 @@ define i32 @test8(i32 %A) {
 }
 
 define i32 @test9(i32 %A) {
-	%B = mul i32 3, %A	
-	%C = sub i32 %A, %B	
+	%B = mul i32 3, %A
+	%C = sub i32 %A, %B
 	ret i32 %C
 ; CHECK-LABEL: @test9(
 ; CHECK: %C = mul i32 %A, -2
@@ -82,9 +82,9 @@ define i32 @test9(i32 %A) {
 }
 
 define i32 @test10(i32 %A, i32 %B) {
-	%C = sub i32 0, %A	
-	%D = sub i32 0, %B	
-	%E = mul i32 %C, %D	
+	%C = sub i32 0, %A
+	%D = sub i32 0, %B
+	%E = mul i32 %C, %D
 	ret i32 %E
 ; CHECK-LABEL: @test10(
 ; CHECK: %E = mul i32 %A, %B
@@ -92,8 +92,8 @@ define i32 @test10(i32 %A, i32 %B) {
 }
 
 define i32 @test10a(i32 %A) {
-	%C = sub i32 0, %A	
-	%E = mul i32 %C, 7	
+	%C = sub i32 0, %A
+	%E = mul i32 %C, 7
 	ret i32 %E
 ; CHECK-LABEL: @test10a(
 ; CHECK: %E = mul i32 %A, -7
@@ -101,8 +101,8 @@ define i32 @test10a(i32 %A) {
 }
 
 define i1 @test11(i8 %A, i8 %B) {
-	%C = sub i8 %A, %B	
-	%cD = icmp ne i8 %C, 0	
+	%C = sub i8 %A, %B
+	%cD = icmp ne i8 %C, 0
 	ret i1 %cD
 ; CHECK-LABEL: @test11(
 ; CHECK: %cD = icmp ne i8 %A, %B
@@ -110,8 +110,8 @@ define i1 @test11(i8 %A, i8 %B) {
 }
 
 define i32 @test12(i32 %A) {
-	%B = ashr i32 %A, 31	
-	%C = sub i32 0, %B	
+	%B = ashr i32 %A, 31
+	%C = sub i32 0, %B
 	ret i32 %C
 ; CHECK-LABEL: @test12(
 ; CHECK: %C = lshr i32 %A, 31
@@ -119,8 +119,8 @@ define i32 @test12(i32 %A) {
 }
 
 define i32 @test13(i32 %A) {
-	%B = lshr i32 %A, 31	
-	%C = sub i32 0, %B	
+	%B = lshr i32 %A, 31
+	%C = sub i32 0, %B
 	ret i32 %C
 ; CHECK-LABEL: @test13(
 ; CHECK: %C = ashr i32 %A, 31
@@ -128,9 +128,9 @@ define i32 @test13(i32 %A) {
 }
 
 define i32 @test14(i32 %A) {
-	%B = lshr i32 %A, 31	
-	%C = bitcast i32 %B to i32	
-	%D = sub i32 0, %C	
+	%B = lshr i32 %A, 31
+	%C = bitcast i32 %B to i32
+	%D = sub i32 0, %C
 	ret i32 %D
 ; CHECK-LABEL: @test14(
 ; CHECK: %D = ashr i32 %A, 31
@@ -138,17 +138,17 @@ define i32 @test14(i32 %A) {
 }
 
 define i32 @test15(i32 %A, i32 %B) {
-	%C = sub i32 0, %A	
-	%D = srem i32 %B, %C	
+	%C = sub i32 0, %A
+	%D = srem i32 %B, %C
 	ret i32 %D
 ; CHECK-LABEL: @test15(
-; CHECK: %D = srem i32 %B, %A 
+; CHECK: %D = srem i32 %B, %A
 ; CHECK: ret i32 %D
 }
 
 define i32 @test16(i32 %A) {
-	%X = sdiv i32 %A, 1123	
-	%Y = sub i32 0, %X	
+	%X = sdiv i32 %A, 1123
+	%Y = sub i32 0, %X
 	ret i32 %Y
 ; CHECK-LABEL: @test16(
 ; CHECK: %Y = sdiv i32 %A, -1123
@@ -158,8 +158,8 @@ define i32 @test16(i32 %A) {
 ; Can't fold subtract here because negation it might oveflow.
 ; PR3142
 define i32 @test17(i32 %A) {
-	%B = sub i32 0, %A	
-	%C = sdiv i32 %B, 1234	
+	%B = sub i32 0, %A
+	%C = sdiv i32 %B, 1234
 	ret i32 %C
 ; CHECK-LABEL: @test17(
 ; CHECK: %B = sub i32 0, %A
@@ -168,25 +168,25 @@ define i32 @test17(i32 %A) {
 }
 
 define i64 @test18(i64 %Y) {
-	%tmp.4 = shl i64 %Y, 2	
-	%tmp.12 = shl i64 %Y, 2	
-	%tmp.8 = sub i64 %tmp.4, %tmp.12	
+	%tmp.4 = shl i64 %Y, 2
+	%tmp.12 = shl i64 %Y, 2
+	%tmp.8 = sub i64 %tmp.4, %tmp.12
 	ret i64 %tmp.8
 ; CHECK-LABEL: @test18(
 ; CHECK: ret i64 0
 }
 
 define i32 @test19(i32 %X, i32 %Y) {
-	%Z = sub i32 %X, %Y	
-	%Q = add i32 %Z, %Y	
+	%Z = sub i32 %X, %Y
+	%Q = add i32 %Z, %Y
 	ret i32 %Q
 ; CHECK-LABEL: @test19(
 ; CHECK: ret i32 %X
 }
 
 define i1 @test20(i32 %g, i32 %h) {
-	%tmp.2 = sub i32 %g, %h	
-	%tmp.4 = icmp ne i32 %tmp.2, %g	
+	%tmp.2 = sub i32 %g, %h
+	%tmp.4 = icmp ne i32 %tmp.2, %g
 	ret i1 %tmp.4
 ; CHECK-LABEL: @test20(
 ; CHECK: %tmp.4 = icmp ne i32 %h, 0
@@ -194,8 +194,8 @@ define i1 @test20(i32 %g, i32 %h) {
 }
 
 define i1 @test21(i32 %g, i32 %h) {
-	%tmp.2 = sub i32 %g, %h	
-	%tmp.4 = icmp ne i32 %tmp.2, %g		
+	%tmp.2 = sub i32 %g, %h
+	%tmp.4 = icmp ne i32 %tmp.2, %g
         ret i1 %tmp.4
 ; CHECK-LABEL: @test21(
 ; CHECK: %tmp.4 = icmp ne i32 %h, 0
@@ -204,9 +204,9 @@ define i1 @test21(i32 %g, i32 %h) {
 
 ; PR2298
 define zeroext i1 @test22(i32 %a, i32 %b)  nounwind  {
-	%tmp2 = sub i32 0, %a	
-	%tmp4 = sub i32 0, %b	
-	%tmp5 = icmp eq i32 %tmp2, %tmp4	
+	%tmp2 = sub i32 0, %a
+	%tmp4 = sub i32 0, %b
+	%tmp5 = icmp eq i32 %tmp2, %tmp4
 	ret i1 %tmp5
 ; CHECK-LABEL: @test22(
 ; CHECK: %tmp5 = icmp eq i32 %b, %a
@@ -227,6 +227,19 @@ define i32 @test23(i8* %P, i64 %A){
 ; CHECK-NEXT: ret i32
 }
 
+define i8 @test23_as1(i8 addrspace(1)* %P, i16 %A) {
+; CHECK: @test23_as1
+; CHECK-NEXT: = trunc i16 %A to i8
+; CHECK-NEXT: ret i8
+  %B = getelementptr inbounds i8 addrspace(1)* %P, i16 %A
+  %C = ptrtoint i8 addrspace(1)* %B to i16
+  %D = trunc i16 %C to i8
+  %E = ptrtoint i8 addrspace(1)* %P to i16
+  %F = trunc i16 %E to i8
+  %G = sub i8 %D, %F
+  ret i8 %G
+}
+
 define i64 @test24(i8* %P, i64 %A){
   %B = getelementptr inbounds i8* %P, i64 %A
   %C = ptrtoint i8* %B to i64
@@ -237,6 +250,16 @@ define i64 @test24(i8* %P, i64 %A){
 ; CHECK-NEXT: ret i64 %A
 }
 
+define i16 @test24_as1(i8 addrspace(1)* %P, i16 %A) {
+; CHECK: @test24_as1
+; CHECK-NEXT: ret i16 %A
+  %B = getelementptr inbounds i8 addrspace(1)* %P, i16 %A
+  %C = ptrtoint i8 addrspace(1)* %B to i16
+  %E = ptrtoint i8 addrspace(1)* %P to i16
+  %G = sub i16 %C, %E
+  ret i16 %G
+}
+
 define i64 @test24a(i8* %P, i64 %A){
   %B = getelementptr inbounds i8* %P, i64 %A
   %C = ptrtoint i8* %B to i64
@@ -245,9 +268,21 @@ define i64 @test24a(i8* %P, i64 %A){
   ret i64 %G
 ; CHECK-LABEL: @test24a(
 ; CHECK-NEXT: sub i64 0, %A
-; CHECK-NEXT: ret i64 
+; CHECK-NEXT: ret i64
 }
 
+define i16 @test24a_as1(i8 addrspace(1)* %P, i16 %A) {
+; CHECK: @test24a_as1
+; CHECK-NEXT: sub i16 0, %A
+; CHECK-NEXT: ret i16
+  %B = getelementptr inbounds i8 addrspace(1)* %P, i16 %A
+  %C = ptrtoint i8 addrspace(1)* %B to i16
+  %E = ptrtoint i8 addrspace(1)* %P to i16
+  %G = sub i16 %E, %C
+  ret i16 %G
+}
+
+
 @Arr = external global [42 x i16]
 
 define i64 @test24b(i8* %P, i64 %A){
@@ -257,7 +292,7 @@ define i64 @test24b(i8* %P, i64 %A){
   ret i64 %G
 ; CHECK-LABEL: @test24b(
 ; CHECK-NEXT: shl nuw i64 %A, 1
-; CHECK-NEXT: ret i64 
+; CHECK-NEXT: ret i64
 }
 
 
@@ -269,7 +304,21 @@ define i64 @test25(i8* %P, i64 %A){
 ; CHECK-LABEL: @test25(
 ; CHECK-NEXT: shl nuw i64 %A, 1
 ; CHECK-NEXT: add i64 {{.*}}, -84
-; CHECK-NEXT: ret i64 
+; CHECK-NEXT: ret i64
+}
+
+@Arr_as1 = external addrspace(1) global [42 x i16]
+
+define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) {
+; CHECK: @test25_as1
+; CHECK-NEXT: %1 = trunc i64 %A to i16
+; CHECK-NEXT: shl nuw i16 %1, 1
+; CHECK-NEXT: add i16 {{.*}}, -84
+; CHECK-NEXT: ret i16
+  %B = getelementptr inbounds [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A
+  %C = ptrtoint i16 addrspace(1)* %B to i16
+  %G = sub i16 %C, ptrtoint (i16 addrspace(1)* getelementptr ([42 x i16] addrspace(1)* @Arr_as1, i64 1, i64 0) to i16)
+  ret i16 %G
 }
 
 define i32 @test26(i32 %x) {
@@ -327,3 +376,19 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) {
 ; CHECK-NEXT: sub i64 %gep1.idx, %j
 ; CHECK-NEXT: ret i64
 }
+
+define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
+; CHECK-LABEL: @test30_as1(
+; CHECK-NEXT: %gep1.idx = shl nuw i16 %i, 2
+; CHECK-NEXT: sub i16 %gep1.idx, %j
+; CHECK-NEXT: ret i16
+  %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+  %gep1 = getelementptr inbounds i32 addrspace(1)* %bit, i16 %i
+  %gep2 = getelementptr inbounds i8 addrspace(1)* %foo, i16 %j
+  %cast1 = ptrtoint i32 addrspace(1)* %gep1 to i16
+  %cast2 = ptrtoint i8 addrspace(1)* %gep2 to i16
+  %sub = sub i16 %cast1, %cast2
+  ret i16 %sub
+}
+
+
diff --git a/test/Transforms/InstCombine/vec_extract_elt.ll b/test/Transforms/InstCombine/vec_extract_elt.ll
index 166066a..3daf72e 100644
--- a/test/Transforms/InstCombine/vec_extract_elt.ll
+++ b/test/Transforms/InstCombine/vec_extract_elt.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -instcombine -S | not grep extractelement
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK-NOT: extractelement
 
 define i32 @test(float %f) {
         %tmp7 = insertelement <4 x float> undef, float %f, i32 0                ; <<4 x float>> [#uses=1]
diff --git a/test/Transforms/InstCombine/vec_insertelt.ll b/test/Transforms/InstCombine/vec_insertelt.ll
index e35fa5e..3b94920 100644
--- a/test/Transforms/InstCombine/vec_insertelt.ll
+++ b/test/Transforms/InstCombine/vec_insertelt.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep "ret <4 x i32> %A"
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: ret <4 x i32> %A
 
 ; PR1286
 define <4 x i32> @test1(<4 x i32> %A) {
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index 738e05b..3ee43dc 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -200,3 +200,31 @@ define void @test14(i16 %conv10) {
   %sext = sext <2 x i1> %cmp to <2 x i16>
   ret void
 }
+
+; Check that sequences of insert/extract element are 
+; collapsed into valid shuffle instruction with correct shuffle indexes.
+ 
+define <4 x float> @test15a(<4 x float> %LHS, <4 x float> %RHS) {
+; CHECK-LABEL: @test15a
+; CHECK-NEXT: shufflevector <4 x float> %LHS, <4 x float> %RHS, <4 x i32> <i32 4, i32 0, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x float> %tmp4
+  %tmp1 = extractelement <4 x float> %LHS, i32 0
+  %tmp2 = insertelement <4 x float> %RHS, float %tmp1, i32 1
+  %tmp3 = extractelement <4 x float> %RHS, i32 2
+  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 3
+  ret <4 x float> %tmp4
+}
+ 
+define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) {
+; CHECK-LABEL: @test15b
+; CHECK-NEXT: shufflevector <4 x float> %LHS, <4 x float> %RHS, <4 x i32> <i32 4, i32 3, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x float> %tmp5
+  %tmp0 = extractelement <4 x float> %LHS, i32 3
+  %tmp1 = insertelement <4 x float> %RHS, float %tmp0, i32 0
+  %tmp2 = extractelement <4 x float> %tmp1, i32 0
+  %tmp3 = insertelement <4 x float> %RHS, float %tmp2, i32 1
+  %tmp4 = extractelement <4 x float> %RHS, i32 2
+  %tmp5 = insertelement <4 x float> %tmp3, float %tmp4, i32 3
+  ret <4 x float> %tmp5
+}
+
diff --git a/test/Transforms/InstCombine/win-math.ll b/test/Transforms/InstCombine/win-math.ll
index df3ac93..e6e79e2 100644
--- a/test/Transforms/InstCombine/win-math.ll
+++ b/test/Transforms/InstCombine/win-math.ll
@@ -273,3 +273,23 @@ define float @float_round(float %x) nounwind readnone {
     ret float %3
 }
 
+declare float @powf(float, float)
+; win32 lacks sqrtf&fabsf, win64 lacks fabsf
+define float @float_powsqrt(float %x) nounwind readnone {
+; WIN32-LABEL: @float_powsqrt(
+; WIN32-NOT: float @sqrtf
+; WIN32: float @powf
+; WIN64-LABEL: @float_powsqrt(
+; WIN64-NOT: float @sqrtf
+; WIN64: float @powf
+; MINGW32-LABEL: @float_powsqrt(
+; MINGW32: float @sqrtf
+; MINGW32: float @fabsf
+; MINGW32-NOT: float @powf
+; MINGW64-LABEL: @float_powsqrt(
+; MINGW64: float @sqrtf
+; MINGW64: float @fabsf
+; MINGW64-NOT: float @powf
+    %1 = call float @powf(float %x, float 0.5)
+    ret float %1
+}