322 files changed, 10773 insertions, 1581 deletions
diff --git a/test/Transforms/ADCE/lit.local.cfg b/test/Transforms/ADCE/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/ADCE/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ArgumentPromotion/lit.local.cfg b/test/Transforms/ArgumentPromotion/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/ArgumentPromotion/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ArgumentPromotion/reserve-tbaa.ll b/test/Transforms/ArgumentPromotion/reserve-tbaa.ll
new file mode 100644
index 0000000..4688a83
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/reserve-tbaa.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -argpromotion -S | FileCheck %s
+
+; PR17906
+; When two pointer arguments of different types were promoted in a single
+; function, the two newly created loads used to receive the same TBAA tag.
+; This test makes sure that the TBAA tags are transferred correctly from the
+; original loads to the newly created loads when promoting pointer arguments.
+
+@a = global i32* null, align 8
+@e = global i32** @a, align 8
+@g = global i32 0, align 4
+@c = global i64 0, align 8
+@d = global i8 0, align 1
+
+define internal fastcc void @fn(i32* nocapture readonly %p1, i64* nocapture readonly %p2) { ; callee: both pointer arguments are only loaded from
+entry:
+  %0 = load i64* %p2, align 8, !tbaa !1 ; i64 load through %p2, tagged !1 (refers to "long")
+  %conv = trunc i64 %0 to i32 ; %conv has no further uses in this function
+  %1 = load i32* %p1, align 4, !tbaa !5 ; i32 load through %p1, tagged !5 (refers to "int")
+  %conv1 = trunc i32 %1 to i8
+  store i8 %conv1, i8* @d, align 1, !tbaa !7 ; only the value loaded through %p1 is written out, to @d
+  ret void
+}
+
+define i32 @main() { ; caller of @fn; the check directives below describe its expected body after -argpromotion
+entry:
+; CHECK-LABEL: main
+; CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa ![[I32:[0-9]+]]
+; CHECK: %g.val = load i32* @g, align 4, !tbaa ![[I32]]
+; CHECK: %c.val = load i64* @c, align 8, !tbaa ![[LONG:[0-9]+]]
+  %0 = load i32*** @e, align 8, !tbaa !8 ; @e is initialized to @a in the globals of this file
+  store i32* @g, i32** %0, align 8, !tbaa !8
+  %1 = load i32** @a, align 8, !tbaa !8
+  store i32 1, i32* %1, align 4, !tbaa !5 ; "int"-tagged store; the checks capture its tag number as I32
+  call fastcc void @fn(i32* @g, i64* @c) ; the checks expect loads %g.val (same tag as the store above) and %c.val (a separately captured tag, LONG)
+
+  ret i32 0
+}
+
+!1 = metadata !{metadata !2, metadata !2, i64 0} ; tag on the i64 load in @fn; references !2 ("long")
+!2 = metadata !{metadata !"long", metadata !3, i64 0} ; node named "long"; references !3
+!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} ; node named "omnipotent char"; references !4
+!4 = metadata !{metadata !"Simple C/C++ TBAA"} ; node with a name only and no further references
+!5 = metadata !{metadata !6, metadata !6, i64 0} ; tag on the i32 load in @fn and the i32 store in @main; references !6 ("int")
+!6 = metadata !{metadata !"int", metadata !3, i64 0} ; node named "int"; references !3
+!7 = metadata !{metadata !3, metadata !3, i64 0} ; tag on the i8 store in @fn; references !3
+!8 = metadata !{metadata !9, metadata !9, i64 0} ; tag on the pointer loads and store in @main; references !9
+!9 = metadata !{metadata !"any pointer", metadata !3, i64 0} ; node named "any pointer"; references !3
+; CHECK: ![[I32]] = metadata !{metadata ![[I32_TYPE:[0-9]+]], metadata ![[I32_TYPE]], i64 0}
+; CHECK: ![[I32_TYPE]] = metadata !{metadata !"int", metadata !{{.*}}, i64 0}
+; CHECK: ![[LONG]] = metadata !{metadata ![[LONG_TYPE:[0-9]+]], metadata ![[LONG_TYPE]], i64 0}
+; CHECK: ![[LONG_TYPE]] = metadata !{metadata !"long", metadata !{{.*}}, i64 0}
diff --git a/test/Transforms/BBVectorize/X86/pr15289.ll b/test/Transforms/BBVectorize/X86/pr15289.ll
index 07cc5d8..42bd0ff 100644
--- a/test/Transforms/BBVectorize/X86/pr15289.ll
+++ b/test/Transforms/BBVectorize/X86/pr15289.ll
@@ -45,7 +45,7 @@ entry:
   %13 = fmul double %3, %12
   %14 = fmul double %3, undef
   %15 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 0
-  store double %13, double* %15, align 8, !tbaa !0
+  store double %13, double* %15, align 8
   %16 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 1
   %17 = fmul double undef, %8
   %18 = fmul double %17, undef
@@ -54,7 +54,7 @@ entry:
   %21 = fmul double %3, %19
   %22 = fsub double -0.000000e+00, %21
   %23 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 0
-  store double %22, double* %23, align 8, !tbaa !0
+  store double %22, double* %23, align 8
   %24 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 1
   %25 = fmul double undef, 0x3FE42F601A8C6794
   %26 = fmul double undef, 2.000000e+00
@@ -62,7 +62,7 @@ entry:
   %28 = fmul double %6, undef
   %29 = fsub double undef, %28
   %30 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 0
-  store double undef, double* %30, align 8, !tbaa !0
+  store double undef, double* %30, align 8
   %31 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 1
   %32 = fmul double undef, %17
   %33 = fmul double undef, %17
@@ -71,7 +71,7 @@ entry:
   %36 = fsub double undef, %35
   %37 = fmul double %3, %34
   %38 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 0
-  store double %37, double* %38, align 8, !tbaa !0
+  store double %37, double* %38, align 8
   %39 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 1
   %40 = fmul double undef, %8
   %41 = fmul double undef, %40
@@ -79,20 +79,17 @@ entry:
   %43 = fsub double undef, %42
   %44 = fmul double %3, %43
   %45 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 0
-  store double %13, double* %45, align 8, !tbaa !0
+  store double %13, double* %45, align 8
   %46 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 1
   %47 = fsub double -0.000000e+00, %14
-  store double %47, double* %16, align 8, !tbaa !0
-  store double undef, double* %24, align 8, !tbaa !0
-  store double -0.000000e+00, double* %31, align 8, !tbaa !0
-  store double undef, double* %39, align 8, !tbaa !0
-  store double undef, double* %46, align 8, !tbaa !0
+  store double %47, double* %16, align 8
+  store double undef, double* %24, align 8
+  store double -0.000000e+00, double* %31, align 8
+  store double undef, double* %39, align 8
+  store double undef, double* %46, align 8
   ret void
 }
 
 attributes #0 = { nounwind uwtable }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
-
-!0 = metadata !{metadata !"alias set 17: real(kind=8)", metadata !1}
-!1 = metadata !{metadata !1}
diff --git a/test/Transforms/BBVectorize/X86/wr-aliases.ll b/test/Transforms/BBVectorize/X86/wr-aliases.ll
new file mode 100644
index 0000000..34b1d4e
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/wr-aliases.ll
@@ -0,0 +1,144 @@
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -bb-vectorize -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%class.QBezier.15 = type { double, double, double, double, double, double, double, double }
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
+
+; Function Attrs: uwtable
+declare fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval nocapture readonly align 8) #1
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #0
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #0
+
+define void @main_arrayctor.cont([10 x %class.QBezier.15]* %beziers, %class.QBezier.15* %agg.tmp.i, %class.QBezier.15* %agg.tmp55.i, %class.QBezier.15* %agg.tmp56.i) { ; the only function defined in this test; run through -bb-vectorize
+newFuncRoot:
+  br label %arrayctor.cont
+
+arrayctor.cont.ret.exitStub:                      ; preds = %arrayctor.cont
+  ret void
+
+; CHECK-LABEL: @main_arrayctor.cont
+; CHECK: <2 x double>
+; CHECK: @_ZL12printQBezier7QBezier
+; CHECK: store double %mul8.i, double* %x3.i, align 16
+; CHECK: load double* %x3.i, align 16
+; CHECK: ret
+
+arrayctor.cont:                                   ; preds = %newFuncRoot
+  %ref.tmp.sroa.0.0.idx = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 0
+  store double 1.000000e+01, double* %ref.tmp.sroa.0.0.idx, align 16
+  %ref.tmp.sroa.2.0.idx1 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 1
+  store double 2.000000e+01, double* %ref.tmp.sroa.2.0.idx1, align 8
+  %ref.tmp.sroa.3.0.idx2 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2
+  store double 3.000000e+01, double* %ref.tmp.sroa.3.0.idx2, align 16
+  %ref.tmp.sroa.4.0.idx3 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 3
+  store double 4.000000e+01, double* %ref.tmp.sroa.4.0.idx3, align 8
+  %ref.tmp.sroa.5.0.idx4 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 4
+  store double 5.000000e+01, double* %ref.tmp.sroa.5.0.idx4, align 16
+  %ref.tmp.sroa.6.0.idx5 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 5
+  store double 6.000000e+01, double* %ref.tmp.sroa.6.0.idx5, align 8
+  %ref.tmp.sroa.7.0.idx6 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 6
+  store double 7.000000e+01, double* %ref.tmp.sroa.7.0.idx6, align 16
+  %ref.tmp.sroa.8.0.idx7 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 7
+  store double 8.000000e+01, double* %ref.tmp.sroa.8.0.idx7, align 8
+  %add.ptr = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1
+  %v0 = bitcast %class.QBezier.15* %agg.tmp.i to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %v0)
+  %v1 = bitcast %class.QBezier.15* %agg.tmp55.i to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %v1)
+  %v2 = bitcast %class.QBezier.15* %agg.tmp56.i to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %v2)
+  %v3 = bitcast [10 x %class.QBezier.15]* %beziers to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v0, i8* %v3, i64 64, i32 8, i1 false)
+  call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp.i) ; first call to the print routine; the store/load pair named in the checks follows it
+  %x2.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2
+  %v4 = load double* %x2.i, align 16
+  %x3.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 4
+  %v5 = load double* %x3.i, align 16
+  %add.i = fadd double %v4, %v5
+  %mul.i = fmul double 5.000000e-01, %add.i
+  %x1.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 0
+  %v6 = load double* %x1.i, align 16
+  %add3.i = fadd double %v4, %v6
+  %mul4.i = fmul double 5.000000e-01, %add3.i
+  %x25.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 2
+  store double %mul4.i, double* %x25.i, align 16
+  %v7 = load double* %x3.i, align 16
+  %x4.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 6
+  %v8 = load double* %x4.i, align 16
+  %add7.i = fadd double %v7, %v8
+  %mul8.i = fmul double 5.000000e-01, %add7.i
+  store double %mul8.i, double* %x3.i, align 16 ; write to %x3.i; the checks require this scalar store to appear in the output
+  %v9 = load double* %x1.i, align 16
+  %x111.i = getelementptr inbounds %class.QBezier.15* %add.ptr, i64 0, i32 0
+  store double %v9, double* %x111.i, align 16
+  %v10 = load double* %x25.i, align 16
+  %add15.i = fadd double %mul.i, %v10
+  %mul16.i = fmul double 5.000000e-01, %add15.i
+  %x317.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 4
+  store double %mul16.i, double* %x317.i, align 16
+  %v11 = load double* %x3.i, align 16 ; re-read of %x3.i after the %mul8.i store above; the checks expect a load of %x3.i after that store
+  %add19.i = fadd double %mul.i, %v11
+  %mul20.i = fmul double 5.000000e-01, %add19.i
+  store double %mul20.i, double* %x2.i, align 16
+  %v12 = load double* %x317.i, align 16
+  %add24.i = fadd double %v12, %mul20.i
+  %mul25.i = fmul double 5.000000e-01, %add24.i
+  store double %mul25.i, double* %x1.i, align 16
+  %x427.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 6
+  store double %mul25.i, double* %x427.i, align 16
+  %y2.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 3
+  %v13 = load double* %y2.i, align 8
+  %y3.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 5
+  %v14 = load double* %y3.i, align 8
+  %add28.i = fadd double %v13, %v14
+  %div.i = fmul double 5.000000e-01, %add28.i
+  %y1.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 1
+  %v15 = load double* %y1.i, align 8
+  %add30.i = fadd double %v13, %v15
+  %mul31.i = fmul double 5.000000e-01, %add30.i
+  %y232.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 3
+  store double %mul31.i, double* %y232.i, align 8
+  %v16 = load double* %y3.i, align 8
+  %y4.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 7
+  %v17 = load double* %y4.i, align 8
+  %add34.i = fadd double %v16, %v17
+  %mul35.i = fmul double 5.000000e-01, %add34.i
+  store double %mul35.i, double* %y3.i, align 8
+  %v18 = load double* %y1.i, align 8
+  %y138.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 1
+  store double %v18, double* %y138.i, align 8
+  %v19 = load double* %y232.i, align 8
+  %add42.i = fadd double %div.i, %v19
+  %mul43.i = fmul double 5.000000e-01, %add42.i
+  %y344.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 5
+  store double %mul43.i, double* %y344.i, align 8
+  %v20 = load double* %y3.i, align 8
+  %add46.i = fadd double %div.i, %v20
+  %mul47.i = fmul double 5.000000e-01, %add46.i
+  store double %mul47.i, double* %y2.i, align 8
+  %v21 = load double* %y344.i, align 8
+  %add51.i = fadd double %v21, %mul47.i
+  %mul52.i = fmul double 5.000000e-01, %add51.i
+  store double %mul52.i, double* %y1.i, align 8
+  %y454.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 7
+  store double %mul52.i, double* %y454.i, align 8
+  %v22 = bitcast %class.QBezier.15* %add.ptr to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v1, i8* %v22, i64 64, i32 8, i1 false)
+  call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp55.i)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v2, i8* %v3, i64 64, i32 8, i1 false)
+  call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp56.i)
+  call void @llvm.lifetime.end(i64 64, i8* %v0)
+  call void @llvm.lifetime.end(i64 64, i8* %v1)
+  call void @llvm.lifetime.end(i64 64, i8* %v2)
+  br label %arrayctor.cont.ret.exitStub
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/BBVectorize/lit.local.cfg b/test/Transforms/BBVectorize/lit.local.cfg
index a8ad0f1..ba763cf 100644
--- a/test/Transforms/BBVectorize/lit.local.cfg
+++ b/test/Transforms/BBVectorize/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Transforms/BBVectorize/xcore/no-vector-registers.ll b/test/Transforms/BBVectorize/xcore/no-vector-registers.ll
new file mode 100644
index 0000000..9ebdb73
--- /dev/null
+++ b/test/Transforms/BBVectorize/xcore/no-vector-registers.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S -mtriple=xcore | FileCheck %s
+
+target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32"
+target triple = "xcore"
+
+; Basic depth-3 chain: two parallel fsub -> fmul -> fadd sequences that the checks below require to stay scalar for the xcore triple
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+; CHECK-LABEL: @test1(
+; CHECK-NOT: <2 x double>
+  %X1 = fsub double %A1, %B1 ; first sequence: %X1 -> %Y1 -> %Z1
+  %X2 = fsub double %A2, %B2 ; second sequence: %X2 -> %Y2 -> %Z2
+  %Y1 = fmul double %X1, %A1
+  %Y2 = fmul double %X2, %A2
+  %Z1 = fadd double %Y1, %B1
+  %Z2 = fadd double %Y2, %B2
+  %R  = fmul double %Z1, %Z2 ; combines the results of both sequences into the return value
+  ret double %R
+}
diff --git a/test/Transforms/BlockPlacement/basictest.ll b/test/Transforms/BlockPlacement/basictest.ll
deleted file mode 100644
index 47b5079..0000000
--- a/test/Transforms/BlockPlacement/basictest.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt < %s -block-placement -disable-output -print-function 2> /dev/null
-
-define i32 @test() {
-        br i1 true, label %X, label %Y
-
-A:              ; preds = %Y, %X
-        ret i32 0
-
-X:              ; preds = %0
-        br label %A
-
-Y:              ; preds = %0
-        br label %A
-}
-
diff --git a/test/Transforms/BlockPlacement/lit.local.cfg b/test/Transforms/BlockPlacement/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/BlockPlacement/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CodeExtractor/lit.local.cfg b/test/Transforms/CodeExtractor/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/CodeExtractor/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CodeGenPrepare/lit.local.cfg b/test/Transforms/CodeGenPrepare/lit.local.cfg
deleted file mode 100644
index c6106e4..0000000
--- a/test/Transforms/CodeGenPrepare/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/ConstProp/lit.local.cfg b/test/Transforms/ConstProp/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/ConstProp/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ConstProp/loads.ll b/test/Transforms/ConstProp/loads.ll
index 795dc07..d05db47 100644
--- a/test/Transforms/ConstProp/loads.ll
+++ b/test/Transforms/ConstProp/loads.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -default-data-layout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=LE
-; RUN: opt < %s -default-data-layout="E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=BE
+; RUN: opt < %s -default-data-layout="e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=LE
+; RUN: opt < %s -default-data-layout="E-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=BE
 
 ; {{ 0xDEADBEEF, 0xBA }, 0xCAFEBABE}
 @g1 = constant {{i32,i8},i32} {{i32,i8} { i32 -559038737, i8 186 }, i32 -889275714 }
@@ -155,7 +155,7 @@ entry:
 @test12g = private constant [6 x i8] c"a\00b\00\00\00"
 
 define i16 @test12() {
-  %a = load i16* getelementptr inbounds ([3 x i16]* bitcast ([6 x i8]* @test12g to [3 x i16]*), i32 0, i64 1) 
+  %a = load i16* getelementptr inbounds ([3 x i16]* bitcast ([6 x i8]* @test12g to [3 x i16]*), i32 0, i64 1)
   ret i16 %a
 
 ; 0x0062
@@ -194,6 +194,20 @@ entry:
 ; BE: ret i64 1
 }
 
+; Check loads from a constant array of pointers in a non-default address space (the RUN lines give addrspace(1) pointers as p1:16:16:16)
+@g6_as1 = constant [2 x i8 addrspace(1)*] [i8 addrspace(1)* inttoptr (i16 1 to i8 addrspace(1)*), i8 addrspace(1)* inttoptr (i16 2 to i8 addrspace(1)*)]
+define i16 @test14_as1() nounwind {
+entry:
+  %tmp = load i16* bitcast ([2 x i8 addrspace(1)*]* @g6_as1 to i16*) ; i16 load from the start of @g6_as1; both endiannesses are expected to yield 1
+  ret i16 %tmp
+
+; LE: @test14_as1
+; LE: ret i16 1
+
+; BE: @test14_as1
+; BE: ret i16 1
+}
+
 define i64 @test15() nounwind {
 entry:
   %tmp = load i64* bitcast (i8** getelementptr inbounds ([2 x i8*]* @g6, i32 0, i64 1) to i64*)
diff --git a/test/Transforms/ConstantMerge/align.ll b/test/Transforms/ConstantMerge/align.ll
new file mode 100644
index 0000000..c1cbcb3
--- /dev/null
+++ b/test/Transforms/ConstantMerge/align.ll
@@ -0,0 +1,28 @@
+; RUN: opt -constmerge -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+
+; Test that with target data (the datalayout string above) we do merge the two constants and mark the alignment as 4
+@T1A = internal unnamed_addr constant i32 1 ; no explicit alignment
+@T1B = internal unnamed_addr constant i32 1, align 2 ; same value as @T1A, explicit alignment 2
+; CHECK: @T1B = internal unnamed_addr constant i32 1, align 4
+
+define void @test1(i32** %P1, i32** %P2) {
+  store i32* @T1A, i32** %P1 ; stores the address of @T1A, so the constant has a use
+  store i32* @T1B, i32** %P2 ; stores the address of @T1B, so the constant has a use
+  ret void
+}
+
+
+; Test that even with target data we set the alignment to the maximum of the
+; two values if both constants have explicit alignments.
+@T2A = internal unnamed_addr constant i32 2, align 1 ; explicit alignment 1
+@T2B = internal unnamed_addr constant i32 2, align 2 ; explicit alignment 2, the larger of the two
+; CHECK: @T2B = internal unnamed_addr constant i32 2, align 2
+
+define void @test2(i32** %P1, i32** %P2) {
+  store i32* @T2A, i32** %P1 ; stores the address of @T2A, so the constant has a use
+  store i32* @T2B, i32** %P2 ; stores the address of @T2B, so the constant has a use
+  ret void
+}
diff --git a/test/Transforms/ConstantMerge/lit.local.cfg b/test/Transforms/ConstantMerge/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/ConstantMerge/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CorrelatedValuePropagation/lit.local.cfg b/test/Transforms/CorrelatedValuePropagation/lit.local.cfg
deleted file mode 100644
index c6106e4..0000000
--- a/test/Transforms/CorrelatedValuePropagation/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
index 4cb742d..26982db 100644
--- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
+++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
@@ -44,11 +44,12 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ; CHECK: attributes [[NUW]] = { nounwind }
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!30}
 !0 = metadata !{i32 524545, metadata !1, metadata !"name", metadata !2, i32 8, metadata !6} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 524334, metadata !28, metadata !2, metadata !"vfs_addname", metadata !"vfs_addname", metadata !"vfs_addname", i32 12, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 524329, metadata !28} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 524305, metadata !28, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, metadata !"", i32 0, metadata !29, metadata !29, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !28, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{i32 524309, metadata !28, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6, metadata !6, metadata !9, metadata !9, metadata !9}
 !6 = metadata !{i32 524303, metadata !28, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
 !7 = metadata !{i32 524326, metadata !28, metadata !2, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
@@ -61,7 +62,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !14 = metadata !{i32 524299, metadata !28, metadata !1, i32 12, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{i32 524545, metadata !16, metadata !"name", metadata !2, i32 17, metadata !6} ; [ DW_TAG_arg_variable ]
 !16 = metadata !{i32 524334, metadata !28, metadata !2, metadata !"add_name_internal", metadata !"add_name_internal", metadata !"add_name_internal", i32 22, metadata !17, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 524309, metadata !28, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!17 = metadata !{i32 524309, metadata !28, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !18 = metadata !{metadata !6, metadata !6, metadata !9, metadata !9, metadata !19, metadata !9}
 !19 = metadata !{i32 524324, metadata !28, metadata !2, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
 !20 = metadata !{i32 524545, metadata !16, metadata !"len", metadata !2, i32 18, metadata !9} ; [ DW_TAG_arg_variable ]
@@ -74,3 +75,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !27 = metadata !{i32 26, i32 0, metadata !25, null}
 !28 = metadata !{metadata !"tail.c", metadata !"/Users/echeng/LLVM/radars/r7927803/"}
 !29 = metadata !{i32 0}
+!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
index 21de114..7bdcbf5 100644
--- a/test/Transforms/DeadArgElim/dbginfo.ll
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -35,13 +35,14 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21}
 
 !0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.2 (trunk 165305)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !8, metadata !9}
 !5 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"run", metadata !"run", metadata !"", i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
 !6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !1, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !1, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
 
 ; CHECK: metadata !"dead_vararg"{{.*}}void ()* @_ZN12_GLOBAL__N_111dead_varargEz
@@ -61,3 +62,4 @@ entry:
 !18 = metadata !{i32 786443, metadata !20, metadata !8, i32 5, i32 23, i32 1} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
 !19 = metadata !{i32 5, i32 30, metadata !18, null}
 !20 = metadata !{metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di"}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/DeadArgElim/linkage.ll b/test/Transforms/DeadArgElim/linkage.ll
new file mode 100644
index 0000000..f475484
--- /dev/null
+++ b/test/Transforms/DeadArgElim/linkage.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -deadargelim -S | FileCheck %s
+
+; rdar://11546243
+%struct.A = type { i8 }
+
+define available_externally void @_Z17externallyDefinedP1A(%struct.A* %a) { ; available_externally definition; %a is not used anywhere in the body
+entry:
+  call void @_Z3foov()
+  ret void
+}
+
+declare void @_Z3foov()
+
+define void @_Z4testP1A(%struct.A* %a) { ; caller used to observe what -deadargelim does to the call below
+; CHECK: @_Z4testP1A
+; CHECK: @_Z17externallyDefinedP1A(%struct.A* %a)
+
+entry:
+  call void @_Z17externallyDefinedP1A(%struct.A* %a) ; the checks expect %a to still be passed here after the pass has run
+  ret void
+}
diff --git a/test/Transforms/DeadArgElim/lit.local.cfg b/test/Transforms/DeadArgElim/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/DeadArgElim/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/DeadStoreElimination/inst-limits.ll b/test/Transforms/DeadStoreElimination/inst-limits.ll
new file mode 100644
index 0000000..9df8801
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/inst-limits.ll
@@ -0,0 +1,261 @@
+; RUN: opt -S -dse < %s | FileCheck %s
+
+; If there are two stores to the same location, DSE should be able to remove
+; the first store if the two stores are separated by no more than 98
+; instructions. The existence of debug intrinsics between the stores should
+; not affect this instruction limit.
+
+@x = global i32 0, align 4
+
+; Function Attrs: nounwind
+define i32 @test_within_limit() {
+entry:
+  ; The first store; later there is a second store to the same location,
+  ; so this store should be optimized away by DSE.
+  ; CHECK-NOT: store i32 1, i32* @x, align 4
+  store i32 1, i32* @x, align 4
+
+  ; Insert 98 dummy instructions between the two stores
+  %0 = bitcast i32 0 to i32
+  %1 = bitcast i32 0 to i32
+  %2 = bitcast i32 0 to i32
+  %3 = bitcast i32 0 to i32
+  %4 = bitcast i32 0 to i32
+  %5 = bitcast i32 0 to i32
+  %6 = bitcast i32 0 to i32
+  %7 = bitcast i32 0 to i32
+  %8 = bitcast i32 0 to i32
+  %9 = bitcast i32 0 to i32
+  %10 = bitcast i32 0 to i32
+  %11 = bitcast i32 0 to i32
+  %12 = bitcast i32 0 to i32
+  %13 = bitcast i32 0 to i32
+  %14 = bitcast i32 0 to i32
+  %15 = bitcast i32 0 to i32
+  %16 = bitcast i32 0 to i32
+  %17 = bitcast i32 0 to i32
+  %18 = bitcast i32 0 to i32
+  %19 = bitcast i32 0 to i32
+  %20 = bitcast i32 0 to i32
+  %21 = bitcast i32 0 to i32
+  %22 = bitcast i32 0 to i32
+  %23 = bitcast i32 0 to i32
+  %24 = bitcast i32 0 to i32
+  %25 = bitcast i32 0 to i32
+  %26 = bitcast i32 0 to i32
+  %27 = bitcast i32 0 to i32
+  %28 = bitcast i32 0 to i32
+  %29 = bitcast i32 0 to i32
+  %30 = bitcast i32 0 to i32
+  %31 = bitcast i32 0 to i32
+  %32 = bitcast i32 0 to i32
+  %33 = bitcast i32 0 to i32
+  %34 = bitcast i32 0 to i32
+  %35 = bitcast i32 0 to i32
+  %36 = bitcast i32 0 to i32
+  %37 = bitcast i32 0 to i32
+  %38 = bitcast i32 0 to i32
+  %39 = bitcast i32 0 to i32
+  %40 = bitcast i32 0 to i32
+  %41 = bitcast i32 0 to i32
+  %42 = bitcast i32 0 to i32
+  %43 = bitcast i32 0 to i32
+  %44 = bitcast i32 0 to i32
+  %45 = bitcast i32 0 to i32
+  %46 = bitcast i32 0 to i32
+  %47 = bitcast i32 0 to i32
+  %48 = bitcast i32 0 to i32
+  %49 = bitcast i32 0 to i32
+  %50 = bitcast i32 0 to i32
+  %51 = bitcast i32 0 to i32
+  %52 = bitcast i32 0 to i32
+  %53 = bitcast i32 0 to i32
+  %54 = bitcast i32 0 to i32
+  %55 = bitcast i32 0 to i32
+  %56 = bitcast i32 0 to i32
+  %57 = bitcast i32 0 to i32
+  %58 = bitcast i32 0 to i32
+  %59 = bitcast i32 0 to i32
+  %60 = bitcast i32 0 to i32
+  %61 = bitcast i32 0 to i32
+  %62 = bitcast i32 0 to i32
+  %63 = bitcast i32 0 to i32
+  %64 = bitcast i32 0 to i32
+  %65 = bitcast i32 0 to i32
+  %66 = bitcast i32 0 to i32
+  %67 = bitcast i32 0 to i32
+  %68 = bitcast i32 0 to i32
+  %69 = bitcast i32 0 to i32
+  %70 = bitcast i32 0 to i32
+  %71 = bitcast i32 0 to i32
+  %72 = bitcast i32 0 to i32
+  %73 = bitcast i32 0 to i32
+  %74 = bitcast i32 0 to i32
+  %75 = bitcast i32 0 to i32
+  %76 = bitcast i32 0 to i32
+  %77 = bitcast i32 0 to i32
+  %78 = bitcast i32 0 to i32
+  %79 = bitcast i32 0 to i32
+  %80 = bitcast i32 0 to i32
+  %81 = bitcast i32 0 to i32
+  %82 = bitcast i32 0 to i32
+  %83 = bitcast i32 0 to i32
+  %84 = bitcast i32 0 to i32
+  %85 = bitcast i32 0 to i32
+  %86 = bitcast i32 0 to i32
+  %87 = bitcast i32 0 to i32
+  %88 = bitcast i32 0 to i32
+  %89 = bitcast i32 0 to i32
+  %90 = bitcast i32 0 to i32
+  %91 = bitcast i32 0 to i32
+  %92 = bitcast i32 0 to i32
+  %93 = bitcast i32 0 to i32
+  %94 = bitcast i32 0 to i32
+  %95 = bitcast i32 0 to i32
+  %96 = bitcast i32 0 to i32
+  %97 = bitcast i32 0 to i32
+
+  ; Insert a meaningless dbg.value intrinsic; it should have no
+  ; effect on the working of DSE in any way.
+  call void @llvm.dbg.value(metadata !12, i64 0, metadata !10)
+
+  ; CHECK:  store i32 -1, i32* @x, align 4
+  store i32 -1, i32* @x, align 4
+  ret i32 0
+}
+
+; Function Attrs: nounwind
+define i32 @test_outside_limit() {
+entry:
+  ; The first store; later there is a second store to the same location
+  ; CHECK: store i32 1, i32* @x, align 4
+  store i32 1, i32* @x, align 4
+
+  ; Insert 99 dummy instructions between the two stores; this is
+  ; one instruction too many for DSE to take place.
+  %0 = bitcast i32 0 to i32
+  %1 = bitcast i32 0 to i32
+  %2 = bitcast i32 0 to i32
+  %3 = bitcast i32 0 to i32
+  %4 = bitcast i32 0 to i32
+  %5 = bitcast i32 0 to i32
+  %6 = bitcast i32 0 to i32
+  %7 = bitcast i32 0 to i32
+  %8 = bitcast i32 0 to i32
+  %9 = bitcast i32 0 to i32
+  %10 = bitcast i32 0 to i32
+  %11 = bitcast i32 0 to i32
+  %12 = bitcast i32 0 to i32
+  %13 = bitcast i32 0 to i32
+  %14 = bitcast i32 0 to i32
+  %15 = bitcast i32 0 to i32
+  %16 = bitcast i32 0 to i32
+  %17 = bitcast i32 0 to i32
+  %18 = bitcast i32 0 to i32
+  %19 = bitcast i32 0 to i32
+  %20 = bitcast i32 0 to i32
+  %21 = bitcast i32 0 to i32
+  %22 = bitcast i32 0 to i32
+  %23 = bitcast i32 0 to i32
+  %24 = bitcast i32 0 to i32
+  %25 = bitcast i32 0 to i32
+  %26 = bitcast i32 0 to i32
+  %27 = bitcast i32 0 to i32
+  %28 = bitcast i32 0 to i32
+  %29 = bitcast i32 0 to i32
+  %30 = bitcast i32 0 to i32
+  %31 = bitcast i32 0 to i32
+  %32 = bitcast i32 0 to i32
+  %33 = bitcast i32 0 to i32
+  %34 = bitcast i32 0 to i32
+  %35 = bitcast i32 0 to i32
+  %36 = bitcast i32 0 to i32
+  %37 = bitcast i32 0 to i32
+  %38 = bitcast i32 0 to i32
+  %39 = bitcast i32 0 to i32
+  %40 = bitcast i32 0 to i32
+  %41 = bitcast i32 0 to i32
+  %42 = bitcast i32 0 to i32
+  %43 = bitcast i32 0 to i32
+  %44 = bitcast i32 0 to i32
+  %45 = bitcast i32 0 to i32
+  %46 = bitcast i32 0 to i32
+  %47 = bitcast i32 0 to i32
+  %48 = bitcast i32 0 to i32
+  %49 = bitcast i32 0 to i32
+  %50 = bitcast i32 0 to i32
+  %51 = bitcast i32 0 to i32
+  %52 = bitcast i32 0 to i32
+  %53 = bitcast i32 0 to i32
+  %54 = bitcast i32 0 to i32
+  %55 = bitcast i32 0 to i32
+  %56 = bitcast i32 0 to i32
+  %57 = bitcast i32 0 to i32
+  %58 = bitcast i32 0 to i32
+  %59 = bitcast i32 0 to i32
+  %60 = bitcast i32 0 to i32
+  %61 = bitcast i32 0 to i32
+  %62 = bitcast i32 0 to i32
+  %63 = bitcast i32 0 to i32
+  %64 = bitcast i32 0 to i32
+  %65 = bitcast i32 0 to i32
+  %66 = bitcast i32 0 to i32
+  %67 = bitcast i32 0 to i32
+  %68 = bitcast i32 0 to i32
+  %69 = bitcast i32 0 to i32
+  %70 = bitcast i32 0 to i32
+  %71 = bitcast i32 0 to i32
+  %72 = bitcast i32 0 to i32
+  %73 = bitcast i32 0 to i32
+  %74 = bitcast i32 0 to i32
+  %75 = bitcast i32 0 to i32
+  %76 = bitcast i32 0 to i32
+  %77 = bitcast i32 0 to i32
+  %78 = bitcast i32 0 to i32
+  %79 = bitcast i32 0 to i32
+  %80 = bitcast i32 0 to i32
+  %81 = bitcast i32 0 to i32
+  %82 = bitcast i32 0 to i32
+  %83 = bitcast i32 0 to i32
+  %84 = bitcast i32 0 to i32
+  %85 = bitcast i32 0 to i32
+  %86 = bitcast i32 0 to i32
+  %87 = bitcast i32 0 to i32
+  %88 = bitcast i32 0 to i32
+  %89 = bitcast i32 0 to i32
+  %90 = bitcast i32 0 to i32
+  %91 = bitcast i32 0 to i32
+  %92 = bitcast i32 0 to i32
+  %93 = bitcast i32 0 to i32
+  %94 = bitcast i32 0 to i32
+  %95 = bitcast i32 0 to i32
+  %96 = bitcast i32 0 to i32
+  %97 = bitcast i32 0 to i32
+  %98 = bitcast i32 0 to i32
+
+  ; CHECK:  store i32 -1, i32* @x, align 4
+  store i32 -1, i32* @x, align 4
+  ret i32 0
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !13}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/tmp/test.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"test.c", metadata !"/home/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_within_limit", metadata !"test_within_limit", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @test_within_limit, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [test_within_limit]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/tmp/test.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !5, i32 1, metadata !8, i32 0, i32 1, i32* @x, null} ; [ DW_TAG_variable ] [x] [line 1] [def]
+!11 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!12 = metadata !{i32* undef}
+
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/DeadStoreElimination/lit.local.cfg b/test/Transforms/DeadStoreElimination/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/DeadStoreElimination/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/DebugIR/lit.local.cfg b/test/Transforms/DebugIR/lit.local.cfg
deleted file mode 100644
index c6106e4..0000000
--- a/test/Transforms/DebugIR/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/DebugIR/simple-addrspace.ll b/test/Transforms/DebugIR/simple-addrspace.ll
new file mode 100644
index 0000000..6bea9b2
--- /dev/null
+++ b/test/Transforms/DebugIR/simple-addrspace.ll
@@ -0,0 +1,13 @@
+; RUN: opt -debug-ir -S %s -o - | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:16:16:16"
+
+define void @foo(i32 addrspace(1)*) nounwind {
+  ret void
+}
+
+; Make sure the addrspace(1) pointer size is reported as 16 bits
+
+; CHECK: metadata !"i32 addrspace(1)*", i32 0, i64 16, i64 2, i64 0, i32 0
+
+
diff --git a/test/Transforms/EarlyCSE/lit.local.cfg b/test/Transforms/EarlyCSE/lit.local.cfg
deleted file mode 100644
index c6106e4..0000000
--- a/test/Transforms/EarlyCSE/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/FunctionAttrs/annotate-1.ll b/test/Transforms/FunctionAttrs/annotate-1.ll
index adb7bce..9fba7a9 100644
--- a/test/Transforms/FunctionAttrs/annotate-1.ll
+++ b/test/Transforms/FunctionAttrs/annotate-1.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -functionattrs -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -functionattrs -S | FileCheck -check-prefix=POSIX %s
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -functionattrs -S | FileCheck -check-prefix=CHECK-POSIX %s
 
 declare i8* @fopen(i8*, i8*)
 ; CHECK: declare noalias i8* @fopen(i8* nocapture readonly, i8* nocapture readonly) [[G0:#[0-9]]] 
diff --git a/test/Transforms/FunctionAttrs/lit.local.cfg b/test/Transforms/FunctionAttrs/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/FunctionAttrs/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GCOVProfiling/linkagename.ll b/test/Transforms/GCOVProfiling/linkagename.ll
index 9453e1e..ed3a5bd 100644
--- a/test/Transforms/GCOVProfiling/linkagename.ll
+++ b/test/Transforms/GCOVProfiling/linkagename.ll
@@ -12,6 +12,7 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10}
 !llvm.gcov = !{!9}
 
 !0 = metadata !{i32 786449, metadata !2, i32 4, metadata !"clang version 3.3 (trunk 177323)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/nlewycky/hello.cc] [DW_LANG_C_plus_plus]
@@ -20,8 +21,9 @@ entry:
 !3 = metadata !{i32 0}
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!6 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{null}
 !8 = metadata !{i32 1, i32 0, metadata !5, null}
 
 
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/GCOVProfiling/lit.local.cfg b/test/Transforms/GCOVProfiling/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/GCOVProfiling/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GCOVProfiling/version.ll b/test/Transforms/GCOVProfiling/version.ll
index a90290f..2f1bd70 100644
--- a/test/Transforms/GCOVProfiling/version.ll
+++ b/test/Transforms/GCOVProfiling/version.ll
@@ -16,6 +16,7 @@ define void @test() {
 
 !llvm.gcov = !{!9}
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12}
 
 !0 = metadata !{i32 786449, metadata !11, i32 4, metadata !"clang version 3.3 (trunk 176994)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, null, metadata !""} ; [ DW_TAG_compile_unit ] [./version] [DW_LANG_C_plus_plus]
 !2 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ]
@@ -23,8 +24,9 @@ define void @test() {
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !10, metadata !6, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @test, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [test]
 !6 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{i32 1, i32 0, metadata !5, null}
 ;; !9 is added through the echo line at the top.
 !10 = metadata !{metadata !"<stdin>", metadata !"."}
 !11 = metadata !{metadata !"version", metadata !"/usr/local/google/home/nlewycky"}
+!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll b/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
index a1cc008..5a15f0e 100644
--- a/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
+++ b/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
@@ -2,7 +2,7 @@
 
 @last = external global [65 x i32*]
 
-define i32 @NextRootMove(i32 %wtm) {
+define i32 @NextRootMove(i32 %wtm, i32 %x, i32 %y, i32 %z) {
 entry:
         %A = alloca i32*
 	%tmp17618 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
@@ -15,12 +15,14 @@ entry:
 	br label %cond_true116
 
 cond_true116:
-	br i1 false, label %cond_true128, label %cond_true145
+   %cmp = icmp eq i32 %x, %y
+	br i1 %cmp, label %cond_true128, label %cond_true145
 
 cond_true128:
 	%tmp17625 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
         store i32* %tmp17625, i32** %A
-	br i1 false, label %bb98.backedge, label %return.loopexit
+   %cmp1 = icmp eq i32 %x, %z
+	br i1 %cmp1 , label %bb98.backedge, label %return.loopexit
 
 bb98.backedge:
 	br label %cond_true116
diff --git a/test/Transforms/GVN/2008-07-02-Unreachable.ll b/test/Transforms/GVN/2008-07-02-Unreachable.ll
index 4f07868..ce83fa4 100644
--- a/test/Transforms/GVN/2008-07-02-Unreachable.ll
+++ b/test/Transforms/GVN/2008-07-02-Unreachable.ll
@@ -3,10 +3,11 @@
 
 @g_3 = external global i8		; <i8*> [#uses=2]
 
-define i8 @func_1() nounwind  {
+define i8 @func_1(i32 %x, i32 %y) nounwind  {
 entry:
   %A = alloca i8
-	br i1 false, label %ifelse, label %ifthen
+    %cmp = icmp eq i32 %x, %y
+	br i1 %cmp, label %ifelse, label %ifthen
 
 ifthen:		; preds = %entry
 	br label %ifend
@@ -14,9 +15,6 @@ ifthen:		; preds = %entry
 ifelse:		; preds = %entry
 	%tmp3 = load i8* @g_3		; <i8> [#uses=0]
         store i8 %tmp3, i8* %A
-	br label %forcond.thread
-
-forcond.thread:		; preds = %ifelse
 	br label %afterfor
 
 forcond:		; preds = %forinc
diff --git a/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll b/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll
index 4613bc4..298f274 100644
--- a/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll
+++ b/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll
@@ -19,10 +19,10 @@ bb1:
   br i1 undef, label %bb3, label %bb15
 
 ; CHECK: bb1:
-; CHECK: %tmp16 = phi i8* [ getelementptr (i8* null, i64 undef), %bb10 ], [ null, %bb ]
+; CHECK: [[TMP:%.*]] = phi i8* [ getelementptr (i8* null, i64 undef), %bb10 ], [ null, %bb ]
 
 ; CHECK: bb1.bb15_crit_edge:
-; CHECK: %tmp17.pre = load i8* %tmp16, align 1
+; CHECK: %tmp17.pre = load i8* [[TMP]], align 1
 
 bb3:
   call void @isalnum()
diff --git a/test/Transforms/GVN/cond_br.ll b/test/Transforms/GVN/cond_br.ll
new file mode 100644
index 0000000..918e7d4
--- /dev/null
+++ b/test/Transforms/GVN/cond_br.ll
@@ -0,0 +1,55 @@
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+@y = external global i32
+@z = external global i32
+
+; @foo: the entry block is unnamed (implicitly %0); %t is %x - %x, and the FileCheck directives below expect @bar to be called with %.pre.
+define void @foo(i32 %x) {
+; CHECK: @foo(i32 %x)
+; CHECK: %.pre = load i32* @y
+; CHECK: call void @bar(i32 %.pre)
+
+  %t = sub i32 %x, %x
+  %.pre = load i32* @y, align 4
+  %cmp = icmp sgt i32 %t, 2
+  br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
+
+entry.if.end_crit_edge:                           ; preds = %0
+  br label %if.end
+
+if.then:                                          ; preds = %0
+  %add = add nsw i32 %x, 3
+  store i32 %add, i32* @y, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %entry.if.end_crit_edge, %if.then
+  %1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %add, %if.then ]
+  tail call void @bar(i32 %1)
+  ret void
+}
+
+define void @foo2(i32 %x) {
+; CHECK: @foo2(i32 %x)
+; CHECK: %.pre = load i32* @y
+; CHECK: tail call void @bar(i32 %.pre)
+entry:
+  %t = sub i32 %x, %x
+  %.pre = load i32* @y, align 4
+  %cmp = icmp sgt i32 %t, 2
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %add = add nsw i32 %x, 3
+  store i32 %add, i32* @y, align 4
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  store i32 1, i32* @z, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %0 = phi i32 [ %.pre, %if.else ], [ %add, %if.then ]
+  tail call void @bar(i32 %0)
+  ret void
+}
+
+declare void @bar(i32)
diff --git a/test/Transforms/GVN/cond_br2.ll b/test/Transforms/GVN/cond_br2.ll
new file mode 100644
index 0000000..27e6f75
--- /dev/null
+++ b/test/Transforms/GVN/cond_br2.ll
@@ -0,0 +1,140 @@
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl", [1 x %"union.llvm::SmallVectorBase::U"] }
+%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase" }
+%"class.llvm::SmallVectorTemplateBase" = type { %"class.llvm::SmallVectorTemplateCommon" }
+%"class.llvm::SmallVectorTemplateCommon" = type { %"class.llvm::SmallVectorBase" }
+%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8*, %"union.llvm::SmallVectorBase::U" }
+%"union.llvm::SmallVectorBase::U" = type { x86_fp80 }
+
+; Function Attrs: ssp uwtable
+define void @_Z4testv() #0 {
+; CHECK: @_Z4testv()
+; CHECK: invoke.cont:
+; CHECK: br i1 true, label %new.notnull.i11, label %if.end.i14
+; CHECK: Retry.i10:
+
+entry:
+  %sv = alloca %"class.llvm::SmallVector", align 16
+  %0 = bitcast %"class.llvm::SmallVector"* %sv to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %0) #1
+  %BeginX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %FirstEl.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 3
+  %1 = bitcast %"union.llvm::SmallVectorBase::U"* %FirstEl.i.i.i.i.i.i to i8*
+  store i8* %1, i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4
+  %EndX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 1
+  store i8* %1, i8** %EndX.i.i.i.i.i.i, align 8, !tbaa !4
+  %CapacityX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 2
+  %add.ptr.i.i.i.i2.i.i = getelementptr inbounds %"union.llvm::SmallVectorBase::U"* %FirstEl.i.i.i.i.i.i, i64 2
+  %add.ptr.i.i.i.i.i.i = bitcast %"union.llvm::SmallVectorBase::U"* %add.ptr.i.i.i.i2.i.i to i8*
+  store i8* %add.ptr.i.i.i.i.i.i, i8** %CapacityX.i.i.i.i.i.i, align 16, !tbaa !4
+  %EndX.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 1
+  %2 = load i8** %EndX.i, align 8, !tbaa !4
+  %CapacityX.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 2
+  %cmp.i = icmp ult i8* %2, %add.ptr.i.i.i.i.i.i
+  br i1 %cmp.i, label %Retry.i, label %if.end.i
+
+Retry.i:                                          ; preds = %.noexc, %entry
+  %3 = phi i8* [ %2, %entry ], [ %.pre.i, %.noexc ]
+  %new.isnull.i = icmp eq i8* %3, null
+  br i1 %new.isnull.i, label %invoke.cont, label %new.notnull.i
+
+new.notnull.i:                                    ; preds = %Retry.i
+  %4 = bitcast i8* %3 to i32*
+  store i32 1, i32* %4, align 4, !tbaa !5
+  br label %invoke.cont
+
+if.end.i:                                         ; preds = %entry
+  %5 = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0
+  invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"* %5, i64 0, i64 4)
+          to label %.noexc unwind label %lpad
+
+.noexc:                                           ; preds = %if.end.i
+  %.pre.i = load i8** %EndX.i, align 8, !tbaa !4
+  br label %Retry.i
+
+invoke.cont:                                      ; preds = %new.notnull.i, %Retry.i
+  %add.ptr.i = getelementptr inbounds i8* %3, i64 4
+  store i8* %add.ptr.i, i8** %EndX.i, align 8, !tbaa !4
+  %6 = load i8** %CapacityX.i, align 16, !tbaa !4
+  %cmp.i8 = icmp ult i8* %add.ptr.i, %6
+  br i1 %cmp.i8, label %new.notnull.i11, label %if.end.i14
+
+Retry.i10:                                        ; preds = %if.end.i14
+  %.pre.i13 = load i8** %EndX.i, align 8, !tbaa !4
+  %new.isnull.i9 = icmp eq i8* %.pre.i13, null
+  br i1 %new.isnull.i9, label %invoke.cont2, label %new.notnull.i11
+
+new.notnull.i11:                                  ; preds = %invoke.cont, %Retry.i10
+  %7 = phi i8* [ %.pre.i13, %Retry.i10 ], [ %add.ptr.i, %invoke.cont ]
+  %8 = bitcast i8* %7 to i32*
+  store i32 2, i32* %8, align 4, !tbaa !5
+  br label %invoke.cont2
+
+if.end.i14:                                       ; preds = %invoke.cont
+  %9 = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0
+  invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"* %9, i64 0, i64 4)
+          to label %Retry.i10 unwind label %lpad
+
+invoke.cont2:                                     ; preds = %new.notnull.i11, %Retry.i10
+  %10 = phi i8* [ null, %Retry.i10 ], [ %7, %new.notnull.i11 ]
+  %add.ptr.i12 = getelementptr inbounds i8* %10, i64 4
+  store i8* %add.ptr.i12, i8** %EndX.i, align 8, !tbaa !4
+  invoke void @_Z1gRN4llvm11SmallVectorIiLj8EEE(%"class.llvm::SmallVector"* %sv)
+          to label %invoke.cont3 unwind label %lpad
+
+invoke.cont3:                                     ; preds = %invoke.cont2
+  %11 = load i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4
+  %cmp.i.i.i.i19 = icmp eq i8* %11, %1
+  br i1 %cmp.i.i.i.i19, label %_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21, label %if.then.i.i.i20
+
+if.then.i.i.i20:                                  ; preds = %invoke.cont3
+  call void @free(i8* %11) #1
+  br label %_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21
+
+_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21:          ; preds = %invoke.cont3, %if.then.i.i.i20
+  call void @llvm.lifetime.end(i64 64, i8* %0) #1
+  ret void
+
+lpad:                                             ; preds = %if.end.i14, %if.end.i, %invoke.cont2
+  %12 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  %13 = load i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4
+  %cmp.i.i.i.i = icmp eq i8* %13, %1
+  br i1 %cmp.i.i.i.i, label %eh.resume, label %if.then.i.i.i
+
+if.then.i.i.i:                                    ; preds = %lpad
+  call void @free(i8* %13) #1
+  br label %eh.resume
+
+eh.resume:                                        ; preds = %if.then.i.i.i, %lpad
+  resume { i8*, i32 } %12
+}
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_Z1gRN4llvm11SmallVectorIiLj8EEE(%"class.llvm::SmallVector"*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+declare void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"*, i64, i64) #2
+
+; Function Attrs: nounwind
+declare void @free(i8* nocapture) #3
+
+attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"int", metadata !1}
+!4 = metadata !{metadata !0, metadata !0, i64 0}
+!5 = metadata !{metadata !3, metadata !3, i64 0}
diff --git a/test/Transforms/GVN/lit.local.cfg b/test/Transforms/GVN/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/GVN/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GVN/local-pre.ll b/test/Transforms/GVN/local-pre.ll
index 1d0dadf..2c92699 100644
--- a/test/Transforms/GVN/local-pre.ll
+++ b/test/Transforms/GVN/local-pre.ll
@@ -1,9 +1,9 @@
 ; RUN: opt < %s -gvn -enable-pre -S | grep "b.pre"
 
-define i32 @main(i32 %p) {
+define i32 @main(i32 %p, i32 %q) {
 block1:
-  
-	br i1 true, label %block2, label %block3
+    %cmp = icmp eq i32 %p, %q 
+	br i1 %cmp, label %block2, label %block3
 
 block2:
  %a = add i32 %p, 1
diff --git a/test/Transforms/GVN/malloc-load-removal.ll b/test/Transforms/GVN/malloc-load-removal.ll
index e93a62a..d2d2fd7 100644
--- a/test/Transforms/GVN/malloc-load-removal.ll
+++ b/test/Transforms/GVN/malloc-load-removal.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 declare i8* @malloc(i64) nounwind
 
-define noalias i8* @test() nounwind uwtable ssp {
+define noalias i8* @test1() nounwind uwtable ssp {
 entry:
   %call = tail call i8* @malloc(i64 100) nounwind
   %0 = load i8* %call, align 1
@@ -21,11 +21,36 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %if.then, %entry
   ret i8* %call
 
-; CHECK-LABEL: @test(
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: load
 ; CHECK-NOT: icmp
 
-; CHECK_NO_LIBCALLS-LABEL: @test(
+; CHECK_NO_LIBCALLS-LABEL: @test1(
+; CHECK_NO_LIBCALLS: load
+; CHECK_NO_LIBCALLS: icmp
+}
+
+declare i8* @_Znwm(i64) nounwind
+
+define noalias i8* @test2() nounwind uwtable ssp {
+entry:
+  %call = tail call i8* @_Znwm(i64 100) nounwind
+  %0 = load i8* %call, align 1
+  %tobool = icmp eq i8 %0, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i8 0, i8* %call, align 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i8* %call
+
+; CHECK-LABEL: @test2(
+; CHECK-NOT: load
+; CHECK-NOT: icmp
+
+; CHECK_NO_LIBCALLS-LABEL: @test2(
 ; CHECK_NO_LIBCALLS: load
 ; CHECK_NO_LIBCALLS: icmp
 }
diff --git a/test/Transforms/GVN/pr17732.ll b/test/Transforms/GVN/pr17732.ll
new file mode 100644
index 0000000..606a195
--- /dev/null
+++ b/test/Transforms/GVN/pr17732.ll
@@ -0,0 +1,30 @@
+; RUN: opt -gvn -S -o - < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.with_array = type { [2 x i8], i32, i8 }
+%struct.with_vector = type { <2 x i8>, i32, i8 }
+
+@main.obj_with_array = private unnamed_addr constant { [2 x i8], i32, i8, [3 x i8] } { [2 x i8] zeroinitializer, i32 0, i8 1, [3 x i8] undef }, align 4
+@array_with_zeroinit = common global %struct.with_array zeroinitializer, align 4
+
+@main.obj_with_vector = private unnamed_addr constant { <2 x i8>, i32, i8, [3 x i8] } { <2 x i8> zeroinitializer, i32 0, i8 1, [3 x i8] undef }, align 4
+@vector_with_zeroinit = common global %struct.with_vector zeroinitializer, align 4
+
+define i32 @main() {
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_array* @array_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ [2 x i8], i32, i8, [3 x i8] }* @main.obj_with_array, i64 0, i32 0, i64 0), i64 12, i32 4, i1 false)
+  %0 = load i8* getelementptr inbounds (%struct.with_array* @array_with_zeroinit, i64 0, i32 2), align 4
+
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_vector* @vector_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ <2 x i8>, i32, i8, [3 x i8] }* @main.obj_with_vector, i64 0, i32 0, i64 0), i64 12, i32 4, i1 false)
+  %1 = load i8* getelementptr inbounds (%struct.with_vector* @vector_with_zeroinit, i64 0, i32 2), align 4
+  %conv0 = sext i8 %0 to i32
+  %conv1 = sext i8 %1 to i32
+  %and = and i32 %conv0, %conv1
+  ret i32 %and
+; CHECK-LABEL: define i32 @main(
+; CHECK: ret i32 1
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
diff --git a/test/Transforms/GVN/pr17852.ll b/test/Transforms/GVN/pr17852.ll
new file mode 100644
index 0000000..e95ff7f
--- /dev/null
+++ b/test/Transforms/GVN/pr17852.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -basicaa -gvn -disable-output
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+%struct.S0 = type { [2 x i8], [2 x i8], [4 x i8], [2 x i8], i32, i32, i32, i32 }
+define void @fn1(%struct.S0* byval align 8 %p1) {
+  br label %for.cond
+for.cond:                                         ; preds = %1, %0
+  br label %for.end
+  %f2 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 2
+  %f9 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 7
+  br label %for.cond
+for.end:                                          ; preds = %for.cond
+  br i1 true, label %if.else, label %if.then
+if.then:                                          ; preds = %for.end
+  %f22 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 2
+  %f7 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 5
+  %tmp7 = load i32* %f7, align 8
+  br label %if.end40
+if.else:                                          ; preds = %for.end
+  br i1 false, label %for.cond18, label %if.then6
+if.then6:                                         ; preds = %if.else
+  %f3 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 2
+  %tmp10 = bitcast %struct.S0* %p1 to i16*
+  %f5 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3
+  %tmp11 = bitcast [2 x i8]* %f5 to i16*
+  %bf.load13 = load i16* %tmp11, align 8
+  br label %if.end36
+for.cond18:                                       ; preds = %if.else
+  call void @fn4()
+  br i1 true, label %if.end, label %if.end36
+if.end:                                           ; preds = %for.cond18
+  %f321 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 2
+  %f925 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 7
+  %f526 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3
+  %tmp15 = bitcast [2 x i8]* %f526 to i16*
+  %bf.load27 = load i16* %tmp15, align 8
+  %tmp16 = bitcast %struct.S0* %p1 to i16*
+  br label %if.end36
+if.end36:                                         ; preds = %if.end, %for.cond18, %if.then6
+  %f537 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3
+  %tmp17 = bitcast [2 x i8]* %f537 to i16*
+  %bf.load38 = load i16* %tmp17, align 8
+  %bf.clear39 = and i16 %bf.load38, -16384
+  br label %if.end40
+if.end40:                                         ; preds = %if.end36, %if.then
+  %f6 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 4
+  %tmp18 = load i32* %f6, align 4
+  call void @fn2(i32 %tmp18)
+  %f8 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 6
+  %tmp19 = load i32* %f8, align 4
+  %tobool41 = icmp eq i32 %tmp19, 0
+  br i1 true, label %if.end50, label %if.then42
+if.then42:                                        ; preds = %if.end40
+  %tmp20 = bitcast %struct.S0* %p1 to i16*
+  %f547 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3
+  %tmp21 = bitcast [2 x i8]* %f547 to i16*
+  %bf.load48 = load i16* %tmp21, align 8
+  br label %if.end50
+if.end50:                                         ; preds = %if.then42, %if.end40
+  %f551 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3
+  %tmp22 = bitcast [2 x i8]* %f551 to i16*
+  %bf.load52 = load i16* %tmp22, align 8
+  %bf.clear53 = and i16 %bf.load52, -16384
+  ret void
+}
+declare void @fn2(i32)
+declare void @fn4()
diff --git a/test/Transforms/GVN/preserve-tbaa.ll b/test/Transforms/GVN/preserve-tbaa.ll
index e52772b..c52ed96 100644
--- a/test/Transforms/GVN/preserve-tbaa.ll
+++ b/test/Transforms/GVN/preserve-tbaa.ll
@@ -25,6 +25,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
-!0 = metadata !{metadata !"short", metadata !1}
+!0 = metadata !{metadata !3, metadata !3, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/Transforms/GVN/rle-nonlocal.ll b/test/Transforms/GVN/rle-nonlocal.ll
index 6b74e9a..8229aaa 100644
--- a/test/Transforms/GVN/rle-nonlocal.ll
+++ b/test/Transforms/GVN/rle-nonlocal.ll
@@ -1,8 +1,9 @@
 ; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 
-define i32 @main(i32** %p) {
+define i32 @main(i32** %p, i32 %x, i32 %y) {
 block1:
-	br i1 true, label %block2, label %block3
+    %cmp = icmp eq i32 %x, %y
+	br i1 %cmp , label %block2, label %block3
 
 block2:
  %a = load i32** %p
diff --git a/test/Transforms/GVN/rle-semidominated.ll b/test/Transforms/GVN/rle-semidominated.ll
index 71aa548..923cd03 100644
--- a/test/Transforms/GVN/rle-semidominated.ll
+++ b/test/Transforms/GVN/rle-semidominated.ll
@@ -1,9 +1,10 @@
 ; RUN: opt < %s -basicaa -gvn -S | grep "DEAD = phi i32 "
 
-define i32 @main(i32* %p) {
+define i32 @main(i32* %p, i32 %x, i32 %y) {
 block1:
   %z = load i32* %p
-	br i1 true, label %block2, label %block3
+  %cmp = icmp eq i32 %x, %y
+	br i1 %cmp, label %block2, label %block3
 
 block2:
  br label %block4
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 8787dd5..8d289b0 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -default-data-layout="e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basicaa -gvn -S -die | FileCheck %s
-; RUN: opt < %s -default-data-layout="E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32"      -basicaa -gvn -S -die | FileCheck %s
+; RUN: opt < %s -default-data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basicaa -gvn -S -die | FileCheck %s
+; RUN: opt < %s -default-data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32"      -basicaa -gvn -S -die | FileCheck %s
 
 ;; Trivial RLE test.
 define i32 @test0(i32 %V, i32* %P) {
@@ -195,6 +195,7 @@ Cont:
 }
 
 @GCst = constant {i32, float, i32 } { i32 42, float 14., i32 97 }
+@GCst_as1 = addrspace(1) constant {i32, float, i32 } { i32 42, float 14., i32 97 }
 
 ; memset -> float forwarding.
 define float @memcpy_to_float_local(float* %A) nounwind ssp {
@@ -209,7 +210,18 @@ entry:
 ; CHECK: ret float 1.400000e+01
 }
 
-
+; memcpy from an addrspace(1) constant -> float load forwarding.
+define float @memcpy_to_float_local_as1(float* %A) nounwind ssp {
+entry:
+  %conv = bitcast float* %A to i8*                ; memcpy destination, viewed as i8*
+  tail call void @llvm.memcpy.p0i8.p1i8.i64(i8* %conv, i8 addrspace(1)* bitcast ({i32, float, i32 } addrspace(1)* @GCst_as1 to i8 addrspace(1)*), i64 12, i32 1, i1 false)
+  %arrayidx = getelementptr inbounds float* %A, i64 1 ; &A[1]: lines up with the float field of @GCst_as1
+  %tmp2 = load float* %arrayidx                   ; expected to fold to that field's value, 14.0
+  ret float %tmp2
+; CHECK-LABEL: @memcpy_to_float_local_as1(
+; CHECK-NOT: load
+; CHECK: ret float 1.400000e+01
+}
 
 ;; non-local i32/float -> i8 load forwarding.
 define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) {
@@ -357,13 +369,14 @@ Cont:
 ; CHECK: ret i8 %A
 }
 
-define i32 @chained_load(i32** %p) {
+define i32 @chained_load(i32** %p, i32 %x, i32 %y) {
 block1:
   %A = alloca i32*
 
   %z = load i32** %p
   store i32* %z, i32** %A
-  br i1 true, label %block2, label %block3
+  %cmp = icmp eq i32 %x, %y
+  br i1 %cmp, label %block2, label %block3
 
 block2:
  %a = load i32** %p
@@ -427,10 +440,11 @@ TY:
   ret i32 0
 }
 
-define i32 @phi_trans3(i32* %p) {
+define i32 @phi_trans3(i32* %p, i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @phi_trans3(
 block1:
-  br i1 true, label %block2, label %block3
+  %cmpxy = icmp eq i32 %x, %y
+  br i1 %cmpxy, label %block2, label %block3
 
 block2:
  store i32 87, i32* %p
@@ -443,7 +457,7 @@ block3:
 
 block4:
   %A = phi i32 [-1, %block2], [42, %block3]
-  br i1 true, label %block5, label %exit
+  br i1 %cmpxy, label %block5, label %exit
   
 ; CHECK: block4:
 ; CHECK-NEXT: %D = phi i32 [ 87, %block2 ], [ 97, %block3 ]  
@@ -451,11 +465,11 @@ block4:
 
 block5:
   %B = add i32 %A, 1
-  br i1 true, label %block6, label %exit
+  br i1 %cmpxy, label %block6, label %exit
   
 block6:
   %C = getelementptr i32* %p, i32 %B
-  br i1 true, label %block7, label %exit
+  br i1 %cmpxy, label %block7, label %exit
   
 block7:
   %D = load i32* %C
@@ -645,6 +659,8 @@ entry:
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
+
 
 ;;===----------------------------------------------------------------------===;;
 ;; Load -> Store dependency which isn't interfered with by a call that happens
diff --git a/test/Transforms/GVN/tbaa.ll b/test/Transforms/GVN/tbaa.ll
index 85fe39a..d6412fc 100644
--- a/test/Transforms/GVN/tbaa.ll
+++ b/test/Transforms/GVN/tbaa.ll
@@ -13,7 +13,7 @@ define i32 @test1(i8* %p, i8* %q) {
 
 define i32 @test2(i8* %p, i8* %q) {
 ; CHECK: @test2(i8* %p, i8* %q)
-; CHECK: call i32 @foo(i8* %p), !tbaa !0
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGC:!.*]]
 ; CHECK: %c = add i32 %a, %a
   %a = call i32 @foo(i8* %p), !tbaa !0
   %b = call i32 @foo(i8* %p), !tbaa !0
@@ -23,7 +23,7 @@ define i32 @test2(i8* %p, i8* %q) {
 
 define i32 @test3(i8* %p, i8* %q) {
 ; CHECK: @test3(i8* %p, i8* %q)
-; CHECK: call i32 @foo(i8* %p), !tbaa !3
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGB:!.*]]
 ; CHECK: %c = add i32 %a, %a
   %a = call i32 @foo(i8* %p), !tbaa !3
   %b = call i32 @foo(i8* %p), !tbaa !3
@@ -33,7 +33,7 @@ define i32 @test3(i8* %p, i8* %q) {
 
 define i32 @test4(i8* %p, i8* %q) {
 ; CHECK: @test4(i8* %p, i8* %q)
-; CHECK: call i32 @foo(i8* %p), !tbaa !1
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]]
 ; CHECK: %c = add i32 %a, %a
   %a = call i32 @foo(i8* %p), !tbaa !1
   %b = call i32 @foo(i8* %p), !tbaa !0
@@ -43,7 +43,7 @@ define i32 @test4(i8* %p, i8* %q) {
 
 define i32 @test5(i8* %p, i8* %q) {
 ; CHECK: @test5(i8* %p, i8* %q)
-; CHECK: call i32 @foo(i8* %p), !tbaa !1
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]]
 ; CHECK: %c = add i32 %a, %a
   %a = call i32 @foo(i8* %p), !tbaa !0
   %b = call i32 @foo(i8* %p), !tbaa !1
@@ -53,7 +53,7 @@ define i32 @test5(i8* %p, i8* %q) {
 
 define i32 @test6(i8* %p, i8* %q) {
 ; CHECK: @test6(i8* %p, i8* %q)
-; CHECK: call i32 @foo(i8* %p), !tbaa !1
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]]
 ; CHECK: %c = add i32 %a, %a
   %a = call i32 @foo(i8* %p), !tbaa !0
   %b = call i32 @foo(i8* %p), !tbaa !3
@@ -74,8 +74,18 @@ define i32 @test7(i8* %p, i8* %q) {
 
 declare i32 @foo(i8*) readonly
 
-!0 = metadata !{metadata !"C", metadata !1}
-!1 = metadata !{metadata !"A", metadata !2}
+; CHECK: [[TAGC]] = metadata !{metadata [[TYPEC:!.*]], metadata [[TYPEC]], i64 0}
+; CHECK: [[TYPEC]] = metadata !{metadata !"C", metadata [[TYPEA:!.*]]}
+; CHECK: [[TYPEA]] = metadata !{metadata !"A", metadata !{{.*}}}
+; CHECK: [[TAGB]] = metadata !{metadata [[TYPEB:!.*]], metadata [[TYPEB]], i64 0}
+; CHECK: [[TYPEB]] = metadata !{metadata !"B", metadata [[TYPEA]]}
+; CHECK: [[TAGA]] = metadata !{metadata [[TYPEA]], metadata [[TYPEA]], i64 0}
+!0 = metadata !{metadata !5, metadata !5, i64 0}
+!1 = metadata !{metadata !6, metadata !6, i64 0}
 !2 = metadata !{metadata !"tbaa root", null}
-!3 = metadata !{metadata !"B", metadata !1}
-!4 = metadata !{metadata !"another root", null}
+!3 = metadata !{metadata !7, metadata !7, i64 0}
+!4 = metadata !{metadata !8, metadata !8, i64 0}
+!5 = metadata !{metadata !"C", metadata !6}
+!6 = metadata !{metadata !"A", metadata !2}
+!7 = metadata !{metadata !"B", metadata !6}
+!8 = metadata !{metadata !"another root", null}
diff --git a/test/Transforms/GlobalDCE/lit.local.cfg b/test/Transforms/GlobalDCE/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/GlobalDCE/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
index 390e77a..0867ca9 100644
--- a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
+++ b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
@@ -4,7 +4,7 @@
 
 ; RUN: opt < %s -globalopt -S > %t
 ; Check that the new global values still have their address space
-; RUN: cat %t | grep addrspace.*global
+; RUN: cat %t | grep 'addrspace.*global'
 
 @struct = internal addrspace(1) global { i32, i32 } zeroinitializer
 @array = internal addrspace(1) global [ 2 x i32 ] zeroinitializer 
diff --git a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
index e08320b..0108960 100644
--- a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
+++ b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
@@ -60,7 +60,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !2 = metadata !{i32 458788, null, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !3 = metadata !{i32 459009, metadata !4, metadata !"i", metadata !1, i32 4, metadata !2} ; [ DW_TAG_arg_variable ]
 !4 = metadata !{i32 458798, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 4, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!5 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 458773, metadata !1, null, metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{metadata !2, metadata !2}
 !7 = metadata !{i32 5, i32 0, metadata !8, null}
 !8 = metadata !{i32 458763, metadata !20, metadata !4, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
@@ -71,7 +71,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !13 = metadata !{i32 14, i32 0, metadata !14, null}
 !14 = metadata !{i32 458763, metadata !20, metadata !15, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{i32 458798, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", i32 13, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!16 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{i32 458773, metadata !1, null, metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !17 = metadata !{metadata !2}
 !18 = metadata !{i32 15, i32 0, metadata !14, null}
 !19 = metadata !{i32 16, i32 0, metadata !14, null}
diff --git a/test/Transforms/GlobalOpt/array-elem-refs.ll b/test/Transforms/GlobalOpt/array-elem-refs.ll
new file mode 100644
index 0000000..ec472b0
--- /dev/null
+++ b/test/Transforms/GlobalOpt/array-elem-refs.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -S -globalopt | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.S = type { i8, i8 }
+
+@c = internal global i8** bitcast (i8* getelementptr (i8* bitcast ([8 x i8*]* @b to i8*), i64 48) to i8**), align 8
+@b = internal global [8 x i8*] [i8* null, i8* null, i8* null, i8* null, i8* null, i8* null, i8* getelementptr inbounds (%struct.S* @a, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S* @a, i32 0, i32 0), i64 1)], align 16
+@a = internal global %struct.S zeroinitializer, align 1
+
+; Function Attrs: nounwind uwtable
+define signext i8 @foo() #0 {
+entry:
+  %0 = load i8*** @c, align 8
+  %1 = load i8** %0, align 8
+  %2 = load i8* %1, align 1
+  ret i8 %2
+
+; CHECK-LABEL: @foo
+; CHECK: ret i8 0
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  ret i32 0
+}
+
+attributes #0 = { nounwind uwtable }
+
diff --git a/test/Transforms/GlobalOpt/atomic.ll b/test/Transforms/GlobalOpt/atomic.ll
index 4c3f439..ac05bfd 100644
--- a/test/Transforms/GlobalOpt/atomic.ll
+++ b/test/Transforms/GlobalOpt/atomic.ll
@@ -1,10 +1,25 @@
 ; RUN: opt -globalopt < %s -S -o - | FileCheck %s
 
 @GV1 = internal global i64 1
+@GV2 = internal global i32 0
+
 ; CHECK: @GV1 = internal unnamed_addr constant i64 1
+; CHECK: @GV2 = internal unnamed_addr global i32 0
 
 define void @test1() {
 entry:
   %0 = load atomic i8* bitcast (i64* @GV1 to i8*) acquire, align 8
   ret void
 }
+
+; PR17163: @GV2 is written by this atomic store and read by the atomic load in @test2b.
+define void @test2a() {
+entry:
+  store atomic i32 10, i32* @GV2 seq_cst, align 4
+  ret void
+}
+define i32 @test2b() {
+entry:
+  %atomic-load = load atomic i32* @GV2 seq_cst, align 4
+  ret i32 %atomic-load
+}
diff --git a/test/Transforms/GlobalOpt/lit.local.cfg b/test/Transforms/GlobalOpt/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/GlobalOpt/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IPConstantProp/lit.local.cfg b/test/Transforms/IPConstantProp/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/IPConstantProp/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
new file mode 100644
index 0000000..e4c31d1
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
@@ -0,0 +1,69 @@
+; RUN: opt -S -indvars -o - %s | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-n8:16:32:64"
+
+; Derived from ptriv in lftr-reuse.ll; addrspace(2) pointers are 8 bits (p2:8:8:8).
+define void @ptriv_as2(i8 addrspace(2)* %base, i32 %n) nounwind {
+; CHECK-LABEL: @ptriv_as2(
+entry:
+  %idx.trunc = trunc i32 %n to i8
+  %add.ptr = getelementptr inbounds i8 addrspace(2)* %base, i8 %idx.trunc
+  %cmp1 = icmp ult i8 addrspace(2)* %base, %add.ptr
+  br i1 %cmp1, label %for.body, label %for.end
+
+; Make sure the added GEP has the right index type (i8, the addrspace(2) pointer width)
+; CHECK: %lftr.limit = getelementptr i8 addrspace(2)* %base, i8 %0
+
+; CHECK: for.body:
+; CHECK: phi i8 addrspace(2)*
+; CHECK-NOT: phi
+; CHECK-NOT: add{{[^r]}}
+; CHECK: icmp ne i8 addrspace(2)*
+; CHECK: br i1
+for.body:
+  %p.02 = phi i8 addrspace(2)* [ %base, %entry ], [ %incdec.ptr, %for.body ]
+  ; cruft to make the IV useful
+  %sub.ptr.lhs.cast = ptrtoint i8 addrspace(2)* %p.02 to i8
+  %sub.ptr.rhs.cast = ptrtoint i8 addrspace(2)* %base to i8
+  %sub.ptr.sub = sub i8 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+  store i8 %sub.ptr.sub, i8 addrspace(2)* %p.02
+  %incdec.ptr = getelementptr inbounds i8 addrspace(2)* %p.02, i32 1
+  %cmp = icmp ult i8 addrspace(2)* %incdec.ptr, %add.ptr
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @ptriv_as3(i8 addrspace(3)* %base, i32 %n) nounwind {
+; CHECK-LABEL: @ptriv_as3(
+entry:
+  %idx.trunc = trunc i32 %n to i16
+  %add.ptr = getelementptr inbounds i8 addrspace(3)* %base, i16 %idx.trunc
+  %cmp1 = icmp ult i8 addrspace(3)* %base, %add.ptr
+  br i1 %cmp1, label %for.body, label %for.end
+
+; Make sure the added GEP has the right index type (i16, the addrspace(3) pointer width)
+; CHECK: %lftr.limit = getelementptr i8 addrspace(3)* %base, i16 %0
+
+; CHECK: for.body:
+; CHECK: phi i8 addrspace(3)*
+; CHECK-NOT: phi
+; CHECK-NOT: add{{[^r]}}
+; CHECK: icmp ne i8 addrspace(3)*
+; CHECK: br i1
+for.body:
+  %p.02 = phi i8 addrspace(3)* [ %base, %entry ], [ %incdec.ptr, %for.body ]
+  ; cruft to make the IV useful
+  %sub.ptr.lhs.cast = ptrtoint i8 addrspace(3)* %p.02 to i16
+  %sub.ptr.rhs.cast = ptrtoint i8 addrspace(3)* %base to i16
+  %sub.ptr.sub = sub i16 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+  %conv = trunc i16 %sub.ptr.sub to i8
+  store i8 %conv, i8 addrspace(3)* %p.02
+  %incdec.ptr = getelementptr inbounds i8 addrspace(3)* %p.02, i32 1
+  %cmp = icmp ult i8 addrspace(3)* %incdec.ptr, %add.ptr
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
diff --git a/test/Transforms/IndVarSimplify/lftr-zext.ll b/test/Transforms/IndVarSimplify/lftr-zext.ll
new file mode 100644
index 0000000..32fa61a
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lftr-zext.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+@data = common global [240 x i8] zeroinitializer, align 16
+
+define void @foo(i8* %a) nounwind uwtable ssp {
+; CHECK: %exitcond
+; CHECK-NOT: ([240 x i8]* @data, i64 0, i64 -16)
+  br label %1
+
+; <label>:1                                       ; preds = %0, %1
+  %i.0 = phi i8 [ 0, %0 ], [ %5, %1 ]
+  %p.0 = phi i8* [ getelementptr inbounds ([240 x i8]* @data, i64 0, i64 0), %0 ], [ %4, %1 ]
+  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
+  %2 = getelementptr inbounds i8* %.0, i64 1
+  %3 = load i8* %.0, align 1
+  %4 = getelementptr inbounds i8* %p.0, i64 1
+  store i8 %3, i8* %p.0, align 1
+  %5 = add i8 %i.0, 1
+  %6 = icmp ult i8 %5, -16
+  br i1 %6, label %1, label %7
+
+; <label>:7                                       ; preds = %1
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/lit.local.cfg b/test/Transforms/IndVarSimplify/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/IndVarSimplify/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_1.ll b/test/Transforms/IndVarSimplify/loop_evaluate_1.ll
index abf1bc3..5d2c8c7 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_1.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_1.ll
@@ -1,8 +1,9 @@
-; RUN: opt < %s -indvars -loop-deletion -simplifycfg -S | not grep br
-;
-; Testcase distilled from 256.bzip2
+; RUN: opt < %s -indvars -loop-deletion -simplifycfg -S | FileCheck %s
 
-define i32 @main() {
+; Testcase distilled from 256.bzip2
+; CHECK-LABEL: @test1
+; CHECK-NOT: br
+define i32 @test1() {
 entry:
         br label %loopentry
 
@@ -19,3 +20,28 @@ loopexit:               ; preds = %loopentry
         ret i32 %tmp.2
 }
 
+
+; PR12377
+; CHECK-LABEL: @test2
+; CHECK: [[VAR1:%.+]] = add i32 %arg, -11
+; CHECK: [[VAR2:%.+]] = lshr i32 [[VAR1]], 1
+; CHECK: [[VAR3:%.+]] = add i32 [[VAR2]], 1
+; CHECK: [[VAR4:%.+]] = phi i32 [ 0, %bb ], [ [[VAR3]], %bb1.preheader ]
+; CHECK: ret i32 [[VAR4]]
+define i32 @test2(i32 %arg) {
+bb:
+  %tmp = icmp ugt i32 %arg, 10
+  br i1 %tmp, label %bb1, label %bb7
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp2 = phi i32 [ %tmp5, %bb1 ], [ 0, %bb ]
+  %tmp3 = phi i32 [ %tmp4, %bb1 ], [ %arg, %bb ]
+  %tmp4 = add i32 %tmp3, -2
+  %tmp5 = add i32 %tmp2, 1
+  %tmp6 = icmp ugt i32 %tmp4, 10
+  br i1 %tmp6, label %bb1, label %bb7
+
+bb7:                                              ; preds = %bb1, %bb
+  %tmp8 = phi i32 [ 0, %bb ], [ %tmp5, %bb1 ]
+  ret i32 %tmp8
+}
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_6.ll b/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
index da38de5..af01fe5 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
@@ -1,9 +1,4 @@
 ; RUN: opt < %s -indvars -loop-deletion -S | grep phi | count 1
-; XFAIL: *
-
-; Indvars can't evaluate this loop, because ScalarEvolution can't compute
-; an exact trip count, because it doesn't know if dividing by the stride will
-; have a remainder. It could be done with more aggressive VRP though.
 
 define i32 @test(i32 %x_offs) nounwind readnone {
 entry:
diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
index 507f695..0576692 100644
--- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -223,13 +223,18 @@ entry:
   %halfLim = ashr i32 %limit, 2
   br label %loop
 
-; Test cloning an or, which is not an OverflowBinaryOperator.
+; This test originally checked that the OR instruction was cloned. Now
+; ScalarEvolution understands the loop evolution and knows that '%iv' at the
+; end of the loop is an even value. Thus '%val' is computed at the end of the
+; loop and the OR instruction is replaced by an ADD, keeping the result
+; equivalent.
 ;
 ; CHECK: loop:
 ; CHECK: phi i64
 ; CHECK-NOT: sext
-; CHECK: or i64
+; CHECK: icmp slt i32
 ; CHECK: exit:
+; CHECK: add i64
 loop:
   %iv = phi i32 [ 0, %entry], [ %iv.next, %loop ]
   %t1 = sext i32 %iv to i64
diff --git a/test/Transforms/Inline/alloca-merge-align-nodl.ll b/test/Transforms/Inline/alloca-merge-align-nodl.ll
index 203f52b..301505f 100644
--- a/test/Transforms/Inline/alloca-merge-align-nodl.ll
+++ b/test/Transforms/Inline/alloca-merge-align-nodl.ll
@@ -8,13 +8,13 @@ define void @foo(%struct.s* byval nocapture readonly %a) {
 entry:
   %x = alloca [2 x i32], align 4
   %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4, !tbaa !0
+  %0 = load i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
-  store i32 %0, i32* %arrayidx, align 4, !tbaa !0
+  store i32 %0, i32* %arrayidx, align 4
   %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4, !tbaa !0
+  %1 = load i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
-  store i32 %1, i32* %arrayidx2, align 4, !tbaa !0
+  store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx) #2
   ret void
 }
@@ -23,13 +23,13 @@ define void @foo0(%struct.s* byval nocapture readonly %a) {
 entry:
   %x = alloca [2 x i32]
   %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4, !tbaa !0
+  %0 = load i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
-  store i32 %0, i32* %arrayidx, align 4, !tbaa !0
+  store i32 %0, i32* %arrayidx, align 4
   %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4, !tbaa !0
+  %1 = load i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
-  store i32 %1, i32* %arrayidx2, align 4, !tbaa !0
+  store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx) #2
   ret void
 }
@@ -40,13 +40,13 @@ define void @goo(%struct.s* byval nocapture readonly %a) {
 entry:
   %x = alloca [2 x i32], align 32
   %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4, !tbaa !0
+  %0 = load i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
-  store i32 %0, i32* %arrayidx, align 32, !tbaa !0
+  store i32 %0, i32* %arrayidx, align 32
   %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4, !tbaa !0
+  %1 = load i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
-  store i32 %1, i32* %arrayidx2, align 4, !tbaa !0
+  store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx) #2
   ret void
 }
@@ -62,9 +62,9 @@ entry:
   %tmpcast = bitcast i64* %a to %struct.s*
   store i64 0, i64* %a, align 8
   %a1 = bitcast i64* %a to i32*
-  store i32 1, i32* %a1, align 8, !tbaa !0
+  store i32 1, i32* %a1, align 8
   call void @foo(%struct.s* byval %tmpcast)
-  store i32 2, i32* %a1, align 8, !tbaa !0
+  store i32 2, i32* %a1, align 8
   call void @goo(%struct.s* byval %tmpcast)
   ret i32 0
 }
@@ -80,14 +80,9 @@ entry:
   %tmpcast = bitcast i64* %a to %struct.s*
   store i64 0, i64* %a, align 8
   %a1 = bitcast i64* %a to i32*
-  store i32 1, i32* %a1, align 8, !tbaa !0
+  store i32 1, i32* %a1, align 8
   call void @foo0(%struct.s* byval %tmpcast)
-  store i32 2, i32* %a1, align 8, !tbaa !0
+  store i32 2, i32* %a1, align 8
   call void @goo(%struct.s* byval %tmpcast)
   ret i32 0
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-
diff --git a/test/Transforms/Inline/alloca-merge-align.ll b/test/Transforms/Inline/alloca-merge-align.ll
index d789c79..d357b3c 100644
--- a/test/Transforms/Inline/alloca-merge-align.ll
+++ b/test/Transforms/Inline/alloca-merge-align.ll
@@ -9,13 +9,13 @@ define void @foo(%struct.s* byval nocapture readonly %a) {
 entry:
   %x = alloca [2 x i32], align 4
   %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4, !tbaa !0
+  %0 = load i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
-  store i32 %0, i32* %arrayidx, align 4, !tbaa !0
+  store i32 %0, i32* %arrayidx, align 4
   %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4, !tbaa !0
+  %1 = load i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
-  store i32 %1, i32* %arrayidx2, align 4, !tbaa !0
+  store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx) #2
   ret void
 }
@@ -24,13 +24,13 @@ define void @foo0(%struct.s* byval nocapture readonly %a) {
 entry:
   %x = alloca [2 x i32]
   %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4, !tbaa !0
+  %0 = load i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
-  store i32 %0, i32* %arrayidx, align 4, !tbaa !0
+  store i32 %0, i32* %arrayidx, align 4
   %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4, !tbaa !0
+  %1 = load i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
-  store i32 %1, i32* %arrayidx2, align 4, !tbaa !0
+  store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx) #2
   ret void
 }
@@ -39,13 +39,13 @@ define void @foo1(%struct.s* byval nocapture readonly %a) {
 entry:
   %x = alloca [2 x i32], align 1
   %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4, !tbaa !0
+  %0 = load i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
-  store i32 %0, i32* %arrayidx, align 4, !tbaa !0
+  store i32 %0, i32* %arrayidx, align 4
   %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4, !tbaa !0
+  %1 = load i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
-  store i32 %1, i32* %arrayidx2, align 4, !tbaa !0
+  store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx) #2
   ret void
 }
@@ -56,13 +56,13 @@ define void @goo(%struct.s* byval nocapture readonly %a) {
 entry:
   %x = alloca [2 x i32], align 32
   %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4, !tbaa !0
+  %0 = load i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
-  store i32 %0, i32* %arrayidx, align 32, !tbaa !0
+  store i32 %0, i32* %arrayidx, align 32
   %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4, !tbaa !0
+  %1 = load i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
-  store i32 %1, i32* %arrayidx2, align 4, !tbaa !0
+  store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx) #2
   ret void
 }
@@ -78,9 +78,9 @@ entry:
   %tmpcast = bitcast i64* %a to %struct.s*
   store i64 0, i64* %a, align 8
   %a1 = bitcast i64* %a to i32*
-  store i32 1, i32* %a1, align 8, !tbaa !0
+  store i32 1, i32* %a1, align 8
   call void @foo(%struct.s* byval %tmpcast)
-  store i32 2, i32* %a1, align 8, !tbaa !0
+  store i32 2, i32* %a1, align 8
   call void @goo(%struct.s* byval %tmpcast)
   ret i32 0
 }
@@ -96,9 +96,9 @@ entry:
   %tmpcast = bitcast i64* %a to %struct.s*
   store i64 0, i64* %a, align 8
   %a1 = bitcast i64* %a to i32*
-  store i32 1, i32* %a1, align 8, !tbaa !0
+  store i32 1, i32* %a1, align 8
   call void @foo0(%struct.s* byval %tmpcast)
-  store i32 2, i32* %a1, align 8, !tbaa !0
+  store i32 2, i32* %a1, align 8
   call void @goo(%struct.s* byval %tmpcast)
   ret i32 0
 }
@@ -114,14 +114,9 @@ entry:
   %tmpcast = bitcast i64* %a to %struct.s*
   store i64 0, i64* %a, align 8
   %a1 = bitcast i64* %a to i32*
-  store i32 1, i32* %a1, align 8, !tbaa !0
+  store i32 1, i32* %a1, align 8
   call void @foo0(%struct.s* byval %tmpcast)
-  store i32 2, i32* %a1, align 8, !tbaa !0
+  store i32 2, i32* %a1, align 8
   call void @foo1(%struct.s* byval %tmpcast)
   ret i32 0
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-
diff --git a/test/Transforms/Inline/attributes.ll b/test/Transforms/Inline/attributes.ll
new file mode 100644
index 0000000..53fb13f
--- /dev/null
+++ b/test/Transforms/Inline/attributes.ll
@@ -0,0 +1,112 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+define i32 @noattr_callee(i32 %i) {
+  ret i32 %i
+}
+
+define i32 @sanitize_address_callee(i32 %i) sanitize_address {
+  ret i32 %i
+}
+
+define i32 @sanitize_thread_callee(i32 %i) sanitize_thread {
+  ret i32 %i
+}
+
+define i32 @sanitize_memory_callee(i32 %i) sanitize_memory {
+  ret i32 %i
+}
+
+define i32 @alwaysinline_callee(i32 %i) alwaysinline {
+  ret i32 %i
+}
+
+define i32 @alwaysinline_sanitize_address_callee(i32 %i) alwaysinline sanitize_address {
+  ret i32 %i
+}
+
+define i32 @alwaysinline_sanitize_thread_callee(i32 %i) alwaysinline sanitize_thread {
+  ret i32 %i
+}
+
+define i32 @alwaysinline_sanitize_memory_callee(i32 %i) alwaysinline sanitize_memory {
+  ret i32 %i
+}
+
+
+; Check that:
+;  * noattr callee is inlined into noattr caller,
+;  * sanitize_(address|memory|thread) callee is not inlined into noattr caller,
+;  * alwaysinline callee is always inlined no matter what sanitize_* attributes are present.
+
+define i32 @test_no_sanitize_address(i32 %arg) {
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_address_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_address_callee(i32 %x3)
+  ret i32 %x4
+; CHECK-LABEL: @test_no_sanitize_address(
+; CHECK-NEXT: @sanitize_address_callee
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_no_sanitize_memory(i32 %arg) {
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_memory_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_memory_callee(i32 %x3)
+  ret i32 %x4
+; CHECK-LABEL: @test_no_sanitize_memory(
+; CHECK-NEXT: @sanitize_memory_callee
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_no_sanitize_thread(i32 %arg) {
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_thread_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_thread_callee(i32 %x3)
+  ret i32 %x4
+; CHECK-LABEL: @test_no_sanitize_thread(
+; CHECK-NEXT: @sanitize_thread_callee
+; CHECK-NEXT: ret i32
+}
+
+
+; Check that:
+;  * noattr callee is not inlined into sanitize_(address|memory|thread) caller,
+;  * sanitize_(address|memory|thread) callee is inlined into the caller with the same attribute,
+;  * alwaysinline callee is always inlined no matter what sanitize_* attributes are present.
+
+define i32 @test_sanitize_address(i32 %arg) sanitize_address {
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_address_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_address_callee(i32 %x3)
+  ret i32 %x4
+; CHECK-LABEL: @test_sanitize_address(
+; CHECK-NEXT: @noattr_callee
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_sanitize_memory(i32 %arg) sanitize_memory {
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_memory_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_memory_callee(i32 %x3)
+  ret i32 %x4
+; CHECK-LABEL: @test_sanitize_memory(
+; CHECK-NEXT: @noattr_callee
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_sanitize_thread(i32 %arg) sanitize_thread {
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_thread_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_thread_callee(i32 %x3)
+  ret i32 %x4
+; CHECK-LABEL: @test_sanitize_thread(
+; CHECK-NEXT: @noattr_callee
+; CHECK-NEXT: ret i32
+}
diff --git a/test/Transforms/Inline/byval.ll b/test/Transforms/Inline/byval.ll
index e601faf..d7597ad 100644
--- a/test/Transforms/Inline/byval.ll
+++ b/test/Transforms/Inline/byval.ll
@@ -104,3 +104,26 @@ entry:
 ; CHECK: ret i32 4
 }
 
+%struct.S0 = type { i32 }
+
+@b = global %struct.S0 { i32 1 }, align 4
+@a = common global i32 0, align 4
+
+define internal void @f5(%struct.S0* byval nocapture readonly align 4 %p) {
+entry:
+	store i32 0, i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4
+	%f2 = getelementptr inbounds %struct.S0* %p, i64 0, i32 0
+	%0 = load i32* %f2, align 4
+	store i32 %0, i32* @a, align 4
+	ret void
+}
+
+define i32 @test5() {
+entry:
+	tail call void @f5(%struct.S0* byval align 4 @b)
+	%0 = load i32* @a, align 4
+	ret i32 %0
+; CHECK: @test5()
+; CHECK: store i32 0, i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4
+; CHECK-NOT: load i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4
+}
diff --git a/test/Transforms/Inline/delete-call.ll b/test/Transforms/Inline/delete-call.ll
index 97c52af..7f30ffb 100644
--- a/test/Transforms/Inline/delete-call.ll
+++ b/test/Transforms/Inline/delete-call.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -S -inline -stats < %s 2>&1 | FileCheck %s
 ; CHECK: Number of functions inlined
 
-; RUN: opt -S -inline -functionattrs -stats < %s 2>&1 | FileCheck -check-prefix=FUNCTIONATTRS %s
+; RUN: opt -S -inline -functionattrs -stats < %s 2>&1 | FileCheck -check-prefix=CHECK-FUNCTIONATTRS %s
 ; CHECK-FUNCTIONATTRS: Number of call sites deleted, not inlined
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
diff --git a/test/Transforms/Inline/inline-invoke-with-asm-call.ll b/test/Transforms/Inline/inline-invoke-with-asm-call.ll
new file mode 100644
index 0000000..876f8d7
--- /dev/null
+++ b/test/Transforms/Inline/inline-invoke-with-asm-call.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+target triple = "x86_64-apple-darwin"
+
+; The inliner assumes that inline asm does not throw. This test case makes
+; sure that the inliner does not convert "call asm" to "invoke asm".
+; rdar://15317907
+; CHECK-LABEL: @caller
+; Make sure we are generating "call asm" instead of "invoke asm".
+; CHECK: call void asm
+; CHECK-LABEL: @callee_with_asm
+define void @caller() {
+  br i1 undef, label %1, label %4
+
+; <label>:1
+  invoke void @callee_with_asm()
+          to label %4 unwind label %2
+
+; <label>:2
+  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          cleanup
+  resume { i8*, i32 } undef
+
+; <label>:4
+  ret void
+}
+
+define void @callee_with_asm() {
+  call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
+  ret void
+}
+
+declare i32 @__objc_personality_v0(...)
diff --git a/test/Transforms/Inline/inline-optnone.ll b/test/Transforms/Inline/inline-optnone.ll
new file mode 100644
index 0000000..9b99c45
--- /dev/null
+++ b/test/Transforms/Inline/inline-optnone.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+; Test that functions with attribute optnone are not inlined.
+; Also test that only functions with attribute alwaysinline are
+; valid candidates for inlining if the caller has the optnone attribute.
+
+; Function Attrs: alwaysinline nounwind readnone uwtable
+define i32 @alwaysInlineFunction(i32 %a) #0 {
+entry:
+  %mul = mul i32 %a, %a
+  ret i32 %mul
+}
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @simpleFunction(i32 %a) #1 {
+entry:
+  %add = add i32 %a, %a
+  ret i32 %add
+}
+
+; Function Attrs: nounwind noinline optnone readnone uwtable
+define i32 @OptnoneFunction(i32 %a) #2 {
+entry:
+  %0 = tail call i32 @alwaysInlineFunction(i32 %a)
+  %1 = tail call i32 @simpleFunction(i32 %a)
+  %add = add i32 %0, %1
+  ret i32 %add
+}
+
+; CHECK-LABEL: @OptnoneFunction
+; CHECK-NOT: call i32 @alwaysInlineFunction(i32 %a)
+; CHECK: call i32 @simpleFunction(i32 %a)
+; CHECK: ret
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @bar(i32 %a) #1 {
+entry:
+  %0 = tail call i32 @OptnoneFunction(i32 5)
+  %1 = tail call i32 @simpleFunction(i32 6)
+  %add = add i32 %0, %1
+  ret i32 %add
+}
+
+; CHECK-LABEL: @bar
+; CHECK: call i32 @OptnoneFunction(i32 5)
+; CHECK-NOT: call i32 @simpleFunction(i32 6)
+; CHECK: ret
+
+
+attributes #0 = { alwaysinline nounwind readnone uwtable }
+attributes #1 = { nounwind readnone uwtable }
+attributes #2 = { nounwind noinline optnone readnone uwtable }
diff --git a/test/Transforms/Inline/inline_returns_twice.ll b/test/Transforms/Inline/inline_returns_twice.ll
index f316c91..678ee82 100644
--- a/test/Transforms/Inline/inline_returns_twice.ll
+++ b/test/Transforms/Inline/inline_returns_twice.ll
@@ -4,38 +4,81 @@
 ; if they are themselve marked as such.
 
 declare i32 @a() returns_twice
-declare i32 @b() returns_twice
 
-define i32 @f() {
+define i32 @inner1() {
 entry:
   %call = call i32 @a() returns_twice
   %add = add nsw i32 1, %call
   ret i32 %add
 }
 
-define i32 @g() {
+define i32 @outer1() {
 entry:
-; CHECK-LABEL: define i32 @g(
-; CHECK: call i32 @f()
-; CHECK-NOT: call i32 @a()
-  %call = call i32 @f()
+; CHECK-LABEL: define i32 @outer1(
+; CHECK: call i32 @inner1()
+  %call = call i32 @inner1()
   %add = add nsw i32 1, %call
   ret i32 %add
 }
 
-define i32 @h() returns_twice {
+define i32 @inner2() returns_twice {
 entry:
-  %call = call i32 @b() returns_twice
+  %call = call i32 @a() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @outer2() {
+entry:
+; CHECK-LABEL: define i32 @outer2(
+; CHECK: call i32 @a()
+  %call = call i32 @inner2() returns_twice
   %add = add nsw i32 1, %call
   ret i32 %add
 }
 
-define i32 @i() {
+define i32 @inner3() {
+entry:
+  %invoke = invoke i32 @a() returns_twice
+      to label %cont unwind label %lpad
+
+cont:
+  %add = add nsw i32 1, %invoke
+  ret i32 %add
+
+lpad:
+  %lp = landingpad i32 personality i8* null cleanup
+  resume i32 %lp
+}
+
+define i32 @outer3() {
+entry:
+; CHECK-LABEL: define i32 @outer3(
+; CHECK: call i32 @inner3()
+  %call = call i32 @inner3()
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @inner4() returns_twice {
+entry:
+  %invoke = invoke i32 @a() returns_twice
+      to label %cont unwind label %lpad
+
+cont:
+  %add = add nsw i32 1, %invoke
+  ret i32 %add
+
+lpad:
+  %lp = landingpad i32 personality i8* null cleanup
+  resume i32 %lp
+}
+
+define i32 @outer4() {
 entry:
-; CHECK-LABEL: define i32 @i(
-; CHECK: call i32 @b()
-; CHECK-NOT: call i32 @h()
-  %call = call i32 @h() returns_twice
+; CHECK-LABEL: define i32 @outer4(
+; CHECK: invoke i32 @a()
+  %call = call i32 @inner4() returns_twice
   %add = add nsw i32 1, %call
   ret i32 %add
 }
diff --git a/test/Transforms/Inline/invoke-cost.ll b/test/Transforms/Inline/invoke-cost.ll
new file mode 100644
index 0000000..84d33ad
--- /dev/null
+++ b/test/Transforms/Inline/invoke-cost.ll
@@ -0,0 +1,45 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=100 | FileCheck %s
+
+target datalayout = "p:32:32"
+
+@glbl = external global i32
+
+declare void @f()
+declare i32 @__gxx_personality_v0(...)
+declare i8* @__cxa_begin_catch(i8*)
+declare void @__cxa_end_catch()
+declare void @_ZSt9terminatev()
+
+define void @inner1() {
+entry:
+  invoke void @f() to label %cont1 unwind label %terminate.lpad
+
+cont1:
+  invoke void @f() to label %cont2 unwind label %terminate.lpad
+
+cont2:
+  invoke void @f() to label %cont3 unwind label %terminate.lpad
+
+cont3:
+  invoke void @f() to label %cont4 unwind label %terminate.lpad
+
+cont4:
+  ret void
+
+terminate.lpad:
+  landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            catch i8* null
+  call void @_ZSt9terminatev() noreturn nounwind
+  unreachable
+}
+
+define void @outer1() {
+; CHECK-LABEL: @outer1(
+;
+; This call should not get inlined because inner1 actually calls a function
+; many times, but it only does so through invoke as opposed to call.
+;
+; CHECK: call void @inner1
+  call void @inner1()
+  ret void
+}
diff --git a/test/Transforms/Inline/lit.local.cfg b/test/Transforms/Inline/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/Inline/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Inline/ptr-diff.ll b/test/Transforms/Inline/ptr-diff.ll
index 01b42da..af42bc7 100644
--- a/test/Transforms/Inline/ptr-diff.ll
+++ b/test/Transforms/Inline/ptr-diff.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -inline < %s -S -o - -inline-threshold=10 | FileCheck %s
 
-target datalayout = "p:32:32"
+target datalayout = "p:32:32-p1:64:64-p2:16:16-n16:32:64"
 
 define i32 @outer1() {
 ; CHECK-LABEL: @outer1(
@@ -56,3 +56,46 @@ else:
   %t = load i32* %begin
   ret i32 %t
 }
+
+; The inttoptrs are free since each converts a smaller integer (i32) to a
+; larger pointer (64-bit in addrspace(1)).
+define i32 @inttoptr_free_cost(i32 %a, i32 %b, i32 %c) {
+  %p1 = inttoptr i32 %a to i32 addrspace(1)*
+  %p2 = inttoptr i32 %b to i32 addrspace(1)*
+  %p3 = inttoptr i32 %c to i32 addrspace(1)*
+  %t1 = load i32 addrspace(1)* %p1
+  %t2 = load i32 addrspace(1)* %p2
+  %t3 = load i32 addrspace(1)* %p3
+  %s = add i32 %t1, %t2
+  %s1 = add i32 %s, %t3
+  ret i32 %s1
+}
+
+define i32 @inttoptr_free_cost_user(i32 %begin, i32 %end) {
+; CHECK-LABEL: @inttoptr_free_cost_user(
+; CHECK-NOT: call
+  %x = call i32 @inttoptr_free_cost(i32 %begin, i32 %end, i32 9)
+  ret i32 %x
+}
+
+; The inttoptrs have a cost since each converts a larger integer (i32) to a
+; smaller pointer (16-bit in addrspace(2)).
+define i32 @inttoptr_cost_smaller_ptr(i32 %a, i32 %b, i32 %c) {
+  %p1 = inttoptr i32 %a to i32 addrspace(2)*
+  %p2 = inttoptr i32 %b to i32 addrspace(2)*
+  %p3 = inttoptr i32 %c to i32 addrspace(2)*
+  %t1 = load i32 addrspace(2)* %p1
+  %t2 = load i32 addrspace(2)* %p2
+  %t3 = load i32 addrspace(2)* %p3
+  %s = add i32 %t1, %t2
+  %s1 = add i32 %s, %t3
+  ret i32 %s1
+}
+
+define i32 @inttoptr_cost_smaller_ptr_user(i32 %begin, i32 %end) {
+; CHECK-LABEL: @inttoptr_cost_smaller_ptr_user(
+; CHECK: call
+  %x = call i32 @inttoptr_cost_smaller_ptr(i32 %begin, i32 %end, i32 9)
+  ret i32 %x
+}
+
diff --git a/test/Transforms/InstCombine/2002-05-14-SubFailure.ll b/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
index d2b2b00..854ec60 100644
--- a/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
+++ b/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
@@ -1,7 +1,8 @@
 ; Instcombine was missing a test that caused it to make illegal transformations
 ; sometimes.  In this case, it transforms the sub into an add:
-; RUN: opt < %s -instcombine -S | grep sub
-;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: sub
+
 define i32 @test(i32 %i, i32 %j) {
         %A = mul i32 %i, %j
         %B = sub i32 2, %A
diff --git a/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll b/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
index 22574f7..49e55c6 100644
--- a/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
+++ b/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
@@ -1,4 +1,6 @@
-; RUN: opt < %s -instcombine -S | not grep add
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK-NOT: add
 
 define i32 @test(i32 %A) {
         %A.neg = sub i32 0, %A          ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll b/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
index c02d33c..bb9a818 100644
--- a/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
+++ b/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
@@ -1,6 +1,7 @@
-; This testcase can be simplified by "realizing" that alloca can never return 
+; This testcase can be simplified by "realizing" that alloca can never return
 ; null.
-; RUN: opt < %s -instcombine -simplifycfg -S | not grep br
+; RUN: opt < %s -instcombine -simplifycfg -S | FileCheck %s
+; CHECK-NOT: br
 
 declare i32 @bitmap_clear(...)
 
diff --git a/test/Transforms/InstCombine/2006-10-20-mask.ll b/test/Transforms/InstCombine/2006-10-20-mask.ll
index 0aaa5e8..e9797ae 100644
--- a/test/Transforms/InstCombine/2006-10-20-mask.ll
+++ b/test/Transforms/InstCombine/2006-10-20-mask.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN:    grep and
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: and
 
 define i64 @foo(i64 %tmp, i64 %tmp2) {
         %tmp.upgrd.1 = trunc i64 %tmp to i32            ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
index d3ba1e2..8ab50e2 100644
--- a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
+++ b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@@ -1,5 +1,6 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN:   grep mul | count 2
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: mul
+; CHECK: mul
 
 define <4 x float> @test(<4 x float> %V) {
         %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >                ; <<4 x float>> [#uses=1]
diff --git a/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll b/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
index 2665791..272753c 100644
--- a/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
+++ b/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep select
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: select
 ; END.
 
 target datalayout = "e-p:32:32"
diff --git a/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll b/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
index c161bcc..6b4e89d 100644
--- a/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
+++ b/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep select
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: select
 
 define double @fold(i1 %a, double %b) {
 %s = select i1 %a, double 0., double 1.
diff --git a/test/Transforms/InstCombine/2008-02-13-MulURem.ll b/test/Transforms/InstCombine/2008-02-13-MulURem.ll
index a88c510..d85ef97 100644
--- a/test/Transforms/InstCombine/2008-02-13-MulURem.ll
+++ b/test/Transforms/InstCombine/2008-02-13-MulURem.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -instcombine -S | grep rem
+; RUN: opt < %s -instcombine -S | FileCheck %s
 ; PR1933
 
+; CHECK: rem
+
 define i32 @fold(i32 %a) {
   %s = mul i32 %a, 3
   %c = urem i32 %s, 3
diff --git a/test/Transforms/InstCombine/2008-05-31-AddBool.ll b/test/Transforms/InstCombine/2008-05-31-AddBool.ll
index ed20690..31b1719 100644
--- a/test/Transforms/InstCombine/2008-05-31-AddBool.ll
+++ b/test/Transforms/InstCombine/2008-05-31-AddBool.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -instcombine -S | grep "xor"
+; RUN: opt < %s -instcombine -S | FileCheck %s
 ; PR2389
 
+; CHECK: xor
+
 define i1 @test(i1 %a, i1 %b) {
   %A = add i1 %a, %b
   ret i1 %A
diff --git a/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll b/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
index 949fc59..e354311 100644
--- a/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
+++ b/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
@@ -5,7 +5,7 @@ target triple = "i386-apple-darwin9.6"
 
 define i32 @test(i32* %P) nounwind {
 entry:
-  %Q = bitcast i32* %P to i32 addrspace(1)*
+  %Q = addrspacecast i32* %P to i32 addrspace(1)*
   store i32 0, i32 addrspace(1)* %Q, align 4
   ret i32 0
 }
diff --git a/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
index 6f3df5b..4d185bf 100644
--- a/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
+++ b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
@@ -1,10 +1,10 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; CHECK: bitcast
+; CHECK: addrspacecast
 
 @base = internal addrspace(3) unnamed_addr global [16 x i32] zeroinitializer, align 16
 declare void @foo(i32*)
 
 define void @test() nounwind {
-  call void @foo(i32* getelementptr (i32* bitcast ([16 x i32] addrspace(3)* @base to i32*), i64 2147483647)) nounwind
+  call void @foo(i32* getelementptr (i32* addrspacecast ([16 x i32] addrspace(3)* @base to i32*), i64 2147483647)) nounwind
   ret void
 }
diff --git a/test/Transforms/InstCombine/addrspacecast.ll b/test/Transforms/InstCombine/addrspacecast.ll
new file mode 100644
index 0000000..d908b55
--- /dev/null
+++ b/test/Transforms/InstCombine/addrspacecast.ll
@@ -0,0 +1,69 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:32:32:32-p2:16:16:16-n8:16:32:64"
+
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8*, i8 addrspace(1)*, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p2i8.i32(i8*, i8 addrspace(2)*, i32, i32, i1) nounwind
+
+
+define i32* @combine_redundant_addrspacecast(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast(
+; CHECK: addrspacecast i32 addrspace(1)* %x to i32*
+; CHECK-NEXT: ret
+  %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)*
+  %z = addrspacecast i32 addrspace(3)* %y to i32*
+  ret i32* %z
+}
+
+define <4 x i32*> @combine_redundant_addrspacecast_vector(<4 x i32 addrspace(1)*> %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast_vector(
+; CHECK: addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32*>
+; CHECK-NEXT: ret
+  %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32 addrspace(3)*>
+  %z = addrspacecast <4 x i32 addrspace(3)*> %y to <4 x i32*>
+  ret <4 x i32*> %z
+}
+
+define float* @combine_redundant_addrspacecast_types(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast_types(
+; CHECK: addrspacecast i32 addrspace(1)* %x to float*
+; CHECK-NEXT: ret
+  %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)*
+  %z = addrspacecast i32 addrspace(3)* %y to float*
+  ret float* %z
+}
+
+@const_array = addrspace(2) constant [60 x i8] [i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22 ]
+
+declare void @foo(i8*) nounwind
+
+; A copy from a constant addrspacecast'ed global
+; CHECK-LABEL: @memcpy_addrspacecast(
+; CHECK-NOT:  call void @llvm.memcpy
+define i32 @memcpy_addrspacecast() nounwind {
+entry:
+  %alloca = alloca i8, i32 48
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %alloca, i8 addrspace(1)* addrspacecast (i8 addrspace(2)* getelementptr inbounds ([60 x i8] addrspace(2)* @const_array, i16 0, i16 4) to i8 addrspace(1)*), i32 48, i32 4, i1 false) nounwind
+  br label %loop.body
+
+loop.body:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop.body ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.inc, %loop.body]
+  %ptr = getelementptr i8* %alloca, i32 %i
+  %load = load i8* %ptr
+  %ext = zext i8 %load to i32
+  %sum.inc = add i32 %sum, %ext
+  %i.inc = add i32 %i, 1
+  %cmp = icmp ne i32 %i.inc, 48 ; test the incremented index so only bytes 0..47 of the 48-byte alloca are read
+  br i1 %cmp, label %loop.body, label %end
+
+end:
+  ret i32 %sum.inc
+}
+
diff --git a/test/Transforms/InstCombine/align-addr.ll b/test/Transforms/InstCombine/align-addr.ll
index e33ee9f..4d22c2c 100644
--- a/test/Transforms/InstCombine/align-addr.ll
+++ b/test/Transforms/InstCombine/align-addr.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 ; Instcombine should be able to prove vector alignment in the
 ; presence of a few mild address computation tricks.
@@ -47,6 +47,27 @@ entry:
 	ret <16 x i8> %tmp
 }
 
+@GLOBAL_as1 = internal addrspace(1) global [4 x i32] zeroinitializer
+
+define <16 x i8> @test1_as1(<2 x i64> %x) {
+; CHECK-LABEL: @test1_as1(
+; CHECK: tmp = load
+; CHECK: GLOBAL_as1{{.*}}align 16
+  %tmp = load <16 x i8> addrspace(1)* bitcast ([4 x i32] addrspace(1)* @GLOBAL_as1 to <16 x i8> addrspace(1)*), align 1
+  ret <16 x i8> %tmp
+}
+
+@GLOBAL_as1_gep = internal addrspace(1) global [8 x i32] zeroinitializer
+
+define <16 x i8> @test1_as1_gep(<2 x i64> %x) {
+; CHECK-LABEL: @test1_as1_gep(
+; CHECK: tmp = load
+; CHECK: GLOBAL_as1_gep{{.*}}align 16
+  %tmp = load <16 x i8> addrspace(1)* bitcast (i32 addrspace(1)* getelementptr ([8 x i32] addrspace(1)* @GLOBAL_as1_gep, i16 0, i16 4) to <16 x i8> addrspace(1)*), align 1
+  ret <16 x i8> %tmp
+}
+
+
 ; When a load or store lacks an explicit alignment, add one.
 
 ; CHECK-LABEL: @test2(
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index 9a80ad9..ae1cfa1 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -1,7 +1,7 @@
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+; RUN: opt < %s -instcombine -S -default-data-layout="E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s
+; RUN: opt < %s -instcombine -S -default-data-layout="E-p:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s -check-prefix=P32
+; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=NODL
 
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; END.
 
 declare void @use(...)
 
@@ -110,3 +110,22 @@ entry:
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+
+; Check that the GEP indices use the pointer-sized integer type, or i64 if no data layout is given
+define void @test8() {
+; CHECK-LABEL: @test8(
+; CHECK: alloca [100 x i32]
+; CHECK: getelementptr inbounds [100 x i32]* %x1, i64 0, i64 0
+
+; P32-LABEL: @test8(
+; P32: alloca [100 x i32]
+; P32: getelementptr inbounds [100 x i32]* %x1, i32 0, i32 0
+
+; NODL-LABEL: @test8(
+; NODL: alloca [100 x i32]
+; NODL: getelementptr inbounds [100 x i32]* %x1, i64 0, i64 0
+  %x = alloca i32, i32 100
+  call void (...)* @use(i32* %x)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index 504391a..e88fd59 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -42,3 +42,15 @@ define <4 x i32> @test5(<4 x i32> %A) {
   %2 = and <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %1
   ret <4 x i32> %2
 }
+
+; Check that we combine "if x!=0 && x!=-1" into "if x+1u>1"
+define i32 @test6(i64 %x) nounwind {
+; CHECK: @test6
+; CHECK-NEXT: add i64 %x, 1
+; CHECK-NEXT: icmp ugt i64 %x.off, 1
+  %cmp1 = icmp ne i64 %x, -1
+  %not.cmp = icmp ne i64 %x, 0
+  %.cmp1 = and i1 %cmp1, %not.cmp
+  %land.ext = zext i1 %.cmp1 to i32
+  ret i32 %land.ext
+}
diff --git a/test/Transforms/InstCombine/apint-select.ll b/test/Transforms/InstCombine/apint-select.ll
index f2ea601..cf24a44 100644
--- a/test/Transforms/InstCombine/apint-select.ll
+++ b/test/Transforms/InstCombine/apint-select.ll
@@ -1,6 +1,7 @@
 ; This test makes sure that these instructions are properly eliminated.
 
-; RUN: opt < %s -instcombine -S | not grep select
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK-NOT: select
 
 
 define i41 @test1(i1 %C) {
@@ -37,7 +38,7 @@ define i41 @test5(i41 %X) {
 
 define i1023 @test6(i1023 %X) {
     ;; ((X & 27) ? 27 : 0)
-    %Y = and i1023 %X, 64 
+    %Y = and i1023 %X, 64
     %t = icmp ne i1023 %Y, 0
     %V = select i1 %t, i1023 64, i1023 0
     ret i1023 %V
diff --git a/test/Transforms/InstCombine/bitcast-bigendian.ll b/test/Transforms/InstCombine/bitcast-bigendian.ll
index 28b0e9a..ed812e1 100644
--- a/test/Transforms/InstCombine/bitcast-bigendian.ll
+++ b/test/Transforms/InstCombine/bitcast-bigendian.ll
@@ -48,3 +48,44 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
 ; CHECK-NEXT:  ret float %add
 }
 
+define <2 x i32> @test4(i32 %A, i32 %B){
+  %tmp38 = zext i32 %A to i64
+  %tmp32 = zext i32 %B to i64
+  %tmp33 = shl i64 %tmp32, 32
+  %ins35 = or i64 %tmp33, %tmp38
+  %tmp43 = bitcast i64 %ins35 to <2 x i32>
+  ret <2 x i32> %tmp43
+  ; CHECK-LABEL: @test4(
+  ; CHECK-NEXT: insertelement <2 x i32> undef, i32 %B, i32 0
+  ; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %A, i32 1
+  ; CHECK-NEXT: ret <2 x i32>
+
+}
+
+define <2 x float> @test5(float %A, float %B) {
+  %tmp37 = bitcast float %A to i32
+  %tmp38 = zext i32 %tmp37 to i64
+  %tmp31 = bitcast float %B to i32
+  %tmp32 = zext i32 %tmp31 to i64
+  %tmp33 = shl i64 %tmp32, 32
+  %ins35 = or i64 %tmp33, %tmp38
+  %tmp43 = bitcast i64 %ins35 to <2 x float>
+  ret <2 x float> %tmp43
+  ; CHECK-LABEL: @test5(
+  ; CHECK-NEXT: insertelement <2 x float> undef, float %B, i32 0
+  ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %A, i32 1
+  ; CHECK-NEXT: ret <2 x float>
+}
+
+define <2 x float> @test6(float %A){
+  %tmp23 = bitcast float %A to i32              ; <i32> [#uses=1]
+  %tmp24 = zext i32 %tmp23 to i64                 ; <i64> [#uses=1]
+  %tmp25 = shl i64 %tmp24, 32                     ; <i64> [#uses=1]
+  %mask20 = or i64 %tmp25, 1109917696             ; <i64> [#uses=1]
+  %tmp35 = bitcast i64 %mask20 to <2 x float>     ; <<2 x float>> [#uses=1]
+  ret <2 x float> %tmp35
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0
+; CHECK-NEXT: insertelement <2 x float> {{.*}}, float 4.200000e+01, i32 1
+; CHECK: ret
+}
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 4ef8790..c7a520b 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -144,3 +144,13 @@ define <2 x i16> @BitcastInsert(i32 %a) {
 ; CHECK-LABEL: @BitcastInsert(
 ; CHECK: bitcast i32 %a to <2 x i16>
 }
+
+; PR17293
+define <2 x i64> @test7(<2 x i8*>* %arg) nounwind {
+  %cast = bitcast <2 x i8*>* %arg to <2 x i64>*
+  %load = load <2 x i64>* %cast, align 16
+  ret <2 x i64> %load
+; CHECK: @test7
+; CHECK: bitcast
+; CHECK: load
+}
diff --git a/test/Transforms/InstCombine/call.ll b/test/Transforms/InstCombine/call.ll
index 55833fb..e68c0ad 100644
--- a/test/Transforms/InstCombine/call.ll
+++ b/test/Transforms/InstCombine/call.ll
@@ -1,7 +1,7 @@
 ; Ignore stderr, we expect warnings there
 ; RUN: opt < %s -instcombine 2> /dev/null -S | FileCheck %s
 
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+target datalayout = "E-p:64:64:64-p1:16:16:16-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 ; Simple case, argument translatable without changing the value
 declare void @test1a(i8*)
@@ -15,6 +15,28 @@ define void @test1(i32* %A) {
   ret void
 }
 
+
+; Should not be transformed because the call would change the address space of the parameter
+define void @test1_as1_illegal(i32 addrspace(1)* %A) {
+; CHECK-LABEL: @test1_as1_illegal(
+; CHECK: call void bitcast
+  call void bitcast (void (i8*)* @test1a to void (i32 addrspace(1)*)*)(i32 addrspace(1)* %A)
+  ret void
+}
+
+; Same as test1, but the argument is in address space 1, whose pointers have a different size
+declare void @test1a_as1(i8 addrspace(1)*)
+
+; This one is OK to perform
+define void @test1_as1(i32 addrspace(1)* %A) {
+; CHECK-LABEL: @test1_as1(
+; CHECK: %1 = bitcast i32 addrspace(1)* %A to i8 addrspace(1)*
+; CHECK: call void @test1a_as1(i8 addrspace(1)* %1)
+; CHECK: ret void
+  call void bitcast (void (i8 addrspace(1)*)* @test1a_as1 to void (i32 addrspace(1)*)*)(i32 addrspace(1)* %A )
+  ret void
+}
+
 ; More complex case, translate argument because of resolution.  This is safe
 ; because we have the body of the function
 define void @test2a(i8 %A) {
@@ -135,3 +157,122 @@ entry:
 ; CHECK: call i8* bitcast
 }
 
+
+; Parameter that's a vector of pointers
+declare void @test10a(<2 x i8*>)
+
+define void @test10(<2 x i32*> %A) {
+; CHECK-LABEL: @test10(
+; CHECK: %1 = bitcast <2 x i32*> %A to <2 x i8*>
+; CHECK: call void @test10a(<2 x i8*> %1)
+; CHECK: ret void
+  call void bitcast (void (<2 x i8*>)* @test10a to void (<2 x i32*>)*)(<2 x i32*> %A)
+  ret void
+}
+
+; Don't transform because different address spaces
+declare void @test10a_mixed_as(<2 x i8 addrspace(1)*>)
+
+define void @test10_mixed_as(<2 x i8*> %A) {
+; CHECK-LABEL: @test10_mixed_as(
+; CHECK: call void bitcast
+  call void bitcast (void (<2 x i8 addrspace(1)*>)* @test10a_mixed_as to void (<2 x i8*>)*)(<2 x i8*> %A)
+  ret void
+}
+
+; Return type that's a pointer
+define i8* @test11a() {
+  ret i8* zeroinitializer
+}
+
+define i32* @test11() {
+; CHECK-LABEL: @test11(
+; CHECK: %X = call i8* @test11a()
+; CHECK: %1 = bitcast i8* %X to i32*
+  %X = call i32* bitcast (i8* ()* @test11a to i32* ()*)()
+  ret i32* %X
+}
+
+; Return type that's a pointer with a different address space
+define i8 addrspace(1)* @test11a_mixed_as() {
+  ret i8 addrspace(1)* zeroinitializer
+}
+
+define i8* @test11_mixed_as() {
+; CHECK-LABEL: @test11_mixed_as(
+; CHECK: call i8* bitcast
+  %X = call i8* bitcast (i8 addrspace(1)* ()* @test11a_mixed_as to i8* ()*)()
+  ret i8* %X
+}
+
+; Return type that's a vector of pointers
+define <2 x i8*> @test12a() {
+  ret <2 x i8*> zeroinitializer
+}
+
+define <2 x i32*> @test12() {
+; CHECK-LABEL: @test12(
+; CHECK: %X = call <2 x i8*> @test12a()
+; CHECK: %1 = bitcast <2 x i8*> %X to <2 x i32*>
+  %X = call <2 x i32*> bitcast (<2 x i8*> ()* @test12a to <2 x i32*> ()*)()
+  ret <2 x i32*> %X
+}
+
+define <2 x i8 addrspace(1)*> @test12a_mixed_as() {
+  ret <2 x i8 addrspace(1)*> zeroinitializer
+}
+
+define <2 x i8*> @test12_mixed_as() {
+; CHECK-LABEL: @test12_mixed_as(
+; CHECK: call <2 x i8*> bitcast
+  %X = call <2 x i8*> bitcast (<2 x i8 addrspace(1)*> ()* @test12a_mixed_as to <2 x i8*> ()*)()
+  ret <2 x i8*> %X
+}
+
+
+; Don't transform: the parameter mixes a vector of integers with a vector of pointers of the same size
+declare void @test13a(<2 x i64>)
+
+define void @test13(<2 x i32*> %A) {
+; CHECK-LABEL: @test13(
+; CHECK: call void bitcast
+  call void bitcast (void (<2 x i64>)* @test13a to void (<2 x i32*>)*)(<2 x i32*> %A)
+  ret void
+}
+
+; Don't transform: the parameter mixes a vector of pointers with a vector of
+; integers of the same size (the reverse direction of test13)
+declare void @test14a(<2 x i8*>)
+
+define void @test14(<2 x i64> %A) {
+; CHECK-LABEL: @test14(
+; CHECK: call void bitcast
+  call void bitcast (void (<2 x i8*>)* @test14a to void (<2 x i64>)*)(<2 x i64> %A)
+  ret void
+}
+
+
+; Return type that's a vector
+define <2 x i16> @test15a() {
+  ret <2 x i16> zeroinitializer
+}
+
+define i32 @test15() {
+; CHECK-LABEL: @test15(
+; CHECK: %X = call <2 x i16> @test15a()
+; CHECK: %1 = bitcast <2 x i16> %X to i32
+  %X = call i32 bitcast (<2 x i16> ()* @test15a to i32 ()*)( )
+  ret i32 %X
+}
+
+define i32 @test16a() {
+  ret i32 0
+}
+
+define <2 x i16> @test16() {
+; CHECK-LABEL: @test16(
+; CHECK: %X = call i32 @test16a()
+; CHECK: %1 = bitcast i32 %X to <2 x i16>
+  %X = call <2 x i16> bitcast (i32 ()* @test16a to <2 x i16> ()*)( )
+  ret <2 x i16> %X
+}
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 52ea7b9..cac0ec1 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -1,6 +1,6 @@
 ; Tests to make sure elimination of casts is working correctly
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64"
+target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64"
 
 @inbuf = external global [32832 x i8]           ; <[32832 x i8]*> [#uses=1]
 
@@ -708,6 +708,19 @@ define %s @test68(%s *%p, i64 %i) {
 ; CHECK-NEXT: ret %s
 }
 
+define %s @test68_as1(%s addrspace(1)* %p, i32 %i) {
+; CHECK-LABEL: @test68_as1(
+  %o = mul i32 %i, 12
+  %q = bitcast %s addrspace(1)* %p to i8 addrspace(1)*
+  %pp = getelementptr inbounds i8 addrspace(1)* %q, i32 %o
+; CHECK-NEXT: getelementptr %s addrspace(1)*
+  %r = bitcast i8 addrspace(1)* %pp to %s addrspace(1)*
+  %l = load %s addrspace(1)* %r
+; CHECK-NEXT: load %s addrspace(1)*
+  ret %s %l
+; CHECK-NEXT: ret %s
+}
+
 define double @test69(double *%p, i64 %i) {
 ; CHECK-LABEL: @test69(
   %o = shl nsw i64 %i, 3
@@ -890,6 +903,20 @@ define double @test80([100 x double]* %p, i32 %i) {
 ; CHECK-NEXT: ret double
 }
 
+define double @test80_as1([100 x double] addrspace(1)* %p, i16 %i) {
+; CHECK-LABEL: @test80_as1(
+  %tmp = mul nsw i16 %i, 8
+; CHECK-NEXT: sext i16 %i to i32
+  %q = bitcast [100 x double] addrspace(1)* %p to i8 addrspace(1)*
+  %pp = getelementptr i8 addrspace(1)* %q, i16 %tmp
+; CHECK-NEXT: getelementptr [100 x double] addrspace(1)*
+  %r = bitcast i8 addrspace(1)* %pp to double addrspace(1)*
+  %l = load double addrspace(1)* %r
+; CHECK-NEXT: load double addrspace(1)*
+  ret double %l
+; CHECK-NEXT: ret double
+}
+
 define double @test81(double *%p, float %f) {
   %i = fptosi float %f to i64
   %q = bitcast double* %p to i8*
diff --git a/test/Transforms/InstCombine/cast_ptr.ll b/test/Transforms/InstCombine/cast_ptr.ll
index 7910ea3..23006a8 100644
--- a/test/Transforms/InstCombine/cast_ptr.ll
+++ b/test/Transforms/InstCombine/cast_ptr.ll
@@ -1,7 +1,7 @@
 ; Tests to make sure elimination of casts is working correctly
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-target datalayout = "p:32:32"
+target datalayout = "p:32:32-p1:32:32-p2:16:16"
 
 ; This shouldn't convert to getelementptr because the relationship
 ; between the arithmetic and the layout of allocated memory is
@@ -27,6 +27,26 @@ define i1 @test2(i8* %a, i8* %b) {
         ret i1 %r
 }
 
+; These casts should be folded away: i16 matches the address space 2 pointer size.
+; CHECK-LABEL: @test2_as2_same_int(
+; CHECK: icmp eq i8 addrspace(2)* %a, %b
+define i1 @test2_as2_same_int(i8 addrspace(2)* %a, i8 addrspace(2)* %b) {
+  %tmpa = ptrtoint i8 addrspace(2)* %a to i16
+  %tmpb = ptrtoint i8 addrspace(2)* %b to i16
+  %r = icmp eq i16 %tmpa, %tmpb
+  ret i1 %r
+}
+
+; These casts should be folded away even though i32 is wider than the 16-bit address space 2 pointer.
+; CHECK-LABEL: @test2_as2_larger(
+; CHECK: icmp eq i8 addrspace(2)* %a, %b
+define i1 @test2_as2_larger(i8 addrspace(2)* %a, i8 addrspace(2)* %b) {
+  %tmpa = ptrtoint i8 addrspace(2)* %a to i32
+  %tmpb = ptrtoint i8 addrspace(2)* %b to i32
+  %r = icmp eq i32 %tmpa, %tmpb
+  ret i1 %r
+}
+
 ; These casts should also be folded away.
 ; CHECK-LABEL: @test3(
 ; CHECK: icmp eq i8* %a, @global
@@ -43,11 +63,20 @@ define i1 @test4(i32 %A) {
   ret i1 %C
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT: %C = icmp eq i32 %A, 0
-; CHECK-NEXT: ret i1 %C 
+; CHECK-NEXT: ret i1 %C
 }
 
+define i1 @test4_as2(i16 %A) {
+; CHECK-LABEL: @test4_as2(
+; CHECK-NEXT: %C = icmp eq i16 %A, 0
+; CHECK-NEXT: ret i1 %C
+  %B = inttoptr i16 %A to i8 addrspace(2)*
+  %C = icmp eq i8 addrspace(2)* %B, null
+  ret i1 %C
+}
 
-; Pulling the cast out of the load allows us to eliminate the load, and then 
+
+; Pulling the cast out of the load allows us to eliminate the load, and then
 ; the whole array.
 
         %op = type { float }
@@ -69,11 +98,11 @@ define %unop* @test5(%op* %O) {
 ; InstCombine can not 'load (cast P)' -> cast (load P)' if the cast changes
 ; the address space.
 
-define i8 @test6(i8 addrspace(1)* %source) {                                                                                        
-entry: 
-  %arrayidx223 = bitcast i8 addrspace(1)* %source to i8*
+define i8 @test6(i8 addrspace(1)* %source) {
+entry:
+  %arrayidx223 = addrspacecast i8 addrspace(1)* %source to i8*
   %tmp4 = load i8* %arrayidx223
   ret i8 %tmp4
 ; CHECK-LABEL: @test6(
 ; CHECK: load i8* %arrayidx223
-} 
+}
diff --git a/test/Transforms/InstCombine/compare-signs.ll b/test/Transforms/InstCombine/compare-signs.ll
index cdf95ab..62cd5b3 100644
--- a/test/Transforms/InstCombine/compare-signs.ll
+++ b/test/Transforms/InstCombine/compare-signs.ll
@@ -24,9 +24,9 @@
 define i32 @test3(i32 %a, i32 %b) nounwind readnone {
 ; CHECK-LABEL: @test3(
 entry:
-; CHECK: xor i32 %a, %b
-; CHECK: lshr i32 %0, 31
-; CHECK: xor i32 %1, 1
+; CHECK: [[XOR1:%.*]] = xor i32 %a, %b
+; CHECK: [[SHIFT:%.*]] = lshr i32 [[XOR1]], 31
+; CHECK: [[XOR2:%.*]] = xor i32 [[SHIFT]], 1
         %0 = lshr i32 %a, 31            ; <i32> [#uses=1]
         %1 = lshr i32 %b, 31            ; <i32> [#uses=1]
         %2 = icmp eq i32 %0, %1         ; <i1> [#uses=1]
@@ -34,7 +34,7 @@ entry:
         ret i32 %3
 ; CHECK-NOT: icmp
 ; CHECK-NOT: zext
-; CHECK: ret i32 %2
+; CHECK: ret i32 [[XOR2]]
 }
 
 ; Variation on @test3: checking the 2nd bit in a situation where the 5th bit
diff --git a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
new file mode 100644
index 0000000..9f21d54
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -0,0 +1,232 @@
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-p4:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+@g = addrspace(3) global i32 89
+
+@const_zero_i8_as1 = addrspace(1) constant i8 0
+@const_zero_i32_as1 = addrspace(1) constant i32 0
+
+@const_zero_i8_as2 = addrspace(2) constant i8 0
+@const_zero_i32_as2 = addrspace(2) constant i32 0
+
+@const_zero_i8_as3 = addrspace(3) constant i8 0
+@const_zero_i32_as3 = addrspace(3) constant i32 0
+
+; Test constant folding of inttoptr (ptrtoint constantexpr)
+; The intermediate integer size is the same as the pointer size
+define i32 addrspace(3)* @test_constant_fold_inttoptr_as_pointer_same_size() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_same_size(
+; CHECK-NEXT: ret i32 addrspace(3)* @const_zero_i32_as3
+  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i32
+  %y = inttoptr i32 %x to i32 addrspace(3)*
+  ret i32 addrspace(3)* %y
+}
+
+; The intermediate integer size is larger than the pointer size
+define i32 addrspace(2)* @test_constant_fold_inttoptr_as_pointer_smaller() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller(
+; CHECK-NEXT: ret i32 addrspace(2)* @const_zero_i32_as2
+  %x = ptrtoint i32 addrspace(2)* @const_zero_i32_as2 to i16
+  %y = inttoptr i16 %x to i32 addrspace(2)*
+  ret i32 addrspace(2)* %y
+}
+
+; The two address spaces have the same pointer size, but they are still
+; distinct address spaces, so nothing should be folded
+define i32 addrspace(4)* @test_constant_fold_inttoptr_as_pointer_smaller_different_as() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller_different_as(
+; CHECK-NEXT: ret i32 addrspace(4)* inttoptr (i16 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i16) to i32 addrspace(4)*)
+  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i16
+  %y = inttoptr i16 %x to i32 addrspace(4)*
+  ret i32 addrspace(4)* %y
+}
+
+; Make sure we don't introduce a bitcast between different sized
+; address spaces when folding this
+define i32 addrspace(2)* @test_constant_fold_inttoptr_as_pointer_smaller_different_size_as() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller_different_size_as(
+; CHECK-NEXT: ret i32 addrspace(2)* inttoptr (i32 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i32) to i32 addrspace(2)*)
+  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i32
+  %y = inttoptr i32 %x to i32 addrspace(2)*
+  ret i32 addrspace(2)* %y
+}
+
+; The intermediate integer size is too small, nothing should happen
+define i32 addrspace(3)* @test_constant_fold_inttoptr_as_pointer_larger() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_larger(
+; CHECK-NEXT: ret i32 addrspace(3)* inttoptr (i8 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i8) to i32 addrspace(3)*)
+  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i8
+  %y = inttoptr i8 %x to i32 addrspace(3)*
+  ret i32 addrspace(3)* %y
+}
+
+define i8 @const_fold_ptrtoint() {
+; CHECK-LABEL: @const_fold_ptrtoint(
+; CHECK-NEXT: ret i8 4
+  ret i8 ptrtoint (i32 addrspace(2)* inttoptr (i4 4 to i32 addrspace(2)*) to i8)
+}
+
+; Test that the value is masked when the destination integer (i8) is smaller
+; than the source pointer (16 bits in address space 3)
+define i8 @const_fold_ptrtoint_mask() {
+; CHECK-LABEL: @const_fold_ptrtoint_mask(
+; CHECK-NEXT: ret i8 1
+  ret i8 ptrtoint (i32 addrspace(3)* inttoptr (i32 257 to i32 addrspace(3)*) to i8)
+}
+
+; The 32-bit pointer size of address space 0 is too small for the correct mask;
+; the 64-bit pointer size of address space 1 must be used instead
+define i64 @const_fold_ptrtoint_mask_small_as0() {
+; CHECK-LABEL: @const_fold_ptrtoint_mask_small_as0(
+; CHECK: ret i64 -1
+  ret i64 ptrtoint (i32 addrspace(1)* inttoptr (i128 -1 to i32 addrspace(1)*) to i64)
+}
+
+define i32 addrspace(3)* @const_inttoptr() {
+; CHECK-LABEL: @const_inttoptr(
+; CHECK-NEXT: ret i32 addrspace(3)* inttoptr (i16 4 to i32 addrspace(3)*)
+  %p = inttoptr i16 4 to i32 addrspace(3)*
+  ret i32 addrspace(3)* %p
+}
+
+define i16 @const_ptrtoint() {
+; CHECK-LABEL: @const_ptrtoint(
+; CHECK-NEXT: ret i16 ptrtoint (i32 addrspace(3)* @g to i16)
+  %i = ptrtoint i32 addrspace(3)* @g to i16
+  ret i16 %i
+}
+
+define i16 @const_inttoptr_ptrtoint() {
+; CHECK-LABEL: @const_inttoptr_ptrtoint(
+; CHECK-NEXT: ret i16 9
+  ret i16 ptrtoint (i32 addrspace(3)* inttoptr (i16 9 to i32 addrspace(3)*) to i16)
+}
+
+define i1 @constant_fold_cmp_constantexpr_inttoptr() {
+; CHECK-LABEL: @constant_fold_cmp_constantexpr_inttoptr(
+; CHECK-NEXT: ret i1 true
+  %x = icmp eq i32 addrspace(3)* inttoptr (i16 0 to i32 addrspace(3)*), null
+  ret i1 %x
+}
+
+define i1 @constant_fold_inttoptr_null(i16 %i) {
+; CHECK-LABEL: @constant_fold_inttoptr_null(
+; CHECK-NEXT: ret i1 false
+  %x = icmp eq i32 addrspace(3)* inttoptr (i16 99 to i32 addrspace(3)*), inttoptr (i16 0 to i32 addrspace(3)*)
+  ret i1 %x
+}
+
+define i1 @constant_fold_ptrtoint_null() {
+; CHECK-LABEL: @constant_fold_ptrtoint_null(
+; CHECK-NEXT: ret i1 false
+  %x = icmp eq i16 ptrtoint (i32 addrspace(3)* @g to i16), ptrtoint (i32 addrspace(3)* null to i16)
+  ret i1 %x
+}
+
+define i1 @constant_fold_ptrtoint_null_2() {
+; CHECK-LABEL: @constant_fold_ptrtoint_null_2(
+; CHECK-NEXT: ret i1 false
+  %x = icmp eq i16 ptrtoint (i32 addrspace(3)* null to i16), ptrtoint (i32 addrspace(3)* @g to i16)
+  ret i1 %x
+}
+
+define i1 @constant_fold_ptrtoint() {
+; CHECK-LABEL: @constant_fold_ptrtoint(
+; CHECK-NEXT: ret i1 true
+  %x = icmp eq i16 ptrtoint (i32 addrspace(3)* @g to i16), ptrtoint (i32 addrspace(3)* @g to i16)
+  ret i1 %x
+}
+
+define i1 @constant_fold_inttoptr() {
+; CHECK-LABEL: @constant_fold_inttoptr(
+; CHECK-NEXT: ret i1 false
+  %x = icmp eq i32 addrspace(3)* inttoptr (i16 99 to i32 addrspace(3)*), inttoptr (i16 27 to i32 addrspace(3)*)
+  ret i1 %x
+}
+
+@g_float_as3 = addrspace(3) global float zeroinitializer
+@g_v4f_as3 = addrspace(3) global <4 x float> zeroinitializer
+
+define float @constant_fold_bitcast_ftoi_load() {
+; CHECK-LABEL: @constant_fold_bitcast_ftoi_load(
+; CHECK: load float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4
+  %a = load float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4
+  ret float %a
+}
+
+define i32 @constant_fold_bitcast_itof_load() {
+; CHECK-LABEL: @constant_fold_bitcast_itof_load(
+; CHECK: load i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4
+  %a = load i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4
+  ret i32 %a
+}
+
+define <4 x i32> @constant_fold_bitcast_vector_as() {
+; CHECK-LABEL: @constant_fold_bitcast_vector_as(
+; CHECK: [[LOAD:%[^ ]+]] = load <4 x float> addrspace(3)* @g_v4f_as3, align 16
+; CHECK: bitcast <4 x float> [[LOAD]] to <4 x i32>
+  %a = load <4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*), align 4 ; input uses align 4; the expected output above has align 16
+  ret <4 x i32> %a
+}
+
+@i32_array_as3 = addrspace(3) global [10 x i32] zeroinitializer
+
+define i32 @test_cast_gep_small_indices_as() {
+; CHECK-LABEL: @test_cast_gep_small_indices_as(
+; CHECK: load i32 addrspace(3)* getelementptr inbounds ([10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16
+   %p = getelementptr [10 x i32] addrspace(3)* @i32_array_as3, i7 0, i7 0
+   %x = load i32 addrspace(3)* %p, align 4
+   ret i32 %x
+}
+
+%struct.foo = type { float, float, [4 x i32], i32 addrspace(3)* }
+
+@constant_fold_global_ptr = addrspace(3) global %struct.foo {
+  float 0.0,
+  float 0.0,
+  [4 x i32] zeroinitializer,
+  i32 addrspace(3)* getelementptr ([10 x i32] addrspace(3)* @i32_array_as3, i64 0, i64 0)
+}
+
+define i32 @test_cast_gep_large_indices_as() {
+; CHECK-LABEL: @test_cast_gep_large_indices_as(
+; CHECK: load i32 addrspace(3)* getelementptr inbounds ([10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16
+   %p = getelementptr [10 x i32] addrspace(3)* @i32_array_as3, i64 0, i64 0
+   %x = load i32 addrspace(3)* %p, align 4
+   ret i32 %x
+}
+
+define i32 @test_constant_cast_gep_struct_indices_as() {
+; CHECK-LABEL: @test_constant_cast_gep_struct_indices_as(
+; CHECK: load i32 addrspace(3)* getelementptr inbounds (%struct.foo addrspace(3)* @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 8
+  %x = getelementptr %struct.foo addrspace(3)* @constant_fold_global_ptr, i18 0, i32 2, i12 2
+  %y = load i32 addrspace(3)* %x, align 4
+  ret i32 %y
+}
+
+@constant_data_as3 = addrspace(3) constant [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5]
+
+define i32 @test_read_data_from_global_as3() {
+; CHECK-LABEL: @test_read_data_from_global_as3(
+; CHECK-NEXT: ret i32 2
+  %x = getelementptr [5 x i32] addrspace(3)* @constant_data_as3, i32 0, i32 1
+  %y = load i32 addrspace(3)* %x, align 4
+  ret i32 %y
+}
+
+@a = addrspace(1) constant i32 9
+@b = addrspace(1) constant i32 23
+@c = addrspace(1) constant i32 34
+@d = addrspace(1) constant i32 99
+
+@ptr_array = addrspace(2) constant [4 x i32 addrspace(1)*] [ i32 addrspace(1)* @a, i32 addrspace(1)* @b, i32 addrspace(1)* @c, i32 addrspace(1)* @d]
+@indirect = addrspace(0) constant i32 addrspace(1)* addrspace(2)* getelementptr inbounds ([4 x i32 addrspace(1)*] addrspace(2)* @ptr_array, i1 0, i32 2)
+
+define i32 @constant_through_array_as_ptrs() {
+; CHECK-LABEL: @constant_through_array_as_ptrs(
+; CHECK-NEXT: ret i32 34
+  %p = load i32 addrspace(1)* addrspace(2)* addrspace(0)* @indirect, align 4
+  %a = load i32 addrspace(1)* addrspace(2)* %p, align 4
+  %b = load i32 addrspace(1)* %a, align 4
+  ret i32 %b
+}
diff --git a/test/Transforms/InstCombine/constant-fold-gep.ll b/test/Transforms/InstCombine/constant-fold-gep.ll
index 9f82e66..5fb5602 100644
--- a/test/Transforms/InstCombine/constant-fold-gep.ll
+++ b/test/Transforms/InstCombine/constant-fold-gep.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target datalayout = "E-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
 ; Constant folding should fix notionally out-of-bounds indices
 ; and add inbounds keywords.
@@ -72,3 +72,21 @@ entry:
   ret i64 %E
   ; CHECK: ret i64 1000
 }
+
+@X_as1 = addrspace(1) global [1000 x i8] zeroinitializer, align 16
+
+define i16 @test2_as1() {
+; CHECK-LABEL: @test2_as1(
+  ; CHECK: ret i16 1000
+
+entry:
+  %A = bitcast i8 addrspace(1)* getelementptr inbounds ([1000 x i8] addrspace(1)* @X_as1, i64 1, i64 0) to i8 addrspace(1)*
+  %B = bitcast i8 addrspace(1)* getelementptr inbounds ([1000 x i8] addrspace(1)* @X_as1, i64 0, i64 0) to i8 addrspace(1)*
+
+  %B2 = ptrtoint i8 addrspace(1)* %B to i16
+  %C = sub i16 0, %B2
+  %D = getelementptr i8 addrspace(1)* %A, i16 %C
+  %E = ptrtoint i8 addrspace(1)* %D to i16
+
+  ret i16 %E
+}
diff --git a/test/Transforms/InstCombine/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll
index a76c353..2e3785f 100644
--- a/test/Transforms/InstCombine/debug-line.ll
+++ b/test/Transforms/InstCombine/debug-line.ll
@@ -12,15 +12,17 @@ define void @foo() nounwind ssp {
 declare i32 @printf(i8*, ...)
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!10}
 !llvm.dbg.sp = !{!0}
 
 !0 = metadata !{i32 589870, metadata !8, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ]
 !2 = metadata !{i32 589841, metadata !8, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 5, i32 2, metadata !6, null}
 !6 = metadata !{i32 589835, metadata !8, metadata !0, i32 4, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
 !7 = metadata !{i32 6, i32 1, metadata !6, null}
 !8 = metadata !{metadata !"m.c", metadata !"/private/tmp"}
 !9 = metadata !{metadata !0}
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index 2f080bf..75082dc 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -2,7 +2,7 @@
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
-declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone
 
 declare i8* @foo(i8*, i32, i64, i64) nounwind
 
@@ -23,18 +23,19 @@ entry:
   %tmp1 = load i32* %__val.addr, align 4, !dbg !21
   %tmp2 = load i64* %__len.addr, align 8, !dbg !21
   %tmp3 = load i8** %__dest.addr, align 8, !dbg !21
-  %0 = call i64 @llvm.objectsize.i64(i8* %tmp3, i1 false), !dbg !21
+  %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp3, i1 false), !dbg !21
   %call = call i8* @foo(i8* %tmp, i32 %tmp1, i64 %tmp2, i64 %0), !dbg !21
   ret i8* %call, !dbg !21
 }
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!30}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !27, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !27, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ] [line 79] [local] [def] [foobar]
 !2 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.0 (trunk 127710)", i1 true, metadata !"", i32 0, metadata !29, metadata !29, metadata !24, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !27, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{i32 786453, metadata !27, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6}
 !6 = metadata !{i32 786447, null, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
 !7 = metadata !{i32 786689, metadata !1, metadata !"__val", metadata !2, i32 33554510, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
@@ -55,3 +56,4 @@ entry:
 !27 = metadata !{metadata !"string.h", metadata !"Game"}
 !28 = metadata !{metadata !"bits.c", metadata !"Game"}
 !29 = metadata !{i32 0}
+!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
index e5448ee..5cacb59 100644
--- a/test/Transforms/InstCombine/double-float-shrink-1.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -263,6 +263,7 @@ define double @sin_test2(float %f) nounwind readnone {
    ret double %call
 ; CHECK: call double @sin(double %conv)
 }
+
 define float @sqrt_test(float %f) nounwind readnone {
 ; CHECK: sqrt_test
    %conv = fpext float %f to double
@@ -272,6 +273,15 @@ define float @sqrt_test(float %f) nounwind readnone {
 ; CHECK: call float @sqrtf(float %f)
 }
 
+define float @sqrt_int_test(float %f) nounwind readnone {
+; CHECK: sqrt_int_test
+   %conv = fpext float %f to double
+   %call = call double @llvm.sqrt.f64(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
+; CHECK: call float @llvm.sqrt.f32(float %f)
+}
+
 define double @sqrt_test2(float %f) nounwind readnone {
 ; CHECK: sqrt_test2
    %conv = fpext float %f to double
@@ -331,3 +341,6 @@ declare double @acos(double) nounwind readnone
 declare double @acosh(double) nounwind readnone
 declare double @asin(double) nounwind readnone
 declare double @asinh(double) nounwind readnone
+
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
diff --git a/test/Transforms/InstCombine/enforce-known-alignment.ll b/test/Transforms/InstCombine/enforce-known-alignment.ll
index 6645d99..46bb605 100644
--- a/test/Transforms/InstCombine/enforce-known-alignment.ll
+++ b/test/Transforms/InstCombine/enforce-known-alignment.ll
@@ -1,8 +1,12 @@
-; RUN: opt < %s -instcombine -S | grep alloca | grep "align 16"
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+; RUN: opt  -instcombine -S %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.6"
 
 define void @foo(i32) {
+; CHECK-LABEL: @foo(
+; CHECK: alloca
+; CHECK: align 16
 	%2 = alloca [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>], align 16		; <[3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>]*> [#uses=1]
 	%3 = getelementptr [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>]* %2, i32 0, i32 0		; <<{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>*> [#uses=1]
 	%4 = getelementptr <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>* %3, i32 0, i32 0		; <{ { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } }*> [#uses=1]
@@ -11,8 +15,24 @@ define void @foo(i32) {
 	%7 = getelementptr { [8 x i16] }* %6, i32 0, i32 0		; <[8 x i16]*> [#uses=1]
 	%8 = getelementptr [8 x i16]* %7, i32 0, i32 0		; <i16*> [#uses=1]
 	store i16 0, i16* %8, align 16
-        call void @bar(i16* %8)
+    call void @bar(i16* %8)
 	ret void
 }
 
 declare void @bar(i16*)
+
+define void @foo_as1(i32 %a, [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>] addrspace(1)* %b) {
+; CHECK-LABEL: @foo_as1(
+; CHECK: align 16
+  %1 = getelementptr [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>] addrspace(1)* %b, i32 0, i32 0        ; <<{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }> addrspace(1)*> [#uses=1]
+  %2 = getelementptr <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }> addrspace(1)* %1, i32 0, i32 0      ; <{ { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } addrspace(1)*> [#uses=1]
+  %3 = getelementptr { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } addrspace(1)* %2, i32 0, i32 0        ; <{ [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } addrspace(1)*> [#uses=1]
+  %4 = bitcast { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } addrspace(1)* %3 to { [8 x i16] } addrspace(1)*     ; <{ [8 x i16] } addrspace(1)*> [#uses=1]
+  %5 = getelementptr { [8 x i16] } addrspace(1)* %4, i32 0, i32 0     ; <[8 x i16] addrspace(1)*> [#uses=1]
+  %6 = getelementptr [8 x i16] addrspace(1)* %5, i32 0, i32 0     ; <i16 addrspace(1)*> [#uses=2]
+  store i16 0, i16 addrspace(1)* %6, align 16
+  call void @bar_as1(i16 addrspace(1)* %6)
+  ret void
+}
+
+declare void @bar_as1(i16 addrspace(1)*)
diff --git a/test/Transforms/InstCombine/err-rep-cold.ll b/test/Transforms/InstCombine/err-rep-cold.ll
new file mode 100644
index 0000000..0cbafc4
--- /dev/null
+++ b/test/Transforms/InstCombine/err-rep-cold.ll
@@ -0,0 +1,77 @@
+; Test the static branch probability heuristics for error-reporting functions: calls that write to stderr are marked cold.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+
+@stdout = external global %struct._IO_FILE*
+@stderr = external global %struct._IO_FILE*
+@.str = private unnamed_addr constant [13 x i8] c"an error: %d\00", align 1
+@.str1 = private unnamed_addr constant [9 x i8] c"an error\00", align 1
+
+define i32 @test1(i32 %a) #0 {
+; CHECK-LABEL: @test1
+entry:
+  %cmp = icmp sgt i32 %a, 8
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                                          ; preds = %entry
+  %0 = load %struct._IO_FILE** @stderr, align 8
+  %call = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8]* @.str, i64 0, i64 0), i32 %a) #1
+  br label %return
+
+; CHECK: %call = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8]* @.str, i64 0, i64 0), i32 %a) #[[AT1:[0-9]+]]
+
+return:                                           ; preds = %entry, %if.then
+  %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
+  ret i32 %retval.0
+}
+
+declare i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture readonly, ...) #1
+
+define i32 @test2(i32 %a) #0 {
+; CHECK-LABEL: @test2
+entry:
+  %cmp = icmp sgt i32 %a, 8
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                                          ; preds = %entry
+  %0 = load %struct._IO_FILE** @stderr, align 8
+  %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0)
+  br label %return
+
+; CHECK: tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) #[[AT2:[0-9]+]]
+
+return:                                           ; preds = %entry, %if.then
+  %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
+  ret i32 %retval.0
+}
+
+declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #1
+
+define i32 @test3(i32 %a) #0 {
+; CHECK-LABEL: @test3
+entry:
+  %cmp = icmp sgt i32 %a, 8
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                                          ; preds = %entry
+  %0 = load %struct._IO_FILE** @stdout, align 8
+  %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0)
+  br label %return
+
+; CHECK-NOT: tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) #[[AT2]]
+
+return:                                           ; preds = %entry, %if.then
+  %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
+  ret i32 %retval.0
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
+; CHECK: attributes #[[AT1]] = { cold nounwind }
+; CHECK: attributes #[[AT2]] = { cold }
+
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index a9a7015..d8ba2a5 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -202,6 +202,18 @@ define float @fmul2(float %f1) {
 ; CHECK: fdiv fast float 1.200000e+07, %f1
 }
 
+; C1/X * C2 => (C1*C2) / X is disabled if C1/X has multiple uses
+@fmul2_external = external global float
+define float @fmul2_disable(float %f1) {
+  %div = fdiv fast float 1.000000e+00, %f1 
+  store float %div, float* @fmul2_external
+  %mul = fmul fast float %div, 2.000000e+00
+  ret float %mul
+; CHECK-LABEL: @fmul2_disable
+; CHECK: store
+; CHECK: fmul fast
+}
+
 ; X/C1 * C2 => X * (C2/C1) (if C2/C1 is normal Fp)
 define float @fmul3(float %f1, float %f2) {
   %t1 = fdiv float %f1, 2.0e+3
diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll
index 8f0b38f..1dec11d 100644
--- a/test/Transforms/InstCombine/ffs-1.ll
+++ b/test/Transforms/InstCombine/ffs-1.ll
@@ -1,7 +1,7 @@
 ; Test that the ffs* library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=LINUX
+; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=CHECK-LINUX
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll
index cf57bed..402ee52 100644
--- a/test/Transforms/InstCombine/fmul.ll
+++ b/test/Transforms/InstCombine/fmul.ll
@@ -70,3 +70,26 @@ define float @test7(float %x, float %y) {
 ; CHECK-LABEL: @test7(
 ; CHECK: fsub float -0.000000e+00, %x
 }
+
+; Don't crash when attempting to cast a constant FMul to an instruction.
+define void @test8(i32* %inout) {
+entry:
+  %0 = load i32* %inout, align 4
+  %conv = uitofp i32 %0 to float
+  %vecinit = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, float %conv, i32 3
+  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vecinit
+  %1 = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %mul = fmul <4 x float> zeroinitializer, %1
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %local_var_7.0 = phi <4 x float> [ %mul, %entry ], [ %2, %for.body ]
+  br i1 undef, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = insertelement <4 x float> %local_var_7.0, float 0.000000e+00, i32 2
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
diff --git a/test/Transforms/InstCombine/fold-vector-select.ll b/test/Transforms/InstCombine/fold-vector-select.ll
index 2cb970b..b58d9dc 100644
--- a/test/Transforms/InstCombine/fold-vector-select.ll
+++ b/test/Transforms/InstCombine/fold-vector-select.ll
@@ -1,4 +1,6 @@
-; RUN: opt < %s -instcombine -S | not grep select
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK-NOT: select
 
 define void @foo(<4 x i32> *%A, <4 x i32> *%B, <4 x i32> *%C, <4 x i32> *%D,
                  <4 x i32> *%E, <4 x i32> *%F, <4 x i32> *%G, <4 x i32> *%H,
diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll
index 09f0532..05d1b48 100644
--- a/test/Transforms/InstCombine/fpcast.ll
+++ b/test/Transforms/InstCombine/fpcast.ll
@@ -31,4 +31,16 @@ define half @test4(float %a) {
   ret half %c
 }
 
+; CHECK: test5
+define half @test5(float %a, float %b, float %c) {
+; CHECK: fcmp ogt
+; CHECK: fptrunc
+; CHECK: select
+; CHECK: half 0xH3C00
+  %d = fcmp ogt float %a, %b
+  %e = select i1 %d, float %c, float 1.0
+  %f = fptrunc float %e to half
+  ret half %f
+}
+
 declare float @llvm.fabs.f32(float) nounwind readonly
diff --git a/test/Transforms/InstCombine/fprintf-1.ll b/test/Transforms/InstCombine/fprintf-1.ll
index 1b7c104..3f6a314 100644
--- a/test/Transforms/InstCombine/fprintf-1.ll
+++ b/test/Transforms/InstCombine/fprintf-1.ll
@@ -1,7 +1,7 @@
 ; Test that the fprintf library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
diff --git a/test/Transforms/InstCombine/gep-addrspace.ll b/test/Transforms/InstCombine/gep-addrspace.ll
index dfe12db..24c355d 100644
--- a/test/Transforms/InstCombine/gep-addrspace.ll
+++ b/test/Transforms/InstCombine/gep-addrspace.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-pc-win32"
 define void @func(%myStruct addrspace(1)* nocapture %p) nounwind {
 ST:
   %A = getelementptr inbounds %myStruct addrspace(1)* %p, i64 0
-  %B = bitcast %myStruct addrspace(1)* %A to %myStruct*
+  %B = addrspacecast %myStruct addrspace(1)* %A to %myStruct*
   %C = getelementptr inbounds %myStruct* %B, i32 0, i32 1
   %D = getelementptr inbounds [3 x float]* %C, i32 0, i32 2
   %E = load float* %D, align 4
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index 90f144a..c29a7dc 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -1,6 +1,7 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-target datalayout = "e-p:64:64"
+target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32"
+
 %intstruct = type { i32 }
 %pair = type { i32, i32 }
 %struct.B = type { double }
@@ -8,15 +9,23 @@ target datalayout = "e-p:64:64"
 
 
 @Global = constant [10 x i8] c"helloworld"
+@Global_as1 = addrspace(1) constant [10 x i8] c"helloworld"
 
 ; Test noop elimination
 define i32* @test1(i32* %I) {
-        %A = getelementptr i32* %I, i64 0 
+        %A = getelementptr i32* %I, i64 0
         ret i32* %A
 ; CHECK-LABEL: @test1(
 ; CHECK: ret i32* %I
 }
 
+define i32 addrspace(1)* @test1_as1(i32 addrspace(1)* %I) {
+  %A = getelementptr i32 addrspace(1)* %I, i64 0
+  ret i32 addrspace(1)* %A
+; CHECK-LABEL: @test1_as1(
+; CHECK: ret i32 addrspace(1)* %I
+}
+
 ; Test noop elimination
 define i32* @test2(i32* %I) {
         %A = getelementptr i32* %I
@@ -36,7 +45,7 @@ define i32* @test3(i32* %I) {
 
 ; Test that two getelementptr insts fold
 define i32* @test4({ i32 }* %I) {
-        %A = getelementptr { i32 }* %I, i64 1 
+        %A = getelementptr { i32 }* %I, i64 1
         %B = getelementptr { i32 }* %A, i64 0, i32 0
         ret i32* %B
 ; CHECK-LABEL: @test4(
@@ -45,17 +54,53 @@ define i32* @test4({ i32 }* %I) {
 
 define void @test5(i8 %B) {
         ; This should be turned into a constexpr instead of being an instruction
-        %A = getelementptr [10 x i8]* @Global, i64 0, i64 4 
+        %A = getelementptr [10 x i8]* @Global, i64 0, i64 4
         store i8 %B, i8* %A
         ret void
 ; CHECK-LABEL: @test5(
 ; CHECK: store i8 %B, i8* getelementptr inbounds ([10 x i8]* @Global, i64 0, i64 4)
 }
 
+define void @test5_as1(i8 %B) {
+        ; This should be turned into a constexpr instead of being an instruction
+        %A = getelementptr [10 x i8] addrspace(1)* @Global_as1, i16 0, i16 4
+        store i8 %B, i8 addrspace(1)* %A
+        ret void
+; CHECK-LABEL: @test5_as1(
+; CHECK: store i8 %B, i8 addrspace(1)* getelementptr inbounds ([10 x i8] addrspace(1)* @Global_as1, i16 0, i16 4)
+}
+
+%as1_ptr_struct = type { i32 addrspace(1)* }
+%as2_ptr_struct = type { i32 addrspace(2)* }
+
+@global_as2 = addrspace(2) global i32 zeroinitializer
+@global_as1_as2_ptr = addrspace(1) global %as2_ptr_struct { i32 addrspace(2)* @global_as2 }
+
+; This should be turned into a constexpr instead of being an instruction
+define void @test_evaluate_gep_nested_as_ptrs(i32 addrspace(2)* %B) {
+; CHECK-LABEL: @test_evaluate_gep_nested_as_ptrs(
+; CHECK-NEXT: store i32 addrspace(2)* %B, i32 addrspace(2)* addrspace(1)* getelementptr inbounds (%as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i16 0, i32 0), align 8
+; CHECK-NEXT: ret void
+  %A = getelementptr %as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i16 0, i32 0
+  store i32 addrspace(2)* %B, i32 addrspace(2)* addrspace(1)* %A
+  ret void
+}
+
+@arst = addrspace(1) global [4 x i8 addrspace(2)*] zeroinitializer
+
+define void @test_evaluate_gep_as_ptrs_array(i8 addrspace(2)* %B) {
+; CHECK-LABEL: @test_evaluate_gep_as_ptrs_array(
+; CHECK-NEXT: store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* getelementptr inbounds ([4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2), align 4
+
+; CHECK-NEXT: ret void
+  %A = getelementptr [4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2
+  store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* %A
+  ret void
+}
 
 define i32* @test7(i32* %I, i64 %C, i64 %D) {
-        %A = getelementptr i32* %I, i64 %C 
-        %B = getelementptr i32* %A, i64 %D 
+        %A = getelementptr i32* %I, i64 %C
+        %B = getelementptr i32* %A, i64 %D
         ret i32* %B
 ; CHECK-LABEL: @test7(
 ; CHECK: %A.sum = add i64 %C, %D
@@ -64,8 +109,8 @@ define i32* @test7(i32* %I, i64 %C, i64 %D) {
 
 define i8* @test8([10 x i32]* %X) {
         ;; Fold into the cast.
-        %A = getelementptr [10 x i32]* %X, i64 0, i64 0 
-        %B = bitcast i32* %A to i8*     
+        %A = getelementptr [10 x i32]* %X, i64 0, i64 0
+        %B = bitcast i32* %A to i8*
         ret i8* %B
 ; CHECK-LABEL: @test8(
 ; CHECK: bitcast [10 x i32]* %X to i8*
@@ -73,7 +118,7 @@ define i8* @test8([10 x i32]* %X) {
 
 define i32 @test9() {
         %A = getelementptr { i32, double }* null, i32 0, i32 1
-        %B = ptrtoint double* %A to i32        
+        %B = ptrtoint double* %A to i32
         ret i32 %B
 ; CHECK-LABEL: @test9(
 ; CHECK: ret i32 8
@@ -83,15 +128,15 @@ define i1 @test10({ i32, i32 }* %x, { i32, i32 }* %y) {
         %tmp.1 = getelementptr { i32, i32 }* %x, i32 0, i32 1
         %tmp.3 = getelementptr { i32, i32 }* %y, i32 0, i32 1
         ;; seteq x, y
-        %tmp.4 = icmp eq i32* %tmp.1, %tmp.3       
+        %tmp.4 = icmp eq i32* %tmp.1, %tmp.3
         ret i1 %tmp.4
 ; CHECK-LABEL: @test10(
 ; CHECK: icmp eq { i32, i32 }* %x, %y
 }
 
 define i1 @test11({ i32, i32 }* %X) {
-        %P = getelementptr { i32, i32 }* %X, i32 0, i32 0 
-        %Q = icmp eq i32* %P, null             
+        %P = getelementptr { i32, i32 }* %X, i32 0, i32 0
+        %Q = icmp eq i32* %P, null
         ret i1 %Q
 ; CHECK-LABEL: @test11(
 ; CHECK: icmp eq { i32, i32 }* %X, null
@@ -105,11 +150,11 @@ entry:
   store i32 10, i32* %g3, align 4
 
   %g4 = getelementptr %struct.A* %a, i32 0, i32 0
-  
+
   %new_a = bitcast %struct.B* %g4 to %struct.A*
 
-  %g5 = getelementptr %struct.A* %new_a, i32 0, i32 1	
-  %a_a = load i32* %g5, align 4	
+  %g5 = getelementptr %struct.A* %new_a, i32 0, i32 1
+  %a_a = load i32* %g5, align 4
   ret i32 %a_a
 ; CHECK-LABEL:      @test12(
 ; CHECK:      getelementptr %struct.A* %a, i64 0, i32 1
@@ -129,8 +174,68 @@ define i1 @test13(i64 %X, %S* %P) {
 ; CHECK:    %C = icmp eq i64 %X, -1
 }
 
-
-@G = external global [3 x i8]      
+define <2 x i1> @test13_vector(<2 x i64> %X, <2 x %S*> %P) nounwind {
+; CHECK-LABEL: @test13_vector(
+; CHECK-NEXT: shl nuw <2 x i64> %X, <i64 2, i64 2>
+; CHECK-NEXT: add <2 x i64> %A.idx, <i64 4, i64 4>
+; CHECK-NEXT: icmp eq <2 x i64> %A.offs, zeroinitializer
+  %A = getelementptr inbounds <2 x %S*> %P, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 1>, <2 x i64> %X
+  %B = getelementptr inbounds <2 x %S*> %P, <2 x i64> <i64 0, i64 0>, <2 x i32> <i32 0, i32 0>
+  %C = icmp eq <2 x i32*> %A, %B
+  ret <2 x i1> %C
+}
+
+define i1 @test13_as1(i16 %X, %S addrspace(1)* %P) {
+; CHECK-LABEL: @test13_as1(
+; CHECK-NEXT:  %C = icmp eq i16 %X, -1
+; CHECK-NEXT: ret i1 %C
+  %A = getelementptr inbounds %S addrspace(1)* %P, i16 0, i32 1, i16 %X
+  %B = getelementptr inbounds %S addrspace(1)* %P, i16 0, i32 0
+  %C = icmp eq i32 addrspace(1)* %A, %B
+  ret i1 %C
+}
+
+define <2 x i1> @test13_vector_as1(<2 x i16> %X, <2 x %S addrspace(1)*> %P) {
+; CHECK-LABEL: @test13_vector_as1(
+; CHECK-NEXT: shl nuw <2 x i16> %X, <i16 2, i16 2>
+; CHECK-NEXT: add <2 x i16> %A.idx, <i16 4, i16 4>
+; CHECK-NEXT: icmp eq <2 x i16> %A.offs, zeroinitializer
+; CHECK-NEXT: ret <2 x i1>
+  %A = getelementptr inbounds <2 x %S addrspace(1)*> %P, <2 x i16> <i16 0, i16 0>, <2 x i32> <i32 1, i32 1>, <2 x i16> %X
+  %B = getelementptr inbounds <2 x %S addrspace(1)*> %P, <2 x i16> <i16 0, i16 0>, <2 x i32> <i32 0, i32 0>
+  %C = icmp eq <2 x i32 addrspace(1)*> %A, %B
+  ret <2 x i1> %C
+}
+
+define i1 @test13_i32(i32 %X, %S* %P) {
+; CHECK-LABEL: @test13_i32(
+; CHECK: %C = icmp eq i32 %X, -1
+  %A = getelementptr inbounds %S* %P, i32 0, i32 1, i32 %X
+  %B = getelementptr inbounds %S* %P, i32 0, i32 0
+  %C = icmp eq i32* %A, %B
+  ret i1 %C
+}
+
+define i1 @test13_i16(i16 %X, %S* %P) {
+; CHECK-LABEL: @test13_i16(
+; CHECK: %C = icmp eq i16 %X, -1
+  %A = getelementptr inbounds %S* %P, i16 0, i32 1, i16 %X
+  %B = getelementptr inbounds %S* %P, i16 0, i32 0
+  %C = icmp eq i32* %A, %B
+  ret i1 %C
+}
+
+define i1 @test13_i128(i128 %X, %S* %P) {
+; CHECK-LABEL: @test13_i128(
+; CHECK: %C = icmp eq i64 %1, -1
+  %A = getelementptr inbounds %S* %P, i128 0, i32 1, i128 %X
+  %B = getelementptr inbounds %S* %P, i128 0, i32 0
+  %C = icmp eq i32* %A, %B
+  ret i1 %C
+}
+
+
+@G = external global [3 x i8]
 define i8* @test14(i32 %Idx) {
         %idx = zext i32 %Idx to i64
         %tmp = getelementptr i8* getelementptr ([3 x i8]* @G, i32 0, i32 0), i64 %idx
@@ -151,7 +256,7 @@ define i32 *@test15(i64 %X) {
 
 
 define i32* @test16(i32* %X, i32 %Idx) {
-        %R = getelementptr i32* %X, i32 %Idx       
+        %R = getelementptr i32* %X, i32 %Idx
         ret i32* %R
 ; CHECK-LABEL: @test16(
 ; CHECK: sext i32 %Idx to i64
@@ -164,7 +269,7 @@ define i1 @test17(i16* %P, i32 %I, i32 %J) {
         %C = icmp ult i16* %X, %Y
         ret i1 %C
 ; CHECK-LABEL: @test17(
-; CHECK: %C = icmp slt i32 %I, %J 
+; CHECK: %C = icmp slt i32 %I, %J
 }
 
 define i1 @test18(i16* %P, i32 %I) {
@@ -175,6 +280,55 @@ define i1 @test18(i16* %P, i32 %I) {
 ; CHECK: %C = icmp slt i32 %I, 0
 }
 
+; Larger than the pointer size for a non-zero address space
+define i1 @test18_as1(i16 addrspace(1)* %P, i32 %I) {
+; CHECK-LABEL: @test18_as1(
+; CHECK-NEXT: %1 = trunc i32 %I to i16
+; CHECK-NEXT: %C = icmp slt i16 %1, 0
+; CHECK-NEXT: ret i1 %C
+  %X = getelementptr inbounds i16 addrspace(1)* %P, i32 %I
+  %C = icmp ult i16 addrspace(1)* %X, %P
+  ret i1 %C
+}
+
+; NOTE(review): the i32 index is larger (not smaller) than the 16-bit addrspace(1) pointer; same input as @test18_as1
+define i1 @test18_as1_i32(i16 addrspace(1)* %P, i32 %I) {
+; CHECK-LABEL: @test18_as1_i32(
+; CHECK-NEXT: %1 = trunc i32 %I to i16
+; CHECK-NEXT: %C = icmp slt i16 %1, 0
+; CHECK-NEXT: ret i1 %C
+  %X = getelementptr inbounds i16 addrspace(1)* %P, i32 %I
+  %C = icmp ult i16 addrspace(1)* %X, %P
+  ret i1 %C
+}
+
+; Smaller than pointer size
+define i1 @test18_i16(i16* %P, i16 %I) {
+; CHECK-LABEL: @test18_i16(
+; CHECK: %C = icmp slt i16 %I, 0
+  %X = getelementptr inbounds i16* %P, i16 %I
+  %C = icmp ult i16* %X, %P
+  ret i1 %C
+}
+
+; Same as pointer size
+define i1 @test18_i64(i16* %P, i64 %I) {
+; CHECK-LABEL: @test18_i64(
+; CHECK: %C = icmp slt i64 %I, 0
+  %X = getelementptr inbounds i16* %P, i64 %I
+  %C = icmp ult i16* %X, %P
+  ret i1 %C
+}
+
+; Larger than the pointer size
+define i1 @test18_i128(i16* %P, i128 %I) {
+; CHECK-LABEL: @test18_i128(
+; CHECK: %C = icmp slt i64 %1, 0
+  %X = getelementptr inbounds i16* %P, i128 %I
+  %C = icmp ult i16* %X, %P
+  ret i1 %C
+}
+
 define i32 @test19(i32* %P, i32 %A, i32 %B) {
         %tmp.4 = getelementptr inbounds i32* %P, i32 %A
         %tmp.9 = getelementptr inbounds i32* %P, i32 %B
@@ -194,6 +348,15 @@ define i32 @test20(i32* %P, i32 %A, i32 %B) {
 ; CHECK: icmp eq i32 %A, 0
 }
 
+define i32 @test20_as1(i32 addrspace(1)* %P, i32 %A, i32 %B) {
+  %tmp.4 = getelementptr inbounds i32 addrspace(1)* %P, i32 %A
+  %tmp.6 = icmp eq i32 addrspace(1)* %tmp.4, %P
+  %tmp.7 = zext i1 %tmp.6 to i32
+  ret i32 %tmp.7
+; CHECK-LABEL: @test20_as1(
+; CHECK: icmp eq i16 %1, 0
+}
+
 
 define i32 @test21() {
         %pbob1 = alloca %intstruct
@@ -210,8 +373,8 @@ define i32 @test21() {
 @B = global i32 2               ; <i32*> [#uses=1]
 
 define i1 @test22() {
-        %C = icmp ult i32* getelementptr (i32* @A, i64 1), 
-                           getelementptr (i32* @B, i64 2) 
+        %C = icmp ult i32* getelementptr (i32* @A, i64 1),
+                           getelementptr (i32* @B, i64 2)
         ret i1 %C
 ; CHECK-LABEL: @test22(
 ; CHECK: icmp ult (i32* getelementptr inbounds (i32* @A, i64 1), i32* getelementptr (i32* @B, i64 2))
@@ -262,15 +425,15 @@ define i1 @test26(i8* %arr) {
 
 define i32 @test27(%struct.compat_siginfo* %to, %struct.siginfo_t* %from) {
 entry:
-	%from_addr = alloca %struct.siginfo_t*	
-	%tmp344 = load %struct.siginfo_t** %from_addr, align 8	
+	%from_addr = alloca %struct.siginfo_t*
+	%tmp344 = load %struct.siginfo_t** %from_addr, align 8
 	%tmp345 = getelementptr %struct.siginfo_t* %tmp344, i32 0, i32 3
 	%tmp346 = getelementptr { { i32, i32, [0 x i8], %struct.sigval_t, i32 }, [88 x i8] }* %tmp345, i32 0, i32 0
-	%tmp346347 = bitcast { i32, i32, [0 x i8], %struct.sigval_t, i32 }* %tmp346 to { i32, i32, %struct.sigval_t }*	
+	%tmp346347 = bitcast { i32, i32, [0 x i8], %struct.sigval_t, i32 }* %tmp346 to { i32, i32, %struct.sigval_t }*
 	%tmp348 = getelementptr { i32, i32, %struct.sigval_t }* %tmp346347, i32 0, i32 2
 	%tmp349 = getelementptr %struct.sigval_t* %tmp348, i32 0, i32 0
 	%tmp349350 = bitcast i8** %tmp349 to i32*
-	%tmp351 = load i32* %tmp349350, align 8	
+	%tmp351 = load i32* %tmp349350, align 8
 	%tmp360 = call i32 asm sideeffect "...",
         "=r,ir,*m,i,0,~{dirflag},~{fpsr},~{flags}"( i32 %tmp351,
          %struct.__large_struct* null, i32 -14, i32 0 )
@@ -280,28 +443,28 @@ entry:
 
 ; PR1978
 	%struct.x = type <{ i8 }>
-@.str = internal constant [6 x i8] c"Main!\00"	
-@.str1 = internal constant [12 x i8] c"destroy %p\0A\00"	
+@.str = internal constant [6 x i8] c"Main!\00"
+@.str1 = internal constant [12 x i8] c"destroy %p\0A\00"
 
 define i32 @test28() nounwind  {
 entry:
 	%orientations = alloca [1 x [1 x %struct.x]]
-	%tmp3 = call i32 @puts( i8* getelementptr ([6 x i8]* @.str, i32 0, i32 0) ) nounwind 
+	%tmp3 = call i32 @puts( i8* getelementptr ([6 x i8]* @.str, i32 0, i32 0) ) nounwind
 	%tmp45 = getelementptr inbounds [1 x [1 x %struct.x]]* %orientations, i32 1, i32 0, i32 0
 	%orientations62 = getelementptr [1 x [1 x %struct.x]]* %orientations, i32 0, i32 0, i32 0
 	br label %bb10
 
 bb10:
 	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb10 ]
-	%tmp.0.reg2mem.0.rec = mul i32 %indvar, -1	
-	%tmp12.rec = add i32 %tmp.0.reg2mem.0.rec, -1	
+	%tmp.0.reg2mem.0.rec = mul i32 %indvar, -1
+	%tmp12.rec = add i32 %tmp.0.reg2mem.0.rec, -1
 	%tmp12 = getelementptr inbounds %struct.x* %tmp45, i32 %tmp12.rec
 	%tmp16 = call i32 (i8*, ...)* @printf( i8* getelementptr ([12 x i8]* @.str1, i32 0, i32 0), %struct.x* %tmp12 ) nounwind
 	%tmp84 = icmp eq %struct.x* %tmp12, %orientations62
 	%indvar.next = add i32 %indvar, 1
 	br i1 %tmp84, label %bb17, label %bb10
 
-bb17:	
+bb17:
 	ret i32 0
 ; CHECK-LABEL: @test28(
 ; CHECK: icmp eq i32 %indvar, 0
@@ -318,7 +481,7 @@ declare i32 @printf(i8*, ...)
 	%T = type <{ i64, i64, i64 }>
 define i32 @test29(i8* %start, i32 %X) nounwind {
 entry:
-	%tmp3 = load i64* null		
+	%tmp3 = load i64* null
 	%add.ptr = getelementptr i8* %start, i64 %tmp3
 	%tmp158 = load i32* null
 	%add.ptr159 = getelementptr %T* null, i32 %tmp158
@@ -356,7 +519,7 @@ declare void @test30f(i32*)
 define i1 @test31(i32* %A) {
         %B = getelementptr i32* %A, i32 1
         %C = getelementptr i32* %A, i64 1
-        %V = icmp eq i32* %B, %C 
+        %V = icmp eq i32* %B, %C
         ret i1 %V
 ; CHECK-LABEL: @test31(
 ; CHECK: ret i1 true
@@ -372,7 +535,7 @@ define i8* @test32(i8* %v) {
 	%D = getelementptr { [16 x i8] }* %C, i32 0, i32 0, i32 8
 	%E = bitcast i8* %D to i8**
 	store i8* %v, i8** %E
-	%F = getelementptr [4 x i8*]* %A, i32 0, i32 2	
+	%F = getelementptr [4 x i8*]* %A, i32 0, i32 2
 	%G = load i8** %F
 	ret i8* %G
 ; CHECK-LABEL: @test32(
@@ -384,23 +547,46 @@ define i8* @test32(i8* %v) {
 %struct.Key = type { { i32, i32 } }
 %struct.anon = type <{ i8, [3 x i8], i32 }>
 
-define i32 *@test33(%struct.Key *%A) {
-	%B = bitcast %struct.Key* %A to %struct.anon*
-        %C = getelementptr %struct.anon* %B, i32 0, i32 2 
-	ret i32 *%C
+define i32* @test33(%struct.Key* %A) {
 ; CHECK-LABEL: @test33(
 ; CHECK: getelementptr %struct.Key* %A, i64 0, i32 0, i32 1
+  %B = bitcast %struct.Key* %A to %struct.anon*
+  %C = getelementptr %struct.anon* %B, i32 0, i32 2
+  ret i32* %C
 }
 
+define i32 addrspace(1)* @test33_as1(%struct.Key addrspace(1)* %A) {
+; CHECK-LABEL: @test33_as1(
+; CHECK: getelementptr %struct.Key addrspace(1)* %A, i16 0, i32 0, i32 1
+  %B = bitcast %struct.Key addrspace(1)* %A to %struct.anon addrspace(1)*
+  %C = getelementptr %struct.anon addrspace(1)* %B, i32 0, i32 2
+  ret i32 addrspace(1)* %C
+}
 
+define i32 addrspace(1)* @test33_array_as1([10 x i32] addrspace(1)* %A) {
+; CHECK-LABEL: @test33_array_as1(
+; CHECK: getelementptr [10 x i32] addrspace(1)* %A, i16 0, i16 2
+  %B = bitcast [10 x i32] addrspace(1)* %A to [5 x i32] addrspace(1)*
+  %C = getelementptr [5 x i32] addrspace(1)* %B, i32 0, i32 2
+  ret i32 addrspace(1)* %C
+}
+
+; Make sure the GEP indices use the right pointer sized integer
+define i32 addrspace(1)* @test33_array_struct_as1([10 x %struct.Key] addrspace(1)* %A) {
+; CHECK-LABEL: @test33_array_struct_as1(
+; CHECK: getelementptr [10 x %struct.Key] addrspace(1)* %A, i16 0, i16 1, i32 0, i32 0
+  %B = bitcast [10 x %struct.Key] addrspace(1)* %A to [20 x i32] addrspace(1)*
+  %C = getelementptr [20 x i32] addrspace(1)* %B, i32 0, i32 2
+  ret i32 addrspace(1)* %C
+}
 
 	%T2 = type { i8*, i8 }
 define i8* @test34(i8* %Val, i64 %V) nounwind {
 entry:
-	%A = alloca %T2, align 8	
+	%A = alloca %T2, align 8
 	%mrv_gep = bitcast %T2* %A to i64*
 	%B = getelementptr %T2* %A, i64 0, i32 0
-        
+
       	store i64 %V, i64* %mrv_gep
 	%C = load i8** %B, align 8
 	ret i8* %C
@@ -519,4 +705,88 @@ define i1 @pr16483([1 x i8]* %a, [1 x i8]* %b) {
 ; CHECK-NEXT: icmp ult  [1 x i8]* %a, %b
 }
 
+define i8 @test_gep_bitcast_as1(i32 addrspace(1)* %arr, i16 %N) {
+; CHECK-LABEL: @test_gep_bitcast_as1(
+; CHECK: getelementptr i32 addrspace(1)* %arr, i16 %N
+; CHECK: bitcast
+  %cast = bitcast i32 addrspace(1)* %arr to i8 addrspace(1)*
+  %V = mul i16 %N, 4
+  %t = getelementptr i8 addrspace(1)* %cast, i16 %V
+  %x = load i8 addrspace(1)* %t
+  ret i8 %x
+}
+
+; The element size of the array matches the element size of the pointer
+define i64 @test_gep_bitcast_array_same_size_element([100 x double]* %arr, i64 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_same_size_element(
+; CHECK: getelementptr [100 x double]* %arr, i64 0, i64 %V
+; CHECK: bitcast
+  %cast = bitcast [100 x double]* %arr to i64*
+  %V = mul i64 %N, 8
+  %t = getelementptr i64* %cast, i64 %V
+  %x = load i64* %t
+  ret i64 %x
+}
+
+; The element size of the array differs from the element size of the pointer
+define i8 @test_gep_bitcast_array_different_size_element([100 x double]* %arr, i64 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_different_size_element(
+; CHECK: getelementptr [100 x double]* %arr, i64 0, i64 %N
+; CHECK: bitcast
+  %cast = bitcast [100 x double]* %arr to i8*
+  %V = mul i64 %N, 8
+  %t = getelementptr i8* %cast, i64 %V
+  %x = load i8* %t
+  ret i8 %x
+}
+
+define i64 @test_gep_bitcast_array_same_size_element_as1([100 x double] addrspace(1)* %arr, i16 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_as1(
+; CHECK: getelementptr [100 x double] addrspace(1)* %arr, i16 0, i16 %V
+; CHECK: bitcast
+  %cast = bitcast [100 x double] addrspace(1)* %arr to i64 addrspace(1)*
+  %V = mul i16 %N, 8
+  %t = getelementptr i64 addrspace(1)* %cast, i16 %V
+  %x = load i64 addrspace(1)* %t
+  ret i64 %x
+}
+
+define i8 @test_gep_bitcast_array_different_size_element_as1([100 x double] addrspace(1)* %arr, i16 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_different_size_element_as1(
+; CHECK: getelementptr [100 x double] addrspace(1)* %arr, i16 0, i16 %N
+; CHECK: bitcast
+  %cast = bitcast [100 x double] addrspace(1)* %arr to i8 addrspace(1)*
+  %V = mul i16 %N, 8
+  %t = getelementptr i8 addrspace(1)* %cast, i16 %V
+  %x = load i8 addrspace(1)* %t
+  ret i8 %x
+}
+
+define i64 @test40() {
+  %array = alloca [3 x i32], align 4
+  %gep = getelementptr inbounds [3 x i32]* %array, i64 0, i64 2
+  %gepi8 = bitcast i32* %gep to i8*
+  %p = ptrtoint [3 x i32]* %array to i64
+  %np = sub i64 0, %p
+  %gep2 = getelementptr i8* %gepi8, i64 %np
+  %ret = ptrtoint i8* %gep2 to i64
+  ret i64 %ret
+
+; CHECK-LABEL: @test40
+; CHECK-NEXT: ret i64 8
+}
+
+define i16 @test41([3 x i32] addrspace(1)* %array) {
+  %gep = getelementptr inbounds [3 x i32] addrspace(1)* %array, i16 0, i16 2
+  %gepi8 = bitcast i32 addrspace(1)* %gep to i8 addrspace(1)*
+  %p = ptrtoint [3 x i32] addrspace(1)* %array to i16
+  %np = sub i16 0, %p
+  %gep2 = getelementptr i8 addrspace(1)* %gepi8, i16 %np
+  %ret = ptrtoint i8 addrspace(1)* %gep2 to i16
+  ret i16 %ret
+
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: ret i16 8
+}
+
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/icmp-logical.ll b/test/Transforms/InstCombine/icmp-logical.ll
new file mode 100644
index 0000000..d5d8cbc
--- /dev/null
+++ b/test/Transforms/InstCombine/icmp-logical.ll
@@ -0,0 +1,152 @@
+; RUN: opt -instcombine -S -o - %s | FileCheck %s
+
+define i1 @masked_and_notallzeroes(i32 %A) {
+; CHECK-LABEL: @masked_and_notallzeroes
+; CHECK: [[MASK:%.*]] = and i32 %A, 7
+; CHECK: icmp ne i32 [[MASK]], 0
+; CHECK-NOT: and i32 %A, 39
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp ne i32 %mask1, 0
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp ne i32 %mask2, 0
+
+  %res = and i1 %tst1, %tst2
+  ret i1 %res
+}
+
+define i1 @masked_or_allzeroes(i32 %A) {
+; CHECK-LABEL: @masked_or_allzeroes
+; CHECK: [[MASK:%.*]] = and i32 %A, 7
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK-NOT: and i32 %A, 39
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp eq i32 %mask1, 0
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp eq i32 %mask2, 0
+
+  %res = or i1 %tst1, %tst2
+  ret i1 %res
+}
+
+define i1 @masked_and_notallones(i32 %A) {
+; CHECK-LABEL: @masked_and_notallones
+; CHECK: [[MASK:%.*]] = and i32 %A, 7
+; CHECK: icmp ne i32 [[MASK]], 7
+; CHECK-NOT: and i32 %A, 39
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp ne i32 %mask1, 7
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp ne i32 %mask2, 39
+
+  %res = and i1 %tst1, %tst2
+  ret i1 %res
+}
+
+define i1 @masked_or_allones(i32 %A) {
+; CHECK-LABEL: @masked_or_allones
+; CHECK: [[MASK:%.*]] = and i32 %A, 7
+; CHECK: icmp eq i32 [[MASK]], 7
+; CHECK-NOT: and i32 %A, 39
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp eq i32 %mask1, 7
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp eq i32 %mask2, 39
+
+  %res = or i1 %tst1, %tst2
+  ret i1 %res
+}
+
+define i1 @masked_and_notA(i32 %A) {
+; CHECK-LABEL: @masked_and_notA
+; CHECK: [[MASK:%.*]] = and i32 %A, 39
+; CHECK: icmp ne i32 [[MASK]], %A
+; CHECK-NOT: and i32 %A, 7
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp ne i32 %mask1, %A
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp ne i32 %mask2, %A
+
+  %res = and i1 %tst1, %tst2
+  ret i1 %res
+}
+
+define i1 @masked_or_A(i32 %A) {
+; CHECK-LABEL: @masked_or_A
+; CHECK: [[MASK:%.*]] = and i32 %A, 39
+; CHECK: icmp eq i32 [[MASK]], %A
+; CHECK-NOT: and i32 %A, 7
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp eq i32 %mask1, %A
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp eq i32 %mask2, %A
+
+  %res = or i1 %tst1, %tst2
+  ret i1 %res
+}
+
+define i1 @masked_or_allzeroes_notoptimised(i32 %A) {
+; CHECK-LABEL: @masked_or_allzeroes_notoptimised
+; CHECK: [[MASK:%.*]] = and i32 %A, 15
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK: [[MASK:%.*]] = and i32 %A, 39
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 15
+  %tst1 = icmp eq i32 %mask1, 0
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp eq i32 %mask2, 0
+
+  %res = or i1 %tst1, %tst2
+  ret i1 %res
+}
+
+define i1 @nomask_lhs(i32 %in) {
+; CHECK-LABEL: @nomask_lhs
+; CHECK: [[MASK:%.*]] = and i32 %in, 1
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK-NOT: icmp
+; CHECK: ret i1
+  %tst1 = icmp eq i32 %in, 0
+
+  %masked = and i32 %in, 1
+  %tst2 = icmp eq i32 %masked, 0
+
+  %val = or i1 %tst1, %tst2
+  ret i1 %val
+}
+
+
+define i1 @nomask_rhs(i32 %in) {
+; CHECK-LABEL: @nomask_rhs
+; CHECK: [[MASK:%.*]] = and i32 %in, 1
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK-NOT: icmp
+; CHECK: ret i1
+  %masked = and i32 %in, 1
+  %tst1 = icmp eq i32 %masked, 0
+
+  %tst2 = icmp eq i32 %in, 0
+
+  %val = or i1 %tst1, %tst2
+  ret i1 %val
+}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index dfeac67..12a4744 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 target datalayout =
-"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+"e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 define i32 @test1(i32 %X) {
 entry:
@@ -79,7 +79,7 @@ entry:
 
 define i1 @test8(i32 %x){
 entry:
-  %a = add i32 %x, -1 
+  %a = add i32 %x, -1
   %b = icmp eq i32 %a, %x
   ret i1 %b
 ; CHECK-LABEL: @test8(
@@ -89,7 +89,7 @@ entry:
 define i1 @test9(i32 %x)  {
 entry:
   %a = add i32 %x, -2
-  %b = icmp ugt i32 %x, %a 
+  %b = icmp ugt i32 %x, %a
   ret i1 %b
 ; CHECK-LABEL: @test9(
 ; CHECK: icmp ugt i32 %x, 1
@@ -98,10 +98,9 @@ entry:
 
 define i1 @test10(i32 %x){
 entry:
-  %a = add i32 %x, -1      
-  %b = icmp slt i32 %a, %x 
+  %a = add i32 %x, -1
+  %b = icmp slt i32 %a, %x
   ret i1 %b
-  
 ; CHECK-LABEL: @test10(
 ; CHECK: %b = icmp ne i32 %x, -2147483648
 ; CHECK: ret i1 %b
@@ -234,6 +233,18 @@ define i1 @test24(i64 %i) {
   ret i1 %cmp
 }
 
+@X_as1 = addrspace(1) global [1000 x i32] zeroinitializer
+
+; CHECK-LABEL: @test24_as1(
+; CHECK: trunc i64 %i to i16
+; CHECK: %cmp = icmp eq i16 %1, 1000
+; CHECK: ret i1 %cmp
+define i1 @test24_as1(i64 %i) {
+  %p1 = getelementptr inbounds i32 addrspace(1)* getelementptr inbounds ([1000 x i32] addrspace(1)* @X_as1, i64 0, i64 0), i64 %i
+  %cmp = icmp eq i32 addrspace(1)* %p1, getelementptr inbounds ([1000 x i32] addrspace(1)* @X_as1, i64 1, i64 0)
+  ret i1 %cmp
+}
+
 ; CHECK-LABEL: @test25(
 ; X + Z > Y + Z -> X > Y if there is no overflow.
 ; CHECK: %c = icmp sgt i32 %x, %y
@@ -473,7 +484,7 @@ define <2 x i1> @test49(<2 x i32> %tmp3) {
 entry:
   %tmp11 = and <2 x i32> %tmp3, <i32 3, i32 3>
   %cmp = icmp ult <2 x i32> %tmp11, <i32 4, i32 4>
-  ret <2 x i1> %cmp  
+  ret <2 x i1> %cmp
 }
 
 ; PR9343 #7
@@ -512,12 +523,12 @@ define i1 @test52(i32 %x1) nounwind {
 
 ; PR9838
 ; CHECK-LABEL: @test53(
-; CHECK-NEXT: ashr exact
-; CHECK-NEXT: ashr
+; CHECK-NEXT: sdiv exact
+; CHECK-NEXT: sdiv
 ; CHECK-NEXT: icmp
 define i1 @test53(i32 %a, i32 %b) nounwind {
- %x = ashr exact i32 %a, 30
- %y = ashr i32 %b, 30
+ %x = sdiv exact i32 %a, 30
+ %y = sdiv i32 %b, 30
  %z = icmp eq i32 %x, %y
  ret i1 %z
 }
@@ -603,6 +614,21 @@ define i1 @test59(i8* %foo) {
 ; CHECK: ret i1 true
 }
 
+define i1 @test59_as1(i8 addrspace(1)* %foo) {
+  %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+  %gep1 = getelementptr inbounds i32 addrspace(1)* %bit, i64 2
+  %gep2 = getelementptr inbounds i8 addrspace(1)* %foo, i64 10
+  %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+  %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+  %use = ptrtoint i8 addrspace(1)* %cast1 to i64
+  %call = call i32 @test58_d(i64 %use) nounwind
+  ret i1 %cmp
+; CHECK-LABEL: @test59_as1(
+; CHECK: %[[GEP:.+]] = getelementptr inbounds i8 addrspace(1)* %foo, i16 8
+; CHECK: ptrtoint i8 addrspace(1)* %[[GEP]] to i16
+; CHECK: ret i1 true
+}
+
 define i1 @test60(i8* %foo, i64 %i, i64 %j) {
   %bit = bitcast i8* %foo to i32*
   %gep1 = getelementptr inbounds i32* %bit, i64 %i
@@ -616,6 +642,21 @@ define i1 @test60(i8* %foo, i64 %i, i64 %j) {
 ; CHECK-NEXT: ret i1
 }
 
+define i1 @test60_as1(i8 addrspace(1)* %foo, i64 %i, i64 %j) {
+  %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+  %gep1 = getelementptr inbounds i32 addrspace(1)* %bit, i64 %i
+  %gep2 = getelementptr inbounds i8 addrspace(1)* %foo, i64 %j
+  %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+  %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+  ret i1 %cmp
+; CHECK-LABEL: @test60_as1(
+; CHECK: trunc i64 %i to i16
+; CHECK: trunc i64 %j to i16
+; CHECK: %gep1.idx = shl nuw i16 %{{.+}}, 2
+; CHECK-NEXT: icmp sgt i16 %{{.+}}, %gep1.idx
+; CHECK-NEXT: ret i1
+}
+
 define i1 @test61(i8* %foo, i64 %i, i64 %j) {
   %bit = bitcast i8* %foo to i32*
   %gep1 = getelementptr i32* %bit, i64 %i
@@ -629,6 +670,19 @@ define i1 @test61(i8* %foo, i64 %i, i64 %j) {
 ; CHECK-NEXT: ret i1
 }
 
+define i1 @test61_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
+  %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+  %gep1 = getelementptr i32 addrspace(1)* %bit, i16 %i
+  %gep2 = getelementptr i8 addrspace(1)* %foo, i16 %j
+  %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+  %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+  ret i1 %cmp
+; Don't transform non-inbounds GEPs.
+; CHECK-LABEL: @test61_as1(
+; CHECK: icmp ult i8 addrspace(1)* %cast1, %gep2
+; CHECK-NEXT: ret i1
+}
+
 define i1 @test62(i8* %a) {
   %arrayidx1 = getelementptr inbounds i8* %a, i64 1
   %arrayidx2 = getelementptr inbounds i8* %a, i64 10
@@ -638,6 +692,15 @@ define i1 @test62(i8* %a) {
 ; CHECK-NEXT: ret i1 true
 }
 
+define i1 @test62_as1(i8 addrspace(1)* %a) {
+; CHECK-LABEL: @test62_as1(
+; CHECK-NEXT: ret i1 true
+  %arrayidx1 = getelementptr inbounds i8 addrspace(1)* %a, i64 1
+  %arrayidx2 = getelementptr inbounds i8 addrspace(1)* %a, i64 10
+  %cmp = icmp slt i8 addrspace(1)* %arrayidx1, %arrayidx2
+  ret i1 %cmp
+}
+
 define i1 @test63(i8 %a, i32 %b) nounwind {
   %z = zext i8 %a to i32
   %t = and i32 %b, 255
@@ -999,6 +1062,15 @@ define i1 @test71(i8* %x) {
   ret i1 %c
 }
 
+define i1 @test71_as1(i8 addrspace(1)* %x) {
+; CHECK-LABEL: @test71_as1(
+; CHECK-NEXT: ret i1 false
+  %a = getelementptr i8 addrspace(1)* %x, i64 8
+  %b = getelementptr inbounds i8 addrspace(1)* %x, i64 8
+  %c = icmp ugt i8 addrspace(1)* %a, %b
+  ret i1 %c
+}
+
 ; CHECK-LABEL: @icmp_shl_1_V_ult_32(
 ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %V, 5
 ; CHECK-NEXT: ret i1 [[CMP]]
@@ -1199,3 +1271,88 @@ define i1 @icmp_sub_-1_X_uge_4(i32 %X) {
   %cmp = icmp uge i32 %sub, 4
   ret i1 %cmp
 }
+
+; CHECK-LABEL: @icmp_swap_operands_for_cse
+; CHECK: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, %Y
+; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
+; CHECK: ret i1
+define i1 @icmp_swap_operands_for_cse(i32 %X, i32 %Y) {
+entry:
+  %sub = sub i32 %X, %Y
+  %cmp = icmp ugt i32 %Y, %X
+  br i1 %cmp, label %true, label %false
+true:
+  %restrue = trunc i32 %sub to i1
+  br label %end
+false:
+  %shift = lshr i32 %sub, 4
+  %resfalse = trunc i32 %shift to i1
+  br label %end
+end:
+  %res = phi i1 [%restrue, %true], [%resfalse, %false]
+  ret i1 %res
+}
+
+; CHECK-LABEL: @icmp_swap_operands_for_cse2
+; CHECK: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, %Y
+; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
+; CHECK: ret i1
+define i1 @icmp_swap_operands_for_cse2(i32 %X, i32 %Y) {
+entry:
+  %cmp = icmp ugt i32 %Y, %X
+  br i1 %cmp, label %true, label %false
+true:
+  %sub = sub i32 %X, %Y
+  %sub1 = sub i32 %X, %Y
+  %add = add i32 %sub, %sub1
+  %restrue = trunc i32 %add to i1
+  br label %end
+false:
+  %sub2 = sub i32 %Y, %X
+  %resfalse = trunc i32 %sub2 to i1
+  br label %end
+end:
+  %res = phi i1 [%restrue, %true], [%resfalse, %false]
+  ret i1 %res
+}
+
+; CHECK-LABEL: @icmp_do_not_swap_operands_for_cse
+; CHECK: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %Y, %X
+; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
+; CHECK: ret i1
+define i1 @icmp_do_not_swap_operands_for_cse(i32 %X, i32 %Y) {
+entry:
+  %cmp = icmp ugt i32 %Y, %X
+  br i1 %cmp, label %true, label %false
+true:
+  %sub = sub i32 %X, %Y
+  %restrue = trunc i32 %sub to i1
+  br label %end
+false:
+  %sub2 = sub i32 %Y, %X
+  %resfalse = trunc i32 %sub2 to i1
+  br label %end
+end:
+  %res = phi i1 [%restrue, %true], [%resfalse, %false]
+  ret i1 %res
+}
+
+; CHECK-LABEL: @icmp_lshr_lshr_eq
+; CHECK: %z.unshifted = xor i32 %a, %b
+; CHECK: %z = icmp ult i32 %z.unshifted, 1073741824
+define i1 @icmp_lshr_lshr_eq(i32 %a, i32 %b) nounwind {
+ %x = lshr i32 %a, 30
+ %y = lshr i32 %b, 30
+ %z = icmp eq i32 %x, %y
+ ret i1 %z
+}
+
+; CHECK-LABEL: @icmp_ashr_ashr_ne
+; CHECK: %z.unshifted = xor i32 %a, %b
+; CHECK: %z = icmp ugt i32 %z.unshifted, 255
+define i1 @icmp_ashr_ashr_ne(i32 %a, i32 %b) nounwind {
+ %x = ashr i32 %a, 8
+ %y = ashr i32 %b, 8
+ %z = icmp ne i32 %x, %y
+ ret i1 %z
+}
diff --git a/test/Transforms/InstCombine/lit.local.cfg b/test/Transforms/InstCombine/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/InstCombine/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index 95dc48c..9810026 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -1,18 +1,75 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck -check-prefix=NODL %s
+; RUN: opt -instcombine -S -default-data-layout="p:32:32:32-p1:16:16:16-n8:16:32:64" < %s | FileCheck -check-prefix=P32 %s
 
-@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, 
+@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
                                      i16 73, i16 82, i16 69, i16 68, i16 0]
+
+@G16_as1 = internal addrspace(1) constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
+                                                      i16 73, i16 82, i16 69, i16 68, i16 0]
+
 @GD = internal constant [6 x double]
    [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0]
 
+%Foo = type { i32, i32, i32, i32 }
+
+@GS = internal constant %Foo { i32 1, i32 4, i32 9, i32 14 }
+
+@GStructArr = internal constant [4 x %Foo] [ %Foo { i32 1, i32 4, i32 9, i32 14 },
+                                             %Foo { i32 5, i32 4, i32 6, i32 11 },
+                                             %Foo { i32 6, i32 5, i32 9, i32 20 },
+                                             %Foo { i32 12, i32 3, i32 9, i32 8 } ]
+
+
 define i1 @test1(i32 %X) {
   %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
   %Q = load i16* %P
   %R = icmp eq i16 %Q, 0
   ret i1 %R
-; CHECK-LABEL: @test1(
-; CHECK-NEXT: %R = icmp eq i32 %X, 9
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test1(
+; NODL-NEXT: %R = icmp eq i32 %X, 9
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test1(
+; P32-NEXT: %R = icmp eq i32 %X, 9
+; P32-NEXT: ret i1 %R
+}
+
+define i1 @test1_noinbounds(i32 %X) {
+  %P = getelementptr [10 x i16]* @G16, i32 0, i32 %X
+  %Q = load i16* %P
+  %R = icmp eq i16 %Q, 0
+  ret i1 %R
+; NODL-LABEL: @test1_noinbounds(
+; NODL-NEXT: %P = getelementptr [10 x i16]* @G16, i32 0, i32 %X
+
+; P32-LABEL: @test1_noinbounds(
+; P32-NEXT: %R = icmp eq i32 %X, 9
+; P32-NEXT: ret i1 %R
+}
+
+define i1 @test1_noinbounds_i64(i64 %X) {
+  %P = getelementptr [10 x i16]* @G16, i64 0, i64 %X
+  %Q = load i16* %P
+  %R = icmp eq i16 %Q, 0
+  ret i1 %R
+; NODL-LABEL: @test1_noinbounds_i64(
+; NODL-NEXT: %P = getelementptr [10 x i16]* @G16, i64 0, i64 %X
+
+; P32-LABEL: @test1_noinbounds_i64(
+; P32: %R = icmp eq i32 %1, 9
+; P32-NEXT: ret i1 %R
+}
+
+define i1 @test1_noinbounds_as1(i32 %x) {
+  %p = getelementptr [10 x i16] addrspace(1)* @G16_as1, i16 0, i32 %x
+  %q = load i16 addrspace(1)* %p
+  %r = icmp eq i16 %q, 0
+  ret i1 %r
+
+; P32-LABEL: @test1_noinbounds_as1(
+; P32-NEXT: trunc i32 %x to i16
+; P32-NEXT: %r = icmp eq i16 %1, 9
+; P32-NEXT: ret i1 %r
 }
 
 define i1 @test2(i32 %X) {
@@ -20,9 +77,9 @@ define i1 @test2(i32 %X) {
   %Q = load i16* %P
   %R = icmp slt i16 %Q, 85
   ret i1 %R
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: %R = icmp ne i32 %X, 4
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test2(
+; NODL-NEXT: %R = icmp ne i32 %X, 4
+; NODL-NEXT: ret i1 %R
 }
 
 define i1 @test3(i32 %X) {
@@ -30,9 +87,14 @@ define i1 @test3(i32 %X) {
   %Q = load double* %P
   %R = fcmp oeq double %Q, 1.0
   ret i1 %R
-; CHECK-LABEL: @test3(
-; CHECK-NEXT: %R = icmp eq i32 %X, 1
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test3(
+; NODL-NEXT: %R = icmp eq i32 %X, 1
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test3(
+; P32-NEXT: %R = icmp eq i32 %X, 1
+; P32-NEXT: ret i1 %R
+
 }
 
 define i1 @test4(i32 %X) {
@@ -40,11 +102,17 @@ define i1 @test4(i32 %X) {
   %Q = load i16* %P
   %R = icmp sle i16 %Q, 73
   ret i1 %R
-; CHECK-LABEL: @test4(
-; CHECK-NEXT: lshr i32 933, %X
-; CHECK-NEXT: and i32 {{.*}}, 1
-; CHECK-NEXT: %R = icmp ne i32 {{.*}}, 0
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test4(
+; NODL-NEXT: lshr i32 933, %X
+; NODL-NEXT: and i32 {{.*}}, 1
+; NODL-NEXT: %R = icmp ne i32 {{.*}}, 0
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test4(
+; P32-NEXT: lshr i32 933, %X
+; P32-NEXT: and i32 {{.*}}, 1
+; P32-NEXT: %R = icmp ne i32 {{.*}}, 0
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test4_i16(i16 %X) {
@@ -52,11 +120,19 @@ define i1 @test4_i16(i16 %X) {
   %Q = load i16* %P
   %R = icmp sle i16 %Q, 73
   ret i1 %R
-; CHECK-LABEL: @test4_i16(
-; CHECK-NEXT: lshr i16 933, %X
-; CHECK-NEXT: and i16 {{.*}}, 1
-; CHECK-NEXT: %R = icmp ne i16 {{.*}}, 0
-; CHECK-NEXT: ret i1 %R
+
+; NODL-LABEL: @test4_i16(
+; NODL-NEXT: lshr i16 933, %X
+; NODL-NEXT: and i16 {{.*}}, 1
+; NODL-NEXT: %R = icmp ne i16 {{.*}}, 0
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test4_i16(
+; P32-NEXT: sext i16 %X to i32
+; P32-NEXT: lshr i32 933, %1
+; P32-NEXT: and i32 {{.*}}, 1
+; P32-NEXT: %R = icmp ne i32 {{.*}}, 0
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test5(i32 %X) {
@@ -64,11 +140,17 @@ define i1 @test5(i32 %X) {
   %Q = load i16* %P
   %R = icmp eq i16 %Q, 69
   ret i1 %R
-; CHECK-LABEL: @test5(
-; CHECK-NEXT: icmp eq i32 %X, 2
-; CHECK-NEXT: icmp eq i32 %X, 7
-; CHECK-NEXT: %R = or i1
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test5(
+; NODL-NEXT: icmp eq i32 %X, 2
+; NODL-NEXT: icmp eq i32 %X, 7
+; NODL-NEXT: %R = or i1
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test5(
+; P32-NEXT: icmp eq i32 %X, 2
+; P32-NEXT: icmp eq i32 %X, 7
+; P32-NEXT: %R = or i1
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test6(i32 %X) {
@@ -76,10 +158,15 @@ define i1 @test6(i32 %X) {
   %Q = load double* %P
   %R = fcmp ogt double %Q, 0.0
   ret i1 %R
-; CHECK-LABEL: @test6(
-; CHECK-NEXT: add i32 %X, -1
-; CHECK-NEXT: %R = icmp ult i32 {{.*}}, 3
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test6(
+; NODL-NEXT: add i32 %X, -1
+; NODL-NEXT: %R = icmp ult i32 {{.*}}, 3
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test6(
+; P32-NEXT: add i32 %X, -1
+; P32-NEXT: %R = icmp ult i32 {{.*}}, 3
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test7(i32 %X) {
@@ -87,10 +174,15 @@ define i1 @test7(i32 %X) {
   %Q = load double* %P
   %R = fcmp olt double %Q, 0.0
   ret i1 %R
-; CHECK-LABEL: @test7(
-; CHECK-NEXT: add i32 %X, -1
-; CHECK-NEXT: %R = icmp ugt i32 {{.*}}, 2
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test7(
+; NODL-NEXT: add i32 %X, -1
+; NODL-NEXT: %R = icmp ugt i32 {{.*}}, 2
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test7(
+; P32-NEXT: add i32 %X, -1
+; P32-NEXT: %R = icmp ugt i32 {{.*}}, 2
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test8(i32 %X) {
@@ -99,10 +191,15 @@ define i1 @test8(i32 %X) {
   %R = and i16 %Q, 3
   %S = icmp eq i16 %R, 0
   ret i1 %S
-; CHECK-LABEL: @test8(
-; CHECK-NEXT: and i32 %X, -2
-; CHECK-NEXT: icmp eq i32 {{.*}}, 8
-; CHECK-NEXT: ret i1
+; NODL-LABEL: @test8(
+; NODL-NEXT: and i32 %X, -2
+; NODL-NEXT: icmp eq i32 {{.*}}, 8
+; NODL-NEXT: ret i1
+
+; P32-LABEL: @test8(
+; P32-NEXT: and i32 %X, -2
+; P32-NEXT: icmp eq i32 {{.*}}, 8
+; P32-NEXT: ret i1
 }
 
 @GA = internal constant [4 x { i32, i32 } ] [
@@ -117,8 +214,161 @@ define i1 @test9(i32 %X) {
   %Q = load i32* %P
   %R = icmp eq i32 %Q, 1
   ret i1 %R
-; CHECK-LABEL: @test9(
-; CHECK-NEXT: add i32 %X, -1
-; CHECK-NEXT: icmp ult i32 {{.*}}, 2
-; CHECK-NEXT: ret i1
+; NODL-LABEL: @test9(
+; NODL-NEXT: add i32 %X, -1
+; NODL-NEXT: icmp ult i32 {{.*}}, 2
+; NODL-NEXT: ret i1
+
+; P32-LABEL: @test9(
+; P32-NEXT: add i32 %X, -1
+; P32-NEXT: icmp ult i32 {{.*}}, 2
+; P32-NEXT: ret i1
+}
+
+define i1 @test10_struct(i32 %x) {
+; NODL-LABEL: @test10_struct(
+; NODL: getelementptr inbounds %Foo* @GS, i32 %x, i32 0
+
+; P32-LABEL: @test10_struct(
+; P32: getelementptr inbounds %Foo* @GS, i32 %x, i32 0
+  %p = getelementptr inbounds %Foo* @GS, i32 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_noinbounds(i32 %x) {
+; NODL-LABEL: @test10_struct_noinbounds(
+; NODL: getelementptr %Foo* @GS, i32 %x, i32 0
+
+; P32-LABEL: @test10_struct_noinbounds(
+; P32: getelementptr %Foo* @GS, i32 %x, i32 0
+  %p = getelementptr %Foo* @GS, i32 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+; Test that the GEP indices are converted before we ever get here
+; Index < ptr size
+define i1 @test10_struct_i16(i16 %x){
+; NODL-LABEL: @test10_struct_i16(
+; NODL: getelementptr inbounds %Foo* @GS, i16 %x, i32 0
+
+; P32-LABEL: @test10_struct_i16(
+; P32: %1 = sext i16 %x to i32
+; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0
+  %p = getelementptr inbounds %Foo* @GS, i16 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 0
+  ret i1 %r
+}
+
+; Test that the GEP indices are converted before we ever get here
+; Index > ptr size
+define i1 @test10_struct_i64(i64 %x){
+; NODL-LABEL: @test10_struct_i64(
+; NODL: getelementptr inbounds %Foo* @GS, i64 %x, i32 0
+
+; P32-LABEL: @test10_struct_i64(
+; P32: %1 = trunc i64 %x to i32
+; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0
+  %p = getelementptr inbounds %Foo* @GS, i64 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 0
+  ret i1 %r
+}
+
+define i1 @test10_struct_noinbounds_i16(i16 %x) {
+; NODL-LABEL: @test10_struct_noinbounds_i16(
+; NODL: getelementptr %Foo* @GS, i16 %x, i32 0
+
+; P32-LABEL: @test10_struct_noinbounds_i16(
+; P32: %1 = sext i16 %x to i32
+; P32: getelementptr %Foo* @GS, i32 %1, i32 0
+  %p = getelementptr %Foo* @GS, i16 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 0
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr(i32 %x) {
+; NODL-LABEL: @test10_struct_arr(
+; NODL-NEXT: %r = icmp ne i32 %x, 1
+; NODL-NEXT: ret i1 %r
+
+; P32-LABEL: @test10_struct_arr(
+; P32-NEXT: %r = icmp ne i32 %x, 1
+; P32-NEXT: ret i1 %r
+  %p = getelementptr inbounds [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_noinbounds(i32 %x) {
+; NODL-LABEL: @test10_struct_arr_noinbounds(
+; NODL-NEXT: %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+; NOTE(review): P32 expectation inferred from @test1_noinbounds (non-inbounds GEP folds with a data layout); confirm with opt.
+; P32-LABEL: @test10_struct_arr_noinbounds(
+; P32-NEXT: %r = icmp ne i32 %x, 1
+  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_i16(i16 %x) {
+; NODL-LABEL: @test10_struct_arr_i16(
+; NODL-NEXT: %r = icmp ne i16 %x, 1
+; NODL-NEXT: ret i1 %r
+
+; P32-LABEL: @test10_struct_arr_i16(
+; P32-NEXT: %r = icmp ne i16 %x, 1
+; P32-NEXT: ret i1 %r
+  %p = getelementptr inbounds [4 x %Foo]* @GStructArr, i16 0, i16 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_i64(i64 %x) {
+; NODL-LABEL: @test10_struct_arr_i64(
+; NODL-NEXT: %r = icmp ne i64 %x, 1
+; NODL-NEXT: ret i1 %r
+
+; P32-LABEL: @test10_struct_arr_i64(
+; P32-NEXT: trunc i64 %x to i32
+; P32-NEXT: %r = icmp ne i32 %1, 1
+; P32-NEXT: ret i1 %r
+  %p = getelementptr inbounds [4 x %Foo]* @GStructArr, i64 0, i64 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
+; NODL-LABEL: @test10_struct_arr_noinbounds_i16(
+; NODL-NEXT:  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2
+
+; P32-LABEL: @test10_struct_arr_noinbounds_i16(
+; P32-NEXT: %r = icmp ne i16 %x, 1
+  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
+; FIXME: Should be no trunc?
+; NODL-LABEL: @test10_struct_arr_noinbounds_i64(
+; NODL-NEXT:  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2
+
+; P32-LABEL: @test10_struct_arr_noinbounds_i64(
+; P32: %r = icmp ne i32 %1, 1
+; P32-NEXT: ret i1 %r
+  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
 }
diff --git a/test/Transforms/InstCombine/multi-size-address-space-pointer.ll b/test/Transforms/InstCombine/multi-size-address-space-pointer.ll
new file mode 100644
index 0000000..2d88bed
--- /dev/null
+++ b/test/Transforms/InstCombine/multi-size-address-space-pointer.ll
@@ -0,0 +1,112 @@
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-p4:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+
+define i32 @test_as0(i32 addrspace(0)* %a) {
+; CHECK-LABEL: @test_as0(
+; CHECK: %arrayidx = getelementptr i32* %a, i32 1
+  %arrayidx = getelementptr i32 addrspace(0)* %a, i64 1
+  %y = load i32 addrspace(0)* %arrayidx, align 4
+  ret i32 %y
+}
+
+define i32 @test_as1(i32 addrspace(1)* %a) {
+; CHECK-LABEL: @test_as1(
+; CHECK: %arrayidx = getelementptr i32 addrspace(1)* %a, i64 1
+  %arrayidx = getelementptr i32 addrspace(1)* %a, i32 1
+  %y = load i32 addrspace(1)* %arrayidx, align 4
+  ret i32 %y
+}
+
+define i32 @test_as2(i32 addrspace(2)* %a) {
+; CHECK-LABEL: @test_as2(
+; CHECK: %arrayidx = getelementptr i32 addrspace(2)* %a, i8 1
+  %arrayidx = getelementptr i32 addrspace(2)* %a, i32 1
+  %y = load i32 addrspace(2)* %arrayidx, align 4
+  ret i32 %y
+}
+
+define i32 @test_as3(i32 addrspace(3)* %a) {
+; CHECK-LABEL: @test_as3(
+; CHECK: %arrayidx = getelementptr i32 addrspace(3)* %a, i16 1
+  %arrayidx = getelementptr i32 addrspace(3)* %a, i32 1
+  %y = load i32 addrspace(3)* %arrayidx, align 4
+  ret i32 %y
+}
+
+define i32 @test_combine_ptrtoint(i32 addrspace(2)* %a) {
+; CHECK-LABEL: @test_combine_ptrtoint(
+; CHECK-NEXT: %y = load i32 addrspace(2)* %a
+; CHECK-NEXT: ret i32 %y
+  %cast = ptrtoint i32 addrspace(2)* %a to i8
+  %castback = inttoptr i8 %cast to i32 addrspace(2)*
+  %y = load i32 addrspace(2)* %castback, align 4
+  ret i32 %y
+}
+
+define i8 @test_combine_inttoptr(i8 %a) {
+; CHECK-LABEL: @test_combine_inttoptr(
+; CHECK-NEXT: ret i8 %a
+  %cast = inttoptr i8 %a to i32 addrspace(2)*
+  %castback = ptrtoint i32 addrspace(2)* %cast to i8
+  ret i8 %castback
+}
+
+define i32 @test_combine_vector_ptrtoint(<2 x i32 addrspace(2)*> %a) {
+; CHECK-LABEL: @test_combine_vector_ptrtoint(
+; CHECK-NEXT: %p = extractelement <2 x i32 addrspace(2)*> %a, i32 0
+; CHECK-NEXT: %y = load i32 addrspace(2)* %p, align 4
+; CHECK-NEXT: ret i32 %y
+  %cast = ptrtoint <2 x i32 addrspace(2)*> %a to <2 x i8>
+  %castback = inttoptr <2 x i8> %cast to <2 x i32 addrspace(2)*>
+  %p = extractelement <2 x i32 addrspace(2)*> %castback, i32 0
+  %y = load i32 addrspace(2)* %p, align 4
+  ret i32 %y
+}
+
+define <2 x i8> @test_combine_vector_inttoptr(<2 x i8> %a) {
+; CHECK-LABEL: @test_combine_vector_inttoptr(
+; CHECK-NEXT: ret <2 x i8> %a
+  %cast = inttoptr <2 x i8> %a to <2 x i32 addrspace(2)*>
+  %castback = ptrtoint <2 x i32 addrspace(2)*> %cast to <2 x i8>
+  ret <2 x i8> %castback
+}
+
+; Check that the GEP index is changed to the address space integer type (i64 -> i8)
+define i32 addrspace(2)* @shrink_gep_constant_index_64_as2(i32 addrspace(2)* %p) {
+; CHECK-LABEL: @shrink_gep_constant_index_64_as2(
+; CHECK-NEXT: getelementptr i32 addrspace(2)* %p, i8 1
+  %ret = getelementptr i32 addrspace(2)* %p, i64 1
+  ret i32 addrspace(2)* %ret
+}
+
+define i32 addrspace(2)* @shrink_gep_constant_index_32_as2(i32 addrspace(2)* %p) {
+; CHECK-LABEL: @shrink_gep_constant_index_32_as2(
+; CHECK-NEXT: getelementptr i32 addrspace(2)* %p, i8 1
+  %ret = getelementptr i32 addrspace(2)* %p, i32 1
+  ret i32 addrspace(2)* %ret
+}
+
+define i32 addrspace(3)* @shrink_gep_constant_index_64_as3(i32 addrspace(3)* %p) {
+; CHECK-LABEL: @shrink_gep_constant_index_64_as3(
+; CHECK-NEXT: getelementptr i32 addrspace(3)* %p, i16 1
+  %ret = getelementptr i32 addrspace(3)* %p, i64 1
+  ret i32 addrspace(3)* %ret
+}
+
+define i32 addrspace(2)* @shrink_gep_variable_index_64_as2(i32 addrspace(2)* %p, i64 %idx) {
+; CHECK-LABEL: @shrink_gep_variable_index_64_as2(
+; CHECK-NEXT: %1 = trunc i64 %idx to i8
+; CHECK-NEXT: getelementptr i32 addrspace(2)* %p, i8 %1
+  %ret = getelementptr i32 addrspace(2)* %p, i64 %idx
+  ret i32 addrspace(2)* %ret
+}
+
+define i32 addrspace(1)* @grow_gep_variable_index_8_as1(i32 addrspace(1)* %p, i8 %idx) {
+; CHECK-LABEL: @grow_gep_variable_index_8_as1(
+; CHECK-NEXT: %1 = sext i8 %idx to i64
+; CHECK-NEXT: getelementptr i32 addrspace(1)* %p, i64 %1
+  %ret = getelementptr i32 addrspace(1)* %p, i8 %idx
+  ret i32 addrspace(1)* %ret
+}
+
diff --git a/test/Transforms/InstCombine/objsize-address-space.ll b/test/Transforms/InstCombine/objsize-address-space.ll
new file mode 100644
index 0000000..9cb6884
--- /dev/null
+++ b/test/Transforms/InstCombine/objsize-address-space.ll
@@ -0,0 +1,80 @@
+; RUN: opt -S -instcombine -o - %s | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p2i8(i8 addrspace(2)*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)*, i1) nounwind readonly
+declare i16 @llvm.objectsize.i16.p3i8(i8 addrspace(3)*, i1) nounwind readonly
+
+@array_as2 = private addrspace(2) global [60 x i8] zeroinitializer, align 4
+
+@array_as1_pointers = private global [10 x i32 addrspace(1)*] zeroinitializer, align 4
+@array_as2_pointers = private global [24 x i32 addrspace(2)*] zeroinitializer, align 4
+@array_as3_pointers = private global [42 x i32 addrspace(3)*] zeroinitializer, align 4
+
+@array_as2_as1_pointer_pointers = private global [16 x i32 addrspace(2)* addrspace(1)*] zeroinitializer, align 4
+
+
+@a_as3 = private addrspace(3) global [60 x i8] zeroinitializer, align 1
+
+define i32 @foo_as3() nounwind {
+; CHECK-LABEL: @foo_as3(
+; CHECK-NEXT: ret i32 60
+  %1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_as3, i32 0, i32 0), i1 false)
+  ret i32 %1
+}
+
+define i16 @foo_as3_i16() nounwind {
+; CHECK-LABEL: @foo_as3_i16(
+; CHECK-NEXT: ret i16 60
+  %1 = call i16 @llvm.objectsize.i16.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_as3, i32 0, i32 0), i1 false)
+  ret i16 %1
+}
+
+@a_alias = alias weak [60 x i8] addrspace(3)* @a_as3
+define i32 @foo_alias() nounwind {
+  %1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_alias, i32 0, i32 0), i1 false)
+  ret i32 %1
+}
+
+define i32 @array_as2_size() {
+; CHECK-LABEL: @array_as2_size(
+; CHECK-NEXT: ret i32 60
+  %bc = bitcast [60 x i8] addrspace(2)* @array_as2 to i8 addrspace(2)*
+  %1 = call i32 @llvm.objectsize.i32.p2i8(i8 addrspace(2)* %bc, i1 false)
+  ret i32 %1
+}
+
+define i32 @pointer_array_as1() {
+; CHECK-LABEL: @pointer_array_as1(
+; CHECK-NEXT: ret i32 80
+  %bc = addrspacecast [10 x i32 addrspace(1)*]* @array_as1_pointers to i8 addrspace(1)*
+  %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* %bc, i1 false)
+  ret i32 %1
+}
+
+define i32 @pointer_array_as2() {
+; CHECK-LABEL: @pointer_array_as2(
+; CHECK-NEXT: ret i32 24
+  %bc = bitcast [24 x i32 addrspace(2)*]* @array_as2_pointers to i8*
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
+  ret i32 %1
+}
+
+define i32 @pointer_array_as3() {
+; CHECK-LABEL: @pointer_array_as3(
+; CHECK-NEXT: ret i32 84
+  %bc = bitcast [42 x i32 addrspace(3)*]* @array_as3_pointers to i8*
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
+  ret i32 %1
+}
+
+define i32 @pointer_pointer_array_as2_as1() {
+; CHECK-LABEL: @pointer_pointer_array_as2_as1(
+; CHECK-NEXT: ret i32 128
+  %bc = bitcast [16 x i32 addrspace(2)* addrspace(1)*]* @array_as2_as1_pointer_pointers to i8*
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
+  ret i32 %1
+}
+
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index b5351e9..6459032 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -5,11 +5,10 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 
 @a = private global [60 x i8] zeroinitializer, align 1 ; <[60 x i8]*>
 @.str = private constant [8 x i8] c"abcdefg\00"   ; <[8 x i8]*>
-
 define i32 @foo() nounwind {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT: ret i32 60
-  %1 = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
   ret i32 %1
 }
 
@@ -17,7 +16,7 @@ define i8* @bar() nounwind {
 ; CHECK-LABEL: @bar(
 entry:
   %retval = alloca i8*
-  %0 = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
+  %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
   %cmp = icmp ne i32 %0, -1
 ; CHECK: br i1 true
   br i1 %cmp, label %cond.true, label %cond.false
@@ -34,7 +33,7 @@ cond.false:
 define i32 @f() nounwind {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT: ret i32 0
-  %1 = call i32 @llvm.objectsize.i32(i8* getelementptr ([60 x i8]* @a, i32 1, i32 0), i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr ([60 x i8]* @a, i32 1, i32 0), i1 false)
   ret i32 %1
 }
 
@@ -43,16 +42,16 @@ define i32 @f() nounwind {
 define i1 @baz() nounwind {
 ; CHECK-LABEL: @baz(
 ; CHECK-NEXT: objectsize
-  %1 = tail call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 0), i1 false)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 0), i1 false)
   %2 = icmp eq i32 %1, -1
   ret i1 %2
 }
 
 define void @test1(i8* %q, i32 %x) nounwind noinline {
 ; CHECK-LABEL: @test1(
-; CHECK: objectsize.i32
+; CHECK: objectsize.i32.p0i8
 entry:
-  %0 = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 10), i1 false) ; <i64> [#uses=1]
+  %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 10), i1 false) ; <i64> [#uses=1]
   %1 = icmp eq i32 %0, -1                         ; <i1> [#uses=1]
   br i1 %1, label %"47", label %"46"
 
@@ -68,7 +67,7 @@ entry:
 define i32 @test2() nounwind {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT: ret i32 34
-  %1 = call i32 @llvm.objectsize.i32(i8* getelementptr (i8* bitcast ([9 x i32]* @.str5 to i8*), i32 2), i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr (i8* bitcast ([9 x i32]* @.str5 to i8*), i32 2), i1 false)
   ret i32 %1
 }
 
@@ -77,7 +76,7 @@ define i32 @test2() nounwind {
 
 declare i8* @__memcpy_chk(i8*, i8*, i32, i32) nounwind
 
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
 
 declare i8* @__inline_memcpy_chk(i8*, i8*, i32) nounwind inlinehint
 
@@ -89,7 +88,7 @@ entry:
 bb11:
   %0 = getelementptr inbounds float* getelementptr inbounds ([480 x float]* @array, i32 0, i32 128), i32 -127 ; <float*> [#uses=1]
   %1 = bitcast float* %0 to i8*                   ; <i8*> [#uses=1]
-  %2 = call i32 @llvm.objectsize.i32(i8* %1, i1 false) ; <i32> [#uses=1]
+  %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false) ; <i32> [#uses=1]
   %3 = call i8* @__memcpy_chk(i8* undef, i8* undef, i32 512, i32 %2) nounwind ; <i8*> [#uses=0]
 ; CHECK: unreachable
   unreachable
@@ -111,7 +110,7 @@ define i32 @test4(i8** %esc) nounwind ssp {
 entry:
   %0 = alloca %struct.data, align 8
   %1 = bitcast %struct.data* %0 to i8*
-  %2 = call i32 @llvm.objectsize.i32(i8* %1, i1 false) nounwind
+  %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false) nounwind
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @llvm.memset.p0i8.i32(i8* %1, i8 0, i32 1824, i32 8, i1 false)
   %3 = call i8* @__memset_chk(i8* %1, i32 0, i32 1824, i32 %2) nounwind
@@ -126,7 +125,7 @@ define i8* @test5(i32 %n) nounwind ssp {
 ; CHECK-LABEL: @test5(
 entry:
   %0 = tail call noalias i8* @malloc(i32 20) nounwind
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %0, i1 false)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false)
   %2 = load i8** @s, align 8
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 10, i32 1, i1 false)
@@ -138,7 +137,7 @@ define void @test6(i32 %n) nounwind ssp {
 ; CHECK-LABEL: @test6(
 entry:
   %0 = tail call noalias i8* @malloc(i32 20) nounwind
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %0, i1 false)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false)
   %2 = load i8** @s, align 8
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @__memcpy_chk(i8* %0, i8* %1, i32 30, i32 20)
@@ -155,7 +154,7 @@ define i32 @test7(i8** %esc) {
   %alloc = call noalias i8* @malloc(i32 48) nounwind
   store i8* %alloc, i8** %esc
   %gep = getelementptr inbounds i8* %alloc, i32 16
-  %objsize = call i32 @llvm.objectsize.i32(i8* %gep, i1 false) nounwind readonly
+  %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false) nounwind readonly
 ; CHECK: ret i32 32
   ret i32 %objsize
 }
@@ -167,7 +166,7 @@ define i32 @test8(i8** %esc) {
   %alloc = call noalias i8* @calloc(i32 5, i32 7) nounwind
   store i8* %alloc, i8** %esc
   %gep = getelementptr inbounds i8* %alloc, i32 5
-  %objsize = call i32 @llvm.objectsize.i32(i8* %gep, i1 false) nounwind readonly
+  %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false) nounwind readonly
 ; CHECK: ret i32 30
   ret i32 %objsize
 }
@@ -179,7 +178,7 @@ declare noalias i8* @strndup(i8* nocapture, i32) nounwind
 define i32 @test9(i8** %esc) {
   %call = tail call i8* @strdup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0)) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 8
   ret i32 %1
 }
@@ -188,7 +187,7 @@ define i32 @test9(i8** %esc) {
 define i32 @test10(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 3) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 4
   ret i32 %1
 }
@@ -197,7 +196,7 @@ define i32 @test10(i8** %esc) {
 define i32 @test11(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 7) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 8
   ret i32 %1
 }
@@ -206,7 +205,7 @@ define i32 @test11(i8** %esc) {
 define i32 @test12(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 8) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 8
   ret i32 %1
 }
@@ -215,7 +214,7 @@ define i32 @test12(i8** %esc) {
 define i32 @test13(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 57) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 8
   ret i32 %1
 }
@@ -229,8 +228,8 @@ entry:
 xpto:
   %select = select i1 %bool, i8* %select, i8* %a
   %select2 = select i1 %bool, i8* %a, i8* %select2
-  %0 = tail call i32 @llvm.objectsize.i32(i8* %select, i1 true)
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %select2, i1 true)
+  %0 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select2, i1 true)
   %2 = add i32 %0, %1
 ; CHECK: ret i32 undef
   ret i32 %2
@@ -249,7 +248,7 @@ entry:
 xpto:
   %gep2 = getelementptr i8* %gep, i32 1
   %gep = getelementptr i8* %gep2, i32 1
-  %o = call i32 @llvm.objectsize.i32(i8* %gep, i1 true)
+  %o = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 true)
 ; CHECK: ret i32 undef
   ret i32 %o
 
@@ -263,7 +262,7 @@ return:
 ; CHECK-NEXT: ret i32 60
 define i32 @test18() {
   %bc = bitcast [60 x i8]* @globalalias to i8*
-  %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
   ret i32 %1
 }
 
@@ -273,6 +272,7 @@ define i32 @test18() {
 ; CHECK: llvm.objectsize
 define i32 @test19() {
   %bc = bitcast [60 x i8]* @globalalias2 to i8*
-  %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
   ret i32 %1
 }
+
diff --git a/test/Transforms/InstCombine/onehot_merge.ll b/test/Transforms/InstCombine/onehot_merge.ll
new file mode 100644
index 0000000..51f955c
--- /dev/null
+++ b/test/Transforms/InstCombine/onehot_merge.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;CHECK: @and_consts
+;CHECK: and i32 %k, 12
+;CHECK: icmp ne i32 %0, 12
+;CHECK: ret
+define i1 @and_consts(i32 %k, i32 %c1, i32 %c2) {  ; two constant single-bit tests of %k; %c1 and %c2 are unused in this body
+bb:
+  %tmp1 = and i32 4, %k                  ; isolate bit 2 of %k
+  %tmp2 = icmp eq i32 %tmp1, 0
+  %tmp5 = and i32 8, %k                  ; isolate bit 3 of %k
+  %tmp6 = icmp eq i32 %tmp5, 0
+  %or = or i1 %tmp2, %tmp6               ; true when either bit is clear, i.e. (%k & 12) != 12
+  ret i1 %or
+}
+
+;CHECK: @foo1_and
+;CHECK:  shl i32 1, %c1
+;CHECK-NEXT:  shl i32 1, %c2
+;CHECK-NEXT:  or i32
+;CHECK-NEXT:  and i32
+;CHECK-NEXT:  icmp ne i32 %1, %0
+;CHECK: ret
+define i1 @foo1_and(i32 %k, i32 %c1, i32 %c2) {  ; same shape as a two-bit test, but with variable one-hot masks
+bb:
+  %tmp = shl i32 1, %c1                  ; one-hot mask 1 << %c1
+  %tmp4 = shl i32 1, %c2                 ; one-hot mask 1 << %c2
+  %tmp1 = and i32 %tmp, %k
+  %tmp2 = icmp eq i32 %tmp1, 0           ; first masked bit of %k is clear
+  %tmp5 = and i32 %tmp4, %k
+  %tmp6 = icmp eq i32 %tmp5, 0           ; second masked bit of %k is clear
+  %or = or i1 %tmp2, %tmp6               ; true when either masked bit is clear
+  ret i1 %or
+}
+
diff --git a/test/Transforms/InstCombine/phi-select-constexpr.ll b/test/Transforms/InstCombine/phi-select-constexpr.ll
new file mode 100644
index 0000000..054e069
--- /dev/null
+++ b/test/Transforms/InstCombine/phi-select-constexpr.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+@A = extern_weak global i32, align 4
+@B = extern_weak global i32, align 4
+
+define i32 @foo(i1 %which) {  ; select whose condition is a phi carrying a constant expression on one edge
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+; CHECK-LABEL: final:
+; CHECK: phi i32 [ 1, %entry ], [ select (i1 icmp eq (i32* @A, i32* @B), i32 2, i32 1), %delay ]
+final:
+  %use2 = phi i1 [ false, %entry ], [ icmp eq (i32* @A, i32* @B), %delay ]  ; %delay edge carries an icmp constant expression
+  %value = select i1 %use2, i32 2, i32 1   ; expected to be folded into the phi operands (see the phi pattern above)
+  ret i32 %value
+}
+
diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll
index 0fdafeb..9f1d073 100644
--- a/test/Transforms/InstCombine/pow-1.ll
+++ b/test/Transforms/InstCombine/pow-1.ll
@@ -151,4 +151,17 @@ define double @test_simplify16(double %x) {
 ; CHECK-NEXT: ret double [[RECIPROCAL]]
 }
 
+declare double @llvm.pow.f64(double %Val, double %Power)
+define double @test_simplify17(double %x) {  ; pow(x, 0.5) through the intrinsic
+; CHECK-LABEL: @test_simplify17(
+  %retval = call double @llvm.pow.f64(double %x, double 0.5)  ; expected to become fabs(sqrt(x)) guarded by a select
+; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]]
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]]
+; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
+; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
+  ret double %retval  ; 0xFFF0000000000000 is -infinity and 0x7FF0000000000000 is +infinity in the patterns around this line
+; CHECK-NEXT: ret double [[SELECT]]
+}
+
 ; CHECK: attributes [[NUW_RO]] = { nounwind readonly }
+
diff --git a/test/Transforms/InstCombine/pow-3.ll b/test/Transforms/InstCombine/pow-3.ll
new file mode 100644
index 0000000..1c5cf91
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-3.ll
@@ -0,0 +1,12 @@
+; Test that pow(x, 0.5) is not simplified to fabs(sqrt(x)) when library call simplification is disabled.
+;
+; RUN: opt < %s -disable-simplify-libcalls -instcombine -S | FileCheck %s
+
+declare double @llvm.pow.f64(double %Val, double %Power)
+
+define double @test_simplify_unavailable(double %x) {
+; CHECK-LABEL: @test_simplify_unavailable(
+  %retval = call double @llvm.pow.f64(double %x, double 0.5)  ; must survive unchanged; the output prints 0.5 as 5.000000e-01
+; CHECK-NEXT: call double @llvm.pow.f64(double %x, double 5.000000e-01)
+  ret double %retval
+}
diff --git a/test/Transforms/InstCombine/pr17827.ll b/test/Transforms/InstCombine/pr17827.ll
new file mode 100644
index 0000000..a8b5926
--- /dev/null
+++ b/test/Transforms/InstCombine/pr17827.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; With a left shift feeding the mask, the signed comparison should not be modified.
+; CHECK-LABEL: @test_shift_and_cmp_not_changed1(
+; CHECK: icmp slt i8 %andp, 32
+define i1 @test_shift_and_cmp_not_changed1(i8 %p) #0 {
+entry:
+  %shlp = shl i8 %p, 5
+  %andp = and i8 %shlp, -64               ; -64 is 0xC0: keeps only the top two bits
+  %cmp = icmp slt i8 %andp, 32            ; signed compare: 0x80 and 0xC0 are negative, so this is not simply "== 0"
+  ret i1 %cmp
+}
+
+; With an arithmetic right shift feeding the mask, the signed comparison should not be modified.
+; CHECK-LABEL: @test_shift_and_cmp_not_changed2(
+; CHECK: icmp slt i8 %andp, 32
+define i1 @test_shift_and_cmp_not_changed2(i8 %p) #0 {
+entry:
+  %shlp = ashr i8 %p, 5                   ; note: named %shlp but this is an ashr; ashr replicates the sign bit into the top bits
+  %andp = and i8 %shlp, -64               ; -64 is 0xC0: keeps only the top two bits
+  %cmp = icmp slt i8 %andp, 32
+  ret i1 %cmp
+}
+
+; This should simplify functionally to the left shift case (shl, mask with -64, signed compare against 32).
+; The extra input parameter %q should be optimized away.
+; CHECK-LABEL: @test_shift_and_cmp_changed1(
+; CHECK:  %andp = shl i8 %p, 5
+; CHECK-NEXT: %shl = and i8 %andp, -64
+; CHECK-NEXT:  %cmp = icmp slt i8 %shl, 32
+define i1 @test_shift_and_cmp_changed1(i8 %p, i8 %q) #0 {
+entry:
+  %andp = and i8 %p, 6                    ; bits 1 and 2 of %p
+  %andq = and i8 %q, 8                    ; bit 3 of %q
+  %or = or i8 %andq, %andp
+  %shl = shl i8 %or, 5                    ; bit 3, the only bit taken from %q, is shifted out of the i8
+  %ashr = ashr i8 %shl, 5
+  %cmp = icmp slt i8 %ashr, 1
+  ret i1 %cmp
+}
+
+; Unsigned compare allows a transformation to a compare against 0.
+; CHECK-LABEL: @test_shift_and_cmp_changed2(
+; CHECK: icmp eq i8 %andp, 0
+define i1 @test_shift_and_cmp_changed2(i8 %p) #0 {
+entry:
+  %shlp = shl i8 %p, 5
+  %andp = and i8 %shlp, -64               ; result is one of 0, 64, 128, 192 when read as unsigned
+  %cmp = icmp ult i8 %andp, 32            ; of those values only 0 is below 32
+  ret i1 %cmp
+}
+
+; nsw on the shift should not affect the comparison: the expected compare is the same one written below.
+; CHECK-LABEL: @test_shift_and_cmp_changed3(
+; CHECK: icmp slt i8 %andp, 32
+define i1 @test_shift_and_cmp_changed3(i8 %p) #0 {
+entry:
+  %shlp = shl nsw i8 %p, 5                ; same as the plain shl case apart from the nsw flag
+  %andp = and i8 %shlp, -64
+  %cmp = icmp slt i8 %andp, 32
+  ret i1 %cmp
+}
+
+; Logical shift right allows a return true because the 'and' guarantees no bits are set.
+; CHECK-LABEL: @test_shift_and_cmp_changed4(
+; CHECK: ret i1 true
+define i1 @test_shift_and_cmp_changed4(i8 %p) #0 {
+entry:
+  %shlp = lshr i8 %p, 5                   ; note: named %shlp but this is an lshr; only bits 0-2 can be set afterwards
+  %andp = and i8 %shlp, -64               ; mask 0xC0 covers bits 6-7 only, so the result is always 0
+  %cmp = icmp slt i8 %andp, 32            ; 0 < 32 always holds
+  ret i1 %cmp
+}
+
diff --git a/test/Transforms/InstCombine/printf-1.ll b/test/Transforms/InstCombine/printf-1.ll
index 59d0f16..c98ddd5 100644
--- a/test/Transforms/InstCombine/printf-1.ll
+++ b/test/Transforms/InstCombine/printf-1.ll
@@ -1,7 +1,7 @@
 ; Test that the printf library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
diff --git a/test/Transforms/InstCombine/select-2.ll b/test/Transforms/InstCombine/select-2.ll
index a76addc..5b9deb4 100644
--- a/test/Transforms/InstCombine/select-2.ll
+++ b/test/Transforms/InstCombine/select-2.ll
@@ -1,4 +1,7 @@
-; RUN: opt < %s -instcombine -S | grep select | count 2
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK: select
+; CHECK: select
 
 ; Make sure instcombine don't fold select into operands. We don't want to emit
 ; select of two integers unless it's selecting 0 / 1.
diff --git a/test/Transforms/InstCombine/select-extractelement.ll b/test/Transforms/InstCombine/select-extractelement.ll
new file mode 100644
index 0000000..e7ea851
--- /dev/null
+++ b/test/Transforms/InstCombine/select-extractelement.ll
@@ -0,0 +1,102 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare void @v4float_user(<4 x float>) #0
+
+
+; A single extractelement of a scalar-condition vector select: no such vector select should remain.
+define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
+; CHECK-LABEL: @extract_one_select(
+; CHECK-NOT: select i1 {{.*}}, <4 x float>
+  %cmp = icmp ne i32 %c, 0
+  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
+  %extract = extractelement <4 x float> %sel, i32 2   ; the only use of %sel
+  ret float %extract
+}
+
+; Multiple extractelements of one scalar-condition select: the vector select is expected to stay.
+define <2 x float> @extract_two_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
+; CHECK-LABEL: @extract_two_select(
+; CHECK: select i1 {{.*}}, <4 x float>
+  %cmp = icmp ne i32 %c, 0
+  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
+  %extract1 = extractelement <4 x float> %sel, i32 1   ; first use of %sel
+  %extract2 = extractelement <4 x float> %sel, i32 2   ; second use of %sel
+  %build1 = insertelement <2 x float> undef, float %extract1, i32 0
+  %build2 = insertelement <2 x float> %build1, float %extract2, i32 1
+  ret <2 x float> %build2
+}
+
+; The select has an extra non-extractelement user (a call), so it should not be changed.
+define float @extract_one_select_user(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
+; CHECK-LABEL: @extract_one_select_user(
+; CHECK: select i1 {{.*}}, <4 x float>
+  %cmp = icmp ne i32 %c, 0
+  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
+  %extract = extractelement <4 x float> %sel, i32 2
+  call void @v4float_user(<4 x float> %sel)            ; whole-vector use of %sel
+  ret float %extract
+}
+
+define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {  ; vector-condition variant with an extra user
+; CHECK-LABEL: @extract_one_vselect_user(
+; CHECK: select <4 x i1> {{.*}}, <4 x float>
+  %cmp = icmp ne <4 x i32> %c, zeroinitializer
+  %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
+  %extract = extractelement <4 x float> %sel, i32 2
+  call void @v4float_user(<4 x float> %sel)            ; whole-vector use of %sel, so the vector select is expected to stay
+  ret float %extract
+}
+
+; Extract one lane from a vector-condition select: no <4 x i1> select should remain.
+define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @extract_one_vselect(
+; CHECK-NOT: select <4 x i1>
+  %cmp = icmp ne <4 x i32> %c, zeroinitializer
+  %select = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
+  %extract = extractelement <4 x float> %select, i32 0   ; the only use of %select
+  ret float %extract
+}
+
+; Multiple extractelements from a vector-condition select.
+define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @extract_two_vselect(
+; CHECK-NOT: select i1 {{.*}}, <4 x float>
+  %cmp = icmp ne <4 x i32> %c, zeroinitializer
+  %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b  ; NOTE(review): the negative pattern above names a scalar i1 condition, which this input never has - confirm it is the intended pattern
+  %extract1 = extractelement <4 x float> %sel, i32 1   ; first use of %sel
+  %extract2 = extractelement <4 x float> %sel, i32 2   ; second use of %sel
+  %build1 = insertelement <2 x float> undef, float %extract1, i32 0
+  %build2 = insertelement <2 x float> %build1, float %extract2, i32 1
+  ret <2 x float> %build2
+}
+
+; All the selects of whole <4 x float> values should be decomposed into scalar selects.
+; Each select below has exactly one extractelement user; four lanes are rebuilt into one vector.
+define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_vector_select(
+; CHECK-NOT: select i1 {{.*}}, <4 x float>
+entry:
+  %0 = extractelement <4 x i32> %c, i32 0                          ; lane 0
+  %tobool = icmp ne i32 %0, 0
+  %a.sink = select i1 %tobool, <4 x float> %a, <4 x float> %b
+  %1 = extractelement <4 x float> %a.sink, i32 0
+  %2 = insertelement <4 x float> undef, float %1, i32 0
+  %3 = extractelement <4 x i32> %c, i32 1                          ; lane 1
+  %tobool1 = icmp ne i32 %3, 0
+  %a.sink1 = select i1 %tobool1, <4 x float> %a, <4 x float> %b
+  %4 = extractelement <4 x float> %a.sink1, i32 1
+  %5 = insertelement <4 x float> %2, float %4, i32 1
+  %6 = extractelement <4 x i32> %c, i32 2                          ; lane 2
+  %tobool6 = icmp ne i32 %6, 0
+  %a.sink2 = select i1 %tobool6, <4 x float> %a, <4 x float> %b
+  %7 = extractelement <4 x float> %a.sink2, i32 2
+  %8 = insertelement <4 x float> %5, float %7, i32 2
+  %9 = extractelement <4 x i32> %c, i32 3                          ; lane 3
+  %tobool11 = icmp ne i32 %9, 0
+  %a.sink3 = select i1 %tobool11, <4 x float> %a, <4 x float> %b
+  %10 = extractelement <4 x float> %a.sink3, i32 3
+  %11 = insertelement <4 x float> %8, float %10, i32 3
+  ret <4 x float> %11
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index c7809f7..1458bde 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -986,6 +986,16 @@ define i32 @select_icmp_ne_0_and_8_or_1073741824(i8 %x, i32 %y) {
   ret i32 %select
 }
 
+; We can't combine here, because the cmp is scalar and the or is a vector operation.
+; This function carries no check lines of its own: just make sure we don't assert.
+define <2 x i32> @select_icmp_eq_and_1_0_or_vector_of_2s(i32 %x, <2 x i32> %y) {
+  %and = and i32 %x, 1
+  %cmp = icmp eq i32 %and, 0                              ; scalar i1 condition
+  %or = or <2 x i32> %y, <i32 2, i32 2>                   ; vector operand of the select
+  %select = select i1 %cmp, <2 x i32> %y, <2 x i32> %or
+  ret <2 x i32> %select
+}
+
 define i32 @test65(i64 %x) {
   %1 = and i64 %x, 16
   %2 = icmp ne i64 %1, 0
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 0bdab13..b1082f0 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -744,3 +744,39 @@ define i32 @test62(i32 %x) {
 ; CHECK-LABEL: @test62(
 ; CHECK: ashr exact i32 %x, 3
 }
+
+; PR17026: an i128 shift whose constant amount (2^65) does not fit in 64 bits.
+; CHECK-LABEL: @test63(
+; CHECK-NOT: sh
+; CHECK: ret
+define void @test63(i128 %arg) {
+bb:
+  br i1 undef, label %bb1, label %bb12
+
+bb1:                                              ; preds = %bb11, %bb
+  br label %bb2
+
+bb2:                                              ; preds = %bb7, %bb1
+  br i1 undef, label %bb3, label %bb7
+
+bb3:                                              ; preds = %bb2
+  %tmp = lshr i128 %arg, 36893488147419103232     ; 36893488147419103232 is 2^65, larger than the 128-bit width
+  %tmp4 = shl i128 %tmp, 0                        ; shift by zero
+  %tmp5 = or i128 %tmp4, undef
+  %tmp6 = trunc i128 %tmp5 to i16
+  br label %bb8
+
+bb7:                                              ; preds = %bb2
+  br i1 undef, label %bb8, label %bb2
+
+bb8:                                              ; preds = %bb7, %bb3
+  %tmp9 = phi i16 [ %tmp6, %bb3 ], [ undef, %bb7 ]
+  %tmp10 = icmp eq i16 %tmp9, 0
+  br i1 %tmp10, label %bb11, label %bb12
+
+bb11:                                             ; preds = %bb8
+  br i1 undef, label %bb1, label %bb12
+
+bb12:                                             ; preds = %bb11, %bb8, %bb
+  ret void
+}
diff --git a/test/Transforms/InstCombine/sincospi.ll b/test/Transforms/InstCombine/sincospi.ll
new file mode 100644
index 0000000..0d1a602
--- /dev/null
+++ b/test/Transforms/InstCombine/sincospi.ll
@@ -0,0 +1,91 @@
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefix=CHECK-FLOAT-IN-VEC
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefix=CHECK-NO-SINCOS
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefix=CHECK-NO-SINCOS
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-none-linux-gnu | FileCheck %s --check-prefix=CHECK-NO-SINCOS
+
+
+attributes #0 = { readnone nounwind }
+
+declare float @__sinpif(float %x) #0  ; #0 is the attribute group defined above (readnone nounwind)
+declare float @__cospif(float %x) #0
+
+declare double @__sinpi(double %x) #0
+declare double @__cospi(double %x) #0
+
+@var32 = global float 0.0
+@var64 = global double 0.0
+
+define float @test_instbased_f32() {  ; __sinpif and __cospif of the same loaded float
+       %val = load float* @var32
+       %sin = call float @__sinpif(float %val) #0
+       %cos = call float @__cospif(float %val) #0  ; same argument as the sin call above
+       %res = fadd float %sin, %cos
+       ret float %res
+; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load float* @var32
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float [[VAL]])
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
+
+; CHECK: [[VAL:%[a-z0-9]+]] = load float* @var32
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float [[VAL]])
+; CHECK: extractvalue { float, float } [[SINCOS]], 0
+; CHECK: extractvalue { float, float } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call float @__sinpif
+; CHECK-NO-SINCOS: call float @__cospif
+}
+
+define float @test_constant_f32() {  ; same pairing as the loaded-value case, with a constant argument
+       %sin = call float @__sinpif(float 1.0) #0
+       %cos = call float @__cospif(float 1.0) #0  ; same constant as the sin call above
+       %res = fadd float %sin, %cos
+       ret float %res
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float 1.000000e+00)
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
+
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float 1.000000e+00)
+; CHECK: extractvalue { float, float } [[SINCOS]], 0
+; CHECK: extractvalue { float, float } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call float @__sinpif
+; CHECK-NO-SINCOS: call float @__cospif
+}
+
+define double @test_instbased_f64() {  ; __sinpi and __cospi of the same loaded double
+       %val = load double* @var64
+       %sin = call double @__sinpi(double %val) #0
+       %cos = call double @__cospi(double %val) #0  ; same argument as the sin call above
+       %res = fadd double %sin, %cos
+       ret double %res                              ; both merging prefixes below expect the same { double, double } return
+; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load double* @var64
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]])
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK: [[VAL:%[a-z0-9]+]] = load double* @var64
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]])
+; CHECK: extractvalue { double, double } [[SINCOS]], 0
+; CHECK: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call double @__sinpi
+; CHECK-NO-SINCOS: call double @__cospi
+}
+
+define double @test_constant_f64() {  ; double-precision pairing with a constant argument
+       %sin = call double @__sinpi(double 1.0) #0
+       %cos = call double @__cospi(double 1.0) #0  ; same constant as the sin call above
+       %res = fadd double %sin, %cos
+       ret double %res
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00)
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00)
+; CHECK: extractvalue { double, double } [[SINCOS]], 0
+; CHECK: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call double @__sinpi
+; CHECK-NO-SINCOS: call double @__cospi
+}
diff --git a/test/Transforms/InstCombine/sprintf-1.ll b/test/Transforms/InstCombine/sprintf-1.ll
index 6d0ab13..78dd7aa 100644
--- a/test/Transforms/InstCombine/sprintf-1.ll
+++ b/test/Transforms/InstCombine/sprintf-1.ll
@@ -1,7 +1,7 @@
 ; Test that the sprintf library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll
index 9b666b9..b64c800 100644
--- a/test/Transforms/InstCombine/store.ll
+++ b/test/Transforms/InstCombine/store.ll
@@ -113,7 +113,8 @@ for.end:                                          ; preds = %for.cond
 ; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
+!0 = metadata !{metadata !4, metadata !4, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA"}
 !3 = metadata !{metadata !"float", metadata !1}
+!4 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/InstCombine/stpcpy_chk-1.ll b/test/Transforms/InstCombine/stpcpy_chk-1.ll
index a6d5585..8a02529 100644
--- a/test/Transforms/InstCombine/stpcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/stpcpy_chk-1.ll
@@ -61,7 +61,7 @@ define i8* @test_simplify5() {
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
 ; CHECK: @__memcpy_chk
-  %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 %len)
 ; CHECK: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11)
   ret i8* %ret
@@ -75,7 +75,7 @@ define i8* @test_simplify6() {
 
 ; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen
 ; CHECK-NEXT: getelementptr inbounds [60 x i8]* @a, i32 0, i32 [[LEN]]
-  %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %dst, i32 %len)
   ret i8* %ret
 }
@@ -93,4 +93,4 @@ define void @test_no_simplify1() {
 }
 
 declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
diff --git a/test/Transforms/InstCombine/strchr-1.ll b/test/Transforms/InstCombine/strchr-1.ll
index 5efab9e..d2c9894 100644
--- a/test/Transforms/InstCombine/strchr-1.ll
+++ b/test/Transforms/InstCombine/strchr-1.ll
@@ -52,3 +52,14 @@ define void @test_simplify4(i32 %chr) {
   store i8* %dst, i8** @chp
   ret void
 }
+
+define void @test_simplify5() {  ; strchr with a search value whose low byte is zero
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+  %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8* @strchr(i8* %src, i32 65280)  ; 65280 is 0xFF00; expected result is index 13 (presumably the terminator of @hello - its initializer is not visible here)
+  store i8* %dst, i8** @chp
+  ret void
+}
diff --git a/test/Transforms/InstCombine/strcpy_chk-1.ll b/test/Transforms/InstCombine/strcpy_chk-1.ll
index 5b98cf8..8e7fec7 100644
--- a/test/Transforms/InstCombine/strcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -61,7 +61,7 @@ define void @test_simplify5() {
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
 ; CHECK: @__memcpy_chk
-  %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len)
   ret void
 }
@@ -73,7 +73,7 @@ define i8* @test_simplify6() {
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
 
 ; CHECK: getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0)
-  %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   %ret = call i8* @__strcpy_chk(i8* %dst, i8* %dst, i32 %len)
   ret i8* %ret
 }
@@ -91,4 +91,4 @@ define void @test_no_simplify1() {
 }
 
 declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
diff --git a/test/Transforms/InstCombine/strrchr-1.ll b/test/Transforms/InstCombine/strrchr-1.ll
index a0bdb22..4615f5f 100644
--- a/test/Transforms/InstCombine/strrchr-1.ll
+++ b/test/Transforms/InstCombine/strrchr-1.ll
@@ -42,6 +42,17 @@ define void @test_simplify3() {
   ret void
 }
 
+define void @test_simplify4() {  ; strrchr with a search value whose low byte is zero
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+  %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8* @strrchr(i8* %src, i32 65280)  ; 65280 is 0xFF00; expected result is index 13 (presumably the terminator of @hello - its initializer is not visible here)
+  store i8* %dst, i8** @chp
+  ret void
+}
+
 define void @test_nosimplify1(i32 %chr) {
 ; CHECK-LABEL: @test_nosimplify1(
 ; CHECK: call i8* @strrchr
diff --git a/test/Transforms/InstCombine/struct-assign-tbaa.ll b/test/Transforms/InstCombine/struct-assign-tbaa.ll
index d7a26fa..c80e31a 100644
--- a/test/Transforms/InstCombine/struct-assign-tbaa.ll
+++ b/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -35,10 +35,12 @@ define i32 (i8*, i32*, double*)*** @test2() {
   ret i32 (i8*, i32*, double*)*** %tmp2
 }
 
-; CHECK: !0 = metadata !{metadata !"float", metadata !1}
+; CHECK: !0 = metadata !{metadata !1, metadata !1, i64 0}
+; CHECK: !1 = metadata !{metadata !"float", metadata !2}
 
 !0 = metadata !{metadata !"Simple C/C++ TBAA"}
 !1 = metadata !{metadata !"omnipotent char", metadata !0}
-!2 = metadata !{metadata !"float", metadata !0}
+!2 = metadata !{metadata !5, metadata !5, i64 0}
 !3 = metadata !{i64 0, i64 4, metadata !2}
 !4 = metadata !{i64 0, i64 8, null}
+!5 = metadata !{metadata !"float", metadata !0}
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 5449656..36c523b 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -1,34 +1,34 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
 ; Optimize subtracts.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i32 @test1(i32 %A) {
-	%B = sub i32 %A, %A	
+	%B = sub i32 %A, %A
 	ret i32 %B
 ; CHECK-LABEL: @test1(
 ; CHECK: ret i32 0
 }
 
 define i32 @test2(i32 %A) {
-	%B = sub i32 %A, 0	
+	%B = sub i32 %A, 0
 	ret i32 %B
 ; CHECK-LABEL: @test2(
 ; CHECK: ret i32 %A
 }
 
 define i32 @test3(i32 %A) {
-	%B = sub i32 0, %A	
-	%C = sub i32 0, %B	
+	%B = sub i32 0, %A
+	%C = sub i32 0, %B
 	ret i32 %C
 ; CHECK-LABEL: @test3(
 ; CHECK: ret i32 %A
 }
 
 define i32 @test4(i32 %A, i32 %x) {
-	%B = sub i32 0, %A	
-	%C = sub i32 %x, %B	
+	%B = sub i32 0, %A
+	%C = sub i32 %x, %B
 	ret i32 %C
 ; CHECK-LABEL: @test4(
 ; CHECK: %C = add i32 %x, %A
@@ -36,8 +36,8 @@ define i32 @test4(i32 %A, i32 %x) {
 }
 
 define i32 @test5(i32 %A, i32 %B, i32 %C) {
-	%D = sub i32 %B, %C	
-	%E = sub i32 %A, %D	
+	%D = sub i32 %B, %C
+	%E = sub i32 %A, %D
 	ret i32 %E
 ; CHECK-LABEL: @test5(
 ; CHECK: %D1 = sub i32 %C, %B
@@ -46,17 +46,17 @@ define i32 @test5(i32 %A, i32 %B, i32 %C) {
 }
 
 define i32 @test6(i32 %A, i32 %B) {
-	%C = and i32 %A, %B	
-	%D = sub i32 %A, %C	
+	%C = and i32 %A, %B
+	%D = sub i32 %A, %C
 	ret i32 %D
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT: xor i32 %B, -1
-; CHECK-NEXT: %D = and i32 
+; CHECK-NEXT: %D = and i32
 ; CHECK-NEXT: ret i32 %D
 }
 
 define i32 @test7(i32 %A) {
-	%B = sub i32 -1, %A	
+	%B = sub i32 -1, %A
 	ret i32 %B
 ; CHECK-LABEL: @test7(
 ; CHECK: %B = xor i32 %A, -1
@@ -64,8 +64,8 @@ define i32 @test7(i32 %A) {
 }
 
 define i32 @test8(i32 %A) {
-	%B = mul i32 9, %A	
-	%C = sub i32 %B, %A	
+	%B = mul i32 9, %A
+	%C = sub i32 %B, %A
 	ret i32 %C
 ; CHECK-LABEL: @test8(
 ; CHECK: %C = shl i32 %A, 3
@@ -73,8 +73,8 @@ define i32 @test8(i32 %A) {
 }
 
 define i32 @test9(i32 %A) {
-	%B = mul i32 3, %A	
-	%C = sub i32 %A, %B	
+	%B = mul i32 3, %A
+	%C = sub i32 %A, %B
 	ret i32 %C
 ; CHECK-LABEL: @test9(
 ; CHECK: %C = mul i32 %A, -2
@@ -82,9 +82,9 @@ define i32 @test9(i32 %A) {
 }
 
 define i32 @test10(i32 %A, i32 %B) {
-	%C = sub i32 0, %A	
-	%D = sub i32 0, %B	
-	%E = mul i32 %C, %D	
+	%C = sub i32 0, %A
+	%D = sub i32 0, %B
+	%E = mul i32 %C, %D
 	ret i32 %E
 ; CHECK-LABEL: @test10(
 ; CHECK: %E = mul i32 %A, %B
@@ -92,8 +92,8 @@ define i32 @test10(i32 %A, i32 %B) {
 }
 
 define i32 @test10a(i32 %A) {
-	%C = sub i32 0, %A	
-	%E = mul i32 %C, 7	
+	%C = sub i32 0, %A
+	%E = mul i32 %C, 7
 	ret i32 %E
 ; CHECK-LABEL: @test10a(
 ; CHECK: %E = mul i32 %A, -7
@@ -101,8 +101,8 @@ define i32 @test10a(i32 %A) {
 }
 
 define i1 @test11(i8 %A, i8 %B) {
-	%C = sub i8 %A, %B	
-	%cD = icmp ne i8 %C, 0	
+	%C = sub i8 %A, %B
+	%cD = icmp ne i8 %C, 0
 	ret i1 %cD
 ; CHECK-LABEL: @test11(
 ; CHECK: %cD = icmp ne i8 %A, %B
@@ -110,8 +110,8 @@ define i1 @test11(i8 %A, i8 %B) {
 }
 
 define i32 @test12(i32 %A) {
-	%B = ashr i32 %A, 31	
-	%C = sub i32 0, %B	
+	%B = ashr i32 %A, 31
+	%C = sub i32 0, %B
 	ret i32 %C
 ; CHECK-LABEL: @test12(
 ; CHECK: %C = lshr i32 %A, 31
@@ -119,8 +119,8 @@ define i32 @test12(i32 %A) {
 }
 
 define i32 @test13(i32 %A) {
-	%B = lshr i32 %A, 31	
-	%C = sub i32 0, %B	
+	%B = lshr i32 %A, 31
+	%C = sub i32 0, %B
 	ret i32 %C
 ; CHECK-LABEL: @test13(
 ; CHECK: %C = ashr i32 %A, 31
@@ -128,9 +128,9 @@ define i32 @test13(i32 %A) {
 }
 
 define i32 @test14(i32 %A) {
-	%B = lshr i32 %A, 31	
-	%C = bitcast i32 %B to i32	
-	%D = sub i32 0, %C	
+	%B = lshr i32 %A, 31
+	%C = bitcast i32 %B to i32
+	%D = sub i32 0, %C
 	ret i32 %D
 ; CHECK-LABEL: @test14(
 ; CHECK: %D = ashr i32 %A, 31
@@ -138,17 +138,17 @@ define i32 @test14(i32 %A) {
 }
 
 define i32 @test15(i32 %A, i32 %B) {
-	%C = sub i32 0, %A	
-	%D = srem i32 %B, %C	
+	%C = sub i32 0, %A
+	%D = srem i32 %B, %C
 	ret i32 %D
 ; CHECK-LABEL: @test15(
-; CHECK: %D = srem i32 %B, %A 
+; CHECK: %D = srem i32 %B, %A
 ; CHECK: ret i32 %D
 }
 
 define i32 @test16(i32 %A) {
-	%X = sdiv i32 %A, 1123	
-	%Y = sub i32 0, %X	
+	%X = sdiv i32 %A, 1123
+	%Y = sub i32 0, %X
 	ret i32 %Y
 ; CHECK-LABEL: @test16(
 ; CHECK: %Y = sdiv i32 %A, -1123
@@ -158,8 +158,8 @@ define i32 @test16(i32 %A) {
 ; Can't fold subtract here because negation it might oveflow.
 ; PR3142
 define i32 @test17(i32 %A) {
-	%B = sub i32 0, %A	
-	%C = sdiv i32 %B, 1234	
+	%B = sub i32 0, %A
+	%C = sdiv i32 %B, 1234
 	ret i32 %C
 ; CHECK-LABEL: @test17(
 ; CHECK: %B = sub i32 0, %A
@@ -168,25 +168,25 @@ define i32 @test17(i32 %A) {
 }
 
 define i64 @test18(i64 %Y) {
-	%tmp.4 = shl i64 %Y, 2	
-	%tmp.12 = shl i64 %Y, 2	
-	%tmp.8 = sub i64 %tmp.4, %tmp.12	
+	%tmp.4 = shl i64 %Y, 2
+	%tmp.12 = shl i64 %Y, 2
+	%tmp.8 = sub i64 %tmp.4, %tmp.12
 	ret i64 %tmp.8
 ; CHECK-LABEL: @test18(
 ; CHECK: ret i64 0
 }
 
 define i32 @test19(i32 %X, i32 %Y) {
-	%Z = sub i32 %X, %Y	
-	%Q = add i32 %Z, %Y	
+	%Z = sub i32 %X, %Y
+	%Q = add i32 %Z, %Y
 	ret i32 %Q
 ; CHECK-LABEL: @test19(
 ; CHECK: ret i32 %X
 }
 
 define i1 @test20(i32 %g, i32 %h) {
-	%tmp.2 = sub i32 %g, %h	
-	%tmp.4 = icmp ne i32 %tmp.2, %g	
+	%tmp.2 = sub i32 %g, %h
+	%tmp.4 = icmp ne i32 %tmp.2, %g
 	ret i1 %tmp.4
 ; CHECK-LABEL: @test20(
 ; CHECK: %tmp.4 = icmp ne i32 %h, 0
@@ -194,8 +194,8 @@ define i1 @test20(i32 %g, i32 %h) {
 }
 
 define i1 @test21(i32 %g, i32 %h) {
-	%tmp.2 = sub i32 %g, %h	
-	%tmp.4 = icmp ne i32 %tmp.2, %g		
+	%tmp.2 = sub i32 %g, %h
+	%tmp.4 = icmp ne i32 %tmp.2, %g
         ret i1 %tmp.4
 ; CHECK-LABEL: @test21(
 ; CHECK: %tmp.4 = icmp ne i32 %h, 0
@@ -204,9 +204,9 @@ define i1 @test21(i32 %g, i32 %h) {
 
 ; PR2298
 define zeroext i1 @test22(i32 %a, i32 %b)  nounwind  {
-	%tmp2 = sub i32 0, %a	
-	%tmp4 = sub i32 0, %b	
-	%tmp5 = icmp eq i32 %tmp2, %tmp4	
+	%tmp2 = sub i32 0, %a
+	%tmp4 = sub i32 0, %b
+	%tmp5 = icmp eq i32 %tmp2, %tmp4
 	ret i1 %tmp5
 ; CHECK-LABEL: @test22(
 ; CHECK: %tmp5 = icmp eq i32 %b, %a
@@ -227,6 +227,19 @@ define i32 @test23(i8* %P, i64 %A){
 ; CHECK-NEXT: ret i32
 }
 
+define i8 @test23_as1(i8 addrspace(1)* %P, i16 %A) {
+; CHECK-LABEL: @test23_as1(
+; CHECK-NEXT: = trunc i16 %A to i8
+; CHECK-NEXT: ret i8
+  %B = getelementptr inbounds i8 addrspace(1)* %P, i16 %A ; %B = %P + %A bytes, in address space 1
+  %C = ptrtoint i8 addrspace(1)* %B to i16
+  %D = trunc i16 %C to i8
+  %E = ptrtoint i8 addrspace(1)* %P to i16
+  %F = trunc i16 %E to i8
+  %G = sub i8 %D, %F ; trunc(%P + %A) - trunc(%P); expected to reduce to trunc(%A)
+  ret i8 %G
+}
+
 define i64 @test24(i8* %P, i64 %A){
   %B = getelementptr inbounds i8* %P, i64 %A
   %C = ptrtoint i8* %B to i64
@@ -237,6 +250,16 @@ define i64 @test24(i8* %P, i64 %A){
 ; CHECK-NEXT: ret i64 %A
 }
 
+define i16 @test24_as1(i8 addrspace(1)* %P, i16 %A) {
+; CHECK-LABEL: @test24_as1(
+; CHECK-NEXT: ret i16 %A
+  %B = getelementptr inbounds i8 addrspace(1)* %P, i16 %A ; %B = %P + %A bytes, in address space 1
+  %C = ptrtoint i8 addrspace(1)* %B to i16
+  %E = ptrtoint i8 addrspace(1)* %P to i16
+  %G = sub i16 %C, %E ; (%P + %A) - %P; expected to reduce to %A
+  ret i16 %G
+}
+
 define i64 @test24a(i8* %P, i64 %A){
   %B = getelementptr inbounds i8* %P, i64 %A
   %C = ptrtoint i8* %B to i64
@@ -245,9 +268,21 @@ define i64 @test24a(i8* %P, i64 %A){
   ret i64 %G
 ; CHECK-LABEL: @test24a(
 ; CHECK-NEXT: sub i64 0, %A
-; CHECK-NEXT: ret i64 
+; CHECK-NEXT: ret i64
 }
 
+define i16 @test24a_as1(i8 addrspace(1)* %P, i16 %A) {
+; CHECK-LABEL: @test24a_as1(
+; CHECK-NEXT: sub i16 0, %A
+; CHECK-NEXT: ret i16
+  %B = getelementptr inbounds i8 addrspace(1)* %P, i16 %A ; %B = %P + %A bytes, in address space 1
+  %C = ptrtoint i8 addrspace(1)* %B to i16
+  %E = ptrtoint i8 addrspace(1)* %P to i16
+  %G = sub i16 %E, %C ; %P - (%P + %A); expected to reduce to 0 - %A
+  ret i16 %G
+}
+
+
 @Arr = external global [42 x i16]
 
 define i64 @test24b(i8* %P, i64 %A){
@@ -257,7 +292,7 @@ define i64 @test24b(i8* %P, i64 %A){
   ret i64 %G
 ; CHECK-LABEL: @test24b(
 ; CHECK-NEXT: shl nuw i64 %A, 1
-; CHECK-NEXT: ret i64 
+; CHECK-NEXT: ret i64
 }
 
 
@@ -269,7 +304,21 @@ define i64 @test25(i8* %P, i64 %A){
 ; CHECK-LABEL: @test25(
 ; CHECK-NEXT: shl nuw i64 %A, 1
 ; CHECK-NEXT: add i64 {{.*}}, -84
-; CHECK-NEXT: ret i64 
+; CHECK-NEXT: ret i64
+}
+
+@Arr_as1 = external addrspace(1) global [42 x i16]
+
+define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) {
+; CHECK-LABEL: @test25_as1(
+; CHECK-NEXT: %1 = trunc i64 %A to i16
+; CHECK-NEXT: shl nuw i16 %1, 1
+; CHECK-NEXT: add i16 {{.*}}, -84
+; CHECK-NEXT: ret i16
+  %B = getelementptr inbounds [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A ; address of element %A of @Arr_as1
+  %C = ptrtoint i16 addrspace(1)* %B to i16
+  %G = sub i16 %C, ptrtoint (i16 addrspace(1)* getelementptr ([42 x i16] addrspace(1)* @Arr_as1, i64 1, i64 0) to i16)
+  ret i16 %G
 }
 
 define i32 @test26(i32 %x) {
@@ -327,3 +376,19 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) {
 ; CHECK-NEXT: sub i64 %gep1.idx, %j
 ; CHECK-NEXT: ret i64
 }
+
+define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
+; CHECK-LABEL: @test30_as1(
+; CHECK-NEXT: %gep1.idx = shl nuw i16 %i, 2
+; CHECK-NEXT: sub i16 %gep1.idx, %j
+; CHECK-NEXT: ret i16
+  %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+  %gep1 = getelementptr inbounds i32 addrspace(1)* %bit, i16 %i
+  %gep2 = getelementptr inbounds i8 addrspace(1)* %foo, i16 %j
+  %cast1 = ptrtoint i32 addrspace(1)* %gep1 to i16
+  %cast2 = ptrtoint i8 addrspace(1)* %gep2 to i16
+  %sub = sub i16 %cast1, %cast2
+  ret i16 %sub
+}
+
+
diff --git a/test/Transforms/InstCombine/vec_extract_elt.ll b/test/Transforms/InstCombine/vec_extract_elt.ll
index 166066a..3daf72e 100644
--- a/test/Transforms/InstCombine/vec_extract_elt.ll
+++ b/test/Transforms/InstCombine/vec_extract_elt.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -instcombine -S | not grep extractelement
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK-NOT: extractelement
 
 define i32 @test(float %f) {
         %tmp7 = insertelement <4 x float> undef, float %f, i32 0                ; <<4 x float>> [#uses=1]
diff --git a/test/Transforms/InstCombine/vec_insertelt.ll b/test/Transforms/InstCombine/vec_insertelt.ll
index e35fa5e..3b94920 100644
--- a/test/Transforms/InstCombine/vec_insertelt.ll
+++ b/test/Transforms/InstCombine/vec_insertelt.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep "ret <4 x i32> %A"
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: ret <4 x i32> %A
 
 ; PR1286
 define <4 x i32> @test1(<4 x i32> %A) {
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index 738e05b..3ee43dc 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -200,3 +200,31 @@ define void @test14(i16 %conv10) {
   %sext = sext <2 x i1> %cmp to <2 x i16>
   ret void
 }
+
+; Check that sequences of insert/extract element are
+; collapsed into a valid shuffle instruction with correct shuffle indices.
+ 
+define <4 x float> @test15a(<4 x float> %LHS, <4 x float> %RHS) {
+; CHECK-LABEL: @test15a
+; CHECK-NEXT: shufflevector <4 x float> %LHS, <4 x float> %RHS, <4 x i32> <i32 4, i32 0, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x float> %tmp4
+  %tmp1 = extractelement <4 x float> %LHS, i32 0
+  %tmp2 = insertelement <4 x float> %RHS, float %tmp1, i32 1
+  %tmp3 = extractelement <4 x float> %RHS, i32 2
+  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 3
+  ret <4 x float> %tmp4
+}
+ 
+define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) {
+; CHECK-LABEL: @test15b
+; CHECK-NEXT: shufflevector <4 x float> %LHS, <4 x float> %RHS, <4 x i32> <i32 4, i32 3, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x float> %tmp5
+  %tmp0 = extractelement <4 x float> %LHS, i32 3
+  %tmp1 = insertelement <4 x float> %RHS, float %tmp0, i32 0
+  %tmp2 = extractelement <4 x float> %tmp1, i32 0
+  %tmp3 = insertelement <4 x float> %RHS, float %tmp2, i32 1
+  %tmp4 = extractelement <4 x float> %RHS, i32 2
+  %tmp5 = insertelement <4 x float> %tmp3, float %tmp4, i32 3
+  ret <4 x float> %tmp5
+}
+
diff --git a/test/Transforms/InstCombine/win-math.ll b/test/Transforms/InstCombine/win-math.ll
index df3ac93..e6e79e2 100644
--- a/test/Transforms/InstCombine/win-math.ll
+++ b/test/Transforms/InstCombine/win-math.ll
@@ -273,3 +273,23 @@ define float @float_round(float %x) nounwind readnone {
     ret float %3
 }
 
+declare float @powf(float, float)
+; win32 lacks sqrtf&fabsf, win64 lacks fabsf
+define float @float_powsqrt(float %x) nounwind readnone {
+; WIN32-LABEL: @float_powsqrt(
+; WIN32-NOT: float @sqrtf
+; WIN32: float @powf
+; WIN64-LABEL: @float_powsqrt(
+; WIN64-NOT: float @sqrtf
+; WIN64: float @powf
+; MINGW32-LABEL: @float_powsqrt(
+; MINGW32: float @sqrtf
+; MINGW32: float @fabsf
+; MINGW32-NOT: float @powf
+; MINGW64-LABEL: @float_powsqrt(
+; MINGW64: float @sqrtf
+; MINGW64: float @fabsf
+; MINGW64-NOT: float @powf
+    %1 = call float @powf(float %x, float 0.5)
+    ret float %1
+}
diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll
index 3e1621c..fd854c5 100644
--- a/test/Transforms/InstSimplify/call.ll
+++ b/test/Transforms/InstSimplify/call.ll
@@ -101,3 +101,66 @@ define float @test_idempotence(float %a) {
 
   ret float %r4
 }
+
+define i8* @operator_new() {
+entry:
+  %call = tail call noalias i8* @_Znwm(i64 8)
+  %cmp = icmp eq i8* %call, null
+  br i1 %cmp, label %cast.end, label %cast.notnull
+
+cast.notnull:                                     ; preds = %entry
+  %add.ptr = getelementptr inbounds i8* %call, i64 4
+  br label %cast.end
+
+cast.end:                                         ; preds = %cast.notnull, %entry
+  %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ]
+  ret i8* %cast.result
+
+; CHECK-LABEL: @operator_new
+; CHECK: br i1 false, label %cast.end, label %cast.notnull
+}
+
+declare noalias i8* @_Znwm(i64)
+
+%"struct.std::nothrow_t" = type { i8 }
+@_ZSt7nothrow = external global %"struct.std::nothrow_t"
+
+define i8* @operator_new_nothrow_t() {
+entry:
+  %call = tail call noalias i8* @_ZnamRKSt9nothrow_t(i64 8, %"struct.std::nothrow_t"* @_ZSt7nothrow)
+  %cmp = icmp eq i8* %call, null
+  br i1 %cmp, label %cast.end, label %cast.notnull
+
+cast.notnull:                                     ; preds = %entry
+  %add.ptr = getelementptr inbounds i8* %call, i64 4
+  br label %cast.end
+
+cast.end:                                         ; preds = %cast.notnull, %entry
+  %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ]
+  ret i8* %cast.result
+
+; CHECK-LABEL: @operator_new_nothrow_t
+; CHECK: br i1 %cmp, label %cast.end, label %cast.notnull
+}
+
+declare i8* @_ZnamRKSt9nothrow_t(i64, %"struct.std::nothrow_t"*) nounwind
+
+define i8* @malloc_can_return_null() {
+entry:
+  %call = tail call noalias i8* @malloc(i64 8)
+  %cmp = icmp eq i8* %call, null
+  br i1 %cmp, label %cast.end, label %cast.notnull
+
+cast.notnull:                                     ; preds = %entry
+  %add.ptr = getelementptr inbounds i8* %call, i64 4
+  br label %cast.end
+
+cast.end:                                         ; preds = %cast.notnull, %entry
+  %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ]
+  ret i8* %cast.result
+
+; CHECK-LABEL: @malloc_can_return_null
+; CHECK: br i1 %cmp, label %cast.end, label %cast.notnull
+}
+
+declare noalias i8* @malloc(i64)
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 0957949..abb3869 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -717,3 +717,25 @@ define i1 @alloca_gep(i64 %a, i64 %b) {
   ret i1 %cmp
 ; CHECK-NEXT: ret i1 false
 }
+
+define i1 @non_inbounds_gep_compare(i64* %a) {
+; CHECK-LABEL: @non_inbounds_gep_compare(
+; Equality compares with non-inbounds GEPs can be folded.
+  %x = getelementptr i64* %a, i64 42
+  %y = getelementptr inbounds i64* %x, i64 -42
+  %z = getelementptr i64* %a, i64 -42
+  %w = getelementptr inbounds i64* %z, i64 42
+  %cmp = icmp eq i64* %y, %w
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @non_inbounds_gep_compare2(i64* %a) {
+; CHECK-LABEL: @non_inbounds_gep_compare2(
+; Equality compares with non-inbounds GEPs can be folded.
+  %x = getelementptr i64* %a, i64 4294967297 ; index is 2^32 + 1; NOTE(review): equals %y only if pointers are 32 bits wide - confirm the file's datalayout
+  %y = getelementptr i64* %a, i64 1
+  %cmp = icmp eq i64* %x, %y ; compare the two GEPs (was %y against itself, leaving %x dead)
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 true
+}
diff --git a/test/Transforms/InstSimplify/lit.local.cfg b/test/Transforms/InstSimplify/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/InstSimplify/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Internalize/2008-05-09-AllButMain.ll b/test/Transforms/Internalize/2008-05-09-AllButMain.ll
deleted file mode 100644
index f75e80d..0000000
--- a/test/Transforms/Internalize/2008-05-09-AllButMain.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; No arguments means internalize everything
-; RUN: opt < %s -internalize -S | FileCheck --check-prefix=NOARGS %s
-
-; Internalize all but foo and j
-; RUN: opt < %s -internalize -internalize-public-api-list foo -internalize-public-api-list j -S | FileCheck --check-prefix=LIST %s
-
-; Non existent files should be treated as if they were empty (so internalize
-; everything)
-; RUN: opt < %s -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null -S | FileCheck --check-prefix=EMPTYFILE %s
-
-; RUN: opt < %s -S -internalize -internalize-public-api-list bar -internalize-public-api-list foo -internalize-public-api-file /nonexistent/file  2> /dev/null | FileCheck --check-prefix=LIST2 %s
-
-; -file and -list options should be merged, the .apifile contains foo and j
-; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-file %s.apifile -S | FileCheck --check-prefix=MERGE %s
-
-; NOARGS: @i = internal global
-; LIST: @i = internal global
-; EMPTYFILE: @i = internal global
-; LIST2: @i = internal global
-; MERGE: @i = internal global
-@i = global i32 0
-
-; NOARGS: @j = internal global
-; LIST: @j = global
-; EMPTYFILE: @j = internal global
-; LIST2: @j = internal global
-; MERGE: @j = global
-@j = global i32 0
-
-; NOARGS-LABEL: define internal void @main(
-; LIST-LABEL: define internal void @main(
-; EMPTYFILE-LABEL: define internal void @main(
-; LIST2-LABEL: define internal void @main(
-; MERGE-LABEL: define internal void @main(
-define void @main() {
-        ret void
-}
-
-; NOARGS-LABEL: define internal void @foo(
-; LIST-LABEL: define void @foo(
-; EMPTYFILE-LABEL: define internal void @foo(
-; LIST2-LABEL: define void @foo(
-; MERGE-LABEL: define void @foo(
-define void @foo() {
-        ret void
-}
-
-; NOARGS-LABEL: define internal void @bar(
-; LIST-LABEL: define internal void @bar(
-; EMPTYFILE-LABEL: define internal void @bar(
-; LIST2-LABEL: define void @bar(
-; MERGE-LABEL: define void @bar(
-define void @bar() {
-        ret void
-}
diff --git a/test/Transforms/Internalize/2008-05-09-AllButMain.ll.apifile b/test/Transforms/Internalize/apifile
index f6c58b8..f6c58b8 100644
--- a/test/Transforms/Internalize/2008-05-09-AllButMain.ll.apifile
+++ b/test/Transforms/Internalize/apifile
diff --git a/test/Transforms/Internalize/available_externally.ll b/test/Transforms/Internalize/available_externally.ll
deleted file mode 100644
index bb89603..0000000
--- a/test/Transforms/Internalize/available_externally.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt < %s -internalize -internalize-public-api-list foo -S | FileCheck %s
-
-; CHECK-LABEL: define void @foo(
-define void @foo() {
-  ret void
-}
-
-; CHECK-LABEL: define internal void @zed(
-define void @zed() {
-  ret void
-}
-
-; CHECK-LABEL: define available_externally void @bar(
-define available_externally void @bar() {
-  ret void
-}
diff --git a/test/Transforms/Internalize/lists.ll b/test/Transforms/Internalize/lists.ll
new file mode 100644
index 0000000..83e441a2
--- /dev/null
+++ b/test/Transforms/Internalize/lists.ll
@@ -0,0 +1,50 @@
+; No arguments means internalize everything
+; RUN: opt < %s -internalize -S | FileCheck --check-prefix=ALL %s
+
+; Nonexistent files should be treated as if they were empty (so internalize
+; everything)
+; RUN: opt < %s -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null -S | FileCheck --check-prefix=ALL %s
+
+; Internalize all but foo and j
+; RUN: opt < %s -internalize -internalize-public-api-list foo -internalize-public-api-list j -S | FileCheck --check-prefix=FOO_AND_J %s
+
+; RUN: opt < %s -S -internalize -internalize-public-api-list bar -internalize-public-api-list foo -internalize-public-api-file /nonexistent/file  2> /dev/null | FileCheck --check-prefix=FOO_AND_BAR %s
+
+; -file and -list options should be merged; the apifile contains foo and j
+; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-file %S/apifile -S | FileCheck --check-prefix=FOO_J_AND_BAR %s
+
+; ALL: @i = internal global
+; FOO_AND_J: @i = internal global
+; FOO_AND_BAR: @i = internal global
+; FOO_J_AND_BAR: @i = internal global
+@i = global i32 0
+
+; ALL: @j = internal global
+; FOO_AND_J: @j = global
+; FOO_AND_BAR: @j = internal global
+; FOO_J_AND_BAR: @j = global
+@j = global i32 0
+
+; ALL: define internal void @main() {
+; FOO_AND_J: define internal void @main() {
+; FOO_AND_BAR: define internal void @main() {
+; FOO_J_AND_BAR: define internal void @main() {
+define void @main() {
+        ret void
+}
+
+; ALL: define internal void @foo() {
+; FOO_AND_J: define void @foo() {
+; FOO_AND_BAR: define void @foo() {
+; FOO_J_AND_BAR: define void @foo() {
+define void @foo() {
+        ret void
+}
+
+; ALL: define available_externally void @bar() {
+; FOO_AND_J: define available_externally void @bar() {
+; FOO_AND_BAR: define available_externally void @bar() {
+; FOO_J_AND_BAR: define available_externally void @bar() {
+define available_externally void @bar() {
+  ret void
+}
diff --git a/test/Transforms/Internalize/lit.local.cfg b/test/Transforms/Internalize/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/Internalize/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/JumpThreading/landing-pad.ll b/test/Transforms/JumpThreading/landing-pad.ll
new file mode 100644
index 0000000..9ee0526
--- /dev/null
+++ b/test/Transforms/JumpThreading/landing-pad.ll
@@ -0,0 +1,203 @@
+; RUN: opt < %s -disable-output -jump-threading
+
+%class.E = type { i32 (...)**, %class.C }
+%class.C = type { %class.A }
+%class.A = type { i32 }
+%class.D = type { %class.F }
+%class.F = type { %class.E }
+%class.B = type { %class.D* }
+
+@_ZTV1D = unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI1D to i8*), i8* bitcast (void (%class.D*)* @_ZN1D7doApplyEv to i8*)]
+@_ZTI1D = external unnamed_addr constant { i8*, i8*, i8* }
+
+define void @_ZN15EditCommandImpl5applyEv(%class.E* %this) uwtable align 2 {
+entry:
+  %0 = bitcast %class.E* %this to void (%class.E*)***
+  %vtable = load void (%class.E*)*** %0, align 8
+  %1 = load void (%class.E*)** %vtable, align 8
+  call void %1(%class.E* %this)
+  ret void
+}
+
+define void @_ZN1DC1Ev(%class.D* nocapture %this) unnamed_addr uwtable align 2 {
+entry:
+  call void @_ZN24CompositeEditCommandImplC2Ev()
+  %0 = getelementptr inbounds %class.D* %this, i64 0, i32 0, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* @_ZTV1D, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+}
+
+define void @_ZN1DC2Ev(%class.D* nocapture %this) unnamed_addr uwtable align 2 {
+entry:
+  call void @_ZN24CompositeEditCommandImplC2Ev()
+  %0 = getelementptr inbounds %class.D* %this, i64 0, i32 0, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* @_ZTV1D, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+}
+
+declare void @_ZN24CompositeEditCommandImplC2Ev() #1
+
+define void @_ZN1D7doApplyEv(%class.D* nocapture %this) unnamed_addr nounwind readnone uwtable align 2 {
+entry:
+  ret void
+}
+
+define void @_Z3fn1v() uwtable {
+entry:
+  %call = call noalias i8* @_Znwm() #8
+  invoke void @_ZN24CompositeEditCommandImplC2Ev()
+          to label %_ZN1DC1Ev.exit unwind label %lpad
+
+_ZN1DC1Ev.exit:                                   ; preds = %entry
+  %0 = bitcast i8* %call to i32 (...)***
+  store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* @_ZTV1D, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  %_ref.i.i.i = getelementptr inbounds i8* %call, i64 8
+  %1 = bitcast i8* %_ref.i.i.i to i32*
+  %2 = load i32* %1, align 4
+  %inc.i.i.i = add nsw i32 %2, 1
+  store i32 %inc.i.i.i, i32* %1, align 4
+  %3 = bitcast i8* %call to %class.D*
+  invoke void @_ZN1D7doApplyEv(%class.D* %3)
+          to label %_ZN15EditCommandImpl5applyEv.exit unwind label %lpad1
+
+_ZN15EditCommandImpl5applyEv.exit:                ; preds = %_ZN1DC1Ev.exit
+  invoke void @_ZN1D16deleteKeyPressedEv()
+          to label %invoke.cont7 unwind label %lpad1
+
+invoke.cont7:                                     ; preds = %_ZN15EditCommandImpl5applyEv.exit
+  ret void
+
+lpad:                                             ; preds = %entry
+  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  call void @_ZdlPv() #9
+  unreachable
+
+lpad1:                                            ; preds = %_ZN1DC1Ev.exit, %_ZN15EditCommandImpl5applyEv.exit
+  %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  %6 = load i32* %1, align 4
+  %tobool.i.i.i = icmp eq i32 %6, 0
+  br i1 %tobool.i.i.i, label %_ZN1BI1DED1Ev.exit, label %if.then.i.i.i
+
+if.then.i.i.i:                                    ; preds = %lpad1
+  br i1 undef, label %_ZN1BI1DED1Ev.exit, label %delete.notnull.i.i.i
+
+delete.notnull.i.i.i:                             ; preds = %if.then.i.i.i
+  call void @_ZdlPv() #9
+  unreachable
+
+_ZN1BI1DED1Ev.exit:                               ; preds = %lpad1, %if.then.i.i.i
+  resume { i8*, i32 } undef
+
+terminate.lpad:                                   ; No predecessors!
+  %7 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  unreachable
+}
+
+define void @_ZN1BI1DEC1EPS0_(%class.B* nocapture %this, %class.D* %p1) unnamed_addr uwtable align 2 {
+entry:
+  %m_ptr.i = getelementptr inbounds %class.B* %this, i64 0, i32 0
+  store %class.D* %p1, %class.D** %m_ptr.i, align 8
+  %_ref.i.i = getelementptr inbounds %class.D* %p1, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0
+  %0 = load i32* %_ref.i.i, align 4
+  %inc.i.i = add nsw i32 %0, 1
+  store i32 %inc.i.i, i32* %_ref.i.i, align 4
+  ret void
+}
+
+declare noalias i8* @_Znwm()
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZdlPv()
+
+define %class.D* @_ZN1BI1DEptEv(%class.B* nocapture readonly %this) nounwind readonly uwtable align 2 {
+entry:
+  %m_ptr = getelementptr inbounds %class.B* %this, i64 0, i32 0
+  %0 = load %class.D** %m_ptr, align 8
+  ret %class.D* %0
+}
+
+declare void @_ZN1D16deleteKeyPressedEv()
+
+define void @_ZN1BI1DED1Ev(%class.B* nocapture readonly %this) unnamed_addr uwtable align 2 {
+entry:
+  %m_ptr.i = getelementptr inbounds %class.B* %this, i64 0, i32 0
+  %0 = load %class.D** %m_ptr.i, align 8
+  %_ref.i.i = getelementptr inbounds %class.D* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0
+  %1 = load i32* %_ref.i.i, align 4
+  %tobool.i.i = icmp eq i32 %1, 0
+  br i1 %tobool.i.i, label %_ZN1BI1DED2Ev.exit, label %if.then.i.i
+
+if.then.i.i:                                      ; preds = %entry
+  br i1 undef, label %_ZN1BI1DED2Ev.exit, label %delete.notnull.i.i
+
+delete.notnull.i.i:                               ; preds = %if.then.i.i
+  call void @_ZdlPv() #9
+  unreachable
+
+_ZN1BI1DED2Ev.exit:                               ; preds = %entry, %if.then.i.i
+  ret void
+}
+
+declare hidden void @__clang_call_terminate()
+
+define void @_ZN1BI1DED2Ev(%class.B* nocapture readonly %this) unnamed_addr uwtable align 2 {
+entry:
+  %m_ptr = getelementptr inbounds %class.B* %this, i64 0, i32 0
+  %0 = load %class.D** %m_ptr, align 8
+  %_ref.i = getelementptr inbounds %class.D* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0
+  %1 = load i32* %_ref.i, align 4
+  %tobool.i = icmp eq i32 %1, 0
+  br i1 %tobool.i, label %_ZN1AI1CE5derefEv.exit, label %if.then.i
+
+if.then.i:                                        ; preds = %entry
+  br i1 undef, label %_ZN1AI1CE5derefEv.exit, label %delete.notnull.i
+
+delete.notnull.i:                                 ; preds = %if.then.i
+  call void @_ZdlPv() #9
+  unreachable
+
+_ZN1AI1CE5derefEv.exit:                           ; preds = %entry, %if.then.i
+  ret void
+}
+
+define void @_ZN1AI1CE5derefEv(%class.A* nocapture readonly %this) nounwind uwtable align 2 {
+entry:
+  %_ref = getelementptr inbounds %class.A* %this, i64 0, i32 0
+  %0 = load i32* %_ref, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  br i1 undef, label %if.end, label %delete.notnull
+
+delete.notnull:                                   ; preds = %if.then
+  call void @_ZdlPv() #9
+  unreachable
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void
+}
+
+define void @_ZN1BI1DEC2EPS0_(%class.B* nocapture %this, %class.D* %p1) unnamed_addr uwtable align 2 {
+entry:
+  %m_ptr = getelementptr inbounds %class.B* %this, i64 0, i32 0
+  store %class.D* %p1, %class.D** %m_ptr, align 8
+  %_ref.i = getelementptr inbounds %class.D* %p1, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0
+  %0 = load i32* %_ref.i, align 4
+  %inc.i = add nsw i32 %0, 1
+  store i32 %inc.i, i32* %_ref.i, align 4
+  ret void
+}
+
+define void @_ZN1AI1CE3refEv(%class.A* nocapture %this) nounwind uwtable align 2 {
+entry:
+  %_ref = getelementptr inbounds %class.A* %this, i64 0, i32 0
+  %0 = load i32* %_ref, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %_ref, align 4
+  ret void
+}
diff --git a/test/Transforms/JumpThreading/lit.local.cfg b/test/Transforms/JumpThreading/lit.local.cfg
deleted file mode 100644
index c6106e4..0000000
--- a/test/Transforms/JumpThreading/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll
index e651f9a..e5bf64b 100644
--- a/test/Transforms/JumpThreading/thread-loads.ll
+++ b/test/Transforms/JumpThreading/thread-loads.ll
@@ -75,7 +75,7 @@ bb3:		; preds = %bb1
 	ret i32 %res.0
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
+!0 = metadata !{metadata !3, metadata !3, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-
+!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/LCSSA/lit.local.cfg b/test/Transforms/LCSSA/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/LCSSA/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll b/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
index 86c2679..7cf7a32 100644
--- a/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
+++ b/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
@@ -30,8 +30,10 @@ for.end:                                          ; preds = %for.inc
   ret void
 }
 
-!0 = metadata !{metadata !"any pointer", metadata !1}
+!0 = metadata !{metadata !5, metadata !5, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA", null}
 !3 = metadata !{metadata !"short", metadata !1}
-!4 = metadata !{metadata !"int", metadata !1}
+!4 = metadata !{metadata !6, metadata !6, i64 0}
+!5 = metadata !{metadata !"any pointer", metadata !1}
+!6 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/LICM/debug-value.ll b/test/Transforms/LICM/debug-value.ll
index 3c70064..e5c774f 100644
--- a/test/Transforms/LICM/debug-value.ll
+++ b/test/Transforms/LICM/debug-value.ll
@@ -33,19 +33,20 @@ for.end104:                                       ; preds = %for.cond.backedge
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
+!llvm.module.flags = !{!26}
 !llvm.dbg.sp = !{!0, !6, !9, !10}
 
 !0 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"idamax", metadata !"idamax", metadata !"", i32 112, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 589865, metadata !25} ; [ DW_TAG_file_type ]
 !2 = metadata !{i32 589841, metadata !25, i32 12, metadata !"clang version 2.9 (trunk 127169)", i1 true, metadata !"", i32 0, metadata !8, metadata !8, metadata !8, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !25, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 589845, metadata !25, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"dscal", metadata !"dscal", metadata !"", i32 206, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 589845, metadata !25, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 589845, metadata !25, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null}
 !9 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"daxpy", metadata !"daxpy", metadata !"", i32 230, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"dgefa", metadata !"dgefa", metadata !"", i32 267, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"dgefa", metadata !"dgefa", metadata !"", i32 267, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 267] [def] [scope 0] [dgefa]
 !11 = metadata !{i32 281, i32 9, metadata !12, null}
 !12 = metadata !{i32 589835, metadata !25, metadata !13, i32 272, i32 5, i32 32} ; [ DW_TAG_lexical_block ]
 !13 = metadata !{i32 589835, metadata !25, metadata !14, i32 271, i32 5, i32 31} ; [ DW_TAG_lexical_block ]
@@ -61,3 +62,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !23 = metadata !{i32 296, i32 13, metadata !17, null}
 !24 = metadata !{i32 313, i32 1, metadata !14, null}
 !25 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c", metadata !"/private/tmp"}
+!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/LICM/lit.local.cfg b/test/Transforms/LICM/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/LICM/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LICM/promote-order.ll b/test/Transforms/LICM/promote-order.ll
index b016265..86f11fe 100644
--- a/test/Transforms/LICM/promote-order.ll
+++ b/test/Transforms/LICM/promote-order.ll
@@ -37,5 +37,7 @@ for.end:                                          ; preds = %for.cond.for.end_cr
 }
 
 !0 = metadata !{metadata !"minimal TBAA"}
-!1 = metadata !{metadata !"float", metadata !0}
-!2 = metadata !{metadata !"int", metadata !0}
+!1 = metadata !{metadata !3, metadata !3, i64 0}
+!2 = metadata !{metadata !4, metadata !4, i64 0}
+!3 = metadata !{metadata !"float", metadata !0}
+!4 = metadata !{metadata !"int", metadata !0}
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll
index b3e45c5..92ef155 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@@ -181,7 +181,9 @@ for.end:                                          ; preds = %for.cond.for.end_cr
 ; CHECK-NEXT:  store i32 %inc, i32* %gi, align 4, !tbaa !0
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
+!0 = metadata !{metadata !4, metadata !4, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"float", metadata !1}
+!3 = metadata !{metadata !5, metadata !5, i64 0}
+!4 = metadata !{metadata !"int", metadata !1}
+!5 = metadata !{metadata !"float", metadata !1}
diff --git a/test/Transforms/LICM/volatile-alias.ll b/test/Transforms/LICM/volatile-alias.ll
new file mode 100644
index 0000000..886d7f2
--- /dev/null
+++ b/test/Transforms/LICM/volatile-alias.ll
@@ -0,0 +1,54 @@
+; RUN: opt -basicaa -sroa -loop-rotate -licm -S < %s | FileCheck %s
+; The objects *p and *q are aliased to each other, but even though *q is
+; volatile, *p can be considered invariant in the loop. Check if it is moved
+; out of the loop.
+; CHECK: load i32* %p
+; CHECK: for.body:
+; CHECK: load volatile i32* %q
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define i32 @foo(i32* %p, i32* %q, i32 %n) #0 {
+entry:
+  %p.addr = alloca i32*, align 8
+  %q.addr = alloca i32*, align 8
+  %n.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  %s = alloca i32, align 4
+  store i32* %p, i32** %p.addr, align 8
+  store i32* %q, i32** %q.addr, align 8
+  store i32 %n, i32* %n.addr, align 4
+  store i32 0, i32* %s, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %1 = load i32* %n.addr, align 4
+  %cmp = icmp slt i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32** %p.addr, align 8
+  %3 = load i32* %2, align 4
+  %4 = load i32** %q.addr, align 8
+  %5 = load volatile i32* %4, align 4
+  %add = add nsw i32 %3, %5
+  %6 = load i32* %s, align 4
+  %add1 = add nsw i32 %6, %add
+  store i32 %add1, i32* %s, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %s, align 4
+  ret i32 %8
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopDeletion/lit.local.cfg b/test/Transforms/LoopDeletion/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/LoopDeletion/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopIdiom/X86/lit.local.cfg b/test/Transforms/LoopIdiom/X86/lit.local.cfg
index a8ad0f1..ba763cf 100644
--- a/test/Transforms/LoopIdiom/X86/lit.local.cfg
+++ b/test/Transforms/LoopIdiom/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Transforms/LoopIdiom/basic-address-space.ll b/test/Transforms/LoopIdiom/basic-address-space.ll
new file mode 100644
index 0000000..697ab37
--- /dev/null
+++ b/test/Transforms/LoopIdiom/basic-address-space.ll
@@ -0,0 +1,91 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; Two dimensional nested loop should be promoted to one big memset.
+define void @test10(i8 addrspace(2)* %X) nounwind ssp {
+; CHECK-LABEL: @test10(
+; CHECK: entry:
+; CHECK-NEXT: call void @llvm.memset.p2i8.i16(i8 addrspace(2)* %X, i8 0, i16 10000, i32 1, i1 false)
+; CHECK-NOT: store
+; CHECK: ret void
+
+entry:
+  br label %bb.nph
+
+bb.nph:                                           ; preds = %entry, %for.inc10
+  %i.04 = phi i16 [ 0, %entry ], [ %inc12, %for.inc10 ]
+  br label %for.body5
+
+for.body5:                                        ; preds = %for.body5, %bb.nph
+  %j.02 = phi i16 [ 0, %bb.nph ], [ %inc, %for.body5 ]
+  %mul = mul nsw i16 %i.04, 100
+  %add = add nsw i16 %j.02, %mul
+  %arrayidx = getelementptr inbounds i8 addrspace(2)* %X, i16 %add
+  store i8 0, i8 addrspace(2)* %arrayidx, align 1
+  %inc = add nsw i16 %j.02, 1
+  %cmp4 = icmp eq i16 %inc, 100
+  br i1 %cmp4, label %for.inc10, label %for.body5
+
+for.inc10:                                        ; preds = %for.body5
+  %inc12 = add nsw i16 %i.04, 1
+  %cmp = icmp eq i16 %inc12, 100
+  br i1 %cmp, label %for.end13, label %bb.nph
+
+for.end13:                                        ; preds = %for.inc10
+  ret void
+}
+
+define void @test11_pattern(i32 addrspace(2)* nocapture %P) nounwind ssp {
+; CHECK-LABEL: @test11_pattern(
+; CHECK-NOT: memset_pattern
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
+  %arrayidx = getelementptr i32 addrspace(2)* %P, i64 %indvar
+  store i32 1, i32 addrspace(2)* %arrayidx, align 4
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, 10000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; PR9815 - This is a partial overlap case that cannot be safely transformed
+; into a memcpy.
+@g_50 = addrspace(2) global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
+
+
+define i32 @test14() nounwind {
+; CHECK-LABEL: @test14(
+; CHECK: for.body:
+; CHECK: load i32
+; CHECK: store i32
+; CHECK: br i1 %cmp
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %add = add nsw i32 %tmp5, 4
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds [7 x i32] addrspace(2)* @g_50, i32 0, i64 %idxprom
+  %tmp2 = load i32 addrspace(2)* %arrayidx, align 4
+  %add4 = add nsw i32 %tmp5, 5
+  %idxprom5 = sext i32 %add4 to i64
+  %arrayidx6 = getelementptr inbounds [7 x i32] addrspace(2)* @g_50, i32 0, i64 %idxprom5
+  store i32 %tmp2, i32 addrspace(2)* %arrayidx6, align 4
+  %inc = add nsw i32 %tmp5, 1
+  %cmp = icmp slt i32 %inc, 2
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %tmp8 = load i32 addrspace(2)* getelementptr inbounds ([7 x i32] addrspace(2)* @g_50, i32 0, i64 6), align 4
+  ret i32 %tmp8
+}
+
diff --git a/test/Transforms/LoopIdiom/debug-line.ll b/test/Transforms/LoopIdiom/debug-line.ll
index 2337590..ef4a478 100644
--- a/test/Transforms/LoopIdiom/debug-line.ll
+++ b/test/Transforms/LoopIdiom/debug-line.ll
@@ -27,12 +27,13 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
+!llvm.module.flags = !{!19}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !18, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (double*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 589870, metadata !18, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (double*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
 !1 = metadata !{i32 589865, metadata !18} ; [ DW_TAG_file_type ]
 !2 = metadata !{i32 589841, metadata !18, i32 12, metadata !"clang version 2.9 (trunk 127165:127174)", i1 true, metadata !"", i32 0, metadata !9, metadata !9, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !18, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 589845, metadata !18, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777218, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
 !6 = metadata !{i32 589839, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
@@ -48,3 +49,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !16 = metadata !{i32 3, i32 29, metadata !11, null}
 !17 = metadata !{i32 5, i32 1, metadata !12, null}
 !18 = metadata !{metadata !"li.c", metadata !"/private/tmp"}
+!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/LoopIdiom/lit.local.cfg b/test/Transforms/LoopIdiom/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/LoopIdiom/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopReroll/basic.ll b/test/Transforms/LoopReroll/basic.ll
new file mode 100644
index 0000000..314a149
--- /dev/null
+++ b/test/Transforms/LoopReroll/basic.ll
@@ -0,0 +1,327 @@
+; RUN: opt < %s -loop-reroll -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; int foo(int a);
+; void bar(int *x) {
+;   for (int i = 0; i < 500; i += 3) {
+;     foo(i);
+;     foo(i+1);
+;     foo(i+2);
+;   }
+; }
+
+; Function Attrs: nounwind uwtable
+define void @bar(i32* nocapture readnone %x) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+  %call = tail call i32 @foo(i32 %i.08) #1
+  %add = add nsw i32 %i.08, 1
+  %call1 = tail call i32 @foo(i32 %add) #1
+  %add2 = add nsw i32 %i.08, 2
+  %call3 = tail call i32 @foo(i32 %add2) #1
+  %add3 = add nsw i32 %i.08, 3
+  %exitcond = icmp eq i32 %add3, 500
+  br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK-LABEL: @bar
+
+; CHECK: for.body:
+; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %call = tail call i32 @foo(i32 %indvar) #1
+; CHECK: %indvar.next = add i32 %indvar, 1
+; CHECK: %exitcond1 = icmp eq i32 %indvar.next, 498
+; CHECK: br i1 %exitcond1, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare i32 @foo(i32)
+
+; void hi1(int *x) {
+;   for (int i = 0; i < 1500; i += 3) {
+;     x[i] = foo(0);
+;     x[i+1] = foo(0);
+;     x[i+2] = foo(0);
+;   }
+; }
+
+; Function Attrs: nounwind uwtable
+define void @hi1(i32* nocapture %x) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %call = tail call i32 @foo(i32 0) #1
+  %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
+  store i32 %call, i32* %arrayidx, align 4
+  %call1 = tail call i32 @foo(i32 0) #1
+  %0 = add nsw i64 %indvars.iv, 1
+  %arrayidx3 = getelementptr inbounds i32* %x, i64 %0
+  store i32 %call1, i32* %arrayidx3, align 4
+  %call4 = tail call i32 @foo(i32 0) #1
+  %1 = add nsw i64 %indvars.iv, 2
+  %arrayidx7 = getelementptr inbounds i32* %x, i64 %1
+  store i32 %call4, i32* %arrayidx7, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
+  %2 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %2, 1500
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK-LABEL: @hi1
+
+; CHECK: for.body:
+; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %call = tail call i32 @foo(i32 0) #1
+; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvar
+; CHECK: store i32 %call, i32* %arrayidx, align 4
+; CHECK: %indvar.next = add i64 %indvar, 1
+; CHECK: %exitcond = icmp eq i64 %indvar.next, 1500
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; void hi2(int *x) {
+;   for (int i = 0; i < 500; ++i) {
+;     x[3*i] = foo(0);
+;     x[3*i+1] = foo(0);
+;     x[3*i+2] = foo(0);
+;   }
+; }
+
+; Function Attrs: nounwind uwtable
+define void @hi2(i32* nocapture %x) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %call = tail call i32 @foo(i32 0) #1
+  %0 = mul nsw i64 %indvars.iv, 3
+  %arrayidx = getelementptr inbounds i32* %x, i64 %0
+  store i32 %call, i32* %arrayidx, align 4
+  %call1 = tail call i32 @foo(i32 0) #1
+  %1 = add nsw i64 %0, 1
+  %arrayidx4 = getelementptr inbounds i32* %x, i64 %1
+  store i32 %call1, i32* %arrayidx4, align 4
+  %call5 = tail call i32 @foo(i32 0) #1
+  %2 = add nsw i64 %0, 2
+  %arrayidx9 = getelementptr inbounds i32* %x, i64 %2
+  store i32 %call5, i32* %arrayidx9, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 500
+  br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK-LABEL: @hi2
+
+; CHECK: for.body:
+; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK: %call = tail call i32 @foo(i32 0) #1
+; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
+; CHECK: store i32 %call, i32* %arrayidx, align 4
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %exitcond1 = icmp eq i64 %indvars.iv.next, 1500
+; CHECK: br i1 %exitcond1, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; void goo(float alpha, float *a, float *b) {
+;   for (int i = 0; i < 3200; i += 5) {
+;     a[i] += alpha * b[i];
+;     a[i + 1] += alpha * b[i + 1];
+;     a[i + 2] += alpha * b[i + 2];
+;     a[i + 3] += alpha * b[i + 3];
+;     a[i + 4] += alpha * b[i + 4];
+;   }
+; }
+
+; Function Attrs: nounwind uwtable
+define void @goo(float %alpha, float* nocapture %a, float* nocapture readonly %b) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %mul = fmul float %0, %alpha
+  %arrayidx2 = getelementptr inbounds float* %a, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4
+  %add = fadd float %1, %mul
+  store float %add, float* %arrayidx2, align 4
+  %2 = add nsw i64 %indvars.iv, 1
+  %arrayidx5 = getelementptr inbounds float* %b, i64 %2
+  %3 = load float* %arrayidx5, align 4
+  %mul6 = fmul float %3, %alpha
+  %arrayidx9 = getelementptr inbounds float* %a, i64 %2
+  %4 = load float* %arrayidx9, align 4
+  %add10 = fadd float %4, %mul6
+  store float %add10, float* %arrayidx9, align 4
+  %5 = add nsw i64 %indvars.iv, 2
+  %arrayidx13 = getelementptr inbounds float* %b, i64 %5
+  %6 = load float* %arrayidx13, align 4
+  %mul14 = fmul float %6, %alpha
+  %arrayidx17 = getelementptr inbounds float* %a, i64 %5
+  %7 = load float* %arrayidx17, align 4
+  %add18 = fadd float %7, %mul14
+  store float %add18, float* %arrayidx17, align 4
+  %8 = add nsw i64 %indvars.iv, 3
+  %arrayidx21 = getelementptr inbounds float* %b, i64 %8
+  %9 = load float* %arrayidx21, align 4
+  %mul22 = fmul float %9, %alpha
+  %arrayidx25 = getelementptr inbounds float* %a, i64 %8
+  %10 = load float* %arrayidx25, align 4
+  %add26 = fadd float %10, %mul22
+  store float %add26, float* %arrayidx25, align 4
+  %11 = add nsw i64 %indvars.iv, 4
+  %arrayidx29 = getelementptr inbounds float* %b, i64 %11
+  %12 = load float* %arrayidx29, align 4
+  %mul30 = fmul float %12, %alpha
+  %arrayidx33 = getelementptr inbounds float* %a, i64 %11
+  %13 = load float* %arrayidx33, align 4
+  %add34 = fadd float %13, %mul30
+  store float %add34, float* %arrayidx33, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
+  %14 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %14, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK-LABEL: @goo
+
+; CHECK: for.body:
+; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %arrayidx = getelementptr inbounds float* %b, i64 %indvar
+; CHECK: %0 = load float* %arrayidx, align 4
+; CHECK: %mul = fmul float %0, %alpha
+; CHECK: %arrayidx2 = getelementptr inbounds float* %a, i64 %indvar
+; CHECK: %1 = load float* %arrayidx2, align 4
+; CHECK: %add = fadd float %1, %mul
+; CHECK: store float %add, float* %arrayidx2, align 4
+; CHECK: %indvar.next = add i64 %indvar, 1
+; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; void hoo(float alpha, float *a, float *b, int *ip) {
+;   for (int i = 0; i < 3200; i += 5) {
+;     a[i] += alpha * b[ip[i]];
+;     a[i + 1] += alpha * b[ip[i + 1]];
+;     a[i + 2] += alpha * b[ip[i + 2]];
+;     a[i + 3] += alpha * b[ip[i + 3]];
+;     a[i + 4] += alpha * b[ip[i + 4]];
+;   }
+; }
+
+; Function Attrs: nounwind uwtable
+define void @hoo(float %alpha, float* nocapture %a, float* nocapture readonly %b, i32* nocapture readonly %ip) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %ip, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %idxprom1 = sext i32 %0 to i64
+  %arrayidx2 = getelementptr inbounds float* %b, i64 %idxprom1
+  %1 = load float* %arrayidx2, align 4
+  %mul = fmul float %1, %alpha
+  %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
+  %2 = load float* %arrayidx4, align 4
+  %add = fadd float %2, %mul
+  store float %add, float* %arrayidx4, align 4
+  %3 = add nsw i64 %indvars.iv, 1
+  %arrayidx7 = getelementptr inbounds i32* %ip, i64 %3
+  %4 = load i32* %arrayidx7, align 4
+  %idxprom8 = sext i32 %4 to i64
+  %arrayidx9 = getelementptr inbounds float* %b, i64 %idxprom8
+  %5 = load float* %arrayidx9, align 4
+  %mul10 = fmul float %5, %alpha
+  %arrayidx13 = getelementptr inbounds float* %a, i64 %3
+  %6 = load float* %arrayidx13, align 4
+  %add14 = fadd float %6, %mul10
+  store float %add14, float* %arrayidx13, align 4
+  %7 = add nsw i64 %indvars.iv, 2
+  %arrayidx17 = getelementptr inbounds i32* %ip, i64 %7
+  %8 = load i32* %arrayidx17, align 4
+  %idxprom18 = sext i32 %8 to i64
+  %arrayidx19 = getelementptr inbounds float* %b, i64 %idxprom18
+  %9 = load float* %arrayidx19, align 4
+  %mul20 = fmul float %9, %alpha
+  %arrayidx23 = getelementptr inbounds float* %a, i64 %7
+  %10 = load float* %arrayidx23, align 4
+  %add24 = fadd float %10, %mul20
+  store float %add24, float* %arrayidx23, align 4
+  %11 = add nsw i64 %indvars.iv, 3
+  %arrayidx27 = getelementptr inbounds i32* %ip, i64 %11
+  %12 = load i32* %arrayidx27, align 4
+  %idxprom28 = sext i32 %12 to i64
+  %arrayidx29 = getelementptr inbounds float* %b, i64 %idxprom28
+  %13 = load float* %arrayidx29, align 4
+  %mul30 = fmul float %13, %alpha
+  %arrayidx33 = getelementptr inbounds float* %a, i64 %11
+  %14 = load float* %arrayidx33, align 4
+  %add34 = fadd float %14, %mul30
+  store float %add34, float* %arrayidx33, align 4
+  %15 = add nsw i64 %indvars.iv, 4
+  %arrayidx37 = getelementptr inbounds i32* %ip, i64 %15
+  %16 = load i32* %arrayidx37, align 4
+  %idxprom38 = sext i32 %16 to i64
+  %arrayidx39 = getelementptr inbounds float* %b, i64 %idxprom38
+  %17 = load float* %arrayidx39, align 4
+  %mul40 = fmul float %17, %alpha
+  %arrayidx43 = getelementptr inbounds float* %a, i64 %15
+  %18 = load float* %arrayidx43, align 4
+  %add44 = fadd float %18, %mul40
+  store float %add44, float* %arrayidx43, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
+  %19 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %19, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK-LABEL: @hoo
+
+; CHECK: for.body:
+; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %arrayidx = getelementptr inbounds i32* %ip, i64 %indvar
+; CHECK: %0 = load i32* %arrayidx, align 4
+; CHECK: %idxprom1 = sext i32 %0 to i64
+; CHECK: %arrayidx2 = getelementptr inbounds float* %b, i64 %idxprom1
+; CHECK: %1 = load float* %arrayidx2, align 4
+; CHECK: %mul = fmul float %1, %alpha
+; CHECK: %arrayidx4 = getelementptr inbounds float* %a, i64 %indvar
+; CHECK: %2 = load float* %arrayidx4, align 4
+; CHECK: %add = fadd float %2, %mul
+; CHECK: store float %add, float* %arrayidx4, align 4
+; CHECK: %indvar.next = add i64 %indvar, 1
+; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/LoopReroll/reduction.ll b/test/Transforms/LoopReroll/reduction.ll
new file mode 100644
index 0000000..aed7670
--- /dev/null
+++ b/test/Transforms/LoopReroll/reduction.ll
@@ -0,0 +1,96 @@
+; RUN: opt < %s -loop-reroll -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo(i32* nocapture readonly %x) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.029 = phi i32 [ 0, %entry ], [ %add12, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.029
+  %1 = or i64 %indvars.iv, 1
+  %arrayidx3 = getelementptr inbounds i32* %x, i64 %1
+  %2 = load i32* %arrayidx3, align 4
+  %add4 = add nsw i32 %add, %2
+  %3 = or i64 %indvars.iv, 2
+  %arrayidx7 = getelementptr inbounds i32* %x, i64 %3
+  %4 = load i32* %arrayidx7, align 4
+  %add8 = add nsw i32 %add4, %4
+  %5 = or i64 %indvars.iv, 3
+  %arrayidx11 = getelementptr inbounds i32* %x, i64 %5
+  %6 = load i32* %arrayidx11, align 4
+  %add12 = add nsw i32 %add8, %6
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+  %7 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %7, 400
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK-LABEL: @foo
+
+; CHECK: for.body:
+; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %r.029 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvar
+; CHECK: %0 = load i32* %arrayidx, align 4
+; CHECK: %add = add nsw i32 %0, %r.029
+; CHECK: %indvar.next = add i64 %indvar, 1
+; CHECK: %exitcond = icmp eq i64 %indvar.next, 400
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add12
+}
+
+define float @bar(float* nocapture readonly %x) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.029 = phi float [ 0.0, %entry ], [ %add12, %for.body ]
+  %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %add = fadd float %0, %r.029
+  %1 = or i64 %indvars.iv, 1
+  %arrayidx3 = getelementptr inbounds float* %x, i64 %1
+  %2 = load float* %arrayidx3, align 4
+  %add4 = fadd float %add, %2
+  %3 = or i64 %indvars.iv, 2
+  %arrayidx7 = getelementptr inbounds float* %x, i64 %3
+  %4 = load float* %arrayidx7, align 4
+  %add8 = fadd float %add4, %4
+  %5 = or i64 %indvars.iv, 3
+  %arrayidx11 = getelementptr inbounds float* %x, i64 %5
+  %6 = load float* %arrayidx11, align 4
+  %add12 = fadd float %add8, %6
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+  %7 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %7, 400
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK-LABEL: @bar
+
+; CHECK: for.body:
+; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %r.029 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+; CHECK: %arrayidx = getelementptr inbounds float* %x, i64 %indvar
+; CHECK: %0 = load float* %arrayidx, align 4
+; CHECK: %add = fadd float %0, %r.029
+; CHECK: %indvar.next = add i64 %indvar, 1
+; CHECK: %exitcond = icmp eq i64 %indvar.next, 400
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret float %add12
+}
+
+attributes #0 = { nounwind readonly uwtable }
+
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
index 3434cdc..9461980 100644
--- a/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -77,12 +77,13 @@ for.end:
   ret void
 }
 
+!llvm.module.flags = !{!20}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !18, metadata !1, metadata !"tak", metadata !"tak", metadata !"", i32 32, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32, i32)* @tak, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 589870, metadata !18, metadata !1, metadata !"tak", metadata !"tak", metadata !"", i32 32, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i32, i32)* @tak, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 0] [tak]
 !1 = metadata !{i32 589865, metadata !18} ; [ DW_TAG_file_type ]
 !2 = metadata !{i32 589841, metadata !18, i32 12, metadata !"clang version 2.9 (trunk 125492)", i1 true, metadata !"", i32 0, metadata !19, metadata !19, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !18, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 589845, metadata !18, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 590081, metadata !0, metadata !"x", metadata !1, i32 32, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
@@ -99,3 +100,4 @@ for.end:
 !17 = metadata !{i32 37, i32 1, metadata !13, null}
 !18 = metadata !{metadata !"/Volumes/Lalgate/cj/llvm/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame/recursive.c", metadata !"/Volumes/Lalgate/cj/D/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame"}
 !19 = metadata !{i32 0}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/LoopRotate/lit.local.cfg b/test/Transforms/LoopRotate/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/LoopRotate/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopSimplify/dup-preds.ll b/test/Transforms/LoopSimplify/dup-preds.ll
new file mode 100644
index 0000000..3d1f149
--- /dev/null
+++ b/test/Transforms/LoopSimplify/dup-preds.ll
@@ -0,0 +1,46 @@
+; RUN: opt -loop-simplify -S %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define fastcc void @do_update_md([3 x float]* nocapture readonly %x) #0 {
+entry:
+  br i1 undef, label %if.end365, label %lor.lhs.false134
+
+lor.lhs.false134:                                 ; preds = %entry
+  br i1 undef, label %lor.lhs.false138, label %if.end365
+
+lor.lhs.false138:                                 ; preds = %lor.lhs.false134
+  br i1 undef, label %lor.lhs.false142, label %if.end365
+
+lor.lhs.false142:                                 ; preds = %lor.lhs.false138
+  br i1 undef, label %for.body276.lr.ph, label %if.end365
+
+for.body276.lr.ph:                                ; preds = %lor.lhs.false142
+  switch i16 undef, label %if.then288 [
+    i16 4, label %for.body344
+    i16 2, label %for.body344
+  ]
+
+if.then288:                                       ; preds = %for.body276.lr.ph
+  br label %for.body305
+
+for.body305:                                      ; preds = %for.body305, %if.then288
+  br label %for.body305
+
+for.body344:                                      ; preds = %for.body344, %for.body276.lr.ph, %for.body276.lr.ph
+  %indvar = phi i64 [ %indvar.next, %for.body344 ], [ 0, %for.body276.lr.ph ], [ 0, %for.body276.lr.ph ] ; one incoming value per predecessor edge: the switch reaches this block twice
+  %indvars.iv552 = phi i64 [ %indvars.iv.next553, %for.body344 ], [ 0, %for.body276.lr.ph ], [ 0, %for.body276.lr.ph ]
+  %indvars.iv.next553 = add nuw nsw i64 %indvars.iv552, 1
+  %indvar.next = add i64 %indvar, 1
+  br label %for.body344
+
+; CHECK-LABEL: @do_update_md
+; CHECK: %indvars.iv552 = phi i64 [ %indvars.iv.next553, %for.body344 ], [ 0, %for.body344.preheader ]
+; CHECK: ret
+
+if.end365:                                        ; preds = %lor.lhs.false142, %lor.lhs.false138, %lor.lhs.false134, %entry
+  ret void
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/Transforms/LoopSimplify/lit.local.cfg b/test/Transforms/LoopSimplify/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/LoopSimplify/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
index bac2ffa..8a3ba96 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
+++ b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll']
-
 targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
     config.unsupported = True
diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
index fad5241..001a1d6 100644
--- a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -loop-reduce -S | FileCheck %s
 ;
-; Test LSR's ability to prune formulae that refer to nonexistant
+; Test LSR's ability to prune formulae that refer to nonexistent
 ; AddRecs in other loops.
 ;
 ; Unable to reduce this case further because it requires LSR to exceed
diff --git a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
index da2db5a..ba763cf 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
+++ b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll b/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
new file mode 100644
index 0000000..6333291
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
@@ -0,0 +1,88 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; CHECK: bb1:
+; CHECK: load double addrspace(1)* [[IV:%[^,]+]]
+; CHECK: store double {{.*}}, double addrspace(1)* [[IV]]
+
+; CHECK-NOT: cast
+; Make sure the GEP has the right index type
+; CHECK: getelementptr double addrspace(1)* [[IV]], i16 1
+; CHECK: br {{.*}} label %bb1
+
+; Make sure the GEP has the right index type
+; CHECK: getelementptr double addrspace(1)* {{.*}}, i16
+
+
+; This test tests several things. The load and store should use the
+; same address instead of having it computed twice, and SCEVExpander should
+; be able to reconstruct the full getelementptr, despite it having a few
+; obstacles set in its way.
+; We only check that the inner loop (bb1-bb2) is "reduced" because LSR
+; currently only operates on inner loops.
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-n16:32:64"
+
+define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double addrspace(1)* nocapture %p) nounwind {
+entry:
+	%tmp = icmp sgt i64 %n, 0		; <i1> [#uses=1]
+	br i1 %tmp, label %bb.nph3, label %return
+
+bb.nph:		; preds = %bb2.preheader
+	%tmp1 = mul i64 %tmp16, %i.02		; <i64> [#uses=1]
+	%tmp2 = mul i64 %tmp19, %i.02		; <i64> [#uses=1]
+	br label %bb1
+
+bb1:		; preds = %bb2, %bb.nph
+	%j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ]		; <i64> [#uses=3]
+	%tmp3 = add i64 %j.01, %tmp1		; <i64> [#uses=1]
+	%tmp4 = add i64 %j.01, %tmp2		; <i64> [#uses=1]
+        %z0 = add i64 %tmp3, 5203
+	%tmp5 = getelementptr double addrspace(1)* %p, i64 %z0		; <double addrspace(1)*> [#uses=1]
+	%tmp6 = load double addrspace(1)* %tmp5, align 8		; <double> [#uses=1]
+	%tmp7 = fdiv double %tmp6, 2.100000e+00		; <double> [#uses=1]
+        %z1 = add i64 %tmp4, 5203
+	%tmp8 = getelementptr double addrspace(1)* %p, i64 %z1		; <double addrspace(1)*> [#uses=1]
+	store double %tmp7, double addrspace(1)* %tmp8, align 8
+	%tmp9 = add i64 %j.01, 1		; <i64> [#uses=2]
+	br label %bb2
+
+bb2:		; preds = %bb1
+	%tmp10 = icmp slt i64 %tmp9, %m		; <i1> [#uses=1]
+	br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
+
+bb2.bb3_crit_edge:		; preds = %bb2
+	br label %bb3
+
+bb3:		; preds = %bb2.preheader, %bb2.bb3_crit_edge
+	%tmp11 = add i64 %i.02, 1		; <i64> [#uses=2]
+	br label %bb4
+
+bb4:		; preds = %bb3
+	%tmp12 = icmp slt i64 %tmp11, %n		; <i1> [#uses=1]
+	br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
+
+bb4.return_crit_edge:		; preds = %bb4
+	br label %bb4.return_crit_edge.split
+
+bb4.return_crit_edge.split:		; preds = %bb.nph3, %bb4.return_crit_edge
+	br label %return
+
+bb.nph3:		; preds = %entry
+	%tmp13 = icmp sgt i64 %m, 0		; <i1> [#uses=1]
+	%tmp14 = mul i64 %n, 37		; <i64> [#uses=1]
+	%tmp15 = mul i64 %tmp14, %o		; <i64> [#uses=1]
+	%tmp16 = mul i64 %tmp15, %q		; <i64> [#uses=1]
+	%tmp17 = mul i64 %n, 37		; <i64> [#uses=1]
+	%tmp18 = mul i64 %tmp17, %o		; <i64> [#uses=1]
+	%tmp19 = mul i64 %tmp18, %q		; <i64> [#uses=1]
+	br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
+
+bb.nph3.split:		; preds = %bb.nph3
+	br label %bb2.preheader
+
+bb2.preheader:		; preds = %bb.nph3.split, %bb4
+	%i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ]		; <i64> [#uses=3]
+	br i1 true, label %bb.nph, label %bb3
+
+return:		; preds = %bb4.return_crit_edge.split, %entry
+	ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/address-space-loop.ll b/test/Transforms/LoopStrengthReduce/address-space-loop.ll
new file mode 100644
index 0000000..9c1b213
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/address-space-loop.ll
@@ -0,0 +1,56 @@
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+
+; LSR shouldn't consider %t8 to be an interesting user of %t6, and it
+; should be able to form pretty GEPs.
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Copy of uglygep with a different address space
+; This tests that expandAddToGEP uses the right smaller integer type for
+; another address space
+define void @Z4() nounwind {
+; CHECK-LABEL: @Z4(
+bb:
+  br label %bb3
+
+bb1:                                              ; preds = %bb3
+  br i1 undef, label %bb10, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %t = add i16 %t4, 1                         ; <i16> [#uses=1]
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  %t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ]      ; <i16> [#uses=3]
+  br label %bb1
+
+; CHECK: bb10:
+; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0
+; Hoist %t2 computation outside the loop.
+; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8 addrspace(1)* undef, i16 %t4
+; CHECK-NEXT: br label %bb14
+bb10:                                             ; preds = %bb1
+  %t7 = icmp eq i16 %t4, 0                    ; <i1> [#uses=1]
+  %t3 = add i16 %t4, 16                     ; <i16> [#uses=1]
+  br label %bb14
+
+; CHECK: bb14:
+; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]]
+; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef
+; Fold %t3's add within the address.
+; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float addrspace(1)* %t6, i16 4
+; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)*
+; Use the induction variable (%t4) to access the right element
+; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8 addrspace(1)* [[SCEVGEP2]], i16 %t4
+; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]]
+; CHECK-NEXT: br label %bb14
+bb14:                                             ; preds = %bb14, %bb10
+  %t2 = getelementptr inbounds i8 addrspace(1)* undef, i16 %t4 ; <i8 addrspace(1)*> [#uses=1]
+  store i8 undef, i8 addrspace(1)* %t2
+  %t6 = load float addrspace(1)* addrspace(1)* undef
+  %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)*              ; <i8 addrspace(1)*> [#uses=1]
+  %t9 = getelementptr inbounds i8 addrspace(1)* %t8, i16 %t3 ; <i8 addrspace(1)*> [#uses=1]
+  store i8 undef, i8 addrspace(1)* %t9
+  br label %bb14
+}
+
diff --git a/test/Transforms/LoopStrengthReduce/dominate-assert.ll b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
index ff8cab8..3ba93ff 100644
--- a/test/Transforms/LoopStrengthReduce/dominate-assert.ll
+++ b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
@@ -68,3 +68,46 @@ bb7:
           catch i8* null
   ret void
 }
+
+; PR17425
+define void @i() {
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond, %entry
+  %c.0 = phi i16* [ undef, %entry ], [ %incdec.ptr, %while.cond ]
+  %incdec.ptr = getelementptr inbounds i16* %c.0, i64 1
+  br i1 undef, label %while.cond1, label %while.cond
+
+while.cond1:                                      ; preds = %while.cond1, %while.cond
+  %c.1 = phi i16* [ %incdec.ptr5, %while.cond1 ], [ %c.0, %while.cond ]
+  %incdec.ptr5 = getelementptr inbounds i16* %c.1, i64 1
+  br i1 undef, label %while.cond7, label %while.cond1
+
+while.cond7:                                      ; preds = %while.cond7, %while.cond1
+  %0 = phi i16* [ %incdec.ptr10, %while.cond7 ], [ %c.1, %while.cond1 ]
+  %incdec.ptr10 = getelementptr inbounds i16* %0, i64 1
+  br i1 undef, label %while.cond12.preheader, label %while.cond7
+
+while.cond12.preheader:                           ; preds = %while.cond7
+  br i1 undef, label %while.end16, label %while.body13.lr.ph
+
+while.body13:                                     ; preds = %if.else, %while.body13.lr.ph
+  %1 = phi i16* [ %2, %while.body13.lr.ph ], [ %incdec.ptr15, %if.else ]
+  br i1 undef, label %while.cond12.outer.loopexit, label %if.else
+
+while.cond12.outer.loopexit:                      ; preds = %while.body13
+  br i1 undef, label %while.end16, label %while.body13.lr.ph
+
+while.body13.lr.ph:                               ; preds = %while.cond12.outer.loopexit, %while.cond12.preheader
+  %2 = phi i16* [ %1, %while.cond12.outer.loopexit ], [ undef, %while.cond12.preheader ]
+  br label %while.body13
+
+if.else:                                          ; preds = %while.body13
+  %incdec.ptr15 = getelementptr inbounds i16* %1, i64 1
+  %cmp = icmp eq i16* %incdec.ptr15, %0
+  br i1 %cmp, label %while.end16, label %while.body13
+
+while.end16:                                      ; preds = %if.else, %while.cond12.outer.loopexit, %while.cond12.preheader
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/lit.local.cfg b/test/Transforms/LoopStrengthReduce/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/LoopStrengthReduce/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll
new file mode 100644
index 0000000..255cf41
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll
@@ -0,0 +1,42 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; PR15470: LSR miscompile. The test2 function should return '1'.
+;
+; SCEV expander cannot expand quadratic recurrences outside of the
+; loop. This recurrence depends on %sub.us, so can't be expanded.
+;
+; CHECK-LABEL: @test2
+; CHECK-LABEL: test2.loop:
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ]
+; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216
+;
+; CHECK-LABEL: for.end:
+; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
+; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us
+; CHECK: %f = ashr i32 %sext.us, 24
+; CHECK: ret i32 %f
+define i32 @test2() {
+entry:
+  br label %test2.loop
+
+test2.loop:                                       ; preds = %test2.loop, %entry
+  %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ]
+  %inc11.us = add nsw i32 %inc1115.us, 1
+  %cmp.us = icmp slt i32 %inc11.us, 2
+  br i1 %cmp.us, label %test2.loop, label %for.end
+
+for.end:                                          ; preds = %test2.loop
+  %tobool.us = icmp eq i32 %inc1115.us, 0
+  %sub.us = select i1 %tobool.us, i32 0, i32 0
+  %mul.us = shl i32 %inc1115.us, 24
+  %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
+  %sext.us = mul i32 %mul.us, %sub.cond.us
+  %f = ashr i32 %sext.us, 24
+  br label %exit
+
+exit:                                             ; preds = %for.end
+  ret i32 %f
+}
diff --git a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
index 0118241..65aa61f 100644
--- a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
+++ b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
@@ -1,18 +1,50 @@
-; RUN: opt < %s -analyze -iv-users | grep "{1,+,3,+,2}<%loop> (post-inc with loop %loop)"
+; RUN: opt < %s -analyze -iv-users | FileCheck %s
 
 ; The value of %r is dependent on a polynomial iteration expression.
-
+;
+; CHECK-LABEL: IV Users for loop %foo.loop
+; CHECK: {1,+,3,+,2}<%foo.loop>
 define i64 @foo(i64 %n) {
 entry:
-  br label %loop
+  br label %foo.loop
 
-loop:
-  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+foo.loop:
+  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %foo.loop ]
   %indvar.next = add i64 %indvar, 1
   %c = icmp eq i64 %indvar.next, %n
-  br i1 %c, label %exit, label %loop
+  br i1 %c, label %exit, label %foo.loop
 
 exit:
   %r = mul i64 %indvar.next, %indvar.next
   ret i64 %r
 }
+
+; PR15470: LSR miscompile. The test2 function should return '1'.
+;
+; SCEV does not know how to denormalize chained recurrences, so make
+; sure they aren't marked as post-inc users.
+;
+; CHECK-LABEL: IV Users for loop %test2.loop
+; CHECK: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us)),+,33554432}<%test2.loop> in %f = ashr i32 %sext.us, 24
+define i32 @test2() {
+entry:
+  br label %test2.loop
+
+test2.loop:
+  %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ]
+  %inc11.us = add nsw i32 %inc1115.us, 1
+  %cmp.us = icmp slt i32 %inc11.us, 2
+  br i1 %cmp.us, label %test2.loop, label %for.end
+
+for.end:
+  %tobool.us = icmp eq i32 %inc1115.us, 0
+  %sub.us = select i1 %tobool.us, i32 0, i32 0
+  %mul.us = shl i32 %inc1115.us, 24
+  %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
+  %sext.us = mul i32 %mul.us, %sub.cond.us
+  %f = ashr i32 %sext.us, 24
+  br label %exit
+
+exit:
+  ret i32 %f
+}
diff --git a/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll b/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll
new file mode 100644
index 0000000..2c65261
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+; LSR shouldn't consider %t8 to be an interesting user of %t6, and it
+; should be able to form pretty GEPs.
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Copy of uglygep with a different address space
+; This tests that expandAddToGEP uses the right smaller integer type for
+; another address space
+define void @Z4() nounwind {
+; CHECK: define void @Z4
+bb:
+  br label %bb3
+
+bb1:                                              ; preds = %bb3
+  br i1 undef, label %bb10, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %t = add i16 %t4, 1                         ; <i16> [#uses=1]
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  %t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ]      ; <i16> [#uses=3]
+  br label %bb1
+
+; CHECK: bb10:
+; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0
+; Hoist %t2 computation outside the loop.
+; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8 addrspace(1)* undef, i16 %t4
+; CHECK-NEXT: br label %bb14
+bb10:                                             ; preds = %bb1
+  %t7 = icmp eq i16 %t4, 0                    ; <i1> [#uses=1]
+  %t3 = add i16 %t4, 16                     ; <i16> [#uses=1]
+  br label %bb14
+
+; CHECK: bb14:
+; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]]
+; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef
+; Fold %t3's add within the address.
+; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float addrspace(1)* %t6, i16 4
+; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)*
+; Use the induction variable (%t4) to access the right element
+; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8 addrspace(1)* [[SCEVGEP2]], i16 %t4
+; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]]
+; CHECK-NEXT: br label %bb14
+bb14:                                             ; preds = %bb14, %bb10
+  %t2 = getelementptr inbounds i8 addrspace(1)* undef, i16 %t4 ; <i8 addrspace(1)*> [#uses=1]
+  store i8 undef, i8 addrspace(1)* %t2
+  %t6 = load float addrspace(1)* addrspace(1)* undef
+  %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)*              ; <i8 addrspace(1)*> [#uses=1]
+  %t9 = getelementptr inbounds i8 addrspace(1)* %t8, i16 %t3 ; <i8 addrspace(1)*> [#uses=1]
+  store i8 undef, i8 addrspace(1)* %t9
+  br label %bb14
+}
+
diff --git a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
new file mode 100644
index 0000000..17c91e5
--- /dev/null
+++ b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s
+define void @unroll_opt_for_size() nounwind optsize {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add i32 %iv, 1
+  %exitcnd = icmp uge i32 %inc, 1024
+  br i1 %exitcnd, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @unroll_opt_for_size
+; CHECK:      add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK: icmp
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+  %cmp1 = icmp eq i32 %n, 0
+  br i1 %cmp1, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
+; CHECK-LABEL: @test
+; CHECK: unr.cmp{{.*}}:
+; CHECK: for.body.unr{{.*}}:
+; CHECK: for.body:
+; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
+
diff --git a/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg b/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
new file mode 100644
index 0000000..2e46300
--- /dev/null
+++ b/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'PowerPC' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopUnroll/lit.local.cfg b/test/Transforms/LoopUnroll/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/LoopUnroll/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopUnswitch/lit.local.cfg b/test/Transforms/LoopUnswitch/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/LoopUnswitch/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopVectorize/ARM/lit.local.cfg b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
index cb77b09..8a3ba96 100644
--- a/test/Transforms/LoopVectorize/ARM/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
     config.unsupported = True
diff --git a/test/Transforms/LoopVectorize/ARM/width-detect.ll b/test/Transforms/LoopVectorize/ARM/width-detect.ll
index c0795b6..99d7fa7 100644
--- a/test/Transforms/LoopVectorize/ARM/width-detect.ll
+++ b/test/Transforms/LoopVectorize/ARM/width-detect.ll
@@ -3,27 +3,27 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios3.0.0"
 
-;CHECK:foo_F64
-;CHECK: <2 x double>
+;CHECK:foo_F32
+;CHECK: <4 x float>
 ;CHECK:ret
-define double @foo_F64(double* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+define float @foo_F32(float* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
-  %prod.01 = phi double [ %4, %.lr.ph ], [ 0.000000e+00, %0 ]
-  %2 = getelementptr inbounds double* %A, i64 %indvars.iv
-  %3 = load double* %2, align 8
-  %4 = fmul fast double %prod.01, %3
+  %prod.01 = phi float [ %4, %.lr.ph ], [ 0.000000e+00, %0 ]
+  %2 = getelementptr inbounds float* %A, i64 %indvars.iv
+  %3 = load float* %2, align 8
+  %4 = fmul fast float %prod.01, %3
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
   br i1 %exitcond, label %._crit_edge, label %.lr.ph
 
 ._crit_edge:                                      ; preds = %.lr.ph, %0
-  %prod.0.lcssa = phi double [ 0.000000e+00, %0 ], [ %4, %.lr.ph ]
-  ret double %prod.0.lcssa
+  %prod.0.lcssa = phi float [ 0.000000e+00, %0 ], [ %4, %.lr.ph ]
+  ret float %prod.0.lcssa
 }
 
 ;CHECK:foo_I8
diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
new file mode 100644
index 0000000..885418c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -debug-only=loop-vectorize -O3 -S 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; We want to make sure that we don't even try to vectorize loops again.
+; The vectorizer used to mark only the un-vectorized loop as already vectorized,
+; thus trying to vectorize the vectorized loop again.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = external global [255 x i32]
+
+; Function Attrs: nounwind readonly uwtable
+define i32 @vect() {
+; CHECK: LV: Checking a loop in "vect"
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+; We need to make sure we did vectorize the loop
+; CHECK: LV: Found a loop: for.body
+; CHECK: LV: We can vectorize this loop!
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds [255 x i32]* @a, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %red.05
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 255
+  br i1 %exitcond, label %for.end, label %for.body
+
+; If it did, we have two loops:
+; CHECK: vector.body:
+; CHECK: br {{.*}} label %vector.body, !llvm.loop [[vect:![0-9]+]]
+; CHECK: for.body:
+; CHECK: br {{.*}} label %for.body, !llvm.loop [[scalar:![0-9]+]]
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add
+}
+
+; Now, we check for the Hint metadata
+; CHECK: [[vect]] = metadata !{metadata [[vect]], metadata [[width:![0-9]+]], metadata [[unroll:![0-9]+]]}
+; CHECK: [[width]] = metadata !{metadata !"llvm.vectorizer.width", i32 1}
+; CHECK: [[unroll]] = metadata !{metadata !"llvm.vectorizer.unroll", i32 1}
+; CHECK: [[scalar]] = metadata !{metadata [[scalar]], metadata [[width]], metadata [[unroll]]}
+
diff --git a/test/Transforms/LoopVectorize/X86/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll
index b7f479a..98718e1 100644
--- a/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 @a = common global [2048 x i32] zeroinitializer, align 16
 
 ; The program below gathers and scatters data. We better not vectorize it.
-;CHECK: cost_model_1
+;CHECK-LABEL: @cost_model_1(
 ;CHECK-NOT: <2 x i32>
 ;CHECK-NOT: <4 x i32>
 ;CHECK-NOT: <8 x i32>
diff --git a/test/Transforms/LoopVectorize/X86/lit.local.cfg b/test/Transforms/LoopVectorize/X86/lit.local.cfg
index a8ad0f1..ba763cf 100644
--- a/test/Transforms/LoopVectorize/X86/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Transforms/LoopVectorize/X86/rauw-bug.ll b/test/Transforms/LoopVectorize/X86/rauw-bug.ll
new file mode 100644
index 0000000..4284fba
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/rauw-bug.ll
@@ -0,0 +1,33 @@
+; RUN: opt -slp-vectorizer -S %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; This test used to fail under libgmalloc because we would try to access a
+; pointer that was already deleted.
+;
+; llvm-lit -v --param use_gmalloc=1 --param
+;   gmalloc_path=/usr/lib/libgmalloc.dylib
+;   test/Transforms/LoopVectorize/X86/rauw-bug.ll
+;
+; radar://15498655
+
+; CHECK: reduced
+define void @reduced()  {
+entry:
+  br i1 undef, label %while.body, label %while.cond63.preheader.while.end76_crit_edge
+
+while.cond63.preheader.while.end76_crit_edge:
+  ret void
+
+while.body:
+  %d2_fx.015 = phi double [ %sub52, %while.body ], [ undef, %entry ]
+  %d2_fy.014 = phi double [ %sub58, %while.body ], [ undef, %entry ]
+  %d3_fy.013 = phi double [ %div56, %while.body ], [ undef, %entry ]
+  %d3_fx.012 = phi double [ %div50, %while.body ], [ undef, %entry ]
+  %div50 = fmul double %d3_fx.012, 1.250000e-01
+  %sub52 = fsub double 0.000000e+00, %div50
+  %div56 = fmul double %d3_fy.013, 1.250000e-01
+  %sub58 = fsub double 0.000000e+00, %div56
+  br label %while.body
+}
diff --git a/test/Transforms/LoopVectorize/X86/tripcount.ll b/test/Transforms/LoopVectorize/X86/tripcount.ll
new file mode 100644
index 0000000..6b38bac
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/tripcount.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -mcpu=prescott < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-unknown-freebsd11.0"
+
+@big = external global [0 x i32]
+
+; PR18049
+; We need to truncate the exit count to i32. This is legal because the
+; arithmetic is signed (%inc is nsw).
+
+; CHECK-LABEL: tripcount
+; CHECK: trunc i64 %count to i32
+
+define void @tripcount(i64 %count) {
+entry:
+  %cmp6 = icmp sgt i64 %count, 0
+  br i1 %cmp6, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds [0 x i32]* @big, i32 0, i32 %i.07
+  %0 = load i32* %arrayidx, align 4
+  %neg = xor i32 %0, -1
+  store i32 %neg, i32* %arrayidx, align 4
+  %inc = add nsw i32 %i.07, 1
+  %conv = sext i32 %inc to i64
+  %cmp = icmp slt i64 %conv, %count
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/unroll-pm.ll b/test/Transforms/LoopVectorize/X86/unroll-pm.ll
new file mode 100644
index 0000000..5064fec
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/unroll-pm.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -O2 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -S | FileCheck %s
+; RUN: opt < %s -O2 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -disable-loop-unrolling -S | FileCheck %s -check-prefix=CHECK-NOUNRL
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+;CHECK-LABEL: @bar(
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+;CHECK-NOUNRL-LABEL: @bar(
+;CHECK-NOUNRL: store <4 x i32>
+;CHECK-NOUNRL-NOT: store <4 x i32>
+;CHECK-NOUNRL: ret
+define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = add nsw i32 %3, 6
+  store i32 %4, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef
+}
diff --git a/test/Transforms/LoopVectorize/XCore/lit.local.cfg b/test/Transforms/LoopVectorize/XCore/lit.local.cfg
new file mode 100644
index 0000000..4d17d46
--- /dev/null
+++ b/test/Transforms/LoopVectorize/XCore/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if not 'XCore' in targets:
+    config.unsupported = True
diff --git a/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll b/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll
new file mode 100644
index 0000000..a099daa
--- /dev/null
+++ b/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -S -mtriple=xcore | FileCheck %s
+
+target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32"
+target triple = "xcore"
+; The xcore target has no vector registers, so the loop should not be vectorized.
+;CHECK-LABEL: @f(
+;CHECK: entry:
+;CHECK-NOT: vector.body
+;CHECK-NEXT: br label %do.body
+define void @f(i8* nocapture %ptr, i32 %len) {
+entry:
+  br label %do.body
+do.body:
+  %ptr.addr.0 = phi i8* [ %ptr, %entry ], [ %incdec.ptr, %do.body ]
+  %len.addr.0 = phi i32 [ %len, %entry ], [ %dec, %do.body ]
+  %incdec.ptr = getelementptr inbounds i8* %ptr.addr.0, i32 1
+  store i8 0, i8* %ptr.addr.0, align 1
+  %dec = add nsw i32 %len.addr.0, -1
+  %tobool = icmp eq i32 %len.addr.0, 0
+  br i1 %tobool, label %do.end, label %do.body
+do.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/align.ll b/test/Transforms/LoopVectorize/align.ll
new file mode 100644
index 0000000..84b0361
--- /dev/null
+++ b/test/Transforms/LoopVectorize/align.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Make sure we output the ABI alignment if no alignment is specified.
+
+;CHECK-LABEL: @align
+;CHECK: load <4 x i32>* {{.*}} align  4
+;CHECK: load <4 x i32>* {{.*}} align  4
+;CHECK: store <4 x i32> {{.*}} align  4
+
+define void @align(i32* %a, i32* %b, i32* %c) nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %3 = load i32* %2
+  %4 = getelementptr inbounds i32* %c, i64 %indvars.iv
+  %5 = load i32* %4
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  store i32 %6, i32* %7
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 128 
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8                                       ; preds = %1
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/bsd_regex.ll b/test/Transforms/LoopVectorize/bsd_regex.ll
index a14b92d..7b71272 100644
--- a/test/Transforms/LoopVectorize/bsd_regex.ll
+++ b/test/Transforms/LoopVectorize/bsd_regex.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ;PR 15830.
 
-;CHECK: foo
+;CHECK-LABEL: @foo(
 ; When scalarizing stores we need to preserve the original order.
 ; Make sure that we are extracting in the correct order (0101, and not 0011).
 ;CHECK: extractelement <2 x i64> {{.*}}, i32 0
diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll
index b69e72f..2497b25 100644
--- a/test/Transforms/LoopVectorize/dbg.value.ll
+++ b/test/Transforms/LoopVectorize/dbg.value.ll
@@ -42,13 +42,14 @@ attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!26}
 
 !0 = metadata !{i32 786449, metadata !25, i32 4, metadata !"clang", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !11, null, metadata !""}
 !1 = metadata !{i32 0}
 !2 = metadata !{metadata !3}
 !3 = metadata !{i32 786478, metadata !25, metadata !4, metadata !"test", metadata !"test", metadata !"test", i32 5, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @test, null, null, metadata !8, i32 5}
 !4 = metadata !{i32 786473, metadata !25}
-!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0}
+!5 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{metadata !7}
 !7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
 !8 = metadata !{metadata !9}
@@ -56,7 +57,7 @@ attributes #1 = { nounwind readnone }
 !10 = metadata !{i32 786443, metadata !25, metadata !3, i32 6, i32 0, i32 0}
 !11 = metadata !{metadata !12, metadata !16, metadata !17}
 !12 = metadata !{i32 786484, i32 0, null, metadata !"A", metadata !"A", metadata !"", metadata !4, i32 1, metadata !13, i32 0, i32 1, [1024 x i32]* @A, null}
-!13 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 32768, i64 32, i32 0, i32 0, metadata !7, metadata !14, i32 0, i32 0}
+!13 = metadata !{i32 786433, null, null, null, i32 0, i64 32768, i64 32, i32 0, i32 0, metadata !7, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32768, align 32, offset 0] [from int]
 !14 = metadata !{metadata !15}
 !15 = metadata !{i32 786465, i64 0, i64 1024}
 !16 = metadata !{i32 786484, i32 0, null, metadata !"B", metadata !"B", metadata !"", metadata !4, i32 2, metadata !13, i32 0, i32 1, [1024 x i32]* @B, null}
@@ -66,3 +67,4 @@ attributes #1 = { nounwind readnone }
 !20 = metadata !{i32 786443, metadata !25, metadata !10, i32 6, i32 0, i32 1}
 !24 = metadata !{i32 9, i32 0, metadata !3, null}
 !25 = metadata !{metadata !"test", metadata !"/path/to/somewhere"}
+!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/LoopVectorize/debugloc.ll b/test/Transforms/LoopVectorize/debugloc.ll
index 0a6fc4e..bf0b418 100644
--- a/test/Transforms/LoopVectorize/debugloc.ll
+++ b/test/Transforms/LoopVectorize/debugloc.ll
@@ -5,17 +5,17 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; Make sure we are preserving debug info in the vectorized code.
 
 ; CHECK: for.body.lr.ph
-; CHECK:   cmp.zero = icmp eq i64 {{.*}}, 0, !dbg !21
+; CHECK:   cmp.zero = icmp eq i64 {{.*}}, 0, !dbg ![[LOC:[0-9]+]]
 ; CHECK: vector.body
-; CHECK:   index {{.*}}, !dbg !21
-; CHECK:   getelementptr inbounds i32* %a, {{.*}}, !dbg !22
-; CHECK:   load <2 x i32>* {{.*}}, !dbg !22
-; CHECK:   add <2 x i32> {{.*}}, !dbg !22
-; CHECK:   add i64 %index, 2, !dbg !21
-; CHECK:   icmp eq i64 %index.next, %end.idx.rnd.down, !dbg !21
+; CHECK:   index {{.*}}, !dbg ![[LOC]]
+; CHECK:   getelementptr inbounds i32* %a, {{.*}}, !dbg ![[LOC2:[0-9]+]]
+; CHECK:   load <2 x i32>* {{.*}}, !dbg ![[LOC2]]
+; CHECK:   add <2 x i32> {{.*}}, !dbg ![[LOC2]]
+; CHECK:   add i64 %index, 2, !dbg ![[LOC]]
+; CHECK:   icmp eq i64 %index.next, %end.idx.rnd.down, !dbg ![[LOC]]
 ; CHECK: middle.block
-; CHECK:   add <2 x i32> %rdx.vec.exit.phi, %rdx.shuf, !dbg !22
-; CHECK:   extractelement <2 x i32> %bin.rdx, i32 0, !dbg !22
+; CHECK:   add <2 x i32> %rdx.vec.exit.phi, %rdx.shuf, !dbg ![[LOC2]]
+; CHECK:   extractelement <2 x i32> %bin.rdx, i32 0, !dbg ![[LOC2]]
 
 define i32 @f(i32* nocapture %a, i32 %size) #0 {
 entry:
@@ -33,7 +33,7 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %sum.05 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv, !dbg !22
-  %0 = load i32* %arrayidx, align 4, !dbg !22, !tbaa !23
+  %0 = load i32* %arrayidx, align 4, !dbg !22
   %add = add i32 %0, %sum.05, !dbg !22
   tail call void @llvm.dbg.value(metadata !{i32 %add.lcssa}, i64 0, metadata !15), !dbg !22
   %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !21
@@ -61,7 +61,7 @@ attributes #0 = { nounwind readonly ssp uwtable "less-precise-fpmad"="false" "no
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!18}
+!llvm.module.flags = !{!18, !27}
 
 !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 185038) (llvm/trunk 185097)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Data/backedup/dev/os/llvm/debug/-] [DW_LANG_C99]
 !1 = metadata !{metadata !"-", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"}
@@ -70,7 +70,7 @@ attributes #1 = { nounwind readnone }
 !4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @f, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
 !5 = metadata !{metadata !"<stdin>", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"}
 !6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
-!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !10, metadata !11}
 !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
@@ -86,7 +86,5 @@ attributes #1 = { nounwind readnone }
 !20 = metadata !{i32 4, i32 0, metadata !4, null}
 !21 = metadata !{i32 5, i32 0, metadata !17, null}
 !22 = metadata !{i32 6, i32 0, metadata !17, null}
-!23 = metadata !{metadata !"int", metadata !24}
-!24 = metadata !{metadata !"omnipotent char", metadata !25}
-!25 = metadata !{metadata !"Simple C/C++ TBAA"}
 !26 = metadata !{i32 7, i32 0, metadata !4, null}
+!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/LoopVectorize/ee-crash.ll b/test/Transforms/LoopVectorize/ee-crash.ll
new file mode 100644
index 0000000..8a4f8ce
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ee-crash.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; This test checks that we deal with an in-loop extractelement (for now, this
+; means not crashing by not vectorizing).
+; CHECK-LABEL: @_Z4foo1Pii(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+define i32 @_Z4foo1Pii(i32* %A, i32 %n, <2 x i32> %q) #0 {
+entry:
+  %idx.ext = sext i32 %n to i64
+  %add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext
+  %cmp3.i = icmp eq i32 %n, 0
+  br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+for.body.i:                                       ; preds = %entry, %for.body.i
+  %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
+  %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
+  %0 = load i32* %__first.addr.04.i, align 4
+  %q1 = extractelement <2 x i32> %q, i32 %n
+  %q2 = add nsw i32 %0, %q1
+  %add.i = add nsw i32 %q2, %__init.addr.05.i
+  %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1
+  %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
+  br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
+  %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ]
+  ret i32 %__init.addr.0.lcssa.i
+}
+
+attributes #0 = { nounwind readonly ssp uwtable }
+
diff --git a/test/Transforms/LoopVectorize/funcall.ll b/test/Transforms/LoopVectorize/funcall.ll
index 0fb929f..f1f068c 100644
--- a/test/Transforms/LoopVectorize/funcall.ll
+++ b/test/Transforms/LoopVectorize/funcall.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; contain a limited set of function calls and none of them sets the rounding
 ; mode, so vectorizing them is safe.
 
-; CHECK: test
+; CHECK-LABEL: @test(
 ; CHECK: <2 x double>
 
 define void @test(double* %d, double %t) {
diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll
index ae72d3c..0118fb4 100644
--- a/test/Transforms/LoopVectorize/global_alias.ll
+++ b/test/Transforms/LoopVectorize/global_alias.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -O1 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
 
@@ -336,9 +336,8 @@ for.end:                                          ; preds = %for.cond
 ;   return Foo.A[a];
 ; }
 ; CHECK-LABEL: define i32 @noAlias07(
-; CHECK: sub nsw <4 x i32>
+; CHECK: store <4 x i32>
 ; CHECK: ret
-
 define i32 @noAlias07(i32 %a) #0 {
 entry:
   %a.addr = alloca i32, align 4
@@ -552,7 +551,7 @@ for.end:                                          ; preds = %for.cond
 ;   return Bar.A[N][a];
 ; }
 ; CHECK-LABEL: define i32 @noAlias11(
-; CHECK: sub nsw <4 x i32>
+; CHECK: store <4 x i32>
 ; CHECK: ret
 
 define i32 @noAlias11(i32 %a) #0 {
@@ -612,7 +611,7 @@ for.end:                                          ; preds = %for.cond
 ;   return Bar.A[N][a];
 ; }
 ; CHECK-LABEL: define i32 @noAlias12(
-; CHECK: sub nsw <4 x i32>
+; CHECK: store <4 x i32>
 ; CHECK: ret
 
 define i32 @noAlias12(i32 %a) #0 {
diff --git a/test/Transforms/LoopVectorize/hoist-loads.ll b/test/Transforms/LoopVectorize/hoist-loads.ll
index fad1735..765e14d 100644
--- a/test/Transforms/LoopVectorize/hoist-loads.ll
+++ b/test/Transforms/LoopVectorize/hoist-loads.ll
@@ -6,7 +6,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 @B = common global [1024 x float] zeroinitializer, align 16
 
 ; Make sure we can vectorize in the presence of hoistable conditional loads.
-; CHECK: hoist_cond_load
+; CHECK-LABEL: @hoist_cond_load(
 ; CHECK: load <2 x float>
 
 define void @hoist_cond_load() {
@@ -38,7 +38,7 @@ for.end:
 
 ; However, we can't hoist loads whose address we have not seen unconditionally
 ; accessed.
-; CHECK:     dont_hoist_cond_load
+; CHECK-LABEL: @dont_hoist_cond_load(
 ; CHECK-NOT: load <2 x float>
 
 define void @dont_hoist_cond_load() {
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
index 88e56b2..dbe0243 100644
--- a/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -106,3 +106,66 @@ for.end:                                          ; preds = %for.inc, %entry
   ret i32 %sum.0.lcssa
 }
 
+@a = common global [1 x i32*] zeroinitializer, align 8
+@c = common global i32* null, align 8
+
+; We used to if-convert this loop. That is not safe because the phi in cond.end
+; has a trapping constant expression (sdiv) as an incoming value.
+; PR16729
+
+; CHECK-LABEL: @trapping_constant_expression(
+; CHECK-NOT: or <4 x i32>
+
+define i32 @trapping_constant_expression() {
+entry:
+  br label %for.body
+
+for.body:
+  %inc3 = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+  %or2 = phi i32 [ 0, %entry ], [ %or, %cond.end ]
+  br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end
+
+cond.false:                                       ; empty block; only selects the sdiv incoming value of %cond
+  br label %cond.end
+
+cond.end:
+  %cond = phi i32 [ sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c) to i32)), %cond.false ], [ 0, %for.body ]
+  %or = or i32 %or2, %cond
+  %inc = add nsw i32 %inc3, 1
+  %cmp = icmp slt i32 %inc, 128
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret i32 %or
+}
+
+; Neither should we if-convert if there is an instruction operand that is a
+; trapping constant expression.
+; PR16729
+
+; CHECK-LABEL: @trapping_constant_expression2(
+; CHECK-NOT: or <4 x i32>
+
+define i32 @trapping_constant_expression2() {
+entry:
+  br label %for.body
+
+for.body:
+  %inc3 = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+  %or2 = phi i32 [ 0, %entry ], [ %or, %cond.end ]
+  br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end
+
+cond.false:                                       ; the sdiv constant expression is an operand of the 'or' below
+  %cond.1 = or i32 %inc3, sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c) to i32))
+  br label %cond.end
+
+cond.end:
+  %cond = phi i32 [ %cond.1, %cond.false ], [ %inc3, %for.body ]
+  %or = or i32 %or2, %cond
+  %inc = add nsw i32 %inc3, 1
+  %cmp = icmp slt i32 %inc, 128
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret i32 %or
+}
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
index 48bb438..50c3b6b 100644
--- a/test/Transforms/LoopVectorize/induction.ll
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 ; Make sure that we can handle multiple integer induction variables.
-; CHECK: multi_int_induction
+; CHECK-LABEL: @multi_int_induction(
 ; CHECK: vector.body:
 ; CHECK:  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; CHECK:  %normalized.idx = sub i64 %index, 0
@@ -28,3 +28,83 @@ for.end:
   ret void
 }
 
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
+
+; Make sure we remove unneeded vectorization of induction variables.
+; In order for instcombine to clean up the vectorized induction variables that
+; we create in the loop vectorizer we need to perform some form of redundancy
+; elimination to get rid of multiple uses.
+
+; IND-LABEL: @scalar_use(
+
+; IND:     br label %vector.body
+; IND:     vector.body:
+;   Vectorized induction variable.
+; IND-NOT:  insertelement <2 x i64>
+; IND-NOT:  shufflevector <2 x i64>
+; IND:     br {{.*}}, label %vector.body
+
+define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %ind.sum = add i64 %iv, %offset                 ; first index derived from the induction variable
+  %arr.idx = getelementptr inbounds float* %a, i64 %ind.sum
+  %l1 = load float* %arr.idx, align 4
+  %ind.sum2 = add i64 %iv, %offset2               ; second index derived from the same induction variable
+  %arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2
+  %l2 = load float* %arr.idx2, align 4
+  %m = fmul fast float %b, %l2
+  %ad = fadd fast float %l1, %m
+  store float %ad, float* %arr.idx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %n
+  br i1 %exitcond, label %loopexit, label %for.body
+
+loopexit:
+  ret void
+}
+
+
+; Make sure that the loop exit count computation does not overflow for i8 and
+; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the
+; induction variable to a bigger type the exit count computation will overflow
+; to 0.
+; PR17532
+
+; CHECK-LABEL: @i8_loop(
+; CHECK: icmp eq i32 {{.*}}, 256
+define i32 @i8_loop() nounwind readnone ssp uwtable {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
+  %b.0 = phi i8 [ 0, %0 ], [ %3, %1 ]
+  %2 = and i32 %a.0, 4
+  %3 = add i8 %b.0, -1                            ; i8 counter: 0, 255, 254, ...
+  %4 = icmp eq i8 %3, 0                           ; true again only after 256 iterations
+  br i1 %4, label %5, label %1
+
+; <label>:5                                       ; preds = %1
+  ret i32 %2
+}
+
+; CHECK-LABEL: @i16_loop(
+; CHECK: icmp eq i32 {{.*}}, 65536
+
+define i32 @i16_loop() nounwind readnone ssp uwtable {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
+  %b.0 = phi i16 [ 0, %0 ], [ %3, %1 ]
+  %2 = and i32 %a.0, 4
+  %3 = add i16 %b.0, -1                           ; i16 counter: 0, 65535, 65534, ...
+  %4 = icmp eq i16 %3, 0                          ; true again only after 65536 iterations
+  br i1 %4, label %5, label %1
+
+; <label>:5                                       ; preds = %1
+  ret i32 %2
+}
diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll
index 6141c39..9c8201a 100644
--- a/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/test/Transforms/LoopVectorize/induction_plus.ll
@@ -6,8 +6,8 @@ target triple = "x86_64-apple-macosx10.8.0"
 @array = common global [1024 x i32] zeroinitializer, align 16
 
 ;CHECK-LABEL: @array_at_plus_one(
-;CHECK: trunc i64
 ;CHECK: add i64 %index, 12
+;CHECK: trunc i64
 ;CHECK: ret i32
 define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
diff --git a/test/Transforms/LoopVectorize/infiniteloop.ll b/test/Transforms/LoopVectorize/infiniteloop.ll
index f6ab564..5c5e1a3 100644
--- a/test/Transforms/LoopVectorize/infiniteloop.ll
+++ b/test/Transforms/LoopVectorize/infiniteloop.ll
@@ -14,7 +14,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 ;   // return SCEVCouldNotCompute.
 ; For an infinite loop SE can return any number.
 
-; CHECK: fn1
+; CHECK-LABEL: @fn1(
 define void @fn1()  {
 entry:
   store i64 0, i64* @a, align 8
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
index 95b53b7..c3d570c 100644
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -468,6 +468,59 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.fabs(double) nounwind readnone
 
+;CHECK-LABEL: @copysign_f32(
+;CHECK: llvm.copysign.v4f32
+;CHECK: ret void
+define void @copysign_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %1 = load float* %arrayidx1, align 4
+  %call = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.copysign.f32(float, float) nounwind readnone
+
+define void @copysign_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable { ; NOTE(review): unlike @copysign_f32, no FileCheck directives cover this function yet
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8
+  %arrayidx1 = getelementptr inbounds double* %z, i64 %indvars.iv
+  %1 = load double* %arrayidx1, align 8           ; second operand is read from %z, mirroring @copysign_f32
+  %call = tail call double @llvm.copysign.f64(double %0, double %1) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.copysign.f64(double, double) nounwind readnone
+
 ;CHECK-LABEL: @floor_f32(
 ;CHECK: llvm.floor.v4f32
 ;CHECK: ret void
@@ -728,6 +781,58 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.nearbyint.f64(double) nounwind readnone
 
+;CHECK-LABEL: @round_f32(
+;CHECK: llvm.round.v4f32
+;CHECK: ret void
+define void @round_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %call = tail call float @llvm.round.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.round.f32(float) nounwind readnone
+
+;CHECK-LABEL: @round_f64(
+;CHECK: llvm.round.v4f64
+;CHECK: ret void
+define void @round_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8
+  %call = tail call double @llvm.round.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.round.f64(double) nounwind readnone
+
 ;CHECK-LABEL: @fma_f32(
 ;CHECK: llvm.fma.v4f32
 ;CHECK: ret void
@@ -927,3 +1032,61 @@ for.end:                                          ; preds = %for.body
 declare float @fabsf(float) nounwind readnone
 
 declare double @llvm.pow.f64(double, double) nounwind readnone
+
+
+
+; Make sure we don't replace calls to functions with standard library function
+; signatures but defined with internal linkage.
+
+define internal float @roundf(float %x) nounwind readnone {
+  ret float 0.00000000
+}
+; CHECK-LABEL: @internal_round(
+; CHECK-NOT:  load <4 x float>
+
+define void @internal_round(float* nocapture %x) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %call = tail call float @roundf(float %0) nounwind readnone ; calls the internal @roundf defined above
+  store float %call, float* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Make sure we don't replace calls to functions with standard library names but
+; different signatures.
+
+declare void @round(double %f)
+
+; CHECK-LABEL: @wrong_signature(
+; CHECK-NOT:  load <4 x double>
+
+define void @wrong_signature(double* nocapture %x) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double* %x, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 4
+  store double %0, double* %arrayidx, align 4
+  tail call void @round(double %0) nounwind readnone ; @round is declared above as returning void
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/lifetime.ll b/test/Transforms/LoopVectorize/lifetime.ll
index 87006ed..4f6f3b8 100644
--- a/test/Transforms/LoopVectorize/lifetime.ll
+++ b/test/Transforms/LoopVectorize/lifetime.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; Make sure we can vectorize loops which contain lifetime markers.
 
-; CHECK-LABEL: test
+; CHECK-LABEL: @test(
 ; CHECK: call void @llvm.lifetime.end
 ; CHECK: store <2 x i32>
 ; CHECK: call void @llvm.lifetime.start
@@ -33,7 +33,7 @@ for.end:
   ret void
 }
 
-; CHECK-LABEL: testbitcast
+; CHECK-LABEL: @testbitcast(
 ; CHECK: call void @llvm.lifetime.end
 ; CHECK: store <2 x i32>
 ; CHECK: call void @llvm.lifetime.start
@@ -63,7 +63,7 @@ for.end:
   ret void
 }
 
-; CHECK-LABEL: testloopvariant
+; CHECK-LABEL: @testloopvariant(
 ; CHECK: call void @llvm.lifetime.end
 ; CHECK: store <2 x i32>
 ; CHECK: call void @llvm.lifetime.start
diff --git a/test/Transforms/LoopVectorize/lit.local.cfg b/test/Transforms/LoopVectorize/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/LoopVectorize/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopVectorize/memdep.ll b/test/Transforms/LoopVectorize/memdep.ll
index b6d9e2e..21cb703 100644
--- a/test/Transforms/LoopVectorize/memdep.ll
+++ b/test/Transforms/LoopVectorize/memdep.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;  for (i = 0; i < 1024; ++i)
 ;    A[i] = A[i + 1] + 1;
 
-; CHECK: f1_vec
+; CHECK-LABEL: @f1_vec(
 ; CHECK: <2 x i32>
 
 define void @f1_vec(i32* %A) {
@@ -35,7 +35,7 @@ for.end:
 ;  for (i = 0; i < 1024; ++i)
 ;    A[i+1] = A[i] + 1;
 
-; CHECK: f2_novec
+; CHECK-LABEL: @f2_novec(
 ; CHECK-NOT: <2 x i32>
 
 define void @f2_novec(i32* %A) {
@@ -61,7 +61,7 @@ for.end:
 ;  for (i = 0; i < 1024; ++i)
 ;    A[i+2] = A[i] + 1;
 
-; CHECK: f3_vec_len
+; CHECK-LABEL: @f3_vec_len(
 ; CHECK: <2 x i32>
 
 ; WIDTH: f3_vec_len
@@ -96,7 +96,7 @@ for.end:
 ;     A[i] = B[i + 1];
 ;   }
 
-; CHECK: f5
+; CHECK-LABEL: @f5(
 ; CHECK-NOT: <2 x i32>
 
 define void @f5(i32*  %A, i32* %B) {
@@ -127,7 +127,7 @@ for.end:
 ;     tmp = a[i];
 ;   }
 
-; CHECK: f6
+; CHECK-LABEL: @f6(
 ; CHECK-NOT: <2 x i32>
 
 define i32 @f6(i32* %a, i32 %tmp) {
diff --git a/test/Transforms/LoopVectorize/minmax_reduction.ll b/test/Transforms/LoopVectorize/minmax_reduction.ll
index bade561..0e47260 100644
--- a/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -17,7 +17,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp sgt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @max_red(i32 %max) {
 entry:
@@ -46,7 +46,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp sgt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @max_red_inverse_select(i32 %max) {
 entry:
@@ -74,7 +74,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp slt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @min_red(i32 %max) {
 entry:
@@ -103,7 +103,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp slt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @min_red_inverse_select(i32 %max) {
 entry:
@@ -133,7 +133,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ugt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @umax_red(i32 %max) {
 entry:
@@ -162,7 +162,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ugt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @umax_red_inverse_select(i32 %max) {
 entry:
@@ -190,7 +190,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ult <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @umin_red(i32 %max) {
 entry:
@@ -219,7 +219,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ult <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @umin_red_inverse_select(i32 %max) {
 entry:
@@ -248,7 +248,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp slt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @sge_min_red(i32 %max) {
 entry:
@@ -277,7 +277,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp sgt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @sle_min_red(i32 %max) {
 entry:
@@ -306,7 +306,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ult <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @uge_min_red(i32 %max) {
 entry:
@@ -335,7 +335,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ugt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @ule_min_red(i32 %max) {
 entry:
@@ -416,7 +416,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @max_red_float(float %max) #0 {
 entry:
@@ -442,7 +442,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @max_red_float_ge(float %max) #0 {
 entry:
@@ -468,7 +468,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_max_red_float(float %max) #0 {
 entry:
@@ -494,7 +494,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_max_red_float_le(float %max) #0 {
 entry:
@@ -515,12 +515,12 @@ for.end:
   ret float %max.red.0
 }
 
-; CHECK: @unordered_max_red
+; CHECK-LABEL: @unordered_max_red_float(
 ; CHECK: fcmp ugt <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @unordered_max_red_float(float %max) #0 {
 entry:
@@ -546,7 +546,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @unordered_max_red_float_ge(float %max) #0 {
 entry:
@@ -567,12 +567,12 @@ for.end:
   ret float %max.red.0
 }
 
-; CHECK: @inverted_unordered_max_red
+; CHECK-LABEL: @inverted_unordered_max_red_float(
 ; CHECK: fcmp ult <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_unordered_max_red_float(float %max) #0 {
 entry:
@@ -598,7 +598,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_unordered_max_red_float_le(float %max) #0 {
 entry:
@@ -627,7 +627,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @min_red_float(float %min) #0 {
 entry:
@@ -653,7 +653,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @min_red_float_le(float %min) #0 {
 entry:
@@ -679,7 +679,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_min_red_float(float %min) #0 {
 entry:
@@ -705,7 +705,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_min_red_float_ge(float %min) #0 {
 entry:
@@ -726,12 +726,12 @@ for.end:
   ret float %min.red.0
 }
 
-; CHECK: @unordered_min_red
+; CHECK-LABEL: @unordered_min_red_float(
 ; CHECK: fcmp ult <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @unordered_min_red_float(float %min) #0 {
 entry:
@@ -757,7 +757,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @unordered_min_red_float_le(float %min) #0 {
 entry:
@@ -778,12 +778,12 @@ for.end:
   ret float %min.red.0
 }
 
-; CHECK: @inverted_unordered_min_red
+; CHECK-LABEL: @inverted_unordered_min_red_float(
 ; CHECK: fcmp ugt <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_unordered_min_red_float(float %min) #0 {
 entry:
@@ -809,7 +809,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_unordered_min_red_float_ge(float %min) #0 {
 entry:
@@ -836,7 +836,7 @@ for.end:
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x double>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define double @min_red_double(double %min) #0 {
 entry:
@@ -882,4 +882,4 @@ for.end:
 }
 
 
-attributes #0 = { "no-nans-fp-math"="true" } 
+attributes #0 = { "no-nans-fp-math"="true" }
diff --git a/test/Transforms/LoopVectorize/multiple-address-spaces.ll b/test/Transforms/LoopVectorize/multiple-address-spaces.ll
index 6906195..7d836de 100644
--- a/test/Transforms/LoopVectorize/multiple-address-spaces.ll
+++ b/test/Transforms/LoopVectorize/multiple-address-spaces.ll
@@ -28,10 +28,10 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds [40000 x i8] addrspace(1)* @Y, i64 0, i64 %indvars.iv
-  %0 = load i8 addrspace(1)* %arrayidx, align 1, !tbaa !0
+  %0 = load i8 addrspace(1)* %arrayidx, align 1
   %add = add i8 %0, 1
   %arrayidx3 = getelementptr inbounds [40000 x i8]* @X, i64 0, i64 %indvars.iv
-  store i8 %add, i8* %arrayidx3, align 1, !tbaa !0
+  store i8 %add, i8* %arrayidx3, align 1
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 40000
@@ -42,6 +42,3 @@ for.end:                                          ; preds = %for.body
 }
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/no_int_induction.ll b/test/Transforms/LoopVectorize/no_int_induction.ll
index 66d5301..e572d1a 100644
--- a/test/Transforms/LoopVectorize/no_int_induction.ll
+++ b/test/Transforms/LoopVectorize/no_int_induction.ll
@@ -4,10 +4,10 @@
 ;  return std::accumulate(A, A + n, 0);
 ; }
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.8.0"
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128"
 
 ;CHECK-LABEL: @sum_array(
+;CHECK: phi i64
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: add nsw <4 x i32>
@@ -31,3 +31,30 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %.lr.ph.i, %0
   %.01.lcssa.i = phi i32 [ 0, %0 ], [ %5, %.lr.ph.i ]
   ret i32 %.01.lcssa.i
 }
+
+; Same as @sum_array, but with an addrspace(1) pointer, which is 16 bits wide here.
+;CHECK-LABEL: @sum_array_as1(
+;CHECK: phi i16
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: ret i32
+define i32 @sum_array_as1(i32 addrspace(1)* %A, i32 %n) nounwind uwtable readonly noinline ssp { ; sums the n i32 elements starting at A
+  %1 = sext i32 %n to i64
+  %2 = getelementptr inbounds i32 addrspace(1)* %A, i64 %1 ; end pointer: A + n
+  %3 = icmp eq i32 %n, 0 ; n == 0: skip the loop, result is 0
+  br i1 %3, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
+
+.lr.ph.i:                                         ; preds = %0, %.lr.ph.i
+  %.03.i = phi i32 addrspace(1)* [ %6, %.lr.ph.i ], [ %A, %0 ] ; current element pointer (pointer induction)
+  %.012.i = phi i32 [ %5, %.lr.ph.i ], [ 0, %0 ] ; running sum
+  %4 = load i32 addrspace(1)* %.03.i, align 4
+  %5 = add nsw i32 %4, %.012.i
+  %6 = getelementptr inbounds i32 addrspace(1)* %.03.i, i64 1 ; advance to the next element
+  %7 = icmp eq i32 addrspace(1)* %6, %2 ; reached the end pointer?
+  br i1 %7, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %.lr.ph.i, %0
+  %.01.lcssa.i = phi i32 [ 0, %0 ], [ %5, %.lr.ph.i ] ; 0 for an empty range, else the final sum
+  ret i32 %.01.lcssa.i
+}
diff --git a/test/Transforms/LoopVectorize/no_outside_user.ll b/test/Transforms/LoopVectorize/no_outside_user.ll
index 6f0357c..1f891ad 100644
--- a/test/Transforms/LoopVectorize/no_outside_user.ll
+++ b/test/Transforms/LoopVectorize/no_outside_user.ll
@@ -12,6 +12,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 ; We used to vectorize this loop. But it has a value that is used outside of the
 ; and is not a recognized reduction variable "tmp17".
 
+; CHECK-LABEL: @main(
 ; CHECK-NOT: <2 x i32>
 
 define i32 @main()  {
@@ -38,4 +39,33 @@ f1.exit.loopexit:
   ret i32 %.lcssa
 }
 
+; Don't vectorize this loop. Its phi node (induction variable) has a user
+; outside the loop. We currently don't handle this case.
+; PR17179
 
+; CHECK-LABEL: @test2(
+; CHECK-NOT:  <2 x
+
+@x1 = common global i32 0, align 4
+@x2 = common global i32 0, align 4
+@x0 = common global i32 0, align 4
+
+define i32 @test2()  { ; counting loop whose induction phi is also read after the loop
+entry:
+  store i32 0, i32* @x1, align 4
+  %0 = load i32* @x0, align 4
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %inc7 = phi i32 [ 0, %entry ], [ %inc, %for.cond1.preheader ] ; induction variable
+  %inc = add nsw i32 %inc7, 1
+  %cmp = icmp eq i32 %inc, 52 ; exit once the incremented value reaches 52
+  br i1 %cmp, label %for.end5, label %for.cond1.preheader
+
+for.end5:
+  %inc7.lcssa = phi i32 [ %inc7, %for.cond1.preheader ] ; use of the induction phi outside the loop
+  %xor = xor i32 %inc7.lcssa, %0
+  store i32 52, i32* @x1, align 4
+  store i32 1, i32* @x2, align 4
+  ret i32 %xor
+}
diff --git a/test/Transforms/LoopVectorize/opt.ll b/test/Transforms/LoopVectorize/opt.ll
new file mode 100644
index 0000000..27030a2
--- /dev/null
+++ b/test/Transforms/LoopVectorize/opt.ll
@@ -0,0 +1,28 @@
+; RUN: opt -S -O3 -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=LOOPVEC %s
+; RUN: opt -S -O3 -disable-loop-vectorization -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=NOLOOPVEC %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Make sure we can disable vectorization in opt.
+
+; LOOPVEC:       add <2 x i32>
+; NOLOOPVEC-NOT: add <2 x i32>
+
+define i32 @vect(i32* %a) { ; returns a[0] + ... + a[254]
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop index
+  %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] ; running sum
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %red.05 ; the add the two RUN lines check for in vector form
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 255 ; constant trip count of 255
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %add
+}
diff --git a/test/Transforms/LoopVectorize/ptr_loops.ll b/test/Transforms/LoopVectorize/ptr_loops.ll
index 25599f8..15983f0 100644
--- a/test/Transforms/LoopVectorize/ptr_loops.ll
+++ b/test/Transforms/LoopVectorize/ptr_loops.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 @A = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16
 @B = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16
 
-;CHECK:_Z5test1v
+;CHECK-LABEL:@_Z5test1v(
 ;CHECK: load <4 x i32>
 ;CHECK: shufflevector <4 x i32>
 ;CHECK: store <4 x i32>
@@ -29,7 +29,7 @@ define i32 @_Z5test1v() nounwind uwtable ssp {
   ret i32 0
 }
 
-;CHECK:_Z5test2v
+;CHECK-LABEL: @_Z5test2v(
 ;CHECK: load <4 x i32>
 ;CHECK: shufflevector <4 x i32>
 ;CHECK: store <4 x i32>
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
index 18a0a93..791fce1 100644
--- a/test/Transforms/LoopVectorize/reduction.ll
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -467,3 +467,30 @@ for.body:
 for.end:
   ret i32 %p.addr.02
 }
+
+; Don't vectorize a reduction value that is not the last in a reduction cycle. We
+; would lose iterations (VF-1) on the operations after that use.
+; PR17498
+
+; CHECK-LABEL: @not_last_operation(
+; CHECK-NOT: x i32>
+define i32 @not_last_operation(i32 %p, i32 %val) { ; reduction whose intermediate value escapes the loop
+entry:
+  %tobool = icmp eq i32 %p, 0
+  br label %for.body
+
+for.body:
+  %inc613.1 = phi i32 [ 0, %entry ], [ %inc6.1, %for.body ] ; loop counter
+  %inc511.1 = phi i32 [ %val, %entry ], [ %inc5.1, %for.body ] ; reduction phi
+  %0 = zext i1 %tobool to i32
+  %inc4.1 = xor i32 %0, 1
+  %inc511.1.inc4.1 = add nsw i32 %inc511.1, %inc4.1 ; intermediate value of the reduction chain
+  %inc5.1 = add nsw i32 %inc511.1.inc4.1, 1 ; last operation of the chain (feeds the phi)
+  %inc6.1 = add nsw i32 %inc613.1, 1
+  %exitcond.1 = icmp eq i32 %inc6.1, 22 ; exit after 22 iterations
+  br i1 %exitcond.1, label %exit, label %for.body
+
+exit:
+  %inc.2 = add nsw i32 %inc511.1.inc4.1, 2 ; outside use of the intermediate (not last) value
+  ret i32 %inc.2
+}
diff --git a/test/Transforms/LoopVectorize/reverse_induction.ll b/test/Transforms/LoopVectorize/reverse_induction.ll
index 9e8c1b1..65ef95d 100644
--- a/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; Make sure consecutive vector generates correct negative indices.
 ; PR15882
 
-; CHECK: reverse_induction_i64
+; CHECK-LABEL: @reverse_induction_i64(
 ; CHECK: add <4 x i64> %[[SPLAT:.*]], <i64 0, i64 -1, i64 -2, i64 -3>
 ; CHECK: add <4 x i64> %[[SPLAT]], <i64 -4, i64 -5, i64 -6, i64 -7>
 
@@ -29,7 +29,7 @@ loopend:
   ret i32 %inc.redux
 }
 
-; CHECK: reverse_induction_i128
+; CHECK-LABEL: @reverse_induction_i128(
 ; CHECK: add <4 x i128> %[[SPLAT:.*]], <i128 0, i128 -1, i128 -2, i128 -3>
 ; CHECK: add <4 x i128> %[[SPLAT]], <i128 -4, i128 -5, i128 -6, i128 -7>
 define i32 @reverse_induction_i128(i128 %startval, i32 * %ptr) {
@@ -52,7 +52,7 @@ loopend:
   ret i32 %inc.redux
 }
 
-; CHECK: reverse_induction_i16
+; CHECK-LABEL: @reverse_induction_i16(
 ; CHECK: add <4 x i16> %[[SPLAT:.*]], <i16 0, i16 -1, i16 -2, i16 -3>
 ; CHECK: add <4 x i16> %[[SPLAT]], <i16 -4, i16 -5, i16 -6, i16 -7>
 
@@ -93,7 +93,7 @@ loopend:
 ;   }
 ; }
 
-; CHECK: reverse_forward_induction_i64_i8
+; CHECK-LABEL: @reverse_forward_induction_i64_i8(
 ; CHECK: vector.body
 ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; CHECK: %normalized.idx = sub i64 %index, 0
@@ -120,7 +120,7 @@ while.end:
   ret void
 }
 
-; CHECK: reverse_forward_induction_i64_i8_signed
+; CHECK-LABEL: @reverse_forward_induction_i64_i8_signed(
 ; CHECK: vector.body:
 ; CHECK:  %index = phi i64 [ 129, %vector.ph ], [ %index.next, %vector.body ]
 ; CHECK:  %normalized.idx = sub i64 %index, 129
diff --git a/test/Transforms/LoopVectorize/runtime-check-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-address-space.ll
new file mode 100644
index 0000000..6c86561
--- /dev/null
+++ b/test/Transforms/LoopVectorize/runtime-check-address-space.ll
@@ -0,0 +1,235 @@
+; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
+
+; Check vectorization that would ordinarily require a runtime bounds
+; check on the pointers when mixing address spaces. For now we cannot
+; assume address spaces do not alias, and we can't assume that
+; different pointers are directly comparable.
+;
+; These all test this basic loop for different combinations of address
+; spaces, and swapping in globals or adding noalias.
+;
+;void foo(int addrspace(N)* [noalias] a, int addrspace(M)* [noalias] b, int n)
+;{
+;    for (int i = 0; i < n; ++i)
+;    {
+;        a[i] = 3 * b[i];
+;    }
+;}
+
+; Artificial datalayout
+target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+
+
+@g_as1 = common addrspace(1) global [1024 x i32] zeroinitializer, align 16
+@q_as2 = common addrspace(2) global [1024 x i32] zeroinitializer, align 16
+
+; Both parameters are unidentified objects with the same address
+; space, so this should vectorize normally.
+define void @foo(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %n) #0 {
+; CHECK-LABEL: @foo(
+; CHECK: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
+  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %mul = mul nsw i32 %0, 3
+  %idxprom1 = sext i32 %i.0 to i64
+  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1
+  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; Parameters are unidentified objects in different address spaces, so this cannot be vectorized.
+define void @bar0(i32* %a, i32 addrspace(1)* %b, i32 %n) #0 {
+; CHECK-LABEL: @bar0(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
+  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %mul = mul nsw i32 %0, 3
+  %idxprom1 = sext i32 %i.0 to i64
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1
+  store i32 %mul, i32* %arrayidx2, align 4
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; Swapping the arguments' address spaces should give the same result (no vectorization).
+define void @bar1(i32 addrspace(1)* %a, i32* %b, i32 %n) #0 {
+; CHECK-LABEL: @bar1(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32* %b, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  %mul = mul nsw i32 %0, 3
+  %idxprom1 = sext i32 %i.0 to i64
+  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1
+  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; We should still be able to vectorize with noalias even if the
+; address spaces are different.
+define void @bar2(i32* noalias %a, i32 addrspace(1)* noalias %b, i32 %n) #0 {
+; CHECK-LABEL: @bar2(
+; CHECK: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
+  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %mul = mul nsw i32 %0, 3
+  %idxprom1 = sext i32 %i.0 to i64
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1
+  store i32 %mul, i32* %arrayidx2, align 4
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; Store to identified global with different address space. This isn't
+; generally safe and shouldn't be vectorized.
+define void @arst0(i32* %b, i32 %n) #0 {
+; CHECK-LABEL: @arst0(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32* %b, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  %mul = mul nsw i32 %0, 3
+  %idxprom1 = sext i32 %i.0 to i64
+  %arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
+  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+
+; Load from identified global with different address space.
+; This isn't generally safe and shouldn't be vectorized.
+define void @arst1(i32* %b, i32 %n) #0 {
+; CHECK-LABEL: @arst1(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom
+  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %mul = mul nsw i32 %0, 3
+  %idxprom1 = sext i32 %i.0 to i64
+  %arrayidx2 = getelementptr inbounds i32* %b, i64 %idxprom1
+  store i32 %mul, i32* %arrayidx2, align 4
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; Read from and write to two identified globals in different address
+; spaces. This should be vectorized.
+define void @aoeu(i32 %n) #0 {
+; CHECK-LABEL: @aoeu(
+; CHECK: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %idxprom
+  %0 = load i32 addrspace(2)* %arrayidx, align 4
+  %mul = mul nsw i32 %0, 3
+  %idxprom1 = sext i32 %i.0 to i64
+  %arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
+  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
new file mode 100644
index 0000000..212b37c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
@@ -0,0 +1,142 @@
+; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
+
+; Artificial datalayout
+target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+
+
+define void @add_ints_1_1_1(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #0 {
+; CHECK-LABEL: @add_ints_1_1_1(
+; CHECK: <4 x i32>
+; CHECK: ret
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i64 %i.0, 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0
+  %1 = load i32 addrspace(1)* %arrayidx1, align 4
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0
+  store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
+  %inc = add i64 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+define void @add_ints_as_1_0_0(i32 addrspace(1)* %a, i32* %b, i32* %c) #0 {
+; CHECK-LABEL: @add_ints_as_1_0_0(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i64 %i.0, 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32* %b, i64 %i.0
+  %0 = load i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0
+  %1 = load i32* %arrayidx1, align 4
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0
+  store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
+  %inc = add i64 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+define void @add_ints_as_0_1_0(i32* %a, i32 addrspace(1)* %b, i32* %c) #0 {
+; CHECK-LABEL: @add_ints_as_0_1_0(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i64 %i.0, 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0
+  %1 = load i32* %arrayidx1, align 4
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
+  store i32 %add, i32* %arrayidx2, align 4
+  %inc = add i64 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+define void @add_ints_as_0_1_1(i32* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #0 {
+; CHECK-LABEL: @add_ints_as_0_1_1(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i64 %i.0, 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0
+  %1 = load i32 addrspace(1)* %arrayidx1, align 4
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
+  store i32 %add, i32* %arrayidx2, align 4
+  %inc = add i64 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+define void @add_ints_as_0_1_2(i32* %a, i32 addrspace(1)* %b, i32 addrspace(2)* %c) #0 {
+; CHECK-LABEL: @add_ints_as_0_1_2(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i64 %i.0, 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32 addrspace(2)* %c, i64 %i.0
+  %1 = load i32 addrspace(2)* %arrayidx1, align 4
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
+  store i32 %add, i32* %arrayidx2, align 4
+  %inc = add i64 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
index 4145d13..a2b9ad9 100644
--- a/test/Transforms/LoopVectorize/runtime-check-readonly.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;CHECK: add_ints
+;CHECK-LABEL: @add_ints(
 ;CHECK: br
 ;CHECK: getelementptr
 ;CHECK-NEXT: getelementptr
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
index 4772256..d15479d 100644
--- a/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -34,3 +34,31 @@ for.body:                                         ; preds = %entry, %for.body
 for.end:                                          ; preds = %for.body, %entry
   ret i32 undef
 }
+
+; Make sure that we try to vectorize loops with a runtime check if the
+; dependency check fails.
+
+; CHECK-LABEL: @test_runtime_check(
+; CHECK:      <4 x float>
+define void @test_runtime_check(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %ind.sum = add i64 %iv, %offset                   ; first index: iv + %offset (function argument)
+  %arr.idx = getelementptr inbounds float* %a, i64 %ind.sum
+  %l1 = load float* %arr.idx, align 4
+  %ind.sum2 = add i64 %iv, %offset2                 ; second index into the same array %a: iv + %offset2
+  %arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2
+  %l2 = load float* %arr.idx2, align 4
+  %m = fmul fast float %b, %l2
+  %ad = fadd fast float %l1, %m
+  store float %ad, float* %arr.idx, align 4         ; written back to the first location
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %n
+  br i1 %exitcond, label %loopexit, label %for.body
+
+loopexit:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/runtime-limit.ll b/test/Transforms/LoopVectorize/runtime-limit.ll
index d783974..7370a6f 100644
--- a/test/Transforms/LoopVectorize/runtime-limit.ll
+++ b/test/Transforms/LoopVectorize/runtime-limit.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ; We are vectorizing with 6 runtime checks.
-;CHECK: func1x6
+;CHECK-LABEL: func1x6(
 ;CHECK: <4 x i32>
 ;CHECK: ret
 define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
@@ -38,7 +38,7 @@ for.end:                                          ; preds = %for.body
 }
 
 ; We are not vectorizing with 12 runtime checks.
-;CHECK: func2x6
+;CHECK-LABEL: func2x6(
 ;CHECK-NOT: <4 x i32>
 ;CHECK: ret
 define i32 @func2x6(i32* nocapture %out, i32* nocapture %out2, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
diff --git a/test/Transforms/LoopVectorize/safegep.ll b/test/Transforms/LoopVectorize/safegep.ll
index 46ec28b..c950860 100644
--- a/test/Transforms/LoopVectorize/safegep.ll
+++ b/test/Transforms/LoopVectorize/safegep.ll
@@ -8,7 +8,7 @@ target datalayout = "e-p:32:32:32-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:
 
 ; PR16592
 
-; CHECK: safe
+; CHECK-LABEL: @safe(
 ; CHECK: <4 x float>
 
 define void @safe(float* %A, float* %B, float %K) {
@@ -34,7 +34,7 @@ return:
 
 ; In a non-default address space we don't have this rule.
 
-; CHECK: notsafe
+; CHECK-LABEL: @notsafe(
 ; CHECK-NOT: <4 x float>
 
 define void @notsafe(float addrspace(5) * %A, float* %B, float %K) {
diff --git a/test/Transforms/LoopVectorize/scev-exitlim-crash.ll b/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
index 7687738..683621a 100644
--- a/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
+++ b/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-macosx"
 @e = common global i32* null, align 8
 @c = common global i32 0, align 4
 
-; CHECK-LABEL-LABEL: @fn1(
+; CHECK-LABEL: @fn1(
 ; CHECK: vector.body
 define void @fn1() #0 {
 entry:
@@ -29,14 +29,14 @@ for.cond4.preheader:                              ; preds = %for.cond
   br i1 %cmp514, label %for.cond7.preheader.lr.ph, label %for.end26
 
 for.cond7.preheader.lr.ph:                        ; preds = %for.cond4.preheader
-  %0 = load i32** @e, align 8, !tbaa !0
+  %0 = load i32** @e, align 8, !tbaa !4
   br label %for.cond7.preheader
 
 for.cond7.preheader:                              ; preds = %for.cond7.preheader.lr.ph, %for.inc23
   %y.017 = phi i32 [ 0, %for.cond7.preheader.lr.ph ], [ %inc24, %for.inc23 ]
   %i.116 = phi i32 [ 0, %for.cond7.preheader.lr.ph ], [ %i.2.lcssa, %for.inc23 ]
   %n.015 = phi i32 [ undef, %for.cond7.preheader.lr.ph ], [ %inc25, %for.inc23 ]
-  %1 = load i32* @b, align 4, !tbaa !3
+  %1 = load i32* @b, align 4, !tbaa !5
   %tobool11 = icmp eq i32 %1, 0
   br i1 %tobool11, label %for.inc23, label %for.body8.lr.ph
 
@@ -49,7 +49,7 @@ for.body8:                                        ; preds = %for.body8.lr.ph, %f
   %i.213 = phi i32 [ %i.116, %for.body8.lr.ph ], [ 0, %for.inc19 ]
   %2 = trunc i64 %indvars.iv19 to i32
   %add10 = add i32 %add9, %2
-  store i32 %add10, i32* @f, align 4, !tbaa !3
+  store i32 %add10, i32* @f, align 4, !tbaa !5
   %idx.ext = sext i32 %add10 to i64
   %add.ptr = getelementptr inbounds i32* @a, i64 %idx.ext
   %tobool129 = icmp eq i32 %i.213, 0
@@ -63,9 +63,9 @@ for.body13:                                       ; preds = %for.body13.lr.ph, %
   %indvars.iv = phi i64 [ %3, %for.body13.lr.ph ], [ %indvars.iv.next, %for.body13 ]
   %add.ptr.sum = add i64 %idx.ext, %indvars.iv
   %arrayidx = getelementptr inbounds i32* @a, i64 %add.ptr.sum
-  %4 = load i32* %arrayidx, align 4, !tbaa !3
+  %4 = load i32* %arrayidx, align 4, !tbaa !5
   %arrayidx15 = getelementptr inbounds i32* %0, i64 %indvars.iv
-  store i32 %4, i32* %arrayidx15, align 4, !tbaa !3
+  store i32 %4, i32* %arrayidx15, align 4, !tbaa !5
   %indvars.iv.next = add i64 %indvars.iv, 1
   %5 = trunc i64 %indvars.iv.next to i32
   %tobool12 = icmp eq i32 %5, 0
@@ -75,17 +75,17 @@ for.cond11.for.inc19_crit_edge:                   ; preds = %for.body13
   br label %for.inc19
 
 for.inc19:                                        ; preds = %for.cond11.for.inc19_crit_edge, %for.body8
-  %6 = load i32* @c, align 4, !tbaa !3
+  %6 = load i32* @c, align 4, !tbaa !5
   %inc20 = add nsw i32 %6, 1
-  store i32 %inc20, i32* @c, align 4, !tbaa !3
+  store i32 %inc20, i32* @c, align 4, !tbaa !5
   %indvars.iv.next20 = add i64 %indvars.iv19, 1
-  %7 = load i32* @b, align 4, !tbaa !3
+  %7 = load i32* @b, align 4, !tbaa !5
   %tobool = icmp eq i32 %7, 0
   br i1 %tobool, label %for.cond7.for.inc23_crit_edge, label %for.body8
 
 for.cond7.for.inc23_crit_edge:                    ; preds = %for.inc19
   %add.ptr.lcssa = phi i32* [ %add.ptr, %for.inc19 ]
-  store i32* %add.ptr.lcssa, i32** @d, align 8, !tbaa !0
+  store i32* %add.ptr.lcssa, i32** @d, align 8, !tbaa !4
   br label %for.inc23
 
 for.inc23:                                        ; preds = %for.cond7.for.inc23_crit_edge, %for.cond7.preheader
@@ -110,4 +110,5 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA"}
 !3 = metadata !{metadata !"double", metadata !1}
-!4 = metadata !{metadata !"any pointer", metadata !1}
+!4 = metadata !{metadata !0, metadata !0, i64 0}
+!5 = metadata !{metadata !3, metadata !3, i64 0}
diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll
index 0cfaabe..75beae8 100644
--- a/test/Transforms/LoopVectorize/struct_access.ll
+++ b/test/Transforms/LoopVectorize/struct_access.ll
@@ -44,3 +44,45 @@ for.end:                                          ; preds = %for.body, %entry
   %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
   ret i32 %sum.0.lcssa
 }
+
+%struct.lit = type { i32 }
+
+; Verify that we still vectorize the access if the struct has the same size as
+; the loaded element.
+; struct lit {
+;  int x;
+; };
+;
+;
+; int bar(struct lit *A, int n) {
+;
+;   int sum = 0;
+;   for (int i = 0; i < n; ++i)
+;     sum += A[i].x;
+;
+;   return sum;
+; }
+
+;CHECK-LABEL: @bar(
+;CHECK: load <4 x i32>
+;CHECK: ret
+define i32 @bar(%struct.lit* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0                        ; loop is entered only when n > 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]   ; running sum, starts at 0
+  %x = getelementptr inbounds %struct.lit* %A, i64 %indvars.iv, i32 0   ; &A[i].x, the struct's only field
+  %0 = load i32* %x, align 4
+  %add = add nsw i32 %0, %sum.05
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n          ; exit once the truncated next index equals n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/unroll_novec.ll b/test/Transforms/LoopVectorize/unroll_novec.ll
new file mode 100644
index 0000000..33f128d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/unroll_novec.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=1 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+; This is the loop. The RUN line forces vector width 1 and unroll factor 2, so the checks below expect each scalar load/add/store twice.
+;  for (i=0; i<n; i++){
+;    a[i] += i;
+;  }
+;CHECK-LABEL: @inc(
+;CHECK: load i32*
+;CHECK: load i32*
+;CHECK: add nsw i32
+;CHECK: add nsw i32
+;CHECK: store i32
+;CHECK: store i32
+;CHECK: ret void
+define void @inc(i32 %n) nounwind uwtable noinline ssp {
+  %1 = icmp sgt i32 %n, 0                           ; loop is entered only when n > 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv   ; &a[i]
+  %3 = load i32* %2, align 4
+  %4 = trunc i64 %indvars.iv to i32                 ; i as i32
+  %5 = add nsw i32 %3, %4                           ; a[i] + i
+  store i32 %5, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/value-ptr-bug.ll b/test/Transforms/LoopVectorize/value-ptr-bug.ll
index f376656..e8d3728 100644
--- a/test/Transforms/LoopVectorize/value-ptr-bug.ll
+++ b/test/Transforms/LoopVectorize/value-ptr-bug.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; SCEVExpander::expandCodeFor would change a value (the start value of an
 ; induction) that we cached in the induction variable list.
 
-; CHECK: test_vh
+; CHECK-LABEL: @test_vh(
 ; CHECK-NOT: store <4 x i8> undef
 
 define void @test_vh(i32* %ptr265, i32* %ptr266, i32 %sub267) {
diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll
index 2b8f3fd..7800469 100644
--- a/test/Transforms/LoopVectorize/vectorize-once.ll
+++ b/test/Transforms/LoopVectorize/vectorize-once.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 
 ; This test checks that we add metadata to vectorized loops
-; CHECK: _Z4foo1Pii
+; CHECK-LABEL: @_Z4foo1Pii(
 ; CHECK: <4 x i32>
 ; CHECK: llvm.loop
 ; CHECK: ret
@@ -41,7 +41,7 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
 }
 
 ; This test checks that we don't vectorize loops that are marked with the "width" == 1 metadata.
-; CHECK: _Z4foo2Pii
+; CHECK-LABEL: @_Z4foo2Pii(
 ; CHECK-NOT: <4 x i32>
 ; CHECK: llvm.loop
 ; CHECK: ret
@@ -68,9 +68,10 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
 
 attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
 
-; CHECK: !0 = metadata !{metadata !0, metadata !1}
+; CHECK: !0 = metadata !{metadata !0, metadata !1, metadata !2}
 ; CHECK: !1 = metadata !{metadata !"llvm.vectorizer.width", i32 1}
-; CHECK: !2 = metadata !{metadata !2, metadata !1}
+; CHECK: !2 = metadata !{metadata !"llvm.vectorizer.unroll", i32 1}
+; CHECK: !3 = metadata !{metadata !3, metadata !1, metadata !2}
 
 !0 = metadata !{metadata !0, metadata !1}
 !1 = metadata !{metadata !"llvm.vectorizer.width", i32 1}
diff --git a/test/Transforms/LowerAtomic/lit.local.cfg b/test/Transforms/LowerAtomic/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/LowerAtomic/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerExpectIntrinsic/lit.local.cfg b/test/Transforms/LowerExpectIntrinsic/lit.local.cfg
deleted file mode 100644
index c6106e4..0000000
--- a/test/Transforms/LowerExpectIntrinsic/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/LowerInvoke/lit.local.cfg b/test/Transforms/LowerInvoke/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/LowerInvoke/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerSwitch/feature.ll b/test/Transforms/LowerSwitch/feature.ll
index cc77d3c..e85f03e 100644
--- a/test/Transforms/LowerSwitch/feature.ll
+++ b/test/Transforms/LowerSwitch/feature.ll
@@ -7,88 +7,88 @@
 ;CHECK-NEXT:   br label %NodeBlock37
 
 ;CHECK:      NodeBlock37:                                      ; preds = %entry
-;CHECK-NEXT:   %Pivot38 = icmp ult i32 %tmp158, 11
+;CHECK-NEXT:   %Pivot38 = icmp slt i32 %tmp158, 10
 ;CHECK-NEXT:   br i1 %Pivot38, label %NodeBlock13, label %NodeBlock35
 
 ;CHECK:      NodeBlock35:                                      ; preds = %NodeBlock37
-;CHECK-NEXT:   %Pivot36 = icmp ult i32 %tmp158, 14
+;CHECK-NEXT:   %Pivot36 = icmp slt i32 %tmp158, 13
 ;CHECK-NEXT:   br i1 %Pivot36, label %NodeBlock23, label %NodeBlock33
 
 ;CHECK:      NodeBlock33:                                      ; preds = %NodeBlock35
-;CHECK-NEXT:   %Pivot34 = icmp ult i32 %tmp158, 15
+;CHECK-NEXT:   %Pivot34 = icmp slt i32 %tmp158, 14
 ;CHECK-NEXT:   br i1 %Pivot34, label %LeafBlock25, label %NodeBlock31
 
 ;CHECK:      NodeBlock31:                                      ; preds = %NodeBlock33
-;CHECK-NEXT:   %Pivot32 = icmp ult i32 %tmp158, -6
+;CHECK-NEXT:   %Pivot32 = icmp slt i32 %tmp158, 15
 ;CHECK-NEXT:   br i1 %Pivot32, label %LeafBlock27, label %LeafBlock29
 
 ;CHECK:      LeafBlock29:                                      ; preds = %NodeBlock31
-;CHECK-NEXT:   %tmp158.off = add i32 %tmp158, 6
-;CHECK-NEXT:   %SwitchLeaf30 = icmp ule i32 %tmp158.off, 4
-;CHECK-NEXT:   br i1 %SwitchLeaf30, label %bb338, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf30 = icmp eq i32 %tmp158, 15
+;CHECK-NEXT:   br i1 %SwitchLeaf30, label %bb334, label %NewDefault
 
 ;CHECK:      LeafBlock27:                                      ; preds = %NodeBlock31
-;CHECK-NEXT:   %SwitchLeaf28 = icmp eq i32 %tmp158, 15
-;CHECK-NEXT:   br i1 %SwitchLeaf28, label %bb334, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf28 = icmp eq i32 %tmp158, 14
+;CHECK-NEXT:   br i1 %SwitchLeaf28, label %bb332, label %NewDefault
 
 ;CHECK:      LeafBlock25:                                      ; preds = %NodeBlock33
-;CHECK-NEXT:   %SwitchLeaf26 = icmp eq i32 %tmp158, 14
-;CHECK-NEXT:   br i1 %SwitchLeaf26, label %bb332, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf26 = icmp eq i32 %tmp158, 13
+;CHECK-NEXT:   br i1 %SwitchLeaf26, label %bb330, label %NewDefault
 
 ;CHECK:      NodeBlock23:                                      ; preds = %NodeBlock35
-;CHECK-NEXT:   %Pivot24 = icmp ult i32 %tmp158, 12
+;CHECK-NEXT:   %Pivot24 = icmp slt i32 %tmp158, 11
 ;CHECK-NEXT:   br i1 %Pivot24, label %LeafBlock15, label %NodeBlock21
 
 ;CHECK:      NodeBlock21:                                      ; preds = %NodeBlock23
-;CHECK-NEXT:   %Pivot22 = icmp ult i32 %tmp158, 13
+;CHECK-NEXT:   %Pivot22 = icmp slt i32 %tmp158, 12
 ;CHECK-NEXT:   br i1 %Pivot22, label %LeafBlock17, label %LeafBlock19
 
 ;CHECK:      LeafBlock19:                                      ; preds = %NodeBlock21
-;CHECK-NEXT:   %SwitchLeaf20 = icmp eq i32 %tmp158, 13
-;CHECK-NEXT:   br i1 %SwitchLeaf20, label %bb330, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf20 = icmp eq i32 %tmp158, 12
+;CHECK-NEXT:   br i1 %SwitchLeaf20, label %bb328, label %NewDefault
 
 ;CHECK:      LeafBlock17:                                      ; preds = %NodeBlock21
-;CHECK-NEXT:   %SwitchLeaf18 = icmp eq i32 %tmp158, 12
-;CHECK-NEXT:   br i1 %SwitchLeaf18, label %bb328, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf18 = icmp eq i32 %tmp158, 11
+;CHECK-NEXT:   br i1 %SwitchLeaf18, label %bb326, label %NewDefault
 
 ;CHECK:      LeafBlock15:                                      ; preds = %NodeBlock23
-;CHECK-NEXT:   %SwitchLeaf16 = icmp eq i32 %tmp158, 11
-;CHECK-NEXT:   br i1 %SwitchLeaf16, label %bb326, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf16 = icmp eq i32 %tmp158, 10
+;CHECK-NEXT:   br i1 %SwitchLeaf16, label %bb324, label %NewDefault
 
 ;CHECK:      NodeBlock13:                                      ; preds = %NodeBlock37
-;CHECK-NEXT:   %Pivot14 = icmp ult i32 %tmp158, 8
+;CHECK-NEXT:   %Pivot14 = icmp slt i32 %tmp158, 7
 ;CHECK-NEXT:   br i1 %Pivot14, label %NodeBlock, label %NodeBlock11
 
 ;CHECK:      NodeBlock11:                                      ; preds = %NodeBlock13
-;CHECK-NEXT:   %Pivot12 = icmp ult i32 %tmp158, 9
+;CHECK-NEXT:   %Pivot12 = icmp slt i32 %tmp158, 8
 ;CHECK-NEXT:   br i1 %Pivot12, label %LeafBlock3, label %NodeBlock9
 
 ;CHECK:      NodeBlock9:                                       ; preds = %NodeBlock11
-;CHECK-NEXT:   %Pivot10 = icmp ult i32 %tmp158, 10
+;CHECK-NEXT:   %Pivot10 = icmp slt i32 %tmp158, 9
 ;CHECK-NEXT:   br i1 %Pivot10, label %LeafBlock5, label %LeafBlock7
 
 ;CHECK:      LeafBlock7:                                       ; preds = %NodeBlock9
-;CHECK-NEXT:   %SwitchLeaf8 = icmp eq i32 %tmp158, 10
-;CHECK-NEXT:   br i1 %SwitchLeaf8, label %bb324, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf8 = icmp eq i32 %tmp158, 9
+;CHECK-NEXT:   br i1 %SwitchLeaf8, label %bb322, label %NewDefault
 
 ;CHECK:      LeafBlock5:                                       ; preds = %NodeBlock9
-;CHECK-NEXT:   %SwitchLeaf6 = icmp eq i32 %tmp158, 9
-;CHECK-NEXT:   br i1 %SwitchLeaf6, label %bb322, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf6 = icmp eq i32 %tmp158, 8
+;CHECK-NEXT:   br i1 %SwitchLeaf6, label %bb338, label %NewDefault
 
 ;CHECK:      LeafBlock3:                                       ; preds = %NodeBlock11
-;CHECK-NEXT:   %SwitchLeaf4 = icmp eq i32 %tmp158, 8
-;CHECK-NEXT:   br i1 %SwitchLeaf4, label %bb338, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf4 = icmp eq i32 %tmp158, 7
+;CHECK-NEXT:   br i1 %SwitchLeaf4, label %bb, label %NewDefault
 
 ;CHECK:      NodeBlock:                                        ; preds = %NodeBlock13
-;CHECK-NEXT:   %Pivot = icmp ult i32 %tmp158, 7
+;CHECK-NEXT:   %Pivot = icmp slt i32 %tmp158, 0
 ;CHECK-NEXT:   br i1 %Pivot, label %LeafBlock, label %LeafBlock1
 
 ;CHECK:      LeafBlock1:                                       ; preds = %NodeBlock
-;CHECK-NEXT:   %SwitchLeaf2 = icmp eq i32 %tmp158, 7
-;CHECK-NEXT:   br i1 %SwitchLeaf2, label %bb, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf2 = icmp ule i32 %tmp158, 6
+;CHECK-NEXT:   br i1 %SwitchLeaf2, label %bb338, label %NewDefault
 
 ;CHECK:      LeafBlock:                                        ; preds = %NodeBlock
-;CHECK-NEXT:   %SwitchLeaf = icmp ule i32 %tmp158, 6
+;CHECK-NEXT:   %tmp158.off = add i32 %tmp158, 6
+;CHECK-NEXT:   %SwitchLeaf = icmp ule i32 %tmp158.off, 4
 ;CHECK-NEXT:   br i1 %SwitchLeaf, label %bb338, label %NewDefault
 
 define i32 @main(i32 %tmp158) {
diff --git a/test/Transforms/LowerSwitch/lit.local.cfg b/test/Transforms/LowerSwitch/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/LowerSwitch/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
index 5754fcd..33eaed6 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
@@ -33,12 +33,13 @@ return:                                           ; preds = %entry
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!14}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 2, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 786478, metadata !12, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (i32, double)* @testfunc, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786449, metadata !12, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !13, metadata !13, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !12, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{i32 786453, metadata !12, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6, metadata !7, metadata !6}
 !6 = metadata !{i32 786468, metadata !12, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !7 = metadata !{i32 786468, metadata !12, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
@@ -48,3 +49,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !11 = metadata !{i32 786443, metadata !12, metadata !1, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !12 = metadata !{metadata !"testfunc.c", metadata !"/tmp"}
 !13 = metadata !{i32 0}
+!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
index 49dcb04..32acdd6 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
@@ -31,18 +31,19 @@ return:                                           ; preds = %entry
 }
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!22}
 !0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 8, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32)* @baz, null, null, null, i32 8} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786449, metadata !20, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{null, metadata !6}
 !6 = metadata !{i32 786468, metadata !20, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !7 = metadata !{i32 8, i32 0, metadata !1, null}
 !8 = metadata !{i32 9, i32 0, metadata !1, null}
 !9 = metadata !{i32 786689, metadata !10, metadata !"x", metadata !2, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
 !10 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!11 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{null, metadata !6, metadata !13, metadata !14}
 !13 = metadata !{i32 786468, metadata !20, metadata !2, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !14 = metadata !{i32 786447, metadata !20, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
@@ -53,3 +54,4 @@ return:                                           ; preds = %entry
 !19 = metadata !{i32 10, i32 0, metadata !1, null}
 !20 = metadata !{metadata !"bar.c", metadata !"/tmp/"}
 !21 = metadata !{i32 0}
+!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/Mem2Reg/ignore-lifetime.ll b/test/Transforms/Mem2Reg/ignore-lifetime.ll
new file mode 100644
index 0000000..5e4f9bf
--- /dev/null
+++ b/test/Transforms/Mem2Reg/ignore-lifetime.ll
@@ -0,0 +1,26 @@
+; RUN: opt -mem2reg -S -o - < %s | FileCheck %s
+
+declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
+declare void @llvm.lifetime.end(i64 %size, i8* nocapture %ptr)
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NOT: alloca
+  %A = alloca i32
+  %B = bitcast i32* %A to i8*                       ; i8* view of %A, used only by the lifetime markers
+  call void @llvm.lifetime.start(i64 2, i8* %B)
+  store i32 1, i32* %A
+  call void @llvm.lifetime.end(i64 2, i8* %B)
+  ret void
+}
+
+define void @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NOT: alloca
+  %A = alloca {i8, i16}
+  %B = getelementptr {i8, i16}* %A, i32 0, i32 0    ; i8* to the first field, used only by the lifetime markers
+  call void @llvm.lifetime.start(i64 2, i8* %B)
+  store {i8, i16} zeroinitializer, {i8, i16}* %A
+  call void @llvm.lifetime.end(i64 2, i8* %B)
+  ret void
+}
diff --git a/test/Transforms/Mem2Reg/lit.local.cfg b/test/Transforms/Mem2Reg/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/Mem2Reg/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Mem2Reg/use-analysis.ll b/test/Transforms/Mem2Reg/use-analysis.ll
deleted file mode 100644
index b08b1f1..0000000
--- a/test/Transforms/Mem2Reg/use-analysis.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; RUN: opt -mem2reg -S -o - < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
-
-declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
-declare void @llvm.lifetime.end(i64 %size, i8* nocapture %ptr)
-
-define void @test1() {
-; Ensure we can look through a bitcast to i8* and the addition of lifetime
-; markers.
-;
-; CHECK-LABEL: @test1(
-; CHECK-NOT: alloca
-; CHECK: ret void
-
-  %A = alloca i32
-  %B = bitcast i32* %A to i8*
-  call void @llvm.lifetime.start(i64 2, i8* %B)
-  store i32 1, i32* %A
-  call void @llvm.lifetime.end(i64 2, i8* %B)
-  ret void
-}
-
-define void @test2() {
-; Ensure we can look through a GEP to i8* and the addition of lifetime
-; markers.
-;
-; CHECK-LABEL: @test2(
-; CHECK-NOT: alloca
-; CHECK: ret void
-
-  %A = alloca {i8, i16}
-  %B = getelementptr {i8, i16}* %A, i32 0, i32 0
-  call void @llvm.lifetime.start(i64 2, i8* %B)
-  store {i8, i16} zeroinitializer, {i8, i16}* %A
-  call void @llvm.lifetime.end(i64 2, i8* %B)
-  ret void
-}
-
-define i32 @test3(i32 %x) {
-; CHECK-LABEL: @test3(
-;
-; Check that we recursively walk the uses of the alloca and thus can see
-; through round trip bitcasts, dead bitcasts, GEPs, multiple GEPs, and lifetime
-; markers.
-entry:
-  %a = alloca i32
-; CHECK-NOT: alloca
-
-  %b = bitcast i32* %a to i8*
-  %b2 = getelementptr inbounds i8* %b, i32 0
-  %b3 = getelementptr inbounds i8* %b2, i32 0
-  call void @llvm.lifetime.start(i64 -1, i8* %b3)
-; CHECK-NOT: call void @llvm.lifetime.start
-
-  store i32 %x, i32* %a
-; CHECK-NOT: store
-
-  %dead = bitcast i32* %a to i4096*
-  %dead1 = bitcast i4096* %dead to i42*
-  %dead2 = getelementptr inbounds i32* %a, i32 %x
-; CHECK-NOT: bitcast
-; CHECK-NOT: getelementptr
-
-  %ret = load i32* %a
-; CHECK-NOT: load
-
-  ret i32 %ret
-; CHECK: ret i32 %x
-}
diff --git a/test/Transforms/MemCpyOpt/lit.local.cfg b/test/Transforms/MemCpyOpt/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/MemCpyOpt/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/MergeFunc/address-spaces.ll b/test/Transforms/MergeFunc/address-spaces.ll
new file mode 100644
index 0000000..0d66b82
--- /dev/null
+++ b/test/Transforms/MergeFunc/address-spaces.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+target datalayout = "p:32:32:32-p1:32:32:32-p2:16:16:16"
+
+declare void @foo(i32) nounwind
+
+; None of these functions should be merged
+
+define i32 @store_as0(i32* %x) {
+; CHECK-LABEL: @store_as0(
+; CHECK: call void @foo(
+  %gep = getelementptr i32* %x, i32 4               ; default address space (0) variant
+  %y = load i32* %gep                               ; NOTE(review): function is named store_* but only loads through %x
+  call void @foo(i32 %y) nounwind
+  ret i32 %y
+}
+
+define i32 @store_as1(i32 addrspace(1)* %x) {
+; CHECK-LABEL: @store_as1(
+; CHECK: call void @foo(
+  %gep = getelementptr i32 addrspace(1)* %x, i32 4  ; addrspace(1): 32-bit pointers per the datalayout, same width as address space 0
+  %y = load i32 addrspace(1)* %gep
+  call void @foo(i32 %y) nounwind
+  ret i32 %y
+}
+
+define i32 @store_as2(i32 addrspace(2)* %x) {
+; CHECK-LABEL: @store_as2(
+; CHECK: call void @foo(
+  %gep = getelementptr i32 addrspace(2)* %x, i32 4  ; addrspace(2): 16-bit pointers per the datalayout
+  %y = load i32 addrspace(2)* %gep
+  call void @foo(i32 %y) nounwind
+  ret i32 %y
+}
+
diff --git a/test/Transforms/MergeFunc/inttoptr-address-space.ll b/test/Transforms/MergeFunc/inttoptr-address-space.ll
new file mode 100644
index 0000000..0d834bc
--- /dev/null
+++ b/test/Transforms/MergeFunc/inttoptr-address-space.ll
@@ -0,0 +1,29 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-n8:16:32-S128"
+
+%.qux.2496 = type { i32, %.qux.2497 }
+%.qux.2497 = type { i8, i32 }
+%.qux.2585 = type { i32, i32, i8* }
+
+@g2 = external addrspace(1) constant [9 x i8], align 1
+@g3 = internal hidden unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585 addrspace(1)*)* @func35 to i8*)]
+
+
+define internal hidden i32 @func10(%.qux.2496 addrspace(1)* nocapture %this) align 2 {
+bb:
+  %tmp = getelementptr inbounds %.qux.2496 addrspace(1)* %this, i32 0, i32 1, i32 1   ; the i32 inside the nested %.qux.2497 (field 1 of field 1)
+  %tmp1 = load i32 addrspace(1)* %tmp, align 4      ; i32 read through an addrspace(1) pointer
+  ret i32 %tmp1
+}
+
+; Check for pointer bitwidth equal assertion failure. The expected output bitcasts the argument, tail-calls @func10 and converts its i32 result with inttoptr.
+define internal hidden i8* @func35(%.qux.2585 addrspace(1)* nocapture %this) align 2 {
+bb:
+; CHECK-LABEL: @func35(
+; CHECK: %[[V2:.+]] = bitcast %.qux.2585 addrspace(1)* %{{.*}} to %.qux.2496 addrspace(1)*
+; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496 addrspace(1)* %[[V2]])
+; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8*
+  %tmp = getelementptr inbounds %.qux.2585 addrspace(1)* %this, i32 0, i32 2   ; field 2 of %.qux.2585, an i8*
+  %tmp1 = load i8* addrspace(1)* %tmp, align 4      ; loads a default-address-space i8* stored in address space 1
+  ret i8* %tmp1
+}
diff --git a/test/Transforms/MergeFunc/inttoptr.ll b/test/Transforms/MergeFunc/inttoptr.ll
index 93250fa..6a69e3f 100644
--- a/test/Transforms/MergeFunc/inttoptr.ll
+++ b/test/Transforms/MergeFunc/inttoptr.ll
@@ -46,6 +46,7 @@ bb:
 
 define internal hidden i8* @func35(%.qux.2585* nocapture %this) align 2 {
 bb:
+; CHECK-LABEL: @func35(
 ; CHECK: %[[V2:.+]] = bitcast %.qux.2585* %{{.*}} to %.qux.2496*
 ; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496* %[[V2]])
 ; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8*
diff --git a/test/Transforms/MergeFunc/lit.local.cfg b/test/Transforms/MergeFunc/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/MergeFunc/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/MergeFunc/merge-ptr-and-int.ll b/test/Transforms/MergeFunc/merge-ptr-and-int.ll
new file mode 100644
index 0000000..4e887ce
--- /dev/null
+++ b/test/Transforms/MergeFunc/merge-ptr-and-int.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+
+declare void @stuff()
+
+; CHECK-LABEL: @f0(
+define void @f0(i64 %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
+; CHECK-LABEL: @f1(
+; CHECK: ptrtoint i64*
+; CHECK: tail call void @f0(i64
+
+define void @f1(i64* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
diff --git a/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll b/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll
new file mode 100644
index 0000000..d6ff10f
--- /dev/null
+++ b/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; f0 takes an i64 while f2 takes a pointer in address space 1. Their bodies
+; are identical, but this test expects them not to be merged: neither
+; definition may be rewritten to reference the other.
+
+declare void @stuff()
+
+; CHECK-LABEL: define void @f0(
+; CHECK-NOT: @f2
+define void @f0(i64 %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
+; CHECK-LABEL: define void @f2(
+; CHECK-NOT: @f0
+define void @f2(i64 addrspace(1)* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
diff --git a/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll b/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll
new file mode 100644
index 0000000..c9fb6a6
--- /dev/null
+++ b/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; f0 takes an i64 and f1 takes a pointer in address space 0 (64 bits wide per
+; the datalayout above). The checks expect f1 to become a thunk that converts
+; its pointer argument with ptrtoint and tail-calls f0.
+
+declare void @stuff()
+
+define void @f0(i64 %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
+; CHECK-LABEL: @f1(
+; CHECK:  %2 = ptrtoint i64* %0 to i64
+; CHECK:  tail call void @f0(i64 %2)
+; CHECK:  ret void
+define void @f1(i64 addrspace(0)* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
diff --git a/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll b/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll
new file mode 100644
index 0000000..8f00f03
--- /dev/null
+++ b/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; f0 takes a pointer in address space 0 while f2 takes a pointer in address
+; space 1. Their bodies are identical, but this test expects them not to be
+; merged: neither definition may be rewritten to reference the other.
+
+declare void @stuff()
+
+; CHECK-LABEL: define void @f0(
+; CHECK-NOT: @f2
+define void @f0(i64 addrspace(0)* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
+; CHECK-LABEL: define void @f2(
+; CHECK-NOT: @f0
+define void @f2(i64 addrspace(1)* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
diff --git a/test/Transforms/MergeFunc/too-small.ll b/test/Transforms/MergeFunc/too-small.ll
new file mode 100644
index 0000000..1a526ff
--- /dev/null
+++ b/test/Transforms/MergeFunc/too-small.ll
@@ -0,0 +1,14 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+define void @foo(i32 %x) {
+; CHECK-LABEL: @foo(
+; CHECK-NOT: call
+  ret void
+}
+
+define void @bar(i32 %x) {
+; CHECK-LABEL: @bar(
+; CHECK-NOT: call
+  ret void
+}
+
diff --git a/test/Transforms/MetaRenamer/lit.local.cfg b/test/Transforms/MetaRenamer/lit.local.cfg
deleted file mode 100644
index c6106e4..0000000
--- a/test/Transforms/MetaRenamer/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 12af354..885935c 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -182,7 +182,7 @@ if.end5:                                          ; preds = %if.then3, %if.end
 ; CHECK:   tail call i8* @objc_retain(i8* %x) [[NUW:#[0-9]+]]
 ; CHECK-NOT: @objc_
 ; CHECK: if.end5:
-; CHECK:   tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0
+; CHECK:   tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release ![[RELEASE:[0-9]+]]
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test1b_imprecise(i8* %x, i1 %p, i1 %q) {
@@ -1357,55 +1357,6 @@ C:
   ret void
 }
 
-; Optimize objc_retainBlock.
-
-; CHECK-LABEL: define void @test23(
-; CHECK-NOT: @objc_
-; CHECK: }
-%block0 = type { i64, i64, i8*, i8* }
-%block1 = type { i8**, i32, i32, i32 (%struct.__block_literal_1*)*, %block0* }
-%struct.__block_descriptor = type { i64, i64 }
-%struct.__block_literal_1 = type { i8**, i32, i32, i8**, %struct.__block_descriptor* }
-@__block_holder_tmp_1 = external constant %block1
-define void @test23() {
-entry:
-  %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind, !clang.arc.copy_on_escape !0
-  call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
-  call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
-  call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
-  ret void
-}
-
-; Don't optimize objc_retainBlock, but do strength reduce it.
-
-; CHECK: define void @test23b(i8* %p) {
-; CHECK: @objc_retain
-; CHECK: @objc_release
-; CHECK: }
-define void @test23b(i8* %p) {
-entry:
-  %0 = call i8* @objc_retainBlock(i8* %p) nounwind, !clang.arc.copy_on_escape !0
-  call void @callee()
-  call void @use_pointer(i8* %p)
-  call void @objc_release(i8* %p) nounwind
-  ret void
-}
-
-; Don't optimize objc_retainBlock, because there's no copy_on_escape metadata.
-
-; CHECK-LABEL: define void @test23c(
-; CHECK: @objc_retainBlock
-; CHECK: @objc_release
-; CHECK: }
-define void @test23c() {
-entry:
-  %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
-  call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
-  call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
-  call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
-  ret void
-}
-
 ; Any call can decrement a retain count.
 
 ; CHECK-LABEL: define void @test24(
@@ -2251,7 +2202,7 @@ define void @test53(void ()** %zz, i8** %pp) {
 
 ; CHECK-LABEL: define void @test54(
 ; CHECK: call i8* @returner()
-; CHECK-NEXT: call void @objc_release(i8* %t) [[NUW]], !clang.imprecise_release !0
+; CHECK-NEXT: call void @objc_release(i8* %t) [[NUW]], !clang.imprecise_release ![[RELEASE]]
 ; CHECK-NEXT: ret void
 ; CHECK: }
 define void @test54() {
@@ -2285,7 +2236,7 @@ entry:
 ; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK-NEXT: tail call void @use_pointer(i8* %x)
 ; CHECK-NEXT: tail call void @use_pointer(i8* %x)
-; CHECK-NEXT: tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release ![[RELEASE]]
 ; CHECK-NEXT: br label %if.end
 ; CHECK-NOT: @objc
 ; CHECK: }
@@ -3058,7 +3009,11 @@ define void @test67(i8* %x) {
   ret void
 }
 
+!llvm.module.flags = !{!1}
+
 !0 = metadata !{}
+!1 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
 
 ; CHECK: attributes #0 = { nounwind readnone }
 ; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: ![[RELEASE]] = metadata !{}
diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
index 96a7d3e..0728617 100644
--- a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
+++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -111,14 +111,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ; CHECK: attributes [[NUW]] = { nounwind }
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!33, !34, !35, !36}
+!llvm.module.flags = !{!33, !34, !35, !36, !61}
 
 !0 = metadata !{i32 786449, metadata !60, i32 16, metadata !"clang version 3.3 ", i1 true, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m] [DW_LANG_ObjC]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !27}
 !5 = metadata !{i32 786478, metadata !60, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @main, null, null, metadata !10, i32 10} ; [ DW_TAG_subprogram ] [line 9] [def] [scope 10] [main]
 !6 = metadata !{i32 786473, metadata !60} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{metadata !11}
@@ -127,11 +127,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !13 = metadata !{i32 786443, metadata !60, metadata !5, i32 10, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
 !14 = metadata !{i32 786454, metadata !60, null, metadata !"id", i32 11, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_typedef ] [id] [line 11, size 0, align 0, offset 0] [from ]
 !15 = metadata !{i32 786447, metadata !60, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
-!16 = metadata !{i32 786451, metadata !60, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{i32 786451, metadata !60, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ]
 !17 = metadata !{metadata !18}
 !18 = metadata !{i32 786445, metadata !60, metadata !16, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !19} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
 !19 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
-!20 = metadata !{i32 786451, metadata !60, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ]
+!20 = metadata !{i32 786451, metadata !60, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
 !21 = metadata !{i32 786688, metadata !22, metadata !"ok", metadata !6, i32 13, metadata !23, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ok] [line 13]
 !22 = metadata !{i32 786443, metadata !60, metadata !13, i32 12, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
 !23 = metadata !{i32 786454, metadata !60, null, metadata !"BOOL", i32 62, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_typedef ] [BOOL] [line 62, size 0, align 0, offset 0] [from signed char]
@@ -139,7 +139,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !25 = metadata !{i32 786688, metadata !26, metadata !"obj2", metadata !6, i32 15, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj2] [line 15]
 !26 = metadata !{i32 786443, metadata !60, metadata !22, i32 14, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
 !27 = metadata !{i32 786478, metadata !60, metadata !6, metadata !"ThrowFunc", metadata !"ThrowFunc", metadata !"", i32 4, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i8*)* @ThrowFunc, null, null, metadata !30, i32 5} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [scope 5] [ThrowFunc]
-!28 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!28 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !29 = metadata !{null, metadata !14}
 !30 = metadata !{metadata !31}
 !31 = metadata !{metadata !32}
@@ -171,3 +171,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !57 = metadata !{i32 786443, metadata !60, metadata !27, i32 5, i32 0, i32 7} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
 !58 = metadata !{i32 7, i32 0, metadata !57, null}
 !60 = metadata !{metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997"}
+!61 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/ObjCARC/lit.local.cfg b/test/Transforms/ObjCARC/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/ObjCARC/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
deleted file mode 100644
index 2a56371..0000000
--- a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
+++ /dev/null
@@ -1,123 +0,0 @@
-; RUN: opt -S -objc-arc < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-%struct.__block_byref_x = type { i8*, %struct.__block_byref_x*, i32, i32, i32 }
-%struct.__block_descriptor = type { i64, i64 }
-@_NSConcreteStackBlock = external global i8*
-@__block_descriptor_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
-
-; The optimizer should make use of the !clang.arc.no_objc_arc_exceptions
-; metadata and eliminate the retainBlock+release pair here.
-; rdar://10803830.
-
-; CHECK-LABEL: define void @test0(
-; CHECK-NOT: @objc
-; CHECK: }
-define void @test0() {
-entry:
-  %x = alloca %struct.__block_byref_x, align 8
-  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8
-  %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 0
-  store i8* null, i8** %byref.isa, align 8
-  %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 1
-  store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, align 8
-  %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 2
-  store i32 0, i32* %byref.flags, align 8
-  %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 3
-  store i32 32, i32* %byref.size, align 4
-  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0
-  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
-  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1
-  store i32 1107296256, i32* %block.flags, align 8
-  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2
-  store i32 0, i32* %block.reserved, align 4
-  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3
-  store i8* bitcast (void (i8*)* @__foo_block_invoke_0 to i8*), i8** %block.invoke, align 8
-  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4
-  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
-  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5
-  %t1 = bitcast %struct.__block_byref_x* %x to i8*
-  store i8* %t1, i8** %block.captured, align 8
-  %t2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8*
-  %t3 = call i8* @objc_retainBlock(i8* %t2) nounwind, !clang.arc.copy_on_escape !4
-  %t4 = getelementptr inbounds i8* %t3, i64 16
-  %t5 = bitcast i8* %t4 to i8**
-  %t6 = load i8** %t5, align 8
-  %t7 = bitcast i8* %t6 to void (i8*)*
-  invoke void %t7(i8* %t3)
-          to label %invoke.cont unwind label %lpad, !clang.arc.no_objc_arc_exceptions !4
-
-invoke.cont:                                      ; preds = %entry
-  call void @objc_release(i8* %t3) nounwind, !clang.imprecise_release !4
-  call void @_Block_object_dispose(i8* %t1, i32 8)
-  ret void
-
-lpad:                                             ; preds = %entry
-  %t8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
-          cleanup
-  call void @_Block_object_dispose(i8* %t1, i32 8)
-  resume { i8*, i32 } %t8
-}
-
-; There is no !clang.arc.no_objc_arc_exceptions metadata here, so the optimizer
-; shouldn't eliminate anything, but *CAN* strength reduce the objc_retainBlock
-; to an objc_retain.
-
-; CHECK-LABEL: define void @test0_no_metadata(
-; CHECK: call i8* @objc_retain(
-; CHECK: invoke
-; CHECK: call void @objc_release(
-; CHECK: }
-define void @test0_no_metadata() {
-entry:
-  %x = alloca %struct.__block_byref_x, align 8
-  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8
-  %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 0
-  store i8* null, i8** %byref.isa, align 8
-  %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 1
-  store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, align 8
-  %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 2
-  store i32 0, i32* %byref.flags, align 8
-  %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 3
-  store i32 32, i32* %byref.size, align 4
-  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0
-  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
-  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1
-  store i32 1107296256, i32* %block.flags, align 8
-  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2
-  store i32 0, i32* %block.reserved, align 4
-  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3
-  store i8* bitcast (void (i8*)* @__foo_block_invoke_0 to i8*), i8** %block.invoke, align 8
-  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4
-  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
-  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5
-  %t1 = bitcast %struct.__block_byref_x* %x to i8*
-  store i8* %t1, i8** %block.captured, align 8
-  %t2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8*
-  %t3 = call i8* @objc_retainBlock(i8* %t2) nounwind, !clang.arc.copy_on_escape !4
-  %t4 = getelementptr inbounds i8* %t3, i64 16
-  %t5 = bitcast i8* %t4 to i8**
-  %t6 = load i8** %t5, align 8
-  %t7 = bitcast i8* %t6 to void (i8*)*
-  invoke void %t7(i8* %t3)
-          to label %invoke.cont unwind label %lpad
-
-invoke.cont:                                      ; preds = %entry
-  call void @objc_release(i8* %t3) nounwind, !clang.imprecise_release !4
-  call void @_Block_object_dispose(i8* %t1, i32 8)
-  ret void
-
-lpad:                                             ; preds = %entry
-  %t8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
-          cleanup
-  call void @_Block_object_dispose(i8* %t1, i32 8)
-  resume { i8*, i32 } %t8
-}
-
-declare i8* @objc_retainBlock(i8*)
-declare void @objc_release(i8*)
-declare void @_Block_object_dispose(i8*, i32)
-declare i32 @__objc_personality_v0(...)
-declare void @__foo_block_invoke_0(i8* nocapture) uwtable ssp
-
-!4 = metadata !{}
diff --git a/test/Transforms/ObjCARC/path-overflow.ll b/test/Transforms/ObjCARC/path-overflow.ll
index 605e860..3c14353 100644
--- a/test/Transforms/ObjCARC/path-overflow.ll
+++ b/test/Transforms/ObjCARC/path-overflow.ll
@@ -1,6 +1,8 @@
 ; RUN: opt -objc-arc -S < %s
 ; rdar://12277446
 ; rdar://12480535
+; rdar://14590914
+; rdar://15377890
 
 ; The total number of paths grows exponentially with the number of branches, and a
 ; computation of this number can overflow any reasonable fixed-sized
@@ -10,14 +12,22 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios5.0.0"
 
-%struct.NSConstantString.11.33.55.77.99.121.143.332.1130.1340.2768 = type { i32*, i32, i8*, i32 }
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+%struct.CGPoint = type { float, float }
 
-@_unnamed_cfstring_591 = external constant %struct.NSConstantString.11.33.55.77.99.121.143.332.1130.1340.2768, section "__DATA,__cfstring"
+@_unnamed_cfstring = external constant %struct.NSConstantString, section "__DATA,__cfstring"
+@_unnamed_cfstring_2 = external constant %struct.NSConstantString, section "__DATA,__cfstring"
 
 declare i8* @objc_retain(i8*) nonlazybind
 declare i8* @objc_retainAutoreleasedReturnValue(i8*) nonlazybind
 declare void @objc_release(i8*) nonlazybind
 declare i8* @returner()
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+declare void @NSLog(i8*, ...)
+declare void @objc_msgSend_stret(i8*, i8*, ...)
+declare i32 @__gxx_personality_sj0(...)
+declare i32 @__objc_personality_v0(...)
+
 
 define hidden void @test1() {
 entry:
@@ -30,7 +40,7 @@ msgSend.nullinit:                                 ; preds = %entry
   br label %msgSend.cont
 
 msgSend.cont:                                     ; preds = %msgSend.nullinit, %msgSend.call
-  %0 = bitcast %struct.NSConstantString.11.33.55.77.99.121.143.332.1130.1340.2768* @_unnamed_cfstring_591 to i8*
+  %0 = bitcast %struct.NSConstantString* @_unnamed_cfstring to i8*
   %1 = call i8* @objc_retain(i8* %0) nounwind
   br i1 undef, label %msgSend.nullinit33, label %msgSend.call32
 
@@ -853,5 +863,1331 @@ bb222:                                            ; preds = %bb20, %bb19
   ret void
 }
 
+; Function Attrs: ssp
+define void @test3() #1 {
+entry:
+  %call2 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %entry
+  %call5 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont4 unwind label %lpad3
+
+invoke.cont4:                                     ; preds = %invoke.cont
+  br i1 undef, label %land.end, label %land.rhs
+
+land.rhs:                                         ; preds = %invoke.cont4
+  %call7 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %land.end unwind label %lpad3
+
+land.end:                                         ; preds = %land.rhs, %invoke.cont4
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i unwind label %lpad.i
+
+invoke.cont.i:                                    ; preds = %land.end
+  br i1 undef, label %invoke.cont8, label %if.then.i
+
+if.then.i:                                        ; preds = %invoke.cont.i
+  br label %invoke.cont8
+
+lpad.i:                                           ; preds = %land.end
+  %tmp13 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont8:                                     ; preds = %if.then.i, %invoke.cont.i
+  %call18 = invoke i8* (i8*, i8*, i8*, ...)* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*, ...)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef)
+          to label %invoke.cont17 unwind label %lpad16
+
+invoke.cont17:                                    ; preds = %invoke.cont8
+  %call22 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont21 unwind label %lpad20
+
+invoke.cont21:                                    ; preds = %invoke.cont17
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i1980 unwind label %lpad.i1982
+
+invoke.cont.i1980:                                ; preds = %invoke.cont21
+  br i1 undef, label %invoke.cont24, label %if.then.i1981
+
+if.then.i1981:                                    ; preds = %invoke.cont.i1980
+  br label %invoke.cont24
+
+lpad.i1982:                                       ; preds = %invoke.cont21
+  %tmp28 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont24:                                    ; preds = %if.then.i1981, %invoke.cont.i1980
+  %call37 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont36 unwind label %lpad35
+
+invoke.cont36:                                    ; preds = %invoke.cont24
+  br i1 undef, label %land.end43, label %land.rhs39
+
+land.rhs39:                                       ; preds = %invoke.cont36
+  %call41 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %land.end43 unwind label %lpad35
+
+land.end43:                                       ; preds = %land.rhs39, %invoke.cont36
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i1986 unwind label %lpad.i1988
+
+invoke.cont.i1986:                                ; preds = %land.end43
+  br i1 undef, label %invoke.cont44, label %if.then.i1987
+
+if.then.i1987:                                    ; preds = %invoke.cont.i1986
+  br label %invoke.cont44
+
+lpad.i1988:                                       ; preds = %land.end43
+  %tmp42 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont44:                                    ; preds = %if.then.i1987, %invoke.cont.i1986
+  %call53 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont52 unwind label %lpad51
+
+invoke.cont52:                                    ; preds = %invoke.cont44
+  br i1 undef, label %land.end70, label %land.rhs58
+
+land.rhs58:                                       ; preds = %invoke.cont52
+  %call63 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 42)
+          to label %invoke.cont62 unwind label %lpad61
+
+invoke.cont62:                                    ; preds = %land.rhs58
+  %call68 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef)
+          to label %land.end70 unwind label %lpad66.body.thread
+
+land.end70:                                       ; preds = %invoke.cont62, %invoke.cont52
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i1992 unwind label %lpad66.body
+
+invoke.cont.i1992:                                ; preds = %land.end70
+  br i1 undef, label %invoke.cont71, label %if.then.i1993
+
+if.then.i1993:                                    ; preds = %invoke.cont.i1992
+  br label %invoke.cont71
+
+invoke.cont71:                                    ; preds = %if.then.i1993, %invoke.cont.i1992
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i1998 unwind label %lpad.i2000
+
+invoke.cont.i1998:                                ; preds = %invoke.cont71
+  br i1 undef, label %invoke.cont91, label %if.then.i1999
+
+if.then.i1999:                                    ; preds = %invoke.cont.i1998
+  br label %invoke.cont91
+
+lpad.i2000:                                       ; preds = %invoke.cont71
+  %tmp74 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup102
+
+invoke.cont91:                                    ; preds = %if.then.i1999, %invoke.cont.i1998
+  %call96 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont95 unwind label %lpad94
+
+invoke.cont95:                                    ; preds = %invoke.cont91
+  %call98 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* %call96)
+          to label %invoke.cont97 unwind label %lpad94
+
+invoke.cont97:                                    ; preds = %invoke.cont95
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2004 unwind label %lpad.i2006
+
+invoke.cont.i2004:                                ; preds = %invoke.cont97
+  br i1 undef, label %invoke.cont100, label %if.then.i2005
+
+if.then.i2005:                                    ; preds = %invoke.cont.i2004
+  br label %invoke.cont100
+
+lpad.i2006:                                       ; preds = %invoke.cont97
+  %tmp82 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont100:                                   ; preds = %if.then.i2005, %invoke.cont.i2004
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont110 unwind label %lpad109
+
+invoke.cont110:                                   ; preds = %invoke.cont100
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2010 unwind label %lpad.i2012
+
+invoke.cont.i2010:                                ; preds = %invoke.cont110
+  br i1 undef, label %invoke.cont117, label %if.then.i2011
+
+if.then.i2011:                                    ; preds = %invoke.cont.i2010
+  br label %invoke.cont117
+
+lpad.i2012:                                       ; preds = %invoke.cont110
+  %tmp98 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont117:                                   ; preds = %if.then.i2011, %invoke.cont.i2010
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2022 unwind label %lpad156.body
+
+lpad:                                             ; preds = %entry
+  %tmp118 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup
+
+lpad3:                                            ; preds = %land.rhs, %invoke.cont
+  %tmp119 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup
+
+ehcleanup:                                        ; preds = %lpad3, %lpad
+  unreachable
+
+lpad16:                                           ; preds = %invoke.cont8
+  %tmp121 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup26
+
+lpad20:                                           ; preds = %invoke.cont17
+  %tmp122 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup26
+
+ehcleanup26:                                      ; preds = %lpad20, %lpad16
+  unreachable
+
+lpad35:                                           ; preds = %land.rhs39, %invoke.cont24
+  %tmp124 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad51:                                           ; preds = %invoke.cont44
+  %tmp125 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad61:                                           ; preds = %land.rhs58
+  %tmp127 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad66.body.thread:                               ; preds = %invoke.cont62
+  %tmp128 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad66.body:                                      ; preds = %land.end70
+  %tmp129 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad94:                                           ; preds = %invoke.cont95, %invoke.cont91
+  %tmp133 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup102
+
+ehcleanup102:                                     ; preds = %lpad94, %lpad.i2000
+  unreachable
+
+lpad109:                                          ; preds = %invoke.cont100
+  %tmp134 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont.i2022:                                ; preds = %invoke.cont117
+  br i1 undef, label %invoke.cont157, label %if.then.i2023
+
+if.then.i2023:                                    ; preds = %invoke.cont.i2022
+  br label %invoke.cont157
+
+invoke.cont157:                                   ; preds = %if.then.i2023, %invoke.cont.i2022
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2028 unwind label %lpad164.body
+
+invoke.cont.i2028:                                ; preds = %invoke.cont157
+  br i1 undef, label %invoke.cont165, label %if.then.i2029
+
+if.then.i2029:                                    ; preds = %invoke.cont.i2028
+  br label %invoke.cont165
+
+invoke.cont165:                                   ; preds = %if.then.i2029, %invoke.cont.i2028
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, void (i8*, i8*)*)*)(i8* undef, i8* undef, void (i8*, i8*)* undef)
+          to label %invoke.cont184 unwind label %lpad183
+
+invoke.cont184:                                   ; preds = %invoke.cont165
+  %call186 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont185 unwind label %lpad183
+
+invoke.cont185:                                   ; preds = %invoke.cont184
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2034 unwind label %lpad.i2036
+
+invoke.cont.i2034:                                ; preds = %invoke.cont185
+  br i1 undef, label %invoke.cont190, label %if.then.i2035
+
+if.then.i2035:                                    ; preds = %invoke.cont.i2034
+  br label %invoke.cont190
+
+lpad.i2036:                                       ; preds = %invoke.cont185
+  %tmp168 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %lpad183.body
+
+invoke.cont190:                                   ; preds = %if.then.i2035, %invoke.cont.i2034
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont197 unwind label %lpad196
+
+invoke.cont197:                                   ; preds = %invoke.cont190
+  %call202 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont201 unwind label %lpad200
+
+invoke.cont201:                                   ; preds = %invoke.cont197
+  %call205 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont204 unwind label %lpad203
+
+invoke.cont204:                                   ; preds = %invoke.cont201
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2040 unwind label %lpad.i2042
+
+invoke.cont.i2040:                                ; preds = %invoke.cont204
+  br i1 undef, label %invoke.cont207, label %if.then.i2041
+
+if.then.i2041:                                    ; preds = %invoke.cont.i2040
+  br label %invoke.cont207
+
+lpad.i2042:                                       ; preds = %invoke.cont204
+  %tmp181 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont207:                                   ; preds = %if.then.i2041, %invoke.cont.i2040
+  %call209 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont208 unwind label %lpad203
+
+invoke.cont208:                                   ; preds = %invoke.cont207
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2046 unwind label %lpad212.body
+
+invoke.cont.i2046:                                ; preds = %invoke.cont208
+  br i1 undef, label %invoke.cont213, label %if.then.i2047
+
+if.then.i2047:                                    ; preds = %invoke.cont.i2046
+  br label %invoke.cont213
+
+invoke.cont213:                                   ; preds = %if.then.i2047, %invoke.cont.i2046
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont221 unwind label %lpad220
+
+invoke.cont221:                                   ; preds = %invoke.cont213
+  %call229 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont228 unwind label %lpad227
+
+invoke.cont228:                                   ; preds = %invoke.cont221
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2052 unwind label %lpad.i2054
+
+invoke.cont.i2052:                                ; preds = %invoke.cont228
+  br i1 undef, label %invoke.cont231, label %if.then.i2053
+
+if.then.i2053:                                    ; preds = %invoke.cont.i2052
+  br label %invoke.cont231
+
+lpad.i2054:                                       ; preds = %invoke.cont228
+  %tmp198 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont231:                                   ; preds = %if.then.i2053, %invoke.cont.i2052
+  %call233 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont232 unwind label %lpad227
+
+invoke.cont232:                                   ; preds = %invoke.cont231
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2058 unwind label %lpad236.body
+
+invoke.cont.i2058:                                ; preds = %invoke.cont232
+  br i1 undef, label %invoke.cont237, label %if.then.i2059
+
+if.then.i2059:                                    ; preds = %invoke.cont.i2058
+  br label %invoke.cont237
+
+invoke.cont237:                                   ; preds = %if.then.i2059, %invoke.cont.i2058
+  %call246 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont245 unwind label %lpad244
+
+invoke.cont245:                                   ; preds = %invoke.cont237
+  %call248 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 13)
+          to label %invoke.cont247 unwind label %lpad244
+
+invoke.cont247:                                   ; preds = %invoke.cont245
+  %call251 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 2)
+          to label %invoke.cont250 unwind label %lpad249
+
+invoke.cont250:                                   ; preds = %invoke.cont247
+  %call254 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 7)
+          to label %invoke.cont253 unwind label %lpad252
+
+invoke.cont253:                                   ; preds = %invoke.cont250
+  %call257 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8**, i32)*)(i8* undef, i8* undef, i8** undef, i32 3)
+          to label %invoke.cont256 unwind label %lpad255
+
+invoke.cont256:                                   ; preds = %invoke.cont253
+  %call260 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* undef)
+          to label %invoke.cont259 unwind label %lpad258
+
+invoke.cont259:                                   ; preds = %invoke.cont256
+  %call267 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont266 unwind label %lpad265
+
+invoke.cont266:                                   ; preds = %invoke.cont259
+  %call275 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef)
+          to label %invoke.cont274 unwind label %lpad273
+
+invoke.cont274:                                   ; preds = %invoke.cont266
+  %call279 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont278 unwind label %lpad277
+
+invoke.cont278:                                   ; preds = %invoke.cont274
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2064 unwind label %lpad.i2066
+
+invoke.cont.i2064:                                ; preds = %invoke.cont278
+  br i1 undef, label %invoke.cont281, label %if.then.i2065
+
+if.then.i2065:                                    ; preds = %invoke.cont.i2064
+  br label %invoke.cont281
+
+lpad.i2066:                                       ; preds = %invoke.cont278
+  %tmp253 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont281:                                   ; preds = %if.then.i2065, %invoke.cont.i2064
+  %call291 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont290 unwind label %lpad289
+
+invoke.cont290:                                   ; preds = %invoke.cont281
+  %call303 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 8)
+          to label %invoke.cont302 unwind label %lpad301
+
+invoke.cont302:                                   ; preds = %invoke.cont290
+  %call310 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, double)*)(i8* undef, i8* undef, double 5.000000e-01)
+          to label %invoke.cont309 unwind label %lpad308
+
+invoke.cont309:                                   ; preds = %invoke.cont302
+  %call313 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 42)
+          to label %invoke.cont312 unwind label %lpad311
+
+invoke.cont312:                                   ; preds = %invoke.cont309
+  %call316 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8**, i8**, i32)*)(i8* undef, i8* undef, i8** undef, i8** undef, i32 2)
+          to label %invoke.cont315 unwind label %lpad314
+
+invoke.cont315:                                   ; preds = %invoke.cont312
+  %call322 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef)
+          to label %invoke.cont321 unwind label %lpad320
+
+invoke.cont321:                                   ; preds = %invoke.cont315
+  br i1 undef, label %land.end344, label %land.rhs335
+
+land.rhs335:                                      ; preds = %invoke.cont321
+  %call342 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %land.end344 unwind label %lpad340.body.thread
+
+land.end344:                                      ; preds = %land.rhs335, %invoke.cont321
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2070 unwind label %lpad340.body
+
+invoke.cont.i2070:                                ; preds = %land.end344
+  br i1 undef, label %invoke.cont345, label %if.then.i2071
+
+if.then.i2071:                                    ; preds = %invoke.cont.i2070
+  br label %invoke.cont345
+
+invoke.cont345:                                   ; preds = %if.then.i2071, %invoke.cont.i2070
+  %call362 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef)
+          to label %invoke.cont361 unwind label %lpad360
+
+invoke.cont361:                                   ; preds = %invoke.cont345
+  %call365 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont364 unwind label %lpad363
+
+invoke.cont364:                                   ; preds = %invoke.cont361
+  %call371 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont370 unwind label %lpad369
+
+invoke.cont370:                                   ; preds = %invoke.cont364
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2076 unwind label %lpad.i2078
+
+invoke.cont.i2076:                                ; preds = %invoke.cont370
+  br i1 undef, label %invoke.cont373, label %if.then.i2077
+
+if.then.i2077:                                    ; preds = %invoke.cont.i2076
+  br label %invoke.cont373
+
+lpad.i2078:                                       ; preds = %invoke.cont370
+  %tmp340 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont373:                                   ; preds = %if.then.i2077, %invoke.cont.i2076
+  %call377 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32, i8*)*)(i8* undef, i8* undef, i32 42, i8* undef)
+          to label %invoke.cont376 unwind label %lpad363
+
+invoke.cont376:                                   ; preds = %invoke.cont373
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i32)*)(i8* undef, i8* undef, i8* undef, i32 5)
+          to label %invoke.cont382 unwind label %lpad381
+
+invoke.cont382:                                   ; preds = %invoke.cont376
+  %call384 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont383 unwind label %lpad381
+
+invoke.cont383:                                   ; preds = %invoke.cont382
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2082 unwind label %lpad.i2084
+
+invoke.cont.i2082:                                ; preds = %invoke.cont383
+  br i1 undef, label %invoke.cont392, label %if.then.i2083
+
+if.then.i2083:                                    ; preds = %invoke.cont.i2082
+  br label %invoke.cont392
+
+lpad.i2084:                                       ; preds = %invoke.cont383
+  %tmp360 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont392:                                   ; preds = %if.then.i2083, %invoke.cont.i2082
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i32)*)(i8* undef, i8* undef, i8* undef, i32 -2)
+          to label %invoke.cont395 unwind label %lpad381
+
+invoke.cont395:                                   ; preds = %invoke.cont392
+  %call397 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont396 unwind label %lpad381
+
+invoke.cont396:                                   ; preds = %invoke.cont395
+  %call400 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont399 unwind label %lpad398
+
+invoke.cont399:                                   ; preds = %invoke.cont396
+  %call403 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont402 unwind label %lpad401
+
+invoke.cont402:                                   ; preds = %invoke.cont399
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2088 unwind label %lpad.i2090
+
+invoke.cont.i2088:                                ; preds = %invoke.cont402
+  br i1 undef, label %invoke.cont405, label %if.then.i2089
+
+if.then.i2089:                                    ; preds = %invoke.cont.i2088
+  br label %invoke.cont405
+
+lpad.i2090:                                       ; preds = %invoke.cont402
+  %tmp370 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont405:                                   ; preds = %if.then.i2089, %invoke.cont.i2088
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i32)*)(i8* undef, i8* undef, i8* undef, i32 -1)
+          to label %invoke.cont408 unwind label %lpad381
+
+invoke.cont408:                                   ; preds = %invoke.cont405
+  %call410 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont409 unwind label %lpad381
+
+invoke.cont409:                                   ; preds = %invoke.cont408
+  %call413 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont412 unwind label %lpad411
+
+invoke.cont412:                                   ; preds = %invoke.cont409
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2094 unwind label %lpad.i2096
+
+invoke.cont.i2094:                                ; preds = %invoke.cont412
+  br i1 undef, label %invoke.cont418, label %if.then.i2095
+
+if.then.i2095:                                    ; preds = %invoke.cont.i2094
+  br label %invoke.cont418
+
+lpad.i2096:                                       ; preds = %invoke.cont412
+  %tmp380 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont418:                                   ; preds = %if.then.i2095, %invoke.cont.i2094
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i32)*)(i8* undef, i8* undef, i8* undef, i32 0)
+          to label %invoke.cont422 unwind label %lpad381
+
+invoke.cont422:                                   ; preds = %invoke.cont418
+  %call424 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont423 unwind label %lpad381
+
+invoke.cont423:                                   ; preds = %invoke.cont422
+  %call427 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont426 unwind label %lpad425
+
+invoke.cont426:                                   ; preds = %invoke.cont423
+  %call430 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont429 unwind label %lpad428
+
+invoke.cont429:                                   ; preds = %invoke.cont426
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2100 unwind label %lpad.i2102
+
+invoke.cont.i2100:                                ; preds = %invoke.cont429
+  br i1 undef, label %invoke.cont432, label %if.then.i2101
+
+if.then.i2101:                                    ; preds = %invoke.cont.i2100
+  br label %invoke.cont432
+
+lpad.i2102:                                       ; preds = %invoke.cont429
+  %tmp390 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont432:                                   ; preds = %if.then.i2101, %invoke.cont.i2100
+  %call436 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 0)
+          to label %invoke.cont435 unwind label %lpad381
+
+invoke.cont435:                                   ; preds = %invoke.cont432
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2106 unwind label %lpad.i2108
+
+invoke.cont.i2106:                                ; preds = %invoke.cont435
+  %call444 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 5)
+          to label %invoke.cont443 unwind label %lpad381
+
+lpad.i2108:                                       ; preds = %invoke.cont435
+  %tmp396 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont443:                                   ; preds = %invoke.cont.i2106
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2112 unwind label %lpad.i2114
+
+invoke.cont.i2112:                                ; preds = %invoke.cont443
+  br i1 undef, label %invoke.cont449, label %if.then.i2113
+
+if.then.i2113:                                    ; preds = %invoke.cont.i2112
+  br label %invoke.cont449
+
+lpad.i2114:                                       ; preds = %invoke.cont443
+  %tmp402 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont449:                                   ; preds = %if.then.i2113, %invoke.cont.i2112
+  %call453 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 -2)
+          to label %invoke.cont452 unwind label %lpad381
+
+invoke.cont452:                                   ; preds = %invoke.cont449
+  %call456 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont455 unwind label %lpad454
+
+invoke.cont455:                                   ; preds = %invoke.cont452
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2118 unwind label %lpad.i2120
+
+invoke.cont.i2118:                                ; preds = %invoke.cont455
+  br i1 undef, label %invoke.cont458, label %if.then.i2119
+
+if.then.i2119:                                    ; preds = %invoke.cont.i2118
+  br label %invoke.cont458
+
+lpad.i2120:                                       ; preds = %invoke.cont455
+  %tmp408 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont458:                                   ; preds = %if.then.i2119, %invoke.cont.i2118
+  %call461 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 -1)
+          to label %invoke.cont460 unwind label %lpad381
+
+invoke.cont460:                                   ; preds = %invoke.cont458
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2124 unwind label %lpad.i2126
+
+invoke.cont.i2124:                                ; preds = %invoke.cont460
+  br i1 undef, label %invoke.cont466, label %if.then.i2125
+
+if.then.i2125:                                    ; preds = %invoke.cont.i2124
+  br label %invoke.cont466
+
+lpad.i2126:                                       ; preds = %invoke.cont460
+  %tmp414 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup477
+
+invoke.cont466:                                   ; preds = %if.then.i2125, %invoke.cont.i2124
+  %call470 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 0)
+          to label %invoke.cont469 unwind label %lpad381
+
+invoke.cont469:                                   ; preds = %invoke.cont466
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2130 unwind label %lpad.i2132
+
+invoke.cont.i2130:                                ; preds = %invoke.cont469
+  br i1 undef, label %invoke.cont475, label %if.then.i2131
+
+if.then.i2131:                                    ; preds = %invoke.cont.i2130
+  br label %invoke.cont475
+
+lpad.i2132:                                       ; preds = %invoke.cont469
+  %tmp420 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup477
+
+invoke.cont475:                                   ; preds = %if.then.i2131, %invoke.cont.i2130
+  %call491 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 1)
+          to label %invoke.cont490 unwind label %lpad489
+
+invoke.cont490:                                   ; preds = %invoke.cont475
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont499 unwind label %lpad498
+
+invoke.cont499:                                   ; preds = %invoke.cont490
+  %call504 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont503 unwind label %lpad489
+
+invoke.cont503:                                   ; preds = %invoke.cont499
+  %call507 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 3)
+          to label %invoke.cont506 unwind label %lpad505
+
+invoke.cont506:                                   ; preds = %invoke.cont503
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont509 unwind label %lpad508
+
+invoke.cont509:                                   ; preds = %invoke.cont506
+  %call513 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont512 unwind label %lpad489
+
+invoke.cont512:                                   ; preds = %invoke.cont509
+  br i1 undef, label %msgSend.null-receiver, label %msgSend.call
+
+msgSend.call:                                     ; preds = %invoke.cont512
+  invoke void bitcast (void (i8*, i8*, ...)* @objc_msgSend_stret to void (%struct.CGPoint*, i8*, i8*)*)(%struct.CGPoint* sret undef, i8* undef, i8* undef)
+          to label %msgSend.cont unwind label %lpad514
+
+msgSend.null-receiver:                            ; preds = %invoke.cont512
+  br label %msgSend.cont
+
+msgSend.cont:                                     ; preds = %msgSend.null-receiver, %msgSend.call
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2136 unwind label %lpad.i2138
+
+invoke.cont.i2136:                                ; preds = %msgSend.cont
+  br i1 undef, label %invoke.cont521, label %if.then.i2137
+
+if.then.i2137:                                    ; preds = %invoke.cont.i2136
+  br label %invoke.cont521
+
+lpad.i2138:                                       ; preds = %msgSend.cont
+  %tmp468 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont521:                                   ; preds = %if.then.i2137, %invoke.cont.i2136
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef)
+          to label %invoke.cont528 unwind label %lpad527
+
+invoke.cont528:                                   ; preds = %invoke.cont521
+  %call532 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont531 unwind label %lpad489
+
+invoke.cont531:                                   ; preds = %invoke.cont528
+  %call535 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont534 unwind label %lpad533
+
+invoke.cont534:                                   ; preds = %invoke.cont531
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2142 unwind label %lpad.i2144
+
+invoke.cont.i2142:                                ; preds = %invoke.cont534
+  br i1 undef, label %invoke.cont540, label %if.then.i2143
+
+if.then.i2143:                                    ; preds = %invoke.cont.i2142
+  br label %invoke.cont540
+
+lpad.i2144:                                       ; preds = %invoke.cont534
+  %tmp486 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont540:                                   ; preds = %if.then.i2143, %invoke.cont.i2142
+  %call544 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i32 3)
+          to label %invoke.cont543 unwind label %lpad489
+
+invoke.cont543:                                   ; preds = %invoke.cont540
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef)
+          to label %invoke.cont546 unwind label %lpad545
+
+invoke.cont546:                                   ; preds = %invoke.cont543
+  %call549 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont548 unwind label %lpad489
+
+invoke.cont548:                                   ; preds = %invoke.cont546
+  %call555 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont554 unwind label %lpad553
+
+invoke.cont554:                                   ; preds = %invoke.cont548
+  %tmp499 = call i8* @objc_retain(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) #3
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* %tmp499, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2148 unwind label %lpad.i2150
+
+invoke.cont.i2148:                                ; preds = %invoke.cont554
+  call void @objc_release(i8* %tmp499) #3, !clang.imprecise_release !0
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont566 unwind label %lpad565
+
+lpad.i2150:                                       ; preds = %invoke.cont554
+  %tmp500 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  call void @objc_release(i8* %tmp499) #3, !clang.imprecise_release !0
+  unreachable
+
+invoke.cont566:                                   ; preds = %invoke.cont.i2148
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont572 unwind label %lpad571
+
+invoke.cont572:                                   ; preds = %invoke.cont566
+  %call582 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont581 unwind label %lpad580
+
+invoke.cont581:                                   ; preds = %invoke.cont572
+  unreachable
+
+lpad156.body:                                     ; preds = %invoke.cont117
+  %tmp1157 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad164.body:                                     ; preds = %invoke.cont157
+  %tmp1158 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad183:                                          ; preds = %invoke.cont184, %invoke.cont165
+  %tmp1159 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %lpad183.body
+
+lpad183.body:                                     ; preds = %lpad183, %lpad.i2036
+  unreachable
+
+lpad196:                                          ; preds = %invoke.cont190
+  %tmp1160 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad200:                                          ; preds = %invoke.cont197
+  %tmp1161 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad203:                                          ; preds = %invoke.cont207, %invoke.cont201
+  %tmp1162 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad212.body:                                     ; preds = %invoke.cont208
+  %tmp1163 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad220:                                          ; preds = %invoke.cont213
+  %tmp1164 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %eh.resume
+
+lpad227:                                          ; preds = %invoke.cont231, %invoke.cont221
+  %tmp1166 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup239
+
+lpad236.body:                                     ; preds = %invoke.cont232
+  %tmp1167 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup239
+
+ehcleanup239:                                     ; preds = %lpad236.body, %lpad227
+  unreachable
+
+lpad244:                                          ; preds = %invoke.cont245, %invoke.cont237
+  %tmp1168 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad249:                                          ; preds = %invoke.cont247
+  %tmp1169 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad252:                                          ; preds = %invoke.cont250
+  %tmp1170 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup263
+
+lpad255:                                          ; preds = %invoke.cont253
+  %tmp1171 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup263
+
+lpad258:                                          ; preds = %invoke.cont256
+  %tmp1172 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+ehcleanup263:                                     ; preds = %lpad255, %lpad252
+  unreachable
+
+lpad265:                                          ; preds = %invoke.cont259
+  %tmp1173 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad273:                                          ; preds = %invoke.cont266
+  %tmp1175 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad277:                                          ; preds = %invoke.cont274
+  %tmp1176 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad289:                                          ; preds = %invoke.cont281
+  %tmp1177 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad301:                                          ; preds = %invoke.cont290
+  %tmp1180 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad308:                                          ; preds = %invoke.cont302
+  %tmp1182 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad311:                                          ; preds = %invoke.cont309
+  %tmp1183 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad314:                                          ; preds = %invoke.cont312
+  %tmp1184 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad320:                                          ; preds = %invoke.cont315
+  %tmp1186 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad340.body.thread:                              ; preds = %land.rhs335
+  %tmp1188 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad340.body:                                     ; preds = %land.end344
+  %tmp1189 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad360:                                          ; preds = %invoke.cont345
+  %tmp1191 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %eh.resume
+
+lpad363:                                          ; preds = %invoke.cont373, %invoke.cont361
+  %tmp1192 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad369:                                          ; preds = %invoke.cont364
+  %tmp1194 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad381:                                          ; preds = %invoke.cont466, %invoke.cont458, %invoke.cont449, %invoke.cont.i2106, %invoke.cont432, %invoke.cont422, %invoke.cont418, %invoke.cont408, %invoke.cont405, %invoke.cont395, %invoke.cont392, %invoke.cont382, %invoke.cont376
+  %tmp1196 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup477
+
+lpad398:                                          ; preds = %invoke.cont396
+  %tmp1199 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad401:                                          ; preds = %invoke.cont399
+  %tmp1200 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad411:                                          ; preds = %invoke.cont409
+  %tmp1201 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad425:                                          ; preds = %invoke.cont423
+  %tmp1203 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup477
+
+lpad428:                                          ; preds = %invoke.cont426
+  %tmp1204 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad454:                                          ; preds = %invoke.cont452
+  %tmp1207 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+ehcleanup477:                                     ; preds = %lpad425, %lpad381, %lpad.i2132, %lpad.i2126
+  unreachable
+
+lpad489:                                          ; preds = %invoke.cont546, %invoke.cont540, %invoke.cont528, %invoke.cont509, %invoke.cont499, %invoke.cont475
+  %tmp1211 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup560
+
+lpad498:                                          ; preds = %invoke.cont490
+  %tmp1214 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad505:                                          ; preds = %invoke.cont503
+  %tmp1215 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad508:                                          ; preds = %invoke.cont506
+  %tmp1216 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad514:                                          ; preds = %msgSend.call
+  %tmp1217 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad527:                                          ; preds = %invoke.cont521
+  %tmp1219 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup560
+
+lpad533:                                          ; preds = %invoke.cont531
+  %tmp1220 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad545:                                          ; preds = %invoke.cont543
+  %tmp1222 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad553:                                          ; preds = %invoke.cont548
+  %tmp1224 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+ehcleanup560:                                     ; preds = %lpad527, %lpad489
+  br label %eh.resume
+
+lpad565:                                          ; preds = %invoke.cont.i2148
+  %tmp1225 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad571:                                          ; preds = %invoke.cont566
+  %tmp1227 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad580:                                          ; preds = %invoke.cont572
+  %tmp1228 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %eh.resume
+
+eh.resume:                                        ; preds = %lpad580, %ehcleanup560, %lpad360, %lpad220
+  resume { i8*, i32 } undef
+}
+
+@"OBJC_EHTYPE_$_NSException" = external global i8 ; declaration only: external i8 global with no initializer. NOTE(review): presumably the EH type symbol for NSException referenced further down in this file -- use site not visible here, confirm.
+
+define void @test4() {
+entry:
+  br i1 undef, label %if.end13, label %if.then10
+
+if.then10:                                        ; preds = %entry
+  br label %if.end13
+
+if.end13:                                         ; preds = %if.then10, %entry
+  %0 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*, i64, i8*, i8)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i64 2, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_2 to i8*), i8 signext 0), !clang.arc.no_objc_arc_exceptions !0
+  br i1 undef, label %if.then17, label %if.end18
+
+if.then17:                                        ; preds = %if.end13
+  br label %if.end18
+
+if.end18:                                         ; preds = %if.then17, %if.end13
+  br i1 undef, label %if.then64, label %if.end73
+
+if.then64:                                        ; preds = %if.end18
+  br i1 undef, label %cond.end71, label %cond.true68
+
+cond.true68:                                      ; preds = %if.then64
+  br label %cond.end71
+
+cond.end71:                                       ; preds = %cond.true68, %if.then64
+  br i1 undef, label %cleanup.action, label %cleanup.done
+
+cleanup.action:                                   ; preds = %cond.end71
+  br label %cleanup.done
+
+cleanup.done:                                     ; preds = %cleanup.action, %cond.end71
+  br label %if.end73
+
+if.end73:                                         ; preds = %cleanup.done, %if.end18
+  br i1 undef, label %forcoll.empty, label %forcoll.loopinit
+
+forcoll.loopinit:                                 ; preds = %if.end73
+  br label %forcoll.loopbody.outer
+
+forcoll.loopbody.outer:                           ; preds = %forcoll.refetch, %forcoll.loopinit
+  br label %forcoll.loopbody
+
+forcoll.loopbody:                                 ; preds = %forcoll.notmutated, %forcoll.loopbody.outer
+  br i1 undef, label %forcoll.notmutated, label %forcoll.mutated
+
+forcoll.mutated:                                  ; preds = %forcoll.loopbody
+  br label %forcoll.notmutated
+
+forcoll.notmutated:                               ; preds = %forcoll.mutated, %forcoll.loopbody
+  br i1 undef, label %forcoll.loopbody, label %forcoll.refetch
+
+forcoll.refetch:                                  ; preds = %forcoll.notmutated
+  br i1 undef, label %forcoll.empty, label %forcoll.loopbody.outer
+
+forcoll.empty:                                    ; preds = %forcoll.refetch, %if.end73
+  br i1 undef, label %if.end85, label %if.then82
+
+if.then82:                                        ; preds = %forcoll.empty
+  br label %if.end85
+
+if.end85:                                         ; preds = %if.then82, %forcoll.empty
+  br i1 undef, label %if.then87, label %if.end102
+
+if.then87:                                        ; preds = %if.end85
+  br i1 undef, label %if.end94, label %if.then91
+
+if.then91:                                        ; preds = %if.then87
+  br label %if.end94
+
+if.end94:                                         ; preds = %if.then91, %if.then87
+  br i1 undef, label %if.end101, label %if.then98
+
+if.then98:                                        ; preds = %if.end94
+  br label %if.end101
+
+if.end101:                                        ; preds = %if.then98, %if.end94
+  br label %if.end102
+
+if.end102:                                        ; preds = %if.end101, %if.end85
+  br i1 undef, label %do.body113, label %if.then107
+
+if.then107:                                       ; preds = %if.end102
+  br label %do.body113
+
+do.body113:                                       ; preds = %if.then107, %if.end102
+  br i1 undef, label %if.then116, label %if.end117
+
+if.then116:                                       ; preds = %do.body113
+  br label %if.end117
+
+if.end117:                                        ; preds = %if.then116, %do.body113
+  br i1 undef, label %if.then125, label %if.end126
+
+if.then125:                                       ; preds = %if.end117
+  br label %if.end126
+
+if.end126:                                        ; preds = %if.then125, %if.end117
+  br i1 undef, label %do.end166, label %cond.true132
+
+cond.true132:                                     ; preds = %if.end126
+  br i1 undef, label %do.body148, label %cond.true151
+
+do.body148:                                       ; preds = %cond.true132
+  br i1 undef, label %do.end166, label %cond.true151
+
+cond.true151:                                     ; preds = %do.body148, %cond.true132
+  br i1 undef, label %if.then162, label %do.end166
+
+if.then162:                                       ; preds = %cond.true151
+  br label %do.end166
+
+do.end166:                                        ; preds = %if.then162, %cond.true151, %do.body148, %if.end126
+  br i1 undef, label %if.then304, label %if.then170
+
+if.then170:                                       ; preds = %do.end166
+  br i1 undef, label %do.end193, label %cond.true179
+
+cond.true179:                                     ; preds = %if.then170
+  br i1 undef, label %if.then190, label %do.end193
+
+if.then190:                                       ; preds = %cond.true179
+  br label %do.end193
+
+do.end193:                                        ; preds = %if.then190, %cond.true179, %if.then170
+  br i1 undef, label %do.body200, label %do.body283
+
+do.body200:                                       ; preds = %do.end193
+  br i1 undef, label %do.end254, label %cond.true203
+
+cond.true203:                                     ; preds = %do.body200
+  br i1 undef, label %do.body218, label %cond.true221
+
+do.body218:                                       ; preds = %cond.true203
+  br i1 undef, label %do.end254, label %cond.true221
+
+cond.true221:                                     ; preds = %do.body218, %cond.true203
+  br i1 undef, label %if.then232, label %do.body236
+
+if.then232:                                       ; preds = %cond.true221
+  br label %do.body236
+
+do.body236:                                       ; preds = %if.then232, %cond.true221
+  br i1 undef, label %do.end254, label %cond.true239
+
+cond.true239:                                     ; preds = %do.body236
+  br i1 undef, label %if.then250, label %do.end254
+
+if.then250:                                       ; preds = %cond.true239
+  br label %do.end254
+
+do.end254:                                        ; preds = %if.then250, %cond.true239, %do.body236, %do.body218, %do.body200
+  br i1 undef, label %do.end277, label %cond.true263
+
+cond.true263:                                     ; preds = %do.end254
+  br i1 undef, label %if.then274, label %do.end277
+
+if.then274:                                       ; preds = %cond.true263
+  unreachable
+
+do.end277:                                        ; preds = %cond.true263, %do.end254
+  br i1 undef, label %if.then280, label %do.body283
+
+if.then280:                                       ; preds = %do.end277
+  br label %do.body283
+
+do.body283:                                       ; preds = %if.then280, %do.end277, %do.end193
+  br i1 undef, label %if.end301, label %cond.true286
+
+cond.true286:                                     ; preds = %do.body283
+  br i1 undef, label %if.then297, label %if.end301
+
+if.then297:                                       ; preds = %cond.true286
+  br label %if.end301
+
+if.end301:                                        ; preds = %if.then297, %cond.true286, %do.body283
+  br i1 undef, label %if.then304, label %do.body351
+
+if.then304:                                       ; preds = %if.end301, %do.end166
+  br i1 undef, label %do.body309.lr.ph, label %do.body351
+
+do.body309.lr.ph:                                 ; preds = %if.then304
+  br label %do.body309
+
+do.body309:                                       ; preds = %for.cond.backedge, %do.body309.lr.ph
+  br i1 undef, label %do.end328, label %cond.true312
+
+cond.true312:                                     ; preds = %do.body309
+  br i1 undef, label %if.then323, label %do.end328
+
+if.then323:                                       ; preds = %cond.true312
+  br label %do.end328
+
+do.end328:                                        ; preds = %if.then323, %cond.true312, %do.body309
+  br i1 undef, label %for.cond.backedge, label %cond.true335
+
+for.cond.backedge:                                ; preds = %if.then346, %cond.true335, %do.end328
+  br i1 undef, label %do.body309, label %do.body351
+
+cond.true335:                                     ; preds = %do.end328
+  br i1 undef, label %if.then346, label %for.cond.backedge
+
+if.then346:                                       ; preds = %cond.true335
+  br label %for.cond.backedge
+
+do.body351:                                       ; preds = %for.cond.backedge, %if.then304, %if.end301
+  br i1 undef, label %if.then354, label %if.end355
+
+if.then354:                                       ; preds = %do.body351
+  br label %if.end355
+
+if.end355:                                        ; preds = %if.then354, %do.body351
+  br i1 undef, label %if.else, label %if.then364
+
+if.then364:                                       ; preds = %if.end355
+  br label %do.body366
+
+if.else:                                          ; preds = %if.end355
+  br label %do.body366
+
+do.body366:                                       ; preds = %if.else, %if.then364
+  br i1 undef, label %if.then369, label %if.end377.critedge
+
+if.then369:                                       ; preds = %do.body366
+  br label %if.end377
+
+if.end377.critedge:                               ; preds = %do.body366
+  br label %if.end377
+
+if.end377:                                        ; preds = %if.end377.critedge, %if.then369
+  br i1 undef, label %if.then383, label %if.end392.critedge
+
+if.then383:                                       ; preds = %if.end377
+  br label %if.end392
+
+if.end392.critedge:                               ; preds = %if.end377
+  br label %if.end392
+
+if.end392:                                        ; preds = %if.end392.critedge, %if.then383
+  br i1 undef, label %if.then398, label %if.end399
+
+if.then398:                                       ; preds = %if.end392
+  br label %if.end399
+
+if.end399:                                        ; preds = %if.then398, %if.end392
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %eh.cont unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0
+
+eh.cont:                                          ; preds = %if.end399
+  br i1 undef, label %if.then430, label %if.end439.critedge
+
+if.then430:                                       ; preds = %eh.cont
+  %1 = call i8* @objc_retain(i8* %0)
+  br label %if.end439
+
+lpad:                                             ; preds = %if.end399
+  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* @"OBJC_EHTYPE_$_NSException"
+  unreachable
+
+if.end439.critedge:                               ; preds = %eh.cont
+  %3 = call i8* @objc_retain(i8* %0)
+  br label %if.end439
+
+if.end439:                                        ; preds = %if.end439.critedge, %if.then430
+  call void @objc_release(i8* %0), !clang.imprecise_release !0
+  unreachable
+
+return:                                           ; No predecessors!
+  ret void
+}
+
 
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block-alloca.ll b/test/Transforms/ObjCARC/retain-block-alloca.ll
deleted file mode 100644
index 6b1578a..0000000
--- a/test/Transforms/ObjCARC/retain-block-alloca.ll
+++ /dev/null
@@ -1,94 +0,0 @@
-; RUN: opt -S -objc-arc < %s | FileCheck %s
-; rdar://10209613
-
-%0 = type opaque
-%struct.__block_descriptor = type { i64, i64 }
-
-@_NSConcreteStackBlock = external global i8*
-@__block_descriptor_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
-@"\01L_OBJC_SELECTOR_REFERENCES_" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
-
-; CHECK-LABEL: define void @test(
-; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) [[NUW:#[0-9]+]]
-; CHECK: @objc_msgSend
-; CHECK-NEXT: @objc_release(i8* %3)
-define void @test(%0* %array) uwtable {
-entry:
-  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, align 8
-  %0 = bitcast %0* %array to i8*
-  %1 = tail call i8* @objc_retain(i8* %0) nounwind
-  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 0
-  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
-  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 1
-  store i32 1107296256, i32* %block.flags, align 8
-  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 2
-  store i32 0, i32* %block.reserved, align 4
-  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 3
-  store i8* bitcast (void (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8
-  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 4
-  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
-  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 5
-  store %0* %array, %0** %block.captured, align 8
-  %2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block to i8*
-  %3 = call i8* @objc_retainBlock(i8* %2) nounwind
-  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
-  call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*)*)(i8* %0, i8* %tmp2, i8* %3)
-  call void @objc_release(i8* %3) nounwind
-  %strongdestroy = load %0** %block.captured, align 8
-  %4 = bitcast %0* %strongdestroy to i8*
-  call void @objc_release(i8* %4) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Same as test, but the objc_retainBlock has a clang.arc.copy_on_escape
-; tag so it's safe to delete.
-
-; CHECK-LABEL: define void @test_with_COE(
-; CHECK-NOT: @objc_retainBlock
-; CHECK: @objc_msgSend
-; CHECK: @objc_release
-; CHECK-NOT: @objc_release
-; CHECK: }
-define void @test_with_COE(%0* %array) uwtable {
-entry:
-  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, align 8
-  %0 = bitcast %0* %array to i8*
-  %1 = tail call i8* @objc_retain(i8* %0) nounwind
-  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 0
-  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
-  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 1
-  store i32 1107296256, i32* %block.flags, align 8
-  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 2
-  store i32 0, i32* %block.reserved, align 4
-  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 3
-  store i8* bitcast (void (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8
-  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 4
-  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
-  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 5
-  store %0* %array, %0** %block.captured, align 8
-  %2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block to i8*
-  %3 = call i8* @objc_retainBlock(i8* %2) nounwind, !clang.arc.copy_on_escape !0
-  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
-  call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*)*)(i8* %0, i8* %tmp2, i8* %3)
-  call void @objc_release(i8* %3) nounwind
-  %strongdestroy = load %0** %block.captured, align 8
-  %4 = bitcast %0* %strongdestroy to i8*
-  call void @objc_release(i8* %4) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-declare i8* @objc_retain(i8*)
-
-declare void @__test_block_invoke_0(i8* nocapture) uwtable
-
-declare i8* @objc_retainBlock(i8*)
-
-declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
-
-declare void @objc_release(i8*)
-
-; CHECK: attributes #0 = { uwtable }
-; CHECK: attributes #1 = { nonlazybind }
-; CHECK: attributes [[NUW]] = { nounwind }
-
-!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block-escape-analysis.ll b/test/Transforms/ObjCARC/retain-block-escape-analysis.ll
deleted file mode 100644
index 7914bb8..0000000
--- a/test/Transforms/ObjCARC/retain-block-escape-analysis.ll
+++ /dev/null
@@ -1,215 +0,0 @@
-; RUN: opt -S -objc-arc < %s | FileCheck %s
-
-declare i8* @objc_retain(i8*) nonlazybind
-declare void @objc_release(i8*) nonlazybind
-declare i8* @objc_retainBlock(i8*)
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Use by an instruction which copies the value is an escape if the             ;
-; result is an escape. The current instructions with this property are:        ;
-;                                                                              ;
-; 1. BitCast.                                                                  ;
-; 2. GEP.                                                                      ;
-; 3. PhiNode.                                                                  ;
-; 4. SelectInst.                                                               ;
-;                                                                              ;
-; Make sure that such instructions do not confuse the optimizer into removing  ;
-; an objc_retainBlock that is needed.                                          ;
-;                                                                              ;
-; rdar://13273675. (With extra test cases to handle bitcast, phi, and select.  ;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define void @bitcasttest(i8* %storage, void (...)* %block)  {
-; CHECK-LABEL: define void @bitcasttest(
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %storage to void (...)**
-  %t5 = bitcast i8* %t3 to void (...)*
-  store void (...)* %t5, void (...)** %t4, align 8
-; CHECK: call void @objc_release
-  call void @objc_release(i8* %t1)
-  ret void
-; CHECK: }
-}
-
-define void @bitcasttest_a(i8* %storage, void (...)* %block)  {
-; CHECK-LABEL: define void @bitcasttest_a(
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK-NOT: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %storage to void (...)**
-  %t5 = bitcast i8* %t3 to void (...)*
-  store void (...)* %t5, void (...)** %t4, align 8
-; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1), !clang.imprecise_release !0
-  ret void
-; CHECK: }
-}
-
-define void @geptest(void (...)** %storage_array, void (...)* %block)  {
-; CHECK-LABEL: define void @geptest(
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %t3 to void (...)*
-  
-  %storage = getelementptr inbounds void (...)** %storage_array, i64 0
-  
-  store void (...)* %t4, void (...)** %storage, align 8
-; CHECK: call void @objc_release
-  call void @objc_release(i8* %t1)
-  ret void
-; CHECK: }
-}
-
-define void @geptest_a(void (...)** %storage_array, void (...)* %block)  {
-; CHECK-LABEL: define void @geptest_a(
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK-NOT: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %t3 to void (...)*
-  
-  %storage = getelementptr inbounds void (...)** %storage_array, i64 0
-  
-  store void (...)* %t4, void (...)** %storage, align 8
-; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1), !clang.imprecise_release !0
-  ret void
-; CHECK: }
-}
-
-define void @selecttest(void (...)** %store1, void (...)** %store2,
-                        void (...)* %block) {
-; CHECK-LABEL: define void @selecttest(
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %t3 to void (...)*
-  %store = select i1 undef, void (...)** %store1, void (...)** %store2
-  store void (...)* %t4, void (...)** %store, align 8
-; CHECK: call void @objc_release
-  call void @objc_release(i8* %t1)
-  ret void
-; CHECK: }
-}
-
-define void @selecttest_a(void (...)** %store1, void (...)** %store2,
-                          void (...)* %block) {
-; CHECK-LABEL: define void @selecttest_a(
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK-NOT: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %t3 to void (...)*
-  %store = select i1 undef, void (...)** %store1, void (...)** %store2
-  store void (...)* %t4, void (...)** %store, align 8
-; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1), !clang.imprecise_release !0
-  ret void
-; CHECK: }
-}
-
-define void @phinodetest(void (...)** %storage1,
-                         void (...)** %storage2,
-                         void (...)* %block) {
-; CHECK-LABEL: define void @phinodetest(
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %t3 to void (...)*
-  br i1 undef, label %store1_set, label %store2_set
-; CHECK: store1_set:
-
-store1_set:
-  br label %end
-
-store2_set:
-  br label %end
-
-end:
-; CHECK: end:
-  %storage = phi void (...)** [ %storage1, %store1_set ], [ %storage2, %store2_set]
-  store void (...)* %t4, void (...)** %storage, align 8
-; CHECK: call void @objc_release
-  call void @objc_release(i8* %t1)
-  ret void
-; CHECK: }
-}
-
-define void @phinodetest_a(void (...)** %storage1,
-                           void (...)** %storage2,
-                           void (...)* %block) {
-; CHECK-LABEL: define void @phinodetest_a(
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK-NOT: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %t3 to void (...)*
-  br i1 undef, label %store1_set, label %store2_set
-
-store1_set:
-  br label %end
-
-store2_set:
-  br label %end
-
-end:
-  %storage = phi void (...)** [ %storage1, %store1_set ], [ %storage2, %store2_set]
-  store void (...)* %t4, void (...)** %storage, align 8
-; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1), !clang.imprecise_release !0
-  ret void
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; This test makes sure that we do not hang clang when visiting a use ;
-; cycle caused by phi nodes during objc-arc analysis. *NOTE* This    ;
-; test case looks a little convoluted since it was produced by	     ;
-; bugpoint.							     ;
-; 								     ;
-; bugzilla://14551						     ;
-; rdar://12851911						     ;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define void @phinode_use_cycle(i8* %block) uwtable optsize ssp {
-; CHECK: define void @phinode_use_cycle(i8* %block)
-entry:
-  br label %for.body
-
-for.body:                                         ; preds = %if.then, %for.body, %entry
-  %block.05 = phi void (...)* [ null, %entry ], [ %1, %if.then ], [ %block.05, %for.body ]
-  br i1 undef, label %for.body, label %if.then
-
-if.then:                                          ; preds = %for.body
-  %0 = call i8* @objc_retainBlock(i8* %block), !clang.arc.copy_on_escape !0
-  %1 = bitcast i8* %0 to void (...)*
-  %2 = bitcast void (...)* %block.05 to i8*
-  call void @objc_release(i8* %2) nounwind, !clang.imprecise_release !0
-  br label %for.body
-}
-
-!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block-load.ll b/test/Transforms/ObjCARC/retain-block-load.ll
deleted file mode 100644
index a5170e3..0000000
--- a/test/Transforms/ObjCARC/retain-block-load.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: opt -objc-arc -S < %s | FileCheck %s
-
-; rdar://10803830
-; The optimizer should be able to prove that the block does not
-; "escape", so the retainBlock+release pair can be eliminated.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-%struct.__block_descriptor = type { i64, i64 }
-
-@_NSConcreteStackBlock = external global i8*
-@__block_descriptor_tmp = external global { i64, i64, i8*, i8* }
-
-; CHECK: define void @test() {
-; CHECK-NOT: @objc
-; CHECK: declare i8* @objc_retainBlock(i8*)
-; CHECK: declare void @objc_release(i8*)
-
-define void @test() {
-entry:
-  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, align 8
-  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 0
-  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
-  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 1
-  store i32 1073741824, i32* %block.flags, align 8
-  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 2
-  store i32 0, i32* %block.reserved, align 4
-  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 3
-  store i8* bitcast (i32 (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8
-  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 4
-  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
-  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 5
-  store i32 4, i32* %block.captured, align 8
-  %tmp = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block to i8*
-  %tmp1 = call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-  %tmp2 = getelementptr inbounds i8* %tmp1, i64 16
-  %tmp3 = bitcast i8* %tmp2 to i8**
-  %tmp4 = load i8** %tmp3, align 8
-  %tmp5 = bitcast i8* %tmp4 to i32 (i8*)*
-  %call = call i32 %tmp5(i8* %tmp1)
-  call void @objc_release(i8* %tmp1) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-declare i32 @__test_block_invoke_0(i8* nocapture %.block_descriptor) nounwind readonly
-
-declare i8* @objc_retainBlock(i8*)
-
-declare void @objc_release(i8*)
-
-!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block.ll b/test/Transforms/ObjCARC/retain-block.ll
deleted file mode 100644
index 1bb3f02..0000000
--- a/test/Transforms/ObjCARC/retain-block.ll
+++ /dev/null
@@ -1,140 +0,0 @@
-; RUN: opt -objc-arc -S < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64"
-
-!0 = metadata !{}
-
-declare i8* @objc_retain(i8*)
-declare void @callee(i8)
-declare void @use_pointer(i8*)
-declare void @objc_release(i8*)
-declare i8* @objc_retainBlock(i8*)
-declare i8* @objc_autorelease(i8*)
-
-; Basic retainBlock+release elimination.
-
-; CHECK: define void @test0(i8* %tmp) {
-; CHECK-NOT: @objc
-; CHECK: }
-define void @test0(i8* %tmp) {
-entry:
-  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Same as test0, but there's no copy_on_escape metadata, so there's no
-; optimization possible.
-
-; CHECK: define void @test0_no_metadata(i8* %tmp) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW:#[0-9]+]]
-; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
-; CHECK: }
-define void @test0_no_metadata(i8* %tmp) {
-entry:
-  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Same as test0, but the pointer escapes, so there's no
-; optimization possible.
-
-; CHECK: define void @test0_escape(i8* %tmp, i8** %z) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
-; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
-; CHECK: }
-define void @test0_escape(i8* %tmp, i8** %z) {
-entry:
-  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-  store i8* %tmp2, i8** %z
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Same as test0_escape, but there's no intervening call.
-
-; CHECK: define void @test0_just_escape(i8* %tmp, i8** %z) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
-; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
-; CHECK: }
-define void @test0_just_escape(i8* %tmp, i8** %z) {
-entry:
-  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-  store i8* %tmp2, i8** %z
-  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Basic nested retainBlock+release elimination.
-
-; CHECK: define void @test1(i8* %tmp) {
-; CHECK-NOT: @objc
-; CHECK: tail call i8* @objc_retain(i8* %tmp) [[NUW]]
-; CHECK-NOT: @objc
-; CHECK: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0
-; CHECK-NOT: @objc
-; CHECK: }
-define void @test1(i8* %tmp) {
-entry:
-  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
-  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
-  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Same as test1, but there's no copy_on_escape metadata, so there's no
-; retainBlock+release optimization possible. But we can still eliminate
-; the outer retain+release.
-
-; CHECK: define void @test1_no_metadata(i8* %tmp) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]]
-; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
-; CHECK-NOT: @objc
-; CHECK: }
-define void @test1_no_metadata(i8* %tmp) {
-entry:
-  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
-  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
-  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Same as test1, but the pointer escapes, so there's no
-; retainBlock+release optimization possible. But we can still eliminate
-; the outer retain+release
-
-; CHECK: define void @test1_escape(i8* %tmp, i8** %z) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
-; CHECK-NEXT: store i8* %tmp2, i8** %z
-; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
-; CHECK-NOT: @objc
-; CHECK: }
-define void @test1_escape(i8* %tmp, i8** %z) {
-entry:
-  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
-  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-  store i8* %tmp2, i8** %z
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
-  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/PhaseOrdering/lit.local.cfg b/test/Transforms/PhaseOrdering/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/PhaseOrdering/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/PruneEH/lit.local.cfg b/test/Transforms/PruneEH/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/PruneEH/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Reassociate/lit.local.cfg b/test/Transforms/Reassociate/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/Reassociate/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Reg2Mem/lit.local.cfg b/test/Transforms/Reg2Mem/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/Reg2Mem/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SCCP/lit.local.cfg b/test/Transforms/SCCP/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/SCCP/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SLPVectorizer/ARM/lit.local.cfg b/test/Transforms/SLPVectorizer/ARM/lit.local.cfg
new file mode 100644
index 0000000..5fc35d8
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/ARM/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())  # set of the whitespace-separated entries in targets_to_build
+if not 'ARM' in targets:  # no 'ARM' entry among them
+    config.unsupported = True  # flag this directory's lit config as unsupported
diff --git a/test/Transforms/SLPVectorizer/ARM/memory.ll b/test/Transforms/SLPVectorizer/ARM/memory.ll
new file mode 100644
index 0000000..383c808
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/ARM/memory.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+
+; On Swift unaligned <2 x double> stores need 4 uops and it is therefore cheaper
+; to do this scalar.
+
+; CHECK-LABEL: expensive_double_store
+; CHECK-NOT: load <2 x double>
+; CHECK-NOT: store <2 x double>
+define void @expensive_double_store(double* noalias %dst, double* noalias %src, i64 %count) {
+entry:
+  %0 = load double* %src, align 8
+  store double %0, double* %dst, align 8
+  %arrayidx2 = getelementptr inbounds double* %src, i64 1
+  %1 = load double* %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds double* %dst, i64 1
+  store double %1, double* %arrayidx3, align 8
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/ARM/sroa.ll b/test/Transforms/SLPVectorizer/ARM/sroa.ll
new file mode 100644
index 0000000..e0c75b1
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/ARM/sroa.ll
@@ -0,0 +1,52 @@
+; RUN: opt -S -mcpu=swift -mtriple=thumbv7-apple-ios -basicaa -slp-vectorizer < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+
+%class.Complex = type { double, double }
+
+; Code like this is the result of SROA. Make sure we don't vectorize this
+; because in the scalar version of this the shl/or are handled by the
+; backend and disappear, whereas the vectorized code stays.
+
+; CHECK-LABEL: SROAed
+; CHECK-NOT: shl <2 x i64>
+; CHECK-NOT: or <2 x i64>
+
+define void @SROAed(%class.Complex* noalias nocapture sret %agg.result, [4 x i32] %a.coerce, [4 x i32] %b.coerce) {
+entry:
+  %a.coerce.fca.0.extract = extractvalue [4 x i32] %a.coerce, 0
+  %a.sroa.0.0.insert.ext = zext i32 %a.coerce.fca.0.extract to i64
+  %a.coerce.fca.1.extract = extractvalue [4 x i32] %a.coerce, 1
+  %a.sroa.0.4.insert.ext = zext i32 %a.coerce.fca.1.extract to i64
+  %a.sroa.0.4.insert.shift = shl nuw i64 %a.sroa.0.4.insert.ext, 32
+  %a.sroa.0.4.insert.insert = or i64 %a.sroa.0.4.insert.shift, %a.sroa.0.0.insert.ext
+  %0 = bitcast i64 %a.sroa.0.4.insert.insert to double
+  %a.coerce.fca.2.extract = extractvalue [4 x i32] %a.coerce, 2
+  %a.sroa.3.8.insert.ext = zext i32 %a.coerce.fca.2.extract to i64
+  %a.coerce.fca.3.extract = extractvalue [4 x i32] %a.coerce, 3
+  %a.sroa.3.12.insert.ext = zext i32 %a.coerce.fca.3.extract to i64
+  %a.sroa.3.12.insert.shift = shl nuw i64 %a.sroa.3.12.insert.ext, 32
+  %a.sroa.3.12.insert.insert = or i64 %a.sroa.3.12.insert.shift, %a.sroa.3.8.insert.ext
+  %1 = bitcast i64 %a.sroa.3.12.insert.insert to double
+  %b.coerce.fca.0.extract = extractvalue [4 x i32] %b.coerce, 0
+  %b.sroa.0.0.insert.ext = zext i32 %b.coerce.fca.0.extract to i64
+  %b.coerce.fca.1.extract = extractvalue [4 x i32] %b.coerce, 1
+  %b.sroa.0.4.insert.ext = zext i32 %b.coerce.fca.1.extract to i64
+  %b.sroa.0.4.insert.shift = shl nuw i64 %b.sroa.0.4.insert.ext, 32
+  %b.sroa.0.4.insert.insert = or i64 %b.sroa.0.4.insert.shift, %b.sroa.0.0.insert.ext
+  %2 = bitcast i64 %b.sroa.0.4.insert.insert to double
+  %b.coerce.fca.2.extract = extractvalue [4 x i32] %b.coerce, 2
+  %b.sroa.3.8.insert.ext = zext i32 %b.coerce.fca.2.extract to i64
+  %b.coerce.fca.3.extract = extractvalue [4 x i32] %b.coerce, 3
+  %b.sroa.3.12.insert.ext = zext i32 %b.coerce.fca.3.extract to i64
+  %b.sroa.3.12.insert.shift = shl nuw i64 %b.sroa.3.12.insert.ext, 32
+  %b.sroa.3.12.insert.insert = or i64 %b.sroa.3.12.insert.shift, %b.sroa.3.8.insert.ext
+  %3 = bitcast i64 %b.sroa.3.12.insert.insert to double
+  %add = fadd double %0, %2
+  %add3 = fadd double %1, %3
+  %re.i.i = getelementptr inbounds %class.Complex* %agg.result, i32 0, i32 0
+  store double %add, double* %re.i.i, align 4
+  %im.i.i = getelementptr inbounds %class.Complex* %agg.result, i32 0, i32 1
+  store double %add3, double* %im.i.i, align 4
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/R600/lit.local.cfg b/test/Transforms/SLPVectorizer/R600/lit.local.cfg
new file mode 100644
index 0000000..9e0ab99
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/R600/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'R600' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/SLPVectorizer/R600/simplebb.ll b/test/Transforms/SLPVectorizer/R600/simplebb.ll
new file mode 100644
index 0000000..b6d794b
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/R600/simplebb.ll
@@ -0,0 +1,65 @@
+; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -slp-vectorizer -dce < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+
+
+; Simple 3-pair chain with loads and stores
+define void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, double addrspace(3)* %c) {
+; CHECK-LABEL: @test1_as_3_3_3(
+; CHECK: load <2 x double> addrspace(3)*
+; CHECK: load <2 x double> addrspace(3)*
+; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
+; CHECK: ret
+  %i0 = load double addrspace(3)* %a, align 8
+  %i1 = load double addrspace(3)* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double addrspace(3)* %a, i64 1
+  %i3 = load double addrspace(3)* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double addrspace(3)* %b, i64 1
+  %i4 = load double addrspace(3)* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double addrspace(3)* %c, align 8
+  %arrayidx5 = getelementptr inbounds double addrspace(3)* %c, i64 1
+  store double %mul5, double addrspace(3)* %arrayidx5, align 8
+  ret void
+}
+
+define void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) {
+; CHECK-LABEL: @test1_as_3_0_0(
+; CHECK: load <2 x double> addrspace(3)*
+; CHECK: load <2 x double>*
+; CHECK: store <2 x double> %{{.*}}, <2 x double>* %
+; CHECK: ret
+  %i0 = load double addrspace(3)* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double addrspace(3)* %a, i64 1
+  %i3 = load double addrspace(3)* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+}
+
+define void @test1_as_0_0_3(double* %a, double* %b, double addrspace(3)* %c) {
+; CHECK-LABEL: @test1_as_0_0_3(
+; CHECK: load <2 x double>*
+; CHECK: load <2 x double>*
+; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
+; CHECK: ret
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double addrspace(3)* %c, align 8
+  %arrayidx5 = getelementptr inbounds double addrspace(3)* %c, i64 1
+  store double %mul5, double addrspace(3)* %arrayidx5, align 8
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
index 931195e..25c6545 100644
--- a/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
@@ -60,9 +60,9 @@ if.end332:                                        ; preds = %if.then329, %if.end
   %sub334 = fsub float %add294, %dx272.1
   %sub338 = fsub float %add297, %dy276.1
   %arrayidx.i.i606 = getelementptr inbounds %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* %vertices, i64 0, i32 0, i64 0
-  store float %sub334, float* %arrayidx.i.i606, align 4, !tbaa !0
+  store float %sub334, float* %arrayidx.i.i606, align 4
   %arrayidx3.i607 = getelementptr inbounds %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* %vertices, i64 0, i32 0, i64 1
-  store float %sub338, float* %arrayidx3.i607, align 4, !tbaa !0
+  store float %sub338, float* %arrayidx3.i607, align 4
   br label %return
 
 return:                                           ; preds = %if.end332, %for.end271, %entry
@@ -82,7 +82,3 @@ if.end22.2:                                       ; preds = %if.then17.2, %if.en
 }
 
 attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!0 = metadata !{metadata !"float", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll b/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
new file mode 100644
index 0000000..8da3c34
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.DState = type { i32, i32 }
+
+@b = common global %struct.DState zeroinitializer, align 4
+@d = common global i32 0, align 4
+@c = common global i32 0, align 4
+@a = common global i32 0, align 4
+@e = common global i32 0, align 4
+
+define i32 @fn1() {
+entry:
+  %0 = load i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 0), align 4
+  %1 = load i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 1), align 4
+  %2 = load i32* @d, align 4
+  %cond = icmp eq i32 %2, 0
+  br i1 %cond, label %sw.bb, label %save_state_and_return
+
+sw.bb:                                            ; preds = %entry
+  %3 = load i32* @c, align 4
+  %and = and i32 %3, 7
+  store i32 %and, i32* @a, align 4
+  switch i32 %and, label %if.end [
+    i32 7, label %save_state_and_return
+    i32 0, label %save_state_and_return
+  ]
+
+if.end:                                           ; preds = %sw.bb
+  br label %save_state_and_return
+
+save_state_and_return:                            ; preds = %sw.bb, %sw.bb, %if.end, %entry
+  %t.0 = phi i32 [ 0, %if.end ], [ %0, %entry ], [ %0, %sw.bb ], [ %0, %sw.bb ]
+  %f.0 = phi i32 [ 0, %if.end ], [ %1, %entry ], [ 0, %sw.bb ], [ 0, %sw.bb ]
+  store i32 %t.0, i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 0), align 4
+  store i32 %f.0, i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 1), align 4
+  ret i32 undef
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/debug_info.ll b/test/Transforms/SLPVectorizer/X86/debug_info.ll
index b408913..f4e68f2 100644
--- a/test/Transforms/SLPVectorizer/X86/debug_info.ll
+++ b/test/Transforms/SLPVectorizer/X86/debug_info.ll
@@ -13,13 +13,13 @@ target triple = "x86_64-apple-macosx10.7.0"
 ; }
 
 ;CHECK: @depth
-;CHECK: getelementptr inbounds {{.*}}, !dbg !24
-;CHECK: bitcast double* {{.*}}, !dbg !24
-;CHECK: load <2 x double>* {{.*}}, !dbg !24
-;CHECK: store <2 x double> {{.*}}, !dbg !26
+;CHECK: getelementptr inbounds {{.*}}, !dbg ![[LOC:[0-9]+]]
+;CHECK: bitcast double* {{.*}}, !dbg ![[LOC]]
+;CHECK: load <2 x double>* {{.*}}, !dbg ![[LOC]]
+;CHECK: store <2 x double> {{.*}}, !dbg ![[LOC2:[0-9]+]]
 ;CHECK: ret
-;CHECK: !24 = metadata !{i32 4, i32 0,
-;CHECK: !26 = metadata !{i32 7, i32 0,
+;CHECK: ![[LOC]] = metadata !{i32 4, i32 0,
+;CHECK: ![[LOC2]] = metadata !{i32 7, i32 0,
 
 define i32 @depth(double* nocapture %A, i32 %m) #0 {
 entry:
@@ -33,18 +33,18 @@ entry:
 
 for.body.lr.ph:                                   ; preds = %entry
   %arrayidx = getelementptr inbounds double* %A, i64 4, !dbg !24
-  %0 = load double* %arrayidx, align 8, !dbg !24, !tbaa !26
+  %0 = load double* %arrayidx, align 8, !dbg !24
   %arrayidx1 = getelementptr inbounds double* %A, i64 5, !dbg !29
-  %1 = load double* %arrayidx1, align 8, !dbg !29, !tbaa !26
+  %1 = load double* %arrayidx1, align 8, !dbg !29
   br label %for.end, !dbg !23
 
 for.end:                                          ; preds = %for.body.lr.ph, %entry
   %y1.0.lcssa = phi double [ %1, %for.body.lr.ph ], [ 1.000000e+00, %entry ]
   %y0.0.lcssa = phi double [ %0, %for.body.lr.ph ], [ 0.000000e+00, %entry ]
   %arrayidx2 = getelementptr inbounds double* %A, i64 8, !dbg !30
-  store double %y0.0.lcssa, double* %arrayidx2, align 8, !dbg !30, !tbaa !26
+  store double %y0.0.lcssa, double* %arrayidx2, align 8, !dbg !30
   %arrayidx3 = getelementptr inbounds double* %A, i64 9, !dbg !30
-  store double %y1.0.lcssa, double* %arrayidx3, align 8, !dbg !30, !tbaa !26
+  store double %y1.0.lcssa, double* %arrayidx3, align 8, !dbg !30
   ret i32 undef, !dbg !31
 }
 
@@ -55,7 +55,7 @@ attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-po
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!18}
+!llvm.module.flags = !{!18, !32}
 
 !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 187335) (llvm/trunk 187335:187340M)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/nadav/file.c] [DW_LANG_C99]
 !1 = metadata !{metadata !"file.c", metadata !"/Users/nadav"}
@@ -63,7 +63,7 @@ attributes #1 = { nounwind readnone }
 !3 = metadata !{metadata !4}
 !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"depth", metadata !"depth", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (double*, i32)* @depth, null, null, metadata !11, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [depth]
 !5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/Users/nadav/file.c]
-!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{metadata !8, metadata !9, metadata !8}
 !8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from double]
@@ -83,9 +83,7 @@ attributes #1 = { nounwind readnone }
 !23 = metadata !{i32 3, i32 0, metadata !17, null}
 !24 = metadata !{i32 4, i32 0, metadata !25, null}
 !25 = metadata !{i32 786443, metadata !1, metadata !17, i32 3, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/Users/nadav/file.c]
-!26 = metadata !{metadata !"double", metadata !27}
-!27 = metadata !{metadata !"omnipotent char", metadata !28}
-!28 = metadata !{metadata !"Simple C/C++ TBAA"}
 !29 = metadata !{i32 5, i32 0, metadata !25, null}
 !30 = metadata !{i32 7, i32 0, metadata !4, null}
 !31 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
+!32 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/SLPVectorizer/X86/external_user.ll b/test/Transforms/SLPVectorizer/X86/external_user.ll
index 22f0e64..6d09aa6 100644
--- a/test/Transforms/SLPVectorizer/X86/external_user.ll
+++ b/test/Transforms/SLPVectorizer/X86/external_user.ll
@@ -59,3 +59,38 @@ for.end:                                          ; preds = %for.body
   ret double %mul3
 }
 
+; A need-to-gather entry cannot be an external use of the scalar element.
+; Instead the insertelement instructions of the need-to-gather entry are the
+; external users.
+; This test would assert because we would keep the scalar fpext and fadd alive.
+; PR18129
+
+; CHECK-LABEL: needtogather
+define i32 @needtogather(double *noalias %a, i32 *noalias %b,  float * noalias %c,
+                i32 * noalias %d) {
+entry:
+  %0 = load i32* %d, align 4
+  %conv = sitofp i32 %0 to float
+  %1 = load float* %c
+  %sub = fsub float 0.000000e+00, %1
+  %mul = fmul float %sub, 0.000000e+00
+  %add = fadd float %conv, %mul
+  %conv1 = fpext float %add to double
+  %sub3 = fsub float 1.000000e+00, %1
+  %mul4 = fmul float %sub3, 0.000000e+00
+  %add5 = fadd float %conv, %mul4
+  %conv6 = fpext float %add5 to double
+  %tobool = fcmp une float %add, 0.000000e+00
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+  br label %if.end
+
+if.end:
+  %storemerge = phi double [ %conv6, %if.then ], [ %conv1, %entry ]
+  %e.0 = phi double [ %conv1, %if.then ], [ %conv6, %entry ]
+  store double %storemerge, double* %a, align 8
+  %conv7 = fptosi double %e.0 to i32
+  store i32 %conv7, i32* %b, align 4
+  ret i32 undef
+}
diff --git a/test/Transforms/SLPVectorizer/X86/horizontal.ll b/test/Transforms/SLPVectorizer/X86/horizontal.ll
new file mode 100644
index 0000000..8f91951
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -0,0 +1,417 @@
+; RUN: opt -slp-vectorizer -slp-vectorize-hor -S <  %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSTORE
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; #include <stdint.h>
+;
+; int foo(float *A, int n) {
+;   float sum = 0;
+;   for (intptr_t i=0; i < n; ++i) {
+;     sum += 7*A[i*4  ] +
+;            7*A[i*4+1] +
+;            7*A[i*4+2] +
+;            7*A[i*4+3];
+;   }
+;   return sum;
+; }
+
+; NOSTORE-LABEL: add_red
+; NOSTORE: fmul <4 x float>
+; NOSTORE: shufflevector <4 x float>
+
+define i32 @add_red(float* %A, i32 %n) {
+entry:
+  %cmp31 = icmp sgt i32 %n, 0
+  br i1 %cmp31, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %0 = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %i.033 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %sum.032 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add17, %for.body ]
+  %mul = shl nsw i64 %i.033, 2
+  %arrayidx = getelementptr inbounds float* %A, i64 %mul
+  %1 = load float* %arrayidx, align 4
+  %mul2 = fmul float %1, 7.000000e+00
+  %add28 = or i64 %mul, 1
+  %arrayidx4 = getelementptr inbounds float* %A, i64 %add28
+  %2 = load float* %arrayidx4, align 4
+  %mul5 = fmul float %2, 7.000000e+00
+  %add6 = fadd fast float %mul2, %mul5
+  %add829 = or i64 %mul, 2
+  %arrayidx9 = getelementptr inbounds float* %A, i64 %add829
+  %3 = load float* %arrayidx9, align 4
+  %mul10 = fmul float %3, 7.000000e+00
+  %add11 = fadd fast float %add6, %mul10
+  %add1330 = or i64 %mul, 3
+  %arrayidx14 = getelementptr inbounds float* %A, i64 %add1330
+  %4 = load float* %arrayidx14, align 4
+  %mul15 = fmul float %4, 7.000000e+00
+  %add16 = fadd fast float %add11, %mul15
+  %add17 = fadd fast float %sum.032, %add16
+  %inc = add nsw i64 %i.033, 1
+  %exitcond = icmp eq i64 %inc, %0
+  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:
+  %phitmp = fptosi float %add17 to i32
+  br label %for.end
+
+for.end:
+  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i32 %sum.0.lcssa
+}
+
+; int foo(float * restrict A, float * restrict B, int n) {
+;   float sum = 0;
+;   for (intptr_t i=0; i < n; ++i) {
+;     sum *= B[0]*A[i*4  ] +
+;       B[1]*A[i*4+1] +
+;       B[2]*A[i*4+2] +
+;       B[3]*A[i*4+3];
+;   }
+;   return sum;
+; }
+
+; CHECK-LABEL: mul_red
+; CHECK: fmul <4 x float>
+; CHECK: shufflevector <4 x float>
+
+define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) {
+entry:
+  %cmp38 = icmp sgt i32 %n, 0
+  br i1 %cmp38, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %0 = load float* %B, align 4
+  %arrayidx4 = getelementptr inbounds float* %B, i64 1
+  %1 = load float* %arrayidx4, align 4
+  %arrayidx9 = getelementptr inbounds float* %B, i64 2
+  %2 = load float* %arrayidx9, align 4
+  %arrayidx15 = getelementptr inbounds float* %B, i64 3
+  %3 = load float* %arrayidx15, align 4
+  %4 = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %i.040 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %sum.039 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %mul21, %for.body ]
+  %mul = shl nsw i64 %i.040, 2
+  %arrayidx2 = getelementptr inbounds float* %A, i64 %mul
+  %5 = load float* %arrayidx2, align 4
+  %mul3 = fmul float %0, %5
+  %add35 = or i64 %mul, 1
+  %arrayidx6 = getelementptr inbounds float* %A, i64 %add35
+  %6 = load float* %arrayidx6, align 4
+  %mul7 = fmul float %1, %6
+  %add8 = fadd fast float %mul3, %mul7
+  %add1136 = or i64 %mul, 2
+  %arrayidx12 = getelementptr inbounds float* %A, i64 %add1136
+  %7 = load float* %arrayidx12, align 4
+  %mul13 = fmul float %2, %7
+  %add14 = fadd fast float %add8, %mul13
+  %add1737 = or i64 %mul, 3
+  %arrayidx18 = getelementptr inbounds float* %A, i64 %add1737
+  %8 = load float* %arrayidx18, align 4
+  %mul19 = fmul float %3, %8
+  %add20 = fadd fast float %add14, %mul19
+  %mul21 = fmul float %sum.039, %add20
+  %inc = add nsw i64 %i.040, 1
+  %exitcond = icmp eq i64 %inc, %4
+  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:
+  %phitmp = fptosi float %mul21 to i32
+  br label %for.end
+
+for.end:
+  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i32 %sum.0.lcssa
+}
+
+; int foo(float * restrict A, float * restrict B, int n) {
+;   float sum = 0;
+;   for (intptr_t i=0; i < n; ++i) {
+;     sum += B[0]*A[i*6  ] +
+;            B[1]*A[i*6+1] +
+;            B[2]*A[i*6+2] +
+;            B[3]*A[i*6+3] +
+;            B[4]*A[i*6+4] +
+;            B[5]*A[i*6+5] +
+;            B[6]*A[i*6+6] +
+;            B[7]*A[i*6+7] +
+;            B[8]*A[i*6+8];
+;   }
+;   return sum;
+; }
+
+; CHECK-LABEL: long_red
+; CHECK: fmul <4 x float>
+; CHECK: shufflevector <4 x float>
+
+define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
+entry:
+  %cmp81 = icmp sgt i32 %n, 0
+  br i1 %cmp81, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %0 = load float* %B, align 4
+  %arrayidx4 = getelementptr inbounds float* %B, i64 1
+  %1 = load float* %arrayidx4, align 4
+  %arrayidx9 = getelementptr inbounds float* %B, i64 2
+  %2 = load float* %arrayidx9, align 4
+  %arrayidx15 = getelementptr inbounds float* %B, i64 3
+  %3 = load float* %arrayidx15, align 4
+  %arrayidx21 = getelementptr inbounds float* %B, i64 4
+  %4 = load float* %arrayidx21, align 4
+  %arrayidx27 = getelementptr inbounds float* %B, i64 5
+  %5 = load float* %arrayidx27, align 4
+  %arrayidx33 = getelementptr inbounds float* %B, i64 6
+  %6 = load float* %arrayidx33, align 4
+  %arrayidx39 = getelementptr inbounds float* %B, i64 7
+  %7 = load float* %arrayidx39, align 4
+  %arrayidx45 = getelementptr inbounds float* %B, i64 8
+  %8 = load float* %arrayidx45, align 4
+  %9 = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %i.083 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %sum.082 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add51, %for.body ]
+  %mul = mul nsw i64 %i.083, 6
+  %arrayidx2 = getelementptr inbounds float* %A, i64 %mul
+  %10 = load float* %arrayidx2, align 4
+  %mul3 = fmul fast float %0, %10
+  %add80 = or i64 %mul, 1
+  %arrayidx6 = getelementptr inbounds float* %A, i64 %add80
+  %11 = load float* %arrayidx6, align 4
+  %mul7 = fmul fast float %1, %11
+  %add8 = fadd fast float %mul3, %mul7
+  %add11 = add nsw i64 %mul, 2
+  %arrayidx12 = getelementptr inbounds float* %A, i64 %add11
+  %12 = load float* %arrayidx12, align 4
+  %mul13 = fmul fast float %2, %12
+  %add14 = fadd fast float %add8, %mul13
+  %add17 = add nsw i64 %mul, 3
+  %arrayidx18 = getelementptr inbounds float* %A, i64 %add17
+  %13 = load float* %arrayidx18, align 4
+  %mul19 = fmul fast float %3, %13
+  %add20 = fadd fast float %add14, %mul19
+  %add23 = add nsw i64 %mul, 4
+  %arrayidx24 = getelementptr inbounds float* %A, i64 %add23
+  %14 = load float* %arrayidx24, align 4
+  %mul25 = fmul fast float %4, %14
+  %add26 = fadd fast float %add20, %mul25
+  %add29 = add nsw i64 %mul, 5
+  %arrayidx30 = getelementptr inbounds float* %A, i64 %add29
+  %15 = load float* %arrayidx30, align 4
+  %mul31 = fmul fast float %5, %15
+  %add32 = fadd fast float %add26, %mul31
+  %add35 = add nsw i64 %mul, 6
+  %arrayidx36 = getelementptr inbounds float* %A, i64 %add35
+  %16 = load float* %arrayidx36, align 4
+  %mul37 = fmul fast float %6, %16
+  %add38 = fadd fast float %add32, %mul37
+  %add41 = add nsw i64 %mul, 7
+  %arrayidx42 = getelementptr inbounds float* %A, i64 %add41
+  %17 = load float* %arrayidx42, align 4
+  %mul43 = fmul fast float %7, %17
+  %add44 = fadd fast float %add38, %mul43
+  %add47 = add nsw i64 %mul, 8
+  %arrayidx48 = getelementptr inbounds float* %A, i64 %add47
+  %18 = load float* %arrayidx48, align 4
+  %mul49 = fmul fast float %8, %18
+  %add50 = fadd fast float %add44, %mul49
+  %add51 = fadd fast float %sum.082, %add50
+  %inc = add nsw i64 %i.083, 1
+  %exitcond = icmp eq i64 %inc, %9
+  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:
+  %phitmp = fptosi float %add51 to i32
+  br label %for.end
+
+for.end:
+  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i32 %sum.0.lcssa
+}
+
+; int foo(float * restrict A, float * restrict B, int n) {
+;   float sum = 0;
+;   for (intptr_t i=0; i < n; ++i) {
+;     sum += B[0]*A[i*4  ];
+;     sum += B[1]*A[i*4+1];
+;     sum += B[2]*A[i*4+2];
+;     sum += B[3]*A[i*4+3];
+;   }
+;   return sum;
+; }
+
+; CHECK-LABEL: chain_red
+; CHECK: fmul <4 x float>
+; CHECK: shufflevector <4 x float>
+
+define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) {
+entry:
+  %cmp41 = icmp sgt i32 %n, 0
+  br i1 %cmp41, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %0 = load float* %B, align 4
+  %arrayidx4 = getelementptr inbounds float* %B, i64 1
+  %1 = load float* %arrayidx4, align 4
+  %arrayidx10 = getelementptr inbounds float* %B, i64 2
+  %2 = load float* %arrayidx10, align 4
+  %arrayidx16 = getelementptr inbounds float* %B, i64 3
+  %3 = load float* %arrayidx16, align 4
+  %4 = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %i.043 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %sum.042 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add21, %for.body ]
+  %mul = shl nsw i64 %i.043, 2
+  %arrayidx2 = getelementptr inbounds float* %A, i64 %mul
+  %5 = load float* %arrayidx2, align 4
+  %mul3 = fmul fast float %0, %5
+  %add = fadd fast float %sum.042, %mul3
+  %add638 = or i64 %mul, 1
+  %arrayidx7 = getelementptr inbounds float* %A, i64 %add638
+  %6 = load float* %arrayidx7, align 4
+  %mul8 = fmul fast float %1, %6
+  %add9 = fadd fast float %add, %mul8
+  %add1239 = or i64 %mul, 2
+  %arrayidx13 = getelementptr inbounds float* %A, i64 %add1239
+  %7 = load float* %arrayidx13, align 4
+  %mul14 = fmul fast float %2, %7
+  %add15 = fadd fast float %add9, %mul14
+  %add1840 = or i64 %mul, 3
+  %arrayidx19 = getelementptr inbounds float* %A, i64 %add1840
+  %8 = load float* %arrayidx19, align 4
+  %mul20 = fmul fast float %3, %8
+  %add21 = fadd fast float %add15, %mul20
+  %inc = add nsw i64 %i.043, 1
+  %exitcond = icmp eq i64 %inc, %4
+  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:
+  %phitmp = fptosi float %add21 to i32
+  br label %for.end
+
+for.end:
+  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i32 %sum.0.lcssa
+}
+
+; int foo(float * restrict A, float * restrict B, float * restrict C, int n) {
+;   float sum = 0;
+;   for (intptr_t i=0; i < n; ++i) {
+;     C[i] = B[0] *A[i*4  ] +
+;          B[1] *A[i*4+1] +
+;          B[2] *A[i*4+2] +
+;          B[3] *A[i*4+3];
+;   }
+;   return sum;
+; }
+
+; CHECK-LABEL: store_red
+; CHECK: fmul <4 x float>
+; CHECK: shufflevector <4 x float>
+
+define i32 @store_red(float* noalias %A, float* noalias %B, float* noalias %C, i32 %n) {
+entry:
+  %cmp37 = icmp sgt i32 %n, 0
+  br i1 %cmp37, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %arrayidx4 = getelementptr inbounds float* %B, i64 1
+  %arrayidx9 = getelementptr inbounds float* %B, i64 2
+  %arrayidx15 = getelementptr inbounds float* %B, i64 3
+  %0 = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %i.039 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %C.addr.038 = phi float* [ %C, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
+  %1 = load float* %B, align 4
+  %mul = shl nsw i64 %i.039, 2
+  %arrayidx2 = getelementptr inbounds float* %A, i64 %mul
+  %2 = load float* %arrayidx2, align 4
+  %mul3 = fmul fast float %1, %2
+  %3 = load float* %arrayidx4, align 4
+  %add34 = or i64 %mul, 1
+  %arrayidx6 = getelementptr inbounds float* %A, i64 %add34
+  %4 = load float* %arrayidx6, align 4
+  %mul7 = fmul fast float %3, %4
+  %add8 = fadd fast float %mul3, %mul7
+  %5 = load float* %arrayidx9, align 4
+  %add1135 = or i64 %mul, 2
+  %arrayidx12 = getelementptr inbounds float* %A, i64 %add1135
+  %6 = load float* %arrayidx12, align 4
+  %mul13 = fmul fast float %5, %6
+  %add14 = fadd fast float %add8, %mul13
+  %7 = load float* %arrayidx15, align 4
+  %add1736 = or i64 %mul, 3
+  %arrayidx18 = getelementptr inbounds float* %A, i64 %add1736
+  %8 = load float* %arrayidx18, align 4
+  %mul19 = fmul fast float %7, %8
+  %add20 = fadd fast float %add14, %mul19
+  store float %add20, float* %C.addr.038, align 4
+  %incdec.ptr = getelementptr inbounds float* %C.addr.038, i64 1
+  %inc = add nsw i64 %i.039, 1
+  %exitcond = icmp eq i64 %inc, %0
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 0
+}
+
+
+; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S <  %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=STORE
+
+; void foo(double * restrict A, double * restrict B, double * restrict C,
+;          int n) {
+;   for (intptr_t i=0; i < n; ++i) {
+;     C[i] = B[0] *A[i*4  ] + B[1] *A[i*4+1];
+;   }
+; }
+
+; STORE-LABEL: store_red_double
+; STORE: fmul <2 x double>
+; STORE: extractelement <2 x double>
+; STORE: extractelement <2 x double>
+
+define void @store_red_double(double* noalias %A, double* noalias %B, double* noalias %C, i32 %n) {
+entry:
+  %cmp17 = icmp sgt i32 %n, 0
+  br i1 %cmp17, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %0 = load double* %B, align 8
+  %arrayidx4 = getelementptr inbounds double* %B, i64 1
+  %1 = load double* %arrayidx4, align 8
+  %2 = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %i.018 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %mul = shl nsw i64 %i.018, 2
+  %arrayidx2 = getelementptr inbounds double* %A, i64 %mul
+  %3 = load double* %arrayidx2, align 8
+  %mul3 = fmul fast double %0, %3
+  %add16 = or i64 %mul, 1
+  %arrayidx6 = getelementptr inbounds double* %A, i64 %add16
+  %4 = load double* %arrayidx6, align 8
+  %mul7 = fmul fast double %1, %4
+  %add8 = fadd fast double %mul3, %mul7
+  %arrayidx9 = getelementptr inbounds double* %C, i64 %i.018
+  store double %add8, double* %arrayidx9, align 8
+  %inc = add nsw i64 %i.018, 1
+  %exitcond = icmp eq i64 %inc, %2
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
new file mode 100644
index 0000000..43f7aed
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -0,0 +1,197 @@
+; RUN: opt -S -slp-vectorizer -slp-threshold=-10000 < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128"
+
+target triple = "x86_64-apple-macosx10.8.0"
+
+define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select(
+; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
+; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
+  %c0 = extractelement <4 x i32> %c, i32 0
+  %c1 = extractelement <4 x i32> %c, i32 1
+  %c2 = extractelement <4 x i32> %c, i32 2
+  %c3 = extractelement <4 x i32> %c, i32 3
+  %a0 = extractelement <4 x float> %a, i32 0
+  %a1 = extractelement <4 x float> %a, i32 1
+  %a2 = extractelement <4 x float> %a, i32 2
+  %a3 = extractelement <4 x float> %a, i32 3
+  %b0 = extractelement <4 x float> %b, i32 0
+  %b1 = extractelement <4 x float> %b, i32 1
+  %b2 = extractelement <4 x float> %b, i32 2
+  %b3 = extractelement <4 x float> %b, i32 3
+  %cmp0 = icmp ne i32 %c0, 0
+  %cmp1 = icmp ne i32 %c1, 0
+  %cmp2 = icmp ne i32 %c2, 0
+  %cmp3 = icmp ne i32 %c3, 0
+  %s0 = select i1 %cmp0, float %a0, float %b0
+  %s1 = select i1 %cmp1, float %a1, float %b1
+  %s2 = select i1 %cmp2, float %a2, float %b2
+  %s3 = select i1 %cmp3, float %a3, float %b3
+  %ra = insertelement <4 x float> undef, float %s0, i32 0
+  %rb = insertelement <4 x float> %ra, float %s1, i32 1
+  %rc = insertelement <4 x float> %rb, float %s2, i32 2
+  %rd = insertelement <4 x float> %rc, float %s3, i32 3
+  ret <4 x float> %rd
+}
+
+; Insert in an order different from the vector indices to make sure it
+; doesn't matter
+define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select_insert_out_of_order(
+; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
+; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
+  %c0 = extractelement <4 x i32> %c, i32 0
+  %c1 = extractelement <4 x i32> %c, i32 1
+  %c2 = extractelement <4 x i32> %c, i32 2
+  %c3 = extractelement <4 x i32> %c, i32 3
+  %a0 = extractelement <4 x float> %a, i32 0
+  %a1 = extractelement <4 x float> %a, i32 1
+  %a2 = extractelement <4 x float> %a, i32 2
+  %a3 = extractelement <4 x float> %a, i32 3
+  %b0 = extractelement <4 x float> %b, i32 0
+  %b1 = extractelement <4 x float> %b, i32 1
+  %b2 = extractelement <4 x float> %b, i32 2
+  %b3 = extractelement <4 x float> %b, i32 3
+  %cmp0 = icmp ne i32 %c0, 0
+  %cmp1 = icmp ne i32 %c1, 0
+  %cmp2 = icmp ne i32 %c2, 0
+  %cmp3 = icmp ne i32 %c3, 0
+  %s0 = select i1 %cmp0, float %a0, float %b0
+  %s1 = select i1 %cmp1, float %a1, float %b1
+  %s2 = select i1 %cmp2, float %a2, float %b2
+  %s3 = select i1 %cmp3, float %a3, float %b3
+  %ra = insertelement <4 x float> undef, float %s0, i32 2
+  %rb = insertelement <4 x float> %ra, float %s1, i32 1
+  %rc = insertelement <4 x float> %rb, float %s2, i32 0
+  %rd = insertelement <4 x float> %rc, float %s3, i32 3
+  ret <4 x float> %rd
+}
+
+declare void @v4f32_user(<4 x float>) #0
+declare void @f32_user(float) #0
+
+; Multiple users of the final constructed vector
+define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select_users(
+; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
+; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
+  %c0 = extractelement <4 x i32> %c, i32 0
+  %c1 = extractelement <4 x i32> %c, i32 1
+  %c2 = extractelement <4 x i32> %c, i32 2
+  %c3 = extractelement <4 x i32> %c, i32 3
+  %a0 = extractelement <4 x float> %a, i32 0
+  %a1 = extractelement <4 x float> %a, i32 1
+  %a2 = extractelement <4 x float> %a, i32 2
+  %a3 = extractelement <4 x float> %a, i32 3
+  %b0 = extractelement <4 x float> %b, i32 0
+  %b1 = extractelement <4 x float> %b, i32 1
+  %b2 = extractelement <4 x float> %b, i32 2
+  %b3 = extractelement <4 x float> %b, i32 3
+  %cmp0 = icmp ne i32 %c0, 0
+  %cmp1 = icmp ne i32 %c1, 0
+  %cmp2 = icmp ne i32 %c2, 0
+  %cmp3 = icmp ne i32 %c3, 0
+  %s0 = select i1 %cmp0, float %a0, float %b0
+  %s1 = select i1 %cmp1, float %a1, float %b1
+  %s2 = select i1 %cmp2, float %a2, float %b2
+  %s3 = select i1 %cmp3, float %a3, float %b3
+  %ra = insertelement <4 x float> undef, float %s0, i32 0
+  %rb = insertelement <4 x float> %ra, float %s1, i32 1
+  %rc = insertelement <4 x float> %rb, float %s2, i32 2
+  %rd = insertelement <4 x float> %rc, float %s3, i32 3
+  call void @v4f32_user(<4 x float> %rd) #0
+  ret <4 x float> %rd
+}
+
+; Unused insertelement
+define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select_no_users(
+; CHECK-NOT: icmp ne <4 x i32>
+; CHECK-NOT: select <4 x i1>
+  %c0 = extractelement <4 x i32> %c, i32 0
+  %c1 = extractelement <4 x i32> %c, i32 1
+  %c2 = extractelement <4 x i32> %c, i32 2
+  %c3 = extractelement <4 x i32> %c, i32 3
+  %a0 = extractelement <4 x float> %a, i32 0
+  %a1 = extractelement <4 x float> %a, i32 1
+  %a2 = extractelement <4 x float> %a, i32 2
+  %a3 = extractelement <4 x float> %a, i32 3
+  %b0 = extractelement <4 x float> %b, i32 0
+  %b1 = extractelement <4 x float> %b, i32 1
+  %b2 = extractelement <4 x float> %b, i32 2
+  %b3 = extractelement <4 x float> %b, i32 3
+  %cmp0 = icmp ne i32 %c0, 0
+  %cmp1 = icmp ne i32 %c1, 0
+  %cmp2 = icmp ne i32 %c2, 0
+  %cmp3 = icmp ne i32 %c3, 0
+  %s0 = select i1 %cmp0, float %a0, float %b0
+  %s1 = select i1 %cmp1, float %a1, float %b1
+  %s2 = select i1 %cmp2, float %a2, float %b2
+  %s3 = select i1 %cmp3, float %a3, float %b3
+  %ra = insertelement <4 x float> undef, float %s0, i32 0
+  %rb = insertelement <4 x float> %ra, float %s1, i32 1
+  %rc = insertelement <4 x float> undef, float %s2, i32 2
+  %rd = insertelement <4 x float> %rc, float %s3, i32 3
+  ret <4 x float> %rd
+}
+
+; Make sure infinite loop doesn't happen which I ran into when trying
+; to do this backwards
+define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
+; CHECK-LABEL: @reconstruct(
+  %c0 = extractelement <4 x i32> %c, i32 0
+  %c1 = extractelement <4 x i32> %c, i32 1
+  %c2 = extractelement <4 x i32> %c, i32 2
+  %c3 = extractelement <4 x i32> %c, i32 3
+  %ra = insertelement <4 x i32> undef, i32 %c0, i32 0
+  %rb = insertelement <4 x i32> %ra, i32 %c1, i32 1
+  %rc = insertelement <4 x i32> %rb, i32 %c2, i32 2
+  %rd = insertelement <4 x i32> %rc, i32 %c3, i32 3
+  ret <4 x i32> %rd
+}
+
+define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select_v2(
+; CHECK: icmp ne <2 x i32>
+; CHECK: select <2 x i1>
+  %c0 = extractelement <2 x i32> %c, i32 0
+  %c1 = extractelement <2 x i32> %c, i32 1
+  %a0 = extractelement <2 x float> %a, i32 0
+  %a1 = extractelement <2 x float> %a, i32 1
+  %b0 = extractelement <2 x float> %b, i32 0
+  %b1 = extractelement <2 x float> %b, i32 1
+  %cmp0 = icmp ne i32 %c0, 0
+  %cmp1 = icmp ne i32 %c1, 0
+  %s0 = select i1 %cmp0, float %a0, float %b0
+  %s1 = select i1 %cmp1, float %a1, float %b1
+  %ra = insertelement <2 x float> undef, float %s0, i32 0
+  %rb = insertelement <2 x float> %ra, float %s1, i32 1
+  ret <2 x float> %rb
+}
+
+; Make sure when we construct partial vectors, we don't keep
+; re-visiting the insertelement chains starting with undef
+; (low cost threshold needed to force this to happen)
+define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+  %c0 = extractelement <4 x i32> %c, i32 0
+  %c1 = extractelement <4 x i32> %c, i32 1
+  %a0 = extractelement <4 x float> %a, i32 0
+  %a1 = extractelement <4 x float> %a, i32 1
+  %b0 = extractelement <4 x float> %b, i32 0
+  %b1 = extractelement <4 x float> %b, i32 1
+  %1 = insertelement <2 x i32> undef, i32 %c0, i32 0
+  %2 = insertelement <2 x i32> %1, i32 %c1, i32 1
+  %3 = icmp ne <2 x i32> %2, zeroinitializer
+  %4 = insertelement <2 x float> undef, float %a0, i32 0
+  %5 = insertelement <2 x float> %4, float %a1, i32 1
+  %6 = insertelement <2 x float> undef, float %b0, i32 0
+  %7 = insertelement <2 x float> %6, float %b1, i32 1
+  %8 = select <2 x i1> %3, <2 x float> %5, <2 x float> %7
+  %9 = extractelement <2 x float> %8, i32 0
+  %ra = insertelement <4 x float> undef, float %9, i32 0
+  %10 = extractelement <2 x float> %8, i32 1
+  %rb = insertelement <4 x float> %ra, float %10, i32 1
+  ret <4 x float> %rb
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/SLPVectorizer/X86/lit.local.cfg b/test/Transforms/SLPVectorizer/X86/lit.local.cfg
index a8ad0f1..ba763cf 100644
--- a/test/Transforms/SLPVectorizer/X86/lit.local.cfg
+++ b/test/Transforms/SLPVectorizer/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/X86/operandorder.ll b/test/Transforms/SLPVectorizer/X86/operandorder.ll
new file mode 100644
index 0000000..c5322a8
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/operandorder.ll
@@ -0,0 +1,234 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -instcombine -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+
+
+
+; Make sure we order the operands of commutative operations so that we get
+; bigger vectorizable trees.
+
+; CHECK-LABEL: shuffle_operands1
+; CHECK:         load <2 x double>
+; CHECK:         fadd <2 x double>
+
+define void @shuffle_operands1(double * noalias %from, double * noalias %to,
+                               double %v1, double %v2) {
+  %from_1 = getelementptr double *%from, i64 1
+  %v0_1 = load double * %from
+  %v0_2 = load double * %from_1
+  %v1_1 = fadd double %v0_1, %v1
+  %v1_2 = fadd double %v2, %v0_2
+  %to_2 = getelementptr double * %to, i64 1
+  store double %v1_1, double *%to
+  store double %v1_2, double *%to_2
+  ret void
+}
+
+; CHECK-LABEL: shuffle_preserve_broadcast
+; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
+; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+define void @shuffle_preserve_broadcast(double * noalias %from,
+                                        double * noalias %to,
+                                        double %v1, double %v2) {
+entry:
+br label %lp
+
+lp:
+  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
+  %from_1 = getelementptr double *%from, i64 1
+  %v0_1 = load double * %from
+  %v0_2 = load double * %from_1
+  %v1_1 = fadd double %v0_1, %p
+  %v1_2 = fadd double %v0_1, %v0_2
+  %to_2 = getelementptr double * %to, i64 1
+  store double %v1_1, double *%to
+  store double %v1_2, double *%to_2
+br i1 undef, label %lp, label %ext
+
+ext:
+  ret void
+}
+
+; CHECK-LABEL: shuffle_preserve_broadcast2
+; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
+; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+define void @shuffle_preserve_broadcast2(double * noalias %from,
+                                        double * noalias %to,
+                                        double %v1, double %v2) {
+entry:
+br label %lp
+
+lp:
+  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
+  %from_1 = getelementptr double *%from, i64 1
+  %v0_1 = load double * %from
+  %v0_2 = load double * %from_1
+  %v1_1 = fadd double %p, %v0_1
+  %v1_2 = fadd double %v0_2, %v0_1
+  %to_2 = getelementptr double * %to, i64 1
+  store double %v1_1, double *%to
+  store double %v1_2, double *%to_2
+br i1 undef, label %lp, label %ext
+
+ext:
+  ret void
+}
+
+; CHECK-LABEL: shuffle_preserve_broadcast3
+; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
+; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+define void @shuffle_preserve_broadcast3(double * noalias %from,
+                                        double * noalias %to,
+                                        double %v1, double %v2) {
+entry:
+br label %lp
+
+lp:
+  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
+  %from_1 = getelementptr double *%from, i64 1
+  %v0_1 = load double * %from
+  %v0_2 = load double * %from_1
+  %v1_1 = fadd double %p, %v0_1
+  %v1_2 = fadd double %v0_1, %v0_2
+  %to_2 = getelementptr double * %to, i64 1
+  store double %v1_1, double *%to
+  store double %v1_2, double *%to_2
+br i1 undef, label %lp, label %ext
+
+ext:
+  ret void
+}
+
+
+; CHECK-LABEL: shuffle_preserve_broadcast4
+; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
+; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+define void @shuffle_preserve_broadcast4(double * noalias %from,
+                                        double * noalias %to,
+                                        double %v1, double %v2) {
+entry:
+br label %lp
+
+lp:
+  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
+  %from_1 = getelementptr double *%from, i64 1
+  %v0_1 = load double * %from
+  %v0_2 = load double * %from_1
+  %v1_1 = fadd double %v0_2, %v0_1
+  %v1_2 = fadd double %p, %v0_1
+  %to_2 = getelementptr double * %to, i64 1
+  store double %v1_1, double *%to
+  store double %v1_2, double *%to_2
+br i1 undef, label %lp, label %ext
+
+ext:
+  ret void
+}
+
+; CHECK-LABEL: shuffle_preserve_broadcast5
+; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
+; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+define void @shuffle_preserve_broadcast5(double * noalias %from,
+                                        double * noalias %to,
+                                        double %v1, double %v2) {
+entry:
+br label %lp
+
+lp:
+  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
+  %from_1 = getelementptr double *%from, i64 1
+  %v0_1 = load double * %from
+  %v0_2 = load double * %from_1
+  %v1_1 = fadd double %v0_1, %v0_2
+  %v1_2 = fadd double %p, %v0_1
+  %to_2 = getelementptr double * %to, i64 1
+  store double %v1_1, double *%to
+  store double %v1_2, double *%to_2
+br i1 undef, label %lp, label %ext
+
+ext:
+  ret void
+}
+
+
+; CHECK-LABEL: shuffle_preserve_broadcast6
+; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
+; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+define void @shuffle_preserve_broadcast6(double * noalias %from,
+                                        double * noalias %to,
+                                        double %v1, double %v2) {
+entry:
+br label %lp
+
+lp:
+  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
+  %from_1 = getelementptr double *%from, i64 1
+  %v0_1 = load double * %from
+  %v0_2 = load double * %from_1
+  %v1_1 = fadd double %v0_1, %v0_2
+  %v1_2 = fadd double %v0_1, %p
+  %to_2 = getelementptr double * %to, i64 1
+  store double %v1_1, double *%to
+  store double %v1_2, double *%to_2
+br i1 undef, label %lp, label %ext
+
+ext:
+  ret void
+}
+
+; Make sure we don't scramble operands when we reorder them and destroy
+; 'good' source order.
+
+; CHECK-LABEL: good_load_order
+
+; CHECK: %[[V1:[0-9]+]] = load <4 x float>*
+; CHECK: %[[V2:[0-9]+]] = insertelement <4 x float> undef, float %1, i32 0
+; CHECK: %[[V3:[0-9]+]] = shufflevector <4 x float> %[[V2]], <4 x float> %[[V1]], <4 x i32> <i32 0, i32 4, i32 5, i32 6>
+; CHECK:                = fmul <4 x float> %[[V1]], %[[V3]]
+
+@a = common global [32000 x float] zeroinitializer, align 16
+
+define void @good_load_order() {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %0 = load float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), align 16
+  br label %for.body3
+
+for.body3:
+  %1 = phi float [ %0, %for.cond1.preheader ], [ %10, %for.body3 ]
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
+  %2 = add nsw i64 %indvars.iv, 1
+  %arrayidx = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %2
+  %3 = load float* %arrayidx, align 4
+  %arrayidx5 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv
+  %mul6 = fmul float %3, %1
+  store float %mul6, float* %arrayidx5, align 4
+  %4 = add nsw i64 %indvars.iv, 2
+  %arrayidx11 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %4
+  %5 = load float* %arrayidx11, align 4
+  %mul15 = fmul float %5, %3
+  store float %mul15, float* %arrayidx, align 4
+  %6 = add nsw i64 %indvars.iv, 3
+  %arrayidx21 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %6
+  %7 = load float* %arrayidx21, align 4
+  %mul25 = fmul float %7, %5
+  store float %mul25, float* %arrayidx11, align 4
+  %8 = add nsw i64 %indvars.iv, 4
+  %arrayidx31 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %8
+  %9 = load float* %arrayidx31, align 4
+  %mul35 = fmul float %9, %7
+  store float %mul35, float* %arrayidx21, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
+  %arrayidx41 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.next
+  %10 = load float* %arrayidx41, align 4
+  %mul45 = fmul float %10, %9
+  store float %mul45, float* %arrayidx31, align 4
+  %11 = trunc i64 %indvars.iv.next to i32
+  %cmp2 = icmp slt i32 %11, 31995
+  br i1 %cmp2, label %for.body3, label %for.end
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/opt.ll b/test/Transforms/SLPVectorizer/X86/opt.ll
new file mode 100644
index 0000000..14137c1
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/opt.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -O3 -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=SLP
+; RUN: opt < %s -O3 -disable-slp-vectorization -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSLP
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Make sure we can disable slp vectorization in opt.
+
+; SLP-LABEL: test1
+; SLP: store <2 x double>
+
+; NOSLP-LABEL: test1
+; NOSLP-NOT: store <2 x double>
+
+
+define void @test1(double* %a, double* %b, double* %c) {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/ordering.ll b/test/Transforms/SLPVectorizer/X86/ordering.ll
index 588e115..d2ecd45 100644
--- a/test/Transforms/SLPVectorizer/X86/ordering.ll
+++ b/test/Transforms/SLPVectorizer/X86/ordering.ll
@@ -17,3 +17,65 @@ entry:
   %cmp11 = fcmp olt double %add, 0.000000e+00
   ret void
 }
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+declare i32 @personality_v0(...)
+
+define void @invoketest() {
+entry:
+  br i1 undef, label %cond.true, label %cond.false
+
+cond.true:
+  %call49 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef) 
+          to label %cond.true54 unwind label %lpad
+
+cond.false:
+  %call51 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %cond.false57 unwind label %lpad
+
+cond.true54:
+  %call56 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef) 
+          to label %cond.end60 unwind label %lpad
+
+cond.false57:
+  %call59 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %cond.end60 unwind label %lpad
+
+; Make sure we don't vectorize these phis - they have invokes as inputs.
+
+; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
+
+; CHECK-LABEL: invoketest
+
+; CHECK-LABEL: cond.end60
+; CHECK-NOT: phi <2 x double>
+; CHECK: insertelement
+; CHECK-LABEL: if.then63
+
+cond.end60:
+  %cond126 = phi double [ %call49, %cond.true54 ], [ %call51, %cond.false57 ]
+  %cond61 = phi double [ %call56, %cond.true54 ], [ %call59, %cond.false57 ]
+  br i1 undef, label %if.end98, label %if.then63
+
+if.then63:
+  %conv69 = fptrunc double undef to float
+  %conv70 = fpext float %conv69 to double
+  %div71 = fdiv double %cond126, %conv70
+  %conv78 = fptrunc double undef to float
+  %conv79 = fpext float %conv78 to double
+  %div80 = fdiv double %cond61, %conv79
+  br label %if.end98
+
+lpad:
+  %l = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @personality_v0 to i8*)
+          cleanup
+  resume { i8*, i32 } %l
+
+if.end98:
+  %dimensionsResult.sroa.0.0 = phi double [ %div71, %if.then63 ], [ %cond126, %cond.end60 ]
+  %dimensionsResult.sroa.6.0 = phi double [ %div80, %if.then63 ], [ %cond61, %cond.end60 ]
+  br label %if.end99
+
+if.end99:
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/phi.ll b/test/Transforms/SLPVectorizer/X86/phi.ll
index 1c7f9cc..964e0e4 100644
--- a/test/Transforms/SLPVectorizer/X86/phi.ll
+++ b/test/Transforms/SLPVectorizer/X86/phi.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
 target triple = "i386-apple-macosx10.9.0"
@@ -95,3 +95,154 @@ for.end:                                          ; preds = %for.body
   ret i32 0
 }
 
+; float foo3(float *A) {
+;
+;   float R = A[0];
+;   float G = A[1];
+;   float B = A[2];
+;   float Y = A[3];
+;   float P = A[4];
+;   for (int i=0; i < 121; i+=3) {
+;     R+=A[i+0]*7;
+;     G+=A[i+1]*8;
+;     B+=A[i+2]*9;
+;     Y+=A[i+3]*10;
+;     P+=A[i+4]*11;
+;   }
+;
+;   return R+G+B+Y+P;
+; }
+
+;CHECK: foo3
+;CHECK: phi <4 x float>
+;CHECK: fmul <4 x float>
+;CHECK: fadd <4 x float>
+;CHECK-NOT: phi <5 x float>
+;CHECK-NOT: fmul <5 x float>
+;CHECK-NOT: fadd <5 x float>
+
+define float @foo3(float* nocapture readonly %A) #0 {
+entry:
+  %0 = load float* %A, align 4
+  %arrayidx1 = getelementptr inbounds float* %A, i64 1
+  %1 = load float* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds float* %A, i64 2
+  %2 = load float* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float* %A, i64 3
+  %3 = load float* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds float* %A, i64 4
+  %4 = load float* %arrayidx4, align 4
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %P.056 = phi float [ %4, %entry ], [ %add26, %for.body ]
+  %Y.055 = phi float [ %3, %entry ], [ %add21, %for.body ]
+  %B.054 = phi float [ %2, %entry ], [ %add16, %for.body ]
+  %G.053 = phi float [ %1, %entry ], [ %add11, %for.body ]
+  %R.052 = phi float [ %0, %entry ], [ %add6, %for.body ]
+  %5 = phi float [ %1, %entry ], [ %11, %for.body ]
+  %6 = phi float [ %0, %entry ], [ %9, %for.body ]
+  %mul = fmul float %6, 7.000000e+00
+  %add6 = fadd float %R.052, %mul
+  %mul10 = fmul float %5, 8.000000e+00
+  %add11 = fadd float %G.053, %mul10
+  %7 = add nsw i64 %indvars.iv, 2
+  %arrayidx14 = getelementptr inbounds float* %A, i64 %7
+  %8 = load float* %arrayidx14, align 4
+  %mul15 = fmul float %8, 9.000000e+00
+  %add16 = fadd float %B.054, %mul15
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
+  %arrayidx19 = getelementptr inbounds float* %A, i64 %indvars.iv.next
+  %9 = load float* %arrayidx19, align 4
+  %mul20 = fmul float %9, 1.000000e+01
+  %add21 = fadd float %Y.055, %mul20
+  %10 = add nsw i64 %indvars.iv, 4
+  %arrayidx24 = getelementptr inbounds float* %A, i64 %10
+  %11 = load float* %arrayidx24, align 4
+  %mul25 = fmul float %11, 1.100000e+01
+  %add26 = fadd float %P.056, %mul25
+  %12 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %12, 121
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add28 = fadd float %add6, %add11
+  %add29 = fadd float %add28, %add16
+  %add30 = fadd float %add29, %add21
+  %add31 = fadd float %add30, %add26
+  ret float %add31
+}
+
+; Make sure the order of phi nodes of different types does not prevent
+; vectorization of same typed phi nodes.
+; CHECK-LABEL: sort_phi_type
+; CHECK: phi <4 x float>
+; CHECK: fmul <4 x float>
+
+define float @sort_phi_type(float* nocapture readonly %A) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %Y = phi float [ 1.000000e+01, %entry ], [ %mul10, %for.body ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %B = phi float [ 1.000000e+01, %entry ], [ %mul15, %for.body ]
+  %G = phi float [ 1.000000e+01, %entry ], [ %mul20, %for.body ]
+  %R = phi float [ 1.000000e+01, %entry ], [ %mul25, %for.body ]
+  %mul10 = fmul float %Y, 8.000000e+00
+  %mul15 = fmul float %B, 9.000000e+00
+  %mul20 = fmul float %R, 10.000000e+01
+  %mul25 = fmul float %G, 11.100000e+01
+  %indvars.iv.next = add nsw i64 %indvars.iv, 4
+  %cmp = icmp slt i64 %indvars.iv.next, 128
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add28 = fadd float 1.000000e+01, %mul10
+  %add29 = fadd float %mul10, %mul15
+  %add30 = fadd float %add29, %mul20
+  %add31 = fadd float %add30, %mul25
+  ret float %add31
+}
+
+define void @test(x86_fp80* %i1, x86_fp80* %i2, x86_fp80* %o) {
+; CHECK-LABEL: @test(
+;
+; Test that we correctly recognize the discontiguous memory in arrays where the
+; size is less than the alignment, and through various different GEP formations.
+;
+; We disable the vectorization of x86_fp80 for now. 
+
+entry:
+  %i1.0 = load x86_fp80* %i1, align 16
+  %i1.gep1 = getelementptr x86_fp80* %i1, i64 1
+  %i1.1 = load x86_fp80* %i1.gep1, align 16
+; CHECK: load x86_fp80*
+; CHECK: load x86_fp80*
+; CHECK-NOT: insertelement <2 x x86_fp80>
+; CHECK-NOT: insertelement <2 x x86_fp80>
+  br i1 undef, label %then, label %end
+
+then:
+  %i2.gep0 = getelementptr inbounds x86_fp80* %i2, i64 0
+  %i2.0 = load x86_fp80* %i2.gep0, align 16
+  %i2.gep1 = getelementptr inbounds x86_fp80* %i2, i64 1
+  %i2.1 = load x86_fp80* %i2.gep1, align 16
+; CHECK: load x86_fp80*
+; CHECK: load x86_fp80*
+; CHECK-NOT: insertelement <2 x x86_fp80>
+; CHECK-NOT: insertelement <2 x x86_fp80>
+  br label %end
+
+end:
+  %phi0 = phi x86_fp80 [ %i1.0, %entry ], [ %i2.0, %then ]
+  %phi1 = phi x86_fp80 [ %i1.1, %entry ], [ %i2.1, %then ]
+; CHECK-NOT: phi <2 x x86_fp80>
+; CHECK-NOT: extractelement <2 x x86_fp80>
+; CHECK-NOT: extractelement <2 x x86_fp80>
+  store x86_fp80 %phi0, x86_fp80* %o, align 16
+  %o.gep1 = getelementptr inbounds x86_fp80* %o, i64 1
+  store x86_fp80 %phi1, x86_fp80* %o.gep1, align 16
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll b/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll
new file mode 100644
index 0000000..6d2d5e3
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-apple-macosx10.9.0 -disable-output
+
+target datalayout = "f64:64:64-v64:64:64"
+
+define void @test_phi_in_landingpad() {
+entry:
+  invoke void @foo()
+          to label %inner unwind label %lpad
+
+inner:
+  %x0 = fsub double undef, undef
+  %y0 = fsub double undef, undef
+  invoke void @foo()
+          to label %done unwind label %lpad
+
+lpad:
+  %x1 = phi double [ undef, %entry ], [ undef, %inner ]
+  %y1 = phi double [ undef, %entry ], [ undef, %inner ]
+  landingpad { i8*, i32 } personality i8*
+          bitcast (i32 (...)* @__gxx_personality_v0 to i8*) catch i8* null
+  br label %done
+
+done:
+  phi double [ %x0, %inner ], [ %x1, %lpad ]
+  phi double [ %y0, %inner ], [ %y1, %lpad ]
+  ret void
+}
+
+declare void @foo()
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll b/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll
new file mode 100644
index 0000000..520e672
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+; We purposely over-align f64 to 128bit here. 
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:128:128-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.9.0"
+
+
+define void @test(double* %i1, double* %i2, double* %o) {
+; CHECK-LABEL: @test(
+;
+; Test that we correctly recognize the discontiguous memory in arrays where the
+; size is less than the alignment, and through various different GEP formations.
+
+entry:
+  %i1.0 = load double* %i1, align 16
+  %i1.gep1 = getelementptr double* %i1, i64 1
+  %i1.1 = load double* %i1.gep1, align 16
+; CHECK: load double*
+; CHECK: load double*
+; CHECK: insertelement <2 x double>
+; CHECK: insertelement <2 x double>
+  br i1 undef, label %then, label %end
+
+then:
+  %i2.gep0 = getelementptr inbounds double* %i2, i64 0
+  %i2.0 = load double* %i2.gep0, align 16
+  %i2.gep1 = getelementptr inbounds double* %i2, i64 1
+  %i2.1 = load double* %i2.gep1, align 16
+; CHECK: load double*
+; CHECK: load double*
+; CHECK: insertelement <2 x double>
+; CHECK: insertelement <2 x double>
+  br label %end
+
+end:
+  %phi0 = phi double [ %i1.0, %entry ], [ %i2.0, %then ]
+  %phi1 = phi double [ %i1.1, %entry ], [ %i2.1, %then ]
+; CHECK: phi <2 x double>
+; CHECK: extractelement <2 x double>
+; CHECK: extractelement <2 x double>
+  store double %phi0, double* %o, align 16
+  %o.gep1 = getelementptr inbounds double* %o, i64 1
+  store double %phi1, double* %o.gep1, align 16
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/pr16899.ll b/test/Transforms/SLPVectorizer/X86/pr16899.ll
new file mode 100644
index 0000000..8631bc9
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/pr16899.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s  -slp-vectorizer -S -mtriple=i386--netbsd -mcpu=i486
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386--netbsd"
+
+@a = common global i32* null, align 4
+
+; Function Attrs: noreturn nounwind readonly
+define i32 @fn1() #0 {
+entry:
+  %0 = load i32** @a, align 4, !tbaa !4
+  %1 = load i32* %0, align 4, !tbaa !5
+  %arrayidx1 = getelementptr inbounds i32* %0, i32 1
+  %2 = load i32* %arrayidx1, align 4, !tbaa !5
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %c.0 = phi i32 [ %2, %entry ], [ %add2, %do.body ]
+  %b.0 = phi i32 [ %1, %entry ], [ %add, %do.body ]
+  %add = add nsw i32 %b.0, %c.0
+  %add2 = add nsw i32 %add, 1
+  br label %do.body
+}
+
+attributes #0 = { noreturn nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"int", metadata !1}
+!4 = metadata !{metadata !0, metadata !0, i64 0}
+!5 = metadata !{metadata !3, metadata !3, i64 0}
diff --git a/test/Transforms/SLPVectorizer/X86/pr18060.ll b/test/Transforms/SLPVectorizer/X86/pr18060.ll
new file mode 100644
index 0000000..e6813f3
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/pr18060.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -slp-vectorizer -S -mtriple=i386-pc-linux
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux"
+
+; Function Attrs: nounwind
+define i32 @_Z16adjustFixupValueyj(i64 %Value, i32 %Kind) {
+entry:
+  %extract.t = trunc i64 %Value to i32
+  %extract = lshr i64 %Value, 12
+  %extract.t6 = trunc i64 %extract to i32
+  switch i32 %Kind, label %sw.default [
+    i32 0, label %return
+    i32 1, label %return
+    i32 129, label %sw.bb1
+    i32 130, label %sw.bb2
+  ]
+
+sw.default:                                       ; preds = %entry
+  call void @_Z25llvm_unreachable_internalv()
+  unreachable
+
+sw.bb1:                                           ; preds = %entry
+  %shr = lshr i64 %Value, 16
+  %extract.t5 = trunc i64 %shr to i32
+  %extract7 = lshr i64 %Value, 28
+  %extract.t8 = trunc i64 %extract7 to i32
+  br label %sw.bb2
+
+sw.bb2:                                           ; preds = %sw.bb1, %entry
+  %Value.addr.0.off0 = phi i32 [ %extract.t, %entry ], [ %extract.t5, %sw.bb1 ]
+  %Value.addr.0.off12 = phi i32 [ %extract.t6, %entry ], [ %extract.t8, %sw.bb1 ]
+  %conv6 = and i32 %Value.addr.0.off0, 4095
+  %conv4 = shl i32 %Value.addr.0.off12, 16
+  %shl = and i32 %conv4, 983040
+  %or = or i32 %shl, %conv6
+  %or11 = or i32 %or, 8388608
+  br label %return
+
+return:                                           ; preds = %sw.bb2, %entry, %entry
+  %retval.0 = phi i32 [ %or11, %sw.bb2 ], [ %extract.t, %entry ], [ %extract.t, %entry ]
+  ret i32 %retval.0
+}
+
+; Function Attrs: noreturn
+declare void @_Z25llvm_unreachable_internalv()
+
diff --git a/test/Transforms/SLPVectorizer/X86/rgb_phi.ll b/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
index 3235fd9..6aea5d3 100644
--- a/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
+++ b/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
@@ -3,6 +3,8 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
 target triple = "i386-apple-macosx10.9.0"
 
+; We disable the vectorization of <3 x float> for now
+
 ; float foo(float *A) {
 ;
 ;   float R = A[0];
@@ -19,14 +21,14 @@ target triple = "i386-apple-macosx10.9.0"
 
 ;CHECK-LABEL: @foo(
 ;CHECK: br
-;CHECK: phi <3 x float>
-;CHECK: fmul <3 x float>
-;CHECK: fadd <3 x float>
+;CHECK-NOT: phi <3 x float>
+;CHECK-NOT: fmul <3 x float>
+;CHECK-NOT: fadd <3 x float>
 ; At the moment we don't sink extractelements.
 ;CHECK: br
-;CHECK: extractelement
-;CHECK: extractelement
-;CHECK: extractelement
+;CHECK-NOT: extractelement
+;CHECK-NOT: extractelement
+;CHECK-NOT: extractelement
 ;CHECK: ret
 
 define float @foo(float* nocapture readonly %A) {
diff --git a/test/Transforms/SLPVectorizer/X86/simplebb.ll b/test/Transforms/SLPVectorizer/X86/simplebb.ll
index cd0b99e..7d682e5 100644
--- a/test/Transforms/SLPVectorizer/X86/simplebb.ll
+++ b/test/Transforms/SLPVectorizer/X86/simplebb.ll
@@ -23,3 +23,67 @@ entry:
   ret void
 }
 
+; Simple 3-pair chain with loads and stores, obfuscated with bitcasts: the store addresses are derived from an i8* base and must still be seen as consecutive.
+; CHECK: test2
+; CHECK: store <2 x double>
+; CHECK: ret
+define void @test2(double* %a, double* %b, i8* %e) {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  %c = bitcast i8* %e to double*  ; first output slot: %e reinterpreted as double*
+  store double %mul, double* %c, align 8
+  %carrayidx5 = getelementptr inbounds i8* %e, i64 8  ; 8 bytes past %e, i.e. the double slot right after %c
+  %arrayidx5 = bitcast i8* %carrayidx5 to double*
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+}
+
+; Don't vectorize volatile loads: no <2 x double> load may be formed, but the plain stores to %c are still expected to vectorize.
+; CHECK: test_volatile_load
+; CHECK-NOT: load <2 x double>
+; CHECK: store <2 x double>
+; CHECK: ret
+define void @test_volatile_load(double* %a, double* %b, double* %c) {
+entry:
+  %i0 = load volatile double* %a, align 8  ; volatile: must remain a scalar load
+  %i1 = load volatile double* %b, align 8  ; volatile: must remain a scalar load
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8  ; non-volatile store to c[0]
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8  ; non-volatile store to c[1]
+  ret void
+}
+
+; Don't vectorize volatile stores: both stores to %c are volatile, so no <2 x double> store may be formed.
+; CHECK: test_volatile_store
+; CHECK-NOT: store <2 x double>
+; CHECK: ret
+define void @test_volatile_store(double* %a, double* %b, double* %c) {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store volatile double %mul, double* %c, align 8  ; volatile: must remain a scalar store
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store volatile double %mul5, double* %arrayidx5, align 8  ; volatile: must remain a scalar store
+  ret void
+}
+
+
diff --git a/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
new file mode 100644
index 0000000..2747a1f
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
@@ -0,0 +1,140 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
+
+
+; CHECK: tiny_tree_fully_vectorizable
+; CHECK: load <2 x double>
+; CHECK: store <2 x double>
+; CHECK: ret 
+
+define void @tiny_tree_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
+entry:
+  %cmp12 = icmp eq i64 %count, 0
+  br i1 %cmp12, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
+  %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
+  %0 = load double* %src.addr.013, align 8
+  store double %0, double* %dst.addr.014, align 8
+  %arrayidx2 = getelementptr inbounds double* %src.addr.013, i64 1
+  %1 = load double* %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds double* %dst.addr.014, i64 1
+  store double %1, double* %arrayidx3, align 8
+  %add.ptr = getelementptr inbounds double* %src.addr.013, i64 %i.015
+  %add.ptr4 = getelementptr inbounds double* %dst.addr.014, i64 %i.015
+  %inc = add i64 %i.015, 1
+  %exitcond = icmp eq i64 %inc, %count
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; CHECK: tiny_tree_fully_vectorizable2
+; CHECK: load <4 x float>
+; CHECK: store <4 x float>
+; CHECK: ret
+
+define void @tiny_tree_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
+entry:
+  %cmp20 = icmp eq i64 %count, 0
+  br i1 %cmp20, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
+  %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
+  %0 = load float* %src.addr.021, align 4
+  store float %0, float* %dst.addr.022, align 4
+  %arrayidx2 = getelementptr inbounds float* %src.addr.021, i64 1
+  %1 = load float* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float* %dst.addr.022, i64 1
+  store float %1, float* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds float* %src.addr.021, i64 2
+  %2 = load float* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds float* %dst.addr.022, i64 2
+  store float %2, float* %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds float* %src.addr.021, i64 3
+  %3 = load float* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds float* %dst.addr.022, i64 3
+  store float %3, float* %arrayidx7, align 4
+  %add.ptr = getelementptr inbounds float* %src.addr.021, i64 %i.023
+  %add.ptr8 = getelementptr inbounds float* %dst.addr.022, i64 %i.023
+  %inc = add i64 %i.023, 1
+  %exitcond = icmp eq i64 %inc, %count
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; We do not vectorize a tiny tree that is not fully vectorizable.
+; CHECK: tiny_tree_not_fully_vectorizable
+; CHECK-NOT: <2 x double>
+; CHECK: ret 
+
+define void @tiny_tree_not_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
+entry:
+  %cmp12 = icmp eq i64 %count, 0
+  br i1 %cmp12, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
+  %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
+  %0 = load double* %src.addr.013, align 8
+  store double %0, double* %dst.addr.014, align 8
+  %arrayidx2 = getelementptr inbounds double* %src.addr.013, i64 2
+  %1 = load double* %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds double* %dst.addr.014, i64 1 
+  store double %1, double* %arrayidx3, align 8
+  %add.ptr = getelementptr inbounds double* %src.addr.013, i64 %i.015
+  %add.ptr4 = getelementptr inbounds double* %dst.addr.014, i64 %i.015
+  %inc = add i64 %i.015, 1
+  %exitcond = icmp eq i64 %inc, %count
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; The float variant loads src[0], src[4], src[2] and src[3], so it must stay scalar.
+; CHECK: tiny_tree_not_fully_vectorizable2
+; CHECK-NOT: <4 x float>
+; CHECK: ret
+
+define void @tiny_tree_not_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
+entry:
+  %cmp20 = icmp eq i64 %count, 0
+  br i1 %cmp20, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
+  %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
+  %0 = load float* %src.addr.021, align 4
+  store float %0, float* %dst.addr.022, align 4
+  %arrayidx2 = getelementptr inbounds float* %src.addr.021, i64 4 
+  %1 = load float* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float* %dst.addr.022, i64 1
+  store float %1, float* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds float* %src.addr.021, i64 2
+  %2 = load float* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds float* %dst.addr.022, i64 2
+  store float %2, float* %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds float* %src.addr.021, i64 3
+  %3 = load float* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds float* %dst.addr.022, i64 3
+  store float %3, float* %arrayidx7, align 4
+  %add.ptr = getelementptr inbounds float* %src.addr.021, i64 %i.023
+  %add.ptr8 = getelementptr inbounds float* %dst.addr.022, i64 %i.023
+  %inc = add i64 %i.023, 1
+  %exitcond = icmp eq i64 %inc, %count
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/XCore/lit.local.cfg b/test/Transforms/SLPVectorizer/XCore/lit.local.cfg
new file mode 100644
index 0000000..4d17d46
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/XCore/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if not 'XCore' in targets:
+    config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll b/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
new file mode 100644
index 0000000..66392e7
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=xcore  | FileCheck %s
+
+target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32"
+target triple = "xcore"
+
+; Simple 3-pair chain with loads and stores; XCore has no vector registers, so it must not be vectorized.
+; CHECK: test1
+; CHECK-NOT: <2 x double>
+define void @test1(double* %a, double* %b, double* %c) {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/lit.local.cfg b/test/Transforms/SLPVectorizer/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/SLPVectorizer/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 458b0df..5d3e4b5 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1181,7 +1181,6 @@ entry:
   store i1 %x, i1* %b.i1, align 8
   %b.i8 = bitcast <{ i1 }>* %b to i8*
   %foo = load i8* %b.i8, align 1
-; CHECK-NEXT: {{.*}} = zext i1 %x to i8
 ; CHECK-NEXT: %[[ext:.*]] = zext i1 %x to i8
 ; CHECK-NEXT: store i8 %[[ext]], i8* %[[a]], align 8
 ; CHECK-NEXT: {{.*}} = load i8* %[[a]], align 8
diff --git a/test/Transforms/SROA/lit.local.cfg b/test/Transforms/SROA/lit.local.cfg
deleted file mode 100644
index c6106e4..0000000
--- a/test/Transforms/SROA/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/SROA/vector-conversion.ll b/test/Transforms/SROA/vector-conversion.ll
new file mode 100644
index 0000000..08d7960
--- /dev/null
+++ b/test/Transforms/SROA/vector-conversion.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+define <4 x i64> @vector_ptrtoint({<2 x i32*>, <2 x i32*>} %x) {
+; CHECK-LABEL: @vector_ptrtoint
+  %a = alloca {<2 x i32*>, <2 x i32*>}
+; CHECK-NOT: alloca
+
+  store {<2 x i32*>, <2 x i32*>} %x, {<2 x i32*>, <2 x i32*>}* %a
+; CHECK-NOT: store
+
+  %cast = bitcast {<2 x i32*>, <2 x i32*>}* %a to <4 x i64>*
+  %vec = load <4 x i64>* %cast
+; CHECK-NOT: load
+; CHECK: ptrtoint
+
+  ret <4 x i64> %vec
+}
+
+define <4 x i32*> @vector_inttoptr({<2 x i64>, <2 x i64>} %x) {
+; CHECK-LABEL: @vector_inttoptr
+  %a = alloca {<2 x i64>, <2 x i64>}
+; CHECK-NOT: alloca
+
+  store {<2 x i64>, <2 x i64>} %x, {<2 x i64>, <2 x i64>}* %a
+; CHECK-NOT: store
+
+  %cast = bitcast {<2 x i64>, <2 x i64>}* %a to <4 x i32*>*
+  %vec = load <4 x i32*>* %cast
+; CHECK-NOT: load
+; CHECK: inttoptr
+
+  ret <4 x i32*> %vec
+}
+
+define <2 x i64> @vector_ptrtointbitcast({<1 x i32*>, <1 x i32*>} %x) {
+; CHECK-LABEL: @vector_ptrtointbitcast
+  %a = alloca {<1 x i32*>, <1 x i32*>}
+; CHECK-NOT: alloca
+
+  store {<1 x i32*>, <1 x i32*>} %x, {<1 x i32*>, <1 x i32*>}* %a
+; CHECK-NOT: store
+
+  %cast = bitcast {<1 x i32*>, <1 x i32*>}* %a to <2 x i64>*
+  %vec = load <2 x i64>* %cast
+; CHECK-NOT: load
+; CHECK: ptrtoint
+; CHECK: bitcast
+; CHECK: ptrtoint
+; CHECK: bitcast
+
+  ret <2 x i64> %vec
+}
diff --git a/test/Transforms/SampleProfile/Inputs/branch.prof b/test/Transforms/SampleProfile/Inputs/branch.prof
new file mode 100644
index 0000000..d19894d
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/branch.prof
@@ -0,0 +1,11 @@
+symbol table
+1
+main
+main:15680:0:7
+0: 0
+4: 0
+7: 0
+9: 10226
+10: 2243
+16: 0
+18: 0
diff --git a/test/Transforms/SampleProfile/branch.ll b/test/Transforms/SampleProfile/branch.ll
new file mode 100644
index 0000000..5167627
--- /dev/null
+++ b/test/Transforms/SampleProfile/branch.ll
@@ -0,0 +1,143 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -analyze -branch-prob | FileCheck %s
+
+; Original C++ code for this test case:
+;
+; #include <stdio.h>
+; #include <stdlib.h>
+;
+; int main(int argc, char *argv[]) {
+;   if (argc < 2)
+;     return 1;
+;   double result;
+;   int limit = atoi(argv[1]);
+;   if (limit > 100) {
+;     double s = 23.041968;
+;     for (int u = 0; u < limit; u++) {
+;       double x = s;
+;       s = x + 3.049 + (double)u;
+;       s -= s + 3.94 / x * 0.32;
+;     }
+;     result = s;
+;   } else {
+;     result = 0;
+;   }
+;   printf("result is %lf\n", result);
+;   return 0;
+; }
+
+@.str = private unnamed_addr constant [15 x i8] c"result is %lf\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i32 @main(i32 %argc, i8** nocapture readonly %argv) #0 {
+; CHECK: Printing analysis 'Branch Probability Analysis' for function 'main':
+
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !13), !dbg !27
+  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !14), !dbg !27
+  %cmp = icmp slt i32 %argc, 2, !dbg !28
+  br i1 %cmp, label %return, label %if.end, !dbg !28
+; CHECK: edge entry -> return probability is 1 / 2 = 50%
+; CHECK: edge entry -> if.end probability is 1 / 2 = 50%
+
+if.end:                                           ; preds = %entry
+  %arrayidx = getelementptr inbounds i8** %argv, i64 1, !dbg !30
+  %0 = load i8** %arrayidx, align 8, !dbg !30, !tbaa !31
+  %call = tail call i32 @atoi(i8* %0) #4, !dbg !30
+  tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !17), !dbg !30
+  %cmp1 = icmp sgt i32 %call, 100, !dbg !35
+  br i1 %cmp1, label %for.body, label %if.end6, !dbg !35
+; CHECK: edge if.end -> for.body probability is 2243 / 2244 = 99.9554% [HOT edge]
+; CHECK: edge if.end -> if.end6 probability is 1 / 2244 = 0.0445633%
+
+for.body:                                         ; preds = %if.end, %for.body
+  %u.016 = phi i32 [ %inc, %for.body ], [ 0, %if.end ]
+  %s.015 = phi double [ %sub, %for.body ], [ 0x40370ABE6A337A81, %if.end ]
+  %add = fadd double %s.015, 3.049000e+00, !dbg !36
+  %conv = sitofp i32 %u.016 to double, !dbg !36
+  %add4 = fadd double %add, %conv, !dbg !36
+  tail call void @llvm.dbg.value(metadata !{double %add4}, i64 0, metadata !18), !dbg !36
+  %div = fdiv double 3.940000e+00, %s.015, !dbg !37
+  %mul = fmul double %div, 3.200000e-01, !dbg !37
+  %add5 = fadd double %add4, %mul, !dbg !37
+  %sub = fsub double %add4, %add5, !dbg !37
+  tail call void @llvm.dbg.value(metadata !{double %sub}, i64 0, metadata !18), !dbg !37
+  %inc = add nsw i32 %u.016, 1, !dbg !38
+  tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !21), !dbg !38
+  %exitcond = icmp eq i32 %inc, %call, !dbg !38
+  br i1 %exitcond, label %if.end6, label %for.body, !dbg !38
+; CHECK: edge for.body -> if.end6 probability is 1 / 2244 = 0.0445633%
+; CHECK: edge for.body -> for.body probability is 2243 / 2244 = 99.9554% [HOT edge]
+
+if.end6:                                          ; preds = %for.body, %if.end
+  %result.0 = phi double [ 0.000000e+00, %if.end ], [ %sub, %for.body ]
+  %call7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i64 0, i64 0), double %result.0), !dbg !39
+  br label %return, !dbg !40
+; CHECK: edge if.end6 -> return probability is 16 / 16 = 100% [HOT edge]
+
+return:                                           ; preds = %entry, %if.end6
+  %retval.0 = phi i32 [ 0, %if.end6 ], [ 1, %entry ]
+  ret i32 %retval.0, !dbg !41
+}
+
+; Function Attrs: nounwind readonly
+declare i32 @atoi(i8* nocapture) #1
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture readonly, ...) #2
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #3
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind readonly }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!25, !42}
+!llvm.ident = !{!26}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 192896) (llvm/trunk 192895)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [./branch.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"branch.cc", metadata !"."}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 4, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !12, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [main]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [./branch.cc]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !8, metadata !9}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!11 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !17, metadata !18, metadata !21, metadata !23}
+!13 = metadata !{i32 786689, metadata !4, metadata !"argc", metadata !5, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 4]
+!14 = metadata !{i32 786689, metadata !4, metadata !"argv", metadata !5, i32 33554436, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 4]
+!15 = metadata !{i32 786688, metadata !4, metadata !"result", metadata !5, i32 7, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 7]
+!16 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!17 = metadata !{i32 786688, metadata !4, metadata !"limit", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [limit] [line 8]
+!18 = metadata !{i32 786688, metadata !19, metadata !"s", metadata !5, i32 10, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 10]
+!19 = metadata !{i32 786443, metadata !1, metadata !20, i32 9, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!20 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!21 = metadata !{i32 786688, metadata !22, metadata !"u", metadata !5, i32 11, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [u] [line 11]
+!22 = metadata !{i32 786443, metadata !1, metadata !19, i32 11, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!23 = metadata !{i32 786688, metadata !24, metadata !"x", metadata !5, i32 12, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [x] [line 12]
+!24 = metadata !{i32 786443, metadata !1, metadata !22, i32 11, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!25 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!26 = metadata !{metadata !"clang version 3.4 (trunk 192896) (llvm/trunk 192895)"}
+!27 = metadata !{i32 4, i32 0, metadata !4, null}
+!28 = metadata !{i32 5, i32 0, metadata !29, null}
+!29 = metadata !{i32 786443, metadata !1, metadata !4, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!30 = metadata !{i32 8, i32 0, metadata !4, null} ; !dbg location: source line 8, scope !4 (main)
+!31 = metadata !{metadata !32, metadata !32, i64 0}
+!32 = metadata !{metadata !"any pointer", metadata !33, i64 0}
+!33 = metadata !{metadata !"omnipotent char", metadata !34, i64 0}
+!34 = metadata !{metadata !"Simple C/C++ TBAA"}
+!35 = metadata !{i32 9, i32 0, metadata !20, null}
+!36 = metadata !{i32 13, i32 0, metadata !24, null}
+!37 = metadata !{i32 14, i32 0, metadata !24, null}
+!38 = metadata !{i32 11, i32 0, metadata !22, null}
+!39 = metadata !{i32 20, i32 0, metadata !4, null}
+!40 = metadata !{i32 21, i32 0, metadata !4, null}
+!41 = metadata !{i32 22, i32 0, metadata !4, null}
+!42 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
index 27e6670..71bf22a 100644
--- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll
+++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
@@ -40,11 +40,12 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!20}
 
 !0 = metadata !{i32 786449, metadata !18, i32 12, metadata !"clang version 3.0 (trunk 131941)", i1 false, metadata !"", i32 0, metadata !19, metadata !19, metadata !17, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
 !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
@@ -61,3 +62,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !17 = metadata !{metadata !1}
 !18 = metadata !{metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b"}
 !19 = metadata !{i32 0}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/ScalarRepl/lit.local.cfg b/test/Transforms/ScalarRepl/lit.local.cfg
deleted file mode 100644
index c6106e4..0000000
--- a/test/Transforms/ScalarRepl/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/ScalarRepl/union-pointer.ll b/test/Transforms/ScalarRepl/union-pointer.ll
index 03d25ac..f0dc141 100644
--- a/test/Transforms/ScalarRepl/union-pointer.ll
+++ b/test/Transforms/ScalarRepl/union-pointer.ll
@@ -1,13 +1,16 @@
 ; PR892
-; RUN: opt < %s -scalarrepl -S | \
-; RUN:   not grep alloca
-; RUN: opt < %s -scalarrepl -S | grep "ret i8"
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
 
-target datalayout = "e-p:32:32-n8:16:32"
+
+target datalayout = "e-p:32:32-p1:16:16-n8:16:32"
 target triple = "i686-apple-darwin8.7.2"
-	%struct.Val = type { i32*, i32 }
+
+%struct.Val = type { i32*, i32 }
 
 define i8* @test(i16* %X) {
+; CHECK-LABEL: @test(
+; CHECK-NOT: alloca
+; CHECK: ret i8*
 	%X_addr = alloca i16*		; <i16**> [#uses=2]
 	store i16* %X, i16** %X_addr
 	%X_addr.upgrd.1 = bitcast i16** %X_addr to i8**		; <i8**> [#uses=1]
@@ -15,7 +18,37 @@ define i8* @test(i16* %X) {
 	ret i8* %tmp
 }
 
+define i8 addrspace(1)* @test_as1(i16 addrspace(1)* %x) { ; addrspace(1) counterpart of @test: with p1:16:16 the slot should fold to an i16 ptrtoint/inttoptr pair
+; CHECK-LABEL: @test_as1(
+; CHECK-NEXT: %1 = ptrtoint i16 addrspace(1)* %x to i16
+; CHECK-NEXT: %2 = inttoptr i16 %1 to i8 addrspace(1)*
+; CHECK-NEXT: ret i8 addrspace(1)* %2
+    %x_addr = alloca i16 addrspace(1)* ; stack slot typed as holding an i16 addrspace(1)*
+	store i16 addrspace(1)* %x, i16 addrspace(1)** %x_addr
+	%x_addr.upgrd.1 = bitcast i16 addrspace(1)** %x_addr to i8 addrspace(1)** ; view the same slot as holding an i8 addrspace(1)*
+	%tmp = load i8 addrspace(1)** %x_addr.upgrd.1 ; reload the stored pointer under the other pointee type
+	ret i8 addrspace(1)* %tmp
+}
+
+define i8 addrspace(1)* @test_as1_array(i16 addrspace(1)* %x) { ; store/reload of an addrspace(1) pointer through one element of an alloca'd array
+; CHECK-LABEL: @test_as1_array(
+; CHECK-NEXT: %1 = ptrtoint i16 addrspace(1)* %x to i16
+; CHECK-NEXT: %2 = inttoptr i16 %1 to i8 addrspace(1)*
+; CHECK-NEXT: ret i8 addrspace(1)* %2
+  %as_ptr_array = alloca [4 x i16 addrspace(1)*] ; four pointer slots; only index 1 is accessed below
+  %elem1 = getelementptr [4 x i16 addrspace(1)*]* %as_ptr_array, i32 0, i32 1
+  store i16 addrspace(1)* %x, i16 addrspace(1)** %elem1
+  %elem1.cast = bitcast i16 addrspace(1)** %elem1 to i8 addrspace(1)** ; reinterpret element 1 as holding an i8 addrspace(1)*
+  %tmp = load i8 addrspace(1)** %elem1.cast ; reload the stored pointer under the other pointee type
+  ret i8 addrspace(1)* %tmp
+}
+
+
 define void @test2(i64 %Op.0) {
+; CHECK-LABEL: @test2(
+; CHECK-NOT: alloca
+; CHECK: ret void
+
 	%tmp = alloca %struct.Val, align 8		; <%struct.Val*> [#uses=3]
 	%tmp1 = alloca %struct.Val, align 8		; <%struct.Val*> [#uses=3]
 	%tmp.upgrd.2 = call i64 @_Z3foov( )		; <i64> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/CoveredLookupTable.ll b/test/Transforms/SimplifyCFG/CoveredLookupTable.ll
new file mode 100644
index 0000000..8b45a59
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/CoveredLookupTable.ll
@@ -0,0 +1,48 @@
+; RUN: opt -simplifycfg -S %s | FileCheck %s
+; rdar://15268442 - the cases plus the default edge give a result for every i3 value, so the switch is expected to become a shift-based table lookup with no branches.
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin12.0.0"
+
+; CHECK-LABEL: define i3 @coveredswitch_test(
+; CHECK: entry:
+; CHECK-NEXT: sub i3 %input, -4
+; CHECK-NEXT: zext i3 %switch.tableidx to i24
+; CHECK-NEXT: mul i24 %switch.cast, 3
+; CHECK-NEXT: lshr i24 7507338, %switch.shiftamt
+; CHECK-NEXT: trunc i24 %switch.downshift to i3
+; CHECK-NEXT: ret i3 %switch.masked
+
+define i3 @coveredswitch_test(i3 %input) { ; maps each 3-bit input to a 3-bit result via the switch + phi below
+entry:
+  switch i3 %input, label %bb8 [ ; inputs 6 and 7 take the default edge, which feeds 6 into the phi
+    i3 0, label %bb7 ; input 0 -> result 0
+    i3 1, label %bb ; input 1 -> result 5
+    i3 2, label %bb3 ; input 2 -> result 4
+    i3 3, label %bb4 ; input 3 -> result 3
+    i3 4, label %bb5 ; input 4 -> result 2
+    i3 5, label %bb6 ; input 5 -> result 1
+  ]
+
+bb:                                               ; preds = %entry
+  br label %bb8
+
+bb3:                                              ; preds = %entry
+  br label %bb8
+
+bb4:                                              ; preds = %entry
+  br label %bb8
+
+bb5:                                              ; preds = %entry
+  br label %bb8
+
+bb6:                                              ; preds = %entry
+  br label %bb8
+
+bb7:                                              ; preds = %entry
+  br label %bb8
+
+bb8:                                              ; preds = %bb7, %bb6, %bb5, %bb4, %bb3, %bb, %entry
+  %result = phi i3 [ 0, %bb7 ], [ 1, %bb6 ], [ 2, %bb5 ], [ 3, %bb4 ], [ 4, %bb3 ], [ 5, %bb ], [ 6, %entry ] ; 7507338 in the expected lshr is these results packed 3 bits apiece, indexed by (%input + 4) mod 8, low bits first: 2,1,6,6,0,5,4,3
+  ret i3 %result
+}
diff --git a/test/Transforms/SimplifyCFG/MagicPointer.ll b/test/Transforms/SimplifyCFG/MagicPointer.ll
index 93b9a27..b8b8cbd 100644
--- a/test/Transforms/SimplifyCFG/MagicPointer.ll
+++ b/test/Transforms/SimplifyCFG/MagicPointer.ll
@@ -2,15 +2,7 @@
 ;
 ; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
-; CHECK: switch i64 %magicptr
-; CHECK: i64 0, label
-; CHECK: i64 1, label
-; CHECK: i64 2, label
-; CHECK: i64 3, label
-; CHECK: i64 4, label
-; CHECK: }
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
 @.str = private constant [5 x i8] c"null\00"      ; <[5 x i8]*> [#uses=2]
@@ -18,7 +10,24 @@ target triple = "x86_64-apple-darwin10.0.0"
 @.str2 = private constant [4 x i8] c"two\00"      ; <[4 x i8]*> [#uses=2]
 @.str3 = private constant [5 x i8] c"four\00"     ; <[5 x i8]*> [#uses=2]
 
+@.str_as1 = private addrspace(1) constant [5 x i8] c"null\00"      ; <[5 x i8] addrspace(1)*> [#uses=1]
+@.str1_as1 = private addrspace(1) constant [4 x i8] c"one\00"      ; <[4 x i8] addrspace(1)*> [#uses=1]
+@.str2_as1 = private addrspace(1) constant [4 x i8] c"two\00"      ; <[4 x i8] addrspace(1)*> [#uses=1]
+@.str3_as1 = private addrspace(1) constant [5 x i8] c"four\00"     ; <[5 x i8] addrspace(1)*> [#uses=1]
+
+declare i32 @puts(i8*)
+declare i32 @puts_as1(i8 addrspace(1)*)
+
 define void @f(i8* %x) nounwind ssp {
+; CHECK-LABEL: @f(
+; CHECK: switch i64 %magicptr
+; CHECK: i64 0, label
+; CHECK: i64 1, label
+; CHECK: i64 2, label
+; CHECK: i64 3, label
+; CHECK: i64 4, label
+; CHECK: }
+
 entry:
   %tobool = icmp eq i8* %x, null                  ; <i1> [#uses=1]
   br i1 %tobool, label %if.then, label %if.else
@@ -72,4 +81,69 @@ if.end21:                                         ; preds = %if.end20, %if.then
   ret void
 }
 
-declare i32 @puts(i8*)
+; addrspace(1) copy of @f: pointers are 16 bits wide here (p1:16:16:16) while the
+; inttoptr constants are i64. Is a variant with a matching integer width also useful?
+define void @f_as1(i8 addrspace(1)* %x) nounwind ssp { ; chain of pointer equality tests against 0..4 that should collapse into one i16 switch
+; CHECK-LABEL: @f_as1(
+; CHECK: ptrtoint i8 addrspace(1)* %x to i16
+; CHECK: switch i16 %magicptr
+; CHECK: i16 0, label
+; CHECK: i16 1, label
+; CHECK: i16 2, label
+; CHECK: i16 3, label
+; CHECK: i16 4, label
+; CHECK: }
+
+entry:
+  %tobool = icmp eq i8 addrspace(1)* %x, null                  ; <i1> [#uses=1]
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %call = call i32 @puts_as1(i8 addrspace(1)* getelementptr inbounds ([5 x i8] addrspace(1)* @.str_as1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+  br label %if.end21
+
+if.else:                                          ; preds = %entry
+  %cmp = icmp eq i8 addrspace(1)* %x, inttoptr (i64 1 to i8 addrspace(1)*)  ; <i1> [#uses=1]
+  br i1 %cmp, label %if.then2, label %if.else4
+
+if.then2:                                         ; preds = %if.else
+  %call3 = call i32 @puts_as1(i8 addrspace(1)* getelementptr inbounds ([4 x i8] addrspace(1)* @.str1_as1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+  br label %if.end20
+
+if.else4:                                         ; preds = %if.else
+  %cmp6 = icmp eq i8 addrspace(1)* %x, inttoptr (i64 2 to i8 addrspace(1)*) ; <i1> [#uses=1]
+  br i1 %cmp6, label %if.then9, label %lor.lhs.false
+
+lor.lhs.false:                                    ; preds = %if.else4
+  %cmp8 = icmp eq i8 addrspace(1)* %x, inttoptr (i64 3 to i8 addrspace(1)*) ; <i1> [#uses=1]
+  br i1 %cmp8, label %if.then9, label %if.else11
+
+if.then9:                                         ; preds = %lor.lhs.false, %if.else4
+  %call10 = call i32 @puts_as1(i8 addrspace(1)* getelementptr inbounds ([4 x i8] addrspace(1)* @.str2_as1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+  br label %if.end19
+
+if.else11:                                        ; preds = %lor.lhs.false
+  %cmp13 = icmp eq i8 addrspace(1)* %x, inttoptr (i64 4 to i8 addrspace(1)*) ; <i1> [#uses=1]
+  br i1 %cmp13, label %if.then14, label %if.else16
+
+if.then14:                                        ; preds = %if.else11
+  %call15 = call i32 @puts_as1(i8 addrspace(1)* getelementptr inbounds ([5 x i8] addrspace(1)* @.str3_as1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+  br label %if.end
+
+if.else16:                                        ; preds = %if.else11
+  %call18 = call i32 @puts_as1(i8 addrspace(1)* %x) nounwind       ; <i32> [#uses=0]
+  br label %if.end
+
+if.end:                                           ; preds = %if.else16, %if.then14
+  br label %if.end19
+
+if.end19:                                         ; preds = %if.end, %if.then9
+  br label %if.end20
+
+if.end20:                                         ; preds = %if.end19, %if.then2
+  br label %if.end21
+
+if.end21:                                         ; preds = %if.end20, %if.then
+  ret void
+}
+
diff --git a/test/Transforms/SimplifyCFG/R600/lit.local.cfg b/test/Transforms/SimplifyCFG/R600/lit.local.cfg
deleted file mode 100644
index e69de29..0000000
--- a/test/Transforms/SimplifyCFG/R600/lit.local.cfg
+++ /dev/null
diff --git a/test/Transforms/SimplifyCFG/R600/parallelandifcollapse.ll b/test/Transforms/SimplifyCFG/R600/parallelandifcollapse.ll
deleted file mode 100644
index e69de29..0000000
--- a/test/Transforms/SimplifyCFG/R600/parallelandifcollapse.ll
+++ /dev/null
diff --git a/test/Transforms/SimplifyCFG/R600/parallelorifcollapse.ll b/test/Transforms/SimplifyCFG/R600/parallelorifcollapse.ll
deleted file mode 100644
index e69de29..0000000
--- a/test/Transforms/SimplifyCFG/R600/parallelorifcollapse.ll
+++ /dev/null
diff --git a/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg b/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
index 786fee9..4d344fa 100644
--- a/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
+++ b/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'Sparc' in targets:
     config.unsupported = True
diff --git a/test/Transforms/SimplifyCFG/X86/lit.local.cfg b/test/Transforms/SimplifyCFG/X86/lit.local.cfg
index a8ad0f1..ba763cf 100644
--- a/test/Transforms/SimplifyCFG/X86/lit.local.cfg
+++ b/test/Transforms/SimplifyCFG/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 71259c9..3687327 100644
--- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -711,7 +711,7 @@ return:
   ret i32 %retval.0
 }
 
-define i32 @cprop(i32 %x) {
+define i32 @cprop(i32 %x, i32 %y) {
 entry:
   switch i32 %x, label %sw.default [
     i32 1, label %return
@@ -727,7 +727,8 @@ sw.bb1: br label %return
 
 sw.bb2:
   %and = and i32 %x, 1
-  %tobool = icmp ne i32 %and, 0
+  %and.ptr = inttoptr i32 %and to i8*
+  %tobool = icmp ne i8* %and.ptr, null
   %cond = select i1 %tobool, i32 -123, i32 456
   %sub = sub nsw i32 %x, %cond
   br label %return
@@ -735,13 +736,15 @@ sw.bb2:
 sw.bb3:
   %trunc = trunc i32 %x to i8
   %sext = sext i8 %trunc to i32
+  %select.i = icmp sgt i32 %sext, 0
+  %select = select i1 %select.i, i32 %sext, i32 %y
   br label %return
 
 sw.default:
   br label %return
 
 return:
-  %retval.0 = phi i32 [ 123, %sw.default ], [ %sext, %sw.bb3 ], [ %sub, %sw.bb2 ], [ 42, %sw.bb1 ], [ 5, %entry ]
+  %retval.0 = phi i32 [ 123, %sw.default ], [ %select, %sw.bb3 ], [ %sub, %sw.bb2 ], [ 42, %sw.bb1 ], [ 5, %entry ]
   ret i32 %retval.0
 
 ; CHECK-LABEL: @cprop(
diff --git a/test/Transforms/SimplifyCFG/attr-noduplicate.ll b/test/Transforms/SimplifyCFG/attr-noduplicate.ll
new file mode 100644
index 0000000..523aa51
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/attr-noduplicate.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; This test checks that the SimplifyCFG pass won't duplicate a call to a
+; function marked noduplicate.
+;
+; CHECK-LABEL: @noduplicate
+; CHECK: call void @barrier
+; CHECK-NOT: call void @barrier
+define void @noduplicate(i32 %cond, i32* %out) { ; stores to %out[0], %out[1] and %out[2]; exactly one barrier call is expected in the output
+entry:
+  %out1 = getelementptr i32* %out, i32 1
+  %out2 = getelementptr i32* %out, i32 2
+  %cmp = icmp eq i32 %cond, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 5, i32* %out
+  br label %if.end
+
+if.end: ; join block reached from both entry and if.then
+  call void @barrier() #0 ; the only call site in the input; #0 carries noduplicate
+  br i1 %cmp, label %cond.end, label %cond.false ; branches on the same %cmp as entry - presumably what would tempt the pass to clone this block per predecessor
+
+cond.false:
+  store i32 5, i32* %out1
+  br label %cond.end
+
+cond.end:
+  %value = phi i32 [ 1, %cond.false ], [ 0, %if.end ]
+  store i32 %value, i32* %out2
+  ret void
+}
+
+; Function Attrs: noduplicate nounwind
+declare void @barrier() #0
+
+attributes #0 = { noduplicate nounwind }
diff --git a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
index 7fc0cbd..9d8086c 100644
--- a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
@@ -41,10 +41,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !15, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 231, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 589870, metadata !15, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 231, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 231] [def] [scope 0] [foo]
 !1 = metadata !{i32 589865, metadata !15} ; [ DW_TAG_file_type ]
 !2 = metadata !{i32 589841, metadata !15, i32 12, metadata !"clang (trunk 129006)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !15, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 589845, metadata !15, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 131, i32 2, metadata !0, null}
 !6 = metadata !{i32 134, i32 2, metadata !0, null}
diff --git a/test/Transforms/SimplifyCFG/common-dest-folding.ll b/test/Transforms/SimplifyCFG/common-dest-folding.ll
new file mode 100644
index 0000000..0aa3b2c
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/common-dest-folding.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+;CHECK-LABEL: @foo(
+;CHECK: and i32 %c1, %k
+;CHECK: icmp eq i32
+;CHECK: and i32 %c2, %k
+;CHECK: icmp eq i32
+;CHECK: or i1
+;CHECK: ret
+define i32 @foo(i32 %k, i32 %c1, i32 %c2) { ; two bit tests that both bail out to block %8; expected to merge into one or'ed condition
+  %1 = and i32 %c1, %k
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %8, label %3 ; first early exit to the shared block
+
+; <label>:3                                       ; preds = %0
+  %4 = and i32 %c2, %k
+  %5 = icmp eq i32 %4, 0
+  br i1 %5, label %8, label %6 ; second early exit to the same block
+
+; <label>:6                                       ; preds = %3
+  %7 = tail call i32 (...)* @bar() nounwind ; reached only when both masked values are non-zero
+  br label %8
+
+; <label>:8                                       ; preds = %3, %0, %6
+  ret i32 undef
+}
+
+;CHECK-LABEL: @conduse(
+;CHECK: shl i32 1, %c1
+;CHECK-NEXT: shl i32 1, %c2
+;CHECK-NEXT: and i32
+;CHECK-NEXT: icmp eq i32
+;CHECK-NEXT: and i32
+;CHECK-NEXT: icmp eq i32
+;CHECK: ret
+define i32 @conduse(i32 %k, i32 %c1, i32 %c2) #0 { ; NOTE(review): groups #0 and #1 have no attributes definition in this file - confirm the parser accepts that
+bb:
+  %tmp = shl i32 1, %c1
+  %tmp4 = shl i32 1, %c2 ; operand of the second test, already computed in the first block
+  %tmp1 = and i32 %tmp, %k
+  %tmp2 = icmp eq i32 %tmp1, 0
+  br i1 %tmp2, label %bb9, label %bb3 ; first early exit to the shared block
+
+bb3:                                              ; preds = %bb
+  %tmp5 = and i32 %tmp4, %k
+  %tmp6 = icmp eq i32 %tmp5, 0
+  br i1 %tmp6, label %bb9, label %bb7 ; second early exit to the same block
+
+bb7:                                              ; preds = %bb3
+  %tmp8 = tail call i32 (...)* @bar() #1 ; reached only when both masked values are non-zero
+  br label %bb9
+
+bb9:                                              ; preds = %bb7, %bb3, %bb
+  ret i32 undef
+}
+
+declare i32 @bar(...)
diff --git a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
index 0e36066..0547fa9 100644
--- a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
+++ b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
@@ -29,12 +29,13 @@ declare i32 @bar(...)
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
+!llvm.module.flags = !{!21}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !20, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 589870, metadata !20, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
 !1 = metadata !{i32 589865, metadata !20} ; [ DW_TAG_file_type ]
 !2 = metadata !{i32 589841, metadata !20, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !8, metadata !8, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 589845, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 590081, metadata !0, metadata !"i", metadata !1, i32 16777218, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
@@ -52,3 +53,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !18 = metadata !{i32 8, i32 3, metadata !17, null}
 !19 = metadata !{i32 9, i32 3, metadata !10, null}
 !20 = metadata !{metadata !"b.c", metadata !"/private/tmp"}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/SimplifyCFG/lit.local.cfg b/test/Transforms/SimplifyCFG/lit.local.cfg
deleted file mode 100644
index e69de29..0000000
--- a/test/Transforms/SimplifyCFG/lit.local.cfg
+++ /dev/null
diff --git a/test/Transforms/SimplifyCFG/switch_create.ll b/test/Transforms/SimplifyCFG/switch_create.ll
index 5500ba2..e1e9157 100644
--- a/test/Transforms/SimplifyCFG/switch_create.ll
+++ b/test/Transforms/SimplifyCFG/switch_create.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -simplifycfg -S | FileCheck %s
+; RUN: opt -S -simplifycfg < %s | FileCheck -check-prefix=CHECK %s
+; RUN: opt -S -default-data-layout="p:32:32-p1:16:16" -simplifycfg < %s | FileCheck -check-prefix=CHECK -check-prefix=DL %s
 
 declare void @foo1()
 
@@ -22,6 +23,44 @@ F:              ; preds = %0
 ; CHECK:  ]
 }
 
+define void @test1_ptr(i32* %V) { ; two pointer equality tests or'ed into one branch; only the opt invocation that supplies p:32:32 verifies the i32 switch
+        %C1 = icmp eq i32* %V, inttoptr (i32 4 to i32*) ; is %V the constant address 4?
+        %C2 = icmp eq i32* %V, inttoptr (i32 17 to i32*) ; is %V the constant address 17?
+        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+        br i1 %CN, label %T, label %F
+T:              ; preds = %0
+        call void @foo1( )
+        ret void
+F:              ; preds = %0
+        call void @foo2( )
+        ret void
+; CHECK-LABEL: @test1_ptr(
+; DL:  %magicptr = ptrtoint i32* %V to i32
+; DL:  switch i32 %magicptr, label %F [
+; DL:    i32 17, label %T
+; DL:    i32 4, label %T
+; DL:  ]
+}
+
+define void @test1_ptr_as1(i32 addrspace(1)* %V) { ; addrspace(1) version of @test1_ptr; the layout given to the second opt invocation has p1:16:16, so the switch is expected on i16
+        %C1 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 4 to i32 addrspace(1)*) ; constant is written as i32 although the pointer is narrower under that layout
+        %C2 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 17 to i32 addrspace(1)*)
+        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+        br i1 %CN, label %T, label %F
+T:              ; preds = %0
+        call void @foo1( )
+        ret void
+F:              ; preds = %0
+        call void @foo2( )
+        ret void
+; CHECK-LABEL: @test1_ptr_as1(
+; DL:  %magicptr = ptrtoint i32 addrspace(1)* %V to i16
+; DL:  switch i16 %magicptr, label %F [
+; DL:    i16 17, label %T
+; DL:    i16 4, label %T
+; DL:  ]
+}
+
 define void @test2(i32 %V) {
         %C1 = icmp ne i32 %V, 4         ; <i1> [#uses=1]
         %C2 = icmp ne i32 %V, 17                ; <i1> [#uses=1]
@@ -79,7 +118,7 @@ lor.end:                                          ; preds = %lor.rhs, %lor.lhs.f
   %0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp8, %lor.rhs ]
   %lor.ext = zext i1 %0 to i32
   ret i32 %lor.ext
-  
+
 ; CHECK-LABEL: @test4(
 ; CHECK:  switch i8 %c, label %lor.rhs [
 ; CHECK:    i8 62, label %lor.end
@@ -139,7 +178,7 @@ shortcirc_done.4:               ; preds = %shortcirc_next.3, %shortcirc_next.2,
 UnifiedReturnBlock:             ; preds = %shortcirc_done.4, %shortcirc_next.4
         %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ]             ; <i1> [#uses=1]
         ret i1 %UnifiedRetVal
-        
+
 ; CHECK-LABEL: @test6(
 ; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
 ; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
@@ -160,7 +199,7 @@ if.then:                                          ; preds = %entry
 
 if.end:                                           ; preds = %entry
   ret void
-  
+
 ; CHECK-LABEL: @test7(
 ; CHECK:   %cmp = icmp ult i32 %x, 32
 ; CHECK:   br i1 %cmp, label %if.then, label %switch.early.test
@@ -189,7 +228,7 @@ if.then:                                          ; preds = %entry
 
 if.end:                                           ; preds = %entry
   ret i32 0
-  
+
 ; CHECK-LABEL: @test8(
 ; CHECK: switch.early.test:
 ; CHECK:   switch i8 %c, label %if.end [
@@ -245,7 +284,7 @@ lor.end:                                          ; preds = %lor.rhs, %lor.lhs.f
   %0 = phi i1 [ true, %lor.lhs.false36 ], [ true, %lor.lhs.false31 ], [ true, %lor.lhs.false26 ], [ true, %lor.lhs.false21 ], [ true, %lor.lhs.false16 ], [ true, %lor.lhs.false11 ], [ true, %lor.lhs.false6 ], [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp43, %lor.rhs ]
   %conv46 = zext i1 %0 to i32
   ret i32 %conv46
-  
+
 ; CHECK-LABEL: @test9(
 ; CHECK:   %cmp = icmp ult i8 %c, 33
 ; CHECK:   br i1 %cmp, label %lor.end, label %switch.early.test
diff --git a/test/Transforms/SimplifyCFG/trap-debugloc.ll b/test/Transforms/SimplifyCFG/trap-debugloc.ll
index 953557ff..3b449cb 100644
--- a/test/Transforms/SimplifyCFG/trap-debugloc.ll
+++ b/test/Transforms/SimplifyCFG/trap-debugloc.ll
@@ -8,15 +8,17 @@ define void @foo() nounwind ssp {
 }
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!10}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !8, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 589870, metadata !8, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [foo]
 !1 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ]
 !2 = metadata !{i32 589841, metadata !8, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 4, i32 2, metadata !6, null}
 !6 = metadata !{i32 589835, metadata !8, metadata !0, i32 3, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
 !7 = metadata !{i32 5, i32 1, metadata !6, null}
 !8 = metadata !{metadata !"foo.c", metadata !"/private/tmp"}
 !9 = metadata !{metadata !0}
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/Sink/lit.local.cfg b/test/Transforms/Sink/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/Sink/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll b/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll
index 69febc3..438fa96 100644
--- a/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll
+++ b/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll
@@ -1,5 +1,10 @@
-; RUN: opt < %s -strip -S | grep foo | count 2
-; RUN: opt < %s -strip -S | grep bar | count 2
+; RUN: opt < %s -strip -S | FileCheck %s
+
+; CHECK: foo
+; CHECK: bar
+; CHECK: foo
+; CHECK: bar
+
 @llvm.used = appending global [2 x i8*] [ i8* bitcast (i32* @foo to i8*), i8* bitcast (i32 ()* @bar to i8*) ], section "llvm.metadata"		; <[2 x i8*]*> [#uses=0]
 @foo = internal constant i32 41		; <i32*> [#uses=1]
 
diff --git a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
index 0181c9b..5353744 100644
--- a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
+++ b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
@@ -1,4 +1,6 @@
-; RUN: opt -strip-debug < %s | llvm-dis | grep -v llvm.dbg
+; RUN: opt -strip-debug < %s -S | FileCheck %s
+
+; CHECK-NOT: llvm.dbg
 
 @x = common global i32 0                          ; <i32*> [#uses=0]
 
@@ -11,6 +13,7 @@ entry:
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!13}
 !llvm.dbg.sp = !{!0}
 !llvm.dbg.lv.foo = !{!5}
 !llvm.dbg.gv = !{!8}
@@ -18,7 +21,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !0 = metadata !{i32 524334, metadata !12, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 524329, metadata !12} ; [ DW_TAG_file_type ]
 !2 = metadata !{i32 524305, metadata !12, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 524309, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 524544, metadata !6, metadata !"y", metadata !1, i32 3, metadata !7} ; [ DW_TAG_auto_variable ]
 !6 = metadata !{i32 524299, metadata !12, metadata !0, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
@@ -28,3 +31,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !10 = metadata !{i32 3, i32 0, metadata !6, null}
 !11 = metadata !{i32 4, i32 0, metadata !6, null}
 !12 = metadata !{metadata !"b.c", metadata !"/tmp"}
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/StripSymbols/2010-07-01-DeadDbgInfo.ll b/test/Transforms/StripSymbols/2010-07-01-DeadDbgInfo.ll
deleted file mode 100644
index b893410..0000000
--- a/test/Transforms/StripSymbols/2010-07-01-DeadDbgInfo.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: opt -strip-dead-debug-info < %s | llvm-dis -o %t.ll
-; RUN: grep -v bar %t.ll
-; RUN: grep -v abcd %t.ll
-
-@xyz = global i32 2                               ; <i32*> [#uses=1]
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-define i32 @fn() nounwind readnone ssp {
-entry:
-  ret i32 0, !dbg !17
-}
-
-define i32 @foo(i32 %i) nounwind readonly ssp {
-entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !14), !dbg !19
-  %.0 = load i32* @xyz, align 4                   ; <i32> [#uses=1]
-  ret i32 %.0, !dbg !20
-}
-
-!llvm.dbg.cu = !{!2}
-!llvm.dbg.sp = !{!0, !5, !9}
-!llvm.dbg.lv.bar = !{!12}
-!llvm.dbg.lv.foo = !{!14}
-!llvm.dbg.gv = !{!15, !16}
-
-!0 = metadata !{i32 524334, metadata !22, null, metadata !"bar", metadata !"bar", metadata !"", i32 5, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !22} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, metadata !22, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !22, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{null}
-!5 = metadata !{i32 524334, metadata !22, null, metadata !"fn", metadata !"fn", metadata !"fn", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @fn, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 524309, metadata !22, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 524324, metadata !22, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 524334, metadata !22, null, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 524309, metadata !22, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!11 = metadata !{metadata !8, metadata !8}
-!12 = metadata !{i32 524544, metadata !13, metadata !"bb", metadata !1, i32 5, metadata !8} ; [ DW_TAG_auto_variable ]
-!13 = metadata !{i32 524299, metadata !22, metadata !0, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!14 = metadata !{i32 524545, metadata !9, metadata !"i", metadata !1, i32 7, metadata !8} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 524340, i32 0, metadata !1, metadata !"abcd", metadata !"abcd", metadata !"", metadata !1, i32 2, metadata !8, i1 true, i1 true, null} ; [ DW_TAG_variable ]
-!16 = metadata !{i32 524340, i32 0, metadata !1, metadata !"xyz", metadata !"xyz", metadata !"", metadata !1, i32 3, metadata !8, i1 false, i1 true, i32* @xyz} ; [ DW_TAG_variable ]
-!17 = metadata !{i32 6, i32 0, metadata !18, null}
-!18 = metadata !{i32 524299, metadata !22, metadata !5, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{i32 7, i32 0, metadata !9, null}
-!20 = metadata !{i32 10, i32 0, metadata !21, null}
-!21 = metadata !{i32 524299, metadata !22, metadata !9, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!22 = metadata !{metadata !"g.c", metadata !"/tmp/"}
diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll
index e480f43..2878468 100644
--- a/test/Transforms/StripSymbols/2010-08-25-crash.ll
+++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll
@@ -5,18 +5,20 @@ entry:
 }
 
 !llvm.dbg.cu = !{!2}
-!llvm.dbg.sp = !{!0}
-!llvm.dbg.gv = !{!6}
+!llvm.module.flags = !{!14}
 
 !0 = metadata !{i32 524334, metadata !10, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 524329, metadata !10} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, metadata !10, i32 12, metadata !"clang version 2.8 (trunk 112062)", i1 true, metadata !"", i32 0, metadata !11, metadata !11, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 524305, metadata !10, i32 12, metadata !"clang version 2.8 (trunk 112062)", i1 true, metadata !"", i32 0, metadata !11, metadata !11, metadata !12, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 524309, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 524324, metadata !10, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"i", metadata !1, i32 2, metadata !7, i1 true, i1 true, i32 0} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"i", metadata !1, i32 2, metadata !7, i1 true, i1 true, i32 0, null} ; [ DW_TAG_variable ]
 !7 = metadata !{i32 524326, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !5} ; [ DW_TAG_const_type ]
 !8 = metadata !{i32 3, i32 13, metadata !9, null}
 !9 = metadata !{i32 524299, metadata !10, metadata !0, i32 3, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
 !10 = metadata !{metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW"}
 !11 = metadata !{i32 0}
+!12 = metadata !{metadata !0}
+!13 = metadata !{metadata !6}
+!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/StripSymbols/lit.local.cfg b/test/Transforms/StripSymbols/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/StripSymbols/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/StripSymbols/strip-dead-debug-info.ll b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
new file mode 100644
index 0000000..2d687ae
--- /dev/null
+++ b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
@@ -0,0 +1,58 @@
+; RUN: opt -strip-dead-debug-info -verify %s -S | FileCheck %s
+
+; CHECK: ModuleID = '{{.*}}'
+; CHECK-NOT: bar
+; CHECK-NOT: abcd
+
+@xyz = global i32 2
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #0
+
+; Function Attrs: nounwind readnone ssp
+define i32 @fn() #1 {
+entry:
+  ret i32 0, !dbg !18
+}
+
+; Function Attrs: nounwind readonly ssp
+define i32 @foo(i32 %i) #2 {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !15), !dbg !20
+  %.0 = load i32* @xyz, align 4
+  ret i32 %.0, !dbg !21
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind readnone ssp }
+attributes #2 = { nounwind readonly ssp }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!25}
+
+!0 = metadata !{i32 524305, metadata !1, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !23, metadata !24, null, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp//g.c] [DW_LANG_C89]
+!1 = metadata !{metadata !"g.c", metadata !"/tmp/"}
+!2 = metadata !{null}
+!3 = metadata !{i32 524334, metadata !1, null, metadata !"bar", metadata !"bar", metadata !"", i32 5, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [scope 0] [bar]
+!4 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{i32 524329, metadata !1}          ; [ DW_TAG_file_type ] [/tmp//g.c]
+!6 = metadata !{i32 524334, metadata !1, null, metadata !"fn", metadata !"fn", metadata !"fn", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @fn, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [fn]
+!7 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 524324, metadata !1, metadata !5, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 524334, metadata !1, null, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 7] [def] [scope 0] [foo]
+!11 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{metadata !9, metadata !9}
+!13 = metadata !{i32 524544, metadata !14, metadata !"bb", metadata !5, i32 5, metadata !9} ; [ DW_TAG_auto_variable ]
+!14 = metadata !{i32 524299, metadata !1, metadata !3, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
+!15 = metadata !{i32 524545, metadata !10, metadata !"i", metadata !5, i32 7, metadata !9} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 524340, i32 0, metadata !5, metadata !"abcd", metadata !"abcd", metadata !"", metadata !5, i32 2, metadata !9, i1 true, i1 true, null, null} ; [ DW_TAG_variable ]
+!17 = metadata !{i32 524340, i32 0, metadata !5, metadata !"xyz", metadata !"xyz", metadata !"", metadata !5, i32 3, metadata !9, i1 false, i1 true, i32* @xyz, null} ; [ DW_TAG_variable ]
+!18 = metadata !{i32 6, i32 0, metadata !19, null}
+!19 = metadata !{i32 524299, metadata !1, metadata !6, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
+!20 = metadata !{i32 7, i32 0, metadata !10, null}
+!21 = metadata !{i32 10, i32 0, metadata !22, null}
+!22 = metadata !{i32 524299, metadata !1, metadata !10, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
+!23 = metadata !{metadata !3, metadata !6, metadata !10}
+!24 = metadata !{metadata !16, metadata !17}
+!25 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/StructurizeCFG/branch-on-argument.ll b/test/Transforms/StructurizeCFG/branch-on-argument.ll
new file mode 100644
index 0000000..4eba0cd
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/branch-on-argument.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -o - -structurizecfg < %s | FileCheck %s
+
+; CHECK-LABEL: @invert_branch_on_arg_inf_loop(
+; CHECK: entry:
+; CHECK: %arg.inv = xor i1 %arg, true
+; CHECK: phi i1 [ false, %Flow1 ], [ %arg.inv, %entry ]
+define void @invert_branch_on_arg_inf_loop(i32 addrspace(1)* %out, i1 %arg) {
+entry:
+  br i1 %arg, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  store i32 999, i32 addrspace(1)* %out, align 4
+  br label %for.body
+
+for.end:                                          ; preds = %entry
+  ret void
+}
+
+
+; CHECK-LABEL: @invert_branch_on_arg_jump_into_loop(
+; CHECK: entry:
+; CHECK: %arg.inv = xor i1 %arg, true
+; CHECK: Flow:
+; CHECK: Flow1:
+define void @invert_branch_on_arg_jump_into_loop(i32 addrspace(1)* %out, i32 %n, i1 %arg) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %end.loop
+  %i = phi i32 [0, %entry], [%i.inc, %end.loop]
+  %ptr = getelementptr i32 addrspace(1)* %out, i32 %i
+  store i32 %i, i32 addrspace(1)* %ptr, align 4
+  br i1 %arg, label %mid.loop, label %end.loop
+
+mid.loop:                                         ; preds = %for.body
+  store i32 333, i32 addrspace(1)* %out, align 4
+  br label %for.end
+
+end.loop:                                         ; preds = %for.body
+  %i.inc = add i32 %i, 1
+  %cmp = icmp ne i32 %i.inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %mid.loop, %end.loop
+  ret void
+}
+
diff --git a/test/Transforms/StructurizeCFG/lit.local.cfg b/test/Transforms/StructurizeCFG/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/StructurizeCFG/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/StructurizeCFG/no-branch-to-entry.ll b/test/Transforms/StructurizeCFG/no-branch-to-entry.ll
new file mode 100644
index 0000000..2e22c87
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/no-branch-to-entry.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -o - -structurizecfg < %s | FileCheck %s
+
+; CHECK-LABEL: @no_branch_to_entry_undef(
+; CHECK: entry:
+; CHECK-NEXT: br label %entry.orig
+define void @no_branch_to_entry_undef(i32 addrspace(1)* %out) {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  store i32 999, i32 addrspace(1)* %out, align 4
+  br label %for.body
+
+for.end:                                          ; preds = %entry
+  ret void
+}
+
+; CHECK-LABEL: @no_branch_to_entry_true(
+; CHECK: entry:
+; CHECK-NEXT: br label %entry.orig
+define void @no_branch_to_entry_true(i32 addrspace(1)* %out) {
+entry:
+  br i1 true, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  store i32 999, i32 addrspace(1)* %out, align 4
+  br label %for.body
+
+for.end:                                          ; preds = %entry
+  ret void
+}
diff --git a/test/Transforms/StructurizeCFG/switch.ll b/test/Transforms/StructurizeCFG/switch.ll
new file mode 100644
index 0000000..316df57
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/switch.ll
@@ -0,0 +1,23 @@
+; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+
+; The structurizecfg pass cannot handle switch instructions, so we need to
+; make sure the lower switch pass is always run before structurizecfg.
+
+; CHECK-LABEL: @switch
+define void @switch(i32 addrspace(1)* %out, i32 %cond) nounwind {
+entry:
+; CHECK: icmp
+  switch i32 %cond, label %done [ i32 0, label %zero]
+
+; CHECK: zero:
+zero:                                             ; preds = %entry
+; CHECK: store i32 7, i32 addrspace(1)* %out
+  store i32 7, i32 addrspace(1)* %out
+; CHECK: br label %done
+  br label %done
+
+; CHECK: done:
+done:                                             ; preds = %entry, %zero
+; CHECK: ret void
+  ret void
+}
diff --git a/test/Transforms/TailCallElim/lit.local.cfg b/test/Transforms/TailCallElim/lit.local.cfg
deleted file mode 100644
index 19eebc0..0000000
--- a/test/Transforms/TailCallElim/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/TailDup/X86/lit.local.cfg b/test/Transforms/TailDup/X86/lit.local.cfg
index da2db5a..ba763cf 100644
--- a/test/Transforms/TailDup/X86/lit.local.cfg
+++ b/test/Transforms/TailDup/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Transforms/TailDup/lit.local.cfg b/test/Transforms/TailDup/lit.local.cfg
index 18c604a..19840aa 100644
--- a/test/Transforms/TailDup/lit.local.cfg
+++ b/test/Transforms/TailDup/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True