177 files changed, 4058 insertions, 234 deletions
diff --git a/test/Transforms/ADCE/dg.exp b/test/Transforms/ADCE/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ADCE/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ADCE/lit.local.cfg b/test/Transforms/ADCE/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ADCE/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ArgumentPromotion/dg.exp b/test/Transforms/ArgumentPromotion/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ArgumentPromotion/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ArgumentPromotion/lit.local.cfg b/test/Transforms/ArgumentPromotion/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/BBVectorize/cycle.ll b/test/Transforms/BBVectorize/cycle.ll
new file mode 100644
index 0000000..32a91ce
--- /dev/null
+++ b/test/Transforms/BBVectorize/cycle.ll
@@ -0,0 +1,112 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+; This test checks the non-trivial pairing-induced cycle avoidance. Without this cycle avoidance, the algorithm would otherwise
+; want to select the pairs:
+; %div77 = fdiv double %sub74, %mul76.v.r1 <->   %div125 = fdiv double %mul121, %mul76.v.r2 (div125 depends on mul117)
+; %add84 = fadd double %sub83, 2.000000e+00 <->   %add127 = fadd double %mul126, 1.000000e+00 (add127 depends on div77)
+; %mul95 = fmul double %sub45.v.r1, %sub36.v.r1 <->   %mul88 = fmul double %sub36.v.r1, %sub87 (mul88 depends on add84)
+; %mul117 = fmul double %sub39.v.r1, %sub116 <->   %mul97 = fmul double %mul96, %sub39.v.r1 (mul97 depends on mul95)
+; and so a dependency cycle would be created.
+
+declare double @fabs(double) nounwind readnone
+define void @test1(double %a, double %b, double %c, double %add80, double %mul1, double %mul2.v.r1, double %mul73, double %sub, double %sub65, double %F.0, i32 %n.0, double %Bnm3.0, double %Bnm2.0, double %Bnm1.0, double %Anm3.0, double %Anm2.0, double %Anm1.0) {
+entry:
+  br label %go
+go:
+  %conv = sitofp i32 %n.0 to double
+  %add35 = fadd double %conv, %a
+  %sub36 = fadd double %add35, -1.000000e+00
+  %add38 = fadd double %conv, %b
+  %sub39 = fadd double %add38, -1.000000e+00
+  %add41 = fadd double %conv, %c
+  %sub42 = fadd double %add41, -1.000000e+00
+  %sub45 = fadd double %add35, -2.000000e+00
+  %sub48 = fadd double %add38, -2.000000e+00
+  %sub51 = fadd double %add41, -2.000000e+00
+  %mul52 = shl nsw i32 %n.0, 1
+  %sub53 = add nsw i32 %mul52, -1
+  %conv54 = sitofp i32 %sub53 to double
+  %sub56 = add nsw i32 %mul52, -3
+  %conv57 = sitofp i32 %sub56 to double
+  %sub59 = add nsw i32 %mul52, -5
+  %conv60 = sitofp i32 %sub59 to double
+  %mul61 = mul nsw i32 %n.0, %n.0
+  %conv62 = sitofp i32 %mul61 to double
+  %mul63 = fmul double %conv62, 3.000000e+00
+  %mul67 = fmul double %sub65, %conv
+  %add68 = fadd double %mul63, %mul67
+  %add69 = fadd double %add68, 2.000000e+00
+  %sub71 = fsub double %add69, %mul2.v.r1
+  %sub74 = fsub double %sub71, %mul73
+  %mul75 = fmul double %conv57, 2.000000e+00
+  %mul76 = fmul double %mul75, %sub42
+  %div77 = fdiv double %sub74, %mul76
+  %mul82 = fmul double %add80, %conv
+  %sub83 = fsub double %mul63, %mul82
+  %add84 = fadd double %sub83, 2.000000e+00
+  %sub86 = fsub double %add84, %mul2.v.r1
+  %sub87 = fsub double -0.000000e+00, %sub86
+  %mul88 = fmul double %sub36, %sub87
+  %mul89 = fmul double %mul88, %sub39
+  %mul90 = fmul double %conv54, 4.000000e+00
+  %mul91 = fmul double %mul90, %conv57
+  %mul92 = fmul double %mul91, %sub51
+  %mul93 = fmul double %mul92, %sub42
+  %div94 = fdiv double %mul89, %mul93
+  %mul95 = fmul double %sub45, %sub36
+  %mul96 = fmul double %mul95, %sub48
+  %mul97 = fmul double %mul96, %sub39
+  %sub99 = fsub double %conv, %a
+  %sub100 = fadd double %sub99, -2.000000e+00
+  %mul101 = fmul double %mul97, %sub100
+  %sub103 = fsub double %conv, %b
+  %sub104 = fadd double %sub103, -2.000000e+00
+  %mul105 = fmul double %mul101, %sub104
+  %mul106 = fmul double %conv57, 8.000000e+00
+  %mul107 = fmul double %mul106, %conv57
+  %mul108 = fmul double %mul107, %conv60
+  %sub111 = fadd double %add41, -3.000000e+00
+  %mul112 = fmul double %mul108, %sub111
+  %mul113 = fmul double %mul112, %sub51
+  %mul114 = fmul double %mul113, %sub42
+  %div115 = fdiv double %mul105, %mul114
+  %sub116 = fsub double -0.000000e+00, %sub36
+  %mul117 = fmul double %sub39, %sub116
+  %sub119 = fsub double %conv, %c
+  %sub120 = fadd double %sub119, -1.000000e+00
+  %mul121 = fmul double %mul117, %sub120
+  %mul123 = fmul double %mul75, %sub51
+  %mul124 = fmul double %mul123, %sub42
+  %div125 = fdiv double %mul121, %mul124
+  %mul126 = fmul double %div77, %sub
+  %add127 = fadd double %mul126, 1.000000e+00
+  %mul128 = fmul double %add127, %Anm1.0
+  %mul129 = fmul double %div94, %sub
+  %add130 = fadd double %div125, %mul129
+  %mul131 = fmul double %add130, %sub
+  %mul132 = fmul double %mul131, %Anm2.0
+  %add133 = fadd double %mul128, %mul132
+  %mul134 = fmul double %div115, %mul1
+  %mul135 = fmul double %mul134, %Anm3.0
+  %add136 = fadd double %add133, %mul135
+  %mul139 = fmul double %add127, %Bnm1.0
+  %mul143 = fmul double %mul131, %Bnm2.0
+  %add144 = fadd double %mul139, %mul143
+  %mul146 = fmul double %mul134, %Bnm3.0
+  %add147 = fadd double %add144, %mul146
+  %div148 = fdiv double %add136, %add147
+  %sub149 = fsub double %F.0, %div148
+  %div150 = fdiv double %sub149, %F.0
+  %call = tail call double @fabs(double %div150) nounwind readnone
+  %cmp = fcmp olt double %call, 0x3CB0000000000000
+  %cmp152 = icmp sgt i32 %n.0, 20000
+  %or.cond = or i1 %cmp, %cmp152
+  br i1 %or.cond, label %done, label %go
+done:
+  ret void
+; CHECK: @test1
+; CHECK: go:
+; CHECK-NEXT: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0
+; FIXME: When tree pruning is deterministic, include the entire output.
+}
diff --git a/test/Transforms/BBVectorize/func-alias.ll b/test/Transforms/BBVectorize/func-alias.ll
new file mode 100644
index 0000000..9d0cc07
--- /dev/null
+++ b/test/Transforms/BBVectorize/func-alias.ll
@@ -0,0 +1,244 @@
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -basicaa -bb-vectorize -bb-vectorize-req-chain-depth=2 -instcombine -gvn -S | FileCheck %s
+; The chain length is set to 2 so that this will do some vectorization; check that the order of the functions is unchanged.
+
+%struct.descriptor_dimension = type { i64, i64, i64 }
+%struct.__st_parameter_common = type { i32, i32, i8*, i32, i32, i8*, i32* }
+%struct.__st_parameter_dt = type { %struct.__st_parameter_common, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] }
+%"struct.array4_real(kind=4)" = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
+%"struct.array4_integer(kind=4).73" = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
+%struct.array4_unknown = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
+
+@.cst4 = external unnamed_addr constant [11 x i8], align 8
+@.cst823 = external unnamed_addr constant [214 x i8], align 64
+@j.4580 = external global i32
+@j1.4581 = external global i32
+@nty1.4590 = external global [2 x i8]
+@nty2.4591 = external global [2 x i8]
+@xr1.4592 = external global float
+@xr2.4593 = external global float
+@yr1.4594 = external global float
+@yr2.4595 = external global float
+
+@__main1_MOD_iave = external unnamed_addr global i32
+@__main1_MOD_igrp = external global i32
+@__main1_MOD_iounit = external global i32
+@__main1_MOD_ityp = external global i32
+@__main1_MOD_mclmsg = external unnamed_addr global %struct.array4_unknown, align 32
+@__main1_MOD_mxdate = external unnamed_addr global %"struct.array4_integer(kind=4).73", align 32
+@__main1_MOD_rmxval = external unnamed_addr global %"struct.array4_real(kind=4)", align 32
+
+declare void @_gfortran_st_write(%struct.__st_parameter_dt*)
+declare void @_gfortran_st_write_done(%struct.__st_parameter_dt*)
+declare void @_gfortran_transfer_character_write(%struct.__st_parameter_dt*, i8*, i32)
+declare void @_gfortran_transfer_integer_write(%struct.__st_parameter_dt*, i8*, i32)
+declare void @_gfortran_transfer_real_write(%struct.__st_parameter_dt*, i8*, i32)
+
+define i1 @"prtmax__<bb 3>_<bb 34>"(%struct.__st_parameter_dt* %memtmp3, i32 %D.4627_188.reload) nounwind {
+; CHECK: prtmax__
+newFuncRoot:
+  br label %"<bb 34>"
+
+codeRepl80.exitStub:                              ; preds = %"<bb 34>"
+  ret i1 true
+
+"<bb 34>.<bb 25>_crit_edge.exitStub":             ; preds = %"<bb 34>"
+  ret i1 false
+
+"<bb 34>":                                        ; preds = %newFuncRoot
+  %tmp128 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+  %tmp129 = getelementptr inbounds %struct.__st_parameter_common* %tmp128, i32 0, i32 2
+  store i8* getelementptr inbounds ([11 x i8]* @.cst4, i64 0, i64 0), i8** %tmp129, align 8
+  %tmp130 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+  %tmp131 = getelementptr inbounds %struct.__st_parameter_common* %tmp130, i32 0, i32 3
+  store i32 31495, i32* %tmp131, align 4
+  %tmp132 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 5
+  store i8* getelementptr inbounds ([214 x i8]* @.cst823, i64 0, i64 0), i8** %tmp132, align 8
+  %tmp133 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 6
+  store i32 214, i32* %tmp133, align 4
+  %tmp134 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+  %tmp135 = getelementptr inbounds %struct.__st_parameter_common* %tmp134, i32 0, i32 0
+  store i32 4096, i32* %tmp135, align 4
+  %iounit.8748_288 = load i32* @__main1_MOD_iounit, align 4
+  %tmp136 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+  %tmp137 = getelementptr inbounds %struct.__st_parameter_common* %tmp136, i32 0, i32 1
+  store i32 %iounit.8748_288, i32* %tmp137, align 4
+  call void @_gfortran_st_write(%struct.__st_parameter_dt* %memtmp3) nounwind
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j.4580, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+  %D.75807_289 = load i8** getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
+  %j.8758_290 = load i32* @j.4580, align 4
+  %D.75760_291 = sext i32 %j.8758_290 to i64
+  %iave.8736_292 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_293 = sext i32 %iave.8736_292 to i64
+  %D.75808_294 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75809_295 = mul nsw i64 %D.75620_293, %D.75808_294
+  %igrp.8737_296 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_297 = sext i32 %igrp.8737_296 to i64
+  %D.75810_298 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75811_299 = mul nsw i64 %D.75635_297, %D.75810_298
+  %D.75812_300 = add nsw i64 %D.75809_295, %D.75811_299
+  %D.75813_301 = add nsw i64 %D.75760_291, %D.75812_300
+  %ityp.8750_302 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_303 = sext i32 %ityp.8750_302 to i64
+  %D.75814_304 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75815_305 = mul nsw i64 %D.75704_303, %D.75814_304
+  %D.75816_306 = add nsw i64 %D.75813_301, %D.75815_305
+  %D.75817_307 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
+  %D.75818_308 = add nsw i64 %D.75816_306, %D.75817_307
+  %tmp138 = bitcast i8* %D.75807_289 to [0 x float]*
+  %tmp139 = bitcast [0 x float]* %tmp138 to float*
+  %D.75819_309 = getelementptr inbounds float* %tmp139, i64 %D.75818_308
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75819_309, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  %D.75820_310 = load i8** getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
+  %j.8758_311 = load i32* @j.4580, align 4
+  %D.75760_312 = sext i32 %j.8758_311 to i64
+  %iave.8736_313 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_314 = sext i32 %iave.8736_313 to i64
+  %D.75821_315 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75822_316 = mul nsw i64 %D.75620_314, %D.75821_315
+  %igrp.8737_317 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_318 = sext i32 %igrp.8737_317 to i64
+  %D.75823_319 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75824_320 = mul nsw i64 %D.75635_318, %D.75823_319
+  %D.75825_321 = add nsw i64 %D.75822_316, %D.75824_320
+  %D.75826_322 = add nsw i64 %D.75760_312, %D.75825_321
+  %ityp.8750_323 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_324 = sext i32 %ityp.8750_323 to i64
+  %D.75827_325 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75828_326 = mul nsw i64 %D.75704_324, %D.75827_325
+  %D.75829_327 = add nsw i64 %D.75826_322, %D.75828_326
+  %D.75830_328 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
+  %D.75831_329 = add nsw i64 %D.75829_327, %D.75830_328
+  %tmp140 = bitcast i8* %D.75820_310 to [0 x [1 x i8]]*
+  %tmp141 = bitcast [0 x [1 x i8]]* %tmp140 to [1 x i8]*
+  %D.75832_330 = getelementptr inbounds [1 x i8]* %tmp141, i64 %D.75831_329
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75832_330, i32 1) nounwind
+; CHECK: @_gfortran_transfer_character_write
+  %D.75833_331 = load i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
+  %j.8758_332 = load i32* @j.4580, align 4
+  %D.75760_333 = sext i32 %j.8758_332 to i64
+  %iave.8736_334 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_335 = sext i32 %iave.8736_334 to i64
+  %D.75834_336 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75835_337 = mul nsw i64 %D.75620_335, %D.75834_336
+  %igrp.8737_338 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_339 = sext i32 %igrp.8737_338 to i64
+  %D.75836_340 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75837_341 = mul nsw i64 %D.75635_339, %D.75836_340
+  %D.75838_342 = add nsw i64 %D.75835_337, %D.75837_341
+  %D.75839_343 = add nsw i64 %D.75760_333, %D.75838_342
+  %ityp.8750_344 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_345 = sext i32 %ityp.8750_344 to i64
+  %D.75840_346 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75841_347 = mul nsw i64 %D.75704_345, %D.75840_346
+  %D.75842_348 = add nsw i64 %D.75839_343, %D.75841_347
+  %D.75843_349 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
+  %D.75844_350 = add nsw i64 %D.75842_348, %D.75843_349
+  %tmp142 = bitcast i8* %D.75833_331 to [0 x i32]*
+  %tmp143 = bitcast [0 x i32]* %tmp142 to i32*
+  %D.75845_351 = getelementptr inbounds i32* %tmp143, i64 %D.75844_350
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* %D.75845_351, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @xr1.4592, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @yr1.4594, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [2 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [2 x i8]* @nty1.4590, i32 2) nounwind
+; CHECK: @_gfortran_transfer_character_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j1.4581, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+  %D.75807_352 = load i8** getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
+  %j1.8760_353 = load i32* @j1.4581, align 4
+  %D.75773_354 = sext i32 %j1.8760_353 to i64
+  %iave.8736_355 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_356 = sext i32 %iave.8736_355 to i64
+  %D.75808_357 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75809_358 = mul nsw i64 %D.75620_356, %D.75808_357
+  %igrp.8737_359 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_360 = sext i32 %igrp.8737_359 to i64
+  %D.75810_361 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75811_362 = mul nsw i64 %D.75635_360, %D.75810_361
+  %D.75812_363 = add nsw i64 %D.75809_358, %D.75811_362
+  %D.75846_364 = add nsw i64 %D.75773_354, %D.75812_363
+  %ityp.8750_365 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_366 = sext i32 %ityp.8750_365 to i64
+  %D.75814_367 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75815_368 = mul nsw i64 %D.75704_366, %D.75814_367
+  %D.75847_369 = add nsw i64 %D.75846_364, %D.75815_368
+  %D.75817_370 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
+  %D.75848_371 = add nsw i64 %D.75847_369, %D.75817_370
+  %tmp144 = bitcast i8* %D.75807_352 to [0 x float]*
+  %tmp145 = bitcast [0 x float]* %tmp144 to float*
+  %D.75849_372 = getelementptr inbounds float* %tmp145, i64 %D.75848_371
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75849_372, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  %D.75820_373 = load i8** getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
+  %j1.8760_374 = load i32* @j1.4581, align 4
+  %D.75773_375 = sext i32 %j1.8760_374 to i64
+  %iave.8736_376 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_377 = sext i32 %iave.8736_376 to i64
+  %D.75821_378 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75822_379 = mul nsw i64 %D.75620_377, %D.75821_378
+  %igrp.8737_380 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_381 = sext i32 %igrp.8737_380 to i64
+  %D.75823_382 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75824_383 = mul nsw i64 %D.75635_381, %D.75823_382
+  %D.75825_384 = add nsw i64 %D.75822_379, %D.75824_383
+  %D.75850_385 = add nsw i64 %D.75773_375, %D.75825_384
+  %ityp.8750_386 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_387 = sext i32 %ityp.8750_386 to i64
+  %D.75827_388 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75828_389 = mul nsw i64 %D.75704_387, %D.75827_388
+  %D.75851_390 = add nsw i64 %D.75850_385, %D.75828_389
+  %D.75830_391 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
+  %D.75852_392 = add nsw i64 %D.75851_390, %D.75830_391
+  %tmp146 = bitcast i8* %D.75820_373 to [0 x [1 x i8]]*
+  %tmp147 = bitcast [0 x [1 x i8]]* %tmp146 to [1 x i8]*
+  %D.75853_393 = getelementptr inbounds [1 x i8]* %tmp147, i64 %D.75852_392
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75853_393, i32 1) nounwind
+; CHECK: @_gfortran_transfer_character_write
+  %D.75833_394 = load i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
+  %j1.8760_395 = load i32* @j1.4581, align 4
+  %D.75773_396 = sext i32 %j1.8760_395 to i64
+  %iave.8736_397 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_398 = sext i32 %iave.8736_397 to i64
+  %D.75834_399 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75835_400 = mul nsw i64 %D.75620_398, %D.75834_399
+  %igrp.8737_401 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_402 = sext i32 %igrp.8737_401 to i64
+  %D.75836_403 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75837_404 = mul nsw i64 %D.75635_402, %D.75836_403
+  %D.75838_405 = add nsw i64 %D.75835_400, %D.75837_404
+  %D.75854_406 = add nsw i64 %D.75773_396, %D.75838_405
+  %ityp.8750_407 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_408 = sext i32 %ityp.8750_407 to i64
+  %D.75840_409 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75841_410 = mul nsw i64 %D.75704_408, %D.75840_409
+  %D.75855_411 = add nsw i64 %D.75854_406, %D.75841_410
+  %D.75843_412 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
+  %D.75856_413 = add nsw i64 %D.75855_411, %D.75843_412
+  %tmp148 = bitcast i8* %D.75833_394 to [0 x i32]*
+  %tmp149 = bitcast [0 x i32]* %tmp148 to i32*
+  %D.75857_414 = getelementptr inbounds i32* %tmp149, i64 %D.75856_413
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* %D.75857_414, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @xr2.4593, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @yr2.4595, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [2 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [2 x i8]* @nty2.4591, i32 2) nounwind
+; CHECK: @_gfortran_transfer_character_write
+  call void @_gfortran_st_write_done(%struct.__st_parameter_dt* %memtmp3) nounwind
+; CHECK: @_gfortran_st_write_done
+  %j.8758_415 = load i32* @j.4580, align 4
+  %D.4634_416 = icmp eq i32 %j.8758_415, %D.4627_188.reload
+  %j.8758_417 = load i32* @j.4580, align 4
+  %j.8770_418 = add nsw i32 %j.8758_417, 1
+  store i32 %j.8770_418, i32* @j.4580, align 4
+  %tmp150 = icmp ne i1 %D.4634_416, false
+  br i1 %tmp150, label %codeRepl80.exitStub, label %"<bb 34>.<bb 25>_crit_edge.exitStub"
+}
+
diff --git a/test/Transforms/BBVectorize/ld1.ll b/test/Transforms/BBVectorize/ld1.ll
new file mode 100644
index 0000000..cea225d
--- /dev/null
+++ b/test/Transforms/BBVectorize/ld1.ll
@@ -0,0 +1,41 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+define double @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %i2 = load double* %c, align 8
+  %add = fadd double %mul, %i2
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  %arrayidx6 = getelementptr inbounds double* %c, i64 1
+  %i5 = load double* %arrayidx6, align 8
+  %add7 = fadd double %mul5, %i5
+  %mul9 = fmul double %add, %i1
+  %add11 = fadd double %mul9, %i2
+  %mul13 = fmul double %add7, %i4
+  %add15 = fadd double %mul13, %i5
+  %mul16 = fmul double %add11, %add15
+  ret double %mul16
+; CHECK: @test1
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i2.v.i0 = bitcast double* %c to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %i2 = load <2 x double>* %i2.v.i0, align 8
+; CHECK: %add = fadd <2 x double> %mul, %i2
+; CHECK: %mul9 = fmul <2 x double> %add, %i1
+; CHECK: %add11 = fadd <2 x double> %mul9, %i2
+; CHECK: %add11.v.r1 = extractelement <2 x double> %add11, i32 0
+; CHECK: %add11.v.r2 = extractelement <2 x double> %add11, i32 1
+; CHECK: %mul16 = fmul double %add11.v.r1, %add11.v.r2
+; CHECK: ret double %mul16
+}
+
diff --git a/test/Transforms/BBVectorize/lit.local.cfg b/test/Transforms/BBVectorize/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/BBVectorize/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/BBVectorize/loop1.ll b/test/Transforms/BBVectorize/loop1.ll
new file mode 100644
index 0000000..bebc91a
--- /dev/null
+++ b/test/Transforms/BBVectorize/loop1.ll
@@ -0,0 +1,93 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
+; The second check covers the use of alias analysis (with loop unrolling).
+
+define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
+entry:
+  br label %for.body
+; CHECK: @test1
+; CHECK-UNRL: @test1
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8
+  %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv
+  %1 = load double* %arrayidx2, align 8
+  %mul = fmul double %0, %0
+  %mul3 = fmul double %0, %1
+  %add = fadd double %mul, %mul3
+  %add4 = fadd double %1, %1
+  %add5 = fadd double %add4, %0
+  %mul6 = fmul double %0, %add5
+  %add7 = fadd double %add, %mul6
+  %mul8 = fmul double %1, %1
+  %add9 = fadd double %0, %0
+  %add10 = fadd double %add9, %0
+  %mul11 = fmul double %mul8, %add10
+  %add12 = fadd double %add7, %mul11
+  %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv
+  store double %add12, double* %arrayidx14, align 8
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 10
+  br i1 %exitcond, label %for.end, label %for.body
+; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK: %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv
+; CHECK: %0 = load double* %arrayidx, align 8
+; CHECK: %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv
+; CHECK: %1 = load double* %arrayidx2, align 8
+; CHECK: %mul = fmul double %0, %0
+; CHECK: %mul3 = fmul double %0, %1
+; CHECK: %add = fadd double %mul, %mul3
+; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0
+; CHECK: %mul8 = fmul double %1, %1
+; CHECK: %add4.v.i1.2 = insertelement <2 x double> %add4.v.i1.1, double %0, i32 1
+; CHECK: %add4 = fadd <2 x double> %add4.v.i1.2, %add4.v.i1.2
+; CHECK: %add5.v.i1.1 = insertelement <2 x double> undef, double %0, i32 0
+; CHECK: %add5.v.i1.2 = insertelement <2 x double> %add5.v.i1.1, double %0, i32 1
+; CHECK: %add5 = fadd <2 x double> %add4, %add5.v.i1.2
+; CHECK: %mul6.v.i0.2 = insertelement <2 x double> %add5.v.i1.1, double %mul8, i32 1
+; CHECK: %mul6 = fmul <2 x double> %mul6.v.i0.2, %add5
+; CHECK: %mul6.v.r1 = extractelement <2 x double> %mul6, i32 0
+; CHECK: %mul6.v.r2 = extractelement <2 x double> %mul6, i32 1
+; CHECK: %add7 = fadd double %add, %mul6.v.r1
+; CHECK: %add12 = fadd double %add7, %mul6.v.r2
+; CHECK: %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv
+; CHECK: store double %add12, double* %arrayidx14, align 8
+; CHECK: %indvars.iv.next = add i64 %indvars.iv, 1
+; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+; CHECK: %exitcond = icmp eq i32 %lftr.wideiv, 10
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+; CHECK-UNRL: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.1, %for.body ]
+; CHECK-UNRL: %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv
+; CHECK-UNRL: %0 = bitcast double* %arrayidx to <2 x double>*
+; CHECK-UNRL: %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv
+; CHECK-UNRL: %1 = bitcast double* %arrayidx2 to <2 x double>*
+; CHECK-UNRL: %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv
+; CHECK-UNRL: %2 = load <2 x double>* %0, align 8
+; CHECK-UNRL: %3 = load <2 x double>* %1, align 8
+; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
+; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
+; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
+; CHECK-UNRL: %add4 = fadd <2 x double> %3, %3
+; CHECK-UNRL: %add5 = fadd <2 x double> %add4, %2
+; CHECK-UNRL: %mul6 = fmul <2 x double> %2, %add5
+; CHECK-UNRL: %add7 = fadd <2 x double> %add, %mul6
+; CHECK-UNRL: %mul8 = fmul <2 x double> %3, %3
+; CHECK-UNRL: %add9 = fadd <2 x double> %2, %2
+; CHECK-UNRL: %add10 = fadd <2 x double> %add9, %2
+; CHECK-UNRL: %mul11 = fmul <2 x double> %mul8, %add10
+; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
+; CHECK-UNRL: %4 = bitcast double* %arrayidx14 to <2 x double>*
+; CHECK-UNRL: store <2 x double> %add12, <2 x double>* %4, align 8
+; CHECK-UNRL: %indvars.iv.next.1 = add i64 %indvars.iv, 2
+; CHECK-UNRL: %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32
+; CHECK-UNRL: %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 10
+; CHECK-UNRL: br i1 %exitcond.1, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
diff --git a/test/Transforms/BBVectorize/mem-op-depth.ll b/test/Transforms/BBVectorize/mem-op-depth.ll
new file mode 100644
index 0000000..84f16bd
--- /dev/null
+++ b/test/Transforms/BBVectorize/mem-op-depth.ll
@@ -0,0 +1,22 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=6 -instcombine -gvn -S | FileCheck %s
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+define i32 @test1() nounwind {
+; CHECK: @test1
+  %V1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+  %V2 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 1), align 4
+  %V3= load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 2), align 8
+  %V4 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 3), align 4
+; CHECK:   %V1 = load <4 x float>* bitcast ([1024 x float]* @A to <4 x float>*), align 16
+  store float %V1, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 0), align 16
+  store float %V2, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 1), align 4
+  store float %V3, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 2), align 8
+  store float %V4, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 3), align 4
+; CHECK-NEXT: store <4 x float> %V1, <4 x float>* bitcast ([1024 x float]* @B to <4 x float>*), align 16
+  ret i32 0
+; CHECK-NEXT: ret i32 0
+}
diff --git a/test/Transforms/BBVectorize/req-depth.ll b/test/Transforms/BBVectorize/req-depth.ll
new file mode 100644
index 0000000..8c9cc3c
--- /dev/null
+++ b/test/Transforms/BBVectorize/req-depth.ll
@@ -0,0 +1,17 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 3 -S | FileCheck %s -check-prefix=CHECK-RD3
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 2 -S | FileCheck %s -check-prefix=CHECK-RD2
+
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+	%R  = fmul double %Y1, %Y2
+	ret double %R
+; CHECK-RD3: @test1
+; CHECK-RD2: @test1
+; CHECK-RD3-NOT: <2 x double>
+; CHECK-RD2: <2 x double>
+}
+
diff --git a/test/Transforms/BBVectorize/search-limit.ll b/test/Transforms/BBVectorize/search-limit.ll
new file mode 100644
index 0000000..d9945b5
--- /dev/null
+++ b/test/Transforms/BBVectorize/search-limit.ll
@@ -0,0 +1,46 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-search-limit=4 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-SL4
+
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test1
+; CHECK-SL4: @test1
+; CHECK-SL4-NOT: <2 x double>
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y1, %B1
+        ; Here we have a dependency chain: the short search limit will not
+        ; see past this chain and so will not see the second part of the
+        ; pair to vectorize.
+        %mul41 = fmul double %Z1, %Y2
+        %sub48 = fsub double %Z1, %mul41
+        %mul62 = fmul double %Z1, %sub48
+        %sub69 = fsub double %Z1, %mul62
+        %mul83 = fmul double %Z1, %sub69
+        %sub90 = fsub double %Z1, %mul83
+        %mul104 = fmul double %Z1, %sub90
+        %sub111 = fsub double %Z1, %mul104
+        %mul125 = fmul double %Z1, %sub111
+        %sub132 = fsub double %Z1, %mul125
+        %mul146 = fmul double %Z1, %sub132
+        %sub153 = fsub double %Z1, %mul146
+        ; end of chain.
+	%Z2 = fadd double %Y2, %B2
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+	%R1  = fdiv double %Z1, %Z2
+        %R   = fmul double %R1, %sub153
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R1 = fdiv double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
new file mode 100644
index 0000000..b2ef27b
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple-int.ll
@@ -0,0 +1,59 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+declare double @llvm.fma.f64(double, double, double)
+declare double @llvm.cos.f64(double)
+
+; Basic depth-3 chain with fma
+define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
+	%Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test1
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
+; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
+; CHECK: %Y1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with cos
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.cos.f64(double %X1)
+	%Y2 = call double @llvm.cos.f64(double %X2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test2
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+}
+
+; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) nounwind readonly
+
diff --git a/test/Transforms/BBVectorize/simple-ldstr.ll b/test/Transforms/BBVectorize/simple-ldstr.ll
new file mode 100644
index 0000000..a5397ee
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple-ldstr.ll
@@ -0,0 +1,110 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
+
+; Simple 3-pair chain with loads and stores
+define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+; CHECK: @test1
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %0 = bitcast double* %c to <2 x double>*
+; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test1
+; CHECK-AO-NOT: <2 x double>
+}
+
+; Simple chain with extending loads and stores
+define void @test2(float* %a, float* %b, double* %c) nounwind uwtable readonly {
+entry:
+  %i0f = load float* %a, align 4
+  %i0 = fpext float %i0f to double
+  %i1f = load float* %b, align 4
+  %i1 = fpext float %i1f to double
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds float* %a, i64 1
+  %i3f = load float* %arrayidx3, align 4
+  %i3 = fpext float %i3f to double
+  %arrayidx4 = getelementptr inbounds float* %b, i64 1
+  %i4f = load float* %arrayidx4, align 4
+  %i4 = fpext float %i4f to double
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+; CHECK: @test2
+; CHECK: %i0f.v.i0 = bitcast float* %a to <2 x float>*
+; CHECK: %i1f.v.i0 = bitcast float* %b to <2 x float>*
+; CHECK: %i0f = load <2 x float>* %i0f.v.i0, align 4
+; CHECK: %i0 = fpext <2 x float> %i0f to <2 x double>
+; CHECK: %i1f = load <2 x float>* %i1f.v.i0, align 4
+; CHECK: %i1 = fpext <2 x float> %i1f to <2 x double>
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %0 = bitcast double* %c to <2 x double>*
+; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test2
+; CHECK-AO-NOT: <2 x double>
+}
+
+; Simple chain with loads and truncating stores
+define void @test3(double* %a, double* %b, float* %c) nounwind uwtable readonly {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %mulf = fptrunc double %mul to float
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  %mul5f = fptrunc double %mul5 to float
+  store float %mulf, float* %c, align 8
+  %arrayidx5 = getelementptr inbounds float* %c, i64 1
+  store float %mul5f, float* %arrayidx5, align 4
+  ret void
+; CHECK: @test3
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %mulf = fptrunc <2 x double> %mul to <2 x float>
+; CHECK: %0 = bitcast float* %c to <2 x float>*
+; CHECK: store <2 x float> %mulf, <2 x float>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test3
+; CHECK-AO: %i0 = load double* %a, align 8
+; CHECK-AO: %i1 = load double* %b, align 8
+; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0
+; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0
+; CHECK-AO: %arrayidx3 = getelementptr inbounds double* %a, i64 1
+; CHECK-AO: %i3 = load double* %arrayidx3, align 8
+; CHECK-AO: %arrayidx4 = getelementptr inbounds double* %b, i64 1
+; CHECK-AO: %i4 = load double* %arrayidx4, align 8
+; CHECK-AO: %mul.v.i1.2 = insertelement <2 x double> %mul.v.i1.1, double %i4, i32 1
+; CHECK-AO: %mul.v.i0.2 = insertelement <2 x double> %mul.v.i0.1, double %i3, i32 1
+; CHECK-AO: %mul = fmul <2 x double> %mul.v.i0.2, %mul.v.i1.2
+; CHECK-AO: %mulf = fptrunc <2 x double> %mul to <2 x float>
+; CHECK-AO: %0 = bitcast float* %c to <2 x float>*
+; CHECK-AO: store <2 x float> %mulf, <2 x float>* %0, align 8
+; CHECK-AO: ret void
+}
diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll
new file mode 100644
index 0000000..904d766
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple.ll
@@ -0,0 +1,152 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+; Basic depth-3 chain
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test1
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+	%R  = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (last pair permuted)
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test2
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y2, %B1
+	%Z2 = fadd double %Y1, %B2
+; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
+	%R  = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (last pair first splat)
+define double @test3(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test3
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y2, %B1
+	%Z2 = fadd double %Y2, %B2
+; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
+	%R  = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (last pair second splat)
+define double @test4(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test4
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y1, %B2
+; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
+	%R  = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain
+define <2 x float> @test5(<2 x float> %A1, <2 x float> %A2, <2 x float> %B1, <2 x float> %B2) {
+; CHECK: @test5
+; CHECK: %X1.v.i1 = shufflevector <2 x float> %B1, <2 x float> %B2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK: %X1.v.i0 = shufflevector <2 x float> %A1, <2 x float> %A2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+	%X1 = fsub <2 x float> %A1, %B1
+	%X2 = fsub <2 x float> %A2, %B2
+; CHECK: %X1 = fsub <4 x float> %X1.v.i0, %X1.v.i1
+	%Y1 = fmul <2 x float> %X1, %A1
+	%Y2 = fmul <2 x float> %X2, %A2
+; CHECK: %Y1 = fmul <4 x float> %X1, %X1.v.i0
+	%Z1 = fadd <2 x float> %Y1, %B1
+	%Z2 = fadd <2 x float> %Y2, %B2
+; CHECK: %Z1 = fadd <4 x float> %Y1, %X1.v.i1
+	%R  = fmul <2 x float> %Z1, %Z2
+; CHECK: %Z1.v.r1 = shufflevector <4 x float> %Z1, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+; CHECK: %Z1.v.r2 = shufflevector <4 x float> %Z1, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK: %R = fmul <2 x float> %Z1.v.r1, %Z1.v.r2
+	ret <2 x float> %R
+; CHECK: ret <2 x float> %R
+}
+
+; Basic chain with shuffles
+define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
+; CHECK: @test6
+; CHECK: %X1.v.i1 = shufflevector <8 x i8> %B1, <8 x i8> %B2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK: %X1.v.i0 = shufflevector <8 x i8> %A1, <8 x i8> %A2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+	%X1 = sub <8 x i8> %A1, %B1
+	%X2 = sub <8 x i8> %A2, %B2
+; CHECK: %X1 = sub <16 x i8> %X1.v.i0, %X1.v.i1
+	%Y1 = mul <8 x i8> %X1, %A1
+	%Y2 = mul <8 x i8> %X2, %A2
+; CHECK: %Y1 = mul <16 x i8> %X1, %X1.v.i0
+	%Z1 = add <8 x i8> %Y1, %B1
+	%Z2 = add <8 x i8> %Y2, %B2
+; CHECK: %Z1 = add <16 x i8> %Y1, %X1.v.i1
+        %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
+        %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
+; CHECK: %Z1.v.r2 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <8 x i32> <i32 8, i32 undef, i32 10, i32 undef, i32 undef, i32 13, i32 undef, i32 15>
+; CHECK: %Q1.v.i1 = shufflevector <8 x i8> %Z1.v.r2, <8 x i8> undef, <16 x i32> <i32 0, i32 undef, i32 2, i32 undef, i32 undef, i32 5, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK: %Q1 = shufflevector <16 x i8> %Z1, <16 x i8> %Q1.v.i1, <16 x i32> <i32 23, i32 16, i32 6, i32 1, i32 21, i32 18, i32 4, i32 3, i32 14, i32 15, i32 8, i32 9, i32 10, i32 12, i32 12, i32 9>
+	%R  = mul <8 x i8> %Q1, %Q2
+; CHECK: %Q1.v.r1 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK: %Q1.v.r2 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK: %R = mul <8 x i8> %Q1.v.r1, %Q1.v.r2
+	ret <8 x i8> %R
+; CHECK: ret <8 x i8> %R
+}
+
+
diff --git a/test/Transforms/BlockPlacement/dg.exp b/test/Transforms/BlockPlacement/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/BlockPlacement/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/BlockPlacement/lit.local.cfg b/test/Transforms/BlockPlacement/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/BlockPlacement/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CodeExtractor/dg.exp b/test/Transforms/CodeExtractor/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/CodeExtractor/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/CodeExtractor/lit.local.cfg b/test/Transforms/CodeExtractor/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/CodeExtractor/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CodeGenPrepare/dg.exp b/test/Transforms/CodeGenPrepare/dg.exp
deleted file mode 100644
index de42dad..0000000
--- a/test/Transforms/CodeGenPrepare/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/CodeGenPrepare/lit.local.cfg b/test/Transforms/CodeGenPrepare/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/ConstProp/basictest.ll b/test/Transforms/ConstProp/basictest.ll
index d0d0a5b..09e6e7d 100644
--- a/test/Transforms/ConstProp/basictest.ll
+++ b/test/Transforms/ConstProp/basictest.ll
@@ -1,5 +1,8 @@
 ; RUN: opt < %s -constprop -die -S | FileCheck %s
 
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.2"
+
 ; This is a basic sanity check for constant propagation.  The add instruction
 ; should be eliminated.
 define i32 @test1(i1 %B) {
@@ -40,3 +43,11 @@ define i1 @TNAN() {
   %C = or i1 %A, %B
   ret i1 %C
 }
+
+define i128 @vector_to_int_cast() {
+  %A = bitcast <4 x i32> <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824> to i128
+  ret i128 %A
+; CHECK: @vector_to_int_cast
+; CHECK: ret i128 85070591750041656499021422275829170176
+}
+  
+\ No newline at end of file
diff --git a/test/Transforms/ConstProp/dg.exp b/test/Transforms/ConstProp/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ConstProp/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ConstProp/lit.local.cfg b/test/Transforms/ConstProp/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ConstProp/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ConstantMerge/dg.exp b/test/Transforms/ConstantMerge/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ConstantMerge/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ConstantMerge/linker-private.ll b/test/Transforms/ConstantMerge/linker-private.ll
new file mode 100644
index 0000000..eba7880
--- /dev/null
+++ b/test/Transforms/ConstantMerge/linker-private.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -constmerge -S | FileCheck %s
+; <rdar://problem/10564621>
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+; CHECK: @.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+@isLogVisible = global i8 0, align 1
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+@_unnamed_cfstring_4 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([1 x i8]* @.str3, i32 0, i32 0), i32 0 }, section "__DATA,__cfstring"
+@null.array = weak_odr constant [1 x i8] zeroinitializer, align 1
+
+define linkonce_odr void @bar() nounwind ssp align 2 {
+entry:
+  %stack = alloca i8*, align 4
+  %call = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* null, i8* null, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_4 to %0*))
+  store i8* getelementptr inbounds ([1 x i8]* @null.array, i32 0, i32 0), i8** %stack, align 4
+  ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
diff --git a/test/Transforms/ConstantMerge/lit.local.cfg b/test/Transforms/ConstantMerge/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ConstantMerge/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CorrelatedValuePropagation/dg.exp b/test/Transforms/CorrelatedValuePropagation/dg.exp
deleted file mode 100644
index de42dad..0000000
--- a/test/Transforms/CorrelatedValuePropagation/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/CorrelatedValuePropagation/lit.local.cfg b/test/Transforms/CorrelatedValuePropagation/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/CorrelatedValuePropagation/range.ll b/test/Transforms/CorrelatedValuePropagation/range.ll
new file mode 100644
index 0000000..9b70ed2
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/range.ll
@@ -0,0 +1,43 @@
+; RUN: opt -correlated-propagation -S < %s | FileCheck %s
+
+declare i32 @foo()
+
+define i32 @test1(i32 %a) nounwind {
+  %a.off = add i32 %a, -8
+  %cmp = icmp ult i32 %a.off, 8
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp eq i32 %a, 7
+  br i1 %dead, label %end, label %else
+
+else:
+  ret i32 1
+
+end:
+  ret i32 2
+
+; CHECK: @test1
+; CHECK: then:
+; CHECK-NEXT: br i1 false, label %end, label %else
+}
+
+define i32 @test2(i32 %a) nounwind {
+  %a.off = add i32 %a, -8
+  %cmp = icmp ult i32 %a.off, 8
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp ugt i32 %a, 15
+  br i1 %dead, label %end, label %else
+
+else:
+  ret i32 1
+
+end:
+  ret i32 2
+
+; CHECK: @test2
+; CHECK: then:
+; CHECK-NEXT: br i1 false, label %end, label %else
+}
diff --git a/test/Transforms/DeadArgElim/dg.exp b/test/Transforms/DeadArgElim/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/DeadArgElim/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/DeadArgElim/lit.local.cfg b/test/Transforms/DeadArgElim/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/DeadArgElim/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/DeadStoreElimination/dg.exp b/test/Transforms/DeadStoreElimination/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/DeadStoreElimination/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/DeadStoreElimination/dominate.ll b/test/Transforms/DeadStoreElimination/dominate.ll
new file mode 100644
index 0000000..284fea4
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/dominate.ll
@@ -0,0 +1,25 @@
+; RUN: opt  %s -dse -disable-output
+; test that we don't crash
+declare void @bar()
+
+define void @foo() {
+bb1:
+  %memtmp3.i = alloca [21 x i8], align 1
+  %0 = getelementptr inbounds [21 x i8]* %memtmp3.i, i64 0, i64 0
+  br label %bb3
+
+bb2:
+  call void @llvm.lifetime.end(i64 -1, i8* %0)
+  br label %bb3
+
+bb3:
+  call void @bar()
+  call void @llvm.lifetime.end(i64 -1, i8* %0)
+  br label %bb4
+
+bb4:
+  ret void
+
+}
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
diff --git a/test/Transforms/DeadStoreElimination/lit.local.cfg b/test/Transforms/DeadStoreElimination/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/EarlyCSE/dg.exp b/test/Transforms/EarlyCSE/dg.exp
deleted file mode 100644
index de42dad..0000000
--- a/test/Transforms/EarlyCSE/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/EarlyCSE/lit.local.cfg b/test/Transforms/EarlyCSE/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/EarlyCSE/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/FunctionAttrs/dg.exp b/test/Transforms/FunctionAttrs/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/FunctionAttrs/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/FunctionAttrs/lit.local.cfg b/test/Transforms/FunctionAttrs/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll b/test/Transforms/FunctionAttrs/nocapture.ll
index e2bab19..3027acd 100644
--- a/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll
+++ b/test/Transforms/FunctionAttrs/nocapture.ll
@@ -1,21 +1,24 @@
-; RUN: opt < %s -functionattrs -S | not grep {nocapture *%%q}
-; RUN: opt < %s -functionattrs -S | grep {nocapture *%%p} | count 6
+; RUN: opt < %s -functionattrs -S | FileCheck %s
 @g = global i32* null		; <i32**> [#uses=1]
 
+; CHECK: define i32* @c1(i32* %q)
 define i32* @c1(i32* %q) {
 	ret i32* %q
 }
 
+; CHECK: define void @c2(i32* %q)
 define void @c2(i32* %q) {
 	store i32* %q, i32** @g
 	ret void
 }
 
+; CHECK: define void @c3(i32* %q)
 define void @c3(i32* %q) {
 	call void @c2(i32* %q)
 	ret void
 }
 
+; CHECK: define i1 @c4(i32* %q, i32 %bitno)
 define i1 @c4(i32* %q, i32 %bitno) {
 	%tmp = ptrtoint i32* %q to i32
 	%tmp2 = lshr i32 %tmp, %bitno
@@ -29,6 +32,7 @@ l1:
 
 @lookup_table = global [2 x i1] [ i1 0, i1 1 ]
 
+; CHECK: define i1 @c5(i32* %q, i32 %bitno)
 define i1 @c5(i32* %q, i32 %bitno) {
 	%tmp = ptrtoint i32* %q to i32
 	%tmp2 = lshr i32 %tmp, %bitno
@@ -40,6 +44,8 @@ define i1 @c5(i32* %q, i32 %bitno) {
 }
 
 declare void @throw_if_bit_set(i8*, i8) readonly
+
+; CHECK: define i1 @c6(i8* %q, i8 %bit)
 define i1 @c6(i8* %q, i8 %bit) {
 	invoke void @throw_if_bit_set(i8* %q, i8 %bit)
 		to label %ret0 unwind label %ret1
@@ -61,6 +67,7 @@ define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind {
 	ret i1* %lookup
 }
 
+; CHECK: define i1 @c7(i32* %q, i32 %bitno)
 define i1 @c7(i32* %q, i32 %bitno) {
 	%ptr = call i1* @lookup_bit(i32* %q, i32 %bitno)
 	%val = load i1* %ptr
@@ -68,6 +75,7 @@ define i1 @c7(i32* %q, i32 %bitno) {
 }
 
 
+; CHECK: define i32 @nc1(i32* %q, i32* nocapture %p, i1 %b)
 define i32 @nc1(i32* %q, i32* %p, i1 %b) {
 e:
 	br label %l
@@ -82,24 +90,89 @@ l:
 	ret i32 %val
 }
 
+; CHECK: define void @nc2(i32* nocapture %p, i32* %q)
 define void @nc2(i32* %p, i32* %q) {
 	%1 = call i32 @nc1(i32* %q, i32* %p, i1 0)		; <i32> [#uses=0]
 	ret void
 }
 
+; CHECK: define void @nc3(void ()* nocapture %p)
 define void @nc3(void ()* %p) {
 	call void %p()
 	ret void
 }
 
 declare void @external(i8*) readonly nounwind
+; CHECK: define void @nc4(i8* nocapture %p)
 define void @nc4(i8* %p) {
 	call void @external(i8* %p)
 	ret void
 }
 
+; CHECK: define void @nc5(void (i8*)* nocapture %f, i8* nocapture %p)
 define void @nc5(void (i8*)* %f, i8* %p) {
 	call void %f(i8* %p) readonly nounwind
 	call void %f(i8* nocapture %p)
 	ret void
 }
+
+; CHECK: define void @test1_1(i8* nocapture %x1_1, i8* %y1_1)
+define void @test1_1(i8* %x1_1, i8* %y1_1) {
+  call i8* @test1_2(i8* %x1_1, i8* %y1_1)
+  store i32* null, i32** @g
+  ret void
+}
+
+; CHECK: define i8* @test1_2(i8* nocapture %x1_2, i8* %y1_2)
+define i8* @test1_2(i8* %x1_2, i8* %y1_2) {
+  call void @test1_1(i8* %x1_2, i8* %y1_2)
+  store i32* null, i32** @g
+  ret i8* %y1_2
+}
+
+; CHECK: define void @test2(i8* nocapture %x2)
+define void @test2(i8* %x2) {
+  call void @test2(i8* %x2)
+  store i32* null, i32** @g
+  ret void
+}
+
+; CHECK: define void @test3(i8* nocapture %x3, i8* nocapture %y3, i8* nocapture %z3)
+define void @test3(i8* %x3, i8* %y3, i8* %z3) {
+  call void @test3(i8* %z3, i8* %y3, i8* %x3)
+  store i32* null, i32** @g
+  ret void
+}
+
+; CHECK: define void @test4_1(i8* %x4_1)
+define void @test4_1(i8* %x4_1) {
+  call i8* @test4_2(i8* %x4_1, i8* %x4_1, i8* %x4_1)
+  store i32* null, i32** @g
+  ret void
+}
+
+; CHECK: define i8* @test4_2(i8* nocapture %x4_2, i8* %y4_2, i8* nocapture %z4_2)
+define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2) {
+  call void @test4_1(i8* null)
+  store i32* null, i32** @g
+  ret i8* %y4_2
+}
+
+declare i8* @test5_1(i8* %x5_1)
+
+; CHECK: define void @test5_2(i8* %x5_2)
+define void @test5_2(i8* %x5_2) {
+  call i8* @test5_1(i8* %x5_2)
+  store i32* null, i32** @g
+  ret void
+}
+
+declare void @test6_1(i8* %x6_1, i8* nocapture %y6_1, ...)
+
+; CHECK: define void @test6_2(i8* %x6_2, i8* nocapture %y6_2, i8* %z6_2)
+define void @test6_2(i8* %x6_2, i8* %y6_2, i8* %z6_2) {
+  call void (i8*, i8*, ...)* @test6_1(i8* %x6_2, i8* %y6_2, i8* %z6_2)
+  store i32* null, i32** @g
+  ret void
+}
+
diff --git a/test/Transforms/GVN/commute.ll b/test/Transforms/GVN/commute.ll
new file mode 100644
index 0000000..cf4fb7f
--- /dev/null
+++ b/test/Transforms/GVN/commute.ll
@@ -0,0 +1,23 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+declare void @use(i32, i32)
+
+define void @foo(i32 %x, i32 %y) {
+  ; CHECK: @foo
+  %add1 = add i32 %x, %y
+  %add2 = add i32 %y, %x
+  call void @use(i32 %add1, i32 %add2)
+  ; CHECK: @use(i32 %add1, i32 %add1)
+  ret void
+}
+
+declare void @vse(i1, i1)
+
+define void @bar(i32 %x, i32 %y) {
+  ; CHECK: @bar
+  %cmp1 = icmp ult i32 %x, %y
+  %cmp2 = icmp ugt i32 %y, %x
+  call void @vse(i1 %cmp1, i1 %cmp2)
+  ; CHECK: @vse(i1 %cmp1, i1 %cmp1)
+  ret void
+}
diff --git a/test/Transforms/GVN/condprop.ll b/test/Transforms/GVN/condprop.ll
index 0b31b01..9c28955 100644
--- a/test/Transforms/GVN/condprop.ll
+++ b/test/Transforms/GVN/condprop.ll
@@ -55,25 +55,6 @@ return:		; preds = %bb8
 }
 
 declare void @foo(i1)
-
-; CHECK: @test2
-define void @test2(i1 %x, i1 %y) {
-  %z = or i1 %x, %y
-  br i1 %z, label %true, label %false
-true:
-; CHECK: true:
-  %z2 = or i1 %x, %y
-  call void @foo(i1 %z2)
-; CHECK: call void @foo(i1 true)
-  br label %true
-false:
-; CHECK: false:
-  %z3 = or i1 %x, %y
-  call void @foo(i1 %z3)
-; CHECK: call void @foo(i1 false)
-  br label %false
-}
-
 declare void @bar(i32)
 
 ; CHECK: @test3
@@ -130,3 +111,141 @@ case3:
 ; CHECK: call void @bar(i32 %x)
   ret void
 }
+
+; CHECK: @test5
+define i1 @test5(i32 %x, i32 %y) {
+  %cmp = icmp eq i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+  %cmp2 = icmp ne i32 %x, %y
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+  %cmp3 = icmp eq i32 %x, %y
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
+; CHECK: @test6
+define i1 @test6(i32 %x, i32 %y) {
+  %cmp2 = icmp ne i32 %x, %y
+  %cmp = icmp eq i32 %x, %y
+  %cmp3 = icmp eq i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
+; CHECK: @test7
+define i1 @test7(i32 %x, i32 %y) {
+  %cmp = icmp sgt i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+  %cmp2 = icmp sle i32 %x, %y
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+  %cmp3 = icmp sgt i32 %x, %y
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
+; CHECK: @test8
+define i1 @test8(i32 %x, i32 %y) {
+  %cmp2 = icmp sle i32 %x, %y
+  %cmp = icmp sgt i32 %x, %y
+  %cmp3 = icmp sgt i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
+; PR1768
+; CHECK: @test9
+define i32 @test9(i32 %i, i32 %j) {
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %cond_true, label %ret
+
+cond_true:
+  %diff = sub i32 %i, %j
+  ret i32 %diff
+; CHECK: ret i32 0
+
+ret:
+  ret i32 5
+; CHECK: ret i32 5
+}
+
+; PR1768
+; CHECK: @test10
+define i32 @test10(i32 %j, i32 %i) {
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %cond_true, label %ret
+
+cond_true:
+  %diff = sub i32 %i, %j
+  ret i32 %diff
+; CHECK: ret i32 0
+
+ret:
+  ret i32 5
+; CHECK: ret i32 5
+}
+
+declare i32 @yogibar()
+
+; CHECK: @test11
+define i32 @test11(i32 %x) {
+  %v0 = call i32 @yogibar()
+  %v1 = call i32 @yogibar()
+  %cmp = icmp eq i32 %v0, %v1
+  br i1 %cmp, label %cond_true, label %next
+
+cond_true:
+  ret i32 %v1
+; CHECK: ret i32 %v0
+
+next:
+  %cmp2 = icmp eq i32 %x, %v0
+  br i1 %cmp2, label %cond_true2, label %next2
+
+cond_true2:
+  ret i32 %v0
+; CHECK: ret i32 %x
+
+next2:
+  ret i32 0
+}
+
+; CHECK: @test12
+define i32 @test12(i32 %x) {
+  %cmp = icmp eq i32 %x, 0
+  br i1 %cmp, label %cond_true, label %cond_false
+
+cond_true:
+  br label %ret
+
+cond_false:
+  br label %ret
+
+ret:
+  %res = phi i32 [ %x, %cond_true ], [ %x, %cond_false ]
+; CHECK: %res = phi i32 [ 0, %cond_true ], [ %x, %cond_false ]
+  ret i32 %res
+}
diff --git a/test/Transforms/GVN/dg.exp b/test/Transforms/GVN/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/GVN/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/GVN/lit.local.cfg b/test/Transforms/GVN/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/GVN/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GlobalDCE/dg.exp b/test/Transforms/GlobalDCE/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/GlobalDCE/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/GlobalDCE/lit.local.cfg b/test/Transforms/GlobalDCE/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/GlobalDCE/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GlobalOpt/atomic.ll b/test/Transforms/GlobalOpt/atomic.ll
new file mode 100644
index 0000000..4c3f439
--- /dev/null
+++ b/test/Transforms/GlobalOpt/atomic.ll
@@ -0,0 +1,10 @@
+; RUN: opt -globalopt < %s -S -o - | FileCheck %s
+
+@GV1 = internal global i64 1
+; CHECK: @GV1 = internal unnamed_addr constant i64 1
+
+define void @test1() {
+entry:
+  %0 = load atomic i8* bitcast (i64* @GV1 to i8*) acquire, align 8
+  ret void
+}
diff --git a/test/Transforms/GlobalOpt/constantfold-initializers.ll b/test/Transforms/GlobalOpt/constantfold-initializers.ll
index 834bd00..af8fa32 100644
--- a/test/Transforms/GlobalOpt/constantfold-initializers.ll
+++ b/test/Transforms/GlobalOpt/constantfold-initializers.ll
@@ -6,3 +6,46 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; CHECK: @A = global i1 false
 @A = global i1 icmp ne (i64 sub nsw (i64 ptrtoint (i8* getelementptr inbounds ([3 x i8]* @.str91250, i64 0, i64 1) to i64), i64 ptrtoint ([3 x i8]* @.str91250 to i64)), i64 1)
+
+; PR11352
+
+@xs = global [2 x i32] zeroinitializer, align 4
+; CHECK: @xs = global [2 x i32] [i32 1, i32 1]
+
+define internal void @test1() {
+entry:
+  store i32 1, i32* getelementptr inbounds ([2 x i32]* @xs, i64 0, i64 0)
+  %0 = load i32* getelementptr inbounds ([2 x i32]* @xs, i32 0, i64 0), align 4
+  store i32 %0, i32* getelementptr inbounds ([2 x i32]* @xs, i64 0, i64 1)
+  ret void
+}
+
+; PR12060
+
+%closure = type { i32 }
+
+@f = internal global %closure zeroinitializer, align 4
+@m = global i32 0, align 4
+; CHECK-NOT: @f
+; CHECK: @m = global i32 13
+
+define internal i32 @test2_helper(%closure* %this, i32 %b) {
+entry:
+  %0 = getelementptr inbounds %closure* %this, i32 0, i32 0
+  %1 = load i32* %0, align 4
+  %add = add nsw i32 %1, %b
+  ret i32 %add
+}
+
+define internal void @test2() {
+entry:
+  store i32 4, i32* getelementptr inbounds (%closure* @f, i32 0, i32 0)
+  %call = call i32 @test2_helper(%closure* @f, i32 9)
+  store i32 %call, i32* @m, align 4
+  ret void
+}
+
+@llvm.global_ctors = appending constant
+  [2 x { i32, void ()* }]
+  [{ i32, void ()* } { i32 65535, void ()* @test1 },
+   { i32, void ()* } { i32 65535, void ()* @test2 }]
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
index 204f979..e3bc473 100644
--- a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
+++ b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
@@ -4,20 +4,31 @@ target triple = "x86_64-apple-darwin10.0.0"
 
 %0 = type { i32, void ()* }
 %struct.foo = type { i32* }
+%struct.bar = type { i128 }
 
 @G = global i32 0, align 4
 @H = global i32 0, align 4
 @X = global %struct.foo zeroinitializer, align 8
-@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @init }]
+@X2 = global %struct.bar zeroinitializer, align 8
+@llvm.global_ctors = appending global [2 x %0] [%0 { i32 65535, void ()* @init1 }, %0 { i32 65535, void ()* @init2 }]
 
 ; PR8710 - GlobalOpt shouldn't change the global's initializer to have this
 ; arbitrary constant expression, the code generator can't handle it.
-define internal void @init() {
+define internal void @init1() {
 entry:
   %tmp = getelementptr inbounds %struct.foo* @X, i32 0, i32 0
   store i32* inttoptr (i64 sdiv (i64 ptrtoint (i32* @G to i64), i64 ptrtoint (i32* @H to i64)) to i32*), i32** %tmp, align 8
   ret void
 }
-
-; CHECK: @init
+; CHECK: @init1
 ; CHECK: store i32*
+
+; PR11705 - ptrtoint isn't safe in general in global initializers.
+define internal void @init2() {
+entry:
+  %tmp = getelementptr inbounds %struct.bar* @X2, i32 0, i32 0
+  store i128 ptrtoint (i32* @G to i128), i128* %tmp, align 16
+  ret void
+}
+; CHECK: @init2
+; CHECK: store i128
diff --git a/test/Transforms/GlobalOpt/cxx-dtor.ll b/test/Transforms/GlobalOpt/cxx-dtor.ll
index 2263562..7c6ae78 100644
--- a/test/Transforms/GlobalOpt/cxx-dtor.ll
+++ b/test/Transforms/GlobalOpt/cxx-dtor.ll
@@ -2,6 +2,7 @@
 
 %0 = type { i32, void ()* }
 %struct.A = type { i8 }
+%struct.B = type { }
 
 @a = global %struct.A zeroinitializer, align 1
 @__dso_handle = external global i8*
@@ -15,13 +16,14 @@ define internal void @__cxx_global_var_init() nounwind section "__TEXT,__StaticI
 }
 
 define linkonce_odr void @_ZN1AD1Ev(%struct.A* %this) nounwind align 2 {
-  call void @_ZN1AD2Ev(%struct.A* %this)
+  %t = bitcast %struct.A* %this to %struct.B*
+  call void @_ZN1BD1Ev(%struct.B* %t)
   ret void
 }
 
 declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
 
-define linkonce_odr void @_ZN1AD2Ev(%struct.A* %this) nounwind align 2 {
+define linkonce_odr void @_ZN1BD1Ev(%struct.B* %this) nounwind align 2 {
   ret void
 }
 
diff --git a/test/Transforms/GlobalOpt/dg.exp b/test/Transforms/GlobalOpt/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/GlobalOpt/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/GlobalOpt/invariant.ll b/test/Transforms/GlobalOpt/invariant.ll
new file mode 100644
index 0000000..6b99193
--- /dev/null
+++ b/test/Transforms/GlobalOpt/invariant.ll
@@ -0,0 +1,59 @@
+; RUN: opt -globalopt -S -o - < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare {}* @llvm.invariant.start(i64 %size, i8* nocapture %ptr)
+
+define void @test1(i8* %ptr) {
+  call {}* @llvm.invariant.start(i64 4, i8* %ptr)
+  ret void
+}
+
+@object1 = global i32 0
+; CHECK: @object1 = constant i32 -1
+define void @ctor1() {
+  store i32 -1, i32* @object1
+  %A = bitcast i32* @object1 to i8*
+  call void @test1(i8* %A)
+  ret void
+}
+
+
+@object2 = global i32 0
+; CHECK: @object2 = global i32 0
+define void @ctor2() {
+  store i32 -1, i32* @object2
+  %A = bitcast i32* @object2 to i8*
+  %B = call {}* @llvm.invariant.start(i64 4, i8* %A)
+  %C = bitcast {}* %B to i8*
+  ret void
+}
+
+
+@object3 = global i32 0
+; CHECK: @object3 = global i32 -1
+define void @ctor3() {
+  store i32 -1, i32* @object3
+  %A = bitcast i32* @object3 to i8*
+  call {}* @llvm.invariant.start(i64 3, i8* %A)
+  ret void
+}
+
+
+@object4 = global i32 0
+; CHECK: @object4 = global i32 -1
+define void @ctor4() {
+  store i32 -1, i32* @object4
+  %A = bitcast i32* @object4 to i8*
+  call {}* @llvm.invariant.start(i64 -1, i8* %A)
+  ret void
+}
+
+
+@llvm.global_ctors = appending constant
+  [4 x { i32, void ()* }]
+  [ { i32, void ()* } { i32 65535, void ()* @ctor1 },
+    { i32, void ()* } { i32 65535, void ()* @ctor2 },
+    { i32, void ()* } { i32 65535, void ()* @ctor3 },
+    { i32, void ()* } { i32 65535, void ()* @ctor4 } ]
diff --git a/test/Transforms/GlobalOpt/lit.local.cfg b/test/Transforms/GlobalOpt/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/GlobalOpt/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IPConstantProp/dg.exp b/test/Transforms/IPConstantProp/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/IPConstantProp/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/IPConstantProp/lit.local.cfg b/test/Transforms/IPConstantProp/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/IPConstantProp/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IndVarSimplify/dg.exp b/test/Transforms/IndVarSimplify/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/IndVarSimplify/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/IndVarSimplify/lit.local.cfg b/test/Transforms/IndVarSimplify/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
index 9c2abd0..23fdc87 100644
--- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -333,9 +333,9 @@ entry:
 
 ; CHECK: loop:
 ; CHECK: phi %structIF*
-; CHECK: phi i32*
-; CHECK: getelementptr inbounds
+; CHECK-NOT: phi
 ; CHECK: getelementptr inbounds
+; CHECK-NOT: getelementptr
 ; CHECK: exit:
 loop:
   %ptr.iv = phi %structIF* [ %ptr.inc, %latch ], [ %base, %entry ]
diff --git a/test/Transforms/Inline/alloca-bonus.ll b/test/Transforms/Inline/alloca-bonus.ll
new file mode 100644
index 0000000..91ab40a
--- /dev/null
+++ b/test/Transforms/Inline/alloca-bonus.ll
@@ -0,0 +1,83 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=8 | FileCheck %s
+
+declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
+
+@glbl = external global i32
+
+define void @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call void @inner1
+  %ptr = alloca i32
+  call void @inner1(i32* %ptr)
+  ret void
+}
+
+define void @inner1(i32 *%ptr) {
+  %A = load i32* %ptr
+  store i32 0, i32* %ptr
+  %C = getelementptr i32* %ptr, i32 0
+  %D = getelementptr i32* %ptr, i32 1
+  %E = bitcast i32* %ptr to i8*
+  %F = select i1 false, i32* %ptr, i32* @glbl
+  call void @llvm.lifetime.start(i64 0, i8* %E)
+  ret void
+}
+
+define void @outer2() {
+; CHECK: @outer2
+; CHECK: call void @inner2
+  %ptr = alloca i32
+  call void @inner2(i32* %ptr)
+  ret void
+}
+
+; %D poisons this call, scalar-repl can't handle that instruction.
+define void @inner2(i32 *%ptr) {
+  %A = load i32* %ptr
+  store i32 0, i32* %ptr
+  %C = getelementptr i32* %ptr, i32 0
+  %D = getelementptr i32* %ptr, i32 %A
+  %E = bitcast i32* %ptr to i8*
+  %F = select i1 false, i32* %ptr, i32* @glbl
+  call void @llvm.lifetime.start(i64 0, i8* %E)
+  ret void
+}
+
+define void @outer3() {
+; CHECK: @outer3
+; CHECK-NOT: call void @inner3
+  %ptr = alloca i32
+  call void @inner3(i32* %ptr, i1 undef)
+  ret void
+}
+
+define void @inner3(i32 *%ptr, i1 %x) {
+  %A = icmp eq i32* %ptr, null
+  %B = and i1 %x, %A
+  br i1 %A, label %bb.true, label %bb.false
+bb.true:
+  ; This block musn't be counted in the inline cost.
+  %t1 = load i32* %ptr
+  %t2 = add i32 %t1, 1
+  %t3 = add i32 %t2, 1
+  %t4 = add i32 %t3, 1
+  %t5 = add i32 %t4, 1
+  %t6 = add i32 %t5, 1
+  %t7 = add i32 %t6, 1
+  %t8 = add i32 %t7, 1
+  %t9 = add i32 %t8, 1
+  %t10 = add i32 %t9, 1
+  %t11 = add i32 %t10, 1
+  %t12 = add i32 %t11, 1
+  %t13 = add i32 %t12, 1
+  %t14 = add i32 %t13, 1
+  %t15 = add i32 %t14, 1
+  %t16 = add i32 %t15, 1
+  %t17 = add i32 %t16, 1
+  %t18 = add i32 %t17, 1
+  %t19 = add i32 %t18, 1
+  %t20 = add i32 %t19, 1
+  ret void
+bb.false:
+  ret void
+}
diff --git a/test/Transforms/Inline/dg.exp b/test/Transforms/Inline/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Inline/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Inline/inline-invoke-tail.ll b/test/Transforms/Inline/inline-invoke-tail.ll
index 462c29a..1f34113 100644
--- a/test/Transforms/Inline/inline-invoke-tail.ll
+++ b/test/Transforms/Inline/inline-invoke-tail.ll
@@ -23,15 +23,11 @@ invcont:
 	ret i32 %retval
 
 lpad:
-	%eh_ptr = call i8* @llvm.eh.exception()
-	%eh_select = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null)
+        %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            catch i8* null
 	unreachable
 }
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @__gxx_personality_v0(...)
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/Inline/inline_returns_twice.ll b/test/Transforms/Inline/inline_returns_twice.ll
new file mode 100644
index 0000000..ab2e954
--- /dev/null
+++ b/test/Transforms/Inline/inline_returns_twice.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+; Check that functions with "returns_twice" calls are only inlined,
+; if they are themselve marked as such.
+
+declare i32 @a() returns_twice
+declare i32 @b() returns_twice
+
+define i32 @f() {
+entry:
+  %call = call i32 @a() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @g() {
+entry:
+; CHECK: define i32 @g
+; CHECK: call i32 @f()
+; CHECK-NOT: call i32 @a()
+  %call = call i32 @f()
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @h() returns_twice {
+entry:
+  %call = call i32 @b() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @i() {
+entry:
+; CHECK: define i32 @i
+; CHECK: call i32 @b()
+; CHECK-NOT: call i32 @h()
+  %call = call i32 @h() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
diff --git a/test/Transforms/Inline/lit.local.cfg b/test/Transforms/Inline/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Inline/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll b/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll
new file mode 100644
index 0000000..abab9dc
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -disable-output
+
+%opaque_struct = type opaque
+
+@G = external global [0 x %opaque_struct]
+
+declare void @foo(%opaque_struct*)
+
+define void @bar() {
+  call void @foo(%opaque_struct* bitcast ([0 x %opaque_struct]* @G to %opaque_struct*))
+  ret void
+}
diff --git a/test/Transforms/InstCombine/2012-02-13-FCmp.ll b/test/Transforms/InstCombine/2012-02-13-FCmp.ll
new file mode 100644
index 0000000..39b0594
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-02-13-FCmp.ll
@@ -0,0 +1,35 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+; Radar 10803727
+@.str = private unnamed_addr constant [35 x i8] c"\0Ain_range input (should be 0): %f\0A\00", align 1
+@.str1 = external hidden unnamed_addr constant [35 x i8], align 1
+
+declare i32 @printf(i8*, ...)
+define i64 @_Z8tempCastj(i32 %val) uwtable ssp {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8]* @.str1, i64 0, i64 0), i32 %val)
+  %conv = uitofp i32 %val to double
+  %call.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8]* @.str, i64 0, i64 0), double %conv)
+  %cmp.i = fcmp oge double %conv, -1.000000e+00
+  br i1 %cmp.i, label %land.rhs.i, label %if.end.critedge
+; CHECK:  br i1 true, label %land.rhs.i, label %if.end.critedge
+
+land.rhs.i:                                       ; preds = %entry
+  %cmp1.i = fcmp olt double %conv, 1.000000e+00
+  br i1 %cmp1.i, label %if.then, label %if.end
+
+if.then:                                          ; preds = %land.rhs.i
+  %add = fadd double %conv, 5.000000e-01
+  %conv3 = fptosi double %add to i64
+  br label %return
+
+if.end.critedge:                                  ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %if.end.critedge, %land.rhs.i
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %retval.0 = phi i64 [ %conv3, %if.then ], [ -1, %if.end ]
+  ret i64 %retval.0
+}
+
diff --git a/test/Transforms/InstCombine/2012-02-28-ICmp.ll b/test/Transforms/InstCombine/2012-02-28-ICmp.ll
new file mode 100644
index 0000000..82cf85f
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-02-28-ICmp.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; <rdar://problem/10803154>
+
+; There should be no transformation.
+; CHECK: %a = trunc i32 %x to i8
+; CHECK: %b = icmp ne i8 %a, 0
+; CHECK: %c = and i32 %x, 16711680
+; CHECK: %d = icmp ne i32 %c, 0
+; CHECK: %e = and i1 %b, %d
+; CHECK: ret i1 %e
+
+define i1 @f1(i32 %x) {
+  %a = trunc i32 %x to i8
+  %b = icmp ne i8 %a, 0
+  %c = and i32 %x, 16711680
+  %d = icmp ne i32 %c, 0
+  %e = and i1 %b, %d
+  ret i1 %e
+}
diff --git a/test/Transforms/InstCombine/dg.exp b/test/Transforms/InstCombine/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/InstCombine/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 016e8c5..a9ae221 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -121,8 +121,8 @@ define i1 @test12(i1 %A) {
   %B = icmp ne i64 bitcast (<2 x i32> <i32 1, i32 -1> to i64), %S
   ret i1 %B
 ; CHECK: @test12
-; CHECK-NEXT: %B = select i1
-; CHECK-NEXT: ret i1 %B
+; CHECK-NEXT: = xor i1 %A, true
+; CHECK-NEXT: ret i1
 }
 
 ; PR6481
@@ -524,7 +524,7 @@ define i1 @test53(i32 %a, i32 %b) nounwind {
 
 ; CHECK: @test54
 ; CHECK-NEXT: %and = and i8 %a, -64
-; CHECK-NEXT icmp eq i8 %and, -128
+; CHECK-NEXT: icmp eq i8 %and, -128
 define i1 @test54(i8 %a) nounwind {
   %ext = zext i8 %a to i32
   %and = and i32 %ext, 192
@@ -580,3 +580,60 @@ define zeroext i1 @cmpabs2(i64 %val) {
   %tobool = icmp ne i64 %sub.val, 0
   ret i1 %tobool
 }
+
+; CHECK: @test58
+; CHECK-NEXT: call i32 @test58_d(i64 36029346783166592)
+define void @test58() nounwind {
+  %cast = bitcast <1 x i64> <i64 36029346783166592> to i64
+  %call = call i32 @test58_d( i64 %cast) nounwind
+  ret void
+}
+declare i32 @test58_d(i64)
+
+define i1 @test59(i8* %foo) {
+  %bit = bitcast i8* %foo to i32*
+  %gep1 = getelementptr inbounds i32* %bit, i64 2
+  %gep2 = getelementptr inbounds i8* %foo, i64 10
+  %cast1 = bitcast i32* %gep1 to i8*
+  %cmp = icmp ult i8* %cast1, %gep2
+  %use = ptrtoint i8* %cast1 to i64
+  %call = call i32 @test58_d(i64 %use) nounwind
+  ret i1 %cmp
+; CHECK: @test59
+; CHECK: ret i1 true
+}
+
+define i1 @test60(i8* %foo, i64 %i, i64 %j) {
+  %bit = bitcast i8* %foo to i32*
+  %gep1 = getelementptr inbounds i32* %bit, i64 %i
+  %gep2 = getelementptr inbounds i8* %foo, i64 %j
+  %cast1 = bitcast i32* %gep1 to i8*
+  %cmp = icmp ult i8* %cast1, %gep2
+  ret i1 %cmp
+; CHECK: @test60
+; CHECK-NEXT: %gep1.idx = shl nuw i64 %i, 2
+; CHECK-NEXT: icmp slt i64 %gep1.idx, %j
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test61(i8* %foo, i64 %i, i64 %j) {
+  %bit = bitcast i8* %foo to i32*
+  %gep1 = getelementptr i32* %bit, i64 %i
+  %gep2 = getelementptr  i8* %foo, i64 %j
+  %cast1 = bitcast i32* %gep1 to i8*
+  %cmp = icmp ult i8* %cast1, %gep2
+  ret i1 %cmp
+; Don't transform non-inbounds GEPs.
+; CHECK: @test61
+; CHECK: icmp ult i8* %cast1, %gep2
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test62(i8* %a) {
+  %arrayidx1 = getelementptr inbounds i8* %a, i64 1
+  %arrayidx2 = getelementptr inbounds i8* %a, i64 10
+  %cmp = icmp slt i8* %arrayidx1, %arrayidx2
+  ret i1 %cmp
+; CHECK: @test62
+; CHECK-NEXT: ret i1 true
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index e31bd7d..382e6b3 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -181,10 +181,10 @@ entry:
 
 define void @cmp.simplify(i32 %a, i32 %b, i1* %c) {
 entry:
-  %lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 true) nounwind readnone
+  %lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 false) nounwind readnone
   %lz.cmp = icmp eq i32 %lz, 32
   store volatile i1 %lz.cmp, i1* %c
-  %tz = tail call i32 @llvm.cttz.i32(i32 %a, i1 true) nounwind readnone
+  %tz = tail call i32 @llvm.cttz.i32(i32 %a, i1 false) nounwind readnone
   %tz.cmp = icmp ne i32 %tz, 32
   store volatile i1 %tz.cmp, i1* %c
   %pop = tail call i32 @llvm.ctpop.i32(i32 %b) nounwind readnone
@@ -201,16 +201,22 @@ entry:
 ; CHECK-NEXT: store volatile i1 %pop.cmp, i1* %c
 }
 
-
-define i32 @cttz_simplify1(i32 %x) nounwind readnone ssp {
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)    ; <i32> [#uses=1]
-  %shr3 = lshr i32 %tmp1, 5                       ; <i32> [#uses=1]
+define i32 @cttz_simplify1a(i32 %x) nounwind readnone ssp {
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+  %shr3 = lshr i32 %tmp1, 5
   ret i32 %shr3
-  
-; CHECK: @cttz_simplify1
+
+; CHECK: @cttz_simplify1a
 ; CHECK: icmp eq i32 %x, 0
-; CHECK-NEXT: zext i1 
+; CHECK-NEXT: zext i1
 ; CHECK-NEXT: ret i32
 }
 
+define i32 @cttz_simplify1b(i32 %x) nounwind readnone ssp {
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %shr3 = lshr i32 %tmp1, 5
+  ret i32 %shr3
 
+; CHECK: @cttz_simplify1b
+; CHECK-NEXT: ret i32 0
+}
diff --git a/test/Transforms/InstCombine/lit.local.cfg b/test/Transforms/InstCombine/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/InstCombine/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 132d51a..52310e3 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -542,3 +542,75 @@ define i32 @test45(i32 %a) nounwind {
 ; CHECK-NEXT: %y = lshr i32 %a, 5
 ; CHECK-NEXT: ret i32 %y
 }
+
+define i32 @test46(i32 %a) {
+  %y = ashr exact i32 %a, 3
+  %z = shl i32 %y, 1
+  ret i32 %z
+; CHECK: @test46
+; CHECK-NEXT: %z = ashr exact i32 %a, 2
+; CHECK-NEXT: ret i32 %z
+}
+
+define i32 @test47(i32 %a) {
+  %y = lshr exact i32 %a, 3
+  %z = shl i32 %y, 1
+  ret i32 %z
+; CHECK: @test47
+; CHECK-NEXT: %z = lshr exact i32 %a, 2
+; CHECK-NEXT: ret i32 %z
+}
+
+define i32 @test48(i32 %x) {
+  %A = lshr exact i32 %x, 1
+  %B = shl i32 %A, 3
+  ret i32 %B
+; CHECK: @test48
+; CHECK-NEXT: %B = shl i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test49(i32 %x) {
+  %A = ashr exact i32 %x, 1
+  %B = shl i32 %A, 3
+  ret i32 %B
+; CHECK: @test49
+; CHECK-NEXT: %B = shl i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test50(i32 %x) {
+  %A = shl nsw i32 %x, 1
+  %B = ashr i32 %A, 3
+  ret i32 %B
+; CHECK: @test50
+; CHECK-NEXT: %B = ashr i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test51(i32 %x) {
+  %A = shl nuw i32 %x, 1
+  %B = lshr i32 %A, 3
+  ret i32 %B
+; CHECK: @test51
+; CHECK-NEXT: %B = lshr i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test52(i32 %x) {
+  %A = shl nsw i32 %x, 3
+  %B = ashr i32 %A, 1
+  ret i32 %B
+; CHECK: @test52
+; CHECK-NEXT: %B = shl nsw i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test53(i32 %x) {
+  %A = shl nuw i32 %x, 3
+  %B = lshr i32 %A, 1
+  ret i32 %B
+; CHECK: @test53
+; CHECK-NEXT: %B = shl nuw i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
diff --git a/test/Transforms/InstCombine/sign-test-and-or.ll b/test/Transforms/InstCombine/sign-test-and-or.ll
index 47f5f30..a6066d8 100644
--- a/test/Transforms/InstCombine/sign-test-and-or.ll
+++ b/test/Transforms/InstCombine/sign-test-and-or.ll
@@ -77,3 +77,103 @@ if.then:
 if.end:
   ret void
 }
+
+define void @test5(i32 %a) nounwind {
+  %and = and i32 %a, 134217728
+  %1 = icmp eq i32 %and, 0
+  %2 = icmp sgt i32 %a, -1
+  %or.cond = and i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test5
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test6(i32 %a) nounwind {
+  %1 = icmp sgt i32 %a, -1
+  %and = and i32 %a, 134217728
+  %2 = icmp eq i32 %and, 0
+  %or.cond = and i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test6
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test7(i32 %a) nounwind {
+  %and = and i32 %a, 134217728
+  %1 = icmp ne i32 %and, 0
+  %2 = icmp slt i32 %a, 0
+  %or.cond = or i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test7
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.end, label %if.the
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test8(i32 %a) nounwind {
+  %1 = icmp slt i32 %a, 0
+  %and = and i32 %a, 134217728
+  %2 = icmp ne i32 %and, 0
+  %or.cond = or i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test8
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.end, label %if.the
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test9(i32 %a) nounwind {
+  %1 = and i32 %a, 1073741824
+  %2 = icmp ne i32 %1, 0
+  %3 = icmp sgt i32 %a, -1
+  %or.cond = and i1 %2, %3
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test9
+; CHECK-NEXT: %1 = and i32 %a, -1073741824
+; CHECK-NEXT: %2 = icmp eq i32 %1, 1073741824
+; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
diff --git a/test/Transforms/InstCombine/sub-xor.ll b/test/Transforms/InstCombine/sub-xor.ll
new file mode 100644
index 0000000..279e4ac
--- /dev/null
+++ b/test/Transforms/InstCombine/sub-xor.ll
@@ -0,0 +1,37 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define i32 @test1(i32 %x) nounwind {
+  %and = and i32 %x, 31
+  %sub = sub i32 63, %and
+  ret i32 %sub
+
+; CHECK: @test1
+; CHECK-NEXT: and i32 %x, 31
+; CHECK-NEXT: xor i32 %and, 63
+; CHECK-NEXT: ret
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+
+define i32 @test2(i32 %x) nounwind {
+  %count = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) nounwind readnone
+  %sub = sub i32 31, %count
+  ret i32 %sub
+
+; CHECK: @test2
+; CHECK-NEXT: ctlz
+; CHECK-NEXT: xor i32 %count, 31
+; CHECK-NEXT: ret
+}
+
+define i32 @test3(i32 %x) nounwind {
+  %and = and i32 %x, 31
+  %sub = xor i32 31, %and
+  %add = add i32 %sub, 42
+  ret i32 %add
+
+; CHECK: @test3
+; CHECK-NEXT: and i32 %x, 31
+; CHECK-NEXT: sub i32 73, %and
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 37de328..b71ec8c 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -301,3 +301,29 @@ define i32 @test28(i32 %x, i32 %y, i32 %z) {
 ; CHECK-NEXT: add i32
 ; CHECK-NEXT: ret i32
 }
+
+define i64 @test29(i8* %foo, i64 %i, i64 %j) {
+  %gep1 = getelementptr inbounds i8* %foo, i64 %i
+  %gep2 = getelementptr inbounds i8* %foo, i64 %j
+  %cast1 = ptrtoint i8* %gep1 to i64
+  %cast2 = ptrtoint i8* %gep2 to i64
+  %sub = sub i64 %cast1, %cast2
+  ret i64 %sub
+; CHECK: @test29
+; CHECK-NEXT: sub i64 %i, %j
+; CHECK-NEXT: ret i64
+}
+
+define i64 @test30(i8* %foo, i64 %i, i64 %j) {
+  %bit = bitcast i8* %foo to i32*
+  %gep1 = getelementptr inbounds i32* %bit, i64 %i
+  %gep2 = getelementptr inbounds i8* %foo, i64 %j
+  %cast1 = ptrtoint i32* %gep1 to i64
+  %cast2 = ptrtoint i8* %gep2 to i64
+  %sub = sub i64 %cast1, %cast2
+  ret i64 %sub
+; CHECK: @test30
+; CHECK-NEXT: %gep1.idx = shl nuw i64 %i, 2
+; CHECK-NEXT: sub i64 %gep1.idx, %j
+; CHECK-NEXT: ret i64
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 9f3dffe..e15bfaa 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -10,6 +10,99 @@ define i1 @ptrtoint() {
 ; CHECK: ret i1 false
 }
 
+define i1 @bitcast() {
+; CHECK: @bitcast
+  %a = alloca i32
+  %b = alloca i64
+  %x = bitcast i32* %a to i8*
+  %y = bitcast i64* %b to i8*
+  %cmp = icmp eq i8* %x, %y
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep() {
+; CHECK: @gep
+  %a = alloca [3 x i8], align 8
+  %x = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
+  %cmp = icmp eq i8* %x, null
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep2() {
+; CHECK: @gep2
+  %a = alloca [3 x i8], align 8
+  %x = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
+  %y = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
+  %cmp = icmp eq i8* %x, %y
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 true
+}
+
+; PR11238
+%gept = type { i32, i32 }
+@gepy = global %gept zeroinitializer, align 8
+@gepz = extern_weak global %gept
+
+define i1 @gep3() {
+; CHECK: @gep3
+  %x = alloca %gept, align 8
+  %a = getelementptr %gept* %x, i64 0, i32 0
+  %b = getelementptr %gept* %x, i64 0, i32 1
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep4() {
+; CHECK: @gep4
+  %x = alloca %gept, align 8
+  %a = getelementptr %gept* @gepy, i64 0, i32 0
+  %b = getelementptr %gept* @gepy, i64 0, i32 1
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep5() {
+; CHECK: @gep5
+  %x = alloca %gept, align 8
+  %a = getelementptr inbounds %gept* %x, i64 0, i32 1
+  %b = getelementptr %gept* @gepy, i64 0, i32 0
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep6(%gept* %x) {
+; Same as @gep3 but potentially null.
+; CHECK: @gep6
+  %a = getelementptr %gept* %x, i64 0, i32 0
+  %b = getelementptr %gept* %x, i64 0, i32 1
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep7(%gept* %x) {
+; CHECK: @gep7
+  %a = getelementptr %gept* %x, i64 0, i32 0
+  %b = getelementptr %gept* @gepz, i64 0, i32 0
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK: ret i1 %equal
+}
+
+define i1 @gep8(%gept* %x) {
+; CHECK: @gep8
+  %a = getelementptr %gept* %x, i32 1
+  %b = getelementptr %gept* %x, i32 -1
+  %equal = icmp ugt %gept* %a, %b
+  ret i1 %equal
+; CHECK: ret i1 %equal
+}
+
 define i1 @zext(i32 %x) {
 ; CHECK: @zext
   %e1 = zext i32 %x to i64
@@ -406,3 +499,40 @@ define i1 @mul3(i32 %X, i32 %Y) {
   ret i1 %C
 ; CHECK: ret i1 true
 }
+
+define <2 x i1> @vectorselect1(<2 x i1> %cond) {
+; CHECK: @vectorselect1
+  %invert = xor <2 x i1> %cond, <i1 1, i1 1>
+  %s = select <2 x i1> %invert, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 1, i32 1>
+  %c = icmp ne <2 x i32> %s, <i32 0, i32 0>
+  ret <2 x i1> %c
+; CHECK: ret <2 x i1> %cond
+}
+
+; PR11948
+define <2 x i1> @vectorselectcrash(i32 %arg1) {
+  %tobool40 = icmp ne i32 %arg1, 0
+  %cond43 = select i1 %tobool40, <2 x i16> <i16 -5, i16 66>, <2 x i16> <i16 46, i16 1>
+  %cmp45 = icmp ugt <2 x i16> %cond43, <i16 73, i16 21>
+  ret <2 x i1> %cmp45
+}
+
+; PR12013
+define i1 @alloca_compare(i64 %idx) {
+  %sv = alloca { i32, i32, [124 x i32] }
+  %1 = getelementptr inbounds { i32, i32, [124 x i32] }* %sv, i32 0, i32 2, i64 %idx
+  %2 = icmp eq i32* %1, null
+  ret i1 %2
+  ; CHECK: alloca_compare
+  ; CHECK: ret i1 false
+}
+
+; PR12075
+define i1 @infinite_gep() {
+  ret i1 1
+
+unreachableblock:
+  %X = getelementptr i32 *%X, i32 1
+  %Y = icmp eq i32* %X, null
+  ret i1 %Y
+}
diff --git a/test/Transforms/InstSimplify/dg.exp b/test/Transforms/InstSimplify/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/InstSimplify/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/InstSimplify/lit.local.cfg b/test/Transforms/InstSimplify/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/InstSimplify/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Internalize/dg.exp b/test/Transforms/Internalize/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Internalize/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Internalize/lit.local.cfg b/test/Transforms/Internalize/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Internalize/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/JumpThreading/dg.exp b/test/Transforms/JumpThreading/dg.exp
deleted file mode 100644
index de42dad..0000000
--- a/test/Transforms/JumpThreading/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/JumpThreading/lit.local.cfg b/test/Transforms/JumpThreading/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/JumpThreading/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/LCSSA/dg.exp b/test/Transforms/LCSSA/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LCSSA/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LCSSA/lit.local.cfg b/test/Transforms/LCSSA/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LCSSA/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LICM/dg.exp b/test/Transforms/LICM/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LICM/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LICM/lit.local.cfg b/test/Transforms/LICM/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LICM/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopDeletion/dg.exp b/test/Transforms/LoopDeletion/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopDeletion/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopDeletion/lit.local.cfg b/test/Transforms/LoopDeletion/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopDeletion/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopIdiom/dg.exp b/test/Transforms/LoopIdiom/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopIdiom/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopIdiom/lit.local.cfg b/test/Transforms/LoopIdiom/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopIdiom/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopRotate/alloca.ll b/test/Transforms/LoopRotate/alloca.ll
new file mode 100644
index 0000000..fd217ea
--- /dev/null
+++ b/test/Transforms/LoopRotate/alloca.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -loop-rotate -S | FileCheck %s
+
+; Test alloca in -loop-rotate.
+
+; We expect a different value for %ptr each iteration (according to the
+; definition of alloca). I.e. each @use must be paired with an alloca.
+
+; CHECK: call void @use(i8* %
+; CHECK: %ptr = alloca i8
+
+@e = global i16 10
+
+declare void @use(i8*)
+
+define void @test() {
+entry:
+  %end = load i16* @e
+  br label %loop
+
+loop:
+  %n.phi = phi i16 [ %n, %loop.fin ], [ 0, %entry ]
+  %ptr = alloca i8
+  %cond = icmp eq i16 %n.phi, %end
+  br i1 %cond, label %exit, label %loop.fin
+
+loop.fin:
+  %n = add i16 %n.phi, 1
+  call void @use(i8* %ptr)
+  br label %loop
+
+exit:
+  ret void
+}
diff --git a/test/Transforms/LoopRotate/dg.exp b/test/Transforms/LoopRotate/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopRotate/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopRotate/lit.local.cfg b/test/Transforms/LoopRotate/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopRotate/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopRotate/simplifylatch.ll b/test/Transforms/LoopRotate/simplifylatch.ll
new file mode 100644
index 0000000..f422724
--- /dev/null
+++ b/test/Transforms/LoopRotate/simplifylatch.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S < %s -loop-rotate -verify-dom-info -verify-loop-info | FileCheck %s
+; PR2624 unroll multiple exits
+
+@mode_table = global [4 x i32] zeroinitializer		; <[4 x i32]*> [#uses=1]
+
+; CHECK: @f
+; CHECK-NOT: bb4
+define i8 @f() {
+entry:
+	tail call i32 @fegetround( )		; <i32>:0 [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb4, %entry
+	%mode.0 = phi i8 [ 0, %entry ], [ %indvar.next, %bb4 ]		; <i8> [#uses=4]
+	zext i8 %mode.0 to i32		; <i32>:1 [#uses=1]
+	getelementptr [4 x i32]* @mode_table, i32 0, i32 %1		; <i32*>:2 [#uses=1]
+	load i32* %2, align 4		; <i32>:3 [#uses=1]
+	icmp eq i32 %3, %0		; <i1>:4 [#uses=1]
+	br i1 %4, label %bb1, label %bb2
+
+bb1:		; preds = %bb
+	ret i8 %mode.0
+
+bb2:		; preds = %bb
+	icmp eq i8 %mode.0, 1		; <i1>:5 [#uses=1]
+	br i1 %5, label %bb5, label %bb4
+
+bb4:		; preds = %bb2
+	%indvar.next = add i8 %mode.0, 1		; <i8> [#uses=1]
+	br label %bb
+
+bb5:		; preds = %bb2
+	tail call void @raise_exception( ) noreturn
+	unreachable
+}
+
+declare i32 @fegetround()
+
+declare void @raise_exception() noreturn
diff --git a/test/Transforms/LoopSimplify/dg.exp b/test/Transforms/LoopSimplify/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopSimplify/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopSimplify/lit.local.cfg b/test/Transforms/LoopSimplify/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopSimplify/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll b/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll
new file mode 100644
index 0000000..392a8bc
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll
@@ -0,0 +1,39 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; PR11571: handle a postinc user outside of for.body7 that requires
+; recursive expansion of a quadratic recurrence within for.body7. LSR
+; needs to forget that for.body7 is a postinc loop during expansion.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-unknown-freebsd10.0"
+
+@b = external global [121 x i32]
+
+; CHECK: @vb
+;   Outer recurrence:
+; CHECK: %lsr.iv1 = phi [121 x i32]*
+;   Inner recurrence:
+; CHECK: %lsr.iv = phi i32
+;   Outer step (relative to inner recurrence):
+; CHECK: %scevgep = getelementptr i1* %{{.*}}, i32 %lsr.iv
+;   Outer use:
+; CHECK: %lsr.iv3 = phi [121 x i32]* [ %lsr.iv1, %for.body43.preheader ]
+define void @vb() nounwind {
+for.cond.preheader:
+  br label %for.body7
+
+for.body7:
+  %indvars.iv77 = phi i32 [ %indvars.iv.next78, %for.body7 ], [ 1, %for.cond.preheader ]
+  %bf.072 = phi i32 [ %t1, %for.body7 ], [ 0, %for.cond.preheader ]
+  %t1 = add i32 %bf.072, %indvars.iv77
+  %indvars.iv.next78 = add i32 %indvars.iv77, 1
+  br i1 undef, label %for.body43, label %for.body7
+
+for.body43:
+  %bf.459 = phi i32 [ %inc44, %for.body43 ], [ %t1, %for.body7 ]
+  %inc44 = add nsw i32 %bf.459, 1
+  %arrayidx45 = getelementptr inbounds [121 x i32]* @b, i32 0, i32 %bf.459
+  %t2 = load i32* %arrayidx45, align 4
+  br label %for.body43
+}
+
diff --git a/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll
new file mode 100644
index 0000000..d7f5723
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll
@@ -0,0 +1,88 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; <rdar://10619599> "SelectionDAGBuilder shouldn't visit PHI nodes!" assert.
+; <rdar://10655343> SCEVExpander segfault on simple test case
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-f128:128:128-n8:16:32"
+target triple = "i386-apple-darwin"
+
+; LSR should convert the inner loop (bb7.us) IV (j.01.us) into float*.
+; This involves a nested AddRec, the outer AddRec's loop invariant components
+; cannot find a preheader, so they should be expanded in the loop header
+; (bb7.lr.ph.us) below the existing phi i.12.us.
+; Currently, LSR won't kick in on such loops.
+; CHECK: @nopreheader
+; CHECK: bb7.us:
+; CHECK-NOT: phi float*
+; CHECK: %j.01.us = phi i32
+; CHECK-NOT: phi float*
+define void @nopreheader(float* nocapture %a, i32 %n) nounwind {
+entry:
+  %0 = sdiv i32 %n, undef
+  indirectbr i8* undef, [label %bb10.preheader]
+
+bb10.preheader:                                   ; preds = %bb4
+  indirectbr i8* undef, [label %bb8.preheader.lr.ph, label %return]
+
+bb8.preheader.lr.ph:                              ; preds = %bb10.preheader
+  indirectbr i8* null, [label %bb7.lr.ph.us, label %bb9]
+
+bb7.lr.ph.us:                                     ; preds = %bb9.us, %bb8.preheader.lr.ph
+  %i.12.us = phi i32 [ %2, %bb9.us ], [ 0, %bb8.preheader.lr.ph ]
+  %tmp30 = mul i32 %0, %i.12.us
+  indirectbr i8* undef, [label %bb7.us]
+
+bb7.us:                                           ; preds = %bb7.lr.ph.us, %bb7.us
+  %j.01.us = phi i32 [ 0, %bb7.lr.ph.us ], [ %1, %bb7.us ]
+  %tmp31 = add i32 %tmp30, %j.01.us
+  %scevgep9 = getelementptr float* %a, i32 %tmp31
+  store float undef, float* %scevgep9, align 1
+  %1 = add nsw i32 %j.01.us, 1
+  indirectbr i8* undef, [label %bb9.us, label %bb7.us]
+
+bb9.us:                                           ; preds = %bb7.us
+  %2 = add nsw i32 %i.12.us, 1
+  indirectbr i8* undef, [label %bb7.lr.ph.us, label %return]
+
+bb9:                                              ; preds = %bb9, %bb8.preheader.lr.ph
+  indirectbr i8* undef, [label %bb9, label %return]
+
+return:                                           ; preds = %bb9, %bb9.us, %bb10.preheader
+  ret void
+}
+
+; In this case, SCEVExpander simply cannot materialize the AddRecExpr
+; that LSR picks. We must detect that %bb8.preheader does not have a
+; preheader and avoid performing LSR on %bb7.
+; CHECK: @nopreheader2
+; CHECK: bb7:
+; CHECK: %indvar = phi i32
+define fastcc void @nopreheader2([200 x i32]* nocapture %Array2) nounwind {
+entry:
+  indirectbr i8* undef, [label %bb]
+
+bb:                                               ; preds = %bb, %entry
+  indirectbr i8* undef, [label %bb3, label %bb]
+
+bb3:                                              ; preds = %bb3, %bb
+  indirectbr i8* undef, [label %bb8.preheader, label %bb3]
+
+bb8.preheader:                                    ; preds = %bb9, %bb3
+  %indvar5 = phi i32 [ %indvar.next6, %bb9 ], [ 0, %bb3 ]
+  %tmp26 = add i32 %indvar5, 13
+  indirectbr i8* null, [label %bb7]
+
+bb7:                                              ; preds = %bb8.preheader, %bb7
+  %indvar = phi i32 [ 0, %bb8.preheader ], [ %indvar.next, %bb7 ]
+  %scevgep = getelementptr [200 x i32]* %Array2, i32 %tmp26, i32 %indvar
+  store i32 undef, i32* %scevgep, align 4
+  %indvar.next = add i32 %indvar, 1
+  indirectbr i8* undef, [label %bb9, label %bb7]
+
+bb9:                                              ; preds = %bb7
+  %indvar.next6 = add i32 %indvar5, 1
+  indirectbr i8* undef, [label %return, label %bb8.preheader]
+
+return:                                           ; preds = %bb9
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll
new file mode 100644
index 0000000..3036a7e
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll
@@ -0,0 +1,113 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; <rdar://10701050> "Cannot split an edge from an IndirectBrInst" assert.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; while.cond197 is a dominates the simplified loop while.cond238 but
+; has no with no preheader.
+;
+; CHECK: @nopreheader
+; CHECK: %while.cond238
+; CHECK: phi i64
+; CHECK-NOT: phi
+; CHECK: indirectbr
+define void @nopreheader(i8* %end) nounwind {
+entry:
+  br label %while.cond179
+
+while.cond179:                                    ; preds = %if.end434, %if.end369, %if.end277, %if.end165
+  %s.1 = phi i8* [ undef, %if.end434 ], [ %incdec.ptr356, %if.end348 ], [ undef, %entry ]
+  indirectbr i8* undef, [label %land.rhs184, label %while.end453]
+
+land.rhs184:                                      ; preds = %while.cond179
+  indirectbr i8* undef, [label %while.end453, label %while.cond197]
+
+while.cond197:                                    ; preds = %land.rhs202, %land.rhs184
+  %0 = phi i64 [ %indvar.next11, %land.rhs202 ], [ 0, %land.rhs184 ]
+  indirectbr i8* undef, [label %land.rhs202, label %while.end215]
+
+land.rhs202:                                      ; preds = %while.cond197
+  %indvar.next11 = add i64 %0, 1
+  indirectbr i8* undef, [label %while.end215, label %while.cond197]
+
+while.end215:                                     ; preds = %land.rhs202, %while.cond197
+  indirectbr i8* undef, [label %PREMATURE, label %if.end221]
+
+if.end221:                                        ; preds = %while.end215
+  indirectbr i8* undef, [label %while.cond238.preheader, label %lor.lhs.false227]
+
+lor.lhs.false227:                                 ; preds = %if.end221
+  indirectbr i8* undef, [label %while.cond238.preheader, label %if.else]
+
+while.cond238.preheader:                          ; preds = %lor.lhs.false227, %if.end221
+  %tmp16 = add i64 %0, 2
+  indirectbr i8* undef, [label %while.cond238]
+
+while.cond238:                                    ; preds = %land.rhs243, %while.cond238.preheader
+  %1 = phi i64 [ %indvar.next15, %land.rhs243 ], [ 0, %while.cond238.preheader ]
+  %tmp36 = add i64 %tmp16, %1
+  %s.3 = getelementptr i8* %s.1, i64 %tmp36
+  %cmp241 = icmp ult i8* %s.3, %end
+  indirectbr i8* undef, [label %land.rhs243, label %while.end256]
+
+land.rhs243:                                      ; preds = %while.cond238
+  %indvar.next15 = add i64 %1, 1
+  indirectbr i8* undef, [label %while.end256, label %while.cond238]
+
+while.end256:                                     ; preds = %land.rhs243, %while.cond238
+  indirectbr i8* undef, [label %PREMATURE]
+
+if.else:                                          ; preds = %lor.lhs.false227
+  indirectbr i8* undef, [label %if.then297, label %if.else386]
+
+if.then297:                                       ; preds = %if.else
+  indirectbr i8* undef, [label %PREMATURE, label %if.end307]
+
+if.end307:                                        ; preds = %if.then297
+  indirectbr i8* undef, [label %if.end314, label %FAIL]
+
+if.end314:                                        ; preds = %if.end307
+  indirectbr i8* undef, [label %if.end340]
+
+if.end340:                                        ; preds = %while.end334
+  indirectbr i8* undef, [label %PREMATURE, label %if.end348]
+
+if.end348:                                        ; preds = %if.end340
+  %incdec.ptr356 = getelementptr inbounds i8* undef, i64 2
+  indirectbr i8* undef, [label %while.cond179]
+
+if.else386:                                       ; preds = %if.else
+  indirectbr i8* undef, [label %while.end453, label %if.end434]
+
+if.end434:                                        ; preds = %if.then428, %if.end421
+  indirectbr i8* undef, [label %while.cond179]
+
+while.end453:                                     ; preds = %if.else386, %land.rhs184, %while.cond179
+  indirectbr i8* undef, [label %PREMATURE, label %if.end459]
+
+if.end459:                                        ; preds = %while.end453
+  indirectbr i8* undef, [label %if.then465, label %FAIL]
+
+if.then465:                                       ; preds = %if.end459
+  indirectbr i8* undef, [label %return, label %if.then479]
+
+if.then479:                                       ; preds = %if.then465
+  indirectbr i8* undef, [label %return]
+
+FAIL:                                             ; preds = %if.end459, %if.end307, %land.lhs.true142, %land.lhs.true131, %while.end
+  indirectbr i8* undef, [label %DECL_FAIL]
+
+PREMATURE:                                        ; preds = %while.end453, %while.end415, %if.end340, %while.end334, %if.then297, %while.end256, %while.end215
+  indirectbr i8* undef, [label %return, label %if.then495]
+
+if.then495:                                       ; preds = %PREMATURE
+  indirectbr i8* undef, [label %return]
+
+DECL_FAIL:                                        ; preds = %if.then488, %FAIL, %land.lhs.true99, %lor.lhs.false, %if.end83, %if.then39, %if.end
+  indirectbr i8* undef, [label %return]
+
+return:                                           ; preds = %if.then512, %if.end504, %DECL_FAIL, %if.then495, %PREMATURE, %if.then479, %if.then465, %if.then69, %if.end52, %if.end19, %if.then
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
new file mode 100644
index 0000000..9189d79
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -0,0 +1,292 @@
+; RUN: llc < %s -O3 -march=thumb -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
+
+; @simple is the most basic chain of address induction variables. Chaining
+; saves at least one register and avoids complex addressing and setup
+; code.
+;
+; A9: @simple
+; no expensive address computation in the preheader
+; A9: lsl
+; A9-NOT: lsl
+; A9: %loop
+; no complex address modes
+; A9-NOT: lsl
+define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+  br label %loop
+loop:
+  %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+  %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+  %v = load i32* %iv
+  %iv1 = getelementptr inbounds i32* %iv, i32 %x
+  %v1 = load i32* %iv1
+  %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+  %v2 = load i32* %iv2
+  %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+  %v3 = load i32* %iv3
+  %s1 = add i32 %s, %v
+  %s2 = add i32 %s1, %v1
+  %s3 = add i32 %s2, %v2
+  %s4 = add i32 %s3, %v3
+  %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+  %cmp = icmp eq i32* %iv4, %b
+  br i1 %cmp, label %exit, label %loop
+exit:
+  ret i32 %s4
+}
+
+; @user is not currently chained because the IV is live across memory ops.
+;
+; A9: @user
+; stride multiples computed in the preheader
+; A9: lsl
+; A9: lsl
+; A9: %loop
+; complex address modes
+; A9: lsl
+; A9: lsl
+define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+  br label %loop
+loop:
+  %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+  %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+  %v = load i32* %iv
+  %iv1 = getelementptr inbounds i32* %iv, i32 %x
+  %v1 = load i32* %iv1
+  %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+  %v2 = load i32* %iv2
+  %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+  %v3 = load i32* %iv3
+  %s1 = add i32 %s, %v
+  %s2 = add i32 %s1, %v1
+  %s3 = add i32 %s2, %v2
+  %s4 = add i32 %s3, %v3
+  %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+  store i32 %s4, i32* %iv
+  %cmp = icmp eq i32* %iv4, %b
+  br i1 %cmp, label %exit, label %loop
+exit:
+  ret i32 %s4
+}
+
+; @extrastride is a slightly more interesting case of a single
+; complete chain with multiple strides. The test case IR is what LSR
+; used to do, and exactly what we don't want to do. LSR's new IV
+; chaining feature should now undo the damage.
+;
+; A9: extrastride:
+; no spills
+; A9-NOT: str
+; only one stride multiple in the preheader
+; A9: lsl
+; A9-NOT: {{str r|lsl}}
+; A9: %for.body{{$}}
+; no complex address modes or reloads
+; A9-NOT: {{ldr .*[sp]|lsl}}
+define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+  %cmp8 = icmp eq i32 %z, 0
+  br i1 %cmp8, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %add.ptr.sum = shl i32 %main_stride, 1 ; s*2
+  %add.ptr1.sum = add i32 %add.ptr.sum, %main_stride ; s*3
+  %add.ptr2.sum = add i32 %x, %main_stride ; s + x
+  %add.ptr4.sum = shl i32 %main_stride, 2 ; s*4
+  %add.ptr3.sum = add i32 %add.ptr2.sum, %add.ptr4.sum ; total IV stride = s*5+x
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %main.addr.011 = phi i8* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
+  %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %res.addr.09 = phi i32* [ %res, %for.body.lr.ph ], [ %add.ptr7, %for.body ]
+  %0 = bitcast i8* %main.addr.011 to i32*
+  %1 = load i32* %0, align 4
+  %add.ptr = getelementptr inbounds i8* %main.addr.011, i32 %main_stride
+  %2 = bitcast i8* %add.ptr to i32*
+  %3 = load i32* %2, align 4
+  %add.ptr1 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr.sum
+  %4 = bitcast i8* %add.ptr1 to i32*
+  %5 = load i32* %4, align 4
+  %add.ptr2 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr1.sum
+  %6 = bitcast i8* %add.ptr2 to i32*
+  %7 = load i32* %6, align 4
+  %add.ptr3 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr4.sum
+  %8 = bitcast i8* %add.ptr3 to i32*
+  %9 = load i32* %8, align 4
+  %add = add i32 %3, %1
+  %add4 = add i32 %add, %5
+  %add5 = add i32 %add4, %7
+  %add6 = add i32 %add5, %9
+  store i32 %add6, i32* %res.addr.09, align 4
+  %add.ptr6 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr3.sum
+  %add.ptr7 = getelementptr inbounds i32* %res.addr.09, i32 %y
+  %inc = add i32 %i.010, 1
+  %cmp = icmp eq i32 %inc, %z
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; @foldedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' can be folded into the addressing mode.
+; Consequently, we should *not* form any chains.
+;
+; A9: foldedidx:
+; A9: ldrb.w {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3]
+define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.07 = phi i32 [ 0, %entry ], [ %inc.3, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.07
+  %0 = load i8* %arrayidx, align 1
+  %conv5 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.07
+  %1 = load i8* %arrayidx1, align 1
+  %conv26 = zext i8 %1 to i32
+  %add = add nsw i32 %conv26, %conv5
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.07
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %inc1 = or i32 %i.07, 1
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %inc1
+  %2 = load i8* %arrayidx.1, align 1
+  %conv5.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %inc1
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv26.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv26.1, %conv5.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %inc1
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %inc.12 = or i32 %i.07, 2
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %inc.12
+  %4 = load i8* %arrayidx.2, align 1
+  %conv5.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %inc.12
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv26.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv26.2, %conv5.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %inc.12
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %inc.23 = or i32 %i.07, 3
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %inc.23
+  %6 = load i8* %arrayidx.3, align 1
+  %conv5.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %inc.23
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv26.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv26.3, %conv5.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %inc.23
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %inc.3 = add nsw i32 %i.07, 4
+  %exitcond.3 = icmp eq i32 %inc.3, 400
+  br i1 %exitcond.3, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; @testNeon is an important example of the nead for ivchains.
+;
+; Currently we have three extra add.w's that keep the store address
+; live past the next increment because ISEL is unfortunately undoing
+; the store chain. ISEL also fails to convert the stores to
+; post-increment addressing. However, the loads should use
+; post-increment addressing, no add's or add.w's beyond the three
+; mentioned. Most importantly, there should be no spills or reloads!
+;
+; CHECK: testNeon:
+; CHECK: %.lr.ph
+; CHECK-NOT: lsl.w
+; CHECK-NOT: {{ldr|str|adds|add r}}
+; CHECK: add.w r
+; CHECK-NOT: {{ldr|str|adds|add r}}
+; CHECK: add.w r
+; CHECK-NOT: {{ldr|str|adds|add r}}
+; CHECK: add.w r
+; CHECK-NOT: {{ldr|str|adds|add r}}
+; CHECK-NOT: add.w r
+; CHECK: bne
+define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i8>* nocapture %data) nounwind optsize {
+  %1 = icmp sgt i32 %limit, 0
+  br i1 %1, label %.lr.ph, label %45
+
+.lr.ph:                                           ; preds = %0
+  %2 = shl nsw i32 %ref_stride, 1
+  %3 = mul nsw i32 %ref_stride, 3
+  %4 = shl nsw i32 %ref_stride, 2
+  %5 = mul nsw i32 %ref_stride, 5
+  %6 = mul nsw i32 %ref_stride, 6
+  %7 = mul nsw i32 %ref_stride, 7
+  %8 = shl nsw i32 %ref_stride, 3
+  %9 = sub i32 0, %8
+  %10 = mul i32 %limit, -64
+  br label %11
+
+; <label>:11                                      ; preds = %11, %.lr.ph
+  %.05 = phi i8* [ %ref_data, %.lr.ph ], [ %42, %11 ]
+  %counter.04 = phi i32 [ 0, %.lr.ph ], [ %44, %11 ]
+  %result.03 = phi <16 x i8> [ zeroinitializer, %.lr.ph ], [ %41, %11 ]
+  %.012 = phi <16 x i8>* [ %data, %.lr.ph ], [ %43, %11 ]
+  %12 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %.05, i32 1) nounwind
+  %13 = getelementptr inbounds i8* %.05, i32 %ref_stride
+  %14 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %13, i32 1) nounwind
+  %15 = shufflevector <1 x i64> %12, <1 x i64> %14, <2 x i32> <i32 0, i32 1>
+  %16 = bitcast <2 x i64> %15 to <16 x i8>
+  %17 = getelementptr inbounds <16 x i8>* %.012, i32 1
+  store <16 x i8> %16, <16 x i8>* %.012, align 4
+  %18 = getelementptr inbounds i8* %.05, i32 %2
+  %19 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %18, i32 1) nounwind
+  %20 = getelementptr inbounds i8* %.05, i32 %3
+  %21 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %20, i32 1) nounwind
+  %22 = shufflevector <1 x i64> %19, <1 x i64> %21, <2 x i32> <i32 0, i32 1>
+  %23 = bitcast <2 x i64> %22 to <16 x i8>
+  %24 = getelementptr inbounds <16 x i8>* %.012, i32 2
+  store <16 x i8> %23, <16 x i8>* %17, align 4
+  %25 = getelementptr inbounds i8* %.05, i32 %4
+  %26 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %25, i32 1) nounwind
+  %27 = getelementptr inbounds i8* %.05, i32 %5
+  %28 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %27, i32 1) nounwind
+  %29 = shufflevector <1 x i64> %26, <1 x i64> %28, <2 x i32> <i32 0, i32 1>
+  %30 = bitcast <2 x i64> %29 to <16 x i8>
+  %31 = getelementptr inbounds <16 x i8>* %.012, i32 3
+  store <16 x i8> %30, <16 x i8>* %24, align 4
+  %32 = getelementptr inbounds i8* %.05, i32 %6
+  %33 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %32, i32 1) nounwind
+  %34 = getelementptr inbounds i8* %.05, i32 %7
+  %35 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %34, i32 1) nounwind
+  %36 = shufflevector <1 x i64> %33, <1 x i64> %35, <2 x i32> <i32 0, i32 1>
+  %37 = bitcast <2 x i64> %36 to <16 x i8>
+  store <16 x i8> %37, <16 x i8>* %31, align 4
+  %38 = add <16 x i8> %16, %23
+  %39 = add <16 x i8> %38, %30
+  %40 = add <16 x i8> %39, %37
+  %41 = add <16 x i8> %result.03, %40
+  %42 = getelementptr i8* %.05, i32 %9
+  %43 = getelementptr inbounds <16 x i8>* %.012, i32 -64
+  %44 = add nsw i32 %counter.04, 1
+  %exitcond = icmp eq i32 %44, %limit
+  br i1 %exitcond, label %._crit_edge, label %11
+
+._crit_edge:                                      ; preds = %11
+  %scevgep = getelementptr <16 x i8>* %data, i32 %10
+  br label %45
+
+; <label>:45                                      ; preds = %._crit_edge, %0
+  %result.0.lcssa = phi <16 x i8> [ %41, %._crit_edge ], [ zeroinitializer, %0 ]
+  %.01.lcssa = phi <16 x i8>* [ %scevgep, %._crit_edge ], [ %data, %0 ]
+  store <16 x i8> %result.0.lcssa, <16 x i8>* %.01.lcssa, align 4
+  ret void
+}
+
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
diff --git a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
new file mode 100644
index 0000000..d622529
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
new file mode 100644
index 0000000..2dcaab8
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s
+
+declare i1 @check() nounwind
+declare i1 @foo(i8*, i8*, i8*) nounwind
+
+; Check that redundant phi elimination ran
+; CHECK: @test
+; CHECK: %while.body.i
+; CHECK: movs
+; CHECK-NOT: movs
+; CHECK: %for.end.i
+define i32 @test(i8* %base) nounwind uwtable ssp {
+entry:
+  br label %while.body.lr.ph.i
+
+while.body.lr.ph.i:                               ; preds = %cond.true.i
+  br label %while.body.i
+
+while.body.i:                                     ; preds = %cond.true29.i, %while.body.lr.ph.i
+  %indvars.iv7.i = phi i64 [ 16, %while.body.lr.ph.i ], [ %indvars.iv.next8.i, %cond.true29.i ]
+  %i.05.i = phi i64 [ 0, %while.body.lr.ph.i ], [ %indvars.iv7.i, %cond.true29.i ]
+  %sext.i = shl i64 %i.05.i, 32
+  %idx.ext.i = ashr exact i64 %sext.i, 32
+  %add.ptr.sum.i = add i64 %idx.ext.i, 16
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %while.body.i
+  %indvars.iv.i = phi i64 [ 0, %while.body.i ], [ %indvars.iv.next.i, %for.body.i ]
+  %add.ptr.sum = add i64 %add.ptr.sum.i, %indvars.iv.i
+  %arrayidx22.i = getelementptr inbounds i8* %base, i64 %add.ptr.sum
+  %0 = load i8* %arrayidx22.i, align 1
+  %indvars.iv.next.i = add i64 %indvars.iv.i, 1
+  %cmp = call i1 @check() nounwind
+  br i1 %cmp, label %for.end.i, label %for.body.i
+
+for.end.i:                                        ; preds = %for.body.i
+  %add.ptr.i144 = getelementptr inbounds i8* %base, i64 %add.ptr.sum.i
+  %cmp2 = tail call i1 @foo(i8* %add.ptr.i144, i8* %add.ptr.i144, i8* undef) nounwind
+  br i1 %cmp2, label %cond.true29.i, label %cond.false35.i
+
+cond.true29.i:                                    ; preds = %for.end.i
+  %indvars.iv.next8.i = add i64 %indvars.iv7.i, 16
+  br i1 false, label %exit, label %while.body.i
+
+cond.false35.i:                                   ; preds = %for.end.i
+  unreachable
+
+exit:                                 ; preds = %cond.true29.i, %cond.true.i
+  ret i32 0
+}
+
+%struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771 = type { i32, i32, i32 }
+
+@tags = external global [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], align 16
+
+; PR11782: SCEVExpander assert
+;
+; Test phi reuse after LSR that requires SCEVExpander to hoist an
+; interesting GEP.
+;
+; CHECK: @test2
+; CHECK: %entry
+; CHECK-NOT: mov
+; CHECK: jne
+define void @test2(i32 %n) nounwind uwtable {
+entry:
+  br i1 undef, label %while.end, label %for.cond468
+
+for.cond468:                                      ; preds = %if.then477, %entry
+  %indvars.iv1163 = phi i64 [ %indvars.iv.next1164, %if.then477 ], [ 1, %entry ]
+  %k.0.in = phi i32* [ %last, %if.then477 ], [ getelementptr inbounds ([5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 0, i32 2), %entry ]
+  %k.0 = load i32* %k.0.in, align 4
+  %0 = trunc i64 %indvars.iv1163 to i32
+  %cmp469 = icmp slt i32 %0, %n
+  br i1 %cmp469, label %for.body471, label %for.inc498
+
+for.body471:                                      ; preds = %for.cond468
+  %first = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 1
+  %1 = load i32* %first, align 4
+  br i1 undef, label %if.then477, label %for.inc498
+
+if.then477:                                       ; preds = %for.body471
+  %last = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 2
+  %indvars.iv.next1164 = add i64 %indvars.iv1163, 1
+  br label %for.cond468
+
+for.inc498:                                       ; preds = %for.inc498, %for.body471, %for.cond468
+  br label %for.inc498
+
+while.end:                                        ; preds = %entry
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/dg.exp b/test/Transforms/LoopStrengthReduce/X86/dg.exp
deleted file mode 100644
index 7b7bd4e..0000000
--- a/test/Transforms/LoopStrengthReduce/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
new file mode 100644
index 0000000..e42b67f
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -0,0 +1,300 @@
+; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32
+
+; @simple is the most basic chain of address induction variables. Chaining
+; saves at least one register and avoids complex addressing and setup
+; code.
+;
+; X64: @simple
+; %x * 4
+; X64: shlq $2
+; no other address computation in the preheader
+; X64-NEXT: xorl
+; X64-NEXT: .align
+; X64: %loop
+; no complex address modes
+; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
+;
+; X32: @simple
+; no expensive address computation in the preheader
+; X32-NOT: imul
+; X32: %loop
+; no complex address modes
+; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
+define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+  br label %loop
+loop:
+  %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+  %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+  %v = load i32* %iv
+  %iv1 = getelementptr inbounds i32* %iv, i32 %x
+  %v1 = load i32* %iv1
+  %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+  %v2 = load i32* %iv2
+  %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+  %v3 = load i32* %iv3
+  %s1 = add i32 %s, %v
+  %s2 = add i32 %s1, %v1
+  %s3 = add i32 %s2, %v2
+  %s4 = add i32 %s3, %v3
+  %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+  %cmp = icmp eq i32* %iv4, %b
+  br i1 %cmp, label %exit, label %loop
+exit:
+  ret i32 %s4
+}
+
+; @user is not currently chained because the IV is live across memory ops.
+;
+; X64: @user
+; X64: shlq $4
+; X64: lea
+; X64: lea
+; X64: %loop
+; complex address modes
+; X64: (%{{[^)]+}},%{{[^)]+}},
+;
+; X32: @user
+; expensive address computation in the preheader
+; X32: imul
+; X32: %loop
+; complex address modes
+; X32: (%{{[^)]+}},%{{[^)]+}},
+define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+  br label %loop
+loop:
+  %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+  %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+  %v = load i32* %iv
+  %iv1 = getelementptr inbounds i32* %iv, i32 %x
+  %v1 = load i32* %iv1
+  %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+  %v2 = load i32* %iv2
+  %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+  %v3 = load i32* %iv3
+  %s1 = add i32 %s, %v
+  %s2 = add i32 %s1, %v1
+  %s3 = add i32 %s2, %v2
+  %s4 = add i32 %s3, %v3
+  %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+  store i32 %s4, i32* %iv
+  %cmp = icmp eq i32* %iv4, %b
+  br i1 %cmp, label %exit, label %loop
+exit:
+  ret i32 %s4
+}
+
+; @extrastride is a slightly more interesting case of a single
+; complete chain with multiple strides. The test case IR is what LSR
+; used to do, and exactly what we don't want to do. LSR's new IV
+; chaining feature should now undo the damage.
+;
+; X64: extrastride:
+; We currently don't handle this on X64 because the sexts cause
+; strange increment expressions like this:
+; IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
+;
+; X32: extrastride:
+; no spills in the preheader
+; X32-NOT: mov{{.*}}(%esp){{$}}
+; X32: %for.body{{$}}
+; no complex address modes
+; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
+; no reloads
+; X32-NOT: (%esp)
+define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+  %cmp8 = icmp eq i32 %z, 0
+  br i1 %cmp8, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %add.ptr.sum = shl i32 %main_stride, 1 ; s*2
+  %add.ptr1.sum = add i32 %add.ptr.sum, %main_stride ; s*3
+  %add.ptr2.sum = add i32 %x, %main_stride ; s + x
+  %add.ptr4.sum = shl i32 %main_stride, 2 ; s*4
+  %add.ptr3.sum = add i32 %add.ptr2.sum, %add.ptr4.sum ; total IV stride = s*5+x
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %main.addr.011 = phi i8* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
+  %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %res.addr.09 = phi i32* [ %res, %for.body.lr.ph ], [ %add.ptr7, %for.body ]
+  %0 = bitcast i8* %main.addr.011 to i32*
+  %1 = load i32* %0, align 4
+  %add.ptr = getelementptr inbounds i8* %main.addr.011, i32 %main_stride
+  %2 = bitcast i8* %add.ptr to i32*
+  %3 = load i32* %2, align 4
+  %add.ptr1 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr.sum
+  %4 = bitcast i8* %add.ptr1 to i32*
+  %5 = load i32* %4, align 4
+  %add.ptr2 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr1.sum
+  %6 = bitcast i8* %add.ptr2 to i32*
+  %7 = load i32* %6, align 4
+  %add.ptr3 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr4.sum
+  %8 = bitcast i8* %add.ptr3 to i32*
+  %9 = load i32* %8, align 4
+  %add = add i32 %3, %1
+  %add4 = add i32 %add, %5
+  %add5 = add i32 %add4, %7
+  %add6 = add i32 %add5, %9
+  store i32 %add6, i32* %res.addr.09, align 4
+  %add.ptr6 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr3.sum
+  %add.ptr7 = getelementptr inbounds i32* %res.addr.09, i32 %y
+  %inc = add i32 %i.010, 1
+  %cmp = icmp eq i32 %inc, %z
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; @foldedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' can be folded into the addressing mode.
+; Consequently, we should *not* form any chains.
+;
+; X64: foldedidx:
+; X64: movzbl -3(
+;
+; X32: foldedidx:
+; X32: movzbl -3(
+define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.07 = phi i32 [ 0, %entry ], [ %inc.3, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.07
+  %0 = load i8* %arrayidx, align 1
+  %conv5 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.07
+  %1 = load i8* %arrayidx1, align 1
+  %conv26 = zext i8 %1 to i32
+  %add = add nsw i32 %conv26, %conv5
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.07
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %inc1 = or i32 %i.07, 1
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %inc1
+  %2 = load i8* %arrayidx.1, align 1
+  %conv5.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %inc1
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv26.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv26.1, %conv5.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %inc1
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %inc.12 = or i32 %i.07, 2
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %inc.12
+  %4 = load i8* %arrayidx.2, align 1
+  %conv5.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %inc.12
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv26.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv26.2, %conv5.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %inc.12
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %inc.23 = or i32 %i.07, 3
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %inc.23
+  %6 = load i8* %arrayidx.3, align 1
+  %conv5.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %inc.23
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv26.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv26.3, %conv5.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %inc.23
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %inc.3 = add nsw i32 %i.07, 4
+  %exitcond.3 = icmp eq i32 %inc.3, 400
+  br i1 %exitcond.3, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; @multioper tests instructions with multiple IV user operands. We
+; should be able to chain them independent of each other.
+;
+; X64: @multioper
+; X64: %for.body
+; X64: movl %{{.*}},4)
+; X64-NEXT: leal 1(
+; X64-NEXT: movl %{{.*}},4)
+; X64-NEXT: leal 2(
+; X64-NEXT: movl %{{.*}},4)
+; X64-NEXT: leal 3(
+; X64-NEXT: movl %{{.*}},4)
+;
+; X32: @multioper
+; X32: %for.body
+; X32: movl %{{.*}},4)
+; X32-NEXT: leal 1(
+; X32-NEXT: movl %{{.*}},4)
+; X32-NEXT: leal 2(
+; X32-NEXT: movl %{{.*}},4)
+; X32-NEXT: leal 3(
+; X32-NEXT: movl %{{.*}},4)
+define void @multioper(i32* %a, i32 %n) nounwind {
+entry:
+  br label %for.body
+
+for.body:
+  %p = phi i32* [ %p.next, %for.body ], [ %a, %entry ]
+  %i = phi i32 [ %inc4, %for.body ], [ 0, %entry ]
+  store i32 %i, i32* %p, align 4
+  %inc1 = or i32 %i, 1
+  %add.ptr.i1 = getelementptr inbounds i32* %p, i32 1
+  store i32 %inc1, i32* %add.ptr.i1, align 4
+  %inc2 = add nsw i32 %i, 2
+  %add.ptr.i2 = getelementptr inbounds i32* %p, i32 2
+  store i32 %inc2, i32* %add.ptr.i2, align 4
+  %inc3 = add nsw i32 %i, 3
+  %add.ptr.i3 = getelementptr inbounds i32* %p, i32 3
+  store i32 %inc3, i32* %add.ptr.i3, align 4
+  %p.next = getelementptr inbounds i32* %p, i32 4
+  %inc4 = add nsw i32 %i, 4
+  %cmp = icmp slt i32 %inc4, %n
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+; @testCmpZero has a ICmpZero LSR use that should not be hidden from
+; LSR. Profitable chains should have more than one nonzero increment
+; anyway.
+;
+; X32: @testCmpZero
+; X32: %for.body82.us
+; X32: dec
+; X32: jne
+define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp {
+entry:
+  %dest0 = getelementptr inbounds i8* %src, i32 %srcidx
+  %source0 = getelementptr inbounds i8* %dst, i32 %dstidx
+  %add.ptr79.us.sum = add i32 %srcidx, %len
+  %lftr.limit = getelementptr i8* %src, i32 %add.ptr79.us.sum
+  br label %for.body82.us
+
+for.body82.us:
+  %dest = phi i8* [ %dest0, %entry ], [ %incdec.ptr91.us, %for.body82.us ]
+  %source = phi i8* [ %source0, %entry ], [ %add.ptr83.us, %for.body82.us ]
+  %0 = bitcast i8* %source to i32*
+  %1 = load i32* %0, align 4
+  %trunc = trunc i32 %1 to i8
+  %add.ptr83.us = getelementptr inbounds i8* %source, i32 4
+  %incdec.ptr91.us = getelementptr inbounds i8* %dest, i32 1
+  store i8 %trunc, i8* %dest, align 1
+  %exitcond = icmp eq i8* %incdec.ptr91.us, %lftr.limit
+  br i1 %exitcond, label %return, label %for.body82.us
+
+return:
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
new file mode 100644
index 0000000..d8e0aa9
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
@@ -0,0 +1,96 @@
+; REQUIRES: asserts
+; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X32
+
+; @sharedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' cannot be folded into the addressing mode.
+;
+; This is not quite profitable to chain. But with -stress-ivchain, we
+; can form three address chains in place of the shared induction
+; variable.
+
+; X64: sharedidx:
+; X64: %for.body.preheader
+; X64-NOT: leal ({{.*}},4)
+; X64: %for.body.1
+
+; X32: sharedidx:
+; X32: %for.body.2
+; X32: add
+; X32: add
+; X32: add
+; X32: add
+; X32: add
+; X32: %for.body.3
+define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
+entry:
+  %cmp8 = icmp eq i32 %len, 0
+  br i1 %cmp8, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body.3
+  %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.09
+  %0 = load i8* %arrayidx, align 1
+  %conv6 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
+  %1 = load i8* %arrayidx1, align 1
+  %conv27 = zext i8 %1 to i32
+  %add = add nsw i32 %conv27, %conv6
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %add5 = add i32 %i.09, %s
+  %cmp = icmp ult i32 %add5, %len
+  br i1 %cmp, label %for.body.1, label %for.end
+
+for.end:                                          ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
+  ret void
+
+for.body.1:                                       ; preds = %for.body
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
+  %2 = load i8* %arrayidx.1, align 1
+  %conv6.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv27.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv27.1, %conv6.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %add5.1 = add i32 %add5, %s
+  %cmp.1 = icmp ult i32 %add5.1, %len
+  br i1 %cmp.1, label %for.body.2, label %for.end
+
+for.body.2:                                       ; preds = %for.body.1
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
+  %4 = load i8* %arrayidx.2, align 1
+  %conv6.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv27.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv27.2, %conv6.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %add5.2 = add i32 %add5.1, %s
+  %cmp.2 = icmp ult i32 %add5.2, %len
+  br i1 %cmp.2, label %for.body.3, label %for.end
+
+for.body.3:                                       ; preds = %for.body.2
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
+  %6 = load i8* %arrayidx.3, align 1
+  %conv6.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv27.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv27.3, %conv6.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %add5.3 = add i32 %add5.2, %s
+  %cmp.3 = icmp ult i32 %add5.3, %len
+  br i1 %cmp.3, label %for.body, label %for.end
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
new file mode 100644
index 0000000..84bd88c
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopStrengthReduce/dg.exp b/test/Transforms/LoopStrengthReduce/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopStrengthReduce/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopStrengthReduce/dominate-assert.ll b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
new file mode 100644
index 0000000..b87bf62
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
@@ -0,0 +1,70 @@
+; RUN: opt -loop-reduce %s
+; we used to crash on this one
+
+declare i8* @_Znwm()
+declare i32 @__gxx_personality_v0(...)
+declare void @g()
+define void @f() {
+bb0:
+  br label %bb1
+bb1:
+  %v0 = phi i64 [ 0, %bb0 ], [ %v1, %bb1 ]
+  %v1 = add nsw i64 %v0, 1
+  br i1 undef, label %bb2, label %bb1
+bb2:
+  %v2 = icmp eq i64 %v0, 0
+  br i1 %v2, label %bb6, label %bb3
+bb3:
+  %v3 = invoke noalias i8* @_Znwm()
+          to label %bb5 unwind label %bb4
+bb4:
+  %v4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  br label %bb9
+bb5:
+  %v5 = bitcast i8* %v3 to i32**
+  %add.ptr.i = getelementptr inbounds i32** %v5, i64 %v0
+  br label %bb6
+bb6:
+  %v6 = phi i32** [ null, %bb2 ], [ %add.ptr.i, %bb5 ]
+  invoke void @g()
+          to label %bb7 unwind label %bb8
+bb7:
+  unreachable
+bb8:
+  %v7 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  br label %bb9
+bb9:
+  resume { i8*, i32 } zeroinitializer
+}
+
+
+define void @h() {
+bb1:
+  invoke void @g() optsize
+          to label %bb2 unwind label %bb5
+bb2:
+  %arrayctor.cur = phi i8* [ undef, %bb1 ], [ %arrayctor.next, %bb3 ]
+  invoke void @g() optsize
+          to label %bb3 unwind label %bb6
+bb3:
+  %arrayctor.next = getelementptr inbounds i8* %arrayctor.cur, i64 1
+  br label %bb2
+bb4:
+  ret void
+bb5:
+  %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  invoke void @g() optsize
+          to label %bb4 unwind label %bb7
+bb6:
+  %tmp1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  %arraydestroy.isempty = icmp eq i8* undef, %arrayctor.cur
+  ret void
+bb7:
+  %lpad.nonloopexit = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/ivchain.ll b/test/Transforms/LoopStrengthReduce/ivchain.ll
new file mode 100644
index 0000000..ce7ad19
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ivchain.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+;
+; PR11782: bad cast to AddRecExpr.
+; A sign extend feeds an IVUser and cannot be hoisted into the AddRec.
+; CollectIVChains should bailout on this case.
+
+%struct = type { i8*, i8*, i16, i64, i16, i16, i16, i64, i64, i16, i8*, i64, i64, i64 }
+
+; CHECK: @test
+; CHECK: for.body:
+; CHECK: lsr.iv = phi %struct
+; CHECK: br
+define i32 @test(i8* %h, i32 %more) nounwind uwtable {
+entry:
+  br i1 undef, label %land.end238, label %return
+
+land.end238:                                      ; preds = %if.end229
+  br label %for.body
+
+for.body:                                         ; preds = %sw.epilog, %land.end238
+  %fbh.0 = phi %struct* [ undef, %land.end238 ], [ %incdec.ptr, %sw.epilog ]
+  %column_n.0 = phi i16 [ 0, %land.end238 ], [ %inc601, %sw.epilog ]
+  %conv250 = sext i16 %column_n.0 to i32
+  %add257 = add nsw i32 %conv250, 1
+  %conv258 = trunc i32 %add257 to i16
+  %cmp263 = icmp ult i16 undef, 2
+  br label %if.end388
+
+if.end388:                                        ; preds = %if.then380, %if.else356
+  %ColLength = getelementptr inbounds %struct* %fbh.0, i64 0, i32 7
+  %call405 = call signext i16 @SQLColAttribute(i8* undef, i16 zeroext %conv258, i16 zeroext 1003, i8* null, i16 signext 0, i16* null, i64* %ColLength) nounwind
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %sw.bb542, %sw.bb523, %if.end475
+  %inc601 = add i16 %column_n.0, 1
+  %incdec.ptr = getelementptr inbounds %struct* %fbh.0, i64 1
+  br label %for.body
+
+return:                                           ; preds = %entry
+  ret i32 1
+}
+
+declare signext i16 @SQLColAttribute(i8*, i16 zeroext, i16 zeroext, i8*, i16 signext, i16*, i64*)
diff --git a/test/Transforms/LoopStrengthReduce/lit.local.cfg b/test/Transforms/LoopStrengthReduce/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 76aa08c..96904c6 100644
--- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
 ; PR9939
 
-; LSR should property handle the post-inc offset when folding the
+; LSR should properly handle the post-inc offset when folding the
 ; non-IV operand of an icmp into the IV.
 
 ; CHECK:   %4 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
diff --git a/test/Transforms/LoopStrengthReduce/pr12018.ll b/test/Transforms/LoopStrengthReduce/pr12018.ll
new file mode 100644
index 0000000..ee7b1e8
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr12018.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-reduce
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+
+%struct.nsTArray = type { i8 }
+%struct.nsTArrayHeader = type { i32 }
+
+define void @_Z6foobarR8nsTArray(%struct.nsTArray* %aValues, i32 %foo, %struct.nsTArrayHeader* %bar) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %_ZN8nsTArray9ElementAtEi.exit, %entry
+  %i.06 = phi i32 [ %add, %_ZN8nsTArray9ElementAtEi.exit ], [ 0, %entry ]
+  %call.i = call %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev() nounwind
+  %add.ptr.i = getelementptr inbounds %struct.nsTArrayHeader* %call.i, i32 1
+  %tmp = bitcast %struct.nsTArrayHeader* %add.ptr.i to %struct.nsTArray*
+  %arrayidx = getelementptr inbounds %struct.nsTArray* %tmp, i32 %i.06
+  %add = add nsw i32 %i.06, 1
+  call void @llvm.dbg.value(metadata !{%struct.nsTArray* %aValues}, i64 0, metadata !0) nounwind
+  br label %_ZN8nsTArray9ElementAtEi.exit
+
+_ZN8nsTArray9ElementAtEi.exit:                    ; preds = %for.body
+  %arrayidx.i = getelementptr inbounds %struct.nsTArray* %tmp, i32 %add
+  call void @_ZN11nsTArray15ComputeDistanceERKS_Rd(%struct.nsTArray* %arrayidx, %struct.nsTArray* %arrayidx.i) nounwind
+  %cmp = icmp slt i32 %add, %foo
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %_ZN8nsTArray9ElementAtEi.exit
+  ret void
+}
+
+declare void @_ZN11nsTArray15ComputeDistanceERKS_Rd(%struct.nsTArray*, %struct.nsTArray*)
+
+declare %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev()
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!0 = metadata !{i32 786689}                       ; [ DW_TAG_arg_variable ]
diff --git a/test/Transforms/LoopStrengthReduce/pr12048.ll b/test/Transforms/LoopStrengthReduce/pr12048.ll
new file mode 100644
index 0000000..7e0f2ad
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr12048.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-reduce
+
+define void @resolve_name() nounwind uwtable ssp {
+  br label %while.cond40.preheader
+while.cond132.while.cond.loopexit_crit_edge:
+  br label %while.cond40.preheader
+while.cond40.preheader:
+  br label %while.cond40
+while.cond40:
+  %indvars.iv194 = phi i8* [ null, %while.cond40.preheader ], [ %scevgep, %while.body51 ]
+  %tmp.1 = phi i8* [ undef, %while.cond40.preheader ], [ %incdec.ptr, %while.body51 ]
+  switch i8 undef, label %while.body51 [
+    i8 0, label %if.then59
+  ]
+while.body51:                                     ; preds = %land.end50
+  %incdec.ptr = getelementptr inbounds i8* %tmp.1, i64 1
+  %scevgep = getelementptr i8* %indvars.iv194, i64 1
+  br label %while.cond40
+if.then59:                                        ; preds = %while.end
+  br i1 undef, label %if.then64, label %if.end113
+if.then64:                                        ; preds = %if.then59
+  %incdec.ptr88.tmp.2 = select i1 undef, i8* undef, i8* undef
+  br label %if.end113
+if.end113:                                        ; preds = %if.then64, %if.then59
+  %tmp.4 = phi i8* [ %incdec.ptr88.tmp.2, %if.then64 ], [ undef, %if.then59 ]
+  %tmp.4195 = ptrtoint i8* %tmp.4 to i64
+  br  label %while.cond132.preheader
+while.cond132.preheader:                          ; preds = %if.end113
+  %cmp133173 = icmp eq i8* %tmp.1, %tmp.4
+  br i1 %cmp133173, label %while.cond40.preheader, label %while.body139.lr.ph
+while.body139.lr.ph:                              ; preds = %while.cond132.preheader
+  %scevgep198 = getelementptr i8* %indvars.iv194, i64 0
+  %scevgep198199 = ptrtoint i8* %scevgep198 to i64
+  br label %while.body139
+while.body139:                                    ; preds = %while.body139, %while.body139.lr.ph
+  %start_of_var.0177 = phi i8* [ %tmp.1, %while.body139.lr.ph ], [ null, %while.body139 ]
+  br i1 undef, label %while.cond132.while.cond.loopexit_crit_edge, label %while.body139
+}
diff --git a/test/Transforms/LoopUnroll/dg.exp b/test/Transforms/LoopUnroll/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopUnroll/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopUnroll/lit.local.cfg b/test/Transforms/LoopUnroll/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopUnroll/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
index 21c0ec3..05d98d5 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
@@ -1,5 +1,5 @@
-; RUN: opt -loop-unswitch -loop-unswitch-threshold 30 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
-; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 30 -verify-loop-info -verify-dom-info %s | FileCheck %s
+; RUN: opt -loop-unswitch -loop-unswitch-threshold 13 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 13 -verify-loop-info -verify-dom-info %s | FileCheck %s
 
 ; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
 ; STATS: 1 loop-unswitch - Number of switches unswitched
diff --git a/test/Transforms/LoopUnswitch/dg.exp b/test/Transforms/LoopUnswitch/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopUnswitch/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopUnswitch/lit.local.cfg b/test/Transforms/LoopUnswitch/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerAtomic/dg.exp b/test/Transforms/LowerAtomic/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LowerAtomic/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerAtomic/lit.local.cfg b/test/Transforms/LowerAtomic/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LowerAtomic/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerExpectIntrinsic/dg.exp b/test/Transforms/LowerExpectIntrinsic/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LowerExpectIntrinsic/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerExpectIntrinsic/lit.local.cfg b/test/Transforms/LowerExpectIntrinsic/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/LowerExpectIntrinsic/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/LowerInvoke/dg.exp b/test/Transforms/LowerInvoke/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LowerInvoke/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerInvoke/lit.local.cfg b/test/Transforms/LowerInvoke/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LowerInvoke/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerSwitch/dg.exp b/test/Transforms/LowerSwitch/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LowerSwitch/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerSwitch/lit.local.cfg b/test/Transforms/LowerSwitch/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LowerSwitch/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Mem2Reg/dg.exp b/test/Transforms/Mem2Reg/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Mem2Reg/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Mem2Reg/lit.local.cfg b/test/Transforms/Mem2Reg/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Mem2Reg/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/MemCpyOpt/dg.exp b/test/Transforms/MemCpyOpt/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/MemCpyOpt/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/MemCpyOpt/lit.local.cfg b/test/Transforms/MemCpyOpt/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/MergeFunc/dg.exp b/test/Transforms/MergeFunc/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/MergeFunc/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/MergeFunc/lit.local.cfg b/test/Transforms/MergeFunc/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/MergeFunc/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ObjCARC/apelim.ll b/test/Transforms/ObjCARC/apelim.ll
new file mode 100644
index 0000000..8c7b5b1
--- /dev/null
+++ b/test/Transforms/ObjCARC/apelim.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -objc-arc-apelim < %s | FileCheck %s
+; rdar://10227311
+
+@llvm.global_ctors = appending global [2 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_x }, { i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_y }]
+
+@x = global i32 0
+
+declare i32 @bar() nounwind
+
+define i32 @foo() nounwind {
+entry:
+  ret i32 5
+}
+
+define internal void @__cxx_global_var_init() {
+entry:
+  %call = call i32 @foo()
+  store i32 %call, i32* @x, align 4
+  ret void
+}
+
+define internal void @__dxx_global_var_init() {
+entry:
+  %call = call i32 @bar()
+  store i32 %call, i32* @x, align 4
+  ret void
+}
+
+; CHECK: define internal void @_GLOBAL__I_x()
+; CHECK-NOT: @objc
+; CHECK: }
+define internal void @_GLOBAL__I_x() {
+entry:
+  %0 = call i8* @objc_autoreleasePoolPush() nounwind
+  call void @__cxx_global_var_init()
+  call void @objc_autoreleasePoolPop(i8* %0) nounwind
+  ret void
+}
+
+; CHECK: define internal void @_GLOBAL__I_y()
+; CHECK: %0 = call i8* @objc_autoreleasePoolPush() nounwind
+; CHECK: call void @objc_autoreleasePoolPop(i8* %0) nounwind
+; CHECK: }
+define internal void @_GLOBAL__I_y() {
+entry:
+  %0 = call i8* @objc_autoreleasePoolPush() nounwind
+  call void @__dxx_global_var_init()
+  call void @objc_autoreleasePoolPop(i8* %0) nounwind
+  ret void
+}
+
+declare i8* @objc_autoreleasePoolPush()
+declare void @objc_autoreleasePoolPop(i8*)
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 44c2602..552f4e0 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -786,7 +786,7 @@ C:
 @__block_holder_tmp_1 = external constant %block1
 define void @test23() {
 entry:
-  %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
+  %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind, !clang.arc.copy_on_escape !0
   call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
   call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
   call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
@@ -801,13 +801,28 @@ entry:
 ; CHECK: }
 define void @test23b(i8* %p) {
 entry:
-  %0 = call i8* @objc_retainBlock(i8* %p) nounwind
+  %0 = call i8* @objc_retainBlock(i8* %p) nounwind, !clang.arc.copy_on_escape !0
   call void @callee()
   call void @use_pointer(i8* %p)
   call void @objc_release(i8* %p) nounwind
   ret void
 }
 
+; Don't optimize objc_retainBlock, because there's no copy_on_escape metadata.
+
+; CHECK: define void @test23c(
+; CHECK: @objc_retainBlock
+; CHECK: @objc_release
+; CHECK: }
+define void @test23c() {
+entry:
+  %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
+  call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
+  call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
+  call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
+  ret void
+}
+
 ; Any call can decrement a retain count.
 
 ; CHECK: define void @test24(
diff --git a/test/Transforms/ObjCARC/contract-storestrong-ivar.ll b/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
index 4ad78e7..4a9b314 100644
--- a/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -objc-arc-contract -S < %s | FileCheck %s
 
-; CHECK: call void @objc_storeStrong(i8**
+; CHECK: tail call void @objc_storeStrong(i8**
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin11.0.0"
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
index fda2ff4..4ff0596 100644
--- a/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -9,7 +9,7 @@ declare void @objc_release(i8*)
 
 ; CHECK: define void @test0(
 ; CHECK: entry:
-; CHECK-NEXT: call void @objc_storeStrong(i8** @x, i8* %p) nounwind
+; CHECK-NEXT: tail call void @objc_storeStrong(i8** @x, i8* %p) nounwind
 ; CHECK-NEXT: ret void
 define void @test0(i8* %p) {
 entry:
diff --git a/test/Transforms/ObjCARC/dg.exp b/test/Transforms/ObjCARC/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ObjCARC/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ObjCARC/lit.local.cfg b/test/Transforms/ObjCARC/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ObjCARC/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
new file mode 100644
index 0000000..9728f6e
--- /dev/null
+++ b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
@@ -0,0 +1,122 @@
+; RUN: opt -S -objc-arc < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+%struct.__block_byref_x = type { i8*, %struct.__block_byref_x*, i32, i32, i32 }
+%struct.__block_descriptor = type { i64, i64 }
+@_NSConcreteStackBlock = external global i8*
+@__block_descriptor_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
+
+; The optimizer should make use of the !clang.arc.no_objc_arc_exceptions
+; metadata and eliminate the retainBlock+release pair here.
+; rdar://10803830.
+
+; CHECK: define void @test0(
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test0() {
+entry:
+  %x = alloca %struct.__block_byref_x, align 8
+  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8
+  %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 0
+  store i8* null, i8** %byref.isa, align 8
+  %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 1
+  store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, align 8
+  %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 2
+  store i32 0, i32* %byref.flags, align 8
+  %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 3
+  store i32 32, i32* %byref.size, align 4
+  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0
+  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1
+  store i32 1107296256, i32* %block.flags, align 8
+  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2
+  store i32 0, i32* %block.reserved, align 4
+  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3
+  store i8* bitcast (void (i8*)* @__foo_block_invoke_0 to i8*), i8** %block.invoke, align 8
+  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4
+  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5
+  %t1 = bitcast %struct.__block_byref_x* %x to i8*
+  store i8* %t1, i8** %block.captured, align 8
+  %t2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8*
+  %t3 = call i8* @objc_retainBlock(i8* %t2) nounwind, !clang.arc.copy_on_escape !4
+  %t4 = getelementptr inbounds i8* %t3, i64 16
+  %t5 = bitcast i8* %t4 to i8**
+  %t6 = load i8** %t5, align 8
+  %t7 = bitcast i8* %t6 to void (i8*)*
+  invoke void %t7(i8* %t3)
+          to label %invoke.cont unwind label %lpad, !clang.arc.no_objc_arc_exceptions !4
+
+invoke.cont:                                      ; preds = %entry
+  call void @objc_release(i8* %t3) nounwind, !clang.imprecise_release !4
+  call void @_Block_object_dispose(i8* %t1, i32 8)
+  ret void
+
+lpad:                                             ; preds = %entry
+  %t8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          cleanup
+  call void @_Block_object_dispose(i8* %t1, i32 8)
+  resume { i8*, i32 } %t8
+}
+
+; There is no !clang.arc.no_objc_arc_exceptions
+; metadata here, so the optimizer shouldn't eliminate anything.
+
+; CHECK: define void @test0_no_metadata(
+; CHECK: call i8* @objc_retainBlock(
+; CHECK: invoke
+; CHECK: call void @objc_release(
+; CHECK: }
+define void @test0_no_metadata() {
+entry:
+  %x = alloca %struct.__block_byref_x, align 8
+  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8
+  %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 0
+  store i8* null, i8** %byref.isa, align 8
+  %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 1
+  store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, align 8
+  %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 2
+  store i32 0, i32* %byref.flags, align 8
+  %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 3
+  store i32 32, i32* %byref.size, align 4
+  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0
+  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1
+  store i32 1107296256, i32* %block.flags, align 8
+  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2
+  store i32 0, i32* %block.reserved, align 4
+  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3
+  store i8* bitcast (void (i8*)* @__foo_block_invoke_0 to i8*), i8** %block.invoke, align 8
+  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4
+  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5
+  %t1 = bitcast %struct.__block_byref_x* %x to i8*
+  store i8* %t1, i8** %block.captured, align 8
+  %t2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8*
+  %t3 = call i8* @objc_retainBlock(i8* %t2) nounwind, !clang.arc.copy_on_escape !4
+  %t4 = getelementptr inbounds i8* %t3, i64 16
+  %t5 = bitcast i8* %t4 to i8**
+  %t6 = load i8** %t5, align 8
+  %t7 = bitcast i8* %t6 to void (i8*)*
+  invoke void %t7(i8* %t3)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %entry
+  call void @objc_release(i8* %t3) nounwind, !clang.imprecise_release !4
+  call void @_Block_object_dispose(i8* %t1, i32 8)
+  ret void
+
+lpad:                                             ; preds = %entry
+  %t8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          cleanup
+  call void @_Block_object_dispose(i8* %t1, i32 8)
+  resume { i8*, i32 } %t8
+}
+
+declare i8* @objc_retainBlock(i8*)
+declare void @objc_release(i8*)
+declare void @_Block_object_dispose(i8*, i32)
+declare i32 @__objc_personality_v0(...)
+declare void @__foo_block_invoke_0(i8* nocapture) uwtable ssp
+
+!4 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block-load.ll b/test/Transforms/ObjCARC/retain-block-load.ll
new file mode 100644
index 0000000..a5170e3
--- /dev/null
+++ b/test/Transforms/ObjCARC/retain-block-load.ll
@@ -0,0 +1,51 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+; rdar://10803830
+; The optimizer should be able to prove that the block does not
+; "escape", so the retainBlock+release pair can be eliminated.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%struct.__block_descriptor = type { i64, i64 }
+
+@_NSConcreteStackBlock = external global i8*
+@__block_descriptor_tmp = external global { i64, i64, i8*, i8* }
+
+; CHECK: define void @test() {
+; CHECK-NOT: @objc
+; CHECK: declare i8* @objc_retainBlock(i8*)
+; CHECK: declare void @objc_release(i8*)
+
+define void @test() {
+entry:
+  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, align 8
+  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 0
+  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 1
+  store i32 1073741824, i32* %block.flags, align 8
+  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 2
+  store i32 0, i32* %block.reserved, align 4
+  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 3
+  store i8* bitcast (i32 (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8
+  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 4
+  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 5
+  store i32 4, i32* %block.captured, align 8
+  %tmp = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block to i8*
+  %tmp1 = call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  %tmp2 = getelementptr inbounds i8* %tmp1, i64 16
+  %tmp3 = bitcast i8* %tmp2 to i8**
+  %tmp4 = load i8** %tmp3, align 8
+  %tmp5 = bitcast i8* %tmp4 to i32 (i8*)*
+  %call = call i32 %tmp5(i8* %tmp1)
+  call void @objc_release(i8* %tmp1) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+declare i32 @__test_block_invoke_0(i8* nocapture %.block_descriptor) nounwind readonly
+
+declare i8* @objc_retainBlock(i8*)
+
+declare void @objc_release(i8*)
+
+!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block.ll b/test/Transforms/ObjCARC/retain-block.ll
new file mode 100644
index 0000000..b3b62d3
--- /dev/null
+++ b/test/Transforms/ObjCARC/retain-block.ll
@@ -0,0 +1,138 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+!0 = metadata !{}
+
+declare i8* @objc_retain(i8*)
+declare void @callee(i8)
+declare void @use_pointer(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_retainBlock(i8*)
+declare i8* @objc_autorelease(i8*)
+
+; Basic retainBlock+release elimination.
+
+; CHECK: define void @test0(i8* %tmp) {
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test0(i8* %tmp) {
+entry:
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test0, but there's no copy_on_escape metadata, so there's no
+; optimization possible.
+
+; CHECK: define void @test0_no_metadata(i8* %tmp) {
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: }
+define void @test0_no_metadata(i8* %tmp) {
+entry:
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test0, but the pointer escapes, so there's no
+; optimization possible.
+
+; CHECK: define void @test0_escape(i8* %tmp, i8** %z) {
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: }
+define void @test0_escape(i8* %tmp, i8** %z) {
+entry:
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  store i8* %tmp2, i8** %z
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test0_escape, but there's no intervening call.
+
+; CHECK: define void @test0_just_escape(i8* %tmp, i8** %z) {
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: }
+define void @test0_just_escape(i8* %tmp, i8** %z) {
+entry:
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  store i8* %tmp2, i8** %z
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Basic nested retainBlock+release elimination.
+
+; CHECK: define void @test1(i8* %tmp) {
+; CHECK-NOT: @objc
+; CHECK: tail call i8* @objc_retain(i8* %tmp) nounwind
+; CHECK-NOT: @objc
+; CHECK: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test1(i8* %tmp) {
+entry:
+  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test1, but there's no copy_on_escape metadata, so there's no
+; retainBlock+release optimization possible. But we can still eliminate
+; the outer retain+release.
+
+; CHECK: define void @test1_no_metadata(i8* %tmp) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test1_no_metadata(i8* %tmp) {
+entry:
+  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test1, but the pointer escapes, so there's no
+; retainBlock+release optimization possible. But we can still eliminate
+; the outer retain+release
+
+; CHECK: define void @test1_escape(i8* %tmp, i8** %z) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK-NEXT: store i8* %tmp2, i8** %z
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test1_escape(i8* %tmp, i8** %z) {
+entry:
+  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  store i8* %tmp2, i8** %z
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+  ret void
+}
diff --git a/test/Transforms/PhaseOrdering/dg.exp b/test/Transforms/PhaseOrdering/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/PhaseOrdering/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/PhaseOrdering/lit.local.cfg b/test/Transforms/PhaseOrdering/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/PruneEH/dg.exp b/test/Transforms/PruneEH/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/PruneEH/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/PruneEH/lit.local.cfg b/test/Transforms/PruneEH/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/PruneEH/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Reassociate/dg.exp b/test/Transforms/Reassociate/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Reassociate/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Reassociate/lit.local.cfg b/test/Transforms/Reassociate/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Reassociate/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll b/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
index 7546bf5..f62ed70 100644
--- a/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
+++ b/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
@@ -21,10 +21,6 @@ define internal i32 @f() {
 
 declare i8* @__cxa_begin_catch(i8*) nounwind
 
-declare i8* @llvm.eh.exception() nounwind
-
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
-
 declare void @__cxa_end_catch()
 
 declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/SCCP/dg.exp b/test/Transforms/SCCP/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/SCCP/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/SCCP/lit.local.cfg b/test/Transforms/SCCP/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/SCCP/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ScalarRepl/dg.exp b/test/Transforms/ScalarRepl/dg.exp
deleted file mode 100644
index 39954d8..0000000
--- a/test/Transforms/ScalarRepl/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
diff --git a/test/Transforms/ScalarRepl/lit.local.cfg b/test/Transforms/ScalarRepl/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/ScalarRepl/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/ScalarRepl/phi-cycle.ll b/test/Transforms/ScalarRepl/phi-cycle.ll
new file mode 100644
index 0000000..cb5101c
--- /dev/null
+++ b/test/Transforms/ScalarRepl/phi-cycle.ll
@@ -0,0 +1,77 @@
+; RUN: opt -S -scalarrepl-ssa < %s | FileCheck %s
+; rdar://10589171
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.foo = type { i32, i32 }
+
+@.str = private unnamed_addr constant [6 x i8] c"x=%d\0A\00", align 1
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind uwtable {
+entry:
+  %f = alloca %struct.foo, align 4
+  %x.i = getelementptr inbounds %struct.foo* %f, i64 0, i32 0
+  store i32 1, i32* %x.i, align 4
+  %y.i = getelementptr inbounds %struct.foo* %f, i64 0, i32 1
+  br label %while.cond.i
+
+; CHECK: while.cond.i:
+; CHECK-NEXT: %tmp = phi i32 [ 1, %entry ], [ %tmp2, %while.cond.backedge.i ]
+; CHECK-NEXT: %pos.0.i = phi i32 [ 1, %entry ], [ %xtmp.i, %while.cond.backedge.i ]
+; CHECK-NEXT: %left.0.i = phi i32 [ 1, %entry ], [ %dec.i, %while.cond.backedge.i ]
+; CHECK-NOT: phi
+while.cond.i:                                     ; preds = %while.cond.backedge.i, %entry
+  %tmp = phi i32 [ 1, %entry ], [ %tmp2, %while.cond.backedge.i ]
+  %pos.0.i = phi i32 [ 1, %entry ], [ %xtmp.i, %while.cond.backedge.i ]
+  %left.0.i = phi i32 [ 1, %entry ], [ %dec.i, %while.cond.backedge.i ]
+  %cmp.i = icmp sgt i32 %left.0.i, 0
+  br i1 %cmp.i, label %while.body.i, label %while.cond.i.func.exit_crit_edge
+
+while.cond.i.func.exit_crit_edge:                 ; preds = %while.cond.i
+  br label %func.exit
+
+while.body.i:                                     ; preds = %while.cond.i
+  %dec.i = add nsw i32 %left.0.i, -1
+  switch i32 1, label %while.body.i.func.exit_crit_edge [
+    i32 0, label %while.cond.backedge.i
+    i32 1, label %sw.bb.i
+  ]
+
+while.body.i.func.exit_crit_edge:                 ; preds = %while.body.i
+  br label %func.exit
+
+sw.bb.i:                                          ; preds = %while.body.i
+  %cmp2.i = icmp eq i32 %tmp, 1
+  br i1 %cmp2.i, label %if.then.i, label %if.end.i
+
+if.then.i:                                        ; preds = %sw.bb.i
+  store i32 %pos.0.i, i32* %x.i, align 4
+  br label %if.end.i
+
+; CHECK: if.end.i:
+; CHECK-NEXT: %tmp1 = phi i32 [ %pos.0.i, %if.then.i ], [ %tmp, %sw.bb.i ]
+; CHECK-NOT: phi
+if.end.i:                                         ; preds = %if.then.i, %sw.bb.i
+  %tmp1 = phi i32 [ %pos.0.i, %if.then.i ], [ %tmp, %sw.bb.i ]
+  store i32 %tmp1, i32* %y.i, align 4
+  br label %while.cond.backedge.i
+
+; CHECK: while.cond.backedge.i:
+; CHECK-NEXT: %tmp2 = phi i32 [ %tmp1, %if.end.i ], [ %tmp, %while.body.i ]
+; CHECK-NOT: phi
+while.cond.backedge.i:                            ; preds = %if.end.i, %while.body.i
+  %tmp2 = phi i32 [ %tmp1, %if.end.i ], [ %tmp, %while.body.i ]
+  %xtmp.i = add i32 %pos.0.i, 1
+  br label %while.cond.i
+
+; CHECK: func.exit:
+; CHECK-NOT: load
+; CHECK: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp) nounwind
+func.exit:                                        ; preds = %while.body.i.func.exit_crit_edge, %while.cond.i.func.exit_crit_edge
+  %tmp3 = load i32* %x.i, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp3) nounwind
+  ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll b/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
index 568e61c..e2765e5 100644
--- a/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
+++ b/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
@@ -1,9 +1,14 @@
-; RUN: opt < %s -simplifycfg -S | grep {br i1 } | count 4
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 ; PR3354
 ; Do not merge bb1 into the entry block, it might trap.
 
 @G = extern_weak global i32
 
+; CHECK: @test(
+; CHECK: br i1 %tmp25
+; CHECK: bb1:
+; CHECK: sdiv
+
 define i32 @test(i32 %tmp21, i32 %tmp24) {
 	%tmp25 = icmp sle i32 %tmp21, %tmp24		
 	br i1 %tmp25, label %bb2, label %bb1	
@@ -18,6 +23,11 @@ bb6:
 	ret i32 927
 }
 
+; CHECK: @test2(
+; CHECK: br i1 %tmp34
+; CHECK: bb5:
+; CHECK: sdiv
+
 define i32 @test2(i32 %tmp21, i32 %tmp24, i1 %tmp34) {
 	br i1 %tmp34, label %bb5, label %bb6
 
diff --git a/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll b/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll
deleted file mode 100644
index ebacf2f..0000000
--- a/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: opt %s -simplifycfg -disable-output
-; PR8445
-
-define void @test() {
-      unwind
-}
diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index 5cfc77c..a61867f 100644
--- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -1,7 +1,10 @@
-; RUN: opt < %s -simplifycfg  -S | grep select
-; RUN: opt < %s -simplifycfg  -S | grep br | count 2
+; RUN: opt < %s -simplifycfg -phi-node-folding-threshold=2 -S | FileCheck %s
 
-define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind  {
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @test1(i32 %a, i32 %b, i32 %c) nounwind  {
+; CHECK: @test1
 entry:
         %tmp1 = icmp eq i32 %b, 0
         br i1 %tmp1, label %bb1, label %bb3
@@ -9,6 +12,11 @@ entry:
 bb1:            ; preds = %entry
 	%tmp2 = icmp sgt i32 %c, 1
 	br i1 %tmp2, label %bb2, label %bb3
+; CHECK: bb1:
+; CHECK-NEXT: icmp sgt i32 %c, 1
+; CHECK-NEXT: add i32 %a, 1
+; CHECK-NEXT: select i1 %tmp2, i32 %tmp3, i32 %a
+; CHECK-NEXT: br label %bb3
 
 bb2:		; preds = bb1
 	%tmp3 = add i32 %a, 1
@@ -19,3 +27,20 @@ bb3:		; preds = %bb2, %entry
         %tmp5 = sub i32 %tmp4, 1
 	ret i32 %tmp5
 }
+
+declare i8 @llvm.cttz.i8(i8, i1)
+
+define i8 @test2(i8 %a) {
+; CHECK: @test2
+  br i1 undef, label %bb_true, label %bb_false
+bb_true:
+  %b = tail call i8 @llvm.cttz.i8(i8 %a, i1 false)
+  br label %join
+bb_false:
+  br label %join
+join:
+  %c = phi i8 [%b, %bb_true], [%a, %bb_false]
+; CHECK: select
+  ret i8 %c
+}
+
diff --git a/test/Transforms/SimplifyCFG/dg.exp b/test/Transforms/SimplifyCFG/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/SimplifyCFG/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/SimplifyCFG/lit.local.cfg b/test/Transforms/SimplifyCFG/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SimplifyCFG/multiple-phis.ll b/test/Transforms/SimplifyCFG/multiple-phis.ll
new file mode 100644
index 0000000..7845423
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/multiple-phis.ll
@@ -0,0 +1,39 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+
+; It's not worthwhile to if-convert one of the phi nodes and leave
+; the other behind, because that still requires a branch. If
+; SimplifyCFG if-converts one of the phis, it should do both.
+
+; CHECK:      %div.high.addr.0 = select i1 %cmp1, i32 %div, i32 %high.addr.0
+; CHECK-NEXT: %low.0.add2 = select i1 %cmp1, i32 %low.0, i32 %add2
+; CHECK-NEXT: br label %while.cond
+
+define i32 @upper_bound(i32* %r, i32 %high, i32 %k) nounwind {
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %if.then, %if.else, %entry
+  %high.addr.0 = phi i32 [ %high, %entry ], [ %div, %if.then ], [ %high.addr.0, %if.else ]
+  %low.0 = phi i32 [ 0, %entry ], [ %low.0, %if.then ], [ %add2, %if.else ]
+  %cmp = icmp ult i32 %low.0, %high.addr.0
+  br i1 %cmp, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+  %add = add i32 %low.0, %high.addr.0
+  %div = udiv i32 %add, 2
+  %idxprom = zext i32 %div to i64
+  %arrayidx = getelementptr inbounds i32* %r, i64 %idxprom
+  %0 = load i32* %arrayidx
+  %cmp1 = icmp ult i32 %k, %0
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:                                          ; preds = %while.body
+  br label %while.cond
+
+if.else:                                          ; preds = %while.body
+  %add2 = add i32 %div, 1
+  br label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  ret i32 %low.0
+}
diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
new file mode 100644
index 0000000..c791785
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
@@ -0,0 +1,88 @@
+; RUN: opt -simplifycfg -S -o - < %s | FileCheck %s
+
+declare void @helper(i32)
+
+define void @test1(i1 %a, i1 %b) {
+; CHECK: @test1
+entry:
+  br i1 %a, label %Y, label %X, !prof !0
+; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !0
+
+X:
+  %c = or i1 %b, false
+  br i1 %c, label %Z, label %Y, !prof !1
+
+Y:
+  call void @helper(i32 0)
+  ret void
+
+Z:
+  call void @helper(i32 1)
+  ret void
+}
+
+define void @test2(i1 %a, i1 %b) {
+; CHECK: @test2
+entry:
+  br i1 %a, label %X, label %Y, !prof !1
+; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !1
+; CHECK-NOT: !prof
+
+X:
+  %c = or i1 %b, false
+  br i1 %c, label %Z, label %Y, !prof !2
+
+Y:
+  call void @helper(i32 0)
+  ret void
+
+Z:
+  call void @helper(i32 1)
+  ret void
+}
+
+define void @test3(i1 %a, i1 %b) {
+; CHECK: @test3
+; CHECK-NOT: !prof
+entry:
+  br i1 %a, label %X, label %Y, !prof !1
+
+X:
+  %c = or i1 %b, false
+  br i1 %c, label %Z, label %Y
+
+Y:
+  call void @helper(i32 0)
+  ret void
+
+Z:
+  call void @helper(i32 1)
+  ret void
+}
+
+define void @test4(i1 %a, i1 %b) {
+; CHECK: @test4
+; CHECK-NOT: !prof
+entry:
+  br i1 %a, label %X, label %Y
+
+X:
+  %c = or i1 %b, false
+  br i1 %c, label %Z, label %Y, !prof !1
+
+Y:
+  call void @helper(i32 0)
+  ret void
+
+Z:
+  call void @helper(i32 1)
+  ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 3, i32 5}
+!1 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!2 = metadata !{metadata !"branch_weights", i32 1, i32 2}
+
+; CHECK: !0 = metadata !{metadata !"branch_weights", i32 5, i32 11}
+; CHECK: !1 = metadata !{metadata !"branch_weights", i32 1, i32 5}
+; CHECK-NOT: !2
diff --git a/test/Transforms/SimplifyCFG/select-gep.ll b/test/Transforms/SimplifyCFG/select-gep.ll
index 009f05e..7654d02 100644
--- a/test/Transforms/SimplifyCFG/select-gep.ll
+++ b/test/Transforms/SimplifyCFG/select-gep.ll
@@ -35,6 +35,6 @@ if.end:
   ret i8* %x.addr
 
 ; CHECK: @test2
-; CHECK: %x.addr = select i1 %cmp, i8* %incdec.ptr, i8* %y
-; CHECK: ret i8* %x.addr
+; CHECK: %incdec.ptr.y = select i1 %cmp, i8* %incdec.ptr, i8* %y
+; CHECK: ret i8* %incdec.ptr.y
 }
diff --git a/test/Transforms/SimplifyCFG/switch-masked-bits.ll b/test/Transforms/SimplifyCFG/switch-masked-bits.ll
index fc83ec2..3b0c48b 100644
--- a/test/Transforms/SimplifyCFG/switch-masked-bits.ll
+++ b/test/Transforms/SimplifyCFG/switch-masked-bits.ll
@@ -15,8 +15,8 @@ c:
   ret i32 5
 ; CHECK: @test1
 ; CHECK: %cond = icmp eq i32 %i, 24
-; CHECK: %merge = select i1 %cond, i32 5, i32 0
-; CHECK: ret i32 %merge
+; CHECK: %. = select i1 %cond, i32 5, i32 0
+; CHECK: ret i32 %.
 }
 
 
diff --git a/test/Transforms/SimplifyCFG/switch-on-const-select.ll b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
index 5494a65..673a62b 100644
--- a/test/Transforms/SimplifyCFG/switch-on-const-select.ll
+++ b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
@@ -115,7 +115,7 @@ entry:
 cont:
 ; CHECK: %lt = icmp slt i64 %x, %y
     %lt = icmp slt i64 %x, %y
-; CHECK-NEXT: br i1 %lt, label %a, label %r
+; CHECK-NEXT: select i1 %lt, i32 -1, i32 1
     %qux = select i1 %lt, i32 0, i32 2
     switch i32 %qux, label %bees [
         i32 0, label %a
diff --git a/test/Transforms/SimplifyCFG/unreachable-blocks.ll b/test/Transforms/SimplifyCFG/unreachable-blocks.ll
new file mode 100644
index 0000000..1df0eab
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/unreachable-blocks.ll
@@ -0,0 +1,28 @@
+; RUN: opt -simplifycfg < %s -disable-output
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-linux-gnu"
+
+; PR11825
+define void @test1() {
+entry:
+  br label %return
+
+while_block:                                      ; preds = %and_if_cont2, %and_if_cont
+  %newlen = sub i32 %newlen, 1
+  %newptr = getelementptr i8* %newptr, i64 1
+  %test = icmp sgt i32 %newlen, 0
+  br i1 %test, label %and_if1, label %and_if_cont2
+
+and_if1:                                          ; preds = %while_block
+  %char = load i8* %newptr
+  %test2 = icmp ule i8 %char, 32
+  br label %and_if_cont2
+
+and_if_cont2:                                     ; preds = %and_if1, %while_block
+  %a18 = phi i1 [ %test, %while_block ], [ %test2, %and_if1 ]
+  br i1 %a18, label %while_block, label %return
+
+return:                                           ; preds = %and_if_cont2, %and_if_cont
+  ret void
+}
diff --git a/test/Transforms/SimplifyLibCalls/Printf.ll b/test/Transforms/SimplifyLibCalls/Printf.ll
index c98e79a..489c993 100644
--- a/test/Transforms/SimplifyLibCalls/Printf.ll
+++ b/test/Transforms/SimplifyLibCalls/Printf.ll
@@ -3,7 +3,7 @@
 @str = internal constant [13 x i8] c"hello world\0A\00"         ; <[13 x i8]*> [#uses=1]
 @str1 = internal constant [2 x i8] c"h\00"              ; <[2 x i8]*> [#uses=1]
 
-; CHECK: internal unnamed_addr constant [12 x i8] c"hello world\00"
+; CHECK: private unnamed_addr constant [12 x i8] c"hello world\00"
 
 declare i32 @printf(i8*, ...)
 
diff --git a/test/Transforms/SimplifyLibCalls/cos.ll b/test/Transforms/SimplifyLibCalls/cos.ll
new file mode 100644
index 0000000..6a8ce8c
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/cos.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define double @foo(double %d) nounwind readnone {
+; CHECK: @foo
+    %1 = fsub double -0.000000e+00, %d
+    %2 = call double @cos(double %1) nounwind readnone
+; CHECK: call double @cos(double %d)
+    ret double %2
+}
+
+declare double @cos(double) nounwind readnone
diff --git a/test/Transforms/SimplifyLibCalls/dg.exp b/test/Transforms/SimplifyLibCalls/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/SimplifyLibCalls/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/SimplifyLibCalls/lit.local.cfg b/test/Transforms/SimplifyLibCalls/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Sink/dg.exp b/test/Transforms/Sink/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Sink/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Sink/lit.local.cfg b/test/Transforms/Sink/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Sink/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/StripSymbols/dg.exp b/test/Transforms/StripSymbols/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/StripSymbols/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/StripSymbols/lit.local.cfg b/test/Transforms/StripSymbols/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/StripSymbols/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/TailCallElim/dg.exp b/test/Transforms/TailCallElim/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/TailCallElim/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/TailCallElim/lit.local.cfg b/test/Transforms/TailCallElim/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/TailCallElim/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/TailDup/X86/dg.exp b/test/Transforms/TailDup/X86/dg.exp
deleted file mode 100644
index 7b7bd4e..0000000
--- a/test/Transforms/TailDup/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/Transforms/TailDup/X86/lit.local.cfg b/test/Transforms/TailDup/X86/lit.local.cfg
new file mode 100644
index 0000000..84bd88c
--- /dev/null
+++ b/test/Transforms/TailDup/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/TailDup/dg.exp b/test/Transforms/TailDup/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/TailDup/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/TailDup/lit.local.cfg b/test/Transforms/TailDup/lit.local.cfg
new file mode 100644
index 0000000..39c8039
--- /dev/null
+++ b/test/Transforms/TailDup/lit.local.cfg
@@ -0,0 +1,12 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True