From dce4a407a24b04eebc6a376f8e62b41aaa7b071f Mon Sep 17 00:00:00 2001
From: Stephen Hines <srhines@google.com>
Date: Thu, 29 May 2014 02:49:00 -0700
Subject: Update LLVM for 3.5 rebase (r209712).

Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
---
 test/Analysis/BlockFrequencyInfo/bad_input.ll      |  50 +++
 test/Analysis/BlockFrequencyInfo/basic.ll          |  55 +--
 .../Analysis/BlockFrequencyInfo/double_backedge.ll |  27 ++
 test/Analysis/BlockFrequencyInfo/double_exit.ll    | 165 ++++++++
 test/Analysis/BlockFrequencyInfo/irreducible.ll    | 421 +++++++++++++++++++++
 .../BlockFrequencyInfo/loop_with_branch.ll         |  44 +++
 .../nested_loop_with_branches.ll                   |  59 +++
 test/Analysis/BranchProbabilityInfo/loop.ll        |  42 +-
 test/Analysis/BranchProbabilityInfo/pr18705.ll     |  58 +++
 test/Analysis/CostModel/AArch64/lit.local.cfg      |   3 +
 test/Analysis/CostModel/AArch64/select.ll          |  38 ++
 test/Analysis/CostModel/AArch64/store.ll           |  22 ++
 test/Analysis/CostModel/ARM64/lit.local.cfg        |   3 -
 test/Analysis/CostModel/ARM64/select.ll            |  38 --
 test/Analysis/CostModel/ARM64/store.ll             |  22 --
 test/Analysis/CostModel/PowerPC/ext.ll             |   2 +-
 test/Analysis/CostModel/PowerPC/insert_extract.ll  |   4 +-
 test/Analysis/CostModel/PowerPC/load_store.ll      |   8 +-
 test/Analysis/CostModel/X86/intrinsic-cost.ll      |  28 ++
 test/Analysis/CostModel/X86/vdiv-cost.ll           |  92 +++++
 test/Analysis/CostModel/X86/vselect-cost.ll        | 126 ++++++
 test/Analysis/Delinearization/a.ll                 |  11 -
 test/Analysis/Delinearization/gcd_multiply_expr.ll | 153 ++++++++
 test/Analysis/Delinearization/himeno_1.ll          |  10 -
 test/Analysis/Delinearization/himeno_2.ll          |  10 -
 .../iv_times_constant_in_subscript.ll              |  45 +++
 test/Analysis/Delinearization/lit.local.cfg        |   2 +-
 .../multidim_ivs_and_integer_offsets_3d.ll         |  10 -
 .../multidim_ivs_and_integer_offsets_nts_3d.ll     |  10 -
 .../multidim_ivs_and_parameteric_offsets_3d.ll     |  10 -
 .../Delinearization/multidim_only_ivs_2d.ll        |   5 -
 .../Delinearization/multidim_only_ivs_2d_nested.ll |   2 +
 .../Delinearization/multidim_only_ivs_3d.ll        |  10 -
 .../Delinearization/multidim_only_ivs_3d_cast.ll   |  10 -
 ...tidim_two_accesses_different_delinearization.ll |  43 +++
 test/Analysis/Delinearization/undef.ll             |  38 ++
 test/Analysis/DependenceAnalysis/Banerjee.ll       |  22 +-
 test/Analysis/DependenceAnalysis/GCD.ll            |  16 +-
 test/Analysis/LazyCallGraph/basic.ll               |  50 +++
 test/Analysis/ScalarEvolution/max-trip-count.ll    | 109 ++++++
 40 files changed, 1655 insertions(+), 218 deletions(-)
 create mode 100644 test/Analysis/BlockFrequencyInfo/bad_input.ll
 create mode 100644 test/Analysis/BlockFrequencyInfo/double_backedge.ll
 create mode 100644 test/Analysis/BlockFrequencyInfo/double_exit.ll
 create mode 100644 test/Analysis/BlockFrequencyInfo/irreducible.ll
 create mode 100644 test/Analysis/BlockFrequencyInfo/loop_with_branch.ll
 create mode 100644 test/Analysis/BlockFrequencyInfo/nested_loop_with_branches.ll
 create mode 100644 test/Analysis/BranchProbabilityInfo/pr18705.ll
 create mode 100644 test/Analysis/CostModel/AArch64/lit.local.cfg
 create mode 100644 test/Analysis/CostModel/AArch64/select.ll
 create mode 100644 test/Analysis/CostModel/AArch64/store.ll
 delete mode 100644 test/Analysis/CostModel/ARM64/lit.local.cfg
 delete mode 100644 test/Analysis/CostModel/ARM64/select.ll
 delete mode 100644 test/Analysis/CostModel/ARM64/store.ll
 create mode 100644 test/Analysis/CostModel/X86/vdiv-cost.ll
 create mode 100644 test/Analysis/CostModel/X86/vselect-cost.ll
 create mode 100644 test/Analysis/Delinearization/gcd_multiply_expr.ll
 create mode 100644 test/Analysis/Delinearization/iv_times_constant_in_subscript.ll
 create mode 100644 test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll
 create mode 100644 test/Analysis/Delinearization/undef.ll

(limited to 'test/Analysis')

diff --git a/test/Analysis/BlockFrequencyInfo/bad_input.ll b/test/Analysis/BlockFrequencyInfo/bad_input.ll
new file mode 100644
index 0000000..bcdc1e6
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/bad_input.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+declare void @g(i32 %x)
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'branch_weight_0':
+; CHECK-NEXT: block-frequency-info: branch_weight_0
+define void @branch_weight_0(i32 %a) {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+  br label %for.body
+
+; Check that we get 1,4 instead of 0,3.
+; CHECK-NEXT: for.body: float = 4.0,
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  call void @g(i32 %i)
+  %inc = add i32 %i, 1
+  %cmp = icmp ugt i32 %inc, %a
+  br i1 %cmp, label %for.end, label %for.body, !prof !0
+
+; CHECK-NEXT: for.end: float = 1.0, int = [[ENTRY]]
+for.end:
+  ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 0, i32 3}
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'infinite_loop'
+; CHECK-NEXT: block-frequency-info: infinite_loop
+define void @infinite_loop(i1 %x) {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+  br i1 %x, label %for.body, label %for.end, !prof !1
+
+; Check that the loop scale maxes out at 4096, giving 2048 here.
+; CHECK-NEXT: for.body: float = 2048.0,
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  call void @g(i32 %i)
+  %inc = add i32 %i, 1
+  br label %for.body
+
+; Check that the exit weight is half of entry, since half is lost in the
+; infinite loop above.
+; CHECK-NEXT: for.end: float = 0.5,
+for.end:
+  ret void
+}
+
+!1 = metadata !{metadata !"branch_weights", i32 1, i32 1}
diff --git a/test/Analysis/BlockFrequencyInfo/basic.ll b/test/Analysis/BlockFrequencyInfo/basic.ll
index ce29fb5..006e6ab 100644
--- a/test/Analysis/BlockFrequencyInfo/basic.ll
+++ b/test/Analysis/BlockFrequencyInfo/basic.ll
@@ -1,13 +1,14 @@
 ; RUN: opt < %s -analyze -block-freq | FileCheck %s
 
 define i32 @test1(i32 %i, i32* %a) {
-; CHECK: Printing analysis {{.*}} for function 'test1'
-; CHECK: entry = 1.0
+; CHECK-LABEL: Printing analysis {{.*}} for function 'test1':
+; CHECK-NEXT: block-frequency-info: test1
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
 entry:
   br label %body
 
 ; Loop backedges are weighted and thus their bodies have a greater frequency.
-; CHECK: body = 32.0
+; CHECK-NEXT: body: float = 32.0,
 body:
   %iv = phi i32 [ 0, %entry ], [ %next, %body ]
   %base = phi i32 [ 0, %entry ], [ %sum, %body ]
@@ -18,29 +19,29 @@ body:
   %exitcond = icmp eq i32 %next, %i
   br i1 %exitcond, label %exit, label %body
 
-; CHECK: exit = 1.0
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
 exit:
   ret i32 %sum
 }
 
 define i32 @test2(i32 %i, i32 %a, i32 %b) {
-; CHECK: Printing analysis {{.*}} for function 'test2'
-; CHECK: entry = 1.0
+; CHECK-LABEL: Printing analysis {{.*}} for function 'test2':
+; CHECK-NEXT: block-frequency-info: test2
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
 entry:
   %cond = icmp ult i32 %i, 42
   br i1 %cond, label %then, label %else, !prof !0
 
 ; The 'then' branch is predicted more likely via branch weight metadata.
-; CHECK: then = 0.94116
+; CHECK-NEXT: then: float = 0.9411{{[0-9]*}},
 then:
   br label %exit
 
-; CHECK: else = 0.05877
+; CHECK-NEXT: else: float = 0.05882{{[0-9]*}},
 else:
   br label %exit
 
-; FIXME: It may be a bug that we don't sum back to 1.0.
-; CHECK: exit = 0.99993
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
 exit:
   %result = phi i32 [ %a, %then ], [ %b, %else ]
   ret i32 %result
@@ -49,37 +50,37 @@ exit:
 !0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
 
 define i32 @test3(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
-; CHECK: Printing analysis {{.*}} for function 'test3'
-; CHECK: entry = 1.0
+; CHECK-LABEL: Printing analysis {{.*}} for function 'test3':
+; CHECK-NEXT: block-frequency-info: test3
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
 entry:
   switch i32 %i, label %case_a [ i32 1, label %case_b
                                  i32 2, label %case_c
                                  i32 3, label %case_d
                                  i32 4, label %case_e ], !prof !1
 
-; CHECK: case_a = 0.04998
+; CHECK-NEXT: case_a: float = 0.05,
 case_a:
   br label %exit
 
-; CHECK: case_b = 0.04998
+; CHECK-NEXT: case_b: float = 0.05,
 case_b:
   br label %exit
 
 ; The 'case_c' branch is predicted more likely via branch weight metadata.
-; CHECK: case_c = 0.79998
+; CHECK-NEXT: case_c: float = 0.8,
 case_c:
   br label %exit
 
-; CHECK: case_d = 0.04998
+; CHECK-NEXT: case_d: float = 0.05,
 case_d:
   br label %exit
 
-; CHECK: case_e = 0.04998
+; CHECK-NEXT: case_e: float = 0.05,
 case_e:
   br label %exit
 
-; FIXME: It may be a bug that we don't sum back to 1.0.
-; CHECK: exit = 0.99993
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
 exit:
   %result = phi i32 [ %a, %case_a ],
                     [ %b, %case_b ],
@@ -91,44 +92,50 @@ exit:
 
 !1 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4}
 
-; CHECK: Printing analysis {{.*}} for function 'nested_loops'
-; CHECK: entry = 1.0
-; This test doesn't seem to be assigning sensible frequencies to nested loops.
 define void @nested_loops(i32 %a) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'nested_loops':
+; CHECK-NEXT: block-frequency-info: nested_loops
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
 entry:
   br label %for.cond1.preheader
 
+; CHECK-NEXT: for.cond1.preheader: float = 4001.0,
 for.cond1.preheader:
   %x.024 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
   br label %for.cond4.preheader
 
+; CHECK-NEXT: for.cond4.preheader: float = 16008001.0,
 for.cond4.preheader:
   %y.023 = phi i32 [ 0, %for.cond1.preheader ], [ %inc9, %for.inc8 ]
   %add = add i32 %y.023, %x.024
   br label %for.body6
 
+; CHECK-NEXT: for.body6: float = 64048012001.0,
 for.body6:
   %z.022 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
   %add7 = add i32 %add, %z.022
-  tail call void @g(i32 %add7) #2
+  tail call void @g(i32 %add7)
   %inc = add i32 %z.022, 1
   %cmp5 = icmp ugt i32 %inc, %a
   br i1 %cmp5, label %for.inc8, label %for.body6, !prof !2
 
+; CHECK-NEXT: for.inc8: float = 16008001.0,
 for.inc8:
   %inc9 = add i32 %y.023, 1
   %cmp2 = icmp ugt i32 %inc9, %a
   br i1 %cmp2, label %for.inc11, label %for.cond4.preheader, !prof !2
 
+; CHECK-NEXT: for.inc11: float = 4001.0,
 for.inc11:
   %inc12 = add i32 %x.024, 1
   %cmp = icmp ugt i32 %inc12, %a
   br i1 %cmp, label %for.end13, label %for.cond1.preheader, !prof !2
 
+; CHECK-NEXT: for.end13: float = 1.0, int = [[ENTRY]]
 for.end13:
   ret void
 }
 
-declare void @g(i32) #1
+declare void @g(i32)
 
 !2 = metadata !{metadata !"branch_weights", i32 1, i32 4000}
diff --git a/test/Analysis/BlockFrequencyInfo/double_backedge.ll b/test/Analysis/BlockFrequencyInfo/double_backedge.ll
new file mode 100644
index 0000000..df8217c
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/double_backedge.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+define void @double_backedge(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'double_backedge':
+; CHECK-NEXT: block-frequency-info: double_backedge
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+  br label %loop
+
+loop:
+; CHECK-NEXT: loop: float = 10.0,
+  br i1 %x, label %exit, label %loop.1, !prof !0
+
+loop.1:
+; CHECK-NEXT: loop.1: float = 9.0,
+  br i1 %x, label %loop, label %loop.2, !prof !1
+
+loop.2:
+; CHECK-NEXT: loop.2: float = 5.0,
+  br label %loop
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+  ret void
+}
+!0 = metadata !{metadata !"branch_weights", i32 1, i32 9}
+!1 = metadata !{metadata !"branch_weights", i32 4, i32 5}
diff --git a/test/Analysis/BlockFrequencyInfo/double_exit.ll b/test/Analysis/BlockFrequencyInfo/double_exit.ll
new file mode 100644
index 0000000..75f664d
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/double_exit.ll
@@ -0,0 +1,165 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'double_exit':
+; CHECK-NEXT: block-frequency-info: double_exit
+define i32 @double_exit(i32 %N) {
+; Mass = 1
+; Frequency = 1
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+  br label %outer
+
+; Mass = 1
+; Backedge mass = 1/3, exit mass = 2/3
+; Loop scale = 3/2
+; Pseudo-edges = exit
+; Pseudo-mass = 1
+; Frequency = 1*3/2*1 = 3/2
+; CHECK-NEXT: outer: float = 1.5,
+outer:
+  %I.0 = phi i32 [ 0, %entry ], [ %inc6, %outer.inc ]
+  %Return.0 = phi i32 [ 0, %entry ], [ %Return.1, %outer.inc ]
+  %cmp = icmp slt i32 %I.0, %N
+  br i1 %cmp, label %inner, label %exit, !prof !2 ; 2:1
+
+; Mass = 1
+; Backedge mass = 3/5, exit mass = 2/5
+; Loop scale = 5/2
+; Pseudo-edges = outer.inc @ 1/5, exit @ 1/5
+; Pseudo-mass = 2/3
+; Frequency = 3/2*1*5/2*2/3 = 5/2
+; CHECK-NEXT: inner: float = 2.5,
+inner:
+  %Return.1 = phi i32 [ %Return.0, %outer ], [ %call4, %inner.inc ]
+  %J.0 = phi i32 [ %I.0, %outer ], [ %inc, %inner.inc ]
+  %cmp2 = icmp slt i32 %J.0, %N
+  br i1 %cmp2, label %inner.body, label %outer.inc, !prof !1 ; 4:1
+
+; Mass = 4/5
+; Frequency = 5/2*4/5 = 2
+; CHECK-NEXT: inner.body: float = 2.0,
+inner.body:
+  %call = call i32 @c2(i32 %I.0, i32 %J.0)
+  %tobool = icmp ne i32 %call, 0
+  br i1 %tobool, label %exit, label %inner.inc, !prof !0 ; 3:1
+
+; Mass = 3/5
+; Frequency = 5/2*3/5 = 3/2
+; CHECK-NEXT: inner.inc: float = 1.5,
+inner.inc:
+  %call4 = call i32 @logic2(i32 %Return.1, i32 %I.0, i32 %J.0)
+  %inc = add nsw i32 %J.0, 1
+  br label %inner
+
+; Mass = 1/3
+; Frequency = 3/2*1/3 = 1/2
+; CHECK-NEXT: outer.inc: float = 0.5,
+outer.inc:
+  %inc6 = add nsw i32 %I.0, 1
+  br label %outer
+
+; Mass = 1
+; Frequency = 1
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+exit:
+  %Return.2 = phi i32 [ %Return.1, %inner.body ], [ %Return.0, %outer ]
+  ret i32 %Return.2
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 1, i32 3}
+!1 = metadata !{metadata !"branch_weights", i32 4, i32 1}
+!2 = metadata !{metadata !"branch_weights", i32 2, i32 1}
+
+declare i32 @c2(i32, i32)
+declare i32 @logic2(i32, i32, i32)
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'double_exit_in_loop':
+; CHECK-NEXT: block-frequency-info: double_exit_in_loop
+define i32 @double_exit_in_loop(i32 %N) {
+; Mass = 1
+; Frequency = 1
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+  br label %outer
+
+; Mass = 1
+; Backedge mass = 1/2, exit mass = 1/2
+; Loop scale = 2
+; Pseudo-edges = exit
+; Pseudo-mass = 1
+; Frequency = 1*2*1 = 2
+; CHECK-NEXT: outer: float = 2.0,
+outer:
+  %I.0 = phi i32 [ 0, %entry ], [ %inc12, %outer.inc ]
+  %Return.0 = phi i32 [ 0, %entry ], [ %Return.3, %outer.inc ]
+  %cmp = icmp slt i32 %I.0, %N
+  br i1 %cmp, label %middle, label %exit, !prof !3 ; 1:1
+
+; Mass = 1
+; Backedge mass = 1/3, exit mass = 2/3
+; Loop scale = 3/2
+; Pseudo-edges = outer.inc
+; Pseudo-mass = 1/2
+; Frequency = 2*1*3/2*1/2 = 3/2
+; CHECK-NEXT: middle: float = 1.5,
+middle:
+  %J.0 = phi i32 [ %I.0, %outer ], [ %inc9, %middle.inc ]
+  %Return.1 = phi i32 [ %Return.0, %outer ], [ %Return.2, %middle.inc ]
+  %cmp2 = icmp slt i32 %J.0, %N
+  br i1 %cmp2, label %inner, label %outer.inc, !prof !2 ; 2:1
+
+; Mass = 1
+; Backedge mass = 3/5, exit mass = 2/5
+; Loop scale = 5/2
+; Pseudo-edges = middle.inc @ 1/5, outer.inc @ 1/5
+; Pseudo-mass = 2/3
+; Frequency = 3/2*1*5/2*2/3 = 5/2
+; CHECK-NEXT: inner: float = 2.5,
+inner:
+  %Return.2 = phi i32 [ %Return.1, %middle ], [ %call7, %inner.inc ]
+  %K.0 = phi i32 [ %J.0, %middle ], [ %inc, %inner.inc ]
+  %cmp5 = icmp slt i32 %K.0, %N
+  br i1 %cmp5, label %inner.body, label %middle.inc, !prof !1 ; 4:1
+
+; Mass = 4/5
+; Frequency = 5/2*4/5 = 2
+; CHECK-NEXT: inner.body: float = 2.0,
+inner.body:
+  %call = call i32 @c3(i32 %I.0, i32 %J.0, i32 %K.0)
+  %tobool = icmp ne i32 %call, 0
+  br i1 %tobool, label %outer.inc, label %inner.inc, !prof !0 ; 3:1
+
+; Mass = 3/5
+; Frequency = 5/2*3/5 = 3/2
+; CHECK-NEXT: inner.inc: float = 1.5,
+inner.inc:
+  %call7 = call i32 @logic3(i32 %Return.2, i32 %I.0, i32 %J.0, i32 %K.0)
+  %inc = add nsw i32 %K.0, 1
+  br label %inner
+
+; Mass = 1/3
+; Frequency = 3/2*1/3 = 1/2
+; CHECK-NEXT: middle.inc: float = 0.5,
+middle.inc:
+  %inc9 = add nsw i32 %J.0, 1
+  br label %middle
+
+; Mass = 1/2
+; Frequency = 2*1/2 = 1
+; CHECK-NEXT: outer.inc: float = 1.0,
+outer.inc:
+  %Return.3 = phi i32 [ %Return.2, %inner.body ], [ %Return.1, %middle ]
+  %inc12 = add nsw i32 %I.0, 1
+  br label %outer
+
+; Mass = 1
+; Frequency = 1
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+exit:
+  ret i32 %Return.0
+}
+
+!3 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+
+declare i32 @c3(i32, i32, i32)
+declare i32 @logic3(i32, i32, i32, i32)
diff --git a/test/Analysis/BlockFrequencyInfo/irreducible.ll b/test/Analysis/BlockFrequencyInfo/irreducible.ll
new file mode 100644
index 0000000..af4ad15
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/irreducible.ll
@@ -0,0 +1,421 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+; A loop with multiple exits isn't irreducible.  It should be handled
+; correctly.
+;
+; CHECK-LABEL: Printing analysis {{.*}} for function 'multiexit':
+; CHECK-NEXT: block-frequency-info: multiexit
+define void @multiexit(i1 %x) {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+  br label %loop.1
+
+; CHECK-NEXT: loop.1: float = 2.0,
+loop.1:
+  br i1 %x, label %exit.1, label %loop.2, !prof !0
+
+; CHECK-NEXT: loop.2: float = 1.75,
+loop.2:
+  br i1 %x, label %exit.2, label %loop.1, !prof !1
+
+; CHECK-NEXT: exit.1: float = 0.25,
+exit.1:
+  br label %return
+
+; CHECK-NEXT: exit.2: float = 0.75,
+exit.2:
+  br label %return
+
+; CHECK-NEXT: return: float = 1.0, int = [[ENTRY]]
+return:
+  ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 1, i32 7}
+!1 = metadata !{metadata !"branch_weights", i32 3, i32 4}
+
+; Irreducible control flow
+; ========================
+;
+; LoopInfo defines a loop as a non-trivial SCC dominated by a single block,
+; called the header.  A given loop, L, can have sub-loops, which are loops
+; within the subgraph of L that excludes the header.
+;
+; In addition to loops, -block-freq has limited support for irreducible SCCs,
+; which are SCCs with multiple entry blocks.  Irreducible SCCs are discovered
+; on they fly, and modelled as loops with multiple headers.
+;
+; The headers of irreducible sub-SCCs consist of its entry blocks and all nodes
+; that are targets of a backedge within it (excluding backedges within true
+; sub-loops).
+;
+; -block-freq is currently designed to act like a block is inserted that
+; intercepts all the edges to the headers.  All backedges and entries point to
+; this block.  Its successors are the headers, which split the frequency
+; evenly.
+;
+; There are a number of testcases below.  Only the first two have detailed
+; explanations.
+;
+; Testcase #1
+; ===========
+;
+; In this case c1 and c2 should have frequencies of 15/7 and 13/7,
+; respectively.  To calculate this, consider assigning 1.0 to entry, and
+; distributing frequency iteratively (to infinity).  At the first iteration,
+; entry gives 3/4 to c1 and 1/4 to c2.  At every step after, c1 and c2 give 3/4
+; of what they have to each other.  Somehow, all of it comes out to exit.
+;
+;       c1 = 3/4 + 1/4*3/4 + 3/4*3^2/4^2 + 1/4*3^3/4^3 + 3/4*3^3/4^3 + ...
+;       c2 = 1/4 + 3/4*3/4 + 1/4*3^2/4^2 + 3/4*3^3/4^3 + 1/4*3^3/4^3 + ...
+;
+; Simplify by splitting up the odd and even terms of the series and taking out
+; factors so that the infite series matches:
+;
+;       c1 =  3/4 *(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...)
+;          +  3/16*(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...)
+;       c2 =  1/4 *(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...)
+;          +  9/16*(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...)
+;
+;       c1 = 15/16*(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...)
+;       c2 = 13/16*(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...)
+;
+; Since this geometric series sums to 16/7:
+;
+;       c1 = 15/7
+;       c2 = 13/7
+;
+; If we treat c1 and c2 as members of the same loop, the exit frequency of the
+; loop as a whole is 1/4, so the loop scale should be 4.  Summing c1 and c2
+; gives 28/7, or 4.0, which is nice confirmation of the math above.
+;
+; -block-freq currently treats the two nodes as equals.
+define void @multientry(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'multientry':
+; CHECK-NEXT: block-frequency-info: multientry
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+  br i1 %x, label %c1, label %c2, !prof !2
+
+c1:
+; CHECK-NEXT: c1: float = 2.0,
+; The "correct" answer is: float = 2.142857{{[0-9]*}},
+  br i1 %x, label %c2, label %exit, !prof !2
+
+c2:
+; CHECK-NEXT: c2: float = 2.0,
+; The "correct" answer is: float = 1.857142{{[0-9]*}},
+  br i1 %x, label %c1, label %exit, !prof !2
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+  ret void
+}
+
+!2 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+
+; Testcase #2
+; ===========
+;
+; In this case c1 and c2 should be treated as equals in a single loop.  The
+; exit frequency is 1/3, so the scaling factor for the loop should be 3.0.  The
+; loop is entered 2/3 of the time, and c1 and c2 split the total loop frequency
+; evenly (1/2), so they should each have frequencies of 1.0 (3.0*2/3*1/2).
+; Another way of computing this result is by assigning 1.0 to entry and showing
+; that c1 and c2 should accumulate frequencies of:
+;
+;       1/3   +   2/9   +   4/27  +   8/81  + ...
+;     2^0/3^1 + 2^1/3^2 + 2^2/3^3 + 2^3/3^4 + ...
+;
+; At the first step, c1 and c2 each get 1/3 of the entry.  At each subsequent
+; step, c1 and c2 each get 1/3 of what's left in c1 and c2 combined.  This
+; infinite series sums to 1.
+;
+; Since the currently algorithm *always* assumes entry blocks are equal,
+; -block-freq gets the right answers here.
+define void @crossloops(i2 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'crossloops':
+; CHECK-NEXT: block-frequency-info: crossloops
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+  switch i2 %x, label %exit [ i2 1, label %c1
+                              i2 2, label %c2 ], !prof !3
+
+c1:
+; CHECK-NEXT: c1: float = 1.0,
+  switch i2 %x, label %exit [ i2 1, label %c1
+                              i2 2, label %c2 ], !prof !3
+
+c2:
+; CHECK-NEXT: c2: float = 1.0,
+  switch i2 %x, label %exit [ i2 1, label %c1
+                              i2 2, label %c2 ], !prof !3
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+  ret void
+}
+
+!3 = metadata !{metadata !"branch_weights", i32 2, i32 2, i32 2}
+
+; A true loop with irreducible control flow inside.
+define void @loop_around_irreducible(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_around_irreducible':
+; CHECK-NEXT: block-frequency-info: loop_around_irreducible
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+  br label %loop
+
+loop:
+; CHECK-NEXT: loop: float = 4.0, int = [[HEAD:[0-9]+]]
+  br i1 %x, label %left, label %right, !prof !4
+
+left:
+; CHECK-NEXT: left: float = 8.0,
+  br i1 %x, label %right, label %loop.end, !prof !5
+
+right:
+; CHECK-NEXT: right: float = 8.0,
+  br i1 %x, label %left, label %loop.end, !prof !5
+
+loop.end:
+; CHECK-NEXT: loop.end: float = 4.0, int = [[HEAD]]
+  br i1 %x, label %loop, label %exit, !prof !5
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+  ret void
+}
+!4 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!5 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+
+; Two unrelated irreducible SCCs.
+define void @two_sccs(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'two_sccs':
+; CHECK-NEXT: block-frequency-info: two_sccs
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+  br i1 %x, label %a, label %b, !prof !6
+
+a:
+; CHECK-NEXT: a: float = 0.75,
+  br i1 %x, label %a.left, label %a.right, !prof !7
+
+a.left:
+; CHECK-NEXT: a.left: float = 1.5,
+  br i1 %x, label %a.right, label %exit, !prof !6
+
+a.right:
+; CHECK-NEXT: a.right: float = 1.5,
+  br i1 %x, label %a.left, label %exit, !prof !6
+
+b:
+; CHECK-NEXT: b: float = 0.25,
+  br i1 %x, label %b.left, label %b.right, !prof !7
+
+b.left:
+; CHECK-NEXT: b.left: float = 0.625,
+  br i1 %x, label %b.right, label %exit, !prof !8
+
+b.right:
+; CHECK-NEXT: b.right: float = 0.625,
+  br i1 %x, label %b.left, label %exit, !prof !8
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+  ret void
+}
+!6 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+!7 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!8 = metadata !{metadata !"branch_weights", i32 4, i32 1}
+
+; A true loop inside irreducible control flow.
+define void @loop_inside_irreducible(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_inside_irreducible':
+; CHECK-NEXT: block-frequency-info: loop_inside_irreducible
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+  br i1 %x, label %left, label %right, !prof !9
+
+left:
+; CHECK-NEXT: left: float = 2.0,
+  br i1 %x, label %right, label %exit, !prof !10
+
+right:
+; CHECK-NEXT: right: float = 2.0, int = [[RIGHT:[0-9]+]]
+  br label %loop
+
+loop:
+; CHECK-NEXT: loop: float = 6.0,
+  br i1 %x, label %loop, label %right.end, !prof !11
+
+right.end:
+; CHECK-NEXT: right.end: float = 2.0, int = [[RIGHT]]
+  br i1 %x, label %left, label %exit, !prof !10
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+  ret void
+}
+!9 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!10 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+!11 = metadata !{metadata !"branch_weights", i32 2, i32 1}
+
+; Irreducible control flow in a branch that's in a true loop.
+define void @loop_around_branch_with_irreducible(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_around_branch_with_irreducible':
+; CHECK-NEXT: block-frequency-info: loop_around_branch_with_irreducible
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+  br label %loop
+
+loop:
+; CHECK-NEXT: loop: float = 2.0, int = [[LOOP:[0-9]+]]
+  br i1 %x, label %normal, label %irreducible.entry, !prof !12
+
+normal:
+; CHECK-NEXT: normal: float = 1.5,
+  br label %loop.end
+
+irreducible.entry:
+; CHECK-NEXT: irreducible.entry: float = 0.5, int = [[IRREDUCIBLE:[0-9]+]]
+  br i1 %x, label %left, label %right, !prof !13
+
+left:
+; CHECK-NEXT: left: float = 1.0,
+  br i1 %x, label %right, label %irreducible.exit, !prof !12
+
+right:
+; CHECK-NEXT: right: float = 1.0,
+  br i1 %x, label %left, label %irreducible.exit, !prof !12
+
+irreducible.exit:
+; CHECK-NEXT: irreducible.exit: float = 0.5, int = [[IRREDUCIBLE]]
+  br label %loop.end
+
+loop.end:
+; CHECK-NEXT: loop.end: float = 2.0, int = [[LOOP]]
+  br i1 %x, label %loop, label %exit, !prof !13
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+  ret void
+}
+!12 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+!13 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+
+; Irreducible control flow between two true loops.
+define void @loop_around_branch_with_irreducible_around_loop(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_around_branch_with_irreducible_around_loop':
+; CHECK-NEXT: block-frequency-info: loop_around_branch_with_irreducible_around_loop
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+  br label %loop
+
+loop:
+; CHECK-NEXT: loop: float = 3.0, int = [[LOOP:[0-9]+]]
+  br i1 %x, label %normal, label %irreducible, !prof !14
+
+normal:
+; CHECK-NEXT: normal: float = 2.0,
+  br label %loop.end
+
+irreducible:
+; CHECK-NEXT: irreducible: float = 1.0,
+  br i1 %x, label %left, label %right, !prof !15
+
+left:
+; CHECK-NEXT: left: float = 2.0,
+  br i1 %x, label %right, label %loop.end, !prof !16
+
+right:
+; CHECK-NEXT: right: float = 2.0, int = [[RIGHT:[0-9]+]]
+  br label %right.loop
+
+right.loop:
+; CHECK-NEXT: right.loop: float = 10.0,
+  br i1 %x, label %right.loop, label %right.end, !prof !17
+
+right.end:
+; CHECK-NEXT: right.end: float = 2.0, int = [[RIGHT]]
+  br i1 %x, label %left, label %loop.end, !prof !16
+
+loop.end:
+; CHECK-NEXT: loop.end: float = 3.0, int = [[LOOP]]
+  br i1 %x, label %loop, label %exit, !prof !14
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+  ret void
+}
+!14 = metadata !{metadata !"branch_weights", i32 2, i32 1}
+!15 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!16 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+!17 = metadata !{metadata !"branch_weights", i32 4, i32 1}
+
+; An irreducible SCC with a non-header.
+define void @nonheader(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'nonheader':
+; CHECK-NEXT: block-frequency-info: nonheader
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+  br i1 %x, label %left, label %right, !prof !18
+
+left:
+; CHECK-NEXT: left: float = 1.0,
+  br i1 %x, label %bottom, label %exit, !prof !19
+
+right:
+; CHECK-NEXT: right: float = 1.0,
+  br i1 %x, label %bottom, label %exit, !prof !20
+
+bottom:
+; CHECK-NEXT: bottom: float = 1.0,
+  br i1 %x, label %left, label %right, !prof !18
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+  ret void
+}
+!18 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!19 = metadata !{metadata !"branch_weights", i32 1, i32 3}
+!20 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+
+; An irreducible SCC with an irreducible sub-SCC.  In the current version of
+; -block-freq, this means an extra header.
+;
+; This testcases uses non-trivial branch weights.  The CHECK statements here
+; will start to fail if we change -block-freq to be more accurate.  Currently,
+; we expect left, right and top to be treated as equal headers.
+define void @nonentry_header(i1 %x, i2 %y) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'nonentry_header':
+; CHECK-NEXT: block-frequency-info: nonentry_header
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+  br i1 %x, label %left, label %right, !prof !21
+
+left:
+; CHECK-NEXT: left: float = 3.0,
+  br i1 %x, label %top, label %bottom, !prof !22
+
+right:
+; CHECK-NEXT: right: float = 3.0,
+  br i1 %x, label %top, label %bottom, !prof !22
+
+top:
+; CHECK-NEXT: top: float = 3.0,
+  switch i2 %y, label %exit [ i2 0, label %left
+                              i2 1, label %right
+                              i2 2, label %bottom ], !prof !23
+
+bottom:
+; CHECK-NEXT: bottom: float = 4.5,
+  br label %top
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+  ret void
+}
+!21 = metadata !{metadata !"branch_weights", i32 2, i32 1}
+!22 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!23 = metadata !{metadata !"branch_weights", i32 8, i32 1, i32 3, i32 12}
diff --git a/test/Analysis/BlockFrequencyInfo/loop_with_branch.ll b/test/Analysis/BlockFrequencyInfo/loop_with_branch.ll
new file mode 100644
index 0000000..9d27b6b
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/loop_with_branch.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_with_branch':
+; CHECK-NEXT: block-frequency-info: loop_with_branch
+define void @loop_with_branch(i32 %a) {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+  %skip_loop = call i1 @foo0(i32 %a)
+  br i1 %skip_loop, label %skip, label %header, !prof !0
+
+; CHECK-NEXT: skip: float = 0.25,
+skip:
+  br label %exit
+
+; CHECK-NEXT: header: float = 4.5,
+header:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %back ]
+  %i.next = add i32 %i, 1
+  %choose = call i2 @foo1(i32 %i)
+  switch i2 %choose, label %exit [ i2 0, label %left
+                                   i2 1, label %right ], !prof !1
+
+; CHECK-NEXT: left: float = 1.5,
+left:
+  br label %back
+
+; CHECK-NEXT: right: float = 2.25,
+right:
+  br label %back
+
+; CHECK-NEXT: back: float = 3.75,
+back:
+  br label %header
+
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+exit:
+  ret void
+}
+
+declare i1 @foo0(i32)
+declare i2 @foo1(i32)
+
+!0 = metadata !{metadata !"branch_weights", i32 1, i32 3}
+!1 = metadata !{metadata !"branch_weights", i32 1, i32 2, i32 3}
diff --git a/test/Analysis/BlockFrequencyInfo/nested_loop_with_branches.ll b/test/Analysis/BlockFrequencyInfo/nested_loop_with_branches.ll
new file mode 100644
index 0000000..d93ffce
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/nested_loop_with_branches.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'nested_loop_with_branches'
+; CHECK-NEXT: block-frequency-info: nested_loop_with_branches
+define void @nested_loop_with_branches(i32 %a) {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+  %v0 = call i1 @foo0(i32 %a)
+  br i1 %v0, label %exit, label %outer, !prof !0
+
+; CHECK-NEXT: outer: float = 12.0,
+outer:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %inner.end ], [ %i.next, %no_inner ]
+  %i.next = add i32 %i, 1
+  %do_inner = call i1 @foo1(i32 %i)
+  br i1 %do_inner, label %no_inner, label %inner, !prof !0
+
+; CHECK-NEXT: inner: float = 36.0,
+inner:
+  %j = phi i32 [ 0, %outer ], [ %j.next, %inner.end ]
+  %side = call i1 @foo3(i32 %j)
+  br i1 %side, label %left, label %right, !prof !0
+
+; CHECK-NEXT: left: float = 9.0,
+left:
+  %v4 = call i1 @foo4(i32 %j)
+  br label %inner.end
+
+; CHECK-NEXT: right: float = 27.0,
+right:
+  %v5 = call i1 @foo5(i32 %j)
+  br label %inner.end
+
+; CHECK-NEXT: inner.end: float = 36.0,
+inner.end:
+  %stay_inner = phi i1 [ %v4, %left ], [ %v5, %right ]
+  %j.next = add i32 %j, 1
+  br i1 %stay_inner, label %inner, label %outer, !prof !1
+
+; CHECK-NEXT: no_inner: float = 3.0,
+no_inner:
+  %continue = call i1 @foo6(i32 %i)
+  br i1 %continue, label %outer, label %exit, !prof !1
+
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+exit:
+  ret void
+}
+
+declare i1 @foo0(i32)
+declare i1 @foo1(i32)
+declare i1 @foo2(i32)
+declare i1 @foo3(i32)
+declare i1 @foo4(i32)
+declare i1 @foo5(i32)
+declare i1 @foo6(i32)
+
+!0 = metadata !{metadata !"branch_weights", i32 1, i32 3}
+!1 = metadata !{metadata !"branch_weights", i32 3, i32 1}
diff --git a/test/Analysis/BranchProbabilityInfo/loop.ll b/test/Analysis/BranchProbabilityInfo/loop.ll
index b648cbb..40f1111 100644
--- a/test/Analysis/BranchProbabilityInfo/loop.ll
+++ b/test/Analysis/BranchProbabilityInfo/loop.ll
@@ -15,7 +15,7 @@ do.body:
   %i.0 = phi i32 [ 0, %entry ], [ %inc3, %do.end ]
   call void @g1()
   br label %do.body1
-; CHECK: edge do.body -> do.body1 probability is 124 / 124 = 100%
+; CHECK: edge do.body -> do.body1 probability is 16 / 16 = 100%
 
 do.body1:
   %j.0 = phi i32 [ 0, %do.body ], [ %inc, %do.body1 ]
@@ -55,8 +55,8 @@ for.body:
   %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc5, %for.end ]
   call void @g1()
   br i1 %cmp27, label %for.body3, label %for.end
-; CHECK: edge for.body -> for.body3 probability is 62 / 124 = 50%
-; CHECK: edge for.body -> for.end probability is 62 / 124 = 50%
+; CHECK: edge for.body -> for.body3 probability is 20 / 32 = 62.5%
+; CHECK: edge for.body -> for.end probability is 12 / 32 = 37.5%
 
 for.body3:
   %j.08 = phi i32 [ %inc, %for.body3 ], [ 0, %for.body ]
@@ -91,8 +91,8 @@ do.body:
   %0 = load i32* %c, align 4
   %cmp = icmp slt i32 %0, 42
   br i1 %cmp, label %do.body1, label %if.end
-; CHECK: edge do.body -> do.body1 probability is 62 / 124 = 50%
-; CHECK: edge do.body -> if.end probability is 62 / 124 = 50%
+; CHECK: edge do.body -> do.body1 probability is 16 / 32 = 50%
+; CHECK: edge do.body -> if.end probability is 16 / 32 = 50%
 
 do.body1:
   %j.0 = phi i32 [ %inc, %do.body1 ], [ 0, %do.body ]
@@ -165,7 +165,7 @@ do.body:
   %i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ]
   call void @g1()
   br label %do.body1
-; CHECK: edge do.body -> do.body1 probability is 124 / 124 = 100%
+; CHECK: edge do.body -> do.body1 probability is 16 / 16 = 100%
 
 do.body1:
   %j.0 = phi i32 [ 0, %do.body ], [ %inc, %if.end ]
@@ -209,7 +209,7 @@ do.body:
   %i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ]
   call void @g1()
   br label %do.body1
-; CHECK: edge do.body -> do.body1 probability is 124 / 124 = 100%
+; CHECK: edge do.body -> do.body1 probability is 16 / 16 = 100%
 
 do.body1:
   %j.0 = phi i32 [ 0, %do.body ], [ %inc, %do.cond ]
@@ -261,14 +261,14 @@ for.body:
   %0 = load i32* %c, align 4
   %cmp1 = icmp eq i32 %0, %i.011
   br i1 %cmp1, label %for.inc5, label %if.end
-; CHECK: edge for.body -> for.inc5 probability is 62 / 124 = 50%
-; CHECK: edge for.body -> if.end probability is 62 / 124 = 50%
+; CHECK: edge for.body -> for.inc5 probability is 16 / 32 = 50%
+; CHECK: edge for.body -> if.end probability is 16 / 32 = 50%
 
 if.end:
   call void @g1()
   br i1 %cmp38, label %for.body4, label %for.end
-; CHECK: edge if.end -> for.body4 probability is 62 / 124 = 50%
-; CHECK: edge if.end -> for.end probability is 62 / 124 = 50%
+; CHECK: edge if.end -> for.body4 probability is 20 / 32 = 62.5%
+; CHECK: edge if.end -> for.end probability is 12 / 32 = 37.5%
 
 for.body4:
   %j.09 = phi i32 [ %inc, %for.body4 ], [ 0, %if.end ]
@@ -282,7 +282,7 @@ for.body4:
 for.end:
   call void @g3()
   br label %for.inc5
-; CHECK: edge for.end -> for.inc5 probability is 124 / 124 = 100%
+; CHECK: edge for.end -> for.inc5 probability is 16 / 16 = 100%
 
 for.inc5:
   %inc6 = add nsw i32 %i.011, 1
@@ -314,35 +314,35 @@ for.body:
   %i.019 = phi i32 [ 0, %for.body.lr.ph ], [ %inc14, %for.end ]
   call void @g1()
   br i1 %cmp216, label %for.body3, label %for.end
-; CHECK: edge for.body -> for.body3 probability is 62 / 124 = 50%
-; CHECK: edge for.body -> for.end probability is 62 / 124 = 50%
+; CHECK: edge for.body -> for.body3 probability is 20 / 32 = 62.5%
+; CHECK: edge for.body -> for.end probability is 12 / 32 = 37.5%
 
 for.body3:
   %j.017 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
   %0 = load i32* %c, align 4
   %cmp4 = icmp eq i32 %0, %j.017
   br i1 %cmp4, label %for.inc, label %if.end
-; CHECK: edge for.body3 -> for.inc probability is 62 / 124 = 50%
-; CHECK: edge for.body3 -> if.end probability is 62 / 124 = 50%
+; CHECK: edge for.body3 -> for.inc probability is 16 / 32 = 50%
+; CHECK: edge for.body3 -> if.end probability is 16 / 32 = 50%
 
 if.end:
   %1 = load i32* %arrayidx5, align 4
   %cmp6 = icmp eq i32 %1, %j.017
   br i1 %cmp6, label %for.inc, label %if.end8
-; CHECK: edge if.end -> for.inc probability is 62 / 124 = 50%
-; CHECK: edge if.end -> if.end8 probability is 62 / 124 = 50%
+; CHECK: edge if.end -> for.inc probability is 16 / 32 = 50%
+; CHECK: edge if.end -> if.end8 probability is 16 / 32 = 50%
 
 if.end8:
   %2 = load i32* %arrayidx9, align 4
   %cmp10 = icmp eq i32 %2, %j.017
   br i1 %cmp10, label %for.inc, label %if.end12
-; CHECK: edge if.end8 -> for.inc probability is 62 / 124 = 50%
-; CHECK: edge if.end8 -> if.end12 probability is 62 / 124 = 50%
+; CHECK: edge if.end8 -> for.inc probability is 16 / 32 = 50%
+; CHECK: edge if.end8 -> if.end12 probability is 16 / 32 = 50%
 
 if.end12:
   call void @g2()
   br label %for.inc
-; CHECK: edge if.end12 -> for.inc probability is 124 / 124 = 100%
+; CHECK: edge if.end12 -> for.inc probability is 16 / 16 = 100%
 
 for.inc:
   %inc = add nsw i32 %j.017, 1
diff --git a/test/Analysis/BranchProbabilityInfo/pr18705.ll b/test/Analysis/BranchProbabilityInfo/pr18705.ll
new file mode 100644
index 0000000..9f239b4
--- /dev/null
+++ b/test/Analysis/BranchProbabilityInfo/pr18705.ll
@@ -0,0 +1,58 @@
+; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+
+; Since neither of while.body's out-edges is an exit or a back edge,
+; calcLoopBranchHeuristics should return early without setting the weights.
+; calcFloatingPointHeuristics, which is run later, sets the weights.
+;
+; CHECK: edge while.body -> if.then probability is 20 / 32 = 62.5%
+; CHECK: edge while.body -> if.else probability is 12 / 32 = 37.5%
+
+define void @foo1(i32 %n, i32* nocapture %b, i32* nocapture %c, i32* nocapture %d, float* nocapture readonly %f0, float* nocapture readonly %f1) {
+entry:
+  %tobool8 = icmp eq i32 %n, 0
+  br i1 %tobool8, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:
+  %0 = sext i32 %n to i64
+  br label %while.body
+
+while.body:
+  %indvars.iv = phi i64 [ %0, %while.body.lr.ph ], [ %indvars.iv.next, %if.end ]
+  %b.addr.011 = phi i32* [ %b, %while.body.lr.ph ], [ %b.addr.1, %if.end ]
+  %d.addr.010 = phi i32* [ %d, %while.body.lr.ph ], [ %incdec.ptr4, %if.end ]
+  %c.addr.09 = phi i32* [ %c, %while.body.lr.ph ], [ %c.addr.1, %if.end ]
+  %indvars.iv.next = add nsw i64 %indvars.iv, -1
+  %arrayidx = getelementptr inbounds float* %f0, i64 %indvars.iv.next
+  %1 = load float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float* %f1, i64 %indvars.iv.next
+  %2 = load float* %arrayidx2, align 4
+  %cmp = fcmp une float %1, %2
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %incdec.ptr = getelementptr inbounds i32* %b.addr.011, i64 1
+  %3 = load i32* %b.addr.011, align 4
+  %add = add nsw i32 %3, 12
+  store i32 %add, i32* %b.addr.011, align 4
+  br label %if.end
+
+if.else:
+  %incdec.ptr3 = getelementptr inbounds i32* %c.addr.09, i64 1
+  %4 = load i32* %c.addr.09, align 4
+  %sub = add nsw i32 %4, -13
+  store i32 %sub, i32* %c.addr.09, align 4
+  br label %if.end
+
+if.end:
+  %c.addr.1 = phi i32* [ %c.addr.09, %if.then ], [ %incdec.ptr3, %if.else ]
+  %b.addr.1 = phi i32* [ %incdec.ptr, %if.then ], [ %b.addr.011, %if.else ]
+  %incdec.ptr4 = getelementptr inbounds i32* %d.addr.010, i64 1
+  store i32 14, i32* %d.addr.010, align 4
+  %5 = trunc i64 %indvars.iv.next to i32
+  %tobool = icmp eq i32 %5, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
+
diff --git a/test/Analysis/CostModel/AArch64/lit.local.cfg b/test/Analysis/CostModel/AArch64/lit.local.cfg
new file mode 100644
index 0000000..c420349
--- /dev/null
+++ b/test/Analysis/CostModel/AArch64/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if not 'AArch64' in targets:
+    config.unsupported = True
diff --git a/test/Analysis/CostModel/AArch64/select.ll b/test/Analysis/CostModel/AArch64/select.ll
new file mode 100644
index 0000000..216dc5d
--- /dev/null
+++ b/test/Analysis/CostModel/AArch64/select.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+
+; CHECK-LABEL: select
+define void @select() {
+    ; Scalar values
+  ; CHECK: cost of 1 {{.*}} select
+  %v1 = select i1 undef, i8 undef, i8 undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v2 = select i1 undef, i16 undef, i16 undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v3 = select i1 undef, i32 undef, i32 undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v4 = select i1 undef, i64 undef, i64 undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v5 = select i1 undef, float undef, float undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v6 = select i1 undef, double undef, double undef
+
+  ; Vector values - check for vectors that have a high cost because they end up
+  ; scalarized.
+  ; CHECK: cost of 320 {{.*}} select
+  %v13b = select <16 x i1>  undef, <16 x i16> undef, <16 x i16> undef
+
+  ; CHECK: cost of 160 {{.*}} select
+  %v15b = select <8 x i1>  undef, <8 x i32> undef, <8 x i32> undef
+  ; CHECK: cost of 320 {{.*}} select
+  %v15c = select <16 x i1>  undef, <16 x i32> undef, <16 x i32> undef
+
+  ; CHECK: cost of 80 {{.*}} select
+  %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+  ; CHECK: cost of 160 {{.*}} select
+  %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+  ; CHECK: cost of 320 {{.*}} select
+  %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+
+    ret void
+}
diff --git a/test/Analysis/CostModel/AArch64/store.ll b/test/Analysis/CostModel/AArch64/store.ll
new file mode 100644
index 0000000..0c9883c
--- /dev/null
+++ b/test/Analysis/CostModel/AArch64/store.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+; CHECK-LABEL: store
+define void @store() {
+    ; Stores of <2 x i64> should be expensive because we don't split them and
+    ; and unaligned 16b stores have bad performance.
+    ; CHECK: cost of 12 {{.*}} store
+    store <2 x i64> undef, <2 x i64> * undef
+
+    ; We scalarize the loads/stores because there is no vector register name for
+    ; these types (they get extended to v.4h/v.2s).
+    ; CHECK: cost of 16 {{.*}} store
+    store <2 x i8> undef, <2 x i8> * undef
+    ; CHECK: cost of 64 {{.*}} store
+    store <4 x i8> undef, <4 x i8> * undef
+    ; CHECK: cost of 16 {{.*}} load
+    load <2 x i8> * undef
+    ; CHECK: cost of 64 {{.*}} load
+    load <4 x i8> * undef
+
+    ret void
+}
diff --git a/test/Analysis/CostModel/ARM64/lit.local.cfg b/test/Analysis/CostModel/ARM64/lit.local.cfg
deleted file mode 100644
index 84ac981..0000000
--- a/test/Analysis/CostModel/ARM64/lit.local.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM64' in targets:
-    config.unsupported = True
diff --git a/test/Analysis/CostModel/ARM64/select.ll b/test/Analysis/CostModel/ARM64/select.ll
deleted file mode 100644
index 216dc5d..0000000
--- a/test/Analysis/CostModel/ARM64/select.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: opt < %s  -cost-model -analyze -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
-
-; CHECK-LABEL: select
-define void @select() {
-    ; Scalar values
-  ; CHECK: cost of 1 {{.*}} select
-  %v1 = select i1 undef, i8 undef, i8 undef
-  ; CHECK: cost of 1 {{.*}} select
-  %v2 = select i1 undef, i16 undef, i16 undef
-  ; CHECK: cost of 1 {{.*}} select
-  %v3 = select i1 undef, i32 undef, i32 undef
-  ; CHECK: cost of 1 {{.*}} select
-  %v4 = select i1 undef, i64 undef, i64 undef
-  ; CHECK: cost of 1 {{.*}} select
-  %v5 = select i1 undef, float undef, float undef
-  ; CHECK: cost of 1 {{.*}} select
-  %v6 = select i1 undef, double undef, double undef
-
-  ; Vector values - check for vectors that have a high cost because they end up
-  ; scalarized.
-  ; CHECK: cost of 320 {{.*}} select
-  %v13b = select <16 x i1>  undef, <16 x i16> undef, <16 x i16> undef
-
-  ; CHECK: cost of 160 {{.*}} select
-  %v15b = select <8 x i1>  undef, <8 x i32> undef, <8 x i32> undef
-  ; CHECK: cost of 320 {{.*}} select
-  %v15c = select <16 x i1>  undef, <16 x i32> undef, <16 x i32> undef
-
-  ; CHECK: cost of 80 {{.*}} select
-  %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-  ; CHECK: cost of 160 {{.*}} select
-  %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-  ; CHECK: cost of 320 {{.*}} select
-  %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-
-    ret void
-}
diff --git a/test/Analysis/CostModel/ARM64/store.ll b/test/Analysis/CostModel/ARM64/store.ll
deleted file mode 100644
index 0c9883c..0000000
--- a/test/Analysis/CostModel/ARM64/store.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: opt < %s  -cost-model -analyze -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
-; CHECK-LABEL: store
-define void @store() {
-    ; Stores of <2 x i64> should be expensive because we don't split them and
-    ; and unaligned 16b stores have bad performance.
-    ; CHECK: cost of 12 {{.*}} store
-    store <2 x i64> undef, <2 x i64> * undef
-
-    ; We scalarize the loads/stores because there is no vector register name for
-    ; these types (they get extended to v.4h/v.2s).
-    ; CHECK: cost of 16 {{.*}} store
-    store <2 x i8> undef, <2 x i8> * undef
-    ; CHECK: cost of 64 {{.*}} store
-    store <4 x i8> undef, <4 x i8> * undef
-    ; CHECK: cost of 16 {{.*}} load
-    load <2 x i8> * undef
-    ; CHECK: cost of 64 {{.*}} load
-    load <4 x i8> * undef
-
-    ret void
-}
diff --git a/test/Analysis/CostModel/PowerPC/ext.ll b/test/Analysis/CostModel/PowerPC/ext.ll
index daaa8f5..7d6a14e 100644
--- a/test/Analysis/CostModel/PowerPC/ext.ll
+++ b/test/Analysis/CostModel/PowerPC/ext.ll
@@ -13,7 +13,7 @@ define void @exts() {
   ; CHECK: cost of 1 {{.*}} sext
   %v3 = sext <4 x i16> undef to <4 x i32>
 
-  ; CHECK: cost of 216 {{.*}} sext
+  ; CHECK: cost of 112 {{.*}} sext
   %v4 = sext <8 x i16> undef to <8 x i32>
 
   ret void
diff --git a/test/Analysis/CostModel/PowerPC/insert_extract.ll b/test/Analysis/CostModel/PowerPC/insert_extract.ll
index f51963d..8dc0031 100644
--- a/test/Analysis/CostModel/PowerPC/insert_extract.ll
+++ b/test/Analysis/CostModel/PowerPC/insert_extract.ll
@@ -3,13 +3,13 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "powerpc64-unknown-linux-gnu"
 
 define i32 @insert(i32 %arg) {
-  ; CHECK: cost of 13 {{.*}} insertelement
+  ; CHECK: cost of 10 {{.*}} insertelement
   %x = insertelement <4 x i32> undef, i32 %arg, i32 0
   ret i32 undef
 }
 
 define i32 @extract(<4 x i32> %arg) {
-  ; CHECK: cost of 13 {{.*}} extractelement
+  ; CHECK: cost of 3 {{.*}} extractelement
   %x = extractelement <4 x i32> %arg, i32 0
   ret i32 %x
 }
diff --git a/test/Analysis/CostModel/PowerPC/load_store.ll b/test/Analysis/CostModel/PowerPC/load_store.ll
index 8145a1d..368f0a7 100644
--- a/test/Analysis/CostModel/PowerPC/load_store.ll
+++ b/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -31,9 +31,15 @@ define i32 @loads(i32 %arg) {
 
   ; FIXME: There actually are sub-vector Altivec loads, and so we could handle
   ; this with a small expense, but we don't currently.
-  ; CHECK: cost of 60 {{.*}} load
+  ; CHECK: cost of 48 {{.*}} load
   load <4 x i16>* undef, align 2
 
+  ; CHECK: cost of 1 {{.*}} load
+  load <4 x i32>* undef, align 4
+
+  ; CHECK: cost of 46 {{.*}} load
+  load <3 x float>* undef, align 1
+
   ret i32 undef
 }
 
diff --git a/test/Analysis/CostModel/X86/intrinsic-cost.ll b/test/Analysis/CostModel/X86/intrinsic-cost.ll
index 8eeee81..3b27b52 100644
--- a/test/Analysis/CostModel/X86/intrinsic-cost.ll
+++ b/test/Analysis/CostModel/X86/intrinsic-cost.ll
@@ -58,3 +58,31 @@ for.end:                                          ; preds = %vector.body
 }
 
 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)  nounwind readnone
+
+define void @test3(float* nocapture %f, <4 x float> %b, <4 x float> %c) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds float* %f, i64 %index
+  %1 = bitcast float* %0 to <4 x float>*
+  %wide.load = load <4 x float>* %1, align 4
+  %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
+  store <4 x float> %2, <4 x float>* %1, align 4
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; CORE2: Printing analysis 'Cost Model Analysis' for function 'test3':
+; CORE2: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
+
+; COREI7: Printing analysis 'Cost Model Analysis' for function 'test3':
+; COREI7: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
+
+}
+
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
diff --git a/test/Analysis/CostModel/X86/vdiv-cost.ll b/test/Analysis/CostModel/X86/vdiv-cost.ll
new file mode 100644
index 0000000..c8e4557
--- /dev/null
+++ b/test/Analysis/CostModel/X86/vdiv-cost.ll
@@ -0,0 +1,92 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+
+define <4 x i32> @test1(<4 x i32> %a) {
+  %div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+  ret <4 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test1':
+; SSE2: Found an estimated cost of 15 for instruction:   %div
+; AVX2: Found an estimated cost of 15 for instruction:   %div
+}
+
+define <8 x i32> @test2(<8 x i32> %a) {
+  %div = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
+  ret <8 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test2':
+; SSE2: Found an estimated cost of 30 for instruction:   %div
+; AVX2: Found an estimated cost of 15 for instruction:   %div
+}
+
+define <8 x i16> @test3(<8 x i16> %a) {
+  %div = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ret <8 x i16> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test3':
+; SSE2: Found an estimated cost of 6 for instruction:   %div
+; AVX2: Found an estimated cost of 6 for instruction:   %div
+}
+
+define <16 x i16> @test4(<16 x i16> %a) {
+  %div = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
+  ret <16 x i16> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test4':
+; SSE2: Found an estimated cost of 12 for instruction:   %div
+; AVX2: Found an estimated cost of 6 for instruction:   %div
+}
+
+define <8 x i16> @test5(<8 x i16> %a) {
+  %div = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ret <8 x i16> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test5':
+; SSE2: Found an estimated cost of 6 for instruction:   %div
+; AVX2: Found an estimated cost of 6 for instruction:   %div
+}
+
+define <16 x i16> @test6(<16 x i16> %a) {
+  %div = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
+  ret <16 x i16> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test6':
+; SSE2: Found an estimated cost of 12 for instruction:   %div
+; AVX2: Found an estimated cost of 6 for instruction:   %div
+}
+
+define <16 x i8> @test7(<16 x i8> %a) {
+  %div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
+  ret <16 x i8> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test7':
+; SSE2: Found an estimated cost of 320 for instruction:   %div
+; AVX2: Found an estimated cost of 320 for instruction:   %div
+}
+
+define <4 x i32> @test8(<4 x i32> %a) {
+  %div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+  ret <4 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test8':
+; SSE2: Found an estimated cost of 19 for instruction:   %div
+; AVX2: Found an estimated cost of 15 for instruction:   %div
+}
+
+define <8 x i32> @test9(<8 x i32> %a) {
+  %div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
+  ret <8 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test9':
+; SSE2: Found an estimated cost of 38 for instruction:   %div
+; AVX2: Found an estimated cost of 15 for instruction:   %div
+}
+
+define <8 x i32> @test10(<8 x i32> %a) {
+  %div = sdiv <8 x i32> %a, <i32 8, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
+  ret <8 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test10':
+; SSE2: Found an estimated cost of 160 for instruction:   %div
+; AVX2: Found an estimated cost of 160 for instruction:   %div
+}
diff --git a/test/Analysis/CostModel/X86/vselect-cost.ll b/test/Analysis/CostModel/X86/vselect-cost.ll
new file mode 100644
index 0000000..2416777
--- /dev/null
+++ b/test/Analysis/CostModel/X86/vselect-cost.ll
@@ -0,0 +1,126 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+
+
+; Verify the cost of vector select instructions.
+
+; SSE41 added blend instructions with an immediate for <2 x double> and
+; <4 x float>. Integers of the same size should also use those instructions.
+
+define <2 x i64> @test_2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_2i64':
+; SSE2: Cost Model: {{.*}} 4 for instruction:   %sel = select <2 x i1>
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %sel = select <2 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <2 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <2 x i1>
+  %sel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
+  ret <2 x i64> %sel
+}
+
+define <2 x double> @test_2double(<2 x double> %a, <2 x double> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_2double':
+; SSE2: Cost Model: {{.*}} 3 for instruction:   %sel = select <2 x i1>
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %sel = select <2 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <2 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <2 x i1>
+  %sel = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
+  ret <2 x double> %sel
+}
+
+define <4 x i32> @test_4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4i32':
+; SSE2: Cost Model: {{.*}} 8 for instruction:   %sel = select <4 x i1>
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
+  %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %sel
+}
+
+define <4 x float> @test_4float(<4 x float> %a, <4 x float> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4float':
+; SSE2: Cost Model: {{.*}} 7 for instruction:   %sel = select <4 x i1>
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
+  %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %sel
+}
+
+define <16 x i8> @test_16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_16i8':
+; SSE2: Cost Model: {{.*}} 32 for instruction:   %sel = select <16 x i1>
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %sel = select <16 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <16 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <16 x i1>
+  %sel = select <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true>, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %sel
+}
+
+; AVX added blend instructions with an immediate for <4 x double> and
+; <8 x float>. Integers of the same size should also use those instructions.
+define <4 x i64> @test_4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4i64':
+; SSE2: Cost Model: {{.*}} 8 for instruction:   %sel = select <4 x i1>
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %sel = select <4 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
+  %sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i64> %a, <4 x i64> %b
+  ret <4 x i64> %sel
+}
+
+define <4 x double> @test_4double(<4 x double> %a, <4 x double> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4double':
+; SSE2: Cost Model: {{.*}} 6 for instruction:   %sel = select <4 x i1>
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %sel = select <4 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
+  %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %a, <4 x double> %b
+  ret <4 x double> %sel
+}
+
+define <8 x i32> @test_8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_8i32':
+; SSE2: Cost Model: {{.*}} 16 for instruction:   %sel = select <8 x i1>
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %sel = select <8 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <8 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <8 x i1>
+  %sel = select <8 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 false>, <8 x i32> %a, <8 x i32> %b
+  ret <8 x i32> %sel
+}
+
+define <8 x float> @test_8float(<8 x float> %a, <8 x float> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_8float':
+; SSE2: Cost Model: {{.*}} 14 for instruction:   %sel = select <8 x i1>
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %sel = select <8 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <8 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <8 x i1>
+  %sel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %a, <8 x float> %b
+  ret <8 x float> %sel
+}
+
+; AVX2
+define <16 x i16> @test_16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK:Printing analysis 'Cost Model Analysis' for function 'test_16i16':
+; SSE2: Cost Model: {{.*}} 32 for instruction:   %sel = select <16 x i1>
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %sel = select <16 x i1>
+;;; FIXME: This AVX cost is obviously wrong. We shouldn't be scalarizing.
+; AVX: Cost Model: {{.*}} 32 for instruction:   %sel = select <16 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <16 x i1>
+  %sel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i16> %a, <16 x i16> %b
+  ret <16 x i16> %sel
+}
+
+define <32 x i8> @test_32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_32i8':
+; SSE2: Cost Model: {{.*}} 64 for instruction:   %sel = select <32 x i1>
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %sel = select <32 x i1>
+;;; FIXME: This AVX cost is obviously wrong. We shouldn't be scalarizing.
+; AVX: Cost Model: {{.*}} 64 for instruction:   %sel = select <32 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <32 x i1>
+  %sel = select <32 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true>, <32 x i8> %a, <32 x i8> %b
+  ret <32 x i8> %sel
+}
+
diff --git a/test/Analysis/Delinearization/a.ll b/test/Analysis/Delinearization/a.ll
index 9308749..efebcc4 100644
--- a/test/Analysis/Delinearization/a.ll
+++ b/test/Analysis/Delinearization/a.ll
@@ -12,17 +12,6 @@
 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(i32) bytes.
 ; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>]
 
-; AddRec: {{(8 + ((4 + (12 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(i32) bytes.
-; CHECK: ArrayRef[{(1 + (3 * %m)),+,(2 * %m)}<%for.i>][{2,+,(3 * %o)}<%for.j>]
-
-; AddRec: {(8 + ((-8 + (24 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize] with elements of 2 bytes.
-; CHECK: ArrayRef[{((1 + ((-1 + (3 * %m)) * %o)) * sizeof(i32)),+,(%m * %o * sizeof(i32))}<%for.i>]
-
-; Function Attrs: nounwind uwtable
 define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 {
 entry:
   %cmp32 = icmp sgt i64 %n, 0
diff --git a/test/Analysis/Delinearization/gcd_multiply_expr.ll b/test/Analysis/Delinearization/gcd_multiply_expr.ll
new file mode 100644
index 0000000..f962f6d
--- /dev/null
+++ b/test/Analysis/Delinearization/gcd_multiply_expr.ll
@@ -0,0 +1,153 @@
+; RUN: opt < %s -basicaa -da -analyze -delinearize
+;
+; a, b, c, d, g, h;
+; char *f;
+; static fn1(p1) {
+;   char *e = p1;
+;   for (; d;) {
+;     a = 0;
+;     for (;; ++a)
+;       for (; b; ++b)
+;         c = e[b + a];
+;   }
+; }
+;
+; fn2() {
+;   for (;;)
+;     fn1(&f[g * h]);
+; }
+
+@g = common global i32 0, align 4
+@h = common global i32 0, align 4
+@f = common global i8* null, align 4
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@c = common global i32 0, align 4
+@d = common global i32 0, align 4
+
+define i32 @fn2() {
+entry:
+  %.pr = load i32* @d, align 4
+  %phitmp = icmp eq i32 %.pr, 0
+  br label %for.cond
+
+for.cond:
+  %0 = phi i1 [ true, %for.cond ], [ %phitmp, %entry ]
+  br i1 %0, label %for.cond, label %for.cond2thread-pre-split.preheader.i
+
+for.cond2thread-pre-split.preheader.i:
+  %1 = load i32* @g, align 4
+  %2 = load i32* @h, align 4
+  %mul = mul nsw i32 %2, %1
+  %3 = load i8** @f, align 4
+  %.pr.pre.i = load i32* @b, align 4
+  br label %for.cond2thread-pre-split.i
+
+for.cond2thread-pre-split.i:
+  %.pr.i = phi i32 [ 0, %for.inc5.i ], [ %.pr.pre.i, %for.cond2thread-pre-split.preheader.i ]
+  %storemerge.i = phi i32 [ %inc6.i, %for.inc5.i ], [ 0, %for.cond2thread-pre-split.preheader.i ]
+  store i32 %storemerge.i, i32* @a, align 4
+  %tobool31.i = icmp eq i32 %.pr.i, 0
+  br i1 %tobool31.i, label %for.inc5.i, label %for.body4.preheader.i
+
+for.body4.preheader.i:
+  %4 = icmp slt i32 %.pr.i, -7
+  %add.i = add i32 %storemerge.i, %mul
+  br i1 %4, label %for.body4.i.preheader, label %for.body4.ur.i.preheader
+
+for.body4.i.preheader:
+  %5 = sub i32 -8, %.pr.i
+  %6 = lshr i32 %5, 3
+  %7 = mul i32 %6, 8
+  br label %for.body4.i
+
+for.body4.i:
+  %8 = phi i32 [ %inc.7.i, %for.body4.i ], [ %.pr.i, %for.body4.i.preheader ]
+  %arrayidx.sum1 = add i32 %add.i, %8
+  %arrayidx.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum1
+  %9 = load i8* %arrayidx.i, align 1
+  %conv.i = sext i8 %9 to i32
+  store i32 %conv.i, i32* @c, align 4
+  %inc.i = add nsw i32 %8, 1
+  store i32 %inc.i, i32* @b, align 4
+  %arrayidx.sum2 = add i32 %add.i, %inc.i
+  %arrayidx.1.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum2
+  %10 = load i8* %arrayidx.1.i, align 1
+  %conv.1.i = sext i8 %10 to i32
+  store i32 %conv.1.i, i32* @c, align 4
+  %inc.1.i = add nsw i32 %8, 2
+  store i32 %inc.1.i, i32* @b, align 4
+  %arrayidx.sum3 = add i32 %add.i, %inc.1.i
+  %arrayidx.2.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum3
+  %11 = load i8* %arrayidx.2.i, align 1
+  %conv.2.i = sext i8 %11 to i32
+  store i32 %conv.2.i, i32* @c, align 4
+  %inc.2.i = add nsw i32 %8, 3
+  store i32 %inc.2.i, i32* @b, align 4
+  %arrayidx.sum4 = add i32 %add.i, %inc.2.i
+  %arrayidx.3.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum4
+  %12 = load i8* %arrayidx.3.i, align 1
+  %conv.3.i = sext i8 %12 to i32
+  store i32 %conv.3.i, i32* @c, align 4
+  %inc.3.i = add nsw i32 %8, 4
+  store i32 %inc.3.i, i32* @b, align 4
+  %arrayidx.sum5 = add i32 %add.i, %inc.3.i
+  %arrayidx.4.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum5
+  %13 = load i8* %arrayidx.4.i, align 1
+  %conv.4.i = sext i8 %13 to i32
+  store i32 %conv.4.i, i32* @c, align 4
+  %inc.4.i = add nsw i32 %8, 5
+  store i32 %inc.4.i, i32* @b, align 4
+  %arrayidx.sum6 = add i32 %add.i, %inc.4.i
+  %arrayidx.5.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum6
+  %14 = load i8* %arrayidx.5.i, align 1
+  %conv.5.i = sext i8 %14 to i32
+  store i32 %conv.5.i, i32* @c, align 4
+  %inc.5.i = add nsw i32 %8, 6
+  store i32 %inc.5.i, i32* @b, align 4
+  %arrayidx.sum7 = add i32 %add.i, %inc.5.i
+  %arrayidx.6.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum7
+  %15 = load i8* %arrayidx.6.i, align 1
+  %conv.6.i = sext i8 %15 to i32
+  store i32 %conv.6.i, i32* @c, align 4
+  %inc.6.i = add nsw i32 %8, 7
+  store i32 %inc.6.i, i32* @b, align 4
+  %arrayidx.sum8 = add i32 %add.i, %inc.6.i
+  %arrayidx.7.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum8
+  %16 = load i8* %arrayidx.7.i, align 1
+  %conv.7.i = sext i8 %16 to i32
+  store i32 %conv.7.i, i32* @c, align 4
+  %inc.7.i = add nsw i32 %8, 8
+  store i32 %inc.7.i, i32* @b, align 4
+  %tobool3.7.i = icmp sgt i32 %inc.7.i, -8
+  br i1 %tobool3.7.i, label %for.inc5.loopexit.ur-lcssa.i, label %for.body4.i
+
+for.inc5.loopexit.ur-lcssa.i:
+  %17 = add i32 %.pr.i, 8
+  %18 = add i32 %17, %7
+  %19 = icmp eq i32 %18, 0
+  br i1 %19, label %for.inc5.i, label %for.body4.ur.i.preheader
+
+for.body4.ur.i.preheader:
+  %.ph = phi i32 [ %18, %for.inc5.loopexit.ur-lcssa.i ], [ %.pr.i, %for.body4.preheader.i ]
+  br label %for.body4.ur.i
+
+for.body4.ur.i:
+  %20 = phi i32 [ %inc.ur.i, %for.body4.ur.i ], [ %.ph, %for.body4.ur.i.preheader ]
+  %arrayidx.sum = add i32 %add.i, %20
+  %arrayidx.ur.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum
+  %21 = load i8* %arrayidx.ur.i, align 1
+  %conv.ur.i = sext i8 %21 to i32
+  store i32 %conv.ur.i, i32* @c, align 4
+  %inc.ur.i = add nsw i32 %20, 1
+  store i32 %inc.ur.i, i32* @b, align 4
+  %tobool3.ur.i = icmp eq i32 %inc.ur.i, 0
+  br i1 %tobool3.ur.i, label %for.inc5.i.loopexit, label %for.body4.ur.i
+
+for.inc5.i.loopexit:
+  br label %for.inc5.i
+
+for.inc5.i:
+  %inc6.i = add nsw i32 %storemerge.i, 1
+  br label %for.cond2thread-pre-split.i
+}
diff --git a/test/Analysis/Delinearization/himeno_1.ll b/test/Analysis/Delinearization/himeno_1.ll
index 9458bd2..c94ca7a 100644
--- a/test/Analysis/Delinearization/himeno_1.ll
+++ b/test/Analysis/Delinearization/himeno_1.ll
@@ -31,16 +31,6 @@
 ; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
 ; CHECK: ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>]
 
-; AddRec: {{(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>
-; CHECK: Base offset: %a.base
-; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
-; CHECK: ArrayRef[{(1 + (sext i32 %a.cols to i64)),+,(sext i32 %a.cols to i64)}<%for.i>][{(-1 + (sext i32 (-1 + %p.deps) to i64)),+,(sext i32 %a.deps to i64)}<%for.j>]
-
-; AddRec: {(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + ((sext i32 %a.deps to i64) * (-4 + (4 * (sext i32 (-1 + %p.cols) to i64)) + (4 * (sext i32 %a.cols to i64)))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>
-; CHECK: Base offset: %a.base
-; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(float) bytes.
-; CHECK: ArrayRef[{(-1 + (sext i32 (-1 + %p.deps) to i64) + ((sext i32 %a.deps to i64) * (-1 + (sext i32 (-1 + %p.cols) to i64) + (sext i32 %a.cols to i64)))),+,((sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>]
-
 %struct.Mat = type { float*, i32, i32, i32, i32 }
 
 define void @jacobi(i32 %nn, %struct.Mat* nocapture %a, %struct.Mat* nocapture %p) nounwind uwtable {
diff --git a/test/Analysis/Delinearization/himeno_2.ll b/test/Analysis/Delinearization/himeno_2.ll
index a290066..c256384 100644
--- a/test/Analysis/Delinearization/himeno_2.ll
+++ b/test/Analysis/Delinearization/himeno_2.ll
@@ -31,16 +31,6 @@
 ; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
 ; CHECK: ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>]
 
-; AddRec: {{(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>
-; CHECK: Base offset: %a.base
-; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
-; CHECK: ArrayRef[{(1 + (sext i32 %a.cols to i64)),+,(sext i32 %a.cols to i64)}<%for.i>][{(-1 + (sext i32 (-1 + %p.deps) to i64)),+,(sext i32 %a.deps to i64)}<%for.j>]
-
-; AddRec: {(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + ((sext i32 %a.deps to i64) * (-4 + (4 * (sext i32 (-1 + %p.cols) to i64)) + (4 * (sext i32 %a.cols to i64)))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>
-; CHECK: Base offset: %a.base
-; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(float) bytes.
-; CHECK: ArrayRef[{(-1 + (sext i32 (-1 + %p.deps) to i64) + ((sext i32 %a.deps to i64) * (-1 + (sext i32 (-1 + %p.cols) to i64) + (sext i32 %a.cols to i64)))),+,((sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>]
-
 %struct.Mat = type { float*, i32, i32, i32, i32 }
 
 define void @jacobi(i32 %nn, %struct.Mat* nocapture %a, %struct.Mat* nocapture %p) nounwind uwtable {
diff --git a/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll b/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll
new file mode 100644
index 0000000..01a4b96
--- /dev/null
+++ b/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -analyze -delinearize | FileCheck %s
+
+; Derived from the following code:
+;
+; void foo(long n, long m, long b, double A[n][m]) {
+;   for (long i = 0; i < n; i++)
+;     for (long j = 0; j < m; j++)
+;       A[2i+b][2j] = 1.0;
+; }
+
+; AddRec: {{((%m * %b * sizeof(double)) + %A),+,(2 * %m * sizeof(double))}<%for.i>,+,(2 * sizeof(double))}<%for.j>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][%m] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{%b,+,2}<%for.i>][{0,+,2}<%for.j>]
+
+
+define void @foo(i64 %n, i64 %m, i64 %b, double* %A) {
+entry:
+  br label %for.i
+
+for.i:
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
+  %outerdim = mul nsw i64 %i, 2
+  %outerdim2 = add nsw i64 %outerdim, %b
+  %tmp = mul nsw i64 %outerdim2, %m
+  br label %for.j
+
+for.j:
+  %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j ]
+  %prodj = mul i64 %j, 2
+  %vlaarrayidx.sum = add i64 %prodj, %tmp
+  %arrayidx = getelementptr inbounds double* %A, i64 %vlaarrayidx.sum
+  store double 1.0, double* %arrayidx
+  %j.inc = add nsw i64 %j, 1
+  %j.exitcond = icmp eq i64 %j.inc, %m
+  br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:
+  %i.inc = add nsw i64 %i, 1
+  %i.exitcond = icmp eq i64 %i.inc, %n
+  br i1 %i.exitcond, label %end, label %for.i
+
+end:
+  ret void
+}
diff --git a/test/Analysis/Delinearization/lit.local.cfg b/test/Analysis/Delinearization/lit.local.cfg
index 19eebc0..c6106e4 100644
--- a/test/Analysis/Delinearization/lit.local.cfg
+++ b/test/Analysis/Delinearization/lit.local.cfg
@@ -1 +1 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll']
diff --git a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
index 82cab16..ae80ebc 100644
--- a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
+++ b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
@@ -13,16 +13,6 @@
 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
 ; CHECK: ArrayRef[{3,+,1}<nw><%for.i>][{-4,+,1}<nw><%for.j>][{7,+,1}<nw><%for.k>]
 
-; AddRec: {{(48 + ((-24 + (24 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(-3 + (3 * %m)),+,%m}<%for.i>][{6,+,%o}<%for.j>]
-
-; AddRec: {(48 + ((-32 + (32 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(6 + ((-4 + (4 * %m)) * %o)),+,(%m * %o)}<%for.i>]
-
 define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
 entry:
   br label %for.i
diff --git a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
index a1e779f..75080da 100644
--- a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
+++ b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
@@ -13,16 +13,6 @@
 ; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)] with elements of sizeof(double) bytes.
 ; CHECK: ArrayRef[{3,+,1}<nw><%for.cond4.preheader.lr.ph.us>][{-4,+,1}<nw><%for.body6.lr.ph.us.us>][{7,+,1}<nw><%for.body6.us.us>]
 
-; AddRec: {{(48 + (8 * %o) + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][(%o + %p)] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(-4 + (3 * %m)),+,%m}<%for.cond4.preheader.lr.ph.us>][{(6 + %o),+,(%o + %p)}<%for.body6.lr.ph.us.us>]
-
-; AddRec: {(48 + (8 * %o) + ((-40 + (32 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(6 + ((-5 + (4 * %m)) * (%o + %p)) + %o),+,((%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>]
-
 define void @foo(i64 %n, i64 %m, i64 %o, i64 %p, double* nocapture %A) nounwind uwtable {
 entry:
   %add = add nsw i64 %p, %o
diff --git a/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
index a52a4c9..e921444 100644
--- a/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
+++ b/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
@@ -13,16 +13,6 @@
 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
 ; CHECK: ArrayRef[{%p,+,1}<nw><%for.i>][{%q,+,1}<nw><%for.j>][{%r,+,1}<nw><%for.k>]
 
-; AddRec: {{(-8 + (8 * ((((%m * %p) + %q) * %o) + %r)) + (8 * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(1 + (%m * %p) + %q),+,%m}<%for.i>][{(-1 + %r),+,%o}<%for.j>]
-
-; AddRec: {(-8 + (8 * ((((%m * %p) + %q) * %o) + %r)) + (8 * %m * %o) + %A),+,(8 * %m * %o)}<%for.i>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(-1 + ((((1 + %p) * %m) + %q) * %o) + %r),+,(%m * %o)}<%for.i>]
-
 define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) {
 entry:
   br label %for.i
diff --git a/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
index d68a158..48bec08 100644
--- a/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
+++ b/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
@@ -13,11 +13,6 @@
 ; CHECK: ArrayDecl[UnknownSize][%m] with elements of sizeof(double) bytes.
 ; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
 
-; AddRec: {(-8 + (8 * %m) + %A),+,(8 * %m)}<%for.i>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(-1 + %m),+,%m}<%for.i>]
-
 define void @foo(i64 %n, i64 %m, double* %A) {
 entry:
   br label %for.i
diff --git a/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll b/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll
index 7207420..810188f 100644
--- a/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll
+++ b/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll
@@ -1,4 +1,6 @@
 ; RUN: opt < %s -analyze -delinearize | FileCheck %s
+; XFAIL: *
+; We do not recognize anymore variable size arrays.
 
 ; extern void bar(long n, long m, double A[n][m]);
 ;
diff --git a/test/Analysis/Delinearization/multidim_only_ivs_3d.ll b/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
index 24f9583..aad0f09 100644
--- a/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
+++ b/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
@@ -13,16 +13,6 @@
 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
 ; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.k>]
 
-; AddRec: {{(-8 + (8 * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][(%m * %o)] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{(-1 + %o),+,%o}<%for.j>]
-
-; AddRec: {(-8 + (8 * %m * %o) + %A),+,(8 * %m * %o)}<%for.i>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(-1 + (%m * %o)),+,(%m * %o)}<%for.i>]
-
 define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
 entry:
   br label %for.i
diff --git a/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll b/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll
index e151610..9e406d1 100644
--- a/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll
+++ b/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll
@@ -12,16 +12,6 @@
 ; CHECK: ArrayDecl[UnknownSize][(zext i32 %m to i64)][(zext i32 %o to i64)] with elements of 8 bytes.
 ; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
 
-; AddRec: {{((8 * (zext i32 (-1 + %o) to i64)) + %A),+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))}<%for.j>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][((zext i32 %m to i64) * (zext i32 %o to i64))] with elements of 8 bytes.
-; CHECK: ArrayRef[{0,+,1}<%for.i>][{(zext i32 (-1 + %o) to i64),+,(zext i32 %o to i64)}<%for.j>]
-
-; AddRec: {((8 * (zext i32 (-1 + %o) to i64)) + (8 * (zext i32 (-1 + %m) to i64) * (zext i32 %o to i64)) + %A),+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize] with elements of 8 bytes.
-; CHECK: ArrayRef[{((zext i32 (-1 + %o) to i64) + ((zext i32 (-1 + %m) to i64) * (zext i32 %o to i64))),+,((zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>]
-
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll b/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll
new file mode 100644
index 0000000..6a98507
--- /dev/null
+++ b/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll
@@ -0,0 +1,43 @@
+; RUN: opt -basicaa -da -analyze -da-delinearize < %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Derived from the following code:
+;
+; void foo(long n, long m, double *A) {
+;   for (long i = 0; i < n; i++)
+;     for (long j = 0; j < m; j++)
+;       *(A + i * n + j) = 1.0;
+;       *(A + j * m + i) = 2.0;
+; }
+
+define void @foo(i64 %n, i64 %m, double* %A) {
+entry:
+  br label %for.i
+
+for.i:
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
+  br label %for.j
+
+for.j:
+  %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j ]
+  %tmp = mul nsw i64 %i, %m
+  %vlaarrayidx.sum = add i64 %j, %tmp
+  %arrayidx = getelementptr inbounds double* %A, i64 %vlaarrayidx.sum
+  store double 1.0, double* %arrayidx
+  %tmp1 = mul nsw i64 %j, %n
+  %vlaarrayidx.sum1 = add i64 %i, %tmp1
+  %arrayidx1 = getelementptr inbounds double* %A, i64 %vlaarrayidx.sum1
+  store double 1.0, double* %arrayidx1
+  %j.inc = add nsw i64 %j, 1
+  %j.exitcond = icmp eq i64 %j.inc, %m
+  br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:
+  %i.inc = add nsw i64 %i, 1
+  %i.exitcond = icmp eq i64 %i.inc, %n
+  br i1 %i.exitcond, label %end, label %for.i
+
+end:
+  ret void
+}
diff --git a/test/Analysis/Delinearization/undef.ll b/test/Analysis/Delinearization/undef.ll
new file mode 100644
index 0000000..8ee64e3
--- /dev/null
+++ b/test/Analysis/Delinearization/undef.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -analyze -delinearize
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(double* %Ey) {
+entry:
+  br i1 undef, label %for.cond55.preheader, label %for.end324
+
+for.cond55.preheader:
+  %iz.069 = phi i64 [ %inc323, %for.inc322 ], [ 0, %entry ]
+  br i1 undef, label %for.cond58.preheader, label %for.inc322
+
+for.cond58.preheader:
+  %iy.067 = phi i64 [ %inc320, %for.end ], [ 0, %for.cond55.preheader ]
+  br i1 undef, label %for.body60, label %for.end
+
+for.body60:
+  %ix.062 = phi i64 [ %inc, %for.body60 ], [ 0, %for.cond58.preheader ]
+  %0 = mul i64 %iz.069, undef
+  %tmp5 = add i64 %iy.067, %0
+  %tmp6 = mul i64 %tmp5, undef
+  %arrayidx69.sum = add i64 undef, %tmp6
+  %arrayidx70 = getelementptr inbounds double* %Ey, i64 %arrayidx69.sum
+  %1 = load double* %arrayidx70, align 8
+  %inc = add nsw i64 %ix.062, 1
+  br i1 false, label %for.body60, label %for.end
+
+for.end:
+  %inc320 = add nsw i64 %iy.067, 1
+  br i1 undef, label %for.cond58.preheader, label %for.inc322
+
+for.inc322:
+  %inc323 = add nsw i64 %iz.069, 1
+  br i1 undef, label %for.cond55.preheader, label %for.end324
+
+for.end324:
+  ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/Banerjee.ll b/test/Analysis/DependenceAnalysis/Banerjee.ll
index 5c17064..883a06d 100644
--- a/test/Analysis/DependenceAnalysis/Banerjee.ll
+++ b/test/Analysis/DependenceAnalysis/Banerjee.ll
@@ -24,7 +24,7 @@ entry:
 
 ; DELIN: 'Dependence Analysis' for function 'banerjee0':
 ; DELIN: da analyze - none!
-; DELIN: da analyze - consistent flow [0 1]!
+; DELIN: da analyze - flow [<= <>]!
 ; DELIN: da analyze - confused!
 ; DELIN: da analyze - none!
 ; DELIN: da analyze - confused!
@@ -83,10 +83,10 @@ entry:
 ; CHECK: da analyze - output [* *]!
 
 ; DELIN: 'Dependence Analysis' for function 'banerjee1':
-; DELIN: da analyze - none
-; DELIN: da analyze - consistent flow [0 1]!
+; DELIN: da analyze - output [* *]!
+; DELIN: da analyze - flow [* <>]!
 ; DELIN: da analyze - confused!
-; DELIN: da analyze - none
+; DELIN: da analyze - input [* *]!
 ; DELIN: da analyze - confused!
 ; DELIN: da analyze - output [* *]!
 
@@ -218,7 +218,7 @@ entry:
 
 ; DELIN: 'Dependence Analysis' for function 'banerjee3':
 ; DELIN: da analyze - none!
-; DELIN: da analyze - consistent flow [-9 -9]!
+; DELIN: da analyze - flow [> >]!
 ; DELIN: da analyze - confused!
 ; DELIN: da analyze - none!
 ; DELIN: da analyze - confused!
@@ -336,7 +336,7 @@ entry:
 
 ; DELIN: 'Dependence Analysis' for function 'banerjee5':
 ; DELIN: da analyze - none!
-; DELIN: da analyze - consistent flow [9 9]!
+; DELIN: da analyze - flow [< <]!
 ; DELIN: da analyze - confused!
 ; DELIN: da analyze - none!
 ; DELIN: da analyze - confused!
@@ -395,7 +395,7 @@ entry:
 
 ; DELIN: 'Dependence Analysis' for function 'banerjee6':
 ; DELIN: da analyze - none!
-; DELIN: da analyze - consistent flow [0 -9]!
+; DELIN: da analyze - flow [=> <>]!
 ; DELIN: da analyze - confused!
 ; DELIN: da analyze - none!
 ; DELIN: da analyze - confused!
@@ -454,7 +454,7 @@ entry:
 
 ; DELIN: 'Dependence Analysis' for function 'banerjee7':
 ; DELIN: da analyze - none!
-; DELIN: da analyze - consistent flow [-1 0]!
+; DELIN: da analyze - flow [> <=]!
 ; DELIN: da analyze - confused!
 ; DELIN: da analyze - none!
 ; DELIN: da analyze - confused!
@@ -513,7 +513,7 @@ entry:
 
 ; DELIN: 'Dependence Analysis' for function 'banerjee8':
 ; DELIN: da analyze - none!
-; DELIN: da analyze - consistent flow [-1 -1]!
+; DELIN: da analyze - flow [> <>]!
 ; DELIN: da analyze - confused!
 ; DELIN: da analyze - none!
 ; DELIN: da analyze - confused!
@@ -571,7 +571,7 @@ entry:
 ; CHECK: da analyze - none!
 
 ; DELIN: 'Dependence Analysis' for function 'banerjee9':
-; DELIN: da analyze - none!
+; DELIN: da analyze - output [* *]!
 ; DELIN: da analyze - flow [<= =|<]!
 ; DELIN: da analyze - confused!
 ; DELIN: da analyze - none!
@@ -750,7 +750,7 @@ entry:
 
 ; DELIN: 'Dependence Analysis' for function 'banerjee12':
 ; DELIN: da analyze - none!
-; DELIN: da analyze - consistent flow [0 -11]!
+; DELIN: da analyze - flow [= <>]!
 ; DELIN: da analyze - confused!
 ; DELIN: da analyze - none!
 ; DELIN: da analyze - confused!
diff --git a/test/Analysis/DependenceAnalysis/GCD.ll b/test/Analysis/DependenceAnalysis/GCD.ll
index 7efa8b5..7eca18e 100644
--- a/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/test/Analysis/DependenceAnalysis/GCD.ll
@@ -24,10 +24,10 @@ entry:
 ; CHECK: da analyze - none!
 
 ; DELIN: 'Dependence Analysis' for function 'gcd0'
-; DELIN: da analyze - none!
+; DELIN: da analyze - output [* *]!
 ; DELIN: da analyze - flow [=> *|<]!
 ; DELIN: da analyze - confused!
-; DELIN: da analyze - none!
+; DELIN: da analyze - input [* *]!
 ; DELIN: da analyze - confused!
 ; DELIN: da analyze - none!
 
@@ -85,10 +85,10 @@ entry:
 ; CHECK: da analyze - none!
 
 ; DELIN: 'Dependence Analysis' for function 'gcd1'
-; DELIN: da analyze - none!
+; DELIN: da analyze - output [* *]!
 ; DELIN: da analyze - none!
 ; DELIN: da analyze - confused!
-; DELIN: da analyze - none!
+; DELIN: da analyze - input [* *]!
 ; DELIN: da analyze - confused!
 ; DELIN: da analyze - none!
 
@@ -147,10 +147,10 @@ entry:
 ; CHECK: da analyze - none!
 
 ; DELIN: 'Dependence Analysis' for function 'gcd2'
-; DELIN: da analyze - none!
+; DELIN: da analyze - output [* *]!
 ; DELIN: da analyze - none!
 ; DELIN: da analyze - confused!
-; DELIN: da analyze - none!
+; DELIN: da analyze - input [* *]!
 ; DELIN: da analyze - confused!
 ; DELIN: da analyze - none!
 
@@ -410,10 +410,10 @@ entry:
 ; CHECK: da analyze - output [* *]!
 
 ; DELIN: 'Dependence Analysis' for function 'gcd6'
-; DELIN: da analyze - none!
+; DELIN: da analyze - output [* *]!
 ; DELIN: da analyze - none!
 ; DELIN: da analyze - confused!
-; DELIN: da analyze - none!
+; DELIN: da analyze - input [* *]!
 ; DELIN: da analyze - confused!
 ; DELIN: da analyze - output [* *]!
 
diff --git a/test/Analysis/LazyCallGraph/basic.ll b/test/Analysis/LazyCallGraph/basic.ll
index ebadb75..b8108d9 100644
--- a/test/Analysis/LazyCallGraph/basic.ll
+++ b/test/Analysis/LazyCallGraph/basic.ll
@@ -124,3 +124,53 @@ define void @test2() {
   load i8** bitcast (void ()** @h to i8**)
   ret void
 }
+
+; Verify the SCCs formed.
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    f7
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    f6
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    f5
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    f4
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    f3
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    f2
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    f1
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    test2
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    f12
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    f11
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    f10
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    f9
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    f8
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    test1
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    f
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT:    test0
diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll
index 0cdbdf5..31f06a4 100644
--- a/test/Analysis/ScalarEvolution/max-trip-count.ll
+++ b/test/Analysis/ScalarEvolution/max-trip-count.ll
@@ -98,3 +98,112 @@ for.end:                                          ; preds = %for.cond.for.end_cr
 ; CHECK: Determining loop execution counts for: @test
 ; CHECK-NEXT: backedge-taken count is
 ; CHECK-NEXT: max backedge-taken count is -1
+
+; PR19799: Indvars miscompile due to an incorrect max backedge taken count from SCEV.
+; CHECK-LABEL: @pr19799
+; CHECK: Loop %for.body.i: <multiple exits> Unpredictable backedge-taken count. 
+; CHECK: Loop %for.body.i: max backedge-taken count is 1
+@a = common global i32 0, align 4
+
+define i32 @pr19799() {
+entry:
+  store i32 -1, i32* @a, align 4
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.cond.i, %entry
+  %storemerge1.i = phi i32 [ -1, %entry ], [ %add.i.i, %for.cond.i ]
+  %tobool.i = icmp eq i32 %storemerge1.i, 0
+  %add.i.i = add nsw i32 %storemerge1.i, 2
+  br i1 %tobool.i, label %bar.exit, label %for.cond.i
+
+for.cond.i:                                       ; preds = %for.body.i
+  store i32 %add.i.i, i32* @a, align 4
+  %cmp.i = icmp slt i32 %storemerge1.i, 0
+  br i1 %cmp.i, label %for.body.i, label %bar.exit
+
+bar.exit:                                         ; preds = %for.cond.i, %for.body.i
+  ret i32 0
+}
+
+; PR18886: Indvars miscompile due to an incorrect max backedge taken count from SCEV.
+; CHECK-LABEL: @pr18886
+; CHECK: Loop %for.body: <multiple exits> Unpredictable backedge-taken count. 
+; CHECK: Loop %for.body: max backedge-taken count is 3
+@aa = global i64 0, align 8
+
+define i32 @pr18886() {
+entry:
+  store i64 -21, i64* @aa, align 8
+  br label %for.body
+
+for.body:
+  %storemerge1 = phi i64 [ -21, %entry ], [ %add, %for.cond ]
+  %tobool = icmp eq i64 %storemerge1, 0
+  %add = add nsw i64 %storemerge1, 8
+  br i1 %tobool, label %return, label %for.cond
+
+for.cond:
+  store i64 %add, i64* @aa, align 8
+  %cmp = icmp slt i64 %add, 9
+  br i1 %cmp, label %for.body, label %return
+
+return:
+  %retval.0 = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+  ret i32 %retval.0
+}
+
+; Here we have a must-exit loop latch that is not computable and a
+; may-exit early exit that can only have one non-exiting iteration
+; before the check is forever skipped.
+;
+; CHECK-LABEL: @cannot_compute_mustexit
+; CHECK: Loop %for.body.i: <multiple exits> Unpredictable backedge-taken count. 
+; CHECK: Loop %for.body.i: Unpredictable max backedge-taken count. 
+@b = common global i32 0, align 4
+
+define i32 @cannot_compute_mustexit() {
+entry:
+  store i32 -1, i32* @a, align 4
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.cond.i, %entry
+  %storemerge1.i = phi i32 [ -1, %entry ], [ %add.i.i, %for.cond.i ]
+  %tobool.i = icmp eq i32 %storemerge1.i, 0
+  %add.i.i = add nsw i32 %storemerge1.i, 2
+  br i1 %tobool.i, label %bar.exit, label %for.cond.i
+
+for.cond.i:                                       ; preds = %for.body.i
+  store i32 %add.i.i, i32* @a, align 4
+  %ld = load volatile i32* @b
+  %cmp.i = icmp ne i32 %ld, 0
+  br i1 %cmp.i, label %for.body.i, label %bar.exit
+
+bar.exit:                                         ; preds = %for.cond.i, %for.body.i
+  ret i32 0
+}
+
+; This loop has two must-exits, both of which dominate the latch. The
+; MaxBECount should be the minimum of them.
+;
+; CHECK-LABEL: @two_mustexit
+; CHECK: Loop %for.body.i: <multiple exits> Unpredictable backedge-taken count. 
+; CHECK: Loop %for.body.i: max backedge-taken count is 1
+define i32 @two_mustexit() {
+entry:
+  store i32 -1, i32* @a, align 4
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.cond.i, %entry
+  %storemerge1.i = phi i32 [ -1, %entry ], [ %add.i.i, %for.cond.i ]
+  %tobool.i = icmp sgt i32 %storemerge1.i, 0
+  %add.i.i = add nsw i32 %storemerge1.i, 2
+  br i1 %tobool.i, label %bar.exit, label %for.cond.i
+
+for.cond.i:                                       ; preds = %for.body.i
+  store i32 %add.i.i, i32* @a, align 4
+  %cmp.i = icmp slt i32 %storemerge1.i, 3
+  br i1 %cmp.i, label %for.body.i, label %bar.exit
+
+bar.exit:                                         ; preds = %for.cond.i, %for.body.i
+  ret i32 0
+}
-- 
cgit v1.1