From 36b56886974eae4f9c5ebc96befd3e7bfe5de338 Mon Sep 17 00:00:00 2001
From: Stephen Hines <srhines@google.com>
Date: Wed, 23 Apr 2014 16:57:46 -0700
Subject: Update to LLVM 3.5a.

Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617
---
 test/Analysis/BasicAA/2007-11-05-SizeCrash.ll      |   2 +-
 .../BasicAA/2007-12-08-OutOfBoundsCrash.ll         |   2 +-
 .../BasicAA/2014-03-18-Maxlookup-reached.ll        |  36 +++++
 test/Analysis/BasicAA/noalias-bugs.ll              |  33 ++++
 test/Analysis/BasicAA/phi-aa.ll                    |  47 ++++++
 test/Analysis/BasicAA/pr18573.ll                   |  53 +++++++
 test/Analysis/CostModel/ARM/cast.ll                |  34 ++---
 test/Analysis/CostModel/ARM64/lit.local.cfg        |   3 +
 test/Analysis/CostModel/ARM64/select.ll            |  38 +++++
 test/Analysis/CostModel/ARM64/store.ll             |  22 +++
 test/Analysis/CostModel/PowerPC/ext.ll             |  21 +++
 test/Analysis/CostModel/PowerPC/load_store.ll      |   5 +
 test/Analysis/CostModel/X86/cast.ll                |  97 +++++++++---
 test/Analysis/CostModel/X86/cmp.ll                 |   4 +-
 test/Analysis/CostModel/X86/scalarize.ll           |  41 +++++
 test/Analysis/CostModel/X86/vshift-cost.ll         | 167 +++++++++++++++++++++
 test/Analysis/DependenceAnalysis/Banerjee.ll       | 107 ++++++++++++-
 test/Analysis/DependenceAnalysis/GCD.ll            |  83 +++++++++-
 test/Analysis/LazyCallGraph/basic.ll               | 126 ++++++++++++++++
 test/Analysis/Lint/address-spaces.ll               |  25 +++
 .../ScalarEvolution/2009-04-22-TruncCast.ll        |   2 +-
 .../ScalarEvolution/2012-03-26-LoadConstant.ll     |   2 +-
 .../ScalarEvolution/2012-05-18-LoopPredRecurse.ll  |   2 +-
 test/Analysis/ScalarEvolution/and-xor.ll           |  18 ++-
 test/Analysis/ScalarEvolution/fold.ll              |  26 ++++
 test/Analysis/ScalarEvolution/nsw-offset.ll        |   2 +-
 test/Analysis/ScalarEvolution/trip-count-pow2.ll   |  53 +++++++
 test/Analysis/ScalarEvolution/trip-count-switch.ll |  30 ++++
 test/Analysis/ScalarEvolution/xor-and.ll           |  13 --
 .../Analysis/ScalarEvolution/zext-signed-addrec.ll |  81 ++++++++++
 test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll  |   4 +-
 test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll  |  18 +--
 32 files changed, 1123 insertions(+), 74 deletions(-)
 create mode 100644 test/Analysis/BasicAA/2014-03-18-Maxlookup-reached.ll
 create mode 100644 test/Analysis/BasicAA/noalias-bugs.ll
 create mode 100644 test/Analysis/BasicAA/pr18573.ll
 create mode 100644 test/Analysis/CostModel/ARM64/lit.local.cfg
 create mode 100644 test/Analysis/CostModel/ARM64/select.ll
 create mode 100644 test/Analysis/CostModel/ARM64/store.ll
 create mode 100644 test/Analysis/CostModel/PowerPC/ext.ll
 create mode 100644 test/Analysis/CostModel/X86/scalarize.ll
 create mode 100644 test/Analysis/CostModel/X86/vshift-cost.ll
 create mode 100644 test/Analysis/LazyCallGraph/basic.ll
 create mode 100644 test/Analysis/Lint/address-spaces.ll
 create mode 100644 test/Analysis/ScalarEvolution/trip-count-pow2.ll
 create mode 100644 test/Analysis/ScalarEvolution/trip-count-switch.ll
 delete mode 100644 test/Analysis/ScalarEvolution/xor-and.ll
 create mode 100644 test/Analysis/ScalarEvolution/zext-signed-addrec.ll

(limited to 'test/Analysis')

diff --git a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
index 563d332..32d9930 100644
--- a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
+++ b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
         %struct.pci_device_id = type { i32, i32, i32, i32, i32, i32, i64 }
         %struct.usb_bus = type { %struct.device* }
         %struct.usb_hcd = type { %struct.usb_bus, i64, [0 x i64] }
-@uhci_pci_ids = external constant [1 x %struct.pci_device_id]           ; <[1 x %struct.pci_device_id]*> [#uses=1]
+@uhci_pci_ids = constant [1 x %struct.pci_device_id] zeroinitializer
 
 @__mod_pci_device_table = alias [1 x %struct.pci_device_id]* @uhci_pci_ids     
         ; <[1 x %struct.pci_device_id]*> [#uses=0]
diff --git a/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll b/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
index 52d0af1..cd997ea 100644
--- a/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
+++ b/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
 	%struct.pci_device_id = type { i32, i32, i32, i32, i32, i32, i64 }
 	%struct.usb_bus = type { %struct.device* }
 	%struct.usb_hcd = type { %struct.usb_bus, [0 x i64] }
-@pci_ids = external constant [1 x %struct.pci_device_id]		; <[1 x %struct.pci_device_id]*> [#uses=1]
+@pci_ids = constant [1 x %struct.pci_device_id] zeroinitializer
 
 @__mod_pci_device_table = alias [1 x %struct.pci_device_id]* @pci_ids		; <[1 x %struct.pci_device_id]*> [#uses=0]
 
diff --git a/test/Analysis/BasicAA/2014-03-18-Maxlookup-reached.ll b/test/Analysis/BasicAA/2014-03-18-Maxlookup-reached.ll
new file mode 100644
index 0000000..bc2512e
--- /dev/null
+++ b/test/Analysis/BasicAA/2014-03-18-Maxlookup-reached.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -basicaa -gvn  -S | FileCheck %s
+
+; PR15967
+; BasicAA claims no alias when there is one (due to a problem when the
+; MaxLookup limit was reached).
+
+target datalayout = "e"
+
+%struct.foo = type { i32, i32 }
+
+define i32 @main() {
+  %t = alloca %struct.foo, align 4
+  %1 = getelementptr inbounds %struct.foo* %t, i32 0, i32 0
+  store i32 1, i32* %1, align 4
+  %2 = getelementptr inbounds %struct.foo* %t, i64 1
+  %3 = bitcast %struct.foo* %2 to i8*
+  %4 = getelementptr inbounds i8* %3, i32 -1
+  store i8 0, i8* %4
+  %5 = getelementptr inbounds i8* %4, i32 -1
+  store i8 0, i8* %5
+  %6 = getelementptr inbounds i8* %5, i32 -1
+  store i8 0, i8* %6
+  %7 = getelementptr inbounds i8* %6, i32 -1
+  store i8 0, i8* %7
+  %8 = getelementptr inbounds i8* %7, i32 -1
+  store i8 0, i8* %8
+  %9 = getelementptr inbounds i8* %8, i32 -1
+  store i8 0, i8* %9
+  %10 = getelementptr inbounds i8* %9, i32 -1
+  store i8 0, i8* %10
+  %11 = getelementptr inbounds i8* %10, i32 -1
+  store i8 0, i8* %11
+  %12 = load i32* %1, align 4
+  ret i32 %12
+; CHECK: ret i32 %12
+}
diff --git a/test/Analysis/BasicAA/noalias-bugs.ll b/test/Analysis/BasicAA/noalias-bugs.ll
new file mode 100644
index 0000000..2bcc14f
--- /dev/null
+++ b/test/Analysis/BasicAA/noalias-bugs.ll
@@ -0,0 +1,33 @@
+; RUN: opt -S -basicaa -dse < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; We incorrectly returned noalias in the example below for "ptr.64" and
+; "either_ptr.64".
+; PR18460
+
+%nested = type { %nested.i64 }
+%nested.i64 = type { i64 }
+
+define i64 @testcase(%nested * noalias %p1, %nested * noalias %p2,
+                     i32 %a, i32 %b) {
+  %ptr = getelementptr inbounds %nested* %p1, i64 -1, i32 0
+  %ptr.64 = getelementptr inbounds %nested.i64* %ptr, i64 0, i32 0
+  %ptr2= getelementptr inbounds %nested* %p2, i64 0, i32 0
+  %cmp = icmp ult i32 %a, %b
+  %either_ptr = select i1 %cmp, %nested.i64* %ptr2, %nested.i64* %ptr
+  %either_ptr.64 = getelementptr inbounds %nested.i64* %either_ptr, i64 0, i32 0
+
+; either_ptr.64 is the same address as ptr.64 whenever %cmp is false, so the
+; load may read the first store (we used to return noalias): DSE must keep it.
+
+; CHECK: store i64 2
+; CHECK: load
+; CHECK: store i64 1
+
+  store i64 2, i64* %ptr.64, align 8
+  %r = load i64* %either_ptr.64, align 8
+  store i64 1, i64* %ptr.64, align 8
+  ret i64 %r
+}
diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll
index 6aa26c1..74279e1 100644
--- a/test/Analysis/BasicAA/phi-aa.ll
+++ b/test/Analysis/BasicAA/phi-aa.ll
@@ -1,10 +1,14 @@
 ; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
 ; rdar://7282591
 
 @X = common global i32 0
 @Y = common global i32 0
 @Z = common global i32 0
 
+; CHECK-LABEL: foo
 ; CHECK:  NoAlias: i32* %P, i32* @Z
 
 define void @foo(i32 %cond) nounwind {
@@ -29,3 +33,46 @@ bb2:
 return:
   ret void
 }
+
+; Pointers can vary in between iterations of loops.
+; PR18068
+
+; CHECK-LABEL: pr18068
+; CHECK: MayAlias: i32* %0, i32* %arrayidx5
+
+define i32 @pr18068(i32* %jj7, i32* %j) {
+entry:
+  %oa5 = alloca [100 x i32], align 16
+  br label %codeRepl
+
+codeRepl:
+  %0 = phi i32* [ %arrayidx13, %for.body ], [ %j, %entry ]
+  %targetBlock = call i1 @cond(i32* %jj7)
+  br i1 %targetBlock, label %for.body, label %bye
+
+for.body:
+  %1 = load i32* %jj7, align 4
+  %idxprom4 = zext i32 %1 to i64
+  %arrayidx5 = getelementptr inbounds [100 x i32]* %oa5, i64 0, i64 %idxprom4
+  %2 = load i32* %arrayidx5, align 4
+  %sub6 = sub i32 %2, 6
+  store i32 %sub6, i32* %arrayidx5, align 4
+  ; %0 and %arrayidx5 can alias! It is not safe to DSE the above store.
+  %3 = load i32* %0, align 4
+  store i32 %3, i32* %arrayidx5, align 4
+  %sub11 = add i32 %1, -1
+  %idxprom12 = zext i32 %sub11 to i64
+  %arrayidx13 = getelementptr inbounds [100 x i32]* %oa5, i64 0, i64 %idxprom12
+  call void @inc(i32* %jj7)
+  br label %codeRepl
+
+bye:
+  %.reload = load i32* %jj7, align 4
+  ret i32 %.reload
+}
+
+declare i1 @cond(i32*)
+
+declare void @inc(i32*)
+
+
diff --git a/test/Analysis/BasicAA/pr18573.ll b/test/Analysis/BasicAA/pr18573.ll
new file mode 100644
index 0000000..1d2a316
--- /dev/null
+++ b/test/Analysis/BasicAA/pr18573.ll
@@ -0,0 +1,53 @@
+; RUN: opt %s -O2 -S | FileCheck %s
+
+; Check that llvm.x86.avx2.gather.d.ps.256 intrinsic is not eliminated as gather and store memory accesses are based on arr.ptr
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readonly
+declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) #0
+
+; Function Attrs: nounwind
+define <8 x float> @foo1(i8* noalias readonly %arr.ptr, <8 x i32>* noalias readonly %vix.ptr, i8* noalias %t2.ptr) #1 {
+allocas:
+  %vix = load <8 x i32>* %vix.ptr, align 4
+  %t1.ptr = getelementptr i8* %arr.ptr, i8 4
+  
+  %v1 = tail call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %arr.ptr, <8 x i32> %vix, <8 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, i8 1) #2
+  store i8 1, i8* %t1.ptr, align 4
+
+  %v2 = tail call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %arr.ptr, <8 x i32> %vix, <8 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, i8 1) #2
+  %res = fadd <8 x float> %v1, %v2
+
+  ret <8 x float> %res
+}
+; CHECK: foo1
+; CHECK: llvm.x86.avx2.gather.d.ps.256
+; CHECK: store
+; CHECK: llvm.x86.avx2.gather.d.ps.256
+
+; Check that second gather is eliminated as gather and store memory accesses are based on different no-aliasing pointers
+
+; Function Attrs: nounwind
+define <8 x float> @foo2(i8* noalias readonly %arr.ptr, <8 x i32>* noalias readonly %vix.ptr, i8* noalias %t2.ptr) #1 {
+allocas:
+  %vix = load <8 x i32>* %vix.ptr, align 4
+  %t1.ptr = getelementptr i8* %arr.ptr, i8 4
+  
+  %v1 = tail call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %arr.ptr, <8 x i32> %vix, <8 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, i8 1) #2
+  store i8 1, i8* %t2.ptr, align 4
+
+  %v2 = tail call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %arr.ptr, <8 x i32> %vix, <8 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, i8 1) #2
+  %res = fadd <8 x float> %v1, %v2
+
+  ret <8 x float> %res
+}
+; CHECK: foo2
+; CHECK: llvm.x86.avx2.gather.d.ps.256
+; CHECK: store
+; CHECK-NOT: llvm.x86.avx2.gather.d.ps.256
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind "target-cpu"="corei7-avx" "target-features"="+avx2,+popcnt,+cmov,+f16c,+rdrnd,+fma" }
+attributes #2 = { nounwind }
+
diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll
index 0cdd61c..662110f 100644
--- a/test/Analysis/CostModel/ARM/cast.ll
+++ b/test/Analysis/CostModel/ARM/cast.ll
@@ -221,9 +221,9 @@ define i32 @casts() {
   %r96 = fptoui <2 x float> undef to <2 x i32>
   ; CHECK: cost of 1 {{.*}} fptosi
   %r97 = fptosi <2 x float> undef to <2 x i32>
-  ; CHECK: cost of 24 {{.*}} fptoui
+  ; CHECK: cost of 28 {{.*}} fptoui
   %r98 = fptoui <2 x float> undef to <2 x i64>
-  ; CHECK: cost of 24 {{.*}} fptosi
+  ; CHECK: cost of 28 {{.*}} fptosi
   %r99 = fptosi <2 x float> undef to <2 x i64>
 
   ; CHECK: cost of 8 {{.*}} fptoui
@@ -242,9 +242,9 @@ define i32 @casts() {
   %r106 = fptoui <2 x double> undef to <2 x i32>
   ; CHECK: cost of 2 {{.*}} fptosi
   %r107 = fptosi <2 x double> undef to <2 x i32>
-  ; CHECK: cost of 24 {{.*}} fptoui
+  ; CHECK: cost of 28 {{.*}} fptoui
   %r108 = fptoui <2 x double> undef to <2 x i64>
-  ; CHECK: cost of 24 {{.*}} fptosi
+  ; CHECK: cost of 28 {{.*}} fptosi
   %r109 = fptosi <2 x double> undef to <2 x i64>
 
   ; CHECK: cost of 16 {{.*}} fptoui
@@ -263,9 +263,9 @@ define i32 @casts() {
   %r116 = fptoui <4 x float> undef to <4 x i32>
   ; CHECK: cost of 1 {{.*}} fptosi
   %r117 = fptosi <4 x float> undef to <4 x i32>
-  ; CHECK: cost of 48 {{.*}} fptoui
+  ; CHECK: cost of 56 {{.*}} fptoui
   %r118 = fptoui <4 x float> undef to <4 x i64>
-  ; CHECK: cost of 48 {{.*}} fptosi
+  ; CHECK: cost of 56 {{.*}} fptosi
   %r119 = fptosi <4 x float> undef to <4 x i64>
 
   ; CHECK: cost of 16 {{.*}} fptoui
@@ -284,9 +284,9 @@ define i32 @casts() {
   %r126 = fptoui <4 x double> undef to <4 x i32>
   ; CHECK: cost of 16 {{.*}} fptosi
   %r127 = fptosi <4 x double> undef to <4 x i32>
-  ; CHECK: cost of 48 {{.*}} fptoui
+  ; CHECK: cost of 56 {{.*}} fptoui
   %r128 = fptoui <4 x double> undef to <4 x i64>
-  ; CHECK: cost of 48 {{.*}} fptosi
+  ; CHECK: cost of 56 {{.*}} fptosi
   %r129 = fptosi <4 x double> undef to <4 x i64>
 
   ; CHECK: cost of 32 {{.*}} fptoui
@@ -305,9 +305,9 @@ define i32 @casts() {
   %r136 = fptoui <8 x float> undef to <8 x i32>
   ; CHECK: cost of 2 {{.*}} fptosi
   %r137 = fptosi <8 x float> undef to <8 x i32>
-  ; CHECK: cost of 96 {{.*}} fptoui
+  ; CHECK: cost of 112 {{.*}} fptoui
   %r138 = fptoui <8 x float> undef to <8 x i64>
-  ; CHECK: cost of 96 {{.*}} fptosi
+  ; CHECK: cost of 112 {{.*}} fptosi
   %r139 = fptosi <8 x float> undef to <8 x i64>
 
   ; CHECK: cost of 32 {{.*}} fptoui
@@ -326,9 +326,9 @@ define i32 @casts() {
   %r146 = fptoui <8 x double> undef to <8 x i32>
   ; CHECK: cost of 32 {{.*}} fptosi
   %r147 = fptosi <8 x double> undef to <8 x i32>
-  ; CHECK: cost of 96 {{.*}} fptoui
+  ; CHECK: cost of 112 {{.*}} fptoui
   %r148 = fptoui <8 x double> undef to <8 x i64>
-  ; CHECK: cost of 96 {{.*}} fptosi
+  ; CHECK: cost of 112 {{.*}} fptosi
   %r149 = fptosi <8 x double> undef to <8 x i64>
 
   ; CHECK: cost of 64 {{.*}} fptoui
@@ -347,9 +347,9 @@ define i32 @casts() {
   %r156 = fptoui <16 x float> undef to <16 x i32>
   ; CHECK: cost of 4 {{.*}} fptosi
   %r157 = fptosi <16 x float> undef to <16 x i32>
-  ; CHECK: cost of 192 {{.*}} fptoui
+  ; CHECK: cost of 224 {{.*}} fptoui
   %r158 = fptoui <16 x float> undef to <16 x i64>
-  ; CHECK: cost of 192 {{.*}} fptosi
+  ; CHECK: cost of 224 {{.*}} fptosi
   %r159 = fptosi <16 x float> undef to <16 x i64>
 
   ; CHECK: cost of 64 {{.*}} fptoui
@@ -368,9 +368,9 @@ define i32 @casts() {
   %r166 = fptoui <16 x double> undef to <16 x i32>
   ; CHECK: cost of 64 {{.*}} fptosi
   %r167 = fptosi <16 x double> undef to <16 x i32>
-  ; CHECK: cost of 192 {{.*}} fptoui
+  ; CHECK: cost of 224 {{.*}} fptoui
   %r168 = fptoui <16 x double> undef to <16 x i64>
-  ; CHECK: cost of 192 {{.*}} fptosi
+  ; CHECK: cost of 224 {{.*}} fptosi
   %r169 = fptosi <16 x double> undef to <16 x i64>
 
   ; CHECK: cost of 8 {{.*}} uitofp
@@ -528,7 +528,7 @@ define i32 @casts() {
   %r242 = uitofp <16 x i8> undef to <16 x double>
   ; CHECK: cost of 64 {{.*}} sitofp
   %r243 = sitofp <16 x i8> undef to <16 x double>
-  ; C4ECK: cost of 64 {{.*}} uitofp
+  ; CHECK: cost of 64 {{.*}} uitofp
   %r244 = uitofp <16 x i16> undef to <16 x double>
   ; CHECK: cost of 64 {{.*}} sitofp
   %r245 = sitofp <16 x i16> undef to <16 x double>
diff --git a/test/Analysis/CostModel/ARM64/lit.local.cfg b/test/Analysis/CostModel/ARM64/lit.local.cfg
new file mode 100644
index 0000000..84ac981
--- /dev/null
+++ b/test/Analysis/CostModel/ARM64/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if not 'ARM64' in targets:
+    config.unsupported = True
diff --git a/test/Analysis/CostModel/ARM64/select.ll b/test/Analysis/CostModel/ARM64/select.ll
new file mode 100644
index 0000000..216dc5d
--- /dev/null
+++ b/test/Analysis/CostModel/ARM64/select.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+
+; CHECK-LABEL: select
+define void @select() {
+    ; Scalar values
+  ; CHECK: cost of 1 {{.*}} select
+  %v1 = select i1 undef, i8 undef, i8 undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v2 = select i1 undef, i16 undef, i16 undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v3 = select i1 undef, i32 undef, i32 undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v4 = select i1 undef, i64 undef, i64 undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v5 = select i1 undef, float undef, float undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v6 = select i1 undef, double undef, double undef
+
+  ; Vector values - check for vectors that have a high cost because they end up
+  ; scalarized.
+  ; CHECK: cost of 320 {{.*}} select
+  %v13b = select <16 x i1>  undef, <16 x i16> undef, <16 x i16> undef
+
+  ; CHECK: cost of 160 {{.*}} select
+  %v15b = select <8 x i1>  undef, <8 x i32> undef, <8 x i32> undef
+  ; CHECK: cost of 320 {{.*}} select
+  %v15c = select <16 x i1>  undef, <16 x i32> undef, <16 x i32> undef
+
+  ; CHECK: cost of 80 {{.*}} select
+  %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+  ; CHECK: cost of 160 {{.*}} select
+  %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+  ; CHECK: cost of 320 {{.*}} select
+  %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+
+    ret void
+}
diff --git a/test/Analysis/CostModel/ARM64/store.ll b/test/Analysis/CostModel/ARM64/store.ll
new file mode 100644
index 0000000..0c9883c
--- /dev/null
+++ b/test/Analysis/CostModel/ARM64/store.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+; CHECK-LABEL: store
+define void @store() {
+    ; Stores of <2 x i64> should be expensive because we don't split them and
+    ; unaligned 16b stores have bad performance.
+    ; CHECK: cost of 12 {{.*}} store
+    store <2 x i64> undef, <2 x i64> * undef
+
+    ; We scalarize the loads/stores because there is no vector register name for
+    ; these types (they get extended to v.4h/v.2s).
+    ; CHECK: cost of 16 {{.*}} store
+    store <2 x i8> undef, <2 x i8> * undef
+    ; CHECK: cost of 64 {{.*}} store
+    store <4 x i8> undef, <4 x i8> * undef
+    ; CHECK: cost of 16 {{.*}} load
+    load <2 x i8> * undef
+    ; CHECK: cost of 64 {{.*}} load
+    load <4 x i8> * undef
+
+    ret void
+}
diff --git a/test/Analysis/CostModel/PowerPC/ext.ll b/test/Analysis/CostModel/PowerPC/ext.ll
new file mode 100644
index 0000000..daaa8f5
--- /dev/null
+++ b/test/Analysis/CostModel/PowerPC/ext.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @exts() {
+
+  ; CHECK: cost of 1 {{.*}} sext
+  %v1 = sext i16 undef to i32
+
+  ; CHECK: cost of 1 {{.*}} sext
+  %v2 = sext <2 x i16> undef to <2 x i32>
+
+  ; CHECK: cost of 1 {{.*}} sext
+  %v3 = sext <4 x i16> undef to <4 x i32>
+
+  ; CHECK: cost of 216 {{.*}} sext
+  %v4 = sext <8 x i16> undef to <8 x i32>
+
+  ret void
+}
+
diff --git a/test/Analysis/CostModel/PowerPC/load_store.ll b/test/Analysis/CostModel/PowerPC/load_store.ll
index c77cce9..8145a1d 100644
--- a/test/Analysis/CostModel/PowerPC/load_store.ll
+++ b/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -29,6 +29,11 @@ define i32 @loads(i32 %arg) {
   ; CHECK: cost of 4 {{.*}} load
   load i128* undef, align 4
 
+  ; FIXME: There actually are sub-vector Altivec loads, and so we could handle
+  ; this with a small expense, but we don't currently.
+  ; CHECK: cost of 60 {{.*}} load
+  load <4 x i16>* undef, align 2
+
   ret i32 undef
 }
 
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll
index f3c1283..7f97b17 100644
--- a/test/Analysis/CostModel/X86/cast.ll
+++ b/test/Analysis/CostModel/X86/cast.ll
@@ -1,10 +1,11 @@
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
 define i32 @add(i32 %arg) {
-
+; CHECK-LABEL: for function 'add'
   ; -- Same size registeres --
   ;CHECK: cost of 1 {{.*}} zext
   %A = zext <4 x i1> undef to <4 x i32>
@@ -33,57 +34,106 @@ define i32 @add(i32 %arg) {
 }
 
 define i32 @zext_sext(<8 x i1> %in) {
-  ;CHECK: cost of 6 {{.*}} zext
+; CHECK-AVX2-LABEL: for function 'zext_sext'
+; CHECK-AVX-LABEL: for function 'zext_sext'
+  ;CHECK-AVX2: cost of 3 {{.*}} zext
+  ;CHECK-AVX: cost of 4 {{.*}} zext
   %Z = zext <8 x i1> %in to <8 x i32>
-  ;CHECK: cost of 9 {{.*}} sext
+  ;CHECK-AVX2: cost of 3 {{.*}} sext
+  ;CHECK-AVX: cost of 7 {{.*}} sext
   %S = sext <8 x i1> %in to <8 x i32>
 
-  ;CHECK: cost of 1 {{.*}} zext
+  ;CHECK-AVX2: cost of 1 {{.*}} zext
+  ;CHECK-AVX: cost of 4 {{.*}} zext
   %A1 = zext <16 x i8> undef to <16 x i16>
-  ;CHECK: cost of 1 {{.*}} sext
+  ;CHECK-AVX2: cost of 1 {{.*}} sext
+  ;CHECK-AVX: cost of 4 {{.*}} sext
   %A2 = sext <16 x i8> undef to <16 x i16>
-  ;CHECK: cost of 1 {{.*}} sext
+  ;CHECK-AVX2: cost of 1 {{.*}} sext
+  ;CHECK-AVX: cost of 4 {{.*}} sext
   %A = sext <8 x i16> undef to <8 x i32>
-  ;CHECK: cost of 1 {{.*}} zext
+  ;CHECK-AVX2: cost of 1 {{.*}} zext
+  ;CHECK-AVX: cost of 4 {{.*}} zext
   %B = zext <8 x i16> undef to <8 x i32>
-  ;CHECK: cost of 1 {{.*}} sext
+  ;CHECK-AVX2: cost of 1 {{.*}} sext
+  ;CHECK-AVX: cost of 4 {{.*}} sext
   %C = sext <4 x i32> undef to <4 x i64>
-  ;CHECK: cost of 6 {{.*}} sext
-  %C1 = sext <4 x i8> undef to <4 x i64>
-  ;CHECK: cost of 6 {{.*}} sext
-  %C2 = sext <4 x i16> undef to <4 x i64>
 
-  ;CHECK: cost of 1 {{.*}} zext
+  ;CHECK-AVX2: cost of 3 {{.*}} zext
+  ;CHECK-AVX: cost of 4 {{.*}} zext
+  %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
+  ;CHECK-AVX2: cost of 3 {{.*}} sext
+  ;CHECK-AVX: cost of 7 {{.*}} sext
+  %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
+  ;CHECK-AVX2: cost of 3 {{.*}} zext
+  ;CHECK-AVX: cost of 3 {{.*}} zext
+  %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
+  ;CHECK-AVX2: cost of 3 {{.*}} sext
+  ;CHECK-AVX: cost of 6 {{.*}} sext
+  %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
+
+  ;CHECK-AVX2: cost of 3 {{.*}} zext
+  ;CHECK-AVX: cost of 4 {{.*}} zext
+  %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
+  ;CHECK-AVX2: cost of 3 {{.*}} sext
+  ;CHECK-AVX: cost of 6 {{.*}} sext
+  %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
+
+  ;CHECK-AVX2: cost of 1 {{.*}} zext
+  ;CHECK-AVX: cost of 4 {{.*}} zext
   %D = zext <4 x i32> undef to <4 x i64>
 
-  ;CHECK: cost of 1 {{.*}} trunc
+  ;CHECK-AVX2: cost of 2 {{.*}} trunc
+  ;CHECK-AVX: cost of 4 {{.*}} trunc
   %E = trunc <4 x i64> undef to <4 x i32>
-  ;CHECK: cost of 1 {{.*}} trunc
+  ;CHECK-AVX2: cost of 2 {{.*}} trunc
+  ;CHECK-AVX: cost of 5 {{.*}} trunc
   %F = trunc <8 x i32> undef to <8 x i16>
-  ;CHECK: cost of 2 {{.*}} trunc
+  ;CHECK-AVX2: cost of 4 {{.*}} trunc
+  ;CHECK-AVX: cost of 4 {{.*}} trunc
   %F1 = trunc <16 x i16> undef to <16 x i8>
-
-  ;CHECK: cost of 3 {{.*}} trunc
+  ;CHECK-AVX2: cost of 2 {{.*}} trunc
+  ;CHECK-AVX: cost of 4 {{.*}} trunc
+  %F2 = trunc <8 x i32> undef to <8 x i8>
+  ;CHECK-AVX2: cost of 2 {{.*}} trunc
+  ;CHECK-AVX: cost of 4 {{.*}} trunc
+  %F3 = trunc <4 x i64> undef to <4 x i8>
+
+  ;CHECK-AVX2: cost of 4 {{.*}} trunc
+  ;CHECK-AVX: cost of 9 {{.*}} trunc
   %G = trunc <8 x i64> undef to <8 x i32>
 
   ret i32 undef
 }
 
 define i32 @masks8(<8 x i1> %in) {
-  ;CHECK: cost of 6 {{.*}} zext
+; CHECK-AVX2-LABEL: for function 'masks8'
+; CHECK-AVX-LABEL: for function 'masks8'
+
+  ;CHECK-AVX2: cost of 3 {{.*}} zext
+  ;CHECK-AVX: cost of 4 {{.*}} zext
   %Z = zext <8 x i1> %in to <8 x i32>
-  ;CHECK: cost of 9 {{.*}} sext
+  ;CHECK-AVX2: cost of 3 {{.*}} sext
+  ;CHECK-AVX: cost of 7 {{.*}} sext
   %S = sext <8 x i1> %in to <8 x i32>
   ret i32 undef
 }
 
 define i32 @masks4(<4 x i1> %in) {
-  ;CHECK: cost of 8 {{.*}} sext
+; CHECK-AVX2-LABEL: for function 'masks4'
+; CHECK-AVX-LABEL: for function 'masks4'
+
+  ;CHECK-AVX2: cost of 3 {{.*}} zext
+  ;CHECK-AVX: cost of 4 {{.*}} zext
+  %Z = zext <4 x i1> %in to <4 x i64>
+  ;CHECK-AVX2: cost of 3 {{.*}} sext
+  ;CHECK-AVX: cost of 6 {{.*}} sext
   %S = sext <4 x i1> %in to <4 x i64>
   ret i32 undef
 }
 
 define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
+; CHECK-LABEL: for function 'sitofp4'
   ; CHECK: cost of 3 {{.*}} sitofp
   %A1 = sitofp <4 x i1> %a to <4 x float>
   ; CHECK: cost of 3 {{.*}} sitofp
@@ -107,6 +157,7 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
 }
 
 define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
+; CHECK-LABEL: for function 'sitofp8'
   ; CHECK: cost of 8 {{.*}} sitofp
   %A1 = sitofp <8 x i1> %a to <8 x float>
 
@@ -122,6 +173,7 @@ define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
 }
 
 define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
+; CHECK-LABEL: for function 'uitofp4'
   ; CHECK: cost of 7 {{.*}} uitofp
   %A1 = uitofp <4 x i1> %a to <4 x float>
   ; CHECK: cost of 7 {{.*}} uitofp
@@ -145,6 +197,7 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
 }
 
 define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
+; CHECK-LABEL: for function 'uitofp8'
   ; CHECK: cost of 6 {{.*}} uitofp
   %A1 = uitofp <8 x i1> %a to <8 x float>
 
diff --git a/test/Analysis/CostModel/X86/cmp.ll b/test/Analysis/CostModel/X86/cmp.ll
index 713b374..9f2bdb3 100644
--- a/test/Analysis/CostModel/X86/cmp.ll
+++ b/test/Analysis/CostModel/X86/cmp.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix=AVX1 %s
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck --check-prefix=AVX2 %s
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck -check-prefix=CHECK -check-prefix=AVX1 %s
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck -check-prefix=CHECK -check-prefix=AVX2 %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Analysis/CostModel/X86/scalarize.ll b/test/Analysis/CostModel/X86/scalarize.ll
new file mode 100644
index 0000000..fc25fcb
--- /dev/null
+++ b/test/Analysis/CostModel/X86/scalarize.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=i386 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK32
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK64
+
+; Test vector scalarization costs.
+; RUN: llc < %s -march=x86 -mcpu=i386
+; RUN: llc < %s -march=x86 -mcpu=yonah
+
+%i4 = type <4 x i32>
+%i8 = type <2 x i64>
+
+;;; TEST HANDLING OF VARIOUS VECTOR SIZES
+
+declare %i4 @llvm.bswap.v4i32(%i4)
+declare %i8 @llvm.bswap.v2i64(%i8)
+
+declare %i4 @llvm.ctpop.v4i32(%i4)
+declare %i8 @llvm.ctpop.v2i64(%i8)
+
+; CHECK32-LABEL: test_scalarized_intrinsics
+; CHECK64-LABEL: test_scalarized_intrinsics
+define void @test_scalarized_intrinsics() {
+        %r1 = add %i8 undef, undef
+
+; CHECK32: cost of 12 {{.*}}bswap.v4i32
+; CHECK64: cost of 12 {{.*}}bswap.v4i32
+        %r2 = call %i4 @llvm.bswap.v4i32(%i4 undef)
+; CHECK32: cost of 10 {{.*}}bswap.v2i64
+; CHECK64: cost of 6 {{.*}}bswap.v2i64
+        %r3 = call %i8 @llvm.bswap.v2i64(%i8 undef)
+
+; CHECK32: cost of 12 {{.*}}ctpop.v4i32
+; CHECK64: cost of 12 {{.*}}ctpop.v4i32
+        %r4 = call %i4 @llvm.ctpop.v4i32(%i4 undef)
+; CHECK32: cost of 10 {{.*}}ctpop.v2i64
+; CHECK64: cost of 6 {{.*}}ctpop.v2i64
+        %r5 = call %i8 @llvm.ctpop.v2i64(%i8 undef)
+
+; CHECK32: ret
+; CHECK64: ret
+        ret void
+}
diff --git a/test/Analysis/CostModel/X86/vshift-cost.ll b/test/Analysis/CostModel/X86/vshift-cost.ll
new file mode 100644
index 0000000..84d7246
--- /dev/null
+++ b/test/Analysis/CostModel/X86/vshift-cost.ll
@@ -0,0 +1,167 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+
+
+; Verify the cost of vector shift left instructions.
+
+; We always emit a single pmullw in the case of v8i16 vector shifts by
+; a non-uniform constant.
+
+define <8 x i16> @test1(<8 x i16> %a) {
+  %shl = shl <8 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
+  ret <8 x i16> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test1':
+; CHECK: Found an estimated cost of 1 for instruction:   %shl
+
+
+define <8 x i16> @test2(<8 x i16> %a) {
+  %shl = shl <8 x i16> %a, <i16 0, i16 undef, i16 0, i16 0, i16 1, i16 undef, i16 -1, i16 1>
+  ret <8 x i16> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test2':
+; CHECK: Found an estimated cost of 1 for instruction:   %shl
+
+
+; With SSE4.1, v4i32 shifts can be lowered into a single pmulld instruction.
+; Make sure that the estimated cost is always 1 except for the case where
+; we only have SSE2 support. With SSE2, the v4i32 mul needs special lowering
+; into a sequence of 2x shuffle, 2x pmuludq, 2x shuffle.
+
+define <4 x i32> @test3(<4 x i32> %a) {
+  %shl = shl <4 x i32> %a, <i32 1, i32 -1, i32 2, i32 -3>
+  ret <4 x i32> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test3':
+; SSE2: Found an estimated cost of 6 for instruction:   %shl
+; SSE41: Found an estimated cost of 1 for instruction:   %shl
+; AVX: Found an estimated cost of 1 for instruction:   %shl
+; AVX2: Found an estimated cost of 1 for instruction:   %shl
+
+
+define <4 x i32> @test4(<4 x i32> %a) {
+  %shl = shl <4 x i32> %a, <i32 0, i32 0, i32 1, i32 1>
+  ret <4 x i32> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test4':
+; SSE2: Found an estimated cost of 6 for instruction:   %shl
+; SSE41: Found an estimated cost of 1 for instruction:   %shl
+; AVX: Found an estimated cost of 1 for instruction:   %shl
+; AVX2: Found an estimated cost of 1 for instruction:   %shl
+
+
+; On AVX2 we are able to lower the following shift into a single
+; vpsllvq. Therefore, the expected cost is only 1.
+; In all other cases, this shift is scalarized as the target does not support
+; vpsllv instructions.
+
+define <2 x i64> @test5(<2 x i64> %a) {
+  %shl = shl <2 x i64> %a, <i64 2, i64 3>
+  ret <2 x i64> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test5':
+; SSE2: Found an estimated cost of 20 for instruction:   %shl
+; SSE41: Found an estimated cost of 20 for instruction:   %shl
+; AVX: Found an estimated cost of 20 for instruction:   %shl
+; AVX2: Found an estimated cost of 1 for instruction:   %shl
+
+
+; v16i16 and v8i32 shifts left by a non-uniform constant are lowered into
+; vector multiply instructions.  With AVX (but not AVX2), the vector multiply
+; is lowered into a sequence of: 1 extract + 2 vpmullw + 1 insert.
+;
+; With AVX2, the vpmullw instruction works on 256-bit quantities, and
+; therefore there is no need to split the resulting vector multiply into
+; a sequence of two multiplies.
+;
+; With SSE2 and SSE4.1, the vector shift cost for 'test6' is twice
+; the cost computed in the case of 'test1'. That is because the backend
+; simply emits 2 pmullw with no extract/insert.
+
+
+define <16 x i16> @test6(<16 x i16> %a) {
+  %shl = shl <16 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
+  ret <16 x i16> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test6':
+; SSE2: Found an estimated cost of 2 for instruction:   %shl
+; SSE41: Found an estimated cost of 2 for instruction:   %shl
+; AVX: Found an estimated cost of 4 for instruction:   %shl
+; AVX2: Found an estimated cost of 1 for instruction:   %shl
+
+
+; With SSE2 and SSE4.1, the vector shift cost for 'test7' is twice
+; the cost computed in the case of 'test3'. That is because the multiply
+; is type-legalized into two v4i32 vector multiplies.
+
+define <8 x i32> @test7(<8 x i32> %a) {
+  %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
+  ret <8 x i32> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test7':
+; SSE2: Found an estimated cost of 12 for instruction:   %shl
+; SSE41: Found an estimated cost of 2 for instruction:   %shl
+; AVX: Found an estimated cost of 4 for instruction:   %shl
+; AVX2: Found an estimated cost of 1 for instruction:   %shl
+
+
+; On AVX2 we are able to lower the following shift into a single
+; vpsllvq. Therefore, the expected cost is only 1.
+; In all other cases, this shift is scalarized as the target does not support
+; vpsllv instructions.
+
+define <4 x i64> @test8(<4 x i64> %a) {
+  %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
+  ret <4 x i64> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test8':
+; SSE2: Found an estimated cost of 40 for instruction:   %shl
+; SSE41: Found an estimated cost of 40 for instruction:   %shl
+; AVX: Found an estimated cost of 40 for instruction:   %shl
+; AVX2: Found an estimated cost of 1 for instruction:   %shl
+
+
+; Same as 'test6', with the difference that the cost is double.
+
+define <32 x i16> @test9(<32 x i16> %a) {
+  %shl = shl <32 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
+  ret <32 x i16> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test9':
+; SSE2: Found an estimated cost of 4 for instruction:   %shl
+; SSE41: Found an estimated cost of 4 for instruction:   %shl
+; AVX: Found an estimated cost of 8 for instruction:   %shl
+; AVX2: Found an estimated cost of 2 for instruction:   %shl
+
+
+; Same as 'test7', except that now the cost is double.
+
+define <16 x i32> @test10(<16 x i32> %a) {
+  %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
+  ret <16 x i32> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test10':
+; SSE2: Found an estimated cost of 24 for instruction:   %shl
+; SSE41: Found an estimated cost of 4 for instruction:   %shl
+; AVX: Found an estimated cost of 8 for instruction:   %shl
+; AVX2: Found an estimated cost of 2 for instruction:   %shl
+
+
+; On AVX2 we are able to lower the following shift into a sequence of
+; two vpsllvq instructions. Therefore, the expected cost is only 2.
+; In all other cases, this shift is scalarized as we don't have vpsllv
+; instructions.
+
+define <8 x i64> @test11(<8 x i64> %a) {
+  %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
+  ret <8 x i64> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test11':
+; SSE2: Found an estimated cost of 80 for instruction:   %shl
+; SSE41: Found an estimated cost of 80 for instruction:   %shl
+; AVX: Found an estimated cost of 80 for instruction:   %shl
+; AVX2: Found an estimated cost of 2 for instruction:   %shl
+
+
diff --git a/test/Analysis/DependenceAnalysis/Banerjee.ll b/test/Analysis/DependenceAnalysis/Banerjee.ll
index 09e8fd2..5c17064 100644
--- a/test/Analysis/DependenceAnalysis/Banerjee.ll
+++ b/test/Analysis/DependenceAnalysis/Banerjee.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+; RUN: opt < %s -analyze -basicaa -da -da-delinearize=false | FileCheck %s
+; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s -check-prefix=DELIN
 
 ; ModuleID = 'Banerjee.bc'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -21,6 +22,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'banerjee0':
+; DELIN: da analyze - none!
+; DELIN: da analyze - consistent flow [0 1]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc7
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
   %i.03 = phi i64 [ 1, %entry ], [ %inc8, %for.inc7 ]
@@ -73,6 +82,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - output [* *]!
 
+; DELIN: 'Dependence Analysis' for function 'banerjee1':
+; DELIN: da analyze - none
+; DELIN: da analyze - consistent flow [0 1]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none
+; DELIN: da analyze - confused!
+; DELIN: da analyze - output [* *]!
+
 for.cond1.preheader.preheader:                    ; preds = %entry
   %0 = add i64 %n, 1
   br label %for.cond1.preheader
@@ -140,6 +157,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'banerjee2':
+; DELIN: da analyze - none!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -191,6 +216,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'banerjee3':
+; DELIN: da analyze - none!
+; DELIN: da analyze - consistent flow [-9 -9]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -242,6 +275,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'banerjee4':
+; DELIN: da analyze - none!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc7
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -293,6 +334,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'banerjee5':
+; DELIN: da analyze - none!
+; DELIN: da analyze - consistent flow [9 9]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc7
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -344,6 +393,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'banerjee6':
+; DELIN: da analyze - none!
+; DELIN: da analyze - consistent flow [0 -9]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -395,6 +452,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'banerjee7':
+; DELIN: da analyze - none!
+; DELIN: da analyze - consistent flow [-1 0]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -446,6 +511,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'banerjee8':
+; DELIN: da analyze - none!
+; DELIN: da analyze - consistent flow [-1 -1]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -497,6 +570,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'banerjee9':
+; DELIN: da analyze - none!
+; DELIN: da analyze - flow [<= =|<]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -549,6 +630,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'banerjee10':
+; DELIN: da analyze - none!
+; DELIN: da analyze - flow [<> =]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc7
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -600,6 +689,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'banerjee11':
+; DELIN: da analyze - none!
+; DELIN: da analyze - flow [<= <>]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc7
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -651,6 +748,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'banerjee12':
+; DELIN: da analyze - none!
+; DELIN: da analyze - consistent flow [0 -11]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc7
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
diff --git a/test/Analysis/DependenceAnalysis/GCD.ll b/test/Analysis/DependenceAnalysis/GCD.ll
index bb31d11..7efa8b5 100644
--- a/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/test/Analysis/DependenceAnalysis/GCD.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+; RUN: opt < %s -analyze -basicaa -da -da-delinearize=false | FileCheck %s
+; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s -check-prefix=DELIN
 
 ; ModuleID = 'GCD.bc'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -22,6 +23,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'gcd0'
+; DELIN: da analyze - none!
+; DELIN: da analyze - flow [=> *|<]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc8 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -75,6 +84,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'gcd1'
+; DELIN: da analyze - none!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc9
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ]
@@ -129,6 +146,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'gcd2'
+; DELIN: da analyze - none!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc9
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ]
@@ -183,6 +208,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'gcd3'
+; DELIN: da analyze - output [* *]!
+; DELIN: da analyze - flow [<> *]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - input [* *]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc7
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc7 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -235,6 +268,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'gcd4'
+; DELIN: da analyze - output [* *]!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - input [* *]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc17
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc17 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc18, %for.inc17 ]
@@ -297,6 +338,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 
+; DELIN: 'Dependence Analysis' for function 'gcd5'
+; DELIN: da analyze - output [* *]!
+; DELIN: da analyze - flow [<> *]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - input [* *]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+
 for.cond1.preheader:                              ; preds = %entry, %for.inc17
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc17 ]
   %i.03 = phi i64 [ 0, %entry ], [ %inc18, %for.inc17 ]
@@ -360,6 +409,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - output [* *]!
 
+; DELIN: 'Dependence Analysis' for function 'gcd6'
+; DELIN: da analyze - none!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - output [* *]!
+
 for.cond1.preheader.preheader:                    ; preds = %entry
   br label %for.cond1.preheader
 
@@ -432,6 +489,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - output [* *]!
 
+; DELIN: 'Dependence Analysis' for function 'gcd7'
+; DELIN: da analyze - output [* *]!
+; DELIN: da analyze - flow [* *|<]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - input [* *]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - output [* *]!
+
 for.cond1.preheader.preheader:                    ; preds = %entry
   br label %for.cond1.preheader
 
@@ -516,6 +581,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - output [* *]!
 
+; DELIN: 'Dependence Analysis' for function 'gcd8'
+; DELIN: da analyze - output [* *]!
+; DELIN: da analyze - none!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - input [* *]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - output [* *]!
+
 for.cond1.preheader.preheader:                    ; preds = %entry
   br label %for.cond1.preheader
 
@@ -595,6 +668,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - output [* *]!
 
+; DELIN: 'Dependence Analysis' for function 'gcd9'
+; DELIN: da analyze - output [* *]!
+; DELIN: da analyze - flow [* *|<]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - input [* *]!
+; DELIN: da analyze - confused!
+; DELIN: da analyze - output [* *]!
+
 for.cond1.preheader.preheader:                    ; preds = %entry
   br label %for.cond1.preheader
 
diff --git a/test/Analysis/LazyCallGraph/basic.ll b/test/Analysis/LazyCallGraph/basic.ll
new file mode 100644
index 0000000..ebadb75
--- /dev/null
+++ b/test/Analysis/LazyCallGraph/basic.ll
@@ -0,0 +1,126 @@
+; RUN: opt -disable-output -passes=print-cg %s 2>&1 | FileCheck %s
+;
+; Basic validation of the call graph analysis used in the new pass manager.
+
+define void @f() {
+; CHECK-LABEL: Call edges in function: f
+; CHECK-NOT: ->
+
+entry:
+  ret void
+}
+
+; A bunch more functions just to make it easier to test several call edges at once.
+define void @f1() {
+  ret void
+}
+define void @f2() {
+  ret void
+}
+define void @f3() {
+  ret void
+}
+define void @f4() {
+  ret void
+}
+define void @f5() {
+  ret void
+}
+define void @f6() {
+  ret void
+}
+define void @f7() {
+  ret void
+}
+define void @f8() {
+  ret void
+}
+define void @f9() {
+  ret void
+}
+define void @f10() {
+  ret void
+}
+define void @f11() {
+  ret void
+}
+define void @f12() {
+  ret void
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+define void @test0() {
+; CHECK-LABEL: Call edges in function: test0
+; CHECK-NEXT: -> f
+; CHECK-NOT: ->
+
+entry:
+  call void @f()
+  call void @f()
+  call void @f()
+  call void @f()
+  ret void
+}
+
+define void ()* @test1(void ()** %x) {
+; CHECK-LABEL: Call edges in function: test1
+; CHECK-NEXT: -> f12
+; CHECK-NEXT: -> f11
+; CHECK-NEXT: -> f10
+; CHECK-NEXT: -> f7
+; CHECK-NEXT: -> f9
+; CHECK-NEXT: -> f8
+; CHECK-NEXT: -> f6
+; CHECK-NEXT: -> f5
+; CHECK-NEXT: -> f4
+; CHECK-NEXT: -> f3
+; CHECK-NEXT: -> f2
+; CHECK-NEXT: -> f1
+; CHECK-NOT: ->
+
+entry:
+  br label %next
+
+dead:
+  br label %next
+
+next:
+  phi void ()* [ @f1, %entry ], [ @f2, %dead ]
+  select i1 true, void ()* @f3, void ()* @f4
+  store void ()* @f5, void ()** %x
+  call void @f6()
+  call void (void ()*, void ()*)* bitcast (void ()* @f7 to void (void ()*, void ()*)*)(void ()* @f8, void ()* @f9)
+  invoke void @f10() to label %exit unwind label %unwind
+
+exit:
+  ret void ()* @f11
+
+unwind:
+  %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+          cleanup
+  resume { i8*, i32 } { i8* bitcast (void ()* @f12 to i8*), i32 42 }
+}
+
+@g = global void ()* @f1
+@g1 = global [4 x void ()*] [void ()* @f2, void ()* @f3, void ()* @f4, void ()* @f5]
+@g2 = global {i8, void ()*, i8} {i8 1, void ()* @f6, i8 2}
+@h = constant void ()* @f7
+
+define void @test2() {
+; CHECK-LABEL: Call edges in function: test2
+; CHECK-NEXT: -> f7
+; CHECK-NEXT: -> f6
+; CHECK-NEXT: -> f5
+; CHECK-NEXT: -> f4
+; CHECK-NEXT: -> f3
+; CHECK-NEXT: -> f2
+; CHECK-NEXT: -> f1
+; CHECK-NOT: ->
+
+  load i8** bitcast (void ()** @g to i8**)
+  load i8** bitcast (void ()** getelementptr ([4 x void ()*]* @g1, i32 0, i32 2) to i8**)
+  load i8** bitcast (void ()** getelementptr ({i8, void ()*, i8}* @g2, i32 0, i32 1) to i8**)
+  load i8** bitcast (void ()** @h to i8**)
+  ret void
+}
diff --git a/test/Analysis/Lint/address-spaces.ll b/test/Analysis/Lint/address-spaces.ll
new file mode 100644
index 0000000..46ee1d7
--- /dev/null
+++ b/test/Analysis/Lint/address-spaces.ll
@@ -0,0 +1,25 @@
+; RUN: opt -lint < %s
+
+target datalayout = "p32:32:32-p1:16:16:16-n16:32"
+
+declare void @foo(i64) nounwind
+
+define i64 @test1(i32 addrspace(1)* %x) nounwind {
+  %y = ptrtoint i32 addrspace(1)* %x to i64
+  ret i64 %y
+}
+
+define <4 x i64> @test1_vector(<4 x i32 addrspace(1)*> %x) nounwind {
+  %y = ptrtoint <4 x i32 addrspace(1)*> %x to <4 x i64>
+  ret <4 x i64> %y
+}
+
+define i32 addrspace(1)* @test2(i64 %x) nounwind {
+  %y = inttoptr i64 %x to i32 addrspace(1)*
+  ret i32 addrspace(1)* %y
+}
+
+define <4 x i32 addrspace(1)*> @test2_vector(<4 x i64> %x) nounwind {
+  %y = inttoptr <4 x i64> %x to <4 x i32 addrspace(1)*>
+  ret <4 x i32 addrspace(1)*> %y
+}
\ No newline at end of file
diff --git a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
index 3dacfbb..a845465 100644
--- a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
+++ b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -scalar-evolution | grep "(trunc i" | not grep ext
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 
 ; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test1'
 ; CHECK-NOT: (trunc i{{.*}}ext
diff --git a/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
index b88e33f..5746d1c 100644
--- a/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
+++ b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -globalopt -instcombine -loop-rotate -licm -instcombine -indvars -loop-deletion -constmerge -S
+; RUN: opt < %s -basicaa -globalopt -instcombine -loop-rotate -licm -instcombine -indvars -loop-deletion -constmerge -S | FileCheck %s
 ; PR11882: ComputeLoadConstantCompareExitLimit crash.
 ;
 ; for.body is deleted leaving a loop-invariant load.
diff --git a/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll b/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll
index 52e6683..66df9d1 100644
--- a/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll
+++ b/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll
@@ -4,7 +4,7 @@
 ; getUDivExpr()->getZeroExtendExpr()->isLoopBackedgeGuardedBy()
 ;
 ; We actually want SCEV simplification to fail gracefully in this
-; case, so there's no output to check, just the absense of stack overflow.
+; case, so there's no output to check, just the absence of stack overflow.
 
 @c = common global i8 0, align 1
 
diff --git a/test/Analysis/ScalarEvolution/and-xor.ll b/test/Analysis/ScalarEvolution/and-xor.ll
index 404ab91..ad636da 100644
--- a/test/Analysis/ScalarEvolution/and-xor.ll
+++ b/test/Analysis/ScalarEvolution/and-xor.ll
@@ -1,11 +1,27 @@
 ; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
 
+; CHECK-LABEL: @test1
 ; CHECK: -->  (zext
 ; CHECK: -->  (zext
 ; CHECK-NOT: -->  (zext
 
-define i32 @foo(i32 %x) {
+define i32 @test1(i32 %x) {
   %n = and i32 %x, 255
   %y = xor i32 %n, 255
   ret i32 %y
 }
+
+; ScalarEvolution shouldn't try to analyze %z into something like
+;   -->  (zext i4 (-1 + (-1 * (trunc i64 (8 * %x) to i4))) to i64)
+; or
+;   -->  (8 * (zext i1 (trunc i64 ((8 * %x) /u 8) to i1) to i64))
+
+; CHECK-LABEL: @test2
+; CHECK: -->  (8 * (zext i1 (trunc i64 %x to i1) to i64))
+
+define i64 @test2(i64 %x) {
+  %a = shl i64 %x, 3
+  %t = and i64 %a, 8
+  %z = xor i64 %t, 8
+  ret i64 %z
+}
diff --git a/test/Analysis/ScalarEvolution/fold.ll b/test/Analysis/ScalarEvolution/fold.ll
index 57006dd..ab57425 100644
--- a/test/Analysis/ScalarEvolution/fold.ll
+++ b/test/Analysis/ScalarEvolution/fold.ll
@@ -60,3 +60,29 @@ loop:
 exit:
   ret void
 }
+
+define void @test5(i32 %i) {
+; CHECK-LABEL: @test5
+  %A = and i32 %i, 1
+; CHECK: -->  (zext i1 (trunc i32 %i to i1) to i32)
+  %B = and i32 %i, 2
+; CHECK: -->  (2 * (zext i1 (trunc i32 (%i /u 2) to i1) to i32))
+  %C = and i32 %i, 63
+; CHECK: -->  (zext i6 (trunc i32 %i to i6) to i32)
+  %D = and i32 %i, 126
+; CHECK: -->  (2 * (zext i6 (trunc i32 (%i /u 2) to i6) to i32))
+  %E = and i32 %i, 64
+; CHECK: -->  (64 * (zext i1 (trunc i32 (%i /u 64) to i1) to i32))
+  %F = and i32 %i, -2147483648
+; CHECK: -->  (-2147483648 * (%i /u -2147483648))
+  ret void
+}
+
+define void @test6(i8 %x) {
+; CHECK-LABEL: @test6
+  %A = zext i8 %x to i16
+  %B = shl nuw i16 %A, 8
+  %C = and i16 %B, -2048
+; CHECK: -->  (2048 * ((zext i8 %x to i16) /u 8))
+  ret void
+}
diff --git a/test/Analysis/ScalarEvolution/nsw-offset.ll b/test/Analysis/ScalarEvolution/nsw-offset.ll
index 8969a5a..88cdcf2 100644
--- a/test/Analysis/ScalarEvolution/nsw-offset.ll
+++ b/test/Analysis/ScalarEvolution/nsw-offset.ll
@@ -73,5 +73,5 @@ return:                                           ; preds = %bb1.return_crit_edg
   ret void
 }
 
-; CHECK: Loop %bb: backedge-taken count is ((-1 + %n) /u 2)
+; CHECK: Loop %bb: backedge-taken count is ((-1 + (2 * (%no /u 2))) /u 2)
 ; CHECK: Loop %bb: max backedge-taken count is 1073741822
diff --git a/test/Analysis/ScalarEvolution/trip-count-pow2.ll b/test/Analysis/ScalarEvolution/trip-count-pow2.ll
new file mode 100644
index 0000000..2c5b72e
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/trip-count-pow2.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
+
+define void @test1(i32 %n) {
+entry:
+  %s = mul i32 %n, 96
+  br label %loop
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %i.next = add i32 %i, 32
+  %t = icmp ne i32 %i.next, %s
+  br i1 %t, label %loop, label %exit
+exit:
+  ret void
+
+; CHECK-LABEL: @test1
+; CHECK: Loop %loop: backedge-taken count is ((-32 + (96 * %n)) /u 32)
+; CHECK: Loop %loop: max backedge-taken count is ((-32 + (96 * %n)) /u 32)
+}
+
+; PR19183
+define i32 @test2(i32 %n) {
+entry:
+  %s = and i32 %n, -32
+  br label %loop
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %i.next = add i32 %i, 32
+  %t = icmp ne i32 %i.next, %s
+  br i1 %t, label %loop, label %exit
+exit:
+  ret i32 %i
+
+; CHECK-LABEL: @test2
+; CHECK: Loop %loop: backedge-taken count is ((-32 + (32 * (%n /u 32))) /u 32)
+; CHECK: Loop %loop: max backedge-taken count is ((-32 + (32 * (%n /u 32))) /u 32)
+}
+
+define void @test3(i32 %n) {
+entry:
+  %s = mul i32 %n, 96
+  br label %loop
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %i.next = add i32 %i, 96
+  %t = icmp ne i32 %i.next, %s
+  br i1 %t, label %loop, label %exit
+exit:
+  ret void
+
+; CHECK-LABEL: @test3
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
+}
diff --git a/test/Analysis/ScalarEvolution/trip-count-switch.ll b/test/Analysis/ScalarEvolution/trip-count-switch.ll
new file mode 100644
index 0000000..2d2b6b4
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/trip-count-switch.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+declare void @foo()
+
+define void @test1() nounwind {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %if.end, %entry
+  %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ]
+  switch i32 %i.0, label %if.end [
+    i32 0, label %for.end
+    i32 1, label %if.then
+  ]
+
+if.then:                                          ; preds = %for.cond
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %for.cond, %if.then
+  %dec = add nsw i32 %i.0, -1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+
+; CHECK-LABEL: @test1
+; CHECK: Loop %for.cond: backedge-taken count is 2
+; CHECK: Loop %for.cond: max backedge-taken count is 2
+}
diff --git a/test/Analysis/ScalarEvolution/xor-and.ll b/test/Analysis/ScalarEvolution/xor-and.ll
deleted file mode 100644
index 2616ea9..0000000
--- a/test/Analysis/ScalarEvolution/xor-and.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
-
-; ScalarEvolution shouldn't try to analyze %z into something like
-;   -->  (zext i4 (-1 + (-1 * (trunc i64 (8 * %x) to i4))) to i64)
-
-; CHECK: -->  (zext i4 (-8 + (trunc i64 (8 * %x) to i4)) to i64)
-
-define i64 @foo(i64 %x) {
-  %a = shl i64 %x, 3
-  %t = and i64 %a, 8
-  %z = xor i64 %t, 8
-  ret i64 %z
-}
diff --git a/test/Analysis/ScalarEvolution/zext-signed-addrec.ll b/test/Analysis/ScalarEvolution/zext-signed-addrec.ll
new file mode 100644
index 0000000..27aed3b
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/zext-signed-addrec.ll
@@ -0,0 +1,81 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+; PR18000
+
+target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = global i32 0, align 4                ; loaded once in @foo; a zero value selects the loop nest
+@b = common global i32 0, align 4         ; loaded on entry to @foo and rewritten with %inc in %for.inc8
+@e = common global i8 0, align 1          ; receives %dec or the constant 1 on the way out of @foo
+@d = common global i32 0, align 4         ; @foo only ever stores 0 here
+@c = common global i32 0, align 4         ; receives %shl on every %if.end iteration
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1   ; "%d" plus newline, the format passed to printf
+
+; Function Attrs: nounwind optsize uwtable
+; CHECK-LABEL: foo
+define i32 @foo() {                               ; PR18000 reproducer; expected output has an %lsr.iv starting at 258 and stepping by -258
+entry:
+  %.pr = load i32* @b, align 4
+  %cmp10 = icmp slt i32 %.pr, 1                   ; run the loop nest only when @b < 1
+  br i1 %cmp10, label %for.cond1.preheader.lr.ph, label %entry.for.end9_crit_edge
+
+entry.for.end9_crit_edge:                         ; preds = %entry
+  %.pre = load i32* @c, align 4                   ; loops skipped: the current value of @c is what gets printed
+  br label %for.end9
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry
+  %0 = load i32* @a, align 4
+  %tobool = icmp eq i32 %0, 0                     ; @a == 0 enters the loops; otherwise go to %return.loopexit.split
+  br i1 %tobool, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %return.loopexit.split
+
+for.cond1.preheader.for.cond1.preheader.split_crit_edge: ; preds = %for.cond1.preheader.lr.ph, %for.inc8
+  %1 = phi i32 [ %inc, %for.inc8 ], [ %.pr, %for.cond1.preheader.lr.ph ]   ; outer counter: starts at the value loaded from @b, then %inc
+  br label %if.end
+
+; CHECK-LABEL: if.end
+if.end:                                           ; preds = %if.end, %for.cond1.preheader.for.cond1.preheader.split_crit_edge
+
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %if.end ], [ 258, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ]
+  %indvars.iv = phi i32 [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %indvars.iv.next, %if.end ]   ; i32 counter: starts at 1, steps by -1
+
+  %2 = phi i8 [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %dec, %if.end ]   ; i8 counter: starts at 1, steps by -1
+  %conv7 = mul i32 %indvars.iv, 258               ; first value is 1 * 258 = 258, the start value expected for %lsr.iv
+  %shl = and i32 %conv7, 510                      ; mask the product with 510 before storing
+  store i32 %shl, i32* @c, align 4
+
+; CHECK: %lsr.iv.next = add i32 %lsr.iv, -258
+  %dec = add i8 %2, -1
+
+  %cmp2 = icmp sgt i8 %dec, -1                    ; stay in %if.end while %dec >= 0 (signed i8 compare)
+  %indvars.iv.next = add i32 %indvars.iv, -1      ; the multiplicand drops by 1, so %conv7 drops by 258 per iteration
+  br i1 %cmp2, label %if.end, label %for.inc8
+
+for.inc8:                                         ; preds = %if.end
+  store i32 0, i32* @d, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @b, align 4
+  %cmp = icmp slt i32 %1, 0                       ; tests the pre-increment %1, not %inc
+  br i1 %cmp, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %for.cond.for.end9_crit_edge
+
+for.cond.for.end9_crit_edge:                      ; preds = %for.inc8
+  store i8 %dec, i8* @e, align 1                  ; publish the final inner i8 counter value
+  br label %for.end9
+
+for.end9:                                         ; preds = %entry.for.end9_crit_edge, %for.cond.for.end9_crit_edge
+  %3 = phi i32 [ %.pre, %entry.for.end9_crit_edge ], [ %shl, %for.cond.for.end9_crit_edge ]   ; value to print: old @c, or the last %shl
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %3) #2   ; NOTE(review): no "attributes #2" group is defined in this file
+  br label %return
+
+return.loopexit.split:                            ; preds = %for.cond1.preheader.lr.ph
+  store i8 1, i8* @e, align 1
+  store i32 0, i32* @d, align 4
+  br label %return
+
+return:                                           ; preds = %return.loopexit.split, %for.end9
+  %retval.0 = phi i32 [ 0, %for.end9 ], [ 1, %return.loopexit.split ]   ; 0 after the printf path, 1 when @a was nonzero
+  ret i32 %retval.0
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @printf(i8* nocapture readonly, ...)
+
diff --git a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
index 6fd6eac..cdf7281 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
@@ -18,8 +18,8 @@ define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
-; CHECK [[TAGA]] = metadata !{metadata [[TYPEA:!.*]], metadata [[TYPEA]], i64 0}
-; CHECK [[TYPEA]] = metadata !{metadata !"A", metadata !{{.*}}}
+; CHECK: [[TAGA]] = metadata !{metadata [[TYPEA:!.*]], metadata [[TYPEA]], i64 0}
+; CHECK: [[TYPEA]] = metadata !{metadata !"A", metadata !{{.*}}}
 !0 = metadata !{metadata !"tbaa root", null}
 !1 = metadata !{metadata !3, metadata !3, i64 0}
 !2 = metadata !{metadata !4, metadata !4, i64 0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
index 0cd5c30..e1c5d45 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
@@ -43,7 +43,7 @@ entry:
 ; OPT: define
 ; OPT: store i32 1
 ; OPT: store i16 4
-; Remove a load and propogate the value from store.
+; Remove a load and propagate the value from store.
 ; OPT: ret i32 1
   %s.addr = alloca i32*, align 8
   %A.addr = alloca %struct.StructA*, align 8
@@ -98,7 +98,7 @@ entry:
 ; OPT: define
 ; OPT: store i32 1
 ; OPT: store i16 4
-; Remove a load and propogate the value from store.
+; Remove a load and propagate the value from store.
 ; OPT: ret i32 1
   %A.addr = alloca %struct.StructA*, align 8
   %B.addr = alloca %struct.StructB*, align 8
@@ -127,7 +127,7 @@ entry:
 ; OPT: define
 ; OPT: store i32 1
 ; OPT: store i32 4
-; Remove a load and propogate the value from store.
+; Remove a load and propagate the value from store.
 ; OPT: ret i32 1
   %A.addr = alloca %struct.StructA*, align 8
   %B.addr = alloca %struct.StructB*, align 8
@@ -155,7 +155,7 @@ entry:
 ; OPT: define
 ; OPT: store i32 1
 ; OPT: store i32 4
-; Remove a load and propogate the value from store.
+; Remove a load and propagate the value from store.
 ; OPT: ret i32 1
   %A.addr = alloca %struct.StructA*, align 8
   %B.addr = alloca %struct.StructB*, align 8
@@ -184,7 +184,7 @@ entry:
 ; OPT: define
 ; OPT: store i32 1
 ; OPT: store i32 4
-; Remove a load and propogate the value from store.
+; Remove a load and propagate the value from store.
 ; OPT: ret i32 1
   %A.addr = alloca %struct.StructA*, align 8
   %S.addr = alloca %struct.StructS*, align 8
@@ -212,7 +212,7 @@ entry:
 ; OPT: define
 ; OPT: store i32 1
 ; OPT: store i16 4
-; Remove a load and propogate the value from store.
+; Remove a load and propagate the value from store.
 ; OPT: ret i32 1
   %A.addr = alloca %struct.StructA*, align 8
   %S.addr = alloca %struct.StructS*, align 8
@@ -240,7 +240,7 @@ entry:
 ; OPT: define
 ; OPT: store i32 1
 ; OPT: store i32 4
-; Remove a load and propogate the value from store.
+; Remove a load and propagate the value from store.
 ; OPT: ret i32 1
   %S.addr = alloca %struct.StructS*, align 8
   %S2.addr = alloca %struct.StructS2*, align 8
@@ -268,7 +268,7 @@ entry:
 ; OPT: define
 ; OPT: store i32 1
 ; OPT: store i16 4
-; Remove a load and propogate the value from store.
+; Remove a load and propagate the value from store.
 ; OPT: ret i32 1
   %S.addr = alloca %struct.StructS*, align 8
   %S2.addr = alloca %struct.StructS2*, align 8
@@ -296,7 +296,7 @@ entry:
 ; OPT: define
 ; OPT: store i32 1
 ; OPT: store i32 4
-; Remove a load and propogate the value from store.
+; Remove a load and propagate the value from store.
 ; OPT: ret i32 1
   %C.addr = alloca %struct.StructC*, align 8
   %D.addr = alloca %struct.StructD*, align 8
-- 
cgit v1.1