Update to LLVM 3.5a.

Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617
author: Stephen Hines <srhines@google.com> 2014-04-23 16:57:46 -0700
committer: Stephen Hines <srhines@google.com> 2014-04-24 15:53:16 -0700
commit: 36b56886974eae4f9c5ebc96befd3e7bfe5de338 (patch)
tree: e6cfb69fbbd937f450eeb83bfb83b9da3b01275a /test/Transforms
parent: 69a8640022b04415ae9fac62f8ab090601d8f889 (diff)
download: external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.zip
external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.gz
external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.bz2
199 files changed, 6826 insertions, 301 deletions
diff --git a/test/Transforms/AddDiscriminators/basic.ll b/test/Transforms/AddDiscriminators/basic.ll
new file mode 100644
index 0000000..b12cbee
--- /dev/null
+++ b/test/Transforms/AddDiscriminators/basic.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -add-discriminators -S | FileCheck %s
+
+; Basic DWARF discriminator test. All the instructions in block
+; 'if.then' should have a different discriminator value than
+; the conditional branch at the end of block 'entry'.
+;
+; Original code:
+;
+;       void foo(int i) {
+;         int x;
+;         if (i < 10) x = i;
+;       }
+
+define void @foo(i32 %i) #0 {
+entry:                                            ; every !dbg in this block is !10 (source line 3)
+  %i.addr = alloca i32, align 4
+  %x = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32* %i.addr, align 4, !dbg !10
+  %cmp = icmp slt i32 %0, 10, !dbg !10
+  br i1 %cmp, label %if.then, label %if.end, !dbg !10 ; the conditional branch this test is about
+
+if.then:                                          ; preds = %entry
+  %1 = load i32* %i.addr, align 4, !dbg !10        ; on input: same location as the branch in 'entry'
+; CHECK:  %1 = load i32* %i.addr, align 4, !dbg !12
+
+  store i32 %1, i32* %x, align 4, !dbg !10
+; CHECK:  store i32 %1, i32* %x, align 4, !dbg !12
+
+  br label %if.end, !dbg !10
+; CHECK:   br label %if.end, !dbg !12
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void, !dbg !12                               ; input !12 is source line 4
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [basic.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"basic.c", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [basic.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!9 = metadata !{metadata !"clang version 3.5 "}
+!10 = metadata !{i32 3, i32 0, metadata !11, null}
+!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [basic.c]
+!12 = metadata !{i32 4, i32 0, metadata !4, null}
+
+; CHECK: !12 = metadata !{i32 3, i32 0, metadata !13, null}
+; CHECK: !13 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [./basic.c]
+; CHECK: !14 = metadata !{i32 4, i32 0, metadata !4, null}
diff --git a/test/Transforms/AddDiscriminators/first-only.ll b/test/Transforms/AddDiscriminators/first-only.ll
new file mode 100644
index 0000000..f3b0357
--- /dev/null
+++ b/test/Transforms/AddDiscriminators/first-only.ll
@@ -0,0 +1,82 @@
+; RUN: opt < %s -add-discriminators -S | FileCheck %s
+
+; Test that the only instructions that receive a new discriminator in
+; the block 'if.then' are those that share the same line number as
+; the branch in 'entry'.
+;
+; Original code:
+;
+;       void foo(int i) {
+;         int x, y;
+;         if (i < 10) { x = i;
+;             y = -i;
+;         }
+;       }
+
+define void @foo(i32 %i) #0 {
+entry:
+  %i.addr = alloca i32, align 4
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32* %i.addr, align 4, !dbg !10
+  %cmp = icmp slt i32 %0, 10, !dbg !10
+  br i1 %cmp, label %if.then, label %if.end, !dbg !10 ; branch located on source line 3 (!10)
+
+if.then:                                          ; preds = %entry
+  %1 = load i32* %i.addr, align 4, !dbg !12        ; !12 is also line 3, but scoped to lexical block !13
+  store i32 %1, i32* %x, align 4, !dbg !12
+
+  %2 = load i32* %i.addr, align 4, !dbg !14        ; input !14 is source line 4
+; CHECK:  %2 = load i32* %i.addr, align 4, !dbg !15
+
+  %sub = sub nsw i32 0, %2, !dbg !14
+; CHECK:  %sub = sub nsw i32 0, %2, !dbg !15
+
+  store i32 %sub, i32* %y, align 4, !dbg !14
+; CHECK:  store i32 %sub, i32* %y, align 4, !dbg !15
+
+  br label %if.end, !dbg !15
+; CHECK:  br label %if.end, !dbg !16
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void, !dbg !16
+; CHECK:  ret void, !dbg !17
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [first-only.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"first-only.c", metadata !"."}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [first-only.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!9 = metadata !{metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)"}
+!10 = metadata !{i32 3, i32 0, metadata !11, null}
+
+!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [first-only.c]
+; CHECK: !11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0}
+
+!12 = metadata !{i32 3, i32 0, metadata !13, null}
+
+!13 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [first-only.c]
+; CHECK: !13 = metadata !{i32 786443, metadata !1, metadata !14, i32 3, i32 0, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [./first-only.c]
+
+!14 = metadata !{i32 4, i32 0, metadata !13, null}
+; CHECK: !14 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1}
+
+!15 = metadata !{i32 5, i32 0, metadata !13, null}
+; CHECK: !15 = metadata !{i32 4, i32 0, metadata !14, null}
+
+!16 = metadata !{i32 6, i32 0, metadata !4, null}
+; CHECK: !16 = metadata !{i32 5, i32 0, metadata !14, null}
+; CHECK: !17 = metadata !{i32 6, i32 0, metadata !4, null}
+
diff --git a/test/Transforms/AddDiscriminators/multiple.ll b/test/Transforms/AddDiscriminators/multiple.ll
new file mode 100644
index 0000000..0241a0c
--- /dev/null
+++ b/test/Transforms/AddDiscriminators/multiple.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -add-discriminators -S | FileCheck %s
+
+; Discriminator support for multiple CFG paths on the same line.
+;
+;       void foo(int i) {
+;         int x;
+;         if (i < 10) x = i; else x = -i;
+;       }
+;
+; The two stores inside the if-then-else line must have different discriminator
+; values.
+
+define void @foo(i32 %i) #0 {
+entry:
+  %i.addr = alloca i32, align 4
+  %x = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32* %i.addr, align 4, !dbg !10
+  %cmp = icmp slt i32 %0, 10, !dbg !10
+  br i1 %cmp, label %if.then, label %if.else, !dbg !10 ; both successors carry the same input location !10
+
+if.then:                                          ; preds = %entry
+  %1 = load i32* %i.addr, align 4, !dbg !10
+; CHECK:  %1 = load i32* %i.addr, align 4, !dbg !12
+
+  store i32 %1, i32* %x, align 4, !dbg !10
+; CHECK:  store i32 %1, i32* %x, align 4, !dbg !12
+
+  br label %if.end, !dbg !10
+; CHECK:  br label %if.end, !dbg !12
+
+if.else:                                          ; preds = %entry
+  %2 = load i32* %i.addr, align 4, !dbg !10
+; CHECK:  %2 = load i32* %i.addr, align 4, !dbg !14
+
+  %sub = sub nsw i32 0, %2, !dbg !10
+; CHECK:  %sub = sub nsw i32 0, %2, !dbg !14
+
+  store i32 %sub, i32* %x, align 4, !dbg !10
+; CHECK:  store i32 %sub, i32* %x, align 4, !dbg !14
+
+  br label %if.end                                ; note: this branch has no !dbg attachment
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void, !dbg !12                               ; input !12 is source line 4
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [multiple.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"multiple.c", metadata !"."}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [multiple.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!9 = metadata !{metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)"}
+!10 = metadata !{i32 3, i32 0, metadata !11, null}
+!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [multiple.c]
+!12 = metadata !{i32 4, i32 0, metadata !4, null}
+
+; CHECK: !12 = metadata !{i32 3, i32 0, metadata !13, null}
+; CHECK: !13 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [./multiple.c]
+; CHECK: !14 = metadata !{i32 3, i32 0, metadata !15, null}
+; CHECK: !15 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 2, i32 1} ; [ DW_TAG_lexical_block ] [./multiple.c]
diff --git a/test/Transforms/ArgumentPromotion/inalloca.ll b/test/Transforms/ArgumentPromotion/inalloca.ll
new file mode 100644
index 0000000..513a968
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/inalloca.ll
@@ -0,0 +1,49 @@
+; RUN: opt %s -argpromotion -scalarrepl -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+%struct.ss = type { i32, i32 }
+
+; Argpromote + scalarrepl should change this to passing the two integers by value.
+define internal i32 @f(%struct.ss* inalloca  %s) {
+entry:
+  %f0 = getelementptr %struct.ss* %s, i32 0, i32 0   ; address of the first i32 field
+  %f1 = getelementptr %struct.ss* %s, i32 0, i32 1   ; address of the second i32 field
+  %a = load i32* %f0, align 4
+  %b = load i32* %f1, align 4
+  %r = add i32 %a, %b                                ; result is the sum of both fields
+  ret i32 %r
+}
+; CHECK-LABEL: define internal i32 @f
+; CHECK-NOT: load
+; CHECK: ret
+
+define i32 @main() {
+entry:
+  %S = alloca %struct.ss
+  %f0 = getelementptr %struct.ss* %S, i32 0, i32 0
+  %f1 = getelementptr %struct.ss* %S, i32 0, i32 1
+  store i32 1, i32* %f0, align 4                     ; %S = { 1, 2 }
+  store i32 2, i32* %f1, align 4
+  %r = call i32 @f(%struct.ss* inalloca %S)          ; the only call of @f in this file
+  ret i32 %r
+}
+; CHECK-LABEL: define i32 @main
+; CHECK-NOT: load
+; CHECK: ret
+
+; Argpromote can't promote %a because of the icmp use.
+define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind  {
+; CHECK: define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b)
+entry:
+  %c = icmp eq %struct.ss* %a, %b                    ; compares the two pointers themselves; nothing is loaded
+  ret i1 %c
+}
+
+define i32 @test() {
+entry:
+  %S = alloca %struct.ss
+  %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) ; the same alloca is passed for both parameters
+; CHECK: call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
+  ret i32 0
+}
diff --git a/test/Transforms/ArgumentPromotion/tail.ll b/test/Transforms/ArgumentPromotion/tail.ll
new file mode 100644
index 0000000..43b8996
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/tail.ll
@@ -0,0 +1,20 @@
+; RUN: opt %s -argpromotion -S -o - | FileCheck %s
+; PR14710
+
+%pair = type { i32, i32 }
+
+declare i8* @foo(%pair*)
+
+define internal void @bar(%pair* byval %Data) {
+; CHECK: define internal void @bar(i32 %Data.0, i32 %Data.1)
+; CHECK: %Data = alloca %pair
+; CHECK-NOT: tail
+; CHECK: call i8* @foo(%pair* %Data)
+  tail call i8* @foo(%pair* %Data)                  ; the 'tail' marker must be absent from the output
+  ret void
+}
+
+define void @zed(%pair* byval %Data) {
+  call void @bar(%pair* byval %Data)                ; the only caller of the internal @bar in this file
+  ret void
+}
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
index e33ac61..e90900a 100644
--- a/test/Transforms/BBVectorize/simple-int.ll
+++ b/test/Transforms/BBVectorize/simple-int.ll
@@ -126,8 +126,7 @@ define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
 
 ; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
 ; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
-; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #1
-; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #1
+; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0
+; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #0
 
 ; CHECK: attributes #0 = { nounwind readnone }
-; CHECK: attributes #1 = { nounwind readonly }
diff --git a/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll b/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
index 9d82819..598ea0e 100644
--- a/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
+++ b/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -std-compile-opts -o - | llc -o - | grep bork_directive | wc -l | grep 2
+; RUN: opt < %s -std-compile-opts -o - | llc -no-integrated-as -o - | grep bork_directive | wc -l | grep 2
 
 ;; We don't want branch folding to fold asm directives.
 
diff --git a/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll b/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll
new file mode 100644
index 0000000..430b992
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll
@@ -0,0 +1,64 @@
+; RUN: opt -codegenprepare -disable-cgp-branch-opts -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The first cast (%s1) should be sunk into block2, so that the
+; instruction selector can form an efficient
+; i64 * i64 -> i128 multiplication.
+define i128 @sink(i64* %mem1, i64* %mem2) {
+; CHECK-LABEL: block1:
+; CHECK-NEXT: load
+block1:
+  %l1 = load i64* %mem1
+  %s1 = sext i64 %l1 to i128                       ; the "first cast"; its only user is the mul in block2
+  br label %block2
+
+; CHECK-LABEL: block2:
+; CHECK-NEXT: sext
+; CHECK-NEXT: load
+; CHECK-NEXT: sext
+block2:
+  %l2 = load i64* %mem2
+  %s2 = sext i64 %l2 to i128
+  %res = mul i128 %s1, %s2                         ; both operands are sign-extended i64 values
+  ret i128 %res
+}
+
+; The first cast (%s1) should be hoisted into block1, next to the load
+; that feeds it, so that the instruction selector can form an extend-load.
+define i64 @hoist(i32* %mem1, i32* %mem2) {
+; CHECK-LABEL: block1:
+; CHECK-NEXT: load
+; CHECK-NEXT: sext
+block1:
+  %l1 = load i32* %mem1
+  br label %block2
+
+; CHECK-LABEL: block2:
+; CHECK-NEXT: load
+; CHECK-NEXT: sext
+block2:
+  %s1 = sext i32 %l1 to i64                        ; the "first cast"; its operand %l1 is loaded in block1
+  %l2 = load i32* %mem2
+  %s2 = sext i32 %l2 to i64
+  %res = mul i64 %s1, %s2
+  ret i64 %res
+}
+
+; Make sure the cast sink logic and OptimizeExtUses don't end up in an infinite
+; loop. No output is matched for this function; it only has to terminate.
+define i128 @use_ext_source() {
+block1:
+  %v1 = or i64 undef, undef
+  %v2 = zext i64 %v1 to i128                       ; extension of %v1, consumed by the phi in block3
+  br i1 undef, label %block2, label %block3
+
+block2:
+  %v3 = add i64 %v1, 1                             ; %v1 is also used here without the extension
+  %v4 = zext i64 %v3 to i128
+  br label %block3
+
+block3:
+  %res = phi i128 [ %v2, %block1 ], [ %v4, %block2 ]
+  ret i128 %res
+}
diff --git a/test/Transforms/CodeGenPrepare/X86/lit.local.cfg b/test/Transforms/CodeGenPrepare/X86/lit.local.cfg
new file mode 100644
index 0000000..ba763cf
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll b/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll
new file mode 100644
index 0000000..e945b03
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll
@@ -0,0 +1,105 @@
+; RUN: opt -S -codegenprepare -mcpu=core-avx2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
+; RUN: opt -S -codegenprepare -mcpu=corei7 %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SSE2
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin10.9.0"
+
+define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) {
+; CHECK-LABEL: @test_8bit
+; CHECK: if_true:
+; CHECK-NOT: shufflevector
+
+; CHECK: if_false:
+; CHECK-NOT: shufflevector
+; CHECK: shl <16 x i8> %lhs, %mask
+  %mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer ; all-zero mask: every lane is element 0 of %tmp
+  br i1 %tst, label %if_true, label %if_false
+
+if_true:
+  ret <16 x i8> %mask
+
+if_false:
+  %res = shl <16 x i8> %lhs, %mask                 ; expected to keep using %mask from the entry block
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @test_16bit(<8 x i16> %lhs, <8 x i16> %tmp, i1 %tst) {
+; CHECK-LABEL: @test_16bit
+; CHECK: if_true:
+; CHECK-NOT: shufflevector
+
+; CHECK: if_false:
+; CHECK: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector
+; CHECK: shl <8 x i16> %lhs, [[SPLAT]]
+  %mask = shufflevector <8 x i16> %tmp, <8 x i16> undef, <8 x i32> zeroinitializer ; all-zero mask: every lane is element 0 of %tmp
+  br i1 %tst, label %if_true, label %if_false
+
+if_true:
+  ret <8 x i16> %mask
+
+if_false:
+  %res = shl <8 x i16> %lhs, %mask                 ; expected to use a shufflevector placed in this block instead
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @test_notsplat(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) {
+; CHECK-LABEL: @test_notsplat
+; CHECK: if_true:
+; CHECK-NOT: shufflevector
+
+; CHECK: if_false:
+; CHECK-NOT: shufflevector
+; CHECK: shl <4 x i32> %lhs, %mask
+  %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0> ; lanes 0,1,1,0: not a splat
+  br i1 %tst, label %if_true, label %if_false
+
+if_true:
+  ret <4 x i32> %mask
+
+if_false:
+  %res = shl <4 x i32> %lhs, %mask                 ; expected to keep using %mask from the entry block
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) {
+; CHECK-AVX2-LABEL: @test_32bit
+; CHECK-AVX2: if_false:
+; CHECK-AVX2-NOT: shufflevector
+; CHECK-AVX2: ashr <4 x i32> %lhs, %mask
+
+; CHECK-SSE2-LABEL: @test_32bit
+; CHECK-SSE2: if_false:
+; CHECK-SSE2: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector
+; CHECK-SSE2: ashr <4 x i32> %lhs, [[SPLAT]]
+  %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0> ; element 0 in every defined lane, lane 1 is undef
+  br i1 %tst, label %if_true, label %if_false
+
+if_true:
+  ret <4 x i32> %mask
+
+if_false:
+  %res = ashr <4 x i32> %lhs, %mask                ; expectation differs between the core-avx2 and corei7 runs
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @test_64bit(<2 x i64> %lhs, <2 x i64> %tmp, i1 %tst) {
+; CHECK-AVX2-LABEL: @test_64bit
+; CHECK-AVX2: if_false:
+; CHECK-AVX2-NOT: shufflevector
+; CHECK-AVX2: lshr <2 x i64> %lhs, %mask
+
+; CHECK-SSE2-LABEL: @test_64bit
+; CHECK-SSE2: if_false:
+; CHECK-SSE2: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector
+; CHECK-SSE2: lshr <2 x i64> %lhs, [[SPLAT]]
+
+  %mask = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer ; all-zero mask: every lane is element 0 of %tmp
+  br i1 %tst, label %if_true, label %if_false
+
+if_true:
+  ret <2 x i64> %mask
+
+if_false:
+  %res = lshr <2 x i64> %lhs, %mask                ; expectation differs between the core-avx2 and corei7 runs
+  ret <2 x i64> %res
+}
diff --git a/test/Transforms/ConstantHoisting/X86/const-base-addr.ll b/test/Transforms/ConstantHoisting/X86/const-base-addr.ll
new file mode 100644
index 0000000..01e6cdf
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/X86/const-base-addr.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%T = type { i32, i32, i32, i32 }
+
+; Test if even cheap base addresses are hoisted.
+define i32 @test1() nounwind {
+; CHECK-LABEL:  @test1
+; CHECK:        %const = bitcast i32 12345678 to i32
+; CHECK:        %1 = inttoptr i32 %const to %T*
+; CHECK:        %addr1 = getelementptr %T* %1, i32 0, i32 1
+  %addr1 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 1 ; all three GEPs share the constant base 12345678
+  %tmp1 = load i32* %addr1
+  %addr2 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 2
+  %tmp2 = load i32* %addr2
+  %addr3 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 3
+  %tmp3 = load i32* %addr3
+  %tmp4 = add i32 %tmp1, %tmp2                      ; sum of fields 1, 2 and 3 is returned
+  %tmp5 = add i32 %tmp3, %tmp4
+  ret i32 %tmp5
+}
+
diff --git a/test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll b/test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll
new file mode 100644
index 0000000..f8e478e
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll
@@ -0,0 +1,16 @@
+; Test if this compiles without assertions.
+; RUN: opt -S -consthoist < %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%T = type { i32, i32, i32, i32 }
+
+define i32 @test1() nounwind {
+  %base = inttoptr i32 12345678 to %T*             ; cast whose only users are the unused GEPs below
+  %addr1 = getelementptr %T* %base, i32 0, i32 1   ; %addr1, %addr2 and %addr3 have no users
+  %addr2 = getelementptr %T* %base, i32 0, i32 2
+  %addr3 = getelementptr %T* %base, i32 0, i32 3
+  ret i32 12345678                                 ; same literal value as the cast operand
+}
+
diff --git a/test/Transforms/ConstantHoisting/X86/lit.local.cfg b/test/Transforms/ConstantHoisting/X86/lit.local.cfg
new file mode 100644
index 0000000..ba763cf
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/X86/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/ConstantHoisting/X86/phi.ll b/test/Transforms/ConstantHoisting/X86/phi.ll
new file mode 100644
index 0000000..086df14
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/X86/phi.ll
@@ -0,0 +1,116 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; PR18626
+define i8* @test1(i1 %cmp, i64* %tmp) {
+entry:
+  call void @foo(i8* inttoptr (i64 68719476735 to i8*))
+  br i1 %cmp, label %if.end, label %return
+
+if.end:                                           ; preds = %entry
+  call void @foo(i8* inttoptr (i64 68719476736 to i8*))
+  br label %return
+
+return:                                           ; preds = %if.end, %entry
+  %retval.0 = phi i8* [ null, %entry ], [ inttoptr (i64 68719476736 to i8*), %if.end ]
+  store i64 1172321806, i64* %tmp
+  ret i8* %retval.0
+
+; CHECK-LABEL: @test1
+; CHECK: if.end:
+; CHECK: %2 = inttoptr i64 %const to i8*
+; CHECK-NEXT: br
+; CHECK: return:
+; CHECK-NEXT: %retval.0 = phi i8* [ null, %entry ], [ %2, %if.end ]
+}
+
+define void @test2(i1 %cmp, i64** %tmp) {
+entry:
+  call void @foo(i8* inttoptr (i64 68719476736 to i8*))
+  br i1 %cmp, label %if.end, label %return
+
+if.end:                                           ; preds = %entry
+  call void @foo(i8* inttoptr (i64 68719476736 to i8*))
+  br label %return
+
+return:                                           ; preds = %if.end, %entry
+  store i64* inttoptr (i64 68719476735 to i64*), i64** %tmp ; constant is one less than the one used in the calls
+  ret void
+
+; CHECK-LABEL: @test2
+; CHECK: return:
+; CHECK-NEXT: %const_mat = add i64 %const, -1
+; CHECK-NEXT: inttoptr i64 %const_mat to i64*
+}
+
+declare void @foo(i8*)
+
+; PR18768
+define i32 @test3(i1 %c) {
+entry:
+  br i1 %c, label %if.then, label %if.end3
+
+if.then:                                          ; preds = %entry
+  br label %if.end3
+
+if.end3:                                          ; preds = %if.then, %entry
+  %d.0 = phi i32* [ inttoptr (i64 985162435264511 to i32*), %entry ], [ null, %if.then ]
+  %cmp4 = icmp eq i32* %d.0, inttoptr (i64 985162435264511 to i32*)
+  %cmp6 = icmp eq i32* %d.0, inttoptr (i64 985162418487296 to i32*)
+  %or = or i1 %cmp4, %cmp6
+  br i1 %or, label %if.then8, label %if.end9
+
+if.then8:                                         ; preds = %if.end3
+  ret i32 1
+
+if.end9:                                          ; preds = %if.end3
+  ret i32 undef
+}
+
+; <rdar://problem/16394449>
+define i64 @switch_test1(i64 %a) {
+; CHECK-LABEL: @switch_test1
+; CHECK: %0 = phi i64 [ %const, %case2 ], [ %const_mat, %Entry ], [ %const_mat, %Entry ]
+Entry:
+  %sel = add i64 %a, 4519019440
+  switch i64 %sel, label %fail [
+    i64 462, label %continuation
+    i64 449, label %case2
+    i64 443, label %continuation
+  ]
+
+case2:                                            ; preds = %Entry
+  br label %continuation
+
+continuation:                                     ; preds = %case2, %Entry, %Entry (cases 462 and 443)
+  %0 = phi i64 [ 4519019440, %case2 ], [ 4519019460, %Entry ], [ 4519019460, %Entry ]
+  ret i64 0 ; reached from any of the three listed cases
+
+fail:                                             ; preds = %Entry
+  ret i64 -1 ; switch default
+}
+
+define i64 @switch_test2(i64 %a) {
+; CHECK-LABEL: @switch_test2
+; CHECK: %2 = phi i64* [ %1, %case2 ], [ %0, %Entry ], [ %0, %Entry ]
+Entry:
+  %sel = add i64 %a, 4519019440
+  switch i64 %sel, label %fail [
+    i64 462, label %continuation
+    i64 449, label %case2
+    i64 443, label %continuation
+  ]
+
+case2:                                            ; preds = %Entry
+  br label %continuation
+
+continuation:                                     ; preds = %case2, %Entry, %Entry (cases 462 and 443)
+  %0 = phi i64* [ inttoptr(i64 4519019440 to i64*), %case2 ], [ inttoptr(i64 4519019460 to i64*), %Entry ], [ inttoptr(i64 4519019460 to i64*), %Entry ]
+  ret i64 0 ; reached from any of the three listed cases
+
+fail:                                             ; preds = %Entry
+  ret i64 -1 ; switch default
+}
+
diff --git a/test/Transforms/ConstantHoisting/X86/stackmap.ll b/test/Transforms/ConstantHoisting/X86/stackmap.ll
new file mode 100644
index 0000000..cef022e
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/X86/stackmap.ll
@@ -0,0 +1,17 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Test if the 3rd argument of a stackmap is hoisted.
+define i128 @test1(i128 %a) {
+; CHECK-LABEL:  @test1
+; CHECK:        %const = bitcast i128 13464618275673403322 to i128
+; CHECK:        tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 %const)
+entry:
+  %0 = add i128 %a, 13464618275673403322           ; same constant as the third stackmap operand below
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 13464618275673403322)
+  ret i128 %0
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
diff --git a/test/Transforms/ConstantMerge/linker-private.ll b/test/Transforms/ConstantMerge/linker-private.ll
deleted file mode 100644
index eba7880..0000000
--- a/test/Transforms/ConstantMerge/linker-private.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: opt < %s -constmerge -S | FileCheck %s
-; <rdar://problem/10564621>
-
-%0 = type opaque
-%struct.NSConstantString = type { i32*, i32, i8*, i32 }
-
-; CHECK: @.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
-
-@isLogVisible = global i8 0, align 1
-@__CFConstantStringClassReference = external global [0 x i32]
-@.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
-@_unnamed_cfstring_4 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([1 x i8]* @.str3, i32 0, i32 0), i32 0 }, section "__DATA,__cfstring"
-@null.array = weak_odr constant [1 x i8] zeroinitializer, align 1
-
-define linkonce_odr void @bar() nounwind ssp align 2 {
-entry:
-  %stack = alloca i8*, align 4
-  %call = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* null, i8* null, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_4 to %0*))
-  store i8* getelementptr inbounds ([1 x i8]* @null.array, i32 0, i32 0), i8** %stack, align 4
-  ret void
-}
-
-declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
diff --git a/test/Transforms/DeadArgElim/deadexternal.ll b/test/Transforms/DeadArgElim/deadexternal.ll
index acbcf75..665d7db 100644
--- a/test/Transforms/DeadArgElim/deadexternal.ll
+++ b/test/Transforms/DeadArgElim/deadexternal.ll
@@ -32,7 +32,7 @@ entry:
   %i = alloca i32, align 4
   store volatile i32 10, i32* %i, align 4
 ; CHECK: %tmp = load volatile i32* %i, align 4
-; CHECK-next: call void @f(i32 undef)
+; CHECK-NEXT: call void @f(i32 undef)
   %tmp = load volatile i32* %i, align 4
   call void @f(i32 %tmp)
   ret void
diff --git a/test/Transforms/DeadArgElim/keepalive.ll b/test/Transforms/DeadArgElim/keepalive.ll
index 82e01f2..16569db 100644
--- a/test/Transforms/DeadArgElim/keepalive.ll
+++ b/test/Transforms/DeadArgElim/keepalive.ll
@@ -28,4 +28,20 @@ define void @caller() {
         ret void
 }
 
+; We can't remove 'this' here, as that would put argmem in ecx instead of
+; memory.
+define internal x86_thiscallcc i32 @unused_this(i32* %this, i32* inalloca %argmem) {
+	%v = load i32* %argmem		; only %argmem is read; %this has no uses in the body
+	ret i32 %v
+}
+; CHECK-LABEL: define internal x86_thiscallcc i32 @unused_this(i32* %this, i32* inalloca %argmem)
+
+define i32 @caller2() {
+	%t = alloca i32			; passed as the (unused) 'this' argument
+	%m = alloca inalloca i32
+	store i32 42, i32* %m
+	%v = call x86_thiscallcc i32 @unused_this(i32* %t, i32* inalloca %m)
+	ret i32 %v
+}
+
 ; CHECK: attributes #0 = { nounwind }
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index ec98466..cdfe226 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -105,6 +105,15 @@ define void @test9(%struct.x* byval  %a) nounwind  {
 ; CHECK-NEXT: ret void
 }
 
+; Test for inalloca handling.
+define void @test9_2(%struct.x* inalloca  %a) nounwind  {
+	%tmp2 = getelementptr %struct.x* %a, i32 0, i32 0
+	store i32 1, i32* %tmp2, align 4		; expected to be removed: the output must go straight to 'ret void'
+	ret void
+; CHECK-LABEL: @test9_2(
+; CHECK-NEXT: ret void
+}
+
 ; va_arg has fuzzy dependence, the store shouldn't be zapped.
 define double @test10(i8* %X) {
         %X_addr = alloca i8*
diff --git a/test/Transforms/FunctionAttrs/nocapture.ll b/test/Transforms/FunctionAttrs/nocapture.ll
index 110bd03..d2460c0 100644
--- a/test/Transforms/FunctionAttrs/nocapture.ll
+++ b/test/Transforms/FunctionAttrs/nocapture.ll
@@ -91,6 +91,21 @@ l:
 	ret i32 %val
 }
 
+; CHECK: define i32 @nc1_addrspace(i32* %q, i32 addrspace(1)* nocapture %p, i1 %b)
+define i32 @nc1_addrspace(i32* %q, i32 addrspace(1)* %p, i1 %b) {
+e:
+	br label %l
+l:
+	%x = phi i32 addrspace(1)* [ %p, %e ]
+	%y = phi i32* [ %q, %e ]
+	%tmp = addrspacecast i32 addrspace(1)* %x to i32*		; <i32*> [#uses=2]
+	%tmp2 = select i1 %b, i32* %tmp, i32* %y
+	%val = load i32* %tmp2		; <i32> [#uses=1]
+	store i32 0, i32* %tmp		; %p (via %x and %tmp) is only loaded from and stored through
+	store i32* %y, i32** @g		; %q (via %y) is written to the global @g
+	ret i32 %val
+}
+
 ; CHECK: define void @nc2(i32* nocapture %p, i32* %q)
 define void @nc2(i32* %p, i32* %q) {
 	%1 = call i32 @nc1(i32* %q, i32* %p, i1 0)		; <i32> [#uses=0]
diff --git a/test/Transforms/FunctionAttrs/readattrs.ll b/test/Transforms/FunctionAttrs/readattrs.ll
index 0842f56..7ae38bb 100644
--- a/test/Transforms/FunctionAttrs/readattrs.ll
+++ b/test/Transforms/FunctionAttrs/readattrs.ll
@@ -45,3 +45,9 @@ define void @test6_2(i8** %p, i8* %q) {
   call void @test6_1()
   ret void
 }
+
+; CHECK: define void @test7_1(i32* inalloca nocapture %a)
+; inalloca parameters are always considered written
+define void @test7_1(i32* inalloca %a) {
+  ret void
+}
diff --git a/test/Transforms/GCOVProfiling/version.ll b/test/Transforms/GCOVProfiling/version.ll
index 2f1bd70..04f3f99 100644
--- a/test/Transforms/GCOVProfiling/version.ll
+++ b/test/Transforms/GCOVProfiling/version.ll
@@ -1,11 +1,11 @@
 ; RUN: echo '!9 = metadata !{metadata !"%T/version.ll", metadata !0}' > %t1
 ; RUN: cat %s %t1 > %t2
 ; RUN: opt -insert-gcov-profiling -disable-output < %t2
-; RUN: head -c12 %T/version.gcno | grep '^oncg\*204MVLL$'
+; RUN: head -c8 %T/version.gcno | grep '^oncg\*204'
 ; RUN: rm %T/version.gcno
 ; RUN: not opt -insert-gcov-profiling -default-gcov-version=asdfasdf -disable-output < %t2
 ; RUN: opt -insert-gcov-profiling -default-gcov-version=407* -disable-output < %t2
-; RUN: head -c12 %T/version.gcno | grep '^oncg\*704MVLL$'
+; RUN: head -c8 %T/version.gcno | grep '^oncg\*704'
 ; RUN: rm %T/version.gcno
 
 define void @test() {
diff --git a/test/Transforms/GVN/2009-03-10-PREOnVoid.ll b/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
index 89d6a5f..fd31fce 100644
--- a/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
+++ b/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
@@ -53,30 +53,58 @@ bb11:		; preds = %bb7, %bb5
 	unreachable
 }
 
-declare i32 @pthread_once(i32*, void ()*)
+define i32 @pthread_once(i32*, void ()*) {
+       ret i32 0
+}
 
-declare i8* @pthread_getspecific(i32)
+define i8* @pthread_getspecific(i32) {
+       ret i8* null
+}
 
-declare i32 @pthread_setspecific(i32, i8*)
+define i32 @pthread_setspecific(i32, i8*) {
+        ret i32 0
+}
 
-declare i32 @pthread_create(i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)
+define i32 @pthread_create(i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*) {
+       ret i32 0
+}
 
-declare i32 @pthread_cancel(i32)
+define i32 @pthread_cancel(i32) {
+      ret i32 0
+}
 
-declare i32 @pthread_mutex_lock(%struct.pthread_mutex_t*)
+define i32 @pthread_mutex_lock(%struct.pthread_mutex_t*) {
+       ret i32 0
+}
 
-declare i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*)
+define i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*) {
+       ret i32 0
+}
 
-declare i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*)
+define i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*) {
+       ret i32 0
+}
 
-declare i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct.__sched_param*)
+define i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct.__sched_param*) {
+        ret i32 0
+}
 
-declare i32 @pthread_key_create(i32*, void (i8*)*)
+define i32 @pthread_key_create(i32*, void (i8*)*) {
+       ret i32 0
+}
 
-declare i32 @pthread_key_delete(i32)
+define i32 @pthread_key_delete(i32) {
+        ret i32 0
+}
 
-declare i32 @pthread_mutexattr_init(%struct.__sched_param*)
+define i32 @pthread_mutexattr_init(%struct.__sched_param*) {
+        ret i32 0
+}
 
-declare i32 @pthread_mutexattr_settype(%struct.__sched_param*, i32)
+define i32 @pthread_mutexattr_settype(%struct.__sched_param*, i32) {
+        ret i32 0
+}
 
-declare i32 @pthread_mutexattr_destroy(%struct.__sched_param*)
+define i32 @pthread_mutexattr_destroy(%struct.__sched_param*) {
+       ret i32 0
+}
diff --git a/test/Transforms/GVN/unreachable_block_infinite_loop.ll b/test/Transforms/GVN/unreachable_block_infinite_loop.ll
index fe335ce..fca5a28 100644
--- a/test/Transforms/GVN/unreachable_block_infinite_loop.ll
+++ b/test/Transforms/GVN/unreachable_block_infinite_loop.ll
@@ -1,4 +1,4 @@
-; RUN: opt -memdep -gvn -disable-output
+; RUN: opt -memdep -gvn -disable-output < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0"
diff --git a/test/Transforms/GlobalMerge/ARM/arm.ll b/test/Transforms/GlobalMerge/ARM/arm.ll
new file mode 100644
index 0000000..8c77de6
--- /dev/null
+++ b/test/Transforms/GlobalMerge/ARM/arm.ll
@@ -0,0 +1,85 @@
+; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O0 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O0 -o - -global-merge=true | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s
+; RUN: llc %s -O1 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O1 -o - -global-merge=true | FileCheck -check-prefix=MERGE %s
+
+; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
+; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
+; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
+; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4
+; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
+; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
+; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
+
+; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
+; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2
+; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2
+; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2
+; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+@bar = internal global [5 x i32] zeroinitializer, align 4
+@baz = internal global [5 x i32] zeroinitializer, align 4
+@foo = internal global [5 x i32] zeroinitializer, align 4
+
+; Function Attrs: nounwind ssp
+define internal void @initialize() #0 {
+  %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+  store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 0), align 4, !tbaa !1
+  %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+  store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 0), align 4, !tbaa !1
+  %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+  store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 1), align 4, !tbaa !1
+  %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+  store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 1), align 4, !tbaa !1
+  %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+  store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 2), align 4, !tbaa !1
+  %6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+  store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 2), align 4, !tbaa !1
+  %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+  store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 3), align 4, !tbaa !1
+  %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+  store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 3), align 4, !tbaa !1
+  %9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+  store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
+  %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+  store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
+  ret void
+}
+
+declare i32 @calc(...) #1
+
+; Function Attrs: nounwind ssp
+define internal void @calculate() #0 {
+  %1 = load <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4
+  %2 = load <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4
+  %3 = mul <4 x i32> %2, %1
+  store <4 x i32> %3, <4 x i32>* bitcast ([5 x i32]* @foo to <4 x i32>*), align 4
+  %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
+  %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
+  %6 = mul nsw i32 %5, %4
+  store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1
+  ret void
+}
+
+; Function Attrs: nounwind readnone ssp
+define internal i32* @returnFoo() #2 {
+  ret i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 0)
+}
+
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"LLVM version 3.4 "}
+!1 = metadata !{metadata !2, metadata !2, i64 0}
+!2 = metadata !{metadata !"int", metadata !3, i64 0}
+!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
+!4 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/GlobalMerge/ARM/lit.local.cfg b/test/Transforms/GlobalMerge/ARM/lit.local.cfg
new file mode 100644
index 0000000..8a3ba96
--- /dev/null
+++ b/test/Transforms/GlobalMerge/ARM/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/GlobalMerge/ARM64/arm64.ll b/test/Transforms/GlobalMerge/ARM64/arm64.ll
new file mode 100644
index 0000000..eea474a
--- /dev/null
+++ b/test/Transforms/GlobalMerge/ARM64/arm64.ll
@@ -0,0 +1,88 @@
+; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O0 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O0 -o - -global-merge=true | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s
+; RUN: llc %s -O1 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O1 -o - -global-merge=true | FileCheck -check-prefix=MERGE %s
+
+; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
+; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
+; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
+; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4
+; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
+; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
+; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
+
+; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
+; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2
+; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2
+; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2
+; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+target triple = "arm64-apple-ios7.0.0"
+
+@bar = internal global [5 x i32] zeroinitializer, align 4
+@baz = internal global [5 x i32] zeroinitializer, align 4
+@foo = internal global [5 x i32] zeroinitializer, align 4
+
+; Function Attrs: nounwind ssp
+define internal void @initialize() #0 {
+  %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+  store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4
+  %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+  store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4
+  %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+  store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4
+  %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+  store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4
+  %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+  store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4
+  %6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+  store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4
+  %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+  store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4
+  %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+  store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4
+  %9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+  store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4
+  %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+  store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4
+  ret void
+}
+
+declare i32 @calc(...)
+
+; Function Attrs: nounwind ssp
+define internal void @calculate() #0 {
+  %1 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4
+  %2 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4
+  %3 = mul nsw i32 %2, %1
+  store i32 %3, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0), align 4
+  %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4
+  %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4
+  %6 = mul nsw i32 %5, %4
+  store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 1), align 4
+  %7 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4
+  %8 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4
+  %9 = mul nsw i32 %8, %7
+  store i32 %9, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 2), align 4
+  %10 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4
+  %11 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4
+  %12 = mul nsw i32 %11, %10
+  store i32 %12, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 3), align 4
+  %13 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4
+  %14 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4
+  %15 = mul nsw i32 %14, %13
+  store i32 %15, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 4), align 4
+  ret void
+}
+
+; Function Attrs: nounwind readnone ssp
+define internal i32* @returnFoo() #1 {
+  ret i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0)
+}
+
+attributes #0 = { nounwind ssp }
+attributes #1 = { nounwind readnone ssp }
+attributes #2 = { nounwind }
diff --git a/test/Transforms/GlobalMerge/ARM64/lit.local.cfg b/test/Transforms/GlobalMerge/ARM64/lit.local.cfg
new file mode 100644
index 0000000..a75a42b
--- /dev/null
+++ b/test/Transforms/GlobalMerge/ARM64/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'ARM64' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
index a1b69ef..d6a565a 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -globalopt
 
-@g = external global i32
+@g = global i32 0
 
 @a = alias bitcast (i32* @g to i8*)
 
diff --git a/test/Transforms/GlobalOpt/alias-resolve.ll b/test/Transforms/GlobalOpt/alias-resolve.ll
index 32f4bf8..2d5a956 100644
--- a/test/Transforms/GlobalOpt/alias-resolve.ll
+++ b/test/Transforms/GlobalOpt/alias-resolve.ll
@@ -1,31 +1,35 @@
-; We use a temporary file so that the test fails when opt crashes.
-
-; RUN: opt < %s -globalopt -S > %t
-; RUN: FileCheck %s < %t
+; RUN: opt < %s -globalopt -S | FileCheck %s
 
 @foo1 = alias void ()* @foo2
-; CHECK: @foo1 = alias void ()* @foo2
+; CHECK: @foo1 = alias void ()* @bar2
 
-@foo2 = alias weak void()* @bar1
-; CHECK: @foo2 = alias weak void ()* @bar2
+@foo2 = alias void()* @bar1
+; CHECK: @foo2 = alias void ()* @bar2
 
 @bar1  = alias void ()* @bar2
 ; CHECK: @bar1 = alias void ()* @bar2
 
-declare void @bar2()
-; CHECK: declare void @bar2()
+@weak1 = alias weak void ()* @bar2
+; CHECK: @weak1 = alias weak void ()* @bar2
+
+define void @bar2() {
+  ret void
+}
+; CHECK: define void @bar2()
 
 define void @baz() {
 entry:
          call void @foo1()
-; CHECK: call void @foo2()
+; CHECK: call void @bar2()
 
          call void @foo2()
-; CHECK: call void @foo2()
+; CHECK: call void @bar2()
 
          call void @bar1()
 ; CHECK: call void @bar2()
 
+         call void @weak1()
+; CHECK: call void @weak1()
          ret void
 }
 
diff --git a/test/Transforms/GlobalOpt/alias-used-address-space.ll b/test/Transforms/GlobalOpt/alias-used-address-space.ll
new file mode 100644
index 0000000..633cd34
--- /dev/null
+++ b/test/Transforms/GlobalOpt/alias-used-address-space.ll
@@ -0,0 +1,26 @@
+; RUN: opt -S -globalopt < %s | FileCheck %s
+
+target datalayout = "p:32:32:32-p1:16:16:16"
+
+@c = addrspace(1) global i8 42
+
+@i = internal addrspace(1) global i8 42
+
+; CHECK: @ia = internal addrspace(1) global i8 42
+@ia = alias internal i8 addrspace(1)* @i
+
+@llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ca to i8*)], section "llvm.metadata"
+; CHECK-DAG: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ca to i8*)], section "llvm.metadata"
+
+@llvm.compiler.used = appending global [2 x i8*] [i8* addrspacecast(i8 addrspace(1)* @ia to i8*), i8* addrspacecast (i8 addrspace(1)* @i to i8*)], section "llvm.metadata"
+; CHECK-DAG: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ia to i8*)], section "llvm.metadata"
+
+@sameAsUsed = global [1 x i8*] [i8* addrspacecast(i8 addrspace(1)* @ca to i8*)]
+; CHECK-DAG: @sameAsUsed = global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @c to i8*)]
+
+@ca = alias internal i8 addrspace(1)* @c
+; CHECK: @ca = alias internal i8 addrspace(1)* @c
+
+define i8 addrspace(1)* @h() {
+  ret i8 addrspace(1)* @ca
+}
diff --git a/test/Transforms/GlobalOpt/alias-used-section.ll b/test/Transforms/GlobalOpt/alias-used-section.ll
new file mode 100644
index 0000000..987c4a4
--- /dev/null
+++ b/test/Transforms/GlobalOpt/alias-used-section.ll
@@ -0,0 +1,8 @@
+; RUN: opt -S -globalopt < %s | FileCheck %s
+
+@_Z17in_custom_section = internal global i8 42, section "CUSTOM"
+@in_custom_section = protected dllexport alias internal i8* @_Z17in_custom_section
+
+; CHECK: @in_custom_section = internal protected dllexport global i8 42, section "CUSTOM"
+
+@llvm.used = appending global [1 x i8*] [i8* @in_custom_section], section "llvm.metadata"
diff --git a/test/Transforms/GlobalOpt/fastcc.ll b/test/Transforms/GlobalOpt/fastcc.ll
new file mode 100644
index 0000000..76122b2
--- /dev/null
+++ b/test/Transforms/GlobalOpt/fastcc.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+define internal i32 @f(i32* %m) {
+; CHECK-LABEL: define internal fastcc i32 @f
+  %v = load i32* %m
+  ret i32 %v
+}
+
+define internal x86_thiscallcc i32 @g(i32* %m) {
+; CHECK-LABEL: define internal fastcc i32 @g
+  %v = load i32* %m
+  ret i32 %v
+}
+
+; Leave this one alone, because the user went out of their way to request this
+; convention.
+define internal coldcc i32 @h(i32* %m) {
+; CHECK-LABEL: define internal coldcc i32 @h
+  %v = load i32* %m
+  ret i32 %v
+}
+
+define internal i32 @j(i32* %m) {
+; CHECK-LABEL: define internal i32 @j
+  %v = load i32* %m
+  ret i32 %v
+}
+
+define void @call_things() {
+  %m = alloca i32
+  call i32 @f(i32* %m)
+  call x86_thiscallcc i32 @g(i32* %m)
+  call coldcc i32 @h(i32* %m)
+  call i32 @j(i32* %m)
+  ret void
+}
+
+@llvm.used = appending global [1 x i8*] [
+   i8* bitcast (i32(i32*)* @j to i8*)
+], section "llvm.metadata"
+
+; CHECK-LABEL: define void @call_things()
+; CHECK: call fastcc i32 @f
+; CHECK: call fastcc i32 @g
+; CHECK: call coldcc i32 @h
+; CHECK: call i32 @j
diff --git a/test/Transforms/GlobalOpt/memset.ll b/test/Transforms/GlobalOpt/memset.ll
index 3bb5ce9..85320b7 100644
--- a/test/Transforms/GlobalOpt/memset.ll
+++ b/test/Transforms/GlobalOpt/memset.ll
@@ -1,6 +1,8 @@
-; both globals are write only, delete them.
+; RUN: opt -S -globalopt < %s | FileCheck %s
 
-; RUN: opt < %s -globalopt -S | not grep internal
+; CHECK-NOT: internal
+
+; Both globals are write only, delete them.
 
 @G0 = internal global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00"         ; <[58 x i8]*> [#uses=1]
 @G1 = internal global [4 x i32] [ i32 1, i32 2, i32 3, i32 4 ]          ; <[4 x i32]*> [#uses=1]
@@ -13,6 +15,17 @@ define void @foo() {
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+@G0_as1 = internal addrspace(1) global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00"         ; <[58 x i8] addrspace(1)*> [#uses=1]
+@G1_as1 = internal addrspace(1) global [4 x i32] [ i32 1, i32 2, i32 3, i32 4 ]          ; <[4 x i32] addrspace(1)*> [#uses=1]
+
+define void @foo_as1() {
+  %Blah = alloca [58 x i8]
+  %tmp3 = bitcast [58 x i8]* %Blah to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* addrspacecast ([4 x i32] addrspace(1)* @G1_as1 to i8*), i8* %tmp3, i32 16, i32 1, i1 false)
+  call void @llvm.memset.p1i8.i32(i8 addrspace(1)* getelementptr inbounds ([58 x i8] addrspace(1)* @G0_as1, i32 0, i32 0), i8 17, i32 58, i32 1, i1 false)
+  ret void
+}
 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p1i8.i32(i8 addrspace(1)* nocapture, i8, i32, i32, i1) nounwind
\ No newline at end of file
diff --git a/test/Transforms/IndVarSimplify/iv-widen.ll b/test/Transforms/IndVarSimplify/iv-widen.ll
new file mode 100644
index 0000000..c899e2f
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/iv-widen.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target triple = "x86_64-apple-darwin"
+
+; CHECK-LABEL: @sloop
+; CHECK-LABEL: B18:
+; Only one phi now.
+; CHECK: phi
+; CHECK-NOT: phi
+; One trunc for the gep.
+; CHECK: trunc i64 %indvars.iv to i32
+; One trunc for the dummy() call.
+; CHECK-LABEL: exit24:
+; CHECK: trunc i64 {{.*}}lcssa.wide to i32
+define void @sloop(i32* %a) {
+Prologue:
+  br i1 undef, label %B18, label %B6
+
+B18:                                        ; preds = %B24, %Prologue
+  %.02 = phi i32 [ 0, %Prologue ], [ %tmp33, %B24 ]
+  %tmp23 = zext i32 %.02 to i64
+  %tmp33 = add i32 %.02, 1
+  %o = getelementptr i32* %a, i32 %.02
+  %v = load i32* %o
+  %t = icmp eq i32 %v, 0
+  br i1 %t, label %exit24, label %B24
+
+B24:                                        ; preds = %B18
+  %t2 = icmp eq i32 %tmp33, 20
+  br i1 %t2, label %B6, label %B18
+
+B6:                                       ; preds = %B24, %Prologue
+  ret void
+
+exit24:                      ; preds = %B18
+  call void @dummy(i32 %.02)
+  unreachable
+}
+
+declare void @dummy(i32)
diff --git a/test/Transforms/IndVarSimplify/lcssa-preservation.ll b/test/Transforms/IndVarSimplify/lcssa-preservation.ll
new file mode 100644
index 0000000..f69c96c
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lcssa-preservation.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+;
+; Make sure IndVars preserves LCSSA form, especially across loop nests. 
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define void @PR18642(i32 %x) {
+; CHECK-LABEL: @PR18642(
+entry:
+  br label %outer.header
+; CHECK:   br label %outer.header
+
+outer.header:
+; CHECK: outer.header:
+  %outer.iv = phi i32 [ 0, %entry ], [ %x, %outer.latch ]
+  br label %inner.header
+; CHECK:   %[[SCEV_EXPANDED:.*]] = add i32
+; CHECK:   br label %inner.header
+
+inner.header:
+; CHECK: inner.header:
+  %inner.iv = phi i32 [ undef, %outer.header ], [ %inc, %inner.latch ]
+  %cmp1 = icmp slt i32 %inner.iv, %outer.iv
+  br i1 %cmp1, label %inner.latch, label %outer.latch
+; CHECK:   br i1 {{.*}}, label %inner.latch, label %outer.latch
+
+inner.latch:
+; CHECK: inner.latch:
+  %inc = add nsw i32 %inner.iv, 1
+  %cmp2 = icmp slt i32 %inner.iv, %outer.iv
+  br i1 %cmp2, label %inner.header, label %exit
+; CHECK:   br i1 {{.*}}, label %inner.header, label %[[EXIT_FROM_INNER:.*]]
+
+outer.latch:
+; CHECK: outer.latch:
+  br i1 undef, label %outer.header, label %exit
+; CHECK:   br i1 {{.*}}, label %outer.header, label %[[EXIT_FROM_OUTER:.*]]
+
+; CHECK: [[EXIT_FROM_INNER]]:
+; CHECK-NEXT: %[[LCSSA:.*]] = phi i32 [ %[[SCEV_EXPANDED]], %inner.latch ]
+; CHECK-NEXT: br label %exit
+
+; CHECK: [[EXIT_FROM_OUTER]]:
+; CHECK-NEXT: br label %exit
+
+exit:
+; CHECK: exit:
+  %exit.phi = phi i32 [ %inc, %inner.latch ], [ undef, %outer.latch ]
+; CHECK-NEXT: phi i32 [ %[[LCSSA]], %[[EXIT_FROM_INNER]] ], [ undef, %[[EXIT_FROM_OUTER]] ]
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/lftr-extend-const.ll b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
index 2fac4a7..4736f85 100644
--- a/test/Transforms/IndVarSimplify/lftr-extend-const.ll
+++ b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
@@ -1,6 +1,6 @@
 ;RUN: opt -S %s -indvars | FileCheck %s
 
-; CHECK-LABEL-LABEL: @foo(
+; CHECK-LABEL: @foo(
 ; CHECK-NOT: %lftr.wideiv = trunc i32 %indvars.iv.next to i16
 ; CHECK: %exitcond = icmp ne i32 %indvars.iv.next, 512
 define void @foo() #0 {
@@ -20,7 +20,7 @@ for.end:                                          ; preds = %for.body
 }
 
 ; Check that post-incrementing the backedge taken count does not overflow.
-; CHECK-LABEL-LABEL: @postinc(
+; CHECK-LABEL: @postinc(
 ; CHECK: icmp eq i32 %indvars.iv.next, 256
 define i32 @postinc() #0 {
 entry:
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
index fe3df5c..1fdcdd1 100644
--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
@@ -38,17 +38,16 @@ for.end:
   ret void
 }
 
-; It would be nice if SCEV and any loop analysis could assume that
-; preheaders exist. Unfortunately it is not always the case. This test
-; checks that SCEVExpander can handle an outer loop that has not yet
-; been simplified. As a result, the inner loop's exit test will not be
-; rewritten.
+; This test checks that SCEVExpander can handle an outer loop that has been
+; simplified, and as a result the inner loop's exit test will be rewritten.
 define void @expandOuterRecurrence(i32 %arg) nounwind {
 entry:
   %sub1 = sub nsw i32 %arg, 1
   %cmp1 = icmp slt i32 0, %sub1
   br i1 %cmp1, label %outer, label %exit
 
+; CHECK: outer:
+; CHECK: icmp slt
 outer:
   %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ]
   %sub2 = sub nsw i32 %arg, %i
@@ -60,7 +59,6 @@ inner.ph:
   br label %inner
 
 ; CHECK: inner:
-; CHECK: icmp slt
 ; CHECK: br i1
 inner:
   %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ]
diff --git a/test/Transforms/IndVarSimplify/overflowcheck.ll b/test/Transforms/IndVarSimplify/overflowcheck.ll
new file mode 100644
index 0000000..2603f36
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/overflowcheck.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -indvars -liv-reduce -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; CHECK-LABEL: @addwithoverflow
+; CHECK-LABEL: loop1:
+; CHECK-NOT: zext
+; CHECK: add nsw
+; CHECK: @llvm.sadd.with.overflow
+; CHECK-LABEL: loop2:
+; CHECK-NOT: extractvalue
+; CHECK: add nuw nsw
+; CHECK: @llvm.sadd.with.overflow
+; CHECK-LABEL: loop3:
+; CHECK-NOT: extractvalue
+; CHECK: ret
+define i64 @addwithoverflow(i32 %n, i64* %a) {
+entry:
+  br label %loop0
+
+loop0:
+  %i = phi i32 [ 0, %entry ], [ %i1val, %loop3 ]
+  %s = phi i32 [ 0, %entry ], [ %addsval, %loop3 ]
+  %bc = icmp ult i32 %i, %n
+  br i1 %bc, label %loop1, label %exit
+
+loop1:
+  %zxt = zext i32 %i to i64
+  %ofs = shl nuw nsw i64 %zxt, 3
+  %gep = getelementptr i64* %a, i64 %zxt
+  %v = load i64* %gep, align 8
+  %truncv = trunc i64 %v to i32
+  %adds = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %s, i32 %truncv)
+  %ovflows = extractvalue { i32, i1 } %adds, 1
+  br i1 %ovflows, label %exit, label %loop2
+
+loop2:
+  %addsval = extractvalue { i32, i1 } %adds, 0
+  %i1 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i, i32 1)
+  %i1check = extractvalue { i32, i1 } %i1, 1
+  br i1 %i1check, label %exit, label %loop3
+
+loop3:
+  %i1val = extractvalue { i32, i1 } %i1, 0
+  %test = icmp slt i32 %i1val, %n
+  br i1 %test, label %return, label %loop0
+
+return:
+  %ret = zext i32 %addsval to i64
+  ret i64 %ret
+
+exit:
+  unreachable
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
diff --git a/test/Transforms/IndVarSimplify/tripcount_compute.ll b/test/Transforms/IndVarSimplify/tripcount_compute.ll
index 626a29b..966d152 100644
--- a/test/Transforms/IndVarSimplify/tripcount_compute.ll
+++ b/test/Transforms/IndVarSimplify/tripcount_compute.ll
@@ -160,3 +160,34 @@ loop9:                                            ; preds = %loop2, %loopexit
 loopexit9:                                        ; preds = %loop2
   ret i32 %l.next
 }
+
+; PR18449. Check that the early exit is reduced to never taken.
+;
+; CHECK-LABEL: @twoexit
+; CHECK-LABEL: loop:
+; CHECK: phi
+; CHECK: br i1 false
+; CHECK: br
+; CHECK: ret
+define void @twoexit() {
+"function top level":
+  br label %loop
+
+loop:                                             ; preds = %body, %"function top level"
+  %0 = phi i64 [ 0, %"function top level" ], [ %2, %body ]
+  %1 = icmp ugt i64 %0, 2
+  br i1 %1, label %fail, label %body
+
+fail:                                             ; preds = %loop
+  tail call void @bounds_fail()
+  unreachable
+
+body:                                             ; preds = %loop
+  %2 = add i64 %0, 1
+  %3 = icmp slt i64 %2, 3
+  br i1 %3, label %loop, label %out
+
+out:                                              ; preds = %body
+  ret void
+}
+declare void @bounds_fail()
diff --git a/test/Transforms/Inline/ignore-debug-info.ll b/test/Transforms/Inline/ignore-debug-info.ll
new file mode 100644
index 0000000..543a89b
--- /dev/null
+++ b/test/Transforms/Inline/ignore-debug-info.ll
@@ -0,0 +1,55 @@
+; RUN: opt < %s -S -inline -inline-threshold=2 | FileCheck %s
+; RUN: opt < %s -S -strip-debug -inline -inline-threshold=2 | FileCheck %s
+;
+; The purpose of this test is to check that debug info doesn't influence
+; inlining decisions.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.dbg.declare(metadata, metadata) #0
+declare void @llvm.dbg.value(metadata, i64, metadata) #0
+
+define <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b) {
+entry:
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  %mul = fmul <4 x float> %a, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  %mul1 = fmul <4 x float> %b, <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  %add = fadd <4 x float> %mul, %mul1
+  ret <4 x float> %add
+}
+
+define float @outer_vectors(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @outer_vectors(
+; CHECK-NOT: call <4 x float> @inner_vectors(
+; CHECK: ret float
+
+entry:
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  %call = call <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b)
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  %vecext = extractelement <4 x float> %call, i32 0
+  %vecext1 = extractelement <4 x float> %call, i32 1
+  %add = fadd float %vecext, %vecext1
+  %vecext2 = extractelement <4 x float> %call, i32 2
+  %add3 = fadd float %add, %vecext2
+  %vecext4 = extractelement <4 x float> %call, i32 3
+  %add5 = fadd float %add3, %vecext4
+  ret float %add5
+}
+
+attributes #0 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !{}, metadata !2, metadata !2, metadata !""}
+!1 = metadata !{metadata !"", metadata !""}
+!2 = metadata !{i32 0}
+!3 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!5 = metadata !{metadata !""}
diff --git a/test/Transforms/Inline/inline-cold.ll b/test/Transforms/Inline/inline-cold.ll
new file mode 100644
index 0000000..bb8c008
--- /dev/null
+++ b/test/Transforms/Inline/inline-cold.ll
@@ -0,0 +1,88 @@
+; RUN: opt < %s -inline -S -inlinecold-threshold=75 | FileCheck %s
+
+; Test that a function with the cold attribute is not inlined, while the
+; same function without the cold attribute is inlined.
+
+@a = global i32 4
+
+; Accumulates twelve volatile loads of @a (the first one twice) plus %a. The
+; body should cost more than the cold threshold (75) but less than the regular one.
+; Function Attrs: nounwind readnone uwtable
+define i32 @simpleFunction(i32 %a) #0 {
+entry:
+  %a1 = load volatile i32* @a
+  %x1 = add i32 %a1, %a1
+  %a2 = load volatile i32* @a
+  %x2 = add i32 %x1, %a2
+  %a3 = load volatile i32* @a
+  %x3 = add i32 %x2, %a3
+  %a4 = load volatile i32* @a
+  %x4 = add i32 %x3, %a4
+  %a5 = load volatile i32* @a
+  %x5 = add i32 %x4, %a5
+  %a6 = load volatile i32* @a
+  %x6 = add i32 %x5, %a6
+  %a7 = load volatile i32* @a
+  %x7 = add i32 %x6, %a7            ; each step adds its own load (was %a6)
+  %a8 = load volatile i32* @a
+  %x8 = add i32 %x7, %a8
+  %a9 = load volatile i32* @a
+  %x9 = add i32 %x8, %a9
+  %a10 = load volatile i32* @a
+  %x10 = add i32 %x9, %a10
+  %a11 = load volatile i32* @a
+  %x11 = add i32 %x10, %a11
+  %a12 = load volatile i32* @a
+  %x12 = add i32 %x11, %a12
+  %add = add i32 %x12, %a           ; fold in the argument last
+  ret i32 %add
+}
+
+; Function Attrs: nounwind cold readnone uwtable (#1); its call in @bar is expected to remain.
+define i32 @ColdFunction(i32 %a) #1 {
+; CHECK-LABEL: @ColdFunction
+; CHECK: ret
+entry:
+  %a1 = load volatile i32* @a
+  %x1 = add i32 %a1, %a1
+  %a2 = load volatile i32* @a
+  %x2 = add i32 %x1, %a2
+  %a3 = load volatile i32* @a
+  %x3 = add i32 %x2, %a3
+  %a4 = load volatile i32* @a
+  %x4 = add i32 %x3, %a4
+  %a5 = load volatile i32* @a
+  %x5 = add i32 %x4, %a5
+  %a6 = load volatile i32* @a
+  %x6 = add i32 %x5, %a6
+  %a7 = load volatile i32* @a
+  %x7 = add i32 %x6, %a7            ; each step adds its own load (was %a6)
+  %a8 = load volatile i32* @a
+  %x8 = add i32 %x7, %a8
+  %a9 = load volatile i32* @a
+  %x9 = add i32 %x8, %a9
+  %a10 = load volatile i32* @a
+  %x10 = add i32 %x9, %a10
+  %a11 = load volatile i32* @a
+  %x11 = add i32 %x10, %a11
+  %a12 = load volatile i32* @a
+  %x12 = add i32 %x11, %a12
+  %add = add i32 %x12, %a           ; fold in the argument last
+  ret i32 %add
+}
+
+; Function Attrs: nounwind readnone uwtable -- caller with one cold and one non-cold callee.
+define i32 @bar(i32 %a) #0 {
+; CHECK-LABEL: @bar
+; CHECK: call i32 @ColdFunction(i32 5)
+; CHECK-NOT: call i32 @simpleFunction(i32 6)
+; CHECK: ret
+entry:
+  %0 = tail call i32 @ColdFunction(i32 5)       ; cold callee (#1): this call is expected to remain
+  %1 = tail call i32 @simpleFunction(i32 6)     ; non-cold callee (#0): this call is expected to be gone
+  %add = add i32 %0, %1                         ; %a itself is unused
+  ret i32 %add
+}
+
+attributes #0 = { nounwind readnone uwtable }
+attributes #1 = { nounwind cold readnone uwtable }
diff --git a/test/Transforms/Inline/inline_invoke.ll b/test/Transforms/Inline/inline_invoke.ll
index c394138..c53bb5a 100644
--- a/test/Transforms/Inline/inline_invoke.ll
+++ b/test/Transforms/Inline/inline_invoke.ll
@@ -96,7 +96,6 @@ eh.resume:
 ; CHECK:      landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
-; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A]])
 ; CHECK-NEXT:   to label %[[LBL:[^\s]+]] unwind
 ; CHECK: [[LBL]]:
@@ -167,7 +166,6 @@ eh.resume:
 ; CHECK-NEXT: [[LPADVAL1:%.*]] = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
-; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A1]])
 ; CHECK-NEXT:   to label %[[RESUME1:[^\s]+]] unwind
 ; CHECK: [[RESUME1]]:
@@ -187,7 +185,6 @@ eh.resume:
 ; CHECK-NEXT: [[LPADVAL2:%.*]] = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:   cleanup
 ; CHECK-NEXT:   catch i8* bitcast (i8** @_ZTIi to i8*)
-; CHECK-NEXT:   catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A2]])
 ; CHECK-NEXT:   to label %[[RESUME2:[^\s]+]] unwind
 ; CHECK: [[RESUME2]]:
@@ -275,7 +272,6 @@ lpad.cont:
 ; CHECK:      landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
-; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(
 ; CHECK-NEXT:   to label %[[L:[^\s]+]] unwind
 ; CHECK:    [[L]]:
@@ -322,7 +318,6 @@ terminate:
 ; CHECK:      landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
-; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(
 ; CHECK-NEXT:   to label %[[L:[^\s]+]] unwind
 ; CHECK:    [[L]]:
diff --git a/test/Transforms/Inline/inline_returns_twice.ll b/test/Transforms/Inline/inline_returns_twice.ll
index 678ee82..3604264 100644
--- a/test/Transforms/Inline/inline_returns_twice.ll
+++ b/test/Transforms/Inline/inline_returns_twice.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -inline -S | FileCheck %s
 
 ; Check that functions with "returns_twice" calls are only inlined,
-; if they are themselve marked as such.
+; if they are themselves marked as such.
 
 declare i32 @a() returns_twice
 
diff --git a/test/Transforms/Inline/invoke-cleanup.ll b/test/Transforms/Inline/invoke-cleanup.ll
new file mode 100644
index 0000000..457ae2a
--- /dev/null
+++ b/test/Transforms/Inline/invoke-cleanup.ll
@@ -0,0 +1,39 @@
+; RUN: opt %s -inline -S | FileCheck %s
+
+declare void @external_func()
+
+@exception_type1 = external global i8
+@exception_type2 = external global i8
+
+
+define internal void @inner() {
+  invoke void @external_func()
+      to label %cont unwind label %lpad
+cont:
+  ret void
+lpad:
+  %lp = landingpad i32 personality i8* null
+      catch i8* @exception_type1
+  resume i32 %lp
+}
+
+; Test that the "cleanup" clause is kept when inlining @inner() into
+; this call site (PR17872), otherwise C++ destructors will not be
+; called when they should be.
+
+define void @outer() {
+  invoke void @inner()
+      to label %cont unwind label %lpad
+cont:
+  ret void
+lpad:
+  %lp = landingpad i32 personality i8* null
+      cleanup
+      catch i8* @exception_type2
+  resume i32 %lp
+}
+; CHECK: define void @outer
+; CHECK: landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: catch i8* @exception_type1
+; CHECK-NEXT: catch i8* @exception_type2
diff --git a/test/Transforms/Inline/invoke-combine-clauses.ll b/test/Transforms/Inline/invoke-combine-clauses.ll
new file mode 100644
index 0000000..5f06039
--- /dev/null
+++ b/test/Transforms/Inline/invoke-combine-clauses.ll
@@ -0,0 +1,117 @@
+; RUN: opt %s -inline -S | FileCheck %s
+
+declare void @external_func()
+declare void @abort()
+
+@exception_inner = external global i8
+@exception_outer = external global i8
+@condition = external global i1
+
+
+; Check for a bug in which multiple "resume" instructions in the
+; inlined function caused "catch i8* @exception_outer" to appear
+; multiple times in the resulting landingpad.
+
+define internal void @inner_multiple_resume() {
+  invoke void @external_func()
+      to label %cont unwind label %lpad
+cont:
+  ret void
+lpad:
+  %lp = landingpad i32 personality i8* null
+      catch i8* @exception_inner
+  %cond = load i1* @condition
+  br i1 %cond, label %resume1, label %resume2
+resume1:
+  resume i32 1
+resume2:
+  resume i32 2
+}
+
+define void @outer_multiple_resume() {
+  invoke void @inner_multiple_resume()
+      to label %cont unwind label %lpad
+cont:
+  ret void
+lpad:
+  %lp = landingpad i32 personality i8* null
+      catch i8* @exception_outer
+  resume i32 %lp
+}
+; CHECK: define void @outer_multiple_resume()
+; CHECK: %lp.i = landingpad
+; CHECK-NEXT: catch i8* @exception_inner
+; CHECK-NEXT: catch i8* @exception_outer
+; Check that there isn't another "catch" clause:
+; CHECK-NEXT: load
+
+
+; Check for a bug in which having a "resume" and a "call" in the
+; inlined function caused "catch i8* @exception_outer" to appear
+; multiple times in the resulting landingpad.
+
+define internal void @inner_resume_and_call() {
+  call void @external_func()
+  invoke void @external_func()
+      to label %cont unwind label %lpad
+cont:
+  ret void
+lpad:
+  %lp = landingpad i32 personality i8* null
+      catch i8* @exception_inner
+  resume i32 %lp
+}
+
+define void @outer_resume_and_call() {
+  invoke void @inner_resume_and_call()
+      to label %cont unwind label %lpad
+cont:
+  ret void
+lpad:
+  %lp = landingpad i32 personality i8* null
+      catch i8* @exception_outer
+  resume i32 %lp
+}
+; CHECK: define void @outer_resume_and_call()
+; CHECK: %lp.i = landingpad
+; CHECK-NEXT: catch i8* @exception_inner
+; CHECK-NEXT: catch i8* @exception_outer
+; Check that there isn't another "catch" clause:
+; CHECK-NEXT: br
+
+
+; Check what happens if the inlined function contains an "invoke" but
+; no "resume".  In this case, the inlined landingpad does not need to
+; include the "catch i8* @exception_outer" clause from the outer
+; function (since the outer function's landingpad will not be
+; reachable), but it's OK to include this clause.
+
+define internal void @inner_no_resume_or_call() {
+  invoke void @external_func()
+      to label %cont unwind label %lpad
+cont:
+  ret void
+lpad:
+  %lp = landingpad i32 personality i8* null
+      catch i8* @exception_inner
+  ; A landingpad might have no "resume" if a C++ destructor aborts.
+  call void @abort() noreturn nounwind
+  unreachable
+}
+
+define void @outer_no_resume_or_call() {
+  invoke void @inner_no_resume_or_call()
+      to label %cont unwind label %lpad
+cont:
+  ret void
+lpad:
+  %lp = landingpad i32 personality i8* null
+      catch i8* @exception_outer
+  resume i32 %lp
+}
+; CHECK: define void @outer_no_resume_or_call()
+; CHECK: %lp.i = landingpad
+; CHECK-NEXT: catch i8* @exception_inner
+; CHECK-NEXT: catch i8* @exception_outer
+; Check that there isn't another "catch" clause:
+; CHECK-NEXT: call void @abort()
diff --git a/test/Transforms/Inline/ptr-diff.ll b/test/Transforms/Inline/ptr-diff.ll
index af42bc7..46c3bcd 100644
--- a/test/Transforms/Inline/ptr-diff.ll
+++ b/test/Transforms/Inline/ptr-diff.ll
@@ -31,7 +31,7 @@ else:
 
 define i32 @outer2(i32* %ptr) {
 ; Test that an inbounds GEP disables this -- it isn't safe in general as
-; wrapping changes the behavior of lessthan and greaterthan comparisions.
+; wrapping changes the behavior of lessthan and greaterthan comparisons.
 ; CHECK-LABEL: @outer2(
 ; CHECK: call i32 @inner2
 ; CHECK: ret i32
diff --git a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
index c27fe0a..7f9bd9e 100644
--- a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
+++ b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
@@ -3,7 +3,9 @@
 
 @__gthrw_pthread_cancel = alias weak i32 (i32)* @pthread_cancel		; <i32 (i32)*> [#uses=1]
 @__gthread_active_ptr.5335 = internal constant i8* bitcast (i32 (i32)* @__gthrw_pthread_cancel to i8*)		; <i8**> [#uses=1]
-declare extern_weak i32 @pthread_cancel(i32)
+define weak i32 @pthread_cancel(i32) {
+       ret i32 0
+}
 
 define i1 @__gthread_active_p() {
 entry:
diff --git a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
index 23ee12b..c7cef75 100644
--- a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
+++ b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
@@ -3,7 +3,9 @@
 
 @A = alias weak void ()* @B		; <void ()*> [#uses=1]
 
-declare extern_weak void @B()
+define weak void @B() {
+       ret void
+}
 
 define i32 @active() {
 entry:
diff --git a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
index 2dedd44..1883a8f 100644
--- a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
+++ b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
@@ -1,6 +1,3 @@
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
-target triple = "thumbv7-apple-ios0"
-
 ; RUN: opt -S -instcombine < %s | FileCheck %s
 
 define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
@@ -67,6 +64,72 @@ entry:
 declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
 
+; ARM64 variants - <rdar://problem/12349617>
+
+define <4 x i32> @mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+}
+
+define <4 x i32> @mulByOneARM64(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %a
+}
+
+define <4 x i32> @constantMulARM64() nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
+}
+
+define <4 x i32> @constantMulSARM64() nounwind readnone ssp {
+entry:
+  %b = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+}
+
+define <4 x i32> @constantMulUARM64() nounwind readnone ssp {
+entry:
+  %b = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+}
+
+define <4 x i32> @complex1ARM64(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
+  %b = add <4 x i32> zeroinitializer, %a
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
+; CHECK-NEXT: ret <4 x i32> %a
+}
+
+define <4 x i32> @complex2ARM64(<4 x i32> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+  %b = add <4 x i32> %x, %a
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x i32> %b
+}
+
+declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+
 ; CHECK: attributes #0 = { nounwind readnone ssp }
 ; CHECK: attributes #1 = { nounwind readnone }
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll
index 0964bc0..67d560e 100644
--- a/test/Transforms/InstCombine/add2.ll
+++ b/test/Transforms/InstCombine/add2.ll
@@ -41,3 +41,38 @@ define i32 @test4(i32 %A) {
 ; CHECK-NEXT: ret i32 %B
 }
 
+define <2 x i1> @test5(<2 x i1> %A, <2 x i1> %B) {
+  %add = add <2 x i1> %A, %B
+  ret <2 x i1> %add
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: %add = xor <2 x i1> %A, %B
+; CHECK-NEXT: ret <2 x i1> %add
+}
+
+define <2 x i64> @test6(<2 x i64> %A) {
+  %shl = shl <2 x i64> %A, <i64 2, i64 3>
+  %add = add <2 x i64> %shl, %A
+  ret <2 x i64> %add
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: %add = mul <2 x i64> %A, <i64 5, i64 9>
+; CHECK-NEXT: ret <2 x i64> %add
+}
+
+define <2 x i64> @test7(<2 x i64> %A) {
+  %shl = shl <2 x i64> %A, <i64 2, i64 3>
+  %mul = mul <2 x i64> %A, <i64 3, i64 4>
+  %add = add <2 x i64> %shl, %mul
+  ret <2 x i64> %add
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: %add = mul <2 x i64> %A, <i64 7, i64 12>
+; CHECK-NEXT: ret <2 x i64> %add
+}
+
+define <2 x i64> @test8(<2 x i64> %A) {
+  %xor = xor <2 x i64> %A, <i64 -1, i64 -1>
+  %add = add <2 x i64> %xor, <i64 2, i64 3>
+  ret <2 x i64> %add
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: %add = sub <2 x i64> <i64 1, i64 2>, %A
+; CHECK-NEXT: ret <2 x i64> %add
+}
diff --git a/test/Transforms/InstCombine/add4.ll b/test/Transforms/InstCombine/add4.ll
index 208c7f0..f9b7e3b 100644
--- a/test/Transforms/InstCombine/add4.ll
+++ b/test/Transforms/InstCombine/add4.ll
@@ -77,3 +77,26 @@ define float @test7(float %A, float %B, i32 %C) {
 ; CHECK: uitofp
 }
 
+define <4 x float> @test8(<4 x float> %A, <4 x float> %B, <4 x i1> %C) {
+  ;; B*(uitofp i1 C) + A*(1 - uitofp i1 C) -> select C, B, A
+  %cf = uitofp <4 x i1> %C to <4 x float>
+  %mc = fsub fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %cf
+  %p1 = fmul fast <4 x float> %A, %mc
+  %p2 = fmul fast <4 x float> %B, %cf
+  %s1 = fadd fast <4 x float> %p2, %p1
+  ret <4 x float> %s1
+; CHECK-LABEL: @test8(
+; CHECK: select <4 x i1> %C, <4 x float> %B, <4 x float> %A
+}
+
+define <4 x float> @test9(<4 x float> %A, <4 x float> %B, <4 x i1> %C) {
+  ;; A*(1 - uitofp i1 C) + B*(uitofp i1 C) -> select C, B, A
+  %cf = uitofp <4 x i1> %C to <4 x float>
+  %mc = fsub fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %cf
+  %p1 = fmul fast <4 x float> %A, %mc
+  %p2 = fmul fast <4 x float> %B, %cf
+  %s1 = fadd fast <4 x float> %p1, %p2
+  ret <4 x float> %s1
+; CHECK-LABEL: @test9(
+; CHECK: select <4 x i1> %C, <4 x float> %B, <4 x float> %A
+}
diff --git a/test/Transforms/InstCombine/bitcast-store.ll b/test/Transforms/InstCombine/bitcast-store.ll
index e4a61e9..e46b5c8 100644
--- a/test/Transforms/InstCombine/bitcast-store.ll
+++ b/test/Transforms/InstCombine/bitcast-store.ll
@@ -3,14 +3,14 @@
 ; Instcombine should preserve metadata and alignment while
 ; folding a bitcast into a store.
 
-; CHECK: store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 16, !tag !0
-
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 %struct.A = type { i32 (...)** }
 
 @G = external constant [5 x i8*]
 
+; CHECK-LABEL: @foo
+; CHECK: store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 16, !tag !0
 define void @foo(%struct.A* %a) nounwind {
 entry:
   %0 = bitcast %struct.A* %a to i8***
@@ -18,4 +18,18 @@ entry:
   ret void
 }
 
+; Check instcombine doesn't try to fold the following bitcast into the store.
+; This transformation would not be safe since we would need to use addrspacecast
+; and addrspacecast is not guaranteed to be a no-op cast.
+
+; CHECK-LABEL: @bar
+; CHECK: %cast = bitcast i8** %b to i8 addrspace(1)**
+; CHECK: store i8 addrspace(1)* %a, i8 addrspace(1)** %cast
+define void @bar(i8 addrspace(1)* %a, i8** %b) nounwind {
+entry:
+  %cast = bitcast i8** %b to i8 addrspace(1)**
+  store i8 addrspace(1)* %a, i8 addrspace(1)** %cast
+  ret void
+}
+
 !0 = metadata !{metadata !"hello"}
diff --git a/test/Transforms/InstCombine/call-cast-target-inalloca.ll b/test/Transforms/InstCombine/call-cast-target-inalloca.ll
new file mode 100644
index 0000000..baf97e0
--- /dev/null
+++ b/test/Transforms/InstCombine/call-cast-target-inalloca.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+
+declare void @takes_i32(i32)
+declare void @takes_i32_inalloca(i32* inalloca)
+
+define void @f() {
+; CHECK-LABEL: define void @f()
+  %args = alloca i32                            ; storage handed to the call below
+  call void bitcast (void (i32)* @takes_i32 to void (i32*)*)(i32* inalloca %args) ; callee declared with plain i32; call site passes i32* inalloca
+; CHECK: call void bitcast
+  ret void
+}
+
+define void @g() {
+; CHECK-LABEL: define void @g()
+  call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0) ; callee declared with i32* inalloca; call site passes plain i32
+; CHECK: call void bitcast
+  ret void
+}
diff --git a/test/Transforms/InstCombine/call-cast-target.ll b/test/Transforms/InstCombine/call-cast-target.ll
index 315c516..1af3317 100644
--- a/test/Transforms/InstCombine/call-cast-target.ll
+++ b/test/Transforms/InstCombine/call-cast-target.ll
@@ -13,3 +13,15 @@ entry:
 
 declare i8* @ctime(i32*)
 
+define internal { i8 } @foo(i32*) {
+entry:
+  ret { i8 } { i8 0 }
+}
+
+define void @test_struct_ret() {
+; CHECK-LABEL: @test_struct_ret
+; CHECK-NOT: bitcast
+entry:
+  %0 = call { i8 } bitcast ({ i8 } (i32*)* @foo to { i8 } (i16*)*)(i16* null)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/cast-call-combine.ll b/test/Transforms/InstCombine/cast-call-combine.ll
new file mode 100644
index 0000000..be70a87
--- /dev/null
+++ b/test/Transforms/InstCombine/cast-call-combine.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -always-inline -instcombine -S | FileCheck %s
+
+define internal void @foo(i16*) alwaysinline {
+  ret void
+}
+
+define void @bar() noinline noreturn {
+  unreachable
+}
+
+define void @test() {
+  br i1 false, label %then, label %else
+
+then:
+  call void @bar()
+  unreachable
+
+else:
+  ; CHECK-NOT: call
+  call void bitcast (void (i16*)* @foo to void (i8*)*) (i8* null)
+  ret void
+}
+
diff --git a/test/Transforms/InstCombine/cast-set.ll b/test/Transforms/InstCombine/cast-set.ll
index 8934404..47ba920 100644
--- a/test/Transforms/InstCombine/cast-set.ll
+++ b/test/Transforms/InstCombine/cast-set.ll
@@ -10,6 +10,7 @@ define i1 @test1(i32 %X) {
         ; Convert to setne int %X, 12
         %c = icmp ne i32 %A, 12         ; <i1> [#uses=1]
         ret i1 %c
+; CHECK-LABEL: @test1(
 ; CHECK: %c = icmp ne i32 %X, 12
 ; CHECK: ret i1 %c
 }
@@ -20,6 +21,7 @@ define i1 @test2(i32 %X, i32 %Y) {
         ; Convert to setne int %X, %Y
         %c = icmp ne i32 %A, %B         ; <i1> [#uses=1]
         ret i1 %c
+; CHECK-LABEL: @test2(
 ; CHECK: %c = icmp ne i32 %X, %Y
 ; CHECK: ret i1 %c
 }
@@ -29,6 +31,7 @@ define i32 @test4(i32 %A) {
         %C = shl i32 %B, 2              ; <i32> [#uses=1]
         %D = bitcast i32 %C to i32              ; <i32> [#uses=1]
         ret i32 %D
+; CHECK-LABEL: @test4(
 ; CHECK: %C = shl i32 %A, 2
 ; CHECK: ret i32 %C
 }
@@ -38,6 +41,7 @@ define i16 @test5(i16 %A) {
         %C = and i32 %B, 15             ; <i32> [#uses=1]
         %D = trunc i32 %C to i16                ; <i16> [#uses=1]
         ret i16 %D
+; CHECK-LABEL: @test5(
 ; CHECK: %C = and i16 %A, 15
 ; CHECK: ret i16 %C
 }
@@ -46,6 +50,7 @@ define i1 @test6(i1 %A) {
         %B = zext i1 %A to i32          ; <i32> [#uses=1]
         %C = icmp ne i32 %B, 0          ; <i1> [#uses=1]
         ret i1 %C
+; CHECK-LABEL: @test6(
 ; CHECK: ret i1 %A
 }
 
@@ -53,6 +58,7 @@ define i1 @test6a(i1 %A) {
         %B = zext i1 %A to i32          ; <i32> [#uses=1]
         %C = icmp ne i32 %B, -1         ; <i1> [#uses=1]
         ret i1 %C
+; CHECK-LABEL: @test6a(
 ; CHECK: ret i1 true
 }
 
@@ -60,6 +66,7 @@ define i1 @test7(i8* %A) {
         %B = bitcast i8* %A to i32*             ; <i32*> [#uses=1]
         %C = icmp eq i32* %B, null              ; <i1> [#uses=1]
         ret i1 %C
+; CHECK-LABEL: @test7(
 ; CHECK: %C = icmp eq i8* %A, null
 ; CHECK: ret i1 %C
 }
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index cac0ec1..4fab92f 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -1,6 +1,6 @@
 ; Tests to make sure elimination of casts is working correctly
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64"
+target datalayout = "E-p:64:64:64-p1:32:32:32-p2:64:64:64-p3:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64"
 
 @inbuf = external global [32832 x i8]           ; <[32832 x i8]*> [#uses=1]
 
@@ -708,6 +708,34 @@ define %s @test68(%s *%p, i64 %i) {
 ; CHECK-NEXT: ret %s
 }
 
+; addrspacecasts should be eliminated.
+define %s @test68_addrspacecast(%s* %p, i64 %i) {
+; CHECK-LABEL: @test68_addrspacecast(
+; CHECK-NEXT: getelementptr %s*
+; CHECK-NEXT: load %s*
+; CHECK-NEXT: ret %s
+  %o = mul i64 %i, 12
+  %q = addrspacecast %s* %p to i8 addrspace(2)*
+  %pp = getelementptr inbounds i8 addrspace(2)* %q, i64 %o
+  %r = addrspacecast i8 addrspace(2)* %pp to %s*
+  %l = load %s* %r
+  ret %s %l
+}
+
+define %s @test68_addrspacecast_2(%s* %p, i64 %i) {
+; CHECK-LABEL: @test68_addrspacecast_2(
+; CHECK-NEXT: getelementptr %s* %p
+; CHECK-NEXT: addrspacecast
+; CHECK-NEXT: load %s addrspace(1)*
+; CHECK-NEXT: ret %s
+  %o = mul i64 %i, 12
+  %q = addrspacecast %s* %p to i8 addrspace(2)*
+  %pp = getelementptr inbounds i8 addrspace(2)* %q, i64 %o
+  %r = addrspacecast i8 addrspace(2)* %pp to %s addrspace(1)*
+  %l = load %s addrspace(1)* %r
+  ret %s %l
+}
+
 define %s @test68_as1(%s addrspace(1)* %p, i32 %i) {
 ; CHECK-LABEL: @test68_as1(
   %o = mul i32 %i, 12
@@ -903,6 +931,33 @@ define double @test80([100 x double]* %p, i32 %i) {
 ; CHECK-NEXT: ret double
 }
 
+define double @test80_addrspacecast([100 x double] addrspace(1)* %p, i32 %i) {
+; CHECK-LABEL: @test80_addrspacecast(
+; CHECK-NEXT: getelementptr [100 x double] addrspace(1)* %p
+; CHECK-NEXT: load double addrspace(1)*
+; CHECK-NEXT: ret double
+  %tmp = mul nsw i32 %i, 8
+  %q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)*
+  %pp = getelementptr i8 addrspace(2)* %q, i32 %tmp
+  %r = addrspacecast i8 addrspace(2)* %pp to double addrspace(1)*
+  %l = load double addrspace(1)* %r
+  ret double %l
+}
+
+define double @test80_addrspacecast_2([100 x double] addrspace(1)* %p, i32 %i) {
+; CHECK-LABEL: @test80_addrspacecast_2(
+; CHECK-NEXT: getelementptr [100 x double] addrspace(1)*
+; CHECK-NEXT: addrspacecast double addrspace(1)*
+; CHECK-NEXT: load double addrspace(3)*
+; CHECK-NEXT: ret double
+  %tmp = mul nsw i32 %i, 8
+  %q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)*
+  %pp = getelementptr i8 addrspace(2)* %q, i32 %tmp
+  %r = addrspacecast i8 addrspace(2)* %pp to double addrspace(3)*
+  %l = load double addrspace(3)* %r
+  ret double %l
+}
+
 define double @test80_as1([100 x double] addrspace(1)* %p, i16 %i) {
 ; CHECK-LABEL: @test80_as1(
   %tmp = mul nsw i16 %i, 8
diff --git a/test/Transforms/InstCombine/ceil.ll b/test/Transforms/InstCombine/ceil.ll
new file mode 100644
index 0000000..9f965a3
--- /dev/null
+++ b/test/Transforms/InstCombine/ceil.ll
@@ -0,0 +1,56 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.ceil.f32(float) #0
+declare double @llvm.ceil.f64(double) #0
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>) #0
+
+; CHECK-LABEL: @constant_fold_ceil_f32_01
+; CHECK-NEXT: ret float 1.000000e+00
+define float @constant_fold_ceil_f32_01() #0 {
+  %x = call float @llvm.ceil.f32(float 1.00) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_ceil_f32_02
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_ceil_f32_02() #0 {
+  %x = call float @llvm.ceil.f32(float 1.25) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_ceil_f32_03
+; CHECK-NEXT: ret float -1.000000e+00
+define float @constant_fold_ceil_f32_03() #0 {
+  %x = call float @llvm.ceil.f32(float -1.25) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_ceil_v4f32_01
+; CHECK-NEXT: ret <4 x float> <float 1.000000e+00, float 2.000000e+00, float -1.000000e+00, float -1.000000e+00>
+define <4 x float> @constant_fold_ceil_v4f32_01() #0 {
+  %x = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float 1.00, float 1.25, float -1.25, float -1.00>)
+  ret <4 x float> %x
+}
+
+; CHECK-LABEL: @constant_fold_ceil_f64_01
+; CHECK-NEXT: ret double 1.000000e+00
+define double @constant_fold_ceil_f64_01() #0 {
+  %x = call double @llvm.ceil.f64(double 1.0) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_ceil_f64_02
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_ceil_f64_02() #0 {
+  %x = call double @llvm.ceil.f64(double 1.3) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_ceil_f64_03
+; CHECK-NEXT: ret double -1.000000e+00
+define double @constant_fold_ceil_f64_03() #0 {
+  %x = call double @llvm.ceil.f64(double -1.75) #0
+  ret double %x
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/constant-fold-math.ll b/test/Transforms/InstCombine/constant-fold-math.ll
new file mode 100644
index 0000000..14377df
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-fold-math.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.fma.f32(float, float, float) #0
+declare float @llvm.fmuladd.f32(float, float, float) #0
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
+
+declare double @llvm.fma.f64(double, double, double) #0
+declare double @llvm.fmuladd.f64(double, double, double) #0
+
+
+
+; CHECK-LABEL: @constant_fold_fma_f32
+; CHECK-NEXT: ret float 6.000000e+00
+define float @constant_fold_fma_f32() #0 {
+  %x = call float @llvm.fma.f32(float 1.0, float 2.0, float 4.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_fma_v4f32
+; CHECK-NEXT: ret <4 x float> <float 1.200000e+01, float 1.400000e+01, float 1.600000e+01, float 1.800000e+01>
+define <4 x float> @constant_fold_fma_v4f32() #0 {
+  %x = call <4 x float> @llvm.fma.v4f32(<4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>)
+  ret <4 x float> %x
+}
+
+; CHECK-LABEL: @constant_fold_fmuladd_f32
+; CHECK-NEXT: ret float 6.000000e+00
+define float @constant_fold_fmuladd_f32() #0 {
+  %x = call float @llvm.fmuladd.f32(float 1.0, float 2.0, float 4.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_fma_f64
+; CHECK-NEXT: ret double 6.000000e+00
+define double @constant_fold_fma_f64() #0 {
+  %x = call double @llvm.fma.f64(double 1.0, double 2.0, double 4.0) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_fmuladd_f64
+; CHECK-NEXT: ret double 6.000000e+00
+define double @constant_fold_fmuladd_f64() #0 {
+  %x = call double @llvm.fmuladd.f64(double 1.0, double 2.0, double 4.0) #0
+  ret double %x
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/copysign.ll b/test/Transforms/InstCombine/copysign.ll
new file mode 100644
index 0000000..556b799
--- /dev/null
+++ b/test/Transforms/InstCombine/copysign.ll
@@ -0,0 +1,49 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.copysign.f32(float, float) #0
+declare double @llvm.copysign.f64(double, double) #0
+
+; CHECK-LABEL: @constant_fold_copysign_f32_01
+; CHECK-NEXT: ret float -1.000000e+00
+define float @constant_fold_copysign_f32_01() #0 {
+  %x = call float @llvm.copysign.f32(float 1.0, float -2.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_copysign_f32_02
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_copysign_f32_02() #0 {
+  %x = call float @llvm.copysign.f32(float -2.0, float 1.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_copysign_f32_03
+; CHECK-NEXT: ret float -2.000000e+00
+define float @constant_fold_copysign_f32_03() #0 {
+  %x = call float @llvm.copysign.f32(float -2.0, float -1.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_copysign_f64_01
+; CHECK-NEXT: ret double -1.000000e+00
+define double @constant_fold_copysign_f64_01() #0 {
+  %x = call double @llvm.copysign.f64(double 1.0, double -2.0) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_copysign_f64_02
+; CHECK-NEXT: ret double 1.000000e+00
+define double @constant_fold_copysign_f64_02() #0 {
+  %x = call double @llvm.copysign.f64(double -1.0, double 2.0) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_copysign_f64_03
+; CHECK-NEXT: ret double -1.000000e+00
+define double @constant_fold_copysign_f64_03() #0 {
+  %x = call double @llvm.copysign.f64(double -1.0, double -2.0) #0
+  ret double %x
+}
+
+
+attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index f67fd1c..1bf486f 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -131,4 +131,28 @@ define i32 @test15(i32 %a, i32 %b) nounwind {
 ; CHECK-NEXT: ret i32
 }
 
+define <2 x i64> @test16(<2 x i64> %x) nounwind {
+  %shr = lshr <2 x i64> %x, <i64 3, i64 5>      ; per-lane logical shift right by 3 and 5
+  %div = udiv <2 x i64> %shr, <i64 4, i64 6>    ; expected to merge with the shift: (1<<3)*4 = 32, (1<<5)*6 = 192
+  ret <2 x i64> %div
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: udiv <2 x i64> %x, <i64 32, i64 192>
+; CHECK-NEXT: ret <2 x i64>
+}
+
+define <2 x i64> @test17(<2 x i64> %x) nounwind {
+  %neg = sub nsw <2 x i64> zeroinitializer, %x  ; per-lane 0 - x (nsw)
+  %div = sdiv <2 x i64> %neg, <i64 3, i64 4>    ; expected to become x sdiv <-3, -4>: the negation moves into the constant
+  ret <2 x i64> %div
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: sdiv <2 x i64> %x, <i64 -3, i64 -4>
+; CHECK-NEXT: ret <2 x i64>
+}
 
+define <2 x i64> @test18(<2 x i64> %x) nounwind {
+  %div = sdiv <2 x i64> %x, <i64 -1, i64 -1>    ; vector x sdiv -1 is expected to become 0 - x
+  ret <2 x i64> %div
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: sub <2 x i64> zeroinitializer, %x
+; CHECK-NEXT: ret <2 x i64>
+}
diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
index 5cacb59..d958470 100644
--- a/test/Transforms/InstCombine/double-float-shrink-1.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -157,7 +157,10 @@ define float @exp10_test(float %f) nounwind readnone {
    %call = call double @exp10(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
-; CHECK: call float @exp10f(float %f)
+; FIXME: Re-enable this when Linux allows transforming this again, or when we
+; can use builtin attributes to test the transform regardless of OS.
+; DISABLED-CHECK: call float @exp10f(float %f)
+; CHECK: call double @exp10(double %conv)
 }
 
 define double @exp10_test2(float %f) nounwind readnone {
diff --git a/test/Transforms/InstCombine/exp2-1.ll b/test/Transforms/InstCombine/exp2-1.ll
index 99fb9ec..8e6a0e0 100644
--- a/test/Transforms/InstCombine/exp2-1.ll
+++ b/test/Transforms/InstCombine/exp2-1.ll
@@ -1,6 +1,7 @@
 ; Test that the exp2 library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -instcombine -S -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
@@ -74,3 +75,26 @@ define float @test_simplify8(i8 zeroext %x) {
 ; CHECK: call float @ldexpf
   ret float %ret
 }
+
+declare double @llvm.exp2.f64(double)
+declare float @llvm.exp2.f32(float)
+
+define double @test_simplify9(i8 zeroext %x) {
+; CHECK-LABEL: @test_simplify9(
+; CHECK-WIN-LABEL: @test_simplify9(
+  %conv = uitofp i8 %x to double
+  %ret = call double @llvm.exp2.f64(double %conv)
+; CHECK: call double @ldexp
+; CHECK-WIN: call double @ldexp
+  ret double %ret
+}
+
+define float @test_simplify10(i8 zeroext %x) {
+; CHECK-LABEL: @test_simplify10(
+; CHECK-WIN-LABEL: @test_simplify10(
+  %conv = uitofp i8 %x to float
+  %ret = call float @llvm.exp2.f32(float %conv)
+; CHECK: call float @ldexpf
+; CHECK-WIN-NOT: call float @ldexpf
+  ret float %ret
+}
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index d8ba2a5..2ee4b0f 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -140,6 +140,42 @@ define float @fold13(float %x) {
 ; CHECK: ret
 }
 
+; Fast-math fold with the negated value as the first addend: (-x) + y => y - x
+define float @fold14(float %x, float %y) {
+  %neg = fsub fast float -0.0, %x               ; -x, written as -0.0 - x
+  %add = fadd fast float %neg, %y
+  ret float %add
+; CHECK-LABEL: @fold14(
+; CHECK: fsub fast float %y, %x
+; CHECK: ret
+}
+
+; Fast-math fold with the negated value as the second addend: x + (-y) => x - y
+define float @fold15(float %x, float %y) {
+  %neg = fsub fast float -0.0, %y               ; -y, written as -0.0 - y
+  %add = fadd fast float %x, %neg
+  ret float %add
+; CHECK-LABEL: @fold15(
+; CHECK: fsub fast float %x, %y
+; CHECK: ret
+}
+
+; (select X+Y, X-Y) => X + (select Y, -Y); the expected order is fsub, select, fadd
+define float @fold16(float %x, float %y) {
+  %cmp = fcmp ogt float %x, %y
+  %plus = fadd fast float %x, %y
+  %minus = fsub fast float %x, %y
+  %r = select i1 %cmp, float %plus, float %minus
+  ret float %r
+; CHECK-LABEL: @fold16(
+; CHECK: fsub fast float
+; CHECK: select
+; CHECK: fadd fast float
+; CHECK: ret
+}
+
+
+
 ; =========================================================================
 ;
 ;   Testing-cases about fmul begin
@@ -223,6 +259,14 @@ define float @fmul3(float %f1, float %f2) {
 ; CHECK: fmul fast float %f1, 3.000000e+00
 }
 
+define <4 x float> @fmul3_vec(<4 x float> %f1, <4 x float> %f2) {
+  %t1 = fdiv <4 x float> %f1, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
+  %t3 = fmul fast <4 x float> %t1, <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3>   ; lane-wise C2/C1: 6000/2000 = 3, 6000/3000 = 2, 2000/2000 = 1, 1000/1000 = 1
+  ret <4 x float> %t3
+; CHECK-LABEL: @fmul3_vec(
+; CHECK: fmul fast <4 x float> %f1, <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
+}
+
 ; Rule "X/C1 * C2 => X * (C2/C1) is not applicable if C2/C1 is either a special
 ; value of a denormal. The 0x3810000000000000 here take value FLT_MIN
 ;
@@ -309,6 +353,15 @@ define float @fdiv2(float %x) {
 ; CHECK: fmul fast float %x, 0x3FE0B21660000000
 }
 
+define <2 x float> @fdiv2_vec(<2 x float> %x) {
+  %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
+  %div1 = fdiv fast <2 x float> %mul, <float 2.0, float 3.0>   ; lane-wise 6/2 = 3 and 9/3 = 3, so one fmul by <3, 3> is expected
+  ret <2 x float> %div1
+
+; CHECK-LABEL: @fdiv2_vec(
+; CHECK: fmul fast <2 x float> %x, <float 3.000000e+00, float 3.000000e+00>
+}
+
 ; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) is C2/C1 is a denormal
 ;
 define float @fdiv3(float %x) {
diff --git a/test/Transforms/InstCombine/fdiv.ll b/test/Transforms/InstCombine/fdiv.ll
index 1edbc5e..af6a240 100644
--- a/test/Transforms/InstCombine/fdiv.ll
+++ b/test/Transforms/InstCombine/fdiv.ll
@@ -23,3 +23,29 @@ define float @test3(float %x) nounwind readnone ssp {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT: fdiv float %x, 0x36A0000000000000
 }
+
+define float @test4(float %x) nounwind readnone ssp {
+  %div = fdiv fast float %x, 8.0                ; expected to become a multiply by the reciprocal, 1/8 = 0.125
+  ret float %div
+
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: fmul fast float %x, 1.250000e-01
+}
+
+define float @test5(float %x, float %y, float %z) nounwind readnone ssp {
+  %div1 = fdiv fast float %x, %y
+  %div2 = fdiv fast float %div1, %z
+  ret float %div2
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: fmul fast
+; CHECK-NEXT: fdiv fast
+}
+
+define float @test6(float %x, float %y, float %z) nounwind readnone ssp {
+  %div1 = fdiv fast float %x, %y
+  %div2 = fdiv fast float %z, %div1
+  ret float %div2
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: fmul fast
+; CHECK-NEXT: fdiv fast
+}
diff --git a/test/Transforms/InstCombine/float-shrink-compare.ll b/test/Transforms/InstCombine/float-shrink-compare.ll
index 26f77a7..e500467 100644
--- a/test/Transforms/InstCombine/float-shrink-compare.ll
+++ b/test/Transforms/InstCombine/float-shrink-compare.ll
@@ -170,6 +170,58 @@ define i32 @test14(float %x, float %y) nounwind uwtable {
 ; CHECK-NEXT: fcmp oeq float %truncf, %y
 }
 
+define i32 @test15(float %x, float %y, float %z) nounwind uwtable {
+  %1 = fpext float %x to double
+  %2 = fpext float %y to double
+  %3 = call double @fmin(double %1, double %2) nounwind
+  %4 = fpext float %z to double
+  %5 = fcmp oeq double %3, %4
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: %fminf = call float @fminf(float %x, float %y)
+; CHECK-NEXT: fcmp oeq float %fminf, %z
+}
+
+define i32 @test16(float %x, float %y, float %z) nounwind uwtable {
+  %1 = fpext float %z to double
+  %2 = fpext float %x to double
+  %3 = fpext float %y to double
+  %4 = call double @fmin(double %2, double %3) nounwind
+  %5 = fcmp oeq double %1, %4
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: %fminf = call float @fminf(float %x, float %y)
+; CHECK-NEXT: fcmp oeq float %fminf, %z
+}
+
+define i32 @test17(float %x, float %y, float %z) nounwind uwtable {
+  %1 = fpext float %x to double
+  %2 = fpext float %y to double
+  %3 = call double @fmax(double %1, double %2) nounwind
+  %4 = fpext float %z to double
+  %5 = fcmp oeq double %3, %4
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: %fmaxf = call float @fmaxf(float %x, float %y)
+; CHECK-NEXT: fcmp oeq float %fmaxf, %z
+}
+
+define i32 @test18(float %x, float %y, float %z) nounwind uwtable {
+  %1 = fpext float %z to double
+  %2 = fpext float %x to double
+  %3 = fpext float %y to double
+  %4 = call double @fmax(double %2, double %3) nounwind
+  %5 = fcmp oeq double %1, %4
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: %fmaxf = call float @fmaxf(float %x, float %y)
+; CHECK-NEXT: fcmp oeq float %fmaxf, %z
+}
+
 declare double @fabs(double) nounwind readnone
 declare double @ceil(double) nounwind readnone
 declare double @floor(double) nounwind readnone
@@ -177,3 +229,5 @@ declare double @nearbyint(double) nounwind readnone
 declare double @rint(double) nounwind readnone
 declare double @round(double) nounwind readnone
 declare double @trunc(double) nounwind readnone
+declare double @fmin(double, double) nounwind readnone
+declare double @fmax(double, double) nounwind readnone
diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll
index 402ee52..18cbf9d 100644
--- a/test/Transforms/InstCombine/fmul.ll
+++ b/test/Transforms/InstCombine/fmul.ll
@@ -24,10 +24,10 @@ define float @test2(float %x) {
 define float @test3(float %x, float %y) {
   %sub1 = fsub float -0.000000e+00, %x
   %sub2 = fsub float -0.000000e+00, %y
-  %mul = fmul float %sub1, %sub2
+  %mul = fmul fast float %sub1, %sub2
   ret float %mul
 ; CHECK-LABEL: @test3(
-; CHECK: fmul float %x, %y
+; CHECK: fmul fast float %x, %y
 }
 
 ; (0.0 - X) * (0.0 - Y) => X * Y
@@ -93,3 +93,33 @@ for.body:                                         ; preds = %for.cond
 for.end:                                          ; preds = %for.cond
   ret void
 }
+
+; X * -1.0 => -0.0 - X
+define float @test9(float %x) {
+  %mul = fmul float %x, -1.0
+  ret float %mul
+
+; CHECK-LABEL: @test9(
+; CHECK-NOT: fmul
+; CHECK: fsub
+}
+
+; PR18532
+define <4 x float> @test10(<4 x float> %x) {
+  %mul = fmul <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
+  ret <4 x float> %mul
+
+; CHECK-LABEL: @test10(
+; CHECK-NOT: fmul
+; CHECK: fsub
+}
+
+define float @test11(float %x, float %y) {
+  %a = fadd fast float %x, 1.0
+  %b = fadd fast float %y, 2.0
+  %c = fadd fast float %a, %b
+  ret float %c
+; CHECK-LABEL: @test11(
+; CHECK-NOT: fadd float
+; CHECK: fadd fast float
+}
diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll
index 05d1b48..9be66fd 100644
--- a/test/Transforms/InstCombine/fpcast.ll
+++ b/test/Transforms/InstCombine/fpcast.ll
@@ -31,6 +31,15 @@ define half @test4(float %a) {
   ret half %c
 }
 
+; CHECK: test4-fast
+define half @test4-fast(float %a) {
+; CHECK: fptrunc
+; CHECK: fsub fast
+  %b = fsub fast float -0.0, %a
+  %c = fptrunc float %b to half
+  ret half %c
+}
+
 ; CHECK: test5
 define half @test5(float %a, float %b, float %c) {
 ; CHECK: fcmp ogt
diff --git a/test/Transforms/InstCombine/fpextend.ll b/test/Transforms/InstCombine/fpextend.ll
index 70e0c62..8640cd2 100644
--- a/test/Transforms/InstCombine/fpextend.ll
+++ b/test/Transforms/InstCombine/fpextend.ll
@@ -1,3 +1,4 @@
+
 ; RUN: opt < %s -instcombine -S | not grep fpext
 @X = external global float 
 @Y = external global float
@@ -12,6 +13,18 @@ entry:
 	ret void
 }
 
+define void @test2() nounwind  {
+entry:
+	%tmp = load float* @X, align 4		; <float> [#uses=1]
+	%tmp1 = fpext float %tmp to double		; <double> [#uses=1]
+	%tmp2 = load float* @Y, align 4		; <float> [#uses=1]
+	%tmp23 = fpext float %tmp2 to double		; <double> [#uses=1]
+	%tmp5 = fmul double %tmp1, %tmp23		; <double> [#uses=1]
+	%tmp56 = fptrunc double %tmp5 to float		; <float> [#uses=1]
+	store float %tmp56, float* @X, align 4
+	ret void
+}
+
 define void @test3() nounwind  {
 entry:
 	%tmp = load float* @X, align 4		; <float> [#uses=1]
@@ -33,4 +46,3 @@ entry:
 	store float %tmp34, float* @X, align 4
 	ret void
 }
-
diff --git a/test/Transforms/InstCombine/fpextend_x86.ll b/test/Transforms/InstCombine/fpextend_x86.ll
new file mode 100644
index 0000000..e012551
--- /dev/null
+++ b/test/Transforms/InstCombine/fpextend_x86.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -instcombine -mtriple=x86_64-apple-macosx -S | FileCheck %s
+target triple = "x86_64-apple-macosx"
+
+define double @test1(double %a, double %b) nounwind {
+  %wa = fpext double %a to x86_fp80
+  %wb = fpext double %b to x86_fp80
+  %wr = fadd x86_fp80 %wa, %wb                  ; the add is expected to stay in x86_fp80
+  %r = fptrunc x86_fp80 %wr to double
+  ret double %r
+; CHECK-LABEL: @test1(
+; CHECK: fadd x86_fp80
+; CHECK: ret
+}
+
+define double @test2(double %a, double %b) nounwind {
+  %wa = fpext double %a to x86_fp80
+  %wb = fpext double %b to x86_fp80
+  %wr = fsub x86_fp80 %wa, %wb                  ; the subtract is expected to stay in x86_fp80
+  %r = fptrunc x86_fp80 %wr to double
+  ret double %r
+; CHECK-LABEL: @test2(
+; CHECK: fsub x86_fp80
+; CHECK: ret
+}
+
+define double @test3(double %a, double %b) nounwind {
+  %wa = fpext double %a to x86_fp80
+  %wb = fpext double %b to x86_fp80
+  %wr = fmul x86_fp80 %wa, %wb                  ; the multiply is expected to stay in x86_fp80
+  %r = fptrunc x86_fp80 %wr to double
+  ret double %r
+; CHECK-LABEL: @test3(
+; CHECK: fmul x86_fp80
+; CHECK: ret
+}
+
+define double @test4(double %a, half %b) nounwind {
+  %wa = fpext double %a to x86_fp80
+  %wb = fpext half %b to x86_fp80               ; one operand starts as half rather than double
+  %wr = fmul x86_fp80 %wa, %wb                  ; here the multiply is expected to be done in double
+  %r = fptrunc x86_fp80 %wr to double
+  ret double %r
+; CHECK-LABEL: @test4(
+; CHECK: fmul double
+; CHECK: ret
+}
+
+define double @test5(double %a, double %b) nounwind {
+  %wa = fpext double %a to x86_fp80
+  %wb = fpext double %b to x86_fp80
+  %wr = fdiv x86_fp80 %wa, %wb                  ; the divide is expected to stay in x86_fp80
+  %r = fptrunc x86_fp80 %wr to double
+  ret double %r
+; CHECK-LABEL: @test5(
+; CHECK: fdiv x86_fp80
+; CHECK: ret
+}
diff --git a/test/Transforms/InstCombine/fprintf-1.ll b/test/Transforms/InstCombine/fprintf-1.ll
index 3f6a314..6741345 100644
--- a/test/Transforms/InstCombine/fprintf-1.ll
+++ b/test/Transforms/InstCombine/fprintf-1.ll
@@ -56,18 +56,18 @@ define void @test_simplify4(%FILE* %fp) {
 ; CHECK-IPRINTF-LABEL: @test_simplify4(
   %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
   call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i32 187)
-; CHECK-NEXT-IPRINTF: call i32 (%FILE*, i8*, ...)* @fiprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+; CHECK-IPRINTF-NEXT: call i32 (%FILE*, i8*, ...)* @fiprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
   ret void
-; CHECK-NEXT-IPRINTF: ret void
+; CHECK-IPRINTF-NEXT: ret void
 }
 
 define void @test_no_simplify1(%FILE* %fp) {
 ; CHECK-IPRINTF-LABEL: @test_no_simplify1(
   %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
   call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, double 1.87)
-; CHECK-NEXT-IPRINTF: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+; CHECK-IPRINTF-NEXT: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
   ret void
-; CHECK-NEXT-IPRINTF: ret void
+; CHECK-IPRINTF-NEXT: ret void
 }
 
 define void @test_no_simplify2(%FILE* %fp, double %d) {
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index c29a7dc..ef0cb29 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32"
+target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
 
 %intstruct = type { i32 }
 %pair = type { i32, i32 }
@@ -728,6 +728,19 @@ define i64 @test_gep_bitcast_array_same_size_element([100 x double]* %arr, i64 %
   ret i64 %x
 }
 
+; gep should be done in the original address space.
+define i64 @test_gep_bitcast_array_same_size_element_addrspacecast([100 x double]* %arr, i64 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_addrspacecast(
+; CHECK: getelementptr [100 x double]* %arr, i64 0, i64 %V
+; CHECK-NEXT: %t = addrspacecast double*
+; CHECK: load i64 addrspace(3)* %t
+  %cast = addrspacecast [100 x double]* %arr to i64 addrspace(3)*
+  %V = mul i64 %N, 8
+  %t = getelementptr i64 addrspace(3)* %cast, i64 %V
+  %x = load i64 addrspace(3)* %t
+  ret i64 %x
+}
+
 ; The element size of the array is different the element size of the pointer
 define i8 @test_gep_bitcast_array_different_size_element([100 x double]* %arr, i64 %N) {
 ; CHECK-LABEL: @test_gep_bitcast_array_different_size_element(
@@ -789,4 +802,13 @@ define i16 @test41([3 x i32] addrspace(1)* %array) {
 ; CHECK-NEXT: ret i16 8
 }
 
+define i32 addrspace(1)* @ascast_0_gep([128 x i32]* %p) nounwind {
+; CHECK-LABEL: @ascast_0_gep(
+; CHECK-NOT: getelementptr
+; CHECK: ret
+  %gep = getelementptr [128 x i32]* %p, i32 0, i32 0
+  %x = addrspacecast i32* %gep to i32 addrspace(1)*
+  ret i32 addrspace(1)* %x
+}
+
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/insert-extract-shuffle.ll b/test/Transforms/InstCombine/insert-extract-shuffle.ll
new file mode 100644
index 0000000..8929c82
--- /dev/null
+++ b/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -instcombine %s | FileCheck %s
+
+define <1 x i8> @test1(<8 x i8> %in) {
+; CHECK-LABEL: @test1
+; CHECK: shufflevector <8 x i8> %in, <8 x i8> undef, <1 x i32> <i32 5>
+  %val = extractelement <8 x i8> %in, i32 5
+  %vec = insertelement <1 x i8> undef, i8 %val, i32 0
+  ret <1 x i8> %vec
+}
+
+define <4 x i16> @test2(<8 x i16> %in, <8 x i16> %in2) {
+; CHECK-LABEL: @test2
+; CHECK: shufflevector <8 x i16> %in2, <8 x i16> %in, <4 x i32> <i32 11, i32 9, i32 0, i32 10>
+  %elt0 = extractelement <8 x i16> %in, i32 3
+  %elt1 = extractelement <8 x i16> %in, i32 1
+  %elt2 = extractelement <8 x i16> %in2, i32 0
+  %elt3 = extractelement <8 x i16> %in, i32 2
+
+  %vec.0 = insertelement <4 x i16> undef, i16 %elt0, i32 0
+  %vec.1 = insertelement <4 x i16> %vec.0, i16 %elt1, i32 1
+  %vec.2 = insertelement <4 x i16> %vec.1, i16 %elt2, i32 2
+  %vec.3 = insertelement <4 x i16> %vec.2, i16 %elt3, i32 3
+
+  ret <4 x i16> %vec.3
+}
+
+define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: @test_vcopyq_lane_p64
+; CHECK: extractelement
+; CHECK: insertelement
+; CHECK-NOT: shufflevector
+entry:
+  %elt = extractelement <1 x i64> %b, i32 0
+  %res = insertelement <2 x i64> %a, i64 %elt, i32 1
+  ret <2 x i64> %res
+}
+
diff --git a/test/Transforms/InstCombine/load-addrspace-cast.ll b/test/Transforms/InstCombine/load-addrspace-cast.ll
new file mode 100644
index 0000000..fd6339c
--- /dev/null
+++ b/test/Transforms/InstCombine/load-addrspace-cast.ll
@@ -0,0 +1,12 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-n8:16:32:64"
+
+define i32* @pointer_to_addrspace_pointer(i32 addrspace(1)** %x) nounwind {
+; CHECK-LABEL: @pointer_to_addrspace_pointer(
+; CHECK: load
+; CHECK: addrspacecast
+  %y = bitcast i32 addrspace(1)** %x to i32**
+  %z = load i32** %y
+  ret i32* %z
+}
+
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 94fc118..d19bedc 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -181,3 +181,19 @@ define i32 @test19(i32 %A, i32 %B) {
   ret i32 %H
 ; CHECK: ret i32 0
 }
+
+define <2 x i64> @test20(<2 x i64> %A) {
+; CHECK-LABEL: @test20(
+        %B = add <2 x i64> %A, <i64 12, i64 14>
+        %C = mul <2 x i64> %B, <i64 3, i64 2>   ; distributes to A*<3,2> + <12*3, 14*2> = A*<3,2> + <36,28>
+        ret <2 x i64> %C
+; CHECK: [[MUL:%[a-zA-Z0-9._]+]] = mul <2 x i64> %A, <i64 3, i64 2>
+; CHECK: add <2 x i64> [[MUL]], <i64 36, i64 28>
+}
+
+define <2 x i1> @test21(<2 x i1> %A, <2 x i1> %B) {
+; CHECK-LABEL: @test21(
+        %C = mul <2 x i1> %A, %B
+        ret <2 x i1> %C
+; CHECK: %C = and <2 x i1> %A, %B
+}
diff --git a/test/Transforms/InstCombine/onehot_merge.ll b/test/Transforms/InstCombine/onehot_merge.ll
index 51f955c..496d847 100644
--- a/test/Transforms/InstCombine/onehot_merge.ll
+++ b/test/Transforms/InstCombine/onehot_merge.ll
@@ -16,7 +16,7 @@ bb:
 
 ;CHECK: @foo1_and
 ;CHECK:  shl i32 1, %c1
-;CHECK-NEXT:  shl i32 1, %c2
+;CHECK-NEXT:  lshr i32 -2147483648, %c2
 ;CHECK-NEXT:  or i32
 ;CHECK-NEXT:  and i32
 ;CHECK-NEXT:  icmp ne i32 %1, %0
@@ -24,7 +24,7 @@ bb:
 define i1 @foo1_and(i32 %k, i32 %c1, i32 %c2) {
 bb:
   %tmp = shl i32 1, %c1
-  %tmp4 = shl i32 1, %c2
+  %tmp4 = lshr i32 -2147483648, %c2
   %tmp1 = and i32 %tmp, %k
   %tmp2 = icmp eq i32 %tmp1, 0
   %tmp5 = and i32 %tmp4, %k
diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll
index 9f1d073..fb3b7d7 100644
--- a/test/Transforms/InstCombine/pow-1.ll
+++ b/test/Transforms/InstCombine/pow-1.ll
@@ -1,6 +1,11 @@
 ; Test that the pow library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefix=CHECK-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s --check-prefix=CHECK-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefix=CHECK-NO-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefix=CHECK-NO-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-netbsd | FileCheck %s --check-prefix=CHECK-NO-EXP10
 ; rdar://7251832
 
 ; NOTE: The readonly attribute on the pow call should be preserved
@@ -155,13 +160,33 @@ declare double @llvm.pow.f64(double %Val, double %Power)
 define double @test_simplify17(double %x) {
 ; CHECK-LABEL: @test_simplify17(
   %retval = call double @llvm.pow.f64(double %x, double 0.5)
-; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]]
-; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]]
+; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x)
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]])
 ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
   ret double %retval
 ; CHECK-NEXT: ret double [[SELECT]]
 }
 
+; Check pow(10.0, x) -> __exp10(x) on OS X 10.9+ and iOS 7.0+.
+
+define float @test_simplify18(float %x) {
+; CHECK-LABEL: @test_simplify18(
+  %retval = call float @powf(float 10.0, float %x)
+; CHECK-EXP10: [[EXP10F:%[_a-z0-9]+]] = call float @__exp10f(float %x) [[NUW_RO:#[0-9]+]]
+  ret float %retval
+; CHECK-EXP10: ret float [[EXP10F]]
+; CHECK-NO-EXP10: call float @powf
+}
+
+define double @test_simplify19(double %x) {
+; CHECK-LABEL: @test_simplify19(
+  %retval = call double @pow(double 10.0, double %x)
+; CHECK-EXP10: [[EXP10:%[_a-z0-9]+]] = call double @__exp10(double %x) [[NUW_RO]]
+  ret double %retval
+; CHECK-EXP10: ret double [[EXP10]]
+; CHECK-NO-EXP10: call double @pow
+}
+
 ; CHECK: attributes [[NUW_RO]] = { nounwind readonly }
 
diff --git a/test/Transforms/InstCombine/printf-1.ll b/test/Transforms/InstCombine/printf-1.ll
index c98ddd5..483bc7a 100644
--- a/test/Transforms/InstCombine/printf-1.ll
+++ b/test/Transforms/InstCombine/printf-1.ll
@@ -87,18 +87,18 @@ define void @test_simplify7() {
 ; CHECK-IPRINTF-LABEL: @test_simplify7(
   %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
   call i32 (i8*, ...)* @printf(i8* %fmt, i32 187)
-; CHECK-NEXT-IPRINTF: call i32 (i8*, ...)* @iprintf(i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, ...)* @iprintf(i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
   ret void
-; CHECK-NEXT-IPRINTF: ret void
+; CHECK-IPRINTF-NEXT: ret void
 }
 
 define void @test_no_simplify1() {
 ; CHECK-IPRINTF-LABEL: @test_no_simplify1(
   %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
   call i32 (i8*, ...)* @printf(i8* %fmt, double 1.87)
-; CHECK-NEXT-IPRINTF: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
   ret void
-; CHECK-NEXT-IPRINTF: ret void
+; CHECK-IPRINTF-NEXT: ret void
 }
 
 define void @test_no_simplify2(i8* %fmt, double %d) {
diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll
index 22fd90b..9f07702 100644
--- a/test/Transforms/InstCombine/rem.ll
+++ b/test/Transforms/InstCombine/rem.ll
@@ -204,3 +204,12 @@ define i32 @test19(i32 %x, i32 %y) {
 	%E = urem i32 %y, %D
 	ret i32 %E
 }
+
+define <2 x i64> @test20(<2 x i64> %X, <2 x i1> %C) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: select <2 x i1> %C, <2 x i64> <i64 1, i64 2>, <2 x i64> zeroinitializer
+; CHECK-NEXT: ret <2 x i64>
+	%V = select <2 x i1> %C, <2 x i64> <i64 1, i64 2>, <2 x i64> <i64 8, i64 9>
+	%R = urem <2 x i64> %V, <i64 2, i64 3>	; folded into both select arms: <1,2> urem <2,3> = <1,2>, <8,9> urem <2,3> = <0,0>
+	ret <2 x i64> %R
+}
diff --git a/test/Transforms/InstCombine/round.ll b/test/Transforms/InstCombine/round.ll
new file mode 100644
index 0000000..ecc62dd
--- /dev/null
+++ b/test/Transforms/InstCombine/round.ll
@@ -0,0 +1,90 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.round.f32(float) #0
+declare double @llvm.round.f64(double) #0
+
+; CHECK-LABEL: @constant_fold_round_f32_01
+; CHECK-NEXT: ret float 1.000000e+00
+define float @constant_fold_round_f32_01() #0 {
+  %x = call float @llvm.round.f32(float 1.25) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f32_02
+; CHECK-NEXT: ret float -1.000000e+00
+define float @constant_fold_round_f32_02() #0 {
+  %x = call float @llvm.round.f32(float -1.25) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f32_03
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_round_f32_03() #0 {
+  %x = call float @llvm.round.f32(float 1.5) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f32_04
+; CHECK-NEXT: ret float -2.000000e+00
+define float @constant_fold_round_f32_04() #0 {
+  %x = call float @llvm.round.f32(float -1.5) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f32_05
+; CHECK-NEXT: ret float 3.000000e+00
+define float @constant_fold_round_f32_05() #0 {
+  %x = call float @llvm.round.f32(float 2.75) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f32_06
+; CHECK-NEXT: ret float -3.000000e+00
+define float @constant_fold_round_f32_06() #0 {
+  %x = call float @llvm.round.f32(float -2.75) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f64_01
+; CHECK-NEXT: ret double 1.000000e+00
+define double @constant_fold_round_f64_01() #0 {
+  %x = call double @llvm.round.f64(double 1.3) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f64_02
+; CHECK-NEXT: ret double -1.000000e+00
+define double @constant_fold_round_f64_02() #0 {
+  %x = call double @llvm.round.f64(double -1.3) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f64_03
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_round_f64_03() #0 {
+  %x = call double @llvm.round.f64(double 1.5) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f64_04
+; CHECK-NEXT: ret double -2.000000e+00
+define double @constant_fold_round_f64_04() #0 {
+  %x = call double @llvm.round.f64(double -1.5) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f64_05
+; CHECK-NEXT: ret double 3.000000e+00
+define double @constant_fold_round_f64_05() #0 {
+  %x = call double @llvm.round.f64(double 2.7) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f64_06
+; CHECK-NEXT: ret double -3.000000e+00
+define double @constant_fold_round_f64_06() #0 {
+  %x = call double @llvm.round.f64(double -2.7) #0
+  ret double %x
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/select-2.ll b/test/Transforms/InstCombine/select-2.ll
index 5b9deb4..832d958 100644
--- a/test/Transforms/InstCombine/select-2.ll
+++ b/test/Transforms/InstCombine/select-2.ll
@@ -19,3 +19,13 @@ define i32 @t2(i32 %c, i32 %x) nounwind {
        %t3 = select i1 %t1, i32 %t2, i32 %x
        ret i32 %t3
 }
+
+define float @t3(float %x, float %y) nounwind {
+  %t1 = fcmp ogt float %x, %y
+  %t2 = select i1 %t1, float %x, float 1.0
+  %t3 = fadd fast float %t2, 1.0
+  ret float %t3
+; CHECK-LABEL: @t3(
+; CHECK: fadd fast
+; CHECK: select
+}
diff --git a/test/Transforms/InstCombine/select-select.ll b/test/Transforms/InstCombine/select-select.ll
new file mode 100644
index 0000000..65820ac
--- /dev/null
+++ b/test/Transforms/InstCombine/select-select.ll
@@ -0,0 +1,24 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; CHECK-LABEL: @foo1(
+define float @foo1(float %a) #0 {
+; CHECK-NOT: xor
+  %b = fcmp ogt float %a, 0.000000e+00
+  %c = select i1 %b, float %a, float 0.000000e+00       ; a if a > 0.0, else 0.0
+  %d = fcmp olt float %c, 1.000000e+00
+  %f = select i1 %d, float %c, float 1.000000e+00       ; c if c < 1.0, else 1.0
+  ret float %f
+}
+
+; CHECK-LABEL: @foo2(
+define float @foo2(float %a) #0 {
+; CHECK-NOT: xor
+  %b = fcmp ogt float %a, 0.000000e+00
+  %c = select i1 %b, float %a, float 0.000000e+00
+  %d = fcmp olt float %c, 1.000000e+00
+  %e = select i1 %b, float %a, float 0.000000e+00       ; same select as %c, repeated
+  %f = select i1 %d, float %e, float 1.000000e+00
+  ret float %f
+}
+
+attributes #0 = { nounwind readnone ssp uwtable }
diff --git a/test/Transforms/InstCombine/sign-test-and-or.ll b/test/Transforms/InstCombine/sign-test-and-or.ll
index 95ed9b9..aa23d93 100644
--- a/test/Transforms/InstCombine/sign-test-and-or.ll
+++ b/test/Transforms/InstCombine/sign-test-and-or.ll
@@ -177,3 +177,41 @@ if.then:
 if.end:
   ret void
 }
+
+define void @test10(i32 %a) nounwind {
+  %1 = and i32 %a, 2
+  %2 = icmp eq i32 %1, 0
+  %3 = icmp ult i32 %a, 4
+  %or.cond = and i1 %2, %3                      ; bit 1 clear and a u< 4: only a = 0 or 1 qualify, i.e. a u< 2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: %1 = icmp ult i32 %a, 2
+; CHECK-NEXT: br i1 %1, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test11(i32 %a) nounwind {
+  %1 = and i32 %a, 2
+  %2 = icmp ne i32 %1, 0
+  %3 = icmp ugt i32 %a, 3
+  %or.cond = or i1 %2, %3                       ; bit 1 set or a u> 3: every value except a = 0 or 1, i.e. a u> 1
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: %1 = icmp ugt i32 %a, 1
+; CHECK-NEXT: br i1 %1, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
diff --git a/test/Transforms/InstCombine/sincospi.ll b/test/Transforms/InstCombine/sincospi.ll
index 0d1a602..739827f 100644
--- a/test/Transforms/InstCombine/sincospi.ll
+++ b/test/Transforms/InstCombine/sincospi.ll
@@ -1,5 +1,6 @@
 ; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefix=CHECK-FLOAT-IN-VEC
 ; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s
+; RUN: opt -instcombine -S < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
 ; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefix=CHECK-NO-SINCOS
 ; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefix=CHECK-NO-SINCOS
 ; RUN: opt -instcombine -S < %s -mtriple=x86_64-none-linux-gnu | FileCheck %s --check-prefix=CHECK-NO-SINCOS
@@ -23,12 +24,12 @@ define float @test_instbased_f32() {
        %res = fadd float %sin, %cos
        ret float %res
 ; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load float* @var32
-; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float [[VAL]])
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospif_stret(float [[VAL]])
 ; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
 ; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
 
 ; CHECK: [[VAL:%[a-z0-9]+]] = load float* @var32
-; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float [[VAL]])
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospif_stret(float [[VAL]])
 ; CHECK: extractvalue { float, float } [[SINCOS]], 0
 ; CHECK: extractvalue { float, float } [[SINCOS]], 1
 
@@ -41,11 +42,11 @@ define float @test_constant_f32() {
        %cos = call float @__cospif(float 1.0) #0
        %res = fadd float %sin, %cos
        ret float %res
-; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float 1.000000e+00)
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospif_stret(float 1.000000e+00)
 ; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
 ; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
 
-; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float 1.000000e+00)
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospif_stret(float 1.000000e+00)
 ; CHECK: extractvalue { float, float } [[SINCOS]], 0
 ; CHECK: extractvalue { float, float } [[SINCOS]], 1
 
diff --git a/test/Transforms/InstCombine/sprintf-1.ll b/test/Transforms/InstCombine/sprintf-1.ll
index 78dd7aa..afa38f3 100644
--- a/test/Transforms/InstCombine/sprintf-1.ll
+++ b/test/Transforms/InstCombine/sprintf-1.ll
@@ -77,18 +77,18 @@ define void @test_simplify6(i8* %dst) {
 ; CHECK-IPRINTF-LABEL: @test_simplify6(
   %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
   call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i32 187)
-; CHECK-NEXT-IPRINTF: call i32 (i8*, i8*, ...)* @siprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, i8*, ...)* @siprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
   ret void
-; CHECK-NEXT-IPRINTF: ret void
+; CHECK-IPRINTF-NEXT: ret void
 }
 
 define void @test_no_simplify1(i8* %dst) {
 ; CHECK-IPRINTF-LABEL: @test_no_simplify1(
   %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
   call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double 1.87)
-; CHECK-NEXT-IPRINTF: call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
   ret void
-; CHECK-NEXT-IPRINTF: ret void
+; CHECK-IPRINTF-NEXT: ret void
 }
 
 define void @test_no_simplify2(i8* %dst, i8* %fmt, double %d) {
diff --git a/test/Transforms/InstCombine/strchr-1.ll b/test/Transforms/InstCombine/strchr-1.ll
index d2c9894..66b3e2e 100644
--- a/test/Transforms/InstCombine/strchr-1.ll
+++ b/test/Transforms/InstCombine/strchr-1.ll
@@ -63,3 +63,16 @@ define void @test_simplify5() {
   store i8* %dst, i8** @chp
   ret void
 }
+
+; Check transformation strchr(p, 0) -> p + strlen(p)
+define void @test_simplify6(i8* %str) {
+; CHECK-LABEL: @test_simplify6(
+; CHECK: %strlen = call i32 @strlen(i8* %str)
+; CHECK-NOT: call i8* @strchr
+; CHECK: %strchr = getelementptr i8* %str, i32 %strlen
+; CHECK: store i8* %strchr, i8** @chp, align 4
+; CHECK: ret void
+  %dst = call i8* @strchr(i8* %str, i32 0)     ; search for the NUL terminator
+  store i8* %dst, i8** @chp                    ; keep the result live via the global @chp
+  ret void
+}
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 36c523b..41d803c8 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -391,4 +391,56 @@ define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
   ret i16 %sub
 }
 
-
+define <2 x i64> @test31(<2 x i64> %A) {   ; vector case: C - ~A is expected to become A + (C + 1)
+  %xor = xor <2 x i64> %A, <i64 -1, i64 -1>   ; ~A
+  %sub = sub <2 x i64> <i64 2, i64 3>, %xor   ; <2, 3> - ~A
+  ret <2 x i64> %sub
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: %sub = add <2 x i64> %A, <i64 3, i64 4>
+; CHECK-NEXT: ret <2 x i64> %sub
+}
+
+define <2 x i64> @test32(<2 x i64> %A) {   ; vector case: C - (A + C2) is expected to become (C - C2) - A
+  %add = add <2 x i64> %A, <i64 -1, i64 -1>   ; A - 1
+  %sub = sub <2 x i64> <i64 2, i64 3>, %add   ; <2, 3> - (A - 1)
+  ret <2 x i64> %sub
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: %sub = sub <2 x i64> <i64 3, i64 4>, %A
+; CHECK-NEXT: ret <2 x i64> %sub
+}
+
+define <2 x i64> @test33(<2 x i1> %A) {   ; vector case: 0 - zext(i1) is expected to become sext(i1)
+  %ext = zext <2 x i1> %A to <2 x i64>   ; each lane is 0 or 1
+  %sub = sub <2 x i64> zeroinitializer, %ext   ; each lane is 0 or -1
+  ret <2 x i64> %sub
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: %sub = sext <2 x i1> %A to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %sub
+}
+
+define <2 x i64> @test34(<2 x i1> %A) {   ; vector case: 0 - sext(i1) is expected to become zext(i1)
+  %ext = sext <2 x i1> %A to <2 x i64>   ; each lane is 0 or -1
+  %sub = sub <2 x i64> zeroinitializer, %ext   ; each lane is 0 or 1
+  ret <2 x i64> %sub
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: %sub = zext <2 x i1> %A to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %sub
+}
+
+define <2 x i64> @test35(<2 x i64> %A) {   ; vector case: A - A*C is expected to become A * (1 - C)
+  %mul = mul <2 x i64> %A, <i64 3, i64 4>
+  %sub = sub <2 x i64> %A, %mul   ; A - A*<3, 4> == A * <-2, -3>
+  ret <2 x i64> %sub
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: %sub = mul <2 x i64> %A, <i64 -2, i64 -3>
+; CHECK-NEXT: ret <2 x i64> %sub
+}
+
+define <2 x i64> @test36(<2 x i64> %A) {   ; vector case: (A << C) - A is expected to become A * ((1 << C) - 1)
+  %shl = shl <2 x i64> %A, <i64 3, i64 4>   ; A * <8, 16>
+  %sub = sub <2 x i64> %shl, %A   ; A * <7, 15>
+  ret <2 x i64> %sub
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: %sub = mul <2 x i64> %A, <i64 7, i64 15>
+; CHECK-NEXT: ret <2 x i64> %sub
+}
diff --git a/test/Transforms/InstCombine/vec_extract_var_elt.ll b/test/Transforms/InstCombine/vec_extract_var_elt.ll
index 3c98287..f6f9e01 100644
--- a/test/Transforms/InstCombine/vec_extract_var_elt.ll
+++ b/test/Transforms/InstCombine/vec_extract_var_elt.ll
@@ -16,3 +16,11 @@ define void @test (float %b, <8 x float> * %p)  {
   ret void    
 }
 
+; PR18600
+define i32 @test2(i32 %i) {   ; the checks only require that an extractelement survives instcombine
+  %e = extractelement <4 x i32> bitcast (<2 x i64> <i64 1, i64 2> to <4 x i32>), i32 %i   ; variable index into a bitcast constant vector
+  ret i32 %e
+
+; CHECK-LABEL: @test2
+; CHECK: extractelement
+}
diff --git a/test/Transforms/InstCombine/vec_phi_extract.ll b/test/Transforms/InstCombine/vec_phi_extract.ll
index 73ec1f1..1d778a0 100644
--- a/test/Transforms/InstCombine/vec_phi_extract.ll
+++ b/test/Transforms/InstCombine/vec_phi_extract.ll
@@ -36,10 +36,10 @@ for.cond:
   %input_1.addr.1 = phi <3 x i32> [ undef, %entry ], [ %dec43, %for.body ]
   br i1 undef, label %for.end, label %for.body
 
-; CHECK extractelement
+; CHECK: extractelement
 for.body:
   %dec43 = add <3 x i32> %input_1.addr.1, <i32 -1, i32 -1, i32 -1>
-  %sub44 = sub <3 x i32> zeroinitializer, %dec43
+  %sub44 = sub <3 x i32> <i32 -1, i32 -1, i32 -1>, %dec43
   %div45 = sdiv <3 x i32> %input_2.addr.0, %sub44
   br label %for.cond
 
diff --git a/test/Transforms/InstCombine/vec_sext.ll b/test/Transforms/InstCombine/vec_sext.ll
index d7ab96b..6f0d214 100644
--- a/test/Transforms/InstCombine/vec_sext.ll
+++ b/test/Transforms/InstCombine/vec_sext.ll
@@ -13,6 +13,7 @@ entry:
   %cond = or <4 x i32> %2, %3
   ret <4 x i32> %cond
 
+; CHECK-LABEL: @psignd_3
 ; CHECK:   ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
 ; CHECK:   sub nsw <4 x i32> zeroinitializer, %a
 ; CHECK:   xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -20,3 +21,25 @@ entry:
 ; CHECK:   and <4 x i32> %b.lobit, %sub
 ; CHECK:   or <4 x i32> %1, %2
 }
+
+define <4 x i32> @test1(<4 x i32> %a, <4 x i32> %b) nounwind ssp {   ; per lane: -%a where %b > -1, otherwise %a
+entry:
+  %cmp = icmp sgt <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>   ; lane is true when %b is non-negative
+  %sext = sext <4 x i1> %cmp to <4 x i32>   ; all-ones mask in those lanes, zero elsewhere
+  %sub = sub nsw <4 x i32> zeroinitializer, %a   ; -%a
+  %0 = icmp slt <4 x i32> %sext, zeroinitializer   ; true exactly where the mask is all-ones
+  %sext3 = sext <4 x i1> %0 to <4 x i32>   ; same mask as %sext, rebuilt through a compare
+  %1 = xor <4 x i32> %sext3, <i32 -1, i32 -1, i32 -1, i32 -1>   ; inverted mask
+  %2 = and <4 x i32> %a, %1   ; %a where the mask is clear
+  %3 = and <4 x i32> %sext3, %sub   ; -%a where the mask is set
+  %cond = or <4 x i32> %2, %3   ; merge the two halves of the select
+  ret <4 x i32> %cond
+
+; CHECK-LABEL: @test1
+; CHECK:   ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+; CHECK:   xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK:   sub nsw <4 x i32> zeroinitializer, %a
+; CHECK:   and <4 x i32> %b.lobit, %a
+; CHECK:   and <4 x i32> %b.lobit.not, %sub
+; CHECK:   or <4 x i32> %0, %1
+}
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index 3ee43dc..a409a91 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -228,3 +228,20 @@ define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) {
   ret <4 x float> %tmp5
 }
 
+define <1 x i32> @test16a(i32 %ele) {   ; only lane 0 is demanded, so the result should fold to a constant
+; CHECK-LABEL: @test16a(
+; CHECK-NEXT: ret <1 x i32> <i32 2>
+  %tmp0 = insertelement <2 x i32> <i32 1, i32 undef>, i32 %ele, i32 1   ; %ele goes into lane 1 only
+  %tmp1 = shl <2 x i32> %tmp0, <i32 1, i32 1>   ; lane 0 becomes 1 << 1 == 2
+  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <1 x i32> <i32 0>   ; extract lane 0, which never sees %ele
+  ret <1 x i32> %tmp2
+}
+
+define <4 x i8> @test16b(i8 %ele) {   ; lanes 1-4 are demanded and all constant, so the result should fold
+; CHECK-LABEL: @test16b(
+; CHECK-NEXT: ret <4 x i8> <i8 2, i8 2, i8 2, i8 2>
+  %tmp0 = insertelement <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 undef, i8 1>, i8 %ele, i32 6   ; %ele goes into lane 6 only
+  %tmp1 = shl <8 x i8> %tmp0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>   ; constant lanes become 1 << 1 == 2
+  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>   ; lane 6 is not selected
+  ret <4 x i8> %tmp2
+}
diff --git a/test/Transforms/InstCombine/zext.ll b/test/Transforms/InstCombine/zext.ll
index 10eabf7..b62c626 100644
--- a/test/Transforms/InstCombine/zext.ll
+++ b/test/Transforms/InstCombine/zext.ll
@@ -5,7 +5,41 @@ define i64 @test_sext_zext(i16 %A) {
         %c1 = zext i16 %A to i32                ; <i32> [#uses=1]
         %c2 = sext i32 %c1 to i64               ; <i64> [#uses=1]
         ret i64 %c2
+
+; CHECK-LABEL: @test_sext_zext
 ; CHECK-NOT: %c1
 ; CHECK: %c2 = zext i16 %A to i64
 ; CHECK: ret i64 %c2
 }
+
+define <2 x i64> @test2(<2 x i1> %A) {   ; vector case: zext(not A) is expected to become xor(zext A, 1)
+  %xor = xor <2 x i1> %A, <i1 true, i1 true>   ; logical not of each lane
+  %zext = zext <2 x i1> %xor to <2 x i64>
+  ret <2 x i64> %zext
+
+; CHECK-LABEL: @test2
+; CHECK-NEXT: zext <2 x i1> %A to <2 x i64>
+; CHECK-NEXT: xor <2 x i64> %1, <i64 1, i64 1>
+}
+
+define <2 x i64> @test3(<2 x i64> %A) {   ; vector case: zext(and(trunc A, C)) is expected to become and(A, C) in the wide type
+  %trunc = trunc <2 x i64> %A to <2 x i32>
+  %and = and <2 x i32> %trunc, <i32 23, i32 42>   ; mask applied in the narrow type
+  %zext = zext <2 x i32> %and to <2 x i64>
+  ret <2 x i64> %zext
+
+; CHECK-LABEL: @test3
+; CHECK-NEXT: and <2 x i64> %A, <i64 23, i64 42>
+}
+
+define <2 x i64> @test4(<2 x i64> %A) {   ; like test3 with an extra xor; the checks expect both logic ops in the wide type
+  %trunc = trunc <2 x i64> %A to <2 x i32>
+  %and = and <2 x i32> %trunc, <i32 23, i32 42>
+  %xor = xor <2 x i32> %and, <i32 23, i32 42>   ; flips exactly the bits kept by the mask
+  %zext = zext <2 x i32> %xor to <2 x i64>
+  ret <2 x i64> %zext
+
+; CHECK-LABEL: @test4
+; CHECK-NEXT: xor <2 x i64> %A, <i64 4294967295, i64 4294967295>
+; CHECK-NEXT: and <2 x i64> %1, <i64 23, i64 42>
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index abb3869..ee6be04 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -739,3 +739,21 @@ define i1 @non_inbounds_gep_compare2(i64* %a) {
   ret i1 %cmp
 ; CHECK-NEXT: ret i1 true
 }
+
+define <4 x i8> @vectorselectfold(<4 x i8> %a, <4 x i8> %b) {   ; select on an all-false vector condition should fold to %b
+  %false = icmp ne <4 x i8> zeroinitializer, zeroinitializer   ; 0 != 0 in every lane: all-false mask
+  %sel = select <4 x i1> %false, <4 x i8> %a, <4 x i8> %b
+  ret <4 x i8> %sel
+
+; CHECK-LABEL: @vectorselectfold
+; CHECK-NEXT: ret <4 x i8> %b
+}
+
+define <4 x i8> @vectorselectfold2(<4 x i8> %a, <4 x i8> %b) {   ; select on an all-true vector condition should fold to %a
+  %true = icmp eq <4 x i8> zeroinitializer, zeroinitializer   ; 0 == 0 in every lane: all-true mask
+  %sel = select <4 x i1> %true, <4 x i8> %a, <4 x i8> %b
+  ret <4 x i8> %sel
+
+; CHECK-LABEL: @vectorselectfold2
+; CHECK-NEXT: ret <4 x i8> %a
+}
diff --git a/test/Transforms/InstSimplify/undef.ll b/test/Transforms/InstSimplify/undef.ll
index 23cd50f..181c2ef 100644
--- a/test/Transforms/InstSimplify/undef.ll
+++ b/test/Transforms/InstSimplify/undef.ll
@@ -153,3 +153,10 @@ define i64 @test18(i64 %a) {
   %r = call i64 (i64)* undef(i64 %a)
   ret i64 %r
 }
+
+; CHECK-LABEL: @test19
+; CHECK: ret <4 x i8> undef
+define <4 x i8> @test19(<4 x i8> %a) {   ; vector shl whose every shift amount is out of range or undef
+  %b = shl <4 x i8> %a, <i8 8, i8 9, i8 undef, i8 -1>   ; 8, 9 and -1 (255 unsigned) are all >= the 8-bit lane width
+  ret <4 x i8> %b
+}
diff --git a/test/Transforms/InstSimplify/vector_gep.ll b/test/Transforms/InstSimplify/vector_gep.ll
index 5ac1dde..1781463 100644
--- a/test/Transforms/InstSimplify/vector_gep.ll
+++ b/test/Transforms/InstSimplify/vector_gep.ll
@@ -1,4 +1,7 @@
-;RUN: opt -instsimplify -disable-output < %s
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
 declare void @helper(<2 x i8*>)
 define void @test(<2 x i8*> %a) {
   %A = getelementptr <2 x i8*> %a, <2 x i32> <i32 0, i32 0>
@@ -6,3 +9,47 @@ define void @test(<2 x i8*> %a) {
   ret void
 }
 
+define <4 x i8*> @test1(<4 x i8*> %a) {   ; vector GEP with all-zero indices should simplify to its base
+  %gep = getelementptr <4 x i8*> %a, <4 x i32> zeroinitializer   ; offset 0 in every lane
+  ret <4 x i8*> %gep
+
+; CHECK-LABEL: @test1
+; CHECK-NEXT: ret <4 x i8*> %a
+}
+
+define <4 x i8*> @test2(<4 x i8*> %a) {   ; vector GEP with no indices should simplify to its base
+  %gep = getelementptr <4 x i8*> %a   ; no index operands at all
+  ret <4 x i8*> %gep
+
+; CHECK-LABEL: @test2
+; CHECK-NEXT: ret <4 x i8*> %a
+}
+
+%struct = type { double, float }
+
+define <4 x float*> @test3() {   ; vector GEP on an undef base should simplify to undef
+  %gep = getelementptr <4 x %struct*> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>   ; addresses the float field of element 1 in every lane
+  ret <4 x float*> %gep
+
+; CHECK-LABEL: @test3
+; CHECK-NEXT: ret <4 x float*> undef
+}
+
+%struct.empty = type { }
+
+define <4 x %struct.empty*> @test4(<4 x %struct.empty*> %a) {   ; non-zero index over an empty struct should still simplify to the base
+  %gep = getelementptr <4 x %struct.empty*> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>   ; presumably a zero-byte step because %struct.empty has no fields; confirm against the data layout
+  ret <4 x %struct.empty*> %gep
+
+; CHECK-LABEL: @test4
+; CHECK-NEXT: ret <4 x %struct.empty*> %a
+}
+
+define <4 x i8*> @test5() {   ; all-constant vector GEP should fold to a single constant expression
+  %c = inttoptr <4 x i64> <i64 1, i64 2, i64 3, i64 4> to <4 x i8*>   ; constant vector of pointers
+  %gep = getelementptr <4 x i8*> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>   ; advance every lane by one i8
+  ret <4 x i8*> %gep
+
+; CHECK-LABEL: @test5
+; CHECK-NEXT: ret <4 x i8*> getelementptr (<4 x i8*> <i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 2 to i8*), i8* inttoptr (i64 3 to i8*), i8* inttoptr (i64 4 to i8*)>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+}
diff --git a/test/Transforms/Internalize/lists.ll b/test/Transforms/Internalize/lists.ll
index 83e441a2..548c8aa 100644
--- a/test/Transforms/Internalize/lists.ll
+++ b/test/Transforms/Internalize/lists.ll
@@ -1,7 +1,7 @@
 ; No arguments means internalize everything
 ; RUN: opt < %s -internalize -S | FileCheck --check-prefix=ALL %s
 
-; Non existent files should be treated as if they were empty (so internalize
+; Non-existent files should be treated as if they were empty (so internalize
 ; everything)
 ; RUN: opt < %s -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null -S | FileCheck --check-prefix=ALL %s
 
@@ -48,3 +48,12 @@ define void @foo() {
 define available_externally void @bar() {
   ret void
 }
+
+; ALL: define dllexport void @export_foo() {
+; FOO_AND_J: define dllexport void @export_foo() {
+; FOO_AND_BAR: define dllexport void @export_foo() {
+; FOO_J_AND_BAR: define dllexport void @export_foo() {
+define dllexport void @export_foo() {   ; every prefix above expects this dllexport definition line to come out unchanged
+  ret void
+}
+
diff --git a/test/Transforms/LICM/lcssa-ssa-promoter.ll b/test/Transforms/LICM/lcssa-ssa-promoter.ll
new file mode 100644
index 0000000..5df3ef1
--- /dev/null
+++ b/test/Transforms/LICM/lcssa-ssa-promoter.ll
@@ -0,0 +1,76 @@
+; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+;
+; Manually validate LCSSA form is preserved even after SSAUpdater is used to
+; promote things in the loop bodies.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = common global i32 0, align 4
+@y = common global i32 0, align 4
+
+define void @PR18688() {   ; two-level loop nest that stores to @x in both the outer and the inner loop
+; CHECK-LABEL: @PR18688(
+
+entry:
+  br i1 undef, label %return, label %outer.preheader
+
+outer.preheader:
+  br label %outer.header
+; CHECK: outer.preheader:
+; CHECK: br label %outer.header
+
+outer.header:
+  store i32 0, i32* @x, align 4   ; outer-loop store; the checks expect this block to hold only the branch afterwards
+  br i1 undef, label %outer.latch, label %inner.preheader
+; CHECK: outer.header:
+; CHECK-NEXT: br i1 undef, label %outer.latch, label %inner.preheader
+
+inner.preheader:
+  br label %inner.header
+; CHECK: inner.preheader:
+; CHECK-NEXT: br label %inner.header
+
+inner.header:
+  br i1 undef, label %inner.body.rhs, label %inner.latch
+; CHECK: inner.header:
+; CHECK-NEXT: %[[PHI0:[^,]+]] = phi i32 [ %{{[^,]+}}, %inner.latch ], [ 0, %inner.preheader ]
+; CHECK-NEXT: br i1 undef, label %inner.body.rhs, label %inner.latch
+
+inner.body.rhs:
+  store i32 0, i32* @x, align 4   ; inner-loop store on one side of the diamond; expected to be removed from this block as well
+  br label %inner.latch
+; CHECK: inner.body.rhs:
+; CHECK-NEXT: br label %inner.latch
+
+inner.latch:
+  %y_val = load i32* @y, align 4   ; load of a different global decides the inner loop exit
+  %icmp = icmp eq i32 %y_val, 0
+  br i1 %icmp, label %inner.exit, label %inner.header
+; CHECK: inner.latch:
+; CHECK-NEXT: %[[PHI1:[^,]+]] = phi i32 [ 0, %inner.body.rhs ], [ %[[PHI0]], %inner.header ]
+; CHECK-NEXT: br i1 %{{[^,]+}}, label %inner.exit, label %inner.header
+
+inner.exit:
+  br label %outer.latch   ; the checks expect a single-entry phi here for the value leaving the inner loop
+; CHECK: inner.exit:
+; CHECK-NEXT: %[[INNER_LCSSA:[^,]+]] = phi i32 [ %[[PHI1]], %inner.latch ]
+; CHECK-NEXT: br label %outer.latch
+
+outer.latch:
+  br i1 undef, label %outer.exit, label %outer.header
+; CHECK: outer.latch:
+; CHECK-NEXT: %[[PHI2:[^,]+]] = phi i32 [ %[[INNER_LCSSA]], %inner.exit ], [ 0, %outer.header ]
+; CHECK-NEXT: br i1 {{.*}}, label %outer.exit, label %outer.header
+
+outer.exit:
+  br label %return   ; the checks expect the one remaining store, fed by a single-entry phi, to land here
+; CHECK: outer.exit:
+; CHECK-NEXT: %[[OUTER_LCSSA:[^,]+]] = phi i32 [ %[[PHI2]], %outer.latch ]
+; CHECK-NEXT: store i32 %[[OUTER_LCSSA]]
+; CHECK-NEXT: br label %return
+
+return:
+  ret void
+}
+
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll
index 92ef155..d7e7c6e 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@@ -24,7 +24,8 @@ Loop:   ; preds = %Loop, %0
 Out:
   ret void
 ; CHECK: Out:
-; CHECK-NEXT:   store i32 %x2, i32* @X
+; CHECK-NEXT:   %[[LCSSAPHI:.*]] = phi i32 [ %x2
+; CHECK-NEXT:   store i32 %[[LCSSAPHI]], i32* @X
 ; CHECK-NEXT:   ret void
 
 }
@@ -48,7 +49,8 @@ Loop:   ; preds = %Loop, %0
 Exit:   ; preds = %Loop
   ret void
 ; CHECK: Exit:
-; CHECK-NEXT:   store i32 %V, i32* getelementptr inbounds (i32* @X, i64 1)
+; CHECK-NEXT:   %[[LCSSAPHI:.*]] = phi i32 [ %V
+; CHECK-NEXT:   store i32 %[[LCSSAPHI]], i32* getelementptr inbounds (i32* @X, i64 1)
 ; CHECK-NEXT:   ret void
 }
 
@@ -142,7 +144,8 @@ Loop:   ; preds = %Loop, %0
 Out:
   ret void
 ; CHECK: Out:
-; CHECK-NEXT:   store i32 %x2, i32* @X
+; CHECK-NEXT:   %[[LCSSAPHI:.*]] = phi i32 [ %x2
+; CHECK-NEXT:   store i32 %[[LCSSAPHI]], i32* @X
 ; CHECK-NEXT:   ret void
 
 }
@@ -178,7 +181,8 @@ for.end:                                          ; preds = %for.cond.for.end_cr
 ; CHECK: for.body.lr.ph:
 ; CHECK-NEXT:  %gi.promoted = load i32* %gi, align 4, !tbaa !0
 ; CHECK: for.cond.for.end_crit_edge:
-; CHECK-NEXT:  store i32 %inc, i32* %gi, align 4, !tbaa !0
+; CHECK-NEXT:  %[[LCSSAPHI:.*]] = phi i32 [ %inc
+; CHECK-NEXT:  store i32 %[[LCSSAPHI]], i32* %gi, align 4, !tbaa !0
 }
 
 !0 = metadata !{metadata !4, metadata !4, i64 0}
diff --git a/test/Transforms/LICM/sinking.ll b/test/Transforms/LICM/sinking.ll
index b503f96..ccc9186 100644
--- a/test/Transforms/LICM/sinking.ll
+++ b/test/Transforms/LICM/sinking.ll
@@ -53,7 +53,7 @@ Exit:
         
 ; CHECK-LABEL: @test3(
 ; CHECK:     Exit.loopexit:
-; CHECK-NEXT:  %X = add i32 0, 1
+; CHECK-NEXT:  %X.le = add i32 0, 1
 ; CHECK-NEXT:  br label %Exit
 
 }
@@ -76,8 +76,9 @@ Out:		; preds = %Loop
 	ret i32 %tmp.7
 ; CHECK-LABEL: @test4(
 ; CHECK:     Out:
-; CHECK-NEXT:  mul i32 %N, %N_addr.0.pn
-; CHECK-NEXT:  sub i32 %tmp.6, %N
+; CHECK-NEXT:  %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn
+; CHECK-NEXT:  mul i32 %N, %[[LCSSAPHI]]
+; CHECK-NEXT:  sub i32 %tmp.6.le, %N
 ; CHECK-NEXT:  ret i32
 }
 
@@ -100,8 +101,8 @@ Out:		; preds = %Loop
 	ret i32 %tmp.6
 ; CHECK-LABEL: @test5(
 ; CHECK:     Out:
-; CHECK-NEXT:  %tmp.6 = load i32* @X
-; CHECK-NEXT:  ret i32 %tmp.6
+; CHECK-NEXT:  %tmp.6.le = load i32* @X
+; CHECK-NEXT:  ret i32 %tmp.6.le
 }
 
 
@@ -124,9 +125,9 @@ Out:		; preds = %Loop
 	ret i32 %sunk2
 ; CHECK-LABEL: @test6(
 ; CHECK:     Out:
-; CHECK-NEXT:  %dead = getelementptr %Ty* @X2, i64 0, i32 0
-; CHECK-NEXT:  %sunk2 = load i32* %dead
-; CHECK-NEXT:  ret i32 %sunk2
+; CHECK-NEXT:  %dead.le = getelementptr %Ty* @X2, i64 0, i32 0
+; CHECK-NEXT:  %sunk2.le = load i32* %dead.le
+; CHECK-NEXT:  ret i32 %sunk2.le
 }
 
 
@@ -152,12 +153,14 @@ Out2:		; preds = %ContLoop
 	ret i32 %tmp.7
 ; CHECK-LABEL: @test7(
 ; CHECK:     Out1:
-; CHECK-NEXT:  mul i32 %N, %N_addr.0.pn
-; CHECK-NEXT:  sub i32 %tmp.6, %N
+; CHECK-NEXT:  %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn
+; CHECK-NEXT:  mul i32 %N, %[[LCSSAPHI]]
+; CHECK-NEXT:  sub i32 %tmp.6.le, %N
 ; CHECK-NEXT:  ret
 ; CHECK:     Out2:
-; CHECK-NEXT:  mul i32 %N, %N_addr.0.pn
-; CHECK-NEXT:  sub i32 %tmp.6
+; CHECK-NEXT:  %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn
+; CHECK-NEXT:  mul i32 %N, %[[LCSSAPHI]]
+; CHECK-NEXT:  sub i32 %tmp.6.le4, %N
 ; CHECK-NEXT:  ret
 }
 
@@ -183,8 +186,9 @@ exit2:		; preds = %Cont
 ; CHECK:     exit1:
 ; CHECK-NEXT:  ret i32 0
 ; CHECK:     exit2:
-; CHECK-NEXT:  %V = add i32 %X, 1
-; CHECK-NEXT:  ret i32 %V
+; CHECK-NEXT:  %[[LCSSAPHI:.*]] = phi i32 [ %X
+; CHECK-NEXT:  %V.le = add i32 %[[LCSSAPHI]], 1
+; CHECK-NEXT:  ret i32 %V.le
 }
 
 
@@ -208,7 +212,7 @@ return.i:		; preds = %no_exit.1.i
 
 ; CHECK-LABEL: @test9(
 ; CHECK: loopentry.3.i.preheader.loopexit:
-; CHECK-NEXT:  %inc.1.i = add i32 0, 1
+; CHECK-NEXT:  %inc.1.i.le = add i32 0, 1
 ; CHECK-NEXT:  br label %loopentry.3.i.preheader
 }
 
@@ -229,8 +233,9 @@ Out:		; preds = %Loop
         
 ; CHECK-LABEL: @test10(
 ; CHECK: Out: 
-; CHECK-NEXT:  %tmp.6 = sdiv i32 %N, %N_addr.0.pn
-; CHECK-NEXT:  ret i32 %tmp.6
+; CHECK-NEXT:  %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn
+; CHECK-NEXT:  %tmp.6.le = sdiv i32 %N, %[[LCSSAPHI]]
+; CHECK-NEXT:  ret i32 %tmp.6.le
 }
 
 ; Should delete, not sink, dead instructions.
@@ -246,4 +251,69 @@ Out:
 ; CHECK-NEXT:  ret void
 }
 
+@c = common global [1 x i32] zeroinitializer, align 4
 
+; Test a *many* way nested loop with multiple exit blocks both of which exit
+; multiple loop nests. This exercises LCSSA corner cases.
+define i32 @PR18753(i1* %a, i1* %b, i1* %c, i1* %d) {   ; four nested loops l1..l4; both exits leave from the innermost one
+entry:
+  br label %l1.header
+
+l1.header:
+  %iv = phi i64 [ %iv.next, %l1.latch ], [ 0, %entry ]   ; outermost induction variable
+  %arrayidx.i = getelementptr inbounds [1 x i32]* @c, i64 0, i64 %iv
+  br label %l2.header
+
+l2.header:
+  %x0 = load i1* %c, align 4
+  br i1 %x0, label %l1.latch, label %l3.preheader
+
+l3.preheader:
+  br label %l3.header
+
+l3.header:
+  %x1 = load i1* %d, align 4
+  br i1 %x1, label %l2.latch, label %l4.preheader
+
+l4.preheader:
+  br label %l4.header
+
+l4.header:
+  %x2 = load i1* %a
+  br i1 %x2, label %l3.latch, label %l4.body
+
+l4.body:
+  call void @f(i32* %arrayidx.i)
+  %x3 = load i1* %b
+  %l = trunc i64 %iv to i32   ; only used by the phi in %exit; the checks expect it to be sunk there as %l.le
+  br i1 %x3, label %l4.latch, label %exit   ; first exit edge, leaving all four loops at once
+
+l4.latch:
+  call void @g()
+  %x4 = load i1* %b, align 4
+  br i1 %x4, label %l4.header, label %exit   ; second exit edge, to the same exit block
+
+l3.latch:
+  br label %l3.header
+
+l2.latch:
+  br label %l2.header
+
+l1.latch:
+  %iv.next = add nsw i64 %iv, 1
+  br label %l1.header
+
+exit:
+  %lcssa = phi i32 [ %l, %l4.latch ], [ %l, %l4.body ]   ; same value on both exit edges
+; CHECK-LABEL: @PR18753(
+; CHECK:       exit:
+; CHECK-NEXT:    %[[LCSSAPHI:.*]] = phi i64 [ %iv, %l4.latch ], [ %iv, %l4.body ]
+; CHECK-NEXT:    %l.le = trunc i64 %[[LCSSAPHI]] to i32
+; CHECK-NEXT:    ret i32 %l.le
+
+  ret i32 %lcssa
+}
+
+declare void @f(i32*)
+
+declare void @g()
diff --git a/test/Transforms/LICM/volatile-alias.ll b/test/Transforms/LICM/volatile-alias.ll
index 886d7f2..df7f0a9 100644
--- a/test/Transforms/LICM/volatile-alias.ll
+++ b/test/Transforms/LICM/volatile-alias.ll
@@ -4,7 +4,7 @@
 ; out of the loop.
 ; CHECK: load i32* %p
 ; CHECK: for.body:
-; CHECK; load volatile i32* %q
+; CHECK: load volatile i32* %q
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopReroll/basic.ll b/test/Transforms/LoopReroll/basic.ll
index 314a149..3bd6d7a 100644
--- a/test/Transforms/LoopReroll/basic.ll
+++ b/test/Transforms/LoopReroll/basic.ll
@@ -33,7 +33,7 @@ for.body:                                         ; preds = %for.body, %entry
 ; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
 ; CHECK: %call = tail call i32 @foo(i32 %indvar) #1
 ; CHECK: %indvar.next = add i32 %indvar, 1
-; CHECK: %exitcond1 = icmp eq i32 %indvar.next, 498
+; CHECK: %exitcond1 = icmp eq i32 %indvar, 497
 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body
 
 ; CHECK: ret
@@ -83,7 +83,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvar
 ; CHECK: store i32 %call, i32* %arrayidx, align 4
 ; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 1500
+; CHECK: %exitcond = icmp eq i64 %indvar, 1499
 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
 
 ; CHECK: ret
@@ -131,7 +131,7 @@ for.body:                                         ; preds = %for.body, %entry
 ; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
 ; CHECK: store i32 %call, i32* %arrayidx, align 4
 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: %exitcond1 = icmp eq i64 %indvars.iv.next, 1500
+; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499
 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body
 
 ; CHECK: ret
@@ -213,7 +213,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: %add = fadd float %1, %mul
 ; CHECK: store float %add, float* %arrayidx2, align 4
 ; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200
+; CHECK: %exitcond = icmp eq i64 %indvar, 3199
 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
 
 ; CHECK: ret
@@ -313,7 +313,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: %add = fadd float %2, %mul
 ; CHECK: store float %add, float* %arrayidx4, align 4
 ; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200
+; CHECK: %exitcond = icmp eq i64 %indvar, 3199
 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
 
 ; CHECK: ret
diff --git a/test/Transforms/LoopReroll/nonconst_lb.ll b/test/Transforms/LoopReroll/nonconst_lb.ll
new file mode 100644
index 0000000..a45469b
--- /dev/null
+++ b/test/Transforms/LoopReroll/nonconst_lb.ll
@@ -0,0 +1,152 @@
+; RUN: opt < %s -loop-reroll -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-none-linux"
+
+;void foo(int *A, int *B, int m, int n) {
+;  for (int i = m; i < n; i+=4) {
+;    A[i+0] = B[i+0] * 4;
+;    A[i+1] = B[i+1] * 4;
+;    A[i+2] = B[i+2] * 4;
+;    A[i+3] = B[i+3] * 4;
+;  }
+;}
+define void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %m, i32 %n) {   ; loop unrolled by 4 with a non-constant lower bound %m
+entry:
+  %cmp34 = icmp slt i32 %m, %n   ; guard: skip the loop when m >= n
+  br i1 %cmp34, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.035 = phi i32 [ %add18, %for.body ], [ %m, %entry ]   ; i starts at %m, not at a constant
+  %arrayidx = getelementptr inbounds i32* %B, i32 %i.035
+  %0 = load i32* %arrayidx, align 4
+  %mul = shl nsw i32 %0, 2   ; B[i] * 4 written as a shift
+  %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.035
+  store i32 %mul, i32* %arrayidx2, align 4   ; A[i+0]
+  %add3 = add nsw i32 %i.035, 1
+  %arrayidx4 = getelementptr inbounds i32* %B, i32 %add3
+  %1 = load i32* %arrayidx4, align 4
+  %mul5 = shl nsw i32 %1, 2
+  %arrayidx7 = getelementptr inbounds i32* %A, i32 %add3
+  store i32 %mul5, i32* %arrayidx7, align 4   ; A[i+1]
+  %add8 = add nsw i32 %i.035, 2
+  %arrayidx9 = getelementptr inbounds i32* %B, i32 %add8
+  %2 = load i32* %arrayidx9, align 4
+  %mul10 = shl nsw i32 %2, 2
+  %arrayidx12 = getelementptr inbounds i32* %A, i32 %add8
+  store i32 %mul10, i32* %arrayidx12, align 4   ; A[i+2]
+  %add13 = add nsw i32 %i.035, 3
+  %arrayidx14 = getelementptr inbounds i32* %B, i32 %add13
+  %3 = load i32* %arrayidx14, align 4
+  %mul15 = shl nsw i32 %3, 2
+  %arrayidx17 = getelementptr inbounds i32* %A, i32 %add13
+  store i32 %mul15, i32* %arrayidx17, align 4   ; A[i+3]
+  %add18 = add nsw i32 %i.035, 4   ; i += 4
+  %cmp = icmp slt i32 %add18, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+; CHECK-LABEL: @foo
+; CHECK: for.body.preheader:                               ; preds = %entry
+; CHECK:   %0 = add i32 %n, -1
+; CHECK:   %1 = sub i32 %0, %m
+; CHECK:   %2 = lshr i32 %1, 2
+; CHECK:   %3 = mul i32 %2, 4
+; CHECK:   %4 = add i32 %m, %3
+; CHECK:   %5 = add i32 %4, 3
+; CHECK:   br label %for.body
+
+; CHECK: for.body:                                         ; preds = %for.body, %for.body.preheader
+; CHECK:   %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
+; CHECK:   %6 = add i32 %m, %indvar
+; CHECK:   %arrayidx = getelementptr inbounds i32* %B, i32 %6
+; CHECK:   %7 = load i32* %arrayidx, align 4
+; CHECK:   %mul = shl nsw i32 %7, 2
+; CHECK:   %arrayidx2 = getelementptr inbounds i32* %A, i32 %6
+; CHECK:   store i32 %mul, i32* %arrayidx2, align 4
+; CHECK:   %indvar.next = add i32 %indvar, 1
+; CHECK:   %exitcond = icmp eq i32 %6, %5
+; CHECK:   br i1 %exitcond, label %for.end, label %for.body
+
+;void daxpy_ur(int n,float da,float *dx,float *dy)
+;    {
+;    int m = n % 4;
+;    for (int i = m; i < n; i = i + 4)
+;        {
+;        dy[i]   = dy[i]   + da*dx[i];
+;        dy[i+1] = dy[i+1] + da*dx[i+1];
+;        dy[i+2] = dy[i+2] + da*dx[i+2];
+;        dy[i+3] = dy[i+3] + da*dx[i+3];
+;        }
+;    }
+define void @daxpy_ur(i32 %n, float %da, float* nocapture readonly %dx, float* nocapture %dy) {   ; daxpy unrolled by 4, starting at n % 4
+entry:
+  %rem = srem i32 %n, 4   ; m = n % 4, the non-constant lower bound
+  %cmp55 = icmp slt i32 %rem, %n   ; guard: skip the loop when m >= n
+  br i1 %cmp55, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.056 = phi i32 [ %add27, %for.body ], [ %rem, %entry ]   ; i starts at %rem
+  %arrayidx = getelementptr inbounds float* %dy, i32 %i.056
+  %0 = load float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float* %dx, i32 %i.056
+  %1 = load float* %arrayidx1, align 4
+  %mul = fmul float %1, %da
+  %add = fadd float %0, %mul
+  store float %add, float* %arrayidx, align 4   ; dy[i] += da * dx[i]
+  %add3 = add nsw i32 %i.056, 1
+  %arrayidx4 = getelementptr inbounds float* %dy, i32 %add3
+  %2 = load float* %arrayidx4, align 4
+  %arrayidx6 = getelementptr inbounds float* %dx, i32 %add3
+  %3 = load float* %arrayidx6, align 4
+  %mul7 = fmul float %3, %da
+  %add8 = fadd float %2, %mul7
+  store float %add8, float* %arrayidx4, align 4   ; dy[i+1] += da * dx[i+1]
+  %add11 = add nsw i32 %i.056, 2
+  %arrayidx12 = getelementptr inbounds float* %dy, i32 %add11
+  %4 = load float* %arrayidx12, align 4
+  %arrayidx14 = getelementptr inbounds float* %dx, i32 %add11
+  %5 = load float* %arrayidx14, align 4
+  %mul15 = fmul float %5, %da
+  %add16 = fadd float %4, %mul15
+  store float %add16, float* %arrayidx12, align 4   ; dy[i+2] += da * dx[i+2]
+  %add19 = add nsw i32 %i.056, 3
+  %arrayidx20 = getelementptr inbounds float* %dy, i32 %add19
+  %6 = load float* %arrayidx20, align 4
+  %arrayidx22 = getelementptr inbounds float* %dx, i32 %add19
+  %7 = load float* %arrayidx22, align 4
+  %mul23 = fmul float %7, %da
+  %add24 = fadd float %6, %mul23
+  store float %add24, float* %arrayidx20, align 4   ; dy[i+3] += da * dx[i+3]
+  %add27 = add nsw i32 %i.056, 4   ; i += 4
+  %cmp = icmp slt i32 %add27, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; CHECK-LABEL: @daxpy_ur
+; CHECK: for.body.preheader:
+; CHECK:   %0 = add i32 %n, -1
+; CHECK:   %1 = sub i32 %0, %rem
+; CHECK:   %2 = lshr i32 %1, 2
+; CHECK:   %3 = mul i32 %2, 4
+; CHECK:   %4 = add i32 %rem, %3
+; CHECK:   %5 = add i32 %4, 3
+; CHECK:   br label %for.body
+
+; CHECK: for.body:
+; CHECK:   %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
+; CHECK:   %6 = add i32 %rem, %indvar
+; CHECK:   %arrayidx = getelementptr inbounds float* %dy, i32 %6
+; CHECK:   %7 = load float* %arrayidx, align 4
+; CHECK:   %arrayidx1 = getelementptr inbounds float* %dx, i32 %6
+; CHECK:   %8 = load float* %arrayidx1, align 4
+; CHECK:   %mul = fmul float %8, %da
+; CHECK:   %add = fadd float %7, %mul
+; CHECK:   store float %add, float* %arrayidx, align 4
+; CHECK:   %indvar.next = add i32 %indvar, 1
+; CHECK:   %exitcond = icmp eq i32 %6, %5
+; CHECK:   br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/Transforms/LoopReroll/reduction.ll b/test/Transforms/LoopReroll/reduction.ll
index aed7670..c9991c7 100644
--- a/test/Transforms/LoopReroll/reduction.ll
+++ b/test/Transforms/LoopReroll/reduction.ll
@@ -38,7 +38,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: %0 = load i32* %arrayidx, align 4
 ; CHECK: %add = add nsw i32 %0, %r.029
 ; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 400
+; CHECK: %exitcond = icmp eq i64 %indvar, 399
 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
 
 ; CHECK: ret
@@ -83,7 +83,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: %0 = load float* %arrayidx, align 4
 ; CHECK: %add = fadd float %0, %r.029
 ; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 400
+; CHECK: %exitcond = icmp eq i64 %indvar, 399
 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
 
 ; CHECK: ret
diff --git a/test/Transforms/LoopRotate/PhiSelfRefernce-1.ll b/test/Transforms/LoopRotate/PhiSelfReference-1.ll
index a1aa21b..aa1708e 100644
--- a/test/Transforms/LoopRotate/PhiSelfRefernce-1.ll
+++ b/test/Transforms/LoopRotate/PhiSelfReference-1.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output
-; ModuleID = 'PhiSelfRefernce-1.bc'
+; ModuleID = 'PhiSelfReference-1.bc'
 
 define void @snrm2(i32 %incx) {
 entry:
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
index 9461980..50fc965 100644
--- a/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -46,7 +46,11 @@ define void @FindFreeHorzSeg(i64 %startCol, i64 %row, i64* %rowStart) {
 ; CHECK-LABEL: define void @FindFreeHorzSeg(
 ; CHECK: %dec = add
 ; CHECK-NEXT: tail call void @llvm.dbg.value
-; CHECK-NEXT: br i1 %tobool, label %for.cond, label %for.end
+; CHECK-NEXT: br i1 %tobool, label %for.cond, label %[[LOOP_EXIT:[^,]*]]
+; CHECK: [[LOOP_EXIT]]:
+; CHECK-NEXT: phi i64 [ %{{[^,]*}}, %{{[^,]*}} ]
+; CHECK-NEXT: br label %for.end
+
 
 entry:
   br label %for.cond
diff --git a/test/Transforms/LoopRotate/preserve-loop-simplify.ll b/test/Transforms/LoopRotate/preserve-loop-simplify.ll
new file mode 100644
index 0000000..53fa02a
--- /dev/null
+++ b/test/Transforms/LoopRotate/preserve-loop-simplify.ll
@@ -0,0 +1,65 @@
+; RUN: opt -S -loop-rotate < %s -verify-loop-info | FileCheck %s
+;
+; Verify that LoopRotate preserves LoopSimplify form even in very peculiar loop
+; structures. We manually validate the CFG with FileCheck because currently we
+; can't cause a failure when LoopSimplify fails to be preserved.
+
+define void @PR18643() {
+; CHECK-LABEL: @PR18643(
+entry:
+  br label %outer.header
+; CHECK: br label %outer.header
+
+outer.header:
+; CHECK: outer.header:
+  br i1 undef, label %inner.header, label %outer.body
+; CHECK-NEXT: br i1 {{[^,]*}}, label %[[INNER_PREROTATE_PREHEADER:[^,]*]], label %outer.body
+
+; CHECK: [[INNER_PREROTATE_PREHEADER]]:
+; CHECK-NEXT: br i1 {{[^,]*}}, label %[[INNER_PREROTATE_PREHEADER_SPLIT_RETURN:[^,]*]], label %[[INNER_ROTATED_PREHEADER:[^,]*]]
+
+; CHECK: [[INNER_ROTATED_PREHEADER]]:
+; CHECK-NEXT: br label %inner.body
+
+inner.header:
+; Now the latch!
+; CHECK: inner.header:
+  br i1 undef, label %return, label %inner.body
+; CHECK-NEXT: br i1 {{[^,]*}}, label %[[INNER_SPLIT_RETURN:[^,]*]], label %inner.body
+
+inner.body:
+; Now the header!
+; CHECK: inner.body:
+  br i1 undef, label %outer.latch, label %inner.latch
+; CHECK-NEXT: br i1 {{[^,]*}}, label %[[INNER_SPLIT_OUTER_LATCH:[^,]*]], label %inner.header
+
+inner.latch:
+; Dead!
+  br label %inner.header
+
+outer.body:
+; CHECK: outer.body:
+  br label %outer.latch
+; CHECK-NEXT: br label %outer.latch
+
+; L2 -> L1 exit edge needs a simplified exit block.
+; CHECK: [[INNER_SPLIT_OUTER_LATCH]]:
+; CHECK-NEXT: br label %outer.latch
+
+outer.latch:
+; CHECK: outer.latch:
+  br label %outer.header
+; CHECK-NEXT: br label %outer.header
+
+; L1 -> L0 exit edge needs a simplified exit block.
+; CHECK: [[INNER_PREROTATE_PREHEADER_SPLIT_RETURN]]:
+; CHECK-NEXT: br label %return
+
+; L2 -> L0 exit edge needs a simplified exit block.
+; CHECK: [[INNER_SPLIT_RETURN]]:
+; CHECK-NEXT: br label %return
+
+return:
+; CHECK: return:
+  unreachable
+}
diff --git a/test/Transforms/LoopSimplify/ashr-crash.ll b/test/Transforms/LoopSimplify/ashr-crash.ll
new file mode 100644
index 0000000..c58903d
--- /dev/null
+++ b/test/Transforms/LoopSimplify/ashr-crash.ll
@@ -0,0 +1,80 @@
+; RUN: opt -basicaa -loop-rotate -licm -instcombine -indvars -loop-unroll -S %s | FileCheck %s
+;
+; PR18361: ScalarEvolution::getAddRecExpr():
+;          Assertion `isLoopInvariant(Operands[i],...
+;
+; After a series of loop optimizations, SCEV's LoopDispositions grow stale.
+; In particular, LoopSimplify hoists %cmp4, resulting in this SCEV for %add:
+; {(zext i1 %cmp4 to i32),+,1}<nw><%for.cond1.preheader>
+;
+; When recomputing the SCEV for %ashr, we truncate the operands to get:
+; (zext i1 %cmp4 to i16)
+;
+; This SCEV was never mapped to a value so never invalidated. Its
+; loop disposition is still marked as non-loop-invariant, which is
+; inconsistent with the AddRec.
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+@d = common global i32 0, align 4
+@a = common global i32 0, align 4
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+; Check that the def-use chain that leads to the bad SCEV is still
+; there.
+;
+; CHECK-LABEL: @foo
+; CHECK-LABEL: entry:
+; CHECK-LABEL: for.cond1.preheader:
+; CHECK-LABEL: for.body3:
+; CHECK: %cmp4.le.le
+; CHECK: %conv.le.le = zext i1 %cmp4.le.le to i32
+; CHECK: %xor.le.le = xor i32 %conv6.le.le, 1
+define void @foo() {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc7, %entry
+  %storemerge = phi i32 [ 0, %entry ], [ %inc8, %for.inc7 ]
+  %f.0 = phi i32 [ undef, %entry ], [ %f.1, %for.inc7 ]
+  store i32 %storemerge, i32* @d, align 4
+  %cmp = icmp slt i32 %storemerge, 1
+  br i1 %cmp, label %for.cond1, label %for.end9
+
+for.cond1:                                        ; preds = %for.cond, %for.body3
+  %storemerge1 = phi i32 [ %inc, %for.body3 ], [ 0, %for.cond ]
+  %f.1 = phi i32 [ %xor, %for.body3 ], [ %f.0, %for.cond ]
+  store i32 %storemerge1, i32* @a, align 4
+  %cmp2 = icmp slt i32 %storemerge1, 1
+  br i1 %cmp2, label %for.body3, label %for.inc7
+
+for.body3:                                        ; preds = %for.cond1
+  %0 = load i32* @c, align 4
+  %cmp4 = icmp sge i32 %storemerge1, %0
+  %conv = zext i1 %cmp4 to i32
+  %1 = load i32* @d, align 4
+  %add = add nsw i32 %conv, %1
+  %sext = shl i32 %add, 16
+  %conv6 = ashr exact i32 %sext, 16
+  %xor = xor i32 %conv6, 1
+  %inc = add nsw i32 %storemerge1, 1
+  br label %for.cond1
+
+for.inc7:                                         ; preds = %for.cond1
+  %2 = load i32* @d, align 4
+  %inc8 = add nsw i32 %2, 1
+  br label %for.cond
+
+for.end9:                                         ; preds = %for.cond
+  %cmp10 = icmp sgt i32 %f.0, 0
+  br i1 %cmp10, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.end9
+  store i32 0, i32* @b, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.end9
+  ret void
+}
diff --git a/test/Transforms/LoopSimplify/notify-scev.ll b/test/Transforms/LoopSimplify/notify-scev.ll
new file mode 100644
index 0000000..ee8e2ee
--- /dev/null
+++ b/test/Transforms/LoopSimplify/notify-scev.ll
@@ -0,0 +1,110 @@
+; RUN: opt -indvars -S %s | FileCheck %s
+;
+; PR18384: ValueHandleBase::ValueIsDeleted.
+;
+; Ensure that LoopSimplify calls ScalarEvolution::forgetLoop before
+; deleting a block, regardless of whether any values were hoisted out
+; of the block.
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+%struct.Params = type { [2 x [4 x [16 x i16]]] }
+
+; Verify that the loop tail is deleted, and we don't crash!
+;
+; CHECK-LABEL: @t
+; CHECK-LABEL: for.cond127.preheader:
+; CHECK-NOT: for.cond127:
+; CHECK-LABEL: for.body129:
+define void @t() {
+entry:
+  br label %for.body102
+
+for.body102:
+  br i1 undef, label %for.cond127.preheader, label %for.inc203
+
+for.cond127.preheader:
+  br label %for.body129
+
+for.cond127:
+  %cmp128 = icmp slt i32 %inc191, 2
+  br i1 %cmp128, label %for.body129, label %for.end192
+
+for.body129:
+  %uv.013 = phi i32 [ 0, %for.cond127.preheader ], [ %inc191, %for.cond127 ]
+  %idxprom130 = sext i32 %uv.013 to i64
+  br i1 undef, label %for.cond135.preheader.lr.ph, label %for.end185
+
+for.cond135.preheader.lr.ph:
+  br i1 undef, label %for.cond135.preheader.lr.ph.split.us, label %for.cond135.preheader.lr.ph.split_crit_edge
+
+for.cond135.preheader.lr.ph.split_crit_edge:
+  br label %for.cond135.preheader.lr.ph.split
+
+for.cond135.preheader.lr.ph.split.us:
+  br label %for.cond135.preheader.us
+
+for.cond135.preheader.us:
+  %block_y.09.us = phi i32 [ 0, %for.cond135.preheader.lr.ph.split.us ], [ %add184.us, %for.cond132.us ]
+  br i1 true, label %for.cond138.preheader.lr.ph.us, label %for.end178.us
+
+for.end178.us:
+  %add184.us = add nsw i32 %block_y.09.us, 4
+  br i1 undef, label %for.end185split.us-lcssa.us, label %for.cond132.us
+
+for.end174.us:
+  br i1 undef, label %for.cond138.preheader.us, label %for.cond135.for.end178_crit_edge.us
+
+for.inc172.us:
+  br i1 undef, label %for.cond142.preheader.us, label %for.end174.us
+
+for.body145.us:
+  %arrayidx163.us = getelementptr inbounds %struct.Params* undef, i64 0, i32 0, i64 %idxprom130, i64 %idxprom146.us
+  br i1 undef, label %for.body145.us, label %for.inc172.us
+
+for.cond142.preheader.us:
+  %j.04.us = phi i32 [ %block_y.09.us, %for.cond138.preheader.us ], [ undef, %for.inc172.us ]
+  %idxprom146.us = sext i32 %j.04.us to i64
+  br label %for.body145.us
+
+for.cond138.preheader.us:
+  br label %for.cond142.preheader.us
+
+for.cond132.us:
+  br i1 undef, label %for.cond135.preheader.us, label %for.cond132.for.end185_crit_edge.us-lcssa.us
+
+for.cond138.preheader.lr.ph.us:
+  br label %for.cond138.preheader.us
+
+for.cond135.for.end178_crit_edge.us:
+  br label %for.end178.us
+
+for.end185split.us-lcssa.us:
+  br label %for.end185split
+
+for.cond132.for.end185_crit_edge.us-lcssa.us:
+  br label %for.cond132.for.end185_crit_edge
+
+for.cond135.preheader.lr.ph.split:
+  br label %for.end185split
+
+for.end185split:
+  br label %for.end185
+
+for.cond132.for.end185_crit_edge:
+  br label %for.end185
+
+for.end185:
+  %inc191 = add nsw i32 %uv.013, 1
+  br i1 false, label %for.end192, label %for.cond127
+
+for.end192:
+  br label %for.inc203
+
+for.inc203:
+  br label %for.end205
+
+for.end205:
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg
new file mode 100644
index 0000000..a499579
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM64' in targets:
+    config.unsupported = True
diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll b/test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll
new file mode 100644
index 0000000..9a175ad
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=arm64-unknown-unknown -mcpu=cyclone -pre-RA-sched=list-hybrid < %s | FileCheck %s
+; rdar://10232252
+; Prevent LSR from making a poor choice that cannot be folded into an addressing mode.
+
+; Remove the -pre-RA-sched=list-hybrid option after fixing:
+; <rdar://problem/12702735> [ARM64][coalescer] need better register
+; coalescing for simple unit tests.
+
+; CHECK: testCase
+; CHECK: %while.body{{$}}
+; CHECK: ldr [[STREG:x[0-9]+]], [{{x[0-9]+}}], #8
+; CHECK-NEXT: str [[STREG]], [{{x[0-9]+}}], #8
+; CHECK: %while.end
+define i32 @testCase() nounwind ssp {
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %len.06 = phi i64 [ 1288, %entry ], [ %sub, %while.body ]
+  %pDst.05 = phi i64* [ inttoptr (i64 6442450944 to i64*), %entry ], [ %incdec.ptr1, %while.body ]
+  %pSrc.04 = phi i64* [ inttoptr (i64 4294967296 to i64*), %entry ], [ %incdec.ptr, %while.body ]
+  %incdec.ptr = getelementptr inbounds i64* %pSrc.04, i64 1
+  %tmp = load volatile i64* %pSrc.04, align 8
+  %incdec.ptr1 = getelementptr inbounds i64* %pDst.05, i64 1
+  store volatile i64 %tmp, i64* %pDst.05, align 8
+  %sub = add i64 %len.06, -8
+  %cmp = icmp sgt i64 %sub, -1
+  br i1 %cmp, label %while.body, label %while.end
+
+while.end:                                        ; preds = %while.body
+  tail call void inttoptr (i64 6442450944 to void ()*)() nounwind
+  ret i32 0
+}
diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll b/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll
new file mode 100644
index 0000000..10b2c3a
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll
@@ -0,0 +1,101 @@
+; RUN: llc < %s -O3 -mtriple=arm64-unknown-unknown -mcpu=cyclone -pre-RA-sched=list-hybrid | FileCheck %s
+; <rdar://problem/11635990> [arm64] [lsr] Inefficient EA/loop-exit calc in bzero_phys
+;
+; LSR on loop %while.cond should reassociate non-address mode
+; expressions at use %cmp16 to avoid sinking computation into %while.body18.
+;
+; Remove the -pre-RA-sched=list-hybrid option after fixing:
+; <rdar://problem/12702735> [ARM64][coalescer] need better register
+; coalescing for simple unit tests.
+
+; CHECK: @memset
+; CHECK: %while.body18{{$}}
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #8
+; First set the IVREG variable, then use it
+; CHECK-NEXT: sub [[IVREG:x[0-9]+]],
+; CHECK: [[IVREG]], #8
+; CHECK-NEXT: cmp  [[IVREG]], #7
+; CHECK-NEXT: b.hi
+define i8* @memset(i8* %dest, i32 %val, i64 %len) nounwind ssp noimplicitfloat {
+entry:
+  %cmp = icmp eq i64 %len, 0
+  br i1 %cmp, label %done, label %while.cond.preheader
+
+while.cond.preheader:                             ; preds = %entry
+  %conv = trunc i32 %val to i8
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.body, %while.cond.preheader
+  %ptr.0 = phi i8* [ %incdec.ptr, %while.body ], [ %dest, %while.cond.preheader ]
+  %len.addr.0 = phi i64 [ %dec, %while.body ], [ %len, %while.cond.preheader ]
+  %cond = icmp eq i64 %len.addr.0, 0
+  br i1 %cond, label %done, label %land.rhs
+
+land.rhs:                                         ; preds = %while.cond
+  %0 = ptrtoint i8* %ptr.0 to i64
+  %and = and i64 %0, 7
+  %cmp5 = icmp eq i64 %and, 0
+  br i1 %cmp5, label %if.end9, label %while.body
+
+while.body:                                       ; preds = %land.rhs
+  %incdec.ptr = getelementptr inbounds i8* %ptr.0, i64 1
+  store i8 %conv, i8* %ptr.0, align 1, !tbaa !0
+  %dec = add i64 %len.addr.0, -1
+  br label %while.cond
+
+if.end9:                                          ; preds = %land.rhs
+  %conv.mask = and i32 %val, 255
+  %1 = zext i32 %conv.mask to i64
+  %2 = shl nuw nsw i64 %1, 8
+  %ins18 = or i64 %2, %1
+  %3 = shl nuw nsw i64 %1, 16
+  %ins15 = or i64 %ins18, %3
+  %4 = shl nuw nsw i64 %1, 24
+  %5 = shl nuw nsw i64 %1, 32
+  %mask8 = or i64 %ins15, %4
+  %6 = shl nuw nsw i64 %1, 40
+  %mask5 = or i64 %mask8, %5
+  %7 = shl nuw nsw i64 %1, 48
+  %8 = shl nuw i64 %1, 56
+  %mask2.masked = or i64 %mask5, %6
+  %mask = or i64 %mask2.masked, %7
+  %ins = or i64 %mask, %8
+  %9 = bitcast i8* %ptr.0 to i64*
+  %cmp1636 = icmp ugt i64 %len.addr.0, 7
+  br i1 %cmp1636, label %while.body18, label %while.body29.lr.ph
+
+while.body18:                                     ; preds = %if.end9, %while.body18
+  %wideptr.038 = phi i64* [ %incdec.ptr19, %while.body18 ], [ %9, %if.end9 ]
+  %len.addr.137 = phi i64 [ %sub, %while.body18 ], [ %len.addr.0, %if.end9 ]
+  %incdec.ptr19 = getelementptr inbounds i64* %wideptr.038, i64 1
+  store i64 %ins, i64* %wideptr.038, align 8, !tbaa !2
+  %sub = add i64 %len.addr.137, -8
+  %cmp16 = icmp ugt i64 %sub, 7
+  br i1 %cmp16, label %while.body18, label %while.end20
+
+while.end20:                                      ; preds = %while.body18
+  %cmp21 = icmp eq i64 %sub, 0
+  br i1 %cmp21, label %done, label %while.body29.lr.ph
+
+while.body29.lr.ph:                               ; preds = %while.end20, %if.end9
+  %len.addr.1.lcssa49 = phi i64 [ %sub, %while.end20 ], [ %len.addr.0, %if.end9 ]
+  %wideptr.0.lcssa48 = phi i64* [ %incdec.ptr19, %while.end20 ], [ %9, %if.end9 ]
+  %10 = bitcast i64* %wideptr.0.lcssa48 to i8*
+  br label %while.body29
+
+while.body29:                                     ; preds = %while.body29, %while.body29.lr.ph
+  %len.addr.235 = phi i64 [ %len.addr.1.lcssa49, %while.body29.lr.ph ], [ %dec26, %while.body29 ]
+  %ptr.134 = phi i8* [ %10, %while.body29.lr.ph ], [ %incdec.ptr31, %while.body29 ]
+  %dec26 = add i64 %len.addr.235, -1
+  %incdec.ptr31 = getelementptr inbounds i8* %ptr.134, i64 1
+  store i8 %conv, i8* %ptr.134, align 1, !tbaa !0
+  %cmp27 = icmp eq i64 %dec26, 0
+  br i1 %cmp27, label %done, label %while.body29
+
+done:                                             ; preds = %while.cond, %while.body29, %while.end20, %entry
+  ret i8* %dest
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
+!2 = metadata !{metadata !"long long", metadata !0}
diff --git a/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll b/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll
new file mode 100644
index 0000000..5506994
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll
@@ -0,0 +1,50 @@
+; RUN: opt -S -loop-reduce -mcpu=corei7-avx -mtriple=x86_64-apple-macosx < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @indvar_expansion(i8* nocapture readonly %rowsptr) {
+entry:
+  br label %for.cond
+
+; SCEVExpander used to create induction variables in the loop %for.cond while
+; expanding the recurrence start value of loop strength reduced values from
+; %vector.body.
+
+; CHECK-LABEL: indvar_expansion
+; CHECK: for.cond:
+; CHECK-NOT: phi i3
+; CHECK: br i1 {{.+}}, label %for.cond
+
+for.cond:
+  %indvars.iv44 = phi i64 [ %indvars.iv.next45, %for.cond ], [ 0, %entry ]
+  %cmp = icmp eq i8 undef, 0
+  %indvars.iv.next45 = add nuw nsw i64 %indvars.iv44, 1
+  br i1 %cmp, label %for.cond, label %for.cond2
+
+for.cond2:
+  br i1 undef, label %for.cond2, label %for.body14.lr.ph
+
+for.body14.lr.ph:
+  %sext = shl i64 %indvars.iv44, 32
+  %0 = ashr exact i64 %sext, 32
+  %1 = sub i64 undef, %indvars.iv44
+  %2 = and i64 %1, 4294967295
+  %3 = add i64 %2, 1
+  %fold = add i64 %1, 1
+  %n.mod.vf = and i64 %fold, 7
+  %n.vec = sub i64 %3, %n.mod.vf
+  %end.idx.rnd.down = add i64 %n.vec, %0
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body14.lr.ph ]
+  %4 = getelementptr inbounds i8* %rowsptr, i64 %index
+  %5 = bitcast i8* %4 to <4 x i8>*
+  %wide.load = load <4 x i8>* %5, align 1
+  %index.next = add i64 %index, 8
+  %6 = icmp eq i64 %index.next, %end.idx.rnd.down
+  br i1 %6, label %for.end24, label %vector.body
+
+for.end24:
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/pr17473.ll b/test/Transforms/LoopStrengthReduce/X86/pr17473.ll
new file mode 100644
index 0000000..e7ebaa8
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/pr17473.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; LSR shouldn't normalize IV if it can't be denormalized to the original
+; expression.  In this testcase, the normalized expression was denormalized to
+; an expression different from the original, and we were losing sign extension.
+
+; CHECK:    [[TMP:%[a-z]+]] = trunc i32 {{.*}} to i8
+; CHECK:     {{%[a-z0-9]+}} = sext i8 [[TMP]] to i32
+
+@j = common global i32 0, align 4
+@c = common global i32 0, align 4
+@g = common global i32 0, align 4
+@h = common global i8 0, align 1
+@d = common global i32 0, align 4
+@i = common global i32 0, align 4
+@e = common global i32 0, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%x\0A\00", align 1
+@a = common global i32 0, align 4
+@b = common global i16 0, align 2
+
+; Function Attrs: nounwind optsize ssp uwtable
+define i32 @main() #0 {
+entry:
+  store i8 0, i8* @h, align 1
+  %0 = load i32* @j, align 4
+  %tobool.i = icmp eq i32 %0, 0
+  %1 = load i32* @d, align 4
+  %cmp3 = icmp sgt i32 %1, -1
+  %.lobit = lshr i32 %1, 31
+  %.lobit.not = xor i32 %.lobit, 1
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %fn3.exit
+  %inc9 = phi i8 [ 0, %entry ], [ %inc, %fn3.exit ]
+  %conv = sext i8 %inc9 to i32
+  br i1 %tobool.i, label %fn3.exit, label %land.rhs.i
+
+land.rhs.i:                                       ; preds = %for.body
+  store i32 0, i32* @c, align 4
+  br label %fn3.exit
+
+fn3.exit:                                         ; preds = %for.body, %land.rhs.i
+  %inc = add i8 %inc9, 1
+  %cmp = icmp sgt i8 %inc, -1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %fn3.exit
+  %.lobit.not. = select i1 %cmp3, i32 %.lobit.not, i32 0
+  store i32 %conv, i32* @g, align 4
+  store i32 %.lobit.not., i32* @i, align 4
+  store i8 %inc, i8* @h, align 1
+  %conv7 = sext i8 %inc to i32
+  %add = add nsw i32 %conv7, %conv
+  store i32 %add, i32* @e, align 4
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %add) #2
+  ret i32 0
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @printf(i8* nocapture readonly, ...) #1
+
+attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind optsize }
diff --git a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll
index 255cf41..aa688d9 100644
--- a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll
+++ b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll
@@ -13,7 +13,7 @@ target triple = "x86_64-apple-macosx"
 ; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ]
 ; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216
 ;
-; CHECK=LABEL: for.end:
+; CHECK-LABEL: for.end:
 ; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
 ; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us
 ; CHECK: %f = ashr i32 %sext.us, 24
diff --git a/test/Transforms/LoopStrengthReduce/pr18165.ll b/test/Transforms/LoopStrengthReduce/pr18165.ll
new file mode 100644
index 0000000..c38d6a6
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr18165.ll
@@ -0,0 +1,88 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; LSR shouldn't reuse IV if the resultant offset is not valid for the operand type.
+; CHECK-NOT: trunc i32 %.ph to i8
+
+%struct.anon = type { i32, i32, i32 }
+
+@c = global i32 1, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+@b = common global i32 0, align 4
+@a = common global %struct.anon zeroinitializer, align 4
+@e = common global %struct.anon zeroinitializer, align 4
+@d = common global i32 0, align 4
+@f = common global i32 0, align 4
+@g = common global i32 0, align 4
+@h = common global i32 0, align 4
+
+; Function Attrs: nounwind optsize ssp uwtable
+define i32 @main() #0 {
+entry:
+  %0 = load i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 0), align 4, !tbaa !1
+  %tobool7.i = icmp eq i32 %0, 0
+  %.promoted.i = load i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 2), align 4, !tbaa !6
+  %f.promoted.i = load i32* @f, align 4, !tbaa !7
+  br label %for.body6.i.outer
+
+for.body6.i.outer:                                ; preds = %entry, %lor.end.i
+  %.ph = phi i32 [ %add.i, %lor.end.i ], [ 0, %entry ]
+  %or1512.i.ph = phi i32 [ %or15.i, %lor.end.i ], [ %f.promoted.i, %entry ]
+  %or1410.i.ph = phi i32 [ %or14.i, %lor.end.i ], [ %.promoted.i, %entry ]
+  %p.addr.16.i.ph = phi i8 [ %inc10.i, %lor.end.i ], [ -128, %entry ]
+  br i1 %tobool7.i, label %if.end9.i, label %lbl.loopexit.i
+
+lbl.loopexit.i:                                   ; preds = %for.body6.i.outer, %lbl.loopexit.i
+  br label %lbl.loopexit.i
+
+if.end9.i:                                        ; preds = %for.body6.i.outer
+  %inc10.i = add i8 %p.addr.16.i.ph, 1
+  %tobool12.i = icmp eq i8 %p.addr.16.i.ph, 0
+  br i1 %tobool12.i, label %lor.rhs.i, label %lor.end.i
+
+lor.rhs.i:                                        ; preds = %if.end9.i
+  %1 = load i32* @b, align 4, !tbaa !7
+  %dec.i = add nsw i32 %1, -1
+  store i32 %dec.i, i32* @b, align 4, !tbaa !7
+  %tobool13.i = icmp ne i32 %1, 0
+  br label %lor.end.i
+
+lor.end.i:                                        ; preds = %lor.rhs.i, %if.end9.i
+  %2 = phi i1 [ true, %if.end9.i ], [ %tobool13.i, %lor.rhs.i ]
+  %lor.ext.i = zext i1 %2 to i32
+  %or14.i = or i32 %lor.ext.i, %or1410.i.ph
+  %or15.i = or i32 %or14.i, %or1512.i.ph
+  %add.i = add nsw i32 %.ph, 2
+  %cmp.i = icmp slt i32 %add.i, 21
+  br i1 %cmp.i, label %for.body6.i.outer, label %fn1.exit
+
+fn1.exit:                                         ; preds = %lor.end.i
+  store i32 0, i32* @g, align 4, !tbaa !7
+  store i32 %or14.i, i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 2), align 4, !tbaa !6
+  store i32 %or15.i, i32* @f, align 4, !tbaa !7
+  store i32 %add.i, i32* getelementptr inbounds (%struct.anon* @e, i64 0, i32 1), align 4, !tbaa !8
+  store i32 0, i32* @h, align 4, !tbaa !7
+  %3 = load i32* @b, align 4, !tbaa !7
+  %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %3) #2
+  ret i32 0
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @printf(i8* nocapture readonly, ...) #1
+
+attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind optsize }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.5 "}
+!1 = metadata !{metadata !2, metadata !3, i64 0}
+!2 = metadata !{metadata !"", metadata !3, i64 0, metadata !3, i64 4, metadata !3, i64 8}
+!3 = metadata !{metadata !"int", metadata !4, i64 0}
+!4 = metadata !{metadata !"omnipotent char", metadata !5, i64 0}
+!5 = metadata !{metadata !"Simple C/C++ TBAA"}
+!6 = metadata !{metadata !2, metadata !3, i64 8}
+!7 = metadata !{metadata !3, metadata !3, i64 0}
+!8 = metadata !{metadata !2, metadata !3, i64 4}
diff --git a/test/Transforms/LoopUnroll/X86/lit.local.cfg b/test/Transforms/LoopUnroll/X86/lit.local.cfg
new file mode 100644
index 0000000..ba763cf
--- /dev/null
+++ b/test/Transforms/LoopUnroll/X86/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopUnroll/X86/partial.ll b/test/Transforms/LoopUnroll/X86/partial.ll
new file mode 100644
index 0000000..15867cb
--- /dev/null
+++ b/test/Transforms/LoopUnroll/X86/partial.ll
@@ -0,0 +1,80 @@
+; RUN: opt < %s -S -loop-unroll -mcpu=nehalem -x86-use-partial-unrolling=1 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -mcpu=core -x86-use-partial-unrolling=1 | FileCheck -check-prefix=CHECK-NOUNRL %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(i32* noalias nocapture readnone %ip, double %alpha, double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
+entry:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds double* %b, i64 %index
+  %1 = bitcast double* %0 to <2 x double>*
+  %wide.load = load <2 x double>* %1, align 8
+  %.sum9 = or i64 %index, 2
+  %2 = getelementptr double* %b, i64 %.sum9
+  %3 = bitcast double* %2 to <2 x double>*
+  %wide.load8 = load <2 x double>* %3, align 8
+  %4 = fadd <2 x double> %wide.load, <double 1.000000e+00, double 1.000000e+00>
+  %5 = fadd <2 x double> %wide.load8, <double 1.000000e+00, double 1.000000e+00>
+  %6 = getelementptr inbounds double* %a, i64 %index
+  %7 = bitcast double* %6 to <2 x double>*
+  store <2 x double> %4, <2 x double>* %7, align 8
+  %.sum10 = or i64 %index, 2
+  %8 = getelementptr double* %a, i64 %.sum10
+  %9 = bitcast double* %8 to <2 x double>*
+  store <2 x double> %5, <2 x double>* %9, align 8
+  %index.next = add i64 %index, 4
+  %10 = icmp eq i64 %index.next, 1600
+  br i1 %10, label %for.end, label %vector.body
+
+; FIXME: We should probably unroll this loop by a factor of 2, but the cost
+; model needs to be fixed to account for instructions likely to be folded
+; as part of an addressing mode.
+; CHECK-LABEL: @foo
+; CHECK-NOUNRL-LABEL: @foo
+
+for.end:                                          ; preds = %vector.body
+  ret void
+}
+
+define void @bar(i32* noalias nocapture readnone %ip, double %alpha, double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
+entry:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %v0 = getelementptr inbounds double* %b, i64 %index
+  %v1 = bitcast double* %v0 to <2 x double>*
+  %wide.load = load <2 x double>* %v1, align 8
+  %v4 = fadd <2 x double> %wide.load, <double 1.000000e+00, double 1.000000e+00>
+  %v5 = fmul <2 x double> %v4, <double 8.000000e+00, double 8.000000e+00>
+  %v6 = getelementptr inbounds double* %a, i64 %index
+  %v7 = bitcast double* %v6 to <2 x double>*
+  store <2 x double> %v5, <2 x double>* %v7, align 8
+  %index.next = add i64 %index, 2
+  %v10 = icmp eq i64 %index.next, 1600
+  br i1 %v10, label %for.end, label %vector.body
+
+; FIXME: We should probably unroll this loop by a factor of 2, but the cost
+; model first needs to be fixed to account for instructions likely to be folded
+; as part of an addressing mode.
+
+; CHECK-LABEL: @bar
+; CHECK: fadd
+; CHECK-NEXT: fmul
+; CHECK: fadd
+; CHECK-NEXT: fmul
+
+; CHECK-NOUNRL-LABEL: @bar
+; CHECK-NOUNRL: fadd
+; CHECK-NOUNRL-NEXT: fmul
+; CHECK-NOUNRL-NOT: fadd
+
+for.end:                                          ; preds = %vector.body
+  ret void
+}
+
+attributes #0 = { nounwind uwtable }
+
diff --git a/test/Transforms/LoopVectorize/ARM/arm-unroll.ll b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
index 39363ab..8843fc2 100644
--- a/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
+++ b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S | FileCheck %s
 ; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift -S | FileCheck %s --check-prefix=SWIFT
+; RUN: opt < %s  -loop-vectorize -force-vector-width=1 -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift -S | FileCheck %s --check-prefix=SWIFTUNROLL
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios3.0.0"
@@ -30,3 +31,41 @@ define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp {
   %sum.0.lcssa = phi i32 [ 0, %0 ], [ %4, %.lr.ph ]
   ret i32 %sum.0.lcssa
 }
+
+; Verify the register limit. On ARM we don't have 16 allocatable registers.
+;SWIFTUNROLL-LABEL: @register_limit(
+;SWIFTUNROLL: load i32
+;SWIFTUNROLL-NOT: load i32
+define i32 @register_limit(i32* nocapture %A, i32 %n) {  ; one load per iteration feeding six running sums
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:
+  %i.02 = phi i32 [ %5, %.lr.ph ], [ 0, %0 ]  ; loop counter
+  %sum.01 = phi i32 [ %4, %.lr.ph ], [ 0, %0 ]  ; six accumulators, each fed by its own add below
+  %sum.02 = phi i32 [ %6, %.lr.ph ], [ 0, %0 ]
+  %sum.03 = phi i32 [ %7, %.lr.ph ], [ 0, %0 ]
+  %sum.04 = phi i32 [ %8, %.lr.ph ], [ 0, %0 ]
+  %sum.05 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
+  %sum.06 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds i32* %A, i32 %i.02
+  %3 = load i32* %2, align 4  ; the only load in the loop body
+  %4 = add nsw i32 %3, %sum.01
+  %5 = add nsw i32 %i.02, 1
+  %6 = add nsw i32 %3, %sum.02
+  %7 = add nsw i32 %3, %sum.03
+  %8 = add nsw i32 %3, %sum.04
+  %9 = add nsw i32 %3, %sum.05
+  %10 = add nsw i32 %3, %sum.06  ; accumulate into %sum.06 so every phi above is a self-contained sum
+  %exitcond = icmp eq i32 %5, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %4, %.lr.ph ]
+  %sum.1.lcssa = phi i32 [ 0, %0 ], [ %6, %.lr.ph ]
+  %sum.2.lcssa = phi i32 [ 0, %0 ], [ %7, %.lr.ph ]
+  %sum.4.lcssa = phi i32 [ 0, %0 ], [ %8, %.lr.ph ]
+  %sum.5.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
+  %sum.6.lcssa = phi i32 [ 0, %0 ], [ %10, %.lr.ph ]
+  ret i32 %sum.0.lcssa  ; only the first sum is returned
+}
diff --git a/test/Transforms/LoopVectorize/ARM64/gather-cost.ll b/test/Transforms/LoopVectorize/ARM64/gather-cost.ll
new file mode 100644
index 0000000..bb28538
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM64/gather-cost.ll
@@ -0,0 +1,85 @@
+; RUN: opt -loop-vectorize -mtriple=arm64-apple-ios -S -mcpu=cyclone < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+
+@kernel = global [512 x float] zeroinitializer, align 16
+@kernel2 = global [512 x float] zeroinitializer, align 16
+@kernel3 = global [512 x float] zeroinitializer, align 16
+@kernel4 = global [512 x float] zeroinitializer, align 16
+@src_data = global [1536 x float] zeroinitializer, align 16
+@r_ = global i8 0, align 1
+@g_ = global i8 0, align 1
+@b_ = global i8 0, align 1
+
+; We don't want to vectorize most loops containing gathers because they are
+; expensive.
+; Make sure we don't vectorize it.
+; CHECK-NOT: x float>
+
+define void @_Z4testmm(i64 %size, i64 %offset) {  ; sums three products per iteration and stores them as bytes to @r_, @g_, @b_
+entry:
+  %cmp53 = icmp eq i64 %size, 0
+  br i1 %cmp53, label %for.end, label %for.body.lr.ph  ; skip the loop when %size is zero
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %r.057 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add10, %for.body ]
+  %g.056 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add20, %for.body ]
+  %v.055 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add30, %for.body ]
+  %add = add i64 %v.055, %offset
+  %mul = mul i64 %add, 3  ; index into @src_data advances by 3 per iteration, so these loads are strided
+  %arrayidx = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %mul
+  %0 = load float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds [512 x float]* @kernel, i64 0, i64 %v.055
+  %1 = load float* %arrayidx2, align 4  ; the four kernel tables are indexed by %v.055 directly (unit stride)
+  %mul3 = fmul fast float %0, %1
+  %arrayidx4 = getelementptr inbounds [512 x float]* @kernel2, i64 0, i64 %v.055
+  %2 = load float* %arrayidx4, align 4
+  %mul5 = fmul fast float %mul3, %2
+  %arrayidx6 = getelementptr inbounds [512 x float]* @kernel3, i64 0, i64 %v.055
+  %3 = load float* %arrayidx6, align 4
+  %mul7 = fmul fast float %mul5, %3
+  %arrayidx8 = getelementptr inbounds [512 x float]* @kernel4, i64 0, i64 %v.055
+  %4 = load float* %arrayidx8, align 4
+  %mul9 = fmul fast float %mul7, %4
+  %add10 = fadd fast float %r.057, %mul9  ; first accumulator, uses @src_data element 3*i
+  %arrayidx.sum = add i64 %mul, 1
+  %arrayidx11 = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum
+  %5 = load float* %arrayidx11, align 4
+  %mul13 = fmul fast float %1, %5
+  %mul15 = fmul fast float %2, %mul13
+  %mul17 = fmul fast float %3, %mul15
+  %mul19 = fmul fast float %4, %mul17
+  %add20 = fadd fast float %g.056, %mul19  ; second accumulator, uses element 3*i + 1
+  %arrayidx.sum52 = add i64 %mul, 2
+  %arrayidx21 = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum52
+  %6 = load float* %arrayidx21, align 4
+  %mul23 = fmul fast float %1, %6
+  %mul25 = fmul fast float %2, %mul23
+  %mul27 = fmul fast float %3, %mul25
+  %mul29 = fmul fast float %4, %mul27
+  %add30 = fadd fast float %b.054, %mul29  ; third accumulator, uses element 3*i + 2
+  %inc = add i64 %v.055, 1
+  %exitcond = icmp ne i64 %inc, %size
+  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:
+  %add30.lcssa = phi float [ %add30, %for.body ]
+  %add20.lcssa = phi float [ %add20, %for.body ]
+  %add10.lcssa = phi float [ %add10, %for.body ]
+  %phitmp = fptoui float %add10.lcssa to i8  ; each float sum is converted to an unsigned byte
+  %phitmp60 = fptoui float %add20.lcssa to i8
+  %phitmp61 = fptoui float %add30.lcssa to i8
+  br label %for.end
+
+for.end:
+  %r.0.lcssa = phi i8 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  %g.0.lcssa = phi i8 [ %phitmp60, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  store i8 %r.0.lcssa, i8* @r_, align 1
+  store i8 %g.0.lcssa, i8* @g_, align 1
+  store i8 %b.0.lcssa, i8* @b_, align 1
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/ARM64/lit.local.cfg b/test/Transforms/LoopVectorize/ARM64/lit.local.cfg
new file mode 100644
index 0000000..de86e54
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM64/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM64' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg b/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg
new file mode 100644
index 0000000..2e46300
--- /dev/null
+++ b/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'PowerPC' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll b/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
new file mode 100644
index 0000000..6cd9c4d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -mcpu=pwr7 -mattr=+vsx -loop-vectorize -instcombine -S | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.GlobalData = type { [32000 x float], [3 x i32], [4 x i8], [32000 x float], [5 x i32], [12 x i8], [32000 x float], [7 x i32], [4 x i8], [32000 x float], [11 x i32], [4 x i8], [32000 x float], [13 x i32], [12 x i8], [256 x [256 x float]], [17 x i32], [12 x i8], [256 x [256 x float]], [19 x i32], [4 x i8], [256 x [256 x float]], [23 x i32], [4 x i8], [256 x [256 x float]] }
+
+@global_data = external global %struct.GlobalData, align 16
+@ntimes = external hidden unnamed_addr global i32, align 4
+
+define signext i32 @s173() #0 {  ; nested loops, inner one adds two float arrays into the upper half of the first
+entry:
+  %0 = load i32* @ntimes, align 4
+  %cmp21 = icmp sgt i32 %0, 0
+  br i1 %cmp21, label %for.cond1.preheader, label %for.end12
+
+for.cond1.preheader:                              ; preds = %for.end, %entry
+  %nl.022 = phi i32 [ %inc11, %for.end ], [ 0, %entry ]
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 0, i64 %indvars.iv
+  %1 = load float* %arrayidx, align 4
+  %arrayidx5 = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 3, i64 %indvars.iv
+  %2 = load float* %arrayidx5, align 4
+  %add = fadd float %1, %2
+  %3 = add nsw i64 %indvars.iv, 16000  ; store index is 16000 past the load index in the same field-0 array
+  %arrayidx8 = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 0, i64 %3
+  store float %add, float* %arrayidx8, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 16000  ; loads cover [0, 16000), stores cover [16000, 32000)
+  br i1 %exitcond, label %for.end, label %for.body3
+
+for.end:                                          ; preds = %for.body3
+  %inc11 = add nsw i32 %nl.022, 1
+  %4 = load i32* @ntimes, align 4
+  %mul = mul nsw i32 %4, 10  ; outer loop repeats while the counter is below 10 * @ntimes
+  %cmp = icmp slt i32 %inc11, %mul
+  br i1 %cmp, label %for.cond1.preheader, label %for.end12
+
+for.end12:                                        ; preds = %for.end, %entry
+  ret i32 0
+
+; CHECK-LABEL: @s173
+; CHECK: load <4 x float>*
+; CHECK: add i64 %index, 16000
+; CHECK: ret i32 0
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
index 885418c..faed77d 100644
--- a/test/Transforms/LoopVectorize/X86/already-vectorized.ll
+++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -debug-only=loop-vectorize -O3 -S 2>&1 | FileCheck %s
+; RUN: opt < %s -disable-loop-unrolling -debug-only=loop-vectorize -O3 -S 2>&1 | FileCheck %s
 ; REQUIRES: asserts
 ; We want to make sure that we don't even try to vectorize loops again
 ; The vectorizer used to mark the un-vectorized loop only as already vectorized
diff --git a/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll b/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
new file mode 100644
index 0000000..529ed88
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -mcpu=core-avx2 -loop-vectorize -S | llc -mcpu=core-avx2 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+@float_array = common global [10000 x float] zeroinitializer, align 16
+@unsigned_array = common global [10000 x i32] zeroinitializer, align 16
+
+; If we need to scalarize the fptoui and then use inserts to build up the
+; vector again, then there is certainly no value in going 256-bit wide.
+; CHECK-NOT: vinserti128
+
+define void @convert(i32 %N) {  ; unsigned_array[i] = (unsigned) float_array[i] for %N iterations
+entry:
+  %0 = icmp eq i32 %N, 0
+  br i1 %0, label %for.end, label %for.body.preheader  ; nothing to do when %N is zero
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds [10000 x float]* @float_array, i64 0, i64 %indvars.iv
+  %1 = load float* %arrayidx, align 4
+  %conv = fptoui float %1 to i32  ; the conversion whose vectorization cost this test is about
+  %arrayidx2 = getelementptr inbounds [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; compare the 64-bit counter against the 32-bit %N
+  %exitcond = icmp eq i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll b/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
new file mode 100644
index 0000000..ef3e3be
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -mcpu=core-avx2 -loop-vectorize -S | llc -mcpu=core-avx2 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+@n = global i32 10000, align 4
+@double_array = common global [10000 x double] zeroinitializer, align 16
+@unsigned_array = common global [10000 x i32] zeroinitializer, align 16
+
+; If we need to scalarize the fptoui and then use inserts to build up the
+; vector again, then there is certainly no value in going 256-bit wide.
+; CHECK-NOT: vpinsrd
+
+define void @convert() {  ; unsigned_array[i] = (unsigned) double_array[i], bound read once from @n
+entry:
+  %0 = load i32* @n, align 4
+  %cmp4 = icmp eq i32 %0, 0
+  br i1 %cmp4, label %for.end, label %for.body.preheader  ; nothing to do when @n is zero
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds [10000 x double]* @double_array, i64 0, i64 %indvars.iv
+  %1 = load double* %arrayidx, align 8
+  %conv = fptoui double %1 to i32  ; the conversion whose vectorization cost this test is about
+  %arrayidx2 = getelementptr inbounds [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %2 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp ult i32 %2, %0  ; unsigned compare against the value loaded from @n in entry
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll b/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
new file mode 100644
index 0000000..23e6227
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S -debug-only=loop-vectorize 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+
+; CHECK: cost of 7 for VF 8 For instruction:   %conv = fptosi float %tmp to i8
+define void @float_to_sint8_cost(i8* noalias nocapture %a, float* noalias nocapture readonly %b) nounwind {  ; a[i] = (signed char) b[i] for 256 elements
+entry:
+  br label %for.body
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+  %tmp = load float* %arrayidx, align 4
+  %conv = fptosi float %tmp to i8  ; instruction whose cost the check line above matches
+  %arrayidx2 = getelementptr inbounds i8* %a, i64 %indvars.iv
+  store i8 %conv, i8* %arrayidx2, align 1  ; byte-indexed address, so only 1-byte alignment can be promised
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 256
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
new file mode 100644
index 0000000..224823b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -0,0 +1,175 @@
+; RUN: opt < %s -mcpu=corei7 -O1 -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O1
+; RUN: opt < %s -mcpu=corei7 -O2 -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O2
+; RUN: opt < %s -mcpu=corei7 -O3 -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O3
+; RUN: opt < %s -mcpu=corei7 -Os -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=Os
+; RUN: opt < %s -mcpu=corei7 -Oz -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=Oz
+; RUN: opt < %s -mcpu=corei7 -O1 -vectorize-loops -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O1VEC
+; RUN: opt < %s -mcpu=corei7 -Oz -vectorize-loops -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=OzVEC
+; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O1VEC2
+; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=OzVEC2
+; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O3DIS
+
+; This file tests the llvm.vectorizer.enable loop metadata forcing vectorization
+; even when optimization levels are too low, or when vectorization is disabled.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; O1-LABEL: @enabled(
+; O1: store <4 x i32>
+; O1: ret i32
+; O2-LABEL: @enabled(
+; O2: store <4 x i32>
+; O2: ret i32
+; O3-LABEL: @enabled(
+; O3: store <4 x i32>
+; O3: ret i32
+; Pragma always wins!
+; O3DIS-LABEL: @enabled(
+; O3DIS: store <4 x i32>
+; O3DIS: ret i32
+; Os-LABEL: @enabled(
+; Os: store <4 x i32>
+; Os: ret i32
+; Oz-LABEL: @enabled(
+; Oz: store <4 x i32>
+; Oz: ret i32
+; O1VEC-LABEL: @enabled(
+; O1VEC: store <4 x i32>
+; O1VEC: ret i32
+; OzVEC-LABEL: @enabled(
+; OzVEC: store <4 x i32>
+; OzVEC: ret i32
+; O1VEC2-LABEL: @enabled(
+; O1VEC2: store <4 x i32>
+; O1VEC2: ret i32
+; OzVEC2-LABEL: @enabled(
+; OzVEC2: store <4 x i32>
+; OzVEC2: ret i32
+
+define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {  ; a[i] = b[i] + N for 32 elements, returns a[0]
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %N
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 32
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0  ; !0 carries llvm.vectorizer.enable set to 1
+
+for.end:                                          ; preds = %for.body
+  %1 = load i32* %a, align 4
+  ret i32 %1
+}
+
+; O1-LABEL: @nopragma(
+; O1-NOT: store <4 x i32>
+; O1: ret i32
+; O2-LABEL: @nopragma(
+; O2: store <4 x i32>
+; O2: ret i32
+; O3-LABEL: @nopragma(
+; O3: store <4 x i32>
+; O3: ret i32
+; O3DIS-LABEL: @nopragma(
+; O3DIS-NOT: store <4 x i32>
+; O3DIS: ret i32
+; Os-LABEL: @nopragma(
+; Os: store <4 x i32>
+; Os: ret i32
+; Oz-LABEL: @nopragma(
+; Oz-NOT: store <4 x i32>
+; Oz: ret i32
+; O1VEC-LABEL: @nopragma(
+; O1VEC: store <4 x i32>
+; O1VEC: ret i32
+; OzVEC-LABEL: @nopragma(
+; OzVEC: store <4 x i32>
+; OzVEC: ret i32
+; O1VEC2-LABEL: @nopragma(
+; O1VEC2: store <4 x i32>
+; O1VEC2: ret i32
+; OzVEC2-LABEL: @nopragma(
+; OzVEC2: store <4 x i32>
+; OzVEC2: ret i32
+
+define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {  ; same loop as @enabled but with no loop metadata
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %N
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 32
+  br i1 %exitcond, label %for.end, label %for.body  ; no !llvm.loop attachment on this back edge
+
+for.end:                                          ; preds = %for.body
+  %1 = load i32* %a, align 4
+  ret i32 %1
+}
+
+; O1-LABEL: @disabled(
+; O1-NOT: store <4 x i32>
+; O1: ret i32
+; O2-LABEL: @disabled(
+; O2-NOT: store <4 x i32>
+; O2: ret i32
+; O3-LABEL: @disabled(
+; O3-NOT: store <4 x i32>
+; O3: ret i32
+; O3DIS-LABEL: @disabled(
+; O3DIS-NOT: store <4 x i32>
+; O3DIS: ret i32
+; Os-LABEL: @disabled(
+; Os-NOT: store <4 x i32>
+; Os: ret i32
+; Oz-LABEL: @disabled(
+; Oz-NOT: store <4 x i32>
+; Oz: ret i32
+; O1VEC-LABEL: @disabled(
+; O1VEC-NOT: store <4 x i32>
+; O1VEC: ret i32
+; OzVEC-LABEL: @disabled(
+; OzVEC-NOT: store <4 x i32>
+; OzVEC: ret i32
+; O1VEC2-LABEL: @disabled(
+; O1VEC2-NOT: store <4 x i32>
+; O1VEC2: ret i32
+; OzVEC2-LABEL: @disabled(
+; OzVEC2-NOT: store <4 x i32>
+; OzVEC2: ret i32
+
+define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {  ; same loop as @enabled but with vectorization switched off by metadata
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %N
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 32
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2  ; !2 carries llvm.vectorizer.enable set to 0
+
+for.end:                                          ; preds = %for.body
+  %1 = load i32* %a, align 4
+  ret i32 %1
+}
+
+!0 = metadata !{metadata !0, metadata !1}
+!1 = metadata !{metadata !"llvm.vectorizer.enable", i1 1}
+!2 = metadata !{metadata !2, metadata !3}
+!3 = metadata !{metadata !"llvm.vectorizer.enable", i1 0}
diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll
index 14ac417..dfa4faa 100644
--- a/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -115,6 +115,31 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
   ret void
 }
 
+; N is unknown, we need a tail. Can't vectorize because the loop is cold.
+;CHECK-LABEL: @example4(
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) {  ; copies %n i32 values from %q to %p
+  %1 = icmp eq i32 %n, 0
+  br i1 %1, label %._crit_edge, label %.lr.ph, !prof !0  ; weights in !0 are 64 for skipping the loop and 4 for entering it
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %.05 = phi i32 [ %2, %.lr.ph ], [ %n, %0 ]
+  %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ]
+  %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
+  %2 = add nsw i32 %.05, -1  ; remaining count, loop stops when it reaches zero
+  %3 = getelementptr inbounds i32* %.023, i64 1
+  %4 = load i32* %.023, align 16
+  %5 = getelementptr inbounds i32* %.014, i64 1
+  store i32 %4, i32* %.014, align 16
+  %6 = icmp eq i32 %2, 0
+  br i1 %6, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
 
 ; We can't vectorize this one because we need a runtime ptr check.
 ;CHECK-LABEL: @example23(
diff --git a/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll b/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
new file mode 100644
index 0000000..86c32b2
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S -debug-only=loop-vectorize 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+
+; CHECK: cost of 20 for VF 2 For instruction:   %conv = uitofp i64 %tmp to double
+; CHECK: cost of 40 for VF 4 For instruction:   %conv = uitofp i64 %tmp to double
+define void @uint64_to_double_cost(i64* noalias nocapture readonly %a, double* noalias nocapture %b) nounwind {  ; b[i] = (double) a[i] for 256 elements; %a is only read, %b is written
+entry:
+  br label %for.body
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i64* %a, i64 %indvars.iv
+  %tmp = load i64* %arrayidx, align 4
+  %conv = uitofp i64 %tmp to double  ; instruction whose cost the check lines above match
+  %arrayidx2 = getelementptr inbounds double* %b, i64 %indvars.iv
+  store double %conv, double* %arrayidx2, align 4  ; writes through %b, so %b must not be readonly
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 256
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
index ea107dc..d5024bb 100644
--- a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
+++ b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
@@ -1,13 +1,26 @@
-; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S \
+; RUN:   | FileCheck %s --check-prefix=CHECK-VECTOR
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=1 -force-vector-unroll=0 -dce -S \
+; RUN:   | FileCheck %s --check-prefix=CHECK-SCALAR
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
-;CHECK-LABEL: @foo(
-;CHECK: load <4 x i32>
-;CHECK-NOT: load <4 x i32>
-;CHECK: store <4 x i32>
-;CHECK-NOT: store <4 x i32>
-;CHECK: ret
+
+; We don't unroll this loop because it has a small constant trip count.
+;
+; CHECK-VECTOR-LABEL: @foo(
+; CHECK-VECTOR: load <4 x i32>
+; CHECK-VECTOR-NOT: load <4 x i32>
+; CHECK-VECTOR: store <4 x i32>
+; CHECK-VECTOR-NOT: store <4 x i32>
+; CHECK-VECTOR: ret
+;
+; CHECK-SCALAR-LABEL: @foo(
+; CHECK-SCALAR: load i32*
+; CHECK-SCALAR-NOT: load i32*
+; CHECK-SCALAR: store i32
+; CHECK-SCALAR-NOT: store i32
+; CHECK-SCALAR: ret
 define i32 @foo(i32* nocapture %A) nounwind uwtable ssp {
   br label %1
 
@@ -26,10 +39,18 @@ define i32 @foo(i32* nocapture %A) nounwind uwtable ssp {
   ret i32 undef
 }
 
-;CHECK-LABEL: @bar(
-;CHECK: store <4 x i32>
-;CHECK: store <4 x i32>
-;CHECK: ret
+; But this is a good small loop to unroll as we don't know of a bound on its
+; trip count.
+;
+; CHECK-VECTOR-LABEL: @bar(
+; CHECK-VECTOR: store <4 x i32>
+; CHECK-VECTOR: store <4 x i32>
+; CHECK-VECTOR: ret
+;
+; CHECK-SCALAR-LABEL: @bar(
+; CHECK-SCALAR: store i32
+; CHECK-SCALAR: store i32
+; CHECK-SCALAR: ret
 define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
@@ -48,3 +69,32 @@ define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp {
 ._crit_edge:                                      ; preds = %.lr.ph, %0
   ret i32 undef
 }
+
+; Also unroll if we need a runtime check but it was going to be added for
+; vectorization anyway.
+; CHECK-VECTOR-LABEL: @runtime_chk(
+; CHECK-VECTOR: store <4 x float>
+; CHECK-VECTOR: store <4 x float>
+;
+; But not if the unrolling would introduce the runtime check.
+; CHECK-SCALAR-LABEL: @runtime_chk(
+; CHECK-SCALAR: store float
+; CHECK-SCALAR-NOT: store float
+define void @runtime_chk(float* %A, float* %B, float %N) {  ; A[i] = B[i] * N for 256 elements; neither pointer is marked noalias
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %mul = fmul float %0, %N
+  %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv
+  store float %mul, float* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 256
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/flags.ll b/test/Transforms/LoopVectorize/flags.ll
index a4ebb42..21d0937 100644
--- a/test/Transforms/LoopVectorize/flags.ll
+++ b/test/Transforms/LoopVectorize/flags.ll
@@ -51,3 +51,29 @@ define i32 @flags2(i32 %n, i32* nocapture %A) nounwind uwtable ssp {
 ._crit_edge:                                      ; preds = %.lr.ph, %0
   ret i32 undef
 }
+
+; Make sure we copy fast math flags and use them for the final reduction.
+; CHECK-LABEL: fast_math
+; CHECK: load <4 x float>
+; CHECK: fadd fast <4 x float>
+; CHECK: br
+; CHECK: fadd fast <4 x float>
+; CHECK: fadd fast <4 x float>
+define float @fast_math(float* noalias %s) {  ; returns the sum of s[0..255] accumulated with fast-math adds
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %q.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float* %s, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %add = fadd fast float %q.04, %0  ; the reduction step, carrying the fast flag the checks look for
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 256
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  %add.lcssa = phi float [ %add, %for.body ]
+  ret float %add.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll
index c45098d..0dfbab0 100644
--- a/test/Transforms/LoopVectorize/float-reduction.ll
+++ b/test/Transforms/LoopVectorize/float-reduction.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 ;CHECK-LABEL: @foo(
-;CHECK: fadd <4 x float>
+;CHECK: fadd fast <4 x float>
 ;CHECK: ret
 define float @foo(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
 entry:
diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll
index 0118fb4..d64d67f 100644
--- a/test/Transforms/LoopVectorize/global_alias.ll
+++ b/test/Transforms/LoopVectorize/global_alias.ll
@@ -387,7 +387,7 @@ for.end:                                          ; preds = %for.cond
 ;   return Foo.A[a];
 ; }
 ; CHECK-LABEL: define i32 @noAlias08(
-; CHECK: sub nsw <4 x i32>
+; CHECK: sub <4 x i32>
 ; CHECK: ret
 
 define i32 @noAlias08(i32 %a) #0 {
@@ -439,7 +439,7 @@ for.end:                                          ; preds = %for.cond
 ;   return Foo.A[a];
 ; }
 ; CHECK-LABEL: define i32 @noAlias09(
-; CHECK: sub nsw <4 x i32>
+; CHECK: sub <4 x i32>
 ; CHECK: ret
 
 define i32 @noAlias09(i32 %a) #0 {
@@ -491,7 +491,7 @@ for.end:                                          ; preds = %for.cond
 ;   return *(PA+a);
 ; }
 ; CHECK-LABEL: define i32 @noAlias10(
-; CHECK-NOT: sub nsw <4 x i32>
+; CHECK-NOT: sub {{.*}} <4 x i32>
 ; CHECK: ret
 ;
 ; TODO: This test vectorizes (with run-time check) on real targets with -O3)
@@ -721,7 +721,7 @@ for.end:                                          ; preds = %for.cond
 ;   return Foo.A[a];
 ; }
 ; CHECK-LABEL: define i32 @noAlias14(
-; CHECK: sub nsw <4 x i32>
+; CHECK: sub <4 x i32>
 ; CHECK: ret
 
 define i32 @noAlias14(i32 %a) #0 {
diff --git a/test/Transforms/LoopVectorize/if-pred-stores.ll b/test/Transforms/LoopVectorize/if-pred-stores.ll
new file mode 100644
index 0000000..7b0e181
--- /dev/null
+++ b/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -0,0 +1,126 @@
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-unroll=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-unroll=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Test predication of stores: the store in %if.then must stay guarded by a per-lane (VEC) or per-copy (UNROLL) branch.
+define i32 @test(i32* nocapture %f) #0 {
+entry:
+  br label %for.body
+
+; VEC-LABEL: test
+; VEC:   %[[v8:.+]] = icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100>
+; VEC:   %[[v9:.+]] = add nsw <2 x i32> %{{.*}}, <i32 20, i32 20>
+; VEC:   %[[v10:.+]] = and <2 x i1> %[[v8]], <i1 true, i1 true>
+; VEC:   %[[v11:.+]] = extractelement <2 x i1> %[[v10]], i32 0
+; VEC:   %[[v12:.+]] = icmp eq i1 %[[v11]], true
+; VEC:   br i1 %[[v12]], label %[[cond:.+]], label %[[else:.+]]
+;
+; VEC: [[cond]]:
+; VEC:   %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0
+; VEC:   %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0
+; VEC:   store i32 %[[v13]], i32* %[[v14]], align 4
+; VEC:   br label %[[else]]
+;
+; VEC: [[else]]:
+; VEC:   %[[v15:.+]] = extractelement <2 x i1> %[[v10]], i32 1
+; VEC:   %[[v16:.+]] = icmp eq i1 %[[v15]], true
+; VEC:   br i1 %[[v16]], label %[[cond2:.+]], label %[[else2:.+]]
+;
+; VEC: [[cond2]]:
+; VEC:   %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1
+; VEC:   %[[v18:.+]] = extractelement <2 x i32*> %{{.*}}, i32 1
+; VEC:   store i32 %[[v17]], i32* %[[v18]], align 4
+; VEC:   br label %[[else2]]
+;
+; VEC: [[else2]]:
+
+; UNROLL-LABEL: test
+; UNROLL: vector.body:
+; UNROLL:   %[[IND:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 0
+; UNROLL:   %[[IND1:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 1
+; UNROLL:   %[[v0:[a-zA-Z0-9]+]] = getelementptr inbounds i32* %f, i64 %[[IND]]
+; UNROLL:   %[[v1:[a-zA-Z0-9]+]] = getelementptr inbounds i32* %f, i64 %[[IND1]]
+; UNROLL:   %[[v2:[a-zA-Z0-9]+]] = load i32* %[[v0]], align 4
+; UNROLL:   %[[v3:[a-zA-Z0-9]+]] = load i32* %[[v1]], align 4
+; UNROLL:   %[[v4:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v2]], 100
+; UNROLL:   %[[v5:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v3]], 100
+; UNROLL:   %[[v6:[a-zA-Z0-9]+]] = add nsw i32 %[[v2]], 20
+; UNROLL:   %[[v7:[a-zA-Z0-9]+]] = add nsw i32 %[[v3]], 20
+; UNROLL:   %[[v8:[a-zA-Z0-9]+]] = icmp eq i1 %[[v4]], true
+; UNROLL:   br i1 %[[v8]], label %[[cond:[a-zA-Z0-9.]+]], label %[[else:[a-zA-Z0-9.]+]]
+;
+; UNROLL: [[cond]]:
+; UNROLL:   store i32 %[[v6]], i32* %[[v0]], align 4
+; UNROLL:   br label %[[else]]
+;
+; UNROLL: [[else]]:
+; UNROLL:   %[[v9:[a-zA-Z0-9]+]] = icmp eq i1 %[[v5]], true
+; UNROLL:   br i1 %[[v9]], label %[[cond2:[a-zA-Z0-9.]+]], label %[[else2:[a-zA-Z0-9.]+]]
+;
+; UNROLL: [[cond2]]:
+; UNROLL:   store i32 %[[v7]], i32* %[[v1]], align 4
+; UNROLL:   br label %[[else2]]
+;
+; UNROLL: [[else2]]:
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %arrayidx = getelementptr inbounds i32* %f, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 100
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:
+  %add = add nsw i32 %0, 20
+  store i32 %add, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 128
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 0
+}
+
+; Track basic blocks when unrolling conditional blocks. This code used to assert
+; because we did not update the phi nodes with the proper predecessor in the
+; vectorized loop body.
+; PR18724
+
+; UNROLL-LABEL: bug18724
+; UNROLL: store i32
+; UNROLL: store i32
+
+define void @bug18724() {
+entry:
+  br label %for.body9
+
+for.body9:
+  br i1 undef, label %for.inc26, label %for.body14
+
+for.body14:
+  %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc23 ], [ undef, %for.body9 ]
+  %iNewChunks.120 = phi i32 [ %iNewChunks.2, %for.inc23 ], [ undef, %for.body9 ]
+  %arrayidx16 = getelementptr inbounds [768 x i32]* undef, i64 0, i64 %indvars.iv3
+  %tmp = load i32* %arrayidx16, align 4
+  br i1 undef, label %if.then18, label %for.inc23
+
+if.then18:
+  store i32 2, i32* %arrayidx16, align 4
+  %inc21 = add nsw i32 %iNewChunks.120, 1
+  br label %for.inc23
+
+for.inc23:
+  %iNewChunks.2 = phi i32 [ %inc21, %if.then18 ], [ %iNewChunks.120, %for.body14 ]
+  %indvars.iv.next4 = add nsw i64 %indvars.iv3, 1
+  %tmp1 = trunc i64 %indvars.iv3 to i32
+  %cmp13 = icmp slt i32 %tmp1, 0
+  br i1 %cmp13, label %for.body14, label %for.inc26
+
+for.inc26:
+  %iNewChunks.1.lcssa = phi i32 [ undef, %for.body9 ], [ %iNewChunks.2, %for.inc23 ]
+  unreachable
+}
diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll
index d35bd58..71bedb7 100644
--- a/test/Transforms/LoopVectorize/increment.ll
+++ b/test/Transforms/LoopVectorize/increment.ll
@@ -34,7 +34,7 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp {
   ret void
 }
 
-; Can't vectorize this loop because the access to A[X] is non linear.
+; Can't vectorize this loop because the access to A[X] is non-linear.
 ;
 ;  for (i = 0; i < n; ++i) {
 ;    A[B[i]]++;
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
index 50c3b6b..ad2c663 100644
--- a/test/Transforms/LoopVectorize/induction.ll
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -75,7 +75,7 @@ loopexit:
 ; PR17532
 
 ; CHECK-LABEL: i8_loop
-; CHECK; icmp eq i32 {{.*}}, 256
+; CHECK: icmp eq i32 {{.*}}, 256
 define i32 @i8_loop() nounwind readnone ssp uwtable {
   br label %1
 
@@ -92,7 +92,7 @@ define i32 @i8_loop() nounwind readnone ssp uwtable {
 }
 
 ; CHECK-LABEL: i16_loop
-; CHECK; icmp eq i32 {{.*}}, 65536
+; CHECK: icmp eq i32 {{.*}}, 65536
 
 define i32 @i16_loop() nounwind readnone ssp uwtable {
   br label %1
diff --git a/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
new file mode 100644
index 0000000..88a29c5
--- /dev/null
+++ b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
@@ -0,0 +1,42 @@
+; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; We must not vectorize this loop. %add55 is not a reduction. Its value is used
+; multiple times.
+
+; PR18526
+
+; CHECK: multiple_use_of_value
+; CHECK-NOT: <2 x i32>
+
+define void @multiple_use_of_value() {
+entry:
+  %n = alloca i32, align 4
+  %k7 = alloca i32, align 4
+  %nf = alloca i32, align 4
+  %0 = load i32* %k7, align 4
+  %.neg1 = sub i32 0, %0
+  %n.promoted = load i32* %n, align 4
+  %nf.promoted = load i32* %nf, align 4
+  br label %for.body
+
+for.body:
+  %inc107 = phi i32 [ undef, %entry ], [ %inc10, %for.body ]
+  %inc6 = phi i32 [ %nf.promoted, %entry ], [ undef, %for.body ]
+  %add55 = phi i32 [ %n.promoted, %entry ], [ %add5, %for.body ]
+  %.neg2 = sub i32 0, %inc6
+  %add.neg = add i32 0, %add55
+  %add4.neg = add i32 %add.neg, %.neg1
+  %sub = add i32 %add4.neg, %.neg2
+  %add5 = add i32 %sub, %add55
+  %inc10 = add i32 %inc107, 1
+  %cmp = icmp ult i32 %inc10, 61
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  %add5.lcssa = phi i32 [ %add5, %for.body ]
+  store i32 %add5.lcssa, i32* %n, align 4
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
index a2b9ad9..e7b1e2a 100644
--- a/test/Transforms/LoopVectorize/runtime-check-readonly.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
@@ -7,11 +7,13 @@ target triple = "x86_64-apple-macosx10.8.0"
 ;CHECK: br
 ;CHECK: getelementptr
 ;CHECK-NEXT: getelementptr
-;CHECK-NEXT: icmp uge
-;CHECK-NEXT: icmp uge
-;CHECK-NEXT: icmp uge
-;CHECK-NEXT: icmp uge
-;CHECK-NEXT: and
+;CHECK-DAG: icmp uge
+;CHECK-DAG: icmp uge
+;CHECK-DAG: icmp uge
+;CHECK-DAG: icmp uge
+;CHECK-DAG: and
+;CHECK-DAG: and
+;CHECK: br
 ;CHECK: ret
 define void @add_ints(i32* nocapture %A, i32* nocapture %B, i32* nocapture %C) {
 entry:
diff --git a/test/Transforms/LoopVectorize/unroll_novec.ll b/test/Transforms/LoopVectorize/unroll_novec.ll
index 33f128d..89f4678 100644
--- a/test/Transforms/LoopVectorize/unroll_novec.ll
+++ b/test/Transforms/LoopVectorize/unroll_novec.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=1 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=1 -force-target-num-scalar-regs=16 -force-target-max-scalar-unroll=8 -force-target-instruction-cost=1 -small-loop-cost=40 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -12,10 +12,20 @@ target triple = "x86_64-apple-macosx10.8.0"
 ;CHECK-LABEL: @inc(
 ;CHECK: load i32*
 ;CHECK: load i32*
+;CHECK: load i32*
+;CHECK: load i32*
+;CHECK-NOT: load i32*
+;CHECK: add nsw i32
 ;CHECK: add nsw i32
 ;CHECK: add nsw i32
+;CHECK: add nsw i32
+;CHECK-NOT: add nsw i32
+;CHECK: store i32
+;CHECK: store i32
 ;CHECK: store i32
 ;CHECK: store i32
+;CHECK-NOT: store i32
+;CHECK: add i64 %{{.*}}, 4
 ;CHECK: ret void
 define void @inc(i32 %n) nounwind uwtable noinline ssp {
   %1 = icmp sgt i32 %n, 0
diff --git a/test/Transforms/LoopVectorize/value-ptr-bug.ll b/test/Transforms/LoopVectorize/value-ptr-bug.ll
index e8d3728..6b06afa 100644
--- a/test/Transforms/LoopVectorize/value-ptr-bug.ll
+++ b/test/Transforms/LoopVectorize/value-ptr-bug.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; PR16073
 
-; Because we were caching value pointers accross a function call that could RAUW
+; Because we were caching value pointers across a function call that could RAUW
 ; we would generate an undefined value store below:
 ; SCEVExpander::expandCodeFor would change a value (the start value of an
 ; induction) that we cached in the induction variable list.
diff --git a/test/Transforms/LoopVectorize/version-mem-access.ll b/test/Transforms/LoopVectorize/version-mem-access.ll
new file mode 100644
index 0000000..51d20e2
--- /dev/null
+++ b/test/Transforms/LoopVectorize/version-mem-access.ll
@@ -0,0 +1,87 @@
+; RUN: opt -basicaa -loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-unroll=1 < %s -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: test
+define void @test(i32* noalias %A, i64 %AStride,
+                  i32* noalias %B, i32 %BStride,
+                  i32* noalias %C, i64 %CStride, i32 %N) {
+entry:
+  %cmp13 = icmp eq i32 %N, 0
+  br i1 %cmp13, label %for.end, label %for.body.preheader
+
+; CHECK-DAG: icmp ne i64 %AStride, 1
+; CHECK-DAG: icmp ne i32 %BStride, 1
+; CHECK-DAG: icmp ne i64 %CStride, 1
+; CHECK: or
+; CHECK: or
+; CHECK: br
+
+; CHECK: vector.body
+; CHECK: load <2 x i32>
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %iv.trunc = trunc i64 %indvars.iv to i32
+  %mul = mul i32 %iv.trunc, %BStride
+  %mul64 = zext i32 %mul to i64
+  %arrayidx = getelementptr inbounds i32* %B, i64 %mul64
+  %0 = load i32* %arrayidx, align 4
+  %mul2 = mul nsw i64 %indvars.iv, %CStride
+  %arrayidx3 = getelementptr inbounds i32* %C, i64 %mul2
+  %1 = load i32* %arrayidx3, align 4
+  %mul4 = mul nsw i32 %1, %0
+  %mul3 = mul nsw i64 %indvars.iv, %AStride
+  %arrayidx7 = getelementptr inbounds i32* %A, i64 %mul3
+  store i32 %mul4, i32* %arrayidx7, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; We used to crash on this function because we removed the fptosi cast when
+; replacing the symbolic stride '%conv'.
+; PR18480
+
+; CHECK-LABEL: fn1
+; CHECK: load <2 x double>
+
+define void @fn1(double* noalias %x, double* noalias %c, double %a) {
+entry:
+  %conv = fptosi double %a to i32
+  %cmp8 = icmp sgt i32 %conv, 0
+  br i1 %cmp8, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %0 = trunc i64 %indvars.iv to i32
+  %mul = mul nsw i32 %0, %conv
+  %idxprom = sext i32 %mul to i64
+  %arrayidx = getelementptr inbounds double* %x, i64 %idxprom
+  %1 = load double* %arrayidx, align 8
+  %arrayidx3 = getelementptr inbounds double* %c, i64 %indvars.iv
+  store double %1, double* %arrayidx3, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %conv
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LowerAtomic/atomic-swap.ll b/test/Transforms/LowerAtomic/atomic-swap.ll
index 4331677..c319834 100644
--- a/test/Transforms/LowerAtomic/atomic-swap.ll
+++ b/test/Transforms/LowerAtomic/atomic-swap.ll
@@ -3,7 +3,7 @@
 define i8 @cmpswap() {
 ; CHECK-LABEL: @cmpswap(
   %i = alloca i8
-  %j = cmpxchg i8* %i, i8 0, i8 42 monotonic
+  %j = cmpxchg i8* %i, i8 0, i8 42 monotonic monotonic
 ; CHECK: [[INST:%[a-z0-9]+]] = load
 ; CHECK-NEXT: icmp
 ; CHECK-NEXT: select
diff --git a/test/Transforms/LowerExpectIntrinsic/basic.ll b/test/Transforms/LowerExpectIntrinsic/basic.ll
index 955209a..e184cb0 100644
--- a/test/Transforms/LowerExpectIntrinsic/basic.ll
+++ b/test/Transforms/LowerExpectIntrinsic/basic.ll
@@ -245,6 +245,35 @@ return:                                           ; preds = %if.end, %if.then
 
 declare i32 @llvm.expect.i32(i32, i32) nounwind readnone
 
+; CHECK-LABEL: @test9(
+define i32 @test9(i32 %x) nounwind uwtable ssp {
+entry:
+  %retval = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  %tmp = load i32* %x.addr, align 4
+  %cmp = icmp sgt i32 %tmp, 1
+  %expval = call i1 @llvm.expect.i1(i1 %cmp, i1 1)
+; CHECK: !prof !0
+; CHECK-NOT: @llvm.expect
+  br i1 %expval, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call = call i32 (...)* @f()
+  store i32 %call, i32* %retval
+  br label %return
+
+if.end:                                           ; preds = %entry
+  store i32 1, i32* %retval
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %0 = load i32* %retval
+  ret i32 %0
+}
+
+declare i1 @llvm.expect.i1(i1, i1) nounwind readnone
+
 ; CHECK: !0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
 ; CHECK: !1 = metadata !{metadata !"branch_weights", i32 4, i32 64}
 ; CHECK: !2 = metadata !{metadata !"branch_weights", i32 4, i32 64, i32 4}
diff --git a/test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll b/test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll
deleted file mode 100644
index bddb702..0000000
--- a/test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt < %s -lowerinvoke -enable-correct-eh-support -disable-output
-
-define void @_ZNKSt11__use_cacheISt16__numpunct_cacheIcEEclERKSt6locale() {
-entry:
-	br i1 false, label %then, label %UnifiedReturnBlock
-then:		; preds = %entry
-	invoke void @_Znwj( )
-			to label %UnifiedReturnBlock unwind label %UnifiedReturnBlock
-UnifiedReturnBlock:		; preds = %then, %then, %entry
-	%UnifiedRetVal = phi i32* [ null, %entry ], [ null, %then ], [ null, %then ] ; <i32*> [#uses=0]
-	ret void
-}
-
-declare void @_Znwj()
-
diff --git a/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHI.ll b/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHI.ll
deleted file mode 100644
index 1057ad7..0000000
--- a/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHI.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: opt < %s -lowerinvoke -enable-correct-eh-support -disable-output
-
-declare void @ll_listnext__listiterPtr()
-
-define void @WorkTask.fn() {
-block0:
-	invoke void @ll_listnext__listiterPtr( )
-			to label %block9 unwind label %block8_exception_handling
-block8_exception_handling:		; preds = %block0
-	ret void
-block9:		; preds = %block0
-	%w_2690 = phi { i32, i32 }* [ null, %block0 ]		; <{ i32, i32 }*> [#uses=1]
-	%tmp.129 = getelementptr { i32, i32 }* %w_2690, i32 0, i32 1		; <i32*> [#uses=1]
-	%v2769 = load i32* %tmp.129		; <i32> [#uses=0]
-	ret void
-}
-
diff --git a/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHIUse.ll b/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHIUse.ll
deleted file mode 100644
index 9402046..0000000
--- a/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHIUse.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt < %s -lowerinvoke -enable-correct-eh-support -disable-output
-
-declare fastcc i32 @ll_listnext__listiterPtr()
-
-define fastcc i32 @WorkTask.fn() {
-block0:
-	%v2679 = invoke fastcc i32 @ll_listnext__listiterPtr( )
-			to label %block9 unwind label %block8_exception_handling	; <i32> [#uses=1]
-block8_exception_handling:		; preds = %block0
-	ret i32 0
-block9:		; preds = %block0
-	%i_2689 = phi i32 [ %v2679, %block0 ]		; <i32> [#uses=1]
-	ret i32 %i_2689
-}
-
diff --git a/test/Transforms/LowerInvoke/2008-02-14-CritEdgePhiCrash.ll b/test/Transforms/LowerInvoke/2008-02-14-CritEdgePhiCrash.ll
deleted file mode 100644
index b46ccfb..0000000
--- a/test/Transforms/LowerInvoke/2008-02-14-CritEdgePhiCrash.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -lowerinvoke -enable-correct-eh-support -disable-output
-; PR2029
-define i32 @main(i32 %argc, i8** %argv) {
-bb470:
-        invoke i32 @main(i32 0, i8** null) to label %invcont474 unwind label
-%lpad902
-
-invcont474:             ; preds = %bb470
-        ret i32 0
-
-lpad902:                ; preds = %bb470
-        %tmp471.lcssa = phi i8* [ null, %bb470 ]                ; <i8*>
-        ret i32 0
-}
diff --git a/test/Transforms/LowerInvoke/basictest.ll b/test/Transforms/LowerInvoke/basictest.ll
deleted file mode 100644
index f0ca5f4..0000000
--- a/test/Transforms/LowerInvoke/basictest.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: opt < %s -lowerinvoke -disable-output -enable-correct-eh-support
-
-
-define i32 @foo() {
-	invoke i32 @foo( )
-			to label %Ok unwind label %Crap		; <i32>:1 [#uses=0]
-Ok:		; preds = %0
-	invoke i32 @foo( )
-			to label %Ok2 unwind label %Crap		; <i32>:2 [#uses=0]
-Ok2:		; preds = %Ok
-	ret i32 2
-Crap:		; preds = %Ok, %0
-	ret i32 1
-}
-
-define i32 @bar(i32 %blah) {
-	br label %doit
-doit:		; preds = %0
-        ;; Value live across an unwind edge.
-	%B2 = add i32 %blah, 1		; <i32> [#uses=1]
-	invoke i32 @foo( )
-			to label %Ok unwind label %Crap		; <i32>:1 [#uses=0]
-Ok:		; preds = %doit
-	invoke i32 @foo( )
-			to label %Ok2 unwind label %Crap		; <i32>:2 [#uses=0]
-Ok2:		; preds = %Ok
-	ret i32 2
-Crap:		; preds = %Ok, %doit
-	ret i32 %B2
-}
diff --git a/test/Transforms/LowerInvoke/lowerinvoke.ll b/test/Transforms/LowerInvoke/lowerinvoke.ll
new file mode 100644
index 0000000..05c19be
--- /dev/null
+++ b/test/Transforms/LowerInvoke/lowerinvoke.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -lowerinvoke -S | FileCheck %s
+
+declare i32 @external_func(i64 %arg)
+
+define i32 @invoke_test(i64 %arg) {
+entry:
+  %result = invoke fastcc i32 @external_func(i64 inreg %arg)
+      to label %cont unwind label %lpad
+cont:
+  ret i32 %result
+lpad:
+  %phi = phi i32 [ 99, %entry ]
+  %lp = landingpad { i8*, i32 } personality i8* null cleanup
+  ret i32 %phi
+}
+
+; The "invoke" should be converted to a "call".
+; CHECK-LABEL: define i32 @invoke_test
+; CHECK: %result = call fastcc i32 @external_func(i64 inreg %arg)
+; CHECK-NEXT: br label %cont
+
+; Note that this pass does not remove dead landingpad blocks.
+; CHECK: lpad:
+; CHECK-NOT: phi
+; CHECK: landingpad
diff --git a/test/Transforms/MemCpyOpt/form-memset.ll b/test/Transforms/MemCpyOpt/form-memset.ll
index 7c7b4fc..d980b7f 100644
--- a/test/Transforms/MemCpyOpt/form-memset.ll
+++ b/test/Transforms/MemCpyOpt/form-memset.ll
@@ -272,3 +272,15 @@ define void @test9() nounwind {
 ; CHECK-LABEL: @test9(
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i32 16, i1 false)
 }
+
+; PR19092
+define void @test10(i8* nocapture %P) nounwind {
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 23, i32 1, i1 false)
+  ret void
+; CHECK-LABEL: @test10(
+; CHECK-NOT: memset
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false)
+; CHECK-NOT: memset
+; CHECK: ret void
+}
diff --git a/test/Transforms/MemCpyOpt/memcpy-undef.ll b/test/Transforms/MemCpyOpt/memcpy-undef.ll
new file mode 100644
index 0000000..663b8dc
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/memcpy-undef.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.foo = type { i8, [7 x i8], i32 }
+
+define i32 @test1(%struct.foo* nocapture %foobie) nounwind noinline ssp uwtable {
+  %bletch.sroa.1 = alloca [7 x i8], align 1
+  %1 = getelementptr inbounds %struct.foo* %foobie, i64 0, i32 0
+  store i8 98, i8* %1, align 4
+  %2 = getelementptr inbounds %struct.foo* %foobie, i64 0, i32 1, i64 0
+  %3 = getelementptr inbounds [7 x i8]* %bletch.sroa.1, i64 0, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 7, i32 1, i1 false)
+  %4 = getelementptr inbounds %struct.foo* %foobie, i64 0, i32 2
+  store i32 20, i32* %4, align 4
+  ret i32 undef
+
+; Check that the memcpy is removed.
+; CHECK-LABEL: @test1(
+; CHECK-NOT: call void @llvm.memcpy
+}
+
+define void @test2(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable {
+  call void @llvm.lifetime.start(i64 8, i8* %in)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false)
+  ret void
+
+; Check that the memcpy is removed.
+; CHECK-LABEL: @test2(
+; CHECK-NOT: call void @llvm.memcpy
+}
+
+define void @test3(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable {
+  call void @llvm.lifetime.start(i64 4, i8* %in)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false)
+  ret void
+
+; Check that the memcpy is not removed.
+; CHECK-LABEL: @test3(
+; CHECK: call void @llvm.memcpy
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 2417cd1..492c453 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -78,6 +78,7 @@ define void @test4(i8 *%P) {
 
 declare void @test4a(i8* align 1 byval)
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
 
 %struct.S = type { i128, [4 x i8]}
 
@@ -152,6 +153,22 @@ declare noalias i8* @malloc(i32)
 ; rdar://11341081
 %struct.big = type { [50 x i32] }
 
+define void @test9_addrspacecast() nounwind ssp uwtable {
+entry:
+; CHECK-LABEL: @test9_addrspacecast(
+; CHECK: f1
+; CHECK-NOT: memcpy
+; CHECK: f2
+  %b = alloca %struct.big, align 4
+  %tmp = alloca %struct.big, align 4
+  call void @f1(%struct.big* sret %tmp)
+  %0 = addrspacecast %struct.big* %b to i8 addrspace(1)*
+  %1 = addrspacecast %struct.big* %tmp to i8 addrspace(1)*
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %0, i8 addrspace(1)* %1, i64 200, i32 4, i1 false)
+  call void @f2(%struct.big* %b)
+  ret void
+}
+
 define void @test9() nounwind ssp uwtable {
 entry:
 ; CHECK: test9
diff --git a/test/Transforms/MetaRenamer/metarenamer.ll b/test/Transforms/MetaRenamer/metarenamer.ll
index 4020e10..6297af6 100644
--- a/test/Transforms/MetaRenamer/metarenamer.ll
+++ b/test/Transforms/MetaRenamer/metarenamer.ll
@@ -14,7 +14,9 @@ target triple = "x86_64-pc-linux-gnu"
 
 @func_7_xxx = alias weak i32 (...)* @aliased_func_7_xxx
 
-declare i32 @aliased_func_7_xxx(...)
+define i32 @aliased_func_7_xxx(...) {
+  ret i32 0
+}
 
 define i32 @func_3_xxx() nounwind uwtable ssp {
   ret i32 3
diff --git a/test/Transforms/ObjCARC/allocas.ll b/test/Transforms/ObjCARC/allocas.ll
index 5065673..7347a8f 100644
--- a/test/Transforms/ObjCARC/allocas.ll
+++ b/test/Transforms/ObjCARC/allocas.ll
@@ -28,7 +28,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata)
 declare i8* @objc_msgSend(i8*, i8*, ...)
 
 
-; In the presense of allocas, unconditionally remove retain/release pairs only
+; In the presence of allocas, unconditionally remove retain/release pairs only
 ; if they are known safe in both directions. This prevents matching up an inner
 ; retain with the boundary guarding release in the following situation:
 ; 
@@ -336,7 +336,7 @@ bb3:
   ret void
 }
 
-; Make sure in the presense of allocas, if we find a cfghazard we do not perform
+; Make sure in the presence of allocas, if we find a cfghazard we do not perform
 ; code motion even if we are known safe. These two concepts are separate and
 ; should be treated as such.
 ;
diff --git a/test/Transforms/ObjCARC/contract-end-of-use-list.ll b/test/Transforms/ObjCARC/contract-end-of-use-list.ll
new file mode 100644
index 0000000..a38cd8a
--- /dev/null
+++ b/test/Transforms/ObjCARC/contract-end-of-use-list.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S < %s -objc-arc-expand -objc-arc-contract | FileCheck %s
+; Don't crash.  Reproducer for a use_iterator bug from r203364.
+; rdar://problem/16333235
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin13.2.0"
+
+%struct = type { i8*, i8* }
+
+; CHECK-LABEL: @foo() {
+define internal i8* @foo() {
+entry:
+  %call = call i8* @bar()
+; CHECK: %retained1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call)
+  %retained1 = call i8* @objc_retain(i8* %call)
+  %isnull = icmp eq i8* %retained1, null
+  br i1 %isnull, label %cleanup, label %if.end
+
+if.end:
+; CHECK: %retained2 = call i8* @objc_retain(i8* %retained1)
+  %retained2 = call i8* @objc_retain(i8* %retained1)
+  br label %cleanup
+
+cleanup:
+  %retval = phi i8* [ %retained2, %if.end ], [ null, %entry ]
+  ret i8* %retval
+}
+
+declare i8* @bar()
+
+declare extern_weak i8* @objc_retain(i8*)
diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
index 0728617..79e300c 100644
--- a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
+++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -24,11 +24,11 @@ target triple = "x86_64-apple-macosx10.9.0"
 @"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"new\00", section "__TEXT,__objc_methname,cstring_literals", align 1
 @"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
 @__CFConstantStringClassReference = external global [0 x i32]
-@.str = linker_private unnamed_addr constant [11 x i8] c"Failed: %@\00", align 1
+@.str = private unnamed_addr constant [11 x i8] c"Failed: %@\00", align 1
 @_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i64 10 }, section "__DATA,__cfstring"
 @"OBJC_CLASS_$_NSException" = external global %struct._class_t
 @"\01L_OBJC_CLASSLIST_REFERENCES_$_1" = internal global %struct._class_t* @"OBJC_CLASS_$_NSException", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
-@.str2 = linker_private unnamed_addr constant [4 x i8] c"Foo\00", align 1
+@.str2 = private unnamed_addr constant [4 x i8] c"Foo\00", align 1
 @_unnamed_cfstring_3 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([4 x i8]* @.str2, i32 0, i32 0), i64 3 }, section "__DATA,__cfstring"
 @"\01L_OBJC_METH_VAR_NAME_4" = internal global [14 x i8] c"raise:format:\00", section "__TEXT,__objc_methname,cstring_literals", align 1
 @"\01L_OBJC_SELECTOR_REFERENCES_5" = internal global i8* getelementptr inbounds ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_4", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
diff --git a/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg b/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg
new file mode 100644
index 0000000..84ac981
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if not 'ARM64' in targets:
+    config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/ARM64/mismatched-intrinsics.ll b/test/Transforms/SLPVectorizer/ARM64/mismatched-intrinsics.ll
new file mode 100644
index 0000000..3d6da12
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/ARM64/mismatched-intrinsics.ll
@@ -0,0 +1,18 @@
+; RUN: opt -S -slp-vectorizer %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+define i64 @mismatched_intrinsics(<4 x i32> %in1, <2 x i32> %in2) nounwind {
+; CHECK-LABEL: @mismatched_intrinsics
+; CHECK: call i64 @llvm.arm64.neon.saddlv.i64.v4i32
+; CHECK: call i64 @llvm.arm64.neon.saddlv.i64.v2i32
+
+  %vaddlvq_s32.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v4i32(<4 x i32> %in1) #2
+  %vaddlv_s32.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32> %in2) #2
+  %tst = icmp sgt i64 %vaddlvq_s32.i, %vaddlv_s32.i
+  %equal = sext i1 %tst to i64
+  ret i64 %equal
+}
+
+declare i64 @llvm.arm64.neon.saddlv.i64.v4i32(<4 x i32> %in1)
+declare i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32> %in1)
diff --git a/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll b/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
new file mode 100644
index 0000000..c7ec98a
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
@@ -0,0 +1,65 @@
+; RUN: opt -slp-vectorizer -mtriple=x86_64-apple-macosx10.9.0 -mcpu=corei7-avx -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+
+; This test used to crash because we were following phi chains incorrectly.
+; We used indices to get the incoming value of two phi nodes rather than
+; incoming block lookup.
+; This can give wrong results when the ordering of incoming
+; edges in the two phi nodes doesn't match.
+;CHECK-LABEL: bar
+
+%0 = type { %1, %2 }
+%1 = type { double, double }
+%2 = type { double, double }
+
+
+;define fastcc void @bar() {
+define void @bar() {
+  %1 = getelementptr inbounds %0* undef, i64 0, i32 1, i32 0
+  %2 = getelementptr inbounds %0* undef, i64 0, i32 1, i32 1
+  %3 = getelementptr inbounds %0* undef, i64 0, i32 1, i32 0
+  %4 = getelementptr inbounds %0* undef, i64 0, i32 1, i32 1
+  %5 = getelementptr inbounds %0* undef, i64 0, i32 1, i32 0
+  %6 = getelementptr inbounds %0* undef, i64 0, i32 1, i32 1
+  br label %7
+
+; <label>:7                                       ; preds = %18, %17, %17, %0
+  %8 = phi double [ 2.800000e+01, %0 ], [ %11, %18 ], [ %11, %17 ], [ %11, %17 ]
+  %9 = phi double [ 1.800000e+01, %0 ], [ %10, %18 ], [ %10, %17 ], [ %10, %17 ]
+  store double %9, double* %1, align 8
+  store double %8, double* %2, align 8
+  %10 = load double* %3, align 8
+  %11 = load double* %4, align 8
+  br i1 undef, label %12, label %13
+
+; <label>:12                                      ; preds = %7
+  ret void
+
+; <label>:13                                      ; preds = %7
+  store double %10, double* %5, align 8
+  store double %11, double* %6, align 8
+  br i1 undef, label %14, label %15
+
+; <label>:14                                      ; preds = %13
+  br label %15
+
+; <label>:15                                      ; preds = %14, %13
+  br i1 undef, label %16, label %17
+
+; <label>:16                                      ; preds = %15
+  unreachable
+
+; <label>:17                                      ; preds = %15
+  switch i32 undef, label %18 [
+    i32 32, label %7
+    i32 103, label %7
+  ]
+
+; <label>:18                                      ; preds = %17
+  br i1 undef, label %7, label %19
+
+; <label>:19                                      ; preds = %18
+  unreachable
+}
diff --git a/test/Transforms/SLPVectorizer/X86/extractcost.ll b/test/Transforms/SLPVectorizer/X86/extractcost.ll
new file mode 100644
index 0000000..01baf66
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/extractcost.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK-LABEL: @foo(
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @foo(i32* nocapture %A, i32 %n, i32 %m) {
+entry:
+  %mul = mul nsw i32 %n, 5
+  %add = add nsw i32 %mul, 9
+  store i32 %add, i32* %A, align 4
+  %mul1 = mul nsw i32 %n, 9
+  %add2 = add nsw i32 %mul1, 9
+  %arrayidx3 = getelementptr inbounds i32* %A, i64 1
+  store i32 %add2, i32* %arrayidx3, align 4
+  %mul4 = shl i32 %n, 3
+  %add5 = add nsw i32 %mul4, 9
+  %arrayidx6 = getelementptr inbounds i32* %A, i64 2
+  store i32 %add5, i32* %arrayidx6, align 4
+  %mul7 = mul nsw i32 %n, 10
+  %add8 = add nsw i32 %mul7, 9
+  %arrayidx9 = getelementptr inbounds i32* %A, i64 3
+  store i32 %add8, i32* %arrayidx9, align 4
+  %externaluse1 = add nsw i32 %add, %m  
+  %externaluse2 = mul nsw i32 %add, %m  ; we should add the extract cost only once and the store will be vectorized
+  %add10 = add nsw i32 %externaluse1, %externaluse2
+  ret i32 %add10
+}
diff --git a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index 43f7aed..7537ea3 100644
--- a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S -slp-vectorizer -slp-threshold=-10000 < %s | FileCheck %s
+; RUN: opt -S -slp-vectorizer -slp-threshold=0 < %s | FileCheck %s -check-prefix=ZEROTHRESH
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128"
 
 target triple = "x86_64-apple-macosx10.8.0"
@@ -194,4 +195,28 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b,
   ret <4 x float> %rb
 }
 
+; Check that the cost model for vectorization takes credit for
+; instructions that are erased (checked under the ZEROTHRESH prefix).
+define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
+; ZEROTHRESH-LABEL: @take_credit(
+; ZEROTHRESH: %1 = fadd <4 x float> %a, %b
+  %a0 = extractelement <4 x float> %a, i32 0
+  %b0 = extractelement <4 x float> %b, i32 0
+  %c0 = fadd float %a0, %b0
+  %a1 = extractelement <4 x float> %a, i32 1
+  %b1 = extractelement <4 x float> %b, i32 1
+  %c1 = fadd float %a1, %b1
+  %a2 = extractelement <4 x float> %a, i32 2
+  %b2 = extractelement <4 x float> %b, i32 2
+  %c2 = fadd float %a2, %b2
+  %a3 = extractelement <4 x float> %a, i32 3
+  %b3 = extractelement <4 x float> %b, i32 3
+  %c3 = fadd float %a3, %b3
+  %v0 = insertelement <4 x float> undef, float %c0, i32 0
+  %v1 = insertelement <4 x float> %v0, float %c1, i32 1
+  %v2 = insertelement <4 x float> %v1, float %c2, i32 2
+  %v3 = insertelement <4 x float> %v2, float %c3, i32 3
+  ret <4 x float> %v3
+}
+
 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/SLPVectorizer/X86/intrinsic.ll b/test/Transforms/SLPVectorizer/X86/intrinsic.ll
new file mode 100644
index 0000000..2b7ee75
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/intrinsic.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-999 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+declare double @llvm.fabs.f64(double) nounwind readnone
+
+;CHECK-LABEL: @vec_fabs_f64(
+;CHECK: load <2 x double>
+;CHECK: load <2 x double>
+;CHECK: call <2 x double> @llvm.fabs.v2f64
+;CHECK: store <2 x double>
+;CHECK: ret
+define void @vec_fabs_f64(double* %a, double* %b, double* %c) {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %call = tail call double @llvm.fabs.f64(double %mul) nounwind readnone
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  %call5 = tail call double @llvm.fabs.f64(double %mul5) nounwind readnone
+  store double %call, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %call5, double* %arrayidx5, align 8
+  ret void
+}
+
+declare float @llvm.copysign.f32(float, float) nounwind readnone
+
+;CHECK-LABEL: @vec_copysign_f32(
+;CHECK: load <4 x float>
+;CHECK: load <4 x float>
+;CHECK: call <4 x float> @llvm.copysign.v4f32
+;CHECK: store <4 x float>
+;CHECK: ret
+define void @vec_copysign_f32(float* %a, float* %b, float* noalias %c) {
+entry:
+  %0 = load float* %a, align 4
+  %1 = load float* %b, align 4
+  %call0 = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone
+  store float %call0, float* %c, align 4
+
+  %ix2 = getelementptr inbounds float* %a, i64 1
+  %2 = load float* %ix2, align 4
+  %ix3 = getelementptr inbounds float* %b, i64 1
+  %3 = load float* %ix3, align 4
+  %call1 = tail call float @llvm.copysign.f32(float %2, float %3) nounwind readnone
+  %c1 = getelementptr inbounds float* %c, i64 1
+  store float %call1, float* %c1, align 4
+
+  %ix4 = getelementptr inbounds float* %a, i64 2
+  %4 = load float* %ix4, align 4
+  %ix5 = getelementptr inbounds float* %b, i64 2
+  %5 = load float* %ix5, align 4
+  %call2 = tail call float @llvm.copysign.f32(float %4, float %5) nounwind readnone
+  %c2 = getelementptr inbounds float* %c, i64 2
+  store float %call2, float* %c2, align 4
+
+  %ix6 = getelementptr inbounds float* %a, i64 3
+  %6 = load float* %ix6, align 4
+  %ix7 = getelementptr inbounds float* %b, i64 3
+  %7 = load float* %ix7, align 4
+  %call3 = tail call float @llvm.copysign.f32(float %6, float %7) nounwind readnone
+  %c3 = getelementptr inbounds float* %c, i64 3
+  store float %call3, float* %c3, align 4
+
+  ret void
+}
+
+
+
diff --git a/test/Transforms/SLPVectorizer/X86/metadata.ll b/test/Transforms/SLPVectorizer/X86/metadata.ll
new file mode 100644
index 0000000..5bd2fa4
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/metadata.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK-LABEL: test1
+;CHECK: load <2 x double>{{.*}}!tbaa ![[TBAA:[0-9]+]]
+;CHECK: load <2 x double>{{.*}}!tbaa ![[TBAA]]
+;CHECK: fmul <2 x double>{{.*}}!fpmath ![[FP1:[0-9]+]]
+;CHECK: store <2 x double>{{.*}}!tbaa ![[TBAA]]
+;CHECK: ret void
+
+define void @test1(double* %a, double* %b, double* %c) {
+entry:
+  %i0 = load double* %a, align 8, !tbaa !4
+  %i1 = load double* %b, align 8, !tbaa !4
+  %mul = fmul double %i0, %i1, !fpmath !0
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8, !tbaa !4
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8, !tbaa !4
+  %mul5 = fmul double %i3, %i4, !fpmath !0
+  store double %mul, double* %c, align 8, !tbaa !4
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8, !tbaa !4
+  ret void
+}
+
+;CHECK-LABEL: test2
+;CHECK: load <2 x double>{{.*}}!tbaa ![[TBAA]]
+;CHECK: load <2 x double>{{.*}}!tbaa ![[TBAA]]
+;CHECK: fmul <2 x double>{{.*}}!fpmath ![[FP2:[0-9]+]]
+;CHECK: store <2 x double>{{.*}}!tbaa ![[TBAA]]
+;CHECK: ret void
+
+define void @test2(double* %a, double* %b, i8* %e) {
+entry:
+  %i0 = load double* %a, align 8, !tbaa !4
+  %i1 = load double* %b, align 8, !tbaa !4
+  %mul = fmul double %i0, %i1, !fpmath !1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8, !tbaa !4
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8, !tbaa !4
+  %mul5 = fmul double %i3, %i4, !fpmath !1
+  %c = bitcast i8* %e to double*
+  store double %mul, double* %c, align 8, !tbaa !4
+  %carrayidx5 = getelementptr inbounds i8* %e, i64 8
+  %arrayidx5 = bitcast i8* %carrayidx5 to double*
+  store double %mul5, double* %arrayidx5, align 8, !tbaa !4
+  ret void
+}
+
+;CHECK-DAG: ![[TBAA]] = metadata !{metadata [[TYPEC:!.*]], metadata [[TYPEC]], i64 0}
+;CHECK-DAG: ![[FP1]] = metadata !{float 5.000000e+00}
+;CHECK-DAG: ![[FP2]] = metadata !{float 2.500000e+00}
+!0 = metadata !{ float 5.0 }
+!1 = metadata !{ float 2.5 }
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"omnipotent char", metadata !2}
+!4 = metadata !{metadata !"double", metadata !3}
diff --git a/test/Transforms/SLPVectorizer/X86/phi.ll b/test/Transforms/SLPVectorizer/X86/phi.ll
index 964e0e4..0c53b60 100644
--- a/test/Transforms/SLPVectorizer/X86/phi.ll
+++ b/test/Transforms/SLPVectorizer/X86/phi.ll
@@ -221,7 +221,7 @@ entry:
 ; CHECK: load x86_fp80*
 ; CHECK: load x86_fp80*
 ; CHECK-NOT: insertelement <2 x x86_fp80>
-; CHECK_NOT: insertelement <2 x x86_fp80>
+; CHECK-NOT: insertelement <2 x x86_fp80>
   br i1 undef, label %then, label %end
 
 then:
diff --git a/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
index 2747a1f..10c3130 100644
--- a/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
+++ b/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
@@ -138,3 +138,18 @@ for.body:                                         ; preds = %entry, %for.body
 for.end:                                          ; preds = %for.body, %entry
   ret void
 }
+
+
+; CHECK-LABEL: store_splat
+; CHECK: store <4 x float>
+define void @store_splat(float*, float) {
+  %3 = getelementptr inbounds float* %0, i64 0
+  store float %1, float* %3, align 4
+  %4 = getelementptr inbounds float* %0, i64 1
+  store float %1, float* %4, align 4
+  %5 = getelementptr inbounds float* %0, i64 2
+  store float %1, float* %5, align 4
+  %6 = getelementptr inbounds float* %0, i64 3
+  store float %1, float* %6, align 4
+  ret void
+}
diff --git a/test/Transforms/SROA/address-spaces.ll b/test/Transforms/SROA/address-spaces.ll
new file mode 100644
index 0000000..847f285
--- /dev/null
+++ b/test/Transforms/SROA/address-spaces.ll
@@ -0,0 +1,68 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1)
+declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1)
+
+
+; Make sure an illegal bitcast isn't introduced
+define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1)* %b) {
+; CHECK-LABEL: @test_address_space_1_1(
+; CHECK: load <2 x i64> addrspace(1)* %a, align 2
+; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
+; CHECK: ret void
+  %aa = alloca <2 x i64>, align 16
+  %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
+  %aaptr = bitcast <2 x i64>* %aa to i8*
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false)
+  %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)*
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+  ret void
+}
+
+define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16* %b) {
+; CHECK-LABEL: @test_address_space_1_0(
+; CHECK: load <2 x i64> addrspace(1)* %a, align 2
+; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2
+; CHECK: ret void
+  %aa = alloca <2 x i64>, align 16
+  %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
+  %aaptr = bitcast <2 x i64>* %aa to i8*
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false)
+  %bptr = bitcast i16* %b to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+  ret void
+}
+
+define void @test_address_space_0_1(<2 x i64>* %a, i16 addrspace(1)* %b) {
+; CHECK-LABEL: @test_address_space_0_1(
+; CHECK: load <2 x i64>* %a, align 2
+; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
+; CHECK: ret void
+  %aa = alloca <2 x i64>, align 16
+  %aptr = bitcast <2 x i64>* %a to i8*
+  %aaptr = bitcast <2 x i64>* %aa to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false)
+  %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)*
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+  ret void
+}
+
+%struct.struct_test_27.0.13 = type { i32, float, i64, i8, [4 x i32] }
+
+; The copy out of the alloca into addrspace(1) must not survive as a memcpy.
+define void @copy_struct([5 x i64] %in.coerce) {
+; CHECK-LABEL: @copy_struct(
+; CHECK-NOT: memcpy
+for.end:
+  %in = alloca %struct.struct_test_27.0.13, align 8
+  %0 = bitcast %struct.struct_test_27.0.13* %in to [5 x i64]*
+  store [5 x i64] %in.coerce, [5 x i64]* %0, align 8
+  %scevgep9 = getelementptr %struct.struct_test_27.0.13* %in, i32 0, i32 4, i32 0
+  %scevgep910 = bitcast i32* %scevgep9 to i8*
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* undef, i8* %scevgep910, i32 16, i32 4, i1 false)
+  ret void
+}
+ 
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 5d3e4b5..dc2b165 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
 ; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 declare void @llvm.lifetime.start(i64, i8* nocapture)
 declare void @llvm.lifetime.end(i64, i8* nocapture)
@@ -404,6 +404,7 @@ entry:
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) nounwind
 declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
@@ -1150,6 +1151,24 @@ entry:
 ; CHECK: ret
 }
 
+define void @PR14105_as1({ [16 x i8] } addrspace(1)* %ptr) {
+; Make sure the right address space pointer is used for the type check.
+; CHECK-LABEL: @PR14105_as1(
+
+entry:
+  %a = alloca { [16 x i8] }, align 8
+; CHECK: alloca [16 x i8], align 8
+
+  %gep = getelementptr inbounds { [16 x i8] } addrspace(1)* %ptr, i64 -1
+; CHECK-NEXT: getelementptr inbounds { [16 x i8] } addrspace(1)* %ptr, i16 -1, i32 0, i16 0
+
+  %cast1 = bitcast { [16 x i8 ] } addrspace(1)* %gep to i8 addrspace(1)*
+  %cast2 = bitcast { [16 x i8 ] }* %a to i8*
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %cast1, i8* %cast2, i32 16, i32 8, i1 true)
+  ret void
+; CHECK: ret
+}
+
 define void @PR14465() {
 ; Ensure that we don't crash when analyzing a alloca larger than the maximum
 ; integer type width (MAX_INT_BITS) supported by llvm (1048576*32 > (1<<23)-1).
@@ -1317,6 +1336,28 @@ define void @PR15805(i1 %a, i1 %b) {
   ret void
 }
 
+define void @PR15805.1(i1 %a, i1 %b) {
+; Same as the normal PR15805, but rigged to place the use before the def inside
+; of looping unreachable code. This helps ensure that we aren't sensitive to the
+; order in which the uses of the alloca are visited.
+;
+; CHECK-LABEL: @PR15805.1(
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+  %c = alloca i64, align 8
+  br label %exit
+
+loop:
+  %cond.in = select i1 undef, i64* %c, i64* %p.0.c
+  %p.0.c = select i1 undef, i64* %c, i64* %c
+  %cond = load i64* %cond.in, align 8
+  br i1 undef, label %loop, label %exit
+
+exit:
+  ret void
+}
+
 define void @PR16651.1(i8* %a) {
 ; This test case caused a crash due to the volatile memcpy in combination with
 ; lowering to integer loads and stores of a width other than that of the original
@@ -1356,3 +1397,46 @@ entry:
   %cond105.i.i = load float* %cond105.in.i.i, align 8
   ret void
 }
+
+define void @test23(i32 %x) {
+; CHECK-LABEL: @test23(
+; CHECK-NOT: alloca
+; CHECK: ret void
+entry:
+  %a = alloca i32, align 4
+  store i32 %x, i32* %a, align 4
+  %gep1 = getelementptr inbounds i32* %a, i32 1
+  %gep0 = getelementptr inbounds i32* %a, i32 0
+  %cast1 = bitcast i32* %gep1 to i8*
+  %cast0 = bitcast i32* %gep0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast0, i32 4, i32 1, i1 false)
+  ret void
+}
+
+define void @PR18615() {
+; CHECK-LABEL: @PR18615(
+; CHECK-NOT: alloca
+; CHECK: ret void
+entry:
+  %f = alloca i8
+  %gep = getelementptr i8* %f, i64 -1
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %gep, i32 1, i32 1, i1 false)
+  ret void
+}
+
+define void @test24(i8* %src, i8* %dst) {
+; CHECK-LABEL: @test24(
+; CHECK: alloca i64, align 16
+; CHECK: load volatile i64* %{{[^,]*}}, align 1
+; CHECK: store volatile i64 %{{[^,]*}}, i64* %{{[^,]*}}, align 16
+; CHECK: load volatile i64* %{{[^,]*}}, align 16
+; CHECK: store volatile i64 %{{[^,]*}}, i64* %{{[^,]*}}, align 1
+
+entry:
+  %a = alloca i64, align 16
+  %ptr = bitcast i64* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i32 1, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i32 1, i1 true)
+  ret void
+}
+
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll
index 4f08421..9c9f6a1 100644
--- a/test/Transforms/SROA/vector-promotion.ll
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -150,6 +150,53 @@ entry:
 ; CHECK-NEXT: ret
 }
 
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) nounwind
+
+; Same as test4 with a different sized address space pointer source.
+define i32 @test4_as1(<4 x i32> %x, <4 x i32> %y, <4 x i32> addrspace(1)* %z) {
+; CHECK-LABEL: @test4_as1(
+entry:
+  %a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+  %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
+  store <4 x i32> %x, <4 x i32>* %a.x
+  %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
+  store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+  %a.y.cast = bitcast <4 x i32>* %a.y to i8*
+  %z.cast = bitcast <4 x i32> addrspace(1)* %z to i8 addrspace(1)*
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.y.cast, i8 addrspace(1)* %z.cast, i32 16, i32 1, i1 false)
+; CHECK-NOT: memcpy
+
+  %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+  %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
+  %z.tmp1 = getelementptr inbounds <4 x i32> addrspace(1)* %z, i16 0, i16 2
+  %z.tmp1.cast = bitcast i32 addrspace(1)* %z.tmp1 to i8 addrspace(1)*
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.tmp1.cast, i8 addrspace(1)* %z.tmp1.cast, i32 4, i32 1, i1 false)
+  %tmp1 = load i32* %a.tmp1
+  %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+  %tmp2 = load i32* %a.tmp2
+  %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+  %tmp3 = load i32* %a.tmp3
+; CHECK-NOT: memcpy
+; CHECK:      %[[load:.*]] = load <4 x i32> addrspace(1)* %z
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32> addrspace(1)* %z, i64 0, i64 2
+; CHECK-NEXT: %[[element_load:.*]] = load i32 addrspace(1)* %[[gep]]
+; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3
+; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0
+
+  %tmp4 = add i32 %tmp1, %tmp2
+  %tmp5 = add i32 %tmp3, %tmp4
+  ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
 define i32 @test5(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) {
 ; CHECK-LABEL: @test5(
 ; The same as the above, but with reversed source and destination for the
diff --git a/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof b/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof
new file mode 100644
index 0000000..cc7f0d4
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof
@@ -0,0 +1,2 @@
+empty:100:0
+1.-3: 10
diff --git a/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof b/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof
new file mode 100644
index 0000000..abcb0ba
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof
@@ -0,0 +1,3 @@
+3empty:100:BAD
+0: 0
+1: 100
diff --git a/test/Transforms/SampleProfile/Inputs/bad_line_values.prof b/test/Transforms/SampleProfile/Inputs/bad_line_values.prof
new file mode 100644
index 0000000..61ba7c0
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/bad_line_values.prof
@@ -0,0 +1,2 @@
+empty:100:0
+-1: 10
diff --git a/test/Transforms/SampleProfile/Inputs/bad_mangle.prof b/test/Transforms/SampleProfile/Inputs/bad_mangle.prof
new file mode 100644
index 0000000..50fe861
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/bad_mangle.prof
@@ -0,0 +1,3 @@
+double convert<std::string, float>(float):2909472:181842
+0: 181842
+1: 181842
diff --git a/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof b/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof
new file mode 100644
index 0000000..038c45f
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof
@@ -0,0 +1,3 @@
+empty:100:0
+0: 0
+1: BAD
diff --git a/test/Transforms/SampleProfile/Inputs/bad_samples.prof b/test/Transforms/SampleProfile/Inputs/bad_samples.prof
new file mode 100644
index 0000000..a121d8c
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/bad_samples.prof
@@ -0,0 +1,2 @@
+empty:100:0
+1.3: -10
diff --git a/test/Transforms/SampleProfile/Inputs/branch.prof b/test/Transforms/SampleProfile/Inputs/branch.prof
index d19894d..cd1cb5b 100644
--- a/test/Transforms/SampleProfile/Inputs/branch.prof
+++ b/test/Transforms/SampleProfile/Inputs/branch.prof
@@ -1,7 +1,4 @@
-symbol table
-1
-main
-main:15680:0:7
+main:15680:0
 0: 0
 4: 0
 7: 0
diff --git a/test/Transforms/SampleProfile/Inputs/calls.prof b/test/Transforms/SampleProfile/Inputs/calls.prof
new file mode 100644
index 0000000..57d3887
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/calls.prof
@@ -0,0 +1,10 @@
+_Z3sumii:105580:5279
+0: 5279
+1: 5279
+2: 5279
+main:225715:0
+2.1: 5553
+3: 5391
+# This indicates that at line 3 of this function, the 'then' branch
+# of the conditional is taken (discriminator '1').
+3.1: 5752  _Z3sumii:5860
diff --git a/test/Transforms/SampleProfile/Inputs/discriminator.prof b/test/Transforms/SampleProfile/Inputs/discriminator.prof
new file mode 100644
index 0000000..a6bcbc5
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/discriminator.prof
@@ -0,0 +1,8 @@
+foo:1000:0
+1: 1
+2: 1
+2.1: 100
+3: 100
+3.1: 5
+4: 100
+5: 1
diff --git a/test/Transforms/SampleProfile/Inputs/propagate.prof b/test/Transforms/SampleProfile/Inputs/propagate.prof
new file mode 100644
index 0000000..b28609b
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/propagate.prof
@@ -0,0 +1,17 @@
+_Z3fooiil:58139:0
+0: 0
+1: 0
+2: 0
+4: 1
+5: 10
+6: 0
+7: 5
+8: 3
+9: 0
+10: 0
+11: 6339
+12: 16191
+13: 8141
+16: 1
+18: 0
+19: 0
diff --git a/test/Transforms/SampleProfile/Inputs/syntax.prof b/test/Transforms/SampleProfile/Inputs/syntax.prof
new file mode 100644
index 0000000..f373891
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/syntax.prof
@@ -0,0 +1,3 @@
+empty:100:0
+0: 0
+1: 100
diff --git a/test/Transforms/SampleProfile/branch.ll b/test/Transforms/SampleProfile/branch.ll
index 5167627..65f1f17 100644
--- a/test/Transforms/SampleProfile/branch.ll
+++ b/test/Transforms/SampleProfile/branch.ll
@@ -46,8 +46,8 @@ if.end:                                           ; preds = %entry
   tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !17), !dbg !30
   %cmp1 = icmp sgt i32 %call, 100, !dbg !35
   br i1 %cmp1, label %for.body, label %if.end6, !dbg !35
-; CHECK: edge if.end -> for.body probability is 2243 / 2244 = 99.9554% [HOT edge]
-; CHECK: edge if.end -> if.end6 probability is 1 / 2244 = 0.0445633%
+; CHECK: edge if.end -> for.body probability is 1 / 2 = 50%
+; CHECK: edge if.end -> if.end6 probability is 1 / 2 = 50%
 
 for.body:                                         ; preds = %if.end, %for.body
   %u.016 = phi i32 [ %inc, %for.body ], [ 0, %if.end ]
@@ -65,8 +65,8 @@ for.body:                                         ; preds = %if.end, %for.body
   tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !21), !dbg !38
   %exitcond = icmp eq i32 %inc, %call, !dbg !38
   br i1 %exitcond, label %if.end6, label %for.body, !dbg !38
-; CHECK: edge for.body -> if.end6 probability is 1 / 2244 = 0.0445633%
-; CHECK: edge for.body -> for.body probability is 2243 / 2244 = 99.9554% [HOT edge]
+; CHECK: edge for.body -> if.end6 probability is 1 / 10227 = 0.00977804%
+; CHECK: edge for.body -> for.body probability is 10226 / 10227 = 99.9902% [HOT edge]
 
 if.end6:                                          ; preds = %for.body, %if.end
   %result.0 = phi double [ 0.000000e+00, %if.end ], [ %sub, %for.body ]
@@ -117,17 +117,17 @@ attributes #4 = { nounwind readonly }
 !16 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
 !17 = metadata !{i32 786688, metadata !4, metadata !"limit", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [limit] [line 8]
 !18 = metadata !{i32 786688, metadata !19, metadata !"s", metadata !5, i32 10, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 10]
-!19 = metadata !{i32 786443, metadata !1, metadata !20, i32 9, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [./branch.cc]
-!20 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!19 = metadata !{i32 786443, metadata !1, metadata !20, i32 9, i32 0, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!20 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [./branch.cc]
 !21 = metadata !{i32 786688, metadata !22, metadata !"u", metadata !5, i32 11, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [u] [line 11]
-!22 = metadata !{i32 786443, metadata !1, metadata !19, i32 11, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!22 = metadata !{i32 786443, metadata !1, metadata !19, i32 11, i32 0, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [./branch.cc]
 !23 = metadata !{i32 786688, metadata !24, metadata !"x", metadata !5, i32 12, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [x] [line 12]
-!24 = metadata !{i32 786443, metadata !1, metadata !22, i32 11, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!24 = metadata !{i32 786443, metadata !1, metadata !22, i32 11, i32 0, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [./branch.cc]
 !25 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
 !26 = metadata !{metadata !"clang version 3.4 (trunk 192896) (llvm/trunk 192895)"}
 !27 = metadata !{i32 4, i32 0, metadata !4, null}
 !28 = metadata !{i32 5, i32 0, metadata !29, null}
-!29 = metadata !{i32 786443, metadata !1, metadata !4, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!29 = metadata !{i32 786443, metadata !1, metadata !4, i32 5, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./branch.cc]
 !30 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
 !31 = metadata !{metadata !32, metadata !32, i64 0}
 !32 = metadata !{metadata !"any pointer", metadata !33, i64 0}
diff --git a/test/Transforms/SampleProfile/calls.ll b/test/Transforms/SampleProfile/calls.ll
new file mode 100644
index 0000000..381be87
--- /dev/null
+++ b/test/Transforms/SampleProfile/calls.ll
@@ -0,0 +1,116 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/calls.prof | opt -analyze -branch-prob | FileCheck %s
+
+; Original C++ test case
+;
+; #include <stdio.h>
+;
+; int sum(int x, int y) {
+;   return x + y;
+; }
+;
+; int main() {
+;   int s, i = 0;
+;   while (i++ < 20000 * 20000)
+;     if (i != 100) s = sum(i, s); else s = 30;
+;   printf("sum is %d\n", s);
+;   return 0;
+; }
+
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z3sumii(i32 %x, i32 %y) {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %0 = load i32* %x.addr, align 4, !dbg !11
+  %1 = load i32* %y.addr, align 4, !dbg !11
+  %add = add nsw i32 %0, %1, !dbg !11
+  ret i32 %add, !dbg !11
+}
+
+; Function Attrs: uwtable
+define i32 @main() {
+entry:
+  %retval = alloca i32, align 4
+  %s = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4, !dbg !12
+  br label %while.cond, !dbg !13
+
+while.cond:                                       ; preds = %if.end, %entry
+  %0 = load i32* %i, align 4, !dbg !14
+  %inc = add nsw i32 %0, 1, !dbg !14
+  store i32 %inc, i32* %i, align 4, !dbg !14
+  %cmp = icmp slt i32 %0, 400000000, !dbg !14
+  br i1 %cmp, label %while.body, label %while.end, !dbg !14
+; CHECK: edge while.cond -> while.body probability is 5391 / 5392 = 99.9815% [HOT edge]
+; CHECK: edge while.cond -> while.end probability is 1 / 5392 = 0.018546%
+
+while.body:                                       ; preds = %while.cond
+  %1 = load i32* %i, align 4, !dbg !16
+  %cmp1 = icmp ne i32 %1, 100, !dbg !16
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !16
+; Without discriminator information, the profiler used to think that
+; both branches out of while.body had the same weight. In reality,
+; the edge while.body->if.then is taken most of the time.
+;
+; CHECK: edge while.body -> if.then probability is 5752 / 5753 = 99.9826% [HOT edge]
+; CHECK: edge while.body -> if.else probability is 1 / 5753 = 0.0173822%
+
+
+if.then:                                          ; preds = %while.body
+  %2 = load i32* %i, align 4, !dbg !18
+  %3 = load i32* %s, align 4, !dbg !18
+  %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !18
+  store i32 %call, i32* %s, align 4, !dbg !18
+  br label %if.end, !dbg !18
+
+if.else:                                          ; preds = %while.body
+  store i32 30, i32* %s, align 4, !dbg !20
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  br label %while.cond, !dbg !22
+
+while.end:                                        ; preds = %while.cond
+  %4 = load i32* %s, align 4, !dbg !24
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+declare i32 @printf(i8*, ...) #2
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [./calls.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"calls.cc", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !7}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"sum", metadata !"sum", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i32)* @_Z3sumii, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [sum]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [./calls.cc]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!10 = metadata !{metadata !"clang version 3.5 "}
+!11 = metadata !{i32 4, i32 0, metadata !4, null}
+!12 = metadata !{i32 8, i32 0, metadata !7, null} ; debug location for line 8 (the 8 is a line number, not a DWARF tag)
+!13 = metadata !{i32 9, i32 0, metadata !7, null}
+!14 = metadata !{i32 9, i32 0, metadata !15, null}
+!15 = metadata !{i32 786443, metadata !1, metadata !7, i32 9, i32 0, i32 1, i32 1} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!16 = metadata !{i32 10, i32 0, metadata !17, null}
+!17 = metadata !{i32 786443, metadata !1, metadata !7, i32 10, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!18 = metadata !{i32 10, i32 0, metadata !19, null}
+!19 = metadata !{i32 786443, metadata !1, metadata !17, i32 10, i32 0, i32 1, i32 2} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!20 = metadata !{i32 10, i32 0, metadata !21, null}
+!21 = metadata !{i32 786443, metadata !1, metadata !17, i32 10, i32 0, i32 2, i32 3} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!22 = metadata !{i32 10, i32 0, metadata !23, null}
+!23 = metadata !{i32 786443, metadata !1, metadata !17, i32 10, i32 0, i32 3, i32 4} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!24 = metadata !{i32 11, i32 0, metadata !7, null}
+!25 = metadata !{i32 12, i32 0, metadata !7, null}
diff --git a/test/Transforms/SampleProfile/discriminator.ll b/test/Transforms/SampleProfile/discriminator.ll
new file mode 100644
index 0000000..0f773a5
--- /dev/null
+++ b/test/Transforms/SampleProfile/discriminator.ll
@@ -0,0 +1,90 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/discriminator.prof | opt -analyze -branch-prob | FileCheck %s
+
+; Original code
+;
+; 1   int foo(int i) {
+; 2     int x = 0;
+; 3     while (i < 100) {
+; 4       if (i < 5) x--;
+; 5       i++;
+; 6     }
+; 7     return x;
+; 8   }
+;
+; In this test, if the loop is executed 100 times, the decrement operation
+; at line 4 should only execute 5 times. This is reflected in the profile
+; data for line offset 3.  In Inputs/discriminator.prof, we have:
+;
+; 3: 100
+; 3.1: 5
+;
+; This means that the predicate 'i < 5' (line offset 3) is executed 100 times,
+; but the then branch (line offset 3, discriminator 1) is only executed 5 times.
+
+define i32 @foo(i32 %i) #0 {
+; CHECK: Printing analysis 'Branch Probability Analysis' for function 'foo':
+entry:
+  %i.addr = alloca i32, align 4
+  %x = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store i32 0, i32* %x, align 4, !dbg !10
+  br label %while.cond, !dbg !11
+
+while.cond:                                       ; preds = %if.end, %entry
+  %0 = load i32* %i.addr, align 4, !dbg !12
+  %cmp = icmp slt i32 %0, 100, !dbg !12
+  br i1 %cmp, label %while.body, label %while.end, !dbg !12
+; CHECK: edge while.cond -> while.body probability is 100 / 101 = 99.0099% [HOT edge]
+; CHECK: edge while.cond -> while.end probability is 1 / 101 = 0.990099%
+
+while.body:                                       ; preds = %while.cond
+  %1 = load i32* %i.addr, align 4, !dbg !14
+  %cmp1 = icmp slt i32 %1, 50, !dbg !14
+  br i1 %cmp1, label %if.then, label %if.end, !dbg !14
+; CHECK: edge while.body -> if.then probability is 5 / 100 = 5%
+; CHECK: edge while.body -> if.end probability is 95 / 100 = 95% [HOT edge]
+
+if.then:                                          ; preds = %while.body
+  %2 = load i32* %x, align 4, !dbg !17
+  %dec = add nsw i32 %2, -1, !dbg !17
+  store i32 %dec, i32* %x, align 4, !dbg !17
+  br label %if.end, !dbg !17
+
+if.end:                                           ; preds = %if.then, %while.body
+  %3 = load i32* %i.addr, align 4, !dbg !19
+  %inc = add nsw i32 %3, 1, !dbg !19
+  store i32 %inc, i32* %i.addr, align 4, !dbg !19
+  br label %while.cond, !dbg !20
+
+while.end:                                        ; preds = %while.cond
+  %4 = load i32* %x, align 4, !dbg !21
+  ret i32 %4, !dbg !21
+}
+
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [discriminator.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"discriminator.c", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [discriminator.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!9 = metadata !{metadata !"clang version 3.5 "}
+!10 = metadata !{i32 2, i32 0, metadata !4, null}
+!11 = metadata !{i32 3, i32 0, metadata !4, null}
+!12 = metadata !{i32 3, i32 0, metadata !13, null}
+!13 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 1, i32 2} ; [ DW_TAG_lexical_block ] [discriminator.c]
+!14 = metadata !{i32 4, i32 0, metadata !15, null}
+!15 = metadata !{i32 786443, metadata !1, metadata !16, i32 4, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [discriminator.c]
+!16 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [discriminator.c]
+!17 = metadata !{i32 4, i32 0, metadata !18, null}
+!18 = metadata !{i32 786443, metadata !1, metadata !15, i32 4, i32 0, i32 1, i32 3} ; [ DW_TAG_lexical_block ] [discriminator.c]
+!19 = metadata !{i32 5, i32 0, metadata !16, null}
+!20 = metadata !{i32 6, i32 0, metadata !16, null}
+!21 = metadata !{i32 7, i32 0, metadata !4, null}
diff --git a/test/Transforms/SampleProfile/propagate.ll b/test/Transforms/SampleProfile/propagate.ll
new file mode 100644
index 0000000..939361b
--- /dev/null
+++ b/test/Transforms/SampleProfile/propagate.ll
@@ -0,0 +1,243 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/propagate.prof | opt -analyze -branch-prob | FileCheck %s
+
+; Original C++ code for this test case:
+;
+; #include <stdio.h>
+;
+; long foo(int x, int y, long N) {
+;   if (x < y) {
+;     return y - x;
+;   } else {
+;     for (long i = 0; i < N; i++) {
+;       if (i > N / 3)
+;         x--;
+;       if (i > N / 4) {
+;         y++;
+;         x += 3;
+;       } else {
+;         for (unsigned j = 0; j < i; j++) {
+;           x += j;
+;           y -= 3;
+;         }
+;       }
+;     }
+;   }
+;   return y * x;
+; }
+;
+; int main() {
+;   int x = 5678;
+;   int y = 1234;
+;   long N = 999999;
+;   printf("foo(%d, %d, %ld) = %ld\n", x, y, N, foo(x, y, N));
+;   return 0;
+; }
+
+; ModuleID = 'propagate.cc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [24 x i8] c"foo(%d, %d, %ld) = %ld\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i64 @_Z3fooiil(i32 %x, i32 %y, i64 %N) #0 {
+entry:
+  %retval = alloca i64, align 8
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  %N.addr = alloca i64, align 8
+  %i = alloca i64, align 8
+  %j = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  store i64 %N, i64* %N.addr, align 8
+  %0 = load i32* %x.addr, align 4, !dbg !11
+  %1 = load i32* %y.addr, align 4, !dbg !11
+  %cmp = icmp slt i32 %0, %1, !dbg !11
+  br i1 %cmp, label %if.then, label %if.else, !dbg !11
+
+if.then:                                          ; preds = %entry
+  %2 = load i32* %y.addr, align 4, !dbg !13
+  %3 = load i32* %x.addr, align 4, !dbg !13
+  %sub = sub nsw i32 %2, %3, !dbg !13
+  %conv = sext i32 %sub to i64, !dbg !13
+  store i64 %conv, i64* %retval, !dbg !13
+  br label %return, !dbg !13
+
+if.else:                                          ; preds = %entry
+  store i64 0, i64* %i, align 8, !dbg !15
+  br label %for.cond, !dbg !15
+
+for.cond:                                         ; preds = %for.inc16, %if.else
+  %4 = load i64* %i, align 8, !dbg !15
+  %5 = load i64* %N.addr, align 8, !dbg !15
+  %cmp1 = icmp slt i64 %4, %5, !dbg !15
+  br i1 %cmp1, label %for.body, label %for.end18, !dbg !15
+; CHECK: edge for.cond -> for.body probability is 10 / 11 = 90.9091% [HOT edge]
+; CHECK: edge for.cond -> for.end18 probability is 1 / 11 = 9.09091%
+
+for.body:                                         ; preds = %for.cond
+  %6 = load i64* %i, align 8, !dbg !18
+  %7 = load i64* %N.addr, align 8, !dbg !18
+  %div = sdiv i64 %7, 3, !dbg !18
+  %cmp2 = icmp sgt i64 %6, %div, !dbg !18
+  br i1 %cmp2, label %if.then3, label %if.end, !dbg !18
+; CHECK: edge for.body -> if.then3 probability is 1 / 5 = 20%
+; CHECK: edge for.body -> if.end probability is 4 / 5 = 80%
+
+if.then3:                                         ; preds = %for.body
+  %8 = load i32* %x.addr, align 4, !dbg !21
+  %dec = add nsw i32 %8, -1, !dbg !21
+  store i32 %dec, i32* %x.addr, align 4, !dbg !21
+  br label %if.end, !dbg !21
+
+if.end:                                           ; preds = %if.then3, %for.body
+  %9 = load i64* %i, align 8, !dbg !22
+  %10 = load i64* %N.addr, align 8, !dbg !22
+  %div4 = sdiv i64 %10, 4, !dbg !22
+  %cmp5 = icmp sgt i64 %9, %div4, !dbg !22
+  br i1 %cmp5, label %if.then6, label %if.else7, !dbg !22
+; CHECK: edge if.end -> if.then6 probability is 3 / 6342 = 0.0473037%
+; CHECK: edge if.end -> if.else7 probability is 6339 / 6342 = 99.9527% [HOT edge]
+
+if.then6:                                         ; preds = %if.end
+  %11 = load i32* %y.addr, align 4, !dbg !24
+  %inc = add nsw i32 %11, 1, !dbg !24
+  store i32 %inc, i32* %y.addr, align 4, !dbg !24
+  %12 = load i32* %x.addr, align 4, !dbg !26
+  %add = add nsw i32 %12, 3, !dbg !26
+  store i32 %add, i32* %x.addr, align 4, !dbg !26
+  br label %if.end15, !dbg !27
+
+if.else7:                                         ; preds = %if.end
+  store i32 0, i32* %j, align 4, !dbg !28
+  br label %for.cond8, !dbg !28
+
+for.cond8:                                        ; preds = %for.inc, %if.else7
+  %13 = load i32* %j, align 4, !dbg !28
+  %conv9 = zext i32 %13 to i64, !dbg !28
+  %14 = load i64* %i, align 8, !dbg !28
+  %cmp10 = icmp slt i64 %conv9, %14, !dbg !28
+  br i1 %cmp10, label %for.body11, label %for.end, !dbg !28
+; CHECK: edge for.cond8 -> for.body11 probability is 16191 / 16192 = 99.9938% [HOT edge]
+; CHECK: edge for.cond8 -> for.end probability is 1 / 16192 = 0.00617589%
+
+for.body11:                                       ; preds = %for.cond8
+  %15 = load i32* %j, align 4, !dbg !31
+  %16 = load i32* %x.addr, align 4, !dbg !31
+  %add12 = add i32 %16, %15, !dbg !31
+  store i32 %add12, i32* %x.addr, align 4, !dbg !31
+  %17 = load i32* %y.addr, align 4, !dbg !33
+  %sub13 = sub nsw i32 %17, 3, !dbg !33
+  store i32 %sub13, i32* %y.addr, align 4, !dbg !33
+  br label %for.inc, !dbg !34
+
+for.inc:                                          ; preds = %for.body11
+  %18 = load i32* %j, align 4, !dbg !28
+  %inc14 = add i32 %18, 1, !dbg !28
+  store i32 %inc14, i32* %j, align 4, !dbg !28
+  br label %for.cond8, !dbg !28
+
+for.end:                                          ; preds = %for.cond8
+  br label %if.end15
+
+if.end15:                                         ; preds = %for.end, %if.then6
+  br label %for.inc16, !dbg !35
+
+for.inc16:                                        ; preds = %if.end15
+  %19 = load i64* %i, align 8, !dbg !15
+  %inc17 = add nsw i64 %19, 1, !dbg !15
+  store i64 %inc17, i64* %i, align 8, !dbg !15
+  br label %for.cond, !dbg !15
+
+for.end18:                                        ; preds = %for.cond
+  br label %if.end19
+
+if.end19:                                         ; preds = %for.end18
+  %20 = load i32* %y.addr, align 4, !dbg !36
+  %21 = load i32* %x.addr, align 4, !dbg !36
+  %mul = mul nsw i32 %20, %21, !dbg !36
+  %conv20 = sext i32 %mul to i64, !dbg !36
+  store i64 %conv20, i64* %retval, !dbg !36
+  br label %return, !dbg !36
+
+return:                                           ; preds = %if.end19, %if.then
+  %22 = load i64* %retval, !dbg !37
+  ret i64 %22, !dbg !37
+}
+
+; Function Attrs: uwtable
+define i32 @main() #1 {
+entry:
+  %retval = alloca i32, align 4
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %N = alloca i64, align 8
+  store i32 0, i32* %retval
+  store i32 5678, i32* %x, align 4, !dbg !38
+  store i32 1234, i32* %y, align 4, !dbg !39
+  store i64 999999, i64* %N, align 8, !dbg !40
+  %0 = load i32* %x, align 4, !dbg !41
+  %1 = load i32* %y, align 4, !dbg !41
+  %2 = load i64* %N, align 8, !dbg !41
+  %3 = load i32* %x, align 4, !dbg !41
+  %4 = load i32* %y, align 4, !dbg !41
+  %5 = load i64* %N, align 8, !dbg !41
+  %call = call i64 @_Z3fooiil(i32 %3, i32 %4, i64 %5), !dbg !41
+  %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([24 x i8]* @.str, i32 0, i32 0), i32 %0, i32 %1, i64 %2, i64 %call), !dbg !41
+  ret i32 0, !dbg !42
+}
+
+declare i32 @printf(i8*, ...) #2
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [propagate.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"propagate.cc", metadata !"."}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !7}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i64 (i32, i32, i64)* @_Z3fooiil, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [foo]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [propagate.cc]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 24, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [main]
+!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!10 = metadata !{metadata !"clang version 3.5 "}
+!11 = metadata !{i32 4, i32 0, metadata !12, null}
+!12 = metadata !{i32 786443, metadata !1, metadata !4, i32 4, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!13 = metadata !{i32 5, i32 0, metadata !14, null}
+!14 = metadata !{i32 786443, metadata !1, metadata !12, i32 4, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!15 = metadata !{i32 7, i32 0, metadata !16, null}
+!16 = metadata !{i32 786443, metadata !1, metadata !17, i32 7, i32 0, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!17 = metadata !{i32 786443, metadata !1, metadata !12, i32 6, i32 0, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!18 = metadata !{i32 8, i32 0, metadata !19, null} ; debug location for line 8 (the 8 is a line number, not a DWARF tag)
+!19 = metadata !{i32 786443, metadata !1, metadata !20, i32 8, i32 0, i32 0, i32 5} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!20 = metadata !{i32 786443, metadata !1, metadata !16, i32 7, i32 0, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!21 = metadata !{i32 9, i32 0, metadata !19, null}
+!22 = metadata !{i32 10, i32 0, metadata !23, null}
+!23 = metadata !{i32 786443, metadata !1, metadata !20, i32 10, i32 0, i32 0, i32 6} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!24 = metadata !{i32 11, i32 0, metadata !25, null}
+!25 = metadata !{i32 786443, metadata !1, metadata !23, i32 10, i32 0, i32 0, i32 7} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!26 = metadata !{i32 12, i32 0, metadata !25, null}
+!27 = metadata !{i32 13, i32 0, metadata !25, null}
+!28 = metadata !{i32 14, i32 0, metadata !29, null}
+!29 = metadata !{i32 786443, metadata !1, metadata !30, i32 14, i32 0, i32 0, i32 9} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!30 = metadata !{i32 786443, metadata !1, metadata !23, i32 13, i32 0, i32 0, i32 8} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!31 = metadata !{i32 15, i32 0, metadata !32, null}
+!32 = metadata !{i32 786443, metadata !1, metadata !29, i32 14, i32 0, i32 0, i32 10} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!33 = metadata !{i32 16, i32 0, metadata !32, null}
+!34 = metadata !{i32 17, i32 0, metadata !32, null}
+!35 = metadata !{i32 19, i32 0, metadata !20, null}
+!36 = metadata !{i32 21, i32 0, metadata !4, null}
+!37 = metadata !{i32 22, i32 0, metadata !4, null}
+!38 = metadata !{i32 25, i32 0, metadata !7, null}
+!39 = metadata !{i32 26, i32 0, metadata !7, null}
+!40 = metadata !{i32 27, i32 0, metadata !7, null}
+!41 = metadata !{i32 28, i32 0, metadata !7, null}
+!42 = metadata !{i32 29, i32 0, metadata !7, null}
diff --git a/test/Transforms/SampleProfile/syntax.ll b/test/Transforms/SampleProfile/syntax.ll
new file mode 100644
index 0000000..53c65f4
--- /dev/null
+++ b/test/Transforms/SampleProfile/syntax.ll
@@ -0,0 +1,20 @@
+; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/syntax.prof 2>&1 | FileCheck -check-prefix=NO-DEBUG %s
+; RUN: not opt < %s -sample-profile -sample-profile-file=missing.prof 2>&1 | FileCheck -check-prefix=MISSING-FILE %s
+; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_fn_header.prof 2>&1 | FileCheck -check-prefix=BAD-FN-HEADER %s
+; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_sample_line.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLE-LINE %s
+; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_line_values.prof 2>&1 | FileCheck -check-prefix=BAD-LINE-VALUES %s
+; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_discriminator_value.prof 2>&1 | FileCheck -check-prefix=BAD-DISCRIMINATOR-VALUE %s
+; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_samples.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLES %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_mangle.prof 2>&1 >/dev/null
+
+define void @empty() {
+entry:
+  ret void
+}
+; NO-DEBUG: error: No debug information found in function empty
+; MISSING-FILE: error: missing.prof:
+; BAD-FN-HEADER: error: {{.*}}bad_fn_header.prof:1: Expected 'mangled_name:NUM:NUM', found 3empty:100:BAD
+; BAD-SAMPLE-LINE: error: {{.*}}bad_sample_line.prof:3: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1: BAD
+; BAD-LINE-VALUES: error: {{.*}}bad_line_values.prof:2: Expected 'mangled_name:NUM:NUM', found -1: 10
+; BAD-DISCRIMINATOR-VALUE: error: {{.*}}bad_discriminator_value.prof:2: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1.-3: 10
+; BAD-SAMPLES: error: {{.*}}bad_samples.prof:2: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1.3: -10
diff --git a/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll b/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
index 3510dfc..8ac1d25 100644
--- a/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
+++ b/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
@@ -1,6 +1,6 @@
 ; PR1226
 ; RUN: opt < %s -scalarrepl -S | \
-; RUN:   not grep "call void @llvm.memcpy.i32"
+; RUN:   not grep "call void @llvm.memcpy.p0i8.p0i8.i32"
 ; RUN: opt < %s -scalarrepl -S | grep getelementptr
 ; END.
 
@@ -14,10 +14,10 @@ entry:
 	%L = alloca %struct.foo, align 2		; <%struct.foo*> [#uses=1]
 	%L2 = getelementptr %struct.foo* %L, i32 0, i32 0		; <i8*> [#uses=2]
 	%tmp13 = getelementptr %struct.foo* %P, i32 0, i32 0		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %L2, i8* %tmp13, i32 2, i32 1 )
+	call void @llvm.memcpy.p0i8.p0i8.i32( i8* %L2, i8* %tmp13, i32 2, i32 1, i1 false)
 	%tmp5 = load i8* %L2		; <i8> [#uses=1]
 	%tmp56 = sext i8 %tmp5 to i32		; <i32> [#uses=1]
 	ret i32 %tmp56
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1)
diff --git a/test/Transforms/ScalarRepl/vector_memcpy.ll b/test/Transforms/ScalarRepl/vector_memcpy.ll
index 33e8034..dfba9e2 100644
--- a/test/Transforms/ScalarRepl/vector_memcpy.ll
+++ b/test/Transforms/ScalarRepl/vector_memcpy.ll
@@ -9,8 +9,7 @@ define <16 x float> @foo(<16 x float> %A) nounwind {
 	store <16 x float> %A, <16 x float>* %tmp
 	%s = bitcast <16 x float>* %tmp to i8*
 	%s2 = bitcast <16 x float>* %tmp2 to i8*
-	call void @llvm.memcpy.i64(i8* %s2, i8* %s, i64 64, i32 16)
-	
+	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s2, i8* %s, i64 64, i32 16, i1 false)
 	%R = load <16 x float>* %tmp2
 	ret <16 x float> %R
 }
@@ -19,12 +18,11 @@ define <16 x float> @foo2(<16 x float> %A) nounwind {
 	%tmp2 = alloca <16 x float>, align 16
 
 	%s2 = bitcast <16 x float>* %tmp2 to i8*
-	call void @llvm.memset.i64(i8* %s2, i8 0, i64 64, i32 16)
+	call void @llvm.memset.p0i8.i64(i8* %s2, i8 0, i64 64, i32 16, i1 false)
 	
 	%R = load <16 x float>* %tmp2
 	ret <16 x float> %R
 }
 
-
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/Scalarizer/basic.ll b/test/Transforms/Scalarizer/basic.ll
new file mode 100644
index 0000000..1cfc0dd
--- /dev/null
+++ b/test/Transforms/Scalarizer/basic.ll
@@ -0,0 +1,451 @@
+; RUN: opt %s -scalarizer -scalarize-load-store -dce -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare <4 x float> @ext(<4 x float>)
+@g = global <4 x float> zeroinitializer
+
+define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) { ; float loop: phi, load, bitcast, shufflevector, fadd, call, fcmp, select, store
+; CHECK-LABEL: @f1(
+; CHECK: entry:
+; CHECK:   %init.i0 = extractelement <4 x float> %init, i32 0
+; CHECK:   %init.i1 = extractelement <4 x float> %init, i32 1
+; CHECK:   %init.i2 = extractelement <4 x float> %init, i32 2
+; CHECK:   %init.i3 = extractelement <4 x float> %init, i32 3
+; CHECK:   br label %loop
+; CHECK: loop:
+; CHECK:   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
+; CHECK:   %acc.i0 = phi float [ %init.i0, %entry ], [ %sel.i0, %loop ]
+; CHECK:   %acc.i1 = phi float [ %init.i1, %entry ], [ %sel.i1, %loop ]
+; CHECK:   %acc.i2 = phi float [ %init.i2, %entry ], [ %sel.i2, %loop ]
+; CHECK:   %acc.i3 = phi float [ %init.i3, %entry ], [ %sel.i3, %loop ]
+; CHECK:   %nexti = sub i32 %i, 1
+; CHECK:   %ptr = getelementptr <4 x float>* %base, i32 %i
+; CHECK:   %ptr.i0 = bitcast <4 x float>* %ptr to float*
+; CHECK:   %val.i0 = load float* %ptr.i0, align 16
+; CHECK:   %ptr.i1 = getelementptr float* %ptr.i0, i32 1
+; CHECK:   %val.i1 = load float* %ptr.i1, align 4
+; CHECK:   %ptr.i2 = getelementptr float* %ptr.i0, i32 2
+; CHECK:   %val.i2 = load float* %ptr.i2, align 8
+; CHECK:   %ptr.i3 = getelementptr float* %ptr.i0, i32 3
+; CHECK:   %val.i3 = load float* %ptr.i3, align 4
+; CHECK:   %add.i0 = fadd float %val.i0, %val.i2
+; CHECK:   %add.i1 = fadd float %val.i1, %val.i3
+; CHECK:   %add.i2 = fadd float %acc.i0, %acc.i2
+; CHECK:   %add.i3 = fadd float %acc.i1, %acc.i3
+; CHECK:   %add.upto0 = insertelement <4 x float> undef, float %add.i0, i32 0
+; CHECK:   %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1
+; CHECK:   %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2
+; CHECK:   %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
+; CHECK:   %call = call <4 x float> @ext(<4 x float> %add)
+; CHECK:   %call.i0 = extractelement <4 x float> %call, i32 0
+; CHECK:   %cmp.i0 = fcmp ogt float %call.i0, 1.0
+; CHECK:   %call.i1 = extractelement <4 x float> %call, i32 1
+; CHECK:   %cmp.i1 = fcmp ogt float %call.i1, 2.0
+; CHECK:   %call.i2 = extractelement <4 x float> %call, i32 2
+; CHECK:   %cmp.i2 = fcmp ogt float %call.i2, 3.0
+; CHECK:   %call.i3 = extractelement <4 x float> %call, i32 3
+; CHECK:   %cmp.i3 = fcmp ogt float %call.i3, 4.0
+; CHECK:   %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.0
+; CHECK:   %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.0
+; CHECK:   %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.0
+; CHECK:   %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.0
+; CHECK:   store float %sel.i0, float* %ptr.i0
+; CHECK:   store float %sel.i1, float* %ptr.i1
+; CHECK:   store float %sel.i2, float* %ptr.i2
+; CHECK:   store float %sel.i3, float* %ptr.i3
+; CHECK:   %test = icmp eq i32 %nexti, 0
+; CHECK:   br i1 %test, label %loop, label %exit
+; CHECK: exit:
+; CHECK:   ret void
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
+  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ] ; loop-carried vector value
+  %nexti = sub i32 %i, 1
+
+  %ptr = getelementptr <4 x float> *%base, i32 %i
+  %val = load <4 x float> *%ptr
+  %dval = bitcast <4 x float> %val to <2 x double> ; view both vectors as pairs of doubles
+  %dacc = bitcast <4 x float> %acc to <2 x double>
+  %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
+                            <2 x i32> <i32 0, i32 2> ; element 0 of %dval and of %dacc
+  %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
+                            <2 x i32> <i32 1, i32 3> ; element 1 of %dval and of %dacc
+  %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
+  %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
+  %add = fadd <4 x float> %f1, %f2
+  %call = call <4 x float> @ext(<4 x float> %add)
+  %cmp = fcmp ogt <4 x float> %call,
+                  <float 1.0, float 2.0, float 3.0, float 4.0>
+  %sel = select <4 x i1> %cmp, <4 x float> %call,
+                <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
+  store <4 x float> %sel, <4 x float> *%ptr
+
+  %test = icmp eq i32 %nexti, 0
+  br i1 %test, label %loop, label %exit ; branches back to %loop when %nexti is 0
+
+exit:
+  ret void
+}
+
+define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) { ; integer loop: phi, load, sext, add, icmp, splat, select, trunc, store
+; CHECK-LABEL: define void @f2(<4 x i32> %init, <4 x i8>* %base, i32 %count) {
+; CHECK: entry:
+; CHECK:   %init.i0 = extractelement <4 x i32> %init, i32 0
+; CHECK:   %init.i1 = extractelement <4 x i32> %init, i32 1
+; CHECK:   %init.i2 = extractelement <4 x i32> %init, i32 2
+; CHECK:   %init.i3 = extractelement <4 x i32> %init, i32 3
+; CHECK:   br label %loop
+; CHECK: loop:
+; CHECK:   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
+; CHECK:   %acc.i0 = phi i32 [ %init.i0, %entry ], [ %sel.i0, %loop ]
+; CHECK:   %acc.i1 = phi i32 [ %init.i1, %entry ], [ %sel.i1, %loop ]
+; CHECK:   %acc.i2 = phi i32 [ %init.i2, %entry ], [ %sel.i2, %loop ]
+; CHECK:   %acc.i3 = phi i32 [ %init.i3, %entry ], [ %sel.i3, %loop ]
+; CHECK:   %nexti = sub i32 %i, 1
+; CHECK:   %ptr = getelementptr <4 x i8>* %base, i32 %i
+; CHECK:   %ptr.i0 = bitcast <4 x i8>* %ptr to i8*
+; CHECK:   %val.i0 = load i8* %ptr.i0, align 4
+; CHECK:   %ptr.i1 = getelementptr i8* %ptr.i0, i32 1
+; CHECK:   %val.i1 = load i8* %ptr.i1, align 1
+; CHECK:   %ptr.i2 = getelementptr i8* %ptr.i0, i32 2
+; CHECK:   %val.i2 = load i8* %ptr.i2, align 2
+; CHECK:   %ptr.i3 = getelementptr i8* %ptr.i0, i32 3
+; CHECK:   %val.i3 = load i8* %ptr.i3, align 1
+; CHECK:   %ext.i0 = sext i8 %val.i0 to i32
+; CHECK:   %ext.i1 = sext i8 %val.i1 to i32
+; CHECK:   %ext.i2 = sext i8 %val.i2 to i32
+; CHECK:   %ext.i3 = sext i8 %val.i3 to i32
+; CHECK:   %add.i0 = add i32 %ext.i0, %acc.i0
+; CHECK:   %add.i1 = add i32 %ext.i1, %acc.i1
+; CHECK:   %add.i2 = add i32 %ext.i2, %acc.i2
+; CHECK:   %add.i3 = add i32 %ext.i3, %acc.i3
+; CHECK:   %cmp.i0 = icmp slt i32 %add.i0, -10
+; CHECK:   %cmp.i1 = icmp slt i32 %add.i1, -11
+; CHECK:   %cmp.i2 = icmp slt i32 %add.i2, -12
+; CHECK:   %cmp.i3 = icmp slt i32 %add.i3, -13
+; CHECK:   %sel.i0 = select i1 %cmp.i0, i32 %add.i0, i32 %i
+; CHECK:   %sel.i1 = select i1 %cmp.i1, i32 %add.i1, i32 %i
+; CHECK:   %sel.i2 = select i1 %cmp.i2, i32 %add.i2, i32 %i
+; CHECK:   %sel.i3 = select i1 %cmp.i3, i32 %add.i3, i32 %i
+; CHECK:   %trunc.i0 = trunc i32 %sel.i0 to i8
+; CHECK:   %trunc.i1 = trunc i32 %sel.i1 to i8
+; CHECK:   %trunc.i2 = trunc i32 %sel.i2 to i8
+; CHECK:   %trunc.i3 = trunc i32 %sel.i3 to i8
+; CHECK:   store i8 %trunc.i0, i8* %ptr.i0, align 4
+; CHECK:   store i8 %trunc.i1, i8* %ptr.i1, align 1
+; CHECK:   store i8 %trunc.i2, i8* %ptr.i2, align 2
+; CHECK:   store i8 %trunc.i3, i8* %ptr.i3, align 1
+; CHECK:   %test = icmp eq i32 %nexti, 0
+; CHECK:   br i1 %test, label %loop, label %exit
+; CHECK: exit:
+; CHECK:   ret void
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
+  %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ] ; loop-carried vector value
+  %nexti = sub i32 %i, 1
+
+  %ptr = getelementptr <4 x i8> *%base, i32 %i
+  %val = load <4 x i8> *%ptr
+  %ext = sext <4 x i8> %val to <4 x i32>
+  %add = add <4 x i32> %ext, %acc
+  %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
+  %single = insertelement <4 x i32> undef, i32 %i, i32 0
+  %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
+                         <4 x i32> zeroinitializer ; all-zero mask: every lane takes lane 0, i.e. %i
+  %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
+  %trunc = trunc <4 x i32> %sel to <4 x i8>
+  store <4 x i8> %trunc, <4 x i8> *%ptr
+
+  %test = icmp eq i32 %nexti, 0
+  br i1 %test, label %loop, label %exit ; branches back to %loop when %nexti is 0
+
+exit:
+  ret void
+}
+
+; Check that !tbaa information is preserved.  The load uses !1 and the store !2, so [[TAG]] is captured again at the first store.
+define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
+; CHECK-LABEL: @f3(
+; CHECK: %val.i0 = load i32* %src.i0, align 16, !tbaa ![[TAG:[0-9]*]]
+; CHECK: %val.i1 = load i32* %src.i1, align 4, !tbaa ![[TAG]]
+; CHECK: %val.i2 = load i32* %src.i2, align 8, !tbaa ![[TAG]]
+; CHECK: %val.i3 = load i32* %src.i3, align 4, !tbaa ![[TAG]]
+; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[TAG:[0-9]*]]
+; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[TAG]]
+; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[TAG]]
+; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[TAG]]
+; CHECK: ret void
+  %val = load <4 x i32> *%src, !tbaa !1
+  %add = add <4 x i32> %val, %val
+  store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
+  ret void
+}
+
+; Check that !tbaa.struct information is preserved.  Load and store both use !5, so one [[TAG]] capture covers all eight accesses.
+define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
+; CHECK-LABEL: @f4(
+; CHECK: %val.i0 = load i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]]
+; CHECK: %val.i1 = load i32* %src.i1, align 4, !tbaa.struct ![[TAG]]
+; CHECK: %val.i2 = load i32* %src.i2, align 8, !tbaa.struct ![[TAG]]
+; CHECK: %val.i3 = load i32* %src.i3, align 4, !tbaa.struct ![[TAG]]
+; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]]
+; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]]
+; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]]
+; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]]
+; CHECK: ret void
+  %val = load <4 x i32> *%src, !tbaa.struct !5
+  %add = add <4 x i32> %val, %val
+  store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
+  ret void
+}
+
+; Check that llvm.mem.parallel_loop_access information is preserved on every scalarized load and store.
+define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
+; CHECK-LABEL: @f5(
+; CHECK: %val.i0 = load i32* %this_src.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG:[0-9]*]]
+; CHECK: %val.i1 = load i32* %this_src.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
+; CHECK: %val.i2 = load i32* %this_src.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
+; CHECK: %val.i3 = load i32* %this_src.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
+; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG]]
+; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
+; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
+; CHECK: store i32 %add.i3, i32* %this_dst.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
+; CHECK: ret void
+entry:
+  br label %loop
+
+loop:
+  %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
+  %this_src = getelementptr <4 x i32> *%src, i32 %index
+  %this_dst = getelementptr <4 x i32> *%dst, i32 %index
+  %val = load <4 x i32> *%this_src, !llvm.mem.parallel_loop_access !3
+  %add = add <4 x i32> %val, %val
+  store <4 x i32> %add, <4 x i32> *%this_dst, !llvm.mem.parallel_loop_access !3
+  %next_index = add i32 %index, -1
+  %continue = icmp ne i32 %next_index, %count
+  br i1 %continue, label %loop, label %end, !llvm.loop !3 ; same node !3 that the accesses above reference
+
+end:
+  ret void
+}
+
+; Check that !fpmath information is preserved on each of the four scalar fadds.
+define <4 x float> @f6(<4 x float> %x) {
+; CHECK-LABEL: @f6(
+; CHECK: %x.i0 = extractelement <4 x float> %x, i32 0
+; CHECK: %res.i0 = fadd float %x.i0, 1.0{{[e+0]*}}, !fpmath ![[TAG:[0-9]*]]
+; CHECK: %x.i1 = extractelement <4 x float> %x, i32 1
+; CHECK: %res.i1 = fadd float %x.i1, 2.0{{[e+0]*}}, !fpmath ![[TAG]]
+; CHECK: %x.i2 = extractelement <4 x float> %x, i32 2
+; CHECK: %res.i2 = fadd float %x.i2, 3.0{{[e+0]*}}, !fpmath ![[TAG]]
+; CHECK: %x.i3 = extractelement <4 x float> %x, i32 3
+; CHECK: %res.i3 = fadd float %x.i3, 4.0{{[e+0]*}}, !fpmath ![[TAG]]
+; CHECK: %res.upto0 = insertelement <4 x float> undef, float %res.i0, i32 0
+; CHECK: %res.upto1 = insertelement <4 x float> %res.upto0, float %res.i1, i32 1
+; CHECK: %res.upto2 = insertelement <4 x float> %res.upto1, float %res.i2, i32 2
+; CHECK: %res = insertelement <4 x float> %res.upto2, float %res.i3, i32 3
+; CHECK: ret <4 x float> %res
+  %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
+    !fpmath !4
+  ret <4 x float> %res
+}
+
+; Check that arbitrary metadata kinds (here !foo) aren't kept on the scalarized code.
+define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
+; CHECK-LABEL: @f7(
+; CHECK-NOT: !foo
+; CHECK: ret void
+  %val = load <4 x i32> *%src, !foo !5
+  %add = add <4 x i32> %val, %val
+  store <4 x i32> %add, <4 x i32> *%dst, !foo !5
+  ret void
+}
+
+; Test GEP with vectors: both the pointer vector and the index vector are patched with insertelement first.
+define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
+                float *%other) {
+; CHECK-LABEL: @f8(
+; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
+; CHECK: %dest.i1 = getelementptr float** %dest.i0, i32 1
+; CHECK: %dest.i2 = getelementptr float** %dest.i0, i32 2
+; CHECK: %dest.i3 = getelementptr float** %dest.i0, i32 3
+; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
+; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
+; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
+; CHECK: %val.i0 = getelementptr float* %ptr0.i0, i32 100
+; CHECK: %val.i1 = getelementptr float* %other, i32 %i0.i1
+; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
+; CHECK: %val.i2 = getelementptr float* %ptr0.i2, i32 100
+; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
+; CHECK: %val.i3 = getelementptr float* %ptr0.i3, i32 %i0.i3
+; CHECK: store float* %val.i0, float** %dest.i0, align 32
+; CHECK: store float* %val.i1, float** %dest.i1, align 8
+; CHECK: store float* %val.i2, float** %dest.i2, align 16
+; CHECK: store float* %val.i3, float** %dest.i3, align 8
+; CHECK: ret void
+  %i1 = insertelement <4 x i32> %i0, i32 100, i32 0 ; lane 0 index becomes the constant 100
+  %i2 = insertelement <4 x i32> %i1, i32 100, i32 2 ; lane 2 index becomes the constant 100
+  %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1 ; lane 1 pointer becomes %other
+  %val = getelementptr <4 x float *> %ptr1, <4 x i32> %i2
+  store <4 x float *> %val, <4 x float *> *%dest
+  ret void
+}
+
+; Test the handling of unaligned loads and stores (the vector load is align 4, the vector store is align 8).
+define void @f9(<4 x float> *%dest, <4 x float> *%src) {
+; CHECK: @f9(
+; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
+; CHECK: %dest.i1 = getelementptr float* %dest.i0, i32 1
+; CHECK: %dest.i2 = getelementptr float* %dest.i0, i32 2
+; CHECK: %dest.i3 = getelementptr float* %dest.i0, i32 3
+; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
+; CHECK: %val.i0 = load float* %src.i0, align 4
+; CHECK: %src.i1 = getelementptr float* %src.i0, i32 1
+; CHECK: %val.i1 = load float* %src.i1, align 4
+; CHECK: %src.i2 = getelementptr float* %src.i0, i32 2
+; CHECK: %val.i2 = load float* %src.i2, align 4
+; CHECK: %src.i3 = getelementptr float* %src.i0, i32 3
+; CHECK: %val.i3 = load float* %src.i3, align 4
+; CHECK: store float %val.i0, float* %dest.i0, align 8
+; CHECK: store float %val.i1, float* %dest.i1, align 4
+; CHECK: store float %val.i2, float* %dest.i2, align 8
+; CHECK: store float %val.i3, float* %dest.i3, align 4
+; CHECK: ret void
+  %val = load <4 x float> *%src, align 4
+  store <4 x float> %val, <4 x float> *%dest, align 8
+  ret void
+}
+
+; ...and again with alignment smaller than one float element (the load is align 1, the store is align 2).
+define void @f10(<4 x float> *%dest, <4 x float> *%src) {
+; CHECK: @f10(
+; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
+; CHECK: %dest.i1 = getelementptr float* %dest.i0, i32 1
+; CHECK: %dest.i2 = getelementptr float* %dest.i0, i32 2
+; CHECK: %dest.i3 = getelementptr float* %dest.i0, i32 3
+; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
+; CHECK: %val.i0 = load float* %src.i0, align 1
+; CHECK: %src.i1 = getelementptr float* %src.i0, i32 1
+; CHECK: %val.i1 = load float* %src.i1, align 1
+; CHECK: %src.i2 = getelementptr float* %src.i0, i32 2
+; CHECK: %val.i2 = load float* %src.i2, align 1
+; CHECK: %src.i3 = getelementptr float* %src.i0, i32 3
+; CHECK: %val.i3 = load float* %src.i3, align 1
+; CHECK: store float %val.i0, float* %dest.i0, align 2
+; CHECK: store float %val.i1, float* %dest.i1, align 2
+; CHECK: store float %val.i2, float* %dest.i2, align 2
+; CHECK: store float %val.i3, float* %dest.i3, align 2
+; CHECK: ret void
+  %val = load <4 x float> *%src, align 1
+  store <4 x float> %val, <4 x float> *%dest, align 2
+  ret void
+}
+
+; Test that loads and stores of vectors with sub-byte elements (<32 x i1>) aren't scalarized.
+define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
+; CHECK: @f11(
+; CHECK: %val0 = load <32 x i1>* %src0
+; CHECK: %val1 = load <32 x i1>* %src1
+; CHECK: store <32 x i1> %and, <32 x i1>* %dest
+; CHECK: ret void
+  %src1 = getelementptr <32 x i1> *%src0, i32 1
+  %val0 = load <32 x i1> *%src0
+  %val1 = load <32 x i1> *%src1
+  %and = and <32 x i1> %val0, %val1
+  store <32 x i1> %and, <32 x i1> *%dest
+  ret void
+}
+
+; Test that an insertelement with a variable lane index (%index) isn't scalarized, while the shl using it is.
+define void @f12(<4 x i32> *%dest, <4 x i32> *%src, i32 %index) {
+; CHECK: @f12(
+; CHECK: %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
+; CHECK-DAG: %val1.i0 = extractelement <4 x i32> %val1, i32 0
+; CHECK-DAG: %val1.i1 = extractelement <4 x i32> %val1, i32 1
+; CHECK-DAG: %val1.i2 = extractelement <4 x i32> %val1, i32 2
+; CHECK-DAG: %val1.i3 = extractelement <4 x i32> %val1, i32 3
+; CHECK-DAG: %val2.i0 = shl i32 1, %val1.i0
+; CHECK-DAG: %val2.i1 = shl i32 2, %val1.i1
+; CHECK-DAG: %val2.i2 = shl i32 3, %val1.i2
+; CHECK-DAG: %val2.i3 = shl i32 4, %val1.i3
+; CHECK: ret void
+  %val0 = load <4 x i32> *%src
+  %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
+  %val2 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val1
+  store <4 x i32> %val2, <4 x i32> *%dest
+  ret void
+}
+
+; Test vector GEPs with more than one index (a constant index vector followed by the variable vector %i).
+define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
+                 float *%other) { ; note: %other is not referenced in this body
+; CHECK-LABEL: @f13(
+; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
+; CHECK: %dest.i1 = getelementptr float** %dest.i0, i32 1
+; CHECK: %dest.i2 = getelementptr float** %dest.i0, i32 2
+; CHECK: %dest.i3 = getelementptr float** %dest.i0, i32 3
+; CHECK: %i.i0 = extractelement <4 x i32> %i, i32 0
+; CHECK: %ptr.i0 = extractelement <4 x [4 x float]*> %ptr, i32 0
+; CHECK: %val.i0 = getelementptr inbounds [4 x float]* %ptr.i0, i32 0, i32 %i.i0
+; CHECK: %i.i1 = extractelement <4 x i32> %i, i32 1
+; CHECK: %ptr.i1 = extractelement <4 x [4 x float]*> %ptr, i32 1
+; CHECK: %val.i1 = getelementptr inbounds [4 x float]* %ptr.i1, i32 1, i32 %i.i1
+; CHECK: %i.i2 = extractelement <4 x i32> %i, i32 2
+; CHECK: %ptr.i2 = extractelement <4 x [4 x float]*> %ptr, i32 2
+; CHECK: %val.i2 = getelementptr inbounds [4 x float]* %ptr.i2, i32 2, i32 %i.i2
+; CHECK: %i.i3 = extractelement <4 x i32> %i, i32 3
+; CHECK: %ptr.i3 = extractelement <4 x [4 x float]*> %ptr, i32 3
+; CHECK: %val.i3 = getelementptr inbounds [4 x float]* %ptr.i3, i32 3, i32 %i.i3
+; CHECK: store float* %val.i0, float** %dest.i0, align 32
+; CHECK: store float* %val.i1, float** %dest.i1, align 8
+; CHECK: store float* %val.i2, float** %dest.i2, align 16
+; CHECK: store float* %val.i3, float** %dest.i3, align 8
+; CHECK: ret void
+  %val = getelementptr inbounds <4 x [4 x float] *> %ptr,
+                                <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
+                                <4 x i32> %i
+  store <4 x float *> %val, <4 x float *> *%dest
+  ret void
+}
+
+; Test combinations of vector and non-vector PHIs in the same block (%this_acc is a vector, %this_count a scalar).
+define <4 x float> @f14(<4 x float> %acc, i32 %count) {
+; CHECK-LABEL: @f14(
+; CHECK: %this_acc.i0 = phi float [ %acc.i0, %entry ], [ %next_acc.i0, %loop ]
+; CHECK: %this_acc.i1 = phi float [ %acc.i1, %entry ], [ %next_acc.i1, %loop ]
+; CHECK: %this_acc.i2 = phi float [ %acc.i2, %entry ], [ %next_acc.i2, %loop ]
+; CHECK: %this_acc.i3 = phi float [ %acc.i3, %entry ], [ %next_acc.i3, %loop ]
+; CHECK: %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
+; CHECK: %this_acc.upto0 = insertelement <4 x float> undef, float %this_acc.i0, i32 0
+; CHECK: %this_acc.upto1 = insertelement <4 x float> %this_acc.upto0, float %this_acc.i1, i32 1
+; CHECK: %this_acc.upto2 = insertelement <4 x float> %this_acc.upto1, float %this_acc.i2, i32 2
+; CHECK: %this_acc = insertelement <4 x float> %this_acc.upto2, float %this_acc.i3, i32 3
+; CHECK: ret <4 x float> %next_acc
+entry:
+  br label %loop
+
+loop:
+  %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]
+  %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
+  %foo = call <4 x float> @ext(<4 x float> %this_acc) ; takes the whole vector; see the %this_acc.upto* lines expected above
+  %next_acc = fadd <4 x float> %this_acc, %foo
+  %next_count = sub i32 %this_count, 1
+  %cmp = icmp eq i32 %next_count, 0
+  br i1 %cmp, label %loop, label %exit ; branches back to %loop when %next_count is 0
+
+exit:
+  ret <4 x float> %next_acc
+}
+
+!0 = metadata !{ metadata !"root" } ; parent of !1 and !2
+!1 = metadata !{ metadata !"set1", metadata !0 } ; !tbaa on the load in f3
+!2 = metadata !{ metadata !"set2", metadata !0 } ; !tbaa on the store in f3
+!3 = metadata !{ metadata !3 } ; self-referential node used as !llvm.loop / parallel_loop_access in f5
+!4 = metadata !{ float 4.0 } ; !fpmath operand used in f6
+!5 = metadata !{ i64 0, i64 8, null } ; used as !tbaa.struct in f4 and as !foo in f7
diff --git a/test/Transforms/Scalarizer/dbginfo.ll b/test/Transforms/Scalarizer/dbginfo.ll
new file mode 100644
index 0000000..546e89d
--- /dev/null
+++ b/test/Transforms/Scalarizer/dbginfo.ll
@@ -0,0 +1,86 @@
+; RUN: opt %s -scalarizer -scalarize-load-store -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable.  The expected output keeps !dbg and !tbaa on every scalar load/store and !dbg on every scalar add.
+define void @f1(<4 x i32>* nocapture %a, <4 x i32>* nocapture readonly %b, <4 x i32>* nocapture readonly %c) #0 {
+; CHECK: @f1(
+; CHECK: %a.i0 = bitcast <4 x i32>* %a to i32*
+; CHECK: %a.i1 = getelementptr i32* %a.i0, i32 1
+; CHECK: %a.i2 = getelementptr i32* %a.i0, i32 2
+; CHECK: %a.i3 = getelementptr i32* %a.i0, i32 3
+; CHECK: %c.i0 = bitcast <4 x i32>* %c to i32*
+; CHECK: %c.i1 = getelementptr i32* %c.i0, i32 1
+; CHECK: %c.i2 = getelementptr i32* %c.i0, i32 2
+; CHECK: %c.i3 = getelementptr i32* %c.i0, i32 3
+; CHECK: %b.i0 = bitcast <4 x i32>* %b to i32*
+; CHECK: %b.i1 = getelementptr i32* %b.i0, i32 1
+; CHECK: %b.i2 = getelementptr i32* %b.i0, i32 2
+; CHECK: %b.i3 = getelementptr i32* %b.i0, i32 3
+; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %a}, i64 0, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}}
+; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %b}, i64 0, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}}
+; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %c}, i64 0, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}}
+; CHECK: %bval.i0 = load i32* %b.i0, align 16, !dbg ![[TAG1:[0-9]+]], !tbaa ![[TAG2:[0-9]+]]
+; CHECK: %bval.i1 = load i32* %b.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %bval.i2 = load i32* %b.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %bval.i3 = load i32* %b.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %cval.i0 = load i32* %c.i0, align 16, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %cval.i1 = load i32* %c.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %cval.i2 = load i32* %c.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %cval.i3 = load i32* %c.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %add.i0 = add i32 %bval.i0, %cval.i0, !dbg ![[TAG1]]
+; CHECK: %add.i1 = add i32 %bval.i1, %cval.i1, !dbg ![[TAG1]]
+; CHECK: %add.i2 = add i32 %bval.i2, %cval.i2, !dbg ![[TAG1]]
+; CHECK: %add.i3 = add i32 %bval.i3, %cval.i3, !dbg ![[TAG1]]
+; CHECK: store i32 %add.i0, i32* %a.i0, align 16, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: store i32 %add.i1, i32* %a.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: store i32 %add.i2, i32* %a.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: store i32 %add.i3, i32* %a.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: ret void
+entry:
+  tail call void @llvm.dbg.value(metadata !{<4 x i32>* %a}, i64 0, metadata !15), !dbg !20
+  tail call void @llvm.dbg.value(metadata !{<4 x i32>* %b}, i64 0, metadata !16), !dbg !20
+  tail call void @llvm.dbg.value(metadata !{<4 x i32>* %c}, i64 0, metadata !17), !dbg !20
+  %bval = load <4 x i32>* %b, align 16, !dbg !21, !tbaa !22
+  %cval = load <4 x i32>* %c, align 16, !dbg !21, !tbaa !22
+  %add = add <4 x i32> %bval, %cval, !dbg !21
+  store <4 x i32> %add, <4 x i32>* %a, align 16, !dbg !21, !tbaa !22
+  ret void, !dbg !25
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18, !26}
+!llvm.ident = !{!19}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 194134) (llvm/trunk 194126)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/richards/llvm/build//tmp/add.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"/tmp/add.c", metadata !"/home/richards/llvm/build"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f1", metadata !"f1", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (<4 x i32>*, <4 x i32>*, <4 x i32>*)* @f1, null, null, metadata !14, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [f]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/richards/llvm/build//tmp/add.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null, metadata !8, metadata !8, metadata !8}
+!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from V4SI]
+!9 = metadata !{i32 786454, metadata !1, null, metadata !"V4SI", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] [V4SI] [line 1, size 0, align 0, offset 0] [from ]
+!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 2048, metadata !11, metadata !12, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int]
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786465, i64 0, i64 4}        ; [ DW_TAG_subrange_type ] [0, 3]
+!14 = metadata !{metadata !15, metadata !16, metadata !17}
+!15 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !5, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3]
+!16 = metadata !{i32 786689, metadata !4, metadata !"b", metadata !5, i32 33554435, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 3]
+!17 = metadata !{i32 786689, metadata !4, metadata !"c", metadata !5, i32 50331651, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [c] [line 3]
+!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!19 = metadata !{metadata !"clang version 3.4 (trunk 194134) (llvm/trunk 194126)"}
+!20 = metadata !{i32 3, i32 0, metadata !4, null}
+!21 = metadata !{i32 5, i32 0, metadata !4, null}
+!22 = metadata !{metadata !23, metadata !23, i64 0}
+!23 = metadata !{metadata !"omnipotent char", metadata !24, i64 0}
+!24 = metadata !{metadata !"Simple C/C++ TBAA"}
+!25 = metadata !{i32 6, i32 0, metadata !4, null}
+!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/Scalarizer/no-data-layout.ll b/test/Transforms/Scalarizer/no-data-layout.ll
new file mode 100644
index 0000000..3eaf669
--- /dev/null
+++ b/test/Transforms/Scalarizer/no-data-layout.ll
@@ -0,0 +1,25 @@
+; RUN: opt %s -scalarizer -scalarize-load-store -S | FileCheck %s
+
+; Test the handling of loads and stores when no data layout is available: the vector load/store stay whole, the fadd is still split.
+define void @f1(<4 x float> *%dest, <4 x float> *%src) {
+; CHECK: @f1(
+; CHECK: %val = load <4 x float>* %src, align 4
+; CHECK: %val.i0 = extractelement <4 x float> %val, i32 0
+; CHECK: %add.i0 = fadd float %val.i0, %val.i0
+; CHECK: %val.i1 = extractelement <4 x float> %val, i32 1
+; CHECK: %add.i1 = fadd float %val.i1, %val.i1
+; CHECK: %val.i2 = extractelement <4 x float> %val, i32 2
+; CHECK: %add.i2 = fadd float %val.i2, %val.i2
+; CHECK: %val.i3 = extractelement <4 x float> %val, i32 3
+; CHECK: %add.i3 = fadd float %val.i3, %val.i3
+; CHECK: %add.upto0 = insertelement <4 x float> undef, float %add.i0, i32 0
+; CHECK: %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1
+; CHECK: %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2
+; CHECK: %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
+; CHECK: store <4 x float> %add, <4 x float>* %dest, align 8
+; CHECK: ret void
+  %val = load <4 x float> *%src, align 4
+  %add = fadd <4 x float> %val, %val
+  store <4 x float> %add, <4 x float> *%dest, align 8
+  ret void
+}
diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 3687327..81079b1 100644
--- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -806,3 +806,115 @@ return:
 ; CHECK-NOT: @switch.table
 ; CHECK: switch i32 %c
 }
+
+; If we can build a lookup table without any holes (cases 0-3 here), we don't need a default result.
+declare void @exit(i32)
+define i32 @nodefaultnoholes(i32 %c) {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 0, label %return
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+  ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.default: call void @exit(i32 1) ; default path ends in unreachable and has no entry in the phi below
+            unreachable
+return:
+  %x = phi i32 [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+  ret i32 %x
+
+; CHECK-LABEL: @nodefaultnoholes(
+; CHECK: @switch.table
+; CHECK-NOT: switch i32
+}
+
+; This lookup table will have a hole (case 4 is missing between 3 and 5), so we need to test for the holes.
+define i32 @nodefaultwithholes(i32 %c) {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 0, label %return
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+    i32 5, label %sw.bb3
+  ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.default: call void @exit(i32 1) ; default path ends in unreachable and has no entry in the phi below
+            unreachable
+return:
+  %x = phi i32 [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+  ret i32 %x
+
+; CHECK-LABEL: @nodefaultwithholes(
+; CHECK: entry:
+; CHECK: br i1 %{{.*}}, label %switch.hole_check, label %sw.default
+; CHECK: switch.hole_check:
+; CHECK-NEXT: %switch.maskindex = trunc i32 %switch.tableidx to i6
+; CHECK-NEXT: %switch.shifted = lshr i6 -17, %switch.maskindex
+; The mask is binary 101111 (i6 -17): bit 4, the missing case, is the only clear bit.
+; CHECK-NEXT: %switch.lobit = trunc i6 %switch.shifted to i1
+; CHECK-NEXT: br i1 %switch.lobit, label %switch.lookup, label %sw.default
+; CHECK-NOT: switch i32
+}
+
+; We don't build lookup tables with holes for switches with fewer than four cases (here 0, 1 and 3; 2 is missing).
+define i32 @threecasesholes(i32 %c) {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 0, label %return
+    i32 1, label %sw.bb1
+    i32 3, label %sw.bb2
+  ]
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.default: br label %return
+return:
+  %x = phi i32 [ %c, %sw.default ], [ 5, %sw.bb2 ], [ 7, %sw.bb1 ], [ 9, %entry ]
+  ret i32 %x
+; CHECK-LABEL: @threecasesholes(
+; CHECK: switch i32
+; CHECK-NOT: @switch.table
+}
+
+; We build lookup tables for switches with three or more cases (here the contiguous cases 0, 1 and 2).
+define i32 @threecases(i32 %c) {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 0, label %return
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+  ]
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.default: br label %return
+return:
+  %x = phi i32 [ 3, %sw.default ], [ 5, %sw.bb2 ], [ 7, %sw.bb1 ], [ 9, %entry ]
+  ret i32 %x
+; CHECK-LABEL: @threecases(
+; CHECK-NOT: switch i32
+; CHECK: @switch.table
+}
+
+; We don't build tables for switches with two cases (here 0 and 1); the switch must remain.
+define i32 @twocases(i32 %c) {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 0, label %return
+    i32 1, label %sw.bb1
+  ]
+sw.bb1: br label %return
+sw.default: br label %return
+return:
+  %x = phi i32 [ 3, %sw.default ], [ 7, %sw.bb1 ], [ 9, %entry ]
+  ret i32 %x
+; CHECK-LABEL: @twocases(
+; CHECK: switch i32
+; CHECK-NOT: @switch.table
+}
diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll
index 9c4edd6..d6958a9 100644
--- a/test/Transforms/SimplifyCFG/basictest.ll
+++ b/test/Transforms/SimplifyCFG/basictest.ll
@@ -41,3 +41,33 @@ return:                                           ; preds = %entry
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT: ret void
 }
+
+
+; PR14893: the load in the output must end right after its alignment, i.e. without the !range/!tbaa metadata of the input load.
+define i8 @test6f() {
+; CHECK-LABEL: @test6f
+; CHECK: alloca i8, align 1
+; CHECK-NEXT: call i8 @test6g
+; CHECK-NEXT: icmp eq i8 %tmp, 0
+; CHECK-NEXT: load i8* %r, align 1{{$}}
+
+bb0:
+  %r = alloca i8, align 1
+  %tmp = call i8 @test6g(i8* %r)
+  %tmp1 = icmp eq i8 %tmp, 0
+  br i1 %tmp1, label %bb2, label %bb1             ; the checks expect the load directly after the icmp, with no branch in between
+bb1:
+  %tmp3 = load i8* %r, align 1, !range !2, !tbaa !1 ; input load carries both !range and !tbaa
+  %tmp4 = icmp eq i8 %tmp3, 1
+  br i1 %tmp4, label %bb2, label %bb3
+bb2:
+  br label %bb3
+bb3:
+  %tmp6 = phi i8 [ 0, %bb2 ], [ 1, %bb1 ]
+  ret i8 %tmp6
+}
+declare i8 @test6g(i8*)
+
+!0 = metadata !{metadata !1, metadata !1, i64 0}
+!1 = metadata !{metadata !"foo"}
+!2 = metadata !{i8 0, i8 2}
diff --git a/test/Transforms/SimplifyCFG/no_speculative_loads_with_tsan.ll b/test/Transforms/SimplifyCFG/no_speculative_loads_with_tsan.ll
new file mode 100644
index 0000000..b388cc5
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/no_speculative_loads_with_tsan.ll
@@ -0,0 +1,40 @@
+; RUN: opt -simplifycfg -S %s | FileCheck %s
+; Make sure we don't speculate loads under ThreadSanitizer.
+@g = global i32 0, align 4
+
+define i32 @TestNoTsan(i32 %cond) nounwind readonly uwtable { ; baseline: no sanitize_thread attribute
+entry:
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %return, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = load i32* @g, align 4                      ; load of a global, reached only when %cond != 0
+  br label %return
+
+return:                                           ; preds = %entry, %if.then
+  %retval = phi i32 [ %0, %if.then ], [ 0, %entry ]
+  ret i32 %retval
+; CHECK-LABEL: @TestNoTsan
+; CHECK: %[[LOAD:[^ ]*]] = load
+; CHECK: select{{.*}}[[LOAD]]
+; CHECK: ret i32
+}
+
+define i32 @TestTsan(i32 %cond) nounwind readonly uwtable sanitize_thread { ; body matches @TestNoTsan; only sanitize_thread is added
+entry:
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %return, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = load i32* @g, align 4                      ; the checks expect this load to stay between the two branches
+  br label %return
+
+return:                                           ; preds = %entry, %if.then
+  %retval = phi i32 [ %0, %if.then ], [ 0, %entry ]
+  ret i32 %retval
+; CHECK-LABEL: @TestTsan
+; CHECK: br i1
+; CHECK: load i32* @g
+; CHECK: br label
+; CHECK: ret i32
+}
diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
index 4022ed6..bdd25ba 100644
--- a/test/Transforms/SimplifyCFG/preserve-branchweights.ll
+++ b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
@@ -87,7 +87,7 @@ entry:
     i32 2, label %sw.bb
     i32 3, label %sw.bb1
   ], !prof !3
-; CHECK: test5
+; CHECK-LABEL: @test5(
 ; CHECK: switch i32 %N, label %sw2 [
 ; CHECK: i32 3, label %sw.bb1
 ; CHECK: i32 2, label %sw.bb
@@ -119,7 +119,7 @@ entry:
     i32 2, label %sw.bb
     i32 3, label %sw.bb1
   ], !prof !4
-; CHECK: test6
+; CHECK-LABEL: @test6(
 ; CHECK: switch i32 %N, label %sw.epilog
 ; CHECK: i32 3, label %sw.bb1
 ; CHECK: i32 2, label %sw.bb
@@ -266,7 +266,7 @@ lor.end:
  call void @helper(i32 0) nounwind
  ret void
 
-; CHECK: test10
+; CHECK-LABEL: @test10(
 ; CHECK: %x.off = add i32 %x, -1
 ; CHECK: %switch = icmp ult i32 %x.off, 3
 ; CHECK: br i1 %switch, label %lor.end, label %lor.rhs, !prof !8
@@ -279,6 +279,7 @@ define void @test11(i32 %x) nounwind {
     i32 21, label %b
     i32 24, label %c
   ], !prof !8
+; CHECK-LABEL: @test11(
 ; CHECK: %cond = icmp eq i32 %i, 24
 ; CHECK: br i1 %cond, label %c, label %a, !prof !9
 
@@ -293,6 +294,76 @@ c:
  ret void
 }
 
+;; test12 - Don't crash if the whole switch is removed
+define void @test12(i32 %M, i32 %N) nounwind uwtable {
+entry:
+  switch i32 %N, label %sw.bb [
+    i32 1, label %sw.bb                           ; same target as the default
+  ], !prof !9
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @helper
+; CHECK-NEXT: ret void
+
+sw.bb:
+  call void @helper(i32 0)
+  br label %sw.epilog
+
+sw.epilog:
+  ret void
+}
+
+;; If every case is dead, make sure they are all removed. This used to
+;; crash trying to merge the metadata.
+define void @test13(i32 %x) nounwind {
+entry:
+  %i = shl i32 %x, 1                              ; low bit of %i is always zero
+  switch i32 %i, label %a [
+    i32 21, label %b                              ; odd, so %i can never equal it
+    i32 25, label %c                              ; odd, so %i can never equal it
+  ], !prof !8
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @helper
+; CHECK-NEXT: ret void
+
+a:
+ call void @helper(i32 0) nounwind
+ ret void
+b:
+ call void @helper(i32 1) nounwind
+ ret void
+c:
+ call void @helper(i32 2) nounwind
+ ret void
+}
+
+;; When folding branches to a common destination, the updated branch weights
+;; can exceed uint32 by more than a factor of 2. We should keep halving the
+;; weights until they can fit into uint32.
+@max_regno = common global i32 0, align 4
+define void @test14(i32* %old, i32 %final) {
+; CHECK-LABEL: @test14
+; CHECK: br i1 %or.cond, label %for.exit, label %for.inc, !prof !10
+for.cond:
+  br label %for.cond2
+for.cond2:
+  %i.1 = phi i32 [ %inc19, %for.inc ], [ 0, %for.cond ]
+  %bit.0 = phi i32 [ %shl, %for.inc ], [ 1, %for.cond ]
+  %tobool = icmp eq i32 %bit.0, 0
+  br i1 %tobool, label %for.exit, label %for.body3, !prof !10 ; input !10: weights 672646 / 21604207 (the expected output !10 differs)
+for.body3:
+  %v3 = load i32* @max_regno, align 4
+  %cmp4 = icmp eq i32 %i.1, %v3
+  br i1 %cmp4, label %for.exit, label %for.inc, !prof !11 ; input !11: weights 6960 / 21597248
+for.inc:
+  %shl = shl i32 %bit.0, 1
+  %inc19 = add nsw i32 %i.1, 1
+  br label %for.cond2
+for.exit:
+  ret void
+}
+
 !0 = metadata !{metadata !"branch_weights", i32 3, i32 5}
 !1 = metadata !{metadata !"branch_weights", i32 1, i32 1}
 !2 = metadata !{metadata !"branch_weights", i32 1, i32 2}
@@ -302,6 +373,9 @@ c:
 !6 = metadata !{metadata !"branch_weights", i32 1, i32 3}
 !7 = metadata !{metadata !"branch_weights", i32 33, i32 9, i32 8, i32 7}
 !8 = metadata !{metadata !"branch_weights", i32 33, i32 9, i32 8}
+!9 = metadata !{metadata !"branch_weights", i32 7, i32 6}
+!10 = metadata !{metadata !"branch_weights", i32 672646, i32 21604207}
+!11 = metadata !{metadata !"branch_weights", i32 6960, i32 21597248}
 
 ; CHECK: !0 = metadata !{metadata !"branch_weights", i32 5, i32 11}
 ; CHECK: !1 = metadata !{metadata !"branch_weights", i32 1, i32 5}
@@ -313,4 +387,6 @@ c:
 ; CHECK: !7 = metadata !{metadata !"branch_weights", i32 17, i32 9, i32 8, i32 7, i32 17}
 ; CHECK: !8 = metadata !{metadata !"branch_weights", i32 24, i32 33}
 ; CHECK: !9 = metadata !{metadata !"branch_weights", i32 8, i32 33}
-; CHECK-NOT: !9
+;; The false weight prints out as a negative integer here, but inside llvm, we
+;; treat the weight as an unsigned integer.
+; CHECK: !10 = metadata !{metadata !"branch_weights", i32 112017436, i32 -735157296}
diff --git a/test/Transforms/SimplifyCFG/speculate-math.ll b/test/Transforms/SimplifyCFG/speculate-math.ll
new file mode 100644
index 0000000..fa7976d
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/speculate-math.ll
@@ -0,0 +1,58 @@
+; RUN: opt -S -simplifycfg -phi-node-folding-threshold=2 < %s | FileCheck %s
+
+declare float @llvm.sqrt.f32(float) nounwind readonly
+declare float @llvm.fma.f32(float, float, float) nounwind readonly
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readonly
+
+; CHECK-LABEL: @sqrt_test(
+; CHECK: select
+define void @sqrt_test(float addrspace(1)* noalias nocapture %out, float %a) nounwind {
+entry:
+  %cmp.i = fcmp olt float %a, 0.000000e+00
+  br i1 %cmp.i, label %test_sqrt.exit, label %cond.else.i ; negative input skips the sqrt call
+
+cond.else.i:                                      ; preds = %entry
+  %0 = tail call float @llvm.sqrt.f32(float %a) nounwind readnone ; only instruction before the block's branch
+  br label %test_sqrt.exit
+
+test_sqrt.exit:                                   ; preds = %cond.else.i, %entry
+  %cond.i = phi float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ] ; entry edge supplies a NaN constant
+  store float %cond.i, float addrspace(1)* %out, align 4
+  ret void
+}
+
+
+; CHECK-LABEL: @fma_test(
+; CHECK: select
+define void @fma_test(float addrspace(1)* noalias nocapture %out, float %a, float %b, float %c) nounwind {
+entry:
+  %cmp.i = fcmp olt float %a, 0.000000e+00
+  br i1 %cmp.i, label %test_fma.exit, label %cond.else.i ; negative %a skips the fma call
+
+cond.else.i:                                      ; preds = %entry
+  %0 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone ; only instruction before the block's branch
+  br label %test_fma.exit
+
+test_fma.exit:                                   ; preds = %cond.else.i, %entry
+  %cond.i = phi float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ] ; entry edge supplies a NaN constant
+  store float %cond.i, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL: @fmuladd_test(
+; CHECK: select
+define void @fmuladd_test(float addrspace(1)* noalias nocapture %out, float %a, float %b, float %c) nounwind {
+entry:
+  %cmp.i = fcmp olt float %a, 0.000000e+00
+  br i1 %cmp.i, label %test_fmuladd.exit, label %cond.else.i ; negative %a skips the fmuladd call
+
+cond.else.i:                                      ; preds = %entry
+  %0 = tail call float @llvm.fmuladd.f32(float %a, float %b, float %c) nounwind readnone ; only instruction before the block's branch
+  br label %test_fmuladd.exit
+
+test_fmuladd.exit:                                   ; preds = %cond.else.i, %entry
+  %cond.i = phi float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ] ; entry edge supplies a NaN constant
+  store float %cond.i, float addrspace(1)* %out, align 4
+  ret void
+}
+
diff --git a/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll b/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
index e9d93e8..5ae62af 100644
--- a/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
+++ b/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
@@ -65,7 +65,7 @@ define void @test5(i1 %C, i32* %P) {
 entry:
   br i1 %C, label %T, label %F
 T:
-  cmpxchg volatile i32* %P, i32 0, i32 1 seq_cst
+  cmpxchg volatile i32* %P, i32 0, i32 1 seq_cst seq_cst
   unreachable
 F:
   ret void
diff --git a/test/Transforms/Sink/basic.ll b/test/Transforms/Sink/basic.ll
index 85ab376..4aac6d6 100644
--- a/test/Transforms/Sink/basic.ll
+++ b/test/Transforms/Sink/basic.ll
@@ -62,3 +62,82 @@ X:                                     ; preds = %5, %3
   ret i32 %R
 }
 
+; We shouldn't sink constant-sized allocas from the entry block, since CodeGen
+; interprets allocas outside the entry block as dynamically sized stack objects.
+
+; CHECK-LABEL: @alloca_nosink
+; CHECK: entry:
+; CHECK-NEXT: alloca
+define i32 @alloca_nosink(i32 %a, i32 %b) {
+entry:
+  %0 = alloca i32                                 ; fixed size, in the entry block
+  %1 = icmp ne i32 %a, 0
+  br i1 %1, label %if, label %endif
+
+if:                                               ; every use of %0 is in this block
+  %2 = getelementptr i32* %0, i32 1
+  store i32 0, i32* %0
+  store i32 1, i32* %2
+  %3 = getelementptr i32* %0, i32 %b
+  %4 = load i32* %3
+  ret i32 %4
+
+endif:
+  ret i32 0
+}
+
+; Make sure we sink dynamically sized allocas.
+
+; CHECK-LABEL: @alloca_sink_dynamic
+; CHECK: entry:
+; CHECK-NOT: alloca
+; CHECK: if:
+; CHECK-NEXT: alloca
+define i32 @alloca_sink_dynamic(i32 %a, i32 %b, i32 %size) {
+entry:
+  %0 = alloca i32, i32 %size                      ; element count comes from the %size argument
+  %1 = icmp ne i32 %a, 0
+  br i1 %1, label %if, label %endif
+
+if:                                               ; every use of %0 is in this block
+  %2 = getelementptr i32* %0, i32 1
+  store i32 0, i32* %0
+  store i32 1, i32* %2
+  %3 = getelementptr i32* %0, i32 %b
+  %4 = load i32* %3
+  ret i32 %4
+
+endif:
+  ret i32 0
+}
+
+; We also want to sink allocas that are not in the entry block.  These
+; will already be considered as dynamically sized stack objects, so sinking
+; them does no further damage.
+
+; CHECK-LABEL: @alloca_sink_nonentry
+; CHECK: if0:
+; CHECK-NOT: alloca
+; CHECK: if:
+; CHECK-NEXT: alloca
+define i32 @alloca_sink_nonentry(i32 %a, i32 %b, i32 %c) {
+entry:
+  %cmp = icmp ne i32 %c, 0
+  br i1 %cmp, label %endif, label %if0
+
+if0:
+  %0 = alloca i32                                 ; fixed size, but not in the entry block
+  %1 = icmp ne i32 %a, 0
+  br i1 %1, label %if, label %endif
+
+if:                                               ; every use of %0 is in this block
+  %2 = getelementptr i32* %0, i32 1
+  store i32 0, i32* %0
+  store i32 1, i32* %2
+  %3 = getelementptr i32* %0, i32 %b
+  %4 = load i32* %3
+  ret i32 %4
+
+endif:
+  ret i32 0
+}
diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll
index 2878468..b55ac3c 100644
--- a/test/Transforms/StripSymbols/2010-08-25-crash.ll
+++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll
@@ -9,7 +9,7 @@ entry:
 
 !0 = metadata !{i32 524334, metadata !10, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 524329, metadata !10} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, metadata !10, i32 12, metadata !"clang version 2.8 (trunk 112062)", i1 true, metadata !"", i32 0, metadata !11, metadata !11, metadata !12, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 524305, metadata !10, i32 12, metadata !"clang version 2.8 (trunk 112062)", i1 true, metadata !"", i32 0, metadata !11, metadata !11, metadata !12, metadata !13, null, metadata !"", i32 1} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 524309, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 524324, metadata !10, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/Transforms/StripSymbols/strip-dead-debug-info.ll b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
index 2d687ae..8ce7b87 100644
--- a/test/Transforms/StripSymbols/strip-dead-debug-info.ll
+++ b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
@@ -30,7 +30,7 @@ attributes #2 = { nounwind readonly ssp }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!25}
 
-!0 = metadata !{i32 524305, metadata !1, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !23, metadata !24, null, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp//g.c] [DW_LANG_C89]
+!0 = metadata !{i32 524305, metadata !1, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !23, metadata !24, null, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp//g.c] [DW_LANG_C89]
 !1 = metadata !{metadata !"g.c", metadata !"/tmp/"}
 !2 = metadata !{null}
 !3 = metadata !{i32 524334, metadata !1, null, metadata !"bar", metadata !"bar", metadata !"", i32 5, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [scope 0] [bar]
author	Stephen Hines <srhines@google.com>	2014-04-23 16:57:46 -0700
committer	Stephen Hines <srhines@google.com>	2014-04-24 15:53:16 -0700
commit	36b56886974eae4f9c5ebc96befd3e7bfe5de338 (patch)
tree	e6cfb69fbbd937f450eeb83bfb83b9da3b01275a /test/Transforms
parent	69a8640022b04415ae9fac62f8ab090601d8f889 (diff)
download	external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.zip external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.gz external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.bz2