Update aosp/master LLVM for rebase to r222494.

Change-Id: Ic787f5e0124df789bd26f3f24680f45e678eef2d
author: Stephen Hines <srhines@google.com> 2014-12-01 14:51:49 -0800
committer: Stephen Hines <srhines@google.com> 2014-12-02 16:08:10 -0800
commit: 37ed9c199ca639565f6ce88105f9e39e898d82d0 (patch)
tree: 8fb36d3910e3ee4c4e1b7422f4f017108efc52f5 /test/Transforms
parent: d2327b22152ced7bc46dc629fc908959e8a52d03 (diff)
download: external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.zip
external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.gz
external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.bz2
385 files changed, 13422 insertions, 1549 deletions
diff --git a/test/Transforms/AddDiscriminators/basic.ll b/test/Transforms/AddDiscriminators/basic.ll
index b12cbee..6c1e532 100644
--- a/test/Transforms/AddDiscriminators/basic.ll
+++ b/test/Transforms/AddDiscriminators/basic.ll
@@ -40,20 +40,20 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [basic.c] [DW_LANG_C99]
+!0 = metadata !{metadata !"0x11\0012\00clang version 3.5 \000\00\000\00\000", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [basic.c] [DW_LANG_C99]
 !1 = metadata !{metadata !"basic.c", metadata !"."}
 !2 = metadata !{}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [basic.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", metadata !1, metadata !5, metadata !6, null, void (i32)* @foo, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [basic.c]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
 !9 = metadata !{metadata !"clang version 3.5 "}
 !10 = metadata !{i32 3, i32 0, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [basic.c]
+!11 = metadata !{metadata !"0xb\003\000\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [basic.c]
 !12 = metadata !{i32 4, i32 0, metadata !4, null}
 
 ; CHECK: !12 = metadata !{i32 3, i32 0, metadata !13, null}
-; CHECK: !13 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [./basic.c]
+; CHECK: !13 = metadata !{metadata !"0xb\001", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ] [./basic.c]
 ; CHECK: !14 = metadata !{i32 4, i32 0, metadata !4, null}
diff --git a/test/Transforms/AddDiscriminators/first-only.ll b/test/Transforms/AddDiscriminators/first-only.ll
index f3b0357..e15a80a 100644
--- a/test/Transforms/AddDiscriminators/first-only.ll
+++ b/test/Transforms/AddDiscriminators/first-only.ll
@@ -50,28 +50,28 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [first-only.c] [DW_LANG_C99]
+!0 = metadata !{metadata !"0x11\0012\00clang version 3.5 (trunk 199750) (llvm/trunk 199751)\000\00\000\00\000", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [first-only.c] [DW_LANG_C99]
 !1 = metadata !{metadata !"first-only.c", metadata !"."}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [first-only.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", metadata !1, metadata !5, metadata !6, null, void (i32)* @foo, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [first-only.c]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
 !9 = metadata !{metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)"}
 !10 = metadata !{i32 3, i32 0, metadata !11, null}
 
-!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [first-only.c]
-; CHECK: !11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0}
+!11 = metadata !{metadata !"0xb\003\000\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [first-only.c]
+; CHECK: !11 = metadata !{metadata !"0xb\003\000\000", metadata !1, metadata !4}
 
 !12 = metadata !{i32 3, i32 0, metadata !13, null}
 
-!13 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [first-only.c]
-; CHECK: !13 = metadata !{i32 786443, metadata !1, metadata !14, i32 3, i32 0, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [./first-only.c]
+!13 = metadata !{metadata !"0xb\003\000\001", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ] [first-only.c]
+; CHECK: !13 = metadata !{metadata !"0xb\001", metadata !1, metadata !14} ; [ DW_TAG_lexical_block ] [./first-only.c]
 
 !14 = metadata !{i32 4, i32 0, metadata !13, null}
-; CHECK: !14 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1}
+; CHECK: !14 = metadata !{metadata !"0xb\003\000\001", metadata !1, metadata !11}
 
 !15 = metadata !{i32 5, i32 0, metadata !13, null}
 ; CHECK: !15 = metadata !{i32 4, i32 0, metadata !14, null}
diff --git a/test/Transforms/AddDiscriminators/multiple.ll b/test/Transforms/AddDiscriminators/multiple.ll
index 0241a0c..8418c9e 100644
--- a/test/Transforms/AddDiscriminators/multiple.ll
+++ b/test/Transforms/AddDiscriminators/multiple.ll
@@ -51,21 +51,21 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [multiple.c] [DW_LANG_C99]
+!0 = metadata !{metadata !"0x11\0012\00clang version 3.5 (trunk 199750) (llvm/trunk 199751)\000\00\000\00\000", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [multiple.c] [DW_LANG_C99]
 !1 = metadata !{metadata !"multiple.c", metadata !"."}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [multiple.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", metadata !1, metadata !5, metadata !6, null, void (i32)* @foo, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [multiple.c]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
 !9 = metadata !{metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)"}
 !10 = metadata !{i32 3, i32 0, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [multiple.c]
+!11 = metadata !{metadata !"0xb\003\000\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [multiple.c]
 !12 = metadata !{i32 4, i32 0, metadata !4, null}
 
 ; CHECK: !12 = metadata !{i32 3, i32 0, metadata !13, null}
-; CHECK: !13 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [./multiple.c]
+; CHECK: !13 = metadata !{metadata !"0xb\001", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ] [./multiple.c]
 ; CHECK: !14 = metadata !{i32 3, i32 0, metadata !15, null}
-; CHECK: !15 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 2, i32 1} ; [ DW_TAG_lexical_block ] [./multiple.c]
+; CHECK: !15 = metadata !{metadata !"0xb\002", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ] [./multiple.c]
diff --git a/test/Transforms/AddDiscriminators/no-discriminators.ll b/test/Transforms/AddDiscriminators/no-discriminators.ll
index f7b45e29..66a2c4e 100644
--- a/test/Transforms/AddDiscriminators/no-discriminators.ll
+++ b/test/Transforms/AddDiscriminators/no-discriminators.ll
@@ -17,7 +17,7 @@ entry:
   %retval = alloca i32, align 4
   %i.addr = alloca i64, align 8
   store i64 %i, i64* %i.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i64* %i.addr}, metadata !13), !dbg !14
+  call void @llvm.dbg.declare(metadata !{i64* %i.addr}, metadata !13, metadata !{}), !dbg !14
   %0 = load i64* %i.addr, align 8, !dbg !15
 ; CHECK:  %0 = load i64* %i.addr, align 8, !dbg !15
   %cmp = icmp slt i64 %0, 5, !dbg !15
@@ -39,7 +39,7 @@ return:                                           ; preds = %if.else, %if.then
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -48,24 +48,24 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!10, !11}
 !llvm.ident = !{!12}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [./no-discriminators] [DW_LANG_C99]
+!0 = metadata !{metadata !"0x11\0012\00clang version 3.5.0 \000\00\000\00\001", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [./no-discriminators] [DW_LANG_C99]
 !1 = metadata !{metadata !"no-discriminators", metadata !"."}
 !2 = metadata !{}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i64)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [./no-discriminators]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", metadata !1, metadata !5, metadata !6, null, i32 (i64)* @foo, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [./no-discriminators]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{metadata !8, metadata !9}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786468, null, null, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
+!8 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{metadata !"0x24\00long int\000\0064\0064\000\000\005", null, null} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
 ; CHECK: !10 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
 !12 = metadata !{metadata !"clang version 3.5.0 "}
-!13 = metadata !{i32 786689, metadata !4, metadata !"i", metadata !5, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 1]
+!13 = metadata !{metadata !"0x101\00i\0016777217\000", metadata !4, metadata !5, metadata !9} ; [ DW_TAG_arg_variable ] [i] [line 1]
 !14 = metadata !{i32 1, i32 0, metadata !4, null}
 !15 = metadata !{i32 2, i32 0, metadata !16, null}
 ; CHECK: !15 = metadata !{i32 2, i32 0, metadata !16, null}
-!16 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./no-discriminators]
-; CHECK: !16 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./no-discriminators]
+!16 = metadata !{metadata !"0xb\002\000\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [./no-discriminators]
+; CHECK: !16 = metadata !{metadata !"0xb\002\000\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [./no-discriminators]
 !17 = metadata !{i32 3, i32 0, metadata !4, null}
diff --git a/test/Transforms/AlignmentFromAssumptions/simple.ll b/test/Transforms/AlignmentFromAssumptions/simple.ll
new file mode 100644
index 0000000..884c8ba
--- /dev/null
+++ b/test/Transforms/AlignmentFromAssumptions/simple.ll
@@ -0,0 +1,215 @@
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+; RUN: opt < %s -alignment-from-assumptions -S | FileCheck %s
+
+define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %0 = load i32* %a, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %offsetptr = add i64 %ptrint, 24
+  %maskedptr = and i64 %offsetptr, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx = getelementptr inbounds i32* %a, i64 2
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo2
+; CHECK: load i32* {{[^,]+}}, align 16
+; CHECK: ret i32
+}
+
+define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %offsetptr = add i64 %ptrint, 28
+  %maskedptr = and i64 %offsetptr, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx = getelementptr inbounds i32* %a, i64 -1
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo2a
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @goo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %0 = load i32* %a, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @goo
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @hoo
+; CHECK: load i32* %arrayidx, align 32
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @joo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @joo
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @koo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 4
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @koo
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ -4, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 4
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @koo2
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @moo(i32* nocapture %a) nounwind uwtable {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %0 = bitcast i32* %a to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 4, i1 false)
+  ret i32 undef
+
+; CHECK-LABEL: @moo
+; CHECK: @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 32, i1 false)
+; CHECK: ret i32 undef
+}
+
+define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %ptrint1 = ptrtoint i32* %b to i64
+  %maskedptr3 = and i64 %ptrint1, 127
+  %maskcond4 = icmp eq i64 %maskedptr3, 0
+  tail call void @llvm.assume(i1 %maskcond4)
+  %0 = bitcast i32* %a to i8*
+  %1 = bitcast i32* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 4, i1 false)
+  ret i32 undef
+
+; CHECK-LABEL: @moo2
+; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 32, i1 false)
+; CHECK: ret i32 undef
+}
+
+declare void @llvm.assume(i1) nounwind
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
diff --git a/test/Transforms/AlignmentFromAssumptions/simple32.ll b/test/Transforms/AlignmentFromAssumptions/simple32.ll
new file mode 100644
index 0000000..166e7ef
--- /dev/null
+++ b/test/Transforms/AlignmentFromAssumptions/simple32.ll
@@ -0,0 +1,215 @@
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+; RUN: opt < %s -alignment-from-assumptions -S | FileCheck %s
+
+define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %0 = load i32* %a, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %offsetptr = add i64 %ptrint, 24
+  %maskedptr = and i64 %offsetptr, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx = getelementptr inbounds i32* %a, i64 2
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo2
+; CHECK: load i32* {{[^,]+}}, align 16
+; CHECK: ret i32
+}
+
+define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %offsetptr = add i64 %ptrint, 28
+  %maskedptr = and i64 %offsetptr, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx = getelementptr inbounds i32* %a, i64 -1
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo2a
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @goo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %0 = load i32* %a, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @goo
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @hoo
+; CHECK: load i32* %arrayidx, align 32
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @joo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @joo
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @koo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 4
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @koo
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ -4, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 4
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @koo2
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @moo(i32* nocapture %a) nounwind uwtable {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %0 = bitcast i32* %a to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 4, i1 false)
+  ret i32 undef
+
+; CHECK-LABEL: @moo
+; CHECK: @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 32, i1 false)
+; CHECK: ret i32 undef
+}
+
+define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %ptrint1 = ptrtoint i32* %b to i64
+  %maskedptr3 = and i64 %ptrint1, 127
+  %maskcond4 = icmp eq i64 %maskedptr3, 0
+  tail call void @llvm.assume(i1 %maskcond4)
+  %0 = bitcast i32* %a to i8*
+  %1 = bitcast i32* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 4, i1 false)
+  ret i32 undef
+
+; CHECK-LABEL: @moo2
+; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 32, i1 false)
+; CHECK: ret i32 undef
+}
+
+declare void @llvm.assume(i1) nounwind
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
diff --git a/test/Transforms/AlignmentFromAssumptions/start-unk.ll b/test/Transforms/AlignmentFromAssumptions/start-unk.ll
new file mode 100644
index 0000000..b7fe249
--- /dev/null
+++ b/test/Transforms/AlignmentFromAssumptions/start-unk.ll
@@ -0,0 +1,154 @@
+; RUN: opt -alignment-from-assumptions -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%type1 = type { %type2 }
+%type2 = type { [4 x i8] }
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #0
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.bswap.i32(i32) #1
+
+; Function Attrs: nounwind uwtable
+define void @test1() unnamed_addr #2 align 2 {
+
+; CHECK-LABEL: @test1
+
+entry:
+  br i1 undef, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  unreachable
+
+if.end:                                           ; preds = %entry
+  br i1 undef, label %return, label %if.end8
+
+if.end8:                                          ; preds = %if.end
+  br i1 undef, label %if.then13, label %if.end14
+
+if.then13:                                        ; preds = %if.end8
+  unreachable
+
+if.end14:                                         ; preds = %if.end8
+  br i1 undef, label %cond.false.i129, label %cond.end.i136
+
+cond.false.i129:                                  ; preds = %if.end14
+  unreachable
+
+cond.end.i136:                                    ; preds = %if.end14
+  br i1 undef, label %land.lhs.true.i, label %if.end.i145
+
+land.lhs.true.i:                                  ; preds = %cond.end.i136
+  br i1 undef, label %if.end.i145, label %if.then.i137
+
+if.then.i137:                                     ; preds = %land.lhs.true.i
+  br i1 undef, label %cond.false8.i, label %cond.end9.i
+
+cond.false8.i:                                    ; preds = %if.then.i137
+  unreachable
+
+cond.end9.i:                                      ; preds = %if.then.i137
+  br i1 undef, label %if.then23, label %if.end24
+
+if.end.i145:                                      ; preds = %land.lhs.true.i, %cond.end.i136
+  unreachable
+
+if.then23:                                        ; preds = %cond.end9.i
+  unreachable
+
+if.end24:                                         ; preds = %cond.end9.i
+  br i1 undef, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %if.end24
+  unreachable
+
+for.end:                                          ; preds = %if.end24
+  br i1 undef, label %if.end123, label %if.then121
+
+if.then121:                                       ; preds = %for.end
+  unreachable
+
+if.end123:                                        ; preds = %for.end
+  br i1 undef, label %if.end150, label %if.then126
+
+if.then126:                                       ; preds = %if.end123
+  %ptrint.i.i185 = ptrtoint %type1* undef to i64
+  %maskedptr.i.i186 = and i64 %ptrint.i.i185, 1
+  %maskcond.i.i187 = icmp eq i64 %maskedptr.i.i186, 0
+  tail call void @llvm.assume(i1 %maskcond.i.i187) #0
+  %ret.0..sroa_cast.i.i188 = bitcast %type1* undef to i32*
+  %ret.0.copyload.i.i189 = load i32* %ret.0..sroa_cast.i.i188, align 2
+
+; CHECK: load {{.*}} align 2
+
+  %0 = tail call i32 @llvm.bswap.i32(i32 %ret.0.copyload.i.i189) #0
+  %conv131 = zext i32 %0 to i64
+  %add.ptr132 = getelementptr inbounds i8* undef, i64 %conv131
+  %1 = bitcast i8* %add.ptr132 to %type1*
+  br i1 undef, label %if.end150, label %if.end.i173
+
+if.end.i173:                                      ; preds = %if.then126
+  br i1 undef, label %test1.exit, label %cond.false.i.i.i.i174
+
+cond.false.i.i.i.i174:                            ; preds = %if.end.i173
+  unreachable
+
+test1.exit: ; preds = %if.end.i173
+  br i1 undef, label %test1a.exit, label %if.end.i124
+
+if.end.i124:                                      ; preds = %test1.exit
+  unreachable
+
+test1a.exit: ; preds = %test1.exit
+  br i1 undef, label %if.end150, label %for.body137.lr.ph
+
+for.body137.lr.ph:                                ; preds = %test1a.exit
+  br label %for.body137
+
+for.body137:                                      ; preds = %test1b.exit, %for.body137.lr.ph
+  %ShndxTable.0309 = phi %type1* [ %1, %for.body137.lr.ph ], [ %incdec.ptr, %test1b.exit ]
+  %ret.0..sroa_cast.i.i106 = bitcast %type1* %ShndxTable.0309 to i32*
+  br i1 undef, label %for.body137.if.end146_crit_edge, label %if.then140
+
+for.body137.if.end146_crit_edge:                  ; preds = %for.body137
+  %incdec.ptr = getelementptr inbounds %type1* %ShndxTable.0309, i64 1
+  br i1 undef, label %cond.false.i70, label %cond.end.i
+
+if.then140:                                       ; preds = %for.body137
+  %ret.0.copyload.i.i102 = load i32* %ret.0..sroa_cast.i.i106, align 2
+
+; CHECK: load {{.*}} align 2
+
+  unreachable
+
+cond.false.i70:                                   ; preds = %for.body137.if.end146_crit_edge
+  unreachable
+
+cond.end.i:                                       ; preds = %for.body137.if.end146_crit_edge
+  br i1 undef, label %test1b.exit, label %cond.false.i.i
+
+cond.false.i.i:                                   ; preds = %cond.end.i
+  unreachable
+
+test1b.exit: ; preds = %cond.end.i
+  br i1 undef, label %if.end150, label %for.body137
+
+if.end150:                                        ; preds = %test1b.exit, %test1a.exit, %if.then126, %if.end123
+  br i1 undef, label %for.end176, label %for.body155.lr.ph
+
+for.body155.lr.ph:                                ; preds = %if.end150
+  unreachable
+
+for.end176:                                       ; preds = %if.end150
+  unreachable
+
+return:                                           ; preds = %if.end
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind uwtable }
+
diff --git a/test/Transforms/ArgumentPromotion/dbg.ll b/test/Transforms/ArgumentPromotion/dbg.ll
index 70503af..d155750 100644
--- a/test/Transforms/ArgumentPromotion/dbg.ll
+++ b/test/Transforms/ArgumentPromotion/dbg.ll
@@ -1,22 +1,26 @@
 ; RUN: opt < %s -argpromotion -S | FileCheck %s
-; CHECK: call void @test(), !dbg [[DBG_LOC:![0-9]]]
-; CHECK: [[TEST_FN:.*]] = {{.*}} void ()* @test
-; CHECK: [[DBG_LOC]] = metadata !{i32 8, i32 0, metadata [[TEST_FN]], null}
+; CHECK: call void @test(i32 %
+; CHECK: void (i32)* @test, {{.*}} ; [ DW_TAG_subprogram ] {{.*}} [test]
 
-define internal void @test(i32* %X) {
+declare void @sink(i32)
+
+define internal void @test(i32** %X) {
+  %1 = load i32** %X, align 8
+  %2 = load i32* %1, align 8
+  call void @sink(i32 %2)
   ret void
 }
 
-define void @caller() {
-  call void @test(i32* null), !dbg !1
+define void @caller(i32** %Y) {
+  call void @test(i32** %Y)
   ret void
 }
 
 !llvm.module.flags = !{!0}
 !llvm.dbg.cu = !{!3}
 
-!0 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!0 = metadata !{i32 2, metadata !"Debug Info Version", i32 2}
 !1 = metadata !{i32 8, i32 0, metadata !2, null}
-!2 = metadata !{i32 786478, null, null, metadata !"test", metadata !"test", metadata !"", i32 3, null, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @test, null, null, null, i32 3}
-!3 = metadata !{i32 786449, null, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, null, null, metadata !4, null, null, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/pr20038/reduce/<stdin>] [DW_LANG_C_plus_plus]
+!2 = metadata !{metadata !"0x2e\00test\00test\00\003\001\001\000\006\00256\000\003", null, null, null, null, void (i32**)* @test, null, null, null} ; [ DW_TAG_subprogram ]
+!3 = metadata !{metadata !"0x11\004\00clang version 3.5.0 \000\00\000\00\002", null, null, null, metadata !4, null, null} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/pr20038/reduce/<stdin>] [DW_LANG_C_plus_plus]
 !4 = metadata !{metadata !2}
diff --git a/test/Transforms/ArgumentPromotion/fp80.ll b/test/Transforms/ArgumentPromotion/fp80.ll
new file mode 100644
index 0000000..a770d60
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/fp80.ll
@@ -0,0 +1,58 @@
+; RUN: opt < %s -argpromotion -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%union.u = type { x86_fp80 }
+%struct.s = type { double, i16, i8, [5 x i8] }
+
+@b = internal global %struct.s { double 3.14, i16 9439, i8 25, [5 x i8] undef }, align 16
+
+%struct.Foo = type { i32, i64 }
+@a = internal global %struct.Foo { i32 1, i64 2 }, align 8
+
+define void @run() {
+entry:
+  tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*))
+  tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*))
+  call i64 @AccessPaddingOfStruct(%struct.Foo* @a)
+  call i64 @CaptureAStruct(%struct.Foo* @a)
+  ret void
+}
+
+; CHECK: internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) {
+define internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) {
+entry:
+  %bitcast = bitcast %union.u* %arg to %struct.s*
+  %gep = getelementptr inbounds %struct.s* %bitcast, i64 0, i32 2
+  %result = load i8* %gep
+  ret i8 %result
+}
+
+; CHECK: internal x86_fp80 @UseLongDoubleSafely(x86_fp80 {{%.*}}) {
+define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 %arg) {
+  %gep = getelementptr inbounds %union.u* %arg, i64 0, i32 0
+  %fp80 = load x86_fp80* %gep
+  ret x86_fp80 %fp80
+}
+
+; CHECK: define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) {
+define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) {
+  %p = bitcast %struct.Foo* %a to i64*
+  %v = load i64* %p
+  ret i64 %v
+}
+
+; CHECK: define internal i64 @CaptureAStruct(%struct.Foo* byval %a) {
+define internal i64 @CaptureAStruct(%struct.Foo* byval %a) {
+entry:
+  %a_ptr = alloca %struct.Foo*
+  br label %loop
+
+loop:
+  %phi = phi %struct.Foo* [ null, %entry ], [ %gep, %loop ]
+  %0   = phi %struct.Foo* [ %a, %entry ],   [ %0, %loop ]
+  store %struct.Foo* %phi, %struct.Foo** %a_ptr
+  %gep = getelementptr %struct.Foo* %a, i64 0
+  br label %loop
+}
diff --git a/test/Transforms/ArgumentPromotion/tail.ll b/test/Transforms/ArgumentPromotion/tail.ll
index 43b8996..2ea387c 100644
--- a/test/Transforms/ArgumentPromotion/tail.ll
+++ b/test/Transforms/ArgumentPromotion/tail.ll
@@ -1,6 +1,8 @@
 ; RUN: opt %s -argpromotion -S -o - | FileCheck %s
 ; PR14710
 
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
 %pair = type { i32, i32 }
 
 declare i8* @foo(%pair*)
diff --git a/test/Transforms/ArgumentPromotion/variadic.ll b/test/Transforms/ArgumentPromotion/variadic.ll
new file mode 100644
index 0000000..0ae52b3
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/variadic.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -argpromotion -S | FileCheck %s
+
+; Unused arguments from variadic functions cannot be eliminated as that changes
+; their classiciation according to the SysV amd64 ABI. Clang and other frontends
+; bake in the classification when they use things like byval, as in this test.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.tt0 = type { i64, i64 }
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+@t45 = internal global %struct.tt0 { i64 1335139741, i64 438042995 }, align 8
+
+; Function Attrs: nounwind uwtable
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
+entry:
+  tail call void (i8*, i8*, i8*, i8*, i8*, ...)* @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
+  ret i32 0
+}
+
+; Function Attrs: nounwind uwtable
+define internal void @callee_t0f(i8* nocapture readnone %tp13, i8* nocapture readnone %tp14, i8* nocapture readnone %tp15, i8* nocapture readnone %tp16, i8* nocapture readnone %tp17, ...) {
+entry:
+  ret void
+}
+
+; CHECK-LABEL: define internal void @callee_t0f(i8* nocapture readnone %tp13, i8* nocapture readnone %tp14, i8* nocapture readnone %tp15, i8* nocapture readnone %tp16, i8* nocapture readnone %tp17, ...)
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
index 6a93016..282d42f 100644
--- a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
+++ b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
@@ -1,8 +1,8 @@
-; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-ll-sc %s | FileCheck %s
+; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-expand %s | FileCheck %s
 
 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
 ; CHECK-LABEL: @test_atomic_xchg_i8
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
@@ -12,7 +12,7 @@ define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: ret i8 [[OLDVAL]]
   %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
   ret i8 %res
@@ -20,7 +20,7 @@ define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
 
 define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
 ; CHECK-LABEL: @test_atomic_add_i16
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
@@ -31,7 +31,7 @@ define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i16 [[OLDVAL]]
   %res = atomicrmw add i16* %ptr, i16 %addend seq_cst
   ret i16 %res
@@ -39,7 +39,7 @@ define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
 
 define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
 ; CHECK-LABEL: @test_atomic_sub_i32
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
@@ -48,7 +48,7 @@ define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence acquire
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i32 [[OLDVAL]]
   %res = atomicrmw sub i32* %ptr, i32 %subend acquire
   ret i32 %res
@@ -56,7 +56,7 @@ define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
 
 define i8 @test_atomic_and_i8(i8* %ptr, i8 %andend) {
 ; CHECK-LABEL: @test_atomic_and_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
@@ -67,7 +67,7 @@ define i8 @test_atomic_and_i8(i8* %ptr, i8 %andend) {
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: ret i8 [[OLDVAL]]
   %res = atomicrmw and i8* %ptr, i8 %andend release
   ret i8 %res
@@ -75,7 +75,7 @@ define i8 @test_atomic_and_i8(i8* %ptr, i8 %andend) {
 
 define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) {
 ; CHECK-LABEL: @test_atomic_nand_i16
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
@@ -87,7 +87,7 @@ define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) {
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i16 [[OLDVAL]]
   %res = atomicrmw nand i16* %ptr, i16 %nandend seq_cst
   ret i16 %res
@@ -95,7 +95,7 @@ define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) {
 
 define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
 ; CHECK-LABEL: @test_atomic_or_i64
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
@@ -115,7 +115,7 @@ define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i64 [[OLDVAL]]
   %res = atomicrmw or i64* %ptr, i64 %orend seq_cst
   ret i64 %res
@@ -123,7 +123,7 @@ define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
 
 define i8 @test_atomic_xor_i8(i8* %ptr, i8 %xorend) {
 ; CHECK-LABEL: @test_atomic_xor_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
@@ -134,7 +134,7 @@ define i8 @test_atomic_xor_i8(i8* %ptr, i8 %xorend) {
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i8 [[OLDVAL]]
   %res = atomicrmw xor i8* %ptr, i8 %xorend seq_cst
   ret i8 %res
@@ -142,7 +142,7 @@ define i8 @test_atomic_xor_i8(i8* %ptr, i8 %xorend) {
 
 define i8 @test_atomic_max_i8(i8* %ptr, i8 %maxend) {
 ; CHECK-LABEL: @test_atomic_max_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
@@ -154,7 +154,7 @@ define i8 @test_atomic_max_i8(i8* %ptr, i8 %maxend) {
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i8 [[OLDVAL]]
   %res = atomicrmw max i8* %ptr, i8 %maxend seq_cst
   ret i8 %res
@@ -162,7 +162,7 @@ define i8 @test_atomic_max_i8(i8* %ptr, i8 %maxend) {
 
 define i8 @test_atomic_min_i8(i8* %ptr, i8 %minend) {
 ; CHECK-LABEL: @test_atomic_min_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
@@ -174,7 +174,7 @@ define i8 @test_atomic_min_i8(i8* %ptr, i8 %minend) {
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i8 [[OLDVAL]]
   %res = atomicrmw min i8* %ptr, i8 %minend seq_cst
   ret i8 %res
@@ -182,7 +182,7 @@ define i8 @test_atomic_min_i8(i8* %ptr, i8 %minend) {
 
 define i8 @test_atomic_umax_i8(i8* %ptr, i8 %umaxend) {
 ; CHECK-LABEL: @test_atomic_umax_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
@@ -194,7 +194,7 @@ define i8 @test_atomic_umax_i8(i8* %ptr, i8 %umaxend) {
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i8 [[OLDVAL]]
   %res = atomicrmw umax i8* %ptr, i8 %umaxend seq_cst
   ret i8 %res
@@ -202,7 +202,7 @@ define i8 @test_atomic_umax_i8(i8* %ptr, i8 %umaxend) {
 
 define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) {
 ; CHECK-LABEL: @test_atomic_umin_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
@@ -214,7 +214,7 @@ define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) {
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i8 [[OLDVAL]]
   %res = atomicrmw umin i8* %ptr, i8 %uminend seq_cst
   ret i8 %res
@@ -222,7 +222,7 @@ define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) {
 
 define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
 ; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 
 ; CHECK: [[LOOP]]:
@@ -238,11 +238,11 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
 ; CHECK: [[SUCCESS_BB]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
@@ -256,7 +256,7 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
 
 define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
 ; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 
 ; CHECK: [[LOOP]]:
@@ -272,11 +272,11 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
 ; CHECK: [[SUCCESS_BB]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
@@ -290,7 +290,7 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
 
 define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
 ; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: br label %[[LOOP:.*]]
 
 ; CHECK: [[LOOP]]:
@@ -304,11 +304,11 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
 ; CHECK: [[SUCCESS_BB]]:
-; CHECK: fence acquire
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
-; CHECK: fence acquire
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
@@ -322,7 +322,7 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
 
 define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
 ; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: br label %[[LOOP:.*]]
 
 ; CHECK: [[LOOP]]:
@@ -347,11 +347,11 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
 ; CHECK: [[SUCCESS_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
@@ -361,4 +361,4 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
   %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
   %old = extractvalue { i64, i1 } %pairold, 0
   ret i64 %old
-}
-\ No newline at end of file
+}
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
index 8092c10..42d7b78 100644
--- a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
+++ b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-ll-sc %s | FileCheck %s
+; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-expand %s | FileCheck %s
 
 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
 ; CHECK-LABEL: @test_atomic_xchg_i8
@@ -223,4 +223,4 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
   %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
   %old = extractvalue { i64, i1 } %pairold, 0
   ret i64 %old
-}
-\ No newline at end of file
+}
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll b/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
index 07a4a7f..5465300 100644
--- a/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll
+++ b/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
@@ -1,8 +1,9 @@
-; RUN: opt -atomic-ll-sc -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
+; RUN: opt -atomic-expand -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
 
 define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK-LABEL: @test_cmpxchg_seq_cst
-; CHECK:     fence release
+; Intrinsic for "dmb ishst" is then expected
+; CHECK:     call void @llvm.arm.dmb(i32 10)
 ; CHECK:     br label %[[START:.*]]
 
 ; CHECK: [[START]]:
@@ -16,11 +17,11 @@ define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]]
 
 ; CHECK: [[SUCCESS_BB]]:
-; CHECK:     fence seq_cst
+; CHECK:     call void @llvm.arm.dmb(i32 11)
 ; CHECK:     br label %[[END:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
-; CHECK:     fence seq_cst
+; CHECK:     call void @llvm.arm.dmb(i32 11)
 ; CHECK:     br label %[[END]]
 
 ; CHECK: [[END]]:
@@ -34,7 +35,7 @@ define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
 
 define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK-LABEL: @test_cmpxchg_weak_fail
-; CHECK:     fence release
+; CHECK:     call void @llvm.arm.dmb(i32 10)
 ; CHECK:     br label %[[START:.*]]
 
 ; CHECK: [[START]]:
@@ -48,11 +49,11 @@ define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[SUCCESS_BB]]:
-; CHECK:     fence seq_cst
+; CHECK:     call void @llvm.arm.dmb(i32 11)
 ; CHECK:     br label %[[END:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK:     br label %[[END]]
 
 ; CHECK: [[END]]:
@@ -66,7 +67,7 @@ define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
 
 define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK-LABEL: @test_cmpxchg_monotonic
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK:     br label %[[START:.*]]
 
 ; CHECK: [[START]]:
@@ -80,11 +81,11 @@ define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[SUCCESS_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK:     br label %[[END:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK:     br label %[[END]]
 
 ; CHECK: [[END]]:
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg b/test/Transforms/AtomicExpand/ARM/lit.local.cfg
index 98c6700..98c6700 100644
--- a/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg
+++ b/test/Transforms/AtomicExpand/ARM/lit.local.cfg
diff --git a/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll b/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
index 598ea0e..d4b94fe 100644
--- a/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
+++ b/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -std-compile-opts -o - | llc -no-integrated-as -o - | grep bork_directive | wc -l | grep 2
+; RUN: opt < %s -O3 -o - | llc -no-integrated-as -o - | grep bork_directive | wc -l | grep 2
 
 ;; We don't want branch folding to fold asm directives.
 
diff --git a/test/Transforms/CodeGenPrepare/AArch64/lit.local.cfg b/test/Transforms/CodeGenPrepare/AArch64/lit.local.cfg
new file mode 100644
index 0000000..cec29af
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/AArch64/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/CodeGenPrepare/AArch64/trunc-weird-user.ll b/test/Transforms/CodeGenPrepare/AArch64/trunc-weird-user.ll
new file mode 100644
index 0000000..b4e6a40
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/AArch64/trunc-weird-user.ll
@@ -0,0 +1,36 @@
+; RUN: opt -S -codegenprepare -mtriple=arm64-apple-ios7.0 %s | FileCheck %s
+
+%foo = type { i8 }
+
+define %foo @test_merge(i32 %in) {
+; CHECK-LABEL: @test_merge
+
+  ; CodeGenPrepare was requesting the EVT for { i8 } to determine
+  ; whether the insertvalue user of the trunc was legal. This
+  ; asserted.
+
+; CHECK: insertvalue %foo undef, i8 %byte, 0
+  %lobit = lshr i32 %in, 31
+  %byte = trunc i32 %lobit to i8
+  %struct = insertvalue %foo undef, i8 %byte, 0
+  ret %"foo" %struct
+}
+
+define i64* @test_merge_PR21548(i32 %a, i64* %p1, i64* %p2, i64* %p3) {
+; CHECK-LABEL: @test_merge_PR21548
+  %as = lshr i32 %a, 3
+  %Tr = trunc i32 %as to i1
+  br i1 %Tr, label %BB2, label %BB3
+
+BB2:
+  ; Similarly to above:
+  ; CodeGenPrepare was requesting the EVT for i8* to determine
+  ; whether the select user of the trunc was legal. This asserted.
+
+; CHECK: select i1 {{%.*}}, i64* %p1, i64* %p2
+  %p = select i1 %Tr, i64* %p1, i64* %p2
+  ret i64* %p
+
+BB3:
+  ret i64* %p3
+}
diff --git a/test/Transforms/ConstProp/loads.ll b/test/Transforms/ConstProp/loads.ll
index 0ea9c47..5a23dad 100644
--- a/test/Transforms/ConstProp/loads.ll
+++ b/test/Transforms/ConstProp/loads.ll
@@ -36,6 +36,19 @@ define i16 @test2() {
 ; BE: ret i16 -8531
 }
 
+define i16 @test2_addrspacecast() {
+  %r = load i16 addrspace(1)* addrspacecast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16 addrspace(1)*)
+  ret i16 %r
+
+; 0xBEEF
+; LE-LABEL: @test2_addrspacecast(
+; LE: ret i16 -16657
+
+; 0xDEAD
+; BE-LABEL: @test2_addrspacecast(
+; BE: ret i16 -8531
+}
+
 ; Load of second 16 bits of 32-bit value.
 define i16 @test3() {
   %r = load i16* getelementptr(i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*), i32 1)
diff --git a/test/Transforms/ConstProp/trunc_vec.ll b/test/Transforms/ConstProp/trunc_vec.ll
new file mode 100644
index 0000000..99db329
--- /dev/null
+++ b/test/Transforms/ConstProp/trunc_vec.ll
@@ -0,0 +1,9 @@
+; RUN: opt -constprop < %s
+
+; Make sure we don't crash on this one
+
+define <8 x i8> @test_truc_vec() {
+  %x = bitcast <2 x i64> <i64 1, i64 2> to <8 x i16>
+  %y = trunc <8 x i16> %x to <8 x i8>
+  ret <8 x i8> %y
+}
diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
index 26982db..077394f 100644
--- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
+++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
@@ -4,24 +4,24 @@
 
 define i8* @vfs_addname(i8* %name, i32 %len, i32 %hash, i32 %flags) nounwind ssp {
 entry:
-  call void @llvm.dbg.value(metadata !{i8* %name}, i64 0, metadata !0)
-  call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !10)
-  call void @llvm.dbg.value(metadata !{i32 %hash}, i64 0, metadata !11)
-  call void @llvm.dbg.value(metadata !{i32 %flags}, i64 0, metadata !12)
+  call void @llvm.dbg.value(metadata !{i8* %name}, i64 0, metadata !0, metadata !{})
+  call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !10, metadata !{})
+  call void @llvm.dbg.value(metadata !{i32 %hash}, i64 0, metadata !11, metadata !{})
+  call void @llvm.dbg.value(metadata !{i32 %flags}, i64 0, metadata !12, metadata !{})
 ; CHECK:  call fastcc i8* @add_name_internal(i8* %name, i32 %hash) [[NUW:#[0-9]+]], !dbg !{{[0-9]+}}
   %0 = call fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext 0, i32 %flags) nounwind, !dbg !13 ; <i8*> [#uses=1]
   ret i8* %0, !dbg !13
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define internal fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext %extra, i32 %flags) noinline nounwind ssp {
 entry:
-  call void @llvm.dbg.value(metadata !{i8* %name}, i64 0, metadata !15)
-  call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !20)
-  call void @llvm.dbg.value(metadata !{i32 %hash}, i64 0, metadata !21)
-  call void @llvm.dbg.value(metadata !{i8 %extra}, i64 0, metadata !22)
-  call void @llvm.dbg.value(metadata !{i32 %flags}, i64 0, metadata !23)
+  call void @llvm.dbg.value(metadata !{i8* %name}, i64 0, metadata !15, metadata !{})
+  call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !20, metadata !{})
+  call void @llvm.dbg.value(metadata !{i32 %hash}, i64 0, metadata !21, metadata !{})
+  call void @llvm.dbg.value(metadata !{i8 %extra}, i64 0, metadata !22, metadata !{})
+  call void @llvm.dbg.value(metadata !{i32 %flags}, i64 0, metadata !23, metadata !{})
   %0 = icmp eq i32 %hash, 0, !dbg !24             ; <i1> [#uses=1]
   br i1 %0, label %bb, label %bb1, !dbg !24
 
@@ -36,7 +36,7 @@ bb2:                                              ; preds = %bb1, %bb
   ret i8* %.0, !dbg !27
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 ; CHECK: attributes #0 = { nounwind ssp }
 ; CHECK: attributes #1 = { nounwind readnone }
@@ -45,34 +45,34 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!30}
-!0 = metadata !{i32 524545, metadata !1, metadata !"name", metadata !2, i32 8, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, metadata !28, metadata !2, metadata !"vfs_addname", metadata !"vfs_addname", metadata !"vfs_addname", i32 12, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !28} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, metadata !28, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, metadata !"", i32 0, metadata !29, metadata !29, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !28, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x101\00name\008\000", metadata !1, metadata !2, metadata !6} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{metadata !"0x2e\00vfs_addname\00vfs_addname\00vfs_addname\0012\000\001\000\006\000\000\000", metadata !28, metadata !2, metadata !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = metadata !{metadata !"0x29", metadata !28} ; [ DW_TAG_file_type ]
+!3 = metadata !{metadata !"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)\001\00\000\00\000", metadata !28, metadata !29, metadata !29, null, null, null} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !28, metadata !2, null, metadata !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6, metadata !6, metadata !9, metadata !9, metadata !9}
-!6 = metadata !{i32 524303, metadata !28, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 524326, metadata !28, metadata !2, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
-!8 = metadata !{i32 524324, metadata !28, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 524324, metadata !28, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 524545, metadata !1, metadata !"len", metadata !2, i32 9, metadata !9} ; [ DW_TAG_arg_variable ]
-!11 = metadata !{i32 524545, metadata !1, metadata !"hash", metadata !2, i32 10, metadata !9} ; [ DW_TAG_arg_variable ]
-!12 = metadata !{i32 524545, metadata !1, metadata !"flags", metadata !2, i32 11, metadata !9} ; [ DW_TAG_arg_variable ]
+!6 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", metadata !28, metadata !2, metadata !7} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{metadata !"0x26\00\000\008\008\000\000", metadata !28, metadata !2, metadata !8} ; [ DW_TAG_const_type ]
+!8 = metadata !{metadata !"0x24\00char\000\008\008\000\000\006", metadata !28, metadata !2} ; [ DW_TAG_base_type ]
+!9 = metadata !{metadata !"0x24\00unsigned int\000\0032\0032\000\000\007", metadata !28, metadata !2} ; [ DW_TAG_base_type ]
+!10 = metadata !{metadata !"0x101\00len\009\000", metadata !1, metadata !2, metadata !9} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{metadata !"0x101\00hash\0010\000", metadata !1, metadata !2, metadata !9} ; [ DW_TAG_arg_variable ]
+!12 = metadata !{metadata !"0x101\00flags\0011\000", metadata !1, metadata !2, metadata !9} ; [ DW_TAG_arg_variable ]
 !13 = metadata !{i32 13, i32 0, metadata !14, null}
-!14 = metadata !{i32 524299, metadata !28, metadata !1, i32 12, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!15 = metadata !{i32 524545, metadata !16, metadata !"name", metadata !2, i32 17, metadata !6} ; [ DW_TAG_arg_variable ]
-!16 = metadata !{i32 524334, metadata !28, metadata !2, metadata !"add_name_internal", metadata !"add_name_internal", metadata !"add_name_internal", i32 22, metadata !17, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 524309, metadata !28, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = metadata !{metadata !"0xb\0012\000\000", metadata !28, metadata !1} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{metadata !"0x101\00name\0017\000", metadata !16, metadata !2, metadata !6} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{metadata !"0x2e\00add_name_internal\00add_name_internal\00add_name_internal\0022\001\001\000\006\000\000\000", metadata !28, metadata !2, metadata !17, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!17 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !28, metadata !2, null, metadata !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !18 = metadata !{metadata !6, metadata !6, metadata !9, metadata !9, metadata !19, metadata !9}
-!19 = metadata !{i32 524324, metadata !28, metadata !2, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!20 = metadata !{i32 524545, metadata !16, metadata !"len", metadata !2, i32 18, metadata !9} ; [ DW_TAG_arg_variable ]
-!21 = metadata !{i32 524545, metadata !16, metadata !"hash", metadata !2, i32 19, metadata !9} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 524545, metadata !16, metadata !"extra", metadata !2, i32 20, metadata !19} ; [ DW_TAG_arg_variable ]
-!23 = metadata !{i32 524545, metadata !16, metadata !"flags", metadata !2, i32 21, metadata !9} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{metadata !"0x24\00unsigned char\000\008\008\000\000\008", metadata !28, metadata !2} ; [ DW_TAG_base_type ]
+!20 = metadata !{metadata !"0x101\00len\0018\000", metadata !16, metadata !2, metadata !9} ; [ DW_TAG_arg_variable ]
+!21 = metadata !{metadata !"0x101\00hash\0019\000", metadata !16, metadata !2, metadata !9} ; [ DW_TAG_arg_variable ]
+!22 = metadata !{metadata !"0x101\00extra\0020\000", metadata !16, metadata !2, metadata !19} ; [ DW_TAG_arg_variable ]
+!23 = metadata !{metadata !"0x101\00flags\0021\000", metadata !16, metadata !2, metadata !9} ; [ DW_TAG_arg_variable ]
 !24 = metadata !{i32 23, i32 0, metadata !25, null}
-!25 = metadata !{i32 524299, metadata !28, metadata !16, i32 22, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!25 = metadata !{metadata !"0xb\0022\000\000", metadata !28, metadata !16} ; [ DW_TAG_lexical_block ]
 !26 = metadata !{i32 24, i32 0, metadata !25, null}
 !27 = metadata !{i32 26, i32 0, metadata !25, null}
 !28 = metadata !{metadata !"tail.c", metadata !"/Users/echeng/LLVM/radars/r7927803/"}
 !29 = metadata !{i32 0}
-!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
index 7bdcbf5..b457f01 100644
--- a/test/Transforms/DeadArgElim/dbginfo.ll
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -1,65 +1,70 @@
 ; RUN: opt -deadargelim -S < %s | FileCheck %s
 ; PR14016
 
-; Check that debug info metadata for subprograms stores pointers to
-; updated LLVM functions.
+; Built with clang (then manually running -mem2reg with opt) from the following source:
+; static void f1(int, ...) {
+; }
+;
+; void f2() {
+;   f1(1);
+; }
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
+; Test both varargs removal and removal of a traditional dead arg together, to
+; test both the basic functionality, and a particular wrinkle involving updating
+; the function->debug info mapping on update to ensure it's accurate when used
+; again for the next removal.
 
-@x = global i32 0, align 4
+; CHECK: void ()* @_ZL2f1iz, {{.*}} ; [ DW_TAG_subprogram ] {{.*}} [f1]
 
-define void @_Z3runv() uwtable {
-entry:
-  call void @_ZN12_GLOBAL__N_18dead_argEPv(i8* null), !dbg !10
-  call void (...)* @_ZN12_GLOBAL__N_111dead_varargEz(), !dbg !12
-  ret void, !dbg !13
-}
+; Check that debug info metadata for subprograms stores pointers to
+; updated LLVM functions.
 
-; Argument will be deleted
-define internal void @_ZN12_GLOBAL__N_18dead_argEPv(i8* %foo) nounwind uwtable {
+; Function Attrs: uwtable
+define void @_Z2f2v() #0 {
 entry:
-  %0 = load i32* @x, align 4, !dbg !14
-  %inc = add nsw i32 %0, 1, !dbg !14
-  store i32 %inc, i32* @x, align 4, !dbg !14
+  call void (i32, ...)* @_ZL2f1iz(i32 1), !dbg !15
   ret void, !dbg !16
 }
 
-; Vararg will be deleted
-define internal void @_ZN12_GLOBAL__N_111dead_varargEz(...) nounwind uwtable {
+; Function Attrs: nounwind uwtable
+define internal void @_ZL2f1iz(i32, ...) #1 {
 entry:
-  %0 = load i32* @x, align 4, !dbg !17
-  %inc = add nsw i32 %0, 1, !dbg !17
-  store i32 %inc, i32* @x, align 4, !dbg !17
-  ret void, !dbg !19
+  call void @llvm.dbg.value(metadata !{i32 %0}, i64 0, metadata !17, metadata !18), !dbg !19
+  ret void, !dbg !20
 }
 
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!21}
-
-!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.2 (trunk 165305)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{i32 0}
-!3 = metadata !{metadata !5, metadata !8, metadata !9}
-!5 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"run", metadata !"run", metadata !"", i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
-!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !1, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
 
-; CHECK: metadata !"dead_vararg"{{.*}}void ()* @_ZN12_GLOBAL__N_111dead_varargEz
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
-!9 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"dead_arg", metadata !"dead_arg", metadata !"", i32 4, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_ZN12_GLOBAL__N_18dead_argEPv, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [dead_arg]
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
 
-; CHECK: metadata !"dead_arg"{{.*}}void ()* @_ZN12_GLOBAL__N_18dead_argEPv
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12, !13}
+!llvm.ident = !{!14}
 
-!10 = metadata !{i32 8, i32 14, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !20, metadata !5, i32 8, i32 12, i32 0} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
-!12 = metadata !{i32 8, i32 27, metadata !11, null}
-!13 = metadata !{i32 8, i32 42, metadata !11, null}
-!14 = metadata !{i32 4, i32 28, metadata !15, null}
-!15 = metadata !{i32 786443, metadata !20, metadata !9, i32 4, i32 26, i32 2} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
-!16 = metadata !{i32 4, i32 33, metadata !15, null}
-!17 = metadata !{i32 5, i32 25, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !20, metadata !8, i32 5, i32 23, i32 1} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
-!19 = metadata !{i32 5, i32 30, metadata !18, null}
-!20 = metadata !{metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di"}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = metadata !{metadata !"0x11\004\00clang version 3.6.0 \000\00\000\00\001", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/dbg.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"dbg.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !8}
+!4 = metadata !{metadata !"0x2e\00f2\00f2\00_Z2f2v\004\000\001\000\000\00256\000\004", metadata !1, metadata !5, metadata !6, null, void ()* @_Z2f2v, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 4] [def] [f2]
+!5 = metadata !{metadata !"0x29", metadata !1}    ; [ DW_TAG_file_type ] [/tmp/dbginfo/dbg.cpp]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", null, null, null, metadata !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{metadata !"0x2e\00f1\00f1\00_ZL2f1iz\001\001\001\000\000\00256\000\001", metadata !1, metadata !5, metadata !9, null, void (i32, ...)* @_ZL2f1iz, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [local] [def] [f1]
+!9 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", null, null, null, metadata !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = metadata !{null, metadata !11, null}
+!11 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!13 = metadata !{i32 2, metadata !"Debug Info Version", i32 2}
+!14 = metadata !{metadata !"clang version 3.6.0 "}
+!15 = metadata !{i32 5, i32 3, metadata !4, null}
+!16 = metadata !{i32 6, i32 1, metadata !4, null}
+!17 = metadata !{metadata !"0x101\00\0016777217\000", metadata !8, metadata !5, metadata !11} ; [ DW_TAG_arg_variable ] [line 1]
+!18 = metadata !{metadata !"0x102"}               ; [ DW_TAG_expression ]
+!19 = metadata !{i32 1, i32 19, metadata !8, null}
+!20 = metadata !{i32 2, i32 1, metadata !8, null}
diff --git a/test/Transforms/DeadArgElim/dead_vaargs.ll b/test/Transforms/DeadArgElim/dead_vaargs.ll
index db3135c..c8189c6 100644
--- a/test/Transforms/DeadArgElim/dead_vaargs.ll
+++ b/test/Transforms/DeadArgElim/dead_vaargs.ll
@@ -1,12 +1,36 @@
-; RUN: opt < %s -deadargelim -S | not grep 47 
-; RUN: opt < %s -deadargelim -S | not grep 1.0
+; RUN: opt < %s -deadargelim -S | FileCheck %s
 
 define i32 @bar(i32 %A) {
-        %tmp4 = tail call i32 (i32, ...)* @foo( i32 %A, i32 %A, i32 %A, i32 %A, i64 47, double 1.000000e+00 )   ; <i32> [#uses=1]
-        ret i32 %tmp4
+  call void (i32, ...)* @thunk(i32 %A, i64 47, double 1.000000e+00)
+  %a = call i32 (i32, ...)* @has_vastart(i32 %A, i64 47, double 1.000000e+00)
+  %b = call i32 (i32, ...)* @no_vastart( i32 %A, i32 %A, i32 %A, i32 %A, i64 47, double 1.000000e+00 )
+  %c = add i32 %a, %b
+  ret i32 %c
 }
+; CHECK-LABEL: define i32 @bar
+; CHECK: call void (i32, ...)* @thunk(i32 %A, i64 47, double 1.000000e+00)
+; CHECK: call i32 (i32, ...)* @has_vastart(i32 %A, i64 47, double 1.000000e+00)
+; CHECK: call i32 @no_vastart(i32 %A)
 
-define internal i32 @foo(i32 %X, ...) {
-        ret i32 %X
+declare void @thunk_target(i32 %X, ...)
+
+define internal void @thunk(i32 %X, ...) {
+  musttail call void(i32, ...)* @thunk_target(i32 %X, ...)
+  ret void
+}
+; CHECK-LABEL: define internal void @thunk(i32 %X, ...)
+; CHECK: musttail call void (i32, ...)* @thunk_target(i32 %X, ...)
+
+define internal i32 @has_vastart(i32 %X, ...) {
+  %valist = alloca i8
+  call void @llvm.va_start(i8* %valist)
+  ret i32 %X
 }
+; CHECK-LABEL: define internal i32 @has_vastart(i32 %X, ...)
 
+declare void @llvm.va_start(i8*)
+
+define internal i32 @no_vastart(i32 %X, ...) {
+  ret i32 %X
+}
+; CHECK-LABEL: define internal i32 @no_vastart(i32 %X)
diff --git a/test/Transforms/DeadStoreElimination/PartialStore.ll b/test/Transforms/DeadStoreElimination/PartialStore.ll
index 4799ef3..80c2bfa 100644
--- a/test/Transforms/DeadStoreElimination/PartialStore.ll
+++ b/test/Transforms/DeadStoreElimination/PartialStore.ll
@@ -45,9 +45,9 @@ define void @test4(i8* %P) {
 
   store i8 19, i8* %P  ;; dead
   %A = getelementptr i8* %P, i32 3
-  
+
   store i8 42, i8* %A  ;; dead
-  
+
   %Q = bitcast i8* %P to double*
   store double 0.0, double* %Q
   ret void
@@ -61,7 +61,7 @@ define void @test5(i32 %i) nounwind ssp {
   %C = getelementptr i8* %B, i32 %i
   store i8 10, i8* %C        ;; Dead store to variable index.
   store i32 20, i32* %A
-  
+
   call void @test5a(i32* %A)
   ret void
 ; CHECK-LABEL: @test5(
@@ -69,3 +69,19 @@ define void @test5(i32 %i) nounwind ssp {
 ; CHECK-NEXT: store i32 20
 ; CHECK-NEXT: call void @test5a
 }
+
+declare void @test5a_as1(i32*)
+define void @test5_addrspacecast(i32 %i) nounwind ssp {
+  %A = alloca i32
+  %B = addrspacecast i32* %A to i8 addrspace(1)*
+  %C = getelementptr i8 addrspace(1)* %B, i32 %i
+  store i8 10, i8 addrspace(1)* %C        ;; Dead store to variable index.
+  store i32 20, i32* %A
+
+  call void @test5a(i32* %A)
+  ret void
+; CHECK-LABEL: @test5_addrspacecast(
+; CHECK-NEXT: alloca
+; CHECK-NEXT: store i32 20
+; CHECK-NEXT: call void @test5a
+}
diff --git a/test/Transforms/DeadStoreElimination/atomic.ll b/test/Transforms/DeadStoreElimination/atomic.ll
index 2e84298..af303fa 100644
--- a/test/Transforms/DeadStoreElimination/atomic.ll
+++ b/test/Transforms/DeadStoreElimination/atomic.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 ; Sanity tests for atomic stores.
 ; Note that it turns out essentially every transformation DSE does is legal on
-; atomic ops, just some transformations are not allowed across them. 
+; atomic ops, just some transformations are not allowed across release-acquire pairs.
 
 @x = common global i32 0, align 4
 @y = common global i32 0, align 4
@@ -13,35 +13,32 @@ target triple = "x86_64-apple-macosx10.7.0"
 declare void @randomop(i32*)
 
 ; DSE across unordered store (allowed)
-define void @test1()  nounwind uwtable ssp {
-; CHECK: test1
+define void @test1() {
+; CHECK-LABEL: test1
 ; CHECK-NOT: store i32 0
 ; CHECK: store i32 1
-entry:
   store i32 0, i32* @x
   store atomic i32 0, i32* @y unordered, align 4
   store i32 1, i32* @x
   ret void
 }
 
-; DSE across seq_cst load (allowed in theory; not implemented ATM)
-define i32 @test2()  nounwind uwtable ssp {
-; CHECK: test2
-; CHECK: store i32 0
+; DSE across seq_cst load (allowed)
+define i32 @test2() {
+; CHECK-LABEL: test2
+; CHECK-NOT: store i32 0
 ; CHECK: store i32 1
-entry:
   store i32 0, i32* @x
   %x = load atomic i32* @y seq_cst, align 4
   store i32 1, i32* @x
   ret i32 %x
 }
 
-; DSE across seq_cst store (store before atomic store must not be removed)
-define void @test3()  nounwind uwtable ssp {
-; CHECK: test3
-; CHECK: store i32
+; DSE across seq_cst store (allowed)
+define void @test3() {
+; CHECK-LABEL: test3
+; CHECK-NOT: store i32 0
 ; CHECK: store atomic i32 2
-entry:
   store i32 0, i32* @x
   store atomic i32 2, i32* @y seq_cst, align 4
   store i32 1, i32* @x
@@ -49,32 +46,29 @@ entry:
 }
 
 ; DSE remove unordered store (allowed)
-define void @test4()  nounwind uwtable ssp {
-; CHECK: test4
+define void @test4() {
+; CHECK-LABEL: test4
 ; CHECK-NOT: store atomic
 ; CHECK: store i32 1
-entry:
   store atomic i32 0, i32* @x unordered, align 4
   store i32 1, i32* @x
   ret void
 }
 
 ; DSE unordered store overwriting non-atomic store (allowed)
-define void @test5()  nounwind uwtable ssp {
-; CHECK: test5
+define void @test5() {
+; CHECK-LABEL: test5
 ; CHECK: store atomic i32 1
-entry:
   store i32 0, i32* @x
   store atomic i32 1, i32* @x unordered, align 4
   ret void
 }
 
 ; DSE no-op unordered atomic store (allowed)
-define void @test6()  nounwind uwtable ssp {
-; CHECK: test6
+define void @test6() {
+; CHECK-LABEL: test6
 ; CHECK-NOT: store
 ; CHECK: ret void
-entry:
   %x = load atomic i32* @x unordered, align 4
   store atomic i32 %x, i32* @x unordered, align 4
   ret void
@@ -82,10 +76,9 @@ entry:
 
 ; DSE seq_cst store (be conservative; DSE doesn't have infrastructure
 ; to reason about atomic operations).
-define void @test7()  nounwind uwtable ssp {
-; CHECK: test7
-; CHECK: store atomic 
-entry:
+define void @test7() {
+; CHECK-LABEL: test7
+; CHECK: store atomic
   %a = alloca i32
   store atomic i32 0, i32* %a seq_cst, align 4
   ret void
@@ -93,11 +86,10 @@ entry:
 
 ; DSE and seq_cst load (be conservative; DSE doesn't have infrastructure
 ; to reason about atomic operations).
-define i32 @test8()  nounwind uwtable ssp {
-; CHECK: test8
+define i32 @test8() {
+; CHECK-LABEL: test8
 ; CHECK: store
-; CHECK: load atomic 
-entry:
+; CHECK: load atomic
   %a = alloca i32
   call void @randomop(i32* %a)
   store i32 0, i32* %a, align 4
@@ -105,3 +97,82 @@ entry:
   ret i32 %x
 }
 
+; DSE across monotonic load (allowed as long as the eliminated store isUnordered)
+define i32 @test9() {
+; CHECK-LABEL: test9
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  %x = load atomic i32* @y monotonic, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
+
+; DSE across monotonic store (allowed as long as the eliminated store isUnordered)
+define void @test10() {
+; CHECK-LABEL: test10
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  store atomic i32 42, i32* @y monotonic, align 4
+  store i32 1, i32* @x
+  ret void
+}
+
+; DSE across monotonic load (forbidden since the eliminated store is atomic)
+define i32 @test11() {
+; CHECK-LABEL: test11
+; CHECK: store atomic i32 0
+; CHECK: store atomic i32 1
+  store atomic i32 0, i32* @x monotonic, align 4
+  %x = load atomic i32* @y monotonic, align 4
+  store atomic i32 1, i32* @x monotonic, align 4
+  ret i32 %x
+}
+
+; DSE across monotonic store (forbidden since the eliminated store is atomic)
+define void @test12() {
+; CHECK-LABEL: test12
+; CHECK: store atomic i32 0
+; CHECK: store atomic i32 1
+  store atomic i32 0, i32* @x monotonic, align 4
+  store atomic i32 42, i32* @y monotonic, align 4
+  store atomic i32 1, i32* @x monotonic, align 4
+  ret void
+}
+
+; DSE is allowed across a pair of an atomic read and then write.
+define i32 @test13() {
+; CHECK-LABEL: test13
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  %x = load atomic i32* @y seq_cst, align 4
+  store atomic i32 %x, i32* @y seq_cst, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
+
+; Same if it is acquire-release instead of seq_cst/seq_cst
+define i32 @test14() {
+; CHECK-LABEL: test14
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  %x = load atomic i32* @y acquire, align 4
+  store atomic i32 %x, i32* @y release, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
+
+; But DSE is not allowed across a release-acquire pair.
+define i32 @test15() {
+; CHECK-LABEL: test15
+; CHECK: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  store atomic i32 0, i32* @y release, align 4
+  %x = load atomic i32* @y acquire, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
diff --git a/test/Transforms/DeadStoreElimination/const-pointers.ll b/test/Transforms/DeadStoreElimination/const-pointers.ll
index c90d824..3e772d7 100644
--- a/test/Transforms/DeadStoreElimination/const-pointers.ll
+++ b/test/Transforms/DeadStoreElimination/const-pointers.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -basicaa -dse -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 %t = type { i32 }
 
diff --git a/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll b/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll
new file mode 100644
index 0000000..8953f9c
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll
@@ -0,0 +1,74 @@
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%class.basic_string = type { %"class.__gnu_cxx::__versa_string" }
+%"class.__gnu_cxx::__versa_string" = type { %"class.__gnu_cxx::__sso_string_base" }
+%"class.__gnu_cxx::__sso_string_base" = type { %"struct.__gnu_cxx::__vstring_utility<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider", i64, %union.anon }
+%"struct.__gnu_cxx::__vstring_utility<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" = type { i8* }
+%union.anon = type { i64, [8 x i8] }
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0
+
+; Function Attrs: noinline nounwind readonly uwtable
+declare zeroext i1 @callee_takes_string(%class.basic_string* nonnull) #1 align 2
+
+; Function Attrs: nounwind uwtable
+define weak_odr zeroext i1 @test() #2 align 2 {
+
+; CHECK-LABEL: @test
+
+bb:
+  %tmp = alloca %class.basic_string, align 8
+  %tmp1 = alloca %class.basic_string, align 8
+  %tmp3 = getelementptr inbounds %class.basic_string* %tmp, i64 0, i32 0, i32 0, i32 2
+  %tmp4 = bitcast %union.anon* %tmp3 to i8*
+  %tmp5 = getelementptr inbounds %class.basic_string* %tmp, i64 0, i32 0, i32 0, i32 0, i32 0
+  %tmp6 = getelementptr inbounds %class.basic_string* %tmp, i64 0, i32 0, i32 0, i32 1
+  %tmp7 = getelementptr inbounds i8* %tmp4, i64 1
+  %tmp8 = bitcast %class.basic_string* %tmp to i8*
+  %tmp9 = bitcast i64 0 to i64
+  %tmp10 = getelementptr inbounds %class.basic_string* %tmp1, i64 0, i32 0, i32 0, i32 2
+  %tmp11 = bitcast %union.anon* %tmp10 to i8*
+  %tmp12 = getelementptr inbounds %class.basic_string* %tmp1, i64 0, i32 0, i32 0, i32 0, i32 0
+  %tmp13 = getelementptr inbounds %class.basic_string* %tmp1, i64 0, i32 0, i32 0, i32 1
+  %tmp14 = getelementptr inbounds i8* %tmp11, i64 1
+  %tmp15 = bitcast %class.basic_string* %tmp1 to i8*
+  br label %_ZN12basic_stringIcSt11char_traitsIcESaIcEEC2EPKcRKS2_.exit
+
+_ZN12basic_stringIcSt11char_traitsIcESaIcEEC2EPKcRKS2_.exit: ; preds = %bb
+  store i8* %tmp4, i8** %tmp5, align 8
+  store i8 62, i8* %tmp4, align 8
+  store i64 1, i64* %tmp6, align 8
+  store i8 0, i8* %tmp7, align 1
+  %tmp16 = call zeroext i1 @callee_takes_string(%class.basic_string* nonnull %tmp)
+  br label %_ZN9__gnu_cxx17__sso_string_baseIcSt11char_traitsIcESaIcEED2Ev.exit3
+
+_ZN9__gnu_cxx17__sso_string_baseIcSt11char_traitsIcESaIcEED2Ev.exit3: ; preds = %_ZN12basic_stringIcSt11char_traitsIcESaIcEEC2EPKcRKS2_.exit
+
+; CHECK: _ZN9__gnu_cxx17__sso_string_baseIcSt11char_traitsIcESaIcEED2Ev.exit3:
+
+; The following can be read through the call %tmp17:
+  store i8* %tmp11, i8** %tmp12, align 8
+  store i8 125, i8* %tmp11, align 8
+  store i64 1, i64* %tmp13, align 8
+  store i8 0, i8* %tmp14, align 1
+
+; CHECK: store i8* %tmp11, i8** %tmp12, align 8
+; CHECK: store i8 125, i8* %tmp11, align 8
+; CHECK: store i64 1, i64* %tmp13, align 8
+; CHECK: store i8 0, i8* %tmp14, align 1
+
+  %tmp17 = call zeroext i1 @callee_takes_string(%class.basic_string* nonnull %tmp1)
+  call void @llvm.memset.p0i8.i64(i8* %tmp11, i8 -51, i64 16, i32 8, i1 false) #0
+  call void @llvm.memset.p0i8.i64(i8* %tmp15, i8 -51, i64 32, i32 8, i1 false) #0
+  call void @llvm.memset.p0i8.i64(i8* %tmp4, i8 -51, i64 16, i32 8, i1 false) #0
+  call void @llvm.memset.p0i8.i64(i8* %tmp8, i8 -51, i64 32, i32 8, i1 false) #0
+  ret i1 %tmp17
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { noinline nounwind readonly uwtable }
+attributes #2 = { nounwind uwtable }
+
diff --git a/test/Transforms/DeadStoreElimination/inst-limits.ll b/test/Transforms/DeadStoreElimination/inst-limits.ll
index 9df8801..3d78bb5 100644
--- a/test/Transforms/DeadStoreElimination/inst-limits.ll
+++ b/test/Transforms/DeadStoreElimination/inst-limits.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S -dse < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; If there are two stores to the same location, DSE should be able to remove
 ; the first store if the two stores are separated by no more than 98
@@ -117,7 +118,7 @@ entry:
 
   ; Insert a meaningless dbg.value intrinsic; it should have no
   ; effect on the working of DSE in any way.
-  call void @llvm.dbg.value(metadata !12, i64 0, metadata !10)
+  call void @llvm.dbg.value(metadata !12, i64 0, metadata !10, metadata !{})
 
   ; CHECK:  store i32 -1, i32* @x, align 4
   store i32 -1, i32* @x, align 4
@@ -239,23 +240,23 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!11, !13}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/tmp/test.c] [DW_LANG_C99]
+!0 = metadata !{metadata !"0x11\004\00clang version 3.4\001\00\000\00\000", metadata !1, metadata !2, metadata !2, metadata !3, metadata !9, metadata !2} ; [ DW_TAG_compile_unit ] [/home/tmp/test.c] [DW_LANG_C99]
 !1 = metadata !{metadata !"test.c", metadata !"/home/tmp"}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_within_limit", metadata !"test_within_limit", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @test_within_limit, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [test]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/tmp/test.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !"0x2e\00test_within_limit\00test_within_limit\00\003\000\001\000\006\00256\000\004", metadata !1, metadata !5, metadata !6, null, i32 ()* @test_within_limit, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [test]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [/home/tmp/test.c]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !9 = metadata !{metadata !10}
-!10 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !5, i32 1, metadata !8, i32 0, i32 1, i32* @x, null} ; [ DW_TAG_variable ] [x] [line 1] [def]
+!10 = metadata !{metadata !"0x34\00x\00x\00\001\000\001", null, metadata !5, metadata !8, i32* @x, null} ; [ DW_TAG_variable ] [x] [line 1] [def]
 !11 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
 !12 = metadata !{i32* undef}
 
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/DeadStoreElimination/no-targetdata.ll b/test/Transforms/DeadStoreElimination/no-targetdata.ll
index c0c7c58..2539533 100644
--- a/test/Transforms/DeadStoreElimination/no-targetdata.ll
+++ b/test/Transforms/DeadStoreElimination/no-targetdata.ll
@@ -1,15 +1,21 @@
 ; RUN: opt -basicaa -dse -S < %s | FileCheck %s
 
-declare void @test1f()
-
-define void @test1(i32* noalias %p) {
-       store i32 1, i32* %p
-       call void @test1f()
-       store i32 2, i32 *%p
-       ret void
-; CHECK-LABEL: define void @test1(
-; CHECK-NOT: store
-; CHECK-NEXT: call void
-; CHECK-NEXT: store i32 2
-; CHECK-NEXT: ret void
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+define void @fn(i8* nocapture %buf) #0 {
+entry:
+
+; We would not eliminate the first memcpy with data layout, and we should not
+; eliminate it without data layout.
+; CHECK-LABEL: @fn
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK: ret void
+
+  %arrayidx = getelementptr i8* %buf, i64 18
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arrayidx, i8* %buf, i64 18, i32 1, i1 false)
+  store i8 1, i8* %arrayidx, align 1
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %buf, i8* %arrayidx, i64 18, i32 1, i1 false)
+  ret void
 }
+
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index cdfe226..1e81385 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -172,6 +172,23 @@ define i32* @test13() {
 ; CHECK-NEXT: call void
 }
 
+define i32 addrspace(1)* @test13_addrspacecast() {
+  %p = tail call i8* @malloc(i32 4)
+  %p.bc = bitcast i8* %p to i32*
+  %P = addrspacecast i32* %p.bc to i32 addrspace(1)*
+  %DEAD = load i32 addrspace(1)* %P
+  %DEAD2 = add i32 %DEAD, 1
+  store i32 %DEAD2, i32 addrspace(1)* %P
+  call void @test13f( )
+  store i32 0, i32 addrspace(1)* %P
+  ret i32 addrspace(1)* %P
+; CHECK: @test13_addrspacecast()
+; CHECK-NEXT: malloc
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: addrspacecast
+; CHECK-NEXT: call void
+}
+
 declare noalias i8* @malloc(i32)
 declare noalias i8* @calloc(i32, i32)
 
diff --git a/test/Transforms/DebugIR/simple-addrspace.ll b/test/Transforms/DebugIR/simple-addrspace.ll
index 6bea9b2..6539c8a 100644
--- a/test/Transforms/DebugIR/simple-addrspace.ll
+++ b/test/Transforms/DebugIR/simple-addrspace.ll
@@ -8,6 +8,4 @@ define void @foo(i32 addrspace(1)*) nounwind {
 
 ; Make sure the pointer size is 16
 
-; CHECK: metadata !"i32 addrspace(1)*", i32 0, i64 16, i64 2, i64 0, i32 0
-
-
+; CHECK: metadata !"0xf\00i32 addrspace(1)*\000\0016\002\000\000"
diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll
index 80704df..155d36f 100644
--- a/test/Transforms/EarlyCSE/basic.ll
+++ b/test/Transforms/EarlyCSE/basic.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -S -early-cse | FileCheck %s
 
+declare void @llvm.assume(i1) nounwind
 
 ; CHECK-LABEL: @test1(
 define void @test1(i8 %V, i32 *%P) {
@@ -42,6 +43,16 @@ define i32 @test2(i32 *%P) {
   ; CHECK: ret i32 0
 }
 
+; CHECK-LABEL: @test2a(
+define i32 @test2a(i32 *%P, i1 %b) {
+  %V1 = load i32* %P
+  tail call void @llvm.assume(i1 %b)
+  %V2 = load i32* %P
+  %Diff = sub i32 %V1, %V2
+  ret i32 %Diff
+  ; CHECK: ret i32 0
+}
+
 ;; Cross block load value numbering.
 ; CHECK-LABEL: @test3(
 define i32 @test3(i32 *%P, i1 %Cond) {
@@ -58,6 +69,22 @@ F:
   ; CHECK: ret i32 0
 }
 
+; CHECK-LABEL: @test3a(
+define i32 @test3a(i32 *%P, i1 %Cond, i1 %b) {
+  %V1 = load i32* %P
+  br i1 %Cond, label %T, label %F
+T:
+  store i32 4, i32* %P
+  ret i32 42
+F:
+  tail call void @llvm.assume(i1 %b)
+  %V2 = load i32* %P
+  %Diff = sub i32 %V1, %V2
+  ret i32 %Diff
+  ; CHECK: F:
+  ; CHECK: ret i32 0
+}
+
 ;; Cross block load value numbering stops when stores happen.
 ; CHECK-LABEL: @test4(
 define i32 @test4(i32 *%P, i1 %Cond) {
@@ -97,6 +124,15 @@ define i32 @test6(i32 *%P) {
   ; CHECK: ret i32 42
 }
 
+; CHECK-LABEL: @test6a(
+define i32 @test6a(i32 *%P, i1 %b) {
+  store i32 42, i32* %P
+  tail call void @llvm.assume(i1 %b)
+  %V1 = load i32* %P
+  ret i32 %V1
+  ; CHECK: ret i32 42
+}
+
 ;; Trivial dead store elimination.
 ; CHECK-LABEL: @test7(
 define void @test7(i32 *%P) {
@@ -118,4 +154,42 @@ define i32 @test8(i32 *%P) {
   ; CHECK: ret i32 0
 }
 
+;; Trivial DSE can't be performed across a readonly call.  The call
+;; can observe the earlier write.
+; CHECK-LABEL: @test9(
+define i32 @test9(i32 *%P) {
+  store i32 4, i32* %P
+  %V1 = call i32 @func(i32* %P) readonly
+  store i32 5, i32* %P        
+  ret i32 %V1
+  ; CHECK: store i32 4, i32* %P        
+  ; CHECK-NEXT: %V1 = call i32 @func(i32* %P)
+  ; CHECK-NEXT: store i32 5, i32* %P        
+  ; CHECK-NEXT: ret i32 %V1
+}
+
+;; Trivial DSE can be performed across a readnone call.
+; CHECK-LABEL: @test10
+define i32 @test10(i32 *%P) {
+  store i32 4, i32* %P
+  %V1 = call i32 @func(i32* %P) readnone
+  store i32 5, i32* %P        
+  ret i32 %V1
+  ; CHECK-NEXT: %V1 = call i32 @func(i32* %P)
+  ; CHECK-NEXT: store i32 5, i32* %P        
+  ; CHECK-NEXT: ret i32 %V1
+}
+
+;; Trivial dead store elimination - should work for an entire series of dead stores too.
+; CHECK-LABEL: @test11(
+define void @test11(i32 *%P) {
+  store i32 42, i32* %P
+  store i32 43, i32* %P
+  store i32 44, i32* %P
+  store i32 45, i32* %P
+  ret void
+  ; CHECK-NEXT: store i32 45
+  ; CHECK-NEXT: ret void
+}
+
 
diff --git a/test/Transforms/FunctionAttrs/optnone-simple.ll b/test/Transforms/FunctionAttrs/optnone-simple.ll
new file mode 100644
index 0000000..9d0f8e3
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/optnone-simple.ll
@@ -0,0 +1,135 @@
+; RUN: opt -O3 -S < %s | FileCheck %s
+; Show 'optnone' suppresses optimizations.
+
+; Two attribute groups that differ only by 'optnone'.
+; 'optnone' requires 'noinline' so #0 is 'noinline' by itself,
+; even though it would otherwise be irrelevant to this example.
+attributes #0 = { noinline }
+attributes #1 = { noinline optnone }
+
+; int iadd(int a, int b){ return a + b; }
+
+define i32 @iadd_optimize(i32 %a, i32 %b) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  %0 = load i32* %a.addr, align 4
+  %1 = load i32* %b.addr, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+}
+
+; CHECK-LABEL: @iadd_optimize
+; CHECK-NOT: alloca
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK: ret
+
+define i32 @iadd_optnone(i32 %a, i32 %b) #1 {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  %0 = load i32* %a.addr, align 4
+  %1 = load i32* %b.addr, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+}
+
+; CHECK-LABEL: @iadd_optnone
+; CHECK: alloca i32
+; CHECK: alloca i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: load i32
+; CHECK: load i32
+; CHECK: add nsw i32
+; CHECK: ret i32
+
+; float fsub(float a, float b){ return a - b; }
+
+define float @fsub_optimize(float %a, float %b) #0 {
+entry:
+  %a.addr = alloca float, align 4
+  %b.addr = alloca float, align 4
+  store float %a, float* %a.addr, align 4
+  store float %b, float* %b.addr, align 4
+  %0 = load float* %a.addr, align 4
+  %1 = load float* %b.addr, align 4
+  %sub = fsub float %0, %1
+  ret float %sub
+}
+
+; CHECK-LABEL: @fsub_optimize
+; CHECK-NOT: alloca
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK: ret
+
+define float @fsub_optnone(float %a, float %b) #1 {
+entry:
+  %a.addr = alloca float, align 4
+  %b.addr = alloca float, align 4
+  store float %a, float* %a.addr, align 4
+  store float %b, float* %b.addr, align 4
+  %0 = load float* %a.addr, align 4
+  %1 = load float* %b.addr, align 4
+  %sub = fsub float %0, %1
+  ret float %sub
+}
+
+; CHECK-LABEL: @fsub_optnone
+; CHECK: alloca float
+; CHECK: alloca float
+; CHECK: store float
+; CHECK: store float
+; CHECK: load float
+; CHECK: load float
+; CHECK: fsub float
+; CHECK: ret float
+
+; typedef float __attribute__((ext_vector_type(4))) float4;
+; float4 vmul(float4 a, float4 b){ return a * b; }
+
+define <4 x float> @vmul_optimize(<4 x float> %a, <4 x float> %b) #0 {
+entry:
+  %a.addr = alloca <4 x float>, align 16
+  %b.addr = alloca <4 x float>, align 16
+  store <4 x float> %a, <4 x float>* %a.addr, align 16
+  store <4 x float> %b, <4 x float>* %b.addr, align 16
+  %0 = load <4 x float>* %a.addr, align 16
+  %1 = load <4 x float>* %b.addr, align 16
+  %mul = fmul <4 x float> %0, %1
+  ret <4 x float> %mul
+}
+
+; CHECK-LABEL: @vmul_optimize
+; CHECK-NOT: alloca
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK: ret
+
+define <4 x float> @vmul_optnone(<4 x float> %a, <4 x float> %b) #1 {
+entry:
+  %a.addr = alloca <4 x float>, align 16
+  %b.addr = alloca <4 x float>, align 16
+  store <4 x float> %a, <4 x float>* %a.addr, align 16
+  store <4 x float> %b, <4 x float>* %b.addr, align 16
+  %0 = load <4 x float>* %a.addr, align 16
+  %1 = load <4 x float>* %b.addr, align 16
+  %mul = fmul <4 x float> %0, %1
+  ret <4 x float> %mul
+}
+
+; CHECK-LABEL: @vmul_optnone
+; CHECK: alloca <4 x float>
+; CHECK: alloca <4 x float>
+; CHECK: store <4 x float>
+; CHECK: store <4 x float>
+; CHECK: load <4 x float>
+; CHECK: load <4 x float>
+; CHECK: fmul <4 x float>
+; CHECK: ret
diff --git a/test/Transforms/FunctionAttrs/optnone.ll b/test/Transforms/FunctionAttrs/optnone.ll
new file mode 100644
index 0000000..7694bfe
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/optnone.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -functionattrs -S | FileCheck %s
+
+@x = global i32 0
+
+define void @test_opt(i8* %p) {
+; CHECK-LABEL: @test_opt
+; CHECK: (i8* nocapture readnone %p) #0 {
+  ret void
+}
+
+define void @test_optnone(i8* %p) noinline optnone {
+; CHECK-LABEL: @test_optnone
+; CHECK: (i8* %p) #1 {
+  ret void
+}
+
+declare i8 @strlen(i8*) noinline optnone
+; CHECK-LABEL: @strlen
+; CHECK: (i8*) #1
+
+; CHECK-LABEL: attributes #0
+; CHECK: = { readnone }
+; CHECK-LABEL: attributes #1
+; CHECK: = { noinline optnone }
diff --git a/test/Transforms/GCOVProfiling/function-numbering.ll b/test/Transforms/GCOVProfiling/function-numbering.ll
new file mode 100644
index 0000000..2480820
--- /dev/null
+++ b/test/Transforms/GCOVProfiling/function-numbering.ll
@@ -0,0 +1,56 @@
+; Test that GCOV instrumentation numbers functions correctly when some
+; functions aren't emitted.
+
+; Inject metadata to set the .gcno file location
+; RUN: echo '!14 = metadata !{metadata !"%/T/function-numbering.ll", metadata !0}' > %t1
+; RUN: cat %s %t1 > %t2
+
+; RUN: opt -insert-gcov-profiling -S < %t2 | FileCheck --check-prefix GCDA %s
+; RUN: llvm-cov -n -dump %T/function-numbering.gcno 2>&1 | FileCheck --check-prefix GCNO %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; GCDA: @[[FOO:[0-9]+]] = private unnamed_addr constant [4 x i8] c"foo\00"
+; GCDA-NOT: @{{[0-9]+}} = private unnamed_addr constant .* c"bar\00"
+; GCDA: @[[BAZ:[0-9]+]] = private unnamed_addr constant [4 x i8] c"baz\00"
+; GCDA: define internal void @__llvm_gcov_writeout()
+; GCDA: call void @llvm_gcda_emit_function(i32 0, i8* getelementptr inbounds ([4 x i8]* @[[FOO]]
+; GCDA: call void @llvm_gcda_emit_function(i32 1, i8* getelementptr inbounds ([4 x i8]* @[[BAZ]]
+
+; GCNO: == foo (0) @
+; GCNO-NOT: == bar ({{[0-9]+}}) @
+; GCNO: == baz (1) @
+
+define void @foo() {
+  ret void, !dbg !12
+}
+
+define void @bar() {
+  ; This function is referenced by the debug info, but no lines have locations.
+  ret void
+}
+
+define void @baz() {
+  ret void, !dbg !13
+}
+
+!llvm.gcov = !{!14}
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = metadata !{metadata !"0x11\0012\00clang version 3.6.0 \000\00\000\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [function-numbering.ll] [DW_LANG_C99]
+!1 = metadata !{metadata !".../llvm/test/Transforms/GCOVProfiling/function-numbering.ll", metadata !""}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !7, metadata !8}
+!4 = metadata !{metadata !"0x2e\00foo\00foo\00\001\000\001\000\000\000\000\001", metadata !1, metadata !5, metadata !6, null, void ()* @foo, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{metadata !"0x29", metadata !1}    ; [ DW_TAG_file_type ] [/Users/bogner/build/llvm-debug//tmp/foo.c]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", null, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !"0x2e\00bar\00bar\00\002\000\001\000\000\000\000\002", metadata !1, metadata !5, metadata !6, null, void ()* @bar, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 2] [def] [bar]
+!8 = metadata !{metadata !"0x2e\00baz\00baz\00\003\000\001\000\000\000\000\003", metadata !1, metadata !5, metadata !6, null, void ()* @baz, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 3] [def] [baz]
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 2}
+!11 = metadata !{metadata !"clang version 3.6.0 "}
+!12 = metadata !{i32 1, i32 13, metadata !4, null}
+!13 = metadata !{i32 3, i32 13, metadata !8, null}
diff --git a/test/Transforms/GCOVProfiling/global-ctor.ll b/test/Transforms/GCOVProfiling/global-ctor.ll
index 722a096..1dff3f0 100644
--- a/test/Transforms/GCOVProfiling/global-ctor.ll
+++ b/test/Transforms/GCOVProfiling/global-ctor.ll
@@ -1,11 +1,9 @@
-; RUN: echo '!16 = metadata !{metadata !"%T/global-ctor.ll", metadata !0}' > %t1
+; RUN: echo '!16 = metadata !{metadata !"%/T/global-ctor.ll", metadata !0}' > %t1
 ; RUN: cat %s %t1 > %t2
 ; RUN: opt -insert-gcov-profiling -disable-output < %t2
 ; RUN: not grep '_GLOBAL__sub_I_global-ctor' %T/global-ctor.gcno
 ; RUN: rm %T/global-ctor.gcno
 
-; REQUIRES: shell
-
 @x = global i32 0, align 4
 @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_global-ctor.ll, i8* null }]
 
@@ -40,19 +38,19 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "
 !llvm.gcov = !{!16}
 !llvm.ident = !{!12}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (trunk 210217)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [/home/nlewycky/<stdin>] [DW_LANG_C_plus_plus]
+!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0 (trunk 210217)\000\00\000\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [/home/nlewycky/<stdin>] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !"<stdin>", metadata !"/home/nlewycky"}
 !2 = metadata !{}
 !3 = metadata !{metadata !4, metadata !8}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"__cxx_global_var_init", metadata !"__cxx_global_var_init", metadata !"", i32 2, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @__cxx_global_var_init, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [local] [def] [__cxx_global_var_init]
+!4 = metadata !{metadata !"0x2e\00__cxx_global_var_init\00__cxx_global_var_init\00\002\001\001\000\006\00256\000\002", metadata !5, metadata !6, metadata !7, null, void ()* @__cxx_global_var_init, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 2] [local] [def] [__cxx_global_var_init]
 !5 = metadata !{metadata !"global-ctor.ll", metadata !"/home/nlewycky"}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/home/nlewycky/global-ctor.ll]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"", metadata !"", metadata !"_GLOBAL__sub_I_global-ctor.ll", i32 0, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 64, i1 false, void ()* @_GLOBAL__sub_I_global-ctor.ll, null, null, metadata !2, i32 0} ; [ DW_TAG_subprogram ] [line 0] [local] [def]
-!9 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/nlewycky/<stdin>]
+!6 = metadata !{metadata !"0x29", metadata !5}          ; [ DW_TAG_file_type ] [/home/nlewycky/global-ctor.ll]
+!7 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !"0x2e\00\00\00_GLOBAL__sub_I_global-ctor.ll\000\001\001\000\006\0064\000\000", metadata !1, metadata !9, metadata !7, null, void ()* @_GLOBAL__sub_I_global-ctor.ll, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 0] [local] [def]
+!9 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [/home/nlewycky/<stdin>]
 !10 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!11 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!11 = metadata !{i32 2, metadata !"Debug Info Version", i32 2}
 !12 = metadata !{metadata !"clang version 3.5.0 (trunk 210217)"}
 !13 = metadata !{i32 2, i32 0, metadata !4, null}
 !14 = metadata !{i32 0, i32 0, metadata !15, null}
-!15 = metadata !{i32 786443, metadata !5, metadata !8} ; [ DW_TAG_lexical_block ] [/home/nlewycky/global-ctor.ll]
+!15 = metadata !{metadata !"0xb\000", metadata !5, metadata !8} ; [ DW_TAG_lexical_block ] [/home/nlewycky/global-ctor.ll]
diff --git a/test/Transforms/GCOVProfiling/linezero.ll b/test/Transforms/GCOVProfiling/linezero.ll
index e2f8324..50e026c 100644
--- a/test/Transforms/GCOVProfiling/linezero.ll
+++ b/test/Transforms/GCOVProfiling/linezero.ll
@@ -1,7 +1,6 @@
-; RUN: sed -e 's@PATTERN@\%T@g' < %s > %t1
+; RUN: sed -e 's|PATTERN|%/T|g' < %s > %t1
 ; RUN: opt -insert-gcov-profiling -disable-output < %t1
 ; RUN: rm %T/linezero.gcno %t1
-; REQUIRES: shell
 
 ; This is a crash test.
 
@@ -20,17 +19,17 @@ entry:
   %__begin = alloca i8*, align 8
   %__end = alloca i8*, align 8
   %spec = alloca i8, align 1
-  call void @llvm.dbg.declare(metadata !{%struct.vector** %__range}, metadata !27), !dbg !30
+  call void @llvm.dbg.declare(metadata !{%struct.vector** %__range}, metadata !27, metadata !{}), !dbg !30
   br label %0
 
 ; <label>:0                                       ; preds = %entry
   call void @_Z13TagFieldSpecsv(), !dbg !31
   store %struct.vector* %ref.tmp, %struct.vector** %__range, align 8, !dbg !31
-  call void @llvm.dbg.declare(metadata !{i8** %__begin}, metadata !32), !dbg !30
+  call void @llvm.dbg.declare(metadata !{i8** %__begin}, metadata !32, metadata !{}), !dbg !30
   %1 = load %struct.vector** %__range, align 8, !dbg !31
   %call = call i8* @_ZN6vector5beginEv(%struct.vector* %1), !dbg !31
   store i8* %call, i8** %__begin, align 8, !dbg !31
-  call void @llvm.dbg.declare(metadata !{i8** %__end}, metadata !33), !dbg !30
+  call void @llvm.dbg.declare(metadata !{i8** %__end}, metadata !33, metadata !{}), !dbg !30
   %2 = load %struct.vector** %__range, align 8, !dbg !31
   %call1 = call i8* @_ZN6vector3endEv(%struct.vector* %2), !dbg !31
   store i8* %call1, i8** %__end, align 8, !dbg !31
@@ -43,7 +42,7 @@ for.cond:                                         ; preds = %for.inc, %0
   br i1 %cmp, label %for.body, label %for.end, !dbg !34
 
 for.body:                                         ; preds = %for.cond
-  call void @llvm.dbg.declare(metadata !{i8* %spec}, metadata !37), !dbg !31
+  call void @llvm.dbg.declare(metadata !{i8* %spec}, metadata !37, metadata !{}), !dbg !31
   %5 = load i8** %__begin, align 8, !dbg !38
   %6 = load i8* %5, align 1, !dbg !38
   store i8 %6, i8* %spec, align 1, !dbg !38
@@ -65,7 +64,7 @@ return:                                           ; No predecessors!
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare void @_Z13TagFieldSpecsv() #2
 
@@ -95,49 +94,49 @@ attributes #3 = { noreturn nounwind }
 !llvm.gcov = !{!25}
 !llvm.ident = !{!26}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (trunk 209871)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !14, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [<stdin>] [DW_LANG_C_plus_plus]
+!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0 (trunk 209871)\000\00\000\00\001", metadata !1, metadata !2, metadata !3, metadata !14, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [<stdin>] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !"<stdin>", metadata !"PATTERN"}
 !2 = metadata !{}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !5, null, metadata !"vector", i32 21, i64 8, i64 8, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS6vector"} ; [ DW_TAG_structure_type ] [vector] [line 21, size 8, align 8, offset 0] [def] [from ]
+!4 = metadata !{metadata !"0x13\00vector\0021\008\008\000\000\000", metadata !5, null, null, metadata !6, null, null, metadata !"_ZTS6vector"} ; [ DW_TAG_structure_type ] [vector] [line 21, size 8, align 8, offset 0] [def] [from ]
 !5 = metadata !{metadata !"linezero.cc", metadata !"PATTERN"}
 !6 = metadata !{metadata !7, metadata !13}
-!7 = metadata !{i32 786478, metadata !5, metadata !"_ZTS6vector", metadata !"begin", metadata !"begin", metadata !"_ZN6vector5beginEv", i32 25, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 25} ; [ DW_TAG_subprogram ] [line 25] [begin]
-!8 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !"0x2e\00begin\00begin\00_ZN6vector5beginEv\0025\000\000\000\006\00256\000\0025", metadata !5, metadata !"_ZTS6vector", metadata !8, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 25] [begin]
+!8 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !9, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !9 = metadata !{metadata !10, metadata !12}
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
-!11 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS6vector"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS6vector]
-!13 = metadata !{i32 786478, metadata !5, metadata !"_ZTS6vector", metadata !"end", metadata !"end", metadata !"_ZN6vector3endEv", i32 26, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 26} ; [ DW_TAG_subprogram ] [line 26] [end]
+!10 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", null, null, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!11 = metadata !{metadata !"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!12 = metadata !{metadata !"0xf\00\000\0064\0064\000\001088", null, null, metadata !"_ZTS6vector"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS6vector]
+!13 = metadata !{metadata !"0x2e\00end\00end\00_ZN6vector3endEv\0026\000\000\000\006\00256\000\0026", metadata !5, metadata !"_ZTS6vector", metadata !8, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 26] [end]
 !14 = metadata !{metadata !15, metadata !20}
-!15 = metadata !{i32 786478, metadata !5, metadata !16, metadata !"test", metadata !"test", metadata !"_Z4testv", i32 50, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z4testv, null, null, metadata !2, i32 50} ; [ DW_TAG_subprogram ] [line 50] [def] [test]
-!16 = metadata !{i32 786473, metadata !5}         ; [ DW_TAG_file_type ] [./linezero.cc]
-!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = metadata !{metadata !"0x2e\00test\00test\00_Z4testv\0050\000\001\000\006\00256\000\0050", metadata !5, metadata !16, metadata !17, null, i32 ()* @_Z4testv, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 50] [def] [test]
+!16 = metadata !{metadata !"0x29", metadata !5}         ; [ DW_TAG_file_type ] [./linezero.cc]
+!17 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !18 = metadata !{metadata !19}
-!19 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!20 = metadata !{i32 786478, metadata !5, metadata !16, metadata !"f1", metadata !"f1", metadata !"_Z2f1v", i32 54, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z2f1v, null, null, metadata !2, i32 54} ; [ DW_TAG_subprogram ] [line 54] [def] [f1]
-!21 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!19 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!20 = metadata !{metadata !"0x2e\00f1\00f1\00_Z2f1v\0054\000\001\000\006\00256\000\0054", metadata !5, metadata !16, metadata !21, null, void ()* @_Z2f1v, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 54] [def] [f1]
+!21 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !22, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !22 = metadata !{null}
 !23 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!24 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!24 = metadata !{i32 2, metadata !"Debug Info Version", i32 2}
 !25 = metadata !{metadata !"PATTERN/linezero.o", metadata !0}
 !26 = metadata !{metadata !"clang version 3.5.0 (trunk 209871)"}
-!27 = metadata !{i32 786688, metadata !28, metadata !"__range", null, i32 0, metadata !29, i32 64, i32 0} ; [ DW_TAG_auto_variable ] [__range] [line 0]
-!28 = metadata !{i32 786443, metadata !5, metadata !15, i32 51, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./linezero.cc]
-!29 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS6vector"} ; [ DW_TAG_rvalue_reference_type ] [line 0, size 0, align 0, offset 0] [from _ZTS6vector]
+!27 = metadata !{metadata !"0x100\00__range\000\0064", metadata !28, null, metadata !29} ; [ DW_TAG_auto_variable ] [__range] [line 0]
+!28 = metadata !{metadata !"0xb\0051\000\000", metadata !5, metadata !15} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!29 = metadata !{metadata !"0x42\00\000\000\000\000\000", null, null, metadata !"_ZTS6vector"} ; [ DW_TAG_rvalue_reference_type ] [line 0, size 0, align 0, offset 0] [from _ZTS6vector]
 !30 = metadata !{i32 0, i32 0, metadata !28, null}
 !31 = metadata !{i32 51, i32 0, metadata !28, null}
-!32 = metadata !{i32 786688, metadata !28, metadata !"__begin", null, i32 0, metadata !10, i32 64, i32 0} ; [ DW_TAG_auto_variable ] [__begin] [line 0]
-!33 = metadata !{i32 786688, metadata !28, metadata !"__end", null, i32 0, metadata !10, i32 64, i32 0} ; [ DW_TAG_auto_variable ] [__end] [line 0]
+!32 = metadata !{metadata !"0x100\00__begin\000\0064", metadata !28, null, metadata !10} ; [ DW_TAG_auto_variable ] [__begin] [line 0]
+!33 = metadata !{metadata !"0x100\00__end\000\0064", metadata !28, null, metadata !10} ; [ DW_TAG_auto_variable ] [__end] [line 0]
 !34 = metadata !{i32 51, i32 0, metadata !35, null}
-!35 = metadata !{i32 786443, metadata !5, metadata !36, i32 51, i32 0, i32 5, i32 5} ; [ DW_TAG_lexical_block ] [./linezero.cc]
-!36 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 1, i32 1} ; [ DW_TAG_lexical_block ] [./linezero.cc]
-!37 = metadata !{i32 786688, metadata !28, metadata !"spec", metadata !16, i32 51, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [spec] [line 51]
+!35 = metadata !{metadata !"0xb\0051\000\005", metadata !5, metadata !36} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!36 = metadata !{metadata !"0xb\0051\000\001", metadata !5, metadata !28} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!37 = metadata !{metadata !"0x100\00spec\0051\000", metadata !28, metadata !16, metadata !11} ; [ DW_TAG_auto_variable ] [spec] [line 51]
 !38 = metadata !{i32 51, i32 0, metadata !39, null}
-!39 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 2, i32 2} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!39 = metadata !{metadata !"0xb\0051\000\002", metadata !5, metadata !28} ; [ DW_TAG_lexical_block ] [./linezero.cc]
 !40 = metadata !{i32 51, i32 0, metadata !41, null}
-!41 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 4, i32 4} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!41 = metadata !{metadata !"0xb\0051\000\004", metadata !5, metadata !28} ; [ DW_TAG_lexical_block ] [./linezero.cc]
 !42 = metadata !{i32 51, i32 0, metadata !43, null}
-!43 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 3, i32 3} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!43 = metadata !{metadata !"0xb\0051\000\003", metadata !5, metadata !28} ; [ DW_TAG_lexical_block ] [./linezero.cc]
 !44 = metadata !{i32 52, i32 0, metadata !15, null}
 !45 = metadata !{i32 54, i32 0, metadata !20, null}
diff --git a/test/Transforms/GCOVProfiling/linkagename.ll b/test/Transforms/GCOVProfiling/linkagename.ll
index ed3a5bd..04281b2 100644
--- a/test/Transforms/GCOVProfiling/linkagename.ll
+++ b/test/Transforms/GCOVProfiling/linkagename.ll
@@ -1,11 +1,9 @@
-; RUN: echo '!9 = metadata !{metadata !"%T/linkagename.ll", metadata !0}' > %t1
+; RUN: echo '!9 = metadata !{metadata !"%/T/linkagename.ll", metadata !0}' > %t1
 ; RUN: cat %s %t1 > %t2
 ; RUN: opt -insert-gcov-profiling -disable-output < %t2
 ; RUN: grep _Z3foov %T/linkagename.gcno
 ; RUN: rm %T/linkagename.gcno
 
-; REQUIRES: shell
-
 define void @_Z3foov() {
 entry:
   ret void, !dbg !8
@@ -15,15 +13,15 @@ entry:
 !llvm.module.flags = !{!10}
 !llvm.gcov = !{!9}
 
-!0 = metadata !{i32 786449, metadata !2, i32 4, metadata !"clang version 3.3 (trunk 177323)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/nlewycky/hello.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{i32 786473, metadata !2}          ; [ DW_TAG_file_type ] [/home/nlewycky/hello.cc]
+!0 = metadata !{metadata !"0x11\004\00clang version 3.3 (trunk 177323)\000\00\000\00\000", metadata !2, metadata !3, metadata !3, metadata !4, metadata !3,  metadata !3} ; [ DW_TAG_compile_unit ] [/home/nlewycky/hello.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"0x29", metadata !2}          ; [ DW_TAG_file_type ] [/home/nlewycky/hello.cc]
 !2 = metadata !{metadata !"hello.cc", metadata !"/home/nlewycky"}
 !3 = metadata !{i32 0}
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!6 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{metadata !"0x2e\00foo\00foo\00_Z3foov\001\000\001\000\006\00256\000\001", metadata !1, metadata !1, metadata !6, null, void ()* @_Z3foov, null, null, metadata !3} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !7, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{null}
 !8 = metadata !{i32 1, i32 0, metadata !5, null}
 
 
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/GCOVProfiling/version.ll b/test/Transforms/GCOVProfiling/version.ll
index 04f3f99..1af684e 100644
--- a/test/Transforms/GCOVProfiling/version.ll
+++ b/test/Transforms/GCOVProfiling/version.ll
@@ -1,32 +1,30 @@
-; RUN: echo '!9 = metadata !{metadata !"%T/version.ll", metadata !0}' > %t1
+; RUN: echo '!9 = metadata !{metadata !"%/T/version.ll", metadata !0}' > %t1
 ; RUN: cat %s %t1 > %t2
 ; RUN: opt -insert-gcov-profiling -disable-output < %t2
-; RUN: head -c8 %T/version.gcno | grep '^oncg\*204'
+; RUN: head -c8 %T/version.gcno | grep '^oncg.204'
 ; RUN: rm %T/version.gcno
 ; RUN: not opt -insert-gcov-profiling -default-gcov-version=asdfasdf -disable-output < %t2
 ; RUN: opt -insert-gcov-profiling -default-gcov-version=407* -disable-output < %t2
-; RUN: head -c8 %T/version.gcno | grep '^oncg\*704'
+; RUN: head -c8 %T/version.gcno | grep '^oncg.704'
 ; RUN: rm %T/version.gcno
 
 define void @test() {
   ret void, !dbg !8
 }
 
-; REQUIRES: shell
-
 !llvm.gcov = !{!9}
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!12}
 
-!0 = metadata !{i32 786449, metadata !11, i32 4, metadata !"clang version 3.3 (trunk 176994)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, null, metadata !""} ; [ DW_TAG_compile_unit ] [./version] [DW_LANG_C_plus_plus]
-!2 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ]
+!0 = metadata !{metadata !"0x11\004\00clang version 3.3 (trunk 176994)\000\00\000\00\000", metadata !11, metadata !3, metadata !3, metadata !4, metadata !3, null} ; [ DW_TAG_compile_unit ] [./version] [DW_LANG_C_plus_plus]
+!2 = metadata !{metadata !"0x29", metadata !11} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 0}
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !10, metadata !6, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @test, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [test]
-!6 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{metadata !"0x2e\00test\00test\00\001\000\001\000\006\00256\000\001", metadata !10, metadata !6, metadata !7, null, void ()* @test, null, null, metadata !3} ; [ DW_TAG_subprogram ] [line 1] [def] [test]
+!6 = metadata !{metadata !"0x29", metadata !10} ; [ DW_TAG_file_type ]
+!7 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !3, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{i32 1, i32 0, metadata !5, null}
 ;; !9 is added through the echo line at the top.
 !10 = metadata !{metadata !"<stdin>", metadata !"."}
 !11 = metadata !{metadata !"version", metadata !"/usr/local/google/home/nlewycky"}
-!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/GVN/2009-03-10-PREOnVoid.ll b/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
index fd31fce..fdf17e0 100644
--- a/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
+++ b/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
@@ -17,20 +17,20 @@ target triple = "i386-pc-linux-gnu"
 	%"struct.std::pair<std::_Rb_tree_iterator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,bool>" = type { %"struct.std::_Rb_tree_iterator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >", i8 }
 	%"struct.std::pair<void* const,void*>" = type { i8*, i8* }
 
-@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
-@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific		; <i8* (i32)*> [#uses=0]
-@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific		; <i32 (i32, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_createPmPK16__pthread_attr_sPFPvS3_ES3_ = alias weak i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create		; <i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i32)* @pthread_cancel		; <i32 (i32)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)* @pthread_mutex_init		; <i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create		; <i32 (i32*, void (i8*)*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete		; <i32 (i32)*> [#uses=0]
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%struct.__sched_param*)* @pthread_mutexattr_init		; <i32 (%struct.__sched_param*)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%struct.__sched_param*, i32)* @pthread_mutexattr_settype		; <i32 (%struct.__sched_param*, i32)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%struct.__sched_param*)* @pthread_mutexattr_destroy		; <i32 (%struct.__sched_param*)*> [#uses=0]
+@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
+@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32)* @pthread_getspecific		; <i8* (i32)*> [#uses=0]
+@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*)* @pthread_setspecific		; <i32 (i32, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_createPmPK16__pthread_attr_sPFPvS3_ES3_ = weak alias i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create		; <i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i32)* @pthread_cancel		; <i32 (i32)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)* @pthread_mutex_init		; <i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*)* @pthread_key_create		; <i32 (i32*, void (i8*)*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32)* @pthread_key_delete		; <i32 (i32)*> [#uses=0]
+@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*)* @pthread_mutexattr_init		; <i32 (%struct.__sched_param*)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%struct.__sched_param*, i32)* @pthread_mutexattr_settype		; <i32 (%struct.__sched_param*, i32)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*)* @pthread_mutexattr_destroy		; <i32 (%struct.__sched_param*)*> [#uses=0]
 
 declare fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind readnone
 
diff --git a/test/Transforms/GVN/atomic.ll b/test/Transforms/GVN/atomic.ll
index 094e22b..8c13d20 100644
--- a/test/Transforms/GVN/atomic.ll
+++ b/test/Transforms/GVN/atomic.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 ; GVN across unordered store (allowed)
 define i32 @test1() nounwind uwtable ssp {
-; CHECK: test1
+; CHECK-LABEL: test1
 ; CHECK: add i32 %x, %x
 entry:
   %x = load i32* @y
@@ -18,10 +18,10 @@ entry:
   ret i32 %z
 }
 
-; GVN across seq_cst store (allowed in theory; not implemented ATM)
+; GVN across seq_cst store (allowed)
 define i32 @test2() nounwind uwtable ssp {
-; CHECK: test2
-; CHECK: add i32 %x, %y
+; CHECK-LABEL: test2
+; CHECK: add i32 %x, %x
 entry:
   %x = load i32* @y
   store atomic i32 %x, i32* @x seq_cst, align 4
@@ -32,7 +32,7 @@ entry:
 
 ; GVN across unordered load (allowed)
 define i32 @test3() nounwind uwtable ssp {
-; CHECK: test3
+; CHECK-LABEL: test3
 ; CHECK: add i32 %x, %x
 entry:
   %x = load i32* @y
@@ -43,11 +43,11 @@ entry:
   ret i32 %b
 }
 
-; GVN across acquire load (load after atomic load must not be removed)
+; GVN across acquire load (allowed as the original load was not atomic)
 define i32 @test4() nounwind uwtable ssp {
-; CHECK: test4
+; CHECK-LABEL: test4
 ; CHECK: load atomic i32* @x
-; CHECK: load i32* @y
+; CHECK-NOT: load i32* @y
 entry:
   %x = load i32* @y
   %y = load atomic i32* @x seq_cst, align 4
@@ -59,7 +59,7 @@ entry:
 
 ; GVN load to unordered load (allowed)
 define i32 @test5() nounwind uwtable ssp {
-; CHECK: test5
+; CHECK-LABEL: test5
 ; CHECK: add i32 %x, %x
 entry:
   %x = load atomic i32* @x unordered, align 4
@@ -70,7 +70,7 @@ entry:
 
 ; GVN unordered load to load (unordered load must not be removed)
 define i32 @test6() nounwind uwtable ssp {
-; CHECK: test6
+; CHECK-LABEL: test6
 ; CHECK: load atomic i32* @x unordered
 entry:
   %x = load i32* @x
@@ -78,3 +78,54 @@ entry:
   %x3 = add i32 %x, %x2
   ret i32 %x3
 }
+
+; GVN across release-acquire pair (forbidden)
+define i32 @test7() nounwind uwtable ssp {
+; CHECK-LABEL: test7
+; CHECK: add i32 %x, %y
+entry:
+  %x = load i32* @y
+  store atomic i32 %x, i32* @x release, align 4
+  %w = load atomic i32* @x acquire, align 4
+  %y = load i32* @y
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; GVN across acquire-release pair (allowed)
+define i32 @test8() nounwind uwtable ssp {
+; CHECK-LABEL: test8
+; CHECK: add i32 %x, %x
+entry:
+  %x = load i32* @y
+  %w = load atomic i32* @x acquire, align 4
+  store atomic i32 %x, i32* @x release, align 4
+  %y = load i32* @y
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; GVN across monotonic store (allowed)
+define i32 @test9() nounwind uwtable ssp {
+; CHECK-LABEL: test9
+; CHECK: add i32 %x, %x
+entry:
+  %x = load i32* @y
+  store atomic i32 %x, i32* @x monotonic, align 4
+  %y = load i32* @y
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; GVN of an unordered across monotonic load (not allowed)
+define i32 @test10() nounwind uwtable ssp {
+; CHECK-LABEL: test10
+; CHECK: add i32 %x, %y
+entry:
+  %x = load atomic i32* @y unordered, align 4
+  %clobber = load atomic i32* @x monotonic, align 4
+  %y = load atomic i32* @y monotonic, align 4
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
diff --git a/test/Transforms/GVN/noalias.ll b/test/Transforms/GVN/noalias.ll
new file mode 100644
index 0000000..a774f38
--- /dev/null
+++ b/test/Transforms/GVN/noalias.ll
@@ -0,0 +1,43 @@
+; RUN: opt -scoped-noalias -basicaa -gvn -S < %s | FileCheck %s
+
+define i32 @test1(i32* %p, i32* %q) {
+; CHECK-LABEL: @test1(i32* %p, i32* %q)
+; CHECK: load i32* %p
+; CHECK-NOT: noalias
+; CHECK: %c = add i32 %a, %a
+  %a = load i32* %p, !noalias !0
+  %b = load i32* %p
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test2(i32* %p, i32* %q) {
+; CHECK-LABEL: @test2(i32* %p, i32* %q)
+; CHECK: load i32* %p, !alias.scope !0
+; CHECK: %c = add i32 %a, %a
+  %a = load i32* %p, !alias.scope !0
+  %b = load i32* %p, !alias.scope !0
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+; FIXME: In this case we can do better than intersecting the scopes, and can
+; concatenate them instead. Both loads are in the same basic block, the first
+; makes the second safe to speculatively execute, and there are no calls that may
+; throw in between.
+define i32 @test3(i32* %p, i32* %q) {
+; CHECK-LABEL: @test3(i32* %p, i32* %q)
+; CHECK: load i32* %p, !alias.scope !1
+; CHECK: %c = add i32 %a, %a
+  %a = load i32* %p, !alias.scope !1
+  %b = load i32* %p, !alias.scope !2
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+declare i32 @foo(i32*) readonly
+
+!0 = metadata !{metadata !0}
+!1 = metadata !{metadata !1}
+!2 = metadata !{metadata !0, metadata !1}
+
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 8d289b0..6aac93e 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -318,6 +318,19 @@ define i8 @coerce_offset0(i32 %V, i32* %P) {
 ; CHECK: ret i8
 }
 
+define i8 @coerce_offset0_addrspacecast(i32 %V, i32* %P) {
+  store i32 %V, i32* %P
+
+  %P2 = addrspacecast i32* %P to i8 addrspace(1)*
+  %P3 = getelementptr i8 addrspace(1)* %P2, i32 2
+
+  %A = load i8 addrspace(1)* %P3
+  ret i8 %A
+; CHECK-LABEL: @coerce_offset0_addrspacecast(
+; CHECK-NOT: load
+; CHECK: ret i8
+}
+
 ;; non-local i32/float -> i8 load forwarding.
 define i8 @coerce_offset_nonlocal0(i32* %P, i1 %cond) {
   %P2 = bitcast i32* %P to float*
diff --git a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
index 0bdced5..584f0bf 100644
--- a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
+++ b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
@@ -5,14 +5,14 @@
 @A = global i32 0
 ; CHECK: @A = global i32 0
 
-@D = alias internal i32* @A
+@D = internal alias i32* @A
 ; DEAD-NOT: @D
 
 @L1 = alias i32* @A
 ; CHECK: @L1 = alias i32* @A
 
-@L2 = alias internal i32* @L1
-; CHECK: @L2 = alias internal i32* @L1
+@L2 = internal alias i32* @L1
+; CHECK: @L2 = internal alias i32* @L1
 
 @L3 = alias i32* @L2
 ; CHECK: @L3 = alias i32* @L2
diff --git a/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll b/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
index 68933c6..5fb4444 100644
--- a/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
+++ b/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
@@ -1,4 +1,4 @@
 ; RUN: opt < %s -globaldce
 
-@A = alias internal void ()* @F
+@A = internal alias void ()* @F
 define internal void @F() { ret void }
diff --git a/test/Transforms/GlobalDCE/deadblockaddr.ll b/test/Transforms/GlobalDCE/deadblockaddr.ll
new file mode 100644
index 0000000..1ec5994
--- /dev/null
+++ b/test/Transforms/GlobalDCE/deadblockaddr.ll
@@ -0,0 +1,16 @@
+; RUN: opt -globaldce -simplifycfg -S < %s | FileCheck %s
+
+; Tests whether globaldce does the right cleanup while removing @bar
+; so that a dead BlockAddress reference to foo won't prevent other passes
+; to work properly, e.g. simplifycfg
+@bar = internal unnamed_addr constant i8* blockaddress(@foo, %L1)
+
+; CHECK-LABEL: foo
+; CHECK-NOT: br label %L1
+; CHECK: ret void
+define void @foo() {
+entry:
+  br label %L1
+L1:
+  ret void
+}
diff --git a/test/Transforms/GlobalDCE/pr20981.ll b/test/Transforms/GlobalDCE/pr20981.ll
new file mode 100644
index 0000000..92d2840
--- /dev/null
+++ b/test/Transforms/GlobalDCE/pr20981.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -globaldce -S | FileCheck %s
+
+$c1 = comdat any
+; CHECK: $c1 = comdat any
+
+@a1 = linkonce_odr alias void ()* @f1
+; CHECK: @a1 = linkonce_odr alias void ()* @f1
+
+define linkonce_odr void @f1() comdat $c1 {
+  ret void
+}
+; CHECK: define linkonce_odr void @f1() comdat $c1
+
+define void @g() {
+  call void @f1()
+  ret void
+}
diff --git a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
index b98faca..8efd018 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
@@ -13,7 +13,7 @@ define void @g() {
 	ret void
 }
 
-@b = alias internal void ()* @g
+@b = internal alias  void ()* @g
 ; CHECK-NOT: @b
 
 define void @h() {
diff --git a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
index 0108960..0513829 100644
--- a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
+++ b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
@@ -6,14 +6,14 @@
 define i32 @foo(i32 %i) nounwind ssp {
 entry:
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !3)
+  call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !3, metadata !{})
   %0 = icmp eq i32 %i, 1, !dbg !7                 ; <i1> [#uses=1]
   br i1 %0, label %bb, label %bb1, !dbg !7
 
 bb:                                               ; preds = %entry
   store i32 0, i32* @Stop, align 4, !dbg !9
   %1 = mul nsw i32 %i, 42, !dbg !10               ; <i32> [#uses=1]
-  call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !3), !dbg !10
+  call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !3, metadata !{}), !dbg !10
   br label %bb2, !dbg !10
 
 bb1:                                              ; preds = %entry
@@ -28,7 +28,7 @@ return:                                           ; preds = %bb2
   ret i32 %i_addr.0, !dbg !12
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define i32 @bar() nounwind ssp {
 entry:
@@ -51,27 +51,27 @@ return:                                           ; preds = %bb2
   ret i32 %.0, !dbg !19
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.gv = !{!0}
 
-!0 = metadata !{i32 458804, i32 0, metadata !1, metadata !"Stop", metadata !"Stop", metadata !"", metadata !1, i32 2, metadata !2, i1 true, i1 true, i32* @Stop} ; [ DW_TAG_variable ]
-!1 = metadata !{i32 458769, metadata !20, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!2 = metadata !{i32 458788, null, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!3 = metadata !{i32 459009, metadata !4, metadata !"i", metadata !1, i32 4, metadata !2} ; [ DW_TAG_arg_variable ]
-!4 = metadata !{i32 458798, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 4, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!5 = metadata !{i32 458773, metadata !1, null, metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x34\00Stop\00Stop\00\002\001\001", metadata !1, metadata !1, metadata !2, i32* @Stop} ; [ DW_TAG_variable ]
+!1 = metadata !{metadata !"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", metadata !20, metadata !21, metadata !21, null, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, metadata !1} ; [ DW_TAG_base_type ]
+!3 = metadata !{metadata !"0x101\00i\004\000", metadata !4, metadata !1, metadata !2} ; [ DW_TAG_arg_variable ]
+!4 = metadata !{metadata !"0x2e\00foo\00foo\00foo\004\000\001\000\006\000\000\000", i32 0, metadata !1, metadata !5, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!5 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !1, null, null, metadata !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{metadata !2, metadata !2}
 !7 = metadata !{i32 5, i32 0, metadata !8, null}
-!8 = metadata !{i32 458763, metadata !20, metadata !4, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{metadata !"0xb\000\000\000", metadata !20, metadata !4} ; [ DW_TAG_lexical_block ]
 !9 = metadata !{i32 6, i32 0, metadata !8, null}
 !10 = metadata !{i32 7, i32 0, metadata !8, null}
 !11 = metadata !{i32 9, i32 0, metadata !8, null}
 !12 = metadata !{i32 11, i32 0, metadata !8, null}
 !13 = metadata !{i32 14, i32 0, metadata !14, null}
-!14 = metadata !{i32 458763, metadata !20, metadata !15, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!15 = metadata !{i32 458798, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", i32 13, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!16 = metadata !{i32 458773, metadata !1, null, metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = metadata !{metadata !"0xb\000\000\000", metadata !20, metadata !15} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{metadata !"0x2e\00bar\00bar\00bar\0013\000\001\000\006\000\000\000", i32 0, metadata !1, metadata !16, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!16 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !1, null, null, metadata !17, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !17 = metadata !{metadata !2}
 !18 = metadata !{i32 15, i32 0, metadata !14, null}
 !19 = metadata !{i32 16, i32 0, metadata !14, null}
diff --git a/test/Transforms/GlobalOpt/alias-resolve.ll b/test/Transforms/GlobalOpt/alias-resolve.ll
index 9d70c70..ebc20c6 100644
--- a/test/Transforms/GlobalOpt/alias-resolve.ll
+++ b/test/Transforms/GlobalOpt/alias-resolve.ll
@@ -9,12 +9,12 @@
 @bar1  = alias void ()* @bar2
 ; CHECK: @bar1 = alias void ()* @bar2
 
-@weak1 = alias weak void ()* @bar2
-; CHECK: @weak1 = alias weak void ()* @bar2
+@weak1 = weak alias void ()* @bar2
+; CHECK: @weak1 = weak alias void ()* @bar2
 
 @bar4 = private unnamed_addr constant [2 x i8*] zeroinitializer
-@foo4 = unnamed_addr alias linkonce_odr getelementptr inbounds ([2 x i8*]* @bar4, i32 0, i32 1)
-; CHECK: @foo4 = unnamed_addr alias linkonce_odr getelementptr inbounds ([2 x i8*]* @bar4, i32 0, i32 1)
+@foo4 = linkonce_odr unnamed_addr alias getelementptr inbounds ([2 x i8*]* @bar4, i32 0, i32 1)
+; CHECK: @foo4 = linkonce_odr unnamed_addr alias getelementptr inbounds ([2 x i8*]* @bar4, i32 0, i32 1)
 
 define void @bar2() {
   ret void
diff --git a/test/Transforms/GlobalOpt/alias-used-address-space.ll b/test/Transforms/GlobalOpt/alias-used-address-space.ll
index 633cd34..62e74ba 100644
--- a/test/Transforms/GlobalOpt/alias-used-address-space.ll
+++ b/test/Transforms/GlobalOpt/alias-used-address-space.ll
@@ -7,7 +7,7 @@ target datalayout = "p:32:32:32-p1:16:16:16"
 @i = internal addrspace(1) global i8 42
 
 ; CHECK: @ia = internal addrspace(1) global i8 42
-@ia = alias internal i8 addrspace(1)* @i
+@ia = internal alias i8 addrspace(1)* @i
 
 @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ca to i8*)], section "llvm.metadata"
 ; CHECK-DAG: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ca to i8*)], section "llvm.metadata"
@@ -18,8 +18,8 @@ target datalayout = "p:32:32:32-p1:16:16:16"
 @sameAsUsed = global [1 x i8*] [i8* addrspacecast(i8 addrspace(1)* @ca to i8*)]
 ; CHECK-DAG: @sameAsUsed = global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @c to i8*)]
 
-@ca = alias internal i8 addrspace(1)* @c
-; CHECK: @ca = alias internal i8 addrspace(1)* @c
+@ca = internal alias i8 addrspace(1)* @c
+; CHECK: @ca = internal alias i8 addrspace(1)* @c
 
 define i8 addrspace(1)* @h() {
   ret i8 addrspace(1)* @ca
diff --git a/test/Transforms/GlobalOpt/alias-used-section.ll b/test/Transforms/GlobalOpt/alias-used-section.ll
index 1217937..4dab2f5 100644
--- a/test/Transforms/GlobalOpt/alias-used-section.ll
+++ b/test/Transforms/GlobalOpt/alias-used-section.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -S -globalopt < %s | FileCheck %s
 
 @_Z17in_custom_section = internal global i8 42, section "CUSTOM"
-@in_custom_section = dllexport alias internal i8* @_Z17in_custom_section
+@in_custom_section = internal dllexport alias i8* @_Z17in_custom_section
 
 ; CHECK: @in_custom_section = internal dllexport global i8 42, section "CUSTOM"
 
diff --git a/test/Transforms/GlobalOpt/alias-used.ll b/test/Transforms/GlobalOpt/alias-used.ll
index 05ac7f9..21f06b7 100644
--- a/test/Transforms/GlobalOpt/alias-used.ll
+++ b/test/Transforms/GlobalOpt/alias-used.ll
@@ -4,7 +4,7 @@
 
 @i = internal global i8 42
 ; CHECK: @ia = internal global i8 42
-@ia = alias internal i8* @i
+@ia = internal alias i8* @i
 
 @llvm.used = appending global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca], section "llvm.metadata"
 ; CHECK-DAG: @llvm.used = appending global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca], section "llvm.metadata"
@@ -18,17 +18,17 @@
 @other = global i32* bitcast (void ()* @fa to i32*)
 ; CHECK-DAG: @other = global i32* bitcast (void ()* @f to i32*)
 
-@fa = alias internal void ()* @f
-; CHECK: @fa = alias internal void ()* @f
+@fa = internal alias void ()* @f
+; CHECK: @fa = internal alias void ()* @f
 
-@fa2 = alias internal void ()* @f
+@fa2 = internal alias void ()* @f
 ; CHECK-NOT: @fa2
 
-@fa3 = alias internal void ()* @f
+@fa3 = internal alias void ()* @f
 ; CHECK: @fa3
 
-@ca = alias internal i8* @c
-; CHECK: @ca = alias internal i8* @c
+@ca = internal alias i8* @c
+; CHECK: @ca = internal alias i8* @c
 
 define void @f() {
   ret void
diff --git a/test/Transforms/GlobalOpt/constantfold-initializers.ll b/test/Transforms/GlobalOpt/constantfold-initializers.ll
index 4a25d66..36de19c 100644
--- a/test/Transforms/GlobalOpt/constantfold-initializers.ll
+++ b/test/Transforms/GlobalOpt/constantfold-initializers.ll
@@ -81,10 +81,23 @@ entry:
   ret void
 }
 
+@test6_v1 = internal global { i32, i32 } { i32 42, i32 0 }, align 8
+@test6_v2 = global i32 0, align 4
+; CHECK: @test6_v2 = global i32 42, align 4
+define internal void @test6() {
+  %load = load { i32, i32 }* @test6_v1, align 8
+  %xv0 = extractvalue { i32, i32 } %load, 0
+  %iv = insertvalue { i32, i32 } %load, i32 %xv0, 1
+  %xv1 = extractvalue { i32, i32 } %iv, 1
+  store i32 %xv1, i32* @test6_v2, align 4
+  ret void
+}
+
 @llvm.global_ctors = appending constant
-  [5 x { i32, void ()* }]
+  [6 x { i32, void ()* }]
   [{ i32, void ()* } { i32 65535, void ()* @test1 },
    { i32, void ()* } { i32 65535, void ()* @test2 },
    { i32, void ()* } { i32 65535, void ()* @test3 },
    { i32, void ()* } { i32 65535, void ()* @test4 },
-   { i32, void ()* } { i32 65535, void ()* @test5 }]
+   { i32, void ()* } { i32 65535, void ()* @test5 },
+   { i32, void ()* } { i32 65535, void ()* @test6 }]
diff --git a/test/Transforms/GlobalOpt/pr21191.ll b/test/Transforms/GlobalOpt/pr21191.ll
new file mode 100644
index 0000000..39b8eee
--- /dev/null
+++ b/test/Transforms/GlobalOpt/pr21191.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+$c = comdat any
+; CHECK: $c = comdat any
+
+define linkonce_odr void @foo() comdat $c {
+  ret void
+}
+; CHECK: define linkonce_odr void @foo() comdat $c
+
+define linkonce_odr void @bar() comdat $c {
+  ret void
+}
+; CHECK: define linkonce_odr void @bar() comdat $c
+
+define void @zed()  {
+  call void @foo()
+  ret void
+}
diff --git a/test/Transforms/GlobalOpt/preserve-comdats.ll b/test/Transforms/GlobalOpt/preserve-comdats.ll
new file mode 100644
index 0000000..08188b9
--- /dev/null
+++ b/test/Transforms/GlobalOpt/preserve-comdats.ll
@@ -0,0 +1,37 @@
+; RUN: opt -globalopt -S < %s | FileCheck %s
+
+$comdat_global = comdat any
+
+@comdat_global = weak_odr global i8 0, comdat $comdat_global
+@simple_global = internal global i8 0
+; CHECK: @comdat_global = weak_odr global i8 0, comdat $comdat_global
+; CHECK: @simple_global = internal global i8 42
+
+@llvm.global_ctors = appending global [2 x { i32, void ()*, i8* }] [
+    { i32, void ()*, i8* } { i32 65535, void ()* @init_comdat_global, i8* @comdat_global },
+    { i32, void ()*, i8* } { i32 65535, void ()* @init_simple_global, i8* null }
+]
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }]
+; CHECK: [{ i32, void ()*, i8* } { i32 65535, void ()* @init_comdat_global, i8* @comdat_global }]
+
+define void @init_comdat_global() {
+  store i8 42, i8* @comdat_global
+  ret void
+}
+; CHECK: define void @init_comdat_global()
+
+define internal void @init_simple_global() comdat $comdat_global {
+  store i8 42, i8* @simple_global
+  ret void
+}
+; CHECK-NOT: @init_simple_global()
+
+define i8* @use_simple() {
+  ret i8* @simple_global
+}
+; CHECK: define i8* @use_simple()
+
+define i8* @use_comdat() {
+  ret i8* @comdat_global
+}
+; CHECK: define i8* @use_comdat()
diff --git a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
index af9f1b3..64fef10 100644
--- a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
+++ b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
 ; Test WidenIV::GetExtendedOperandRecurrence.
-; add219 should be extended to i64 because it is nsw, even though its
+; %add, %sub and %mul should be extended to i64 because it is nsw, even though its
 ; sext cannot be hoisted outside the loop.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -18,13 +18,26 @@ for.body153:                                      ; preds = %for.body153, %for.b
   br i1 undef, label %for.body170, label %for.body153
 
 ; CHECK: add nsw i64 %indvars.iv, 1
+; CHECK: sub nsw i64 %indvars.iv, 2
+; CHECK: sub nsw i64 4, %indvars.iv
+; CHECK: mul nsw i64 %indvars.iv, 8
 for.body170:                                      ; preds = %for.body170, %for.body153
   %i2.19 = phi i32 [ %add249, %for.body170 ], [ 0, %for.body153 ]
-  %add219 = add nsw i32 %i2.19, 1
-  %idxprom220 = sext i32 %add219 to i64
+
+  %add = add nsw i32 %i2.19, 1
+  %add.idxprom = sext i32 %add to i64
+
+  %sub = sub nsw i32 %i2.19, 2
+  %sub.idxprom = sext i32 %sub to i64
+
+  %sub.neg = sub nsw i32 4, %i2.19
+  %sub.neg.idxprom = sext i32 %sub.neg to i64
+
+  %mul = mul nsw i32 %i2.19, 8
+  %mul.idxprom = sext i32 %mul to i64
+
   %add249 = add nsw i32 %i2.19, %shl132
   br label %for.body170
-
 for.end285:                                       ; preds = %entry
   ret void
 }
diff --git a/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
index a8020e6..e462712 100644
--- a/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
+++ b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
@@ -6,7 +6,7 @@ target triple = "thumbv7-apple-darwin"
 
 ; CHECK-LABEL: @test(
 ; CHECK: if.end.i126:
-; CHECK: %exitcond = icmp ne i8* %incdec.ptr.i, getelementptr (i8* null, i32 undef)
+; CHECK: %exitcond = icmp ne i8* %destYPixelPtr.010.i, getelementptr (i8* null, i32 undef)
 define void @test() nounwind {
 entry:
   br label %while.cond
diff --git a/test/Transforms/IndVarSimplify/NVPTX/lit.local.cfg b/test/Transforms/IndVarSimplify/NVPTX/lit.local.cfg
new file mode 100644
index 0000000..2cb98eb
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/NVPTX/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'NVPTX' in config.root.targets:
+    config.unsupported = True
diff --git a/test/Transforms/IndVarSimplify/NVPTX/no-widen-expensive.ll b/test/Transforms/IndVarSimplify/NVPTX/no-widen-expensive.ll
new file mode 100644
index 0000000..8744b19
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/NVPTX/no-widen-expensive.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target triple = "nvptx64-unknown-unknown"
+
+; For the nvptx64 architecture, the cost of an arithmetic instruction on a
+; 64-bit integer is twice as expensive as that on a 32-bit integer, because the
+; hardware needs to simulate a 64-bit integer using two 32-bit integers.
+; Therefore, in this particular architecture, we should not widen induction
+; variables to 64-bit integers even though i64 is a legal type in the 64-bit
+; PTX ISA.
+
+define void @indvar_32_bit(i32 %n, i32* nocapture %output) {
+; CHECK-LABEL: @indvar_32_bit
+entry:
+  %cmp5 = icmp sgt i32 %n, 0
+  br i1 %cmp5, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.06 = phi i32 [ 0, %for.body.preheader ], [ %add, %for.body ]
+; CHECK: phi i32
+  %mul = mul nsw i32 %i.06, %i.06
+  %0 = sext i32 %i.06 to i64
+  %arrayidx = getelementptr inbounds i32* %output, i64 %0
+  store i32 %mul, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.06, 3
+  %cmp = icmp slt i32 %add, %n
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
index e4c31d1..9e55a17 100644
--- a/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
+++ b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
@@ -11,7 +11,7 @@ entry:
   br i1 %cmp1, label %for.body, label %for.end
 
 ; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8 addrspace(2)* %base, i8 %0
+; CHECK: %lftr.limit = getelementptr i8 addrspace(2)* %base, i8
 
 ; CHECK: for.body:
 ; CHECK: phi i8 addrspace(2)*
@@ -43,7 +43,7 @@ entry:
   br i1 %cmp1, label %for.body, label %for.end
 
 ; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8 addrspace(3)* %base, i16 %0
+; CHECK: %lftr.limit = getelementptr i8 addrspace(3)* %base, i16
 
 ; CHECK: for.body:
 ; CHECK: phi i8 addrspace(3)*
diff --git a/test/Transforms/IndVarSimplify/lftr-extend-const.ll b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
index 4736f85..f12c68c 100644
--- a/test/Transforms/IndVarSimplify/lftr-extend-const.ll
+++ b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
@@ -21,7 +21,7 @@ for.end:                                          ; preds = %for.body
 
 ; Check that post-incrementing the backedge taken count does not overflow.
 ; CHECK-LABEL: @postinc(
-; CHECK: icmp eq i32 %indvars.iv.next, 256
+; CHECK: icmp eq i32 %indvars.iv, 255
 define i32 @postinc() #0 {
 entry:
   br label %do.body
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
index 1fdcdd1..efb96bd 100644
--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
@@ -82,15 +82,23 @@ exit:
 ; Perform LFTR without generating extra preheader code.
 define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector,
                          i32 %irow, i32 %ilead) nounwind {
-; CHECK: entry:
-; CHECK-NOT: zext
-; CHECK-NOT: add
-; CHECK: loop:
-; CHECK: phi i64
-; CHECK: phi i64
+; CHECK-LABEL: @guardedloop(
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %[[cmp:.*]] = icmp slt i32 1, %irow
+; CHECK-NEXT: br i1 %[[cmp]], label %[[loop_preheader:.*]], label %[[return:.*]]
+
+; CHECK: [[loop_preheader]]:
+; CHECK-NEXT: %[[sext:.*]] = sext i32 %ilead to i64
+; CHECK-NEXT: %[[add:.*]] = add i32 %irow, -1
+; CHECK-NEXT: br label %[[loop:.*]]
+
+; CHECK: [[loop]]:
+; CHECK-NEXT: %[[indvars_iv2:.*]] = phi i64
+; CHECK-NEXT: phi i64
 ; CHECK-NOT: phi
-; CHECK: icmp ne
-; CHECK: br i1
+; CHECK: %[[lftr_wideiv:.*]] = trunc i64 %[[indvars_iv2]] to i32
+; CHECK-NEXT: %[[exitcond:.*]] = icmp ne i32 %[[lftr_wideiv]], %[[add]]
+; CHECK-NEXT: br i1 %[[exitcond]], label %[[loop]], label
 entry:
   %cmp = icmp slt i32 1, %irow
   br i1 %cmp, label %loop, label %return
diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
index 0576692..a7023f2 100644
--- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -229,10 +229,11 @@ entry:
 ; loop and the OR instruction is replaced by an ADD keeping the result
 ; equivalent.
 ;
+; CHECK: sext
 ; CHECK: loop:
 ; CHECK: phi i64
 ; CHECK-NOT: sext
-; CHECK: icmp slt i32
+; CHECK: icmp slt i64
 ; CHECK: exit:
 ; CHECK: add i64
 loop:
diff --git a/test/Transforms/IndVarSimplify/pr20680.ll b/test/Transforms/IndVarSimplify/pr20680.ll
new file mode 100644
index 0000000..88a7fd7
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr20680.ll
@@ -0,0 +1,219 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+@a = common global i32 0, align 4
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+define void @f() {
+; CHECK-LABEL: @f(
+; CHECK-LABEL: entry:
+; CHECK: br label %[[for_cond2_preheader:.*]]
+
+; CHECK: [[for_cond2_preheader]]:
+; CHECK-NEXT: %[[indvars_iv:.*]] = phi i32 [ %[[indvars_iv_next:.*]], %[[for_inc13:.*]] ], [ -14, %entry ]
+; br i1 {{.*}}, label %[[for_inc13]], label %
+entry:
+  %0 = load i32* @a, align 4
+  %tobool2 = icmp eq i32 %0, 0
+  %1 = load i32* @a, align 4
+  %tobool = icmp eq i32 %1, 0
+  br label %for.cond2.preheader
+
+for.cond2.preheader:                              ; preds = %for.inc13, %entry
+  %storemerge15 = phi i8 [ -14, %entry ], [ %inc14, %for.inc13 ]
+  br i1 %tobool2, label %for.inc13, label %for.body3.lr.ph
+
+for.body3.lr.ph:                                  ; preds = %for.cond2.preheader
+  %tobool5 = icmp eq i8 %storemerge15, 0
+  %conv7 = sext i8 %storemerge15 to i32
+  %2 = add nsw i32 %conv7, 1
+  %3 = icmp ult i32 %2, 3
+  %div = select i1 %3, i32 %conv7, i32 0
+  br i1 %tobool5, label %for.body3.lr.ph.split.us, label %for.body3.lr.ph.for.body3.lr.ph.split_crit_edge
+
+for.body3.lr.ph.for.body3.lr.ph.split_crit_edge:  ; preds = %for.body3.lr.ph
+  br label %for.body3.lr.ph.split
+
+for.body3.lr.ph.split.us:                         ; preds = %for.body3.lr.ph
+  br i1 %tobool, label %for.body3.lr.ph.split.us.split.us, label %for.body3.lr.ph.split.us.for.body3.lr.ph.split.us.split_crit_edge
+
+for.body3.lr.ph.split.us.for.body3.lr.ph.split.us.split_crit_edge: ; preds = %for.body3.lr.ph.split.us
+  br label %for.body3.lr.ph.split.us.split
+
+for.body3.lr.ph.split.us.split.us:                ; preds = %for.body3.lr.ph.split.us
+  br label %for.body3.us.us
+
+for.body3.us.us:                                  ; preds = %for.cond2.loopexit.us.us, %for.body3.lr.ph.split.us.split.us
+  br i1 true, label %cond.false.us.us, label %cond.end.us.us
+
+cond.false.us.us:                                 ; preds = %for.body3.us.us
+  br label %cond.end.us.us
+
+cond.end.us.us:                                   ; preds = %cond.false.us.us, %for.body3.us.us
+  %cond.us.us = phi i32 [ %div, %cond.false.us.us ], [ %conv7, %for.body3.us.us ]
+  %4 = load i32* @b, align 4
+  %cmp91.us.us = icmp slt i32 %4, 1
+  br i1 %cmp91.us.us, label %for.inc.lr.ph.us.us, label %for.cond2.loopexit.us.us
+
+for.cond2.loopexit.us.us:                         ; preds = %for.cond8.for.cond2.loopexit_crit_edge.us.us, %cond.end.us.us
+  br i1 true, label %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us, label %for.body3.us.us
+
+for.inc.lr.ph.us.us:                              ; preds = %cond.end.us.us
+  br label %for.inc.us.us
+
+for.cond8.for.cond2.loopexit_crit_edge.us.us:     ; preds = %for.inc.us.us
+  %inc.lcssa.us.us = phi i32 [ %inc.us.us, %for.inc.us.us ]
+  store i32 %inc.lcssa.us.us, i32* @b, align 4
+  br label %for.cond2.loopexit.us.us
+
+for.inc.us.us:                                    ; preds = %for.inc.us.us, %for.inc.lr.ph.us.us
+  %5 = phi i32 [ %4, %for.inc.lr.ph.us.us ], [ %inc.us.us, %for.inc.us.us ]
+  %inc.us.us = add nsw i32 %5, 1
+  %cmp9.us.us = icmp slt i32 %inc.us.us, 1
+  br i1 %cmp9.us.us, label %for.inc.us.us, label %for.cond8.for.cond2.loopexit_crit_edge.us.us
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us: ; preds = %for.cond2.loopexit.us.us
+  %cond.lcssa.ph.us.ph.us = phi i32 [ %cond.us.us, %for.cond2.loopexit.us.us ]
+  br label %for.cond2.for.inc13_crit_edge.us-lcssa.us
+
+for.body3.lr.ph.split.us.split:                   ; preds = %for.body3.lr.ph.split.us.for.body3.lr.ph.split.us.split_crit_edge
+  br label %for.body3.us
+
+for.body3.us:                                     ; preds = %for.cond2.loopexit.us, %for.body3.lr.ph.split.us.split
+  br i1 true, label %cond.false.us, label %cond.end.us
+
+cond.false.us:                                    ; preds = %for.body3.us
+  br label %cond.end.us
+
+cond.end.us:                                      ; preds = %cond.false.us, %for.body3.us
+  %cond.us = phi i32 [ %div, %cond.false.us ], [ %conv7, %for.body3.us ]
+  %6 = load i32* @b, align 4
+  %cmp91.us = icmp slt i32 %6, 1
+  br i1 %cmp91.us, label %for.inc.lr.ph.us, label %for.cond2.loopexit.us
+
+for.inc.us:                                       ; preds = %for.inc.lr.ph.us, %for.inc.us
+  %7 = phi i32 [ %6, %for.inc.lr.ph.us ], [ %inc.us, %for.inc.us ]
+  %inc.us = add nsw i32 %7, 1
+  %cmp9.us = icmp slt i32 %inc.us, 1
+  br i1 %cmp9.us, label %for.inc.us, label %for.cond8.for.cond2.loopexit_crit_edge.us
+
+for.cond2.loopexit.us:                            ; preds = %for.cond8.for.cond2.loopexit_crit_edge.us, %cond.end.us
+  br i1 false, label %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa, label %for.body3.us
+
+for.inc.lr.ph.us:                                 ; preds = %cond.end.us
+  br label %for.inc.us
+
+for.cond8.for.cond2.loopexit_crit_edge.us:        ; preds = %for.inc.us
+  %inc.lcssa.us = phi i32 [ %inc.us, %for.inc.us ]
+  store i32 %inc.lcssa.us, i32* @b, align 4
+  br label %for.cond2.loopexit.us
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa: ; preds = %for.cond2.loopexit.us
+  %cond.lcssa.ph.us.ph = phi i32 [ %cond.us, %for.cond2.loopexit.us ]
+  br label %for.cond2.for.inc13_crit_edge.us-lcssa.us
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us:        ; preds = %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa, %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us
+  %cond.lcssa.ph.us = phi i32 [ %cond.lcssa.ph.us.ph, %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa ], [ %cond.lcssa.ph.us.ph.us, %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us ]
+  br label %for.cond2.for.inc13_crit_edge
+
+for.body3.lr.ph.split:                            ; preds = %for.body3.lr.ph.for.body3.lr.ph.split_crit_edge
+  br i1 %tobool, label %for.body3.lr.ph.split.split.us, label %for.body3.lr.ph.split.for.body3.lr.ph.split.split_crit_edge
+
+for.body3.lr.ph.split.for.body3.lr.ph.split.split_crit_edge: ; preds = %for.body3.lr.ph.split
+  br label %for.body3.lr.ph.split.split
+
+for.body3.lr.ph.split.split.us:                   ; preds = %for.body3.lr.ph.split
+  br label %for.body3.us3
+
+for.body3.us3:                                    ; preds = %for.cond2.loopexit.us11, %for.body3.lr.ph.split.split.us
+  br i1 false, label %cond.false.us4, label %cond.end.us5
+
+cond.false.us4:                                   ; preds = %for.body3.us3
+  br label %cond.end.us5
+
+cond.end.us5:                                     ; preds = %cond.false.us4, %for.body3.us3
+  %cond.us6 = phi i32 [ %div, %cond.false.us4 ], [ %conv7, %for.body3.us3 ]
+  %8 = load i32* @b, align 4
+  %cmp91.us7 = icmp slt i32 %8, 1
+  br i1 %cmp91.us7, label %for.inc.lr.ph.us12, label %for.cond2.loopexit.us11
+
+for.inc.us8:                                      ; preds = %for.inc.lr.ph.us12, %for.inc.us8
+  %9 = phi i32 [ %8, %for.inc.lr.ph.us12 ], [ %inc.us9, %for.inc.us8 ]
+  %inc.us9 = add nsw i32 %9, 1
+  %cmp9.us10 = icmp slt i32 %inc.us9, 1
+  br i1 %cmp9.us10, label %for.inc.us8, label %for.cond8.for.cond2.loopexit_crit_edge.us13
+
+for.cond2.loopexit.us11:                          ; preds = %for.cond8.for.cond2.loopexit_crit_edge.us13, %cond.end.us5
+  br i1 true, label %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us, label %for.body3.us3
+
+for.inc.lr.ph.us12:                               ; preds = %cond.end.us5
+  br label %for.inc.us8
+
+for.cond8.for.cond2.loopexit_crit_edge.us13:      ; preds = %for.inc.us8
+  %inc.lcssa.us14 = phi i32 [ %inc.us9, %for.inc.us8 ]
+  store i32 %inc.lcssa.us14, i32* @b, align 4
+  br label %for.cond2.loopexit.us11
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us: ; preds = %for.cond2.loopexit.us11
+  %cond.lcssa.ph.ph.us = phi i32 [ %cond.us6, %for.cond2.loopexit.us11 ]
+  br label %for.cond2.for.inc13_crit_edge.us-lcssa
+
+for.body3.lr.ph.split.split:                      ; preds = %for.body3.lr.ph.split.for.body3.lr.ph.split.split_crit_edge
+  br label %for.body3
+
+for.cond8.for.cond2.loopexit_crit_edge:           ; preds = %for.inc
+  %inc.lcssa = phi i32 [ %inc, %for.inc ]
+  store i32 %inc.lcssa, i32* @b, align 4
+  br label %for.cond2.loopexit
+
+for.cond2.loopexit:                               ; preds = %cond.end, %for.cond8.for.cond2.loopexit_crit_edge
+  br i1 false, label %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa, label %for.body3
+
+for.body3:                                        ; preds = %for.cond2.loopexit, %for.body3.lr.ph.split.split
+  br i1 false, label %cond.false, label %cond.end
+
+cond.false:                                       ; preds = %for.body3
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %for.body3
+  %cond = phi i32 [ %div, %cond.false ], [ %conv7, %for.body3 ]
+  %10 = load i32* @b, align 4
+  %cmp91 = icmp slt i32 %10, 1
+  br i1 %cmp91, label %for.inc.lr.ph, label %for.cond2.loopexit
+
+for.inc.lr.ph:                                    ; preds = %cond.end
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.inc, %for.inc.lr.ph
+  %11 = phi i32 [ %10, %for.inc.lr.ph ], [ %inc, %for.inc ]
+  %inc = add nsw i32 %11, 1
+  %cmp9 = icmp slt i32 %inc, 1
+  br i1 %cmp9, label %for.inc, label %for.cond8.for.cond2.loopexit_crit_edge
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa:  ; preds = %for.cond2.loopexit
+  %cond.lcssa.ph.ph = phi i32 [ %cond, %for.cond2.loopexit ]
+  br label %for.cond2.for.inc13_crit_edge.us-lcssa
+
+for.cond2.for.inc13_crit_edge.us-lcssa:           ; preds = %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa, %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us
+  %cond.lcssa.ph = phi i32 [ %cond.lcssa.ph.ph, %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa ], [ %cond.lcssa.ph.ph.us, %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us ]
+  br label %for.cond2.for.inc13_crit_edge
+
+for.cond2.for.inc13_crit_edge:                    ; preds = %for.cond2.for.inc13_crit_edge.us-lcssa, %for.cond2.for.inc13_crit_edge.us-lcssa.us
+  %cond.lcssa = phi i32 [ %cond.lcssa.ph, %for.cond2.for.inc13_crit_edge.us-lcssa ], [ %cond.lcssa.ph.us, %for.cond2.for.inc13_crit_edge.us-lcssa.us ]
+  store i32 %cond.lcssa, i32* @c, align 4
+  br label %for.inc13
+
+; CHECK: [[for_inc13]]:
+; CHECK-NEXT: %[[indvars_iv_next]] = add nuw nsw i32 %[[indvars_iv]], 1
+; CHECK-NEXT: %[[exitcond4:.*]] = icmp ne i32 %[[indvars_iv]], -1
+; CHECK-NEXT: br i1 %[[exitcond4]], label %[[for_cond2_preheader]], label %[[for_end15:.*]]
+for.inc13:                                        ; preds = %for.cond2.for.inc13_crit_edge, %for.cond2.preheader
+  %inc14 = add i8 %storemerge15, 1
+  %cmp = icmp ugt i8 %inc14, 50
+  br i1 %cmp, label %for.cond2.preheader, label %for.end15
+
+; CHECK: [[for_end15]]:
+; CHECK-NEXT: ret void
+for.end15:                                        ; preds = %for.inc13
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/sharpen-range.ll b/test/Transforms/IndVarSimplify/sharpen-range.ll
new file mode 100644
index 0000000..6a9d352
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/sharpen-range.ll
@@ -0,0 +1,113 @@
+;; RUN: opt -S < %s -indvars | FileCheck %s
+
+;; Check if llvm can narrow !range metadata based on loop entry
+;; predicates.
+
+declare void @abort()
+
+define i1 @bounded_below_slt(i32* nocapture readonly %buffer) {
+; CHECK-LABEL: bounded_below_slt
+entry:
+  %length = load i32* %buffer, !range !0
+  %entry.pred = icmp eq i32 %length, 0
+  br i1 %entry.pred, label %abort, label %loop.preheader
+
+loop.preheader:
+  br label %loop
+
+loop:
+; CHECK: loop
+  %idx = phi i32 [ %idx.inc, %loop.next ], [ 0, %loop.preheader ]
+  %oob.pred = icmp slt i32 %idx, %length
+  br i1 %oob.pred, label %loop.next, label %oob
+; CHECK: br i1 true, label %loop.next, label %oob
+
+loop.next:
+; CHECK: loop.next
+  %idx.inc = add i32 %idx, 1
+  %exit.pred = icmp slt i32 %idx.inc, %length
+  br i1 %exit.pred, label %loop, label %abort.loopexit
+
+abort.loopexit:
+  br label %abort
+
+abort:
+  ret i1 false
+
+oob:
+  tail call void @abort()
+  ret i1 false
+}
+
+define i1 @bounded_below_sle(i32* nocapture readonly %buffer) {
+; CHECK-LABEL: bounded_below_sle
+entry:
+  %length = load i32* %buffer, !range !0
+  %entry.pred = icmp eq i32 %length, 0
+  br i1 %entry.pred, label %abort, label %loop.preheader
+
+loop.preheader:
+  br label %loop
+
+loop:
+; CHECK: loop
+  %idx = phi i32 [ %idx.inc, %loop.next ], [ 0, %loop.preheader ]
+  %oob.pred = icmp sle i32 %idx, %length
+  br i1 %oob.pred, label %loop.next, label %oob
+; CHECK: br i1 true, label %loop.next, label %oob
+
+loop.next:
+; CHECK: loop.next
+  %idx.inc = add i32 %idx, 1
+  %exit.pred = icmp sle i32 %idx.inc, %length
+  br i1 %exit.pred, label %loop, label %abort.loopexit
+
+abort.loopexit:
+  br label %abort
+
+abort:
+  ret i1 false
+
+oob:
+  tail call void @abort()
+  ret i1 false
+}
+
+;; Assert that we're not making an incorrect transform.
+
+declare i32 @check(i8*)
+
+define void @NoChange() {
+; CHECK-LABEL: NoChange
+entry:
+  br label %loop.begin
+
+loop.begin:
+; CHECK: loop.begin:
+  %i.01 = phi i64 [ 2, %entry ], [ %add, %loop.end ]
+  %cmp = icmp ugt i64 %i.01, 1
+; CHECK: %cmp = icmp ugt i64 %i.01, 1
+  br i1 %cmp, label %loop, label %loop.end
+
+loop:
+; CHECK: loop
+  %.sum = add i64 %i.01, -2
+  %v = getelementptr inbounds i8* null, i64 %.sum
+  %r = tail call i32 @check(i8* %v)
+  %c = icmp eq i32 %r, 0
+  br i1 %c, label %loop.end, label %abort.now
+
+abort.now:
+  tail call void @abort()
+  unreachable
+
+loop.end:
+  %add = add i64 %i.01, -1
+  %eq = icmp eq i64 %add, 0
+  br i1 %eq, label %exit, label %loop.begin
+
+exit:
+  ret void
+}
+
+!0 = metadata !{i32 0, i32 100}
diff --git a/test/Transforms/IndVarSimplify/use-range-metadata.ll b/test/Transforms/IndVarSimplify/use-range-metadata.ll
new file mode 100644
index 0000000..7ac4f11
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/use-range-metadata.ll
@@ -0,0 +1,37 @@
+;; RUN: opt -S < %s -indvars | FileCheck %s
+
+;; Check if IndVarSimplify understands !range metadata.
+
+declare void @abort()
+
+define i1 @iterate(i32* nocapture readonly %buffer) {
+entry:
+  %length = load i32* %buffer, !range !0
+  br label %loop.preheader
+
+loop.preheader:
+  br label %loop
+
+loop:
+  %idx = phi i32 [ %idx.inc, %loop.next ], [ 0, %loop.preheader ]
+  %oob.pred = icmp slt i32 %idx, %length
+  br i1 %oob.pred, label %loop.next, label %oob
+; CHECK: br i1 true, label %loop.next, label %oob
+
+loop.next:
+  %idx.inc = add i32 %idx, 1
+  %exit.pred = icmp slt i32 %idx.inc, %length
+  br i1 %exit.pred, label %loop, label %abort.loopexit
+
+abort.loopexit:
+  br label %abort
+
+abort:
+  ret i1 false
+
+oob:
+  tail call void @abort()
+  ret i1 false
+}
+
+!0 = metadata !{i32 1, i32 100}
diff --git a/test/Transforms/IndVarSimplify/verify-scev.ll b/test/Transforms/IndVarSimplify/verify-scev.ll
index 019f583..b9ce3d6 100644
--- a/test/Transforms/IndVarSimplify/verify-scev.ll
+++ b/test/Transforms/IndVarSimplify/verify-scev.ll
@@ -380,11 +380,11 @@ for.body48:                                       ; preds = %for.inc221, %for.bo
 
 for.body65.lr.ph:                                 ; preds = %for.body48
   %0 = load i32* undef, align 4
+  %1 = sext i32 %0 to i64
   br label %for.body65.us
 
 for.body65.us:                                    ; preds = %for.inc219.us, %for.body65.lr.ph
-  %k.09.us = phi i32 [ %inc.us, %for.inc219.us ], [ 1, %for.body65.lr.ph ]
-  %idxprom66.us = sext i32 %k.09.us to i64
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc219.us ], [ 1, %for.body65.lr.ph ]
   br i1 undef, label %for.inc219.us, label %if.end72.us
 
 if.end72.us:                                      ; preds = %for.body65.us
@@ -406,8 +406,8 @@ for.cond152.us:                                   ; preds = %for.cond152.us, %fo
   br i1 undef, label %for.cond139.loopexit.us, label %for.cond152.us
 
 for.inc219.us:                                    ; preds = %for.cond139.loopexit.us, %if.end110.us, %if.end93.us, %for.body65.us
-  %inc.us = add nsw i32 %k.09.us, 1
-  %cmp64.us = icmp sgt i32 %inc.us, %0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp64.us = icmp sgt i64 %indvars.iv.next, %1
   br i1 %cmp64.us, label %for.inc221, label %for.body65.us
 
 for.cond139.loopexit.us:                          ; preds = %for.cond152.us
diff --git a/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/test/Transforms/IndVarSimplify/widen-loop-comp.ll
new file mode 100644
index 0000000..0930a0c
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -0,0 +1,191 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+target triple = "aarch64--linux-gnu"
+
+; Check the loop exit i32 compare instruction and operand are widened to i64
+; instead of truncating IV before its use in the i32 compare instruction.
+
+@idx = common global i32 0, align 4
+@e = common global i32 0, align 4
+@ptr = common global i32* null, align 8
+
+; CHECK-LABEL: @test1
+; CHECK: for.body.lr.ph:
+; CHECK: sext i32
+; CHECK: for.cond:
+; CHECK: icmp slt i64
+; CHECK: for.body:
+; CHECK: phi i64
+
+define i32 @test1() {
+entry:
+  store i32 -1, i32* @idx, align 4
+  %0 = load i32* @e, align 4
+  %cmp4 = icmp slt i32 %0, 0
+  br i1 %cmp4, label %for.end.loopexit, label %for.body.lr.ph
+
+for.body.lr.ph:
+  %1 = load i32** @ptr, align 8
+  %2 = load i32* @e, align 4
+  br label %for.body
+
+for.cond:
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %i.05, %2
+  br i1 %cmp, label %for.body, label %for.cond.for.end.loopexit_crit_edge
+
+for.body:
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.cond ]
+  %idxprom = sext i32 %i.05 to i64
+  %arrayidx = getelementptr inbounds i32* %1, i64 %idxprom
+  %3 = load i32* %arrayidx, align 4
+  %tobool = icmp eq i32 %3, 0
+  br i1 %tobool, label %if.then, label %for.cond
+
+if.then:
+  %i.05.lcssa = phi i32 [ %i.05, %for.body ]
+  store i32 %i.05.lcssa, i32* @idx, align 4
+  br label %for.end
+
+for.cond.for.end.loopexit_crit_edge:
+  br label %for.end.loopexit
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  %4 = load i32* @idx, align 4
+  ret i32 %4
+}
+
+; CHECK-LABEL: @test2
+; CHECK: for.body4.us
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %cmp2.us = icmp slt i64
+; CHECK-NOT: %2 = trunc i64 %indvars.iv.next to i32
+; CHECK-NOT: %cmp2.us = icmp slt i32
+
+define void @test2([8 x i8]* %a, i8* %b, i8 %limit) {
+entry:
+  %conv = zext i8 %limit to i32
+  br i1 undef, label %for.cond1.preheader, label %for.cond1.preheader.us
+
+for.cond1.preheader.us:
+  %storemerge5.us = phi i32 [ 0, %entry ], [ %inc14.us, %for.inc13.us ]
+  br i1 true, label %for.body4.lr.ph.us, label %for.inc13.us
+
+for.inc13.us:
+  %inc14.us = add nsw i32 %storemerge5.us, 1
+  %cmp.us = icmp slt i32 %inc14.us, 4
+  br i1 %cmp.us, label %for.cond1.preheader.us, label %for.end
+
+for.body4.us:
+  %storemerge14.us = phi i32 [ 0, %for.body4.lr.ph.us ], [ %inc.us, %for.body4.us ]
+  %idxprom.us = sext i32 %storemerge14.us to i64
+  %arrayidx6.us = getelementptr inbounds [8 x i8]* %a, i64 %idxprom5.us, i64 %idxprom.us
+  %0 = load i8* %arrayidx6.us, align 1
+  %idxprom7.us = zext i8 %0 to i64
+  %arrayidx8.us = getelementptr inbounds i8* %b, i64 %idxprom7.us
+  %1 = load i8* %arrayidx8.us, align 1
+  store i8 %1, i8* %arrayidx6.us, align 1
+  %inc.us = add nsw i32 %storemerge14.us, 1
+  %cmp2.us = icmp slt i32 %inc.us, %conv
+  br i1 %cmp2.us, label %for.body4.us, label %for.inc13.us
+
+for.body4.lr.ph.us:
+  %idxprom5.us = sext i32 %storemerge5.us to i64
+  br label %for.body4.us
+
+for.cond1.preheader:
+  %storemerge5 = phi i32 [ 0, %entry ], [ %inc14, %for.inc13 ]
+  br i1 false, label %for.inc13, label %for.inc13
+
+for.inc13:
+  %inc14 = add nsw i32 %storemerge5, 1
+  %cmp = icmp slt i32 %inc14, 4
+  br i1 %cmp, label %for.cond1.preheader, label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: @test3
+; CHECK: sext i32 %b
+; CHECK: for.cond:
+; CHECK: phi i64
+; CHECK: icmp slt i64
+
+define i32 @test3(i32* %a, i32 %b) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32* %a, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %sum.0, %0
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret i32 %sum.0
+}
+
+declare i32 @fn1(i8 signext)
+
+; PR21030
+; CHECK-LABEL: @test4
+; CHECK: for.body:
+; CHECK: phi i32
+; CHECK: icmp sgt i8
+
+define i32 @test4(i32 %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %c.07 = phi i8 [ -3, %entry ], [ %dec, %for.body ]
+  %conv6 = zext i8 %c.07 to i32
+  %or = or i32 %a, %conv6
+  %conv3 = trunc i32 %or to i8
+  %call = call i32 @fn1(i8 signext %conv3)
+  %dec = add i8 %c.07, -1
+  %cmp = icmp sgt i8 %dec, -14
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret i32 0
+}
+
+; CHECK-LABEL: @test5
+; CHECK: zext i32 %b
+; CHECK: for.cond:
+; CHECK: phi i64
+; CHECK: icmp ule i64
+
+define i32 @test5(i32* %a, i32 %b) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ule i32 %i.0, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  %idxprom = zext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32* %a, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %sum.0, %0
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret i32 %sum.0
+}
diff --git a/test/Transforms/Inline/align.ll b/test/Transforms/Inline/align.ll
new file mode 100644
index 0000000..9ac6d54
--- /dev/null
+++ b/test/Transforms/Inline/align.ll
@@ -0,0 +1,98 @@
+; RUN: opt -inline -preserve-alignment-assumptions-during-inlining -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @hello(float* align 128 nocapture %a, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello(float* %a, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %ptrint = ptrtoint float* %a to i64
+; CHECK:   %maskedptr = and i64 %ptrint, 127
+; CHECK:   %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK:   call void @llvm.assume(i1 %maskcond)
+; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+define void @fooa(float* nocapture align 128 %a, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello(float* %a, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @fooa(float* nocapture align 128 %a, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+define void @hello2(float* align 128 nocapture %a, float* align 128 nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float* %b, i64 8
+  store float %0, float* %arrayidx1, align 4
+  ret void
+}
+
+define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello2(float* %a, float* %b, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %ptrint = ptrtoint float* %a to i64
+; CHECK:   %maskedptr = and i64 %ptrint, 127
+; CHECK:   %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK:   call void @llvm.assume(i1 %maskcond)
+; CHECK:   %ptrint1 = ptrtoint float* %b to i64
+; CHECK:   %maskedptr2 = and i64 %ptrint1, 127
+; CHECK:   %maskcond3 = icmp eq i64 %maskedptr2, 0
+; CHECK:   call void @llvm.assume(i1 %maskcond3)
+; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4
+; CHECK:   %arrayidx1.i = getelementptr inbounds float* %b, i64 8
+; CHECK:   store float %0, float* %arrayidx1.i, align 4
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+attributes #0 = { nounwind uwtable }
+
diff --git a/test/Transforms/Inline/byval-tail-call.ll b/test/Transforms/Inline/byval-tail-call.ll
index 3a8906a..154f397 100644
--- a/test/Transforms/Inline/byval-tail-call.ll
+++ b/test/Transforms/Inline/byval-tail-call.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailcallelim -inline -instcombine -dse -S | FileCheck %s
+; RUN: opt < %s -basicaa -tailcallelim -inline -instcombine -dse -S | FileCheck %s
 ; PR7272
 
 ; Calls that capture byval parameters cannot be marked as tail calls. Other
@@ -27,10 +27,13 @@ define internal void @qux(i32* byval %x) {
   tail call void @ext(i32* null)
   ret void
 }
+
 define void @frob(i32* %x) {
 ; CHECK-LABEL: define void @frob(
-; CHECK: alloca i32
-; CHECK: {{^ *}}call void @ext(
+; CHECK: %[[POS:.*]] = alloca i32
+; CHECK: %[[VAL:.*]] = load i32* %x
+; CHECK: store i32 %[[VAL]], i32* %[[POS]]
+; CHECK: {{^ *}}call void @ext(i32* %[[POS]]
 ; CHECK: tail call void @ext(i32* null)
 ; CHECK: ret void
   tail call void @qux(i32* byval %x)
diff --git a/test/Transforms/Inline/debug-invoke.ll b/test/Transforms/Inline/debug-invoke.ll
index 41d6074..0de2d22 100644
--- a/test/Transforms/Inline/debug-invoke.ll
+++ b/test/Transforms/Inline/debug-invoke.ll
@@ -31,7 +31,7 @@ lpad:
 }
 
 !llvm.module.flags = !{!1}
-!1 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!1 = metadata !{i32 2, metadata !"Debug Info Version", i32 2}
 !2 = metadata !{}
 !3 = metadata !{i32 1, i32 0, metadata !2, null}
 !4 = metadata !{i32 2, i32 0, metadata !2, null}
diff --git a/test/Transforms/Inline/ephemeral.ll b/test/Transforms/Inline/ephemeral.ll
new file mode 100644
index 0000000..d1135c6
--- /dev/null
+++ b/test/Transforms/Inline/ephemeral.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -Oz %s | FileCheck %s
+
+@a = global i32 4
+
+define i1 @inner() {
+  %a1 = load volatile i32* @a
+  %x1 = add i32 %a1, %a1
+  %c = icmp eq i32 %x1, 0
+
+  ; Here are enough instructions to prevent inlining, but because they are used
+  ; only by the @llvm.assume intrinsic, they're free (and, thus, inlining will
+  ; still happen).
+  %a2 = mul i32 %a1, %a1
+  %a3 = sub i32 %a1, 5
+  %a4 = udiv i32 %a3, -13
+  %a5 = mul i32 %a4, %a4
+  %a6 = add i32 %a5, %x1
+  %ca = icmp sgt i32 %a6, -7
+  tail call void @llvm.assume(i1 %ca)
+
+  ret i1 %c
+}
+
+; @inner() should be inlined for -Oz.
+; CHECK-NOT: call i1 @inner
+define i1 @outer() optsize {
+   %r = call i1 @inner()
+   ret i1 %r
+}
+
+declare void @llvm.assume(i1) nounwind
+
diff --git a/test/Transforms/Inline/ignore-debug-info.ll b/test/Transforms/Inline/ignore-debug-info.ll
index 543a89b..428b5d5 100644
--- a/test/Transforms/Inline/ignore-debug-info.ll
+++ b/test/Transforms/Inline/ignore-debug-info.ll
@@ -7,16 +7,16 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-declare void @llvm.dbg.declare(metadata, metadata) #1
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 define <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b) {
 entry:
-  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}, metadata !{})
   %mul = fmul <4 x float> %a, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
-  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}, metadata !{})
   %mul1 = fmul <4 x float> %b, <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>
-  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}, metadata !{})
   %add = fadd <4 x float> %mul, %mul1
   ret <4 x float> %add
 }
@@ -27,10 +27,10 @@ define float @outer_vectors(<4 x float> %a, <4 x float> %b) {
 ; CHECK: ret float
 
 entry:
-  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
-  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}, metadata !{})
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}, metadata !{})
   %call = call <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b)
-  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}, metadata !{})
   %vecext = extractelement <4 x float> %call, i32 0
   %vecext1 = extractelement <4 x float> %call, i32 1
   %add = fadd float %vecext, %vecext1
@@ -47,9 +47,9 @@ attributes #0 = { nounwind readnone }
 !llvm.module.flags = !{!3, !4}
 !llvm.ident = !{!5}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !{}, metadata !2, metadata !2, metadata !""}
+!0 = metadata !{metadata !"0x11\004\00\000\00\000\00\000", metadata !1, metadata !2, metadata !2, metadata !{}, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !"", metadata !""}
 !2 = metadata !{i32 0}
 !3 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
 !5 = metadata !{metadata !""}
diff --git a/test/Transforms/Inline/inline-musttail-varargs.ll b/test/Transforms/Inline/inline-musttail-varargs.ll
new file mode 100644
index 0000000..7a89574
--- /dev/null
+++ b/test/Transforms/Inline/inline-musttail-varargs.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -inline -instcombine -S | FileCheck %s
+
+; We can't inline this thunk yet, but one day we will be able to.  And when we
+; do, this test case will be ready.
+
+declare void @ext_method(i8*, i32)
+
+define linkonce_odr void @thunk(i8* %this, ...) {
+  %this_adj = getelementptr i8* %this, i32 4
+  musttail call void (i8*, ...)* bitcast (void (i8*, i32)* @ext_method to void (i8*, ...)*)(i8* %this_adj, ...)
+  ret void
+}
+
+define void @thunk_caller(i8* %p) {
+  call void (i8*, ...)* @thunk(i8* %p, i32 42)
+  ret void
+}
+; CHECK-LABEL: define void @thunk_caller(i8* %p)
+; CHECK: call void (i8*, ...)* @thunk(i8* %p, i32 42)
+
+; FIXME: Inline the thunk. This should be significantly easier than inlining
+; general varargs functions.
diff --git a/test/Transforms/Inline/noalias-calls.ll b/test/Transforms/Inline/noalias-calls.ll
new file mode 100644
index 0000000..13408e4
--- /dev/null
+++ b/test/Transforms/Inline/noalias-calls.ll
@@ -0,0 +1,44 @@
+; RUN: opt -basicaa -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
+declare void @hey() #0
+
+define void @hello(i8* noalias nocapture %a, i8* noalias nocapture readonly %c, i8* nocapture %b) #1 {
+entry:
+  %l = alloca i8, i32 512, align 1
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 16, i1 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i32 16, i1 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i32 16, i1 0)
+  call void @hey()
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l, i8* %c, i64 16, i32 16, i1 0)
+  ret void
+}
+
+define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 {
+entry:
+  tail call void @hello(i8* %a, i8* %c, i8* %b)
+  ret void
+}
+
+; CHECK: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 {
+; CHECK: entry:
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 16, i1 false) #0, !noalias !0
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i32 16, i1 false) #0, !noalias !3
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i32 16, i1 false) #0, !alias.scope !5
+; CHECK:   call void @hey() #0, !noalias !5
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %c, i64 16, i32 16, i1 false) #0, !noalias !3
+; CHECK:   ret void
+; CHECK: }
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind uwtable }
+
+; CHECK: !0 = metadata !{metadata !1}
+; CHECK: !1 = metadata !{metadata !1, metadata !2, metadata !"hello: %c"}
+; CHECK: !2 = metadata !{metadata !2, metadata !"hello"}
+; CHECK: !3 = metadata !{metadata !4}
+; CHECK: !4 = metadata !{metadata !4, metadata !2, metadata !"hello: %a"}
+; CHECK: !5 = metadata !{metadata !4, metadata !1}
+
diff --git a/test/Transforms/Inline/noalias-cs.ll b/test/Transforms/Inline/noalias-cs.ll
new file mode 100644
index 0000000..acd9021
--- /dev/null
+++ b/test/Transforms/Inline/noalias-cs.ll
@@ -0,0 +1,84 @@
+; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4, !noalias !3
+  %arrayidx.i = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8
+  %arrayidx1.i = getelementptr inbounds float* %b, i64 8
+  store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7
+  %1 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %1, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  call void @foo2(float* %a, float* %b, float* %c), !noalias !0
+  call void @foo2(float* %b, float* %b, float* %a), !alias.scope !0
+  ret void
+}
+
+; CHECK: define void @foo(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %0 = load float* %c, align 4, !noalias !6
+; CHECK:   %arrayidx.i.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i.i, align 4, !alias.scope !12, !noalias !13
+; CHECK:   %arrayidx1.i.i = getelementptr inbounds float* %b, i64 8
+; CHECK:   store float %0, float* %arrayidx1.i.i, align 4, !alias.scope !14, !noalias !15
+; CHECK:   %1 = load float* %c, align 4, !noalias !16
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx.i, align 4, !noalias !16
+; CHECK:   %2 = load float* %a, align 4, !alias.scope !16, !noalias !17
+; CHECK:   %arrayidx.i.i1 = getelementptr inbounds float* %b, i64 5
+; CHECK:   store float %2, float* %arrayidx.i.i1, align 4, !alias.scope !21, !noalias !22
+; CHECK:   %arrayidx1.i.i2 = getelementptr inbounds float* %b, i64 8
+; CHECK:   store float %2, float* %arrayidx1.i.i2, align 4, !alias.scope !23, !noalias !24
+; CHECK:   %3 = load float* %a, align 4, !alias.scope !16
+; CHECK:   %arrayidx.i3 = getelementptr inbounds float* %b, i64 7
+; CHECK:   store float %3, float* %arrayidx.i3, align 4, !alias.scope !16
+; CHECK:   ret void
+; CHECK: }
+
+attributes #0 = { nounwind uwtable }
+
+!0 = metadata !{metadata !1}
+!1 = metadata !{metadata !1, metadata !2, metadata !"hello: %a"}
+!2 = metadata !{metadata !2, metadata !"hello"}
+!3 = metadata !{metadata !4, metadata !6}
+!4 = metadata !{metadata !4, metadata !5, metadata !"hello2: %a"}
+!5 = metadata !{metadata !5, metadata !"hello2"}
+!6 = metadata !{metadata !6, metadata !5, metadata !"hello2: %b"}
+!7 = metadata !{metadata !4}
+!8 = metadata !{metadata !6}
+
+; CHECK: !0 = metadata !{metadata !1, metadata !3}
+; CHECK: !1 = metadata !{metadata !1, metadata !2, metadata !"hello2: %a"}
+; CHECK: !2 = metadata !{metadata !2, metadata !"hello2"}
+; CHECK: !3 = metadata !{metadata !3, metadata !2, metadata !"hello2: %b"}
+; CHECK: !4 = metadata !{metadata !1}
+; CHECK: !5 = metadata !{metadata !3}
+; CHECK: !6 = metadata !{metadata !7, metadata !9, metadata !10}
+; CHECK: !7 = metadata !{metadata !7, metadata !8, metadata !"hello2: %a"}
+; CHECK: !8 = metadata !{metadata !8, metadata !"hello2"}
+; CHECK: !9 = metadata !{metadata !9, metadata !8, metadata !"hello2: %b"}
+; CHECK: !10 = metadata !{metadata !10, metadata !11, metadata !"hello: %a"}
+; CHECK: !11 = metadata !{metadata !11, metadata !"hello"}
+; CHECK: !12 = metadata !{metadata !7}
+; CHECK: !13 = metadata !{metadata !9, metadata !10}
+; CHECK: !14 = metadata !{metadata !9}
+; CHECK: !15 = metadata !{metadata !7, metadata !10}
+; CHECK: !16 = metadata !{metadata !10}
+; CHECK: !17 = metadata !{metadata !18, metadata !20}
+; CHECK: !18 = metadata !{metadata !18, metadata !19, metadata !"hello2: %a"}
+; CHECK: !19 = metadata !{metadata !19, metadata !"hello2"}
+; CHECK: !20 = metadata !{metadata !20, metadata !19, metadata !"hello2: %b"}
+; CHECK: !21 = metadata !{metadata !18, metadata !10}
+; CHECK: !22 = metadata !{metadata !20}
+; CHECK: !23 = metadata !{metadata !20, metadata !10}
+; CHECK: !24 = metadata !{metadata !18}
+
diff --git a/test/Transforms/Inline/noalias.ll b/test/Transforms/Inline/noalias.ll
new file mode 100644
index 0000000..7a54d5d
--- /dev/null
+++ b/test/Transforms/Inline/noalias.ll
@@ -0,0 +1,76 @@
+; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @hello(float* noalias nocapture %a, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello(float* %a, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %0 = load float* %c, align 4, !noalias !0
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4, !alias.scope !0
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+define void @hello2(float* noalias nocapture %a, float* noalias nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float* %b, i64 8
+  store float %0, float* %arrayidx1, align 4
+  ret void
+}
+
+define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello2(float* %a, float* %b, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %0 = load float* %c, align 4, !noalias !3
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8
+; CHECK:   %arrayidx1.i = getelementptr inbounds float* %b, i64 8
+; CHECK:   store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+attributes #0 = { nounwind uwtable }
+
+; CHECK: !0 = metadata !{metadata !1}
+; CHECK: !1 = metadata !{metadata !1, metadata !2, metadata !"hello: %a"}
+; CHECK: !2 = metadata !{metadata !2, metadata !"hello"}
+; CHECK: !3 = metadata !{metadata !4, metadata !6}
+; CHECK: !4 = metadata !{metadata !4, metadata !5, metadata !"hello2: %a"}
+; CHECK: !5 = metadata !{metadata !5, metadata !"hello2"}
+; CHECK: !6 = metadata !{metadata !6, metadata !5, metadata !"hello2: %b"}
+; CHECK: !7 = metadata !{metadata !4}
+; CHECK: !8 = metadata !{metadata !6}
+
diff --git a/test/Transforms/Inline/noalias2.ll b/test/Transforms/Inline/noalias2.ll
new file mode 100644
index 0000000..a4b38b0
--- /dev/null
+++ b/test/Transforms/Inline/noalias2.ll
@@ -0,0 +1,97 @@
+; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @hello(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 {
+entry:
+  tail call void @hello(float* %a, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %0 = load float* %c, align 4, !alias.scope !0, !noalias !3
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4, !alias.scope !3, !noalias !0
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+define void @hello2(float* noalias nocapture %a, float* noalias nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 6
+  store float %0, float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float* %b, i64 8
+  store float %0, float* %arrayidx1, align 4
+  ret void
+}
+
+; Check that when hello() is inlined into foo(), and then foo() is inlined into
+; foo2(), the noalias scopes are properly concatenated.
+define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @foo(float* %a, float* %c)
+  tail call void @hello2(float* %a, float* %b, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %0 = load float* %c, align 4, !alias.scope !5, !noalias !10
+; CHECK:   %arrayidx.i.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i.i, align 4, !alias.scope !10, !noalias !5
+; CHECK:   %1 = load float* %c, align 4, !alias.scope !13, !noalias !14
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx.i, align 4, !alias.scope !14, !noalias !13
+; CHECK:   %2 = load float* %c, align 4, !noalias !15
+; CHECK:   %arrayidx.i1 = getelementptr inbounds float* %a, i64 6
+; CHECK:   store float %2, float* %arrayidx.i1, align 4, !alias.scope !19, !noalias !20
+; CHECK:   %arrayidx1.i = getelementptr inbounds float* %b, i64 8
+; CHECK:   store float %2, float* %arrayidx1.i, align 4, !alias.scope !20, !noalias !19
+; CHECK:   %3 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %3, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+; CHECK: !0 = metadata !{metadata !1}
+; CHECK: !1 = metadata !{metadata !1, metadata !2, metadata !"hello: %c"}
+; CHECK: !2 = metadata !{metadata !2, metadata !"hello"}
+; CHECK: !3 = metadata !{metadata !4}
+; CHECK: !4 = metadata !{metadata !4, metadata !2, metadata !"hello: %a"}
+; CHECK: !5 = metadata !{metadata !6, metadata !8}
+; CHECK: !6 = metadata !{metadata !6, metadata !7, metadata !"hello: %c"}
+; CHECK: !7 = metadata !{metadata !7, metadata !"hello"}
+; CHECK: !8 = metadata !{metadata !8, metadata !9, metadata !"foo: %c"}
+; CHECK: !9 = metadata !{metadata !9, metadata !"foo"}
+; CHECK: !10 = metadata !{metadata !11, metadata !12}
+; CHECK: !11 = metadata !{metadata !11, metadata !7, metadata !"hello: %a"}
+; CHECK: !12 = metadata !{metadata !12, metadata !9, metadata !"foo: %a"}
+; CHECK: !13 = metadata !{metadata !8}
+; CHECK: !14 = metadata !{metadata !12}
+; CHECK: !15 = metadata !{metadata !16, metadata !18}
+; CHECK: !16 = metadata !{metadata !16, metadata !17, metadata !"hello2: %a"}
+; CHECK: !17 = metadata !{metadata !17, metadata !"hello2"}
+; CHECK: !18 = metadata !{metadata !18, metadata !17, metadata !"hello2: %b"}
+; CHECK: !19 = metadata !{metadata !16}
+; CHECK: !20 = metadata !{metadata !18}
+
+attributes #0 = { nounwind uwtable }
+
diff --git a/test/Transforms/Inline/pr21206.ll b/test/Transforms/Inline/pr21206.ll
new file mode 100644
index 0000000..1a4366e
--- /dev/null
+++ b/test/Transforms/Inline/pr21206.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+$c = comdat any
+; CHECK: $c = comdat any
+
+define linkonce_odr void @foo() comdat $c {
+  ret void
+}
+; CHECK: define linkonce_odr void @foo() comdat $c
+
+define linkonce_odr void @bar() comdat $c {
+  ret void
+}
+; CHECK: define linkonce_odr void @bar() comdat $c
+
+define void()* @zed()  {
+  ret void()* @foo
+}
diff --git a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
index 7f9bd9e..6259893 100644
--- a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
+++ b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S | grep icmp
 ; PR1646
 
-@__gthrw_pthread_cancel = alias weak i32 (i32)* @pthread_cancel		; <i32 (i32)*> [#uses=1]
+@__gthrw_pthread_cancel = weak alias i32 (i32)* @pthread_cancel		; <i32 (i32)*> [#uses=1]
 @__gthread_active_ptr.5335 = internal constant i8* bitcast (i32 (i32)* @__gthrw_pthread_cancel to i8*)		; <i8**> [#uses=1]
 define weak i32 @pthread_cancel(i32) {
        ret i32 0
diff --git a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
index c7cef75..3793a86 100644
--- a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
+++ b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S | grep icmp
 ; PR1678
 
-@A = alias weak void ()* @B		; <void ()*> [#uses=1]
+@A = weak alias void ()* @B		; <void ()*> [#uses=1]
 
 define weak void @B() {
        ret void
diff --git a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
index fe935f9..656fb34 100644
--- a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
+++ b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | not grep call
-; RUN: opt < %s -std-compile-opts -S | not grep xyz
+; RUN: opt < %s -O3 -S | not grep xyz
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 @.str = internal constant [4 x i8] c"xyz\00"		; <[4 x i8]*> [#uses=1]
diff --git a/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll b/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll
deleted file mode 100644
index 917d3d9..0000000
--- a/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep "ret i.* 0" | count 2
-; PR2048
-
-define i32 @i(i32 %a) {
-  %tmp1 = sdiv i32 %a, -1431655765
-  %tmp2 = sdiv i32 %tmp1, 3
-  ret i32 %tmp2
-}
-
-define i8 @j(i8 %a) {
-  %tmp1 = sdiv i8 %a, 64
-  %tmp2 = sdiv i8 %tmp1, 3
-  ret i8 %tmp2
-}
diff --git a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
index a75a465..895b260 100644
--- a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
+++ b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
@@ -15,7 +15,7 @@ define void @fu1(i32 %parm) nounwind ssp {
 
 ; <label>:4                                       ; preds = %0
   %5 = load i32* %1, align 4
-  %6 = mul nsw i32 %5, 8
+  %6 = shl nsw i32 %5, 3
 ; With "nsw", the alloca and its bitcast can be fused:
   %7 = add nsw i32 %6, 2048
 ;  CHECK: alloca double
diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll
index d7eac4b..a166e5f 100644
--- a/test/Transforms/InstCombine/add2.ll
+++ b/test/Transforms/InstCombine/add2.ll
@@ -219,7 +219,7 @@ define i16 @mul_add_to_mul_1(i16 %x) {
  %add2 = add nsw i16 %x, %mul1
  ret i16 %add2
 ; CHECK-LABEL: @mul_add_to_mul_1(
-; CHECK-NEXT: %add2 = mul nsw i16 %x, 9
+; CHECK-NEXT: %add2 = mul i16 %x, 9
 ; CHECK-NEXT: ret i16 %add2
 }
 
@@ -228,7 +228,7 @@ define i16 @mul_add_to_mul_2(i16 %x) {
  %add2 = add nsw i16 %mul1, %x
  ret i16 %add2
 ; CHECK-LABEL: @mul_add_to_mul_2(
-; CHECK-NEXT: %add2 = mul nsw i16 %x, 9
+; CHECK-NEXT: %add2 = mul i16 %x, 9
 ; CHECK-NEXT: ret i16 %add2
 }
 
@@ -248,7 +248,7 @@ define i16 @mul_add_to_mul_4(i16 %a) {
  %add = add nsw i16 %mul1, %mul2
  ret i16 %add
 ; CHECK-LABEL: @mul_add_to_mul_4(
-; CHECK-NEXT: %add = mul nsw i16 %a, 9
+; CHECK-NEXT: %add = mul i16 %a, 9
 ; CHECK-NEXT: ret i16 %add
 }
 
@@ -313,3 +313,43 @@ define i16 @add_cttz_2(i16 %a) {
   ret i16 %b
 }
 !1 = metadata !{i16 0, i16 32}
+
+define i32 @add_or_and(i32 %x, i32 %y) {
+  %or = or i32 %x, %y
+  %and = and i32 %x, %y
+  %add = add i32 %or, %and
+  ret i32 %add
+; CHECK-LABEL: @add_or_and(
+; CHECK-NEXT: add i32 %x, %y
+; CHECK-NEXT: ret i32
+}
+
+define i32 @add_nsw_or_and(i32 %x, i32 %y) {
+  %or = or i32 %x, %y
+  %and = and i32 %x, %y
+  %add = add nsw i32 %or, %and
+  ret i32 %add
+; CHECK-LABEL: @add_nsw_or_and(
+; CHECK-NEXT: add nsw i32 %x, %y
+; CHECK-NEXT: ret i32
+}
+
+define i32 @add_nuw_or_and(i32 %x, i32 %y) {
+  %or = or i32 %x, %y
+  %and = and i32 %x, %y
+  %add = add nuw i32 %or, %and
+  ret i32 %add
+; CHECK-LABEL: @add_nuw_or_and(
+; CHECK-NEXT: add nuw i32 %x, %y
+; CHECK-NEXT: ret i32
+}
+
+define i32 @add_nuw_nsw_or_and(i32 %x, i32 %y) {
+  %or = or i32 %x, %y
+  %and = and i32 %x, %y
+  %add = add nsw nuw i32 %or, %and
+  ret i32 %add
+; CHECK-LABEL: @add_nuw_nsw_or_and(
+; CHECK-NEXT: add nuw nsw i32 %x, %y
+; CHECK-NEXT: ret i32
+}
diff --git a/test/Transforms/InstCombine/add4.ll b/test/Transforms/InstCombine/add4.ll
deleted file mode 100644
index f9b7e3b..0000000
--- a/test/Transforms/InstCombine/add4.ll
+++ /dev/null
@@ -1,102 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-define float @test1(float %A, float %B, i1 %C) {
-EntryBlock:
-  ;; A*(1 - uitofp i1 C) -> select C, 0, A
-  %cf = uitofp i1 %C to float
-  %mc = fsub float 1.000000e+00, %cf
-  %p1 = fmul fast float %A, %mc
-  ret float %p1
-; CHECK-LABEL: @test1(
-; CHECK: select i1 %C, float -0.000000e+00, float %A
-}
-
-define float @test2(float %A, float %B, i1 %C) {
-EntryBlock:
-  ;; B*(uitofp i1 C) -> select C, B, 0
-  %cf = uitofp i1 %C to float
-  %p2 = fmul fast float %B, %cf
-  ret float %p2
-; CHECK-LABEL: @test2(
-; CHECK: select i1 %C, float %B, float -0.000000e+00
-}
-
-define float @test3(float %A, float %B, i1 %C) {
-EntryBlock:
-  ;;  select C, 0, B + select C, A, 0 -> select C, A, B
-  %cf = uitofp i1 %C to float
-  %s1 = select i1 %C, float 0.000000e+00, float %B
-  %s2 = select i1 %C, float %A, float 0.000000e+00
-  %sum = fadd fast float %s1, %s2
-  ret float %sum
-; CHECK-LABEL: @test3(
-; CHECK: select i1 %C, float %A, float %B
-}
-
-define float @test4(float %A, float %B, i1 %C) {
-EntryBlock:
-  ;;  B*(uitofp i1 C) + A*(1 - uitofp i1 C) -> select C, A, B
-  %cf = uitofp i1 %C to float
-  %mc = fsub fast float 1.000000e+00, %cf
-  %p1 = fmul fast float %A, %mc
-  %p2 = fmul fast float %B, %cf
-  %s1 = fadd fast float %p2, %p1
-  ret float %s1
-; CHECK-LABEL: @test4(
-; CHECK: select i1 %C, float %B, float %A
-}
-
-define float @test5(float %A, float %B, i1 %C) {
-EntryBlock:
-  ;; A*(1 - uitofp i1 C) + B*(uitofp i1 C) -> select C, A, B
-  %cf = uitofp i1 %C to float
-  %mc = fsub fast float 1.000000e+00, %cf
-  %p1 = fmul fast float %A, %mc
-  %p2 = fmul fast float %B, %cf
-  %s1 = fadd fast float %p1, %p2
-  ret float %s1
-; CHECK-LABEL: @test5(
-; CHECK: select i1 %C, float %B, float %A
-}
-
-; PR15952
-define float @test6(float %A, float %B, i32 %C) {
-  %cf = uitofp i32 %C to float
-  %mc = fsub float 1.000000e+00, %cf
-  %p1 = fmul fast float %A, %mc
-  ret float %p1
-; CHECK-LABEL: @test6(
-; CHECK: uitofp
-}
-
-define float @test7(float %A, float %B, i32 %C) {
-  %cf = uitofp i32 %C to float
-  %p2 = fmul fast float %B, %cf
-  ret float %p2
-; CHECK-LABEL: @test7(
-; CHECK: uitofp
-}
-
-define <4 x float> @test8(<4 x float> %A, <4 x float> %B, <4 x i1> %C) {
-  ;;  B*(uitofp i1 C) + A*(1 - uitofp i1 C) -> select C, A, B
-  %cf = uitofp <4 x i1> %C to <4 x float>
-  %mc = fsub fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %cf
-  %p1 = fmul fast <4 x float> %A, %mc
-  %p2 = fmul fast <4 x float> %B, %cf
-  %s1 = fadd fast <4 x float> %p2, %p1
-  ret <4 x float> %s1
-; CHECK-LABEL: @test8(
-; CHECK: select <4 x i1> %C, <4 x float> %B, <4 x float> %A
-}
-
-define <4 x float> @test9(<4 x float> %A, <4 x float> %B, <4 x i1> %C) {
-  ;; A*(1 - uitofp i1 C) + B*(uitofp i1 C) -> select C, A, B
-  %cf = uitofp <4 x i1> %C to <4 x float>
-  %mc = fsub fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %cf
-  %p1 = fmul fast <4 x float> %A, %mc
-  %p2 = fmul fast <4 x float> %B, %cf
-  %s1 = fadd fast <4 x float> %p1, %p2
-  ret <4 x float> %s1
-; CHECK-LABEL: @test9
-; CHECK: select <4 x i1> %C, <4 x float> %B, <4 x float> %A
-}
diff --git a/test/Transforms/InstCombine/align-attr.ll b/test/Transforms/InstCombine/align-attr.ll
new file mode 100644
index 0000000..9f366bf
--- /dev/null
+++ b/test/Transforms/InstCombine/align-attr.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @foo1(i32* align 32 %a) #0 {
+entry:
+  %0 = load i32* %a, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo1
+; CHECK-DAG: load i32* %a, align 32
+; CHECK: ret i32
+}
+
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index e88fd59..96b535d 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -45,7 +45,7 @@ define <4 x i32> @test5(<4 x i32> %A) {
 
 ; Check that we combine "if x!=0 && x!=-1" into "if x+1u>1"
 define i32 @test6(i64 %x) nounwind {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: add i64 %x, 1
 ; CHECK-NEXT: icmp ugt i64 %x.off, 1
   %cmp1 = icmp ne i64 %x, -1
@@ -54,3 +54,26 @@ define i32 @test6(i64 %x) nounwind {
   %land.ext = zext i1 %.cmp1 to i32
   ret i32 %land.ext
 }
+
+define i1 @test7(i32 %i, i1 %b) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %i, 0
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP]], %b
+; CHECK-NEXT: ret i1 [[AND]]
+  %cmp1 = icmp slt i32 %i, 1
+  %cmp2 = icmp sgt i32 %i, -1
+  %and1 = and i1 %cmp1, %b
+  %and2 = and i1 %and1, %cmp2
+  ret i1 %and2
+}
+
+define i1 @test8(i32 %i) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[DEC:%.*]] = add i32 %i, -1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[DEC]], 13
+; CHECK-NEXT: ret i1 [[CMP]]
+  %cmp1 = icmp ne i32 %i, 0
+  %cmp2 = icmp ult i32 %i, 14
+  %cond = and i1 %cmp1, %cmp2
+  ret i1 %cond
+}
diff --git a/test/Transforms/InstCombine/apint-sub.ll b/test/Transforms/InstCombine/apint-sub.ll
index df8ec52..3b69c17 100644
--- a/test/Transforms/InstCombine/apint-sub.ll
+++ b/test/Transforms/InstCombine/apint-sub.ll
@@ -95,12 +95,6 @@ define i1024 @test14(i1024 %A) {
 	ret i1024 %D
 }
 
-define i14 @test15(i14 %A, i14 %B) {
-	%C = sub i14 0, %A		; <i14> [#uses=1]
-	%D = srem i14 %B, %C		; <i14> [#uses=1]
-	ret i14 %D
-}
-
 define i51 @test16(i51 %A) {
 	%X = sdiv i51 %A, 1123		; <i51> [#uses=1]
 	%Y = sub i51 0, %X		; <i51> [#uses=1]
diff --git a/test/Transforms/InstCombine/ashr-nop.ll b/test/Transforms/InstCombine/ashr-nop.ll
deleted file mode 100644
index 870ede3..0000000
--- a/test/Transforms/InstCombine/ashr-nop.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep ashr
-
-define i32 @foo(i32 %x) {
-  %o = and i32 %x, 1
-  %n = add i32 %o, -1
-  %t = ashr i32 %n, 17
-  ret i32 %t
-}
diff --git a/test/Transforms/InstCombine/assume-loop-align.ll b/test/Transforms/InstCombine/assume-loop-align.ll
new file mode 100644
index 0000000..19190de
--- /dev/null
+++ b/test/Transforms/InstCombine/assume-loop-align.ll
@@ -0,0 +1,47 @@
+; RUN: opt -domtree -instcombine -loops -S < %s | FileCheck %s
+; Note: The -loops above can be anything that requires the domtree, and is
+; necessary to work around a pass-manager bug.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @foo(i32* %a, i32* %b) #0 {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 63
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %ptrint1 = ptrtoint i32* %b to i64
+  %maskedptr2 = and i64 %ptrint1, 63
+  %maskcond3 = icmp eq i64 %maskedptr2, 0
+  tail call void @llvm.assume(i1 %maskcond3)
+  br label %for.body
+
+; CHECK-LABEL: @foo
+; CHECK: load i32* {{.*}} align 64
+; CHECK: store i32 {{.*}}  align 64
+; CHECK: ret
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, 1
+  %arrayidx5 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx5, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 16
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 1648
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/assume-redundant.ll b/test/Transforms/InstCombine/assume-redundant.ll
new file mode 100644
index 0000000..81fe094
--- /dev/null
+++ b/test/Transforms/InstCombine/assume-redundant.ll
@@ -0,0 +1,55 @@
+; RUN: opt -domtree -instcombine -loops -S < %s | FileCheck %s
+; Note: The -loops above can be anything that requires the domtree, and is
+; necessary to work around a pass-manager bug.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.s = type { double* }
+
+; Function Attrs: nounwind uwtable
+define void @_Z3fooR1s(%struct.s* nocapture readonly dereferenceable(8) %x) #0 {
+
+; CHECK-LABEL: @_Z3fooR1s
+; CHECK: call void @llvm.assume
+; CHECK-NOT: call void @llvm.assume
+
+entry:
+  %a = getelementptr inbounds %struct.s* %x, i64 0, i32 0
+  %0 = load double** %a, align 8
+  %ptrint = ptrtoint double* %0 to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.1, %for.body ]
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx = getelementptr inbounds double* %0, i64 %indvars.iv
+  %1 = load double* %arrayidx, align 16
+  %add = fadd double %1, 1.000000e+00
+  tail call void @llvm.assume(i1 %maskcond)
+  %mul = fmul double %add, 2.000000e+00
+  store double %mul, double* %arrayidx, align 16
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx.1 = getelementptr inbounds double* %0, i64 %indvars.iv.next
+  %2 = load double* %arrayidx.1, align 8
+  %add.1 = fadd double %2, 1.000000e+00
+  tail call void @llvm.assume(i1 %maskcond)
+  %mul.1 = fmul double %add.1, 2.000000e+00
+  store double %mul.1, double* %arrayidx.1, align 8
+  %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
+  %exitcond.1 = icmp eq i64 %indvars.iv.next, 1599
+  br i1 %exitcond.1, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/assume.ll b/test/Transforms/InstCombine/assume.ll
new file mode 100644
index 0000000..7e45c04
--- /dev/null
+++ b/test/Transforms/InstCombine/assume.ll
@@ -0,0 +1,265 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @foo1(i32* %a) #0 {
+entry:
+  %0 = load i32* %a, align 4
+
+; Check that the alignment has been upgraded and that the assume has not
+; been removed:
+; CHECK-LABEL: @foo1
+; CHECK-DAG: load i32* %a, align 32
+; CHECK-DAG: call void @llvm.assume
+; CHECK: ret i32
+
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+
+  ret i32 %0
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @foo2(i32* %a) #0 {
+entry:
+; Same check as in @foo1, but make sure it works if the assume is first too.
+; CHECK-LABEL: @foo2
+; CHECK-DAG: load i32* %a, align 32
+; CHECK-DAG: call void @llvm.assume
+; CHECK: ret i32
+
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+
+  %0 = load i32* %a, align 4
+  ret i32 %0
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+define i32 @simple(i32 %a) #1 {
+entry:
+
+; CHECK-LABEL: @simple
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 4
+
+  %cmp = icmp eq i32 %a, 4
+  tail call void @llvm.assume(i1 %cmp)
+  ret i32 %a
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @can1(i1 %a, i1 %b, i1 %c) {
+entry:
+  %and1 = and i1 %a, %b
+  %and  = and i1 %and1, %c
+  tail call void @llvm.assume(i1 %and)
+
+; CHECK-LABEL: @can1
+; CHECK: call void @llvm.assume(i1 %a)
+; CHECK: call void @llvm.assume(i1 %b)
+; CHECK: call void @llvm.assume(i1 %c)
+; CHECK: ret i32
+
+  ret i32 5
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @can2(i1 %a, i1 %b, i1 %c) {
+entry:
+  %v = or i1 %a, %b
+  %w = xor i1 %v, 1
+  tail call void @llvm.assume(i1 %w)
+
+; CHECK-LABEL: @can2
+; CHECK: %[[V1:[^ ]+]] = xor i1 %a, true
+; CHECK: call void @llvm.assume(i1 %[[V1]])
+; CHECK: %[[V2:[^ ]+]] = xor i1 %b, true
+; CHECK: call void @llvm.assume(i1 %[[V2]])
+; CHECK: ret i32
+
+  ret i32 5
+}
+
+define i32 @bar1(i32 %a) #0 {
+entry:
+  %and1 = and i32 %a, 3
+
+; CHECK-LABEL: @bar1
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.assume(i1 %cmp)
+
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar2(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @bar2
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 3
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar3(i32 %a, i1 %x, i1 %y) #0 {
+entry:
+  %and1 = and i32 %a, 3
+
+; Don't be fooled by other assumes around.
+; CHECK-LABEL: @bar3
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+  tail call void @llvm.assume(i1 %x)
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.assume(i1 %cmp)
+
+  tail call void @llvm.assume(i1 %y)
+
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar4(i32 %a, i32 %b) {
+entry:
+  %and1 = and i32 %b, 3
+
+; CHECK-LABEL: @bar4
+; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.assume(i1 %cmp)
+
+  %cmp2 = icmp eq i32 %a, %b
+  tail call void @llvm.assume(i1 %cmp2)
+
+  ret i32 %and1
+}
+
+define i32 @icmp1(i32 %a) #0 {
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+
+; CHECK-LABEL: @icmp1
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @icmp2(i32 %a) #0 {
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %0 = zext i1 %cmp to i32
+  %lnot.ext = xor i32 %0, 1
+  ret i32 %lnot.ext
+
+; CHECK-LABEL: @icmp2
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 0
+}
+
+declare void @escape(i32* %a)
+
+; Do we canonicalize a nonnull assumption on a load into
+; metadata form?
+define i1 @nonnull1(i32** %a) {
+entry:
+  %load = load i32** %a
+  %cmp = icmp ne i32* %load, null
+  tail call void @llvm.assume(i1 %cmp)
+  tail call void @escape(i32* %load)
+  %rval = icmp eq i32* %load, null
+  ret i1 %rval
+
+; CHECK-LABEL: @nonnull1
+; CHECK: !nonnull
+; CHECK-NOT: call void @llvm.assume
+; CHECK: ret i1 false
+}
+
+; Make sure the above canonicalization applies only
+; to pointer types.  Doing otherwise would be illegal.
+define i1 @nonnull2(i32* %a) {
+entry:
+  %load = load i32* %a
+  %cmp = icmp ne i32 %load, 0
+  tail call void @llvm.assume(i1 %cmp)
+  %rval = icmp eq i32 %load, 0
+  ret i1 %rval
+
+; CHECK-LABEL: @nonnull2
+; CHECK-NOT: !nonnull
+; CHECK: call void @llvm.assume
+}
+
+; Make sure the above canonicalization does not trigger
+; if the assume is control dependent on something else
+define i1 @nonnull3(i32** %a, i1 %control) {
+entry:
+  %load = load i32** %a
+  %cmp = icmp ne i32* %load, null
+  br i1 %control, label %taken, label %not_taken
+taken:
+  tail call void @llvm.assume(i1 %cmp)
+  %rval = icmp eq i32* %load, null
+  ret i1 %rval
+not_taken:
+  ret i1 true
+
+; CHECK-LABEL: @nonnull3
+; CHECK-NOT: !nonnull
+; CHECK: call void @llvm.assume
+}
+
+; Make sure the above canonicalization does not trigger
+; if the path from the load to the assume is potentially 
+; interrupted by an exception being thrown
+define i1 @nonnull4(i32** %a) {
+entry:
+  %load = load i32** %a
+  ;; This call may throw!
+  tail call void @escape(i32* %load)
+  %cmp = icmp ne i32* %load, null
+  tail call void @llvm.assume(i1 %cmp)
+  %rval = icmp eq i32* %load, null
+  ret i1 %rval
+
+; CHECK-LABEL: @nonnull4
+; CHECK-NOT: !nonnull
+; CHECK: call void @llvm.assume
+}
+
+
+
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/assume2.ll b/test/Transforms/InstCombine/assume2.ll
new file mode 100644
index 0000000..c41bbaa
--- /dev/null
+++ b/test/Transforms/InstCombine/assume2.ll
@@ -0,0 +1,174 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @test1(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test1
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 5
+
+  %and = and i32 %a, 15
+  %cmp = icmp eq i32 %and, 5
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test2(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test2
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 2
+
+  %and = and i32 %a, 15
+  %nand = xor i32 %and, -1
+  %cmp = icmp eq i32 %nand, 4294967285
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test3(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test3
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 5
+
+  %v = or i32 %a, 4294967280
+  %cmp = icmp eq i32 %v, 4294967285
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test4(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test4
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 2
+
+  %v = or i32 %a, 4294967280
+  %nv = xor i32 %v, -1
+  %cmp = icmp eq i32 %nv, 5
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test5(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test5
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 4
+
+  %v = xor i32 %a, 1
+  %cmp = icmp eq i32 %v, 5
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test6(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test6
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 5
+
+  %v = shl i32 %a, 2
+  %cmp = icmp eq i32 %v, 20
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 63
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test7(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test7
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 20
+
+  %v = lshr i32 %a, 2
+  %cmp = icmp eq i32 %v, 5
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 252
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test8(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test8
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 20
+
+  %v = lshr i32 %a, 2
+  %cmp = icmp eq i32 %v, 5
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 252
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test9(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test9
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 0
+
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 2147483648
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test10(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test10
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 -2147483648
+
+  %cmp = icmp sle i32 %a, -2
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 2147483648
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test11(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test11
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 0
+
+  %cmp = icmp ule i32 %a, 256
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 3072
+  ret i32 %and1
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/atomic.ll b/test/Transforms/InstCombine/atomic.ll
index ccee874..98cecef 100644
--- a/test/Transforms/InstCombine/atomic.ll
+++ b/test/Transforms/InstCombine/atomic.ll
@@ -5,14 +5,6 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 ; Check transforms involving atomic operations
 
-define i32* @test1(i8** %p) {
-; CHECK-LABEL: define i32* @test1(
-; CHECK: load atomic i8** %p monotonic, align 8
-  %c = bitcast i8** %p to i32**
-  %r = load atomic i32** %c monotonic, align 8
-  ret i32* %r
-}
-
 define i32 @test2(i32* %p) {
 ; CHECK-LABEL: define i32 @test2(
 ; CHECK: %x = load atomic i32* %p seq_cst, align 4
diff --git a/test/Transforms/InstCombine/bitcast-alias-function.ll b/test/Transforms/InstCombine/bitcast-alias-function.ll
index a6b56f9..bc36b25 100644
--- a/test/Transforms/InstCombine/bitcast-alias-function.ll
+++ b/test/Transforms/InstCombine/bitcast-alias-function.ll
@@ -90,7 +90,8 @@ entry:
 define void @bitcast_alias_scalar(float* noalias %source, float* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_scalar
-; CHECK: bitcast float %tmp to i32
+; CHECK: bitcast float* %source to i32*
+; CHECK: load i32*
 ; CHECK-NOT: fptoui
 ; CHECK-NOT: uitofp
 ; CHECK: bitcast i32 %call to float
@@ -104,7 +105,8 @@ entry:
 define void @bitcast_alias_vector(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector
-; CHECK: bitcast <2 x float> %tmp to <2 x i32>
+; CHECK: bitcast <2 x float>* %source to <2 x i32>*
+; CHECK: load <2 x i32>*
 ; CHECK-NOT: fptoui
 ; CHECK-NOT: uitofp
 ; CHECK: bitcast <2 x i32> %call to <2 x float>
@@ -118,7 +120,8 @@ entry:
 define void @bitcast_alias_vector_scalar_same_size(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector_scalar_same_size
-; CHECK: bitcast <2 x float> %tmp to i64
+; CHECK: bitcast <2 x float>* %source to i64*
+; CHECK: load i64*
 ; CHECK: %call = call i64 @func_i64
 ; CHECK: bitcast i64 %call to <2 x float>
   %tmp = load <2 x float>* %source, align 8
@@ -130,7 +133,8 @@ entry:
 define void @bitcast_alias_scalar_vector_same_size(i64* noalias %source, i64* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_scalar_vector_same_size
-; CHECK: bitcast i64 %tmp to <2 x float>
+; CHECK: bitcast i64* %source to <2 x float>*
+; CHECK: load <2 x float>*
 ; CHECK: call <2 x float> @func_v2f32
 ; CHECK: bitcast <2 x float> %call to i64
   %tmp = load i64* %source, align 8
@@ -142,7 +146,8 @@ entry:
 define void @bitcast_alias_vector_ptrs_same_size(<2 x i64*>* noalias %source, <2 x i64*>* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector_ptrs_same_size
-; CHECK: bitcast <2 x i64*> %tmp to <2 x i32*>
+; CHECK: bitcast <2 x i64*>* %source to <2 x i32*>*
+; CHECK: load <2 x i32*>*
 ; CHECK: call <2 x i32*> @func_v2i32p
 ; CHECK: bitcast <2 x i32*> %call to <2 x i64*>
   %tmp = load <2 x i64*>* %source, align 8
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 0cbfbb0..578b16d 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -354,6 +354,24 @@ define i32* @test41(i32* %tmp1) {
 ; CHECK: ret i32* %tmp1
 }
 
+define i32 addrspace(1)* @test41_addrspacecast_smaller(i32* %tmp1) {
+  %tmp64 = addrspacecast i32* %tmp1 to { i32 } addrspace(1)*
+  %tmp65 = getelementptr { i32 } addrspace(1)* %tmp64, i32 0, i32 0
+  ret i32 addrspace(1)* %tmp65
+; CHECK-LABEL: @test41_addrspacecast_smaller(
+; CHECK: addrspacecast i32* %tmp1 to i32 addrspace(1)*
+; CHECK-NEXT: ret i32 addrspace(1)*
+}
+
+define i32* @test41_addrspacecast_larger(i32 addrspace(1)* %tmp1) {
+  %tmp64 = addrspacecast i32 addrspace(1)* %tmp1 to { i32 }*
+  %tmp65 = getelementptr { i32 }* %tmp64, i32 0, i32 0
+  ret i32* %tmp65
+; CHECK-LABEL: @test41_addrspacecast_larger(
+; CHECK: addrspacecast i32 addrspace(1)* %tmp1 to i32*
+; CHECK-NEXT: ret i32*
+}
+
 define i32 @test42(i32 %X) {
         %Y = trunc i32 %X to i8         ; <i8> [#uses=1]
         %Z = zext i8 %Y to i32          ; <i32> [#uses=1]
@@ -792,7 +810,7 @@ define double @test71(double *%p, i64 %i) {
 
 define double @test72(double *%p, i32 %i) {
 ; CHECK-LABEL: @test72(
-  %so = mul nsw i32 %i, 8
+  %so = shl nsw i32 %i, 3
   %o = sext i32 %so to i64
 ; CHECK-NEXT: sext i32 %i to i64
   %q = bitcast double* %p to i8*
@@ -807,7 +825,7 @@ define double @test72(double *%p, i32 %i) {
 
 define double @test73(double *%p, i128 %i) {
 ; CHECK-LABEL: @test73(
-  %lo = mul nsw i128 %i, 8
+  %lo = shl nsw i128 %i, 3
   %o = trunc i128 %lo to i64
 ; CHECK-NEXT: trunc i128 %i to i64
   %q = bitcast double* %p to i8*
@@ -919,7 +937,7 @@ define %s @test79(%s *%p, i64 %i, i32 %j) {
 
 define double @test80([100 x double]* %p, i32 %i) {
 ; CHECK-LABEL: @test80(
-  %tmp = mul nsw i32 %i, 8
+  %tmp = shl nsw i32 %i, 3
 ; CHECK-NEXT: sext i32 %i to i64
   %q = bitcast [100 x double]* %p to i8*
   %pp = getelementptr i8* %q, i32 %tmp
@@ -936,7 +954,7 @@ define double @test80_addrspacecast([100 x double] addrspace(1)* %p, i32 %i) {
 ; CHECK-NEXT: getelementptr [100 x double] addrspace(1)* %p
 ; CHECK-NEXT: load double addrspace(1)*
 ; CHECK-NEXT: ret double
-  %tmp = mul nsw i32 %i, 8
+  %tmp = shl nsw i32 %i, 3
   %q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)*
   %pp = getelementptr i8 addrspace(2)* %q, i32 %tmp
   %r = addrspacecast i8 addrspace(2)* %pp to double addrspace(1)*
@@ -950,7 +968,7 @@ define double @test80_addrspacecast_2([100 x double] addrspace(1)* %p, i32 %i) {
 ; CHECK-NEXT: addrspacecast double addrspace(1)*
 ; CHECK-NEXT: load double addrspace(3)*
 ; CHECK-NEXT: ret double
-  %tmp = mul nsw i32 %i, 8
+  %tmp = shl nsw i32 %i, 3
   %q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)*
   %pp = getelementptr i8 addrspace(2)* %q, i32 %tmp
   %r = addrspacecast i8 addrspace(2)* %pp to double addrspace(3)*
@@ -960,7 +978,7 @@ define double @test80_addrspacecast_2([100 x double] addrspace(1)* %p, i32 %i) {
 
 define double @test80_as1([100 x double] addrspace(1)* %p, i16 %i) {
 ; CHECK-LABEL: @test80_as1(
-  %tmp = mul nsw i16 %i, 8
+  %tmp = shl nsw i16 %i, 3
 ; CHECK-NEXT: sext i16 %i to i32
   %q = bitcast [100 x double] addrspace(1)* %p to i8 addrspace(1)*
   %pp = getelementptr i8 addrspace(1)* %q, i16 %tmp
@@ -1004,7 +1022,74 @@ define i64 @test83(i16 %a, i64 %k) {
   ret i64 %sh_prom1
 
 ; CHECK-LABEL: @test83(
-; CHECK: %sub = add nsw i64 %k, 4294967295
+; CHECK: %sub = add i64 %k, 4294967295
 ; CHECK: %sh_prom = trunc i64 %sub to i32
 ; CHECK: %shl = shl i32 %conv, %sh_prom
 }
+
+define i8 @test84(i32 %a) {
+  %add = add nsw i32 %a, -16777216
+  %shr = lshr exact i32 %add, 23
+  %trunc = trunc i32 %shr to i8
+  ret i8 %trunc
+
+; CHECK-LABEL: @test84(
+; CHECK: [[ADD:%.*]] = add i32 %a, 2130706432
+; CHECK: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23
+; CHECK: [[CST:%.*]] = trunc i32 [[SHR]] to i8
+}
+
+define i8 @test85(i32 %a) {
+  %add = add nuw i32 %a, -16777216
+  %shr = lshr exact i32 %add, 23
+  %trunc = trunc i32 %shr to i8
+  ret i8 %trunc
+
+; CHECK-LABEL: @test85(
+; CHECK: [[ADD:%.*]] = add i32 %a, 2130706432
+; CHECK: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23
+; CHECK: [[CST:%.*]] = trunc i32 [[SHR]] to i8
+}
+
+; Overflow on a float to int or int to float conversion is undefined (PR21130).
+
+define i8 @overflow_fptosi() {
+  %i = fptosi double 1.56e+02 to i8
+  ret i8 %i
+; CHECK-LABEL: @overflow_fptosi(
+; CHECK-NEXT: ret i8 undef 
+}
+
+define i8 @overflow_fptoui() {
+  %i = fptoui double 2.56e+02 to i8
+  ret i8 %i
+; CHECK-LABEL: @overflow_fptoui(
+; CHECK-NEXT: ret i8 undef 
+}
+
+; The maximum float is approximately 2 ** 128 which is 3.4E38. 
+; The constant below is 4E38. Use a 130 bit integer to hold that
+; number; 129-bits for the value + 1 bit for the sign.
+define float @overflow_uitofp() {
+  %i = uitofp i130 400000000000000000000000000000000000000 to float
+  ret float %i
+; CHECK-LABEL: @overflow_uitofp(
+; CHECK-NEXT: ret float undef 
+}
+
+define float @overflow_sitofp() {
+  %i = sitofp i130 400000000000000000000000000000000000000 to float
+  ret float %i
+; CHECK-LABEL: @overflow_sitofp(
+; CHECK-NEXT: ret float undef 
+}
+
+define i32 @PR21388(i32* %v) {
+  %icmp = icmp slt i32* %v, null
+  %sext = sext i1 %icmp to i32
+  ret i32 %sext
+; CHECK-LABEL: @PR21388(
+; CHECK-NEXT: %[[icmp:.*]] = icmp slt i32* %v, null
+; CHECK-NEXT: %[[sext:.*]] = sext i1 %[[icmp]] to i32
+; CHECK-NEXT: ret i32 %[[sext]]
+}
diff --git a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
index 7fac78a..bb61f02 100644
--- a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
+++ b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -161,12 +161,11 @@ define i32 @constant_fold_bitcast_itof_load() {
   ret i32 %a
 }
 
-define <4 x i32> @constant_fold_bitcast_vector_as() {
+define <4 x float> @constant_fold_bitcast_vector_as() {
 ; CHECK-LABEL: @constant_fold_bitcast_vector_as(
 ; CHECK: load <4 x float> addrspace(3)* @g_v4f_as3, align 16
-; CHECK: bitcast <4 x float> %1 to <4 x i32>
-  %a = load <4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*), align 4
-  ret <4 x i32> %a
+  %a = load <4 x float> addrspace(3)* bitcast (<4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*) to <4 x float> addrspace(3)*), align 4
+  ret <4 x float> %a
 }
 
 @i32_array_as3 = addrspace(3) global [10 x i32] zeroinitializer
diff --git a/test/Transforms/InstCombine/constant-fold-alias.ll b/test/Transforms/InstCombine/constant-fold-alias.ll
new file mode 100644
index 0000000..13da0f4
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-fold-alias.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S < %s -instcombine | FileCheck %s
+
+target datalayout = "e-p1:16:16-p2:32:32-p3:64:64"
+
+@G1 = global i32 42, align 1
+@G2 = global i32 42
+@G3 = global [4 x i8] zeroinitializer, align 1
+
+@A1 = alias bitcast (i8* getelementptr inbounds ([4 x i8]* @G3, i32 0, i32 2) to i32*)
+@A2 = alias inttoptr (i64 and (i64 ptrtoint (i8* getelementptr inbounds ([4 x i8]* @G3, i32 0, i32 3) to i64), i64 -4) to i32*)
+
+define i64 @f1() {
+; This cannot be constant folded because G1 is underaligned.
+; CHECK-LABEL: @f1(
+; CHECK: ret i64 and
+  ret i64 and (i64 ptrtoint (i32* @G1 to i64), i64 1)
+}
+
+define i64 @f2() {
+; The preferred alignment for G2 allows this one to foled to zero.
+; CHECK-LABEL: @f2(
+; CHECK: ret i64 0
+  ret i64 and (i64 ptrtoint (i32* @G2 to i64), i64 1)
+}
+
+define i64 @g1() {
+; This cannot be constant folded because A1 aliases G3 which is underalaigned.
+; CHECK-LABEL: @g1(
+; CHECK: ret i64 and
+  ret i64 and (i64 ptrtoint (i32* @A1 to i64), i64 1)
+}
+
+define i64 @g2() {
+; While A2 also aliases G3 which is underaligned, the math of A2 forces a
+; certain alignment allowing this to fold to zero.
+; CHECK-LABEL: @g2(
+; CHECK: ret i64 0
+  ret i64 and (i64 ptrtoint (i32* @A2 to i64), i64 1)
+}
+
diff --git a/test/Transforms/InstCombine/constant-fold-math.ll b/test/Transforms/InstCombine/constant-fold-math.ll
index 14377df..ce8d337 100644
--- a/test/Transforms/InstCombine/constant-fold-math.ll
+++ b/test/Transforms/InstCombine/constant-fold-math.ll
@@ -7,6 +7,7 @@ declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
 declare double @llvm.fma.f64(double, double, double) #0
 declare double @llvm.fmuladd.f64(double, double, double) #0
 
+declare double @llvm.sqrt.f64(double) #0
 
 
 ; CHECK-LABEL: @constant_fold_fma_f32
@@ -44,4 +45,12 @@ define double @constant_fold_fmuladd_f64() #0 {
   ret double %x
 }
 
+; The sqrt intrinsic is undefined for negative inputs besides -0.0.
+; CHECK-LABEL: @bad_sqrt
+; CHECK-NEXT: ret double undef
+define double @bad_sqrt() {
+  %x = call double @llvm.sqrt.f64(double -2.000000e+00)
+  ret double %x
+}
+
 attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll
index 2e3785f..309843f 100644
--- a/test/Transforms/InstCombine/debug-line.ll
+++ b/test/Transforms/InstCombine/debug-line.ll
@@ -15,14 +15,14 @@ declare i32 @printf(i8*, ...)
 !llvm.module.flags = !{!10}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !8, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !8, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x2e\00foo\00foo\00\004\000\001\000\006\000\000\000", metadata !8, metadata !1, metadata !3, null, void ()* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{metadata !"0x29", metadata !8} ; [ DW_TAG_file_type ]
+!2 = metadata !{metadata !"0x11\0012\00clang\001\00\000\00\000", metadata !8, metadata !4, metadata !4, metadata !9, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !8, metadata !1, null, metadata !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 5, i32 2, metadata !6, null}
-!6 = metadata !{i32 589835, metadata !8, metadata !0, i32 4, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
+!6 = metadata !{metadata !"0xb\004\0012\000", metadata !8, metadata !0} ; [ DW_TAG_lexical_block ]
 !7 = metadata !{i32 6, i32 1, metadata !6, null}
 !8 = metadata !{metadata !"m.c", metadata !"/private/tmp"}
 !9 = metadata !{metadata !0}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index 75082dc..a7a491e 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone
 
@@ -14,11 +14,11 @@ entry:
   store i8* %__dest, i8** %__dest.addr, align 8
 ; CHECK-NOT: call void @llvm.dbg.declare
 ; CHECK: call void @llvm.dbg.value
-  call void @llvm.dbg.declare(metadata !{i8** %__dest.addr}, metadata !0), !dbg !16
+  call void @llvm.dbg.declare(metadata !{i8** %__dest.addr}, metadata !0, metadata !{}), !dbg !16
   store i32 %__val, i32* %__val.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %__val.addr}, metadata !7), !dbg !18
+  call void @llvm.dbg.declare(metadata !{i32* %__val.addr}, metadata !7, metadata !{}), !dbg !18
   store i64 %__len, i64* %__len.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i64* %__len.addr}, metadata !9), !dbg !20
+  call void @llvm.dbg.declare(metadata !{i64* %__len.addr}, metadata !9, metadata !{}), !dbg !20
   %tmp = load i8** %__dest.addr, align 8, !dbg !21
   %tmp1 = load i32* %__val.addr, align 4, !dbg !21
   %tmp2 = load i64* %__len.addr, align 8, !dbg !21
@@ -31,29 +31,29 @@ entry:
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!30}
 
-!0 = metadata !{i32 786689, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !27, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ] [line 79] [local] [def] [foobar]
-!2 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.0 (trunk 127710)", i1 true, metadata !"", i32 0, metadata !29, metadata !29, metadata !24, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !27, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x101\00__dest\0016777294\000", metadata !1, metadata !2, metadata !6} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{metadata !"0x2e\00foobar\00foobar\00\0079\001\001\000\006\00256\001\0079", metadata !27, metadata !2, metadata !4, null, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25} ; [ DW_TAG_subprogram ] [line 79] [local] [def] [foobar]
+!2 = metadata !{metadata !"0x29", metadata !27} ; [ DW_TAG_file_type ]
+!3 = metadata !{metadata !"0x11\0012\00clang version 3.0 (trunk 127710)\001\00\000\00\000", metadata !28, metadata !29, metadata !29, metadata !24, null, null} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !27, metadata !2, null, metadata !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6}
-!6 = metadata !{i32 786447, null, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 786689, metadata !1, metadata !"__val", metadata !2, i32 33554510, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 786468, null, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786689, metadata !1, metadata !"__len", metadata !2, i32 50331726, metadata !10, i32 0, null} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 589846, metadata !27, metadata !3, metadata !"size_t", i32 80, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_typedef ]
-!11 = metadata !{i32 589846, metadata !27, metadata !3, metadata !"__darwin_size_t", i32 90, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
-!12 = metadata !{i32 786468, null, metadata !3, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!6 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", null, metadata !3, null} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{metadata !"0x101\00__val\0033554510\000", metadata !1, metadata !2, metadata !8} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, metadata !3} ; [ DW_TAG_base_type ]
+!9 = metadata !{metadata !"0x101\00__len\0050331726\000", metadata !1, metadata !2, metadata !10} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{metadata !"0x16\00size_t\0080\000\000\000\000", metadata !27, metadata !3, metadata !11} ; [ DW_TAG_typedef ]
+!11 = metadata !{metadata !"0x16\00__darwin_size_t\0090\000\000\000\000", metadata !27, metadata !3, metadata !12} ; [ DW_TAG_typedef ]
+!12 = metadata !{metadata !"0x24\00long unsigned int\000\0064\0064\000\000\007", null, metadata !3} ; [ DW_TAG_base_type ]
 !16 = metadata !{i32 78, i32 28, metadata !1, null}
 !18 = metadata !{i32 78, i32 40, metadata !1, null}
 !20 = metadata !{i32 78, i32 54, metadata !1, null}
 !21 = metadata !{i32 80, i32 3, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !27, metadata !23, i32 80, i32 3, i32 7} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{i32 786443, metadata !27, metadata !1, i32 79, i32 1, i32 6} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{metadata !"0xb\0080\003\007", metadata !27, metadata !23} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{metadata !"0xb\0079\001\006", metadata !27, metadata !1} ; [ DW_TAG_lexical_block ]
 !24 = metadata !{metadata !1}
 !25 = metadata !{metadata !0, metadata !7, metadata !9}
-!26 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
+!26 = metadata !{metadata !"0x29", metadata !28} ; [ DW_TAG_file_type ]
 !27 = metadata !{metadata !"string.h", metadata !"Game"}
 !28 = metadata !{metadata !"bits.c", metadata !"Game"}
 !29 = metadata !{i32 0}
-!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/InstCombine/descale-zero.ll b/test/Transforms/InstCombine/descale-zero.ll
index 7990fdb..4656837 100644
--- a/test/Transforms/InstCombine/descale-zero.ll
+++ b/test/Transforms/InstCombine/descale-zero.ll
@@ -5,8 +5,7 @@ target triple = "x86_64-apple-macosx10.10.0"
 
 define internal i8* @descale_zero() {
 entry:
-; CHECK: load i16** inttoptr (i64 48 to i16**), align 16
-; CHECK-NEXT: bitcast i16*
+; CHECK: load i8** inttoptr (i64 48 to i8**), align 16
 ; CHECK-NEXT: ret i8*
   %i16_ptr = load i16** inttoptr (i64 48 to i16**), align 16
   %num = load i64* inttoptr (i64 64 to i64*), align 64
diff --git a/test/Transforms/InstCombine/devirt.ll b/test/Transforms/InstCombine/devirt.ll
deleted file mode 100644
index 9c7cf5d..0000000
--- a/test/Transforms/InstCombine/devirt.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt -instcombine -S < %s | FileCheck %s
-
-; CHECK-NOT: getelementptr
-; CHECK-NOT: ptrtoint
-; CHECK: bitcast i8*
-%struct.S = type { i32 (...)** }
-
-@_ZL1p = internal constant { i64, i64 } { i64 1, i64 0 }, align 8
-
-define void @_Z1g1S(%struct.S* %s) nounwind {
-entry:
-  %tmp = load { i64, i64 }* @_ZL1p, align 8
-  %memptr.adj = extractvalue { i64, i64 } %tmp, 1
-  %0 = bitcast %struct.S* %s to i8*
-  %1 = getelementptr inbounds i8* %0, i64 %memptr.adj
-  %this.adjusted = bitcast i8* %1 to %struct.S*
-  %memptr.ptr = extractvalue { i64, i64 } %tmp, 0
-  %2 = and i64 %memptr.ptr, 1
-  %memptr.isvirtual = icmp ne i64 %2, 0
-  br i1 %memptr.isvirtual, label %memptr.virtual, label %memptr.nonvirtual
-
-memptr.virtual:                                   ; preds = %entry
-  %3 = bitcast %struct.S* %this.adjusted to i8**
-  %memptr.vtable = load i8** %3
-  %4 = sub i64 %memptr.ptr, 1
-  %5 = getelementptr i8* %memptr.vtable, i64 %4
-  %6 = bitcast i8* %5 to void (%struct.S*)**
-  %memptr.virtualfn = load void (%struct.S*)** %6
-  br label %memptr.end
-
-memptr.nonvirtual:                                ; preds = %entry
-  %memptr.nonvirtualfn = inttoptr i64 %memptr.ptr to void (%struct.S*)*
-  br label %memptr.end
-
-memptr.end:                                       ; preds = %memptr.nonvirtual, %memptr.virtual
-  %7 = phi void (%struct.S*)* [ %memptr.virtualfn, %memptr.virtual ], [ %memptr.nonvirtualfn, %memptr.nonvirtual ]
-  call void %7(%struct.S* %this.adjusted)
-  ret void
-}
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index 9c7ba9b..2841043 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -132,11 +132,11 @@ define i32 @test15(i32 %a, i32 %b) nounwind {
 }
 
 define <2 x i64> @test16(<2 x i64> %x) nounwind {
-  %shr = lshr <2 x i64> %x, <i64 3, i64 5>
-  %div = udiv <2 x i64> %shr, <i64 4, i64 6>
+  %shr = lshr <2 x i64> %x, <i64 5, i64 5>
+  %div = udiv <2 x i64> %shr, <i64 6, i64 6>
   ret <2 x i64> %div
 ; CHECK-LABEL: @test16(
-; CHECK-NEXT: udiv <2 x i64> %x, <i64 32, i64 192>
+; CHECK-NEXT: udiv <2 x i64> %x, <i64 192, i64 192>
 ; CHECK-NEXT: ret <2 x i64>
 }
 
@@ -175,3 +175,114 @@ define i32 @test20(i32 %x) {
 ; CHECK-NEXT: select i1 %{{.*}}, i32 %x, i32 {{.*}}
 ; CHECK-NEXT: ret i32
 }
+
+define i32 @test21(i32 %a) {
+  %shl = shl nsw i32 %a, 2
+  %div = sdiv i32 %shl, 12
+  ret i32 %div
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: %div = sdiv i32 %a, 3
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test22(i32 %a) {
+  %mul = mul nsw i32 %a, 3
+  %div = sdiv i32 %mul, 12
+  ret i32 %div
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: %div = sdiv i32 %a, 4
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test23(i32 %a) {
+  %shl = shl nuw i32 %a, 2
+  %div = udiv i32 %shl, 12
+  ret i32 %div
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: %div = udiv i32 %a, 3
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test24(i32 %a) {
+  %mul = mul nuw i32 %a, 3
+  %div = udiv i32 %mul, 12
+  ret i32 %div
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: %div = lshr i32 %a, 2
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test25(i32 %a) {
+  %shl = shl nsw i32 %a, 2
+  %div = sdiv i32 %shl, 2
+  ret i32 %div
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: %div = shl i32 %a, 1
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test26(i32 %a) {
+  %mul = mul nsw i32 %a, 12
+  %div = sdiv i32 %mul, 3
+  ret i32 %div
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: %div = shl i32 %a, 2
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test27(i32 %a) {
+  %shl = shl nuw i32 %a, 2
+  %div = udiv i32 %shl, 2
+  ret i32 %div
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: %div = shl nuw i32 %a, 1
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test28(i32 %a) {
+  %mul = mul nuw i32 %a, 36
+  %div = udiv i32 %mul, 3
+  ret i32 %div
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: %div = mul nuw i32 %a, 12
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test29(i32 %a) {
+  %mul = shl nsw i32 %a, 31
+  %div = sdiv i32 %mul, -2147483648
+  ret i32 %div
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: %[[and:.*]] = and i32 %a, 1
+; CHECK-NEXT: ret i32 %[[and]]
+}
+
+define i32 @test30(i32 %a) {
+  %mul = shl nuw i32 %a, 31
+  %div = udiv i32 %mul, -2147483648
+  ret i32 %div
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: ret i32 %a
+}
+
+define <2 x i32> @test31(<2 x i32> %x) {
+  %shr = lshr <2 x i32> %x, <i32 31, i32 31>
+  %div = udiv <2 x i32> %shr, <i32 2147483647, i32 2147483647>
+  ret <2 x i32> %div
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: %[[shr:.*]] = lshr <2 x i32> %x, <i32 31, i32 31>
+; CHECK-NEXT: udiv <2 x i32> %[[shr]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: ret <2 x i32>
+}
+
+define i32 @test32(i32 %a, i32 %b) {
+  %shl = shl i32 2, %b
+  %div = lshr i32 %shl, 2
+  %div2 = udiv i32 %a, %div
+  ret i32 %div2
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: %[[shl:.*]] = shl i32 2, %b
+; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[shl]], 2
+; CHECK-NEXT: %[[div:.*]] = udiv i32 %a, %[[shr]]
+; CHECK-NEXT: ret i32
+}
diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
index d958470..63a02bb 100644
--- a/test/Transforms/InstCombine/double-float-shrink-1.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -1,349 +1,366 @@
-; RUN: opt < %s -instcombine -enable-double-float-shrink -S | FileCheck %s
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define float @acos_test(float %f) nounwind readnone {
-; CHECK: acos_test
+; Check for and against shrinkage when using the
+; unsafe-fp-math function attribute on a math lib
+; function. This optimization may be overridden by
+; the -enable-double-float-shrink option.
+; PR17850: http://llvm.org/bugs/show_bug.cgi?id=17850
+
+define float @acos_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @acos(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: acos_test
 ; CHECK: call float @acosf(float %f)
 }
 
-define double @acos_test2(float %f) nounwind readnone {
-; CHECK: acos_test2
+define double @acos_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @acos(double %conv)
    ret double %call
+; CHECK-LABEL: acos_test2
 ; CHECK: call double @acos(double %conv)
 }
 
-define float @acosh_test(float %f) nounwind readnone {
-; CHECK: acosh_test
+define float @acosh_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @acosh(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: acosh_test
 ; CHECK: call float @acoshf(float %f)
 }
 
-define double @acosh_test2(float %f) nounwind readnone {
-; CHECK: acosh_test2
+define double @acosh_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @acosh(double %conv)
    ret double %call
+; CHECK-LABEL: acosh_test2
 ; CHECK: call double @acosh(double %conv)
 }
 
-define float @asin_test(float %f) nounwind readnone {
-; CHECK: asin_test
+define float @asin_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @asin(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: asin_test
 ; CHECK: call float @asinf(float %f)
 }
 
-define double @asin_test2(float %f) nounwind readnone {
-; CHECK: asin_test2
+define double @asin_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @asin(double %conv)
    ret double %call
+; CHECK-LABEL: asin_test2
 ; CHECK: call double @asin(double %conv)
 }
 
-define float @asinh_test(float %f) nounwind readnone {
-; CHECK: asinh_test
+define float @asinh_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @asinh(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: asinh_test
 ; CHECK: call float @asinhf(float %f)
 }
 
-define double @asinh_test2(float %f) nounwind readnone {
-; CHECK: asinh_test2
+define double @asinh_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @asinh(double %conv)
    ret double %call
+; CHECK-LABEL: asinh_test2
 ; CHECK: call double @asinh(double %conv)
 }
 
-define float @atan_test(float %f) nounwind readnone {
-; CHECK: atan_test
+define float @atan_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @atan(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: atan_test
 ; CHECK: call float @atanf(float %f)
 }
 
-define double @atan_test2(float %f) nounwind readnone {
-; CHECK: atan_test2
+define double @atan_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @atan(double %conv)
    ret double %call
+; CHECK-LABEL: atan_test2
 ; CHECK: call double @atan(double %conv)
 }
-define float @atanh_test(float %f) nounwind readnone {
-; CHECK: atanh_test
+define float @atanh_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @atanh(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: atanh_test
 ; CHECK: call float @atanhf(float %f)
 }
 
-define double @atanh_test2(float %f) nounwind readnone {
-; CHECK: atanh_test2
+define double @atanh_test2(float %f)   {
     %conv = fpext float %f to double
     %call = call double @atanh(double %conv)
     ret double %call
+; CHECK-LABEL: atanh_test2
 ; CHECK: call double @atanh(double %conv)
 }
-define float @cbrt_test(float %f) nounwind readnone {
-; CHECK: cbrt_test
+define float @cbrt_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @cbrt(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: cbrt_test
 ; CHECK: call float @cbrtf(float %f)
 }
 
-define double @cbrt_test2(float %f) nounwind readnone {
-; CHECK: cbrt_test2
+define double @cbrt_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @cbrt(double %conv)
    ret double %call
+; CHECK-LABEL: cbrt_test2
 ; CHECK: call double @cbrt(double %conv)
 }
-define float @exp_test(float %f) nounwind readnone {
-; CHECK: exp_test
+define float @exp_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @exp(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: exp_test
 ; CHECK: call float @expf(float %f)
 }
 
-define double @exp_test2(float %f) nounwind readnone {
-; CHECK: exp_test2
+define double @exp_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @exp(double %conv)
    ret double %call
+; CHECK-LABEL: exp_test2
 ; CHECK: call double @exp(double %conv)
 }
-define float @expm1_test(float %f) nounwind readnone {
-; CHECK: expm1_test
+define float @expm1_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @expm1(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: expm1_test
 ; CHECK: call float @expm1f(float %f)
 }
 
-define double @expm1_test2(float %f) nounwind readnone {
-; CHECK: expm1_test2
+define double @expm1_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @expm1(double %conv)
    ret double %call
+; CHECK-LABEL: expm1_test2
 ; CHECK: call double @expm1(double %conv)
 }
-define float @exp10_test(float %f) nounwind readnone {
-; CHECK: exp10_test
+define float @exp10_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @exp10(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
-; FIXME: Re-enable this when Linux allows transforming this again, or when we
-; can use builtin attributes to test the transform regardless of OS.
-; DISABLED-CHECK: call float @exp10f(float %f)
+; CHECK-LABEL: exp10_test
 ; CHECK: call double @exp10(double %conv)
 }
 
-define double @exp10_test2(float %f) nounwind readnone {
-; CHECK: exp10_test2
+define double @exp10_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @exp10(double %conv)
    ret double %call
+; CHECK-LABEL: exp10_test2
 ; CHECK: call double @exp10(double %conv)
 }
-define float @log_test(float %f) nounwind readnone {
-; CHECK: log_test
+define float @log_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @log(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: log_test
 ; CHECK: call float @logf(float %f)
 }
 
-define double @log_test2(float %f) nounwind readnone {
-; CHECK: log_test2
+define double @log_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @log(double %conv)
    ret double %call
+; CHECK-LABEL: log_test2
 ; CHECK: call double @log(double %conv)
 }
-define float @log10_test(float %f) nounwind readnone {
-; CHECK: log10_test
+define float @log10_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @log10(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: log10_test
 ; CHECK: call float @log10f(float %f)
 }
 
-define double @log10_test2(float %f) nounwind readnone {
-; CHECK: log10_test2
+define double @log10_test2(float %f) {
    %conv = fpext float %f to double
    %call = call double @log10(double %conv)
    ret double %call
+; CHECK-LABEL: log10_test2
 ; CHECK: call double @log10(double %conv)
 }
-define float @log1p_test(float %f) nounwind readnone {
-; CHECK: log1p_test
+define float @log1p_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @log1p(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: log1p_test
 ; CHECK: call float @log1pf(float %f)
 }
 
-define double @log1p_test2(float %f) nounwind readnone {
-; CHECK: log1p_test2
+define double @log1p_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @log1p(double %conv)
    ret double %call
+; CHECK-LABEL: log1p_test2
 ; CHECK: call double @log1p(double %conv)
 }
-define float @log2_test(float %f) nounwind readnone {
-; CHECK: log2_test
+define float @log2_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @log2(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: log2_test
 ; CHECK: call float @log2f(float %f)
 }
 
-define double @log2_test2(float %f) nounwind readnone {
-; CHECK: log2_test2
+define double @log2_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @log2(double %conv)
    ret double %call
+; CHECK-LABEL: log2_test2
 ; CHECK: call double @log2(double %conv)
 }
-define float @logb_test(float %f) nounwind readnone {
-; CHECK: logb_test
+define float @logb_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @logb(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: logb_test
 ; CHECK: call float @logbf(float %f)
 }
 
-define double @logb_test2(float %f) nounwind readnone {
-; CHECK: logb_test2
+define double @logb_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @logb(double %conv)
    ret double %call
+; CHECK-LABEL: logb_test2
 ; CHECK: call double @logb(double %conv)
 }
-define float @sin_test(float %f) nounwind readnone {
-; CHECK: sin_test
+define float @sin_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @sin(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: sin_test
 ; CHECK: call float @sinf(float %f)
 }
 
-define double @sin_test2(float %f) nounwind readnone {
-; CHECK: sin_test2
+define double @sin_test2(float %f) {
    %conv = fpext float %f to double
    %call = call double @sin(double %conv)
    ret double %call
+; CHECK-LABEL: sin_test2
 ; CHECK: call double @sin(double %conv)
 }
 
-define float @sqrt_test(float %f) nounwind readnone {
-; CHECK: sqrt_test
+define float @sqrt_test(float %f) {
    %conv = fpext float %f to double
    %call = call double @sqrt(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: sqrt_test
 ; CHECK: call float @sqrtf(float %f)
 }
 
-define float @sqrt_int_test(float %f) nounwind readnone {
-; CHECK: sqrt_int_test
+define double @sqrt_test2(float %f) {
+   %conv = fpext float %f to double
+   %call = call double @sqrt(double %conv)
+   ret double %call
+; CHECK-LABEL: sqrt_test2
+; CHECK: call double @sqrt(double %conv)
+}
+
+define float @sqrt_int_test(float %f) {
    %conv = fpext float %f to double
    %call = call double @llvm.sqrt.f64(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: sqrt_int_test
 ; CHECK: call float @llvm.sqrt.f32(float %f)
 }
 
-define double @sqrt_test2(float %f) nounwind readnone {
-; CHECK: sqrt_test2
+define double @sqrt_int_test2(float %f) {
    %conv = fpext float %f to double
-   %call = call double @sqrt(double %conv)
+   %call = call double @llvm.sqrt.f64(double %conv)
    ret double %call
-; CHECK: call double @sqrt(double %conv)
+; CHECK-LABEL: sqrt_int_test2
+; CHECK: call double @llvm.sqrt.f64(double %conv)
 }
-define float @tan_test(float %f) nounwind readnone {
-; CHECK: tan_test
+
+define float @tan_test(float %f) {
    %conv = fpext float %f to double
    %call = call double @tan(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: tan_test
 ; CHECK: call float @tanf(float %f)
 }
 
-define double @tan_test2(float %f) nounwind readnone {
-; CHECK: tan_test2
+define double @tan_test2(float %f) {
    %conv = fpext float %f to double
    %call = call double @tan(double %conv)
    ret double %call
+; CHECK-LABEL: tan_test2
 ; CHECK: call double @tan(double %conv)
 }
-define float @tanh_test(float %f) nounwind readnone {
-; CHECK: tanh_test
+define float @tanh_test(float %f) {
    %conv = fpext float %f to double
    %call = call double @tanh(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: tanh_test
 ; CHECK: call float @tanhf(float %f)
 }
 
-define double @tanh_test2(float %f) nounwind readnone {
-; CHECK: tanh_test2
+define double @tanh_test2(float %f) {
    %conv = fpext float %f to double
    %call = call double @tanh(double %conv)
    ret double %call
+; CHECK-LABEL: tanh_test2
 ; CHECK: call double @tanh(double %conv)
 }
 
-declare double @tanh(double) nounwind readnone
-declare double @tan(double) nounwind readnone
-declare double @sqrt(double) nounwind readnone
-declare double @sin(double) nounwind readnone
-declare double @log2(double) nounwind readnone
-declare double @log1p(double) nounwind readnone
-declare double @log10(double) nounwind readnone
-declare double @log(double) nounwind readnone
-declare double @logb(double) nounwind readnone
-declare double @exp10(double) nounwind readnone
-declare double @expm1(double) nounwind readnone
-declare double @exp(double) nounwind readnone
-declare double @cbrt(double) nounwind readnone
-declare double @atanh(double) nounwind readnone
-declare double @atan(double) nounwind readnone
-declare double @acos(double) nounwind readnone
-declare double @acosh(double) nounwind readnone
-declare double @asin(double) nounwind readnone
-declare double @asinh(double) nounwind readnone
-
-declare double @llvm.sqrt.f64(double) nounwind readnone
+declare double @tanh(double) #1
+declare double @tan(double) #1
+
+; sqrt is a special case: the shrinking optimization 
+; is valid even without unsafe-fp-math.
+declare double @sqrt(double) 
+declare double @llvm.sqrt.f64(double) 
+
+declare double @sin(double) #1
+declare double @log2(double) #1
+declare double @log1p(double) #1
+declare double @log10(double) #1
+declare double @log(double) #1
+declare double @logb(double) #1
+declare double @exp10(double) #1
+declare double @expm1(double) #1
+declare double @exp(double) #1
+declare double @cbrt(double) #1
+declare double @atanh(double) #1
+declare double @atan(double) #1
+declare double @acos(double) #1
+declare double @acosh(double) #1
+declare double @asin(double) #1
+declare double @asinh(double) #1
+
+attributes #1 = { "unsafe-fp-math"="true" }
 
diff --git a/test/Transforms/InstCombine/fabs.ll b/test/Transforms/InstCombine/fabs.ll
new file mode 100644
index 0000000..0479549
--- /dev/null
+++ b/test/Transforms/InstCombine/fabs.ll
@@ -0,0 +1,100 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Make sure all library calls are eliminated when the input is known positive.
+
+declare float @fabsf(float)
+declare double @fabs(double)
+declare fp128 @fabsl(fp128)
+
+define float @square_fabs_call_f32(float %x) {
+  %mul = fmul float %x, %x
+  %fabsf = tail call float @fabsf(float %mul)
+  ret float %fabsf
+
+; CHECK-LABEL: square_fabs_call_f32(
+; CHECK-NEXT: %mul = fmul float %x, %x
+; CHECK-NEXT: ret float %mul
+}
+
+define double @square_fabs_call_f64(double %x) {
+  %mul = fmul double %x, %x
+  %fabs = tail call double @fabs(double %mul)
+  ret double %fabs
+
+; CHECK-LABEL: square_fabs_call_f64(
+; CHECK-NEXT: %mul = fmul double %x, %x
+; CHECK-NEXT: ret double %mul
+}
+
+define fp128 @square_fabs_call_f128(fp128 %x) {
+  %mul = fmul fp128 %x, %x
+  %fabsl = tail call fp128 @fabsl(fp128 %mul)
+  ret fp128 %fabsl
+
+; CHECK-LABEL: square_fabs_call_f128(
+; CHECK-NEXT: %mul = fmul fp128 %x, %x
+; CHECK-NEXT: ret fp128 %mul
+}
+
+; Make sure all intrinsic calls are eliminated when the input is known positive.
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128 @llvm.fabs.f128(fp128)
+
+define float @square_fabs_intrinsic_f32(float %x) {
+  %mul = fmul float %x, %x
+  %fabsf = tail call float @llvm.fabs.f32(float %mul)
+  ret float %fabsf
+
+; CHECK-LABEL: square_fabs_intrinsic_f32(
+; CHECK-NEXT: %mul = fmul float %x, %x
+; CHECK-NEXT: ret float %mul
+}
+
+define double @square_fabs_intrinsic_f64(double %x) {
+  %mul = fmul double %x, %x
+  %fabs = tail call double @llvm.fabs.f64(double %mul)
+  ret double %fabs
+
+; CHECK-LABEL: square_fabs_intrinsic_f64(
+; CHECK-NEXT: %mul = fmul double %x, %x
+; CHECK-NEXT: ret double %mul
+}
+
+define fp128 @square_fabs_intrinsic_f128(fp128 %x) {
+  %mul = fmul fp128 %x, %x
+  %fabsl = tail call fp128 @llvm.fabs.f128(fp128 %mul)
+  ret fp128 %fabsl
+
+; CHECK-LABEL: square_fabs_intrinsic_f128(
+; CHECK-NEXT: %mul = fmul fp128 %x, %x
+; CHECK-NEXT: ret fp128 %mul
+}
+
+; Shrinking a library call to a smaller type should not be inhibited by nor inhibit the square optimization.
+
+define float @square_fabs_shrink_call1(float %x) {
+  %ext = fpext float %x to double
+  %sq = fmul double %ext, %ext
+  %fabs = call double @fabs(double %sq)
+  %trunc = fptrunc double %fabs to float
+  ret float %trunc
+
+; CHECK-LABEL: square_fabs_shrink_call1(
+; CHECK-NEXT: %trunc = fmul float %x, %x
+; CHECK-NEXT: ret float %trunc
+}
+
+define float @square_fabs_shrink_call2(float %x) {
+  %sq = fmul float %x, %x
+  %ext = fpext float %sq to double
+  %fabs = call double @fabs(double %ext)
+  %trunc = fptrunc double %fabs to float
+  ret float %trunc
+
+; CHECK-LABEL: square_fabs_shrink_call2(
+; CHECK-NEXT: %sq = fmul float %x, %x
+; CHECK-NEXT: ret float %sq
+}
+
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index 2ee4b0f..b0ec895 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -530,3 +530,173 @@ define float @fact_div6(float %x) {
 ; CHECK: fact_div6
 ; CHECK: %t3 = fsub fast float %t1, %t2
 }
+
+; =========================================================================
+;
+;   Test-cases for square root
+;
+; =========================================================================
+
+; A squared factor fed into a square root intrinsic should be hoisted out
+; as a fabs() value.
+; We have to rely on a function-level attribute to enable this optimization
+; because intrinsics don't currently have access to IR-level fast-math
+; flags. If that changes, we can relax the requirement on all of these
+; tests to just specify 'fast' on the sqrt.
+
+attributes #0 = { "unsafe-fp-math" = "true" }
+
+declare double @llvm.sqrt.f64(double)
+
+define double @sqrt_intrinsic_arg_squared(double %x) #0 {
+  %mul = fmul fast double %x, %x
+  %sqrt = call double @llvm.sqrt.f64(double %mul)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_arg_squared(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: ret double %fabs
+}
+
+; Check all 6 combinations of a 3-way multiplication tree where
+; one factor is repeated.
+
+define double @sqrt_intrinsic_three_args1(double %x, double %y) #0 {
+  %mul = fmul fast double %y, %x
+  %mul2 = fmul fast double %mul, %x
+  %sqrt = call double @llvm.sqrt.f64(double %mul2)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args1(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args2(double %x, double %y) #0 {
+  %mul = fmul fast double %x, %y
+  %mul2 = fmul fast double %mul, %x
+  %sqrt = call double @llvm.sqrt.f64(double %mul2)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args2(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args3(double %x, double %y) #0 {
+  %mul = fmul fast double %x, %x
+  %mul2 = fmul fast double %mul, %y
+  %sqrt = call double @llvm.sqrt.f64(double %mul2)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args3(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args4(double %x, double %y) #0 {
+  %mul = fmul fast double %y, %x
+  %mul2 = fmul fast double %x, %mul
+  %sqrt = call double @llvm.sqrt.f64(double %mul2)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args4(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args5(double %x, double %y) #0 {
+  %mul = fmul fast double %x, %y
+  %mul2 = fmul fast double %x, %mul
+  %sqrt = call double @llvm.sqrt.f64(double %mul2)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args5(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args6(double %x, double %y) #0 {
+  %mul = fmul fast double %x, %x
+  %mul2 = fmul fast double %y, %mul
+  %sqrt = call double @llvm.sqrt.f64(double %mul2)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args6(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_arg_4th(double %x) #0 {
+  %mul = fmul fast double %x, %x
+  %mul2 = fmul fast double %mul, %mul
+  %sqrt = call double @llvm.sqrt.f64(double %mul2)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_arg_4th(
+; CHECK-NEXT: %mul = fmul fast double %x, %x
+; CHECK-NEXT: ret double %mul
+}
+
+define double @sqrt_intrinsic_arg_5th(double %x) #0 {
+  %mul = fmul fast double %x, %x
+  %mul2 = fmul fast double %mul, %x
+  %mul3 = fmul fast double %mul2, %mul
+  %sqrt = call double @llvm.sqrt.f64(double %mul3)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_arg_5th(
+; CHECK-NEXT: %mul = fmul fast double %x, %x
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %x)
+; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+; Check that square root calls have the same behavior.
+
+declare float @sqrtf(float)
+declare double @sqrt(double)
+declare fp128 @sqrtl(fp128)
+
+define float @sqrt_call_squared_f32(float %x) #0 {
+  %mul = fmul fast float %x, %x
+  %sqrt = call float @sqrtf(float %mul)
+  ret float %sqrt
+
+; CHECK-LABEL: sqrt_call_squared_f32(
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: ret float %fabs
+}
+
+define double @sqrt_call_squared_f64(double %x) #0 {
+  %mul = fmul fast double %x, %x
+  %sqrt = call double @sqrt(double %mul)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_call_squared_f64(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: ret double %fabs
+}
+
+define fp128 @sqrt_call_squared_f128(fp128 %x) #0 {
+  %mul = fmul fast fp128 %x, %x
+  %sqrt = call fp128 @sqrtl(fp128 %mul)
+  ret fp128 %sqrt
+
+; CHECK-LABEL: sqrt_call_squared_f128(
+; CHECK-NEXT: %fabs = call fp128 @llvm.fabs.f128(fp128 %x)
+; CHECK-NEXT: ret fp128 %fabs
+}
+
diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll
index 18cbf9d..a776765 100644
--- a/test/Transforms/InstCombine/fmul.ll
+++ b/test/Transforms/InstCombine/fmul.ll
@@ -123,3 +123,32 @@ define float @test11(float %x, float %y) {
 ; CHECK-NOT: fadd float
 ; CHECK: fadd fast float
 }
+
+; PR21126: http://llvm.org/bugs/show_bug.cgi?id=21126
+; With unsafe/fast math, sqrt(X) * sqrt(X) is just X.
+declare double @llvm.sqrt.f64(double)
+
+define double @sqrt_squared1(double %f) {
+  %sqrt = call double @llvm.sqrt.f64(double %f)
+  %mul = fmul fast double %sqrt, %sqrt
+  ret double %mul
+; CHECK-LABEL: @sqrt_squared1(
+; CHECK-NEXT: ret double %f
+}
+
+; With unsafe/fast math, sqrt(X) * sqrt(X) is just X, 
+; but make sure another use of the sqrt is intact.
+; Note that the remaining fmul is altered but is not 'fast'
+; itself because it was not marked 'fast' originally. 
+; Thus, we have an overall fast result, but no more indication of
+; 'fast'ness in the code.
+define double @sqrt_squared2(double %f) {
+  %sqrt = call double @llvm.sqrt.f64(double %f)
+  %mul1 = fmul fast double %sqrt, %sqrt
+  %mul2 = fmul double %mul1, %sqrt
+  ret double %mul2
+; CHECK-LABEL: @sqrt_squared2(
+; CHECK-NEXT: %sqrt = call double @llvm.sqrt.f64(double %f)
+; CHECK-NEXT: %mul2 = fmul double %sqrt, %f
+; CHECK-NEXT: ret double %mul2
+}
diff --git a/test/Transforms/InstCombine/fold-phi.ll b/test/Transforms/InstCombine/fold-phi.ll
index bd01d58..c6bb1b3 100644
--- a/test/Transforms/InstCombine/fold-phi.ll
+++ b/test/Transforms/InstCombine/fold-phi.ll
@@ -17,23 +17,23 @@ end:
   ret float %add5
 }
 
-; CHECK: fold_phi
-define float @fold_phi(float %a) nounwind {
+; CHECK-LABEL: @pr21377(
+define void @pr21377(i32) {
 entry:
-  br label %for.body
-
-for.body:
-; CHECK: phi float
-; CHECK-NEXT: br i1 undef
-  %sum.057 = phi float [ 0.000000e+00, %entry ], [ %add5, %bb0 ]
-  %add5 = fadd float %sum.057, 1.0 ;; Should be moved to the latch!
-  br i1 undef, label %bb0, label %end
-
-; CHECK: bb0:
-bb0:
-; CHECK: fadd float
-  br label %for.body
-
-end:
-  ret float %add5
+  br label %while.body
+
+while.body:                                       ; preds = %if.end, %entry
+  %phi1 = phi i64 [ undef, %entry ], [ %or2, %if.end ]
+  %zext = zext i32 %0 to i64
+  br i1 undef, label %if.end, label %if.else
+
+if.else:                                          ; preds = %while.body
+  %or1 = or i64 %phi1, %zext
+  %and = and i64 %or1, 4294967295
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %while.body
+  %phi2 = phi i64 [ %and, %if.else ], [ undef, %while.body ]
+  %or2 = or i64 %phi2, %zext
+  br label %while.body
 }
diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll
index 9be66fd..ac03402 100644
--- a/test/Transforms/InstCombine/fpcast.ll
+++ b/test/Transforms/InstCombine/fpcast.ll
@@ -53,3 +53,23 @@ define half @test5(float %a, float %b, float %c) {
 }
 
 declare float @llvm.fabs.f32(float) nounwind readonly
+
+define <1 x float> @test6(<1 x double> %V) {
+  %frem = frem <1 x double> %V, %V
+  %trunc = fptrunc <1 x double> %frem to <1 x float>
+  ret <1 x float> %trunc
+; CHECK-LABEL: @test6
+; CHECK-NEXT: %[[frem:.*]]  = frem <1 x double> %V, %V
+; CHECK-NEXT: %[[trunc:.*]] = fptrunc <1 x double> %[[frem]] to <1 x float>
+; CHECK-NEXT: ret <1 x float> %trunc
+}
+
+define float @test7(double %V) {
+  %frem = frem double %V, 1.000000e+00
+  %trunc = fptrunc double %frem to float
+  ret float %trunc
+; CHECK-LABEL: @test7
+; CHECK-NEXT: %[[frem:.*]]  = frem double %V, 1.000000e+00
+; CHECK-NEXT: %[[trunc:.*]] = fptrunc double %frem to float
+; CHECK-NEXT: ret float %trunc
+}
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index 3240c6d..bb46662 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -6,6 +6,7 @@ target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
 %pair = type { i32, i32 }
 %struct.B = type { double }
 %struct.A = type { %struct.B, i32, i32 }
+%struct.C = type { [7 x i8] }
 
 
 @Global = constant [10 x i8] c"helloworld"
@@ -580,6 +581,16 @@ define i32 addrspace(1)* @test33_array_struct_as1([10 x %struct.Key] addrspace(1
   ret i32 addrspace(1)* %C
 }
 
+define i32 addrspace(1)* @test33_addrspacecast(%struct.Key* %A) {
+; CHECK-LABEL: @test33_addrspacecast(
+; CHECK: %C = getelementptr %struct.Key* %A, i64 0, i32 0, i32 1
+; CHECK-NEXT: addrspacecast i32* %C to i32 addrspace(1)*
+; CHECK-NEXT: ret
+  %B = addrspacecast %struct.Key* %A to %struct.anon addrspace(1)*
+  %C = getelementptr %struct.anon addrspace(1)* %B, i32 0, i32 2
+  ret i32 addrspace(1)* %C
+}
+
 	%T2 = type { i8*, i8 }
 define i8* @test34(i8* %Val, i64 %V) nounwind {
 entry:
@@ -692,7 +703,7 @@ define void @test39(%struct.ham* %arg, i8 %arg1) nounwind {
 
 ; CHECK-LABEL: @test39(
 ; CHECK: getelementptr inbounds %struct.ham* %arg, i64 0, i32 2
-; CHECK: getelementptr inbounds i8* %tmp3, i64 -8
+; CHECK: getelementptr inbounds i8* %{{.+}}, i64 -8
 }
 
 define i1 @pr16483([1 x i8]* %a, [1 x i8]* %b) {
@@ -803,6 +814,78 @@ define i16 @test41([3 x i32] addrspace(1)* %array) {
 ; CHECK-NEXT: ret i16 8
 }
 
+define i8* @test42(i8* %c1, i8* %c2) {
+  %ptrtoint = ptrtoint i8* %c1 to i64
+  %sub = sub i64 0, %ptrtoint
+  %gep = getelementptr inbounds i8* %c2, i64 %sub
+  ret i8* %gep
+
+; CHECK-LABEL: @test42(
+; CHECK-NEXT:  [[PTRTOINT1:%.*]] = ptrtoint i8* %c1 to i64
+; CHECK-NEXT:  [[PTRTOINT2:%.*]] = ptrtoint i8* %c2 to i64
+; CHECK-NEXT:  [[SUB:%.*]] = sub i64 [[PTRTOINT2]], [[PTRTOINT1]]
+; CHECK-NEXT:  [[INTTOPTR:%.*]] = inttoptr i64 [[SUB]] to i8*
+; CHECK-NEXT:  ret i8* [[INTTOPTR]]
+}
+
+define i16* @test43(i16* %c1, i16* %c2) {
+  %ptrtoint = ptrtoint i16* %c1 to i64
+  %sub = sub i64 0, %ptrtoint
+  %shr = ashr i64 %sub, 1
+  %gep = getelementptr inbounds i16* %c2, i64 %shr
+  ret i16* %gep
+
+; CHECK-LABEL: @test43(
+; CHECK-NEXT:  [[PTRTOINT1:%.*]] = ptrtoint i16* %c1 to i64
+; CHECK-NEXT:  [[PTRTOINT2:%.*]] = ptrtoint i16* %c2 to i64
+; CHECK-NEXT:  [[SUB:%.*]] = sub i64 [[PTRTOINT2]], [[PTRTOINT1]]
+; CHECK-NEXT:  [[INTTOPTR:%.*]] = inttoptr i64 [[SUB]] to i16*
+; CHECK-NEXT:  ret i16* [[INTTOPTR]]
+}
+
+define %struct.C* @test44(%struct.C* %c1, %struct.C* %c2) {
+  %ptrtoint = ptrtoint %struct.C* %c1 to i64
+  %sub = sub i64 0, %ptrtoint
+  %shr = sdiv i64 %sub, 7
+  %gep = getelementptr inbounds %struct.C* %c2, i64 %shr
+  ret %struct.C* %gep
+
+; CHECK-LABEL: @test44(
+; CHECK-NEXT:  [[PTRTOINT1:%.*]] = ptrtoint %struct.C* %c1 to i64
+; CHECK-NEXT:  [[PTRTOINT2:%.*]] = ptrtoint %struct.C* %c2 to i64
+; CHECK-NEXT:  [[SUB:%.*]] = sub i64 [[PTRTOINT2]], [[PTRTOINT1]]
+; CHECK-NEXT:  [[INTTOPTR:%.*]] = inttoptr i64 [[SUB]] to %struct.C*
+; CHECK-NEXT:  ret %struct.C* [[INTTOPTR]]
+}
+
+define %struct.C* @test45(%struct.C* %c1, %struct.C** %c2) {
+  %ptrtoint1 = ptrtoint %struct.C* %c1 to i64
+  %ptrtoint2 = ptrtoint %struct.C** %c2 to i64
+  %sub = sub i64 %ptrtoint2, %ptrtoint1 ; C2 - C1
+  %shr = sdiv i64 %sub, 7
+  %gep = getelementptr inbounds %struct.C* %c1, i64 %shr ; C1 + (C2 - C1)
+  ret %struct.C* %gep
+
+; CHECK-LABEL: @test45(
+; CHECK-NEXT:  [[BITCAST:%.*]] = bitcast %struct.C** %c2 to %struct.C*
+; CHECK-NEXT:  ret %struct.C* [[BITCAST]]
+}
+
+define %struct.C* @test46(%struct.C* %c1, %struct.C* %c2, i64 %N) {
+  %ptrtoint = ptrtoint %struct.C* %c1 to i64
+  %sub = sub i64 0, %ptrtoint
+  %sdiv = sdiv i64 %sub, %N
+  %gep = getelementptr inbounds %struct.C* %c2, i64 %sdiv
+  ret %struct.C* %gep
+
+; CHECK-LABEL: @test46(
+; CHECK-NEXT:  [[PTRTOINT:%.*]] = ptrtoint %struct.C* %c1 to i64
+; CHECK-NEXT:  [[SUB:%.*]] = sub i64 0, [[PTRTOINT]]
+; CHECK-NEXT:  [[SDIV:%.*]] = sdiv i64 [[SUB]], %N
+; CHECK-NEXT:  [[GEP:%.*]] = getelementptr inbounds %struct.C* %c2, i64 %sdiv
+; CHECK-NEXT:  ret %struct.C* [[GEP]]
+}
+
 define i32 addrspace(1)* @ascast_0_gep(i32* %p) nounwind {
 ; CHECK-LABEL: @ascast_0_gep(
 ; CHECK-NOT: getelementptr
diff --git a/test/Transforms/InstCombine/icmp-logical.ll b/test/Transforms/InstCombine/icmp-logical.ll
index d5d8cbc..faae201 100644
--- a/test/Transforms/InstCombine/icmp-logical.ll
+++ b/test/Transforms/InstCombine/icmp-logical.ll
@@ -150,3 +150,23 @@ define i1 @nomask_rhs(i32 %in) {
   %val = or i1 %tst1, %tst2
   ret i1 %val
 }
+
+define i1 @fold_mask_cmps_to_false(i32 %x) {
+; CHECK-LABEL: @fold_mask_cmps_to_false
+; CHECK: ret i1 false
+  %1 = and i32 %x, 2147483647
+  %2 = icmp eq i32 %1, 0
+  %3 = icmp eq i32 %x, 2147483647
+  %4 = and i1 %3, %2
+  ret i1 %4
+}
+
+define i1 @fold_mask_cmps_to_true(i32 %x) {
+; CHECK-LABEL: @fold_mask_cmps_to_true
+; CHECK: ret i1 true
+  %1 = and i32 %x, 2147483647
+  %2 = icmp ne i32 %1, 0
+  %3 = icmp ne i32 %x, 2147483647
+  %4 = or i1 %3, %2
+  ret i1 %4
+}
diff --git a/test/Transforms/InstCombine/icmp-range.ll b/test/Transforms/InstCombine/icmp-range.ll
new file mode 100644
index 0000000..97d231f
--- /dev/null
+++ b/test/Transforms/InstCombine/icmp-range.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; These should be InstSimplify checks, but most of the code
+; is currently only in InstCombine.  TODO: move supporting code
+
+; Definitely out of range
+define i1 @test_nonzero(i32* nocapture readonly %arg) {
+; CHECK-LABEL:test_nonzero
+; CHECK: ret i1 true
+  %val = load i32* %arg, !range !0
+  %rval = icmp ne i32 %val, 0
+  ret i1 %rval
+}
+define i1 @test_nonzero2(i32* nocapture readonly %arg) {
+; CHECK-LABEL:test_nonzero2
+; CHECK: ret i1 false
+  %val = load i32* %arg, !range !0
+  %rval = icmp eq i32 %val, 0
+  ret i1 %rval
+}
+
+; Potentially in range
+define i1 @test_nonzero3(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero3
+; Check that this does not trigger - it wouldn't be legal
+; CHECK: icmp
+  %val = load i32* %arg, !range !1
+  %rval = icmp ne i32 %val, 0
+  ret i1 %rval
+}
+
+; Definitely in range
+define i1 @test_nonzero4(i8* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero4
+; CHECK: ret i1 false
+  %val = load i8* %arg, !range !2
+  %rval = icmp ne i8 %val, 0
+  ret i1 %rval
+}
+
+define i1 @test_nonzero5(i8* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero5
+; CHECK: ret i1 false
+  %val = load i8* %arg, !range !2
+  %rval = icmp ugt i8 %val, 0
+  ret i1 %rval
+}
+
+; Cheaper checks (most values in range meet requirements)
+define i1 @test_nonzero6(i8* %argw) {
+; CHECK-LABEL: test_nonzero6
+; CHECK: icmp ne i8 %val, 0
+  %val = load i8* %argw, !range !3
+  %rval = icmp sgt i8 %val, 0
+  ret i1 %rval
+}
+
+
+!0 = metadata !{i32 1, i32 6} 
+!1 = metadata !{i32 0, i32 6} 
+!2 = metadata !{i8 0, i8 1} 
+!3 = metadata !{i8 0, i8 6} 
diff --git a/test/Transforms/InstCombine/icmp-shr.ll b/test/Transforms/InstCombine/icmp-shr.ll
new file mode 100644
index 0000000..52414b9
--- /dev/null
+++ b/test/Transforms/InstCombine/icmp-shr.ll
@@ -0,0 +1,378 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; CHECK-LABEL: @lshr_eq_msb_low_last_zero
+; CHECK-NEXT: icmp ugt i8 %a, 6
+define i1 @lshr_eq_msb_low_last_zero(i8 %a) {
+ %shr = lshr i8 127, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_eq_msb_low_second_zero
+; CHECK-NEXT: icmp ugt i8 %a, 6
+define i1 @ashr_eq_msb_low_second_zero(i8 %a) {
+ %shr = ashr i8 127, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_ne_msb_low_last_zero
+; CHECK-NEXT: icmp ult i8 %a, 7
+define i1 @lshr_ne_msb_low_last_zero(i8 %a) {
+ %shr = lshr i8 127, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_ne_msb_low_second_zero
+; CHECK-NEXT: icmp ult i8 %a, 7
+define i1 @ashr_ne_msb_low_second_zero(i8 %a) {
+ %shr = ashr i8 127, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_eq_both_equal
+; CHECK-NEXT: icmp eq i8 %a, 0
+define i1 @ashr_eq_both_equal(i8 %a) {
+ %shr = ashr i8 128, %a
+ %cmp = icmp eq i8 %shr, 128
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_ne_both_equal
+; CHECK-NEXT: icmp ne i8 %a, 0
+define i1 @ashr_ne_both_equal(i8 %a) {
+ %shr = ashr i8 128, %a
+ %cmp = icmp ne i8 %shr, 128
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_eq_both_equal
+; CHECK-NEXT: icmp eq i8 %a, 0
+define i1 @lshr_eq_both_equal(i8 %a) {
+ %shr = lshr i8 127, %a
+ %cmp = icmp eq i8 %shr, 127
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_ne_both_equal
+; CHECK-NEXT: icmp ne i8 %a, 0
+define i1 @lshr_ne_both_equal(i8 %a) {
+ %shr = lshr i8 127, %a
+ %cmp = icmp ne i8 %shr, 127
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_both_equal
+; CHECK-NEXT: icmp eq i8 %a, 0
+define i1 @exact_ashr_eq_both_equal(i8 %a) {
+ %shr = ashr exact i8 128, %a
+ %cmp = icmp eq i8 %shr, 128
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_both_equal
+; CHECK-NEXT: icmp ne i8 %a, 0
+define i1 @exact_ashr_ne_both_equal(i8 %a) {
+ %shr = ashr exact i8 128, %a
+ %cmp = icmp ne i8 %shr, 128
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_both_equal
+; CHECK-NEXT: icmp eq i8 %a, 0
+define i1 @exact_lshr_eq_both_equal(i8 %a) {
+ %shr = lshr exact i8 126, %a
+ %cmp = icmp eq i8 %shr, 126
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_both_equal
+; CHECK-NEXT: icmp ne i8 %a, 0
+define i1 @exact_lshr_ne_both_equal(i8 %a) {
+ %shr = lshr exact i8 126, %a
+ %cmp = icmp ne i8 %shr, 126
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_opposite_msb
+; CHECK-NEXT: icmp eq i8 %a, 7
+define i1 @exact_lshr_eq_opposite_msb(i8 %a) {
+ %shr = lshr exact i8 -128, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_eq_opposite_msb
+; CHECK-NEXT: icmp eq i8 %a, 7
+define i1 @lshr_eq_opposite_msb(i8 %a) {
+ %shr = lshr i8 -128, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_opposite_msb
+; CHECK-NEXT: icmp ne i8 %a, 7
+define i1 @exact_lshr_ne_opposite_msb(i8 %a) {
+ %shr = lshr exact i8 -128, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_ne_opposite_msb
+; CHECK-NEXT: icmp ne i8 %a, 7
+define i1 @lshr_ne_opposite_msb(i8 %a) {
+ %shr = lshr i8 -128, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq
+; CHECK-NEXT: icmp eq i8 %a, 7
+define i1 @exact_ashr_eq(i8 %a) {
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp eq i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne
+; CHECK-NEXT: icmp ne i8 %a, 7
+define i1 @exact_ashr_ne(i8 %a) {
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp ne i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq
+; CHECK-NEXT: icmp eq i8 %a, 2
+define i1 @exact_lshr_eq(i8 %a) {
+ %shr = lshr exact i8 4, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne
+; CHECK-NEXT: icmp ne i8 %a, 2
+define i1 @exact_lshr_ne(i8 %a) {
+ %shr = lshr exact i8 4, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq
+; CHECK-NEXT: icmp eq i8 %a, 7
+define i1 @nonexact_ashr_eq(i8 %a) {
+ %shr = ashr i8 -128, %a
+ %cmp = icmp eq i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne
+; CHECK-NEXT: icmp ne i8 %a, 7
+define i1 @nonexact_ashr_ne(i8 %a) {
+ %shr = ashr i8 -128, %a
+ %cmp = icmp ne i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq
+; CHECK-NEXT: icmp eq i8 %a, 2
+define i1 @nonexact_lshr_eq(i8 %a) {
+ %shr = lshr i8 4, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne
+; CHECK-NEXT: icmp ne i8 %a, 2
+define i1 @nonexact_lshr_ne(i8 %a) {
+ %shr = lshr i8 4, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_exactdiv
+; CHECK-NEXT: icmp eq i8 %a, 4
+define i1 @exact_lshr_eq_exactdiv(i8 %a) {
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp eq i8 %shr, 5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_exactdiv
+; CHECK-NEXT: icmp ne i8 %a, 4
+define i1 @exact_lshr_ne_exactdiv(i8 %a) {
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp ne i8 %shr, 5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_exactdiv
+; CHECK-NEXT: icmp eq i8 %a, 4
+define i1 @nonexact_lshr_eq_exactdiv(i8 %a) {
+ %shr = lshr i8 80, %a
+ %cmp = icmp eq i8 %shr, 5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_exactdiv
+; CHECK-NEXT: icmp ne i8 %a, 4
+define i1 @nonexact_lshr_ne_exactdiv(i8 %a) {
+ %shr = lshr i8 80, %a
+ %cmp = icmp ne i8 %shr, 5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_exactdiv
+; CHECK-NEXT: icmp eq i8 %a, 4
+define i1 @exact_ashr_eq_exactdiv(i8 %a) {
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp eq i8 %shr, -5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_exactdiv
+; CHECK-NEXT: icmp ne i8 %a, 4
+define i1 @exact_ashr_ne_exactdiv(i8 %a) {
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp ne i8 %shr, -5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_exactdiv
+; CHECK-NEXT: icmp eq i8 %a, 4
+define i1 @nonexact_ashr_eq_exactdiv(i8 %a) {
+ %shr = ashr i8 -80, %a
+ %cmp = icmp eq i8 %shr, -5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_exactdiv
+; CHECK-NEXT: icmp ne i8 %a, 4
+define i1 @nonexact_ashr_ne_exactdiv(i8 %a) {
+ %shr = ashr i8 -80, %a
+ %cmp = icmp ne i8 %shr, -5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_noexactdiv
+; CHECK-NEXT: ret i1 false
+define i1 @exact_lshr_eq_noexactdiv(i8 %a) {
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp eq i8 %shr, 31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_noexactdiv
+; CHECK-NEXT: ret i1 true
+define i1 @exact_lshr_ne_noexactdiv(i8 %a) {
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp ne i8 %shr, 31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_noexactdiv
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_lshr_eq_noexactdiv(i8 %a) {
+ %shr = lshr i8 80, %a
+ %cmp = icmp eq i8 %shr, 31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_noexactdiv
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_lshr_ne_noexactdiv(i8 %a) {
+ %shr = lshr i8 80, %a
+ %cmp = icmp ne i8 %shr, 31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_noexactdiv
+; CHECK-NEXT: ret i1 false
+define i1 @exact_ashr_eq_noexactdiv(i8 %a) {
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp eq i8 %shr, -31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_noexactdiv
+; CHECK-NEXT: ret i1 true
+define i1 @exact_ashr_ne_noexactdiv(i8 %a) {
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp ne i8 %shr, -31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_noexactdiv
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_ashr_eq_noexactdiv(i8 %a) {
+ %shr = ashr i8 -80, %a
+ %cmp = icmp eq i8 %shr, -31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_noexactdiv
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_ashr_ne_noexactdiv(i8 %a) {
+ %shr = ashr i8 -80, %a
+ %cmp = icmp ne i8 %shr, -31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_noexactlog
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_lshr_eq_noexactlog(i8 %a) {
+ %shr = lshr i8 90, %a
+ %cmp = icmp eq i8 %shr, 30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_noexactlog
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_lshr_ne_noexactlog(i8 %a) {
+ %shr = lshr i8 90, %a
+ %cmp = icmp ne i8 %shr, 30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_noexactlog
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_ashr_eq_noexactlog(i8 %a) {
+ %shr = ashr i8 -90, %a
+ %cmp = icmp eq i8 %shr, -30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_noexactlog
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_ashr_ne_noexactlog(i8 %a) {
+ %shr = ashr i8 -90, %a
+ %cmp = icmp ne i8 %shr, -30
+ ret i1 %cmp
+}
+
+; Don't try to fold the entire body of function @PR20945 into a
+; single `ret i1 true` statement.
+; If %B is equal to 1, then this function would return false.
+; As a consequence, the instruction combiner is not allowed to fold %cmp
+; to 'true'. Instead, it should replace %cmp with a simpler comparison
+; between %B and 1.
+
+; CHECK-LABEL: @PR20945(
+; CHECK: icmp ne i32 %B, 1
+define i1 @PR20945(i32 %B) {
+  %shr = ashr i32 -9, %B
+  %cmp = icmp ne i32 %shr, -5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @PR21222
+; CHECK: icmp eq i32 %B, 6
+define i1 @PR21222(i32 %B) {
+  %shr = ashr i32 -93, %B
+  %cmp = icmp eq i32 %shr, -2
+  ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 26e144f..279d86d 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -1148,22 +1148,6 @@ define i1 @icmp_shl_1_V_eq_32(i32 %V) {
   ret i1 %cmp
 }
 
-; CHECK-LABEL: @icmp_shl_1_V_eq_31(
-; CHECK-NEXT: ret i1 false
-define i1 @icmp_shl_1_V_eq_31(i32 %V) {
-  %shl = shl i32 1, %V
-  %cmp = icmp eq i32 %shl, 31
-  ret i1 %cmp
-}
-
-; CHECK-LABEL: @icmp_shl_1_V_ne_31(
-; CHECK-NEXT: ret i1 true
-define i1 @icmp_shl_1_V_ne_31(i32 %V) {
-  %shl = shl i32 1, %V
-  %cmp = icmp ne i32 %shl, 31
-  ret i1 %cmp
-}
-
 ; CHECK-LABEL: @icmp_shl_1_V_ult_30(
 ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %V, 5
 ; CHECK-NEXT: ret i1 [[CMP]]
@@ -1209,22 +1193,6 @@ define i1 @icmp_shl_1_V_uge_2147483648(i32 %V) {
   ret i1 %cmp
 }
 
-; CHECK-LABEL: @icmp_shl_1_V_ugt_2147483648(
-; CHECK-NEXT: ret i1 false
-define i1 @icmp_shl_1_V_ugt_2147483648(i32 %V) {
-  %shl = shl i32 1, %V
-  %cmp = icmp ugt i32 %shl, 2147483648
-  ret i1 %cmp
-}
-
-; CHECK-LABEL: @icmp_shl_1_V_ule_2147483648(
-; CHECK-NEXT: ret i1 true
-define i1 @icmp_shl_1_V_ule_2147483648(i32 %V) {
-  %shl = shl i32 1, %V
-  %cmp = icmp ule i32 %shl, 2147483648
-  ret i1 %cmp
-}
-
 ; CHECK-LABEL: @icmp_shl_1_V_ult_2147483648(
 ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %V, 31
 ; CHECK-NEXT: ret i1 [[CMP]]
@@ -1424,3 +1392,133 @@ define i1 @icmp_neg_cst_slt(i32 %a) {
   %2 = icmp slt i32 %1, -10
   ret i1 %2
 }
+
+; CHECK-LABEL: @icmp_and_or_lshr
+; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl nuw i32 1, %y
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[SHL]], 1
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[OR]], %x
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_and_or_lshr(i32 %x, i32 %y) {
+  %shf = lshr i32 %x, %y
+  %or = or i32 %shf, %x
+  %and = and i32 %or, 1
+  %ret = icmp ne i32 %and, 0
+  ret i1 %ret
+}
+
+; CHECK-LABEL: @icmp_and_or_lshr_cst
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 3
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_and_or_lshr_cst(i32 %x) {
+  %shf = lshr i32 %x, 1
+  %or = or i32 %shf, %x
+  %and = and i32 %or, 1
+  %ret = icmp ne i32 %and, 0
+  ret i1 %ret
+}
+
+; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_2
+; CHECK-NEXT: %cmp = icmp ugt i32 %a, 29
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_zero_ap2_non_zero_2(i32 %a) {
+ %shl = shl i32 4, %a
+ %cmp = icmp eq i32 %shl, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_4
+; CHECK-NEXT: %cmp = icmp ugt i32 %a, 30
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_zero_ap2_non_zero_4(i32 %a) {
+ %shl = shl i32 -2, %a
+ %cmp = icmp eq i32 %shl, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_both_positive
+; CHECK-NEXT: %cmp = icmp eq i32 %a, 0
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_non_zero_ap2_non_zero_both_positive(i32 %a) {
+ %shl = shl i32 50, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_both_negative
+; CHECK-NEXT: %cmp = icmp eq i32 %a, 0
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_non_zero_ap2_non_zero_both_negative(i32 %a) {
+ %shl = shl i32 -50, %a
+ %cmp = icmp eq i32 %shl, -50
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_1
+; CHECK-NEXT: ret i1 false
+define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_1(i32 %a) {
+ %shl = shl i32 50, %a
+ %cmp = icmp eq i32 %shl, 25
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_2
+; CHECK-NEXT: %cmp = icmp eq i32 %a, 1
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_2(i32 %a) {
+ %shl = shl i32 25, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_3
+; CHECK-NEXT: ret i1 false
+define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_3(i32 %a) {
+ %shl = shl i32 26, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_sgt_zero_add_nsw
+; CHECK-NEXT: icmp sgt i32 %a, -1
+define i1 @icmp_sgt_zero_add_nsw(i32 %a) {
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sgt i32 %add, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_sge_zero_add_nsw
+; CHECK-NEXT: icmp sgt i32 %a, -2
+define i1 @icmp_sge_zero_add_nsw(i32 %a) {
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sge i32 %add, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_slt_zero_add_nsw
+; CHECK-NEXT: icmp slt i32 %a, -1
+define i1 @icmp_slt_zero_add_nsw(i32 %a) {
+ %add = add nsw i32 %a, 1
+ %cmp = icmp slt i32 %add, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_sle_zero_add_nsw
+; CHECK-NEXT: icmp slt i32 %a, 0
+define i1 @icmp_sle_zero_add_nsw(i32 %a) {
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sle i32 %add, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_cmpxchg_strong
+; CHECK-NEXT: %[[xchg:.*]] = cmpxchg i32* %sc, i32 %old_val, i32 %new_val seq_cst seq_cst
+; CHECK-NEXT: %[[icmp:.*]] = extractvalue { i32, i1 } %[[xchg]], 1
+; CHECK-NEXT: ret i1 %[[icmp]]
+define zeroext i1 @icmp_cmpxchg_strong(i32* %sc, i32 %old_val, i32 %new_val) {
+  %xchg = cmpxchg i32* %sc, i32 %old_val, i32 %new_val seq_cst seq_cst
+  %xtrc = extractvalue { i32, i1 } %xchg, 0
+  %icmp = icmp eq i32 %xtrc, %old_val
+  ret i1 %icmp
+}
diff --git a/test/Transforms/InstCombine/load-addrspace-cast.ll b/test/Transforms/InstCombine/load-addrspace-cast.ll
deleted file mode 100644
index fd6339c..0000000
--- a/test/Transforms/InstCombine/load-addrspace-cast.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt -instcombine -S < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-n8:16:32:64"
-
-define i32* @pointer_to_addrspace_pointer(i32 addrspace(1)** %x) nounwind {
-; CHECK-LABEL: @pointer_to_addrspace_pointer(
-; CHECK: load
-; CHECK: addrspacecast
-  %y = bitcast i32 addrspace(1)** %x to i32**
-  %z = load i32** %y
-  ret i32* %z
-}
-
diff --git a/test/Transforms/InstCombine/load.ll b/test/Transforms/InstCombine/load.ll
index d11e08e..b4b7558 100644
--- a/test/Transforms/InstCombine/load.ll
+++ b/test/Transforms/InstCombine/load.ll
@@ -1,6 +1,8 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
 ; This test makes sure that these instructions are properly eliminated.
-;
-; RUN: opt < %s -instcombine -S | not grep load
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 @X = constant i32 42		; <i32*> [#uses=2]
 @X2 = constant i32 47		; <i32*> [#uses=1]
@@ -10,47 +12,63 @@
 @GLOBAL = internal constant [4 x i32] zeroinitializer
 
 
+; CHECK-LABEL: @test1(
+; CHECK-NOT: load
 define i32 @test1() {
 	%B = load i32* @X		; <i32> [#uses=1]
 	ret i32 %B
 }
 
+; CHECK-LABEL: @test2(
+; CHECK-NOT: load
 define float @test2() {
 	%A = getelementptr [2 x { i32, float }]* @Y, i64 0, i64 1, i32 1		; <float*> [#uses=1]
 	%B = load float* %A		; <float> [#uses=1]
 	ret float %B
 }
 
+; CHECK-LABEL: @test3(
+; CHECK-NOT: load
 define i32 @test3() {
 	%A = getelementptr [2 x { i32, float }]* @Y, i64 0, i64 0, i32 0		; <i32*> [#uses=1]
 	%B = load i32* %A		; <i32> [#uses=1]
 	ret i32 %B
 }
 
+; CHECK-LABEL: @test4(
+; CHECK-NOT: load
 define i32 @test4() {
 	%A = getelementptr [2 x { i32, float }]* @Z, i64 0, i64 1, i32 0		; <i32*> [#uses=1]
 	%B = load i32* %A		; <i32> [#uses=1]
 	ret i32 %B
 }
 
+; CHECK-LABEL: @test5(
+; CHECK-NOT: load
 define i32 @test5(i1 %C) {
 	%Y = select i1 %C, i32* @X, i32* @X2		; <i32*> [#uses=1]
 	%Z = load i32* %Y		; <i32> [#uses=1]
 	ret i32 %Z
 }
 
+; CHECK-LABEL: @test7(
+; CHECK-NOT: load
 define i32 @test7(i32 %X) {
 	%V = getelementptr i32* null, i32 %X		; <i32*> [#uses=1]
 	%R = load i32* %V		; <i32> [#uses=1]
 	ret i32 %R
 }
 
+; CHECK-LABEL: @test8(
+; CHECK-NOT: load
 define i32 @test8(i32* %P) {
 	store i32 1, i32* %P
 	%X = load i32* %P		; <i32> [#uses=1]
 	ret i32 %X
 }
 
+; CHECK-LABEL: @test9(
+; CHECK-NOT: load
 define i32 @test9(i32* %P) {
 	%X = load i32* %P		; <i32> [#uses=1]
 	%Y = load i32* %P		; <i32> [#uses=1]
@@ -58,6 +76,8 @@ define i32 @test9(i32* %P) {
 	ret i32 %Z
 }
 
+; CHECK-LABEL: @test10(
+; CHECK-NOT: load
 define i32 @test10(i1 %C.upgrd.1, i32* %P, i32* %Q) {
 	br i1 %C.upgrd.1, label %T, label %F
 T:		; preds = %0
@@ -72,6 +92,8 @@ C:		; preds = %F, %T
 	ret i32 %V
 }
 
+; CHECK-LABEL: @test11(
+; CHECK-NOT: load
 define double @test11(double* %p) {
   %t0 = getelementptr double* %p, i32 1
   store double 2.0, double* %t0
@@ -80,19 +102,51 @@ define double @test11(double* %p) {
   ret double %x
 }
 
+; CHECK-LABEL: @test12(
+; CHECK-NOT: load
 define i32 @test12(i32* %P) {
-        %A = alloca i32
-        store i32 123, i32* %A
-        ; Cast the result of the load not the source
-        %Q = bitcast i32* %A to i32*
-        %V = load i32* %Q
-        ret i32 %V
+  %A = alloca i32
+  store i32 123, i32* %A
+  ; Cast the result of the load not the source
+  %Q = bitcast i32* %A to i32*
+  %V = load i32* %Q
+  ret i32 %V
 }
 
+; CHECK-LABEL: @test13(
+; CHECK-NOT: load
 define <16 x i8> @test13(<2 x i64> %x) {
-entry:
-	%tmp = load <16 x i8> * bitcast ([4 x i32]* @GLOBAL to <16 x i8>*)
-	ret <16 x i8> %tmp
+  %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*)
+  ret <16 x i8> %tmp
 }
 
+define i8 @test14(i8 %x, i32 %y) {
+; This test must not have the store of %x forwarded to the load -- there is an
+; intervening store if %y. However, the intervening store occurs with a different
+; type and size and to a different pointer value. This is ensuring that none of
+; those confuse the analysis into thinking that the second store does not alias
+; the first.
+; CHECK-LABEL: @test14(
+; CHECK:         %[[R:.*]] = load i8*
+; CHECK-NEXT:    ret i8 %[[R]]
+  %a = alloca i32
+  %a.i8 = bitcast i32* %a to i8*
+  store i8 %x, i8* %a.i8
+  store i32 %y, i32* %a
+  %r = load i8* %a.i8
+  ret i8 %r
+}
 
+@test15_global = external global i32
+
+define i8 @test15(i8 %x, i32 %y) {
+; Same test as @test14 essentially, but using a global instead of an alloca.
+; CHECK-LABEL: @test15(
+; CHECK:         %[[R:.*]] = load i8*
+; CHECK-NEXT:    ret i8 %[[R]]
+  %g.i8 = bitcast i32* @test15_global to i8*
+  store i8 %x, i8* %g.i8
+  store i32 %y, i32* @test15_global
+  %r = load i8* %g.i8
+  ret i8 %r
+}
diff --git a/test/Transforms/InstCombine/loadstore-alignment.ll b/test/Transforms/InstCombine/loadstore-alignment.ll
index 2263cb2..e90bdb7 100644
--- a/test/Transforms/InstCombine/loadstore-alignment.ll
+++ b/test/Transforms/InstCombine/loadstore-alignment.ll
@@ -1,67 +1,117 @@
-; RUN: opt < %s -instcombine -S | grep ", align 16" | count 14
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-p1:64:64:64-p2:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 @x = external global <2 x i64>, align 16
 @xx = external global [13 x <2 x i64>], align 16
 
+@x.as2 = external addrspace(2) global <2 x i64>, align 16
+
+; CHECK-LABEL: @static_hem(
+; CHECK: , align 16
 define <2 x i64> @static_hem() {
-	%t = getelementptr <2 x i64>* @x, i32 7
-	%tmp1 = load <2 x i64>* %t, align 1
-	ret <2 x i64> %tmp1
+  %t = getelementptr <2 x i64>* @x, i32 7
+  %tmp1 = load <2 x i64>* %t, align 1
+  ret <2 x i64> %tmp1
+}
+
+; CHECK-LABEL: @static_hem_addrspacecast(
+; CHECK: , align 16
+define <2 x i64> @static_hem_addrspacecast() {
+  %t = getelementptr <2 x i64>* @x, i32 7
+  %t.asc = addrspacecast <2 x i64>* %t to <2 x i64> addrspace(1)*
+  %tmp1 = load <2 x i64> addrspace(1)* %t.asc, align 1
+  ret <2 x i64> %tmp1
+}
+
+; CHECK-LABEL: @static_hem_addrspacecast_smaller_ptr(
+; CHECK: , align 16
+define <2 x i64> @static_hem_addrspacecast_smaller_ptr() {
+  %t = getelementptr <2 x i64>* @x, i32 7
+  %t.asc = addrspacecast <2 x i64>* %t to <2 x i64> addrspace(2)*
+  %tmp1 = load <2 x i64> addrspace(2)* %t.asc, align 1
+  ret <2 x i64> %tmp1
+}
+
+; CHECK-LABEL: @static_hem_addrspacecast_larger_ptr(
+; CHECK: , align 16
+define <2 x i64> @static_hem_addrspacecast_larger_ptr() {
+  %t = getelementptr <2 x i64> addrspace(2)* @x.as2, i32 7
+  %t.asc = addrspacecast <2 x i64> addrspace(2)* %t to <2 x i64> addrspace(1)*
+  %tmp1 = load <2 x i64> addrspace(1)* %t.asc, align 1
+  ret <2 x i64> %tmp1
 }
 
+; CHECK-LABEL: @hem(
+; CHECK: , align 16
 define <2 x i64> @hem(i32 %i) {
-	%t = getelementptr <2 x i64>* @x, i32 %i
-	%tmp1 = load <2 x i64>* %t, align 1
-	ret <2 x i64> %tmp1
+  %t = getelementptr <2 x i64>* @x, i32 %i
+  %tmp1 = load <2 x i64>* %t, align 1
+  ret <2 x i64> %tmp1
 }
 
+; CHECK-LABEL: @hem_2d(
+; CHECK: , align 16
 define <2 x i64> @hem_2d(i32 %i, i32 %j) {
-	%t = getelementptr [13 x <2 x i64>]* @xx, i32 %i, i32 %j
-	%tmp1 = load <2 x i64>* %t, align 1
-	ret <2 x i64> %tmp1
+  %t = getelementptr [13 x <2 x i64>]* @xx, i32 %i, i32 %j
+  %tmp1 = load <2 x i64>* %t, align 1
+  ret <2 x i64> %tmp1
 }
 
+; CHECK-LABEL: @foo(
+; CHECK: , align 16
 define <2 x i64> @foo() {
-	%tmp1 = load <2 x i64>* @x, align 1
-	ret <2 x i64> %tmp1
+  %tmp1 = load <2 x i64>* @x, align 1
+  ret <2 x i64> %tmp1
 }
 
+; CHECK-LABEL: @bar(
+; CHECK: , align 16
+; CHECK: , align 16
 define <2 x i64> @bar() {
-	%t = alloca <2 x i64>
-        call void @kip(<2 x i64>* %t)
-	%tmp1 = load <2 x i64>* %t, align 1
-	ret <2 x i64> %tmp1
+  %t = alloca <2 x i64>
+  call void @kip(<2 x i64>* %t)
+  %tmp1 = load <2 x i64>* %t, align 1
+  ret <2 x i64> %tmp1
 }
 
+; CHECK-LABEL: @static_hem_store(
+; CHECK: , align 16
 define void @static_hem_store(<2 x i64> %y) {
-	%t = getelementptr <2 x i64>* @x, i32 7
-	store <2 x i64> %y, <2 x i64>* %t, align 1
-        ret void
+  %t = getelementptr <2 x i64>* @x, i32 7
+  store <2 x i64> %y, <2 x i64>* %t, align 1
+  ret void
 }
 
+; CHECK-LABEL: @hem_store(
+; CHECK: , align 16
 define void @hem_store(i32 %i, <2 x i64> %y) {
-	%t = getelementptr <2 x i64>* @x, i32 %i
-	store <2 x i64> %y, <2 x i64>* %t, align 1
-        ret void
+  %t = getelementptr <2 x i64>* @x, i32 %i
+  store <2 x i64> %y, <2 x i64>* %t, align 1
+  ret void
 }
 
+; CHECK-LABEL: @hem_2d_store(
+; CHECK: , align 16
 define void @hem_2d_store(i32 %i, i32 %j, <2 x i64> %y) {
-	%t = getelementptr [13 x <2 x i64>]* @xx, i32 %i, i32 %j
-	store <2 x i64> %y, <2 x i64>* %t, align 1
-        ret void
+  %t = getelementptr [13 x <2 x i64>]* @xx, i32 %i, i32 %j
+  store <2 x i64> %y, <2 x i64>* %t, align 1
+  ret void
 }
 
+; CHECK-LABEL: @foo_store(
+; CHECK: , align 16
 define void @foo_store(<2 x i64> %y) {
-	store <2 x i64> %y, <2 x i64>* @x, align 1
-        ret void
+  store <2 x i64> %y, <2 x i64>* @x, align 1
+  ret void
 }
 
+; CHECK-LABEL: @bar_store(
+; CHECK: , align 16
 define void @bar_store(<2 x i64> %y) {
-	%t = alloca <2 x i64>
-        call void @kip(<2 x i64>* %t)
-	store <2 x i64> %y, <2 x i64>* %t, align 1
-        ret void
+  %t = alloca <2 x i64>
+  call void @kip(<2 x i64>* %t)
+  store <2 x i64> %y, <2 x i64>* %t, align 1
+  ret void
 }
 
 declare void @kip(<2 x i64>* %t)
diff --git a/test/Transforms/InstCombine/loadstore-metadata.ll b/test/Transforms/InstCombine/loadstore-metadata.ll
new file mode 100644
index 0000000..863edae
--- /dev/null
+++ b/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -0,0 +1,86 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define i32 @test_load_cast_combine_tbaa(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.
+; CHECK-LABEL: @test_load_cast_combine_tbaa(
+; CHECK: load i32* %{{.*}}, !tbaa !0
+entry:
+  %l = load float* %ptr, !tbaa !0
+  %c = bitcast float %l to i32
+  ret i32 %c
+}
+
+define i32 @test_load_cast_combine_noalias(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves no-alias metadata.
+; CHECK-LABEL: @test_load_cast_combine_noalias(
+; CHECK: load i32* %{{.*}}, !alias.scope !2, !noalias !1
+entry:
+  %l = load float* %ptr, !alias.scope !2, !noalias !1
+  %c = bitcast float %l to i32
+  ret i32 %c
+}
+
+define float @test_load_cast_combine_range(i32* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) drops range metadata. It
+; would be nice to preserve or update it somehow but this is hard when moving
+; between types.
+; CHECK-LABEL: @test_load_cast_combine_range(
+; CHECK: load float* %{{.*}}
+; CHECK-NOT: !range
+; CHECK: ret float
+entry:
+  %l = load i32* %ptr, !range !5
+  %c = bitcast i32 %l to float
+  ret float %c
+}
+
+define i32 @test_load_cast_combine_invariant(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves invariant metadata.
+; CHECK-LABEL: @test_load_cast_combine_invariant(
+; CHECK: load i32* %{{.*}}, !invariant.load !3
+entry:
+  %l = load float* %ptr, !invariant.load !3
+  %c = bitcast float %l to i32
+  ret i32 %c
+}
+
+define i32 @test_load_cast_combine_nontemporal(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves nontemporal
+; metadata.
+; CHECK-LABEL: @test_load_cast_combine_nontemporal(
+; CHECK: load i32* %{{.*}}, !nontemporal !4
+entry:
+  %l = load float* %ptr, !nontemporal !4
+  %c = bitcast float %l to i32
+  ret i32 %c
+}
+
+define void @test_load_cast_combine_loop(float* %src, i32* %dst, i32 %n) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves loop access
+; metadata.
+; CHECK-LABEL: @test_load_cast_combine_loop(
+; CHECK: load i32* %{{.*}}, !llvm.mem.parallel_loop_access !1
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %src.gep = getelementptr inbounds float* %src, i32 %i
+  %dst.gep = getelementptr inbounds i32* %dst, i32 %i
+  %l = load float* %src.gep, !llvm.mem.parallel_loop_access !1
+  %c = bitcast float %l to i32
+  store i32 %c, i32* %dst.gep
+  %i.next = add i32 %i, 1
+  %cmp = icmp slt i32 %i.next, %n
+  br i1 %cmp, label %loop, label %exit, !llvm.loop !1
+
+exit:
+  ret void
+}
+
+!0 = metadata !{ metadata !1, metadata !1, i64 0 }
+!1 = metadata !{ metadata !1 }
+!2 = metadata !{ metadata !2, metadata !1 }
+!3 = metadata !{ }
+!4 = metadata !{ i32 1 }
+!5 = metadata !{ i32 0, i32 42 }
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index 2085206..ed25e4e 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -144,3 +144,26 @@ lpad.i:                                           ; preds = %entry
   call void @_ZdlPvRKSt9nothrow_t(i8* %call.i, i8* %nt) builtin nounwind
   resume { i8*, i32 } %0
 }
+
+declare i8* @_Znwm(i64) nobuiltin
+declare void @_ZdlPvm(i8*, i64) nobuiltin
+declare i8* @_Znwj(i32) nobuiltin
+declare void @_ZdlPvj(i8*, i32) nobuiltin
+declare i8* @_Znam(i64) nobuiltin
+declare void @_ZdaPvm(i8*, i64) nobuiltin
+declare i8* @_Znaj(i32) nobuiltin
+declare void @_ZdaPvj(i8*, i32) nobuiltin
+
+; CHECK-LABEL: @test8(
+define void @test8() {
+  ; CHECK-NOT: call
+  %nwm = call i8* @_Znwm(i64 32) builtin
+  call void @_ZdlPvm(i8* %nwm, i64 32) builtin
+  %nwj = call i8* @_Znwj(i32 32) builtin
+  call void @_ZdlPvj(i8* %nwj, i32 32) builtin
+  %nam = call i8* @_Znam(i64 32) builtin
+  call void @_ZdaPvm(i8* %nam, i64 32) builtin
+  %naj = call i8* @_Znaj(i32 32) builtin
+  call void @_ZdaPvj(i8* %naj, i32 32) builtin
+  ret void
+}
diff --git a/test/Transforms/InstCombine/maxnum.ll b/test/Transforms/InstCombine/maxnum.ll
new file mode 100644
index 0000000..585d9f4
--- /dev/null
+++ b/test/Transforms/InstCombine/maxnum.ll
@@ -0,0 +1,222 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.maxnum.f32(float, float) #0
+declare float @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0
+
+declare double @llvm.maxnum.f64(double, double) #0
+declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) #0
+
+; CHECK-LABEL: @constant_fold_maxnum_f32
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_maxnum_f32() #0 {
+  %x = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_inv
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_maxnum_f32_inv() #0 {
+  %x = call float @llvm.maxnum.f32(float 2.0, float 1.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_nan0
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_maxnum_f32_nan0() #0 {
+  %x = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 2.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_nan1
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_maxnum_f32_nan1() #0 {
+  %x = call float @llvm.maxnum.f32(float 2.0, float 0x7FF8000000000000) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_nan_nan
+; CHECK-NEXT: ret float 0x7FF8000000000000
+define float @constant_fold_maxnum_f32_nan_nan() #0 {
+  %x = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_p0_p0
+; CHECK-NEXT: ret float 0.000000e+00
+define float @constant_fold_maxnum_f32_p0_p0() #0 {
+  %x = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_p0_n0
+; CHECK-NEXT: ret float 0.000000e+00
+define float @constant_fold_maxnum_f32_p0_n0() #0 {
+  %x = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_n0_p0
+; CHECK-NEXT: ret float -0.000000e+00
+define float @constant_fold_maxnum_f32_n0_p0() #0 {
+  %x = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_n0_n0
+; CHECK-NEXT: ret float -0.000000e+00
+define float @constant_fold_maxnum_f32_n0_n0() #0 {
+  %x = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_v4f32
+; CHECK-NEXT: ret <4 x float> <float 2.000000e+00, float 8.000000e+00, float 1.000000e+01, float 9.000000e+00>
+define <4 x float> @constant_fold_maxnum_v4f32() #0 {
+  %x = call <4 x float> @llvm.maxnum.v4f32(<4 x float> <float 1.0, float 8.0, float 3.0, float 9.0>, <4 x float> <float 2.0, float 2.0, float 10.0, float 5.0>)
+  ret <4 x float> %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f64
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_maxnum_f64() #0 {
+  %x = call double @llvm.maxnum.f64(double 1.0, double 2.0) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f64_nan0
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_maxnum_f64_nan0() #0 {
+  %x = call double @llvm.maxnum.f64(double 0x7FF8000000000000, double 2.0) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f64_nan1
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_maxnum_f64_nan1() #0 {
+  %x = call double @llvm.maxnum.f64(double 2.0, double 0x7FF8000000000000) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f64_nan_nan
+; CHECK-NEXT: ret double 0x7FF8000000000000
+define double @constant_fold_maxnum_f64_nan_nan() #0 {
+  %x = call double @llvm.maxnum.f64(double 0x7FF8000000000000, double 0x7FF8000000000000) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @canonicalize_constant_maxnum_f32
+; CHECK: call float @llvm.maxnum.f32(float %x, float 1.000000e+00)
+define float @canonicalize_constant_maxnum_f32(float %x) #0 {
+  %y = call float @llvm.maxnum.f32(float 1.0, float %x) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @noop_maxnum_f32
+; CHECK-NEXT: ret float %x
+define float @noop_maxnum_f32(float %x) #0 {
+  %y = call float @llvm.maxnum.f32(float %x, float %x) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @maxnum_f32_nan_val
+; CHECK-NEXT: ret float %x
+define float @maxnum_f32_nan_val(float %x) #0 {
+  %y = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float %x) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @maxnum_f32_val_nan
+; CHECK-NEXT: ret float %x
+define float @maxnum_f32_val_nan(float %x) #0 {
+  %y = call float @llvm.maxnum.f32(float %x, float 0x7FF8000000000000) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_undef_undef
+; CHECK-NEXT: ret float undef
+define float @fold_maxnum_f32_undef_undef(float %x) nounwind {
+  %val = call float @llvm.maxnum.f32(float undef, float undef) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_val_undef
+; CHECK-NEXT: ret float %x
+define float @fold_maxnum_f32_val_undef(float %x) nounwind {
+  %val = call float @llvm.maxnum.f32(float %x, float undef) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_undef_val
+; CHECK-NEXT: ret float %x
+define float @fold_maxnum_f32_undef_val(float %x) nounwind {
+  %val = call float @llvm.maxnum.f32(float undef, float %x) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @maxnum_x_maxnum_x_y
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: ret float
+define float @maxnum_x_maxnum_x_y(float %x, float %y) #0 {
+  %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+  %b = call float @llvm.maxnum.f32(float %x, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @maxnum_y_maxnum_x_y
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: ret float
+define float @maxnum_y_maxnum_x_y(float %x, float %y) #0 {
+  %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+  %b = call float @llvm.maxnum.f32(float %y, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @maxnum_z_maxnum_x_y
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %z, float %a)
+; CHECK-NEXT: ret float
+define float @maxnum_z_maxnum_x_y(float %x, float %y, float %z) #0 {
+  %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+  %b = call float @llvm.maxnum.f32(float %z, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @maxnum_maxnum_x_y_z
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %a, float %z)
+; CHECK-NEXT: ret float
+define float @maxnum_maxnum_x_y_z(float %x, float %y, float %z) #0 {
+  %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+  %b = call float @llvm.maxnum.f32(float %a, float %z) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @maxnum4
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %z, float %w)
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %a, float %b)
+; CHECK-NEXT: ret float
+define float @maxnum4(float %x, float %y, float %z, float %w) #0 {
+  %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+  %b = call float @llvm.maxnum.f32(float %z, float %w) #0
+  %c = call float @llvm.maxnum.f32(float %a, float %b) #0
+  ret float %c
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_inf_val
+; CHECK-NEXT: ret float 0x7FF0000000000000
+define float @fold_maxnum_f32_inf_val(float %x) nounwind {
+  %val = call float @llvm.maxnum.f32(float 0x7FF0000000000000, float %x) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_neginf_val
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float 0xFFF0000000000000)
+; CHECK-NEXT: ret float
+define float @fold_maxnum_f32_neginf_val(float %x) nounwind {
+  %val = call float @llvm.maxnum.f32(float 0xFFF0000000000000, float %x) #0
+  ret float %val
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/memcmp-1.ll b/test/Transforms/InstCombine/memcmp-1.ll
index 65349c6..d960693 100644
--- a/test/Transforms/InstCombine/memcmp-1.ll
+++ b/test/Transforms/InstCombine/memcmp-1.ll
@@ -37,7 +37,7 @@ define i32 @test_simplify3(i8* %mem1, i8* %mem2) {
 ; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32
 ; CHECK: [[LOAD2:%[a-z]+]] = load i8* %mem2, align 1
 ; CHECK: [[ZEXT2:%[a-z]+]] = zext i8 [[LOAD2]] to i32
-; CHECK: [[RET:%[a-z]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
+; CHECK: [[RET:%[a-z]+]] = sub nsw i32 [[ZEXT1]], [[ZEXT2]]
   ret i32 %ret
 ; CHECK: ret i32 [[RET]]
 }
diff --git a/test/Transforms/InstCombine/minnum.ll b/test/Transforms/InstCombine/minnum.ll
new file mode 100644
index 0000000..57d6e16
--- /dev/null
+++ b/test/Transforms/InstCombine/minnum.ll
@@ -0,0 +1,244 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.minnum.f32(float, float) #0
+declare float @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #0
+
+declare double @llvm.minnum.f64(double, double) #0
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
+
+declare float @llvm.fmax.f32(float, float) #0
+
+; CHECK-LABEL: @constant_fold_minnum_f32
+; CHECK-NEXT: ret float 1.000000e+00
+define float @constant_fold_minnum_f32() #0 {
+  %x = call float @llvm.minnum.f32(float 1.0, float 2.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_inv
+; CHECK-NEXT: ret float 1.000000e+00
+define float @constant_fold_minnum_f32_inv() #0 {
+  %x = call float @llvm.minnum.f32(float 2.0, float 1.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_nan0
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_minnum_f32_nan0() #0 {
+  %x = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 2.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_nan1
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_minnum_f32_nan1() #0 {
+  %x = call float @llvm.minnum.f32(float 2.0, float 0x7FF8000000000000) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_nan_nan
+; CHECK-NEXT: ret float 0x7FF8000000000000
+define float @constant_fold_minnum_f32_nan_nan() #0 {
+  %x = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_p0_p0
+; CHECK-NEXT: ret float 0.000000e+00
+define float @constant_fold_minnum_f32_p0_p0() #0 {
+  %x = call float @llvm.minnum.f32(float 0.0, float 0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_p0_n0
+; CHECK-NEXT: ret float 0.000000e+00
+define float @constant_fold_minnum_f32_p0_n0() #0 {
+  %x = call float @llvm.minnum.f32(float 0.0, float -0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_n0_p0
+; CHECK-NEXT: ret float -0.000000e+00
+define float @constant_fold_minnum_f32_n0_p0() #0 {
+  %x = call float @llvm.minnum.f32(float -0.0, float 0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_n0_n0
+; CHECK-NEXT: ret float -0.000000e+00
+define float @constant_fold_minnum_f32_n0_n0() #0 {
+  %x = call float @llvm.minnum.f32(float -0.0, float -0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_v4f32
+; CHECK-NEXT: ret <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 5.000000e+00>
+define <4 x float> @constant_fold_minnum_v4f32() #0 {
+  %x = call <4 x float> @llvm.minnum.v4f32(<4 x float> <float 1.0, float 8.0, float 3.0, float 9.0>, <4 x float> <float 2.0, float 2.0, float 10.0, float 5.0>)
+  ret <4 x float> %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f64
+; CHECK-NEXT: ret double 1.000000e+00
+define double @constant_fold_minnum_f64() #0 {
+  %x = call double @llvm.minnum.f64(double 1.0, double 2.0) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f64_nan0
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_minnum_f64_nan0() #0 {
+  %x = call double @llvm.minnum.f64(double 0x7FF8000000000000, double 2.0) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f64_nan1
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_minnum_f64_nan1() #0 {
+  %x = call double @llvm.minnum.f64(double 2.0, double 0x7FF8000000000000) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f64_nan_nan
+; CHECK-NEXT: ret double 0x7FF8000000000000
+define double @constant_fold_minnum_f64_nan_nan() #0 {
+  %x = call double @llvm.minnum.f64(double 0x7FF8000000000000, double 0x7FF8000000000000) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @canonicalize_constant_minnum_f32
+; CHECK: call float @llvm.minnum.f32(float %x, float 1.000000e+00)
+define float @canonicalize_constant_minnum_f32(float %x) #0 {
+  %y = call float @llvm.minnum.f32(float 1.0, float %x) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @noop_minnum_f32
+; CHECK-NEXT: ret float %x
+define float @noop_minnum_f32(float %x) #0 {
+  %y = call float @llvm.minnum.f32(float %x, float %x) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @minnum_f32_nan_val
+; CHECK-NEXT: ret float %x
+define float @minnum_f32_nan_val(float %x) #0 {
+  %y = call float @llvm.minnum.f32(float 0x7FF8000000000000, float %x) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @minnum_f32_val_nan
+; CHECK-NEXT: ret float %x
+define float @minnum_f32_val_nan(float %x) #0 {
+  %y = call float @llvm.minnum.f32(float %x, float 0x7FF8000000000000) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @fold_minnum_f32_undef_undef
+; CHECK-NEXT: ret float undef
+define float @fold_minnum_f32_undef_undef(float %x) nounwind {
+  %val = call float @llvm.minnum.f32(float undef, float undef) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @fold_minnum_f32_val_undef
+; CHECK-NEXT: ret float %x
+define float @fold_minnum_f32_val_undef(float %x) nounwind {
+  %val = call float @llvm.minnum.f32(float %x, float undef) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @fold_minnum_f32_undef_val
+; CHECK-NEXT: ret float %x
+define float @fold_minnum_f32_undef_val(float %x) nounwind {
+  %val = call float @llvm.minnum.f32(float undef, float %x) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @minnum_x_minnum_x_y
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: ret float
+define float @minnum_x_minnum_x_y(float %x, float %y) #0 {
+  %a = call float @llvm.minnum.f32(float %x, float %y) #0
+  %b = call float @llvm.minnum.f32(float %x, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @minnum_y_minnum_x_y
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: ret float
+define float @minnum_y_minnum_x_y(float %x, float %y) #0 {
+  %a = call float @llvm.minnum.f32(float %x, float %y) #0
+  %b = call float @llvm.minnum.f32(float %y, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @minnum_z_minnum_x_y
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.minnum.f32(float %z, float %a)
+; CHECK-NEXT: ret float
+define float @minnum_z_minnum_x_y(float %x, float %y, float %z) #0 {
+  %a = call float @llvm.minnum.f32(float %x, float %y) #0
+  %b = call float @llvm.minnum.f32(float %z, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @minnum_minnum_x_y_z
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.minnum.f32(float %a, float %z)
+; CHECK-NEXT: ret float
+define float @minnum_minnum_x_y_z(float %x, float %y, float %z) #0 {
+  %a = call float @llvm.minnum.f32(float %x, float %y) #0
+  %b = call float @llvm.minnum.f32(float %a, float %z) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @minnum4
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.minnum.f32(float %z, float %w)
+; CHECK-NEXT: call float @llvm.minnum.f32(float %a, float %b)
+; CHECK-NEXT: ret float
+define float @minnum4(float %x, float %y, float %z, float %w) #0 {
+  %a = call float @llvm.minnum.f32(float %x, float %y) #0
+  %b = call float @llvm.minnum.f32(float %z, float %w) #0
+  %c = call float @llvm.minnum.f32(float %a, float %b) #0
+  ret float %c
+}
+
+; CHECK-LABEL: @minnum_x_fmax_x_y
+; CHECK-NEXT: call float @llvm.fmax.f32
+; CHECK-NEXT: call float @llvm.minnum.f32
+; CHECK-NEXT: ret float
+define float @minnum_x_fmax_x_y(float %x, float %y) #0 {
+  %a = call float @llvm.fmax.f32(float %x, float %y) #0
+  %b = call float @llvm.minnum.f32(float %x, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @fmax_x_minnum_x_y
+; CHECK-NEXT: call float @llvm.minnum.f32
+; CHECK-NEXT: call float @llvm.fmax.f32
+; CHECK-NEXT: ret float
+define float @fmax_x_minnum_x_y(float %x, float %y) #0 {
+  %a = call float @llvm.minnum.f32(float %x, float %y) #0
+  %b = call float @llvm.fmax.f32(float %x, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @fold_minnum_f32_inf_val
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float 0x7FF0000000000000)
+; CHECK-NEXT: ret float
+define float @fold_minnum_f32_inf_val(float %x) nounwind {
+  %val = call float @llvm.minnum.f32(float 0x7FF0000000000000, float %x) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @fold_minnum_f32_minf_val
+; CHECK-NEXT: ret float 0xFFF0000000000000
+define float @fold_minnum_f32_minf_val(float %x) nounwind {
+  %val = call float @llvm.minnum.f32(float 0xFFF0000000000000, float %x) #0
+  ret float %val
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/narrow-switch.ll b/test/Transforms/InstCombine/narrow-switch.ll
new file mode 100644
index 0000000..7646189
--- /dev/null
+++ b/test/Transforms/InstCombine/narrow-switch.ll
@@ -0,0 +1,93 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+
+; CHECK-LABEL: define i32 @positive1
+; CHECK: switch i32
+; CHECK: i32 10, label
+; CHECK: i32 100, label
+; CHECK: i32 1001, label
+
+define i32 @positive1(i64 %a) {
+entry:
+  %and = and i64 %a, 4294967295
+  switch i64 %and, label %sw.default [
+    i64 10, label %return
+    i64 100, label %sw.bb1
+    i64 1001, label %sw.bb2
+  ]
+
+sw.bb1:
+  br label %return
+
+sw.bb2:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 24, %sw.default ], [ 123, %sw.bb2 ], [ 213, %sw.bb1 ], [ 231, %entry ]
+  ret i32 %retval.0
+}
+
+; CHECK-LABEL: define i32 @negative1
+; CHECK: switch i32
+; CHECK: i32 -10, label
+; CHECK: i32 -100, label
+; CHECK: i32 -1001, label
+
+define i32 @negative1(i64 %a) {
+entry:
+  %or = or i64 %a, -4294967296
+  switch i64 %or, label %sw.default [
+    i64 -10, label %return
+    i64 -100, label %sw.bb1
+    i64 -1001, label %sw.bb2
+  ]
+
+sw.bb1:
+  br label %return
+
+sw.bb2:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 24, %sw.default ], [ 123, %sw.bb2 ], [ 213, %sw.bb1 ], [ 231, %entry ]
+  ret i32 %retval.0
+}
+
+; Make sure truncating a constant int larger than 64-bit doesn't trigger an
+; assertion.
+
+; CHECK-LABEL: define i32 @trunc72to68
+; CHECK: switch i68
+; CHECK: i68 10, label
+; CHECK: i68 100, label
+; CHECK: i68 1001, label
+
+define i32 @trunc72to68(i72 %a) {
+entry:
+  %and = and i72 %a, 295147905179352825855
+  switch i72 %and, label %sw.default [
+    i72 10, label %return
+    i72 100, label %sw.bb1
+    i72 1001, label %sw.bb2
+  ]
+
+sw.bb1:
+  br label %return
+
+sw.bb2:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 24, %sw.default ], [ 123, %sw.bb2 ], [ 213, %sw.bb1 ], [ 231, %entry ]
+  ret i32 %retval.0
+}
diff --git a/test/Transforms/InstCombine/no_cgscc_assert.ll b/test/Transforms/InstCombine/no_cgscc_assert.ll
new file mode 100644
index 0000000..cec5297
--- /dev/null
+++ b/test/Transforms/InstCombine/no_cgscc_assert.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -inline -instcombine -S | FileCheck %s
+
+; PR21403: http://llvm.org/bugs/show_bug.cgi?id=21403
+; When the call to sqrtf is replaced by an intrinsic call to fabs,
+; it should not cause a problem in CGSCC. 
+
+define float @bar(float %f) #0 {
+  %mul = fmul fast float %f, %f
+  %call1 = call float @sqrtf(float %mul) #0
+  ret float %call1
+
+; CHECK-LABEL: @bar(
+; CHECK-NEXT: call float @llvm.fabs.f32
+; CHECK-NEXT: ret float
+}
+
+declare float @sqrtf(float) #0
+
+attributes #0 = { readnone "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/objsize-address-space.ll b/test/Transforms/InstCombine/objsize-address-space.ll
index 9cb6884..a971c91 100644
--- a/test/Transforms/InstCombine/objsize-address-space.ll
+++ b/test/Transforms/InstCombine/objsize-address-space.ll
@@ -32,7 +32,7 @@ define i16 @foo_as3_i16() nounwind {
   ret i16 %1
 }
 
-@a_alias = alias weak [60 x i8] addrspace(3)* @a_as3
+@a_alias = weak alias [60 x i8] addrspace(3)* @a_as3
 define i32 @foo_alias() nounwind {
   %1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_alias, i32 0, i32 0), i1 false)
   ret i32 %1
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 6459032..1285b1c 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -256,7 +256,7 @@ return:
   ret i32 7
 }
 
-@globalalias = alias internal [60 x i8]* @a
+@globalalias = internal alias [60 x i8]* @a
 
 ; CHECK-LABEL: @test18(
 ; CHECK-NEXT: ret i32 60
@@ -266,7 +266,7 @@ define i32 @test18() {
   ret i32 %1
 }
 
-@globalalias2 = alias weak [60 x i8]* @a
+@globalalias2 = weak alias [60 x i8]* @a
 
 ; CHECK-LABEL: @test19(
 ; CHECK: llvm.objectsize
diff --git a/test/Transforms/InstCombine/or-xor.ll b/test/Transforms/InstCombine/or-xor.ll
index cec36f1..670e3e0 100644
--- a/test/Transforms/InstCombine/or-xor.ll
+++ b/test/Transforms/InstCombine/or-xor.ll
@@ -92,3 +92,92 @@ define i32 @test9(i32 %x, i32 %y) nounwind {
 ; CHECK-NEXT: %z = or i32 %y.not, %x
 ; CHECK-NEXT: ret i32 %z
 }
+
+define i32 @test10(i32 %A, i32 %B) {
+  %xor1 = xor i32 %B, %A
+  %not = xor i32 %A, -1
+  %xor2 = xor i32 %not, %B
+  %or = or i32 %xor1, %xor2
+  ret i32 %or
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: ret i32 -1
+}
+
+define i32 @test11(i32 %A, i32 %B) {
+  %xor1 = xor i32 %B, %A
+  %not = xor i32 %A, -1
+  %xor2 = xor i32 %not, %B
+  %or = or i32 %xor1, %xor2
+  ret i32 %or
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: ret i32 -1
+}
+
+; (x | y) & ((~x) ^ y) -> (x & y)
+define i32 @test12(i32 %x, i32 %y) {
+ %or = or i32 %x, %y
+ %neg = xor i32 %x, -1
+ %xor = xor i32 %neg, %y
+ %and = and i32 %or, %xor
+ ret i32 %and
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: %and = and i32 %x, %y
+; CHECK-NEXT: ret i32 %and
+}
+
+; ((~x) ^ y) & (x | y) -> (x & y)
+define i32 @test13(i32 %x, i32 %y) {
+ %neg = xor i32 %x, -1
+ %xor = xor i32 %neg, %y
+ %or = or i32 %x, %y
+ %and = and i32 %xor, %or
+ ret i32 %and
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: %and = and i32 %x, %y
+; CHECK-NEXT: ret i32 %and
+}
+
+; ((x | y) ^ (x ^ y)) -> (x & y)
+define i32 @test15(i32 %x, i32 %y) {
+  %1 = xor i32 %y, %x
+  %2 = or i32 %y, %x
+  %3 = xor i32 %2, %1
+  ret i32 %3
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: %1 = and i32 %y, %x
+; CHECK-NEXT: ret i32 %1
+}
+
+; ((x | ~y) ^ (~x | y)) -> x ^ y
+define i32 @test16(i32 %x, i32 %y) {
+  %noty = xor i32 %y, -1
+  %notx = xor i32 %x, -1
+  %or1 = or i32 %x, %noty
+  %or2 = or i32 %notx, %y
+  %xor = xor i32 %or1, %or2
+  ret i32 %xor
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: %xor = xor i32 %x, %y
+; CHECK-NEXT: ret i32 %xor
+}
+
+; ((x & ~y) ^ (~x & y)) -> x ^ y
+define i32 @test17(i32 %x, i32 %y) {
+  %noty = xor i32 %y, -1
+  %notx = xor i32 %x, -1
+  %and1 = and i32 %x, %noty
+  %and2 = and i32 %notx, %y
+  %xor = xor i32 %and1, %and2
+  ret i32 %xor
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: %xor = xor i32 %x, %y
+; CHECK-NEXT: ret i32 %xor
+}
+
+define i32 @test18(i32 %a, i32 %b) {
+  %or = xor i32 %a, %b
+  %and1 = and i32 %or, 1
+  %and2 = and i32 %b, -2
+  %xor = or i32 %and1, %and2
+  ret i32 %xor
+}
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index 1cd897e..23dad21 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -408,3 +408,101 @@ define i32 @test38(i32* %xp, i32 %y) {
   %or = or i32 %x, %sext
   ret i32 %or
 }
+
+define i32 @test39(i32 %a, i32 %b) {
+; CHECK-LABEL: test39(
+; CHECK-NEXT: %or = or i32 %a, %b
+ %xor = xor i32 %a, -1
+ %and = and i32 %xor, %b
+ %or = or i32 %and, %a
+ ret i32 %or
+}
+
+define i32 @test40(i32 %a, i32 %b) {
+; CHECK-LABEL: test40(
+; CHECK-NEXT:   %1 = xor i32 %a, -1 
+; CHECK-NEXT: %or = or i32 %1, %b
+ %and = and i32 %a, %b
+ %xor = xor i32 %a, -1
+ %or = or i32 %and, %xor
+ ret i32 %or
+}
+
+define i32 @test41(i32 %a, i32 %b) {
+; CHECK-LABEL: test41(
+; CHECK-NEXT: %1 = xor i32 %a, -1
+; CHECK-NEXT: %or = xor i32 %1, %b
+ %and = and i32 %a, %b
+ %nega = xor i32 %a, -1
+ %xor = xor i32 %nega, %b
+ %or = or i32 %and, %xor
+ ret i32 %or
+}
+
+define i32 @test42(i32 %a, i32 %b) {
+; CHECK-LABEL: test42(
+; CHECK-NEXT: %1 = xor i32 %a, -1
+; CHECK-NEXT: %or = xor i32 %1, %b
+ %nega = xor i32 %a, -1
+ %xor = xor i32 %nega, %b
+ %and = and i32 %a, %b
+ %or = or i32 %xor, %and
+ ret i32 %or
+}
+
+define i32 @test43(i32 %a, i32 %b) {
+; CHECK-LABEL: test43(
+; CHECK-NEXT: %or = xor i32 %a, %b
+ %neg = xor i32 %b, -1
+ %and = and i32 %a, %neg
+ %xor = xor i32 %a, %b
+ %or = or i32 %and, %xor
+ ret i32 %or
+}
+
+define i32 @test44(i32 %a, i32 %b) {
+; CHECK-LABEL: test44(
+; CHECK-NEXT: %or = xor i32 %a, %b
+ %xor = xor i32 %a, %b
+ %neg = xor i32 %b, -1
+ %and = and i32 %a, %neg
+ %or = or i32 %xor, %and
+ ret i32 %or
+}
+
+define i32 @test45(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: test45(
+; CHECK-NEXT: %1 = and i32 %x, %z
+; CHECK-NEXT: %or1 = or i32 %1, %y
+; CHECK-NEXT: ret i32 %or1
+  %or = or i32 %y, %z
+  %and = and i32 %x, %or
+  %or1 = or i32 %and, %y
+  ret i32 %or1
+}
+
+define i1 @test46(i8 signext %c)  {
+  %c.off = add i8 %c, -97
+  %cmp1 = icmp ult i8 %c.off, 26
+  %c.off17 = add i8 %c, -65
+  %cmp2 = icmp ult i8 %c.off17, 26
+  %or = or i1 %cmp1, %cmp2
+  ret i1 %or
+; CHECK-LABEL: @test46(
+; CHECK-NEXT:  and i8 %c, -33
+; CHECK-NEXT:  add i8 %1, -65
+; CHECK-NEXT:  icmp ult i8 %2, 26
+}
+
+define i1 @test47(i8 signext %c)  {
+  %c.off = add i8 %c, -65
+  %cmp1 = icmp ule i8 %c.off, 26
+  %c.off17 = add i8 %c, -97
+  %cmp2 = icmp ule i8 %c.off17, 26
+  %or = or i1 %cmp1, %cmp2
+  ret i1 %or
+; CHECK-LABEL: @test47(
+; CHECK-NEXT:  and i8 %c, -33
+; CHECK-NEXT:  add i8 %1, -65
+; CHECK-NEXT:  icmp ult i8 %2, 27
+}
diff --git a/test/Transforms/InstCombine/overflow-mul.ll b/test/Transforms/InstCombine/overflow-mul.ll
index cbb2f5f..6d8d40b 100644
--- a/test/Transforms/InstCombine/overflow-mul.ll
+++ b/test/Transforms/InstCombine/overflow-mul.ll
@@ -173,3 +173,16 @@ define <4 x i32> @pr20113(<4 x i16> %a, <4 x i16> %b) {
   %vcgez.i = sext <4 x i1> %tmp to <4 x i32>
   ret <4 x i32> %vcgez.i
 }
+
+@pr21445_data = external global i32
+define i1 @pr21445(i8 %a) {
+; CHECK-LABEL: @pr21445(
+; CHECK-NEXT:  %[[umul:.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 ptrtoint (i32* @pr21445_data to i8))
+; CHECK-NEXT:  %[[cmp:.*]] = extractvalue { i8, i1 } %[[umul]], 1
+; CHECK-NEXT:  ret i1 %[[cmp]]
+  %ext = zext i8 %a to i32
+  %mul = mul i32 %ext, zext (i8 ptrtoint (i32* @pr21445_data to i8) to i32)
+  %and = and i32 %mul, 255
+  %cmp = icmp ne i32 %mul, %and
+  ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/pr12338.ll b/test/Transforms/InstCombine/pr12338.ll
index d34600f..614387a 100644
--- a/test/Transforms/InstCombine/pr12338.ll
+++ b/test/Transforms/InstCombine/pr12338.ll
@@ -6,7 +6,6 @@ entry:
 
 for.cond:
   %local = phi <1 x i32> [ <i32 0>, %entry ], [ %phi2, %cond.end47 ]
-; CHECK: sub <1 x i32> <i32 92>, %local
   %phi3 = sub <1 x i32> zeroinitializer, %local
   br label %cond.end
 
@@ -19,6 +18,7 @@ cond.end:
 
 cond.end47:
   %sum = add <1 x i32> %cond, <i32 92>
+; CHECK: sub <1 x i32> <i32 -92>, %cond
   %phi2 = sub <1 x i32> zeroinitializer, %sum
   br label %for.cond
 }
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index d625f3b..6cf9f0f 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -1,7 +1,8 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
 ; This test makes sure that these instructions are properly eliminated.
 ; PR1822
 
-; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
 
 define i32 @test1(i32 %A, i32 %B) {
         %C = select i1 false, i32 %A, i32 %B            
@@ -916,9 +917,9 @@ define i32 @select_icmp_eq_and_4096_0_or_4096(i32 %x, i32 %y) {
 }
 
 ; CHECK-LABEL: @select_icmp_eq_0_and_1_or_1(
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i64 %x, 1
-; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = trunc i64 [[AND]] to i32
-; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[XOR]], %y
+; CHECK-NEXT: [[TRUNC:%.+]] = trunc i64 %x to i32
+; CHECK-NEXT: [[AND:%.+]] = and i32 [[TRUNC]], 1
+; CHECK-NEXT: [[OR:%.+]] = or i32 [[XOR]], %y
 ; CHECK-NEXT: ret i32 [[OR]]
 define i32 @select_icmp_eq_0_and_1_or_1(i64 %x, i32 %y) {
   %and = and i64 %x, 1
@@ -957,11 +958,11 @@ define i32 @select_icmp_ne_0_and_32_or_4096(i32 %x, i32 %y) {
 }
 
 ; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_or_8(
-; CHECK-NEXT: [[LSHR:%[a-z0-9]+]] = lshr i32 %x, 27
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[LSHR]], 8
-; CHECK-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i32 [[AND]] to i8
-; CHECK-NEXT: [[XOR:%[a-z0-9]+]] = xor i8 [[TRUNC]], 8
-; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i8 [[XOR]], %y
+; CHECK-NEXT: [[LSHR:%.+]] = lshr i32 %x, 27
+; CHECK-NEXT: [[TRUNC:%.+]] = trunc i32 [[LSHR]] to i8
+; CHECK-NEXT: [[AND:%.+]] = and i8 [[TRUNC]], 8
+; CHECK-NEXT: [[XOR:%.+]] = xor i8 [[AND]], 8
+; CHECK-NEXT: [[OR:%.+]] = or i8 [[XOR]], %y
 ; CHECK-NEXT: ret i8 [[OR]]
 define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) {
   %and = and i32 %x, 1073741824
@@ -1108,10 +1109,11 @@ define i32 @test65(i64 %x) {
   ret i32 %3
 
 ; CHECK-LABEL: @test65(
-; CHECK: and i64 %x, 16
-; CHECK: trunc i64 %1 to i32
-; CHECK: lshr exact i32 %2, 3
-; CHECK: xor i32 %3, 42
+; CHECK: %[[TRUNC:.*]] = trunc i64 %x to i32
+; CHECK: %[[LSHR:.*]] = lshr i32 %[[TRUNC]], 3
+; CHECK: %[[AND:.*]] = and i32 %[[LSHR]], 2
+; CHECK: %[[XOR:.*]] = xor i32 %[[AND]], 42
+; CHECK: ret i32 %[[XOR]]
 }
 
 define i32 @test66(i64 %x) {
@@ -1236,3 +1238,150 @@ define i32 @test75(i32 %x) {
 ; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 68, i32 %x
 ; CHECK-NEXT: ret i32 [[SEL]]
 }
+
+@under_aligned = external global i32, align 1
+
+define i32 @test76(i1 %flag, i32* %x) {
+; The load here must not be speculated around the select. One side of the
+; select is trivially dereferencable but may have a lower alignment than the
+; load does.
+; CHECK-LABEL: @test76(
+; CHECK: store i32 0, i32* %x
+; CHECK: %[[P:.*]] = select i1 %flag, i32* @under_aligned, i32* %x
+; CHECK: load i32* %[[P]]
+
+  store i32 0, i32* %x
+  %p = select i1 %flag, i32* @under_aligned, i32* %x
+  %v = load i32* %p
+  ret i32 %v
+}
+
+declare void @scribble_on_memory(i32*)
+
+define i32 @test77(i1 %flag, i32* %x) {
+; The load here must not be speculated around the select. One side of the
+; select is trivially dereferencable but may have a lower alignment than the
+; load does.
+; CHECK-LABEL: @test77(
+; CHECK: %[[A:.*]] = alloca i32, align 1
+; CHECK: call void @scribble_on_memory(i32* %[[A]])
+; CHECK: store i32 0, i32* %x
+; CHECK: %[[P:.*]] = select i1 %flag, i32* %[[A]], i32* %x
+; CHECK: load i32* %[[P]]
+
+  %under_aligned = alloca i32, align 1
+  call void @scribble_on_memory(i32* %under_aligned)
+  store i32 0, i32* %x
+  %p = select i1 %flag, i32* %under_aligned, i32* %x
+  %v = load i32* %p
+  ret i32 %v
+}
+
+define i32 @test78(i1 %flag, i32* %x, i32* %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; CHECK-LABEL: @test78(
+; CHECK:         %[[V1:.*]] = load i32* %x
+; CHECK-NEXT:    %[[V2:.*]] = load i32* %y
+; CHECK-NEXT:    %[[S:.*]] = select i1 %flag, i32 %[[V1]], i32 %[[V2]]
+; CHECK-NEXT:    ret i32 %[[S]]
+entry:
+  store i32 0, i32* %x
+  store i32 0, i32* %y
+  ; Block forwarding by storing to %z which could alias either %x or %y.
+  store i32 42, i32* %z
+  %p = select i1 %flag, i32* %x, i32* %y
+  %v = load i32* %p
+  ret i32 %v
+}
+
+define float @test79(i1 %flag, float* %x, i32* %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; CHECK-LABEL: @test79(
+; CHECK:         %[[V1:.*]] = load float* %x
+; CHECK-NEXT:    %[[V2:.*]] = load float* %y
+; CHECK-NEXT:    %[[S:.*]] = select i1 %flag, float %[[V1]], float %[[V2]]
+; CHECK-NEXT:    ret float %[[S]]
+entry:
+  %x1 = bitcast float* %x to i32*
+  %y1 = bitcast i32* %y to float*
+  store i32 0, i32* %x1
+  store i32 0, i32* %y
+  ; Block forwarding by storing to %z which could alias either %x or %y.
+  store i32 42, i32* %z
+  %p = select i1 %flag, float* %x, float* %y1
+  %v = load float* %p
+  ret float %v
+}
+
+define i32 @test80(i1 %flag) {
+; Test that when we speculate the loads around the select they fold throug
+; load->load folding and load->store folding.
+; CHECK-LABEL: @test80(
+; CHECK:         %[[X:.*]] = alloca i32
+; CHECK-NEXT:    %[[Y:.*]] = alloca i32
+; CHECK:         %[[V:.*]] = load i32* %[[X]]
+; CHECK-NEXT:    store i32 %[[V]], i32* %[[Y]]
+; CHECK-NEXT:    ret i32 %[[V]]
+entry:
+  %x = alloca i32
+  %y = alloca i32
+  call void @scribble_on_memory(i32* %x)
+  call void @scribble_on_memory(i32* %y)
+  %tmp = load i32* %x
+  store i32 %tmp, i32* %y
+  %p = select i1 %flag, i32* %x, i32* %y
+  %v = load i32* %p
+  ret i32 %v
+}
+
+define float @test81(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers.
+; CHECK-LABEL: @test81(
+; CHECK:         %[[X:.*]] = alloca i32
+; CHECK-NEXT:    %[[Y:.*]] = alloca i32
+; CHECK:         %[[V:.*]] = load i32* %[[X]]
+; CHECK-NEXT:    store i32 %[[V]], i32* %[[Y]]
+; CHECK-NEXT:    %[[C:.*]] = bitcast i32 %[[V]] to float
+; CHECK-NEXT:    ret float %[[C]]
+entry:
+  %x = alloca float
+  %y = alloca i32
+  %x1 = bitcast float* %x to i32*
+  %y1 = bitcast i32* %y to float*
+  call void @scribble_on_memory(i32* %x1)
+  call void @scribble_on_memory(i32* %y)
+  %tmp = load i32* %x1
+  store i32 %tmp, i32* %y
+  %p = select i1 %flag, float* %x, float* %y1
+  %v = load float* %p
+  ret float %v
+}
+
+define i32 @test82(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers.
+; CHECK-LABEL: @test82(
+; CHECK:         %[[X:.*]] = alloca float
+; CHECK-NEXT:    %[[Y:.*]] = alloca i32
+; CHECK-NEXT:    %[[X1:.*]] = bitcast float* %[[X]] to i32*
+; CHECK-NEXT:    %[[Y1:.*]] = bitcast i32* %[[Y]] to float*
+; CHECK:         %[[V:.*]] = load float* %[[X]]
+; CHECK-NEXT:    store float %[[V]], float* %[[Y1]]
+; CHECK-NEXT:    %[[C:.*]] = bitcast float %[[V]] to i32
+; CHECK-NEXT:    ret i32 %[[C]]
+entry:
+  %x = alloca float
+  %y = alloca i32
+  %x1 = bitcast float* %x to i32*
+  %y1 = bitcast i32* %y to float*
+  call void @scribble_on_memory(i32* %x1)
+  call void @scribble_on_memory(i32* %y)
+  %tmp = load float* %x
+  store float %tmp, float* %y1
+  %p = select i1 %flag, i32* %x1, i32* %y
+  %v = load i32* %p
+  ret i32 %v
+}
diff --git a/test/Transforms/InstCombine/strcmp-1.ll b/test/Transforms/InstCombine/strcmp-1.ll
index fc58ffc..9bbd7db 100644
--- a/test/Transforms/InstCombine/strcmp-1.ll
+++ b/test/Transforms/InstCombine/strcmp-1.ll
@@ -15,7 +15,7 @@ define i32 @test1(i8* %str2) {
 ; CHECK-LABEL: @test1(
 ; CHECK: %strcmpload = load i8* %str
 ; CHECK: %1 = zext i8 %strcmpload to i32
-; CHECK: %2 = sub i32 0, %1
+; CHECK: %2 = sub nsw i32 0, %1
 ; CHECK: ret i32 %2
 
   %str1 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
diff --git a/test/Transforms/InstCombine/strncmp-1.ll b/test/Transforms/InstCombine/strncmp-1.ll
index df30dd1..49b0955 100644
--- a/test/Transforms/InstCombine/strncmp-1.ll
+++ b/test/Transforms/InstCombine/strncmp-1.ll
@@ -15,7 +15,7 @@ define i32 @test1(i8* %str2) {
 ; CHECK-LABEL: @test1(
 ; CHECK: %strcmpload = load i8* %str
 ; CHECK: %1 = zext i8 %strcmpload to i32
-; CHECK: %2 = sub i32 0, %1
+; CHECK: %2 = sub nsw i32 0, %1
 ; CHECK: ret i32 %2
 
   %str1 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
@@ -73,7 +73,7 @@ define i32 @test6(i8* %str1, i8* %str2) {
 ; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32
 ; CHECK: [[LOAD2:%[a-z]+]] = load i8* %str2, align 1
 ; CHECK: [[ZEXT2:%[a-z]+]] = zext i8 [[LOAD2]] to i32
-; CHECK: [[RET:%[a-z]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
+; CHECK: [[RET:%[a-z]+]] = sub nsw i32 [[ZEXT1]], [[ZEXT2]]
 ; CHECK: ret i32 [[RET]]
 
   %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 1)
diff --git a/test/Transforms/InstCombine/sub-xor.ll b/test/Transforms/InstCombine/sub-xor.ll
index e7aff00..3a24074 100644
--- a/test/Transforms/InstCombine/sub-xor.ll
+++ b/test/Transforms/InstCombine/sub-xor.ll
@@ -32,7 +32,7 @@ define i32 @test3(i32 %x) nounwind {
 
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT: and i32 %x, 31
-; CHECK-NEXT: sub i32 73, %and
+; CHECK-NEXT: sub nsw i32 73, %and
 ; CHECK-NEXT: ret
 }
 
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 67b7c49..0e421f7 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -142,8 +142,9 @@ define i32 @test15(i32 %A, i32 %B) {
 	%D = srem i32 %B, %C
 	ret i32 %D
 ; CHECK-LABEL: @test15(
-; CHECK: %D = srem i32 %B, %A
-; CHECK: ret i32 %D
+; CHECK:      %[[sub:.*]] = sub i32 0, %A
+; CHECK-NEXT: %[[rem:.*]] = srem i32 %B, %[[sub]]
+; CHECK: ret i32 %[[rem]]
 }
 
 define i32 @test16(i32 %A) {
@@ -464,3 +465,88 @@ define i32 @test38(i32 %A) {
 ; CHECK-NEXT: [[SEXT:%.*]] = sext i1 [[ICMP]] to i32
 ; CHECK-NEXT: ret i32 [[SEXT]]
 }
+
+define i32 @test39(i32 %A, i32 %x) {
+  %B = sub i32 0, %A
+  %C = sub nsw i32 %x, %B
+  ret i32 %C
+; CHECK-LABEL: @test39(
+; CHECK: %C = add i32 %x, %A
+; CHECK: ret i32 %C
+}
+
+define i16 @test40(i16 %a, i16 %b) {
+  %ashr = ashr i16 %a, 1
+  %ashr1 = ashr i16 %b, 1
+  %sub = sub i16 %ashr, %ashr1
+  ret i16 %sub
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i16 %a, 1
+; CHECK-NEXT: [[ASHR1:%.*]] = ashr i16 %b, 1
+; CHECK-NEXT: [[RET:%.*]] = sub nsw i16 [[ASHR]], [[ASHR1]]
+; CHECK: ret i16 [[RET]]
+}
+
+define i32 @test41(i16 %a, i16 %b) {
+  %conv = sext i16 %a to i32
+  %conv1 = sext i16 %b to i32
+  %sub = sub i32 %conv, %conv1
+  ret i32 %sub
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: [[SEXT:%.*]] = sext i16 %a to i32
+; CHECK-NEXT: [[SEXT1:%.*]] = sext i16 %b to i32
+; CHECK-NEXT: [[RET:%.*]] = sub nsw i32 [[SEXT]], [[SEXT1]]
+; CHECK: ret i32 [[RET]]
+}
+
+define i4 @test42(i4 %x, i4 %y) {
+  %a = and i4 %y, 7
+  %b = and i4 %x, 7
+  %c = sub i4 %a, %b
+  ret i4 %c
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[AND:%.*]] = and i4 %y, 7
+; CHECK-NEXT: [[AND1:%.*]] = and i4 %x, 7
+; CHECK-NEXT: [[RET:%.*]] = sub nsw i4 [[AND]], [[AND1]]
+; CHECK: ret i4 [[RET]]
+}
+
+define i4 @test43(i4 %x, i4 %y) {
+  %a = or i4 %x, -8
+  %b = and i4 %y, 7
+  %c = sub i4 %a, %b
+  ret i4 %c
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[OR:%.*]] = or i4 %x, -8
+; CHECK-NEXT: [[AND:%.*]] = and i4 %y, 7
+; CHECK-NEXT: [[RET:%.*]] = sub nuw i4 [[OR]], [[AND]]
+; CHECK: ret i4 [[RET]]
+}
+
+define i32 @test44(i32 %x) {
+  %sub = sub nsw i32 %x, 32768
+  ret i32 %sub
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 %x, -32768
+; CHECK: ret i32 [[ADD]]
+}
+
+define i32 @test45(i32 %x, i32 %y) {
+  %or = or i32 %x, %y
+  %xor = xor i32 %x, %y
+  %sub = sub i32 %or, %xor
+  ret i32 %sub
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: %sub = and i32 %x, %y
+; CHECK: ret i32 %sub
+}
+
+define i32 @test46(i32 %x, i32 %y) {
+ %or = or i32 %x, %y
+ %sub = sub i32 %or, %x
+ ret i32 %sub
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: %x.not = xor i32 %x, -1
+; CHECK-NEXT: %sub = and i32 %y, %x.not
+; CHECK: ret i32 %sub
+}
diff --git a/test/Transforms/InstCombine/vsx-unaligned.ll b/test/Transforms/InstCombine/vsx-unaligned.ll
new file mode 100644
index 0000000..26e0426
--- /dev/null
+++ b/test/Transforms/InstCombine/vsx-unaligned.ll
@@ -0,0 +1,44 @@
+; Verify that we can create unaligned loads and stores from VSX intrinsics.
+
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target triple = "powerpc64-unknown-linux-gnu"
+
+@vf = common global <4 x float> zeroinitializer, align 1
+@res_vf = common global <4 x float> zeroinitializer, align 1
+@vd = common global <2 x double> zeroinitializer, align 1
+@res_vd = common global <2 x double> zeroinitializer, align 1
+
+define void @test1() {
+entry:
+  %t1 = alloca <4 x float>*, align 8
+  %t2 = alloca <2 x double>*, align 8
+  store <4 x float>* @vf, <4 x float>** %t1, align 8
+  %0 = load <4 x float>** %t1, align 8
+  %1 = bitcast <4 x float>* %0 to i8*
+  %2 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %1)
+  store <4 x float>* @res_vf, <4 x float>** %t1, align 8
+  %3 = load <4 x float>** %t1, align 8
+  %4 = bitcast <4 x float>* %3 to i8*
+  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %2, i8* %4)
+  store <2 x double>* @vd, <2 x double>** %t2, align 8
+  %5 = load <2 x double>** %t2, align 8
+  %6 = bitcast <2 x double>* %5 to i8*
+  %7 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %6)
+  store <2 x double>* @res_vd, <2 x double>** %t2, align 8
+  %8 = load <2 x double>** %t2, align 8
+  %9 = bitcast <2 x double>* %8 to i8*
+  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %7, i8* %9)
+  ret void
+}
+
+; CHECK-LABEL: @test1
+; CHECK: %0 = load <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 1
+; CHECK: store <4 x i32> %0, <4 x i32>* bitcast (<4 x float>* @res_vf to <4 x i32>*), align 1
+; CHECK: %1 = load <2 x double>* @vd, align 1
+; CHECK: store <2 x double> %1, <2 x double>* @res_vd, align 1
+
+declare <4 x i32> @llvm.ppc.vsx.lxvw4x(i8*)
+declare void @llvm.ppc.vsx.stxvw4x(<4 x i32>, i8*)
+declare <2 x double> @llvm.ppc.vsx.lxvd2x(i8*)
+declare void @llvm.ppc.vsx.stxvd2x(<2 x double>, i8*)
diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll
index d153e03..797c8f3 100644
--- a/test/Transforms/InstCombine/xor2.ll
+++ b/test/Transforms/InstCombine/xor2.ll
@@ -82,3 +82,93 @@ define i32 @test6(i32 %x) {
 ; CHECK: lshr i32 %x, 16
 ; CHECK: ret
 }
+
+
+; (A | B) ^ (~A) -> (A | ~B)
+define i32 @test7(i32 %a, i32 %b) {
+ %or = or i32 %a, %b
+ %neg = xor i32 %a, -1
+ %xor = xor i32 %or, %neg
+ ret i32 %xor
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: %[[b_not:.*]] = xor i32 %b, -1
+; CHECK-NEXT: %[[or:.*]] = or i32 %a, %[[b_not]]
+; CHECK-NEXT: ret i32 %[[or]]
+}
+
+; (~A) ^ (A | B) -> (A | ~B)
+define i32 @test8(i32 %a, i32 %b) {
+ %neg = xor i32 %a, -1
+ %or = or i32 %a, %b
+ %xor = xor i32 %neg, %or
+ ret i32 %xor
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: %[[b_not:.*]] = xor i32 %b, -1
+; CHECK-NEXT: %[[or:.*]] = or i32 %a, %[[b_not]]
+; CHECK-NEXT: ret i32 %[[or]]
+}
+
+; (A & B) ^ (A ^ B) -> (A | B)
+define i32 @test9(i32 %b, i32 %c) {
+ %and = and i32 %b, %c
+ %xor = xor i32 %b, %c
+ %xor2 = xor i32 %and, %xor
+ ret i32 %xor2
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: %xor2 = or i32 %b, %c
+}
+
+; (A ^ B) ^ (A & B) -> (A | B)
+define i32 @test10(i32 %b, i32 %c) {
+ %xor = xor i32 %b, %c
+ %and = and i32 %b, %c
+ %xor2 = xor i32 %xor, %and
+ ret i32 %xor2
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: %xor2 = or i32 %b, %c
+}
+
+define i32 @test11(i32 %A, i32 %B) {
+  %xor1 = xor i32 %B, %A
+  %not = xor i32 %A, -1
+  %xor2 = xor i32 %not, %B
+  %and = and i32 %xor1, %xor2
+  ret i32 %and
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test12(i32 %a, i32 %b) {
+ %negb = xor i32 %b, -1
+ %and = and i32 %a, %negb
+ %nega = xor i32 %a, -1
+ %xor = xor i32 %and, %nega
+ ret i32 %xor
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: %1 = and i32 %a, %b
+; CHECK-NEXT: %xor = xor i32 %1, -1
+}
+
+define i32 @test13(i32 %a, i32 %b) {
+ %nega = xor i32 %a, -1
+ %negb = xor i32 %b, -1
+ %and = and i32 %a, %negb
+ %xor = xor i32 %nega, %and
+ ret i32 %xor
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: %1 = and i32 %a, %b
+; CHECK-NEXT: %xor = xor i32 %1, -1
+}
+
+; (A ^ C) ^ (A | B) -> ((~A) & B) ^ C
+define i32 @test14(i32 %a, i32 %b, i32 %c) {
+ %neg = xor i32 %a, %c
+ %or = or i32 %a, %b
+ %xor = xor i32 %neg, %or
+ ret i32 %xor
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: %[[not:.*]] = xor i32 %a, -1
+; CHECK-NEXT: %[[and:.*]] = and i32 %[[not]], %b
+; CHECK-NEXT: %[[xor:.*]] = xor i32 %[[and]], %c
+; CHECK-NEXT: ret i32 %[[xor]]
+}
diff --git a/test/Transforms/InstMerge/ld_hoist1.ll b/test/Transforms/InstMerge/ld_hoist1.ll
new file mode 100644
index 0000000..715f1b8
--- /dev/null
+++ b/test/Transforms/InstMerge/ld_hoist1.ll
@@ -0,0 +1,64 @@
+; Test load hoist
+; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc_linux"
+
+; Function Attrs: nounwind uwtable
+define float* @foo(i32* noalias nocapture readonly %in, float* noalias %out, i32 %size, i32* nocapture readonly %trigger)  {
+entry:
+  %cmp11 = icmp eq i32 %size, 0
+  br i1 %cmp11, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %0 = add i32 %size, -1
+  br label %for.body
+
+; CHECK-LABEL: for.body
+; CHECK: load
+; CHECK:  %2 = getelementptr inbounds i32* %in, i64 %indvars.iv
+; CHECK:  %3 = load i32* %2, align 4
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.inc
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.inc ]
+  %arrayidx = getelementptr inbounds i32* %trigger, i64 %indvars.iv
+  %1 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %1, 0
+  br i1 %cmp1, label %if.then, label %if.else
+
+; CHECK-LABEL: if.then
+if.then:                                          ; preds = %for.body
+; This load should be hoisted
+  %arrayidx3 = getelementptr inbounds i32* %in, i64 %indvars.iv
+  %2 = load i32* %arrayidx3, align 4
+  %conv = sitofp i32 %2 to float
+  %add = fadd float %conv, 5.000000e-01
+  %arrayidx5 = getelementptr inbounds float* %out, i64 %indvars.iv
+  store float %add, float* %arrayidx5, align 4
+  br label %for.inc
+
+if.else:                                          ; preds = %for.body
+  %arrayidx7 = getelementptr inbounds float* %out, i64 %indvars.iv
+  %3 = load float* %arrayidx7, align 4
+  %div = fdiv float %3, 3.000000e+00
+  store float %div, float* %arrayidx7, align 4
+; This load should be hoisted in spite of store 
+  %arrayidx9 = getelementptr inbounds i32* %in, i64 %indvars.iv
+  %4 = load i32* %arrayidx9, align 4
+  %conv10 = sitofp i32 %4 to float
+  %add13 = fadd float %div, %conv10
+  store float %add13, float* %arrayidx7, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %if.else
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, %0
+  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %entry, %for.cond.for.end_crit_edge
+  ret float* %out
+}
+
diff --git a/test/Transforms/InstMerge/ld_hoist_st_sink.ll b/test/Transforms/InstMerge/ld_hoist_st_sink.ll
new file mode 100644
index 0000000..978160a
--- /dev/null
+++ b/test/Transforms/InstMerge/ld_hoist_st_sink.ll
@@ -0,0 +1,84 @@
+; Tests to make sure that loads and stores in a diamond get merged
+; Loads are hoisted into the header. Stores sunks into the footer.
+; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+%struct.node = type { i64, %struct.node*, %struct.node*, %struct.node*, i64, %struct.arc*, i64, i64, i64 }
+%struct.arc = type { i64, i64, i64 }
+
+define i64 @foo(%struct.node* nocapture readonly %r) nounwind {
+entry:
+  %node.0.in16 = getelementptr inbounds %struct.node* %r, i64 0, i32 2
+  %node.017 = load %struct.node** %node.0.in16, align 8
+  %tobool18 = icmp eq %struct.node* %node.017, null
+  br i1 %tobool18, label %while.end, label %while.body.preheader
+
+; CHECK-LABEL: while.body.preheader
+while.body.preheader:                             ; preds = %entry
+; CHECK: load
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %if.end
+  %node.020 = phi %struct.node* [ %node.0, %if.end ], [ %node.017, %while.body.preheader ]
+  %sum.019 = phi i64 [ %inc, %if.end ], [ 0, %while.body.preheader ]
+  %orientation = getelementptr inbounds %struct.node* %node.020, i64 0, i32 4
+  %0 = load i64* %orientation, align 8
+  %cmp = icmp eq i64 %0, 1
+  br i1 %cmp, label %if.then, label %if.else
+; CHECK: if.then
+if.then:                                          ; preds = %while.body
+  %a = getelementptr inbounds %struct.node* %node.020, i64 0, i32 5
+; CHECK-NOT: load %struct.arc
+  %1 = load %struct.arc** %a, align 8
+  %cost = getelementptr inbounds %struct.arc* %1, i64 0, i32 0
+; CHECK-NOT: load i64*
+  %2 = load i64* %cost, align 8
+  %pred = getelementptr inbounds %struct.node* %node.020, i64 0, i32 1
+; CHECK-NOT: load %struct.node**
+  %3 = load %struct.node** %pred, align 8
+  %p = getelementptr inbounds %struct.node* %3, i64 0, i32 6
+; CHECK-NOT: load i64*
+  %4 = load i64* %p, align 8
+  %add = add nsw i64 %4, %2
+  %p1 = getelementptr inbounds %struct.node* %node.020, i64 0, i32 6
+; CHECK-NOT: store i64
+  store i64 %add, i64* %p1, align 8
+  br label %if.end
+
+; CHECK: if.else
+if.else:                                          ; preds = %while.body
+  %pred2 = getelementptr inbounds %struct.node* %node.020, i64 0, i32 1
+; CHECK-NOT: load %struct.node**
+  %5 = load %struct.node** %pred2, align 8
+  %p3 = getelementptr inbounds %struct.node* %5, i64 0, i32 6
+; CHECK-NOT: load i64*
+  %6 = load i64* %p3, align 8
+  %a4 = getelementptr inbounds %struct.node* %node.020, i64 0, i32 5
+; CHECK-NOT: load %struct.arc**
+  %7 = load %struct.arc** %a4, align 8
+  %cost5 = getelementptr inbounds %struct.arc* %7, i64 0, i32 0
+; CHECK-NOT: load i64*
+  %8 = load i64* %cost5, align 8
+  %sub = sub nsw i64 %6, %8
+  %p6 = getelementptr inbounds %struct.node* %node.020, i64 0, i32 6
+; CHECK-NOT: store i64
+  store i64 %sub, i64* %p6, align 8
+  br label %if.end
+
+; CHECK: if.end
+if.end:                                           ; preds = %if.else, %if.then
+; CHECK: store
+  %inc = add nsw i64 %sum.019, 1
+  %node.0.in = getelementptr inbounds %struct.node* %node.020, i64 0, i32 2
+  %node.0 = load %struct.node** %node.0.in, align 8
+  %tobool = icmp eq %struct.node* %node.0, null
+  br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:                               ; preds = %if.end
+  %inc.lcssa = phi i64 [ %inc, %if.end ]
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %sum.0.lcssa = phi i64 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
+  ret i64 %sum.0.lcssa
+}
diff --git a/test/Transforms/InstSimplify/AndOrXor.ll b/test/Transforms/InstSimplify/AndOrXor.ll
index c59d6c9..8ed06e8 100644
--- a/test/Transforms/InstSimplify/AndOrXor.ll
+++ b/test/Transforms/InstSimplify/AndOrXor.ll
@@ -20,3 +20,131 @@ define i64 @pow2b(i32 %x) {
   ret i64 %e2
 ; CHECK: ret i64 %e
 }
+
+define i32 @sub_neg_nuw(i32 %x, i32 %y) {
+; CHECK-LABEL: @sub_neg_nuw(
+  %neg = sub nuw i32 0, %y
+  %sub = sub i32 %x, %neg
+  ret i32 %sub
+; CHECK: ret i32 %x
+}
+
+define i1 @and_of_icmps0(i32 %b) {
+; CHECK-LABEL: @and_of_icmps0(
+  %1 = add i32 %b, 2
+  %2 = icmp ult i32 %1, 4
+  %cmp3 = icmp sgt i32 %b, 2
+  %cmp = and i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+define i1 @and_of_icmps1(i32 %b) {
+; CHECK-LABEL: @and_of_icmps1(
+  %1 = add nsw i32 %b, 2
+  %2 = icmp slt i32 %1, 4
+  %cmp3 = icmp sgt i32 %b, 2
+  %cmp = and i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+define i1 @and_of_icmps2(i32 %b) {
+; CHECK-LABEL: @and_of_icmps2(
+  %1 = add i32 %b, 2
+  %2 = icmp ule i32 %1, 3
+  %cmp3 = icmp sgt i32 %b, 2
+  %cmp = and i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+define i1 @and_of_icmps3(i32 %b) {
+; CHECK-LABEL: @and_of_icmps3(
+  %1 = add nsw i32 %b, 2
+  %2 = icmp sle i32 %1, 3
+  %cmp3 = icmp sgt i32 %b, 2
+  %cmp = and i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+define i1 @and_of_icmps4(i32 %b) {
+; CHECK-LABEL: @and_of_icmps4(
+  %1 = add nuw i32 %b, 2
+  %2 = icmp ult i32 %1, 4
+  %cmp3 = icmp ugt i32 %b, 2
+  %cmp = and i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+define i1 @and_of_icmps5(i32 %b) {
+; CHECK-LABEL: @and_of_icmps5(
+  %1 = add nuw i32 %b, 2
+  %2 = icmp ule i32 %1, 3
+  %cmp3 = icmp ugt i32 %b, 2
+  %cmp = and i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+define i1 @or_of_icmps0(i32 %b) {
+; CHECK-LABEL: @or_of_icmps0(
+  %1 = add i32 %b, 2
+  %2 = icmp uge i32 %1, 4
+  %cmp3 = icmp sle i32 %b, 2
+  %cmp = or i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+define i1 @or_of_icmps1(i32 %b) {
+; CHECK-LABEL: @or_of_icmps1(
+  %1 = add nsw i32 %b, 2
+  %2 = icmp sge i32 %1, 4
+  %cmp3 = icmp sle i32 %b, 2
+  %cmp = or i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+define i1 @or_of_icmps2(i32 %b) {
+; CHECK-LABEL: @or_of_icmps2(
+  %1 = add i32 %b, 2
+  %2 = icmp ugt i32 %1, 3
+  %cmp3 = icmp sle i32 %b, 2
+  %cmp = or i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+define i1 @or_of_icmps3(i32 %b) {
+; CHECK-LABEL: @or_of_icmps3(
+  %1 = add nsw i32 %b, 2
+  %2 = icmp sgt i32 %1, 3
+  %cmp3 = icmp sle i32 %b, 2
+  %cmp = or i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+define i1 @or_of_icmps4(i32 %b) {
+; CHECK-LABEL: @or_of_icmps4(
+  %1 = add nuw i32 %b, 2
+  %2 = icmp uge i32 %1, 4
+  %cmp3 = icmp ule i32 %b, 2
+  %cmp = or i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+define i1 @or_of_icmps5(i32 %b) {
+; CHECK-LABEL: @or_of_icmps5(
+  %1 = add nuw i32 %b, 2
+  %2 = icmp ugt i32 %1, 3
+  %cmp3 = icmp ule i32 %b, 2
+  %cmp = or i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
diff --git a/test/Transforms/InstSimplify/assume.ll b/test/Transforms/InstSimplify/assume.ll
new file mode 100644
index 0000000..4dd0a8f
--- /dev/null
+++ b/test/Transforms/InstSimplify/assume.ll
@@ -0,0 +1,13 @@
+; RUN: opt -instsimplify -S < %s | FileCheck %s
+
+define void @test1() {
+  call void @llvm.assume(i1 1)
+  ret void
+
+; CHECK-LABEL: @test1
+; CHECK-NOT: llvm.assume
+; CHECK: ret void
+}
+
+declare void @llvm.assume(i1) nounwind
+
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 7d0cd9c..38fd747 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -333,14 +333,46 @@ define i1 @or(i32 %x) {
 ; CHECK: ret i1 false
 }
 
-define i1 @shl(i32 %x) {
-; CHECK-LABEL: @shl(
+define i1 @shl1(i32 %x) {
+; CHECK-LABEL: @shl1(
   %s = shl i32 1, %x
   %c = icmp eq i32 %s, 0
   ret i1 %c
 ; CHECK: ret i1 false
 }
 
+define i1 @shl2(i32 %X) {
+; CHECK: @shl2
+  %sub = shl nsw i32 -1, %X
+  %cmp = icmp eq i32 %sub, 31
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @shl3(i32 %X) {
+; CHECK: @shl3
+  %sub = shl nuw i32 4, %X
+  %cmp = icmp eq i32 %sub, 31
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @shl4(i32 %X) {
+; CHECK: @shl4
+  %sub = shl nsw i32 -1, %X
+  %cmp = icmp sle i32 %sub, -1
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @shl5(i32 %X) {
+; CHECK: @shl5
+  %sub = shl nuw i32 4, %X
+  %cmp = icmp ugt i32 %sub, 3
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 true
+}
+
 define i1 @lshr1(i32 %x) {
 ; CHECK-LABEL: @lshr1(
   %s = lshr i32 -1, %x
@@ -874,6 +906,21 @@ define i1 @nonnull_arg(i32* nonnull %i) {
 ; CHECK: ret i1 false
 }
 
+define i1 @nonnull_deref_arg(i32* dereferenceable(4) %i) {
+  %cmp = icmp eq i32* %i, null
+  ret i1 %cmp
+; CHECK-LABEL: @nonnull_deref_arg
+; CHECK: ret i1 false
+}
+
+define i1 @nonnull_deref_as_arg(i32 addrspace(1)* dereferenceable(4) %i) {
+  %cmp = icmp eq i32 addrspace(1)* %i, null
+  ret i1 %cmp
+; CHECK-LABEL: @nonnull_deref_as_arg
+; CHECK: icmp
+; CHECK ret
+}
+
 declare nonnull i32* @returns_nonnull_helper()
 define i1 @returns_nonnull() {
   %call = call nonnull i32* @returns_nonnull_helper()
@@ -883,6 +930,48 @@ define i1 @returns_nonnull() {
 ; CHECK: ret i1 false
 }
 
+declare dereferenceable(4) i32* @returns_nonnull_deref_helper()
+define i1 @returns_nonnull_deref() {
+  %call = call dereferenceable(4) i32* @returns_nonnull_deref_helper()
+  %cmp = icmp eq i32* %call, null
+  ret i1 %cmp
+; CHECK-LABEL: @returns_nonnull_deref
+; CHECK: ret i1 false
+}
+
+declare dereferenceable(4) i32 addrspace(1)* @returns_nonnull_deref_as_helper()
+define i1 @returns_nonnull_as_deref() {
+  %call = call dereferenceable(4) i32 addrspace(1)* @returns_nonnull_deref_as_helper()
+  %cmp = icmp eq i32 addrspace(1)* %call, null
+  ret i1 %cmp
+; CHECK-LABEL: @returns_nonnull_as_deref
+; CHECK: icmp
+; CHECK: ret
+}
+
+define i1 @nonnull_load(i32** %addr) {
+  %ptr = load i32** %addr, !nonnull !{}
+  %cmp = icmp eq i32* %ptr, null
+  ret i1 %cmp
+; CHECK-LABEL: @nonnull_load
+; CHECK: ret i1 false
+}
+
+define i1 @nonnull_load_as_outer(i32* addrspace(1)* %addr) {
+  %ptr = load i32* addrspace(1)* %addr, !nonnull !{}
+  %cmp = icmp eq i32* %ptr, null
+  ret i1 %cmp
+; CHECK-LABEL: @nonnull_load_as_outer
+; CHECK: ret i1 false
+}
+define i1 @nonnull_load_as_inner(i32 addrspace(1)** %addr) {
+  %ptr = load i32 addrspace(1)** %addr, !nonnull !{}
+  %cmp = icmp eq i32 addrspace(1)* %ptr, null
+  ret i1 %cmp
+; CHECK-LABEL: @nonnull_load_as_inner
+; CHECK: ret i1 false
+}
+
 ; If a bit is known to be zero for A and known to be one for B,
 ; then A and B cannot be equal.
 define i1 @icmp_eq_const(i32 %a) nounwind {
@@ -913,3 +1002,101 @@ define i1 @icmp_sdiv_int_min(i32 %a) {
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[DIV]], -1073741824
 ; CHECK-NEXT: ret i1 [[CMP]]
 }
+
+define i1 @icmp_sdiv_pr20288(i64 %a) {
+   %div = sdiv i64 %a, -8589934592
+   %cmp = icmp ne i64 %div, 1073741824
+   ret i1 %cmp
+
+; CHECK-LABEL: @icmp_sdiv_pr20288
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 %a, -8589934592
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[DIV]], 1073741824
+; CHECK-NEXT: ret i1 [[CMP]]
+}
+
+define i1 @icmp_sdiv_neg1(i64 %a) {
+ %div = sdiv i64 %a, -1
+ %cmp = icmp ne i64 %div, 1073741824
+ ret i1 %cmp
+
+; CHECK-LABEL: @icmp_sdiv_neg1
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 %a, -1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[DIV]], 1073741824
+; CHECK-NEXT: ret i1 [[CMP]]
+}
+
+define i1 @icmp_known_bits(i4 %x, i4 %y) {
+  %and1 = and i4 %y, -7
+  %and2 = and i4 %x, -7
+  %or1 = or i4 %and1, 2
+  %or2 = or i4 %and2, 2
+  %add = add i4 %or1, %or2
+  %cmp = icmp eq i4 %add, 0
+  ret i1 %cmp
+
+; CHECK-LABEL: @icmp_known_bits
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @icmp_shl_nuw_1(i64 %a) {
+ %shl = shl nuw i64 1, %a
+ %cmp = icmp ne i64 %shl, 0
+ ret i1 %cmp
+
+; CHECK-LABEL: @icmp_shl_nuw_1
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @icmp_shl_nsw_neg1(i64 %a) {
+ %shl = shl nsw i64 -1, %a
+ %cmp = icmp sge i64 %shl, 3
+ ret i1 %cmp
+
+; CHECK-LABEL: @icmp_shl_nsw_neg1
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @icmp_shl_nsw_1(i64 %a) {
+ %shl = shl nsw i64 1, %a
+ %cmp = icmp sge i64 %shl, 0
+ ret i1 %cmp
+
+; CHECK-LABEL: @icmp_shl_nsw_1
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @icmp_shl_1_V_ugt_2147483648(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp ugt i32 %shl, 2147483648
+  ret i1 %cmp
+
+; CHECK-LABEL: @icmp_shl_1_V_ugt_2147483648(
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @icmp_shl_1_V_ule_2147483648(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp ule i32 %shl, 2147483648
+  ret i1 %cmp
+
+; CHECK-LABEL: @icmp_shl_1_V_ule_2147483648(
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @icmp_shl_1_V_eq_31(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp eq i32 %shl, 31
+  ret i1 %cmp
+
+; CHECK-LABEL: @icmp_shl_1_V_eq_31(
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @icmp_shl_1_V_ne_31(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp ne i32 %shl, 31
+  ret i1 %cmp
+
+; CHECK-LABEL: @icmp_shl_1_V_ne_31(
+; CHECK-NEXT: ret i1 true
+}
diff --git a/test/Transforms/InstSimplify/exact-nsw-nuw.ll b/test/Transforms/InstSimplify/exact-nsw-nuw.ll
index a0e326b..5ccc808 100644
--- a/test/Transforms/InstSimplify/exact-nsw-nuw.ll
+++ b/test/Transforms/InstSimplify/exact-nsw-nuw.ll
@@ -42,3 +42,19 @@ define i32 @shift5(i32 %A, i32 %B) {
   %D = ashr i32 %C, %B
   ret i32 %D
 }
+
+; CHECK-LABEL: @div1(
+; CHECK: ret i32 0
+define i32 @div1(i32 %V) {
+  %A = udiv i32 %V, -2147483648
+  %B = udiv i32 %A, -2147483648
+  ret i32 %B
+}
+
+; CHECK-LABEL: @div2(
+; CHECK-NOT: ret i32 0
+define i32 @div2(i32 %V) {
+  %A = sdiv i32 %V, -1
+  %B = sdiv i32 %A, -2147483648
+  ret i32 %B
+}
diff --git a/test/Transforms/InstSimplify/fold-builtin-fma.ll b/test/Transforms/InstSimplify/fold-builtin-fma.ll
new file mode 100644
index 0000000..6331b8c
--- /dev/null
+++ b/test/Transforms/InstSimplify/fold-builtin-fma.ll
@@ -0,0 +1,119 @@
+; RUN: opt -instsimplify -S < %s | FileCheck %s
+
+; Fixes PR20832
+; Make sure that we correctly fold a fused multiply-add where operands
+; are all finite constants and addend is zero.
+
+declare double @llvm.fma.f64(double, double, double)
+
+
+define double @PR20832()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 8.0, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @PR20832(
+; CHECK: ret double 5.600000e+01
+
+; Test builtin fma with all finite non-zero constants.
+define double @test_all_finite()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 8.0, double 5.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_all_finite(
+; CHECK: ret double 6.100000e+01
+
+; Test builtin fma with a +/-NaN addend.
+define double @test_NaN_addend()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 8.0, double 0x7FF8000000000000)
+  ret double %1
+}
+; CHECK-LABEL: @test_NaN_addend(
+; CHECK: ret double 0x7FF8000000000000
+
+define double @test_NaN_addend_2()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 8.0, double 0xFFF8000000000000)
+  ret double %1
+}
+; CHECK-LABEL: @test_NaN_addend_2(
+; CHECK: ret double 0xFFF8000000000000
+
+; Test builtin fma with a +/-Inf addend.
+define double @test_Inf_addend()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 8.0, double 0x7FF0000000000000)
+  ret double %1
+}
+; CHECK-LABEL: @test_Inf_addend(
+; CHECK: ret double 0x7FF0000000000000
+
+define double @test_Inf_addend_2()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 8.0, double 0xFFF0000000000000)
+  ret double %1
+}
+; CHECK-LABEL: @test_Inf_addend_2(
+; CHECK: ret double 0xFFF0000000000000
+
+; Test builtin fma with one of the operands to the multiply being +/-NaN.
+define double @test_NaN_1()  {
+  %1 = call double @llvm.fma.f64(double 0x7FF8000000000000, double 8.0, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_NaN_1(
+; CHECK: ret double 0x7FF8000000000000
+
+
+define double @test_NaN_2()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 0x7FF8000000000000, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_NaN_2(
+; CHECK: ret double 0x7FF8000000000000
+
+
+define double @test_NaN_3()  {
+  %1 = call double @llvm.fma.f64(double 0xFFF8000000000000, double 8.0, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_NaN_3(
+; CHECK: ret double 0x7FF8000000000000
+
+
+define double @test_NaN_4()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 0xFFF8000000000000, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_NaN_4(
+; CHECK: ret double 0x7FF8000000000000
+
+
+; Test builtin fma with one of the operands to the multiply being +/-Inf.
+define double @test_Inf_1()  {
+  %1 = call double @llvm.fma.f64(double 0x7FF0000000000000, double 8.0, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_Inf_1(
+; CHECK: ret double 0x7FF0000000000000
+
+
+define double @test_Inf_2()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 0x7FF0000000000000, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_Inf_2(
+; CHECK: ret double 0x7FF0000000000000
+
+
+define double @test_Inf_3()  {
+  %1 = call double @llvm.fma.f64(double 0xFFF0000000000000, double 8.0, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_Inf_3(
+; CHECK: ret double 0xFFF0000000000000
+
+
+define double @test_Inf_4()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 0xFFF0000000000000, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_Inf_4(
+; CHECK: ret double 0xFFF0000000000000
+
diff --git a/test/Transforms/InstSimplify/gep.ll b/test/Transforms/InstSimplify/gep.ll
new file mode 100644
index 0000000..49a97f1
--- /dev/null
+++ b/test/Transforms/InstSimplify/gep.ll
@@ -0,0 +1,80 @@
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.A = type { [7 x i8] }
+
+define %struct.A* @test1(%struct.A* %b, %struct.A* %e) {
+  %e_ptr = ptrtoint %struct.A* %e to i64
+  %b_ptr = ptrtoint %struct.A* %b to i64
+  %sub = sub i64 %e_ptr, %b_ptr
+  %sdiv = sdiv exact i64 %sub, 7
+  %gep = getelementptr inbounds %struct.A* %b, i64 %sdiv
+  ret %struct.A* %gep
+; CHECK-LABEL: @test1
+; CHECK-NEXT: ret %struct.A* %e
+}
+
+define i8* @test2(i8* %b, i8* %e) {
+  %e_ptr = ptrtoint i8* %e to i64
+  %b_ptr = ptrtoint i8* %b to i64
+  %sub = sub i64 %e_ptr, %b_ptr
+  %gep = getelementptr inbounds i8* %b, i64 %sub
+  ret i8* %gep
+; CHECK-LABEL: @test2
+; CHECK-NEXT: ret i8* %e
+}
+
+define i64* @test3(i64* %b, i64* %e) {
+  %e_ptr = ptrtoint i64* %e to i64
+  %b_ptr = ptrtoint i64* %b to i64
+  %sub = sub i64 %e_ptr, %b_ptr
+  %ashr = ashr exact i64 %sub, 3
+  %gep = getelementptr inbounds i64* %b, i64 %ashr
+  ret i64* %gep
+; CHECK-LABEL: @test3
+; CHECK-NEXT: ret i64* %e
+}
+
+define %struct.A* @test4(%struct.A* %b) {
+  %b_ptr = ptrtoint %struct.A* %b to i64
+  %sub = sub i64 0, %b_ptr
+  %sdiv = sdiv exact i64 %sub, 7
+  %gep = getelementptr inbounds %struct.A* %b, i64 %sdiv
+  ret %struct.A* %gep
+; CHECK-LABEL: @test4
+; CHECK-NEXT: ret %struct.A* null
+}
+
+define i8* @test5(i8* %b) {
+  %b_ptr = ptrtoint i8* %b to i64
+  %sub = sub i64 0, %b_ptr
+  %gep = getelementptr inbounds i8* %b, i64 %sub
+  ret i8* %gep
+; CHECK-LABEL: @test5
+; CHECK-NEXT: ret i8* null
+}
+
+define i64* @test6(i64* %b) {
+  %b_ptr = ptrtoint i64* %b to i64
+  %sub = sub i64 0, %b_ptr
+  %ashr = ashr exact i64 %sub, 3
+  %gep = getelementptr inbounds i64* %b, i64 %ashr
+  ret i64* %gep
+; CHECK-LABEL: @test6
+; CHECK-NEXT: ret i64* null
+}
+
+define i8* @test7(i8* %b, i8** %e) {
+  %e_ptr = ptrtoint i8** %e to i64
+  %b_ptr = ptrtoint i8* %b to i64
+  %sub = sub i64 %e_ptr, %b_ptr
+  %gep = getelementptr inbounds i8* %b, i64 %sub
+  ret i8* %gep
+; CHECK-LABEL: @test7
+; CHECK-NEXT: ptrtoint
+; CHECK-NEXT: ptrtoint
+; CHECK-NEXT: sub
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/InstSimplify/rem.ll b/test/Transforms/InstSimplify/rem.ll
index 80fa8e7..f5ea451 100644
--- a/test/Transforms/InstSimplify/rem.ll
+++ b/test/Transforms/InstSimplify/rem.ll
@@ -15,3 +15,31 @@ define i32 @select2(i32 %x, i1 %b) {
   ret i32 %rem
 ; CHECK: ret i32 0
 }
+
+define i32 @rem1(i32 %x, i32 %n) {
+; CHECK-LABEL: @rem1(
+; CHECK-NEXT: %mod = srem i32 %x, %n
+; CHECK-NEXT: ret i32 %mod
+ %mod = srem i32 %x, %n
+ %mod1 = srem i32 %mod, %n
+ ret i32 %mod1
+}
+
+define i32 @rem2(i32 %x, i32 %n) {
+; CHECK-LABEL: @rem2(
+; CHECK-NEXT: %mod = urem i32 %x, %n
+; CHECK-NEXT: ret i32 %mod
+ %mod = urem i32 %x, %n
+ %mod1 = urem i32 %mod, %n
+ ret i32 %mod1
+}
+
+define i32 @rem3(i32 %x, i32 %n) {
+; CHECK-LABEL: @rem3(
+; CHECK-NEXT: %[[srem:.*]] = srem i32 %x, %n
+; CHECK-NEXT: %[[urem:.*]] = urem i32 %[[srem]], %n
+; CHECK-NEXT: ret i32 %[[urem]]
+ %mod = srem i32 %x, %n
+ %mod1 = urem i32 %mod, %n
+ ret i32 %mod1
+}
diff --git a/test/Transforms/InstSimplify/shr-nop.ll b/test/Transforms/InstSimplify/shr-nop.ll
new file mode 100644
index 0000000..b0dc873
--- /dev/null
+++ b/test/Transforms/InstSimplify/shr-nop.ll
@@ -0,0 +1,346 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; CHECK-LABEL: @foo
+; CHECK:      %[[and:.*]] = and i32 %x, 1
+; CHECK-NEXT: %[[add:.*]] = add i32 %[[and]], -1
+; CHECK-NEXT: ret i32 %[[add]]
+define i32 @foo(i32 %x) {
+ %o = and i32 %x, 1
+ %n = add i32 %o, -1
+ %t = ashr i32 %n, 17
+ ret i32 %t
+}
+
+; CHECK-LABEL: @exact_lshr_eq_both_zero
+; CHECK-NEXT: ret i1 true
+define i1 @exact_lshr_eq_both_zero(i8 %a) {
+ %shr = lshr exact i8 0, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_both_zero
+; CHECK-NEXT: ret i1 true
+define i1 @exact_ashr_eq_both_zero(i8 %a) {
+ %shr = ashr exact i8 0, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_both_zero
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_ashr_eq_both_zero(i8 %a) {
+ %shr = ashr i8 0, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_both_zero
+; CHECK-NEXT: ret i1 false
+define i1 @exact_lshr_ne_both_zero(i8 %a) {
+ %shr = lshr exact i8 0, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_both_zero
+; CHECK-NEXT: ret i1 false
+define i1 @exact_ashr_ne_both_zero(i8 %a) {
+ %shr = ashr exact i8 0, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_both_zero
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_lshr_ne_both_zero(i8 %a) {
+ %shr = lshr i8 0, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_both_zero
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_ashr_ne_both_zero(i8 %a) {
+ %shr = ashr i8 0, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_last_zero
+; CHECK-NEXT: ret i1 false
+define i1 @exact_lshr_eq_last_zero(i8 %a) {
+ %shr = lshr exact i8 128, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_last_zero
+; CHECK-NEXT: ret i1 false
+define i1 @exact_ashr_eq_last_zero(i8 %a) {
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_both_zero
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_lshr_eq_both_zero(i8 %a) {
+ %shr = lshr i8 0, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_last_zero
+; CHECK-NEXT: ret i1 true
+define i1 @exact_lshr_ne_last_zero(i8 %a) {
+ %shr = lshr exact i8 128, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_last_zero
+; CHECK-NEXT: ret i1 true
+define i1 @exact_ashr_ne_last_zero(i8 %a) {
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_last_zero
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_lshr_eq_last_zero(i8 %a) {
+ %shr = lshr i8 128, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_last_zero
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_ashr_eq_last_zero(i8 %a) {
+ %shr = ashr i8 -128, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_last_zero
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_lshr_ne_last_zero(i8 %a) {
+ %shr = lshr i8 128, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_last_zero
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_ashr_ne_last_zero(i8 %a) {
+ %shr = ashr i8 -128, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_eq_first_zero
+; CHECK-NEXT: ret i1 false
+define i1 @lshr_eq_first_zero(i8 %a) {
+ %shr = lshr i8 0, %a
+ %cmp = icmp eq i8 %shr, 2
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_eq_first_zero
+; CHECK-NEXT: ret i1 false
+define i1 @ashr_eq_first_zero(i8 %a) {
+ %shr = ashr i8 0, %a
+ %cmp = icmp eq i8 %shr, 2
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_ne_first_zero
+; CHECK-NEXT: ret i1 true
+define i1 @lshr_ne_first_zero(i8 %a) {
+ %shr = lshr i8 0, %a
+ %cmp = icmp ne i8 %shr, 2
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_ne_first_zero
+; CHECK-NEXT: ret i1 true
+define i1 @ashr_ne_first_zero(i8 %a) {
+ %shr = ashr i8 0, %a
+ %cmp = icmp ne i8 %shr, 2
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_eq_both_minus1
+; CHECK-NEXT: ret i1 true
+define i1 @ashr_eq_both_minus1(i8 %a) {
+ %shr = ashr i8 -1, %a
+ %cmp = icmp eq i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_ne_both_minus1
+; CHECK-NEXT: ret i1 false
+define i1 @ashr_ne_both_minus1(i8 %a) {
+ %shr = ashr i8 -1, %a
+ %cmp = icmp ne i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_both_minus1
+; CHECK-NEXT: ret i1 true
+define i1 @exact_ashr_eq_both_minus1(i8 %a) {
+ %shr = ashr exact i8 -1, %a
+ %cmp = icmp eq i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_both_minus1
+; CHECK-NEXT: ret i1 false
+define i1 @exact_ashr_ne_both_minus1(i8 %a) {
+ %shr = ashr exact i8 -1, %a
+ %cmp = icmp ne i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_opposite_msb
+; CHECK-NEXT: ret i1 false
+define i1 @exact_ashr_eq_opposite_msb(i8 %a) {
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_noexactlog
+; CHECK-NEXT: ret i1 false
+define i1 @exact_ashr_eq_noexactlog(i8 %a) {
+ %shr = ashr exact i8 -90, %a
+ %cmp = icmp eq i8 %shr, -30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_opposite_msb
+; CHECK-NEXT: ret i1 true
+define i1 @exact_ashr_ne_opposite_msb(i8 %a) {
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_eq_opposite_msb
+; CHECK-NEXT: ret i1 false
+define i1 @ashr_eq_opposite_msb(i8 %a) {
+ %shr = ashr i8 -128, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_ne_opposite_msb
+; CHECK-NEXT: ret i1 true
+define i1 @ashr_ne_opposite_msb(i8 %a) {
+ %shr = ashr i8 -128, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_shift_gt
+; CHECK-NEXT : ret i1 false
+define i1 @exact_ashr_eq_shift_gt(i8 %a) {
+ %shr = ashr exact i8 -2, %a
+ %cmp = icmp eq i8 %shr, -8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_shift_gt
+; CHECK-NEXT : ret i1 true
+define i1 @exact_ashr_ne_shift_gt(i8 %a) {
+ %shr = ashr exact i8 -2, %a
+ %cmp = icmp ne i8 %shr, -8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_shift_gt
+; CHECK-NEXT : ret i1 false
+define i1 @nonexact_ashr_eq_shift_gt(i8 %a) {
+ %shr = ashr i8 -2, %a
+ %cmp = icmp eq i8 %shr, -8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_shift_gt
+; CHECK-NEXT : ret i1 true
+define i1 @nonexact_ashr_ne_shift_gt(i8 %a) {
+ %shr = ashr i8 -2, %a
+ %cmp = icmp ne i8 %shr, -8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_shift_gt
+; CHECK-NEXT: ret i1 false
+define i1 @exact_lshr_eq_shift_gt(i8 %a) {
+ %shr = lshr exact i8 2, %a
+ %cmp = icmp eq i8 %shr, 8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_shift_gt
+; CHECK-NEXT: ret i1 true
+define i1 @exact_lshr_ne_shift_gt(i8 %a) {
+ %shr = lshr exact i8 2, %a
+ %cmp = icmp ne i8 %shr, 8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_shift_gt
+; CHECK-NEXT : ret i1 false
+define i1 @nonexact_lshr_eq_shift_gt(i8 %a) {
+ %shr = lshr i8 2, %a
+ %cmp = icmp eq i8 %shr, 8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_shift_gt
+; CHECK-NEXT : ret i1 true
+define i1 @nonexact_lshr_ne_shift_gt(i8 %a) {
+ %shr = ashr i8 2, %a
+ %cmp = icmp ne i8 %shr, 8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_noexactlog
+; CHECK-NEXT: ret i1 true
+define i1 @exact_ashr_ne_noexactlog(i8 %a) {
+ %shr = ashr exact i8 -90, %a
+ %cmp = icmp ne i8 %shr, -30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_noexactlog
+; CHECK-NEXT: ret i1 false
+define i1 @exact_lshr_eq_noexactlog(i8 %a) {
+ %shr = lshr exact i8 90, %a
+ %cmp = icmp eq i8 %shr, 30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_noexactlog
+; CHECK-NEXT: ret i1 true
+define i1 @exact_lshr_ne_noexactlog(i8 %a) {
+ %shr = lshr exact i8 90, %a
+ %cmp = icmp ne i8 %shr, 30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_lowbit
+; CHECK-NEXT: ret i32 7
+define i32 @exact_lshr_lowbit(i32 %shiftval) {
+  %shr = lshr exact i32 7, %shiftval
+  ret i32 %shr
+}
+
+; CHECK-LABEL: @exact_ashr_lowbit
+; CHECK-NEXT: ret i32 7
+define i32 @exact_ashr_lowbit(i32 %shiftval) {
+  %shr = ashr exact i32 7, %shiftval
+  ret i32 %shr
+}
diff --git a/test/Transforms/InstSimplify/vector_ptr_bitcast.ll b/test/Transforms/InstSimplify/vector_ptr_bitcast.ll
new file mode 100644
index 0000000..607892a
--- /dev/null
+++ b/test/Transforms/InstSimplify/vector_ptr_bitcast.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+
+%mst = type { i8*, i8* }
+%mst2 = type { i32*, i32*, i32*, i32* }
+
+@a = private unnamed_addr constant %mst { i8* inttoptr (i64 -1 to i8*),
+                                          i8* inttoptr (i64 -1 to i8*)},
+                                          align 8
+@b = private unnamed_addr constant %mst2 { i32* inttoptr (i64 42 to i32*),
+                                           i32* inttoptr (i64 67 to i32*),
+                                           i32* inttoptr (i64 33 to i32*),
+                                           i32* inttoptr (i64 58 to i32*)},
+                                          align 8
+
+define i64 @fn() {
+  %x = load <2 x i8*>* bitcast (%mst* @a to <2 x i8*>*), align 8
+  %b = extractelement <2 x i8*> %x, i32 0
+  %c = ptrtoint i8* %b to i64
+  ; CHECK-LABEL: @fn
+  ; CHECK-NEXT: ret i64 -1
+  ret i64 %c
+}
+
+define i64 @fn2() {
+  %x = load <4 x i32*>* bitcast (%mst2* @b to <4 x i32*>*), align 8
+  %b = extractelement <4 x i32*> %x, i32 0
+  %c = extractelement <4 x i32*> %x, i32 3
+  %d = ptrtoint i32* %b to i64
+  %e = ptrtoint i32* %c to i64
+  %r = add i64 %d, %e
+  ; CHECK-LABEL: @fn2
+  ; CHECK-NEXT: ret i64 100
+  ret i64 %r
+}
diff --git a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
index 16bfe2a..1652388 100644
--- a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
+++ b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
@@ -4,10 +4,10 @@
 ; CHECK: @A = internal global i32 0
 
 @B = alias i32* @A
-; CHECK: @B = alias internal i32* @A
+; CHECK: @B = internal alias i32* @A
 
 @C = alias i32* @A
-; CHECK: @C = alias internal i32* @A
+; CHECK: @C = internal alias i32* @A
 
 define i32 @main() {
 	%tmp = load i32* @C
diff --git a/test/Transforms/Internalize/local-visibility.ll b/test/Transforms/Internalize/local-visibility.ll
index c24d4b7..b09a136 100644
--- a/test/Transforms/Internalize/local-visibility.ll
+++ b/test/Transforms/Internalize/local-visibility.ll
@@ -10,9 +10,9 @@
 ; CHECK: @protected.variable = internal global i32 0
 @protected.variable = protected global i32 0
 
-; CHECK: @hidden.alias = alias internal i32* @global
+; CHECK: @hidden.alias = internal alias  i32* @global
 @hidden.alias = hidden alias i32* @global
-; CHECK: @protected.alias = alias internal i32* @global
+; CHECK: @protected.alias = internal alias i32* @global
 @protected.alias = protected alias i32* @global
 
 ; CHECK: define internal void @hidden.function() {
diff --git a/test/Transforms/JumpThreading/assume-edge-dom.ll b/test/Transforms/JumpThreading/assume-edge-dom.ll
new file mode 100644
index 0000000..f1d0f41
--- /dev/null
+++ b/test/Transforms/JumpThreading/assume-edge-dom.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -jump-threading < %s | FileCheck %s
+
+declare i8* @escape()
+declare void @llvm.assume(i1)
+
+define i1 @test1(i1 %cond) {
+entry:
+    br i1 %cond, label %taken, label %not_taken
+
+; CHECK-LABEL: @test1
+; CHECK: br i1 %cond, label %no, label %yes
+; CHECK: ret i1 true
+
+taken:
+    %res1 = call i8* @escape()
+    %a = icmp eq i8* %res1, null
+    tail call void @llvm.assume(i1 %a)
+    br label %done
+not_taken:
+    %res2 = call i8* @escape()
+    %b = icmp ne i8* %res2, null
+    tail call void @llvm.assume(i1 %b)
+    br label %done
+
+; An assume that can be used to simplify this comparison dominates each
+; predecessor branch (although no assume dominates the cmp itself). Make sure
+; this still can be simplified.
+
+done:
+    %res = phi i8* [ %res1, %taken ], [ %res2, %not_taken ]
+    %cnd = icmp ne i8* %res, null
+    br i1 %cnd, label %yes, label %no
+
+yes:
+    ret i1 true
+no:
+    ret i1 false
+}
+
diff --git a/test/Transforms/JumpThreading/assume.ll b/test/Transforms/JumpThreading/assume.ll
new file mode 100644
index 0000000..89dd0a9
--- /dev/null
+++ b/test/Transforms/JumpThreading/assume.ll
@@ -0,0 +1,68 @@
+; RUN: opt -S -jump-threading -dce < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @test1(i32 %a, i32 %b) #0 {
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %cmp1 = icmp sgt i32 %b, 1234
+  br i1 %cmp1, label %if.then, label %if.else
+
+; CHECK-LABEL: @test1
+; CHECK: icmp sgt i32 %a, 5
+; CHECK: call void @llvm.assume
+; CHECK-NOT: icmp sgt i32 %a, 3
+; CHECK: ret i32
+
+if.then:                                          ; preds = %entry
+  %cmp2 = icmp sgt i32 %a, 3
+  br i1 %cmp2, label %if.then3, label %return
+
+if.then3:                                         ; preds = %if.then
+  tail call void (...)* @bar() #1
+  br label %return
+
+if.else:                                          ; preds = %entry
+  tail call void (...)* @car() #1
+  br label %return
+
+return:                                           ; preds = %if.else, %if.then, %if.then3
+  %retval.0 = phi i32 [ 1, %if.then3 ], [ 0, %if.then ], [ 0, %if.else ]
+  ret i32 %retval.0
+}
+
+define i32 @test2(i32 %a) #0 {
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %cmp1 = icmp sgt i32 %a, 3
+  br i1 %cmp1, label %if.then, label %return
+
+; CHECK-LABEL: @test2
+; CHECK: icmp sgt i32 %a, 5
+; CHECK: tail call void @llvm.assume
+; CHECK: tail call void (...)* @bar()
+; CHECK: ret i32 1
+
+
+if.then:                                          ; preds = %entry
+  tail call void (...)* @bar() #1
+  br label %return
+
+return:                                           ; preds = %entry, %if.then
+  %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
+  ret i32 %retval.0
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+declare void @bar(...)
+
+declare void @car(...)
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll
index e5bf64b..b13b767 100644
--- a/test/Transforms/JumpThreading/thread-loads.ll
+++ b/test/Transforms/JumpThreading/thread-loads.ll
@@ -75,6 +75,37 @@ bb3:		; preds = %bb1
 	ret i32 %res.0
 }
 
+define i32 @test3(i8** %x, i1 %f) {
+; Correctly thread loads of different (but compatible) types, placing bitcasts
+; as necessary in the predecessors. This is especially tricky because the same
+; predecessor ends up with two entries in the PHI node and they must share
+; a single cast.
+; CHECK-LABEL: @test3(
+entry:
+  %0 = bitcast i8** %x to i32**
+  %1 = load i32** %0, align 8
+  br i1 %f, label %if.end57, label %if.then56
+; CHECK: %[[LOAD:.*]] = load i32**
+; CHECK: %[[CAST:.*]] = bitcast i32* %[[LOAD]] to i8*
+
+if.then56:
+  br label %if.end57
+
+if.end57:
+  %2 = load i8** %x, align 8
+  %tobool59 = icmp eq i8* %2, null
+  br i1 %tobool59, label %return, label %if.then60
+; CHECK: %[[PHI:.*]] = phi i8* [ %[[CAST]], %[[PRED:[^ ]+]] ], [ %[[CAST]], %[[PRED]] ]
+; CHECK-NEXT: %[[CMP:.*]] = icmp eq i8* %[[PHI]], null
+; CHECK-NEXT: br i1 %[[CMP]]
+
+if.then60:
+  ret i32 42
+
+return:
+  ret i32 13
+}
+
 !0 = metadata !{metadata !3, metadata !3, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/Transforms/LICM/2014-09-10-doFinalizationAssert.ll b/test/Transforms/LICM/2014-09-10-doFinalizationAssert.ll
new file mode 100644
index 0000000..17ae716
--- /dev/null
+++ b/test/Transforms/LICM/2014-09-10-doFinalizationAssert.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -scalar-evolution -licm -loop-unroll -disable-output
+; Test triggered an assertion in doFinalization() because loop unroll was deleting
+; the inner loop which caused the loop to not get removed from the
+; LoopToAliasSetMap.
+; Test case taken from test/Transforms/LoopUnroll/unloop.ll.
+
+declare i1 @check() nounwind
+define void @skiplevelexit() nounwind {
+entry:
+  br label %outer
+
+outer:
+  br label %inner
+
+inner:
+  %iv = phi i32 [ 0, %outer ], [ %inc, %tail ]
+  %inc = add i32 %iv, 1
+  call zeroext i1 @check()
+  br i1 true, label %outer.backedge, label %tail
+
+tail:
+  br i1 false, label %inner, label %exit
+
+outer.backedge:
+  br label %outer
+
+exit:
+  ret void
+}
+
diff --git a/test/Transforms/LICM/PR19798.ll b/test/Transforms/LICM/PR19798.ll
new file mode 100644
index 0000000..82befb0
--- /dev/null
+++ b/test/Transforms/LICM/PR19798.ll
@@ -0,0 +1,22 @@
+; RUN: opt -licm -S < %s | FileCheck %s
+
+define void @f() {
+; CHECK-LABEL: @f(
+entry:
+  br label %bb0
+
+bb0:
+  %tobool7 = icmp eq i1 undef, undef
+  br label %bb1
+
+bb1:
+  br i1 undef, label %bb0, label %bb0
+
+unreachable:
+; CHECK-LABEL: unreachable:
+; CHECK:   br i1 undef, label %unreachable, label %unreachable
+  br i1 %tobool7, label %unreachable, label %unreachable
+
+bb3:
+  unreachable
+}
diff --git a/test/Transforms/LICM/PR21582.ll b/test/Transforms/LICM/PR21582.ll
new file mode 100644
index 0000000..c068c2f
--- /dev/null
+++ b/test/Transforms/LICM/PR21582.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -basicaa -licm -S | FileCheck %s
+@b = external global i32, align 4
+@fn3.i = external global i32, align 4
+
+declare i32 @g() nounwind
+
+define i32 @f() {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.end, %entry
+; CHECK-LABEL: for.cond:
+; CHECK: store i32 0, i32* @b
+  store i32 0, i32* @b, align 4
+  br i1 true, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %for.cond
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.preheader
+  %g.15 = phi i32 [ undef, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx2 = getelementptr inbounds i32* @fn3.i, i64 0
+  %0 = load i32* %arrayidx2, align 4
+  %call = call i32 @g()
+  br i1 false, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %for.cond
+  %whatever = phi i32 [ %call, %for.end.loopexit ], [ undef, %for.cond ]
+  br i1 false, label %for.cond, label %if.then
+
+if.then:                                          ; preds = %for.end
+; CHECK-LABEL: if.then:
+; CHECK: phi i32 [ {{.*}}, %for.end ]
+; CHECK-NOT: store i32 0, i32* @b
+; CHECK: ret i32
+  ret i32 %whatever
+}
diff --git a/test/Transforms/LICM/debug-value.ll b/test/Transforms/LICM/debug-value.ll
index e5c774f..0e0cd39 100644
--- a/test/Transforms/LICM/debug-value.ll
+++ b/test/Transforms/LICM/debug-value.ll
@@ -15,7 +15,7 @@ if.then:                                          ; preds = %for.body
 
 if.then27:                                        ; preds = %if.then
 ; CHECK: tail call void @llvm.dbg.value
-  tail call void @llvm.dbg.value(metadata !18, i64 0, metadata !19), !dbg !21
+  tail call void @llvm.dbg.value(metadata !18, i64 0, metadata !19, metadata !{}), !dbg !21
   br label %for.body61.us
 
 if.end.if.end.split_crit_edge.critedge:           ; preds = %if.then
@@ -31,35 +31,35 @@ for.end104:                                       ; preds = %for.cond.backedge
   ret void, !dbg !24
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.module.flags = !{!26}
 !llvm.dbg.sp = !{!0, !6, !9, !10}
 
-!0 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"idamax", metadata !"idamax", metadata !"", i32 112, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !25} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !25, i32 12, metadata !"clang version 2.9 (trunk 127169)", i1 true, metadata !"", i32 0, metadata !8, metadata !8, metadata !8, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !25, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{metadata !"0x2e\00idamax\00idamax\00\00112\000\001\000\006\00256\000\000", metadata !25, metadata !1, metadata !3, i32 0, null, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{metadata !"0x29", metadata !25} ; [ DW_TAG_file_type ]
+!2 = metadata !{metadata !"0x11\0012\00clang version 2.9 (trunk 127169)\001\00\000\00\000", metadata !25, metadata !8, metadata !8, metadata !8, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !25, metadata !1, null, metadata !4, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"dscal", metadata !"dscal", metadata !"", i32 206, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 589845, metadata !25, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, metadata !2} ; [ DW_TAG_base_type ]
+!6 = metadata !{metadata !"0x2e\00dscal\00dscal\00\00206\000\001\000\006\00256\000\000", metadata !25, metadata !1, metadata !7, i32 0, null, null, null, null} ; [ DW_TAG_subprogram ]
+!7 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !25, metadata !1, null, metadata !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null}
-!9 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"daxpy", metadata !"daxpy", metadata !"", i32 230, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"dgefa", metadata !"dgefa", metadata !"", i32 267, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 267] [def] [scope 0] [dgefa]
+!9 = metadata !{metadata !"0x2e\00daxpy\00daxpy\00\00230\000\001\000\006\00256\000\000", metadata !25, metadata !1, metadata !7, i32 0, null, null, null, null} ; [ DW_TAG_subprogram ]
+!10 = metadata !{metadata !"0x2e\00dgefa\00dgefa\00\00267\000\001\000\006\00256\000\000", metadata !25, metadata !1, metadata !7, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 267] [def] [scope 0] [dgefa]
 !11 = metadata !{i32 281, i32 9, metadata !12, null}
-!12 = metadata !{i32 589835, metadata !25, metadata !13, i32 272, i32 5, i32 32} ; [ DW_TAG_lexical_block ]
-!13 = metadata !{i32 589835, metadata !25, metadata !14, i32 271, i32 5, i32 31} ; [ DW_TAG_lexical_block ]
-!14 = metadata !{i32 589835, metadata !25, metadata !10, i32 267, i32 1, i32 30} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{metadata !"0xb\00272\005\0032", metadata !25, metadata !13} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{metadata !"0xb\00271\005\0031", metadata !25, metadata !14} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{metadata !"0xb\00267\001\0030", metadata !25, metadata !10} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{i32 271, i32 5, metadata !14, null}
 !16 = metadata !{i32 284, i32 10, metadata !17, null}
-!17 = metadata !{i32 589835, metadata !25, metadata !12, i32 282, i32 9, i32 33} ; [ DW_TAG_lexical_block ]
+!17 = metadata !{metadata !"0xb\00282\009\0033", metadata !25, metadata !12} ; [ DW_TAG_lexical_block ]
 !18 = metadata !{double undef}
-!19 = metadata !{i32 590080, metadata !14, metadata !"temp", metadata !1, i32 268, metadata !20, i32 0} ; [ DW_TAG_auto_variable ]
-!20 = metadata !{i32 589860, null, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!19 = metadata !{metadata !"0x100\00temp\00268\000", metadata !14, metadata !1, metadata !20} ; [ DW_TAG_auto_variable ]
+!20 = metadata !{metadata !"0x24\00double\000\0064\0064\000\000\004", null, metadata !2} ; [ DW_TAG_base_type ]
 !21 = metadata !{i32 286, i32 14, metadata !22, null}
-!22 = metadata !{i32 589835, metadata !25, metadata !17, i32 285, i32 13, i32 34} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{metadata !"0xb\00285\0013\0034", metadata !25, metadata !17} ; [ DW_TAG_lexical_block ]
 !23 = metadata !{i32 296, i32 13, metadata !17, null}
 !24 = metadata !{i32 313, i32 1, metadata !14, null}
 !25 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c", metadata !"/private/tmp"}
-!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/LICM/hoist-bitcast-load.ll b/test/Transforms/LICM/hoist-bitcast-load.ll
index 639dca5..fa61eaf 100644
--- a/test/Transforms/LICM/hoist-bitcast-load.ll
+++ b/test/Transforms/LICM/hoist-bitcast-load.ll
@@ -78,6 +78,44 @@ for.end:                                          ; preds = %for.inc, %entry
   ret void
 }
 
+; Make sure the basic alloca pointer hoisting works through an addrspacecast
+; CHECK-LABEL: @test2_addrspacecast
+; CHECK: load i32 addrspace(1)* %c, align 4
+; CHECK: for.body:
+
+; Function Attrs: nounwind uwtable
+define void @test2_addrspacecast(i32 addrspace(1)* nocapture %a, i32 addrspace(1)* nocapture readonly %b, i32 %n) #0 {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  %ca = alloca i64
+  %c = addrspacecast i64* %ca to i32 addrspace(1)*
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %a, i64 %indvars.iv
+  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %1 = load i32 addrspace(1)* %c, align 4
+  %arrayidx3 = getelementptr inbounds i32 addrspace(1)* %b, i64 %indvars.iv
+  %2 = load i32 addrspace(1)* %arrayidx3, align 4
+  %mul = mul nsw i32 %2, %1
+  store i32 %mul, i32 addrspace(1)* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
 ; Make sure the basic alloca pointer hoisting works through a bitcast to a
 ; pointer to a smaller type (where the bitcast also needs to be hoisted):
 ; CHECK-LABEL: @test3
diff --git a/test/Transforms/LICM/hoist-deref-load.ll b/test/Transforms/LICM/hoist-deref-load.ll
new file mode 100644
index 0000000..c230d1d
--- /dev/null
+++ b/test/Transforms/LICM/hoist-deref-load.ll
@@ -0,0 +1,168 @@
+; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; This test represents the following function:
+; void test1(int * __restrict__ a, int * __restrict__ b, int &c, int n) {
+;   for (int i = 0; i < n; ++i)
+;     if (a[i] > 0)
+;       a[i] = c*b[i];
+; }
+; and we want to hoist the load of %c out of the loop. This can be done only
+; because the dereferenceable attribute is on %c.
+
+; CHECK-LABEL: @test1
+; CHECK: load i32* %c, align 4
+; CHECK: for.body:
+
+define void @test1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly nonnull dereferenceable(4) %c, i32 %n) #0 {
+entry:
+  %cmp11 = icmp sgt i32 %n, 0
+  br i1 %cmp11, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %1 = load i32* %c, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %2 = load i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %2, %1
+  store i32 %mul, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+; This is the same as @test1, but without the dereferenceable attribute on %c.
+; Without this attribute, we should not hoist the load of %c.
+
+; CHECK-LABEL: @test2
+; CHECK: if.then:
+; CHECK: load i32* %c, align 4
+
+define void @test2(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly nonnull %c, i32 %n) #0 {
+entry:
+  %cmp11 = icmp sgt i32 %n, 0
+  br i1 %cmp11, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %1 = load i32* %c, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %2 = load i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %2, %1
+  store i32 %mul, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+; This test represents the following function:
+; void test3(int * restrict a, int * restrict b, int c[static 3], int n) {
+;   for (int i = 0; i < n; ++i)
+;     if (a[i] > 0)
+;       a[i] = c[2]*b[i];
+; }
+; and we want to hoist the load of c[2] out of the loop. This can be done only
+; because the dereferenceable attribute is on %c.
+
+; CHECK-LABEL: @test3
+; CHECK: load i32* %c2, align 4
+; CHECK: for.body:
+
+define void @test3(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly dereferenceable(12) %c, i32 %n) #0 {
+entry:
+  %cmp11 = icmp sgt i32 %n, 0
+  br i1 %cmp11, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %c2 = getelementptr inbounds i32* %c, i64 2
+  %1 = load i32* %c2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %2 = load i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %2, %1
+  store i32 %mul, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+; This is the same as @test3, but with a dereferenceable attribute on %c with a
+; size too small to cover c[2] (and so we should not hoist it).
+
+; CHECK-LABEL: @test4
+; CHECK: if.then:
+; CHECK: load i32* %c2, align 4
+
+define void @test4(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly dereferenceable(11) %c, i32 %n) #0 {
+entry:
+  %cmp11 = icmp sgt i32 %n, 0
+  br i1 %cmp11, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %c2 = getelementptr inbounds i32* %c, i64 2
+  %1 = load i32* %c2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %2 = load i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %2, %1
+  store i32 %mul, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+attributes #0 = { nounwind uwtable }
+
diff --git a/test/Transforms/LICM/speculate.ll b/test/Transforms/LICM/speculate.ll
index 4244f15..6926669 100644
--- a/test/Transforms/LICM/speculate.ll
+++ b/test/Transforms/LICM/speculate.ll
@@ -3,12 +3,11 @@
 ; UDiv is safe to speculate if the denominator is known non-zero.
 
 ; CHECK-LABEL: @safe_udiv(
-; CHECK:      %div = udiv i64 %x, %or
+; CHECK:      %div = udiv i64 %x, 2
 ; CHECK-NEXT: br label %for.body
 
 define void @safe_udiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
 entry:
-  %or = or i64 %m, 1
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.inc
@@ -19,7 +18,7 @@ for.body:                                         ; preds = %entry, %for.inc
   br i1 %tobool, label %for.inc, label %if.then
 
 if.then:                                          ; preds = %for.body
-  %div = udiv i64 %x, %or
+  %div = udiv i64 %x, 2
   %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
   store i64 %div, i64* %arrayidx1, align 8
   br label %for.inc
@@ -69,13 +68,12 @@ for.end:                                          ; preds = %for.inc, %entry
 ; known to have at least one zero bit.
 
 ; CHECK-LABEL: @safe_sdiv(
-; CHECK:      %div = sdiv i64 %x, %or
+; CHECK:      %div = sdiv i64 %x, 2
 ; CHECK-NEXT: br label %for.body
 
 define void @safe_sdiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
 entry:
   %and = and i64 %m, -3
-  %or = or i64 %and, 1
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.inc
@@ -86,7 +84,7 @@ for.body:                                         ; preds = %entry, %for.inc
   br i1 %tobool, label %for.inc, label %if.then
 
 if.then:                                          ; preds = %for.body
-  %div = sdiv i64 %x, %or
+  %div = sdiv i64 %x, 2
   %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
   store i64 %div, i64* %arrayidx1, align 8
   br label %for.inc
diff --git a/test/Transforms/LoadCombine/load-combine-aa.ll b/test/Transforms/LoadCombine/load-combine-aa.ll
new file mode 100644
index 0000000..3542dce
--- /dev/null
+++ b/test/Transforms/LoadCombine/load-combine-aa.ll
@@ -0,0 +1,39 @@
+; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i64 @test1(i32* nocapture readonly noalias %a, i32* nocapture readonly noalias %b) {
+; CHECK-LABEL: @test1
+
+; CHECK: load i64*
+; CHECK: ret i64
+
+  %load1 = load i32* %a, align 4
+  %conv = zext i32 %load1 to i64
+  %arrayidx1 = getelementptr inbounds i32* %a, i64 1
+  store i32 %load1, i32* %b, align 4
+  %load2 = load i32* %arrayidx1, align 4
+  %conv2 = zext i32 %load2 to i64
+  %shl = shl nuw i64 %conv2, 32
+  %add = or i64 %shl, %conv
+  ret i64 %add
+}
+
+define i64 @test2(i32* nocapture readonly %a, i32* nocapture readonly %b) {
+; CHECK-LABEL: @test2
+
+; CHECK: load i32*
+; CHECK: load i32*
+; CHECK: ret i64
+
+  %load1 = load i32* %a, align 4
+  %conv = zext i32 %load1 to i64
+  %arrayidx1 = getelementptr inbounds i32* %a, i64 1
+  store i32 %load1, i32* %b, align 4
+  %load2 = load i32* %arrayidx1, align 4
+  %conv2 = zext i32 %load2 to i64
+  %shl = shl nuw i64 %conv2, 32
+  %add = or i64 %shl, %conv
+  ret i64 %add
+}
+
diff --git a/test/Transforms/LoadCombine/load-combine-assume.ll b/test/Transforms/LoadCombine/load-combine-assume.ll
new file mode 100644
index 0000000..94f6300
--- /dev/null
+++ b/test/Transforms/LoadCombine/load-combine-assume.ll
@@ -0,0 +1,44 @@
+; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.assume(i1) nounwind
+
+; 'load' before the 'call' gets optimized:
+define i64 @test1(i32* nocapture readonly %a, i1 %b) {
+; CHECK-LABEL: @test1
+
+; CHECK-DAG: load i64* %1, align 4
+; CHECK-DAG: tail call void @llvm.assume(i1 %b)
+; CHECK: ret i64
+
+  %load1 = load i32* %a, align 4
+  %conv = zext i32 %load1 to i64
+  %arrayidx1 = getelementptr inbounds i32* %a, i64 1
+  %load2 = load i32* %arrayidx1, align 4
+  tail call void @llvm.assume(i1 %b)
+  %conv2 = zext i32 %load2 to i64
+  %shl = shl nuw i64 %conv2, 32
+  %add = or i64 %shl, %conv
+  ret i64 %add
+}
+
+; 'call' before the 'load' doesn't get optimized:
+define i64 @test2(i32* nocapture readonly %a, i1 %b) {
+; CHECK-LABEL: @test2
+
+; CHECK-DAG: load i64* %1, align 4
+; CHECK-DAG: tail call void @llvm.assume(i1 %b)
+; CHECK: ret i64
+
+  %load1 = load i32* %a, align 4
+  %conv = zext i32 %load1 to i64
+  %arrayidx1 = getelementptr inbounds i32* %a, i64 1
+  tail call void @llvm.assume(i1 %b)
+  %load2 = load i32* %arrayidx1, align 4
+  %conv2 = zext i32 %load2 to i64
+  %shl = shl nuw i64 %conv2, 32
+  %add = or i64 %shl, %conv
+  ret i64 %add
+}
+
diff --git a/test/Transforms/LoopIdiom/R600/lit.local.cfg b/test/Transforms/LoopIdiom/R600/lit.local.cfg
new file mode 100644
index 0000000..4086e8d
--- /dev/null
+++ b/test/Transforms/LoopIdiom/R600/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'R600' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopIdiom/R600/popcnt.ll b/test/Transforms/LoopIdiom/R600/popcnt.ll
new file mode 100644
index 0000000..e4301bb
--- /dev/null
+++ b/test/Transforms/LoopIdiom/R600/popcnt.ll
@@ -0,0 +1,104 @@
+; RUN: opt -loop-idiom -mtriple=r600-- -mcpu=SI -S < %s | FileCheck %s
+
+; Mostly copied from x86 version.
+
+;To recognize this pattern:
+;int popcount(unsigned long long a) {
+;    int c = 0;
+;    while (a) {
+;        c++;
+;        a &= a - 1;
+;    }
+;    return c;
+;}
+;
+
+; CHECK-LABEL: @popcount_i64
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount_i64(i64 %a) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i64 %a, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.05, 1
+  %sub = add i64 %a.addr.04, -1
+  %and = and i64 %sub, %a.addr.04
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+}
+
+; CHECK-LABEL: @popcount_i32
+; CHECK: entry
+; CHECK: llvm.ctpop.i32
+; CHECK: ret
+define i32 @popcount_i32(i32 %a) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i32 %a, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.04 = phi i32 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.05, 1
+  %sub = add i32 %a.addr.04, -1
+  %and = and i32 %sub, %a.addr.04
+  %tobool = icmp eq i32 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+}
+
+; To recognize this pattern:
+;int popcount(unsigned long long a, int mydata1, int mydata2) {
+;    int c = 0;
+;    while (a) {
+;        c++;
+;        a &= a - 1;
+;        mydata1 *= c;
+;        mydata2 *= (int)a;
+;    }
+;    return c + mydata1 + mydata2;
+;}
+
+; CHECK-LABEL: @popcount2
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
+entry:
+  %tobool9 = icmp eq i64 %a, 0
+  br i1 %tobool9, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
+  %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
+  %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.013, 1
+  %sub = add i64 %a.addr.010, -1
+  %and = and i64 %sub, %a.addr.010
+  %mul = mul nsw i32 %inc, %mydata1.addr.011
+  %conv = trunc i64 %and to i32
+  %mul1 = mul nsw i32 %conv, %mydata2.addr.012
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
+  %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
+  %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
+  %add2 = add i32 %add, %c.0.lcssa
+  ret i32 %add2
+}
diff --git a/test/Transforms/LoopIdiom/debug-line.ll b/test/Transforms/LoopIdiom/debug-line.ll
index ef4a478..ea3c4de 100644
--- a/test/Transforms/LoopIdiom/debug-line.ll
+++ b/test/Transforms/LoopIdiom/debug-line.ll
@@ -5,8 +5,8 @@ target triple = "x86_64-apple-darwin10.0.0"
 
 define void @foo(double* nocapture %a) nounwind ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !{double* %a}, i64 0, metadata !5), !dbg !8
-  tail call void @llvm.dbg.value(metadata !9, i64 0, metadata !10), !dbg !14
+  tail call void @llvm.dbg.value(metadata !{double* %a}, i64 0, metadata !5, metadata !{}), !dbg !8
+  tail call void @llvm.dbg.value(metadata !9, i64 0, metadata !10, metadata !{}), !dbg !14
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
@@ -19,34 +19,34 @@ for.body:                                         ; preds = %entry, %for.body
   br i1 %exitcond, label %for.body, label %for.end, !dbg !14
 
 for.end:                                          ; preds = %for.body
-  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !10), !dbg !16
+  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !10, metadata !{}), !dbg !16
   ret void, !dbg !17
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.module.flags = !{!19}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !18, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (double*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
-!1 = metadata !{i32 589865, metadata !18} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !18, i32 12, metadata !"clang version 2.9 (trunk 127165:127174)", i1 true, metadata !"", i32 0, metadata !9, metadata !9, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !18, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x2e\00foo\00foo\00\002\000\001\000\006\00256\000\000", metadata !18, metadata !1, metadata !3, null, void (double*)* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
+!1 = metadata !{metadata !"0x29", metadata !18} ; [ DW_TAG_file_type ]
+!2 = metadata !{metadata !"0x11\0012\00clang version 2.9 (trunk 127165:127174)\001\00\000\00\000", metadata !18, metadata !9, metadata !9, null, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !18, metadata !1, null, metadata !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
-!5 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777218, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
-!6 = metadata !{i32 589839, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 589860, null, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!5 = metadata !{metadata !"0x101\00a\0016777218\000", metadata !0, metadata !1, metadata !6} ; [ DW_TAG_arg_variable ]
+!6 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", null, metadata !2, metadata !7} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{metadata !"0x24\00double\000\0064\0064\000\000\004", null, metadata !2} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 2, i32 18, metadata !0, null}
 !9 = metadata !{i32 0}
-!10 = metadata !{i32 590080, metadata !11, metadata !"i", metadata !1, i32 3, metadata !13, i32 0} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 589835, metadata !18, metadata !12, i32 3, i32 3, i32 1} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 589835, metadata !18, metadata !0, i32 2, i32 21, i32 0} ; [ DW_TAG_lexical_block ]
-!13 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{metadata !"0x100\00i\003\000", metadata !11, metadata !1, metadata !13} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{metadata !"0xb\003\003\001", metadata !18, metadata !12} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{metadata !"0xb\002\0021\000", metadata !18, metadata !0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, metadata !2} ; [ DW_TAG_base_type ]
 !14 = metadata !{i32 3, i32 3, metadata !12, null}
 !15 = metadata !{i32 4, i32 5, metadata !11, null}
 !16 = metadata !{i32 3, i32 29, metadata !11, null}
 !17 = metadata !{i32 5, i32 1, metadata !12, null}
 !18 = metadata !{metadata !"li.c", metadata !"/private/tmp"}
-!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
index 50fc965..4da0776 100644
--- a/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -S -loop-rotate < %s | FileCheck %s
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp {
 ; CHECK-LABEL: define i32 @tak(
@@ -15,9 +15,9 @@ tailrecurse:                                      ; preds = %if.then, %entry
   %x.tr = phi i32 [ %x, %entry ], [ %call, %if.then ]
   %y.tr = phi i32 [ %y, %entry ], [ %call9, %if.then ]
   %z.tr = phi i32 [ %z, %entry ], [ %call14, %if.then ]
-  tail call void @llvm.dbg.value(metadata !{i32 %x.tr}, i64 0, metadata !6), !dbg !7
-  tail call void @llvm.dbg.value(metadata !{i32 %y.tr}, i64 0, metadata !8), !dbg !9
-  tail call void @llvm.dbg.value(metadata !{i32 %z.tr}, i64 0, metadata !10), !dbg !11
+  tail call void @llvm.dbg.value(metadata !{i32 %x.tr}, i64 0, metadata !6, metadata !{}), !dbg !7
+  tail call void @llvm.dbg.value(metadata !{i32 %y.tr}, i64 0, metadata !8, metadata !{}), !dbg !9
+  tail call void @llvm.dbg.value(metadata !{i32 %z.tr}, i64 0, metadata !10, metadata !{}), !dbg !11
   %cmp = icmp slt i32 %y.tr, %x.tr, !dbg !12
   br i1 %cmp, label %if.then, label %if.end, !dbg !12
 
@@ -46,9 +46,9 @@ define void @FindFreeHorzSeg(i64 %startCol, i64 %row, i64* %rowStart) {
 ; CHECK-LABEL: define void @FindFreeHorzSeg(
 ; CHECK: %dec = add
 ; CHECK-NEXT: tail call void @llvm.dbg.value
-; CHECK-NEXT: br i1 %tobool, label %for.cond, label %[[LOOP_EXIT:[^,]*]]
-; CHECK: [[LOOP_EXIT]]:
-; CHECK-NEXT: phi i64 [ %{{[^,]*}}, %{{[^,]*}} ]
+; CHECK: %cmp = icmp
+; CHECK: br i1 %cmp
+; CHECK: phi i64 [ %{{[^,]*}}, %{{[^,]*}} ]
 ; CHECK-NEXT: br label %for.end
 
 
@@ -72,7 +72,7 @@ for.body:
 
 for.inc:
   %dec = add i64 %i.0, -1
-  tail call void @llvm.dbg.value(metadata !{i64 %dec}, i64 0, metadata !{metadata !"undef"})
+  tail call void @llvm.dbg.value(metadata !{i64 %dec}, i64 0, metadata !{metadata !"undef"}, metadata !{})
   br label %for.cond
 
 for.end:
@@ -84,24 +84,24 @@ for.end:
 !llvm.module.flags = !{!20}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !18, metadata !1, metadata !"tak", metadata !"tak", metadata !"", i32 32, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i32, i32)* @tak, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 0] [tak]
-!1 = metadata !{i32 589865, metadata !18} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !18, i32 12, metadata !"clang version 2.9 (trunk 125492)", i1 true, metadata !"", i32 0, metadata !19, metadata !19, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !18, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x2e\00tak\00tak\00\0032\000\001\000\006\00256\000\000", metadata !18, metadata !1, metadata !3, null, i32 (i32, i32, i32)* @tak, null, null, null} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 0] [tak]
+!1 = metadata !{metadata !"0x29", metadata !18} ; [ DW_TAG_file_type ]
+!2 = metadata !{metadata !"0x11\0012\00clang version 2.9 (trunk 125492)\001\00\000\00\000", metadata !18, metadata !19, metadata !19, null, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !18, metadata !1, null, metadata !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 590081, metadata !0, metadata !"x", metadata !1, i32 32, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!5 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, metadata !2} ; [ DW_TAG_base_type ]
+!6 = metadata !{metadata !"0x101\00x\0032\000", metadata !0, metadata !1, metadata !5} ; [ DW_TAG_arg_variable ]
 !7 = metadata !{i32 32, i32 13, metadata !0, null}
-!8 = metadata !{i32 590081, metadata !0, metadata !"y", metadata !1, i32 32, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{metadata !"0x101\00y\0032\000", metadata !0, metadata !1, metadata !5} ; [ DW_TAG_arg_variable ]
 !9 = metadata !{i32 32, i32 20, metadata !0, null}
-!10 = metadata !{i32 590081, metadata !0, metadata !"z", metadata !1, i32 32, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{metadata !"0x101\00z\0032\000", metadata !0, metadata !1, metadata !5} ; [ DW_TAG_arg_variable ]
 !11 = metadata !{i32 32, i32 27, metadata !0, null}
 !12 = metadata !{i32 33, i32 3, metadata !13, null}
-!13 = metadata !{i32 589835, metadata !18, metadata !0, i32 32, i32 30, i32 6} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{metadata !"0xb\0032\0030\006", metadata !18, metadata !0} ; [ DW_TAG_lexical_block ]
 !14 = metadata !{i32 34, i32 5, metadata !15, null}
-!15 = metadata !{i32 589835, metadata !18, metadata !13, i32 33, i32 14, i32 7} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{metadata !"0xb\0033\0014\007", metadata !18, metadata !13} ; [ DW_TAG_lexical_block ]
 !16 = metadata !{i32 36, i32 3, metadata !13, null}
 !17 = metadata !{i32 37, i32 1, metadata !13, null}
 !18 = metadata !{metadata !"/Volumes/Lalgate/cj/llvm/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame/recursive.c", metadata !"/Volumes/Lalgate/cj/D/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame"}
 !19 = metadata !{i32 0}
-!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/LoopRotate/nosimplifylatch.ll b/test/Transforms/LoopRotate/nosimplifylatch.ll
new file mode 100644
index 0000000..8e858b4
--- /dev/null
+++ b/test/Transforms/LoopRotate/nosimplifylatch.ll
@@ -0,0 +1,34 @@
+; RUN: opt -S < %s -loop-rotate -licm -verify-dom-info -verify-loop-info | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios8.0.0"
+
+;CHECK: for.inc:
+;CHECK-NEXT: %incdec.ptr.i = getelementptr 
+
+; Function Attrs: alwaysinline inlinehint nounwind readonly ssp
+define linkonce_odr hidden i64 @_ZNSt3__14findINS_11__wrap_iterIPiEEiEET_S4_S4_RKT0_(i64 %__first.coerce, i64 %__last.coerce, i32* nocapture readonly dereferenceable(4) %__value_) {
+entry:
+  %coerce.val.ip = inttoptr i64 %__first.coerce to i32*
+  %coerce.val.ip2 = inttoptr i64 %__last.coerce to i32*
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %coerce.val.ip9 = phi i32* [ %incdec.ptr.i, %for.inc ], [ %coerce.val.ip, %entry ]
+  %lnot.i = icmp eq i32* %coerce.val.ip9, %coerce.val.ip2
+  br i1 %lnot.i, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.cond
+  %0 = load i32* %coerce.val.ip9, align 4
+  %1 = load i32* %__value_, align 4
+  %cmp = icmp eq i32 %0, %1
+  br i1 %cmp, label %for.end, label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %incdec.ptr.i = getelementptr inbounds i32* %coerce.val.ip9, i64 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond, %for.body
+  %coerce.val.ip9.lcssa = phi i32* [ %coerce.val.ip9, %for.cond ], [ %coerce.val.ip9, %for.body ]
+  %coerce.val.pi = ptrtoint i32* %coerce.val.ip9.lcssa to i64
+  ret i64 %coerce.val.pi
+}
diff --git a/test/Transforms/LoopRotate/simplifylatch.ll b/test/Transforms/LoopRotate/simplifylatch.ll
index d646cb9..62e5b1a 100644
--- a/test/Transforms/LoopRotate/simplifylatch.ll
+++ b/test/Transforms/LoopRotate/simplifylatch.ll
@@ -4,7 +4,7 @@
 @mode_table = global [4 x i32] zeroinitializer		; <[4 x i32]*> [#uses=1]
 
 ; CHECK-LABEL: @f(
-; CHECK-NOT: bb4
+; CHECK-NOT: bb:
 define i8 @f() {
 entry:
 	tail call i32 @fegetround( )		; <i32>:0 [#uses=1]
diff --git a/test/Transforms/LoopSimplify/merge-exits.ll b/test/Transforms/LoopSimplify/merge-exits.ll
index 8de5938..9678148 100644
--- a/test/Transforms/LoopSimplify/merge-exits.ll
+++ b/test/Transforms/LoopSimplify/merge-exits.ll
@@ -1,6 +1,4 @@
-; RUN: opt < %s -loop-simplify -loop-rotate -instcombine -indvars -S -verify-loop-info -verify-dom-info > %t
-; RUN: not grep sext %t
-; RUN: grep "phi i64" %t | count 1
+; RUN: opt < %s -loop-simplify -loop-rotate -instcombine -indvars -S -verify-loop-info -verify-dom-info | FileCheck %s
 
 ; Loopsimplify should be able to merge the two loop exits
 ; into one, so that loop rotate can rotate the loop, so
@@ -9,36 +7,42 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
 
-define float @t(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
+; CHECK-LABEL: @test1
+; CHECK: bb:
+; CHECK: phi i64
+; CHECK-NOT: phi i64
+; CHECK-NOT: sext
+
+define float @test1(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
 entry:
-	%t0 = load float* %peakWeight, align 4		; <float> [#uses=1]
+	%t0 = load float* %peakWeight, align 4
 	br label %bb1
 
 bb:		; preds = %bb2
-	%t1 = sext i32 %hiPart.0 to i64		; <i64> [#uses=1]
-	%t2 = getelementptr float* %pTmp1, i64 %t1		; <float*> [#uses=1]
-	%t3 = load float* %t2, align 4		; <float> [#uses=1]
-	%t4 = fadd float %t3, %distERBhi.0		; <float> [#uses=1]
-	%t5 = add i32 %hiPart.0, 1		; <i32> [#uses=2]
-	%t6 = sext i32 %t5 to i64		; <i64> [#uses=1]
-	%t7 = getelementptr float* %peakWeight, i64 %t6		; <float*> [#uses=1]
-	%t8 = load float* %t7, align 4		; <float> [#uses=1]
-	%t9 = fadd float %t8, %peakCount.0		; <float> [#uses=1]
+	%t1 = sext i32 %hiPart.0 to i64
+	%t2 = getelementptr float* %pTmp1, i64 %t1
+	%t3 = load float* %t2, align 4
+	%t4 = fadd float %t3, %distERBhi.0
+	%t5 = add i32 %hiPart.0, 1
+	%t6 = sext i32 %t5 to i64
+	%t7 = getelementptr float* %peakWeight, i64 %t6
+	%t8 = load float* %t7, align 4
+	%t9 = fadd float %t8, %peakCount.0
 	br label %bb1
 
 bb1:		; preds = %bb, %entry
-	%peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]		; <float> [#uses=2]
-	%hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]		; <i32> [#uses=3]
-	%distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]		; <float> [#uses=3]
-	%t10 = fcmp uge float %distERBhi.0, 2.500000e+00		; <i1> [#uses=1]
+	%peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]
+	%hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]
+	%distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]
+	%t10 = fcmp uge float %distERBhi.0, 2.500000e+00
 	br i1 %t10, label %bb3, label %bb2
 
 bb2:		; preds = %bb1
-	%t11 = add i32 %bandEdgeIndex, -1		; <i32> [#uses=1]
-	%t12 = icmp sgt i32 %t11, %hiPart.0		; <i1> [#uses=1]
+	%t11 = add i32 %bandEdgeIndex, -1
+	%t12 = icmp sgt i32 %t11, %hiPart.0
 	br i1 %t12, label %bb, label %bb3
 
 bb3:		; preds = %bb2, %bb1
-	%t13 = fdiv float %peakCount.0, %distERBhi.0		; <float> [#uses=1]
+	%t13 = fdiv float %peakCount.0, %distERBhi.0
 	ret float %t13
 }
diff --git a/test/Transforms/LoopStrengthReduce/pr12018.ll b/test/Transforms/LoopStrengthReduce/pr12018.ll
index ee7b1e8..1e3df6c 100644
--- a/test/Transforms/LoopStrengthReduce/pr12018.ll
+++ b/test/Transforms/LoopStrengthReduce/pr12018.ll
@@ -16,7 +16,7 @@ for.body:                                         ; preds = %_ZN8nsTArray9Elemen
   %tmp = bitcast %struct.nsTArrayHeader* %add.ptr.i to %struct.nsTArray*
   %arrayidx = getelementptr inbounds %struct.nsTArray* %tmp, i32 %i.06
   %add = add nsw i32 %i.06, 1
-  call void @llvm.dbg.value(metadata !{%struct.nsTArray* %aValues}, i64 0, metadata !0) nounwind
+  call void @llvm.dbg.value(metadata !{%struct.nsTArray* %aValues}, i64 0, metadata !0, metadata !{}) nounwind
   br label %_ZN8nsTArray9ElementAtEi.exit
 
 _ZN8nsTArray9ElementAtEi.exit:                    ; preds = %for.body
@@ -33,6 +33,6 @@ declare void @_ZN11nsTArray15ComputeDistanceERKS_Rd(%struct.nsTArray*, %struct.n
 
 declare %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev()
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
-!0 = metadata !{i32 786689}                       ; [ DW_TAG_arg_variable ]
+!0 = metadata !{metadata !"0x101"}                       ; [ DW_TAG_arg_variable ]
diff --git a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
index 17c91e5..aae79cb 100644
--- a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
+++ b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
@@ -41,8 +41,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 ; CHECK-LABEL: @test
-; CHECK: unr.cmp{{.*}}:
-; CHECK: for.body.unr{{.*}}:
+; CHECK: for.body.prol{{.*}}:
 ; CHECK: for.body:
 ; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
 
diff --git a/test/Transforms/LoopUnroll/ephemeral.ll b/test/Transforms/LoopUnroll/ephemeral.ll
new file mode 100644
index 0000000..9d40613
--- /dev/null
+++ b/test/Transforms/LoopUnroll/ephemeral.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 | FileCheck %s
+
+; Make sure this loop is completely unrolled...
+; CHECK-LABEL: @test1
+; CHECK: for.body:
+; CHECK-NOT: for.end:
+
+define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+
+  ; This loop will be completely unrolled, even with these extra instructions,
+  ; but only because they're ephemeral (and, thus, free).
+  %1 = add nsw i32 %0, 2
+  %2 = add nsw i32 %1, 4
+  %3 = add nsw i32 %2, 4
+  %4 = add nsw i32 %3, 4
+  %5 = add nsw i32 %4, 4
+  %6 = add nsw i32 %5, 4
+  %7 = add nsw i32 %6, 4
+  %8 = add nsw i32 %7, 4
+  %9 = add nsw i32 %8, 4
+  %10 = add nsw i32 %9, 4
+  %ca = icmp sgt i32 %10, -7
+  call void @llvm.assume(i1 %ca)
+
+  %add = add nsw i32 %0, %sum.01
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 5
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add
+}
+
+declare void @llvm.assume(i1) nounwind
+
diff --git a/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll b/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll
new file mode 100644
index 0000000..dcb5d1c
--- /dev/null
+++ b/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll
@@ -0,0 +1,133 @@
+; REQUIRES: asserts
+; RUN: opt < %s -disable-output -stats -loop-unroll -info-output-file - | FileCheck %s --check-prefix=STATS
+; STATS: 1 loop-unroll - Number of loops unrolled (completely or otherwise)
+; Test that llvm.annotation intrinsic do not count against the loop body size
+; and prevent unrolling.
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+@B = common global i32 0, align 4
+
+define void @foo(i32* noalias %A, i32 %B, i32 %C) {
+entry:
+  br label %for.body
+
+; A loop that has a small loop body (except for the annotations) that should be
+; unrolled with the default heuristic. Make sure the extra annotations do not
+; prevent unrolling
+for.body:                                         ; preds = %entry, %for.body
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  ; The real loop.
+  %mul = mul nsw i32 %B, %C
+  %arrayidx = getelementptr inbounds i32* %A, i32 %i.01
+  store i32 %mul, i32* %arrayidx, align 4
+  %inc = add nsw i32 %i.01, 1
+  %exitcond = icmp ne i32 %inc, 4
+
+  ; A bunch of annotations
+  %annot.0 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.1 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.2 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.3 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.4 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.5 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.6 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.7 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.8 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.9 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.10 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.11 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.12 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.13 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.14 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.15 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.16 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.17 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.18 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.19 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.20 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.21 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.22 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.23 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.24 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.25 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.26 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.27 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.28 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.29 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.30 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.31 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.32 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.33 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.34 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.35 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.36 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.37 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.38 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.39 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.40 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.41 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.42 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.43 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.44 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.45 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.46 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.47 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.48 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.49 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.50 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.51 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.52 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.53 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.54 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.55 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.56 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.57 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.58 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.59 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.60 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.61 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.62 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.63 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.64 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.65 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.66 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.67 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.68 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.69 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.70 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.71 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.72 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.73 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.74 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.75 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.76 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.77 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.78 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.79 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.80 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.81 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.82 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.83 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.84 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.85 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.86 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.87 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.88 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.89 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.90 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.91 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.92 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.93 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.94 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.95 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.96 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.97 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.98 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.99 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32)
diff --git a/test/Transforms/LoopUnroll/nsw-tripcount.ll b/test/Transforms/LoopUnroll/nsw-tripcount.ll
new file mode 100644
index 0000000..98cab32
--- /dev/null
+++ b/test/Transforms/LoopUnroll/nsw-tripcount.ll
@@ -0,0 +1,32 @@
+; RUN: opt -loop-unroll -S %s | FileCheck %s
+
+; extern void f(int);
+; void test1(int v) {
+;   for (int i=v; i<=v+1; ++i)
+;     f(i);
+; }
+;
+; We can use the nsw information to see that the tripcount will be 2, so the
+; loop should be unrolled as this is always beneficial
+
+declare void @f(i32)
+
+; CHECK-LABEL: @test1
+define void @test1(i32 %v) {
+entry:
+  %add = add nsw i32 %v, 1
+  br label %for.body
+
+for.body:
+  %i.04 = phi i32 [ %v, %entry ], [ %inc, %for.body ]
+  tail call void @f(i32 %i.04)
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp slt i32 %i.04, %add
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK: call void @f
+; CHECK-NOT: br i1
+; CHECK: call void @f
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll
index a14087d..05d03f2 100644
--- a/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -3,15 +3,16 @@
 ; Tests for unrolling loops with run-time trip counts
 
 ; CHECK: %xtraiter = and i32 %n
-; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
-; CHECK: %lcmp.overflow = icmp eq i32 %n, 0
-; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
-; CHECK: br i1 %lcmp.or, label %unr.cmp
+; CHECK:  %lcmp.mod = icmp ne i32 %xtraiter, 0
+; CHECK:  %lcmp.overflow = icmp eq i32 %n, 0
+; CHECK:  %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
+; CHECK:  br i1 %lcmp.or, label %for.body.prol, label %for.body.preheader.split
 
-; CHECK: unr.cmp{{.*}}:
-; CHECK: for.body.unr{{.*}}:
-; CHECK: for.body:
-; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
+; CHECK: for.body.prol:
+; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ]
+; CHECK:  %prol.iter.sub = sub i32 %prol.iter, 1
+; CHECK:  %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
+; CHECK:  br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split, !llvm.loop !0
 
 define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
 entry:
@@ -39,7 +40,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; even if the -unroll-runtime is specified
 
 ; CHECK: for.body:
-; CHECK-NOT: for.body.unr:
+; CHECK-NOT: for.body.prol:
 
 define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
 entry:
@@ -85,8 +86,8 @@ cond_true138:
 
 ; Test run-time unrolling for a loop that counts down by -2.
 
-; CHECK: for.body.unr:
-; CHECK: br i1 %cmp.7, label %for.cond.for.end_crit_edge{{.*}}, label %for.body
+; CHECK: for.body.prol:
+; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
 
 define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
 entry:
@@ -113,3 +114,7 @@ for.end:                                          ; preds = %for.cond.for.end_cr
   %res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
   ret i16 %res.0.lcssa
 }
+
+; CHECK: !0 = metadata !{metadata !0, metadata !1}
+; CHECK: !1 = metadata !{metadata !"llvm.loop.unroll.disable"}
+
diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll
index ad99b8c..38b4f32 100644
--- a/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -1,11 +1,11 @@
-; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=4 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 | FileCheck %s
 
 ; This tests that setting the unroll count works
 
-; CHECK: unr.cmp:
-; CHECK: for.body.unr:
+; CHECK: for.body.prol:
+; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
 ; CHECK: for.body:
-; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
+; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body
 ; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
 
 define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
diff --git a/test/Transforms/LoopUnroll/runtime-loop2.ll b/test/Transforms/LoopUnroll/runtime-loop2.ll
index cbc7af5..7205c68 100644
--- a/test/Transforms/LoopUnroll/runtime-loop2.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop2.ll
@@ -3,8 +3,7 @@
 ; Choose a smaller, power-of-two, unroll count if the loop is too large.
 ; This test makes sure we're not unrolling 'odd' counts
 
-; CHECK: unr.cmp:
-; CHECK: for.body.unr:
+; CHECK: for.body.prol:
 ; CHECK: for.body:
 ; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
 ; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
diff --git a/test/Transforms/LoopUnroll/scevunroll.ll b/test/Transforms/LoopUnroll/scevunroll.ll
index c3086e8..20161d7 100644
--- a/test/Transforms/LoopUnroll/scevunroll.ll
+++ b/test/Transforms/LoopUnroll/scevunroll.ll
@@ -66,16 +66,13 @@ exit2:
 
 ; SCEV properly unrolls multi-exit loops.
 ;
-; SCEV cannot currently unroll this loop.
-; It should ideally detect a trip count of 5.
-; rdar:14038809 [SCEV]: Optimize trip count computation for multi-exit loops.
 ; CHECK-LABEL: @multiExit(
-; CHECKFIXME: getelementptr i32* %base, i32 10
-; CHECKFIXME-NEXT: load i32*
-; CHECKFIXME: br i1 false, label %l2.10, label %exit1
-; CHECKFIXME: l2.10:
-; CHECKFIXME-NOT: br
-; CHECKFIXME: ret i32
+; CHECK: getelementptr i32* %base, i32 10
+; CHECK-NEXT: load i32*
+; CHECK: br i1 false, label %l2.10, label %exit1
+; CHECK: l2.10:
+; CHECK-NOT: br
+; CHECK: ret i32
 define i32 @multiExit(i32* %base) nounwind {
 entry:
   br label %l1
diff --git a/test/Transforms/LoopUnroll/tripcount-overflow.ll b/test/Transforms/LoopUnroll/tripcount-overflow.ll
new file mode 100644
index 0000000..d593685
--- /dev/null
+++ b/test/Transforms/LoopUnroll/tripcount-overflow.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; When prologue is fully unrolled, the branch on its end is unconditional.
+; Unrolling it is illegal if we can't prove that trip-count+1 doesn't overflow,
+; like in this example, where it comes from an argument.
+;
+; This test is based on an example from here:
+; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out
+;
+; CHECK: while.body.prol:
+; CHECK: br i1
+; CHECK: entry.split:
+
+; Function Attrs: nounwind readnone ssp uwtable
+define i32 @foo(i32 %N) #0 {
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %i = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  %cmp = icmp eq i32 %i, %N
+  %inc = add i32 %i, 1
+  br i1 %cmp, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body
+  ret i32 %i
+}
+
+attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll
new file mode 100644
index 0000000..db18f25
--- /dev/null
+++ b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll
@@ -0,0 +1,149 @@
+; RUN: opt < %s -loop-unroll -S | FileCheck %s
+;
+; Verify that the unrolling pass removes existing unroll count metadata
+; and adds a disable unrolling node after unrolling is complete.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; #pragma clang loop  vectorize(enable) unroll_count(4) vectorize_width(8)
+;
+; Unroll count metadata should be replaced with unroll(disable).  Vectorize
+; metadata should be untouched.
+;
+; CHECK-LABEL: @unroll_count_4(
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_1:.*]]
+define void @unroll_count_4(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!1 = metadata !{metadata !1, metadata !2, metadata !3, metadata !4}
+!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!3 = metadata !{metadata !"llvm.loop.unroll.count", i32 4}
+!4 = metadata !{metadata !"llvm.loop.vectorize.width", i32 8}
+
+; #pragma clang loop unroll(full)
+;
+; An unroll disable metadata node is only added for the unroll count case.
+; In this case, the loop has a full unroll metadata but can't be fully unrolled
+; because the trip count is dynamic.  The full unroll metadata should remain
+; after unrolling.
+;
+; CHECK-LABEL: @unroll_full(
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_2:.*]]
+define void @unroll_full(i32* nocapture %a, i32 %b) {
+entry:
+  %cmp3 = icmp sgt i32 %b, 0
+  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !5
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %b
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+!5 = metadata !{metadata !5, metadata !6}
+!6 = metadata !{metadata !"llvm.loop.unroll.full"}
+
+; #pragma clang loop unroll(disable)
+;
+; Unroll metadata should not change.
+;
+; CHECK-LABEL: @unroll_disable(
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_3:.*]]
+define void @unroll_disable(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!7 = metadata !{metadata !7, metadata !8}
+!8 = metadata !{metadata !"llvm.loop.unroll.disable"}
+
+; This function contains two loops which share the same llvm.loop metadata node
+; with an llvm.loop.unroll.count 2 hint.  Both loops should be unrolled.  This
+; verifies that adding disable metadata to a loop after unrolling doesn't affect
+; other loops which previously shared the same llvm.loop metadata.
+;
+; CHECK-LABEL: @shared_metadata(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_4:.*]]
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_5:.*]]
+define void @shared_metadata(i32* nocapture %List) #0 {
+entry:
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx = getelementptr inbounds i32* %List, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add4 = add nsw i32 %0, 10
+  store i32 %add4, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.body3.1.preheader, label %for.body3, !llvm.loop !9
+
+for.body3.1.preheader:                            ; preds = %for.body3
+  br label %for.body3.1
+
+for.body3.1:                                      ; preds = %for.body3.1.preheader, %for.body3.1
+  %indvars.iv.1 = phi i64 [ %1, %for.body3.1 ], [ 0, %for.body3.1.preheader ]
+  %1 = add nsw i64 %indvars.iv.1, 1
+  %arrayidx.1 = getelementptr inbounds i32* %List, i64 %1
+  %2 = load i32* %arrayidx.1, align 4
+  %add4.1 = add nsw i32 %2, 10
+  store i32 %add4.1, i32* %arrayidx.1, align 4
+  %exitcond.1 = icmp eq i64 %1, 4
+  br i1 %exitcond.1, label %for.inc5.1, label %for.body3.1, !llvm.loop !9
+
+for.inc5.1:                                       ; preds = %for.body3.1
+  ret void
+}
+!9 = metadata !{metadata !9, metadata !10}
+!10 = metadata !{metadata !"llvm.loop.unroll.count", i32 2}
+
+
+; CHECK: ![[LOOP_1]] = metadata !{metadata ![[LOOP_1]], metadata ![[VEC_ENABLE:.*]], metadata ![[WIDTH_8:.*]], metadata ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[VEC_ENABLE]] = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+; CHECK: ![[WIDTH_8]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 8}
+; CHECK: ![[UNROLL_DISABLE]] = metadata !{metadata !"llvm.loop.unroll.disable"}
+; CHECK: ![[LOOP_2]] = metadata !{metadata ![[LOOP_2]], metadata ![[UNROLL_FULL:.*]]}
+; CHECK: ![[UNROLL_FULL]] = metadata !{metadata !"llvm.loop.unroll.full"}
+; CHECK: ![[LOOP_3]] = metadata !{metadata ![[LOOP_3]], metadata ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[LOOP_4]] = metadata !{metadata ![[LOOP_4]], metadata ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[LOOP_5]] = metadata !{metadata ![[LOOP_5]], metadata ![[UNROLL_DISABLE:.*]]}
diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll
index 5e45a2d..1ca249d 100644
--- a/test/Transforms/LoopUnroll/unroll-pragmas.ll
+++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll
@@ -1,4 +1,8 @@
 ; RUN: opt < %s -loop-unroll -S | FileCheck %s
+; RUN: opt < %s -loop-unroll -loop-unroll -S | FileCheck %s
+;
+; Run loop unrolling twice to verify that loop unrolling metadata is properly
+; removed and further unrolling is disabled after the pass is run once.
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -51,11 +55,11 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 !1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.loop.unroll.enable", i1 false}
+!2 = metadata !{metadata !"llvm.loop.unroll.disable"}
 
 ; loop64 has a high enough count that it should *not* be unrolled by
 ; the default unrolling heuristic.  It serves as the control for the
-; unroll(enable) pragma test loop64_with_.* tests below.
+; unroll(full) pragma test loop64_with_.* tests below.
 ;
 ; CHECK-LABEL: @loop64(
 ; CHECK: store i32
@@ -79,7 +83,7 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-; #pragma clang loop unroll(enable)
+; #pragma clang loop unroll(full)
 ; Loop should be fully unrolled.
 ;
 ; CHECK-LABEL: @loop64_with_enable(
@@ -102,7 +106,7 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 !3 = metadata !{metadata !3, metadata !4}
-!4 = metadata !{metadata !"llvm.loop.unroll.enable", i1 true}
+!4 = metadata !{metadata !"llvm.loop.unroll.full"}
 
 ; #pragma clang loop unroll_count(4)
 ; Loop should be unrolled 4 times.
@@ -134,37 +138,7 @@ for.end:                                          ; preds = %for.body
 !5 = metadata !{metadata !5, metadata !6}
 !6 = metadata !{metadata !"llvm.loop.unroll.count", i32 4}
 
-
-; #pragma clang loop unroll_count(enable) unroll_count(4)
-; Loop should be unrolled 4 times.
-;
-; CHECK-LABEL: @loop64_with_enable_and_count4(
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK-NOT: store i32
-; CHECK: br i1
-define void @loop64_with_enable_and_count4(i32* nocapture %a) {
-entry:
-  br label %for.body
-
-for.body:                                         ; preds = %for.body, %entry
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
-  %inc = add nsw i32 %0, 1
-  store i32 %inc, i32* %arrayidx, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 64
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7
-
-for.end:                                          ; preds = %for.body
-  ret void
-}
-!7 = metadata !{metadata !7, metadata !6, metadata !4}
-
-; #pragma clang loop unroll_count(enable)
+; #pragma clang loop unroll(full)
 ; Full unrolling is requested, but loop has a dynamic trip count so
 ; no unrolling should occur.
 ;
@@ -257,7 +231,7 @@ for.end:                                          ; preds = %for.body
 !10 = metadata !{metadata !10, metadata !11}
 !11 = metadata !{metadata !"llvm.loop.unroll.count", i32 1}
 
-; #pragma clang loop unroll(enable)
+; #pragma clang loop unroll(full)
 ; Loop has very high loop count (1 million) and full unrolling was requested.
 ; Loop should unrolled up to the pragma threshold, but not completely.
 ;
diff --git a/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll b/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll
new file mode 100644
index 0000000..adbf47d
--- /dev/null
+++ b/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S < %s -loop-unroll -block-freq | FileCheck %s
+; Crasher from PR20987.
+
+; CHECK: define void @update_loop_info_in_subloops
+; CHECK: entry:
+; CHECK: L:
+; CHECK: L.inner:
+; CHECK: L.inner.latch:
+; CHECK: L.latch:
+; CHECK: L.inner.1:
+; CHECK: L.inner.latch.1:
+; CHECK: L.latch.1:
+
+define void @update_loop_info_in_subloops() {
+entry:
+  br label %L
+
+L:
+  %0 = phi i64 [ 1, %entry ], [ %1, %L.latch ]
+  br label %L.inner
+
+L.inner:
+  br label %L.inner.latch
+
+L.inner.latch:
+  br i1 false, label %L.latch, label %L.inner
+
+L.latch:
+  %1 = add i64 %0, 1
+  %2 = icmp eq i64 %1, 3
+  br i1 %2, label %exit, label %L
+
+exit:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
index 1e1396f..a292afb 100644
--- a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
+++ b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
index aa7cc0e..b3eae69 100644
--- a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
+++ b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce
 
 ; Check that we don't fall into an infinite loop.
 define void @test() nounwind {
diff --git a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
index ae9f998..16d64ea 100644
--- a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
+++ b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -dce -force-vector-unroll=1 -force-vector-width=4 
+; RUN: opt < %s  -loop-vectorize -dce -force-vector-interleave=1 -force-vector-width=4 
 
 ; Check that we don't crash.
 
diff --git a/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll b/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll
new file mode 100644
index 0000000..a01d543
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%struct.anon = type { [100 x i32], i32, [100 x i32] }
+
+@Foo = common global %struct.anon zeroinitializer, align 4
+
+; CHECK-LABEL: @foo(
+; CHECK: load <4 x i32>*
+; CHECK: sdiv <4 x i32>
+; CHECK: store <4 x i32>
+
+define void @foo(){
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %div = sdiv i32 %0, 2
+  %arrayidx2 = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv
+  store i32 %div, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 100
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
index fce3b70..9c69ba8 100644
--- a/test/Transforms/LoopVectorize/X86/already-vectorized.ll
+++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
@@ -41,6 +41,6 @@ for.end:                                          ; preds = %for.body
 ; Now, we check for the Hint metadata
 ; CHECK: [[vect]] = metadata !{metadata [[vect]], metadata [[width:![0-9]+]], metadata [[unroll:![0-9]+]]}
 ; CHECK: [[width]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
-; CHECK: [[unroll]] = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1}
+; CHECK: [[unroll]] = metadata !{metadata !"llvm.loop.interleave.count", i32 1}
 ; CHECK: [[scalar]] = metadata !{metadata [[scalar]], metadata [[width]], metadata [[unroll]]}
 
diff --git a/test/Transforms/LoopVectorize/X86/assume.ll b/test/Transforms/LoopVectorize/X86/assume.ll
new file mode 100644
index 0000000..a94e24d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/assume.ll
@@ -0,0 +1,100 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @test1(float* noalias nocapture %a, float* noalias nocapture readonly %b) #0 {
+entry:
+  br label %for.body
+
+; CHECK-LABEL: @test1
+; CHECK: vector.body:
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: for.body:
+; CHECK: ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp1 = fcmp ogt float %0, 1.000000e+02
+  tail call void @llvm.assume(i1 %cmp1)
+  %add = fadd float %0, 1.000000e+00
+  %arrayidx5 = getelementptr inbounds float* %a, i64 %indvars.iv
+  store float %add, float* %arrayidx5, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv, 1599
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
+%struct.data = type { float*, float* }
+
+; Function Attrs: nounwind uwtable
+define void @test2(%struct.data* nocapture readonly %d) #0 {
+entry:
+  %b = getelementptr inbounds %struct.data* %d, i64 0, i32 1
+  %0 = load float** %b, align 8
+  %ptrint = ptrtoint float* %0 to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  %a = getelementptr inbounds %struct.data* %d, i64 0, i32 0
+  %1 = load float** %a, align 8
+  %ptrint2 = ptrtoint float* %1 to i64
+  %maskedptr3 = and i64 %ptrint2, 31
+  %maskcond4 = icmp eq i64 %maskedptr3, 0
+  br label %for.body
+
+; CHECK-LABEL: @test2
+; CHECK: vector.body:
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: for.body:
+; CHECK: ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx = getelementptr inbounds float* %0, i64 %indvars.iv
+  %2 = load float* %arrayidx, align 4
+  %add = fadd float %2, 1.000000e+00
+  tail call void @llvm.assume(i1 %maskcond4)
+  %arrayidx5 = getelementptr inbounds float* %1, i64 %indvars.iv
+  store float %add, float* %arrayidx5, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv, 1599
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
index e1113fd..05403cd 100644
--- a/test/Transforms/LoopVectorize/X86/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -S | FileCheck %s
-; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-unroll=0 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-interleave=0 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
index d6120e7..0650d94 100644
--- a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
+++ b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
index 8716cff..fd69dc4 100644
--- a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
+++ b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -vectorizer-min-trip-count=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -vectorizer-min-trip-count=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
index 2c47fcb..0b542a9 100644
--- a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
index 7e156a9..b580d73 100644
--- a/test/Transforms/LoopVectorize/X86/parallel-loops.ll
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LoopVectorize/X86/powof2div.ll b/test/Transforms/LoopVectorize/X86/powof2div.ll
new file mode 100644
index 0000000..054da8e
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/powof2div.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.anon = type { [100 x i32], i32, [100 x i32] }
+
+@Foo = common global %struct.anon zeroinitializer, align 4
+
+;CHECK-LABEL: @foo(
+;CHECK: load <4 x i32>*
+;CHECK: sdiv <4 x i32>
+;CHECK: store <4 x i32>
+
+define void @foo(){
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %div = sdiv i32 %0, 2
+  %arrayidx2 = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv
+  store i32 %div, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 100
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll
index dfa4faa..f9a0281 100644
--- a/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/X86/tripcount.ll b/test/Transforms/LoopVectorize/X86/tripcount.ll
index 6b38bac..a4ec694 100644
--- a/test/Transforms/LoopVectorize/X86/tripcount.ll
+++ b/test/Transforms/LoopVectorize/X86/tripcount.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -mcpu=prescott < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -mcpu=prescott < %s | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
 target triple = "i386-unknown-freebsd11.0"
diff --git a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
index d5024bb..716dc08 100644
--- a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
+++ b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
@@ -1,6 +1,6 @@
-; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S \
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-interleave=0 -dce -S \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-VECTOR
-; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=1 -force-vector-unroll=0 -dce -S \
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=1 -force-vector-interleave=0 -dce -S \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-SCALAR
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
diff --git a/test/Transforms/LoopVectorize/X86/unroll_selection.ll b/test/Transforms/LoopVectorize/X86/unroll_selection.ll
index 2d7b663..c684b4e 100644
--- a/test/Transforms/LoopVectorize/X86/unroll_selection.ll
+++ b/test/Transforms/LoopVectorize/X86/unroll_selection.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-interleave=0 -dce -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
index 59bb8d0..e57cfef 100644
--- a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
+++ b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -mcpu=corei7-avx -debug -S < %s 2>&1 | FileCheck %s
+; RUN: opt -basicaa -loop-vectorize -mcpu=corei7-avx -debug -S < %s 2>&1 | FileCheck %s
 ; REQUIRES: asserts
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index 6cdd29b..7bce11d 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -24,10 +24,11 @@
 
 ; File, line, and column should match those specified in the metadata
 ; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
-; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization was not specified
+; CHECK: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info
 ; CHECK: remark: source.cpp:13:5: loop not vectorized: vector width and interleave count are explicitly set to 1
 ; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds
-; CHECK: remark: source.cpp:19:5: loop not vectorized: vectorization is explicitly enabled
+; CHECK: remark: source.cpp:19:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info
+; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization
 
 ; CHECK: _Z4testPii
 ; CHECK-NOT: x i32>
@@ -121,40 +122,40 @@ attributes #0 = { nounwind }
 !llvm.module.flags = !{!9, !10}
 !llvm.ident = !{!11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
+!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0\001\00\006\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !"source.cpp", metadata !"."}
 !2 = metadata !{}
 !3 = metadata !{metadata !4, metadata !7, metadata !8}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 1}
-!5 = metadata !{i32 786473, metadata !1}
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
-!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_disabled", metadata !"test_disabled", metadata !"", i32 10, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z13test_disabledPii, null, null, metadata !2, i32 10}
-!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_array_bounds", metadata !"test_array_bounds", metadata !"", i32 16, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, metadata !2, i32 16}
+!4 = metadata !{metadata !"0x2e\00test\00test\00\001\000\001\000\006\00256\001\001", metadata !1, metadata !5, metadata !6, null, void (i32*, i32)* @_Z4testPii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [test]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [./source.cpp]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !"0x2e\00test_disabled\00test_disabled\00\0010\000\001\000\006\00256\001\0010", metadata !1, metadata !5, metadata !6, null, void (i32*, i32)* @_Z13test_disabledPii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 10] [def] [test_disabled]
+!8 = metadata !{metadata !"0x2e\00test_array_bounds\00test_array_bounds\00\0016\000\001\000\006\00256\001\0016", metadata !1, metadata !5, metadata !6, null, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 16] [def] [test_array_bounds]
 !9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 2}
 !11 = metadata !{metadata !"clang version 3.5.0"}
 !12 = metadata !{i32 3, i32 8, metadata !13, null}
-!13 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
+!13 = metadata !{metadata !"0xb\003\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ]
 !14 = metadata !{metadata !14, metadata !15, metadata !15}
 !15 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
 !16 = metadata !{i32 4, i32 5, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !1, metadata !13, i32 3, i32 36, i32 0, i32 1}
+!17 = metadata !{metadata !"0xb\003\0036\000", metadata !1, metadata !13} ; [ DW_TAG_lexical_block ]
 !18 = metadata !{metadata !19, metadata !19, i64 0}
 !19 = metadata !{metadata !"int", metadata !20, i64 0}
 !20 = metadata !{metadata !"omnipotent char", metadata !21, i64 0}
 !21 = metadata !{metadata !"Simple C/C++ TBAA"}
 !22 = metadata !{i32 5, i32 9, metadata !23, null}
-!23 = metadata !{i32 786443, metadata !1, metadata !17, i32 5, i32 9, i32 0, i32 2}
+!23 = metadata !{metadata !"0xb\005\009\000", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ]
 !24 = metadata !{i32 8, i32 1, metadata !4, null}
 !25 = metadata !{i32 12, i32 8, metadata !26, null}
-!26 = metadata !{i32 786443, metadata !1, metadata !7, i32 12, i32 3, i32 0, i32 3}
+!26 = metadata !{metadata !"0xb\0012\003\000", metadata !1, metadata !7} ; [ DW_TAG_lexical_block ]
 !27 = metadata !{metadata !27, metadata !28, metadata !29}
-!28 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1}
+!28 = metadata !{metadata !"llvm.loop.interleave.count", i32 1}
 !29 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
 !30 = metadata !{i32 13, i32 5, metadata !26, null}
 !31 = metadata !{i32 14, i32 1, metadata !7, null}
 !32 = metadata !{i32 18, i32 8, metadata !33, null}
-!33 = metadata !{i32 786443, metadata !1, metadata !8, i32 18, i32 3, i32 0, i32 4}
+!33 = metadata !{metadata !"0xb\0018\003\000", metadata !1, metadata !8} ; [ DW_TAG_lexical_block ]
 !34 = metadata !{metadata !34, metadata !15}
 !35 = metadata !{i32 19, i32 5, metadata !33, null}
 !36 = metadata !{i32 20, i32 1, metadata !8, null}
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
index f683447..14e541a 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=VECTORIZED %s
-; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
-; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
 
 ; This code has all the !dbg annotations needed to track source line information,
 ; but is missing the llvm.dbg.cu annotation. This prevents code generation from
@@ -52,23 +52,23 @@ declare void @ibar(i32*) #1
 !1 = metadata !{metadata !"vectorization-remarks.c", metadata !"."}
 !2 = metadata !{}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @foo, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [./vectorization-remarks.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !"0x2e\00foo\00foo\00\005\000\001\000\006\00256\001\006", metadata !1, metadata !5, metadata !6, null, i32 (i32)* @foo, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [foo]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [./vectorization-remarks.c]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
 !9 = metadata !{metadata !"clang version 3.5.0 "}
-!10 = metadata !{i32 8, i32 3, metadata !4, null} ; [ DW_TAG_imported_declaration ]
+!10 = metadata !{i32 8, i32 3, metadata !4, null}
 !11 = metadata !{metadata !12, metadata !12, i64 0}
 !12 = metadata !{metadata !"int", metadata !13, i64 0}
 !13 = metadata !{metadata !"omnipotent char", metadata !14, i64 0}
 !14 = metadata !{metadata !"Simple C/C++ TBAA"}
 !15 = metadata !{i32 17, i32 8, metadata !16, null}
-!16 = metadata !{i32 786443, metadata !1, metadata !17, i32 17, i32 8, i32 2, i32 3} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
-!17 = metadata !{i32 786443, metadata !1, metadata !18, i32 17, i32 8, i32 1, i32 2} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
-!18 = metadata !{i32 786443, metadata !1, metadata !4, i32 17, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!16 = metadata !{metadata !"0xb\0017\008\002", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!17 = metadata !{metadata !"0xb\0017\008\001", metadata !1, metadata !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!18 = metadata !{metadata !"0xb\0017\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
 !19 = metadata !{i32 18, i32 5, metadata !20, null}
-!20 = metadata !{i32 786443, metadata !1, metadata !18, i32 17, i32 27, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!20 = metadata !{metadata !"0xb\0017\0027\000", metadata !1, metadata !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
 !21 = metadata !{metadata !13, metadata !13, i64 0}
 !22 = metadata !{i32 20, i32 3, metadata !4, null}
 !23 = metadata !{i32 21, i32 3, metadata !4, null}
diff --git a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
index efc93d9..d8e5403 100644
--- a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
+++ b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S < %s | FileCheck %s
+; RUN: opt -O3 -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.7.0"
diff --git a/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll b/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll
index a099daa..cab333d 100644
--- a/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll
+++ b/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -S -mtriple=xcore | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S -mtriple=xcore | FileCheck %s
 
 target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32"
 target triple = "xcore"
diff --git a/test/Transforms/LoopVectorize/align.ll b/test/Transforms/LoopVectorize/align.ll
index 84b0361..f2fb8b9 100644
--- a/test/Transforms/LoopVectorize/align.ll
+++ b/test/Transforms/LoopVectorize/align.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/bsd_regex.ll b/test/Transforms/LoopVectorize/bsd_regex.ll
index 7b71272..7a3e798 100644
--- a/test/Transforms/LoopVectorize/bsd_regex.ll
+++ b/test/Transforms/LoopVectorize/bsd_regex.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=2 < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=2 < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
index 2648bbe..d7cbad0 100644
--- a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
+++ b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/calloc.ll b/test/Transforms/LoopVectorize/calloc.ll
index 7e79916..5f441f3 100644
--- a/test/Transforms/LoopVectorize/calloc.ll
+++ b/test/Transforms/LoopVectorize/calloc.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/cast-induction.ll b/test/Transforms/LoopVectorize/cast-induction.ll
index 255ce9c..4f92d33 100644
--- a/test/Transforms/LoopVectorize/cast-induction.ll
+++ b/test/Transforms/LoopVectorize/cast-induction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 ; rdar://problem/12848162
 
diff --git a/test/Transforms/LoopVectorize/conditional-assignment.ll b/test/Transforms/LoopVectorize/conditional-assignment.ll
new file mode 100644
index 0000000..50fa329
--- /dev/null
+++ b/test/Transforms/LoopVectorize/conditional-assignment.ll
@@ -0,0 +1,58 @@
+; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+
+; CHECK: remark: source.c:2:8: loop not vectorized: store that is conditionally executed prevents vectorization
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Function Attrs: nounwind ssp uwtable
+define void @conditional_store(i32* noalias nocapture %indices) #0 {
+entry:
+  br label %for.body, !dbg !10
+
+for.body:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %arrayidx = getelementptr inbounds i32* %indices, i64 %indvars.iv, !dbg !12
+  %0 = load i32* %arrayidx, align 4, !dbg !12, !tbaa !14
+  %cmp1 = icmp eq i32 %0, 1024, !dbg !12
+  br i1 %cmp1, label %if.then, label %for.inc, !dbg !12
+
+if.then:                                          ; preds = %for.body
+  store i32 0, i32* %arrayidx, align 4, !dbg !18, !tbaa !14
+  br label %for.inc, !dbg !18
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+  %exitcond = icmp eq i64 %indvars.iv.next, 4096, !dbg !10
+  br i1 %exitcond, label %for.end, label %for.body, !dbg !10
+
+for.end:                                          ; preds = %for.inc
+  ret void, !dbg !19
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = metadata !{metadata !"0x11\0012\00clang version 3.6.0\001\00\000\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !"source.c", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !"0x2e\00conditional_store\00conditional_store\00\001\000\001\000\006\00256\001\001", metadata !1, metadata !5, metadata !6, null, void (i32*)* @conditional_store, null, null, metadata !2} ; [ DW_TAG_subprogram ]
+!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 2}
+!9 = metadata !{metadata !"clang version 3.6.0"}
+!10 = metadata !{i32 2, i32 8, metadata !11, null}
+!11 = metadata !{metadata !"0xb\002\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 3, i32 9, metadata !13, null}
+!13 = metadata !{metadata !"0xb\003\009\000", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{metadata !15, metadata !15, i64 0}
+!15 = metadata !{metadata !"int", metadata !16, i64 0}
+!16 = metadata !{metadata !"omnipotent char", metadata !17, i64 0}
+!17 = metadata !{metadata !"Simple C/C++ TBAA"}
+!18 = metadata !{i32 3, i32 29, metadata !13, null}
+!19 = metadata !{i32 4, i32 1, metadata !4, null}
diff --git a/test/Transforms/LoopVectorize/control-flow.ll b/test/Transforms/LoopVectorize/control-flow.ll
index e4ba77f..452b7ae 100644
--- a/test/Transforms/LoopVectorize/control-flow.ll
+++ b/test/Transforms/LoopVectorize/control-flow.ll
@@ -11,7 +11,7 @@
 ; }
 
 ; CHECK: remark: source.cpp:5:9: loop not vectorized: loop control flow is not understood by vectorizer
-; CHECK: remark: source.cpp:5:9: loop not vectorized: vectorization was not specified
+; CHECK: remark: source.cpp:5:9: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info
 
 ; CHECK: _Z4testPii
 ; CHECK-NOT: x i32>
@@ -55,21 +55,21 @@ attributes #0 = { nounwind }
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
+!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0\001\00\006\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !"source.cpp", metadata !"."}
 !2 = metadata !{}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 2}
-!5 = metadata !{i32 786473, metadata !1}
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
+!4 = metadata !{metadata !"0x2e\00test\00test\00\001\000\001\000\006\00256\001\002", metadata !1, metadata !5, metadata !6, null, i32 (i32*, i32)* @_Z4testPii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [test]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [./source.cpp]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 2}
 !9 = metadata !{metadata !"clang version 3.5.0"}
 !10 = metadata !{i32 3, i32 8, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
+!11 = metadata !{metadata !"0xb\003\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ]
 !12 = metadata !{i32 5, i32 9, metadata !13, null}
-!13 = metadata !{i32 786443, metadata !1, metadata !14, i32 5, i32 9, i32 0, i32 2}
-!14 = metadata !{i32 786443, metadata !1, metadata !11, i32 4, i32 3, i32 0, i32 1}
+!13 = metadata !{metadata !"0xb\005\009\000", metadata !1, metadata !14} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{metadata !"0xb\004\003\000", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{metadata !16, metadata !16, i64 0}
 !16 = metadata !{metadata !"int", metadata !17, i64 0}
 !17 = metadata !{metadata !"omnipotent char", metadata !18, i64 0}
diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll
index c8215a1..f32f610 100644
--- a/test/Transforms/LoopVectorize/cpp-new-array.ll
+++ b/test/Transforms/LoopVectorize/cpp-new-array.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll
index 2497b25..91d07d4 100644
--- a/test/Transforms/LoopVectorize/dbg.value.ll
+++ b/test/Transforms/LoopVectorize/dbg.value.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine | FileCheck %s
+; RUN: opt < %s -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine | FileCheck %s
 ; Make sure we vectorize with debugging turned on.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 ; CHECK-LABEL: @test(
 define i32 @test() #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !9), !dbg !18
+  tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !9, metadata !{}), !dbg !18
   br label %for.body, !dbg !18
 
 for.body:
@@ -25,7 +25,7 @@ for.body:
   %arrayidx4 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv, !dbg !19
   store i32 %add, i32* %arrayidx4, align 4, !dbg !19
   %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !18
-  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9), !dbg !18
+  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9, metadata !{}), !dbg !18
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !18
   %exitcond = icmp ne i32 %lftr.wideiv, 1024, !dbg !18
   br i1 %exitcond, label %for.body, label %for.end, !dbg !18
@@ -34,9 +34,9 @@ for.end:
   ret i32 0, !dbg !24
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "relocation-model"="pic" "ssp-buffers-size"="8" }
 attributes #1 = { nounwind readnone }
@@ -44,27 +44,27 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!26}
 
-!0 = metadata !{i32 786449, metadata !25, i32 4, metadata !"clang", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !11, null, metadata !""}
+!0 = metadata !{metadata !"0x11\004\00clang\001\00\000\00\000", metadata !25, metadata !1, metadata !1, metadata !2, metadata !11, null} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !2 = metadata !{metadata !3}
-!3 = metadata !{i32 786478, metadata !25, metadata !4, metadata !"test", metadata !"test", metadata !"test", i32 5, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @test, null, null, metadata !8, i32 5}
-!4 = metadata !{i32 786473, metadata !25}
-!5 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!3 = metadata !{metadata !"0x2e\00test\00test\00test\005\000\001\000\006\00256\001\005", metadata !25, metadata !4, metadata !5, null, i32 ()* @test, null, null, metadata !8} ; [ DW_TAG_subprogram ]
+!4 = metadata !{metadata !"0x29", metadata !25} ; [ DW_TAG_file_type ]
+!5 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{metadata !7}
-!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!7 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786688, metadata !10, metadata !"i", metadata !4, i32 6, metadata !7, i32 0, i32 0}
-!10 = metadata !{i32 786443, metadata !25, metadata !3, i32 6, i32 0, i32 0}
+!9 = metadata !{metadata !"0x100\00i\006\000", metadata !10, metadata !4, metadata !7} ; [ DW_TAG_auto_variable ]
+!10 = metadata !{metadata !"0xb\006\000\000", metadata !25, metadata !3} ; [ DW_TAG_lexical_block ]
 !11 = metadata !{metadata !12, metadata !16, metadata !17}
-!12 = metadata !{i32 786484, i32 0, null, metadata !"A", metadata !"A", metadata !"", metadata !4, i32 1, metadata !13, i32 0, i32 1, [1024 x i32]* @A, null}
-!13 = metadata !{i32 786433, null, null, null, i32 0, i64 32768, i64 32, i32 0, i32 0, metadata !7, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32768, align 32, offset 0] [from int]
+!12 = metadata !{metadata !"0x34\00A\00A\00\001\000\001", null, metadata !4, metadata !13, [1024 x i32]* @A, null} ; [ DW_TAG_variable ]
+!13 = metadata !{metadata !"0x1\00\000\0032768\0032\000\000", null, null, metadata !7, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32768, align 32, offset 0] [from int]
 !14 = metadata !{metadata !15}
 !15 = metadata !{i32 786465, i64 0, i64 1024}
-!16 = metadata !{i32 786484, i32 0, null, metadata !"B", metadata !"B", metadata !"", metadata !4, i32 2, metadata !13, i32 0, i32 1, [1024 x i32]* @B, null}
-!17 = metadata !{i32 786484, i32 0, null, metadata !"C", metadata !"C", metadata !"", metadata !4, i32 3, metadata !13, i32 0, i32 1, [1024 x i32]* @C, null} 
+!16 = metadata !{metadata !"0x34\00B\00B\00\002\000\001", null, metadata !4, metadata !13, [1024 x i32]* @B, null} ; [ DW_TAG_variable ]
+!17 = metadata !{metadata !"0x34\00C\00C\00\003\000\001", null, metadata !4, metadata !13, [1024 x i32]* @C, null} ; [ DW_TAG_variable ]
 !18 = metadata !{i32 6, i32 0, metadata !10, null}
 !19 = metadata !{i32 7, i32 0, metadata !20, null}
-!20 = metadata !{i32 786443, metadata !25, metadata !10, i32 6, i32 0, i32 1}
+!20 = metadata !{metadata !"0xb\006\000\001", metadata !25, metadata !10} ; [ DW_TAG_lexical_block ]
 !24 = metadata !{i32 9, i32 0, metadata !3, null}
 !25 = metadata !{metadata !"test", metadata !"/path/to/somewhere"}
-!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/debugloc.ll b/test/Transforms/LoopVectorize/debugloc.ll
index bf0b418..6350296 100644
--- a/test/Transforms/LoopVectorize/debugloc.ll
+++ b/test/Transforms/LoopVectorize/debugloc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 | FileCheck %s
+; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -19,10 +19,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 define i32 @f(i32* nocapture %a, i32 %size) #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13), !dbg !19
-  tail call void @llvm.dbg.value(metadata !{i32 %size}, i64 0, metadata !14), !dbg !19
-  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !15), !dbg !20
-  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !16), !dbg !21
+  tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13, metadata !{}), !dbg !19
+  tail call void @llvm.dbg.value(metadata !{i32 %size}, i64 0, metadata !14, metadata !{}), !dbg !19
+  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !15, metadata !{}), !dbg !20
+  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !16, metadata !{}), !dbg !21
   %cmp4 = icmp eq i32 %size, 0, !dbg !21
   br i1 %cmp4, label %for.end, label %for.body.lr.ph, !dbg !21
 
@@ -35,9 +35,9 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv, !dbg !22
   %0 = load i32* %arrayidx, align 4, !dbg !22
   %add = add i32 %0, %sum.05, !dbg !22
-  tail call void @llvm.dbg.value(metadata !{i32 %add.lcssa}, i64 0, metadata !15), !dbg !22
+  tail call void @llvm.dbg.value(metadata !{i32 %add.lcssa}, i64 0, metadata !15, metadata !{}), !dbg !22
   %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !21
-  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !16), !dbg !21
+  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !16, metadata !{}), !dbg !21
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21
   %exitcond = icmp ne i32 %lftr.wideiv, %size, !dbg !21
   br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge, !dbg !21
@@ -52,10 +52,10 @@ for.end:                                          ; preds = %entry, %for.cond.fo
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind readonly ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -63,28 +63,28 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!18, !27}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 185038) (llvm/trunk 185097)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Data/backedup/dev/os/llvm/debug/-] [DW_LANG_C99]
+!0 = metadata !{metadata !"0x11\0012\00clang version 3.4 (trunk 185038) (llvm/trunk 185097)\001\00\000\00\000", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [/Volumes/Data/backedup/dev/os/llvm/debug/-] [DW_LANG_C99]
 !1 = metadata !{metadata !"-", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @f, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!4 = metadata !{metadata !"0x2e\00f\00f\00\003\000\001\000\006\00256\001\003", metadata !5, metadata !6, metadata !7, null, i32 (i32*, i32)* @f, null, null, metadata !12} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
 !5 = metadata !{metadata !"<stdin>", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = metadata !{metadata !"0x29", metadata !5}          ; [ DW_TAG_file_type ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
+!7 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !10, metadata !11}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!11 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!9 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", null, null, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!11 = metadata !{metadata !"0x24\00unsigned int\000\0032\0032\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
 !12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16}
-!13 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3]
-!14 = metadata !{i32 786689, metadata !4, metadata !"size", metadata !6, i32 33554435, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [size] [line 3]
-!15 = metadata !{i32 786688, metadata !4, metadata !"sum", metadata !6, i32 4, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 4]
-!16 = metadata !{i32 786688, metadata !17, metadata !"i", metadata !6, i32 5, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 5]
-!17 = metadata !{i32 786443, metadata !5, metadata !4, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
+!13 = metadata !{metadata !"0x101\00a\0016777219\000", metadata !4, metadata !6, metadata !10} ; [ DW_TAG_arg_variable ] [a] [line 3]
+!14 = metadata !{metadata !"0x101\00size\0033554435\000", metadata !4, metadata !6, metadata !11} ; [ DW_TAG_arg_variable ] [size] [line 3]
+!15 = metadata !{metadata !"0x100\00sum\004\000", metadata !4, metadata !6, metadata !11} ; [ DW_TAG_auto_variable ] [sum] [line 4]
+!16 = metadata !{metadata !"0x100\00i\005\000", metadata !17, metadata !6, metadata !11} ; [ DW_TAG_auto_variable ] [i] [line 5]
+!17 = metadata !{metadata !"0xb\005\000\000", metadata !5, metadata !4} ; [ DW_TAG_lexical_block ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
 !18 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
 !19 = metadata !{i32 3, i32 0, metadata !4, null}
 !20 = metadata !{i32 4, i32 0, metadata !4, null}
 !21 = metadata !{i32 5, i32 0, metadata !17, null}
 !22 = metadata !{i32 6, i32 0, metadata !17, null}
 !26 = metadata !{i32 7, i32 0, metadata !4, null}
-!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/duplicated-metadata.ll b/test/Transforms/LoopVectorize/duplicated-metadata.ll
new file mode 100644
index 0000000..8353dca
--- /dev/null
+++ b/test/Transforms/LoopVectorize/duplicated-metadata.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -loop-vectorize -S 2>&1 | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; This test makes sure we don't duplicate the loop vectorizer's metadata
+; while marking them as already vectorized (by setting width = 1), even
+; at lower optimization levels, where no extra cleanup is done
+
+define void @_Z3fooPf(float* %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv
+  %p = load float* %arrayidx, align 4
+  %mul = fmul float %p, 2.000000e+00
+  store float %mul, float* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = metadata !{metadata !0, metadata !1}
+!1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 4}
+; CHECK-NOT: !{metadata !"llvm.loop.vectorize.width", i32 4}
+; CHECK: !{metadata !"llvm.loop.interleave.count", i32 1}
diff --git a/test/Transforms/LoopVectorize/ee-crash.ll b/test/Transforms/LoopVectorize/ee-crash.ll
index 8a4f8ce..a3c0bb8 100644
--- a/test/Transforms/LoopVectorize/ee-crash.ll
+++ b/test/Transforms/LoopVectorize/ee-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/exact.ll b/test/Transforms/LoopVectorize/exact.ll
new file mode 100644
index 0000000..0a8fbf3
--- /dev/null
+++ b/test/Transforms/LoopVectorize/exact.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; CHECK-LABEL: @lshr_exact(
+; CHECK: lshr exact <4 x i32>
+define void @lshr_exact(i32* %x) {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %x, i64 %iv
+  %0 = load i32* %arrayidx, align 4
+  %conv1 = lshr exact i32 %0, 1
+  store i32 %conv1, i32* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 256
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/flags.ll b/test/Transforms/LoopVectorize/flags.ll
index 21d0937..0fc55c8 100644
--- a/test/Transforms/LoopVectorize/flags.ll
+++ b/test/Transforms/LoopVectorize/flags.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll
index 0dfbab0..0f064ee 100644
--- a/test/Transforms/LoopVectorize/float-reduction.ll
+++ b/test/Transforms/LoopVectorize/float-reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -23,3 +23,25 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
   ret float %add
 }
+
+;CHECK-LABEL: @foosub(
+;CHECK: fsub fast <4 x float>
+;CHECK: ret
+define float @foosub(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.04 = phi float [ 0.000000e+00, %entry ], [ %sub, %for.body ]
+  %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %sub = fsub fast float %sum.04, %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 200
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret float %sub
+}
diff --git a/test/Transforms/LoopVectorize/funcall.ll b/test/Transforms/LoopVectorize/funcall.ll
index f1f068c..e03534f 100644
--- a/test/Transforms/LoopVectorize/funcall.ll
+++ b/test/Transforms/LoopVectorize/funcall.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
index d8959d4..6c8af0b 100644
--- a/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
+; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-width=4 -force-vector-interleave=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll
index d64d67f..3f11ce8 100644
--- a/test/Transforms/LoopVectorize/global_alias.ll
+++ b/test/Transforms/LoopVectorize/global_alias.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -O1 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -O1 -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
 
diff --git a/test/Transforms/LoopVectorize/hoist-loads.ll b/test/Transforms/LoopVectorize/hoist-loads.ll
index 765e14d..d0b27f1 100644
--- a/test/Transforms/LoopVectorize/hoist-loads.ll
+++ b/test/Transforms/LoopVectorize/hoist-loads.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/i8-induction.ll b/test/Transforms/LoopVectorize/i8-induction.ll
index 2a0e826..90e3ec0 100644
--- a/test/Transforms/LoopVectorize/i8-induction.ll
+++ b/test/Transforms/LoopVectorize/i8-induction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/if-conv-crash.ll b/test/Transforms/LoopVectorize/if-conv-crash.ll
index f8f2cf1..67910bf 100644
--- a/test/Transforms/LoopVectorize/if-conv-crash.ll
+++ b/test/Transforms/LoopVectorize/if-conv-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/if-conversion-nest.ll b/test/Transforms/LoopVectorize/if-conversion-nest.ll
index 92cb06e..b5ac8fc 100644
--- a/test/Transforms/LoopVectorize/if-conversion-nest.ll
+++ b/test/Transforms/LoopVectorize/if-conversion-nest.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/if-conversion-reduction.ll b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
index 8cb703c..455699c 100644
--- a/test/Transforms/LoopVectorize/if-conversion-reduction.ll
+++ b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
index 6e3e8ed..9e18528 100644
--- a/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/if-pred-stores.ll b/test/Transforms/LoopVectorize/if-pred-stores.ll
index 7b0e181..c6067e0 100644
--- a/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-unroll=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-unroll=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
 
diff --git a/test/Transforms/LoopVectorize/incorrect-dom-info.ll b/test/Transforms/LoopVectorize/incorrect-dom-info.ll
new file mode 100644
index 0000000..624ee7e
--- /dev/null
+++ b/test/Transforms/LoopVectorize/incorrect-dom-info.ll
@@ -0,0 +1,142 @@
+; This test is based on one of benchmarks from SPEC2006. It exposes a bug with
+; incorrect updating of the dom-tree.
+; RUN: opt < %s  -loop-vectorize -verify-dom-info
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@PL_utf8skip = external constant [0 x i8]
+
+; Function Attrs: nounwind ssp uwtable
+define void @Perl_pp_quotemeta() #0 {
+  %len = alloca i64, align 8
+  br i1 undef, label %2, label %1
+
+; <label>:1                                       ; preds = %0
+  br label %3
+
+; <label>:2                                       ; preds = %0
+  br label %3
+
+; <label>:3                                       ; preds = %2, %1
+  br i1 undef, label %34, label %4
+
+; <label>:4                                       ; preds = %3
+  br i1 undef, label %5, label %6
+
+; <label>:5                                       ; preds = %4
+  br label %6
+
+; <label>:6                                       ; preds = %5, %4
+  br i1 undef, label %7, label %8
+
+; <label>:7                                       ; preds = %6
+  br label %8
+
+; <label>:8                                       ; preds = %7, %6
+  br i1 undef, label %.preheader, label %9
+
+.preheader:                                       ; preds = %9, %8
+  br i1 undef, label %.loopexit, label %.lr.ph
+
+; <label>:9                                       ; preds = %8
+  br i1 undef, label %thread-pre-split.preheader, label %.preheader
+
+thread-pre-split.preheader:                       ; preds = %9
+  br i1 undef, label %thread-pre-split._crit_edge, label %.lr.ph21
+
+.thread-pre-split.loopexit_crit_edge:             ; preds = %19
+  %scevgep.sum = xor i64 %umax, -1
+  %scevgep45 = getelementptr i8* %d.020, i64 %scevgep.sum
+  br label %thread-pre-split.loopexit
+
+thread-pre-split.loopexit:                        ; preds = %11, %.thread-pre-split.loopexit_crit_edge
+  %d.1.lcssa = phi i8* [ %scevgep45, %.thread-pre-split.loopexit_crit_edge ], [ %d.020, %11 ]
+  br i1 false, label %thread-pre-split._crit_edge, label %.lr.ph21
+
+.lr.ph21:                                         ; preds = %26, %thread-pre-split.loopexit, %thread-pre-split.preheader
+  %d.020 = phi i8* [ undef, %26 ], [ %d.1.lcssa, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ]
+  %10 = phi i64 [ %28, %26 ], [ undef, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ]
+  br i1 undef, label %11, label %22
+
+; <label>:11                                      ; preds = %.lr.ph21
+  %12 = getelementptr inbounds [0 x i8]* @PL_utf8skip, i64 0, i64 undef
+  %13 = load i8* %12, align 1
+  %14 = zext i8 %13 to i64
+  %15 = icmp ugt i64 %14, %10
+  %. = select i1 %15, i64 %10, i64 %14
+  br i1 undef, label %thread-pre-split.loopexit, label %.lr.ph28
+
+.lr.ph28:                                         ; preds = %11
+  %16 = xor i64 %10, -1
+  %17 = xor i64 %14, -1
+  %18 = icmp ugt i64 %16, %17
+  %umax = select i1 %18, i64 %16, i64 %17
+  br label %19
+
+; <label>:19                                      ; preds = %19, %.lr.ph28
+  %ulen.126 = phi i64 [ %., %.lr.ph28 ], [ %20, %19 ]
+  %20 = add i64 %ulen.126, -1
+  %21 = icmp eq i64 %20, 0
+  br i1 %21, label %.thread-pre-split.loopexit_crit_edge, label %19
+
+; <label>:22                                      ; preds = %.lr.ph21
+  br i1 undef, label %26, label %23
+
+; <label>:23                                      ; preds = %22
+  br i1 undef, label %26, label %24
+
+; <label>:24                                      ; preds = %23
+  br i1 undef, label %26, label %25
+
+; <label>:25                                      ; preds = %24
+  br label %26
+
+; <label>:26                                      ; preds = %25, %24, %23, %22
+  %27 = load i64* %len, align 8
+  %28 = add i64 %27, -1
+  br i1 undef, label %thread-pre-split._crit_edge, label %.lr.ph21
+
+thread-pre-split._crit_edge:                      ; preds = %26, %thread-pre-split.loopexit, %thread-pre-split.preheader
+  br label %.loopexit
+
+.lr.ph:                                           ; preds = %33, %.preheader
+  br i1 undef, label %29, label %thread-pre-split5
+
+; <label>:29                                      ; preds = %.lr.ph
+  br i1 undef, label %33, label %30
+
+; <label>:30                                      ; preds = %29
+  br i1 undef, label %33, label %31
+
+thread-pre-split5:                                ; preds = %.lr.ph
+  br i1 undef, label %33, label %31
+
+; <label>:31                                      ; preds = %thread-pre-split5, %30
+  br i1 undef, label %33, label %32
+
+; <label>:32                                      ; preds = %31
+  br label %33
+
+; <label>:33                                      ; preds = %32, %31, %thread-pre-split5, %30, %29
+  br i1 undef, label %.loopexit, label %.lr.ph
+
+.loopexit:                                        ; preds = %33, %thread-pre-split._crit_edge, %.preheader
+  br label %35
+
+; <label>:34                                      ; preds = %3
+  br label %35
+
+; <label>:35                                      ; preds = %34, %.loopexit
+  br i1 undef, label %37, label %36
+
+; <label>:36                                      ; preds = %35
+  br label %37
+
+; <label>:37                                      ; preds = %36, %35
+  ret void
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.6.0 "}
diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll
index 71bedb7..067a76b 100644
--- a/test/Transforms/LoopVectorize/increment.ll
+++ b/test/Transforms/LoopVectorize/increment.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
index 7dabcb2..3f34918 100644
--- a/test/Transforms/LoopVectorize/induction.ll
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -28,7 +28,7 @@ for.end:
   ret void
 }
 
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
 
 ; Make sure we remove unneeded vectorization of induction variables.
 ; In order for instcombine to cleanup the vectorized induction variables that we
diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll
index 9c8201a..ce64c5b 100644
--- a/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/test/Transforms/LoopVectorize/induction_plus.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
index 7dfaf03..d48731a 100644
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -1192,3 +1192,59 @@ for.body:                                         ; preds = %entry, %for.body
 for.end:                                          ; preds = %for.body, %entry
   ret void
 }
+
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+
+;CHECK-LABEL: @minnum_f32(
+;CHECK: llvm.minnum.v4f32
+;CHECK: ret void
+define void @minnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4
+  %call = tail call float @llvm.minnum.f32(float %0, float %1) nounwind readnone
+  %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx4, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+
+;CHECK-LABEL: @maxnum_f32(
+;CHECK: llvm.maxnum.v4f32
+;CHECK: ret void
+define void @maxnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4
+  %call = tail call float @llvm.maxnum.f32(float %0, float %1) nounwind readnone
+  %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx4, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/lcssa-crash.ll b/test/Transforms/LoopVectorize/lcssa-crash.ll
index de6be54..68cc74e 100644
--- a/test/Transforms/LoopVectorize/lcssa-crash.ll
+++ b/test/Transforms/LoopVectorize/lcssa-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LoopVectorize/lifetime.ll b/test/Transforms/LoopVectorize/lifetime.ll
index 4f6f3b8..ba36cc4 100644
--- a/test/Transforms/LoopVectorize/lifetime.ll
+++ b/test/Transforms/LoopVectorize/lifetime.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/memdep.ll b/test/Transforms/LoopVectorize/memdep.ll
index 21cb703..f857e80 100644
--- a/test/Transforms/LoopVectorize/memdep.ll
+++ b/test/Transforms/LoopVectorize/memdep.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S | FileCheck %s
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -S | FileCheck %s -check-prefix=WIDTH
+; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s -check-prefix=WIDTH
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/metadata-unroll.ll b/test/Transforms/LoopVectorize/metadata-unroll.ll
index 2fcc53a..848f1f9 100644
--- a/test/Transforms/LoopVectorize/metadata-unroll.ll
+++ b/test/Transforms/LoopVectorize/metadata-unroll.ll
@@ -38,4 +38,4 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp {
 }
 
 !0 = metadata !{metadata !0, metadata !1}
-!1 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 2}
+!1 = metadata !{metadata !"llvm.loop.interleave.count", i32 2}
diff --git a/test/Transforms/LoopVectorize/metadata-width.ll b/test/Transforms/LoopVectorize/metadata-width.ll
index 87de655..da0c622 100644
--- a/test/Transforms/LoopVectorize/metadata-width.ll
+++ b/test/Transforms/LoopVectorize/metadata-width.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LoopVectorize/metadata.ll b/test/Transforms/LoopVectorize/metadata.ll
new file mode 100644
index 0000000..14f60b3
--- /dev/null
+++ b/test/Transforms/LoopVectorize/metadata.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @test1(i32* nocapture %a, float* nocapture readonly %b) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %conv = fptosi float %0 to i32
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx2, align 4, !tbaa !4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1600
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+; CHECK-LABEL: @test1
+; CHECK: load <4 x float>* %{{.*}}, align 4, !tbaa ![[TFLT:[0-9]+]]
+; CHECK: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa ![[TINT:[0-9]+]]
+; CHECK: ret i32 0
+
+; CHECK-DAG: ![[TFLT]] = metadata !{metadata ![[TFLT1:[0-9]+]]
+; CHECK-DAG: ![[TFLT1]] = metadata !{metadata !"float"
+
+; CHECK-DAG: ![[TINT]] = metadata !{metadata ![[TINT1:[0-9]+]]
+; CHECK-DAG: ![[TINT1]] = metadata !{metadata !"int"
+
+attributes #0 = { nounwind uwtable }
+
+!0 = metadata !{metadata !1, metadata !1, i64 0}
+!1 = metadata !{metadata !"float", metadata !2, i64 0}
+!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0}
+!3 = metadata !{metadata !"Simple C/C++ TBAA"}
+!4 = metadata !{metadata !5, metadata !5, i64 0}
+!5 = metadata !{metadata !"int", metadata !2, i64 0}
+
diff --git a/test/Transforms/LoopVectorize/minmax_reduction.ll b/test/Transforms/LoopVectorize/minmax_reduction.ll
index 0e47260..e73e69d 100644
--- a/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=1  < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=1  < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
index 88a29c5..cd022ad 100644
--- a/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
+++ b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
@@ -1,4 +1,4 @@
-; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/multiple-address-spaces.ll b/test/Transforms/LoopVectorize/multiple-address-spaces.ll
index 7d836de..bb2af1e 100644
--- a/test/Transforms/LoopVectorize/multiple-address-spaces.ll
+++ b/test/Transforms/LoopVectorize/multiple-address-spaces.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 ; From a simple program with two address spaces:
 ; char Y[4*10000] __attribute__((address_space(1)));
diff --git a/test/Transforms/LoopVectorize/no_array_bounds.ll b/test/Transforms/LoopVectorize/no_array_bounds.ll
new file mode 100644
index 0000000..a39b44f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/no_array_bounds.ll
@@ -0,0 +1,101 @@
+; RUN: opt < %s -loop-vectorize -S 2>&1 | FileCheck %s
+
+; Verify warning is generated when vectorization/ interleaving is explicitly specified and fails to occur.
+; CHECK: warning: no_array_bounds.cpp:5:5: loop not vectorized: failed explicitly specified loop vectorization
+; CHECK: warning: no_array_bounds.cpp:10:5: loop not interleaved: failed explicitly specified loop interleaving
+
+;  #pragma clang loop vectorize(enable)
+;  for (int i = 0; i < number; i++) {
+;    A[B[i]]++;
+;  }
+
+;  #pragma clang loop vectorize(disable) interleave(enable)
+;  for (int i = 0; i < number; i++) {
+;    B[A[i]]++;
+;  }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind ssp uwtable
+define void @_Z4testPiS_i(i32* nocapture %A, i32* nocapture %B, i32 %number) #0 {
+entry:
+  %cmp25 = icmp sgt i32 %number, 0, !dbg !10
+  br i1 %cmp25, label %for.body.preheader, label %for.end15, !dbg !10, !llvm.loop !12
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body, !dbg !14
+
+for.cond5.preheader:                              ; preds = %for.body
+  br i1 %cmp25, label %for.body7.preheader, label %for.end15, !dbg !16, !llvm.loop !18
+
+for.body7.preheader:                              ; preds = %for.cond5.preheader
+  br label %for.body7, !dbg !20
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv27 = phi i64 [ %indvars.iv.next28, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv27, !dbg !14
+  %0 = load i32* %arrayidx, align 4, !dbg !14, !tbaa !22
+  %idxprom1 = sext i32 %0 to i64, !dbg !14
+  %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1, !dbg !14
+  %1 = load i32* %arrayidx2, align 4, !dbg !14, !tbaa !22
+  %inc = add nsw i32 %1, 1, !dbg !14
+  store i32 %inc, i32* %arrayidx2, align 4, !dbg !14, !tbaa !22
+  %indvars.iv.next28 = add nuw nsw i64 %indvars.iv27, 1, !dbg !10
+  %lftr.wideiv29 = trunc i64 %indvars.iv.next28 to i32, !dbg !10
+  %exitcond30 = icmp eq i32 %lftr.wideiv29, %number, !dbg !10
+  br i1 %exitcond30, label %for.cond5.preheader, label %for.body, !dbg !10, !llvm.loop !12
+
+for.body7:                                        ; preds = %for.body7.preheader, %for.body7
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body7 ], [ 0, %for.body7.preheader ]
+  %arrayidx9 = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !20
+  %2 = load i32* %arrayidx9, align 4, !dbg !20, !tbaa !22
+  %idxprom10 = sext i32 %2 to i64, !dbg !20
+  %arrayidx11 = getelementptr inbounds i32* %B, i64 %idxprom10, !dbg !20
+  %3 = load i32* %arrayidx11, align 4, !dbg !20, !tbaa !22
+  %inc12 = add nsw i32 %3, 1, !dbg !20
+  store i32 %inc12, i32* %arrayidx11, align 4, !dbg !20, !tbaa !22
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !16
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !16
+  %exitcond = icmp eq i32 %lftr.wideiv, %number, !dbg !16
+  br i1 %exitcond, label %for.end15.loopexit, label %for.body7, !dbg !16, !llvm.loop !18
+
+for.end15.loopexit:                               ; preds = %for.body7
+  br label %for.end15
+
+for.end15:                                        ; preds = %for.end15.loopexit, %entry, %for.cond5.preheader
+  ret void, !dbg !26
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0\001\00\000\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !"no_array_bounds.cpp", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !"0x2e\00test\00test\00\001\000\001\000\006\00256\001\002", metadata !1, metadata !5, metadata !6, null, void (i32*, i32*, i32)* @_Z4testPiS_i, null, null, metadata !2} ; [ DW_TAG_subprogram ]
+!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 2}
+!9 = metadata !{metadata !"clang version 3.5.0"}
+!10 = metadata !{i32 4, i32 8, metadata !11, null}
+!11 = metadata !{metadata !"0xb\004\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{metadata !12, metadata !13}
+!13 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!14 = metadata !{i32 5, i32 5, metadata !15, null}
+!15 = metadata !{metadata !"0xb\004\0036\000", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 9, i32 8, metadata !17, null}
+!17 = metadata !{metadata !"0xb\009\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{metadata !18, metadata !13, metadata !19}
+!19 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
+!20 = metadata !{i32 10, i32 5, metadata !21, null}
+!21 = metadata !{metadata !"0xb\009\0036\000", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{metadata !23, metadata !23, i64 0}
+!23 = metadata !{metadata !"int", metadata !24, i64 0}
+!24 = metadata !{metadata !"omnipotent char", metadata !25, i64 0}
+!25 = metadata !{metadata !"Simple C/C++ TBAA"}
+!26 = metadata !{i32 12, i32 1, metadata !4, null}
diff --git a/test/Transforms/LoopVectorize/no_idiv_reduction.ll b/test/Transforms/LoopVectorize/no_idiv_reduction.ll
index 295fcab..5c721a680 100644
--- a/test/Transforms/LoopVectorize/no_idiv_reduction.ll
+++ b/test/Transforms/LoopVectorize/no_idiv_reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s
 @a = common global [128 x i32] zeroinitializer, align 16
 
 ;; Must not vectorize division reduction. Division is lossy.
diff --git a/test/Transforms/LoopVectorize/no_int_induction.ll b/test/Transforms/LoopVectorize/no_int_induction.ll
index e572d1a..1275915 100644
--- a/test/Transforms/LoopVectorize/no_int_induction.ll
+++ b/test/Transforms/LoopVectorize/no_int_induction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 ; int __attribute__((noinline)) sum_array(int *A, int n) {
 ;  return std::accumulate(A, A + n, 0);
diff --git a/test/Transforms/LoopVectorize/no_outside_user.ll b/test/Transforms/LoopVectorize/no_outside_user.ll
index 1f891ad..bcd29c1 100644
--- a/test/Transforms/LoopVectorize/no_outside_user.ll
+++ b/test/Transforms/LoopVectorize/no_outside_user.ll
@@ -1,4 +1,7 @@
-; RUN: opt -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s
+
+; CHECK: remark: {{.*}}: loop not vectorized: value could not be identified as an induction or reduction variable
+; CHECK: remark: {{.*}}: loop not vectorized: use of induction value outside of the loop is not handled by vectorizer
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
 
diff --git a/test/Transforms/LoopVectorize/no_switch.ll b/test/Transforms/LoopVectorize/no_switch.ll
index 52b4285..c989c6b 100644
--- a/test/Transforms/LoopVectorize/no_switch.ll
+++ b/test/Transforms/LoopVectorize/no_switch.ll
@@ -1,7 +1,8 @@
 ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
 
 ; CHECK: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
-; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization is explicitly enabled with width 4
+; CHECK: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info (Force=true, Vector Width=4)
+; CHECK: warning: source.cpp:4:5: loop not vectorized: failed explicitly specified loop vectorization
 
 ; CHECK: _Z11test_switchPii
 ; CHECK-NOT: x i32>
@@ -58,28 +59,28 @@ attributes #0 = { nounwind }
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
+!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0\001\00\006\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !"source.cpp", metadata !"."}
 !2 = metadata !{}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_switch", metadata !"test_switch", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z11test_switchPii, null, null, metadata !2, i32 1}
-!5 = metadata !{i32 786473, metadata !1}
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
+!4 = metadata !{metadata !"0x2e\00test_switch\00test_switch\00\001\000\001\000\006\00256\001\001", metadata !1, metadata !5, metadata !6, null, void (i32*, i32)* @_Z11test_switchPii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [test_switch]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [./source.cpp]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 2}
 !9 = metadata !{metadata !"clang version 3.5.0"}
 !10 = metadata !{i32 3, i32 8, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
+!11 = metadata !{metadata !"0xb\003\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ]
 !12 = metadata !{metadata !12, metadata !13, metadata !13}
 !13 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
 !14 = metadata !{i32 4, i32 5, metadata !15, null}
-!15 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 36, i32 0, i32 1}
+!15 = metadata !{metadata !"0xb\003\0036\000", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ]
 !16 = metadata !{metadata !17, metadata !17, i64 0}
 !17 = metadata !{metadata !"int", metadata !18, i64 0}
 !18 = metadata !{metadata !"omnipotent char", metadata !19, i64 0}
 !19 = metadata !{metadata !"Simple C/C++ TBAA"}
 !20 = metadata !{i32 6, i32 7, metadata !21, null}
-!21 = metadata !{i32 786443, metadata !1, metadata !15, i32 4, i32 18, i32 0, i32 2}
+!21 = metadata !{metadata !"0xb\004\0018\000", metadata !1, metadata !15} ; [ DW_TAG_lexical_block ]
 !22 = metadata !{i32 7, i32 5, metadata !21, null}
 !23 = metadata !{i32 9, i32 7, metadata !21, null}
 !24 = metadata !{i32 14, i32 1, metadata !4, null}
diff --git a/test/Transforms/LoopVectorize/nofloat.ll b/test/Transforms/LoopVectorize/nofloat.ll
index c3c81b6..e9f4c5f 100644
--- a/test/Transforms/LoopVectorize/nofloat.ll
+++ b/test/Transforms/LoopVectorize/nofloat.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 ; Make sure that we don't vectorize functions with 'noimplicitfloat' attributes.
 
diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll
index 0c54a2b..b03d4f0 100644
--- a/test/Transforms/LoopVectorize/non-const-n.ll
+++ b/test/Transforms/LoopVectorize/non-const-n.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/nsw-crash.ll b/test/Transforms/LoopVectorize/nsw-crash.ll
index e5fad14..68d9933 100644
--- a/test/Transforms/LoopVectorize/nsw-crash.ll
+++ b/test/Transforms/LoopVectorize/nsw-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4
 
 target datalayout =
 "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
diff --git a/test/Transforms/LoopVectorize/opt.ll b/test/Transforms/LoopVectorize/opt.ll
index 27030a2..a9be80f 100644
--- a/test/Transforms/LoopVectorize/opt.ll
+++ b/test/Transforms/LoopVectorize/opt.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -O3 -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=LOOPVEC %s
-; RUN: opt -S -O3 -disable-loop-vectorization -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=NOLOOPVEC %s
+; RUN: opt -S -O3 -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck --check-prefix=LOOPVEC %s
+; RUN: opt -S -O3 -disable-loop-vectorization -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck --check-prefix=NOLOOPVEC %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/ptr_loops.ll b/test/Transforms/LoopVectorize/ptr_loops.ll
index 15983f0..3fb38fe 100644
--- a/test/Transforms/LoopVectorize/ptr_loops.ll
+++ b/test/Transforms/LoopVectorize/ptr_loops.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll
index fc8f0a5..2f7a96a 100644
--- a/test/Transforms/LoopVectorize/read-only.ll
+++ b/test/Transforms/LoopVectorize/read-only.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
index 791fce1..5e6b7fa 100644
--- a/test/Transforms/LoopVectorize/reduction.ll
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/reverse_induction.ll b/test/Transforms/LoopVectorize/reverse_induction.ll
index 65ef95d..da02d01 100644
--- a/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=2 -force-vector-width=4 -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/reverse_iter.ll b/test/Transforms/LoopVectorize/reverse_iter.ll
index f803120..13172bb 100644
--- a/test/Transforms/LoopVectorize/reverse_iter.ll
+++ b/test/Transforms/LoopVectorize/reverse_iter.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/runtime-check-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-address-space.ll
index 6c86561..34bbe52 100644
--- a/test/Transforms/LoopVectorize/runtime-check-address-space.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-address-space.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
+; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
 
 ; Check vectorization that would ordinarily require a runtime bounds
 ; check on the pointers when mixing address spaces. For now we cannot
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
index 212b37c..56f1f99 100644
--- a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
+; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
 
 ; Artificial datalayout
 target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
index 01e28bc..9d02a6a 100644
--- a/test/Transforms/LoopVectorize/runtime-check-readonly.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 ;CHECK: br
 ;CHECK: br
 ;CHECK: getelementptr
-;CHECK-NEXT: getelementptr
+;CHECK-DAG: getelementptr
 ;CHECK-DAG: icmp uge
 ;CHECK-DAG: icmp uge
 ;CHECK-DAG: icmp uge
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
index d15479d..1edafb4 100644
--- a/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/runtime-limit.ll b/test/Transforms/LoopVectorize/runtime-limit.ll
index 7370a6f..324949d 100644
--- a/test/Transforms/LoopVectorize/runtime-limit.ll
+++ b/test/Transforms/LoopVectorize/runtime-limit.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/safegep.ll b/test/Transforms/LoopVectorize/safegep.ll
index c950860..f853afd 100644
--- a/test/Transforms/LoopVectorize/safegep.ll
+++ b/test/Transforms/LoopVectorize/safegep.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1  < %s |  FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1  < %s |  FileCheck %s
 target datalayout = "e-p:32:32:32-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f16:16:16-f32:32:32-f64:32:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
 
 
diff --git a/test/Transforms/LoopVectorize/same-base-access.ll b/test/Transforms/LoopVectorize/same-base-access.ll
index d623a34..d19458f 100644
--- a/test/Transforms/LoopVectorize/same-base-access.ll
+++ b/test/Transforms/LoopVectorize/same-base-access.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll
index 257c7be..6b37cc2 100644
--- a/test/Transforms/LoopVectorize/scalar-select.ll
+++ b/test/Transforms/LoopVectorize/scalar-select.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/scev-exitlim-crash.ll b/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
index 683621a..1bce3f8 100644
--- a/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
+++ b/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=2 -force-vector-width=8 -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=8 -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx"
diff --git a/test/Transforms/LoopVectorize/simple-unroll.ll b/test/Transforms/LoopVectorize/simple-unroll.ll
index 83f35ff..8bf680a 100644
--- a/test/Transforms/LoopVectorize/simple-unroll.ll
+++ b/test/Transforms/LoopVectorize/simple-unroll.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/small-loop.ll b/test/Transforms/LoopVectorize/small-loop.ll
index 49ce5c5..1d30102 100644
--- a/test/Transforms/LoopVectorize/small-loop.ll
+++ b/test/Transforms/LoopVectorize/small-loop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/start-non-zero.ll b/test/Transforms/LoopVectorize/start-non-zero.ll
index 8f675af..cc47494 100644
--- a/test/Transforms/LoopVectorize/start-non-zero.ll
+++ b/test/Transforms/LoopVectorize/start-non-zero.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/store-shuffle-bug.ll b/test/Transforms/LoopVectorize/store-shuffle-bug.ll
index e53c120..6d3d113 100644
--- a/test/Transforms/LoopVectorize/store-shuffle-bug.ll
+++ b/test/Transforms/LoopVectorize/store-shuffle-bug.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
+; RUN: opt -S -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll
index 75beae8..cf6f325 100644
--- a/test/Transforms/LoopVectorize/struct_access.ll
+++ b/test/Transforms/LoopVectorize/struct_access.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/tbaa-nodep.ll b/test/Transforms/LoopVectorize/tbaa-nodep.ll
new file mode 100644
index 0000000..5cd104c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/tbaa-nodep.ll
@@ -0,0 +1,102 @@
+; RUN: opt < %s  -tbaa -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s
+; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s --check-prefix=CHECK-NOTBAA
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @test1(i32* nocapture %a, float* nocapture readonly %b) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %conv = fptosi float %0 to i32
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx2, align 4, !tbaa !4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1600
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+
+; TBAA partitions the accesses in this loop, so it can be vectorized without
+; runtime checks.
+
+; CHECK-LABEL: @test1
+; CHECK: entry:
+; CHECK-NEXT: br label %vector.body
+; CHECK: vector.body:
+
+; CHECK: load <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
+
+; CHECK: ret i32 0
+
+; CHECK-NOTBAA-LABEL: @test1
+; CHECK-NOTBAA: icmp uge i32*
+
+; CHECK-NOTBAA: load <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK-NOTBAA: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
+
+; CHECK-NOTBAA: ret i32 0
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test2(i32* nocapture readonly %a, float* nocapture readonly %b, float* nocapture %c) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4, !tbaa !4
+  %conv = sitofp i32 %1 to float
+  %mul = fmul float %0, %conv
+  %arrayidx4 = getelementptr inbounds float* %c, i64 %indvars.iv
+  store float %mul, float* %arrayidx4, align 4, !tbaa !0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1600
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+
+; This test is like the first, except here there is still one runtime check
+; required. Without TBAA, however, two checks are required.
+
+; CHECK-LABEL: @test2
+; CHECK: icmp uge float*
+; CHECK: icmp uge float*
+; CHECK-NOT: icmp uge i32*
+
+; CHECK: load <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
+
+; CHECK: ret i32 0
+
+; CHECK-NOTBAA-LABEL: @test2
+; CHECK-NOTBAA: icmp uge float*
+; CHECK-NOTBAA: icmp uge float*
+; CHECK-NOTBAA-DAG: icmp uge float*
+; CHECK-NOTBAA-DAG: icmp uge i32*
+
+; CHECK-NOTBAA: load <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK-NOTBAA: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
+
+; CHECK-NOTBAA: ret i32 0
+}
+
+attributes #0 = { nounwind uwtable }
+
+!0 = metadata !{metadata !1, metadata !1, i64 0}
+!1 = metadata !{metadata !"float", metadata !2, i64 0}
+!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0}
+!3 = metadata !{metadata !"Simple C/C++ TBAA"}
+!4 = metadata !{metadata !5, metadata !5, i64 0}
+!5 = metadata !{metadata !"int", metadata !2, i64 0}
+
diff --git a/test/Transforms/LoopVectorize/undef-inst-bug.ll b/test/Transforms/LoopVectorize/undef-inst-bug.ll
index ed60e80..0444fe8 100644
--- a/test/Transforms/LoopVectorize/undef-inst-bug.ll
+++ b/test/Transforms/LoopVectorize/undef-inst-bug.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/unroll_novec.ll b/test/Transforms/LoopVectorize/unroll_novec.ll
index 89f4678..257b4e6 100644
--- a/test/Transforms/LoopVectorize/unroll_novec.ll
+++ b/test/Transforms/LoopVectorize/unroll_novec.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=1 -force-target-num-scalar-regs=16 -force-target-max-scalar-unroll=8 -force-target-instruction-cost=1 -small-loop-cost=40 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=1 -force-target-num-scalar-regs=16 -force-target-max-scalar-interleave=8 -force-target-instruction-cost=1 -small-loop-cost=40 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/unsized-pointee-crash.ll b/test/Transforms/LoopVectorize/unsized-pointee-crash.ll
new file mode 100644
index 0000000..5cc9837
--- /dev/null
+++ b/test/Transforms/LoopVectorize/unsized-pointee-crash.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @fn1
+define void @fn1() {
+entry:
+  br label %for.body
+
+for.body:
+  %b.05 = phi i32 (...)* [ undef, %entry ], [ %1, %for.body ]
+  %a.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %0 = bitcast i32 (...)* %b.05 to i8*
+  %add.ptr = getelementptr i8* %0, i64 1
+  %1 = bitcast i8* %add.ptr to i32 (...)*
+; CHECK:      %[[cst:.*]] = bitcast i32 (...)* {{.*}} to i8*
+; CHECK-NEXT: %[[gep:.*]] = getelementptr i8* %[[cst]], i64 1
+  %inc = add nsw i32 %a.04, 1
+  %exitcond = icmp eq i32 %a.04, 63
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/value-ptr-bug.ll b/test/Transforms/LoopVectorize/value-ptr-bug.ll
index 6b06afa..7fb9095 100644
--- a/test/Transforms/LoopVectorize/value-ptr-bug.ll
+++ b/test/Transforms/LoopVectorize/value-ptr-bug.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/vect.omp.persistence.ll b/test/Transforms/LoopVectorize/vect.omp.persistence.ll
index f646567..b0fe7a5 100644
--- a/test/Transforms/LoopVectorize/vect.omp.persistence.ll
+++ b/test/Transforms/LoopVectorize/vect.omp.persistence.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -O2 -force-vector-unroll=2 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
+; RUN: opt < %s -O2 -force-vector-interleave=2 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
 ; REQUIRES: asserts
 
 ; Loop from "rotated"
diff --git a/test/Transforms/LoopVectorize/vect.stats.ll b/test/Transforms/LoopVectorize/vect.stats.ll
index 92ec24f..556da45 100644
--- a/test/Transforms/LoopVectorize/vect.stats.ll
+++ b/test/Transforms/LoopVectorize/vect.stats.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=4 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
 ; REQUIRES: asserts
 
 ;
@@ -62,4 +62,4 @@ for.body:
 
 for.end:
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll
index 47de13d..cee4b16 100644
--- a/test/Transforms/LoopVectorize/vectorize-once.ll
+++ b/test/Transforms/LoopVectorize/vectorize-once.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -70,7 +70,7 @@ attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "
 
 ; CHECK: !0 = metadata !{metadata !0, metadata !1, metadata !2}
 ; CHECK: !1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
-; CHECK: !2 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1}
+; CHECK: !2 = metadata !{metadata !"llvm.loop.interleave.count", i32 1}
 ; CHECK: !3 = metadata !{metadata !3, metadata !1, metadata !2}
 
 !0 = metadata !{metadata !0, metadata !1}
diff --git a/test/Transforms/LoopVectorize/version-mem-access.ll b/test/Transforms/LoopVectorize/version-mem-access.ll
index 51d20e2..7ac2fca 100644
--- a/test/Transforms/LoopVectorize/version-mem-access.ll
+++ b/test/Transforms/LoopVectorize/version-mem-access.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-unroll=1 < %s -S | FileCheck %s
+; RUN: opt -basicaa -loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 < %s -S | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/write-only.ll b/test/Transforms/LoopVectorize/write-only.ll
index 71a9cd0..2f100de 100644
--- a/test/Transforms/LoopVectorize/write-only.ll
+++ b/test/Transforms/LoopVectorize/write-only.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LowerSwitch/2014-06-23-PHIlowering.ll b/test/Transforms/LowerSwitch/2014-06-23-PHIlowering.ll
new file mode 100644
index 0000000..c6cddf6
--- /dev/null
+++ b/test/Transforms/LowerSwitch/2014-06-23-PHIlowering.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -lowerswitch -S | FileCheck %s
+
+define i32 @test(i32 %arg) #0 {
+; CHECK-LABEL: @test
+; CHECK: ; <label>:2
+; CHECK-NEXT:  %res.0 = phi i32 [ 1, %NodeBlock ], [ 2, %1 ]
+; CHECK-NEXT:  br label %3
+; CHECK: ; <label>:5
+; CHECK-NEXT:   %res.3 = phi i32 [ 0, %NewDefault ], [ %res.2, %4 ]
+; CHECK-NEXT:   %6 = add nsw i32 %res.3, 1
+; CHECK-NEXT:   ret i32 %6
+
+  switch i32 %arg, label %5 [
+    i32 1, label %1
+    i32 2, label %2
+    i32 3, label %3
+    i32 4, label %4
+  ]
+
+; <label>:1
+  br label %2
+
+; <label>:2
+  %res.0 = phi i32 [ 1, %0 ], [ 2, %1 ]
+  br label %3
+
+; <label>:3
+  %res.1 = phi i32 [ 0, %0 ], [ %res.0, %2 ]
+  %phitmp = add nsw i32 %res.1, 2
+  br label %4
+
+; <label>:4
+  %res.2 = phi i32 [ 1, %0 ], [ %phitmp, %3 ]
+  br label %5
+
+; <label>:5
+  %res.3 = phi i32 [ 0, %0 ], [ %res.2, %4 ]
+  %6 = add nsw i32 %res.3, 1
+  ret i32 %6
+}
diff --git a/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll b/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
index ea0d515..ea581d1 100644
--- a/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
+++ b/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -std-compile-opts -S | grep volatile | count 3
+; RUN: opt < %s -O3 -S | grep volatile | count 3
 ; PR1520
 ; Don't promote load volatiles/stores. This is really needed to handle setjmp/lonjmp properly.
 
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
index 33eaed6..b2d094f 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
@@ -7,13 +7,13 @@ entry:
   %retval = alloca double                         ; <double*> [#uses=2]
   %0 = alloca double                              ; <double*> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{i32* %i_addr}, metadata !0), !dbg !8
-; CHECK: call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata ![[IVAR:[0-9]*]])
-; CHECK: call void @llvm.dbg.value(metadata !{double %j}, i64 0, metadata ![[JVAR:[0-9]*]])
+  call void @llvm.dbg.declare(metadata !{i32* %i_addr}, metadata !0, metadata !{}), !dbg !8
+; CHECK: call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata ![[IVAR:[0-9]*]], metadata {{.*}})
+; CHECK: call void @llvm.dbg.value(metadata !{double %j}, i64 0, metadata ![[JVAR:[0-9]*]], metadata {{.*}})
 ; CHECK: ![[IVAR]] = {{.*}} ; [ DW_TAG_arg_variable ] [i]
 ; CHECK: ![[JVAR]] = {{.*}} ; [ DW_TAG_arg_variable ] [j]
   store i32 %i, i32* %i_addr
-  call void @llvm.dbg.declare(metadata !{double* %j_addr}, metadata !9), !dbg !8
+  call void @llvm.dbg.declare(metadata !{double* %j_addr}, metadata !9, metadata !{}), !dbg !8
   store double %j, double* %j_addr
   %1 = load i32* %i_addr, align 4, !dbg !10       ; <i32> [#uses=1]
   %2 = add nsw i32 %1, 1, !dbg !10                ; <i32> [#uses=1]
@@ -30,23 +30,23 @@ return:                                           ; preds = %entry
   ret double %retval1, !dbg !10
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!14}
 
-!0 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 2, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !12, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (i32, double)* @testfunc, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !12, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !13, metadata !13, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !12, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x101\00i\002\000", metadata !1, metadata !2, metadata !7} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{metadata !"0x2e\00testfunc\00testfunc\00testfunc\002\000\001\000\006\000\000\002", metadata !12, metadata !2, metadata !4, null, double (i32, double)* @testfunc, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = metadata !{metadata !"0x29", metadata !12} ; [ DW_TAG_file_type ]
+!3 = metadata !{metadata !"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", metadata !12, metadata !13, metadata !13, null, null, null} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !12, metadata !2, null, metadata !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6, metadata !7, metadata !6}
-!6 = metadata !{i32 786468, metadata !12, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786468, metadata !12, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{metadata !"0x24\00double\000\0064\0064\000\000\004", metadata !12, metadata !2} ; [ DW_TAG_base_type ]
+!7 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", metadata !12, metadata !2} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 2, i32 0, metadata !1, null}
-!9 = metadata !{i32 786689, metadata !1, metadata !"j", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{metadata !"0x101\00j\002\000", metadata !1, metadata !2, metadata !6} ; [ DW_TAG_arg_variable ]
 !10 = metadata !{i32 3, i32 0, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !12, metadata !1, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{metadata !"0xb\002\000\000", metadata !12, metadata !1} ; [ DW_TAG_lexical_block ]
 !12 = metadata !{metadata !"testfunc.c", metadata !"/tmp"}
 !13 = metadata !{i32 0}
-!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
index 32acdd6..b7b9dc7 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -mem2reg < %s | llvm-dis | grep ".dbg " | count 7
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare void @foo(i32, i64, i8*)
 
@@ -11,14 +11,14 @@ entry:
   %z_addr.i = alloca i8*                          ; <i8**> [#uses=2]
   %a_addr = alloca i32                            ; <i32*> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{i32* %a_addr}, metadata !0), !dbg !7
+  call void @llvm.dbg.declare(metadata !{i32* %a_addr}, metadata !0, metadata !{}), !dbg !7
   store i32 %a, i32* %a_addr
   %0 = load i32* %a_addr, align 4, !dbg !8        ; <i32> [#uses=1]
-  call void @llvm.dbg.declare(metadata !{i32* %x_addr.i}, metadata !9) nounwind, !dbg !15
+  call void @llvm.dbg.declare(metadata !{i32* %x_addr.i}, metadata !9, metadata !{}) nounwind, !dbg !15
   store i32 %0, i32* %x_addr.i
-  call void @llvm.dbg.declare(metadata !{i64* %y_addr.i}, metadata !16) nounwind, !dbg !15
+  call void @llvm.dbg.declare(metadata !{i64* %y_addr.i}, metadata !16, metadata !{}) nounwind, !dbg !15
   store i64 55, i64* %y_addr.i
-  call void @llvm.dbg.declare(metadata !{i8** %z_addr.i}, metadata !17) nounwind, !dbg !15
+  call void @llvm.dbg.declare(metadata !{i8** %z_addr.i}, metadata !17, metadata !{}) nounwind, !dbg !15
   store i8* bitcast (void (i32)* @baz to i8*), i8** %z_addr.i
   %1 = load i32* %x_addr.i, align 4, !dbg !18     ; <i32> [#uses=1]
   %2 = load i64* %y_addr.i, align 8, !dbg !18     ; <i64> [#uses=1]
@@ -32,26 +32,26 @@ return:                                           ; preds = %entry
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!22}
-!0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 8, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32)* @baz, null, null, null, i32 8} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !20, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x101\00a\008\000", metadata !1, metadata !2, metadata !6} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{metadata !"0x2e\00baz\00baz\00baz\008\000\001\000\006\000\000\008", metadata !20, metadata !2, metadata !4, null, void (i32)* @baz, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = metadata !{metadata !"0x29", metadata !20} ; [ DW_TAG_file_type ]
+!3 = metadata !{metadata !"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", metadata !20, metadata !21, metadata !21, null, null, null} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !20, metadata !2, null, metadata !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{null, metadata !6}
-!6 = metadata !{i32 786468, metadata !20, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", metadata !20, metadata !2} ; [ DW_TAG_base_type ]
 !7 = metadata !{i32 8, i32 0, metadata !1, null}
 !8 = metadata !{i32 9, i32 0, metadata !1, null}
-!9 = metadata !{i32 786689, metadata !10, metadata !"x", metadata !2, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = metadata !{metadata !"0x101\00x\004\000", metadata !10, metadata !2, metadata !6} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{metadata !"0x2e\00bar\00bar\00bar\004\001\001\000\006\000\000\004", metadata !20, metadata !2, metadata !11, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !20, metadata !2, null, metadata !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{null, metadata !6, metadata !13, metadata !14}
-!13 = metadata !{i32 786468, metadata !20, metadata !2, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786447, metadata !20, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{metadata !"0x24\00long int\000\0064\0064\000\000\005", metadata !20, metadata !2} ; [ DW_TAG_base_type ]
+!14 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", metadata !20, metadata !2, null} ; [ DW_TAG_pointer_type ]
 !15 = metadata !{i32 4, i32 0, metadata !10, metadata !8}
-!16 = metadata !{i32 786689, metadata !10, metadata !"y", metadata !2, i32 4, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 786689, metadata !10, metadata !"z", metadata !2, i32 4, metadata !14, i32 0, null} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{metadata !"0x101\00y\004\000", metadata !10, metadata !2, metadata !13} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{metadata !"0x101\00z\004\000", metadata !10, metadata !2, metadata !14} ; [ DW_TAG_arg_variable ]
 !18 = metadata !{i32 5, i32 0, metadata !10, metadata !8}
 !19 = metadata !{i32 10, i32 0, metadata !1, null}
 !20 = metadata !{metadata !"bar.c", metadata !"/tmp/"}
 !21 = metadata !{i32 0}
-!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
index d124be5..00ac34d 100644
--- a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
+++ b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
@@ -6,7 +6,7 @@ target triple = "i386-pc-linux-gnu"
 
 %0 = type { x86_fp80, x86_fp80 }
 
-define internal fastcc void @initialize(%0* noalias sret %agg.result) nounwind {
+define internal fastcc void @initialize(%0* noalias nocapture sret %agg.result) nounwind {
 entry:
   %agg.result.03 = getelementptr %0* %agg.result, i32 0, i32 0
   store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
@@ -15,7 +15,7 @@ entry:
   ret void
 }
 
-declare fastcc x86_fp80 @passed_uninitialized(%0*) nounwind
+declare fastcc x86_fp80 @passed_uninitialized(%0* nocapture) nounwind
 
 define fastcc void @badly_optimized() nounwind {
 entry:
diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index 597b69d..6982c8b 100644
--- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -4,7 +4,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 %a = type { i32 }
 %b = type { float }
 
-declare void @g(%a*)
+declare void @g(%a* nocapture)
 
 define float @f() {
 entry:
diff --git a/test/Transforms/MemCpyOpt/callslot_deref.ll b/test/Transforms/MemCpyOpt/callslot_deref.ll
new file mode 100644
index 0000000..4d51552
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/callslot_deref.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) unnamed_addr nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+; all bytes of %dst that are touch by the memset are dereferenceable
+define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) {
+; CHECK-LABEL: @must_remove_memcpy(
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64
+  %src = alloca [4096 x i8], align 1
+  %p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
+  ret void
+}
+
+; memset touch more bytes than those guaranteed to be dereferenceable
+define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) {
+; CHECK-LABEL: @must_not_remove_memcpy(
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+  %src = alloca [4096 x i8], align 1
+  %p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
+  ret void
+}
diff --git a/test/Transforms/MemCpyOpt/capturing-func.ll b/test/Transforms/MemCpyOpt/capturing-func.ll
new file mode 100644
index 0000000..17614fd
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/capturing-func.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s
+
+target datalayout = "e"
+
+declare void @foo(i8*)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define void @test() {
+  %ptr1 = alloca i8
+  %ptr2 = alloca i8
+  call void @foo(i8* %ptr2)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i32 1, i1 false)
+  call void @foo(i8* %ptr1)
+  ret void
+
+  ; Check that the transformation isn't applied if the called function can
+  ; capture the pointer argument (i.e. the nocapture attribute isn't present)
+  ; CHECK-LABEL: @test(
+  ; CHECK: call void @foo(i8* %ptr2)
+  ; CHECK-NEXT: call void @llvm.memcpy
+  ; CHECK-NEXT: call void @foo(i8* %ptr1)
+}
diff --git a/test/Transforms/MemCpyOpt/loadstore-sret.ll b/test/Transforms/MemCpyOpt/loadstore-sret.ll
index 89eabca..d4a700d 100644
--- a/test/Transforms/MemCpyOpt/loadstore-sret.ll
+++ b/test/Transforms/MemCpyOpt/loadstore-sret.ll
@@ -22,4 +22,4 @@ _ZNSt8auto_ptrIiED1Ev.exit:
   ret void
 }
 
-declare void @_Z3barv(%"class.std::auto_ptr"* sret)
+declare void @_Z3barv(%"class.std::auto_ptr"* nocapture sret)
diff --git a/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll b/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll
new file mode 100644
index 0000000..6263176
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll
@@ -0,0 +1,55 @@
+; RUN: opt -basicaa -memcpyopt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo([8 x i64]* noalias nocapture sret dereferenceable(64) %sret) {
+entry-block:
+  %a = alloca [8 x i64], align 8
+  %a.cast = bitcast [8 x i64]* %a to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %a.cast)
+  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i32 8, i1 false)
+  %sret.cast = bitcast [8 x i64]* %sret to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i32 8, i1 false)
+  call void @llvm.lifetime.end(i64 64, i8* %a.cast)
+  ret void
+
+; CHECK-LABEL: @foo(
+; CHECK:         %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* %[[sret_cast]], i8 0, i64 64
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret void
+}
+
+define void @bar([8 x i64]* noalias nocapture sret dereferenceable(64) %sret, [8 x i64]* noalias nocapture dereferenceable(64) %out) {
+entry-block:
+  %a = alloca [8 x i64], align 8
+  %a.cast = bitcast [8 x i64]* %a to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %a.cast)
+  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i32 8, i1 false)
+  %sret.cast = bitcast [8 x i64]* %sret to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 42, i64 32, i32 8, i1 false)
+  %out.cast = bitcast [8 x i64]* %out to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out.cast, i8* %a.cast, i64 64, i32 8, i1 false)
+  call void @llvm.lifetime.end(i64 64, i8* %a.cast)
+  ret void
+
+; CHECK-LABEL: @bar(
+; CHECK:         %[[a:[^=]+]] = alloca [8 x i64]
+; CHECK:         %[[a_cast:[^=]+]] = bitcast [8 x i64]* %[[a]] to i8*
+; CHECK:         call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 0, i64 64
+; CHECK:         %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8*
+; CHECK:         call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[sret_cast]], i8* %[[a_cast]], i64 64
+; CHECK:         call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 42, i64 32
+; CHECK:         %[[out_cast:[^=]+]] = bitcast [8 x i64]* %out to i8*
+; CHECK:         call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[out_cast]], i8* %[[a_cast]], i64 64
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret void
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 492c453..ee04f19 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -29,7 +29,7 @@ entry:
 ; CHECK: ret void
 }
 
-declare void @ccoshl(%0* sret , x86_fp80, x86_fp80) nounwind 
+declare void @ccoshl(%0* nocapture sret, x86_fp80, x86_fp80) nounwind 
 
 
 ; The intermediate alloca and one of the memcpy's should be eliminated, the
@@ -202,7 +202,7 @@ define void @test10(%opaque* noalias nocapture sret %x, i32 %y) {
   ret void
 }
 
-declare void @f1(%struct.big* sret)
+declare void @f1(%struct.big* nocapture sret)
 declare void @f2(%struct.big*)
 
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll
index 1bbb5fe..bfe5e0f 100644
--- a/test/Transforms/MemCpyOpt/sret.ll
+++ b/test/Transforms/MemCpyOpt/sret.ll
@@ -25,6 +25,6 @@ entry:
   ret void
 }
 
-declare void @ccoshl(%0* noalias sret, %0* byval) nounwind
+declare void @ccoshl(%0* noalias nocapture sret, %0* byval) nounwind
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
new file mode 100644
index 0000000..9878b47
--- /dev/null
+++ b/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
@@ -0,0 +1,91 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+
+define i8 @call_with_range() {
+  bitcast i8 0 to i8 ; dummy to make the function large enough
+  %out = call i8 @dummy(), !range !0
+  ret i8 %out
+}
+
+define i8 @call_no_range() {
+; CHECK-LABEL: @call_no_range
+; CHECK-NEXT: bitcast i8 0 to i8
+; CHECK-NEXT: %out = call i8 @dummy()
+; CHECK-NEXT: ret i8 %out
+  bitcast i8 0 to i8
+  %out = call i8 @dummy()
+  ret i8 %out
+}
+
+define i8 @call_different_range() {
+; CHECK-LABEL: @call_different_range
+; CHECK-NEXT: bitcast i8 0 to i8
+; CHECK-NEXT: %out = call i8 @dummy(), !range !1
+; CHECK-NEXT: ret i8 %out
+  bitcast i8 0 to i8
+  %out = call i8 @dummy(), !range !1
+  ret i8 %out
+}
+
+define i8 @invoke_with_range() {
+  %out = invoke i8 @dummy() to label %next unwind label %lpad, !range !0
+
+next:
+  ret i8 %out
+
+lpad:
+  %pad = landingpad { i8*, i32 } personality i8* undef cleanup
+  resume { i8*, i32 } zeroinitializer
+}
+
+define i8 @invoke_no_range() {
+; CHECK-LABEL: @invoke_no_range()
+; CHECK-NEXT: invoke i8 @dummy
+  %out = invoke i8 @dummy() to label %next unwind label %lpad
+
+next:
+  ret i8 %out
+
+lpad:
+  %pad = landingpad { i8*, i32 } personality i8* undef cleanup
+  resume { i8*, i32 } zeroinitializer
+}
+
+define i8 @invoke_different_range() {
+; CHECK-LABEL: @invoke_different_range()
+; CHECK-NEXT: invoke i8 @dummy
+  %out = invoke i8 @dummy() to label %next unwind label %lpad, !range !1
+
+next:
+  ret i8 %out
+
+lpad:
+  %pad = landingpad { i8*, i32 } personality i8* undef cleanup
+  resume { i8*, i32 } zeroinitializer
+}
+
+define i8 @call_same_range() {
+; CHECK-LABEL: @call_same_range
+; CHECK: tail call i8 @call_with_range
+  bitcast i8 0 to i8
+  %out = call i8 @dummy(), !range !0
+  ret i8 %out
+}
+
+define i8 @invoke_same_range() {
+; CHECK-LABEL: @invoke_same_range()
+; CHECK: tail call i8 @invoke_with_range()
+  %out = invoke i8 @dummy() to label %next unwind label %lpad, !range !0
+
+next:
+  ret i8 %out
+
+lpad:
+  %pad = landingpad { i8*, i32 } personality i8* undef cleanup
+  resume { i8*, i32 } zeroinitializer
+}
+
+declare i8 @dummy();
+declare i32 @__gxx_personality_v0(...)
+
+!0 = metadata !{i8 0, i8 2}
+!1 = metadata !{i8 5, i8 7}
+\ No newline at end of file
diff --git a/test/Transforms/MergeFunc/vector-GEP-crash.ll b/test/Transforms/MergeFunc/vector-GEP-crash.ll
new file mode 100644
index 0000000..a1eefa0
--- /dev/null
+++ b/test/Transforms/MergeFunc/vector-GEP-crash.ll
@@ -0,0 +1,12 @@
+; RUN: opt -mergefunc -disable-output < %s
+; This used to cause a crash when compairing the GEPs
+
+define void @foo(<2 x i64*>) {
+  %tmp = getelementptr <2 x i64*> %0, <2 x i64> <i64 0, i64 0>
+  ret void
+}
+
+define void @bar(<2 x i64*>) {
+  %tmp = getelementptr <2 x i64*> %0, <2 x i64> <i64 0, i64 0>
+  ret void
+}
diff --git a/test/Transforms/MetaRenamer/metarenamer.ll b/test/Transforms/MetaRenamer/metarenamer.ll
index 6297af6..4010f31 100644
--- a/test/Transforms/MetaRenamer/metarenamer.ll
+++ b/test/Transforms/MetaRenamer/metarenamer.ll
@@ -12,7 +12,7 @@ target triple = "x86_64-pc-linux-gnu"
 @func_5_xxx.static_local_3_xxx = internal global i32 3, align 4
 @global_3_xxx = common global i32 0, align 4
 
-@func_7_xxx = alias weak i32 (...)* @aliased_func_7_xxx
+@func_7_xxx = weak alias i32 (...)* @aliased_func_7_xxx
 
 define i32 @aliased_func_7_xxx(...) {
   ret i32 0
diff --git a/test/Transforms/ObjCARC/allocas.ll b/test/Transforms/ObjCARC/allocas.ll
index 7347a8f..d2e7841 100644
--- a/test/Transforms/ObjCARC/allocas.ll
+++ b/test/Transforms/ObjCARC/allocas.ll
@@ -23,7 +23,7 @@ declare i8* @returner2()
 declare void @bar(i32 ()*)
 declare void @use_alloca(i8**)
 
-declare void @llvm.dbg.value(metadata, i64, metadata)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
 
 declare i8* @objc_msgSend(i8*, i8*, ...)
 
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 885935c..a1ee956 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -22,7 +22,7 @@ declare void @invokee()
 declare i8* @returner()
 declare void @bar(i32 ()*)
 
-declare void @llvm.dbg.value(metadata, i64, metadata)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
 
 declare i8* @objc_msgSend(i8*, i8*, ...)
 
@@ -2679,8 +2679,8 @@ define {<2 x float>, <2 x float>} @"\01-[A z]"({}* %self, i8* nocapture %_cmd) n
 invoke.cont:
   %0 = bitcast {}* %self to i8*
   %1 = tail call i8* @objc_retain(i8* %0) nounwind
-  tail call void @llvm.dbg.value(metadata !{{}* %self}, i64 0, metadata !0)
-  tail call void @llvm.dbg.value(metadata !{{}* %self}, i64 0, metadata !0)
+  tail call void @llvm.dbg.value(metadata !{{}* %self}, i64 0, metadata !0, metadata !{})
+  tail call void @llvm.dbg.value(metadata !{{}* %self}, i64 0, metadata !0, metadata !{})
   %ivar = load i64* @"OBJC_IVAR_$_A.myZ", align 8
   %add.ptr = getelementptr i8* %0, i64 %ivar
   %tmp1 = bitcast i8* %add.ptr to float*
@@ -3012,7 +3012,7 @@ define void @test67(i8* %x) {
 !llvm.module.flags = !{!1}
 
 !0 = metadata !{}
-!1 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!1 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
 
 ; CHECK: attributes #0 = { nounwind readnone }
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
index 79e300c..03af93e 100644
--- a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
+++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -41,10 +41,10 @@ entry:
   %tmp2 = bitcast %struct._class_t* %tmp to i8*, !dbg !37
 ; CHECK: call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp2, i8* %tmp1)
   %call = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp2, i8* %tmp1), !dbg !37, !clang.arc.no_objc_arc_exceptions !38
-  call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !12), !dbg !37
+  call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !12, metadata !{}), !dbg !37
 ; CHECK: call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
   %tmp3 = call i8* @objc_retain(i8* %call) nounwind, !dbg !39
-  call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !25), !dbg !39
+  call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !25, metadata !{}), !dbg !39
   invoke fastcc void @ThrowFunc(i8* %call)
           to label %eh.cont unwind label %lpad, !dbg !40, !clang.arc.no_objc_arc_exceptions !38
 
@@ -58,7 +58,7 @@ lpad:                                             ; preds = %entry
           catch i8* null, !dbg !40
   %tmp5 = extractvalue { i8*, i32 } %tmp4, 0, !dbg !40
   %exn.adjusted = call i8* @objc_begin_catch(i8* %tmp5) nounwind, !dbg !44
-  call void @llvm.dbg.value(metadata !45, i64 0, metadata !21), !dbg !46
+  call void @llvm.dbg.value(metadata !45, i64 0, metadata !21, metadata !{}), !dbg !46
   call void @objc_end_catch(), !dbg !49, !clang.arc.no_objc_arc_exceptions !38
 ; CHECK: call void @objc_release(i8* %call)
   call void @objc_release(i8* %call) nounwind, !dbg !42, !clang.imprecise_release !38
@@ -72,7 +72,7 @@ if.end:                                           ; preds = %lpad, %eh.cont
   ret i32 0, !dbg !54
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
 
@@ -87,7 +87,7 @@ declare void @objc_exception_rethrow()
 define internal fastcc void @ThrowFunc(i8* %obj) uwtable noinline ssp {
 entry:
   %tmp = call i8* @objc_retain(i8* %obj) nounwind
-  call void @llvm.dbg.value(metadata !{i8* %obj}, i64 0, metadata !32), !dbg !55
+  call void @llvm.dbg.value(metadata !{i8* %obj}, i64 0, metadata !32, metadata !{}), !dbg !55
   %tmp1 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1", align 8, !dbg !56
   %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_5", align 8, !dbg !56, !invariant.load !38
   %tmp3 = bitcast %struct._class_t* %tmp1 to i8*, !dbg !56
@@ -102,7 +102,7 @@ declare void @objc_release(i8*) nonlazybind
 
 declare void @NSLog(i8*, ...)
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 ; CHECK: attributes #0 = { ssp uwtable }
 ; CHECK: attributes #1 = { nounwind readnone }
@@ -113,37 +113,37 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!33, !34, !35, !36, !61}
 
-!0 = metadata !{i32 786449, metadata !60, i32 16, metadata !"clang version 3.3 ", i1 true, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m] [DW_LANG_ObjC]
+!0 = metadata !{metadata !"0x11\0016\00clang version 3.3 \001\00\002\00\000", metadata !60, metadata !1, metadata !1, metadata !3, metadata !1, null} ; [ DW_TAG_compile_unit ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m] [DW_LANG_ObjC]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !27}
-!5 = metadata !{i32 786478, metadata !60, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @main, null, null, metadata !10, i32 10} ; [ DW_TAG_subprogram ] [line 9] [def] [scope 10] [main]
-!6 = metadata !{i32 786473, metadata !60} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{metadata !"0x2e\00main\00main\00\009\000\001\000\006\000\001\0010", metadata !60, metadata !6, metadata !7, null, i32 ()* @main, null, null, metadata !10} ; [ DW_TAG_subprogram ] [line 9] [def] [scope 10] [main]
+!6 = metadata !{metadata !"0x29", metadata !60} ; [ DW_TAG_file_type ]
+!7 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{metadata !11}
 !11 = metadata !{metadata !12, metadata !21, metadata !25}
-!12 = metadata !{i32 786688, metadata !13, metadata !"obj", metadata !6, i32 11, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj] [line 11]
-!13 = metadata !{i32 786443, metadata !60, metadata !5, i32 10, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!14 = metadata !{i32 786454, metadata !60, null, metadata !"id", i32 11, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_typedef ] [id] [line 11, size 0, align 0, offset 0] [from ]
-!15 = metadata !{i32 786447, metadata !60, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
-!16 = metadata !{i32 786451, metadata !60, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ]
+!12 = metadata !{metadata !"0x100\00obj\0011\000", metadata !13, metadata !6, metadata !14} ; [ DW_TAG_auto_variable ] [obj] [line 11]
+!13 = metadata !{metadata !"0xb\0010\000\000", metadata !60, metadata !5} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!14 = metadata !{metadata !"0x16\00id\0011\000\000\000\000", metadata !60, null, metadata !15} ; [ DW_TAG_typedef ] [id] [line 11, size 0, align 0, offset 0] [from ]
+!15 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", metadata !60, null, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
+!16 = metadata !{metadata !"0x13\00objc_object\000\000\000\000\000\000", metadata !60, null, null, metadata !17, null, i32 0, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ]
 !17 = metadata !{metadata !18}
-!18 = metadata !{i32 786445, metadata !60, metadata !16, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !19} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
-!19 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
-!20 = metadata !{i32 786451, metadata !60, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
-!21 = metadata !{i32 786688, metadata !22, metadata !"ok", metadata !6, i32 13, metadata !23, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ok] [line 13]
-!22 = metadata !{i32 786443, metadata !60, metadata !13, i32 12, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!23 = metadata !{i32 786454, metadata !60, null, metadata !"BOOL", i32 62, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_typedef ] [BOOL] [line 62, size 0, align 0, offset 0] [from signed char]
-!24 = metadata !{i32 786468, null, null, metadata !"signed char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!25 = metadata !{i32 786688, metadata !26, metadata !"obj2", metadata !6, i32 15, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj2] [line 15]
-!26 = metadata !{i32 786443, metadata !60, metadata !22, i32 14, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!27 = metadata !{i32 786478, metadata !60, metadata !6, metadata !"ThrowFunc", metadata !"ThrowFunc", metadata !"", i32 4, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i8*)* @ThrowFunc, null, null, metadata !30, i32 5} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [scope 5] [ThrowFunc]
-!28 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = metadata !{metadata !"0xd\00isa\000\0064\000\000\000", metadata !60, metadata !16, metadata !19} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
+!19 = metadata !{metadata !"0xf\00\000\0064\000\000\000", null, null, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
+!20 = metadata !{metadata !"0x13\00objc_class\000\000\000\000\004\000", metadata !60, null, null, null, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
+!21 = metadata !{metadata !"0x100\00ok\0013\000", metadata !22, metadata !6, metadata !23} ; [ DW_TAG_auto_variable ] [ok] [line 13]
+!22 = metadata !{metadata !"0xb\0012\000\001", metadata !60, metadata !13} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!23 = metadata !{metadata !"0x16\00BOOL\0062\000\000\000\000", metadata !60, null, metadata !24} ; [ DW_TAG_typedef ] [BOOL] [line 62, size 0, align 0, offset 0] [from signed char]
+!24 = metadata !{metadata !"0x24\00signed char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!25 = metadata !{metadata !"0x100\00obj2\0015\000", metadata !26, metadata !6, metadata !14} ; [ DW_TAG_auto_variable ] [obj2] [line 15]
+!26 = metadata !{metadata !"0xb\0014\000\002", metadata !60, metadata !22} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!27 = metadata !{metadata !"0x2e\00ThrowFunc\00ThrowFunc\00\004\001\001\000\006\00256\001\005", metadata !60, metadata !6, metadata !28, null, void (i8*)* @ThrowFunc, null, null, metadata !30} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [scope 5] [ThrowFunc]
+!28 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !29, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !29 = metadata !{null, metadata !14}
 !30 = metadata !{metadata !31}
 !31 = metadata !{metadata !32}
-!32 = metadata !{i32 786689, metadata !27, metadata !"obj", metadata !6, i32 16777220, metadata !14, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [obj] [line 4]
+!32 = metadata !{metadata !"0x101\00obj\0016777220\000", metadata !27, metadata !6, metadata !14} ; [ DW_TAG_arg_variable ] [obj] [line 4]
 !33 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
 !34 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
 !35 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
@@ -152,23 +152,23 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !38 = metadata !{}
 !39 = metadata !{i32 15, i32 0, metadata !26, null}
 !40 = metadata !{i32 17, i32 0, metadata !41, null}
-!41 = metadata !{i32 786443, metadata !60, metadata !26, i32 16, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!41 = metadata !{metadata !"0xb\0016\000\003", metadata !60, metadata !26} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
 !42 = metadata !{i32 22, i32 0, metadata !26, null}
 !43 = metadata !{i32 23, i32 0, metadata !22, null}
 !44 = metadata !{i32 19, i32 0, metadata !41, null}
 !45 = metadata !{i8 0}
 !46 = metadata !{i32 20, i32 0, metadata !47, null}
-!47 = metadata !{i32 786443, metadata !60, metadata !48, i32 19, i32 0, i32 5} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!48 = metadata !{i32 786443, metadata !60, metadata !26, i32 19, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!47 = metadata !{metadata !"0xb\0019\000\005", metadata !60, metadata !48} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!48 = metadata !{metadata !"0xb\0019\000\004", metadata !60, metadata !26} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
 !49 = metadata !{i32 21, i32 0, metadata !47, null}
 !50 = metadata !{i32 24, i32 0, metadata !51, null}
-!51 = metadata !{i32 786443, metadata !60, metadata !22, i32 23, i32 0, i32 6} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!51 = metadata !{metadata !"0xb\0023\000\006", metadata !60, metadata !22} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
 !52 = metadata !{i32 25, i32 0, metadata !51, null}
 !53 = metadata !{i32 27, i32 0, metadata !13, null}
 !54 = metadata !{i32 28, i32 0, metadata !13, null}
 !55 = metadata !{i32 4, i32 0, metadata !27, null}
 !56 = metadata !{i32 6, i32 0, metadata !57, null}
-!57 = metadata !{i32 786443, metadata !60, metadata !27, i32 5, i32 0, i32 7} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!57 = metadata !{metadata !"0xb\005\000\007", metadata !60, metadata !27} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
 !58 = metadata !{i32 7, i32 0, metadata !57, null}
 !60 = metadata !{metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997"}
-!61 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!61 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/ObjCARC/provenance.ll b/test/Transforms/ObjCARC/provenance.ll
new file mode 100644
index 0000000..937c689
--- /dev/null
+++ b/test/Transforms/ObjCARC/provenance.ll
@@ -0,0 +1,52 @@
+; RUN: opt -disable-output -pa-eval %s 2>&1 | FileCheck %s
+
+@"\01l_objc_msgSend_fixup_" = global i8 0
+@g1 = global i8 0, section "__OBJC,__message_refs,literal_pointers,no_dead_strip"
+@g2 = global i8 0, section "__DATA, __objc_classrefs, regular, no_dead_strip"
+@g3 = global i8 0, section "__DATA, __objc_superrefs, regular, no_dead_strip"
+@g4 = global i8 0, section "__TEXT,__objc_methname,cstring_literals"
+@g5 = global i8 0, section "__TEXT,__cstring,cstring_literals"
+
+declare void @g(i8)
+
+define void @f(i8* %a, i8** %b, i8** %c) {
+  %y1 = load i8* %a
+  call void @g(i8 %y1)
+
+  %y2 = load i8** %b
+  %y3 = load i8** %c
+
+  %x0 = load i8* @"\01l_objc_msgSend_fixup_"
+  call void @g(i8 %x0)
+
+  %x1 = load i8* @g1
+  call void @g(i8 %x1)
+
+  %x2 = load i8* @g2
+  call void @g(i8 %x2)
+
+  %x3 = load i8* @g3
+  call void @g(i8 %x3)
+
+  %x4 = load i8* @g4
+  call void @g(i8 %x4)
+
+  %x5 = load i8* @g5
+  call void @g(i8 %x5)
+  ret void
+}
+
+; CHECK: y1 and y2 are related.
+; CHECK: y1 and y3 are related.
+; CHECK: y2 and y3 are related.
+; CHECK: x0 and y1 are not related.
+; CHECK: x0 and y2 are not related.
+; CHECK: x0 and y3 are not related.
+; CHECK: l_objc_msgSend_fixup_ and y1 are not related.
+; CHECK: l_objc_msgSend_fixup_ and y2 are not related.
+; CHECK: l_objc_msgSend_fixup_ and y3 are not related.
+; CHECK: x1 and y1 are not related.
+; CHECK: x2 and y1 are not related.
+; CHECK: x3 and y1 are not related.
+; CHECK: x4 and y1 are not related.
+; CHECK: x5 and y1 are not related.
diff --git a/test/Transforms/PartiallyInlineLibCalls/bad-prototype.ll b/test/Transforms/PartiallyInlineLibCalls/bad-prototype.ll
new file mode 100644
index 0000000..34cd672
--- /dev/null
+++ b/test/Transforms/PartiallyInlineLibCalls/bad-prototype.ll
@@ -0,0 +1,13 @@
+; RUN: opt -S -partially-inline-libcalls < %s | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i32 @sqrt()
+
+; CHECK-LABEL: @foo
+define i32 @foo() {
+  ; CHECK: call{{.*}}@sqrt
+  ; CHECK-NOT: call{{.*}}@sqrt
+  %r = call i32 @sqrt()
+  ret i32 %r
+}
diff --git a/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll b/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
index f783955..ea86984 100644
--- a/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
@@ -3,7 +3,7 @@
 define <4 x float> @test1() {
 ; CHECK-LABEL: test1
 ; CHECK-NEXT: %tmp1 = fsub <4 x float> zeroinitializer, zeroinitializer
-; CHECK-NEXT: %tmp2 = fmul <4 x float> zeroinitializer, %tmp1
+; CHECK-NEXT: %tmp2 = fmul <4 x float> %tmp1, zeroinitializer
 ; CHECK-NEXT: ret <4 x float> %tmp2
 
   %tmp1 = fsub <4 x float> zeroinitializer, zeroinitializer
diff --git a/test/Transforms/Reassociate/basictest.ll b/test/Transforms/Reassociate/basictest.ll
index d70bfcb..0194ce2 100644
--- a/test/Transforms/Reassociate/basictest.ll
+++ b/test/Transforms/Reassociate/basictest.ll
@@ -203,7 +203,7 @@ define i32 @test14(i32 %X1, i32 %X2) {
 
 ; CHECK-LABEL: @test14
 ; CHECK-NEXT: sub i32 %X1, %X2
-; CHECK-NEXT: mul i32 %tmp, 47
+; CHECK-NEXT: mul i32 %B2, 47
 ; CHECK-NEXT: ret i32
 }
 
diff --git a/test/Transforms/Reassociate/canonicalize-neg-const.ll b/test/Transforms/Reassociate/canonicalize-neg-const.ll
new file mode 100644
index 0000000..e85a963
--- /dev/null
+++ b/test/Transforms/Reassociate/canonicalize-neg-const.ll
@@ -0,0 +1,158 @@
+; RUN: opt -reassociate -gvn -S < %s | FileCheck %s
+
+; (x + 0.1234 * y) * (x + -0.1234 * y) -> (x + 0.1234 * y) * (x - 0.1234 * y)
+define double @test1(double %x, double %y) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT: fmul double %y, 1.234000e-01
+; CHECK-NEXT: fadd double %x, %mul
+; CHECK-NEXT: fsub double %x, %mul
+; CHECK-NEXT: fmul double %add{{.*}}, %add{{.*}}
+; CHECK-NEXT: ret double %mul
+
+  %mul = fmul double 1.234000e-01, %y
+  %add = fadd double %mul, %x
+  %mul1 = fmul double -1.234000e-01, %y
+  %add2 = fadd double %mul1, %x
+  %mul3 = fmul double %add, %add2
+  ret double %mul3
+}
+
+; (x + -0.1234 * y) * (x + -0.1234 * y) -> (x - 0.1234 * y) * (x - 0.1234 * y)
+define double @test2(double %x, double %y) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: fmul double %y, 1.234000e-01
+; CHECK-NEXT: fsub double %x, %mul
+; CHECK-NEXT: fmul double %add{{.*}}, %add{{.*}}
+; CHECK-NEXT: ret double %mul
+
+  %mul = fmul double %y, -1.234000e-01
+  %add = fadd double %mul, %x
+  %mul1 = fmul double %y, -1.234000e-01
+  %add2 = fadd double %mul1, %x
+  %mul3 = fmul double %add, %add2
+  ret double %mul3
+}
+
+; (x + 0.1234 * y) * (x - -0.1234 * y) -> (x + 0.1234 * y) * (x + 0.1234 * y)
+define double @test3(double %x, double %y) {
+; CHECK-LABEL: @test3
+; CHECK-NEXT: fmul double %y, 1.234000e-01
+; CHECK-NEXT: fadd double %x, %mul
+; CHECK-NEXT: fmul double %add{{.*}}, %add{{.*}}
+; CHECK-NEXT: ret double
+
+  %mul = fmul double %y, 1.234000e-01
+  %add = fadd double %mul, %x
+  %mul1 = fmul double %y, -1.234000e-01
+  %add2 = fsub double %x, %mul1
+  %mul3 = fmul double %add, %add2
+  ret double %mul3
+}
+
+; Canonicalize (x - -1234 * y)
+define i64 @test4(i64 %x, i64 %y) {
+; CHECK-LABEL: @test4
+; CHECK-NEXT: mul i64 %y, 1234
+; CHECK-NEXT: add i64 %mul, %x
+; CHECK-NEXT: ret i64 %sub
+
+  %mul = mul i64 %y, -1234
+  %sub = sub i64 %x, %mul
+  ret i64 %sub
+}
+
+; Canonicalize (x - -0.1234 * y)
+define double @test5(double %x, double %y) {
+; CHECK-LABEL: @test5
+; CHECK-NEXT: fmul double %y, 1.234000e-01
+; CHECK-NEXT: fadd double %x, %mul
+; CHECK-NEXT: ret double
+
+  %mul = fmul double -1.234000e-01, %y
+  %sub = fsub double %x, %mul
+  ret double %sub
+}
+
+; Don't modify (-0.1234 * y - x)
+define double @test6(double %x, double %y) {
+; CHECK-LABEL: @test6
+; CHECK-NEXT: fmul double %y, -1.234000e-01
+; CHECK-NEXT: fsub double %mul, %x
+; CHECK-NEXT: ret double %sub
+
+  %mul = fmul double -1.234000e-01, %y
+  %sub = fsub double %mul, %x
+  ret double %sub
+}
+
+; Canonicalize (-0.1234 * y + x) -> (x - 0.1234 * y)
+define double @test7(double %x, double %y) {
+; CHECK-LABEL: @test7
+; CHECK-NEXT: fmul double %y, 1.234000e-01
+; CHECK-NEXT: fsub double %x, %mul
+; CHECK-NEXT: ret double %add
+
+  %mul = fmul double -1.234000e-01, %y
+  %add = fadd double %mul, %x
+  ret double %add
+}
+
+; Canonicalize (y * -0.1234 + x) -> (x - 0.1234 * y)
+define double @test8(double %x, double %y) {
+; CHECK-LABEL: @test8
+; CHECK-NEXT: fmul double %y, 1.234000e-01
+; CHECK-NEXT: fsub double %x, %mul
+; CHECK-NEXT: ret double %add
+
+  %mul = fmul double %y, -1.234000e-01
+  %add = fadd double %mul, %x
+  ret double %add
+}
+
+; Canonicalize (x - -0.1234 / y)
+define double @test9(double %x, double %y) {
+; CHECK-LABEL: @test9
+; CHECK-NEXT: fdiv double 1.234000e-01, %y
+; CHECK-NEXT: fadd double %x, %div
+; CHECK-NEXT: ret double
+
+  %div = fdiv double -1.234000e-01, %y
+  %sub = fsub double %x, %div
+  ret double %sub
+}
+
+; Don't modify (-0.1234 / y - x)
+define double @test10(double %x, double %y) {
+; CHECK-LABEL: @test10
+; CHECK-NEXT: fdiv double -1.234000e-01, %y
+; CHECK-NEXT: fsub double %div, %x
+; CHECK-NEXT: ret double %sub
+
+  %div = fdiv double -1.234000e-01, %y
+  %sub = fsub double %div, %x
+  ret double %sub
+}
+
+; Canonicalize (-0.1234 / y + x) -> (x - 0.1234 / y)
+define double @test11(double %x, double %y) {
+; CHECK-LABEL: @test11
+; CHECK-NEXT: fdiv double 1.234000e-01, %y
+; CHECK-NEXT: fsub double %x, %div
+; CHECK-NEXT: ret double %add
+
+  %div = fdiv double -1.234000e-01, %y
+  %add = fadd double %div, %x
+  ret double %add
+}
+
+; Canonicalize (y / -0.1234 + x) -> (x - y / 0.1234)
+define double @test12(double %x, double %y) {
+; CHECK-LABEL: @test12
+; CHECK-NEXT: fdiv double %y, 1.234000e-01
+; CHECK-NEXT: fsub double %x, %div
+; CHECK-NEXT: ret double %add
+
+  %div = fdiv double %y, -1.234000e-01
+  %add = fadd double %div, %x
+  ret double %add
+}
diff --git a/test/Transforms/Reassociate/commute.ll b/test/Transforms/Reassociate/commute.ll
new file mode 100644
index 0000000..760e51b
--- /dev/null
+++ b/test/Transforms/Reassociate/commute.ll
@@ -0,0 +1,19 @@
+; RUN: opt -reassociate -S < %s | FileCheck %s
+
+declare void @use(i32)
+
+define void @test1(i32 %x, i32 %y) {
+; CHECK-LABEL: test1
+; CHECK: mul i32 %y, %x
+; CHECK: mul i32 %y, %x
+; CHECK: sub i32 %1, %2
+; CHECK: call void @use(i32 %{{.*}})
+; CHECK: call void @use(i32 %{{.*}})
+
+  %1 = mul i32 %x, %y
+  %2 = mul i32 %y, %x
+  %3 = sub i32 %1, %2
+  call void @use(i32 %1)
+  call void @use(i32 %3)
+  ret void
+}
diff --git a/test/Transforms/Reassociate/fast-AgressiveSubMove.ll b/test/Transforms/Reassociate/fast-AgressiveSubMove.ll
new file mode 100644
index 0000000..0c28ed1
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-AgressiveSubMove.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+
+define float @test1(float %A) {
+; CHECK-LABEL: test1
+; CHECK-NEXT: %X = fadd float %A, 1.000000e+00
+; CHECK-NEXT: %Y = fadd float %A, 1.000000e+00
+; CHECK-NEXT: %r = fsub float %X, %Y
+; CHECK-NEXT: ret float %r
+
+  %X = fadd float %A, 1.000000e+00
+  %Y = fadd float %A, 1.000000e+00
+  %r = fsub float %X, %Y
+  ret float %r
+}
+
+define float @test2(float %A) {
+; CHECK-LABEL: test2
+; CHECK-NEXT: ret float 0.000000e+00
+
+  %X = fadd fast float 1.000000e+00, %A
+  %Y = fadd fast float 1.000000e+00, %A
+  %r = fsub fast float %X, %Y
+  ret float %r
+}
diff --git a/test/Transforms/Reassociate/fast-ArrayOutOfBounds.ll b/test/Transforms/Reassociate/fast-ArrayOutOfBounds.ll
new file mode 100644
index 0000000..0109e4f
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-ArrayOutOfBounds.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
+
+; Not marked as fast, so must not change.
+define float @test1(float %a0, float %a1, float %a2, float %a3, float %a4) {
+; CHECK-LABEL: test1
+; CHECK-NEXT: %tmp.2 = fadd float %a3, %a4
+; CHECK-NEXT: %tmp.4 = fadd float %tmp.2, %a2
+; CHECK-NEXT: %tmp.6 = fadd float %tmp.4, %a1
+; CHECK-NEXT: %tmp.8 = fadd float %tmp.6, %a0
+; CHECK-NEXT: %tmp.11 = fadd float %a2, %a3
+; CHECK-NEXT: %tmp.13 = fadd float %tmp.11, %a1
+; CHECK-NEXT: %tmp.15 = fadd float %tmp.13, %a0
+; CHECK-NEXT: %tmp.18 = fadd float %a1, %a2
+; CHECK-NEXT: %tmp.20 = fadd float %tmp.18, %a0
+; CHECK-NEXT: %tmp.23 = fadd float %a0, %a1
+; CHECK-NEXT: %tmp.26 = fsub float %tmp.8, %tmp.15
+; CHECK-NEXT: %tmp.28 = fadd float %tmp.20, %tmp.26
+; CHECK-NEXT: %tmp.30 = fsub float %tmp.28, %tmp.23
+; CHECK-NEXT: %tmp.32 = fsub float %tmp.30, %a4
+; CHECK-NEXT: %tmp.34 = fsub float %tmp.32, %a2
+; CHECK-NEXT: %T = fmul float %tmp.34, %tmp.34
+; CHECK-NEXT: ret float %T
+
+  %tmp.2 = fadd float %a4, %a3
+  %tmp.4 = fadd float %tmp.2, %a2
+  %tmp.6 = fadd float %tmp.4, %a1
+  %tmp.8 = fadd float %tmp.6, %a0
+  %tmp.11 = fadd float %a3, %a2
+  %tmp.13 = fadd float %tmp.11, %a1
+  %tmp.15 = fadd float %tmp.13, %a0
+  %tmp.18 = fadd float %a2, %a1
+  %tmp.20 = fadd float %tmp.18, %a0
+  %tmp.23 = fadd float %a1, %a0
+  %tmp.26 = fsub float %tmp.8, %tmp.15
+  %tmp.28 = fadd float %tmp.26, %tmp.20
+  %tmp.30 = fsub float %tmp.28, %tmp.23
+  %tmp.32 = fsub float %tmp.30, %a4
+  %tmp.34 = fsub float %tmp.32, %a2
+  %T = fmul float %tmp.34, %tmp.34
+  ret float %T
+}
+
+; Should be able to eliminate everything.
+define float @test2(float %a0, float %a1, float %a2, float %a3, float %a4) {
+; CHECK-LABEL: test2
+; CHECK: ret float 0.000000e+00
+
+  %tmp.2 = fadd fast float %a4, %a3
+  %tmp.4 = fadd fast float %tmp.2, %a2
+  %tmp.6 = fadd fast float %tmp.4, %a1
+  %tmp.8 = fadd fast float %tmp.6, %a0
+  %tmp.11 = fadd fast float %a3, %a2
+  %tmp.13 = fadd fast float %tmp.11, %a1
+  %tmp.15 = fadd fast float %tmp.13, %a0
+  %tmp.18 = fadd fast float %a2, %a1
+  %tmp.20 = fadd fast float %tmp.18, %a0
+  %tmp.23 = fadd fast float %a1, %a0
+  %tmp.26 = fsub fast float %tmp.8, %tmp.15
+  %tmp.28 = fadd fast float %tmp.26, %tmp.20
+  %tmp.30 = fsub fast float %tmp.28, %tmp.23
+  %tmp.32 = fsub fast float %tmp.30, %a4
+  %tmp.34 = fsub fast float %tmp.32, %a2
+  %T = fmul fast float %tmp.34, %tmp.34
+  ret float %T
+}
diff --git a/test/Transforms/Reassociate/fast-MissedTree.ll b/test/Transforms/Reassociate/fast-MissedTree.ll
new file mode 100644
index 0000000..689fd6c
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-MissedTree.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
+
+define float @test1(float %A, float %B) {
+; CHECK-LABEL: test1
+; CHECK: %Z = fadd fast float %A, %B
+; CHECK: ret float %Z
+	%W = fadd fast float %B, -5.0
+	%Y = fadd fast float %A, 5.0
+	%Z = fadd fast float %W, %Y
+	ret float %Z
+}
diff --git a/test/Transforms/Reassociate/fast-ReassociateVector.ll b/test/Transforms/Reassociate/fast-ReassociateVector.ll
new file mode 100644
index 0000000..eeae096
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -0,0 +1,73 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+
+; Canonicalize operands, but don't optimize floating point vector operations.
+define <4 x float> @test1() {
+; CHECK-LABEL: test1
+; CHECK-NEXT: %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer
+; CHECK-NEXT: %tmp2 = fmul fast <4 x float> %tmp1, zeroinitializer
+
+  %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer
+  %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1
+  ret <4 x float> %tmp2
+}
+
+; Commute integer vector operations.
+define <2 x i32> @test2(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: test2
+; CHECK-NEXT: %tmp1 = add <2 x i32> %x, %y
+; CHECK-NEXT: %tmp2 = add <2 x i32> %x, %y
+; CHECK-NEXT: %tmp3 = add <2 x i32> %tmp1, %tmp2
+
+  %tmp1 = add <2 x i32> %x, %y
+  %tmp2 = add <2 x i32> %y, %x
+  %tmp3 = add <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @test3(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: test3
+; CHECK-NEXT: %tmp1 = mul <2 x i32> %x, %y
+; CHECK-NEXT: %tmp2 = mul <2 x i32> %x, %y
+; CHECK-NEXT: %tmp3 = mul <2 x i32> %tmp1, %tmp2
+
+  %tmp1 = mul <2 x i32> %x, %y
+  %tmp2 = mul <2 x i32> %y, %x
+  %tmp3 = mul <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @test4(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: test4
+; CHECK-NEXT: %tmp1 = and <2 x i32> %x, %y
+; CHECK-NEXT: %tmp2 = and <2 x i32> %x, %y
+; CHECK-NEXT: %tmp3 = and <2 x i32> %tmp1, %tmp2
+
+  %tmp1 = and <2 x i32> %x, %y
+  %tmp2 = and <2 x i32> %y, %x
+  %tmp3 = and <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @test5(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: test5
+; CHECK-NEXT: %tmp1 = or <2 x i32> %x, %y
+; CHECK-NEXT: %tmp2 = or <2 x i32> %x, %y
+; CHECK-NEXT: %tmp3 = or <2 x i32> %tmp1, %tmp2
+
+  %tmp1 = or <2 x i32> %x, %y
+  %tmp2 = or <2 x i32> %y, %x
+  %tmp3 = or <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @test6(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: test6
+; CHECK-NEXT: %tmp1 = xor <2 x i32> %x, %y
+; CHECK-NEXT: %tmp2 = xor <2 x i32> %x, %y
+; CHECK-NEXT: %tmp3 = xor <2 x i32> %tmp1, %tmp2
+
+  %tmp1 = xor <2 x i32> %x, %y
+  %tmp2 = xor <2 x i32> %y, %x
+  %tmp3 = xor <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
diff --git a/test/Transforms/Reassociate/fast-SubReassociate.ll b/test/Transforms/Reassociate/fast-SubReassociate.ll
new file mode 100644
index 0000000..db4191a
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-SubReassociate.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -reassociate -constprop -instcombine -S | FileCheck %s
+
+define float @test1(float %A, float %B) {
+; CHECK-LABEL: test1
+; CHECK-NEXT: %W = fadd float %B, 5.000000e+00
+; CHECK-NEXT: %X = fadd float %A, -7.000000e+00
+; CHECK-NEXT: %Y = fsub float %X, %W
+; CHECK-NEXT: %Z = fadd float %Y, 1.200000e+01
+; CHECK-NEXT: ret float %Z
+
+  %W = fadd float 5.0, %B
+  %X = fadd float -7.0, %A
+  %Y = fsub float %X, %W
+  %Z = fadd float %Y, 12.0
+  ret float %Z
+}
+
+; With sub reassociation, constant folding can eliminate all of the constants.
+define float @test2(float %A, float %B) {
+; CHECK-LABEL: test2
+; CHECK-NEXT: %Z = fsub fast float %A, %B
+; CHECK-NEXT: ret float %Z
+
+  %W = fadd fast float %B, 5.000000e+00
+  %X = fadd fast float %A, -7.000000e+00
+  %Y = fsub fast float %X, %W
+  %Z = fadd fast float %Y, 1.200000e+01
+  ret float %Z
+
+}
+
+define float @test3(float %A, float %B, float %C, float %D) {
+; CHECK-LABEL: test3
+; CHECK-NEXT: %M = fadd float %A, 1.200000e+01
+; CHECK-NEXT: %N = fadd float %M, %B
+; CHECK-NEXT: %O = fadd float %N, %C
+; CHECK-NEXT: %P = fsub float %D, %O
+; CHECK-NEXT: %Q = fadd float %P, 1.200000e+01
+; CHECK-NEXT: ret float %Q
+
+  %M = fadd float %A, 1.200000e+01
+  %N = fadd float %M, %B
+  %O = fadd float %N, %C
+  %P = fsub float %D, %O
+  %Q = fadd float %P, 1.200000e+01
+  ret float %Q
+}
+
+; With sub reassociation, constant folding can eliminate the two 12 constants.
+define float @test4(float %A, float %B, float %C, float %D) {
+; CHECK-LABEL: test4
+; CHECK-NEXT: %B.neg = fsub fast float -0.000000e+00, %B
+; CHECK-NEXT: %O.neg = fsub fast float %B.neg, %A
+; CHECK-NEXT: %P = fsub fast float %O.neg, %C
+; CHECK-NEXT: %Q = fadd fast float %P, %D
+; CHECK-NEXT: ret float %Q
+
+; FIXME: InstCombine should be able to get us to the following:
+; %sum = fadd fast float %B, %A
+; %sum1 = fadd fast float %sum, %C
+; %Q = fsub fast float %D, %sum1
+; ret i32 %Q
+
+  %M = fadd fast float 1.200000e+01, %A
+  %N = fadd fast float %M, %B
+  %O = fadd fast float %N, %C
+  %P = fsub fast float %D, %O
+  %Q = fadd fast float 1.200000e+01, %P
+  ret float %Q
+}
diff --git a/test/Transforms/Reassociate/fast-basictest.ll b/test/Transforms/Reassociate/fast-basictest.ll
new file mode 100644
index 0000000..67b07f4
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-basictest.ll
@@ -0,0 +1,285 @@
+; RUN: opt < %s -reassociate -gvn -instcombine -S | FileCheck %s
+
+; With reassociation, constant folding can eliminate the 12 and -12 constants.
+define float @test1(float %arg) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT: fsub fast float -0.000000e+00, %arg
+; CHECK-NEXT: ret float
+
+  %tmp1 = fsub fast float -1.200000e+01, %arg
+  %tmp2 = fadd fast float %tmp1, 1.200000e+01
+  ret float %tmp2
+}
+
+define float @test2(float %reg109, float %reg1111) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: fadd float %reg109, -3.000000e+01
+; CHECK-NEXT: fadd float %reg115, %reg1111
+; CHECK-NEXT: fadd float %reg116, 3.000000e+01
+; CHECK-NEXT: ret float
+
+  %reg115 = fadd float %reg109, -3.000000e+01
+  %reg116 = fadd float %reg115, %reg1111
+  %reg117 = fadd float %reg116, 3.000000e+01
+  ret float %reg117
+}
+
+define float @test3(float %reg109, float %reg1111) {
+; CHECK-LABEL: @test3
+; CHECK-NEXT: %reg117 = fadd fast float %reg109, %reg1111
+; CHECK-NEXT:  ret float %reg117
+
+  %reg115 = fadd fast float %reg109, -3.000000e+01
+  %reg116 = fadd fast float %reg115, %reg1111
+  %reg117 = fadd fast float %reg116, 3.000000e+01
+  ret float %reg117
+}
+
+@fe = external global float
+@fa = external global float
+@fb = external global float
+@fc = external global float
+@ff = external global float
+
+define void @test4() {
+; CHECK-LABEL: @test4
+; CHECK: fadd fast float
+; CHECK: fadd fast float
+; CHECK-NOT: fadd fast float
+; CHECK: ret void
+
+  %A = load float* @fa
+  %B = load float* @fb
+  %C = load float* @fc
+  %t1 = fadd fast float %A, %B
+  %t2 = fadd fast float %t1, %C
+  %t3 = fadd fast float %C, %A
+  %t4 = fadd fast float %t3, %B
+  ; e = (a+b)+c;
+  store float %t2, float* @fe
+  ; f = (a+c)+b
+  store float %t4, float* @ff
+  ret void
+}
+
+define void @test5() {
+; CHECK-LABEL: @test5
+; CHECK: fadd fast float
+; CHECK: fadd fast float
+; CHECK-NOT: fadd
+; CHECK: ret void
+
+  %A = load float* @fa
+  %B = load float* @fb
+  %C = load float* @fc
+  %t1 = fadd fast float %A, %B
+  %t2 = fadd fast float %t1, %C
+  %t3 = fadd fast float %C, %A
+  %t4 = fadd fast float %t3, %B
+  ; e = c+(a+b)
+  store float %t2, float* @fe
+  ; f = (c+a)+b
+  store float %t4, float* @ff
+  ret void
+}
+
+define void @test6() {
+; CHECK-LABEL: @test6
+; CHECK: fadd fast float
+; CHECK: fadd fast float
+; CHECK-NOT: fadd
+; CHECK: ret void
+
+  %A = load float* @fa
+  %B = load float* @fb
+  %C = load float* @fc
+  %t1 = fadd fast float %B, %A
+  %t2 = fadd fast float %t1, %C
+  %t3 = fadd fast float %C, %A
+  %t4 = fadd fast float %t3, %B
+  ; e = c+(b+a)
+  store float %t2, float* @fe
+  ; f = (c+a)+b
+  store float %t4, float* @ff
+  ret void
+}
+
+define float @test7(float %A, float %B, float %C) {
+; CHECK-LABEL: @test7
+; CHECK-NEXT: fadd fast float %C, %B
+; CHECK-NEXT: fmul fast float %A, %A
+; CHECK-NEXT: fmul fast float %1, %tmp2
+; CHECK-NEXT: ret float
+
+  %aa = fmul fast float %A, %A
+  %aab = fmul fast float %aa, %B
+  %ac = fmul fast float %A, %C
+  %aac = fmul fast float %ac, %A
+  %r = fadd fast float %aab, %aac
+  ret float %r
+}
+
+define float @test8(float %X, float %Y, float %Z) {
+; CHECK-LABEL: @test8
+; CHECK-NEXT: fmul fast float %Y, %X
+; CHECK-NEXT: fsub fast float %Z
+; CHECK-NEXT: ret float
+
+  %A = fsub fast float 0.0, %X
+  %B = fmul fast float %A, %Y
+  ; (-X)*Y + Z -> Z-X*Y
+  %C = fadd fast float %B, %Z
+  ret float %C
+}
+
+define float @test9(float %X) {
+; CHECK-LABEL: @test9
+; CHECK-NEXT: fmul fast float %X, 9.400000e+01
+; CHECK-NEXT: ret float
+
+  %Y = fmul fast float %X, 4.700000e+01
+  %Z = fadd fast float %Y, %Y
+  ret float %Z
+}
+
+define float @test10(float %X) {
+; CHECK-LABEL: @test10
+; CHECK-NEXT: fmul fast float %X, 3.000000e+00
+; CHECK-NEXT: ret float
+
+  %Y = fadd fast float %X ,%X
+  %Z = fadd fast float %Y, %X
+  ret float %Z
+}
+
+define float @test11(float %W) {
+; CHECK-LABEL: test11
+; CHECK-NEXT: fmul fast float %W, 3.810000e+02
+; CHECK-NEXT: ret float
+
+  %X = fmul fast float %W, 127.0
+  %Y = fadd fast float %X ,%X
+  %Z = fadd fast float %Y, %X
+  ret float %Z
+}
+
+define float @test12(float %X) {
+; CHECK-LABEL: @test12
+; CHECK-NEXT: fmul fast float %X, -3.000000e+00
+; CHECK-NEXT: fadd fast float %factor, 6.000000e+00
+; CHECK-NEXT: ret float
+
+  %A = fsub fast float 1.000000e+00, %X
+  %B = fsub fast float 2.000000e+00, %X
+  %C = fsub fast float 3.000000e+00, %X
+  %Y = fadd fast float %A ,%B
+  %Z = fadd fast float %Y, %C
+  ret float %Z
+}
+
+define float @test13(float %X1, float %X2, float %X3) {
+; CHECK-LABEL: @test13
+; CHECK-NEXT: fsub fast float %X3, %X2
+; CHECK-NEXT: fmul fast float {{.*}}, %X1
+; CHECK-NEXT: ret float
+
+  %A = fsub fast float 0.000000e+00, %X1
+  %B = fmul fast float %A, %X2   ; -X1*X2
+  %C = fmul fast float %X1, %X3  ; X1*X3
+  %D = fadd fast float %B, %C    ; -X1*X2 + X1*X3 -> X1*(X3-X2)
+  ret float %D
+}
+
+define float @test14(float %X1, float %X2) {
+; CHECK-LABEL: @test14
+; CHECK-NEXT: fsub fast float %X1, %X2
+; CHECK-NEXT: fmul fast float %1, 4.700000e+01
+; CHECK-NEXT: ret float
+
+  %B = fmul fast float %X1, 47.   ; X1*47
+  %C = fmul fast float %X2, -47.  ; X2*-47
+  %D = fadd fast float %B, %C    ; X1*47 + X2*-47 -> 47*(X1-X2)
+  ret float %D
+}
+
+define float @test15(float %arg) {
+; CHECK-LABEL: test15
+; CHECK-NEXT: fmul fast float %arg, 1.440000e+02
+; CHECK-NEXT: ret float %tmp2
+
+  %tmp1 = fmul fast float 1.200000e+01, %arg
+  %tmp2 = fmul fast float %tmp1, 1.200000e+01
+  ret float %tmp2
+}
+
+; (b+(a+1234))+-a -> b+1234
+define float @test16(float %b, float %a) {
+; CHECK-LABEL: @test16
+; CHECK-NEXT: fadd fast float %b, 1.234000e+03
+; CHECK-NEXT: ret float
+
+  %1 = fadd fast float %a, 1234.0
+  %2 = fadd fast float %b, %1
+  %3 = fsub fast float 0.0, %a
+  %4 = fadd fast float %2, %3
+  ret float %4
+}
+
+; Test that we can turn things like X*-(Y*Z) -> X*-1*Y*Z.
+
+define float @test17(float %a, float %b, float %z) {
+; CHECK-LABEL: test17
+; CHECK-NEXT: fmul fast float %a, 1.234500e+04
+; CHECK-NEXT: fmul fast float %e, %b
+; CHECK-NEXT: fmul fast float %f, %z
+; CHECK-NEXT: ret float
+
+  %c = fsub fast float 0.000000e+00, %z
+  %d = fmul fast float %a, %b
+  %e = fmul fast float %c, %d
+  %f = fmul fast float %e, 1.234500e+04
+  %g = fsub fast float 0.000000e+00, %f
+  ret float %g
+}
+
+define float @test18(float %a, float %b, float %z) {
+; CHECK-LABEL: test18
+; CHECK-NEXT: fmul fast float %a, 4.000000e+01
+; CHECK-NEXT: fmul fast float %e, %z
+; CHECK-NEXT: ret float
+
+  %d = fmul fast float %z, 4.000000e+01
+  %c = fsub fast float 0.000000e+00, %d
+  %e = fmul fast float %a, %c
+  %f = fsub fast float 0.000000e+00, %e
+  ret float %f
+}
+
+; With sub reassociation, constant folding can eliminate the 12 and -12 constants.
+define float @test19(float %A, float %B) {
+; CHECK-LABEL: @test19
+; CHECK-NEXT: fsub fast float %A, %B
+; CHECK-NEXT: ret float
+  %X = fadd fast float -1.200000e+01, %A
+  %Y = fsub fast float %X, %B
+  %Z = fadd fast float %Y, 1.200000e+01
+  ret float %Z
+}
+
+; With sub reassociation, constant folding can eliminate the uses of %a.
+define float @test20(float %a, float %b, float %c) nounwind  {
+; CHECK-LABEL: @test20
+; CHECK-NEXT: fsub fast float -0.000000e+00, %b
+; CHECK-NEXT: fsub fast float %b.neg, %c
+; CHECK-NEXT: ret float
+
+; FIXME: Should be able to generate the below, which may expose more
+;        opportunites for FAdd reassociation.
+; %sum = fadd fast float %c, %b
+; %tmp7 = fsub fast float 0, %sum
+
+  %tmp3 = fsub fast float %a, %b
+  %tmp5 = fsub fast float %tmp3, %c
+  %tmp7 = fsub fast float %tmp5, %a
+  ret float %tmp7
+}
diff --git a/test/Transforms/Reassociate/fast-fp-commute.ll b/test/Transforms/Reassociate/fast-fp-commute.ll
new file mode 100644
index 0000000..ad89607
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-fp-commute.ll
@@ -0,0 +1,44 @@
+; RUN: opt -reassociate -S < %s | FileCheck %s
+
+declare void @use(float)
+
+define void @test1(float %x, float %y) {
+; CHECK-LABEL: test1
+; CHECK: fmul fast float %y, %x
+; CHECK: fmul fast float %y, %x
+; CHECK: fsub fast float %1, %2
+; CHECK: call void @use(float %{{.*}})
+; CHECK: call void @use(float %{{.*}})
+
+  %1 = fmul fast float %x, %y
+  %2 = fmul fast float %y, %x
+  %3 = fsub fast float %1, %2
+  call void @use(float %1)
+  call void @use(float %3)
+  ret void
+}
+
+define float @test2(float %x, float %y) {
+; CHECK-LABEL: test2
+; CHECK-NEXT: fmul fast float %y, %x
+; CHECK-NEXT: fmul fast float %y, %x
+; CHECK-NEXT: fsub fast float %1, %2
+; CHECK-NEXT: ret float %3
+
+  %1 = fmul fast float %x, %y
+  %2 = fmul fast float %y, %x
+  %3 = fsub fast float %1, %2
+  ret float %3
+}
+
+define float @test3(float %x, float %y) {
+; CHECK-LABEL: test3
+; CHECK-NEXT: %factor = fmul fast float %y, 2.000000e+00
+; CHECK-NEXT: %tmp1 = fmul fast float %factor, %x
+; CHECK-NEXT: ret float %tmp1
+
+  %1 = fmul fast float %x, %y
+  %2 = fmul fast float %y, %x
+  %3 = fadd fast float %1, %2
+  ret float %3
+}
diff --git a/test/Transforms/Reassociate/fast-mightymul.ll b/test/Transforms/Reassociate/fast-mightymul.ll
new file mode 100644
index 0000000..98bdf7a
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-mightymul.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -reassociate -disable-output
+; PR13021
+
+define float @test2(float %x) {
+  %t0 = fmul fast float %x, %x
+  %t1 = fmul fast float %t0, %t0
+  %t2 = fmul fast float %t1, %t1
+  %t3 = fmul fast float %t2, %t2
+  %t4 = fmul fast float %t3, %t3
+  %t5 = fmul fast float %t4, %t4
+  %t6 = fmul fast float %t5, %t5
+  %t7 = fmul fast float %t6, %t6
+  %t8 = fmul fast float %t7, %t7
+  %t9 = fmul fast float %t8, %t8
+  %t10 = fmul fast float %t9, %t9
+  %t11 = fmul fast float %t10, %t10
+  %t12 = fmul fast float %t11, %t11
+  %t13 = fmul fast float %t12, %t12
+  %t14 = fmul fast float %t13, %t13
+  %t15 = fmul fast float %t14, %t14
+  %t16 = fmul fast float %t15, %t15
+  %t17 = fmul fast float %t16, %t16
+  %t18 = fmul fast float %t17, %t17
+  %t19 = fmul fast float %t18, %t18
+  %t20 = fmul fast float %t19, %t19
+  %t21 = fmul fast float %t20, %t20
+  %t22 = fmul fast float %t21, %t21
+  %t23 = fmul fast float %t22, %t22
+  %t24 = fmul fast float %t23, %t23
+  %t25 = fmul fast float %t24, %t24
+  %t26 = fmul fast float %t25, %t25
+  %t27 = fmul fast float %t26, %t26
+  %t28 = fmul fast float %t27, %t27
+  ret float %t28
+}
diff --git a/test/Transforms/Reassociate/fast-multistep.ll b/test/Transforms/Reassociate/fast-multistep.ll
new file mode 100644
index 0000000..45e15c7
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-multistep.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+
+define float @fmultistep1(float %a, float %b, float %c) {
+; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
+; CHECK-LABEL: @fmultistep1
+; CHECK-NEXT: fadd fast float %c, %b
+; CHECK-NEXT: fmul fast float %a, %tmp2
+; CHECK-NEXT: fmul fast float %tmp3, %a
+; CHECK-NEXT: ret float
+
+  %t0 = fmul fast float %a, %b
+  %t1 = fmul fast float %a, %t0 ; a*(a*b)
+  %t2 = fmul fast float %a, %c
+  %t3 = fmul fast float %a, %t2 ; a*(a*c)
+  %t4 = fadd fast float %t1, %t3
+  ret float %t4
+}
+
+define float @fmultistep2(float %a, float %b, float %c, float %d) {
+; Check that a*b+a*c+d is turned into a*(b+c)+d.
+; CHECK-LABEL: @fmultistep2
+; CHECK-NEXT: fadd fast float %c, %b
+; CHECK-NEXT: fmul fast float %tmp, %a
+; CHECK-NEXT: fadd fast float %tmp1, %d
+; CHECK-NEXT: ret float
+
+  %t0 = fmul fast float %a, %b
+  %t1 = fmul fast float %a, %c
+  %t2 = fadd fast float %t1, %d ; a*c+d
+  %t3 = fadd fast float %t0, %t2 ; a*b+(a*c+d)
+  ret float %t3
+}
diff --git a/test/Transforms/Reassociate/mixed-fast-nonfast-fp.ll b/test/Transforms/Reassociate/mixed-fast-nonfast-fp.ll
new file mode 100644
index 0000000..f51c0c1
--- /dev/null
+++ b/test/Transforms/Reassociate/mixed-fast-nonfast-fp.ll
@@ -0,0 +1,18 @@
+; RUN: opt -reassociate %s -S | FileCheck %s
+
+define float @foo(float %a,float %b, float %c) {
+; CHECK: %mul3 = fmul float %a, %b
+; CHECK-NEXT: fmul fast float %c, 2.000000e+00
+; CHECK-NEXT: fadd fast float %factor, %b
+; CHECK-NEXT: fmul fast float %tmp1, %a
+; CHECK-NEXT: fadd fast float %tmp2, %mul3
+; CHECK-NEXT: ret float
+  %mul1 = fmul fast float %a, %c
+  %mul2 = fmul fast float %a, %b
+  %mul3 = fmul float %a, %b
+  %mul4 = fmul fast float %a, %c
+  %add1 = fadd fast  float %mul1, %mul3
+  %add2 = fadd  fast float %mul4, %mul2
+  %add3 = fadd fast float %add1, %add2
+  ret float %add3
+}
diff --git a/test/Transforms/Reassociate/multistep.ll b/test/Transforms/Reassociate/multistep.ll
index 12eaeee..c499646 100644
--- a/test/Transforms/Reassociate/multistep.ll
+++ b/test/Transforms/Reassociate/multistep.ll
@@ -9,7 +9,7 @@ define i64 @multistep1(i64 %a, i64 %b, i64 %c) {
   %t3 = mul i64 %a, %t2 ; a*(a*c)
   %t4 = add i64 %t1, %t3
 ; CHECK-NEXT: add i64 %c, %b
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: mul i64 %a, %tmp{{.*}}
 ; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
 ; CHECK-NEXT: ret
   ret i64 %t4
diff --git a/test/Transforms/Reassociate/negation1.ll b/test/Transforms/Reassociate/negation1.ll
new file mode 100644
index 0000000..34b943c
--- /dev/null
+++ b/test/Transforms/Reassociate/negation1.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
+
+; Test that we can turn things like A*B + X - A*B -> X.
+
+define i32 @test1(i32 %a, i32 %b, i32 %x) {
+; CHECK-LABEL: test1
+; CHECK: ret i32 %x
+
+  %c = mul i32 %a, %b
+  %d = add i32 %c, %x
+  %c1 = mul i32 %a, %b
+  %f = sub i32 %d, %c1
+  ret i32 %f
+}
+
diff --git a/test/Transforms/Reassociate/pr21205.ll b/test/Transforms/Reassociate/pr21205.ll
new file mode 100644
index 0000000..fcc7150
--- /dev/null
+++ b/test/Transforms/Reassociate/pr21205.ll
@@ -0,0 +1,21 @@
+; RUN: opt -reassociate -S < %s | FileCheck %s
+; PR21205
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+; Don't canonicalize %conv - undef into %conv + (-undef).
+; CHECK-LABEL: @test1
+; CHECK: %sub = fsub fast float %conv, undef
+; CHECK: %sub1 = fadd fast float %sub, -1.000000e+00
+
+define i32 @test1() {
+entry:
+  %0 = load i32* @a, align 4
+  %conv = sitofp i32 %0 to float
+  %sub = fsub fast float %conv, undef
+  %sub1 = fadd fast float %sub, -1.000000e+00
+  %conv2 = fptosi float %sub1 to i32
+  store i32 %conv2, i32* @b, align 4
+  ret i32 undef
+}
diff --git a/test/Transforms/Reassociate/wrap-flags.ll b/test/Transforms/Reassociate/wrap-flags.ll
new file mode 100644
index 0000000..e3304b6
--- /dev/null
+++ b/test/Transforms/Reassociate/wrap-flags.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -reassociate -dce -S | FileCheck %s
+; PR12985
+
+; Verify the nsw flags are preserved when converting shl to mul.
+
+; CHECK-LABEL: @shl_to_mul_nsw(
+; CHECK: %mul = mul i32 %i, -2147483648
+; CHECK: add i32 %mul, 1
+define i32 @shl_to_mul_nsw(i32 %i) {
+entry:
+  %mul = shl nsw i32 %i, 31
+  %mul2 = add i32 %mul, 1
+  ret i32 %mul2
+}
+
+; CHECK-LABEL: @shl_to_mul_nuw(
+; CHECK: %mul = mul nuw i32 %i, 4
+; CHECK: add i32 %mul, 1
+define i32 @shl_to_mul_nuw(i32 %i) {
+entry:
+  %mul = shl nuw i32 %i, 2
+  %mul2 = add i32 %mul, 1
+  ret i32 %mul2
+}
+
+; CHECK-LABEL: @shl_to_mul_nuw_nsw(
+; CHECK: %mul = mul nuw nsw i32 %i, 4
+; CHECK: add i32 %mul, 1
+define i32 @shl_to_mul_nuw_nsw(i32 %i) {
+entry:
+  %mul = shl nuw nsw i32 %i, 2
+  %mul2 = add i32 %mul, 1
+  ret i32 %mul2
+}
diff --git a/test/Transforms/SCCP/ipsccp-basic.ll b/test/Transforms/SCCP/ipsccp-basic.ll
index c1c6c92..107b7af 100644
--- a/test/Transforms/SCCP/ipsccp-basic.ll
+++ b/test/Transforms/SCCP/ipsccp-basic.ll
@@ -227,3 +227,23 @@ entry:
 ; CHECK-LABEL: define internal i32 @test10b(
 ; CHECK: ret i32 undef
 }
+
+;;======================== test11
+
+define i64 @test11a() {
+  %xor = xor i64 undef, undef
+  ret i64 %xor
+; CHECK-LABEL: define i64 @test11a
+; CHECK: ret i64 0
+}
+
+define void @test11b() {
+  %call1 = call i64 @test11a()
+  %call2 = call i64 @llvm.ctpop.i64(i64 %call1)
+  ret void
+; CHECK-LABEL: define void @test11b
+; CHECK: %[[call1:.*]] = call i64 @test11a()
+; CHECK: %[[call2:.*]] = call i64 @llvm.ctpop.i64(i64 0)
+}
+
+declare i64 @llvm.ctpop.i64(i64)
diff --git a/test/Transforms/SLPVectorizer/AArch64/commute.ll b/test/Transforms/SLPVectorizer/AArch64/commute.ll
new file mode 100644
index 0000000..4ee91a5
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/commute.ll
@@ -0,0 +1,75 @@
+; RUN: opt -S -slp-vectorizer %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%structA = type { [2 x float] }
+
+define void @test1(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
+; CHECK-LABEL: test1
+; CHECK: %arrayidx4 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 0
+; CHECK: %arrayidx9 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 1
+; CHECK: %3 = bitcast float* %arrayidx4 to <2 x float>*
+; CHECK: %4 = load <2 x float>* %3, align 4
+; CHECK: %5 = fsub fast <2 x float> %2, %4
+; CHECK: %6 = fmul fast <2 x float> %5, %5
+; CHECK: %7 = extractelement <2 x float> %6, i32 0
+; CHECK: %8 = extractelement <2 x float> %6, i32 1
+; CHECK: %add = fadd fast float %7, %8
+; CHECK: %cmp = fcmp oeq float %add, 0.000000e+00
+
+entry:
+  br label %for.body3.lr.ph
+
+for.body3.lr.ph:
+  %conv5 = sitofp i32 %ymin to float
+  %conv = sitofp i32 %xmin to float
+  %arrayidx4 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 0
+  %0 = load float* %arrayidx4, align 4
+  %sub = fsub fast float %conv, %0
+  %arrayidx9 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 1
+  %1 = load float* %arrayidx9, align 4
+  %sub10 = fsub fast float %conv5, %1
+  %mul11 = fmul fast float %sub, %sub
+  %mul12 = fmul fast float %sub10, %sub10
+  %add = fadd fast float %mul11, %mul12
+  %cmp = fcmp oeq float %add, 0.000000e+00
+  br i1 %cmp, label %for.body3.lr.ph, label %for.end27
+
+for.end27:
+  ret void
+}
+
+define void @test2(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
+; CHECK-LABEL: test2
+; CHECK: %arrayidx4 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 0
+; CHECK: %arrayidx9 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 1
+; CHECK: %3 = bitcast float* %arrayidx4 to <2 x float>*
+; CHECK: %4 = load <2 x float>* %3, align 4
+; CHECK: %5 = fsub fast <2 x float> %2, %4
+; CHECK: %6 = fmul fast <2 x float> %5, %5
+; CHECK: %7 = extractelement <2 x float> %6, i32 0
+; CHECK: %8 = extractelement <2 x float> %6, i32 1
+; CHECK: %add = fadd fast float %8, %7
+; CHECK: %cmp = fcmp oeq float %add, 0.000000e+00
+
+entry:
+  br label %for.body3.lr.ph
+
+for.body3.lr.ph:
+  %conv5 = sitofp i32 %ymin to float
+  %conv = sitofp i32 %xmin to float
+  %arrayidx4 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 0
+  %0 = load float* %arrayidx4, align 4
+  %sub = fsub fast float %conv, %0
+  %arrayidx9 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 1
+  %1 = load float* %arrayidx9, align 4
+  %sub10 = fsub fast float %conv5, %1
+  %mul11 = fmul fast float %sub, %sub
+  %mul12 = fmul fast float %sub10, %sub10
+  %add = fadd fast float %mul12, %mul11         ;;;<---- Operands commuted!!
+  %cmp = fcmp oeq float %add, 0.000000e+00
+  br i1 %cmp, label %for.body3.lr.ph, label %for.end27
+
+for.end27:
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll b/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
new file mode 100644
index 0000000..45fa2f9
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
@@ -0,0 +1,46 @@
+; RUN: opt -S -basicaa -slp-vectorizer < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+; Holding a value live over a call boundary may require
+; spills and fills. This is the case for <2 x double>,
+; as it occupies a Q register of which there are no
+; callee-saves.
+ 
+; CHECK: load double
+; CHECK: load double
+; CHECK: call void @g
+; CHECK: store double
+; CHECK: store double
+define void @f(double* %p, double* %q) {
+  %addr2 = getelementptr double* %q, i32 1
+  %addr = getelementptr double* %p, i32 1
+  %x = load double* %p
+  %y = load double* %addr
+  call void @g()
+  store double %x, double* %q
+  store double %y, double* %addr2
+  ret void
+}
+declare void @g()
+
+; Check we deal with loops correctly.
+;
+; CHECK: store <2 x double>
+; CHECK: load <2 x double>
+define void @f2(double* %p, double* %q) {
+entry:
+  br label %loop
+
+loop:
+  %p1 = phi double [0.0, %entry], [%x, %loop]
+  %p2 = phi double [0.0, %entry], [%y, %loop]
+  %addr2 = getelementptr double* %q, i32 1
+  %addr = getelementptr double* %p, i32 1
+  store double %p1, double* %q
+  store double %p2, double* %addr2
+
+  %x = load double* %p
+  %y = load double* %addr
+  br label %loop
+}
diff --git a/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll b/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
new file mode 100644
index 0000000..e49c7ad
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: @test1
+; CHECK: load <4 x i32>
+; CHECK: add nsw <4 x i32>
+; CHECK: sdiv <4 x i32>
+
+define void @test1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c) {
+entry:
+  %0 = load i32* %b, align 4
+  %1 = load i32* %c, align 4
+  %add = add nsw i32 %1, %0
+  %div = sdiv i32 %add, 2
+  store i32 %div, i32* %a, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 1
+  %2 = load i32* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32* %c, i64 1
+  %3 = load i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %3, %2
+  %div6 = sdiv i32 %add5, 2
+  %arrayidx7 = getelementptr inbounds i32* %a, i64 1
+  store i32 %div6, i32* %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i32* %b, i64 2
+  %4 = load i32* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32* %c, i64 2
+  %5 = load i32* %arrayidx9, align 4
+  %add10 = add nsw i32 %5, %4
+  %div11 = sdiv i32 %add10, 2
+  %arrayidx12 = getelementptr inbounds i32* %a, i64 2
+  store i32 %div11, i32* %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32* %b, i64 3
+  %6 = load i32* %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32* %c, i64 3
+  %7 = load i32* %arrayidx14, align 4
+  %add15 = add nsw i32 %7, %6
+  %div16 = sdiv i32 %add15, 2
+  %arrayidx17 = getelementptr inbounds i32* %a, i64 3
+  store i32 %div16, i32* %arrayidx17, align 4
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/ARM/sroa.ll b/test/Transforms/SLPVectorizer/ARM/sroa.ll
index e0c75b1..899cfb1 100644
--- a/test/Transforms/SLPVectorizer/ARM/sroa.ll
+++ b/test/Transforms/SLPVectorizer/ARM/sroa.ll
@@ -5,11 +5,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
 %class.Complex = type { double, double }
 
 ; Code like this is the result of SROA. Make sure we don't vectorize this
-; because the in the scalar version of this the shl/or are handled by the
+; because the scalar version of the shl/or are handled by the
 ; backend and disappear, the vectorized code stays.
 
 ; CHECK-LABEL: SROAed
-; CHECK-NOT: shl <2 x i64>
+; CHECK-NOT: shl nuw <2 x i64>
 ; CHECK-NOT: or <2 x i64>
 
 define void @SROAed(%class.Complex* noalias nocapture sret %agg.result, [4 x i32] %a.coerce, [4 x i32] %b.coerce) {
diff --git a/test/Transforms/SLPVectorizer/X86/addsub.ll b/test/Transforms/SLPVectorizer/X86/addsub.ll
index 8303bc8..174d400 100644
--- a/test/Transforms/SLPVectorizer/X86/addsub.ll
+++ b/test/Transforms/SLPVectorizer/X86/addsub.ll
@@ -12,9 +12,9 @@ target triple = "x86_64-unknown-linux-gnu"
 @fa = common global [4 x float] zeroinitializer, align 16
 
 ; CHECK-LABEL: @addsub
-; CHECK: %5 = add <4 x i32> %3, %4
-; CHECK: %6 = add <4 x i32> %2, %5
-; CHECK: %7 = sub <4 x i32> %2, %5
+; CHECK: %5 = add nsw <4 x i32> %3, %4
+; CHECK: %6 = add nsw <4 x i32> %2, %5
+; CHECK: %7 = sub nsw <4 x i32> %2, %5
 ; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 
 ; Function Attrs: nounwind uwtable
@@ -56,9 +56,9 @@ entry:
 }
 
 ; CHECK-LABEL: @subadd
-; CHECK:  %5 = add <4 x i32> %3, %4
-; CHECK:  %6 = sub <4 x i32> %2, %5
-; CHECK:  %7 = add <4 x i32> %2, %5
+; CHECK:  %5 = add nsw <4 x i32> %3, %4
+; CHECK:  %6 = sub nsw <4 x i32> %2, %5
+; CHECK:  %7 = add nsw <4 x i32> %2, %5
 ; CHECK:  %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 
 ; Function Attrs: nounwind uwtable
diff --git a/test/Transforms/SLPVectorizer/X86/align.ll b/test/Transforms/SLPVectorizer/X86/align.ll
index f586573..ce80620 100644
--- a/test/Transforms/SLPVectorizer/X86/align.ll
+++ b/test/Transforms/SLPVectorizer/X86/align.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ; Simple 3-pair chain with loads and stores
-; CHECK: test1
+; CHECK-LABEL: @test1
 define void @test1(double* %a, double* %b, double* %c) {
 entry:
   %agg.tmp.i.i.sroa.0 = alloca [3 x double], align 16
@@ -25,3 +25,31 @@ entry:
 ; CHECK: ret
   ret void
 }
+
+; Float has 4 byte abi alignment on x86_64. We must use the alignmnet of the
+; value being loaded/stored not the alignment of the pointer type.
+
+; CHECK-LABEL: @test2
+; CHECK-NOT: align 8
+; CHECK: load <4 x float>{{.*}}, align 4
+; CHECK: store <4 x float>{{.*}}, align 4
+; CHECK: ret
+
+define void @test2(float * %a, float * %b) {
+entry:
+  %l0 = load float* %a
+  %a1 = getelementptr inbounds float* %a, i64 1
+  %l1 = load float* %a1
+  %a2 = getelementptr inbounds float* %a, i64 2
+  %l2 = load float* %a2
+  %a3 = getelementptr inbounds float* %a, i64 3
+  %l3 = load float* %a3
+  store float %l0, float* %b
+  %b1 = getelementptr inbounds float* %b, i64 1
+  store float %l1, float* %b1
+  %b2 = getelementptr inbounds float* %b, i64 2
+  store float %l2, float* %b2
+  %b3 = getelementptr inbounds float* %b, i64 3
+  store float %l3, float* %b3
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll b/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll
new file mode 100644
index 0000000..dc99366
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin13.3.0"
+
+@a = common global double 0.000000e+00, align 8
+
+define i32 @fn1() {
+entry:
+  %init = load double* @a, align 8
+  br label %loop
+
+loop:
+  %phi = phi double [ %add2, %loop ], [ %init, %entry ]
+  %postadd1_phi = phi double [ %postadd1, %loop ], [ %init, %entry ]
+  %postadd2_phi = phi double [ %postadd2, %loop ], [ %init, %entry ]
+  %add1 = fadd double %postadd1_phi, undef
+  %add2 = fadd double %postadd2_phi, %phi
+  %mul2 = fmul double %add2, 0.000000e+00
+  %binaryop_B = fadd double %postadd1_phi, %mul2
+  %mul1 = fmul double %add1, 0.000000e+00
+  %tmp = fadd double %postadd2_phi, 0.000000e+00
+
+  ; tryToVectorize() starts with this binary instruction.
+  ; At the same time vectorization wraps around the loop, vectorizes
+  ; postadd1/2 and eventually binary_V and tmp. So binary_V itself is replaced
+  ; with a vector instruction.
+  ; The SLPVectorizer crashed because it tried to use binary_V
+  ; after vectorization to re-arrange instructions.
+  %binary_V = fadd double %mul1, %binaryop_B
+
+  %postadd1 = fadd double %binary_V, 0.000000e+00
+  %postadd2 = fadd double %tmp, 1.000000e+00
+  %tobool = fcmp une double %postadd1, 0.000000e+00
+  br i1 %tobool, label %exit, label %loop
+
+exit:
+  ret i32 1
+}
+
+
diff --git a/test/Transforms/SLPVectorizer/X86/crash_gep.ll b/test/Transforms/SLPVectorizer/X86/crash_gep.ll
new file mode 100644
index 0000000..dd4034c
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_gep.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-unknown-linux-gnu
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = common global i64* null, align 8
+
+; Function Attrs: nounwind uwtable
+define i32 @fn1() {
+entry:
+  %0 = load i64** @a, align 8
+  %add.ptr = getelementptr inbounds i64* %0, i64 1
+  %1 = ptrtoint i64* %add.ptr to i64
+  %arrayidx = getelementptr inbounds i64* %0, i64 2
+  store i64 %1, i64* %arrayidx, align 8
+  %2 = ptrtoint i64* %arrayidx to i64
+  store i64 %2, i64* %add.ptr, align 8
+  ret i32 undef
+}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
new file mode 100644
index 0000000..dddc1be
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin13.3.0"
+
+define void @_foo(double %p1, double %p2, double %p3) #0 {
+entry:
+  %tab1 = alloca [256 x i32], align 16
+  %tab2 = alloca [256 x i32], align 16
+  br label %bb1
+
+
+bb1:
+  %mul19 = fmul double %p1, 1.638400e+04
+  %mul20 = fmul double %p3, 1.638400e+04
+  %add = fadd double %mul20, 8.192000e+03
+  %mul21 = fmul double %p2, 1.638400e+04
+  ; The SLPVectorizer crashed when scheduling this block after it inserted an
+  ; insertelement instruction (during vectorizing the for.body block) at this position.
+  br label %for.body
+
+for.body:
+  %indvars.iv266 = phi i64 [ 0, %bb1 ], [ %indvars.iv.next267, %for.body ]
+  %t.0259 = phi double [ 0.000000e+00, %bb1 ], [ %add27, %for.body ]
+  %p3.addr.0258 = phi double [ %add, %bb1 ], [ %add28, %for.body ]
+  %vecinit.i.i237 = insertelement <2 x double> undef, double %t.0259, i32 0
+  %x13 = tail call i32 @_xfn(<2 x double> %vecinit.i.i237) #2
+  %arrayidx = getelementptr inbounds [256 x i32]* %tab1, i64 0, i64 %indvars.iv266
+  store i32 %x13, i32* %arrayidx, align 4, !tbaa !4
+  %vecinit.i.i = insertelement <2 x double> undef, double %p3.addr.0258, i32 0
+  %x14 = tail call i32 @_xfn(<2 x double> %vecinit.i.i) #2
+  %arrayidx26 = getelementptr inbounds [256 x i32]* %tab2, i64 0, i64 %indvars.iv266
+  store i32 %x14, i32* %arrayidx26, align 4, !tbaa !4
+  %add27 = fadd double %mul19, %t.0259
+  %add28 = fadd double %mul21, %p3.addr.0258
+  %indvars.iv.next267 = add nuw nsw i64 %indvars.iv266, 1
+  %exitcond = icmp eq i64 %indvars.iv.next267, 256
+  br i1 %exitcond, label %return, label %for.body
+
+return:
+  ret void
+}
+
+declare i32 @_xfn(<2 x double>) #4
+
+!3 = metadata !{metadata !"int", metadata !4, i64 0}
+!4 = metadata !{metadata !3, metadata !3, i64 0}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll b/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
index c7ec98a..9f1fb71 100644
--- a/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
@@ -1,4 +1,4 @@
-; RUN: opt -slp-vectorizer -mtriple=x86_64-apple-macosx10.9.0 -mcpu=corei7-avx -S < %s | FileCheck %s
+; RUN: opt -basicaa -slp-vectorizer -mtriple=x86_64-apple-macosx10.9.0 -mcpu=corei7-avx -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
 
diff --git a/test/Transforms/SLPVectorizer/X86/cycle_dup.ll b/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
index fba3549..bac2c3c 100644
--- a/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
+++ b/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
@@ -15,7 +15,7 @@ target triple = "x86_64-apple-macosx10.9.0"
 ;CHECK: bitcast i32* %A to <4 x i32>*
 ;CHECK-NEXT: load <4 x i32>
 ;CHECK: phi <4 x i32>
-;CHECK-NEXT: mul <4 x i32>
+;CHECK-NEXT: mul nsw <4 x i32>
 ;CHECK-NOT: mul
 ;CHECK: phi <4 x i32>
 ;CHECK: bitcast i32* %A to <4 x i32>*
diff --git a/test/Transforms/SLPVectorizer/X86/debug_info.ll b/test/Transforms/SLPVectorizer/X86/debug_info.ll
index f4e68f2..1046087 100644
--- a/test/Transforms/SLPVectorizer/X86/debug_info.ll
+++ b/test/Transforms/SLPVectorizer/X86/debug_info.ll
@@ -23,11 +23,11 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 define i32 @depth(double* nocapture %A, i32 %m) #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !{double* %A}, i64 0, metadata !12), !dbg !19
-  tail call void @llvm.dbg.value(metadata !{i32 %m}, i64 0, metadata !13), !dbg !19
-  tail call void @llvm.dbg.value(metadata !20, i64 0, metadata !14), !dbg !21
-  tail call void @llvm.dbg.value(metadata !22, i64 0, metadata !15), !dbg !21
-  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !16), !dbg !23
+  tail call void @llvm.dbg.value(metadata !{double* %A}, i64 0, metadata !12, metadata !{}), !dbg !19
+  tail call void @llvm.dbg.value(metadata !{i32 %m}, i64 0, metadata !13, metadata !{}), !dbg !19
+  tail call void @llvm.dbg.value(metadata !20, i64 0, metadata !14, metadata !{}), !dbg !21
+  tail call void @llvm.dbg.value(metadata !22, i64 0, metadata !15, metadata !{}), !dbg !21
+  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !16, metadata !{}), !dbg !23
   %cmp8 = icmp sgt i32 %m, 0, !dbg !23
   br i1 %cmp8, label %for.body.lr.ph, label %for.end, !dbg !23
 
@@ -49,7 +49,7 @@ for.end:                                          ; preds = %for.body.lr.ph, %en
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -57,24 +57,24 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!18, !32}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 187335) (llvm/trunk 187335:187340M)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/nadav/file.c] [DW_LANG_C99]
+!0 = metadata !{metadata !"0x11\0012\00clang version 3.4 (trunk 187335) (llvm/trunk 187335:187340M)\001\00\000\00\000", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [/Users/nadav/file.c] [DW_LANG_C99]
 !1 = metadata !{metadata !"file.c", metadata !"/Users/nadav"}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"depth", metadata !"depth", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (double*, i32)* @depth, null, null, metadata !11, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [depth]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/Users/nadav/file.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !"0x2e\00depth\00depth\00\001\000\001\000\006\00256\001\001", metadata !1, metadata !5, metadata !6, null, i32 (double*, i32)* @depth, null, null, metadata !11} ; [ DW_TAG_subprogram ] [line 1] [def] [depth]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [/Users/nadav/file.c]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{metadata !8, metadata !9, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from double]
-!10 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!8 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", null, null, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from double]
+!10 = metadata !{metadata !"0x24\00double\000\0064\0064\000\000\004", null, null} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
 !11 = metadata !{metadata !12, metadata !13, metadata !14, metadata !15, metadata !16}
-!12 = metadata !{i32 786689, metadata !4, metadata !"A", metadata !5, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [A] [line 1]
-!13 = metadata !{i32 786689, metadata !4, metadata !"m", metadata !5, i32 33554433, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [m] [line 1]
-!14 = metadata !{i32 786688, metadata !4, metadata !"y0", metadata !5, i32 2, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [y0] [line 2]
-!15 = metadata !{i32 786688, metadata !4, metadata !"y1", metadata !5, i32 2, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [y1] [line 2]
-!16 = metadata !{i32 786688, metadata !17, metadata !"i", metadata !5, i32 3, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3]
-!17 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Users/nadav/file.c]
+!12 = metadata !{metadata !"0x101\00A\0016777217\000", metadata !4, metadata !5, metadata !9} ; [ DW_TAG_arg_variable ] [A] [line 1]
+!13 = metadata !{metadata !"0x101\00m\0033554433\000", metadata !4, metadata !5, metadata !8} ; [ DW_TAG_arg_variable ] [m] [line 1]
+!14 = metadata !{metadata !"0x100\00y0\002\000", metadata !4, metadata !5, metadata !10} ; [ DW_TAG_auto_variable ] [y0] [line 2]
+!15 = metadata !{metadata !"0x100\00y1\002\000", metadata !4, metadata !5, metadata !10} ; [ DW_TAG_auto_variable ] [y1] [line 2]
+!16 = metadata !{metadata !"0x100\00i\003\000", metadata !17, metadata !5, metadata !8} ; [ DW_TAG_auto_variable ] [i] [line 3]
+!17 = metadata !{metadata !"0xb\003\000\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [/Users/nadav/file.c]
 !18 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
 !19 = metadata !{i32 1, i32 0, metadata !4, null}
 !20 = metadata !{double 0.000000e+00}
@@ -82,8 +82,8 @@ attributes #1 = { nounwind readnone }
 !22 = metadata !{double 1.000000e+00}
 !23 = metadata !{i32 3, i32 0, metadata !17, null}
 !24 = metadata !{i32 4, i32 0, metadata !25, null}
-!25 = metadata !{i32 786443, metadata !1, metadata !17, i32 3, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/Users/nadav/file.c]
+!25 = metadata !{metadata !"0xb\003\000\001", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ] [/Users/nadav/file.c]
 !29 = metadata !{i32 5, i32 0, metadata !25, null}
 !30 = metadata !{i32 7, i32 0, metadata !4, null}
-!31 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!32 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!31 = metadata !{i32 8, i32 0, metadata !4, null}
+!32 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
new file mode 100644
index 0000000..3628042
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=i386-apple-macosx10.9.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@a = common global i64* null, align 8
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @fn1() {
+entry:
+  %0 = load i64** @a, align 8
+  %add.ptr = getelementptr inbounds i64* %0, i64 11
+  %1 = ptrtoint i64* %add.ptr to i64
+  store i64 %1, i64* %add.ptr, align 8
+  %add.ptr1 = getelementptr inbounds i64* %0, i64 56
+  %2 = ptrtoint i64* %add.ptr1 to i64
+  %arrayidx2 = getelementptr inbounds i64* %0, i64 12
+  store i64 %2, i64* %arrayidx2, align 8
+  ret i32 undef
+; CHECK-LABEL: @fn1(
+; CHECK: extractelement <2 x i64*>
+; CHECK: ret
+}
+
+
+declare float @llvm.powi.f32(float, i32)
+define void @fn2(i32* %a, i32* %b, float* %c) {
+entry:
+  %i0 = load i32* %a, align 4
+  %i1 = load i32* %b, align 4
+  %add1 = add i32 %i0, %i1
+  %fp1 = sitofp i32 %add1 to float
+  %call1 = tail call float @llvm.powi.f32(float %fp1,i32 %add1) nounwind readnone
+
+  %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+  %i2 = load i32* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+  %i3 = load i32* %arrayidx3, align 4
+  %add2 = add i32 %i2, %i3
+  %fp2 = sitofp i32 %add2 to float
+  %call2 = tail call float @llvm.powi.f32(float %fp2,i32 %add1) nounwind readnone
+
+  %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+  %i4 = load i32* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+  %i5 = load i32* %arrayidx5, align 4
+  %add3 = add i32 %i4, %i5
+  %fp3 = sitofp i32 %add3 to float
+  %call3 = tail call float @llvm.powi.f32(float %fp3,i32 %add1) nounwind readnone
+
+  %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+  %i6 = load i32* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+  %i7 = load i32* %arrayidx7, align 4
+  %add4 = add i32 %i6, %i7
+  %fp4 = sitofp i32 %add4 to float
+  %call4 = tail call float @llvm.powi.f32(float %fp4,i32 %add1) nounwind readnone
+
+  store float %call1, float* %c, align 4
+  %arrayidx8 = getelementptr inbounds float* %c, i32 1
+  store float %call2, float* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds float* %c, i32 2
+  store float %call3, float* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds float* %c, i32 3
+  store float %call4, float* %arrayidx10, align 4
+  ret void
+
+; CHECK-LABEL: @fn2(
+; CHECK: extractelement <4 x i32>
+; CHECK: ret
+}
diff --git a/test/Transforms/SLPVectorizer/X86/hoist.ll b/test/Transforms/SLPVectorizer/X86/hoist.ll
index 5074cea..78c58f1 100644
--- a/test/Transforms/SLPVectorizer/X86/hoist.ll
+++ b/test/Transforms/SLPVectorizer/X86/hoist.ll
@@ -21,7 +21,7 @@ target triple = "i386-apple-macosx10.9.0"
 ; loop body:
 ;CHECK: phi
 ;CHECK: load <4 x i32>
-;CHECK: add <4 x i32>
+;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret
 define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) {
diff --git a/test/Transforms/SLPVectorizer/X86/horizontal.ll b/test/Transforms/SLPVectorizer/X86/horizontal.ll
index 8f91951..1836047 100644
--- a/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -148,7 +148,7 @@ for.end:
 ; }
 
 ; CHECK-LABEL: long_red
-; CHECK: fmul <4 x float>
+; CHECK: fmul fast <4 x float>
 ; CHECK: shufflevector <4 x float>
 
 define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
@@ -250,7 +250,7 @@ for.end:
 ; }
 
 ; CHECK-LABEL: chain_red
-; CHECK: fmul <4 x float>
+; CHECK: fmul fast <4 x float>
 ; CHECK: shufflevector <4 x float>
 
 define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) {
@@ -317,7 +317,7 @@ for.end:
 ; }
 
 ; CHECK-LABEL: store_red
-; CHECK: fmul <4 x float>
+; CHECK: fmul fast <4 x float>
 ; CHECK: shufflevector <4 x float>
 
 define i32 @store_red(float* noalias %A, float* noalias %B, float* noalias %C, i32 %n) {
@@ -379,7 +379,7 @@ for.end:
 ; }
 
 ; STORE-LABEL: store_red_double
-; STORE: fmul <2 x double>
+; STORE: fmul fast <2 x double>
 ; STORE: extractelement <2 x double>
 ; STORE: extractelement <2 x double>
 
diff --git a/test/Transforms/SLPVectorizer/X86/in-tree-user.ll b/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
index 3115232..194a0fb 100644
--- a/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
+++ b/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
@@ -5,9 +5,11 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 @.str = private unnamed_addr constant [6 x i8] c"bingo\00", align 1
 
-; We can't vectorize when the roots are used inside the tree.
+; Uses inside the tree must be scheduled after the corresponding tree bundle.
 ;CHECK-LABEL: @in_tree_user(
-;CHECK-NOT: load <2 x double>
+;CHECK: load <2 x double>
+;CHECK: fadd <2 x double>
+;CHECK: InTreeUser = fadd
 ;CHECK: ret
 define void @in_tree_user(double* nocapture %A, i32 %n) {
 entry:
@@ -22,7 +24,7 @@ for.body:                                         ; preds = %for.inc, %entry
   %mul1 = fmul double %conv, %1
   %mul2 = fmul double %mul1, 7.000000e+00
   %add = fadd double %mul2, 5.000000e+00
-  %BadValue = fadd double %add, %add    ; <------------------ In tree user.
+  %InTreeUser = fadd double %add, %add    ; <------------------ In tree user.
   %2 = or i64 %0, 1
   %arrayidx6 = getelementptr inbounds double* %A, i64 %2
   %3 = load double* %arrayidx6, align 8
@@ -43,6 +45,7 @@ for.inc:                                          ; preds = %for.body, %if.then
   br i1 %exitcond, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.inc
+  store double %InTreeUser, double* %A, align 8   ; Avoid dead code elimination of the InTreeUser.
   ret void
 }
 
diff --git a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index 9eda29f..0221613 100644
--- a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -35,6 +35,49 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c)
   ret <4 x float> %rd
 }
 
+declare void @llvm.assume(i1) nounwind
+
+; This entire tree is ephemeral, don't vectorize any of it.
+define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select_eph(
+; CHECK-NOT: icmp ne <4 x i32>
+; CHECK-NOT: select <4 x i1>
+  %c0 = extractelement <4 x i32> %c, i32 0
+  %c1 = extractelement <4 x i32> %c, i32 1
+  %c2 = extractelement <4 x i32> %c, i32 2
+  %c3 = extractelement <4 x i32> %c, i32 3
+  %a0 = extractelement <4 x float> %a, i32 0
+  %a1 = extractelement <4 x float> %a, i32 1
+  %a2 = extractelement <4 x float> %a, i32 2
+  %a3 = extractelement <4 x float> %a, i32 3
+  %b0 = extractelement <4 x float> %b, i32 0
+  %b1 = extractelement <4 x float> %b, i32 1
+  %b2 = extractelement <4 x float> %b, i32 2
+  %b3 = extractelement <4 x float> %b, i32 3
+  %cmp0 = icmp ne i32 %c0, 0
+  %cmp1 = icmp ne i32 %c1, 0
+  %cmp2 = icmp ne i32 %c2, 0
+  %cmp3 = icmp ne i32 %c3, 0
+  %s0 = select i1 %cmp0, float %a0, float %b0
+  %s1 = select i1 %cmp1, float %a1, float %b1
+  %s2 = select i1 %cmp2, float %a2, float %b2
+  %s3 = select i1 %cmp3, float %a3, float %b3
+  %ra = insertelement <4 x float> undef, float %s0, i32 0
+  %rb = insertelement <4 x float> %ra, float %s1, i32 1
+  %rc = insertelement <4 x float> %rb, float %s2, i32 2
+  %rd = insertelement <4 x float> %rc, float %s3, i32 3
+  %q0 = extractelement <4 x float> %rd, i32 0
+  %q1 = extractelement <4 x float> %rd, i32 1
+  %q2 = extractelement <4 x float> %rd, i32 2
+  %q3 = extractelement <4 x float> %rd, i32 3
+  %q4 = fadd float %q0, %q1
+  %q5 = fadd float %q2, %q3
+  %q6 = fadd float %q4, %q5
+  %qi = fcmp olt float %q6, %q5
+  call void @llvm.assume(i1 %qi)
+  ret <4 x float> undef
+}
+
 ; Insert in an order different from the vector indices to make sure it
 ; doesn't matter
 define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
diff --git a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
index aef2479..bc12926 100644
--- a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
+++ b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
@@ -5,10 +5,10 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 ;CHECK-LABEL: @foo(
 ;CHECK: load <4 x i32>
-;CHECK: add <4 x i32>
+;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: load <4 x i32>
-;CHECK: add <4 x i32>
+;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret
 define i32 @foo(i32* nocapture %A, i32 %n) #0 {
diff --git a/test/Transforms/SLPVectorizer/X86/multi_user.ll b/test/Transforms/SLPVectorizer/X86/multi_user.ll
index cab9994..63a77e4 100644
--- a/test/Transforms/SLPVectorizer/X86/multi_user.ll
+++ b/test/Transforms/SLPVectorizer/X86/multi_user.ll
@@ -14,7 +14,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 ;CHECK-LABEL: @foo(
 ;CHECK: insertelement <4 x i32>
 ;CHECK: load <4 x i32>
-;CHECK: add <4 x i32>
+;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret
 define i32 @foo(i32* nocapture %A, i32 %n) {
diff --git a/test/Transforms/SLPVectorizer/X86/powof2div.ll b/test/Transforms/SLPVectorizer/X86/powof2div.ll
new file mode 100644
index 0000000..7aa1efd
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/powof2div.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK-LABEL: @powof2div(
+;CHECK: load <4 x i32>*
+;CHECK: add nsw <4 x i32>
+;CHECK: sdiv <4 x i32>
+define void @powof2div(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){
+entry:
+  %0 = load i32* %b, align 4
+  %1 = load i32* %c, align 4
+  %add = add nsw i32 %1, %0
+  %div = sdiv i32 %add, 2
+  store i32 %div, i32* %a, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 1
+  %2 = load i32* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32* %c, i64 1
+  %3 = load i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %3, %2
+  %div6 = sdiv i32 %add5, 2
+  %arrayidx7 = getelementptr inbounds i32* %a, i64 1
+  store i32 %div6, i32* %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i32* %b, i64 2
+  %4 = load i32* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32* %c, i64 2
+  %5 = load i32* %arrayidx9, align 4
+  %add10 = add nsw i32 %5, %4
+  %div11 = sdiv i32 %add10, 2
+  %arrayidx12 = getelementptr inbounds i32* %a, i64 2
+  store i32 %div11, i32* %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32* %b, i64 3
+  %6 = load i32* %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32* %c, i64 3
+  %7 = load i32* %arrayidx14, align 4
+  %add15 = add nsw i32 %7, %6
+  %div16 = sdiv i32 %add15, 2
+  %arrayidx17 = getelementptr inbounds i32* %a, i64 3
+  store i32 %div16, i32* %arrayidx17, align 4
+  ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
new file mode 100644
index 0000000..3843ef7
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
@@ -0,0 +1,350 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
+
+; Check propagation of optional IR flags (PR20802). For a flag to
+; propagate from scalar instructions to their vector replacement,
+; *all* scalar instructions must have the flag.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; CHECK-LABEL: @exact(
+; CHECK: lshr exact <4 x i32>
+define void @exact(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = lshr exact i32 %load1, 1
+  %op2 = lshr exact i32 %load2, 1
+  %op3 = lshr exact i32 %load3, 1
+  %op4 = lshr exact i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+
+; CHECK-LABEL: @not_exact(
+; CHECK: lshr <4 x i32>
+define void @not_exact(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = lshr exact i32 %load1, 1
+  %op2 = lshr i32 %load2, 1
+  %op3 = lshr exact i32 %load3, 1
+  %op4 = lshr exact i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+
+; CHECK-LABEL: @nsw(
+; CHECK: add nsw <4 x i32>
+define void @nsw(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = add nsw i32 %load1, 1
+  %op2 = add nsw i32 %load2, 1
+  %op3 = add nsw i32 %load3, 1
+  %op4 = add nsw i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+
+; CHECK-LABEL: @not_nsw(
+; CHECK: add <4 x i32>
+define void @not_nsw(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = add nsw i32 %load1, 1
+  %op2 = add nsw i32 %load2, 1
+  %op3 = add nsw i32 %load3, 1
+  %op4 = add i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+
+; CHECK-LABEL: @nuw(
+; CHECK: add nuw <4 x i32>
+define void @nuw(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = add nuw i32 %load1, 1
+  %op2 = add nuw i32 %load2, 1
+  %op3 = add nuw i32 %load3, 1
+  %op4 = add nuw i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+ 
+; CHECK-LABEL: @not_nuw(
+; CHECK: add <4 x i32>
+define void @not_nuw(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = add nuw i32 %load1, 1
+  %op2 = add i32 %load2, 1
+  %op3 = add i32 %load3, 1
+  %op4 = add nuw i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+ 
+; CHECK-LABEL: @nnan(
+; CHECK: fadd nnan <4 x float>
+define void @nnan(float* %x) {
+  %idx1 = getelementptr inbounds float* %x, i64 0
+  %idx2 = getelementptr inbounds float* %x, i64 1
+  %idx3 = getelementptr inbounds float* %x, i64 2
+  %idx4 = getelementptr inbounds float* %x, i64 3
+
+  %load1 = load float* %idx1, align 4
+  %load2 = load float* %idx2, align 4
+  %load3 = load float* %idx3, align 4
+  %load4 = load float* %idx4, align 4
+
+  %op1 = fadd fast nnan float %load1, 1.0
+  %op2 = fadd nnan ninf float %load2, 1.0
+  %op3 = fadd nsz nnan float %load3, 1.0
+  %op4 = fadd arcp nnan float %load4, 1.0
+
+  store float %op1, float* %idx1, align 4
+  store float %op2, float* %idx2, align 4
+  store float %op3, float* %idx3, align 4
+  store float %op4, float* %idx4, align 4
+
+  ret void
+}
+ 
+; CHECK-LABEL: @not_nnan(
+; CHECK: fadd <4 x float>
+define void @not_nnan(float* %x) {
+  %idx1 = getelementptr inbounds float* %x, i64 0
+  %idx2 = getelementptr inbounds float* %x, i64 1
+  %idx3 = getelementptr inbounds float* %x, i64 2
+  %idx4 = getelementptr inbounds float* %x, i64 3
+
+  %load1 = load float* %idx1, align 4
+  %load2 = load float* %idx2, align 4
+  %load3 = load float* %idx3, align 4
+  %load4 = load float* %idx4, align 4
+
+  %op1 = fadd nnan float %load1, 1.0
+  %op2 = fadd ninf float %load2, 1.0
+  %op3 = fadd nsz float %load3, 1.0
+  %op4 = fadd arcp float %load4, 1.0
+
+  store float %op1, float* %idx1, align 4
+  store float %op2, float* %idx2, align 4
+  store float %op3, float* %idx3, align 4
+  store float %op4, float* %idx4, align 4
+
+  ret void
+}
+ 
+; CHECK-LABEL: @only_fast(
+; CHECK: fadd fast <4 x float>
+define void @only_fast(float* %x) {
+  %idx1 = getelementptr inbounds float* %x, i64 0
+  %idx2 = getelementptr inbounds float* %x, i64 1
+  %idx3 = getelementptr inbounds float* %x, i64 2
+  %idx4 = getelementptr inbounds float* %x, i64 3
+
+  %load1 = load float* %idx1, align 4
+  %load2 = load float* %idx2, align 4
+  %load3 = load float* %idx3, align 4
+  %load4 = load float* %idx4, align 4
+
+  %op1 = fadd fast nnan float %load1, 1.0
+  %op2 = fadd fast nnan ninf float %load2, 1.0
+  %op3 = fadd fast nsz nnan float %load3, 1.0
+  %op4 = fadd arcp nnan fast float %load4, 1.0
+
+  store float %op1, float* %idx1, align 4
+  store float %op2, float* %idx2, align 4
+  store float %op3, float* %idx3, align 4
+  store float %op4, float* %idx4, align 4
+
+  ret void
+}
+ 
+; CHECK-LABEL: @only_arcp(
+; CHECK: fadd arcp <4 x float>
+define void @only_arcp(float* %x) {
+  %idx1 = getelementptr inbounds float* %x, i64 0
+  %idx2 = getelementptr inbounds float* %x, i64 1
+  %idx3 = getelementptr inbounds float* %x, i64 2
+  %idx4 = getelementptr inbounds float* %x, i64 3
+
+  %load1 = load float* %idx1, align 4
+  %load2 = load float* %idx2, align 4
+  %load3 = load float* %idx3, align 4
+  %load4 = load float* %idx4, align 4
+
+  %op1 = fadd fast float %load1, 1.0
+  %op2 = fadd fast float %load2, 1.0
+  %op3 = fadd fast float %load3, 1.0
+  %op4 = fadd arcp float %load4, 1.0
+
+  store float %op1, float* %idx1, align 4
+  store float %op2, float* %idx2, align 4
+  store float %op3, float* %idx3, align 4
+  store float %op4, float* %idx4, align 4
+
+  ret void
+}
+
+; CHECK-LABEL: @addsub_all_nsw
+; CHECK: add nsw <4 x i32>
+; CHECK: sub nsw <4 x i32>
+define void @addsub_all_nsw(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = add nsw i32 %load1, 1
+  %op2 = sub nsw i32 %load2, 1
+  %op3 = add nsw i32 %load3, 1
+  %op4 = sub nsw i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+ 
+; CHECK-LABEL: @addsub_some_nsw
+; CHECK: add nsw <4 x i32>
+; CHECK: sub <4 x i32>
+define void @addsub_some_nsw(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = add nsw i32 %load1, 1
+  %op2 = sub nsw i32 %load2, 1
+  %op3 = add nsw i32 %load3, 1
+  %op4 = sub i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+ 
+; CHECK-LABEL: @addsub_no_nsw
+; CHECK: add <4 x i32>
+; CHECK: sub <4 x i32>
+define void @addsub_no_nsw(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = add i32 %load1, 1
+  %op2 = sub nsw i32 %load2, 1
+  %op3 = add nsw i32 %load3, 1
+  %op4 = sub i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+ 
diff --git a/test/Transforms/SLPVectorizer/X86/return.ll b/test/Transforms/SLPVectorizer/X86/return.ll
new file mode 100644
index 0000000..1a81c23
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/return.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "x86_64--linux-gnu"
+
+@a = common global [4 x double] zeroinitializer, align 8
+@b = common global [4 x double] zeroinitializer, align 8
+
+; [4], b[4];
+; double foo() {
+;  double sum =0;
+;  sum = (a[0]+b[0]) + (a[1]+b[1]);
+;  return sum;
+; }
+
+; CHECK-LABEL: @return1
+; CHECK: %0 = load <2 x double>*
+; CHECK: %1 = load <2 x double>*
+; CHECK: %2 = fadd <2 x double>
+
+define double @return1() {
+entry:
+  %a0 = load double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 0), align 8
+  %b0 = load double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 0), align 8
+  %add0 = fadd double %a0, %b0
+  %a1 = load double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 1), align 8
+  %b1 = load double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 1), align 8
+  %add1 = fadd double %a1, %b1
+  %add2 = fadd double %add0, %add1
+  ret double %add2
+}
+
+; double hadd(double *x) {
+;   return ((x[0] + x[2]) + (x[1] + x[3]));
+; }
+
+; CHECK-LABEL: @return2
+; CHECK: %1 = load <2 x double>*
+; CHECK: %3 = load <2 x double>* %2
+; CHECK: %4 = fadd <2 x double> %1, %3
+
+define double @return2(double* nocapture readonly %x) {
+entry:
+  %x0 = load double* %x, align 4
+  %arrayidx1 = getelementptr inbounds double* %x, i32 2
+  %x2 = load double* %arrayidx1, align 4
+  %add3 = fadd double %x0, %x2
+  %arrayidx2 = getelementptr inbounds double* %x, i32 1
+  %x1 = load double* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds double* %x, i32 3
+  %x3 = load double* %arrayidx3, align 4
+  %add4 = fadd double %x1, %x3
+  %add5 = fadd double %add3, %add4
+  ret double %add5
+}
diff --git a/test/Transforms/SLPVectorizer/X86/saxpy.ll b/test/Transforms/SLPVectorizer/X86/saxpy.ll
index 4626341..4b39d46 100644
--- a/test/Transforms/SLPVectorizer/X86/saxpy.ll
+++ b/test/Transforms/SLPVectorizer/X86/saxpy.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 ; SLP vectorization example from http://cs.stanford.edu/people/eschkufz/research/asplos291-schkufza.pdf
 ;CHECK: SAXPY
-;CHECK: mul <4 x i32>
+;CHECK: mul nsw <4 x i32>
 ;CHECK: ret
 
 define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a, i64 %i) {
diff --git a/test/Transforms/SLPVectorizer/X86/scheduling.ll b/test/Transforms/SLPVectorizer/X86/scheduling.ll
new file mode 100644
index 0000000..3b3bd80
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/scheduling.ll
@@ -0,0 +1,78 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+;CHECK-LABEL: @foo
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: %[[S1:.+]] = add nsw <4 x i32>
+;CHECK-DAG: store <4 x i32> %[[S1]]
+;CHECK-DAG: %[[A1:.+]] = add nsw i32
+;CHECK-DAG: %[[A2:.+]] = add nsw i32 %[[A1]]
+;CHECK-DAG: %[[A3:.+]] = add nsw i32 %[[A2]]
+;CHECK-DAG: %[[A4:.+]] = add nsw i32 %[[A3]]
+;CHECK: ret i32 %[[A4]] 
+
+define i32 @foo(i32* nocapture readonly %diff) #0 {
+entry:
+  %m2 = alloca [8 x [8 x i32]], align 16
+  %0 = bitcast [8 x [8 x i32]]* %m2 to i8*
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %a.088 = phi i32 [ 0, %entry ], [ %add52, %for.body ]
+  %1 = shl i64 %indvars.iv, 3
+  %arrayidx = getelementptr inbounds i32* %diff, i64 %1
+  %2 = load i32* %arrayidx, align 4
+  %3 = or i64 %1, 4
+  %arrayidx2 = getelementptr inbounds i32* %diff, i64 %3
+  %4 = load i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %4, %2
+  %arrayidx6 = getelementptr inbounds [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0
+  store i32 %add3, i32* %arrayidx6, align 16
+  %add10 = add nsw i32 %add3, %a.088
+  %5 = or i64 %1, 1
+  %arrayidx13 = getelementptr inbounds i32* %diff, i64 %5
+  %6 = load i32* %arrayidx13, align 4
+  %7 = or i64 %1, 5
+  %arrayidx16 = getelementptr inbounds i32* %diff, i64 %7
+  %8 = load i32* %arrayidx16, align 4
+  %add17 = add nsw i32 %8, %6
+  %arrayidx20 = getelementptr inbounds [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1
+  store i32 %add17, i32* %arrayidx20, align 4
+  %add24 = add nsw i32 %add10, %add17
+  %9 = or i64 %1, 2
+  %arrayidx27 = getelementptr inbounds i32* %diff, i64 %9
+  %10 = load i32* %arrayidx27, align 4
+  %11 = or i64 %1, 6
+  %arrayidx30 = getelementptr inbounds i32* %diff, i64 %11
+  %12 = load i32* %arrayidx30, align 4
+  %add31 = add nsw i32 %12, %10
+  %arrayidx34 = getelementptr inbounds [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 2
+  store i32 %add31, i32* %arrayidx34, align 8
+  %add38 = add nsw i32 %add24, %add31
+  %13 = or i64 %1, 3
+  %arrayidx41 = getelementptr inbounds i32* %diff, i64 %13
+  %14 = load i32* %arrayidx41, align 4
+  %15 = or i64 %1, 7
+  %arrayidx44 = getelementptr inbounds i32* %diff, i64 %15
+  %16 = load i32* %arrayidx44, align 4
+  %add45 = add nsw i32 %16, %14
+  %arrayidx48 = getelementptr inbounds [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 3
+  store i32 %add45, i32* %arrayidx48, align 4
+  %add52 = add nsw i32 %add38, %add45
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 8
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  %arraydecay = getelementptr inbounds [8 x [8 x i32]]* %m2, i64 0, i64 0
+  call void @ff([8 x i32]* %arraydecay) #1
+  ret i32 %add52
+}
+
+declare void @ff([8 x i32]*) #2
+
+
diff --git a/test/Transforms/SLPVectorizer/X86/unreachable.ll b/test/Transforms/SLPVectorizer/X86/unreachable.ll
new file mode 100644
index 0000000..8d60957
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/unreachable.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+; Check if the SLPVectorizer does not crash when handling
+; unreachable blocks with unscheduleable instructions.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+define void @foo(i32* nocapture %x) #0 {
+entry:
+  br label %bb2
+
+bb1:                                    ; an unreachable block
+  %t3 = getelementptr inbounds i32* %x, i64 4
+  %t4 = load i32* %t3, align 4
+  %t5 = getelementptr inbounds i32* %x, i64 5
+  %t6 = load i32* %t5, align 4
+  %bad = fadd float %bad, 0.000000e+00  ; <- an instruction with self dependency,
+                                        ;    but legal in unreachable code
+  %t7 = getelementptr inbounds i32* %x, i64 6
+  %t8 = load i32* %t7, align 4
+  %t9 = getelementptr inbounds i32* %x, i64 7
+  %t10 = load i32* %t9, align 4
+  br label %bb2
+
+bb2:
+  %t1.0 = phi i32 [ %t4, %bb1 ], [ 2, %entry ]
+  %t2.0 = phi i32 [ %t6, %bb1 ], [ 2, %entry ]
+  %t3.0 = phi i32 [ %t8, %bb1 ], [ 2, %entry ]
+  %t4.0 = phi i32 [ %t10, %bb1 ], [ 2, %entry ]
+  store i32 %t1.0, i32* %x, align 4
+  %t12 = getelementptr inbounds i32* %x, i64 1
+  store i32 %t2.0, i32* %t12, align 4
+  %t13 = getelementptr inbounds i32* %x, i64 2
+  store i32 %t3.0, i32* %t13, align 4
+  %t14 = getelementptr inbounds i32* %x, i64 3
+  store i32 %t4.0, i32* %t14, align 4
+  ret void
+}
+
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll
index 8d82964..f287012 100644
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -501,3 +501,102 @@ end:
 ; CHECK-NOT: load
 ; CHECK: ret float %[[phi]]
 }
+
+; Verifies we fixed PR20425. We should be able to promote all alloca's to
+; registers in this test.
+;
+; %0 = slice
+; %1 = slice
+; %2 = phi(%0, %1) // == slice
+define float @simplify_phi_nodes_that_equal_slice(i1 %cond, float* %temp) {
+; CHECK-LABEL: @simplify_phi_nodes_that_equal_slice(
+entry:
+  %arr = alloca [4 x float], align 4
+; CHECK-NOT: alloca
+  br i1 %cond, label %then, label %else
+
+then:
+  %0 = getelementptr inbounds [4 x float]* %arr, i64 0, i64 3
+  store float 1.000000e+00, float* %0, align 4
+  br label %merge
+
+else:
+  %1 = getelementptr inbounds [4 x float]* %arr, i64 0, i64 3
+  store float 2.000000e+00, float* %1, align 4
+  br label %merge
+
+merge:
+  %2 = phi float* [ %0, %then ], [ %1, %else ]
+  store float 0.000000e+00, float* %temp, align 4
+  %3 = load float* %2, align 4
+  ret float %3
+}
+
+; A slightly complicated example for PR20425.
+;
+; %0 = slice
+; %1 = phi(%0) // == slice
+; %2 = slice
+; %3 = phi(%1, %2) // == slice
+define float @simplify_phi_nodes_that_equal_slice_2(i1 %cond, float* %temp) {
+; CHECK-LABEL: @simplify_phi_nodes_that_equal_slice_2(
+entry:
+  %arr = alloca [4 x float], align 4
+; CHECK-NOT: alloca
+  br i1 %cond, label %then, label %else
+
+then:
+  %0 = getelementptr inbounds [4 x float]* %arr, i64 0, i64 3
+  store float 1.000000e+00, float* %0, align 4
+  br label %then2
+
+then2:
+  %1 = phi float* [ %0, %then ]
+  store float 2.000000e+00, float* %1, align 4
+  br label %merge
+
+else:
+  %2 = getelementptr inbounds [4 x float]* %arr, i64 0, i64 3
+  store float 3.000000e+00, float* %2, align 4
+  br label %merge
+
+merge:
+  %3 = phi float* [ %1, %then2 ], [ %2, %else ]
+  store float 0.000000e+00, float* %temp, align 4
+  %4 = load float* %3, align 4
+  ret float %4
+}
+
+%struct.S = type { i32 }
+
+; Verifies we fixed PR20822. We have a foldable PHI feeding a speculatable PHI
+; which requires the rewriting of the speculated PHI to handle insertion
+; when the incoming pointer is itself from a PHI node. We would previously
+; insert a bitcast instruction *before* a PHI, producing an invalid module;
+; make sure we insert *after* the first non-PHI instruction.
+define void @PR20822() {
+; CHECK-LABEL: @PR20822(
+entry:
+  %f = alloca %struct.S, align 4
+; CHECK: %[[alloca:.*]] = alloca
+  br i1 undef, label %if.end, label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  br label %if.end
+
+if.end:                                           ; preds = %for.cond, %entry
+  %f2 = phi %struct.S* [ %f, %entry ], [ %f, %for.cond ]
+; CHECK: phi i32
+; CHECK: %[[cast:.*]] = bitcast i32* %[[alloca]] to %struct.S*
+  phi i32 [ undef, %entry ], [ undef, %for.cond ]
+  br i1 undef, label %if.then5, label %if.then2
+
+if.then2:                                         ; preds = %if.end
+  br label %if.then5
+
+if.then5:                                         ; preds = %if.then2, %if.end
+  %f1 = phi %struct.S* [ undef, %if.then2 ], [ %f2, %if.end ]
+; CHECK: phi {{.*}} %[[cast]]
+  store %struct.S undef, %struct.S* %f1, align 4
+  ret void
+}
diff --git a/test/Transforms/SROA/slice-width.ll b/test/Transforms/SROA/slice-width.ll
index 179780b..ff66dcc 100644
--- a/test/Transforms/SROA/slice-width.ll
+++ b/test/Transforms/SROA/slice-width.ll
@@ -1,7 +1,8 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
-target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 define void @no_split_on_non_byte_width(i32) {
 ; This tests that allocas are not split into slices that are not byte width multiple
@@ -23,3 +24,83 @@ load_i1:
   %t1 = load i1* %p1
   ret void
 }
+
+; PR18726: Check that we use memcpy and memset to fill out padding when we have
+; a slice with a simple single type whose store size is smaller than the slice
+; size.
+
+%union.Foo = type { x86_fp80, i64, i64 }
+
+@foo_copy_source = external constant %union.Foo
+@i64_sink = global i64 0
+
+define void @memcpy_fp80_padding() {
+  %x = alloca %union.Foo
+
+  ; Copy from a global.
+  %x_i8 = bitcast %union.Foo* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x_i8, i8* bitcast (%union.Foo* @foo_copy_source to i8*), i32 32, i32 16, i1 false)
+
+  ; Access a slice of the alloca to trigger SROA.
+  %mid_p = getelementptr %union.Foo* %x, i32 0, i32 1
+  %elt = load i64* %mid_p
+  store i64 %elt, i64* @i64_sink
+  ret void
+}
+; CHECK-LABEL: define void @memcpy_fp80_padding
+; CHECK: alloca x86_fp80
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32
+; CHECK: load i64* getelementptr inbounds (%union.Foo* @foo_copy_source, i64 0, i32 1)
+; CHECK: load i64* getelementptr inbounds (%union.Foo* @foo_copy_source, i64 0, i32 2)
+
+define void @memset_fp80_padding() {
+  %x = alloca %union.Foo
+
+  ; Set to all ones.
+  %x_i8 = bitcast %union.Foo* %x to i8*
+  call void @llvm.memset.p0i8.i32(i8* %x_i8, i8 -1, i32 32, i32 16, i1 false)
+
+  ; Access a slice of the alloca to trigger SROA.
+  %mid_p = getelementptr %union.Foo* %x, i32 0, i32 1
+  %elt = load i64* %mid_p
+  store i64 %elt, i64* @i64_sink
+  ret void
+}
+; CHECK-LABEL: define void @memset_fp80_padding
+; CHECK: alloca x86_fp80
+; CHECK: call void @llvm.memset.p0i8.i32(i8* %{{.*}}, i8 -1, i32 16, i32 16, i1 false)
+; CHECK: store i64 -1, i64* @i64_sink
+
+%S.vec3float = type { float, float, float }
+%U.vec3float = type { <4 x float> }
+
+declare i32 @memcpy_vec3float_helper(%S.vec3float*)
+
+define i32 @memcpy_vec3float_widening(%S.vec3float* %x) {
+; CHECK-LABEL: @memcpy_vec3float_widening(
+; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte
+; vector store, hence accidentally putting gibberish onto the stack.
+entry:
+  ; Create a temporary variable %tmp1 and copy %x[0] into it
+  %tmp1 = alloca %S.vec3float, align 4
+  %0 = bitcast %S.vec3float* %tmp1 to i8*
+  %1 = bitcast %S.vec3float* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 12, i32 4, i1 false)
+
+  ; The following block does nothing; but appears to confuse SROA
+  %unused1 = bitcast %S.vec3float* %tmp1 to %U.vec3float*
+  %unused2 = getelementptr inbounds %U.vec3float* %unused1, i32 0, i32 0
+  %unused3 = load <4 x float>* %unused2, align 1
+
+  ; Create a second temporary and copy %tmp1 into it
+  %tmp2 = alloca %S.vec3float, align 4
+  %2 = bitcast %S.vec3float* %tmp2 to i8*
+  %3 = bitcast %S.vec3float* %tmp1 to i8*
+; CHECK: alloca
+; CHECK-NOT: store <4 x float>
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %3, i32 12, i32 4, i1 false)
+
+  %result = call i32 @memcpy_vec3float_helper(%S.vec3float* %tmp2)
+  ret i32 %result
+; CHECK: ret i32 %result
+}
diff --git a/test/Transforms/SROA/vector-lifetime-intrinsic.ll b/test/Transforms/SROA/vector-lifetime-intrinsic.ll
new file mode 100644
index 0000000..30c93b0
--- /dev/null
+++ b/test/Transforms/SROA/vector-lifetime-intrinsic.ll
@@ -0,0 +1,31 @@
+; RUN: opt -sroa -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:32-i64:32-v32:32-n32-S64"
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #0
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #0
+
+; CHECK: @wombat
+; CHECK-NOT: alloca
+; CHECK: ret void
+define void @wombat(<4 x float> %arg1) {
+bb:
+  %tmp = alloca <4 x float>, align 16
+  %tmp8 = bitcast <4 x float>* %tmp to i8*
+  call void @llvm.lifetime.start(i64 16, i8* %tmp8)
+  store <4 x float> %arg1, <4 x float>* %tmp, align 16
+  %tmp17 = bitcast <4 x float>* %tmp to <3 x float>*
+  %tmp18 = load <3 x float>* %tmp17
+  %tmp20 = bitcast <4 x float>* %tmp to i8*
+  call void @llvm.lifetime.end(i64 16, i8* %tmp20)
+  call void @wombat3(<3 x float> %tmp18)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @wombat3(<3 x float>) #0
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll
index 9c9f6a1..830a22a 100644
--- a/test/Transforms/SROA/vector-promotion.ll
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -468,3 +468,139 @@ entry:
 ; CHECK: %[[insert:.*]] = or i32 %{{.*}}, %[[trunc]]
 ; CHECK: ret i32 %[[insert]]
 }
+
+define i32 @test7(<2 x i32> %x, <2 x i32> %y) {
+; Test that we can promote to vectors when the alloca doesn't mention any vector types.
+; CHECK-LABEL: @test7(
+entry:
+	%a = alloca [2 x i64]
+  %a.cast = bitcast [2 x i64]* %a to [2 x <2 x i32>]*
+; CHECK-NOT: alloca
+
+  %a.x = getelementptr inbounds [2 x <2 x i32>]* %a.cast, i64 0, i64 0
+  store <2 x i32> %x, <2 x i32>* %a.x
+  %a.y = getelementptr inbounds [2 x <2 x i32>]* %a.cast, i64 0, i64 1
+  store <2 x i32> %y, <2 x i32>* %a.y
+; CHECK-NOT: store
+
+  %a.tmp1 = getelementptr inbounds [2 x <2 x i32>]* %a.cast, i64 0, i64 0, i64 1
+  %tmp1 = load i32* %a.tmp1
+  %a.tmp2 = getelementptr inbounds [2 x <2 x i32>]* %a.cast, i64 0, i64 1, i64 1
+  %tmp2 = load i32* %a.tmp2
+  %a.tmp3 = getelementptr inbounds [2 x <2 x i32>]* %a.cast, i64 0, i64 1, i64 0
+  %tmp3 = load i32* %a.tmp3
+; CHECK-NOT: load
+; CHECK:      extractelement <2 x i32> %x, i32 1
+; CHECK-NEXT: extractelement <2 x i32> %y, i32 1
+; CHECK-NEXT: extractelement <2 x i32> %y, i32 0
+
+  %tmp4 = add i32 %tmp1, %tmp2
+  %tmp5 = add i32 %tmp3, %tmp4
+  ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define i32 @test8(<2 x i32> %x) {
+; Ensure that we can promote an alloca that doesn't mention a vector type based
+; on a single store with a vector type.
+; CHECK-LABEL: @test8(
+entry:
+	%a = alloca i64
+  %a.vec = bitcast i64* %a to <2 x i32>*
+  %a.i32 = bitcast i64* %a to i32*
+; CHECK-NOT: alloca
+
+  store <2 x i32> %x, <2 x i32>* %a.vec
+; CHECK-NOT: store
+
+  %tmp1 = load i32* %a.i32
+  %a.tmp2 = getelementptr inbounds i32* %a.i32, i64 1
+  %tmp2 = load i32* %a.tmp2
+; CHECK-NOT: load
+; CHECK:      extractelement <2 x i32> %x, i32 0
+; CHECK-NEXT: extractelement <2 x i32> %x, i32 1
+
+  %tmp4 = add i32 %tmp1, %tmp2
+  ret i32 %tmp4
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define <2 x i32> @test9(i32 %x, i32 %y) {
+; Ensure that we can promote an alloca that doesn't mention a vector type based
+; on a single load with a vector type.
+; CHECK-LABEL: @test9(
+entry:
+	%a = alloca i64
+  %a.vec = bitcast i64* %a to <2 x i32>*
+  %a.i32 = bitcast i64* %a to i32*
+; CHECK-NOT: alloca
+
+  store i32 %x, i32* %a.i32
+  %a.tmp2 = getelementptr inbounds i32* %a.i32, i64 1
+  store i32 %y, i32* %a.tmp2
+; CHECK-NOT: store
+; CHECK:      %[[V1:.*]] = insertelement <2 x i32> undef, i32 %x, i32 0
+; CHECK-NEXT: %[[V2:.*]] = insertelement <2 x i32> %[[V1]], i32 %y, i32 1
+
+  %result = load <2 x i32>* %a.vec
+; CHECK-NOT:  load
+
+  ret <2 x i32> %result
+; CHECK-NEXT: ret <2 x i32> %[[V2]]
+}
+
+define <2 x i32> @test10(<4 x i16> %x, i32 %y) {
+; If there are multiple different vector types used, we should select the one
+; with the widest elements.
+; CHECK-LABEL: @test10(
+entry:
+	%a = alloca i64
+  %a.vec1 = bitcast i64* %a to <2 x i32>*
+  %a.vec2 = bitcast i64* %a to <4 x i16>*
+  %a.i32 = bitcast i64* %a to i32*
+; CHECK-NOT: alloca
+
+  store <4 x i16> %x, <4 x i16>* %a.vec2
+  %a.tmp2 = getelementptr inbounds i32* %a.i32, i64 1
+  store i32 %y, i32* %a.tmp2
+; CHECK-NOT: store
+; CHECK:      %[[V1:.*]] = bitcast <4 x i16> %x to <2 x i32>
+; CHECK-NEXT: %[[V2:.*]] = insertelement <2 x i32> %[[V1]], i32 %y, i32 1
+
+  %result = load <2 x i32>* %a.vec1
+; CHECK-NOT:  load
+
+  ret <2 x i32> %result
+; CHECK-NEXT: ret <2 x i32> %[[V2]]
+}
+
+define <2 x float> @test11(<4 x i16> %x, i32 %y) {
+; If there are multiple different element types for different vector types,
+; pick the integer types. This isn't really important, but seems like the best
+; heuristic for making a deterministic decision.
+; CHECK-LABEL: @test11(
+entry:
+	%a = alloca i64
+  %a.vec1 = bitcast i64* %a to <2 x float>*
+  %a.vec2 = bitcast i64* %a to <4 x i16>*
+  %a.i32 = bitcast i64* %a to i32*
+; CHECK-NOT: alloca
+
+  store <4 x i16> %x, <4 x i16>* %a.vec2
+  %a.tmp2 = getelementptr inbounds i32* %a.i32, i64 1
+  store i32 %y, i32* %a.tmp2
+; CHECK-NOT: store
+; CHECK:      %[[V1:.*]] = bitcast i32 %y to <2 x i16>
+; CHECK-NEXT: %[[V2:.*]] = shufflevector <2 x i16> %[[V1]], <2 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+; CHECK-NEXT: %[[V3:.*]] = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i16> %[[V2]], <4 x i16> %x
+; CHECK-NEXT: %[[V4:.*]] = bitcast <4 x i16> %[[V3]] to <2 x float>
+
+  %result = load <2 x float>* %a.vec1
+; CHECK-NOT:  load
+
+  ret <2 x float> %result
+; CHECK-NEXT: ret <2 x float> %[[V4]]
+}
diff --git a/test/Transforms/SampleProfile/Inputs/fnptr.binprof b/test/Transforms/SampleProfile/Inputs/fnptr.binprof
new file mode 100644
index 0000000..14d7fd5
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/fnptr.binprof
diff --git a/test/Transforms/SampleProfile/Inputs/fnptr.prof b/test/Transforms/SampleProfile/Inputs/fnptr.prof
new file mode 100644
index 0000000..6a3b4e2
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/fnptr.prof
@@ -0,0 +1,12 @@
+_Z3fooi:7711:610
+1: 610
+_Z3bari:20301:1437
+1: 1437
+main:184019:0
+4: 534
+6: 2080
+9: 2064 _Z3bari:1471 _Z3fooi:631
+5.1: 1075
+5: 1075
+7: 534
+4.2: 534
diff --git a/test/Transforms/SampleProfile/branch.ll b/test/Transforms/SampleProfile/branch.ll
index 65f1f17..e646609 100644
--- a/test/Transforms/SampleProfile/branch.ll
+++ b/test/Transforms/SampleProfile/branch.ll
@@ -32,8 +32,8 @@ define i32 @main(i32 %argc, i8** nocapture readonly %argv) #0 {
 ; CHECK: Printing analysis 'Branch Probability Analysis' for function 'main':
 
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !13), !dbg !27
-  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !14), !dbg !27
+  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !13, metadata !{}), !dbg !27
+  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !14, metadata !{}), !dbg !27
   %cmp = icmp slt i32 %argc, 2, !dbg !28
   br i1 %cmp, label %return, label %if.end, !dbg !28
 ; CHECK: edge entry -> return probability is 1 / 2 = 50%
@@ -43,7 +43,7 @@ if.end:                                           ; preds = %entry
   %arrayidx = getelementptr inbounds i8** %argv, i64 1, !dbg !30
   %0 = load i8** %arrayidx, align 8, !dbg !30, !tbaa !31
   %call = tail call i32 @atoi(i8* %0) #4, !dbg !30
-  tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !17), !dbg !30
+  tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !17, metadata !{}), !dbg !30
   %cmp1 = icmp sgt i32 %call, 100, !dbg !35
   br i1 %cmp1, label %for.body, label %if.end6, !dbg !35
 ; CHECK: edge if.end -> for.body probability is 1 / 2 = 50%
@@ -55,14 +55,14 @@ for.body:                                         ; preds = %if.end, %for.body
   %add = fadd double %s.015, 3.049000e+00, !dbg !36
   %conv = sitofp i32 %u.016 to double, !dbg !36
   %add4 = fadd double %add, %conv, !dbg !36
-  tail call void @llvm.dbg.value(metadata !{double %add4}, i64 0, metadata !18), !dbg !36
+  tail call void @llvm.dbg.value(metadata !{double %add4}, i64 0, metadata !18, metadata !{}), !dbg !36
   %div = fdiv double 3.940000e+00, %s.015, !dbg !37
   %mul = fmul double %div, 3.200000e-01, !dbg !37
   %add5 = fadd double %add4, %mul, !dbg !37
   %sub = fsub double %add4, %add5, !dbg !37
-  tail call void @llvm.dbg.value(metadata !{double %sub}, i64 0, metadata !18), !dbg !37
+  tail call void @llvm.dbg.value(metadata !{double %sub}, i64 0, metadata !18, metadata !{}), !dbg !37
   %inc = add nsw i32 %u.016, 1, !dbg !38
-  tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !21), !dbg !38
+  tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !21, metadata !{}), !dbg !38
   %exitcond = icmp eq i32 %inc, %call, !dbg !38
   br i1 %exitcond, label %if.end6, label %for.body, !dbg !38
 ; CHECK: edge for.body -> if.end6 probability is 1 / 10227 = 0.00977804
@@ -86,7 +86,7 @@ declare i32 @atoi(i8* nocapture) #1
 declare i32 @printf(i8* nocapture readonly, ...) #2
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #3
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #3
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
@@ -98,37 +98,37 @@ attributes #4 = { nounwind readonly }
 !llvm.module.flags = !{!25, !42}
 !llvm.ident = !{!26}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 192896) (llvm/trunk 192895)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [./branch.cc] [DW_LANG_C_plus_plus]
+!0 = metadata !{metadata !"0x11\004\00clang version 3.4 (trunk 192896) (llvm/trunk 192895)\001\00\000\00\000", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [./branch.cc] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !"branch.cc", metadata !"."}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 4, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !12, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [main]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [./branch.cc]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !"0x2e\00main\00main\00\004\000\001\000\006\00256\001\004", metadata !1, metadata !5, metadata !6, null, i32 (i32, i8**)* @main, null, null, metadata !12} ; [ DW_TAG_subprogram ] [line 4] [def] [main]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [./branch.cc]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{metadata !8, metadata !8, metadata !9}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
-!11 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!8 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", null, null, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!10 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", null, null, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!11 = metadata !{metadata !"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
 !12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !17, metadata !18, metadata !21, metadata !23}
-!13 = metadata !{i32 786689, metadata !4, metadata !"argc", metadata !5, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 4]
-!14 = metadata !{i32 786689, metadata !4, metadata !"argv", metadata !5, i32 33554436, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 4]
-!15 = metadata !{i32 786688, metadata !4, metadata !"result", metadata !5, i32 7, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 7]
-!16 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
-!17 = metadata !{i32 786688, metadata !4, metadata !"limit", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [limit] [line 8]
-!18 = metadata !{i32 786688, metadata !19, metadata !"s", metadata !5, i32 10, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 10]
-!19 = metadata !{i32 786443, metadata !1, metadata !20, i32 9, i32 0, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [./branch.cc]
-!20 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [./branch.cc]
-!21 = metadata !{i32 786688, metadata !22, metadata !"u", metadata !5, i32 11, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [u] [line 11]
-!22 = metadata !{i32 786443, metadata !1, metadata !19, i32 11, i32 0, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [./branch.cc]
-!23 = metadata !{i32 786688, metadata !24, metadata !"x", metadata !5, i32 12, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [x] [line 12]
-!24 = metadata !{i32 786443, metadata !1, metadata !22, i32 11, i32 0, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!13 = metadata !{metadata !"0x101\00argc\0016777220\000", metadata !4, metadata !5, metadata !8} ; [ DW_TAG_arg_variable ] [argc] [line 4]
+!14 = metadata !{metadata !"0x101\00argv\0033554436\000", metadata !4, metadata !5, metadata !9} ; [ DW_TAG_arg_variable ] [argv] [line 4]
+!15 = metadata !{metadata !"0x100\00result\007\000", metadata !4, metadata !5, metadata !16} ; [ DW_TAG_auto_variable ] [result] [line 7]
+!16 = metadata !{metadata !"0x24\00double\000\0064\0064\000\000\004", null, null} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!17 = metadata !{metadata !"0x100\00limit\008\000", metadata !4, metadata !5, metadata !8} ; [ DW_TAG_auto_variable ] [limit] [line 8]
+!18 = metadata !{metadata !"0x100\00s\0010\000", metadata !19, metadata !5, metadata !16} ; [ DW_TAG_auto_variable ] [s] [line 10]
+!19 = metadata !{metadata !"0xb\009\000\000", metadata !1, metadata !20} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!20 = metadata !{metadata !"0xb\009\000\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!21 = metadata !{metadata !"0x100\00u\0011\000", metadata !22, metadata !5, metadata !8} ; [ DW_TAG_auto_variable ] [u] [line 11]
+!22 = metadata !{metadata !"0xb\0011\000\000", metadata !1, metadata !19} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!23 = metadata !{metadata !"0x100\00x\0012\000", metadata !24, metadata !5, metadata !16} ; [ DW_TAG_auto_variable ] [x] [line 12]
+!24 = metadata !{metadata !"0xb\0011\000\000", metadata !1, metadata !22} ; [ DW_TAG_lexical_block ] [./branch.cc]
 !25 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
 !26 = metadata !{metadata !"clang version 3.4 (trunk 192896) (llvm/trunk 192895)"}
 !27 = metadata !{i32 4, i32 0, metadata !4, null}
 !28 = metadata !{i32 5, i32 0, metadata !29, null}
-!29 = metadata !{i32 786443, metadata !1, metadata !4, i32 5, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./branch.cc]
-!30 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
+!29 = metadata !{metadata !"0xb\005\000\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!30 = metadata !{i32 8, i32 0, metadata !4, null}
 !31 = metadata !{metadata !32, metadata !32, i64 0}
 !32 = metadata !{metadata !"any pointer", metadata !33, i64 0}
 !33 = metadata !{metadata !"omnipotent char", metadata !34, i64 0}
@@ -140,4 +140,4 @@ attributes #4 = { nounwind readonly }
 !39 = metadata !{i32 20, i32 0, metadata !4, null}
 !40 = metadata !{i32 21, i32 0, metadata !4, null}
 !41 = metadata !{i32 22, i32 0, metadata !4, null}
-!42 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!42 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/SampleProfile/calls.ll b/test/Transforms/SampleProfile/calls.ll
index 381be87..c39472b 100644
--- a/test/Transforms/SampleProfile/calls.ll
+++ b/test/Transforms/SampleProfile/calls.ll
@@ -15,7 +15,12 @@
 ;   printf("sum is %d\n", s);
 ;   return 0;
 ; }
-
+;
+; Note that this test is missing the llvm.dbg.cu annotation. This emulates
+; the effect of the user having only used -fprofile-sample-use without
+; -gmlt when invoking the driver. In those cases, we need to track source
+; location information but we do not have to generate debug info in the
+; final binary.
 @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
 
 ; Function Attrs: nounwind uwtable
@@ -84,33 +89,32 @@ while.end:                                        ; preds = %while.cond
 
 declare i32 @printf(i8*, ...) #2
 
-!llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !9}
 !llvm.ident = !{!10}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [./calls.cc] [DW_LANG_C_plus_plus]
+!0 = metadata !{metadata !"0x11\004\00clang version 3.5 \000\00\000\00\000", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [./calls.cc] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !"calls.cc", metadata !"."}
 !2 = metadata !{}
 !3 = metadata !{metadata !4, metadata !7}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"sum", metadata !"sum", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i32)* @_Z3sumii, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [sum]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [./calls.cc]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!4 = metadata !{metadata !"0x2e\00sum\00sum\00\003\000\001\000\006\00256\000\003", metadata !1, metadata !5, metadata !6, null, i32 (i32, i32)* @_Z3sumii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 3] [def] [sum]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [./calls.cc]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !"0x2e\00main\00main\00\007\000\001\000\006\00256\000\007", metadata !1, metadata !5, metadata !6, null, i32 ()* @main, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
 !8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
 !10 = metadata !{metadata !"clang version 3.5 "}
 !11 = metadata !{i32 4, i32 0, metadata !4, null}
-!12 = metadata !{i32 8, i32 0, metadata !7, null} ; [ DW_TAG_imported_declaration ]
+!12 = metadata !{i32 8, i32 0, metadata !7, null}
 !13 = metadata !{i32 9, i32 0, metadata !7, null}
 !14 = metadata !{i32 9, i32 0, metadata !15, null}
-!15 = metadata !{i32 786443, metadata !1, metadata !7, i32 9, i32 0, i32 1, i32 1} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!15 = metadata !{metadata !"0xb\001", metadata !1, metadata !7} ; [ DW_TAG_lexical_block ] [./calls.cc]
 !16 = metadata !{i32 10, i32 0, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !1, metadata !7, i32 10, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!17 = metadata !{metadata !"0xb\0010\000\000", metadata !1, metadata !7} ; [ DW_TAG_lexical_block ] [./calls.cc]
 !18 = metadata !{i32 10, i32 0, metadata !19, null}
-!19 = metadata !{i32 786443, metadata !1, metadata !17, i32 10, i32 0, i32 1, i32 2} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!19 = metadata !{metadata !"0xb\001", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ] [./calls.cc]
 !20 = metadata !{i32 10, i32 0, metadata !21, null}
-!21 = metadata !{i32 786443, metadata !1, metadata !17, i32 10, i32 0, i32 2, i32 3} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!21 = metadata !{metadata !"0xb\002", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ] [./calls.cc]
 !22 = metadata !{i32 10, i32 0, metadata !23, null}
-!23 = metadata !{i32 786443, metadata !1, metadata !17, i32 10, i32 0, i32 3, i32 4} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!23 = metadata !{metadata !"0xb\003", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ] [./calls.cc]
 !24 = metadata !{i32 11, i32 0, metadata !7, null}
 !25 = metadata !{i32 12, i32 0, metadata !7, null}
diff --git a/test/Transforms/SampleProfile/discriminator.ll b/test/Transforms/SampleProfile/discriminator.ll
index 0f773a5..73c73d1 100644
--- a/test/Transforms/SampleProfile/discriminator.ll
+++ b/test/Transforms/SampleProfile/discriminator.ll
@@ -66,25 +66,25 @@ while.end:                                        ; preds = %while.cond
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [discriminator.c] [DW_LANG_C99]
+!0 = metadata !{metadata !"0x11\0012\00clang version 3.5 \000\00\000\00\000", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [discriminator.c] [DW_LANG_C99]
 !1 = metadata !{metadata !"discriminator.c", metadata !"."}
 !2 = metadata !{}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [discriminator.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", metadata !1, metadata !5, metadata !6, null, i32 (i32)* @foo, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [discriminator.c]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
 !9 = metadata !{metadata !"clang version 3.5 "}
 !10 = metadata !{i32 2, i32 0, metadata !4, null}
 !11 = metadata !{i32 3, i32 0, metadata !4, null}
 !12 = metadata !{i32 3, i32 0, metadata !13, null}
-!13 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 1, i32 2} ; [ DW_TAG_lexical_block ] [discriminator.c]
+!13 = metadata !{metadata !"0xb\001", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [discriminator.c]
 !14 = metadata !{i32 4, i32 0, metadata !15, null}
-!15 = metadata !{i32 786443, metadata !1, metadata !16, i32 4, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [discriminator.c]
-!16 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [discriminator.c]
+!15 = metadata !{metadata !"0xb\004\000\001", metadata !1, metadata !16} ; [ DW_TAG_lexical_block ] [discriminator.c]
+!16 = metadata !{metadata !"0xb\003\000\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [discriminator.c]
 !17 = metadata !{i32 4, i32 0, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !1, metadata !15, i32 4, i32 0, i32 1, i32 3} ; [ DW_TAG_lexical_block ] [discriminator.c]
+!18 = metadata !{metadata !"0xb\001", metadata !1, metadata !15} ; [ DW_TAG_lexical_block ] [discriminator.c]
 !19 = metadata !{i32 5, i32 0, metadata !16, null}
 !20 = metadata !{i32 6, i32 0, metadata !16, null}
 !21 = metadata !{i32 7, i32 0, metadata !4, null}
diff --git a/test/Transforms/SampleProfile/fnptr.ll b/test/Transforms/SampleProfile/fnptr.ll
new file mode 100644
index 0000000..f78123c
--- /dev/null
+++ b/test/Transforms/SampleProfile/fnptr.ll
@@ -0,0 +1,155 @@
+; The two profiles used in this test are the same but encoded in different
+; formats. This checks that we produce the same profile annotations regardless
+; of the profile format.
+;
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s
+
+; CHECK:   edge for.body3 -> if.then probability is 534 / 2598 = 20.5543%
+; CHECK:   edge for.body3 -> if.else probability is 2064 / 2598 = 79.4457%
+; CHECK:   edge for.inc -> for.inc12 probability is 1052 / 2598 = 40.4927%
+; CHECK:   edge for.inc -> for.body3 probability is 1546 / 2598 = 59.5073%
+; CHECK:   edge for.inc12 -> for.end14 probability is 518 / 1052 = 49.2395%
+; CHECK:   edge for.inc12 -> for.cond1.preheader probability is 534 / 1052 = 50.7605%
+
+; Original C++ test case.
+;
+; #include <stdlib.h>
+; #include <math.h>
+; #include <stdio.h>
+;
+; #define N 10000
+; #define M 6000
+;
+; double foo(int x) {
+;   return x * sin((double)x);
+; }
+;
+; double bar(int x) {
+;   return x - cos((double)x);
+; }
+;
+; int main() {
+;   double (*fptr)(int);
+;   double S = 0;
+;   for (int i = 0; i < N; i++)
+;     for (int j = 0; j < M; j++) {
+;       fptr = (rand() % 100 < 30) ? foo : bar;
+;       if (rand() % 100 < 10)
+;         S += (*fptr)(i + j * 300);
+;       else
+;         S += (*fptr)(i - j / 840);
+;     }
+;   printf("S = %lf\n", S);
+;   return 0;
+; }
+
+@.str = private unnamed_addr constant [9 x i8] c"S = %lf\0A\00", align 1
+
+define double @_Z3fooi(i32 %x) #0 {
+entry:
+  %conv = sitofp i32 %x to double, !dbg !2
+  %call = tail call double @sin(double %conv) #3, !dbg !8
+  %mul = fmul double %conv, %call, !dbg !8
+  ret double %mul, !dbg !8
+}
+
+declare double @sin(double) #1
+
+define double @_Z3bari(i32 %x) #0 {
+entry:
+  %conv = sitofp i32 %x to double, !dbg !9
+  %call = tail call double @cos(double %conv) #3, !dbg !11
+  %sub = fsub double %conv, %call, !dbg !11
+  ret double %sub, !dbg !11
+}
+
+declare double @cos(double) #1
+
+define i32 @main() #2 {
+entry:
+  br label %for.cond1.preheader, !dbg !12
+
+for.cond1.preheader:                              ; preds = %for.inc12, %entry
+  %i.025 = phi i32 [ 0, %entry ], [ %inc13, %for.inc12 ]
+  %S.024 = phi double [ 0.000000e+00, %entry ], [ %S.2.lcssa, %for.inc12 ]
+  br label %for.body3, !dbg !14
+
+for.body3:                                        ; preds = %for.inc, %for.cond1.preheader
+  %j.023 = phi i32 [ 0, %for.cond1.preheader ], [ %inc, %for.inc ]
+  %S.122 = phi double [ %S.024, %for.cond1.preheader ], [ %S.2, %for.inc ]
+  %call = tail call i32 @rand() #3, !dbg !15
+  %rem = srem i32 %call, 100, !dbg !15
+  %cmp4 = icmp slt i32 %rem, 30, !dbg !15
+  %_Z3fooi._Z3bari = select i1 %cmp4, double (i32)* @_Z3fooi, double (i32)* @_Z3bari, !dbg !15
+  %call5 = tail call i32 @rand() #3, !dbg !16
+  %rem6 = srem i32 %call5, 100, !dbg !16
+  %cmp7 = icmp slt i32 %rem6, 10, !dbg !16
+  br i1 %cmp7, label %if.then, label %if.else, !dbg !16, !prof !17
+
+if.then:                                          ; preds = %for.body3
+  %mul = mul nsw i32 %j.023, 300, !dbg !18
+  %add = add nsw i32 %mul, %i.025, !dbg !18
+  %call8 = tail call double %_Z3fooi._Z3bari(i32 %add), !dbg !18
+  br label %for.inc, !dbg !18
+
+if.else:                                          ; preds = %for.body3
+  %div = sdiv i32 %j.023, 840, !dbg !19
+  %sub = sub nsw i32 %i.025, %div, !dbg !19
+  %call10 = tail call double %_Z3fooi._Z3bari(i32 %sub), !dbg !19
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %if.else
+  %call8.pn = phi double [ %call8, %if.then ], [ %call10, %if.else ]
+  %S.2 = fadd double %S.122, %call8.pn, !dbg !18
+  %inc = add nsw i32 %j.023, 1, !dbg !20
+  %exitcond = icmp eq i32 %j.023, 5999, !dbg !14
+  br i1 %exitcond, label %for.inc12, label %for.body3, !dbg !14, !prof !21
+
+for.inc12:                                        ; preds = %for.inc
+  %S.2.lcssa = phi double [ %S.2, %for.inc ]
+  %inc13 = add nsw i32 %i.025, 1, !dbg !22
+  %exitcond26 = icmp eq i32 %i.025, 9999, !dbg !12
+  br i1 %exitcond26, label %for.end14, label %for.cond1.preheader, !dbg !12, !prof !23
+
+for.end14:                                        ; preds = %for.inc12
+  %S.2.lcssa.lcssa = phi double [ %S.2.lcssa, %for.inc12 ]
+  %call15 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i64 0, i64 0), double %S.2.lcssa.lcssa), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+; Function Attrs: nounwind
+declare i32 @rand() #1
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture readonly, ...) #1
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = metadata !{i32 2, metadata !"Debug Info Version", i32 2}
+!1 = metadata !{metadata !"clang version 3.6.0 "}
+!2 = metadata !{i32 9, i32 3, metadata !3, null}
+!3 = metadata !{metadata !"0x2e\00foo\00foo\00\008\000\001\000\000\00256\001\008", metadata !4, metadata !5, metadata !6, null, double (i32)* @_Z3fooi, null, null, metadata !7} ; [ DW_TAG_subprogram ] [line 8] [def] [foo]
+!4 = metadata !{metadata !"fnptr.cc", metadata !"."}
+!5 = metadata !{metadata !"0x29", metadata !4}    ; [ DW_TAG_file_type ] [./fnptr.cc]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", null, null, null, metadata !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{}
+!8 = metadata !{i32 9, i32 14, metadata !3, null}
+!9 = metadata !{i32 13, i32 3, metadata !10, null}
+!10 = metadata !{metadata !"0x2e\00bar\00bar\00\0012\000\001\000\000\00256\001\0012", metadata !4, metadata !5, metadata !6, null, double (i32)* @_Z3bari, null, null, metadata !7} ; [ DW_TAG_subprogram ] [line 12] [def] [bar]
+!11 = metadata !{i32 13, i32 14, metadata !10, null}
+!12 = metadata !{i32 19, i32 3, metadata !13, null}
+!13 = metadata !{metadata !"0x2e\00main\00main\00\0016\000\001\000\000\00256\001\0016", metadata !4, metadata !5, metadata !6, null, i32 ()* @main, null, null, metadata !7} ; [ DW_TAG_subprogram ] [line 16] [def] [main]
+!14 = metadata !{i32 20, i32 5, metadata !13, null}
+!15 = metadata !{i32 21, i32 15, metadata !13, null}
+!16 = metadata !{i32 22, i32 11, metadata !13, null}
+!17 = metadata !{metadata !"branch_weights", i32 534, i32 2064}
+!18 = metadata !{i32 23, i32 14, metadata !13, null}
+!19 = metadata !{i32 25, i32 14, metadata !13, null}
+!20 = metadata !{i32 20, i32 28, metadata !13, null}
+!21 = metadata !{metadata !"branch_weights", i32 0, i32 1075}
+!22 = metadata !{i32 19, i32 26, metadata !13, null}
+!23 = metadata !{metadata !"branch_weights", i32 0, i32 534}
+!24 = metadata !{i32 27, i32 3, metadata !13, null}
+!25 = metadata !{i32 28, i32 3, metadata !13, null}
diff --git a/test/Transforms/SampleProfile/propagate.ll b/test/Transforms/SampleProfile/propagate.ll
index 939361b..9ee8ec5 100644
--- a/test/Transforms/SampleProfile/propagate.ll
+++ b/test/Transforms/SampleProfile/propagate.ll
@@ -198,39 +198,39 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.module.flags = !{!8, !9}
 !llvm.ident = !{!10}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [propagate.cc] [DW_LANG_C_plus_plus]
+!0 = metadata !{metadata !"0x11\004\00clang version 3.5 \000\00\000\00\000", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [propagate.cc] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !"propagate.cc", metadata !"."}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4, metadata !7}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i64 (i32, i32, i64)* @_Z3fooiil, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [propagate.cc]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 24, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [main]
+!4 = metadata !{metadata !"0x2e\00foo\00foo\00\003\000\001\000\006\00256\000\003", metadata !1, metadata !5, metadata !6, null, i64 (i32, i32, i64)* @_Z3fooiil, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 3] [def] [foo]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [propagate.cc]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !"0x2e\00main\00main\00\0024\000\001\000\006\00256\000\0024", metadata !1, metadata !5, metadata !6, null, i32 ()* @main, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 24] [def] [main]
 !8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
 !10 = metadata !{metadata !"clang version 3.5 "}
 !11 = metadata !{i32 4, i32 0, metadata !12, null}
-!12 = metadata !{i32 786443, metadata !1, metadata !4, i32 4, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!12 = metadata !{metadata !"0xb\004\000\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [propagate.cc]
 !13 = metadata !{i32 5, i32 0, metadata !14, null}
-!14 = metadata !{i32 786443, metadata !1, metadata !12, i32 4, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!14 = metadata !{metadata !"0xb\004\000\000", metadata !1, metadata !12} ; [ DW_TAG_lexical_block ] [propagate.cc]
 !15 = metadata !{i32 7, i32 0, metadata !16, null}
-!16 = metadata !{i32 786443, metadata !1, metadata !17, i32 7, i32 0, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!17 = metadata !{i32 786443, metadata !1, metadata !12, i32 6, i32 0, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!18 = metadata !{i32 8, i32 0, metadata !19, null} ; [ DW_TAG_imported_declaration ]
-!19 = metadata !{i32 786443, metadata !1, metadata !20, i32 8, i32 0, i32 0, i32 5} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!20 = metadata !{i32 786443, metadata !1, metadata !16, i32 7, i32 0, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!16 = metadata !{metadata !"0xb\007\000\000", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!17 = metadata !{metadata !"0xb\006\000\000", metadata !1, metadata !12} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!18 = metadata !{i32 8, i32 0, metadata !19, null}
+!19 = metadata !{metadata !"0xb\008\000\000", metadata !1, metadata !20} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!20 = metadata !{metadata !"0xb\007\000\000", metadata !1, metadata !16} ; [ DW_TAG_lexical_block ] [propagate.cc]
 !21 = metadata !{i32 9, i32 0, metadata !19, null}
 !22 = metadata !{i32 10, i32 0, metadata !23, null}
-!23 = metadata !{i32 786443, metadata !1, metadata !20, i32 10, i32 0, i32 0, i32 6} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!23 = metadata !{metadata !"0xb\0010\000\000", metadata !1, metadata !20} ; [ DW_TAG_lexical_block ] [propagate.cc]
 !24 = metadata !{i32 11, i32 0, metadata !25, null}
-!25 = metadata !{i32 786443, metadata !1, metadata !23, i32 10, i32 0, i32 0, i32 7} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!25 = metadata !{metadata !"0xb\0010\000\000", metadata !1, metadata !23} ; [ DW_TAG_lexical_block ] [propagate.cc]
 !26 = metadata !{i32 12, i32 0, metadata !25, null}
 !27 = metadata !{i32 13, i32 0, metadata !25, null}
 !28 = metadata !{i32 14, i32 0, metadata !29, null}
-!29 = metadata !{i32 786443, metadata !1, metadata !30, i32 14, i32 0, i32 0, i32 9} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!30 = metadata !{i32 786443, metadata !1, metadata !23, i32 13, i32 0, i32 0, i32 8} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!29 = metadata !{metadata !"0xb\0014\000\000", metadata !1, metadata !30} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!30 = metadata !{metadata !"0xb\0013\000\000", metadata !1, metadata !23} ; [ DW_TAG_lexical_block ] [propagate.cc]
 !31 = metadata !{i32 15, i32 0, metadata !32, null}
-!32 = metadata !{i32 786443, metadata !1, metadata !29, i32 14, i32 0, i32 0, i32 10} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!32 = metadata !{metadata !"0xb\0014\000\000", metadata !1, metadata !29} ; [ DW_TAG_lexical_block ] [propagate.cc]
 !33 = metadata !{i32 16, i32 0, metadata !32, null}
 !34 = metadata !{i32 17, i32 0, metadata !32, null}
 !35 = metadata !{i32 19, i32 0, metadata !20, null}
diff --git a/test/Transforms/SampleProfile/syntax.ll b/test/Transforms/SampleProfile/syntax.ll
index 53c65f4..ed38a17 100644
--- a/test/Transforms/SampleProfile/syntax.ll
+++ b/test/Transforms/SampleProfile/syntax.ll
@@ -1,4 +1,4 @@
-; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/syntax.prof 2>&1 | FileCheck -check-prefix=NO-DEBUG %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/syntax.prof 2>&1 | FileCheck -check-prefix=NO-DEBUG %s
 ; RUN: not opt < %s -sample-profile -sample-profile-file=missing.prof 2>&1 | FileCheck -check-prefix=MISSING-FILE %s
 ; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_fn_header.prof 2>&1 | FileCheck -check-prefix=BAD-FN-HEADER %s
 ; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_sample_line.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLE-LINE %s
@@ -11,8 +11,8 @@ define void @empty() {
 entry:
   ret void
 }
-; NO-DEBUG: error: No debug information found in function empty
-; MISSING-FILE: error: missing.prof:
+; NO-DEBUG: warning: No debug information found in function empty: Function profile not used
+; MISSING-FILE: missing.prof: Could not open profile:
 ; BAD-FN-HEADER: error: {{.*}}bad_fn_header.prof:1: Expected 'mangled_name:NUM:NUM', found 3empty:100:BAD
 ; BAD-SAMPLE-LINE: error: {{.*}}bad_sample_line.prof:3: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1: BAD
 ; BAD-LINE-VALUES: error: {{.*}}bad_line_values.prof:2: Expected 'mangled_name:NUM:NUM', found -1: 10
diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
index 71bf22a..eb660d2 100644
--- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll
+++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
@@ -17,10 +17,10 @@ entry:
   %b.addr = alloca i32, align 4
   %c = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !6), !dbg !7
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !6, metadata !{}), !dbg !7
   store i32 %b, i32* %b.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %b.addr}, metadata !8), !dbg !9
-  call void @llvm.dbg.declare(metadata !{i32* %c}, metadata !10), !dbg !12
+  call void @llvm.dbg.declare(metadata !{i32* %b.addr}, metadata !8, metadata !{}), !dbg !9
+  call void @llvm.dbg.declare(metadata !{i32* %c}, metadata !10, metadata !{}), !dbg !12
   %tmp = load i32* %a.addr, align 4, !dbg !13
   store i32 %tmp, i32* %c, align 4, !dbg !13
   %tmp1 = load i32* %a.addr, align 4, !dbg !14
@@ -37,23 +37,23 @@ entry:
   ret i32 %add7, !dbg !16
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!20}
 
-!0 = metadata !{i32 786449, metadata !18, i32 12, metadata !"clang version 3.0 (trunk 131941)", i1 false, metadata !"", i32 0, metadata !19, metadata !19, metadata !17, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
-!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x11\0012\00clang version 3.0 (trunk 131941)\000\00\000\00\000", metadata !18, metadata !19, metadata !19, metadata !17, null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !"0x2e\00f\00f\00\001\000\001\000\006\00256\000\001", metadata !18, metadata !2, metadata !3, null, i32 (i32, i32)* @f, null, null, null} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!2 = metadata !{metadata !"0x29", metadata !18} ; [ DW_TAG_file_type ]
+!3 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !18, metadata !2, null, metadata !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!5 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, metadata !0} ; [ DW_TAG_base_type ]
+!6 = metadata !{metadata !"0x101\00a\0016777217\000", metadata !1, metadata !2, metadata !5} ; [ DW_TAG_arg_variable ]
 !7 = metadata !{i32 1, i32 11, metadata !1, null}
-!8 = metadata !{i32 786689, metadata !1, metadata !"b", metadata !2, i32 33554433, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{metadata !"0x101\00b\0033554433\000", metadata !1, metadata !2, metadata !5} ; [ DW_TAG_arg_variable ]
 !9 = metadata !{i32 1, i32 18, metadata !1, null}
-!10 = metadata !{i32 786688, metadata !11, metadata !"c", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 786443, metadata !18, metadata !1, i32 1, i32 21, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{metadata !"0x100\00c\002\000", metadata !11, metadata !2, metadata !5} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{metadata !"0xb\001\0021\000", metadata !18, metadata !1} ; [ DW_TAG_lexical_block ]
 !12 = metadata !{i32 2, i32 9, metadata !11, null}
 !13 = metadata !{i32 2, i32 14, metadata !11, null}
 !14 = metadata !{i32 3, i32 5, metadata !11, null}
@@ -62,4 +62,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !17 = metadata !{metadata !1}
 !18 = metadata !{metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b"}
 !19 = metadata !{i32 0}
-!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/Scalarizer/dbginfo.ll b/test/Transforms/Scalarizer/dbginfo.ll
index 546e89d..ee7182b 100644
--- a/test/Transforms/Scalarizer/dbginfo.ll
+++ b/test/Transforms/Scalarizer/dbginfo.ll
@@ -16,9 +16,9 @@ define void @f1(<4 x i32>* nocapture %a, <4 x i32>* nocapture readonly %b, <4 x
 ; CHECK: %b.i1 = getelementptr i32* %b.i0, i32 1
 ; CHECK: %b.i2 = getelementptr i32* %b.i0, i32 2
 ; CHECK: %b.i3 = getelementptr i32* %b.i0, i32 3
-; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %a}, i64 0, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}}
-; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %b}, i64 0, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}}
-; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %c}, i64 0, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}}
+; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %a}, i64 0, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}}
+; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %b}, i64 0, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}}
+; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %c}, i64 0, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}}
 ; CHECK: %bval.i0 = load i32* %b.i0, align 16, !dbg ![[TAG1:[0-9]+]], !tbaa ![[TAG2:[0-9]+]]
 ; CHECK: %bval.i1 = load i32* %b.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
 ; CHECK: %bval.i2 = load i32* %b.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
@@ -37,9 +37,9 @@ define void @f1(<4 x i32>* nocapture %a, <4 x i32>* nocapture readonly %b, <4 x
 ; CHECK: store i32 %add.i3, i32* %a.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
 ; CHECK: ret void
 entry:
-  tail call void @llvm.dbg.value(metadata !{<4 x i32>* %a}, i64 0, metadata !15), !dbg !20
-  tail call void @llvm.dbg.value(metadata !{<4 x i32>* %b}, i64 0, metadata !16), !dbg !20
-  tail call void @llvm.dbg.value(metadata !{<4 x i32>* %c}, i64 0, metadata !17), !dbg !20
+  tail call void @llvm.dbg.value(metadata !{<4 x i32>* %a}, i64 0, metadata !15, metadata !{}), !dbg !20
+  tail call void @llvm.dbg.value(metadata !{<4 x i32>* %b}, i64 0, metadata !16, metadata !{}), !dbg !20
+  tail call void @llvm.dbg.value(metadata !{<4 x i32>* %c}, i64 0, metadata !17, metadata !{}), !dbg !20
   %bval = load <4 x i32>* %b, align 16, !dbg !21, !tbaa !22
   %cval = load <4 x i32>* %c, align 16, !dbg !21, !tbaa !22
   %add = add <4 x i32> %bval, %cval, !dbg !21
@@ -48,7 +48,7 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -57,24 +57,24 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!18, !26}
 !llvm.ident = !{!19}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 194134) (llvm/trunk 194126)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/richards/llvm/build//tmp/add.c] [DW_LANG_C99]
+!0 = metadata !{metadata !"0x11\0012\00clang version 3.4 (trunk 194134) (llvm/trunk 194126)\001\00\000\00\000", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [/home/richards/llvm/build//tmp/add.c] [DW_LANG_C99]
 !1 = metadata !{metadata !"/tmp/add.c", metadata !"/home/richards/llvm/build"}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f1", metadata !"f1", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (<4 x i32>*, <4 x i32>*, <4 x i32>*)* @f1, null, null, metadata !14, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [f]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/richards/llvm/build//tmp/add.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !"0x2e\00f1\00f1\00\003\000\001\000\006\00256\001\004", metadata !1, metadata !5, metadata !6, null, void (<4 x i32>*, <4 x i32>*, <4 x i32>*)* @f1, null, null, metadata !14} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [f]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [/home/richards/llvm/build//tmp/add.c]
+!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{null, metadata !8, metadata !8, metadata !8}
-!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from V4SI]
-!9 = metadata !{i32 786454, metadata !1, null, metadata !"V4SI", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] [V4SI] [line 1, size 0, align 0, offset 0] [from ]
-!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 2048, metadata !11, metadata !12, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int]
-!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", null, null, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from V4SI]
+!9 = metadata !{metadata !"0x16\00V4SI\001\000\000\000\000", metadata !1, null, metadata !10} ; [ DW_TAG_typedef ] [V4SI] [line 1, size 0, align 0, offset 0] [from ]
+!10 = metadata !{metadata !"0x1\00\000\00128\00128\000\002048", null, null, metadata !11, metadata !12, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int]
+!11 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !12 = metadata !{metadata !13}
-!13 = metadata !{i32 786465, i64 0, i64 4}        ; [ DW_TAG_subrange_type ] [0, 3]
+!13 = metadata !{metadata !"0x21\000\004"}        ; [ DW_TAG_subrange_type ] [0, 3]
 !14 = metadata !{metadata !15, metadata !16, metadata !17}
-!15 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !5, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3]
-!16 = metadata !{i32 786689, metadata !4, metadata !"b", metadata !5, i32 33554435, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 3]
-!17 = metadata !{i32 786689, metadata !4, metadata !"c", metadata !5, i32 50331651, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [c] [line 3]
+!15 = metadata !{metadata !"0x101\00a\0016777219\000", metadata !4, metadata !5, metadata !8} ; [ DW_TAG_arg_variable ] [a] [line 3]
+!16 = metadata !{metadata !"0x101\00b\0033554435\000", metadata !4, metadata !5, metadata !8} ; [ DW_TAG_arg_variable ] [b] [line 3]
+!17 = metadata !{metadata !"0x101\00c\0050331651\000", metadata !4, metadata !5, metadata !8} ; [ DW_TAG_arg_variable ] [c] [line 3]
 !18 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
 !19 = metadata !{metadata !"clang version 3.4 (trunk 194134) (llvm/trunk 194126)"}
 !20 = metadata !{i32 3, i32 0, metadata !4, null}
@@ -83,4 +83,4 @@ attributes #1 = { nounwind readnone }
 !23 = metadata !{metadata !"omnipotent char", metadata !24, i64 0}
 !24 = metadata !{metadata !"Simple C/C++ TBAA"}
 !25 = metadata !{i32 6, i32 0, metadata !4, null}
-!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index c07440c..d054a3b 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -45,7 +45,7 @@ define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
   ret void
 }
 ; PTX-LABEL: sum_of_array(
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rl|r)[0-9]+]]{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
@@ -88,7 +88,7 @@ define void @sum_of_array2(i32 %x, i32 %y, float* nocapture %output) {
   ret void
 }
 ; PTX-LABEL: sum_of_array2(
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rl|r)[0-9]+]]{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
@@ -99,8 +99,17 @@ define void @sum_of_array2(i32 %x, i32 %y, float* nocapture %output) {
 ; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 32
 ; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 33
 
-; Similar to @sum_of_array3, but extends array indices using zext instead of
-; sext. e.g., array[zext(x + 1)][zext(y + 1)].
+
+; This function loads
+;   array[zext(x)][zext(y)]
+;   array[zext(x)][zext(y +nuw 1)]
+;   array[zext(x +nuw 1)][zext(y)]
+;   array[zext(x +nuw 1)][zext(y +nuw 1)].
+;
+; This function is similar to @sum_of_array, but it
+; 1) extends array indices using zext instead of sext;
+; 2) annotates the addition with "nuw"; otherwise, zext(x + 1) => zext(x) + 1
+;    may be invalid.
 define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {
 .preheader:
   %0 = zext i32 %y to i64
@@ -109,13 +118,13 @@ define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {
   %3 = addrspacecast float addrspace(3)* %2 to float*
   %4 = load float* %3, align 4
   %5 = fadd float %4, 0.000000e+00
-  %6 = add i32 %y, 1
+  %6 = add nuw i32 %y, 1
   %7 = zext i32 %6 to i64
   %8 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %7
   %9 = addrspacecast float addrspace(3)* %8 to float*
   %10 = load float* %9, align 4
   %11 = fadd float %5, %10
-  %12 = add i32 %x, 1
+  %12 = add nuw i32 %x, 1
   %13 = zext i32 %12 to i64
   %14 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %0
   %15 = addrspacecast float addrspace(3)* %14 to float*
@@ -129,7 +138,7 @@ define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {
   ret void
 }
 ; PTX-LABEL: sum_of_array3(
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rl|r)[0-9]+]]{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
@@ -139,3 +148,49 @@ define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {
 ; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 1
 ; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 32
 ; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 33
+
+
+; This function loads
+;   array[zext(x)][zext(y)]
+;   array[zext(x)][zext(y)]
+;   array[zext(x) + 1][zext(y) + 1]
+;   array[zext(x) + 1][zext(y) + 1].
+;
+; We expect the generated code to reuse the computation of
+; &array[zext(x)][zext(y)]. See the expected IR and PTX for details.
+define void @sum_of_array4(i32 %x, i32 %y, float* nocapture %output) {
+.preheader:
+  %0 = zext i32 %y to i64
+  %1 = zext i32 %x to i64
+  %2 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
+  %3 = addrspacecast float addrspace(3)* %2 to float*
+  %4 = load float* %3, align 4
+  %5 = fadd float %4, 0.000000e+00
+  %6 = add i64 %0, 1
+  %7 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %6
+  %8 = addrspacecast float addrspace(3)* %7 to float*
+  %9 = load float* %8, align 4
+  %10 = fadd float %5, %9
+  %11 = add i64 %1, 1
+  %12 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %0
+  %13 = addrspacecast float addrspace(3)* %12 to float*
+  %14 = load float* %13, align 4
+  %15 = fadd float %10, %14
+  %16 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %6
+  %17 = addrspacecast float addrspace(3)* %16 to float*
+  %18 = load float* %17, align 4
+  %19 = fadd float %15, %18
+  store float %19, float* %output, align 4
+  ret void
+}
+; PTX-LABEL: sum_of_array4(
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+
+; IR-LABEL: @sum_of_array4(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 33
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index ed40c7e..ea0d3f5 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -235,27 +235,45 @@ entry:
 ; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array
 ; CHECK-NOT: getelementptr
 
-; if zext(a + b) <= max signed value of typeof(a + b), then we can prove
-; a + b >= 0 and zext(a + b) == sext(a + b). If we can prove further a or b is
-; non-negative, we have zext(a + b) == sext(a) + sext(b).
-define float* @inbounds_zext_add(i32 %i, i4 %j) {
+; The code that rebuilds an OR expression used to be buggy, and failed on this
+; test.
+define float* @shl_add_or(i64 %a, float* %ptr) {
+; CHECK-LABEL: @shl_add_or(
 entry:
-  %0 = add i32 %i, 1
-  %1 = zext i32 %0 to i64
-  ; Because zext(i + 1) is an index of an in bounds GEP based on
-  ; float_2d_array, zext(i + 1) <= sizeof(float_2d_array) = 4096.
-  ; Furthermore, since typeof(i + 1) is i32 and 4096 < 2^31, we are sure the
-  ; sign bit of i + 1 is 0. This implies zext(i + 1) = sext(i + 1).
-  %2 = add i4 %j, 2
-  %3 = zext i4 %2 to i64
-  ; In this case, typeof(j + 2) is i4, so zext(j + 2) <= 4096 does not imply
-  ; the sign bit of j + 2 is 0.
-  %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %1, i64 %3
+  %shl = shl i64 %a, 2
+  %add = add i64 %shl, 12
+  %or = or i64 %add, 1
+; CHECK: [[OR:%or[0-9]*]] = add i64 %shl, 1
+  ; ((a << 2) + 12) and 1 have no common bits. Therefore,
+  ; SeparateConstOffsetFromGEP is able to extract the 12.
+  ; TODO(jingyue): We could reassociate the expression to combine 12 and 1.
+  %p = getelementptr float* %ptr, i64 %or
+; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr float* %ptr, i64 [[OR]]
+; CHECK: getelementptr float* [[PTR]], i64 12
   ret float* %p
+; CHECK-NEXT: ret
 }
-; CHECK-LABEL: @inbounds_zext_add(
+
+; The source code used to be buggy in checking
+; (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0)
+; where AccumulativeByteOffset is signed but ElementTypeSizeOfGEP is unsigned.
+; The compiler would promote AccumulativeByteOffset to unsigned, causing
+; unexpected results. For example, while -64 % (int64_t)24 != 0,
+; -64 % (uint64_t)24 == 0.
+%struct3 = type { i64, i32 }
+%struct2 = type { %struct3, i32 }
+%struct1 = type { i64, %struct2 }
+%struct0 = type { i32, i32, i64*, [100 x %struct1] }
+define %struct2* @sign_mod_unsign(%struct0* %ptr, i64 %idx) {
+; CHECK-LABEL: @sign_mod_unsign(
+entry:
+  %arrayidx = add nsw i64 %idx, -2
 ; CHECK-NOT: add
-; CHECK: add i4 %j, 2
-; CHECK: sext
-; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float* %{{[a-zA-Z0-9]+}}, i64 32
+  %ptr2 = getelementptr inbounds %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
+; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
+; CHECK: [[PTR1:%[a-zA-Z0-9]+]] = bitcast %struct2* [[PTR]] to i8*
+; CHECK: getelementptr i8* [[PTR1]], i64 -64
+; CHECK: bitcast
+  ret %struct2* %ptr2
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
index e1635f4..21428c6 100644
--- a/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
+++ b/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
@@ -47,7 +47,7 @@ T:
 }
 
 ; PR9450
-define i32 @test4(i32 %v) {
+define i32 @test4(i32 %v, i32 %w) {
 ; CHECK: entry:
 ; CHECK-NEXT:  switch i32 %v, label %T [
 ; CHECK-NEXT:    i32 3, label %V
@@ -67,7 +67,54 @@ SWITCH:
 default:
         unreachable
 U:
-        ret i32 1
+        ret i32 %w
 T:
         ret i32 2
 }
+
+
+;; We can either convert the following control-flow to a select or remove the
+;; unreachable control flow because of the undef store of null. Make sure we do
+;; the latter.
+
+define void @test5(i1 %cond, i8* %ptr) {
+
+; CHECK-LABEL: test5
+; CHECK: entry:
+; CHECK-NOT: select
+; CHECK:  store i8 2, i8* %ptr
+; CHECK:  ret
+
+entry:
+  br i1 %cond, label %bb1, label %bb3
+
+bb3:
+ br label %bb2
+
+bb1:
+ br label %bb2
+
+bb2:
+  %ptr.2 = phi i8* [ %ptr, %bb3 ], [ null, %bb1 ]
+  store i8 2, i8* %ptr.2, align 8
+  ret void
+}
+
+; CHECK-LABEL: test6
+; CHECK: entry:
+; CHECK-NOT: select
+; CHECK:  store i8 2, i8* %ptr
+; CHECK:  ret
+
+define void @test6(i1 %cond, i8* %ptr) {
+entry:
+  br i1 %cond, label %bb1, label %bb2
+
+bb1:
+  br label %bb2
+
+bb2:
+  %ptr.2 = phi i8* [ %ptr, %entry ], [ null, %bb1 ]
+  store i8 2, i8* %ptr.2, align 8
+  ret void
+}
diff --git a/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll b/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
new file mode 100644
index 0000000..22599b3
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
@@ -0,0 +1,50 @@
+; RUN: opt -S -simplifycfg < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s
+; rdar://17887153
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin12.0.0"
+
+; When we have a covered lookup table, make sure we don't delete PHINodes that
+; are cached in PHIs.
+; CHECK-LABEL: @test
+; CHECK: entry:
+; CHECK-NEXT: sub i3 %arg, -4
+; CHECK-NEXT: zext i3 %switch.tableidx to i4
+; CHECK-NEXT: getelementptr inbounds [8 x i64]* @switch.table, i32 0, i4 %switch.tableidx.zext
+; CHECK-NEXT: load i64* %switch.gep
+; CHECK-NEXT: add i64
+; CHECK-NEXT: ret i64
+define i64 @test(i3 %arg) {
+entry:
+  switch i3 %arg, label %Default [
+    i3 -2, label %Label6
+    i3 1, label %Label1
+    i3 2, label %Label2
+    i3 3, label %Label3
+    i3 -4, label %Label4
+    i3 -3, label %Label5
+  ]
+
+Default:
+  %v1 = phi i64 [ 7, %Label6 ], [ 11, %Label5 ], [ 6, %Label4 ], [ 13, %Label3 ], [ 9, %Label2 ], [ 15, %Label1 ], [ 8, %entry ]
+  %v2 = phi i64 [ 0, %Label6 ], [ 0, %Label5 ], [ 0, %Label4 ], [ 0, %Label3 ], [ 0, %Label2 ], [ 0, %Label1 ], [ 0, %entry ]
+  %v3 = add i64 %v1, %v2
+  ret i64 %v3
+
+Label1:
+  br label %Default
+
+Label2:
+  br label %Default
+
+Label3:
+  br label %Default
+
+Label4:
+  br label %Default
+
+Label5:
+  br label %Default
+
+Label6:
+  br label %Default
+}
diff --git a/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll b/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
new file mode 100644
index 0000000..d0b8ab2
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
@@ -0,0 +1,41 @@
+; RUN: opt -S -simplifycfg < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s
+; rdar://17735071
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin12.0.0"
+
+; When tableindex can't fit into i2, we should extend the type to i3.
+; CHECK-LABEL: @_TFO6reduce1E5toRawfS0_FT_Si
+; CHECK: entry:
+; CHECK-NEXT: sub i2 %0, -2
+; CHECK-NEXT: zext i2 %switch.tableidx to i3
+; CHECK-NEXT: getelementptr inbounds [4 x i64]* @switch.table, i32 0, i3 %switch.tableidx.zext
+; CHECK-NEXT: load i64* %switch.gep
+; CHECK-NEXT: ret i64 %switch.load
+define i64 @_TFO6reduce1E5toRawfS0_FT_Si(i2) {
+entry:
+  switch i2 %0, label %1 [
+    i2 0, label %2
+    i2 1, label %3
+    i2 -2, label %4
+    i2 -1, label %5
+  ]
+
+; <label>:1                                       ; preds = %entry
+  unreachable
+
+; <label>:2                                       ; preds = %2
+  br label %6
+
+; <label>:3                                       ; preds = %4
+  br label %6
+
+; <label>:4                                       ; preds = %6
+  br label %6
+
+; <label>:5                                       ; preds = %8
+  br label %6
+
+; <label>:6                                      ; preds = %3, %5, %7, %9
+  %7 = phi i64 [ 3, %5 ], [ 2, %4 ], [ 1, %3 ], [ 0, %2 ]
+  ret i64 %7
+}
diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 51ced40..fc22e7e 100644
--- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -856,10 +856,10 @@ return:
 ; CHECK: entry:
 ; CHECK: br i1 %{{.*}}, label %switch.hole_check, label %sw.default
 ; CHECK: switch.hole_check:
-; CHECK-NEXT: %switch.maskindex = trunc i32 %switch.tableidx to i6
-; CHECK-NEXT: %switch.shifted = lshr i6 -17, %switch.maskindex
+; CHECK-NEXT: %switch.maskindex = trunc i32 %switch.tableidx to i8
+; CHECK-NEXT: %switch.shifted = lshr i8 47, %switch.maskindex
 ; The mask is binary 101111.
-; CHECK-NEXT: %switch.lobit = trunc i6 %switch.shifted to i1
+; CHECK-NEXT: %switch.lobit = trunc i8 %switch.shifted to i1
 ; CHECK-NEXT: br i1 %switch.lobit, label %switch.lookup, label %sw.default
 ; CHECK-NOT: switch i32
 }
@@ -895,7 +895,7 @@ sw.bb1: br label %return
 sw.bb2: br label %return
 sw.default: br label %return
 return:
-  %x = phi i32 [ 3, %sw.default ], [ 5, %sw.bb2 ], [ 7, %sw.bb1 ], [ 9, %entry ]
+  %x = phi i32 [ 3, %sw.default ], [ 5, %sw.bb2 ], [ 7, %sw.bb1 ], [ 10, %entry ]
   ret i32 %x
 ; CHECK-LABEL: @threecases(
 ; CHECK-NOT: switch i32
@@ -915,8 +915,12 @@ return:
   %x = phi i32 [ 3, %sw.default ], [ 7, %sw.bb1 ], [ 9, %entry ]
   ret i32 %x
 ; CHECK-LABEL: @twocases(
-; CHECK: switch i32
+; CHECK-NOT: switch i32
 ; CHECK-NOT: @switch.table
+; CHECK: %switch.selectcmp
+; CHECK-NEXT: %switch.select
+; CHECK-NEXT: %switch.selectcmp1
+; CHECK-NEXT: %switch.select2
 }
 
 ; Don't build tables for switches with TLS variables.
@@ -973,3 +977,104 @@ return:
 ; CHECK: switch i32
 ; CHECK-NOT: @switch.table
 }
+
+; We can use linear mapping.
+define i8 @linearmap1(i32 %c) {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 10, label %return
+    i32 11, label %sw.bb1
+    i32 12, label %sw.bb2
+    i32 13, label %sw.bb3
+  ]
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.default: br label %return
+return:
+  %x = phi i8 [ 3, %sw.default ], [ 3, %sw.bb3 ], [ 8, %sw.bb2 ], [ 13, %sw.bb1 ], [ 18, %entry ]
+  ret i8 %x
+; CHECK-LABEL: @linearmap1(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %c, 10
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.idx.cast = trunc i32 %switch.tableidx to i8
+; CHECK-NEXT: %switch.idx.mult = mul i8 %switch.idx.cast, -5
+; CHECK-NEXT: %switch.offset = add i8 %switch.idx.mult, 18
+; CHECK-NEXT: ret i8 %switch.offset
+}
+
+; Linear mapping in a different configuration.
+define i32 @linearmap2(i8 %c) {
+entry:
+  switch i8 %c, label %sw.default [
+    i8 -10, label %return
+    i8 -11, label %sw.bb1
+    i8 -12, label %sw.bb2
+    i8 -13, label %sw.bb3
+  ]
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.default: br label %return
+return:
+  %x = phi i32 [ 3, %sw.default ], [ 18, %sw.bb3 ], [ 19, %sw.bb2 ], [ 20, %sw.bb1 ], [ 21, %entry ]
+  ret i32 %x
+; CHECK-LABEL: @linearmap2(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i8 %c, -13
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.idx.cast = zext i8 %switch.tableidx to i32
+; CHECK-NEXT: %switch.offset = add i32 %switch.idx.cast, 18
+; CHECK-NEXT: ret i32 %switch.offset
+}
+
+; Linear mapping with overflows.
+define i8 @linearmap3(i32 %c) {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 10, label %return
+    i32 11, label %sw.bb1
+    i32 12, label %sw.bb2
+    i32 13, label %sw.bb3
+  ]
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.default: br label %return
+return:
+  %x = phi i8 [ 3, %sw.default ], [ 44, %sw.bb3 ], [ -56, %sw.bb2 ], [ 100, %sw.bb1 ], [ 0, %entry ]
+  ret i8 %x
+; CHECK-LABEL: @linearmap3(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %c, 10
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.idx.cast = trunc i32 %switch.tableidx to i8
+; CHECK-NEXT: %switch.idx.mult = mul i8 %switch.idx.cast, 100
+; CHECK-NEXT: ret i8 %switch.idx.mult
+}
+
+; Linear mapping with with multiplier 1 and offset 0.
+define i8 @linearmap4(i32 %c) {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 -2, label %return
+    i32 -1, label %sw.bb1
+    i32 0, label %sw.bb2
+    i32 1, label %sw.bb3
+  ]
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.default: br label %return
+return:
+  %x = phi i8 [ 3, %sw.default ], [ 3, %sw.bb3 ], [ 2, %sw.bb2 ], [ 1, %sw.bb1 ], [ 0, %entry ]
+  ret i8 %x
+; CHECK-LABEL: @linearmap4(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %c, -2
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.idx.cast = trunc i32 %switch.tableidx to i8
+; CHECK-NEXT: ret i8 %switch.idx.cast
+}
+
diff --git a/test/Transforms/SimplifyCFG/assume.ll b/test/Transforms/SimplifyCFG/assume.ll
new file mode 100644
index 0000000..1d1b96a
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/assume.ll
@@ -0,0 +1,22 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+
+define void @test1() {
+        call void @llvm.assume(i1 0)
+        ret void
+
+; CHECK-LABEL: @test1
+; CHECK-NOT: llvm.assume
+; CHECK: unreachable
+}
+
+define void @test2() {
+        call void @llvm.assume(i1 undef)
+        ret void
+
+; CHECK-LABEL: @test2
+; CHECK-NOT: llvm.assume
+; CHECK: unreachable
+}
+
+declare void @llvm.assume(i1) nounwind
+
diff --git a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
index 9d8086c..9235f62 100644
--- a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
@@ -25,7 +25,7 @@ BB2:                                              ; preds = %BB1
 
 BB3:                                              ; preds = %BB2
   %6 = getelementptr inbounds [5 x %0]* @0, i32 0, i32 %0, !dbg !6
-  call void @llvm.dbg.value(metadata !{%0* %6}, i64 0, metadata !7), !dbg !12
+  call void @llvm.dbg.value(metadata !{%0* %6}, i64 0, metadata !7, metadata !{}), !dbg !12
   %7 = icmp eq %0* %6, null, !dbg !13
   br i1 %7, label %BB5, label %BB4, !dbg !13
 
@@ -37,22 +37,22 @@ BB5:                                              ; preds = %BB3, %BB2, %BB1, %E
   ret void, !dbg !14
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !15, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 231, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 231] [def] [scope 0] [foo]
-!1 = metadata !{i32 589865, metadata !15} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !15, i32 12, metadata !"clang (trunk 129006)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !15, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x2e\00foo\00foo\00\00231\000\001\000\006\00256\000\000", metadata !15, metadata !1, metadata !3, null, void (i32)* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 231] [def] [scope 0] [foo]
+!1 = metadata !{metadata !"0x29", metadata !15} ; [ DW_TAG_file_type ]
+!2 = metadata !{metadata !"0x11\0012\00clang (trunk 129006)\001\00\000\00\000", metadata !15, metadata !4, metadata !4, null, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !15, metadata !1, null, metadata !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 131, i32 2, metadata !0, null}
 !6 = metadata !{i32 134, i32 2, metadata !0, null}
-!7 = metadata !{i32 590080, metadata !8, metadata !"bar", metadata !1, i32 232, metadata !9, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !15, metadata !0, i32 231, i32 1, i32 3} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 589839, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 589862, null, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ]
-!11 = metadata !{i32 589860, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!7 = metadata !{metadata !"0x100\00bar\00232\000", metadata !8, metadata !1, metadata !9} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{metadata !"0xb\00231\001\003", metadata !15, metadata !0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{metadata !"0xf\00\000\0032\0032\000\000", null, metadata !2, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{metadata !"0x26\00\000\000\000\000\000", null, metadata !2, metadata !11} ; [ DW_TAG_const_type ]
+!11 = metadata !{metadata !"0x24\00unsigned int\000\0032\0032\000\000\007", null, metadata !2} ; [ DW_TAG_base_type ]
 !12 = metadata !{i32 232, i32 40, metadata !8, null}
 !13 = metadata !{i32 234, i32 2, metadata !8, null}
 !14 = metadata !{i32 274, i32 1, metadata !8, null}
diff --git a/test/Transforms/SimplifyCFG/branch-fold-threshold.ll b/test/Transforms/SimplifyCFG/branch-fold-threshold.ll
new file mode 100644
index 0000000..878c0a4
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/branch-fold-threshold.ll
@@ -0,0 +1,28 @@
+; RUN: opt %s -simplifycfg -S | FileCheck %s --check-prefix=NORMAL
+; RUN: opt %s -simplifycfg -S -bonus-inst-threshold=2 | FileCheck %s --check-prefix=AGGRESSIVE
+
+define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) {
+; NORMAL-LABEL: @foo(
+; AGGRESSIVE-LABEL: @foo(
+entry:
+  %cmp = icmp sgt i32 %d, 3
+  br i1 %cmp, label %cond.end, label %lor.lhs.false
+; NORMAL: br i1
+; AGGRESSIVE: br i1
+
+lor.lhs.false:
+  %mul = shl i32 %c, 1
+  %add = add nsw i32 %mul, %a
+  %cmp1 = icmp slt i32 %add, %b
+  br i1 %cmp1, label %cond.false, label %cond.end
+; NORMAL: br i1
+; AGGRESSIVE-NOT: br i1
+
+cond.false:
+  %0 = load i32* %input, align 4
+  br label %cond.end
+
+cond.end:
+  %cond = phi i32 [ %0, %cond.false ], [ 0, %lor.lhs.false ], [ 0, %entry ]
+  ret i32 %cond
+}
diff --git a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
index 0547fa9..cc382be 100644
--- a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
+++ b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
@@ -1,8 +1,8 @@
 ; RUN: opt -simplifycfg -S < %s | FileCheck %s
 
 define i32 @foo(i32 %i) nounwind ssp {
-  call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !6), !dbg !7
-  call void @llvm.dbg.value(metadata !8, i64 0, metadata !9), !dbg !11
+  call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !6, metadata !{}), !dbg !7
+  call void @llvm.dbg.value(metadata !8, i64 0, metadata !9, metadata !{}), !dbg !11
   %1 = icmp ne i32 %i, 0, !dbg !12
 ;CHECK: call i32 (...)* @bar()
 ;CHECK-NEXT: llvm.dbg.value
@@ -10,12 +10,12 @@ define i32 @foo(i32 %i) nounwind ssp {
 
 ; <label>:2                                       ; preds = %0
   %3 = call i32 (...)* @bar(), !dbg !13
-  call void @llvm.dbg.value(metadata !{i32 %3}, i64 0, metadata !9), !dbg !13
+  call void @llvm.dbg.value(metadata !{i32 %3}, i64 0, metadata !9, metadata !{}), !dbg !13
   br label %6, !dbg !15
 
 ; <label>:4                                       ; preds = %0
   %5 = call i32 (...)* @bar(), !dbg !16
-  call void @llvm.dbg.value(metadata !{i32 %5}, i64 0, metadata !9), !dbg !16
+  call void @llvm.dbg.value(metadata !{i32 %5}, i64 0, metadata !9, metadata !{}), !dbg !16
   br label %6, !dbg !18
 
 ; <label>:6                                       ; preds = %4, %2
@@ -23,34 +23,34 @@ define i32 @foo(i32 %i) nounwind ssp {
   ret i32 %k.0, !dbg !19
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i32 @bar(...)
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.module.flags = !{!21}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !20, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
-!1 = metadata !{i32 589865, metadata !20} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !20, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !8, metadata !8, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x2e\00foo\00foo\00\002\000\001\000\006\00256\000\000", metadata !20, metadata !1, metadata !3, null, i32 (i32)* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
+!1 = metadata !{metadata !"0x29", metadata !20} ; [ DW_TAG_file_type ]
+!2 = metadata !{metadata !"0x11\0012\00clang\001\00\000\00\000", metadata !20, metadata !8, metadata !8, null, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !20, metadata !1, null, metadata !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 590081, metadata !0, metadata !"i", metadata !1, i32 16777218, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!5 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, metadata !2} ; [ DW_TAG_base_type ]
+!6 = metadata !{metadata !"0x101\00i\0016777218\000", metadata !0, metadata !1, metadata !5} ; [ DW_TAG_arg_variable ]
 !7 = metadata !{i32 2, i32 13, metadata !0, null}
 !8 = metadata !{i32 0}
-!9 = metadata !{i32 590080, metadata !10, metadata !"k", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 589835, metadata !20, metadata !0, i32 2, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{metadata !"0x100\00k\003\000", metadata !10, metadata !1, metadata !5} ; [ DW_TAG_auto_variable ]
+!10 = metadata !{metadata !"0xb\002\0016\000", metadata !20, metadata !0} ; [ DW_TAG_lexical_block ]
 !11 = metadata !{i32 3, i32 12, metadata !10, null}
 !12 = metadata !{i32 4, i32 3, metadata !10, null}
 !13 = metadata !{i32 5, i32 5, metadata !14, null}
-!14 = metadata !{i32 589835, metadata !20, metadata !10, i32 4, i32 10, i32 1} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{metadata !"0xb\004\0010\001", metadata !20, metadata !10} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{i32 6, i32 3, metadata !14, null}
 !16 = metadata !{i32 7, i32 5, metadata !17, null}
-!17 = metadata !{i32 589835, metadata !20, metadata !10, i32 6, i32 10, i32 2} ; [ DW_TAG_lexical_block ]
+!17 = metadata !{metadata !"0xb\006\0010\002", metadata !20, metadata !10} ; [ DW_TAG_lexical_block ]
 !18 = metadata !{i32 8, i32 3, metadata !17, null}
 !19 = metadata !{i32 9, i32 3, metadata !10, null}
 !20 = metadata !{metadata !"b.c", metadata !"/private/tmp"}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/SimplifyCFG/hoist-with-range.ll b/test/Transforms/SimplifyCFG/hoist-with-range.ll
new file mode 100644
index 0000000..362aa9a
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/hoist-with-range.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+define void @foo(i1 %c, i8* %p) {
+; CHECK: if:
+; CHECK-NEXT: load i8* %p, !range !0
+; CHECK: !0 = metadata !{i8 0, i8 1, i8 3, i8 5}
+if:
+  br i1 %c, label %then, label %else
+then:
+  %t = load i8* %p, !range !0
+  br label %out
+else:
+  %e = load i8* %p, !range !1
+  br label %out
+out:
+  ret void
+}
+
+!0 = metadata !{ i8 0, i8 1 }
+!1 = metadata !{ i8 3, i8 5 }
diff --git a/test/Transforms/SimplifyCFG/lifetime.ll b/test/Transforms/SimplifyCFG/lifetime.ll
index b794221..7c66be5 100644
--- a/test/Transforms/SimplifyCFG/lifetime.ll
+++ b/test/Transforms/SimplifyCFG/lifetime.ll
@@ -1,11 +1,11 @@
 ; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
-; Test that a lifetime intrinsic doesn't prevent us from simplifying this.
+; Test that a lifetime intrinsic isn't removed because that would change semantics
 
 ; CHECK: foo
 ; CHECK: entry:
-; CHECK-NOT: bb0:
-; CHECK-NOT: bb1:
+; CHECK: bb0:
+; CHECK: bb1:
 ; CHECK: ret
 define void @foo(i1 %x) {
 entry:
diff --git a/test/Transforms/SimplifyCFG/speculate-math.ll b/test/Transforms/SimplifyCFG/speculate-math.ll
index fa7976d..0ba93d2 100644
--- a/test/Transforms/SimplifyCFG/speculate-math.ll
+++ b/test/Transforms/SimplifyCFG/speculate-math.ll
@@ -3,6 +3,9 @@
 declare float @llvm.sqrt.f32(float) nounwind readonly
 declare float @llvm.fma.f32(float, float, float) nounwind readonly
 declare float @llvm.fmuladd.f32(float, float, float) nounwind readonly
+declare float @llvm.fabs.f32(float) nounwind readonly
+declare float @llvm.minnum.f32(float, float) nounwind readonly
+declare float @llvm.maxnum.f32(float, float) nounwind readonly
 
 ; CHECK-LABEL: @sqrt_test(
 ; CHECK: select
@@ -21,6 +24,22 @@ test_sqrt.exit:                                   ; preds = %cond.else.i, %entry
   ret void
 }
 
+; CHECK-LABEL: @fabs_test(
+; CHECK: select
+define void @fabs_test(float addrspace(1)* noalias nocapture %out, float %a) nounwind {
+entry:
+  %cmp.i = fcmp olt float %a, 0.000000e+00
+  br i1 %cmp.i, label %test_fabs.exit, label %cond.else.i
+
+cond.else.i:                                      ; preds = %entry
+  %0 = tail call float @llvm.fabs.f32(float %a) nounwind readnone
+  br label %test_fabs.exit
+
+test_fabs.exit:                                   ; preds = %cond.else.i, %entry
+  %cond.i = phi float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
+  store float %cond.i, float addrspace(1)* %out, align 4
+  ret void
+}
 
 ; CHECK-LABEL: @fma_test(
 ; CHECK: select
@@ -56,3 +75,36 @@ test_fmuladd.exit:                                   ; preds = %cond.else.i, %en
   ret void
 }
 
+; CHECK-LABEL: @minnum_test(
+; CHECK: select
+define void @minnum_test(float addrspace(1)* noalias nocapture %out, float %a, float %b) nounwind {
+entry:
+  %cmp.i = fcmp olt float %a, 0.000000e+00
+  br i1 %cmp.i, label %test_minnum.exit, label %cond.else.i
+
+cond.else.i:                                      ; preds = %entry
+  %0 = tail call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
+  br label %test_minnum.exit
+
+test_minnum.exit:                                   ; preds = %cond.else.i, %entry
+  %cond.i = phi float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
+  store float %cond.i, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL: @maxnum_test(
+; CHECK: select
+define void @maxnum_test(float addrspace(1)* noalias nocapture %out, float %a, float %b) nounwind {
+entry:
+  %cmp.i = fcmp olt float %a, 0.000000e+00
+  br i1 %cmp.i, label %test_maxnum.exit, label %cond.else.i
+
+cond.else.i:                                      ; preds = %entry
+  %0 = tail call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
+  br label %test_maxnum.exit
+
+test_maxnum.exit:                                   ; preds = %cond.else.i, %entry
+  %cond.i = phi float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
+  store float %cond.i, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/Transforms/SimplifyCFG/switch-to-select-multiple-edge-per-block-phi.ll b/test/Transforms/SimplifyCFG/switch-to-select-multiple-edge-per-block-phi.ll
new file mode 100644
index 0000000..ddf5d1f
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/switch-to-select-multiple-edge-per-block-phi.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; a, b;
+; fn1() {
+;   if (b)
+;     if (a == 0 || a == 5)
+;       return a;
+;   return 0;
+; }
+
+; Checking that we handle correctly the case when we have a switch
+; branching multiple times to the same block
+
+@b = common global i32 0, align 4
+@a = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define i32 @fn1() {
+; CHECK-LABEL: @fn1
+; CHECK: %switch.selectcmp1 = icmp eq i32 %1, 5
+; CHECK: %switch.select2 = select i1 %switch.selectcmp1, i32 5, i32 %switch.select
+entry:
+  %0 = load i32* @b, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.end3, label %if.then
+
+if.then:
+  %1 = load i32* @a, align 4
+  switch i32 %1, label %if.end3 [
+    i32 5, label %return
+    i32 0, label %return
+  ]
+
+if.end3:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 0, %if.end3 ], [ %1, %if.then ], [ %1, %if.then ]
+  ret i32 %retval.0
+}
diff --git a/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll b/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
new file mode 100644
index 0000000..69f97e5
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
@@ -0,0 +1,72 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; int foo1_with_default(int a) {
+;   switch(a) {
+;     case 10:
+;       return 10;
+;     case 20:
+;       return 2;
+;   }
+;   return 4;
+; }
+
+define i32 @foo1_with_default(i32 %a) {
+; CHECK-LABEL: @foo1_with_default
+; CHECK: %switch.selectcmp = icmp eq i32 %a, 20
+; CHECK-NEXT: %switch.select = select i1 %switch.selectcmp, i32 2, i32 4
+; CHECK-NEXT: %switch.selectcmp1 = icmp eq i32 %a, 10
+; CHECK-NEXT: %switch.select2 = select i1 %switch.selectcmp1, i32 10, i32 %switch.select
+entry:
+  switch i32 %a, label %sw.epilog [
+    i32 10, label %sw.bb
+    i32 20, label %sw.bb1
+  ]
+
+sw.bb:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.epilog:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 4, %sw.epilog ], [ 2, %sw.bb1 ], [ 10, %sw.bb ]
+  ret i32 %retval.0
+}
+
+; int foo1_without_default(int a) {
+;   switch(a) {
+;     case 10:
+;       return 10;
+;     case 20:
+;       return 2;
+;   }
+;   __builtin_unreachable();
+; }
+
+define i32 @foo1_without_default(i32 %a) {
+; CHECK-LABEL: @foo1_without_default
+; CHECK: %switch.selectcmp = icmp eq i32 %a, 10
+; CHECK-NEXT: %switch.select = select i1 %switch.selectcmp, i32 10, i32 2
+; CHECK-NOT: %switch.selectcmp1
+entry:
+  switch i32 %a, label %sw.epilog [
+    i32 10, label %sw.bb
+    i32 20, label %sw.bb1
+  ]
+
+sw.bb:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.epilog:
+  unreachable
+
+return:
+  %retval.0 = phi i32 [ 2, %sw.bb1 ], [ 10, %sw.bb ]
+  ret i32 %retval.0
+}
diff --git a/test/Transforms/SimplifyCFG/trap-debugloc.ll b/test/Transforms/SimplifyCFG/trap-debugloc.ll
index 3b449cb..adf4215 100644
--- a/test/Transforms/SimplifyCFG/trap-debugloc.ll
+++ b/test/Transforms/SimplifyCFG/trap-debugloc.ll
@@ -11,14 +11,14 @@ define void @foo() nounwind ssp {
 !llvm.module.flags = !{!10}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !8, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [foo]
-!1 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !8, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x2e\00foo\00foo\00\003\000\001\000\006\000\000\000", metadata !8, metadata !1, metadata !3, null, void ()* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [foo]
+!1 = metadata !{metadata !"0x29", metadata !8} ; [ DW_TAG_file_type ]
+!2 = metadata !{metadata !"0x11\0012\00Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)\001\00\000\00\000", metadata !8, metadata !4, metadata !4, metadata !9, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !8, metadata !1, null, metadata !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 4, i32 2, metadata !6, null}
-!6 = metadata !{i32 589835, metadata !8, metadata !0, i32 3, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
+!6 = metadata !{metadata !"0xb\003\0012\000", metadata !8, metadata !0} ; [ DW_TAG_lexical_block ]
 !7 = metadata !{i32 5, i32 1, metadata !6, null}
 !8 = metadata !{metadata !"foo.c", metadata !"/private/tmp"}
 !9 = metadata !{metadata !0}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
index 5353744..6100a6a 100644
--- a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
+++ b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
@@ -6,11 +6,11 @@
 
 define void @foo() nounwind readnone optsize ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !9, i64 0, metadata !5), !dbg !10
+  tail call void @llvm.dbg.value(metadata !9, i64 0, metadata !5, metadata !{}), !dbg !10
   ret void, !dbg !11
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!13}
@@ -18,17 +18,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.lv.foo = !{!5}
 !llvm.dbg.gv = !{!8}
 
-!0 = metadata !{i32 524334, metadata !12, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !12} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, metadata !12, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x2e\00foo\00foo\00foo\002\000\001\000\006\000\001\000", metadata !12, metadata !1, metadata !3, null, void ()* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{metadata !"0x29", metadata !12} ; [ DW_TAG_file_type ]
+!2 = metadata !{metadata !"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", metadata !12, metadata !4, metadata !4, null, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !12, metadata !1, null, metadata !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
-!5 = metadata !{i32 524544, metadata !6, metadata !"y", metadata !1, i32 3, metadata !7} ; [ DW_TAG_auto_variable ]
-!6 = metadata !{i32 524299, metadata !12, metadata !0, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!7 = metadata !{i32 524324, metadata !12, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 524340, i32 0, metadata !1, metadata !"x", metadata !"x", metadata !"", metadata !1, i32 1, metadata !7, i1 false, i1 true, i32* @x} ; [ DW_TAG_variable ]
+!5 = metadata !{metadata !"0x100\00y\003\000", metadata !6, metadata !1, metadata !7} ; [ DW_TAG_auto_variable ]
+!6 = metadata !{metadata !"0xb\002\000\000", metadata !12, metadata !0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", metadata !12, metadata !1} ; [ DW_TAG_base_type ]
+!8 = metadata !{metadata !"0x34\00x\00x\00\001\000\001", metadata !1, metadata !1, metadata !7, i32* @x} ; [ DW_TAG_variable ]
 !9 = metadata !{i32 0}
 !10 = metadata !{i32 3, i32 0, metadata !6, null}
 !11 = metadata !{i32 4, i32 0, metadata !6, null}
 !12 = metadata !{metadata !"b.c", metadata !"/tmp"}
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll
index b55ac3c..c211dc1 100644
--- a/test/Transforms/StripSymbols/2010-08-25-crash.ll
+++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll
@@ -7,18 +7,18 @@ entry:
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!14}
 
-!0 = metadata !{i32 524334, metadata !10, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !10} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, metadata !10, i32 12, metadata !"clang version 2.8 (trunk 112062)", i1 true, metadata !"", i32 0, metadata !11, metadata !11, metadata !12, metadata !13, null, metadata !"", i32 1} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{metadata !"0x2e\00foo\00foo\00foo\003\000\001\000\006\000\000\000", metadata !10, metadata !1, metadata !3, null, i32 ()* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{metadata !"0x29", metadata !10} ; [ DW_TAG_file_type ]
+!2 = metadata !{metadata !"0x11\0012\00clang version 2.8 (trunk 112062)\001\00\000\00\001", metadata !10, metadata !11, metadata !11, metadata !12, metadata !13, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !10, metadata !1, null, metadata !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 524324, metadata !10, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"i", metadata !1, i32 2, metadata !7, i1 true, i1 true, i32 0, null} ; [ DW_TAG_variable ]
-!7 = metadata !{i32 524326, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !5} ; [ DW_TAG_const_type ]
+!5 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", metadata !10, metadata !1} ; [ DW_TAG_base_type ]
+!6 = metadata !{metadata !"0x34\00i\00i\00i\002\001\001", metadata !1, metadata !1, metadata !7, i32 0, null} ; [ DW_TAG_variable ]
+!7 = metadata !{metadata !"0x26\00\000\000\000\000\000", metadata !10, metadata !1, metadata !5} ; [ DW_TAG_const_type ]
 !8 = metadata !{i32 3, i32 13, metadata !9, null}
-!9 = metadata !{i32 524299, metadata !10, metadata !0, i32 3, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{metadata !"0xb\003\0011\000", metadata !10, metadata !0} ; [ DW_TAG_lexical_block ]
 !10 = metadata !{metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW"}
 !11 = metadata !{i32 0}
 !12 = metadata !{metadata !0}
 !13 = metadata !{metadata !6}
-!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/StripSymbols/strip-dead-debug-info.ll b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
index 8ce7b87..04a3f32 100644
--- a/test/Transforms/StripSymbols/strip-dead-debug-info.ll
+++ b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
@@ -7,7 +7,7 @@
 @xyz = global i32 2
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #0
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #0
 
 ; Function Attrs: nounwind readnone ssp
 define i32 @fn() #1 {
@@ -18,7 +18,7 @@ entry:
 ; Function Attrs: nounwind readonly ssp
 define i32 @foo(i32 %i) #2 {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !15), !dbg !20
+  tail call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !15, metadata !{}), !dbg !20
   %.0 = load i32* @xyz, align 4
   ret i32 %.0, !dbg !21
 }
@@ -30,29 +30,29 @@ attributes #2 = { nounwind readonly ssp }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!25}
 
-!0 = metadata !{i32 524305, metadata !1, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !23, metadata !24, null, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp//g.c] [DW_LANG_C89]
+!0 = metadata !{metadata !"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\001", metadata !1, metadata !2, metadata !2, metadata !23, metadata !24, null} ; [ DW_TAG_compile_unit ] [/tmp//g.c] [DW_LANG_C89]
 !1 = metadata !{metadata !"g.c", metadata !"/tmp/"}
 !2 = metadata !{null}
-!3 = metadata !{i32 524334, metadata !1, null, metadata !"bar", metadata !"bar", metadata !"", i32 5, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [scope 0] [bar]
-!4 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{i32 524329, metadata !1}          ; [ DW_TAG_file_type ] [/tmp//g.c]
-!6 = metadata !{i32 524334, metadata !1, null, metadata !"fn", metadata !"fn", metadata !"fn", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @fn, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [fn]
-!7 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!3 = metadata !{metadata !"0x2e\00bar\00bar\00\005\001\001\000\006\000\001\000", metadata !1, null, metadata !4, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [scope 0] [bar]
+!4 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !1, metadata !5, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{metadata !"0x29", metadata !1}          ; [ DW_TAG_file_type ] [/tmp//g.c]
+!6 = metadata !{metadata !"0x2e\00fn\00fn\00fn\006\000\001\000\006\000\001\000", metadata !1, null, metadata !7, null, i32 ()* @fn, null, null, null} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [fn]
+!7 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !1, metadata !5, null, metadata !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 524324, metadata !1, metadata !5, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 524334, metadata !1, null, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 7] [def] [scope 0] [foo]
-!11 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", metadata !1, metadata !5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{metadata !"0x2e\00foo\00foo\00foo\007\000\001\000\006\000\001\000", metadata !1, null, metadata !11, null, i32 (i32)* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 7] [def] [scope 0] [foo]
+!11 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !1, metadata !5, null, metadata !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{metadata !9, metadata !9}
-!13 = metadata !{i32 524544, metadata !14, metadata !"bb", metadata !5, i32 5, metadata !9}
-!14 = metadata !{i32 524299, metadata !1, metadata !3, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
-!15 = metadata !{i32 524545, metadata !10, metadata !"i", metadata !5, i32 7, metadata !9}
-!16 = metadata !{i32 524340, i32 0, metadata !5, metadata !"abcd", metadata !"abcd", metadata !"", metadata !5, i32 2, metadata !9, i1 true, i1 true, null, null}
-!17 = metadata !{i32 524340, i32 0, metadata !5, metadata !"xyz", metadata !"xyz", metadata !"", metadata !5, i32 3, metadata !9, i1 false, i1 true, i32* @xyz, null}
+!13 = metadata !{metadata !"0x100\00bb\005\000", metadata !14, metadata !5, metadata !9} ; [ DW_TAG_auto_variable ]
+!14 = metadata !{metadata !"0xb\005\000\000", metadata !1, metadata !3} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
+!15 = metadata !{metadata !"0x101\00i\007\000", metadata !10, metadata !5, metadata !9} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{metadata !"0x34\00abcd\00abcd\00\002\001\001", metadata !5, metadata !5, metadata !9, null, null} ; [ DW_TAG_variable ]
+!17 = metadata !{metadata !"0x34\00xyz\00xyz\00\003\000\001", metadata !5, metadata !5, metadata !9, i32* @xyz, null} ; [ DW_TAG_variable ]
 !18 = metadata !{i32 6, i32 0, metadata !19, null}
-!19 = metadata !{i32 524299, metadata !1, metadata !6, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
+!19 = metadata !{metadata !"0xb\006\000\000", metadata !1, metadata !6} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
 !20 = metadata !{i32 7, i32 0, metadata !10, null}
 !21 = metadata !{i32 10, i32 0, metadata !22, null}
-!22 = metadata !{i32 524299, metadata !1, metadata !10, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
+!22 = metadata !{metadata !"0xb\007\000\000", metadata !1, metadata !10} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
 !23 = metadata !{metadata !3, metadata !6, metadata !10}
 !24 = metadata !{metadata !16, metadata !17}
-!25 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!25 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
diff --git a/test/Transforms/TailCallElim/EraseBB.ll b/test/Transforms/TailCallElim/EraseBB.ll
new file mode 100644
index 0000000..c8290d7
--- /dev/null
+++ b/test/Transforms/TailCallElim/EraseBB.ll
@@ -0,0 +1,26 @@
+; RUN: opt -tailcallelim -S < %s 2>&1 | FileCheck %s
+
+; CHECK: add nsw i32
+; CHECK-NEXT: br label
+; CHECK: add nsw i32
+; CHECK-NEXT: br label
+; CHECK-NOT: Uses remain when a value is destroyed
+define i32 @test(i32 %n) {
+entry:
+  %cmp = icmp slt i32 %n, 2
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %v1 = add nsw i32 %n, -2
+  %call1 = tail call i32 @test(i32 %v1)
+  br label %return
+
+if.else:                                          ; preds = %entry
+  %v2 = add nsw i32 %n, 4
+  %call2 = tail call i32 @test(i32 %v2)
+  br label %return
+
+return:                                           ; preds = %if.end, %if.else
+  %retval = phi i32 [ %call1, %if.then ], [ %call2, %if.else ]
+  ret i32 %retval
+}
diff --git a/test/Transforms/TailCallElim/basic.ll b/test/Transforms/TailCallElim/basic.ll
index 341736d..8e9814b 100644
--- a/test/Transforms/TailCallElim/basic.ll
+++ b/test/Transforms/TailCallElim/basic.ll
@@ -174,3 +174,17 @@ if.end:
 return:
   ret void
 }
+
+declare void @test11_helper1(i8** nocapture, i8*)
+declare void @test11_helper2(i8*)
+define void @test11() {
+; CHECK-LABEL: @test11
+; CHECK-NOT: tail
+  %a = alloca i8*
+  %b = alloca i8
+  call void @test11_helper1(i8** %a, i8* %b)  ; a = &b
+  %c = load i8** %a
+  call void @test11_helper2(i8* %c)
+; CHECK: call void @test11_helper2
+  ret void
+}
diff --git a/test/Transforms/TailCallElim/reorder_load.ll b/test/Transforms/TailCallElim/reorder_load.ll
index 53c65da..2e350d6 100644
--- a/test/Transforms/TailCallElim/reorder_load.ll
+++ b/test/Transforms/TailCallElim/reorder_load.ll
@@ -1,6 +1,8 @@
 ; RUN: opt < %s -tailcallelim -S | FileCheck %s
 ; PR4323
 
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
 ; Several cases where tail call elimination should move the load above the call,
 ; then eliminate the tail recursion.
 
@@ -12,6 +14,11 @@
 ; This load can be moved above the call because the function won't write to it
 ; and the call has no side effects.
 define fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
+; CHECK-LABEL: @raise_load_1(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NOT: call
+; CHECK: }
 entry:
 	%tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
 	br i1 %tmp2, label %if, label %else
@@ -21,7 +28,6 @@ if:		; preds = %entry
 
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
-; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -32,6 +38,11 @@ else:		; preds = %entry
 ; This load can be moved above the call because the function won't write to it
 ; and the load provably can't trap.
 define fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+; CHECK-LABEL: @raise_load_2(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NOT: call
+; CHECK: }
 entry:
 	%tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
 	br i1 %tmp2, label %if, label %else
@@ -48,7 +59,6 @@ unwind:		; preds = %else
 
 recurse:		; preds = %else
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
-; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* @global		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -59,6 +69,11 @@ recurse:		; preds = %else
 ; This load can be safely moved above the call (even though it's from an
 ; extern_weak global) because the call has no side effects.
 define fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
+; CHECK-LABEL: @raise_load_3(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NOT: call
+; CHECK: }
 entry:
 	%tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
 	br i1 %tmp2, label %if, label %else
@@ -68,7 +83,6 @@ if:		; preds = %entry
 
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
-; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* @extern_weak_global		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -80,6 +94,12 @@ else:		; preds = %entry
 ; unknown pointer (which normally means it might trap) because the first load
 ; proves it doesn't trap.
 define fastcc i32 @raise_load_4(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+; CHECK-LABEL: @raise_load_4(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NEXT: load i32*
+; CHECK-NOT: call
+; CHECK: }
 entry:
 	%tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
 	br i1 %tmp2, label %if, label %else
@@ -97,7 +117,6 @@ unwind:		; preds = %else
 recurse:		; preds = %else
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
 	%first = load i32* %a_arg		; <i32> [#uses=1]
-; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7)		; <i32> [#uses=1]
 	%second = load i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %second, %tmp8		; <i32> [#uses=1]
diff --git a/test/Transforms/Util/flattencfg.ll b/test/Transforms/Util/flattencfg.ll
new file mode 100644
index 0000000..4fcb77a
--- /dev/null
+++ b/test/Transforms/Util/flattencfg.ll
@@ -0,0 +1,26 @@
+; RUN: opt -flattencfg -S < %s | FileCheck %s
+
+
+; This test checks whether the pass completes without a crash.
+; The code is not transformed in any way
+;
+; CHECK-LABEL: @test_not_crash
+define void @test_not_crash(i32 %in_a) #0 {
+entry:
+  %cmp0 = icmp eq i32 %in_a, -1
+  %cmp1 = icmp ne i32 %in_a, 0
+  %cond0 = and i1 %cmp0, %cmp1
+  br i1 %cond0, label %b0, label %b1
+
+b0:                                ; preds = %entry
+  %cmp2 = icmp eq i32 %in_a, 0
+  %cmp3 = icmp ne i32 %in_a, 1
+  %cond1 = or i1 %cmp2, %cmp3
+  br i1 %cond1, label %exit, label %b1
+
+b1:                                       ; preds = %entry, %b0
+  br label %exit
+
+exit:                               ; preds = %entry, %b0, %b1
+  ret void
+}
diff --git a/test/Transforms/Util/lowerswitch.ll b/test/Transforms/Util/lowerswitch.ll
new file mode 100644
index 0000000..06bd4cc
--- /dev/null
+++ b/test/Transforms/Util/lowerswitch.ll
@@ -0,0 +1,22 @@
+; RUN: opt -lowerswitch -S < %s | FileCheck %s
+
+; Test that we don't crash and have a different basic block for each incoming edge.
+define void @test_lower_switch() {
+; CHECK-LABEL: @test_lower_switch
+; CHECK: %merge = phi i64 [ 1, %BB3 ], [ 0, %NewDefault ], [ 0, %NodeBlock5 ], [ 0, %LeafBlock1 ]
+BB1:
+  switch i32 undef, label %BB2 [
+    i32 3, label %BB2
+    i32 5, label %BB2
+    i32 0, label %BB3
+    i32 2, label %BB3
+    i32 4, label %BB3
+  ]
+
+BB2:
+  %merge = phi i64 [ 1, %BB3 ], [ 0, %BB1 ], [ 0, %BB1 ], [ 0, %BB1 ]
+  ret void
+
+BB3:
+  br label %BB2
+}
author	Stephen Hines <srhines@google.com>	2014-12-01 14:51:49 -0800
committer	Stephen Hines <srhines@google.com>	2014-12-02 16:08:10 -0800
commit	37ed9c199ca639565f6ce88105f9e39e898d82d0 (patch)
tree	8fb36d3910e3ee4c4e1b7422f4f017108efc52f5 /test/Transforms
parent	d2327b22152ced7bc46dc629fc908959e8a52d03 (diff)
download	external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.zip external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.gz external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.bz2