Diffstat (limited to 'test')
134 files changed, 4970 insertions, 205 deletions
diff --git a/test/Analysis/BasicAA/noalias-geps.ll b/test/Analysis/BasicAA/noalias-geps.ll new file mode 100644 index 0000000..a93d778 --- /dev/null +++ b/test/Analysis/BasicAA/noalias-geps.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +; Check that geps with equal base offsets of noalias base pointers stay noalias. +define i32 @test(i32* %p, i16 %i) { + %pi = getelementptr i32* %p, i32 0 + %pi.next = getelementptr i32* %p, i32 1 + %b = icmp eq i16 %i, 0 + br i1 %b, label %bb1, label %bb2 + +bb1: + %f = getelementptr i32* %pi, i32 1 + %g = getelementptr i32* %pi.next, i32 1 + br label %bb3 +bb2: + %f2 = getelementptr i32* %pi, i32 1 + %g2 = getelementptr i32* %pi.next, i32 1 + br label %bb3 + +bb3: + %ptr_phi = phi i32* [ %f, %bb1 ], [ %f2, %bb2 ] + %ptr_phi2 = phi i32* [ %g, %bb1 ], [ %g2, %bb2 ] +; CHECK: NoAlias: i32* %f1, i32* %g1 + %f1 = getelementptr i32* %ptr_phi , i32 1 + %g1 = getelementptr i32* %ptr_phi2 , i32 1 + +ret i32 0 +} + +; Check that geps with equal indices of noalias base pointers stay noalias. +define i32 @test2([2 x i32]* %p, i32 %i) { + %pi = getelementptr [2 x i32]* %p, i32 0 + %pi.next = getelementptr [2 x i32]* %p, i32 1 + %b = icmp eq i32 %i, 0 + br i1 %b, label %bb1, label %bb2 + +bb1: + %f = getelementptr [2 x i32]* %pi, i32 1 + %g = getelementptr [2 x i32]* %pi.next, i32 1 + br label %bb3 +bb2: + %f2 = getelementptr [2 x i32]* %pi, i32 1 + %g2 = getelementptr [2 x i32]* %pi.next, i32 1 + br label %bb3 +bb3: + %ptr_phi = phi [2 x i32]* [ %f, %bb1 ], [ %f2, %bb2 ] + %ptr_phi2 = phi [2 x i32]* [ %g, %bb1 ], [ %g2, %bb2 ] +; CHECK: NoAlias: i32* %f1, i32* %g1 + %f1 = getelementptr [2 x i32]* %ptr_phi , i32 1, i32 %i + %g1 = getelementptr [2 x i32]* %ptr_phi2 , i32 1, i32 %i + +ret i32 0 +} diff --git a/test/Analysis/BasicAA/phi-speculation.ll b/test/Analysis/BasicAA/phi-speculation.ll new file mode 100644 index 0000000..21c6592 --- /dev/null +++ b/test/Analysis/BasicAA/phi-speculation.ll @@ -0,0 +1,33 @@ +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s + +; ptr_phi and ptr2_phi do not alias. 
+; CHECK: NoAlias: i32* %ptr2_phi, i32* %ptr_phi + +define i32 @test_noalias(i32* %ptr2, i32 %count, i32* %coeff) { +entry: + %ptr = getelementptr inbounds i32* %ptr2, i64 1 + br label %while.body + +while.body: + %num = phi i32 [ %count, %entry ], [ %dec, %while.body ] + %ptr_phi = phi i32* [ %ptr, %entry ], [ %ptr_inc, %while.body ] + %ptr2_phi = phi i32* [ %ptr2, %entry ], [ %ptr2_inc, %while.body ] + %result.09 = phi i32 [ 0 , %entry ], [ %add, %while.body ] + %dec = add nsw i32 %num, -1 + %0 = load i32* %ptr_phi, align 4 + store i32 %0, i32* %ptr2_phi, align 4 + %1 = load i32* %coeff, align 4 + %2 = load i32* %ptr_phi, align 4 + %mul = mul nsw i32 %1, %2 + %add = add nsw i32 %mul, %result.09 + %tobool = icmp eq i32 %dec, 0 + %ptr_inc = getelementptr inbounds i32* %ptr_phi, i64 1 + %ptr2_inc = getelementptr inbounds i32* %ptr2_phi, i64 1 + br i1 %tobool, label %the_exit, label %while.body + +the_exit: + ret i32 %add +} diff --git a/test/Analysis/BranchProbabilityInfo/basic.ll b/test/Analysis/BranchProbabilityInfo/basic.ll index 74d06a1..08adfa8 100644 --- a/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/test/Analysis/BranchProbabilityInfo/basic.ll @@ -88,3 +88,30 @@ exit: } !1 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4} + +define i32 @test4(i32 %x) nounwind uwtable readnone ssp { +; CHECK: Printing analysis {{.*}} for function 'test4' +entry: + %conv = sext i32 %x to i64 + switch i64 %conv, label %return [ + i64 0, label %sw.bb + i64 1, label %sw.bb + i64 2, label %sw.bb + i64 5, label %sw.bb1 + ], !prof !2 +; CHECK: edge entry -> return probability is 7 / 85 +; CHECK: edge entry -> sw.bb probability is 14 / 85 +; CHECK: edge entry -> sw.bb1 probability is 64 / 85 + +sw.bb: + br label %return + +sw.bb1: + br label %return + +return: + %retval.0 = phi i32 [ 5, %sw.bb1 ], [ 1, %sw.bb ], [ 0, %entry ] + ret i32 %retval.0 +} + +!2 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64} diff --git a/test/Analysis/Profiling/load-branch-weights-ifs.ll b/test/Analysis/Profiling/load-branch-weights-ifs.ll new file mode 100644 index 0000000..ddbaf96 --- /dev/null +++ b/test/Analysis/Profiling/load-branch-weights-ifs.ll @@ -0,0 +1,122 @@ +; RUN: opt -insert-edge-profiling -o %t1 < %s +; RUN: rm -f %t1.prof_data +; RUN: lli -load %llvmshlibdir/libprofile_rt%shlibext %t1 \ +; RUN: -llvmprof-output %t1.prof_data +; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \ +; RUN: | FileCheck %s +; RUN: rm -f %t1.prof_data + +; FIXME: profile_rt.dll could be built on win32. +; REQUIRES: loadable_module + +;; func_mod - Branch taken 6 times in 7. +define i32 @func_mod(i32 %N) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %N.addr = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + %0 = load i32* %N.addr, align 4 + %rem = srem i32 %0, 7 + %tobool = icmp ne i32 %rem, 0 + br i1 %tobool, label %if.then, label %if.else +; CHECK: br i1 %tobool, label %if.then, label %if.else, !prof !0 + +if.then: + store i32 1, i32* %retval + br label %return + +if.else: + store i32 0, i32* %retval + br label %return + +return: + %1 = load i32* %retval + ret i32 %1 +} + +;; func_const_true - conditional branch which 100% taken probability. 
+define i32 @func_const_true(i32 %N) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %N.addr = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + %0 = load i32* %N.addr, align 4 + %cmp = icmp eq i32 %0, 1 + br i1 %cmp, label %if.then, label %if.end +; CHECK: br i1 %cmp, label %if.then, label %if.end, !prof !1 + +if.then: + store i32 1, i32* %retval + br label %return + +if.end: + store i32 0, i32* %retval + br label %return + +return: + %1 = load i32* %retval + ret i32 %1 +} + +;; func_const_true - conditional branch which 100% not-taken probability. +define i32 @func_const_false(i32 %N) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %N.addr = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + %0 = load i32* %N.addr, align 4 + %cmp = icmp eq i32 %0, 1 + br i1 %cmp, label %if.then, label %if.end +; CHECK: br i1 %cmp, label %if.then, label %if.end, !prof !2 + +if.then: + store i32 1, i32* %retval + br label %return + +if.end: + store i32 0, i32* %retval + br label %return + +return: + %1 = load i32* %retval + ret i32 %1 +} + +define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + %loop = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %loop, align 4 + br label %for.cond + +for.cond: + %0 = load i32* %loop, align 4 + %cmp = icmp slt i32 %0, 7000 + br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !3 + +for.body: + %1 = load i32* %loop, align 4 + %call = call i32 @func_mod(i32 %1) + br label %for.inc + +for.inc: + %2 = load i32* %loop, align 4 + %inc = add nsw i32 %2, 1 + store i32 %inc, i32* %loop, align 4 + br label %for.cond + +for.end: + %call1 = call i32 @func_const_true(i32 1) + %call2 = call i32 @func_const_false(i32 0) + ret i32 0 +} + +; CHECK: !0 = metadata !{metadata !"branch_weights", i32 6000, i32 1000} +; CHECK: !1 = metadata !{metadata !"branch_weights", i32 1, i32 0} +; CHECK: !2 = metadata !{metadata !"branch_weights", i32 0, i32 1} +; CHECK: !3 = metadata !{metadata !"branch_weights", i32 7000, i32 1} +; CHECK-NOT: !4 diff --git a/test/Analysis/Profiling/load-branch-weights-loops.ll b/test/Analysis/Profiling/load-branch-weights-loops.ll new file mode 100644 index 0000000..476f377 --- /dev/null +++ b/test/Analysis/Profiling/load-branch-weights-loops.ll @@ -0,0 +1,188 @@ +; RUN: opt -insert-edge-profiling -o %t1 < %s +; RUN: rm -f %t1.prof_data +; RUN: lli -load %llvmshlibdir/libprofile_rt%shlibext %t1 \ +; RUN: -llvmprof-output %t1.prof_data +; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \ +; RUN: | FileCheck %s +; RUN: rm -f %t1.prof_data + +; FIXME: profile_rt.dll could be built on win32. +; REQUIRES: loadable_module + +;; func_for - Test branch probabilities for a vanilla for loop. 
+define i32 @func_for(i32 %N) nounwind uwtable { +entry: + %N.addr = alloca i32, align 4 + %ret = alloca i32, align 4 + %loop = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + store i32 0, i32* %ret, align 4 + store i32 0, i32* %loop, align 4 + br label %for.cond + +for.cond: + %0 = load i32* %loop, align 4 + %1 = load i32* %N.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !0 + +for.body: + %2 = load i32* %N.addr, align 4 + %3 = load i32* %ret, align 4 + %add = add nsw i32 %3, %2 + store i32 %add, i32* %ret, align 4 + br label %for.inc + +for.inc: + %4 = load i32* %loop, align 4 + %inc = add nsw i32 %4, 1 + store i32 %inc, i32* %loop, align 4 + br label %for.cond + +for.end: + %5 = load i32* %ret, align 4 + ret i32 %5 +} + +;; func_for_odd - Test branch probabilities for a for loop with a continue and +;; a break. +define i32 @func_for_odd(i32 %N) nounwind uwtable { +entry: + %N.addr = alloca i32, align 4 + %ret = alloca i32, align 4 + %loop = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + store i32 0, i32* %ret, align 4 + store i32 0, i32* %loop, align 4 + br label %for.cond + +for.cond: + %0 = load i32* %loop, align 4 + %1 = load i32* %N.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !1 + +for.body: + %2 = load i32* %loop, align 4 + %rem = srem i32 %2, 10 + %tobool = icmp ne i32 %rem, 0 + br i1 %tobool, label %if.then, label %if.end +; CHECK: br i1 %tobool, label %if.then, label %if.end, !prof !2 + +if.then: + br label %for.inc + +if.end: + %3 = load i32* %loop, align 4 + %cmp1 = icmp eq i32 %3, 500 + br i1 %cmp1, label %if.then2, label %if.end3 +; CHECK: br i1 %cmp1, label %if.then2, label %if.end3, !prof !3 + +if.then2: + br label %for.end + +if.end3: + %4 = load i32* %N.addr, align 4 + %5 = load i32* %ret, align 4 + %add = add nsw i32 %5, %4 + store i32 %add, i32* %ret, align 4 + br label %for.inc + +for.inc: + %6 = load i32* %loop, align 4 + %inc = add nsw i32 %6, 1 + store i32 %inc, i32* %loop, align 4 + br label %for.cond + +for.end: + %7 = load i32* %ret, align 4 + ret i32 %7 +} + +;; func_while - Test branch probability in a vanilla while loop. +define i32 @func_while(i32 %N) nounwind uwtable { +entry: + %N.addr = alloca i32, align 4 + %ret = alloca i32, align 4 + %loop = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + store i32 0, i32* %ret, align 4 + store i32 0, i32* %loop, align 4 + br label %while.cond + +while.cond: + %0 = load i32* %loop, align 4 + %1 = load i32* %N.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %while.body, label %while.end +; CHECK: br i1 %cmp, label %while.body, label %while.end, !prof !0 + +while.body: + %2 = load i32* %N.addr, align 4 + %3 = load i32* %ret, align 4 + %add = add nsw i32 %3, %2 + store i32 %add, i32* %ret, align 4 + %4 = load i32* %loop, align 4 + %inc = add nsw i32 %4, 1 + store i32 %inc, i32* %loop, align 4 + br label %while.cond + +while.end: + %5 = load i32* %ret, align 4 + ret i32 %5 +} + +;; func_while - Test branch probability in a vanilla do-while loop. 
+define i32 @func_do_while(i32 %N) nounwind uwtable { +entry: + %N.addr = alloca i32, align 4 + %ret = alloca i32, align 4 + %loop = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + store i32 0, i32* %ret, align 4 + store i32 0, i32* %loop, align 4 + br label %do.body + +do.body: + %0 = load i32* %N.addr, align 4 + %1 = load i32* %ret, align 4 + %add = add nsw i32 %1, %0 + store i32 %add, i32* %ret, align 4 + %2 = load i32* %loop, align 4 + %inc = add nsw i32 %2, 1 + store i32 %inc, i32* %loop, align 4 + br label %do.cond + +do.cond: + %3 = load i32* %loop, align 4 + %4 = load i32* %N.addr, align 4 + %cmp = icmp slt i32 %3, %4 + br i1 %cmp, label %do.body, label %do.end +; CHECK: br i1 %cmp, label %do.body, label %do.end, !prof !4 + +do.end: + %5 = load i32* %ret, align 4 + ret i32 %5 +} + +define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + store i32 0, i32* %retval + %call = call i32 @func_for(i32 1000) + %call1 = call i32 @func_for_odd(i32 1000) + %call2 = call i32 @func_while(i32 1000) + %call3 = call i32 @func_do_while(i32 1000) + ret i32 0 +} + +!0 = metadata !{metadata !"branch_weights", i32 1000, i32 1} +!1 = metadata !{metadata !"branch_weights", i32 501, i32 0} +!2 = metadata !{metadata !"branch_weights", i32 450, i32 51} +!3 = metadata !{metadata !"branch_weights", i32 1, i32 50} +!4 = metadata !{metadata !"branch_weights", i32 999, i32 1} +; CHECK-NOT: !5 diff --git a/test/Analysis/Profiling/load-branch-weights-switches.ll b/test/Analysis/Profiling/load-branch-weights-switches.ll new file mode 100644 index 0000000..be11f04 --- /dev/null +++ b/test/Analysis/Profiling/load-branch-weights-switches.ll @@ -0,0 +1,165 @@ +; RUN: opt -insert-edge-profiling -o %t1 < %s +; RUN: rm -f %t1.prof_data +; RUN: lli -load %llvmshlibdir/libprofile_rt%shlibext %t1 \ +; RUN: -llvmprof-output %t1.prof_data +; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \ +; RUN: | FileCheck %s +; RUN: rm -f %t1.prof_data + +; FIXME: profile_rt.dll could be built on win32. +; REQUIRES: loadable_module + +;; func_switch - Test branch probabilities for a switch instruction with an +;; even chance of taking each case (or no case). +define i32 @func_switch(i32 %N) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %N.addr = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + %0 = load i32* %N.addr, align 4 + %rem = srem i32 %0, 4 + switch i32 %rem, label %sw.epilog [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + ] +; CHECK: ], !prof !0 + +sw.bb: + store i32 5, i32* %retval + br label %return + +sw.bb1: + store i32 6, i32* %retval + br label %return + +sw.bb2: + store i32 7, i32* %retval + br label %return + +sw.epilog: + store i32 8, i32* %retval + br label %return + +return: + %1 = load i32* %retval + ret i32 %1 +} + +;; func_switch_switch - Test branch probabilities in a switch-instruction that +;; leads to further switch instructions. The first-tier switch occludes some +;; possibilities in the second-tier switches, leading to some branches having a +;; 0 probability. 
+define i32 @func_switch_switch(i32 %N) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %N.addr = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + %0 = load i32* %N.addr, align 4 + %rem = srem i32 %0, 2 + switch i32 %rem, label %sw.default11 [ + i32 0, label %sw.bb + i32 1, label %sw.bb5 + ] +; CHECK: ], !prof !1 + +sw.bb: + %1 = load i32* %N.addr, align 4 + %rem1 = srem i32 %1, 4 + switch i32 %rem1, label %sw.default [ + i32 0, label %sw.bb2 + i32 1, label %sw.bb3 + i32 2, label %sw.bb4 + ] +; CHECK: ], !prof !2 + +sw.bb2: + store i32 5, i32* %retval + br label %return + +sw.bb3: + store i32 6, i32* %retval + br label %return + +sw.bb4: + store i32 7, i32* %retval + br label %return + +sw.default: + store i32 8, i32* %retval + br label %return + +sw.bb5: + %2 = load i32* %N.addr, align 4 + %rem6 = srem i32 %2, 4 + switch i32 %rem6, label %sw.default10 [ + i32 0, label %sw.bb7 + i32 1, label %sw.bb8 + i32 2, label %sw.bb9 + ] +; CHECK: ], !prof !3 + +sw.bb7: + store i32 9, i32* %retval + br label %return + +sw.bb8: + store i32 10, i32* %retval + br label %return + +sw.bb9: + store i32 11, i32* %retval + br label %return + +sw.default10: + store i32 12, i32* %retval + br label %return + +sw.default11: + store i32 13, i32* %retval + br label %return + +return: + %3 = load i32* %retval + ret i32 %3 +} + +define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + %loop = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %loop, align 4 + br label %for.cond + +for.cond: + %0 = load i32* %loop, align 4 + %cmp = icmp slt i32 %0, 4000 + br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !4 + +for.body: + %1 = load i32* %loop, align 4 + %call = call i32 @func_switch(i32 %1) + %2 = load i32* %loop, align 4 + %call1 = call i32 @func_switch_switch(i32 %2) + br label %for.inc + +for.inc: + %3 = load i32* %loop, align 4 + %inc = add nsw i32 %3, 1 + store i32 %inc, i32* %loop, align 4 + br label %for.cond + +for.end: + ret i32 0 +} + +; CHECK: !0 = metadata !{metadata !"branch_weights", i32 1000, i32 1000, i32 1000, i32 1000} +; CHECK: !1 = metadata !{metadata !"branch_weights", i32 0, i32 2000, i32 2000} +; CHECK: !2 = metadata !{metadata !"branch_weights", i32 0, i32 1000, i32 0, i32 1000} +; CHECK: !3 = metadata !{metadata !"branch_weights", i32 1000, i32 0, i32 1000, i32 0} +; CHECK: !4 = metadata !{metadata !"branch_weights", i32 4000, i32 1} +; CHECK-NOT: !5 diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml index b8eb6d3..61be4b7 100644 --- a/test/Bindings/Ocaml/vmcore.ml +++ b/test/Bindings/Ocaml/vmcore.ml @@ -113,14 +113,14 @@ let test_constants () = ignore (define_global "const_int_string" c m); insist (i32_type = type_of c); - (* RUN: grep 'const_string.*"cruel\00world"' < %t.ll + (* RUN: grep 'const_string.*"cruel\\00world"' < %t.ll *) group "string"; let c = const_string context "cruel\000world" in ignore (define_global "const_string" c m); insist ((array_type i8_type 11) = type_of c); - (* RUN: grep 'const_stringz.*"hi\00again\00"' < %t.ll + (* RUN: grep 'const_stringz.*"hi\\00again\\00"' < %t.ll *) group "stringz"; let c = const_stringz context "hi\000again" in @@ -187,7 +187,7 @@ let test_constants () = ignore (define_global "const_all_ones" c m); group "pointer null"; begin - (* RUN: grep "const_pointer_null = global i64* null" < %t.ll + (* RUN: grep 
"const_pointer_null = global i64\* null" < %t.ll *) let c = const_pointer_null (pointer_type i64_type) in ignore (define_global "const_pointer_null" c m); @@ -542,7 +542,7 @@ let test_users () = (*===-- Aliases -----------------------------------------------------------===*) let test_aliases () = - (* RUN: grep "@alias = alias i32* @aliasee" < %t.ll + (* RUN: grep "@alias = alias i32\* @aliasee" < %t.ll *) let v = declare_global i32_type "aliasee" m in ignore (add_alias m (pointer_type i32_type) v "alias") @@ -554,7 +554,7 @@ let test_functions () = let ty = function_type i32_type [| i32_type; i64_type |] in let ty2 = function_type i8_type [| i8_type; i64_type |] in - (* RUN: grep "declare i32 @Fn1\(i32, i64\)" < %t.ll + (* RUN: grep 'declare i32 @Fn1(i32, i64)' < %t.ll *) begin group "declare"; insist (None = lookup_function "Fn1" m); @@ -935,7 +935,7 @@ let test_builder () = group "malloc/free"; begin (* RUN: grep "call.*@malloc(i32 ptrtoint" < %t.ll - * RUN: grep "call.*@free(i8*" < %t.ll + * RUN: grep "call.*@free(i8\*" < %t.ll * RUN: grep "call.*@malloc(i32 %" < %t.ll *) let bb1 = append_block context "MallocBlock1" fn in @@ -947,7 +947,7 @@ let test_builder () = end; group "indirectbr"; begin - (* RUN: grep "indirectbr i8* blockaddress(@X7, %IBRBlock2), [label %IBRBlock2, label %IBRBlock3]" < %t.ll + (* RUN: grep "indirectbr i8\* blockaddress(@X7, %IBRBlock2), \[label %IBRBlock2, label %IBRBlock3\]" < %t.ll *) let bb1 = append_block context "IBRBlock1" fn in @@ -1054,10 +1054,10 @@ let test_builder () = (* RUN: grep "%build_alloca = alloca i32" < %t.ll * RUN: grep "%build_array_alloca = alloca i32, i32 %P2" < %t.ll - * RUN: grep "%build_load = load i32* %build_array_alloca" < %t.ll - * RUN: grep "store i32 %P2, i32* %build_alloca" < %t.ll - * RUN: grep "%build_gep = getelementptr i32* %build_array_alloca, i32 %P2" < %t.ll - * RUN: grep "%build_in_bounds_gep = getelementptr inbounds i32* %build_array_alloca, i32 %P2" < %t.ll + * RUN: grep "%build_load = load i32\* %build_array_alloca" < %t.ll + * RUN: grep "store i32 %P2, i32\* %build_alloca" < %t.ll + * RUN: grep "%build_gep = getelementptr i32\* %build_array_alloca, i32 %P2" < %t.ll + * RUN: grep "%build_in_bounds_gep = getelementptr inbounds i32\* %build_array_alloca, i32 %P2" < %t.ll * RUN: grep "%build_struct_gep = getelementptr inbounds.*%build_alloca2, i32 0, i32 1" < %t.ll *) let alloca = build_alloca i32_type "build_alloca" b in @@ -1106,14 +1106,14 @@ let test_builder () = * RUN: grep "%build_fptrunc2 = fptrunc double %build_sitofp to float" < %t.ll * RUN: grep "%build_fpext = fpext float %build_fptrunc to double" < %t.ll * RUN: grep "%build_fpext2 = fpext float %build_fptrunc to double" < %t.ll - * RUN: grep "%build_inttoptr = inttoptr i32 %P1 to i8*" < %t.ll - * RUN: grep "%build_ptrtoint = ptrtoint i8* %build_inttoptr to i64" < %t.ll - * RUN: grep "%build_ptrtoint2 = ptrtoint i8* %build_inttoptr to i64" < %t.ll + * RUN: grep "%build_inttoptr = inttoptr i32 %P1 to i8\*" < %t.ll + * RUN: grep "%build_ptrtoint = ptrtoint i8\* %build_inttoptr to i64" < %t.ll + * RUN: grep "%build_ptrtoint2 = ptrtoint i8\* %build_inttoptr to i64" < %t.ll * RUN: grep "%build_bitcast = bitcast i64 %build_ptrtoint to double" < %t.ll * RUN: grep "%build_bitcast2 = bitcast i64 %build_ptrtoint to double" < %t.ll * RUN: grep "%build_bitcast3 = bitcast i64 %build_ptrtoint to double" < %t.ll * RUN: grep "%build_bitcast4 = bitcast i64 %build_ptrtoint to double" < %t.ll - * RUN: grep "%build_pointercast = bitcast i8* %build_inttoptr to i16*" < %t.ll + * 
RUN: grep "%build_pointercast = bitcast i8\* %build_inttoptr to i16*" < %t.ll *) let inst28 = build_trunc p1 i8_type "build_trunc" atentry in let inst29 = build_zext inst28 i32_type "build_zext" atentry in @@ -1148,7 +1148,7 @@ let test_builder () = * RUN: grep "%build_fcmp_false = fcmp false float %F1, %F2" < %t.ll * RUN: grep "%build_fcmp_true = fcmp true float %F2, %F1" < %t.ll * RUN: grep "%build_is_null.*= icmp eq.*%X0,.*null" < %t.ll - * RUN: grep "%build_is_not_null = icmp ne i8* %X1, null" < %t.ll + * RUN: grep "%build_is_not_null = icmp ne i8\* %X1, null" < %t.ll * RUN: grep "%build_ptrdiff" < %t.ll *) ignore (build_icmp Icmp.Ne p1 p2 "build_icmp_ne" atentry); @@ -1167,7 +1167,7 @@ let test_builder () = group "miscellaneous"; begin (* RUN: grep "%build_call = tail call cc63 i32 @.*(i32 signext %P2, i32 %P1)" < %t.ll * RUN: grep "%build_select = select i1 %build_icmp, i32 %P1, i32 %P2" < %t.ll - * RUN: grep "%build_va_arg = va_arg i8** null, i32" < %t.ll + * RUN: grep "%build_va_arg = va_arg i8\*\* null, i32" < %t.ll * RUN: grep "%build_extractelement = extractelement <4 x i32> %Vec1, i32 %P2" < %t.ll * RUN: grep "%build_insertelement = insertelement <4 x i32> %Vec1, i32 %P1, i32 %P2" < %t.ll * RUN: grep "%build_shufflevector = shufflevector <4 x i32> %Vec1, <4 x i32> %Vec2, <4 x i32> <i32 1, i32 1, i32 0, i32 0>" < %t.ll @@ -1240,8 +1240,8 @@ let test_builder () = end; group "dbg"; begin - (* RUN: grep "%dbg = add i32 %P1, %P2, !dbg !1" < %t.ll - * RUN: grep "!1 = metadata !{i32 2, i32 3, metadata !2, metadata !2}" < %t.ll + (* RUN: grep '%dbg = add i32 %P1, %P2, !dbg !1' < %t.ll + * RUN: grep '!1 = metadata !{i32 2, i32 3, metadata !2, metadata !2}' < %t.ll *) insist ((current_debug_location atentry) = None); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 991cc9d..b9b2237 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -18,6 +18,7 @@ add_lit_testsuite(check-llvm "Running the LLVM regression tests" llvm-dis llvm-extract llvm-dwarfdump llvm-link llvm-mc llvm-nm llvm-objdump llvm-readobj macho-dump opt + profile_rt-shared FileCheck count not yaml2obj ) diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index f80b44f..1769ee5 100644 --- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -300,3 +300,34 @@ L.entry: declare <4 x float> @llvm.sin.v4f32(<4 x float>) nounwind readonly +define void @test_floor(<4 x float>* %X) nounwind { + +; CHECK: test_floor: + +; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} +; CHECK: movt [[reg0]], :upper16:{{.*}} +; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}floorf + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}floorf + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}floorf + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}floorf + +; CHECK: vstmia {{.*}} + +L.entry: + %0 = load <4 x float>* @A, align 16 + %1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %0) + store <4 x float> %1, <4 x float>* %X, align 16 + ret void +} + +declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readonly + diff --git a/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll b/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll new file mode 100644 index 0000000..ec7f72d --- /dev/null +++ b/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll @@ -0,0 +1,129 @@ +; RUN: llc < %s -mcpu=cortex-a8 -march=thumb +; Test that this doesn't crash. 
+; <rdar://problem/12183003> + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios5.1.0" + +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly + +declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) nounwind + +define void @findEdges(i8*) nounwind ssp { + %2 = icmp sgt i32 undef, 0 + br i1 %2, label %5, label %3 + +; <label>:3 ; preds = %5, %1 + %4 = phi i8* [ %0, %1 ], [ %19, %5 ] + ret void + +; <label>:5 ; preds = %5, %1 + %6 = phi i8* [ %19, %5 ], [ %0, %1 ] + %7 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* null, i32 1) + %8 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %7, 0 + %9 = getelementptr inbounds i8* null, i32 3 + %10 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %9, i32 1) + %11 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %10, 2 + %12 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %6, i32 1) + %13 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %12, 0 + %14 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %12, 1 + %15 = getelementptr inbounds i8* %6, i32 3 + %16 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %15, i32 1) + %17 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %16, 1 + %18 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %16, 2 + %19 = getelementptr inbounds i8* %6, i32 48 + %20 = bitcast <16 x i8> %13 to <2 x i64> + %21 = bitcast <16 x i8> %8 to <2 x i64> + %22 = bitcast <16 x i8> %14 to <2 x i64> + %23 = shufflevector <2 x i64> %22, <2 x i64> undef, <1 x i32> zeroinitializer + %24 = bitcast <1 x i64> %23 to <8 x i8> + %25 = zext <8 x i8> %24 to <8 x i16> + %26 = sub <8 x i16> zeroinitializer, %25 + %27 = bitcast <16 x i8> %17 to <2 x i64> + %28 = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %26) nounwind + %29 = mul <8 x i16> %28, %28 + %30 = add <8 x i16> zeroinitializer, %29 + %31 = tail call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> undef, <8 x i16> %30) nounwind + %32 = bitcast <16 x i8> %11 to <2 x i64> + %33 = shufflevector <2 x i64> %32, <2 x i64> undef, <1 x i32> zeroinitializer + %34 = bitcast <1 x i64> %33 to <8 x i8> + %35 = zext <8 x i8> %34 to <8 x i16> + %36 = sub <8 x i16> %35, zeroinitializer + %37 = bitcast <16 x i8> %18 to <2 x i64> + %38 = shufflevector <2 x i64> %37, <2 x i64> undef, <1 x i32> zeroinitializer + %39 = bitcast <1 x i64> %38 to <8 x i8> + %40 = zext <8 x i8> %39 to <8 x i16> + %41 = sub <8 x i16> zeroinitializer, %40 + %42 = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %36) nounwind + %43 = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %41) nounwind + %44 = mul <8 x i16> %42, %42 + %45 = mul <8 x i16> %43, %43 + %46 = add <8 x i16> %45, %44 + %47 = tail call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %31, <8 x i16> %46) nounwind + %48 = bitcast <8 x i16> %47 to <2 x i64> + %49 = shufflevector <2 x i64> %48, <2 x i64> undef, <1 x i32> zeroinitializer + %50 = bitcast <1 x i64> %49 to <4 x i16> + %51 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %50, <4 x i16> undef) nounwind + %52 = tail call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %51, <4 x i32> <i32 -6, i32 -6, i32 -6, i32 -6>) + %53 = bitcast <4 x i16> %52 to <1 x i64> + %54 = shufflevector <1 x i64> %53, <1 x i64> undef, <2 x i32> <i32 0, i32 1> + %55 = bitcast <2 x i64> %54 to <8 x i16> + 
%56 = tail call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %55, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>) + %57 = shufflevector <2 x i64> %20, <2 x i64> undef, <1 x i32> <i32 1> + %58 = bitcast <1 x i64> %57 to <8 x i8> + %59 = zext <8 x i8> %58 to <8 x i16> + %60 = sub <8 x i16> zeroinitializer, %59 + %61 = shufflevector <2 x i64> %21, <2 x i64> undef, <1 x i32> <i32 1> + %62 = bitcast <1 x i64> %61 to <8 x i8> + %63 = zext <8 x i8> %62 to <8 x i16> + %64 = sub <8 x i16> %63, zeroinitializer + %65 = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %60) nounwind + %66 = mul <8 x i16> %65, %65 + %67 = add <8 x i16> zeroinitializer, %66 + %68 = shufflevector <2 x i64> %27, <2 x i64> undef, <1 x i32> <i32 1> + %69 = bitcast <1 x i64> %68 to <8 x i8> + %70 = zext <8 x i8> %69 to <8 x i16> + %71 = sub <8 x i16> zeroinitializer, %70 + %72 = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> undef) nounwind + %73 = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %71) nounwind + %74 = mul <8 x i16> %72, %72 + %75 = mul <8 x i16> %73, %73 + %76 = add <8 x i16> %75, %74 + %77 = shufflevector <2 x i64> %32, <2 x i64> undef, <1 x i32> <i32 1> + %78 = bitcast <1 x i64> %77 to <8 x i8> + %79 = zext <8 x i8> %78 to <8 x i16> + %80 = sub <8 x i16> %79, zeroinitializer + %81 = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %80) nounwind + %82 = mul <8 x i16> %81, %81 + %83 = add <8 x i16> zeroinitializer, %82 + %84 = tail call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %76, <8 x i16> %83) nounwind + %85 = tail call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %67, <8 x i16> %84) nounwind + %86 = bitcast <8 x i16> %85 to <2 x i64> + %87 = shufflevector <2 x i64> %86, <2 x i64> undef, <1 x i32> <i32 1> + %88 = bitcast <1 x i64> %87 to <4 x i16> + %89 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %88, <4 x i16> undef) nounwind + %90 = tail call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %89, <4 x i32> <i32 -6, i32 -6, i32 -6, i32 -6>) + %91 = bitcast <4 x i16> %90 to <1 x i64> + %92 = shufflevector <1 x i64> undef, <1 x i64> %91, <2 x i32> <i32 0, i32 1> + %93 = bitcast <2 x i64> %92 to <8 x i16> + %94 = tail call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %93, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>) + %95 = bitcast <8 x i8> %56 to <1 x i64> + %96 = bitcast <8 x i8> %94 to <1 x i64> + %97 = shufflevector <1 x i64> %95, <1 x i64> %96, <2 x i32> <i32 0, i32 1> + %98 = bitcast <2 x i64> %97 to <16 x i8> + tail call void @llvm.arm.neon.vst1.v16i8(i8* null, <16 x i8> %98, i32 1) + %99 = icmp slt i32 undef, undef + br i1 %99, label %5, label %3 +} + +declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone + +declare <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone + +declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) nounwind readnone diff --git a/test/CodeGen/ARM/2012-08-30-select.ll b/test/CodeGen/ARM/2012-08-30-select.ll new file mode 100644 index 0000000..8471be5 --- /dev/null +++ b/test/CodeGen/ARM/2012-08-30-select.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s +; rdar://12201387 + +;CHECK: select_s_v_v +;CHECK: it 
ne +;CHECK-NEXT: vmovne.i32 +;CHECK: bx +define <16 x i8> @select_s_v_v(i32 %avail, i8* %bar) { +entry: + %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %bar, i32 1) + %and = and i32 %avail, 1 + %tobool = icmp eq i32 %and, 0 + %vld1. = select i1 %tobool, <16 x i8> %vld1, <16 x i8> zeroinitializer + ret <16 x i8> %vld1. +} + +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* , i32 ) + diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll index 8967730..6e6b363 100644 --- a/test/CodeGen/ARM/atomic-op.ll +++ b/test/CodeGen/ARM/atomic-op.ll @@ -159,3 +159,13 @@ entry: store i8 %3, i8* %old ret void } + +; CHECK: func4 +; This function should not need to use callee-saved registers. +; rdar://problem/12203728 +; CHECK-NOT: r4 +define i32 @func4(i32* %p) nounwind optsize ssp { +entry: + %0 = atomicrmw add i32* %p, i32 1 monotonic + ret i32 %0 +} diff --git a/test/CodeGen/ARM/crash-shufflevector.ll b/test/CodeGen/ARM/crash-shufflevector.ll index ece4234..bdc0e0e 100644 --- a/test/CodeGen/ARM/crash-shufflevector.ll +++ b/test/CodeGen/ARM/crash-shufflevector.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-- +; RUN: llc < %s -mtriple=armv7 declare void @g(<16 x i8>) define void @f(<4 x i8> %param1, <4 x i8> %param2) { @@ -7,4 +7,4 @@ define void @f(<4 x i8> %param1, <4 x i8> %param2) { %z = shufflevector <16 x i8> %y1, <16 x i8> %y2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> call void @g(<16 x i8> %z) ret void -} +}
\ No newline at end of file diff --git a/test/CodeGen/ARM/domain-conv-vmovs.ll b/test/CodeGen/ARM/domain-conv-vmovs.ll new file mode 100644 index 0000000..e19185b --- /dev/null +++ b/test/CodeGen/ARM/domain-conv-vmovs.ll @@ -0,0 +1,84 @@ +; RUN: llc -verify-machineinstrs -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s + +define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) { +; CHECK: test_vmovs_via_vext_lane0to0: + %vec = insertelement <2 x float> %in, float %arg, i32 0 + %res = fadd <2 x float> %vec, %vec + +; CHECK: vext.32 d1, d1, d0, #1 +; CHECK: vext.32 d1, d1, d1, #1 +; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1 + + ret <2 x float> %res +} + +define <2 x float> @test_vmovs_via_vext_lane0to1(float %arg, <2 x float> %in) { +; CHECK: test_vmovs_via_vext_lane0to1: + %vec = insertelement <2 x float> %in, float %arg, i32 1 + %res = fadd <2 x float> %vec, %vec + +; CHECK: vext.32 d1, d1, d1, #1 +; CHECK: vext.32 d1, d1, d0, #1 +; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1 + + ret <2 x float> %res +} + +define <2 x float> @test_vmovs_via_vext_lane1to0(float, float %arg, <2 x float> %in) { +; CHECK: test_vmovs_via_vext_lane1to0: + %vec = insertelement <2 x float> %in, float %arg, i32 0 + %res = fadd <2 x float> %vec, %vec + +; CHECK: vext.32 d1, d1, d1, #1 +; CHECK: vext.32 d1, d0, d1, #1 +; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1 + + ret <2 x float> %res +} + +define <2 x float> @test_vmovs_via_vext_lane1to1(float, float %arg, <2 x float> %in) { +; CHECK: test_vmovs_via_vext_lane1to1: + %vec = insertelement <2 x float> %in, float %arg, i32 1 + %res = fadd <2 x float> %vec, %vec + +; CHECK: vext.32 d1, d0, d1, #1 +; CHECK: vext.32 d1, d1, d1, #1 +; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1 + + ret <2 x float> %res +} + + +define float @test_vmovs_via_vdup(float, float %ret, float %lhs, float %rhs) { +; CHECK: test_vmovs_via_vdup: + + ; Do an operation (which will end up NEON because of +neonfp) to convince the + ; execution-domain pass that NEON is a good thing to use. + %res = fadd float %ret, %ret + ; It makes sense for LLVM to do the addition in d0 here, because it's going + ; to be returned. This means it will want a "vmov s0, s1": +; CHECK: vdup.32 d0, d0[1] + + ret float %res +} + +declare float @llvm.sqrt.f32(float) + +declare void @bar() + +; This is a comp +define float @test_ineligible(float, float %in) { +; CHECK: test_ineligible: + + %sqrt = call float @llvm.sqrt.f32(float %in) + %val = fadd float %sqrt, %sqrt + + ; This call forces a move from a callee-saved register to the return-reg. That + ; move is not eligible for conversion to a d-register instructions because the + ; use-def chains would be messed up. Primarily a compile-test (we used to + ; internal fault). + call void @bar() +; CHECL: bl bar +; CHECK: vmov.f32 {{s[0-9]+}}, {{s[0-9]+}} + ret float %val +}
\ No newline at end of file diff --git a/test/CodeGen/ARM/fast-isel-pic.ll b/test/CodeGen/ARM/fast-isel-pic.ll new file mode 100644 index 0000000..392a845 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-pic.ll @@ -0,0 +1,43 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7 + +@g = global i32 0, align 4 + +define i32 @LoadGV() { +entry: +; THUMB: LoadGV +; THUMB: movw [[reg0:r[0-9]+]], +; THUMB: movt [[reg0]], +; THUMB: add [[reg0]], pc +; ARM: LoadGV +; ARM: ldr [[reg1:r[0-9]+]], +; ARM: add [[reg1]], pc, [[reg1]] +; ARMv7: LoadGV +; ARMv7: movw [[reg2:r[0-9]+]], +; ARMv7: movt [[reg2]], +; ARMv7: add [[reg2]], pc, [[reg2]] + %tmp = load i32* @g + ret i32 %tmp +} + +@i = external global i32 + +define i32 @LoadIndirectSymbol() { +entry: +; THUMB: LoadIndirectSymbol +; THUMB: movw r[[reg3:[0-9]+]], +; THUMB: movt r[[reg3]], +; THUMB: add r[[reg3]], pc +; THUMB: ldr r[[reg3]], [r[[reg3]]] +; ARM: LoadIndirectSymbol +; ARM: ldr [[reg4:r[0-9]+]], +; ARM: ldr [[reg4]], [pc, [[reg4]]] +; ARMv7: LoadIndirectSymbol +; ARMv7: movw r[[reg5:[0-9]+]], +; ARMv7: movt r[[reg5]], +; ARMv7: add r[[reg5]], pc, r[[reg5]] +; ARMv7: ldr r[[reg5]], [r[[reg5]]] + %tmp = load i32* @i + ret i32 %tmp +} diff --git a/test/CodeGen/ARM/fp-fast.ll b/test/CodeGen/ARM/fp-fast.ll new file mode 100644 index 0000000..ec57187 --- /dev/null +++ b/test/CodeGen/ARM/fp-fast.ll @@ -0,0 +1,60 @@ +; RUN: llc -march=arm -mcpu=cortex-a9 -mattr=+vfp4 -enable-unsafe-fp-math < %s | FileCheck %s + +; CHECK: test1 +define float @test1(float %x) { +; CHECK-NOT: vfma +; CHECK: vmul.f32 +; CHECK-NOT: vfma + %t1 = fmul float %x, 3.0 + %t2 = call float @llvm.fma.f32(float %x, float 2.0, float %t1) + ret float %t2 +} + +; CHECK: test2 +define float @test2(float %x, float %y) { +; CHECK-NOT: vmul +; CHECK: vfma.f32 +; CHECK-NOT: vmul + %t1 = fmul float %x, 3.0 + %t2 = call float @llvm.fma.f32(float %t1, float 2.0, float %y) + ret float %t2 +} + +; CHECK: test3 +define float @test3(float %x, float %y) { +; CHECK-NOT: vfma +; CHECK: vadd.f32 +; CHECK-NOT: vfma + %t2 = call float @llvm.fma.f32(float %x, float 1.0, float %y) + ret float %t2 +} + +; CHECK: test4 +define float @test4(float %x, float %y) { +; CHECK-NOT: vfma +; CHECK: vsub.f32 +; CHECK-NOT: vfma + %t2 = call float @llvm.fma.f32(float %x, float -1.0, float %y) + ret float %t2 +} + +; CHECK: test5 +define float @test5(float %x) { +; CHECK-NOT: vfma +; CHECK: vmul.f32 +; CHECK-NOT: vfma + %t2 = call float @llvm.fma.f32(float %x, float 2.0, float %x) + ret float %t2 +} + +; CHECK: test6 +define float @test6(float %x) { +; CHECK-NOT: vfma +; CHECK: vmul.f32 +; CHECK-NOT: vfma + %t1 = fsub float -0.0, %x + %t2 = call float @llvm.fma.f32(float %x, float 5.0, float %t1) + ret float %t2 +} + +declare float @llvm.fma.f32(float, float, float) diff --git a/test/CodeGen/ARM/integer_insertelement.ll b/test/CodeGen/ARM/integer_insertelement.ll new file mode 100644 index 0000000..4f2d7e3 --- /dev/null +++ b/test/CodeGen/ARM/integer_insertelement.ll @@ -0,0 +1,35 @@ +; RUN: llc %s -o - -march=arm -mattr=+neon | FileCheck %s + +; This test checks that when inserting one (integer) element into a vector, +; the vector is not spuriously copied. 
"vorr dX, dY, dY" is the way of moving +; one DPR to another that we check for. + +; CHECK: @f +; CHECK-NOT: vorr d +; CHECK: vmov s +; CHECK-NOT: vorr d +; CHECK: mov pc, lr +define <4 x i32> @f(<4 x i32> %in) { + %1 = insertelement <4 x i32> %in, i32 255, i32 3 + ret <4 x i32> %1 +} + +; CHECK: @g +; CHECK-NOT: vorr d +; CHECK: vmov.16 d +; CHECK-NOT: vorr d +; CHECK: mov pc, lr +define <8 x i16> @g(<8 x i16> %in) { + %1 = insertelement <8 x i16> %in, i16 255, i32 7 + ret <8 x i16> %1 +} + +; CHECK: @h +; CHECK-NOT: vorr d +; CHECK: vmov.8 d +; CHECK-NOT: vorr d +; CHECK: mov pc, lr +define <16 x i8> @h(<16 x i8> %in) { + %1 = insertelement <16 x i8> %in, i8 255, i32 15 + ret <16 x i8> %1 +} diff --git a/test/CodeGen/ARM/longMAC.ll b/test/CodeGen/ARM/longMAC.ll new file mode 100644 index 0000000..e4a00e9 --- /dev/null +++ b/test/CodeGen/ARM/longMAC.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -march=arm | FileCheck %s +; Check generated signed and unsigned multiply accumulate long. + +define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) { +;CHECK: MACLongTest1: +;CHECK: umlal + %conv = zext i32 %a to i64 + %conv1 = zext i32 %b to i64 + %mul = mul i64 %conv1, %conv + %add = add i64 %mul, %c + ret i64 %add +} + +define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c) { +;CHECK: MACLongTest2: +;CHECK: smlal + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %mul = mul nsw i64 %conv1, %conv + %add = add nsw i64 %mul, %c + ret i64 %add +} + +define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) { +;CHECK: MACLongTest3: +;CHECK: umlal + %conv = zext i32 %b to i64 + %conv1 = zext i32 %a to i64 + %mul = mul i64 %conv, %conv1 + %conv2 = zext i32 %c to i64 + %add = add i64 %mul, %conv2 + ret i64 %add +} + +define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) { +;CHECK: MACLongTest4: +;CHECK: smlal + %conv = sext i32 %b to i64 + %conv1 = sext i32 %a to i64 + %mul = mul nsw i64 %conv, %conv1 + %conv2 = sext i32 %c to i64 + %add = add nsw i64 %mul, %conv2 + ret i64 %add +} diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index 5575566..62708ed 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -80,7 +80,7 @@ define double @f7(double %a, double %b) { ; CHECK-NEON: adr [[R2:r[0-9]+]], LCPI7_0 ; CHECK-NEON-NEXT: cmp r0, [[R3]] ; CHECK-NEON-NEXT: it eq -; CHECK-NEON-NEXT: addeq.w {{r.*}}, [[R2]] +; CHECK-NEON-NEXT: addeq{{.*}} [[R2]], #4 ; CHECK-NEON-NEXT: ldr ; CHECK-NEON: bx diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll index cfc0e70..7507808 100644 --- a/test/CodeGen/ARM/select_xform.ll +++ b/test/CodeGen/ARM/select_xform.ll @@ -9,7 +9,7 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; T2: t1: ; T2: mvn r0, #-2147483648 -; T2: addle.w r1, r1 +; T2: addle r1, r0 ; T2: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 @@ -23,7 +23,7 @@ define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; ARM: mov r0, r1 ; T2: t2: -; T2: suble.w r1, r1, #10 +; T2: suble r1, #10 ; T2: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 10 @@ -37,7 +37,7 @@ define i32 @t3(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { ; ARM: mov r0, r3 ; T2: t3: -; T2: andge.w r3, r3, r2 +; T2: andge r3, r2 ; T2: mov r0, r3 %cond = icmp slt i32 %a, %b %z = select i1 %cond, i32 -1, i32 %x @@ -51,7 +51,7 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { ; ARM: mov r0, r3 ; T2: t4: -; T2: orrge.w r3, r3, r2 +; T2: orrge r3, r2 ; T2: mov r0, r3 %cond = icmp slt i32 %a, %b %z = select i1 %cond, i32 0, i32 
%x @@ -81,7 +81,7 @@ define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; T2: t6: ; T2-NOT: movge -; T2: eorlt.w r3, r3, r2 +; T2: eorlt r3, r2 %cond = icmp slt i32 %a, %b %tmp1 = select i1 %cond, i32 %c, i32 0 %tmp2 = xor i32 %tmp1, %d @@ -200,7 +200,7 @@ entry: ; T2: t13 ; T2: cmp r1, #10 -; T2: addgt.w r0, r0, #1 +; T2: addgt r0, #1 %cmp = icmp sgt i32 %a, 10 %conv = zext i1 %cmp to i32 %add = add i32 %conv, %c @@ -216,7 +216,7 @@ entry: ; T2: t14 ; T2: cmp r1, #10 -; T2: subgt.w r0, r0, #1 +; T2: subgt r0, #1 %cmp = icmp sgt i32 %a, 10 %conv = sext i1 %cmp to i32 %add = add i32 %conv, %c diff --git a/test/CodeGen/ARM/sub-cmp-peephole.ll b/test/CodeGen/ARM/sub-cmp-peephole.ll index 6fcbdee..2961b94 100644 --- a/test/CodeGen/ARM/sub-cmp-peephole.ll +++ b/test/CodeGen/ARM/sub-cmp-peephole.ll @@ -63,3 +63,24 @@ if.then: if.else: ret i32 %sub } + +; If the sub/rsb instruction is predicated, we can't use the flags. +; <rdar://problem/12263428> +; Test case from MultiSource/Benchmarks/Ptrdist/bc/number.s +; CHECK: bc_raise +; CHECK: rsbeq +; CHECK: cmp +define i32 @bc_raise() nounwind ssp { +entry: + %val.2.i = select i1 undef, i32 0, i32 undef + %sub.i = sub nsw i32 0, %val.2.i + %retval.0.i = select i1 undef, i32 %val.2.i, i32 %sub.i + %cmp1 = icmp eq i32 %retval.0.i, 0 + br i1 %cmp1, label %land.lhs.true, label %if.end11 + +land.lhs.true: ; preds = %num2long.exit + ret i32 17 + +if.end11: ; preds = %num2long.exit + ret i32 23 +} diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll index 05332e4..a8c224b 100644 --- a/test/CodeGen/ARM/vdup.ll +++ b/test/CodeGen/ARM/vdup.ll @@ -261,3 +261,37 @@ define void @redundantVdup(<8 x i8>* %ptr) nounwind { store <8 x i8> %2, <8 x i8>* %ptr, align 8 ret void } + +define <4 x i32> @tdupi(i32 %x, i32 %y) { +;CHECK: tdupi +;CHECK: vdup.32 + %1 = insertelement <4 x i32> undef, i32 %x, i32 0 + %2 = insertelement <4 x i32> %1, i32 %x, i32 1 + %3 = insertelement <4 x i32> %2, i32 %x, i32 2 + %4 = insertelement <4 x i32> %3, i32 %y, i32 3 + ret <4 x i32> %4 +} + +define <4 x float> @tdupf(float %x, float %y) { +;CHECK: tdupf +;CHECK: vdup.32 + %1 = insertelement <4 x float> undef, float %x, i32 0 + %2 = insertelement <4 x float> %1, float %x, i32 1 + %3 = insertelement <4 x float> %2, float %x, i32 2 + %4 = insertelement <4 x float> %3, float %y, i32 3 + ret <4 x float> %4 +} + +; This test checks that when splatting an element from a vector into another, +; the value isn't moved out to GPRs first. 
+define <4 x i32> @tduplane(<4 x i32> %invec) { +;CHECK: tduplane +;CHECK-NOT: vmov {{.*}}, d16[1] +;CHECK: vdup.32 {{.*}}, d16[1] + %in = extractelement <4 x i32> %invec, i32 1 + %1 = insertelement <4 x i32> undef, i32 %in, i32 0 + %2 = insertelement <4 x i32> %1, i32 %in, i32 1 + %3 = insertelement <4 x i32> %2, i32 %in, i32 2 + %4 = insertelement <4 x i32> %3, i32 255, i32 3 + ret <4 x i32> %4 +} diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll index 8fd3db2..22af797 100644 --- a/test/CodeGen/ARM/vector-extend-narrow.ll +++ b/test/CodeGen/ARM/vector-extend-narrow.ll @@ -62,3 +62,14 @@ define <4 x i8> @i(<4 x i8>* %x) { %2 = sdiv <4 x i8> zeroinitializer, %1 ret <4 x i8> %2 } +; CHECK: j: +define <4 x i32> @j(<4 x i8>* %in) nounwind { + ; CHECK: vld1 + ; CHECK: vmovl.u8 + ; CHECK: vmovl.u16 + ; CHECK-NOT: vand + %1 = load <4 x i8>* %in, align 4 + %2 = zext <4 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +} + diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll index 1fc885d..2ed65c9 100644 --- a/test/CodeGen/ARM/vget_lane.ll +++ b/test/CodeGen/ARM/vget_lane.ll @@ -200,7 +200,7 @@ define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind { define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind { ;CHECK: vsetQ_lane32: -;CHECK: vmov.32 +;CHECK: vmov s %tmp1 = load <4 x i32>* %A %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1 ret <4 x i32> %tmp2 diff --git a/test/CodeGen/Generic/MachineBranchProb.ll b/test/CodeGen/Generic/MachineBranchProb.ll new file mode 100644 index 0000000..802ee2c --- /dev/null +++ b/test/CodeGen/Generic/MachineBranchProb.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -print-machineinstrs=expand-isel-pseudos -o /dev/null 2>&1 | FileCheck %s + +; Make sure we have the correct weight attached to each successor. 
+define i32 @test2(i32 %x) nounwind uwtable readnone ssp { +; CHECK: Machine code for function test2: +entry: + %conv = sext i32 %x to i64 + switch i64 %conv, label %return [ + i64 0, label %sw.bb + i64 1, label %sw.bb + i64 4, label %sw.bb + i64 5, label %sw.bb1 + ], !prof !0 +; CHECK: BB#0: derived from LLVM BB %entry +; CHECK: Successors according to CFG: BB#2(64) BB#4(14) +; CHECK: BB#4: derived from LLVM BB %entry +; CHECK: Successors according to CFG: BB#1(10) BB#5(4) +; CHECK: BB#5: derived from LLVM BB %entry +; CHECK: Successors according to CFG: BB#1(4) BB#3(7) + +sw.bb: + br label %return + +sw.bb1: + br label %return + +return: + %retval.0 = phi i32 [ 5, %sw.bb1 ], [ 1, %sw.bb ], [ 0, %entry ] + ret i32 %retval.0 +} + +!0 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64} diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll index e9ac8b6..8a6efb6 100644 --- a/test/CodeGen/Hexagon/args.ll +++ b/test/CodeGen/Hexagon/args.ll @@ -1,12 +1,12 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s +; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hexagon-misched < %s | FileCheck %s ; CHECK: r[[T0:[0-9]+]] = #7 ; CHECK: memw(r29 + #0) = r[[T0]] +; CHECK: r5 = #6 ; CHECK: r0 = #1 ; CHECK: r1 = #2 ; CHECK: r2 = #3 ; CHECK: r3 = #4 ; CHECK: r4 = #5 -; CHECK: r5 = #6 define void @foo() nounwind { diff --git a/test/CodeGen/Hexagon/newvaluestore.ll b/test/CodeGen/Hexagon/newvaluestore.ll index ab69b22..186e393 100644 --- a/test/CodeGen/Hexagon/newvaluestore.ll +++ b/test/CodeGen/Hexagon/newvaluestore.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hexagon-misched < %s | FileCheck %s ; Check that we generate new value store packet in V4 @i = global i32 0, align 4 diff --git a/test/CodeGen/Hexagon/remove_lsr.ll b/test/CodeGen/Hexagon/remove_lsr.ll new file mode 100644 index 0000000..79b5f4a --- /dev/null +++ b/test/CodeGen/Hexagon/remove_lsr.ll @@ -0,0 +1,80 @@ +; Test fix for PR-13709. +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; CHECK: foo +; CHECK-NOT: lsr(r{{[0-9]+}}:{{[0-9]+}}, #32) +; CHECK-NOT: lsr(r{{[0-9]+}}:{{[0-9]+}}, #32) + +; Convert the sequence +; r17:16 = lsr(r11:10, #32) +; .. = r16 +; into +; r17:16 = lsr(r11:10, #32) +; .. = r11 +; This makes the lsr instruction dead and it gets removed subsequently +; by a dead code removal pass. 
+ +%union.vect64 = type { i64 } +%union.vect32 = type { i32 } + +define void @foo(%union.vect64* nocapture %sss_extracted_bit_rx_data_ptr, + %union.vect32* nocapture %s_even, %union.vect32* nocapture %s_odd, + i8* nocapture %scr_s_even_code_ptr, i8* nocapture %scr_s_odd_code_ptr) + nounwind { +entry: + %scevgep = getelementptr %union.vect64* %sss_extracted_bit_rx_data_ptr, i32 1 + %scevgep28 = getelementptr %union.vect32* %s_odd, i32 1 + %scevgep32 = getelementptr %union.vect32* %s_even, i32 1 + %scevgep36 = getelementptr i8* %scr_s_odd_code_ptr, i32 1 + %scevgep39 = getelementptr i8* %scr_s_even_code_ptr, i32 1 + br label %for.body + +for.body: ; preds = %for.body, %entry + %lsr.iv42 = phi i32 [ %lsr.iv.next, %for.body ], [ 2, %entry ] + %lsr.iv40 = phi i8* [ %scevgep41, %for.body ], [ %scevgep39, %entry ] + %lsr.iv37 = phi i8* [ %scevgep38, %for.body ], [ %scevgep36, %entry ] + %lsr.iv33 = phi %union.vect32* [ %scevgep34, %for.body ], [ %scevgep32, %entry ] + %lsr.iv29 = phi %union.vect32* [ %scevgep30, %for.body ], [ %scevgep28, %entry ] + %lsr.iv = phi %union.vect64* [ %scevgep26, %for.body ], [ %scevgep, %entry ] + %predicate_1.023 = phi i8 [ undef, %entry ], [ %10, %for.body ] + %predicate.022 = phi i8 [ undef, %entry ], [ %9, %for.body ] + %val.021 = phi i64 [ undef, %entry ], [ %srcval, %for.body ] + %lsr.iv3335 = bitcast %union.vect32* %lsr.iv33 to i32* + %lsr.iv2931 = bitcast %union.vect32* %lsr.iv29 to i32* + %lsr.iv27 = bitcast %union.vect64* %lsr.iv to i64* + %0 = tail call i64 @llvm.hexagon.A2.vsubhs(i64 0, i64 %val.021) + %conv3 = sext i8 %predicate.022 to i32 + %1 = trunc i64 %val.021 to i32 + %2 = trunc i64 %0 to i32 + %3 = tail call i32 @llvm.hexagon.C2.mux(i32 %conv3, i32 %1, i32 %2) + store i32 %3, i32* %lsr.iv3335, align 4, !tbaa !0 + %conv8 = sext i8 %predicate_1.023 to i32 + %4 = lshr i64 %val.021, 32 + %5 = trunc i64 %4 to i32 + %6 = lshr i64 %0, 32 + %7 = trunc i64 %6 to i32 + %8 = tail call i32 @llvm.hexagon.C2.mux(i32 %conv8, i32 %5, i32 %7) + store i32 %8, i32* %lsr.iv2931, align 4, !tbaa !0 + %srcval = load i64* %lsr.iv27, align 8 + %9 = load i8* %lsr.iv40, align 1, !tbaa !1 + %10 = load i8* %lsr.iv37, align 1, !tbaa !1 + %lftr.wideiv = trunc i32 %lsr.iv42 to i8 + %exitcond = icmp eq i8 %lftr.wideiv, 32 + %scevgep26 = getelementptr %union.vect64* %lsr.iv, i32 1 + %scevgep30 = getelementptr %union.vect32* %lsr.iv29, i32 1 + %scevgep34 = getelementptr %union.vect32* %lsr.iv33, i32 1 + %scevgep38 = getelementptr i8* %lsr.iv37, i32 1 + %scevgep41 = getelementptr i8* %lsr.iv40, i32 1 + %lsr.iv.next = add i32 %lsr.iv42, 1 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +declare i64 @llvm.hexagon.A2.vsubhs(i64, i64) nounwind readnone + +declare i32 @llvm.hexagon.C2.mux(i32, i32, i32) nounwind readnone + +!0 = metadata !{metadata !"long", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/Hexagon/static.ll b/test/CodeGen/Hexagon/static.ll index 2e4ab63..683a4c2 100644 --- a/test/CodeGen/Hexagon/static.ll +++ b/test/CodeGen/Hexagon/static.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s +; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched -disable-hexagon-misched < %s | FileCheck %s @num = external global i32 @acc = external global i32 diff --git a/test/CodeGen/Mips/brdelayslot.ll b/test/CodeGen/Mips/brdelayslot.ll index b266ce6..1b2fbc8 100644 --- 
a/test/CodeGen/Mips/brdelayslot.ll +++ b/test/CodeGen/Mips/brdelayslot.ll @@ -1,12 +1,18 @@ -; RUN: llc -march=mipsel -enable-mips-delay-filler < %s | FileCheck %s +; RUN: llc -march=mipsel -O0 < %s | FileCheck %s -check-prefix=None +; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=Default define void @foo1() nounwind { entry: -; CHECK: jalr -; CHECK-NOT: nop -; CHECK: jr -; CHECK-NOT: nop -; CHECK: .end +; Default: jalr +; Default-NOT: nop +; Default: jr +; Default-NOT: nop +; Default: .end +; None: jalr +; None: nop +; None: jr +; None: nop +; None: .end tail call void @foo2(i32 3) nounwind ret void diff --git a/test/CodeGen/Mips/init-array.ll b/test/CodeGen/Mips/init-array.ll new file mode 100644 index 0000000..f96ce26 --- /dev/null +++ b/test/CodeGen/Mips/init-array.ll @@ -0,0 +1,14 @@ +; RUN: llc -mtriple mipsel-unknown-linux -use-init-array < %s | FileCheck %s + +target triple = "mipsel-unknown-linux" + +@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @test }] +; CHECK: .section +; CHECK: .init_array +; CHECK-NOT: .ctors +; CHECK: .4byte test + +define internal void @test() section ".text.startup" { +entry: + ret void +} diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll index 0227b88..873b9f1 100644 --- a/test/CodeGen/Mips/longbranch.ll +++ b/test/CodeGen/Mips/longbranch.ll @@ -6,9 +6,15 @@ define void @foo1(i32 %s) nounwind { entry: ; O32: bal +; O32: lui $at, 0 +; O32: addiu $at, $at, {{[0-9]+}} +; N64: lui $at, 0 +; N64: daddiu $at, $at, 0 +; N64: dsll $at, $at, 16 +; N64: daddiu $at, $at, 0 ; N64: bal -; N64: highest -; N64: higher +; N64: dsll $at, $at, 16 +; N64: daddiu $at, $at, {{[0-9]+}} %tobool = icmp eq i32 %s, 0 br i1 %tobool, label %if.end, label %if.then diff --git a/test/CodeGen/Mips/small-section-reserve-gp.ll b/test/CodeGen/Mips/small-section-reserve-gp.ll new file mode 100644 index 0000000..03503fb --- /dev/null +++ b/test/CodeGen/Mips/small-section-reserve-gp.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple=mipsel-sde-elf -march=mipsel -relocation-model=static < %s \ +; RUN: | FileCheck %s + +@i = internal unnamed_addr global i32 0, align 4 + +define i32 @geti() nounwind readonly { +entry: +; CHECK: lw ${{[0-9]+}}, %gp_rel(i)($gp) + %0 = load i32* @i, align 4 + ret i32 %0 +} + diff --git a/test/CodeGen/Mips/tls-alias.ll b/test/CodeGen/Mips/tls-alias.ll index d681091..ce98cc8 100644 --- a/test/CodeGen/Mips/tls-alias.ll +++ b/test/CodeGen/Mips/tls-alias.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s +; RUN: llc -march=mipsel -relocation-model=pic -disable-mips-delay-filler < %s | FileCheck %s @foo = thread_local global i32 42 @bar = hidden alias i32* @foo diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll index a7ddb96..72d30dc 100644 --- a/test/CodeGen/Mips/tls.ll +++ b/test/CodeGen/Mips/tls.ll @@ -1,8 +1,10 @@ -; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=PIC -; RUN: llc -march=mipsel -relocation-model=static < %s \ -; RUN: | FileCheck %s -check-prefix=STATIC -; RUN: llc -march=mipsel -relocation-model=static < %s \ -; RUN: -mips-fix-global-base-reg=false | FileCheck %s -check-prefix=STATICGP +; RUN: llc -march=mipsel -disable-mips-delay-filler < %s | \ +; RUN: FileCheck %s -check-prefix=PIC +; RUN: llc -march=mipsel -relocation-model=static -disable-mips-delay-filler < \ +; RUN: %s | FileCheck %s -check-prefix=STATIC +; RUN: llc -march=mipsel -relocation-model=static -disable-mips-delay-filler \ +; RUN: 
-mips-fix-global-base-reg=false < %s | \ +; RUN: FileCheck %s -check-prefix=STATICGP @t1 = thread_local global i32 0, align 4 diff --git a/test/CodeGen/Mips/uitofp.ll b/test/CodeGen/Mips/uitofp.ll new file mode 100644 index 0000000..aff70c2 --- /dev/null +++ b/test/CodeGen/Mips/uitofp.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=mips -mattr=+single-float < %s + +define void @f0() nounwind { +entry: + %b = alloca i32, align 4 + %a = alloca float, align 4 + store volatile i32 1, i32* %b, align 4 + %0 = load volatile i32* %b, align 4 + %conv = uitofp i32 %0 to float + store float %conv, float* %a, align 4 + ret void +} diff --git a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll index 0003a17..b95ac68 100644 --- a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll +++ b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll @@ -9,9 +9,8 @@ target triple = "powerpc-apple-darwin11.0" define void @foo() nounwind ssp { entry: -; Better: mtctr r12 -; CHECK: mr r12, [[REG:r[0-9]+]] -; CHECK: mtctr [[REG]] +; CHECK: mtctr r12 +; CHECK: bctrl %0 = load void (...)** @p, align 4 ; <void (...)*> [#uses=1] call void (...)* %0() nounwind br label %return diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll index 9a456b6..638059a 100644 --- a/test/CodeGen/PowerPC/big-endian-formal-args.ll +++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll @@ -2,10 +2,10 @@ declare void @bar(i64 %x, i64 %y) -; CHECK: li {{[53]}}, 0 +; CHECK: li 3, 0 ; CHECK: li 4, 2 +; CHECK: li 5, 0 ; CHECK: li 6, 3 -; CHECK: mr {{[53]}}, {{[53]}} define void @foo() { call void @bar(i64 2, i64 3) diff --git a/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll b/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll new file mode 100644 index 0000000..afa1ea8 --- /dev/null +++ b/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-unknown-linux" + +@.str = private unnamed_addr constant [3 x i8] c"%i\00", align 1 + +define void @test(i32 %count) nounwind { +entry: +; CHECK: crxor 6, 6, 6 + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind + %cmp2 = icmp sgt i32 %count, 0 + br i1 %cmp2, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ] +; CHECK: crxor 6, 6, 6 + %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind + %inc = add nsw i32 %i.03, 1 + %exitcond = icmp eq i32 %inc, %count + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/PowerPC/fsl-e500mc.ll b/test/CodeGen/PowerPC/fsl-e500mc.ll new file mode 100644 index 0000000..09b7e41 --- /dev/null +++ b/test/CodeGen/PowerPC/fsl-e500mc.ll @@ -0,0 +1,22 @@ +; +; Test support for Freescale e500mc and its higher memcpy inlining thresholds. 
+;
+; RUN: llc -mcpu=e500mc < %s 2>&1 | FileCheck %s
+; CHECK-NOT: not a recognized processor for this target
+
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-fsl-linux"
+
+%struct.teststruct = type { [12 x i32], i32 }
+
+define void @copy(%struct.teststruct* noalias nocapture sret %agg.result, %struct.teststruct* nocapture %in) nounwind {
+entry:
+; CHECK: @copy
+; CHECK-NOT: bl memcpy
+ %0 = bitcast %struct.teststruct* %agg.result to i8*
+ %1 = bitcast %struct.teststruct* %in to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 52, i32 4, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/PowerPC/fsl-e5500.ll b/test/CodeGen/PowerPC/fsl-e5500.ll
new file mode 100644
index 0000000..d47d8c8
--- /dev/null
+++ b/test/CodeGen/PowerPC/fsl-e5500.ll
@@ -0,0 +1,22 @@
+;
+; Test support for Freescale e5500 and its higher memcpy inlining thresholds.
+;
+; RUN: llc -mcpu=e5500 < %s 2>&1 | FileCheck %s
+; CHECK-NOT: not a recognized processor for this target
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-fsl-linux"
+
+%struct.teststruct = type { [24 x i32], i32 }
+
+define void @copy(%struct.teststruct* noalias nocapture sret %agg.result, %struct.teststruct* nocapture %in) nounwind {
+entry:
+; CHECK: @copy
+; CHECK-NOT: bl memcpy
+ %0 = bitcast %struct.teststruct* %agg.result to i8*
+ %1 = bitcast %struct.teststruct* %in to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 100, i32 4, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/PowerPC/inlineasm-copy.ll b/test/CodeGen/PowerPC/inlineasm-copy.ll
index e1ff82d..59c3388 100644
--- a/test/CodeGen/PowerPC/inlineasm-copy.ll
+++ b/test/CodeGen/PowerPC/inlineasm-copy.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=ppc32 | not grep mr
+; RUN: llc < %s -march=ppc32 -verify-machineinstrs | FileCheck %s
+; CHECK-NOT: mr

 define i32 @test(i32 %Y, i32 %X) {
 entry:
 %tmp = tail call i32 asm "foo $0", "=r"( ) ; <i32> [#uses=1]
@@ -12,3 +13,9 @@ entry:
 ret i32 %tmp1
 }

+; CHECK: test3
+define i32 @test3(i32 %Y, i32 %X) {
+entry:
+ %tmp1 = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "foo $0, $1", "=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19"( i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y ) ; <i32> [#uses=1]
+ ret i32 1
+}
diff --git a/test/CodeGen/PowerPC/ppc64-toc.ll b/test/CodeGen/PowerPC/ppc64-toc.ll
new file mode 100644
index 0000000..f1326ba
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-toc.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@double_array = global [32 x double] zeroinitializer, align 8
+@number64 = global i64 10, align 8
+@internal_static_var.x = internal unnamed_addr global i64 0, align 8
+
+define i64 @access_int64(i64 %a) nounwind readonly {
+entry:
+; CHECK: access_int64:
+; CHECK-NEXT: .align 3
+; CHECK-NEXT: .quad .L.access_int64
+; CHECK-NEXT: .quad .TOC.@tocbase
+; CHECK-NEXT: .text
+ %0 = load i64* @number64, align 8
+; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
+ %cmp = icmp eq i64 %0, %a
+ %conv1 = zext i1 %cmp to i64
+ ret i64 %conv1
+}
+
+define i64 @internal_static_var(i64 %a) nounwind {
+entry:
+; CHECK: internal_static_var:
+; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
+ %0 = load i64* @internal_static_var.x, align 8
+ %cmp = icmp eq i64 %0, %a
+ %conv1 = zext i1 %cmp to i64
+ ret i64 %conv1
+}
+
+define i32 @access_double(double %a) nounwind readnone {
+entry:
+; CHECK: access_double:
+; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
+ %cmp = fcmp oeq double %a, 2.000000e+00
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+
+define i32 @access_double_array(double %a, i32 %i) nounwind readonly {
+entry:
+; CHECK: access_double_array:
+ %idxprom = sext i32 %i to i64
+ %arrayidx = getelementptr inbounds [32 x double]* @double_array, i64 0, i64 %idxprom
+ %0 = load double* %arrayidx, align 8
+; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
+ %cmp = fcmp oeq double %0, %a
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+; Check the creation of 4 .tc entries:
+; * int64_t global 'number64'
+; * double constant 2.0
+; * double array 'double_array'
+; * static int64_t 'x' accessed within '@internal_static_var'
+; CHECK: .LC{{[0-9]+}}:
+; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}}
+; CHECK-NEXT: .LC{{[0-9]+}}:
+; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}}
+; CHECK-NEXT: .LC{{[0-9]+}}:
+; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}}
+; CHECK-NEXT: .LC{{[0-9]+}}:
+; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}}
diff --git a/test/CodeGen/PowerPC/ppc64-zext.ll b/test/CodeGen/PowerPC/ppc64-zext.ll new file mode 100644 index 0000000..eb55445 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-zext.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux" + +define i64 @fun(i32 %arg32) nounwind { +entry: +; CHECK: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32 + %o = zext i32 %arg32 to i64 + ret i64 %o +} + diff --git a/test/CodeGen/PowerPC/pr13641.ll b/test/CodeGen/PowerPC/pr13641.ll new file mode 100644 index 0000000..c4d3f3a --- /dev/null +++ b/test/CodeGen/PowerPC/pr13641.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @foo() nounwind { + ret void +} + +; CHECK: blr +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .quad 0 diff --git a/test/CodeGen/PowerPC/remat-imm.ll b/test/CodeGen/PowerPC/remat-imm.ll new file mode 100644 index 0000000..520921f --- /dev/null +++ b/test/CodeGen/PowerPC/remat-imm.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s | FileCheck %s +; ModuleID = 'test.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-unknown-linux" + +@.str = private unnamed_addr constant [6 x i8] c"%d,%d\00", align 1 + +define i32 @main() nounwind { +entry: +; CHECK: li 4, 128 +; CHECK-NOT: mr 4, {{.*}} + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i32 128, i32 128) nounwind + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/Thumb/thumb_jump24_fixup.ll b/test/CodeGen/Thumb/thumb_jump24_fixup.ll new file mode 100644 index 0000000..e6a6b25 --- /dev/null +++ b/test/CodeGen/Thumb/thumb_jump24_fixup.ll @@ -0,0 +1,23 @@ +; RUN: llc -mtriple thumbv7-none-linux-gnueabi -mcpu=cortex-a8 -march=thumb -mattr=thumb2 -filetype=obj -o - < %s | llvm-objdump -r - | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:32-n32-S64" +target triple = "thumbv7-none-linux-gnueabi" + +define i32 @test_fixup_t2_uncondbranch() { +b0: + invoke void @__cxa_throw(i8* null, i8* null, i8* null) noreturn + to label %unreachable unwind label %lpad + +; CHECK: {{[0-9]+}} R_ARM_THM_JUMP24 __cxa_throw + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) cleanup + ret i32 0 + +unreachable: + unreachable +} + +declare i32 @__gxx_personality_v0(...) + +declare void @__cxa_throw(i8*, i8*, i8*) diff --git a/test/CodeGen/Thumb2/longMACt.ll b/test/CodeGen/Thumb2/longMACt.ll new file mode 100644 index 0000000..beefd60 --- /dev/null +++ b/test/CodeGen/Thumb2/longMACt.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; Check generated signed and unsigned multiply accumulate long. 
+ +define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) { +;CHECK: MACLongTest1: +;CHECK: umlal + %conv = zext i32 %a to i64 + %conv1 = zext i32 %b to i64 + %mul = mul i64 %conv1, %conv + %add = add i64 %mul, %c + ret i64 %add +} + +define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c) { +;CHECK: MACLongTest2: +;CHECK: smlal + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %mul = mul nsw i64 %conv1, %conv + %add = add nsw i64 %mul, %c + ret i64 %add +} + +define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) { +;CHECK: MACLongTest3: +;CHECK: umlal + %conv = zext i32 %b to i64 + %conv1 = zext i32 %a to i64 + %mul = mul i64 %conv, %conv1 + %conv2 = zext i32 %c to i64 + %add = add i64 %mul, %conv2 + ret i64 %add +} + +define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) { +;CHECK: MACLongTest4: +;CHECK: smlal + %conv = sext i32 %b to i64 + %conv1 = sext i32 %a to i64 + %mul = mul nsw i64 %conv, %conv1 + %conv2 = sext i32 %c to i64 + %add = add nsw i64 %mul, %conv2 + ret i64 %add +} diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll index ead198f..ed4d26d 100644 --- a/test/CodeGen/Thumb2/thumb2-select_xform.ll +++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll @@ -5,7 +5,7 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK: mvn r0, #-2147483648 ; CHECK: cmp r2, #10 ; CHECK: it le -; CHECK: addle.w r1, r1, r0 +; CHECK: addle r1, r0 ; CHECK: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 @@ -30,7 +30,7 @@ define i32 @t3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK: t3 ; CHECK: cmp r2, #10 ; CHECK: it le -; CHECK: suble.w r1, r1, #10 +; CHECK: suble r1, #10 ; CHECK: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 10 diff --git a/test/CodeGen/X86/2012-04-26-sdglue.ll b/test/CodeGen/X86/2012-04-26-sdglue.ll index 9a66b67..0465952 100644 --- a/test/CodeGen/X86/2012-04-26-sdglue.ll +++ b/test/CodeGen/X86/2012-04-26-sdglue.ll @@ -5,7 +5,7 @@ ; It's hard to test for the ISEL condition because CodeGen optimizes ; away the bugpointed code. Just ensure the basics are still there. 
;CHECK: func: -;CHECK: vpxor +;CHECK: vxorps ;CHECK: vinsertf128 ;CHECK: vpshufd ;CHECK: vpshufd diff --git a/test/CodeGen/X86/2012-08-28-UnsafeMathCrash.ll b/test/CodeGen/X86/2012-08-28-UnsafeMathCrash.ll new file mode 100644 index 0000000..6ebbb2e --- /dev/null +++ b/test/CodeGen/X86/2012-08-28-UnsafeMathCrash.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -enable-unsafe-fp-math +; <rdar://problem/12180135> +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" +target triple = "i386-apple-macosx10.8.0" + +define i32 @foo(float %mean) nounwind readnone ssp align 2 { +entry: + %cmp = fcmp olt float %mean, -3.000000e+00 + %f.0 = select i1 %cmp, float -3.000000e+00, float %mean + %cmp2 = fcmp ult float %f.0, 3.000000e+00 + %f.1 = select i1 %cmp2, float %f.0, float 0x4007EB8520000000 + %add = fadd float %f.1, 3.000000e+00 + %div = fdiv float %add, 2.343750e-02 + %0 = fpext float %div to double + %conv = select i1 undef, double 2.550000e+02, double %0 + %add8 = fadd double %conv, 5.000000e-01 + %conv9 = fptosi double %add8 to i32 + %.conv9 = select i1 undef, i32 255, i32 %conv9 + ret i32 %.conv9 +} diff --git a/test/CodeGen/X86/StackColoring.ll b/test/CodeGen/X86/StackColoring.ll new file mode 100644 index 0000000..0f36ce2 --- /dev/null +++ b/test/CodeGen/X86/StackColoring.ll @@ -0,0 +1,362 @@ +; RUN: llc -mcpu=corei7 -no-stack-coloring=false < %s | FileCheck %s --check-prefix=YESCOLOR +; RUN: llc -mcpu=corei7 -no-stack-coloring=true < %s | FileCheck %s --check-prefix=NOCOLOR + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;YESCOLOR: subq $136, %rsp +;NOCOLOR: subq $264, %rsp + + +define i32 @myCall_w2(i32 %in) { +entry: + %a = alloca [17 x i8*], align 8 + %a2 = alloca [16 x i8*], align 8 + %b = bitcast [17 x i8*]* %a to i8* + %b2 = bitcast [16 x i8*]* %a2 to i8* + call void @llvm.lifetime.start(i64 -1, i8* %b) + %t1 = call i32 @foo(i32 %in, i8* %b) + %t2 = call i32 @foo(i32 %in, i8* %b) + call void @llvm.lifetime.end(i64 -1, i8* %b) + call void @llvm.lifetime.start(i64 -1, i8* %b2) + %t3 = call i32 @foo(i32 %in, i8* %b2) + %t4 = call i32 @foo(i32 %in, i8* %b2) + call void @llvm.lifetime.end(i64 -1, i8* %b2) + %t5 = add i32 %t1, %t2 + %t6 = add i32 %t3, %t4 + %t7 = add i32 %t5, %t6 + ret i32 %t7 +} + + +;YESCOLOR: subq $272, %rsp +;NOCOLOR: subq $272, %rsp + +define i32 @myCall2_no_merge(i32 %in, i1 %d) { +entry: + %a = alloca [17 x i8*], align 8 + %a2 = alloca [16 x i8*], align 8 + %b = bitcast [17 x i8*]* %a to i8* + %b2 = bitcast [16 x i8*]* %a2 to i8* + call void @llvm.lifetime.start(i64 -1, i8* %b) + %t1 = call i32 @foo(i32 %in, i8* %b) + %t2 = call i32 @foo(i32 %in, i8* %b) + br i1 %d, label %bb2, label %bb3 +bb2: + call void @llvm.lifetime.start(i64 -1, i8* %b2) + %t3 = call i32 @foo(i32 %in, i8* %b2) + %t4 = call i32 @foo(i32 %in, i8* %b2) + call void @llvm.lifetime.end(i64 -1, i8* %b2) + %t5 = add i32 %t1, %t2 + %t6 = add i32 %t3, %t4 + %t7 = add i32 %t5, %t6 + call void @llvm.lifetime.end(i64 -1, i8* %b) + ret i32 %t7 +bb3: + call void @llvm.lifetime.end(i64 -1, i8* %b) + ret i32 0 +} + +;YESCOLOR: subq $144, %rsp +;NOCOLOR: subq $272, %rsp + +define i32 @myCall2_w2(i32 %in, i1 %d) { +entry: + %a = alloca [17 x i8*], align 8 + %a2 = alloca [16 x i8*], align 8 + %b = bitcast [17 x i8*]* %a to i8* + %b2 = bitcast [16 x i8*]* 
%a2 to i8* + call void @llvm.lifetime.start(i64 -1, i8* %b) + %t1 = call i32 @foo(i32 %in, i8* %b) + %t2 = call i32 @foo(i32 %in, i8* %b) + call void @llvm.lifetime.end(i64 -1, i8* %b) + br i1 %d, label %bb2, label %bb3 +bb2: + call void @llvm.lifetime.start(i64 -1, i8* %b2) + %t3 = call i32 @foo(i32 %in, i8* %b2) + %t4 = call i32 @foo(i32 %in, i8* %b2) + call void @llvm.lifetime.end(i64 -1, i8* %b2) + %t5 = add i32 %t1, %t2 + %t6 = add i32 %t3, %t4 + %t7 = add i32 %t5, %t6 + ret i32 %t7 +bb3: + ret i32 0 +} +;YESCOLOR: subq $208, %rsp +;NOCOLOR: subq $400, %rsp + + + + +define i32 @myCall_w4(i32 %in) { +entry: + %a1 = alloca [14 x i8*], align 8 + %a2 = alloca [13 x i8*], align 8 + %a3 = alloca [12 x i8*], align 8 + %a4 = alloca [11 x i8*], align 8 + %b1 = bitcast [14 x i8*]* %a1 to i8* + %b2 = bitcast [13 x i8*]* %a2 to i8* + %b3 = bitcast [12 x i8*]* %a3 to i8* + %b4 = bitcast [11 x i8*]* %a4 to i8* + call void @llvm.lifetime.start(i64 -1, i8* %b4) + call void @llvm.lifetime.start(i64 -1, i8* %b1) + %t1 = call i32 @foo(i32 %in, i8* %b1) + %t2 = call i32 @foo(i32 %in, i8* %b1) + call void @llvm.lifetime.end(i64 -1, i8* %b1) + call void @llvm.lifetime.start(i64 -1, i8* %b2) + %t9 = call i32 @foo(i32 %in, i8* %b2) + %t8 = call i32 @foo(i32 %in, i8* %b2) + call void @llvm.lifetime.end(i64 -1, i8* %b2) + call void @llvm.lifetime.start(i64 -1, i8* %b3) + %t3 = call i32 @foo(i32 %in, i8* %b3) + %t4 = call i32 @foo(i32 %in, i8* %b3) + call void @llvm.lifetime.end(i64 -1, i8* %b3) + %t11 = call i32 @foo(i32 %in, i8* %b4) + call void @llvm.lifetime.end(i64 -1, i8* %b4) + %t5 = add i32 %t1, %t2 + %t6 = add i32 %t3, %t4 + %t7 = add i32 %t5, %t6 + ret i32 %t7 +} + +;YESCOLOR: subq $112, %rsp +;NOCOLOR: subq $400, %rsp + +define i32 @myCall2_w4(i32 %in) { +entry: + %a1 = alloca [14 x i8*], align 8 + %a2 = alloca [13 x i8*], align 8 + %a3 = alloca [12 x i8*], align 8 + %a4 = alloca [11 x i8*], align 8 + %b1 = bitcast [14 x i8*]* %a1 to i8* + %b2 = bitcast [13 x i8*]* %a2 to i8* + %b3 = bitcast [12 x i8*]* %a3 to i8* + %b4 = bitcast [11 x i8*]* %a4 to i8* + call void @llvm.lifetime.start(i64 -1, i8* %b1) + %t1 = call i32 @foo(i32 %in, i8* %b1) + %t2 = call i32 @foo(i32 %in, i8* %b1) + call void @llvm.lifetime.end(i64 -1, i8* %b1) + call void @llvm.lifetime.start(i64 -1, i8* %b2) + %t9 = call i32 @foo(i32 %in, i8* %b2) + %t8 = call i32 @foo(i32 %in, i8* %b2) + call void @llvm.lifetime.end(i64 -1, i8* %b2) + call void @llvm.lifetime.start(i64 -1, i8* %b3) + %t3 = call i32 @foo(i32 %in, i8* %b3) + %t4 = call i32 @foo(i32 %in, i8* %b3) + call void @llvm.lifetime.end(i64 -1, i8* %b3) + br i1 undef, label %bb2, label %bb3 +bb2: + call void @llvm.lifetime.start(i64 -1, i8* %b4) + %t11 = call i32 @foo(i32 %in, i8* %b4) + call void @llvm.lifetime.end(i64 -1, i8* %b4) + %t5 = add i32 %t1, %t2 + %t6 = add i32 %t3, %t4 + %t7 = add i32 %t5, %t6 + ret i32 %t7 +bb3: + ret i32 0 +} + + +;YESCOLOR: subq $144, %rsp +;NOCOLOR: subq $272, %rsp + + +define i32 @myCall2_noend(i32 %in, i1 %d) { +entry: + %a = alloca [17 x i8*], align 8 + %a2 = alloca [16 x i8*], align 8 + %b = bitcast [17 x i8*]* %a to i8* + %b2 = bitcast [16 x i8*]* %a2 to i8* + call void @llvm.lifetime.start(i64 -1, i8* %b) + %t1 = call i32 @foo(i32 %in, i8* %b) + %t2 = call i32 @foo(i32 %in, i8* %b) + call void @llvm.lifetime.end(i64 -1, i8* %b) + br i1 %d, label %bb2, label %bb3 +bb2: + call void @llvm.lifetime.start(i64 -1, i8* %b2) + %t3 = call i32 @foo(i32 %in, i8* %b2) + %t4 = call i32 @foo(i32 %in, i8* %b2) + %t5 = add i32 %t1, %t2 + %t6 = add i32 
%t3, %t4 + %t7 = add i32 %t5, %t6 + ret i32 %t7 +bb3: + ret i32 0 +} + +;YESCOLOR: subq $144, %rsp +;NOCOLOR: subq $272, %rsp +define i32 @myCall2_noend2(i32 %in, i1 %d) { +entry: + %a = alloca [17 x i8*], align 8 + %a2 = alloca [16 x i8*], align 8 + %b = bitcast [17 x i8*]* %a to i8* + %b2 = bitcast [16 x i8*]* %a2 to i8* + call void @llvm.lifetime.start(i64 -1, i8* %b) + %t1 = call i32 @foo(i32 %in, i8* %b) + %t2 = call i32 @foo(i32 %in, i8* %b) + br i1 %d, label %bb2, label %bb3 +bb2: + call void @llvm.lifetime.end(i64 -1, i8* %b) + call void @llvm.lifetime.start(i64 -1, i8* %b2) + %t3 = call i32 @foo(i32 %in, i8* %b2) + %t4 = call i32 @foo(i32 %in, i8* %b2) + %t5 = add i32 %t1, %t2 + %t6 = add i32 %t3, %t4 + %t7 = add i32 %t5, %t6 + ret i32 %t7 +bb3: + ret i32 0 +} + + +;YESCOLOR: subq $144, %rsp +;NOCOLOR: subq $272, %rsp +define i32 @myCall2_nostart(i32 %in, i1 %d) { +entry: + %a = alloca [17 x i8*], align 8 + %a2 = alloca [16 x i8*], align 8 + %b = bitcast [17 x i8*]* %a to i8* + %b2 = bitcast [16 x i8*]* %a2 to i8* + %t1 = call i32 @foo(i32 %in, i8* %b) + %t2 = call i32 @foo(i32 %in, i8* %b) + call void @llvm.lifetime.end(i64 -1, i8* %b) + br i1 %d, label %bb2, label %bb3 +bb2: + call void @llvm.lifetime.start(i64 -1, i8* %b2) + %t3 = call i32 @foo(i32 %in, i8* %b2) + %t4 = call i32 @foo(i32 %in, i8* %b2) + %t5 = add i32 %t1, %t2 + %t6 = add i32 %t3, %t4 + %t7 = add i32 %t5, %t6 + ret i32 %t7 +bb3: + ret i32 0 +} + +; Adopt the test from Transforms/Inline/array_merge.ll' +;YESCOLOR: subq $816, %rsp +;NOCOLOR: subq $1616, %rsp +define void @array_merge() nounwind ssp { +entry: + %A.i1 = alloca [100 x i32], align 4 + %B.i2 = alloca [100 x i32], align 4 + %A.i = alloca [100 x i32], align 4 + %B.i = alloca [100 x i32], align 4 + %0 = bitcast [100 x i32]* %A.i to i8* + call void @llvm.lifetime.start(i64 -1, i8* %0) nounwind + %1 = bitcast [100 x i32]* %B.i to i8* + call void @llvm.lifetime.start(i64 -1, i8* %1) nounwind + call void @bar([100 x i32]* %A.i, [100 x i32]* %B.i) nounwind + call void @llvm.lifetime.end(i64 -1, i8* %0) nounwind + call void @llvm.lifetime.end(i64 -1, i8* %1) nounwind + %2 = bitcast [100 x i32]* %A.i1 to i8* + call void @llvm.lifetime.start(i64 -1, i8* %2) nounwind + %3 = bitcast [100 x i32]* %B.i2 to i8* + call void @llvm.lifetime.start(i64 -1, i8* %3) nounwind + call void @bar([100 x i32]* %A.i1, [100 x i32]* %B.i2) nounwind + call void @llvm.lifetime.end(i64 -1, i8* %2) nounwind + call void @llvm.lifetime.end(i64 -1, i8* %3) nounwind + ret void +} + +;YESCOLOR: subq $272, %rsp +;NOCOLOR: subq $272, %rsp +define i32 @func_phi_lifetime(i32 %in, i1 %d) { +entry: + %a = alloca [17 x i8*], align 8 + %a2 = alloca [16 x i8*], align 8 + %b = bitcast [17 x i8*]* %a to i8* + %b2 = bitcast [16 x i8*]* %a2 to i8* + %t1 = call i32 @foo(i32 %in, i8* %b) + %t2 = call i32 @foo(i32 %in, i8* %b) + call void @llvm.lifetime.end(i64 -1, i8* %b) + br i1 %d, label %bb0, label %bb1 + +bb0: + %I1 = bitcast [17 x i8*]* %a to i8* + br label %bb2 + +bb1: + %I2 = bitcast [16 x i8*]* %a2 to i8* + br label %bb2 + +bb2: + %split = phi i8* [ %I1, %bb0 ], [ %I2, %bb1 ] + call void @llvm.lifetime.start(i64 -1, i8* %split) + %t3 = call i32 @foo(i32 %in, i8* %b2) + %t4 = call i32 @foo(i32 %in, i8* %b2) + %t5 = add i32 %t1, %t2 + %t6 = add i32 %t3, %t4 + %t7 = add i32 %t5, %t6 + call void @llvm.lifetime.end(i64 -1, i8* %split) + ret i32 %t7 +bb3: + ret i32 0 +} + + +;YESCOLOR: multi_region_bb +;NOCOLOR: multi_region_bb +define void @multi_region_bb() nounwind ssp { +entry: + %A.i1 = alloca [100 x 
i32], align 4 + %B.i2 = alloca [100 x i32], align 4 + %A.i = alloca [100 x i32], align 4 + %B.i = alloca [100 x i32], align 4 + %0 = bitcast [100 x i32]* %A.i to i8* + call void @llvm.lifetime.start(i64 -1, i8* %0) nounwind ; <---- start #1 + %1 = bitcast [100 x i32]* %B.i to i8* + call void @llvm.lifetime.start(i64 -1, i8* %1) nounwind + call void @bar([100 x i32]* %A.i, [100 x i32]* %B.i) nounwind + call void @llvm.lifetime.end(i64 -1, i8* %0) nounwind + call void @llvm.lifetime.end(i64 -1, i8* %1) nounwind + %2 = bitcast [100 x i32]* %A.i1 to i8* + call void @llvm.lifetime.start(i64 -1, i8* %2) nounwind + %3 = bitcast [100 x i32]* %B.i2 to i8* + call void @llvm.lifetime.start(i64 -1, i8* %3) nounwind + call void @llvm.lifetime.start(i64 -1, i8* %0) nounwind ; <---- start #2 + call void @bar([100 x i32]* %A.i1, [100 x i32]* %B.i2) nounwind + call void @llvm.lifetime.end(i64 -1, i8* %2) nounwind + call void @llvm.lifetime.end(i64 -1, i8* %0) nounwind + call void @llvm.lifetime.end(i64 -1, i8* %3) nounwind + ret void +} + + +;YESCOLOR: subq $272, %rsp +;NOCOLOR: subq $272, %rsp + +define i32 @myCall_end_before_begin(i32 %in, i1 %d) { +entry: + %a = alloca [17 x i8*], align 8 + %a2 = alloca [16 x i8*], align 8 + %b = bitcast [17 x i8*]* %a to i8* + %b2 = bitcast [16 x i8*]* %a2 to i8* + %t1 = call i32 @foo(i32 %in, i8* %b) + %t2 = call i32 @foo(i32 %in, i8* %b) + call void @llvm.lifetime.end(i64 -1, i8* %b) + call void @llvm.lifetime.start(i64 -1, i8* %b) + br i1 %d, label %bb2, label %bb3 +bb2: + call void @llvm.lifetime.start(i64 -1, i8* %b2) + %t3 = call i32 @foo(i32 %in, i8* %b2) + %t4 = call i32 @foo(i32 %in, i8* %b2) + %t5 = add i32 %t1, %t2 + %t6 = add i32 %t3, %t4 + %t7 = add i32 %t5, %t6 + ret i32 %t7 +bb3: + ret i32 0 +} + +declare void @bar([100 x i32]* , [100 x i32]*) nounwind + +declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind + +declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind + + declare i32 @foo(i32, i8*) + diff --git a/test/CodeGen/X86/atom-bypass-slow-division.ll b/test/CodeGen/X86/atom-bypass-slow-division.ll new file mode 100644 index 0000000..e7c9605 --- /dev/null +++ b/test/CodeGen/X86/atom-bypass-slow-division.ll @@ -0,0 +1,112 @@ +; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s + +define i32 @test_get_quotient(i32 %a, i32 %b) nounwind { +; CHECK: test_get_quotient +; CHECK: orl %ecx, %edx +; CHECK-NEXT: testl $-256, %edx +; CHECK-NEXT: je +; CHECK: idivl +; CHECK: ret +; CHECK: divb +; CHECK: ret + %result = sdiv i32 %a, %b + ret i32 %result +} + +define i32 @test_get_remainder(i32 %a, i32 %b) nounwind { +; CHECK: test_get_remainder +; CHECK: orl %ecx, %edx +; CHECK-NEXT: testl $-256, %edx +; CHECK-NEXT: je +; CHECK: idivl +; CHECK: ret +; CHECK: divb +; CHECK: ret + %result = srem i32 %a, %b + ret i32 %result +} + +define i32 @test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind { +; CHECK: test_get_quotient_and_remainder +; CHECK: orl %ecx, %edx +; CHECK-NEXT: testl $-256, %edx +; CHECK-NEXT: je +; CHECK: idivl +; CHECK: divb +; CHECK: addl +; CHECK: ret +; CEECK-NOT: idivl +; CHECK-NOT: divb + %resultdiv = sdiv i32 %a, %b + %resultrem = srem i32 %a, %b + %result = add i32 %resultdiv, %resultrem + ret i32 %result +} + +define i32 @test_use_div_and_idiv(i32 %a, i32 %b) nounwind { +; CHECK: test_use_div_and_idiv +; CHECK: idivl +; CHECK: divb +; CHECK: divl +; CHECK: divb +; CHECK: addl +; CHECK: ret + %resultidiv = sdiv i32 %a, %b + %resultdiv = udiv i32 %a, %b + %result = add i32 %resultidiv, %resultdiv + ret i32 %result 
+} + +define i32 @test_use_div_imm_imm() nounwind { +; CHECK: test_use_div_imm_imm +; CHECK: movl $64 + %resultdiv = sdiv i32 256, 4 + ret i32 %resultdiv +} + +define i32 @test_use_div_reg_imm(i32 %a) nounwind { +; CHECK: test_use_div_reg_imm +; CEHCK-NOT: test +; CHECK-NOT: idiv +; CHECK-NOT: divb + %resultdiv = sdiv i32 %a, 33 + ret i32 %resultdiv +} + +define i32 @test_use_rem_reg_imm(i32 %a) nounwind { +; CHECK: test_use_rem_reg_imm +; CEHCK-NOT: test +; CHECK-NOT: idiv +; CHECK-NOT: divb + %resultrem = srem i32 %a, 33 + ret i32 %resultrem +} + +define i32 @test_use_divrem_reg_imm(i32 %a) nounwind { +; CHECK: test_use_divrem_reg_imm +; CEHCK-NOT: test +; CHECK-NOT: idiv +; CHECK-NOT: divb + %resultdiv = sdiv i32 %a, 33 + %resultrem = srem i32 %a, 33 + %result = add i32 %resultdiv, %resultrem + ret i32 %result +} + +define i32 @test_use_div_imm_reg(i32 %a) nounwind { +; CHECK: test_use_div_imm_reg +; CHECK: test +; CHECK: idiv +; CHECK: divb + %resultdiv = sdiv i32 4, %a + ret i32 %resultdiv +} + +define i32 @test_use_rem_imm_reg(i32 %a) nounwind { +; CHECK: test_use_rem_imm_reg +; CHECK: test +; CHECK: idiv +; CHECK: divb + %resultdiv = sdiv i32 4, %a + ret i32 %resultdiv +} diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll index 8ad0fa8..95854c7 100644 --- a/test/CodeGen/X86/avx-basic.ll +++ b/test/CodeGen/X86/avx-basic.ll @@ -109,8 +109,8 @@ allocas: ; rdar://10566486 ; CHECK: fneg ; CHECK: vxorps -define <16 x float> @fneg(<16 x float> addrspace(1)* nocapture %out) nounwind { - %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> +define <16 x float> @fneg(<16 x float> %a) nounwind { + %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a ret <16 x float> %1 } diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll index 9b41709..ec11654 100644 --- a/test/CodeGen/X86/avx-shuffle.ll +++ b/test/CodeGen/X86/avx-shuffle.ll @@ -229,9 +229,8 @@ define <8 x float> @test17(<4 x float> %y) { } ; CHECK: test18 -; CHECK: vshufps -; CHECK: vshufps -; CHECK: vunpcklps +; CHECK: vmovshdup +; CHECK: vblendps ; CHECK: ret define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind { %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> @@ -239,9 +238,8 @@ define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind { } ; CHECK: test19 -; CHECK: vshufps -; CHECK: vshufps -; CHECK: vunpcklps +; CHECK: vmovsldup +; CHECK: vblendps ; CHECK: ret define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind { %S = shufflevector <8 x float> %A, <8 x 
float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> diff --git a/test/CodeGen/X86/avx-vextractf128.ll b/test/CodeGen/X86/avx-vextractf128.ll index fe0f6ca..ff56a45 100644 --- a/test/CodeGen/X86/avx-vextractf128.ll +++ b/test/CodeGen/X86/avx-vextractf128.ll @@ -19,12 +19,12 @@ entry: } ; CHECK: @t0 -; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0 +; CHECK-NOT: vextractf128 $1, %ymm0, %xmm0 ; CHECK-NOT: vmovaps %xmm0, (%rdi) -; CHECK: vextractf128 $0, %ymm0, (%rdi) +; CHECK: vextractf128 $1, %ymm0, (%rdi) define void @t0(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp { entry: - %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 0) + %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 1) %1 = bitcast float* %addr to <4 x float>* store <4 x float> %0, <4 x float>* %1, align 16 ret void @@ -32,27 +32,13 @@ entry: declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone -; CHECK: @t1 -; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0 -; CHECK-NOT: vmovups %xmm0, (%rdi) -; CHECK: vextractf128 $0, %ymm0, (%rdi) -define void @t1(float* %addr, <8 x float> %a) nounwind uwtable ssp { -entry: - %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 0) - %1 = bitcast float* %addr to i8* - tail call void @llvm.x86.sse.storeu.ps(i8* %1, <4 x float> %0) - ret void -} - -declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind - ; CHECK: @t2 -; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0 +; CHECK-NOT: vextractf128 $1, %ymm0, %xmm0 ; CHECK-NOT: vmovaps %xmm0, (%rdi) -; CHECK: vextractf128 $0, %ymm0, (%rdi) +; CHECK: vextractf128 $1, %ymm0, (%rdi) define void @t2(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp { entry: - %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 0) + %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 1) %1 = bitcast double* %addr to <2 x double>* store <2 x double> %0, <2 x double>* %1, align 16 ret void @@ -60,28 +46,14 @@ entry: declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone -; CHECK: @t3 -; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0 -; CHECK-NOT: vmovups %xmm0, (%rdi) -; CHECK: vextractf128 $0, %ymm0, (%rdi) -define void @t3(double* %addr, <4 x double> %a) nounwind uwtable ssp { -entry: - %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 0) - %1 = bitcast double* %addr to i8* - tail call void @llvm.x86.sse2.storeu.pd(i8* %1, <2 x double> %0) - ret void -} - -declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind - ; CHECK: @t4 -; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0 +; CHECK-NOT: vextractf128 $1, %ymm0, %xmm0 ; CHECK-NOT: vmovaps %xmm0, (%rdi) -; CHECK: vextractf128 $0, %ymm0, (%rdi) +; CHECK: vextractf128 $1, %ymm0, (%rdi) define void @t4(<2 x i64>* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp { entry: %0 = bitcast <4 x i64> %a to <8 x i32> - %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 0) + %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 1) %2 = bitcast <4 x i32> %1 to <2 x i64> store <2 x i64> %2, <2 x i64>* %addr, align 16 ret void @@ -90,17 +62,43 @@ entry: declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone ; CHECK: @t5 -; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0 -; CHECK-NOT: vmovdqu %xmm0, (%rdi) -; CHECK: vextractf128 $0, %ymm0, (%rdi) -define void @t5(<2 x i64>* 
%addr, <4 x i64> %a) nounwind uwtable ssp { +; CHECK: vmovaps %xmm0, (%rdi) +define void @t5(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp { +entry: + %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 0) + %1 = bitcast float* %addr to <4 x float>* + store <4 x float> %0, <4 x float>* %1, align 16 + ret void +} + +; CHECK: @t6 +; CHECK: vmovaps %xmm0, (%rdi) +define void @t6(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp { +entry: + %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 0) + %1 = bitcast double* %addr to <2 x double>* + store <2 x double> %0, <2 x double>* %1, align 16 + ret void +} + +; CHECK: @t7 +; CHECK: vmovaps %xmm0, (%rdi) +define void @t7(<2 x i64>* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp { entry: %0 = bitcast <4 x i64> %a to <8 x i32> %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 0) - %2 = bitcast <2 x i64>* %addr to i8* - %3 = bitcast <4 x i32> %1 to <16 x i8> - tail call void @llvm.x86.sse2.storeu.dq(i8* %2, <16 x i8> %3) + %2 = bitcast <4 x i32> %1 to <2 x i64> + store <2 x i64> %2, <2 x i64>* %addr, align 16 ret void } -declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind +; CHECK: @t8 +; CHECK: vmovups %xmm0, (%rdi) +define void @t8(<2 x i64>* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp { +entry: + %0 = bitcast <4 x i64> %a to <8 x i32> + %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 0) + %2 = bitcast <4 x i32> %1 to <2 x i64> + store <2 x i64> %2, <2 x i64>* %addr, align 1 + ret void +} diff --git a/test/CodeGen/X86/avx2-shuffle.ll b/test/CodeGen/X86/avx2-shuffle.ll index c5899fa..267a806 100644 --- a/test/CodeGen/X86/avx2-shuffle.ll +++ b/test/CodeGen/X86/avx2-shuffle.ll @@ -26,3 +26,37 @@ entry: %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 3, i32 undef, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15> ret <16 x i16> %shuffle.i } + +; CHECK: vpshufb_test +; CHECK; vpshufb {{.*\(%r.*}}, %ymm +; CHECK: ret +define <32 x i8> @vpshufb_test(<32 x i8> %a) nounwind { + %S = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15, + i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15, + i32 18, i32 19, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25, + i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18> + ret <32 x i8>%S +} + +; CHECK: vpshufb1_test +; CHECK; vpshufb {{.*\(%r.*}}, %ymm +; CHECK: ret +define <32 x i8> @vpshufb1_test(<32 x i8> %a) nounwind { + %S = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15, + i32 1, i32 9, i32 36, i32 11, i32 5, i32 13, i32 7, i32 15, + i32 18, i32 49, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25, + i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18> + ret <32 x i8>%S +} + + +; CHECK: vpshufb2_test +; CHECK; vpshufb {{.*\(%r.*}}, %ymm +; CHECK: ret +define <32 x i8> @vpshufb2_test(<32 x i8> %a) nounwind { + %S = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15, + i32 1, i32 9, i32 36, i32 11, i32 5, i32 13, i32 7, i32 15, + i32 18, i32 49, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25, + i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18> + ret <32 x i8>%S +} diff --git a/test/CodeGen/X86/bool-simplify.ll 
b/test/CodeGen/X86/bool-simplify.ll index 0cb9fd9..09eb5d1 100644 --- a/test/CodeGen/X86/bool-simplify.ll +++ b/test/CodeGen/X86/bool-simplify.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx | FileCheck %s +; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx,+rdrand | FileCheck %s define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) { %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c) @@ -39,4 +39,20 @@ define i32 @bax(<2 x i64> %c) { ; CHECK: ret } +define i32 @rnd(i32 %arg) nounwind uwtable { + %1 = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind + %2 = extractvalue { i32, i32 } %1, 0 + %3 = extractvalue { i32, i32 } %1, 1 + %4 = icmp eq i32 %3, 0 + %5 = select i1 %4, i32 0, i32 %arg + %6 = add i32 %5, %2 + ret i32 %6 +; CHECK: rnd +; CHECK: rdrand +; CHECK: cmov +; CHECK-NOT: cmov +; CHECK: ret +} + declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone +declare { i32, i32 } @llvm.x86.rdrand.32() nounwind diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll index d8f4663..85a70aa 100644 --- a/test/CodeGen/X86/fast-isel-x86-64.ll +++ b/test/CodeGen/X86/fast-isel-x86-64.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s +; RUN: llc < %s -mattr=-avx -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s +; RUN: llc < %s -mattr=+avx -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s --check-prefix=AVX target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" @@ -197,6 +198,11 @@ block2: ; CHECK: cvtsi2sdq {{.*}} %xmm0 ; CHECK: movb $1, %al ; CHECK: callq _test16callee + +; AVX: movabsq $1 +; AVX: vmovsd LCP{{.*}}_{{.*}}(%rip), %xmm0 +; AVX: movb $1, %al +; AVX: callq _test16callee call void (...)* @test16callee(double 1.000000e+00) ret void } diff --git a/test/CodeGen/X86/fma.ll b/test/CodeGen/X86/fma.ll index b0c1d0a..bd3514c 100644 --- a/test/CodeGen/X86/fma.ll +++ b/test/CodeGen/X86/fma.ll @@ -1,11 +1,13 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA-INST -; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s --check-prefix=CHECK-FMA-CALL -; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA-INST -; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s --check-prefix=CHECK-FMA-CALL +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-INST +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-CALL +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-INST +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-CALL +; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma4 | FileCheck %s --check-prefix=CHECK-FMA-INST +; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-CALL ; CHECK: test_f32 ; CHECK-FMA-INST: vfmadd213ss -; CHECK-FMA-CALL: _fmaf +; CHECK-FMA-CALL: fmaf define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp { entry: @@ -15,7 +17,7 @@ entry: ; CHECK: test_f64 ; CHECK-FMA-INST: vfmadd213sd -; CHECK-FMA-CALL: _fma +; CHECK-FMA-CALL: fma define double @test_f64(double 
%a, double %b, double %c) nounwind readnone ssp { entry: @@ -24,7 +26,7 @@ entry: } ; CHECK: test_f80 -; CHECK: _fmal +; CHECK: fmal define x86_fp80 @test_f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) nounwind readnone ssp { entry: diff --git a/test/CodeGen/X86/fma3-intrinsics.ll b/test/CodeGen/X86/fma3-intrinsics.ll index 90529e0..e3910a6 100755 --- a/test/CodeGen/X86/fma3-intrinsics.ll +++ b/test/CodeGen/X86/fma3-intrinsics.ll @@ -1,4 +1,6 @@ -; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 -mattr=avx2,+fma | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma,+fma4 | FileCheck %s +; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; CHECK: fmadd213ss %xmm diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll index fd414b3..2fe1ecd 100644 --- a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll +++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s ; VFMADD define < 4 x float > @test_x86_fma_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll index 5d97a87..6d98d59 100644 --- a/test/CodeGen/X86/fma_patterns.ll +++ b/test/CodeGen/X86/fma_patterns.ll @@ -1,8 +1,13 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=avx2,+fma -fp-contract=fast | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 -fp-contract=fast | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 -fp-contract=fast | FileCheck %s --check-prefix=CHECK_FMA4 ; CHECK: test_x86_fmadd_ps -; CHECK: vfmadd213ps %xmm2, %xmm0, %xmm1 +; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 ; CHECK: ret +; CHECK_FMA4: test_x86_fmadd_ps +; CHECK_FMA4: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4: ret define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { %x = fmul <4 x float> %a0, %a1 %res = fadd <4 x float> %x, %a2 @@ -10,8 +15,11 @@ define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x flo } ; CHECK: test_x86_fmsub_ps -; CHECK: fmsub213ps %xmm2, %xmm0, %xmm1 +; CHECK: fmsub213ps %xmm2, %xmm1, %xmm0 ; CHECK: ret +; CHECK_FMA4: test_x86_fmsub_ps +; CHECK_FMA4: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4: ret define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { %x = fmul <4 x float> %a0, %a1 %res = fsub <4 x float> %x, %a2 @@ -19,8 +27,11 @@ define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x flo } ; CHECK: test_x86_fnmadd_ps -; CHECK: fnmadd213ps %xmm2, %xmm0, %xmm1 +; CHECK: fnmadd213ps %xmm2, %xmm1, %xmm0 ; CHECK: ret +; CHECK_FMA4: test_x86_fnmadd_ps +; CHECK_FMA4: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4: ret define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { %x = fmul <4 x float> %a0, %a1 %res = fsub <4 x float> %a2, %x @@ -28,8 +39,11 @@ define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x fl } ; CHECK: test_x86_fnmsub_ps -; CHECK: fnmsub213ps %xmm2, %xmm0, %xmm1 +; CHECK: fnmsub213ps %xmm2, %xmm1, %xmm0 ; CHECK: ret +; 
CHECK_FMA4: test_x86_fnmsub_ps +; CHECK_FMA4: fnmsubps %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4: ret define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { %x = fmul <4 x float> %a0, %a1 %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x @@ -38,8 +52,11 @@ define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x fl } ; CHECK: test_x86_fmadd_ps_y -; CHECK: vfmadd213ps %ymm2, %ymm0, %ymm1 +; CHECK: vfmadd213ps %ymm2, %ymm1, %ymm0 ; CHECK: ret +; CHECK_FMA4: test_x86_fmadd_ps_y +; CHECK_FMA4: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 +; CHECK_FMA4: ret define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { %x = fmul <8 x float> %a0, %a1 %res = fadd <8 x float> %x, %a2 @@ -47,8 +64,11 @@ define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x f } ; CHECK: test_x86_fmsub_ps_y -; CHECK: vfmsub213ps %ymm2, %ymm0, %ymm1 +; CHECK: vfmsub213ps %ymm2, %ymm1, %ymm0 ; CHECK: ret +; CHECK_FMA4: test_x86_fmsub_ps_y +; CHECK_FMA4: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 +; CHECK_FMA4: ret define <8 x float> @test_x86_fmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { %x = fmul <8 x float> %a0, %a1 %res = fsub <8 x float> %x, %a2 @@ -56,8 +76,11 @@ define <8 x float> @test_x86_fmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x f } ; CHECK: test_x86_fnmadd_ps_y -; CHECK: vfnmadd213ps %ymm2, %ymm0, %ymm1 +; CHECK: vfnmadd213ps %ymm2, %ymm1, %ymm0 ; CHECK: ret +; CHECK_FMA4: test_x86_fnmadd_ps_y +; CHECK_FMA4: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 +; CHECK_FMA4: ret define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { %x = fmul <8 x float> %a0, %a1 %res = fsub <8 x float> %a2, %x @@ -65,7 +88,7 @@ define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x } ; CHECK: test_x86_fnmsub_ps_y -; CHECK: vfnmsub213ps %ymm2, %ymm0, %ymm1 +; CHECK: vfnmsub213ps %ymm2, %ymm1, %ymm0 ; CHECK: ret define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { %x = fmul <8 x float> %a0, %a1 @@ -75,8 +98,11 @@ define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x } ; CHECK: test_x86_fmadd_pd_y -; CHECK: vfmadd213pd %ymm2, %ymm0, %ymm1 +; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 ; CHECK: ret +; CHECK_FMA4: test_x86_fmadd_pd_y +; CHECK_FMA4: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 +; CHECK_FMA4: ret define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { %x = fmul <4 x double> %a0, %a1 %res = fadd <4 x double> %x, %a2 @@ -84,8 +110,11 @@ define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4 } ; CHECK: test_x86_fmsub_pd_y -; CHECK: vfmsub213pd %ymm2, %ymm0, %ymm1 +; CHECK: vfmsub213pd %ymm2, %ymm1, %ymm0 ; CHECK: ret +; CHECK_FMA4: test_x86_fmsub_pd_y +; CHECK_FMA4: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 +; CHECK_FMA4: ret define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { %x = fmul <4 x double> %a0, %a1 %res = fsub <4 x double> %x, %a2 @@ -93,8 +122,11 @@ define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4 } ; CHECK: test_x86_fmsub_pd -; CHECK: vfmsub213pd %xmm2, %xmm0, %xmm1 +; CHECK: vfmsub213pd %xmm2, %xmm1, %xmm0 ; CHECK: ret +; CHECK_FMA4: test_x86_fmsub_pd +; CHECK_FMA4: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4: ret define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, 
<2 x double> %a2) { %x = fmul <2 x double> %a0, %a1 %res = fsub <2 x double> %x, %a2 @@ -102,8 +134,11 @@ define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x } ; CHECK: test_x86_fnmadd_ss -; CHECK: vfnmadd213ss %xmm2, %xmm0, %xmm1 +; CHECK: vfnmadd213ss %xmm2, %xmm1, %xmm0 ; CHECK: ret +; CHECK_FMA4: test_x86_fnmadd_ss +; CHECK_FMA4: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4: ret define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) { %x = fmul float %a0, %a1 %res = fsub float %a2, %x @@ -111,8 +146,11 @@ define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) { } ; CHECK: test_x86_fnmadd_sd -; CHECK: vfnmadd213sd %xmm2, %xmm0, %xmm1 +; CHECK: vfnmadd213sd %xmm2, %xmm1, %xmm0 ; CHECK: ret +; CHECK_FMA4: test_x86_fnmadd_sd +; CHECK_FMA4: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4: ret define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) { %x = fmul double %a0, %a1 %res = fsub double %a2, %x @@ -120,8 +158,11 @@ define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) { } ; CHECK: test_x86_fmsub_sd -; CHECK: vfmsub213sd %xmm2, %xmm0, %xmm1 +; CHECK: vfmsub213sd %xmm2, %xmm1, %xmm0 ; CHECK: ret +; CHECK_FMA4: test_x86_fmsub_sd +; CHECK_FMA4: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4: ret define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) { %x = fmul double %a0, %a1 %res = fsub double %x, %a2 @@ -129,11 +170,43 @@ define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) { } ; CHECK: test_x86_fnmsub_ss -; CHECK: vfnmsub213ss %xmm2, %xmm0, %xmm1 +; CHECK: vfnmsub213ss %xmm2, %xmm1, %xmm0 ; CHECK: ret +; CHECK_FMA4: test_x86_fnmsub_ss +; CHECK_FMA4: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK_FMA4: ret define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) { %x = fsub float -0.000000e+00, %a0 %y = fmul float %x, %a1 %res = fsub float %y, %a2 ret float %res } + +; CHECK: test_x86_fmadd_ps +; CHECK: vmovaps (%rdi), %xmm2 +; CHECK: vfmadd213ps %xmm1, %xmm0, %xmm2 +; CHECK: ret +; CHECK_FMA4: test_x86_fmadd_ps +; CHECK_FMA4: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 +; CHECK_FMA4: ret +define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) { + %x = load <4 x float>* %a0 + %y = fmul <4 x float> %x, %a1 + %res = fadd <4 x float> %y, %a2 + ret <4 x float> %res +} + +; CHECK: test_x86_fmsub_ps +; CHECK: vmovaps (%rdi), %xmm2 +; CHECK: fmsub213ps %xmm1, %xmm0, %xmm2 +; CHECK: ret +; CHECK_FMA4: test_x86_fmsub_ps +; CHECK_FMA4: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 +; CHECK_FMA4: ret +define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) { + %x = load <4 x float>* %a0 + %y = fmul <4 x float> %x, %a1 + %res = fsub <4 x float> %y, %a2 + ret <4 x float> %res +} + diff --git a/test/CodeGen/X86/fp-fast.ll b/test/CodeGen/X86/fp-fast.ll new file mode 100644 index 0000000..091f0de --- /dev/null +++ b/test/CodeGen/X86/fp-fast.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=x86-64 -mattr=-fma4 -mtriple=x86_64-apple-darwin -enable-unsafe-fp-math < %s | FileCheck %s + +; CHECK: test1 +define float @test1(float %a) { +; CHECK-NOT: addss +; CHECK: mulss +; CHECK-NOT: addss +; CHECK: ret + %t1 = fadd float %a, %a + %r = fadd float %t1, %t1 + ret float %r +} + +; CHECK: test2 +define float @test2(float %a) { +; CHECK-NOT: addss +; CHECK: mulss +; CHECK-NOT: addss +; CHECK: ret + %t1 = fmul float 4.0, %a + %t2 = fadd float %a, %a + %r = fadd float %t1, %t2 + ret float %r +} + +; CHECK: test3 +define float 
@test3(float %a) { +; CHECK-NOT: addss +; CHECK: xorps +; CHECK-NOT: addss +; CHECK: ret + %t1 = fmul float 2.0, %a + %t2 = fadd float %a, %a + %r = fsub float %t1, %t2 + ret float %r +} + diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll index 91576fb..597236e 100644 --- a/test/CodeGen/X86/inline-asm-tied.ll +++ b/test/CodeGen/X86/inline-asm-tied.ll @@ -19,3 +19,12 @@ entry: %1 = load i64* %retval ; <i64> [#uses=1] ret i64 %1 } + +; The tied operands are not necessarily in the same order as the defs. +; PR13742 +define i64 @swapped(i64 %x, i64 %y) nounwind { +entry: + %x0 = call { i64, i64 } asm "foo", "=r,=r,1,0,~{dirflag},~{fpsr},~{flags}"(i64 %x, i64 %y) nounwind + %x1 = extractvalue { i64, i64 } %x0, 0 + ret i64 %x1 +} diff --git a/test/CodeGen/X86/ms-inline-asm.ll b/test/CodeGen/X86/ms-inline-asm.ll new file mode 100644 index 0000000..014132b --- /dev/null +++ b/test/CodeGen/X86/ms-inline-asm.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +define i32 @t1() nounwind { +entry: + %0 = tail call i32 asm sideeffect inteldialect "mov eax, $1\0Amov $0, eax", "=r,r,~{eax},~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + ret i32 %0 +; CHECK: t1 +; CHECK: {{## InlineAsm Start|#APP}} +; CHECK: .intel_syntax +; CHECK: mov eax, ecx +; CHECK: mov ecx, eax +; CHECK: .att_syntax +; CHECK: {{## InlineAsm End|#NO_APP}} +} diff --git a/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/test/CodeGen/X86/phys_subreg_coalesce-3.ll index 984d7e5..51320dd 100644 --- a/test/CodeGen/X86/phys_subreg_coalesce-3.ll +++ b/test/CodeGen/X86/phys_subreg_coalesce-3.ll @@ -1,14 +1,10 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s -; XFAIL: * ; rdar://5571034 ; This requires physreg joining, %vreg13 is live everywhere: ; 304L %CL<def> = COPY %vreg13:sub_8bit; GR32_ABCD:%vreg13 ; 320L %vreg15<def> = COPY %vreg19; GR32:%vreg15 GR32_NOSP:%vreg19 ; 336L %vreg15<def> = SAR32rCL %vreg15, %EFLAGS<imp-def,dead>, %CL<imp-use,kill>; GR32:%vreg15 -; -; This test is XFAIL until the register allocator understands trivial physreg -; interference. 
<rdar://9802098> define void @foo(i32* nocapture %quadrant, i32* nocapture %ptr, i32 %bbSize, i32 %bbStart, i32 %shifts) nounwind ssp { ; CHECK: foo: diff --git a/test/CodeGen/X86/pr12312.ll b/test/CodeGen/X86/pr12312.ll new file mode 100644 index 0000000..84102f1 --- /dev/null +++ b/test/CodeGen/X86/pr12312.ll @@ -0,0 +1,48 @@ +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse41,-avx < %s | FileCheck %s --check-prefix SSE41 +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix AVX + +define i32 @veccond(<4 x i32> %input) { +entry: + %0 = bitcast <4 x i32> %input to i128 + %1 = icmp ne i128 %0, 0 + br i1 %1, label %if-true-block, label %endif-block + +if-true-block: ; preds = %entry + ret i32 0 +endif-block: ; preds = %entry, + ret i32 1 +; SSE41: veccond +; SSE41: ptest +; SSE41: ret +; AVX: veccond +; AVX: vptest +; AVX: ret +} + +define i32 @vectest(<4 x i32> %input) { +entry: + %0 = bitcast <4 x i32> %input to i128 + %1 = icmp ne i128 %0, 0 + %2 = zext i1 %1 to i32 + ret i32 %2 +; SSE41: vectest +; SSE41: ptest +; SSE41: ret +; AVX: vectest +; AVX: vptest +; AVX: ret +} + +define i32 @vecsel(<4 x i32> %input, i32 %a, i32 %b) { +entry: + %0 = bitcast <4 x i32> %input to i128 + %1 = icmp ne i128 %0, 0 + %2 = select i1 %1, i32 %a, i32 %b + ret i32 %2 +; SSE41: vecsel +; SSE41: ptest +; SSE41: ret +; AVX: vecsel +; AVX: vptest +; AVX: ret +} diff --git a/test/CodeGen/X86/pr12359.ll b/test/CodeGen/X86/pr12359.ll new file mode 100644 index 0000000..024b163 --- /dev/null +++ b/test/CodeGen/X86/pr12359.ll @@ -0,0 +1,10 @@ +; RUN: llc -asm-verbose -mtriple=x86_64-unknown-unknown -mcpu=corei7 < %s | FileCheck %s +define <16 x i8> @shuf(<16 x i8> %inval1) { +entry: + %0 = shufflevector <16 x i8> %inval1, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 4, i32 3, i32 2, i32 16, i32 16, i32 3, i32 4, i32 0, i32 4, i32 3, i32 2, i32 16, i32 16, i32 3, i32 4> + ret <16 x i8> %0 +; CHECK: shuf +; CHECK: # BB#0: # %entry +; CHECK-NEXT: pshufb +; CHECK-NEXT: ret +} diff --git a/test/CodeGen/X86/tls-pic.ll b/test/CodeGen/X86/tls-pic.ll index 51c3d23..b823f0a 100644 --- a/test/CodeGen/X86/tls-pic.ll +++ b/test/CodeGen/X86/tls-pic.ll @@ -76,12 +76,12 @@ entry: ; X32: f5: ; X32: leal {{[jk]}}@TLSLDM(%ebx) -; X32-NEXT: calll ___tls_get_addr@PLT -; X32-NEXT: movl {{[jk]}}@DTPOFF(%eax) -; X32-NEXT: addl {{[jk]}}@DTPOFF(%eax) +; X32: calll ___tls_get_addr@PLT +; X32: movl {{[jk]}}@DTPOFF(%e +; X32: addl {{[jk]}}@DTPOFF(%e ; X64: f5: ; X64: leaq {{[jk]}}@TLSLD(%rip), %rdi -; X64-NEXT: callq __tls_get_addr@PLT -; X64-NEXT: movl {{[jk]}}@DTPOFF(%rax) -; X64-NEXT: addl {{[jk]}}@DTPOFF(%rax) +; X64: callq __tls_get_addr@PLT +; X64: movl {{[jk]}}@DTPOFF(%r +; X64: addl {{[jk]}}@DTPOFF(%r diff --git a/test/CodeGen/X86/vec_fabs.ll b/test/CodeGen/X86/vec_fabs.ll new file mode 100644 index 0000000..82517cb --- /dev/null +++ b/test/CodeGen/X86/vec_fabs.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s + + +define <2 x double> @fabs_v2f64(<2 x double> %p) +{ + ; CHECK: fabs_v2f64 + ; CHECK: vandps + %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p) + ret <2 x double> %t +} +declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p) + +define <4 x float> @fabs_v4f32(<4 x float> %p) +{ + ; CHECK: fabs_v4f32 + ; CHECK: vandps + %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p) + ret <4 x float> %t +} +declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p) + +define <4 x double> @fabs_v4f64(<4 x double> %p) +{ + ; CHECK: 
fabs_v4f64 + ; CHECK: vandps + %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p) + ret <4 x double> %t +} +declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p) + +define <8 x float> @fabs_v8f32(<8 x float> %p) +{ + ; CHECK: fabs_v8f32 + ; CHECK: vandps + %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p) + ret <8 x float> %t +} +declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p) diff --git a/test/CodeGen/X86/vec_floor.ll b/test/CodeGen/X86/vec_floor.ll new file mode 100644 index 0000000..5e0160b --- /dev/null +++ b/test/CodeGen/X86/vec_floor.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s + + +define <2 x double> @floor_v2f64(<2 x double> %p) +{ + ; CHECK: floor_v2f64 + ; CHECK: vroundpd + %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p) + ret <2 x double> %t +} +declare <2 x double> @llvm.floor.v2f64(<2 x double> %p) + +define <4 x float> @floor_v4f32(<4 x float> %p) +{ + ; CHECK: floor_v4f32 + ; CHECK: vroundps + %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) + ret <4 x float> %t +} +declare <4 x float> @llvm.floor.v4f32(<4 x float> %p) + +define <4 x double> @floor_v4f64(<4 x double> %p) +{ + ; CHECK: floor_v4f64 + ; CHECK: vroundpd + %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) + ret <4 x double> %t +} +declare <4 x double> @llvm.floor.v4f64(<4 x double> %p) + +define <8 x float> @floor_v8f32(<8 x float> %p) +{ + ; CHECK: floor_v8f32 + ; CHECK: vroundps + %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) + ret <8 x float> %t +} +declare <8 x float> @llvm.floor.v8f32(<8 x float> %p) diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll index 05b263e..dc0464f 100644 --- a/test/CodeGen/X86/vec_fpext.ll +++ b/test/CodeGen/X86/vec_fpext.ll @@ -1,14 +1,38 @@ ; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck --check-prefix=AVX %s ; PR11674 define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) { entry: -; TODO: We should be able to generate cvtps2pd for the load. -; For now, just check that we generate something sane. 
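; Note on the revised checks that follow: with the extending load now lowered
; to a single packed conversion, the patterns below expect cvtps2pd (and
; vcvtps2pd under AVX) reading straight from memory instead of the two scalar
; cvtss2sd instructions the old TODO settled for.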
-; CHECK: cvtss2sd -; CHECK: cvtss2sd +; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}} +; AVX: vcvtps2pd (%{{.+}}), %xmm{{[0-9]+}} %0 = load <2 x float>* %in, align 8 %1 = fpext <2 x float> %0 to <2 x double> store <2 x double> %1, <2 x double>* %out, align 1 ret void } + +define void @fpext_frommem4(<4 x float>* %in, <4 x double>* %out) { +entry: +; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}} +; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}} +; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}} + %0 = load <4 x float>* %in + %1 = fpext <4 x float> %0 to <4 x double> + store <4 x double> %1, <4 x double>* %out, align 1 + ret void +} + +define void @fpext_frommem8(<8 x float>* %in, <8 x double>* %out) { +entry: +; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}} +; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}} +; CHECK: cvtps2pd 16(%{{.+}}), %xmm{{[0-9]+}} +; CHECK: cvtps2pd 24(%{{.+}}), %xmm{{[0-9]+}} +; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}} +; AVX: vcvtps2pd 16(%{{.+}}), %ymm{{[0-9]+}} + %0 = load <8 x float>* %in + %1 = fpext <8 x float> %0 to <8 x double> + store <8 x double> %1, <8 x double>* %out, align 1 + ret void +} diff --git a/test/CodeGen/X86/vec_shuffle-26.ll b/test/CodeGen/X86/vec_shuffle-26.ll index 086af6b..8dfc2ea 100644 --- a/test/CodeGen/X86/vec_shuffle-26.ll +++ b/test/CodeGen/X86/vec_shuffle-26.ll @@ -1,6 +1,5 @@ -; RUN: llc < %s -march=x86 -mattr=sse41 -o %t -; RUN: grep unpcklps %t | count 1 -; RUN: grep unpckhps %t | count 3 +; RUN: llc < %s -march=x86 -mcpu=generic -mattr=sse41 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=atom -mattr=+sse41 | FileCheck -check-prefix=ATOM %s ; Transpose example using the more generic vector shuffle. Return float8 ; instead of float16 @@ -14,6 +13,17 @@ target triple = "i386-apple-cl.1.0" define <8 x float> @__transpose2(<4 x float> %p0, <4 x float> %p1, <4 x float> %p2, <4 x float> %p3) nounwind { entry: +; CHECK: transpose2 +; CHECK: unpckhps +; CHECK: unpckhps +; CHECK: unpcklps +; CHECK: unpckhps +; Different instruction order for Atom. +; ATOM: transpose2 +; ATOM: unpckhps +; ATOM: unpckhps +; ATOM: unpckhps +; ATOM: unpcklps %unpcklps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2] %unpckhps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2] %unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2] @@ -27,3 +37,32 @@ entry: ; %r3 = shufflevector <8 x float> %r1, <8 x float> %r2, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15 >; ret <8 x float> %r2 } + +define <2 x i64> @lo_hi_shift(float* nocapture %x, float* nocapture %y) nounwind { +entry: +; movhps should happen before extractps to assure it gets the correct value. 
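; In the IR below, the 64-bit load through %x (matched as movhps) and the i64
; store back through the same pointer (matched as the extractps pair) share a
; single base register, so scheduling the stores first would clobber the bytes
; movhps is about to read; the [[BASEREG]] capture in the checks ties all
; three instructions to that one base register.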
+; CHECK: lo_hi_shift +; CHECK: movhps ([[BASEREG:%[a-z]+]]), +; CHECK: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]]) +; CHECK: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]]) +; ATOM: lo_hi_shift +; ATOM: movhps ([[BASEREG:%[a-z]+]]), +; ATOM: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]]) +; ATOM: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]]) + %v.i = bitcast float* %y to <4 x float>* + %0 = load <4 x float>* %v.i, align 1 + %1 = bitcast float* %x to <1 x i64>* + %.val = load <1 x i64>* %1, align 1 + %2 = bitcast <1 x i64> %.val to <2 x float> + %shuffle.i = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + %shuffle1.i = shufflevector <4 x float> %0, <4 x float> %shuffle.i, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + %cast.i = bitcast <4 x float> %0 to <2 x i64> + %extract.i = extractelement <2 x i64> %cast.i, i32 1 + %3 = bitcast float* %x to i64* + store i64 %extract.i, i64* %3, align 4 + %4 = bitcast <4 x float> %0 to <16 x i8> + %5 = bitcast <4 x float> %shuffle1.i to <16 x i8> + %palignr = shufflevector <16 x i8> %5, <16 x i8> %4, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> + %6 = bitcast <16 x i8> %palignr to <2 x i64> + ret <2 x i64> %6 +} diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll index 9705d14..dfaa3d6 100644 --- a/test/CodeGen/X86/widen_load-1.ll +++ b/test/CodeGen/X86/widen_load-1.ll @@ -1,12 +1,17 @@ -; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; RUN: llc %s -o - -march=x86-64 -mattr=-avx -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=SSE +; RUN: llc %s -o - -march=x86-64 -mattr=+avx -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=AVX ; PR4891 ; PR5626 ; This load should be before the call, not after. -; CHECK: movaps compl+128(%rip), %xmm0 -; CHECK: movaps %xmm0, (%rsp) -; CHECK: callq killcommon +; SSE: movaps compl+128(%rip), %xmm0 +; SSE: movaps %xmm0, (%rsp) +; SSE: callq killcommon + +; AVX: vmovapd compl+128(%rip), %xmm0 +; AVX: vmovapd %xmm0, (%rsp) +; AVX: callq killcommon @compl = linkonce global [20 x i64] zeroinitializer, align 64 ; <[20 x i64]*> [#uses=1] diff --git a/test/DebugInfo/2010-04-13-PubType.ll b/test/DebugInfo/2010-04-13-PubType.ll index db7bb0a..559f032 100644 --- a/test/DebugInfo/2010-04-13-PubType.ll +++ b/test/DebugInfo/2010-04-13-PubType.ll @@ -1,6 +1,6 @@ -; RUN: llc -O0 -asm-verbose < %s > %t -; RUN: grep "External Name" %t | grep -v X -; RUN: grep "External Name" %t | grep Y | count 1 +; RUN: llc -O0 -asm-verbose -mtriple=x86_64-macosx < %s | FileCheck %s +; CHECK-NOT: .asciz "X" ## External Name +; CHECK: .asciz "Y" ## External Name ; Test to check type with no definition is listed in pubtypes section. %struct.X = type opaque %struct.Y = type { i32 } diff --git a/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64 Binary files differnew file mode 100755 index 0000000..9a1d538 --- /dev/null +++ b/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64 diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll index a227071..58fb055 100644 --- a/test/DebugInfo/X86/concrete_out_of_line.ll +++ b/test/DebugInfo/X86/concrete_out_of_line.ll @@ -7,16 +7,15 @@ ; first check that we have a TAG_subprogram at a given offset and it has ; AT_inline. 
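; The DIE offsets checked below shrink (0x134 -> 0x11e, 0x184 -> 0x15f), which
; is consistent with the subprogram DIE no longer carrying a
; DW_AT_MIPS_linkage_name attribute (see the linkage-name.ll test added later
; in this patch); the first DIE keeps its specification and inline checks, and
; the second keeps its abstract_origin check.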
-; CHECK: 0x00000134: DW_TAG_subprogram [18] -; CHECK-NEXT: DW_AT_MIPS_linkage_name +; CHECK: 0x0000011e: DW_TAG_subprogram [18] ; CHECK-NEXT: DW_AT_specification ; CHECK-NEXT: DW_AT_inline ; and then that a TAG_subprogram refers to it with AT_abstract_origin. -; CHECK: 0x00000184: DW_TAG_subprogram [20] -; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4] (cu + 0x0134 => {0x00000134}) +; CHECK: 0x0000015f: DW_TAG_subprogram [20] +; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4] (cu + 0x011e => {0x0000011e}) define i32 @_ZN17nsAutoRefCnt7ReleaseEv() { entry: diff --git a/test/DebugInfo/X86/stringpool.ll b/test/DebugInfo/X86/stringpool.ll index 2cd1001..caf12c2 100644 --- a/test/DebugInfo/X86/stringpool.ll +++ b/test/DebugInfo/X86/stringpool.ll @@ -16,8 +16,8 @@ ; Verify that we refer to 'yyyy' with a relocation. ; LINUX: .long .Lstring3 # DW_AT_name -; LINUX-NEXT: .long 39 # DW_AT_type -; LINUX-NEXT: .byte 1 # DW_AT_external +; LINUX-NEXT: .long 38 # DW_AT_type +; LINUX-NEXT: # DW_AT_external ; LINUX-NEXT: .byte 1 # DW_AT_decl_file ; LINUX-NEXT: .byte 1 # DW_AT_decl_line ; LINUX-NEXT: .byte 9 # DW_AT_location diff --git a/test/DebugInfo/dwarfdump-inlining.test b/test/DebugInfo/dwarfdump-inlining.test new file mode 100644 index 0000000..d3a7e12 --- /dev/null +++ b/test/DebugInfo/dwarfdump-inlining.test @@ -0,0 +1,28 @@ +RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x613 \ +RUN: --inlining --functions | FileCheck %s -check-prefix DEEP_STACK +RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x6de \ +RUN: --inlining | FileCheck %s -check-prefix SHORTER_STACK +RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x685 \ +RUN: --inlining | FileCheck %s -check-prefix SHORT_STACK +RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x640 \ +RUN: --functions | FileCheck %s -check-prefix INL_FUNC_NAME + +DEEP_STACK: inlined_h +DEEP_STACK-NEXT: header.h:2:21 +DEEP_STACK-NEXT: inlined_g +DEEP_STACK-NEXT: header.h:7 +DEEP_STACK-NEXT: inlined_f +DEEP_STACK-NEXT: main.cc:3 +DEEP_STACK-NEXT: main +DEEP_STACK-NEXT: main.cc:8 + +SHORTER_STACK: header.h:7:20 +SHORTER_STACK-NEXT: main.cc:3 +SHORTER_STACK-NEXT: main.cc:8 + +SHORT_STACK: main.cc:3:20 +SHORT_STACK-NEXT: main.cc:8 + +INL_FUNC_NAME: inlined_g +INL_FUNC_NAME-NEXT: header.h:7:20 + diff --git a/test/DebugInfo/dwarfdump-test.test b/test/DebugInfo/dwarfdump-test.test index de23dcd..973c344 100644 --- a/test/DebugInfo/dwarfdump-test.test +++ b/test/DebugInfo/dwarfdump-test.test @@ -17,6 +17,8 @@ RUN: --address=0x56d --functions | FileCheck %s -check-prefix INCLUDE_TEST_2 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 \ RUN: --address=0x55c --functions \ RUN: | FileCheck %s -check-prefix MANY_SEQ_IN_LINE_TABLE +RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 \ +RUN: | FileCheck %s -check-prefix DEBUG_RANGES MAIN: main MAIN-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16:10 @@ -44,3 +46,11 @@ INCLUDE_TEST_2-NEXT: /tmp/include{{[/\\]}}decl.h:5:0 MANY_SEQ_IN_LINE_TABLE: _Z1cv MANY_SEQ_IN_LINE_TABLE-NEXT: /tmp/dbginfo/sequences{{[/\\]}}c.cc:2:0 + +DEBUG_RANGES: .debug_ranges contents: +DEBUG_RANGES-NEXT: 00000000 000000000000055c 0000000000000567 +DEBUG_RANGES-NEXT: 00000000 0000000000000567 000000000000056d +DEBUG_RANGES-NEXT: 00000000 <End of list> +DEBUG_RANGES-NEXT: 00000030 0000000000000570 000000000000057b +DEBUG_RANGES-NEXT: 00000030 0000000000000567 000000000000056d +DEBUG_RANGES-NEXT: 00000030 <End of list> diff --git a/test/DebugInfo/linkage-name.ll 
b/test/DebugInfo/linkage-name.ll new file mode 100644 index 0000000..b984923 --- /dev/null +++ b/test/DebugInfo/linkage-name.ll @@ -0,0 +1,56 @@ +; RUN: llc -mtriple=x86_64-macosx -darwin-gdb-compat=Disable %s -o %t -filetype=obj +; RUN: llvm-dwarfdump %t | FileCheck %s + +; CHECK: DW_TAG_subprogram [9] * +; CHECK-NOT: DW_AT_MIPS_linkage_name +; CHECK: DW_AT_specification + +%class.A = type { i8 } + +@a = global %class.A zeroinitializer, align 1 + +define i32 @_ZN1A1aEi(%class.A* %this, i32 %b) nounwind uwtable ssp align 2 { +entry: + %this.addr = alloca %class.A*, align 8 + %b.addr = alloca i32, align 4 + store %class.A* %this, %class.A** %this.addr, align 8 + call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !21), !dbg !23 + store i32 %b, i32* %b.addr, align 4 + call void @llvm.dbg.declare(metadata !{i32* %b.addr}, metadata !24), !dbg !25 + %this1 = load %class.A** %this.addr + %0 = load i32* %b.addr, align 4, !dbg !26 + ret i32 %0, !dbg !26 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +!llvm.dbg.cu = !{!0} + +!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18} ; [ DW_TAG_compile_unit ] +!1 = metadata !{metadata !2} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{metadata !5} +!5 = metadata !{i32 786478, i32 0, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, metadata !16} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!8 = metadata !{metadata !9, metadata !10, metadata !9} +!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!10 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ] +!11 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ] +!12 = metadata !{metadata !13} +!13 = metadata !{i32 786478, i32 0, metadata !11, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", metadata !6, i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !14} ; [ DW_TAG_subprogram ] +!14 = metadata !{metadata !15} +!15 = metadata !{i32 786468} ; [ DW_TAG_base_type ] +!16 = metadata !{metadata !17} +!17 = metadata !{i32 786468} ; [ DW_TAG_base_type ] +!18 = metadata !{metadata !19} +!19 = metadata !{metadata !20} +!20 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 9, metadata !11, i32 0, i32 1, %class.A* @a} ; [ DW_TAG_variable ] +!21 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777221, metadata !22, i32 64, i32 0} ; [ DW_TAG_arg_variable ] +!22 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] +!23 = metadata !{i32 5, i32 8, metadata !5, null} +!24 = metadata !{i32 786689, metadata !5, 
metadata !"b", metadata !6, i32 33554437, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] +!25 = metadata !{i32 5, i32 14, metadata !5, null} +!26 = metadata !{i32 6, i32 4, metadata !27, null} +!27 = metadata !{i32 786443, metadata !5, i32 5, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] diff --git a/test/ExecutionEngine/MCJIT/pr13727.ll b/test/ExecutionEngine/MCJIT/pr13727.ll new file mode 100644 index 0000000..5fa68f9 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/pr13727.ll @@ -0,0 +1,88 @@ +; RUN: %lli -use-mcjit -O0 -disable-lazy-compilation=false %s + +; The intention of this test is to verify that symbols mapped to COMMON in ELF +; work as expected. +; +; Compiled from this C code: +; +; int zero_int; +; double zero_double; +; int zero_arr[10]; +; +; int main() +; { +; zero_arr[zero_int + 5] = 40; +; +; if (zero_double < 1.1) +; zero_arr[zero_int + 2] = 70; +; +; for (int i = 1; i < 10; ++i) { +; zero_arr[i] = zero_arr[i - 1] + zero_arr[i]; +; } +; return zero_arr[9] == 110 ? 0 : -1; +; } + +@zero_int = common global i32 0, align 4 +@zero_arr = common global [10 x i32] zeroinitializer, align 16 +@zero_double = common global double 0.000000e+00, align 8 + +define i32 @main() nounwind { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + %0 = load i32* @zero_int, align 4 + %add = add nsw i32 %0, 5 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom + store i32 40, i32* %arrayidx, align 4 + %1 = load double* @zero_double, align 8 + %cmp = fcmp olt double %1, 1.100000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %2 = load i32* @zero_int, align 4 + %add1 = add nsw i32 %2, 2 + %idxprom2 = sext i32 %add1 to i64 + %arrayidx3 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom2 + store i32 70, i32* %arrayidx3, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + store i32 1, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %if.end + %3 = load i32* %i, align 4 + %cmp4 = icmp slt i32 %3, 10 + br i1 %cmp4, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %4 = load i32* %i, align 4 + %sub = sub nsw i32 %4, 1 + %idxprom5 = sext i32 %sub to i64 + %arrayidx6 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom5 + %5 = load i32* %arrayidx6, align 4 + %6 = load i32* %i, align 4 + %idxprom7 = sext i32 %6 to i64 + %arrayidx8 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom7 + %7 = load i32* %arrayidx8, align 4 + %add9 = add nsw i32 %5, %7 + %8 = load i32* %i, align 4 + %idxprom10 = sext i32 %8 to i64 + %arrayidx11 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom10 + store i32 %add9, i32* %arrayidx11, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %9 = load i32* %i, align 4 + %inc = add nsw i32 %9, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4 + %cmp12 = icmp eq i32 %10, 110 + %cond = select i1 %cmp12, i32 0, i32 -1 + ret i32 %cond +} diff --git a/test/MC/AsmParser/directive_lcomm.s b/test/MC/AsmParser/directive_lcomm.s index 0a0add5..37a350c 100644 --- a/test/MC/AsmParser/directive_lcomm.s +++ b/test/MC/AsmParser/directive_lcomm.s @@ -1,9 +1,14 @@ # RUN: llvm-mc -triple i386-apple-darwin10 %s | FileCheck %s +# RUN: llvm-mc -triple i386-pc-mingw32 %s | FileCheck %s +# RUN: not llvm-mc 
-triple i386-linux-gnu %s 2>&1 | FileCheck %s -check-prefix=ERROR # CHECK: TEST0: -# CHECK: .zerofill __DATA,__bss,a,7,4 -# CHECK: .zerofill __DATA,__bss,b,8 -# CHECK: .zerofill __DATA,__bss,c,0 +# CHECK: .lcomm a,7,4 +# CHECK: .lcomm b,8 +# CHECK: .lcomm c,0 + +# ELF doesn't like alignment on .lcomm. +# ERROR: alignment not supported on this target TEST0: .lcomm a, 8-1, 4 .lcomm b,8 diff --git a/test/MC/AsmParser/labels.s b/test/MC/AsmParser/labels.s index 5609175..6a9870b 100644 --- a/test/MC/AsmParser/labels.s +++ b/test/MC/AsmParser/labels.s @@ -41,7 +41,7 @@ foo: // CHECK: .comm "a 6",1 .comm "a 6", 1 -// CHECK: .zerofill __DATA,__bss,"a 7",1,0 +// CHECK: .lcomm "a 7",1 .lcomm "a 7", 1 // FIXME: We don't bother to support .lsym. diff --git a/test/MC/COFF/comm.ll b/test/MC/COFF/comm.ll new file mode 100644 index 0000000..74da557 --- /dev/null +++ b/test/MC/COFF/comm.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple i386-pc-mingw32 < %s | FileCheck %s + +@a = internal global i8 0, align 1 +@b = internal global double 0.000000e+00, align 8 +@c = common global i8 0, align 1 +@d = common global double 0.000000e+00, align 8 + +; .lcomm uses byte alignment +; CHECK: .lcomm _a,1 +; CHECK: .lcomm _b,8,8 +; .comm uses log2 alignment +; CHECK: .comm _c,1,0 +; CHECK: .comm _d,8,3 diff --git a/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt b/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt index 5ba7d61..00b8526 100644 --- a/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt @@ -1,5 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 2>&1 | grep "invalid instruction encoding" -# XFAIL: * +# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 2>&1 | FileCheck %s # Opcode=737 Name=VLD1DUPq8_UPD Format=ARM_FORMAT_NLdSt(30) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 @@ -9,3 +8,4 @@ # # 'a' == 1 and data_size == 8 is invalid 0x3d 0x3c 0xa0 0xf4 +# CHECK: invalid instruction encoding diff --git a/test/MC/Disassembler/ARM/invalid-VLD1LNd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VLD1LNd32_UPD-thumb.txt new file mode 100644 index 0000000..9bb0995 --- /dev/null +++ b/test/MC/Disassembler/ARM/invalid-VLD1LNd32_UPD-thumb.txt @@ -0,0 +1,4 @@ +# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s + +0xa0 0xf9 0x10 0x08 +# CHECK: invalid instruction encoding diff --git a/test/MC/Disassembler/ARM/invalid-VLD4DUPd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VLD4DUPd32_UPD-thumb.txt new file mode 100644 index 0000000..84c98bf --- /dev/null +++ b/test/MC/Disassembler/ARM/invalid-VLD4DUPd32_UPD-thumb.txt @@ -0,0 +1,4 @@ +# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s + +0xa0 0xf9 0xc0 0x0f +# CHECK: invalid instruction encoding diff --git a/test/MC/Disassembler/ARM/invalid-VLD4LNd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VLD4LNd32_UPD-thumb.txt new file mode 100644 index 0000000..9024b09 --- /dev/null +++ b/test/MC/Disassembler/ARM/invalid-VLD4LNd32_UPD-thumb.txt @@ -0,0 +1,4 @@ +# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s + +0xa0 0xf9 0x30 0x0b +# CHECK: invalid instruction encoding diff --git a/test/MC/Disassembler/ARM/invalid-VST1LNd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VST1LNd32_UPD-thumb.txt new file mode 100644 index 0000000..9462812 --- /dev/null +++ 
b/test/MC/Disassembler/ARM/invalid-VST1LNd32_UPD-thumb.txt @@ -0,0 +1,4 @@ +# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s + +0x80 0xf9 0x10 0x08 +# CHECK: invalid instruction encoding diff --git a/test/MC/Disassembler/ARM/invalid-VST4LNd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VST4LNd32_UPD-thumb.txt new file mode 100644 index 0000000..f6e71bc --- /dev/null +++ b/test/MC/Disassembler/ARM/invalid-VST4LNd32_UPD-thumb.txt @@ -0,0 +1,4 @@ +# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s + +0x80 0xf9 0x30 0x0b +# CHECK: invalid instruction encoding diff --git a/test/MC/Disassembler/ARM/neont-VLD-reencoding.txt b/test/MC/Disassembler/ARM/neont-VLD-reencoding.txt new file mode 100644 index 0000000..e53739e --- /dev/null +++ b/test/MC/Disassembler/ARM/neont-VLD-reencoding.txt @@ -0,0 +1,77 @@ +# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s | FileCheck %s + +0xa0 0xf9 0x00 0x00 +0xa0 0xf9 0x20 0x00 +0xa0 0xf9 0x40 0x00 +0xa0 0xf9 0x60 0x00 +0xa0 0xf9 0x80 0x00 +0xa0 0xf9 0xa0 0x00 +0xa0 0xf9 0xc0 0x00 +0xa0 0xf9 0xe0 0x00 + +# CHECK: vld1.8 {d0[0]}, [r0], r0 @ encoding: [0xa0,0xf9,0x00,0x00] +# CHECK: vld1.8 {d0[1]}, [r0], r0 @ encoding: [0xa0,0xf9,0x20,0x00] +# CHECK: vld1.8 {d0[2]}, [r0], r0 @ encoding: [0xa0,0xf9,0x40,0x00] +# CHECK: vld1.8 {d0[3]}, [r0], r0 @ encoding: [0xa0,0xf9,0x60,0x00] +# CHECK: vld1.8 {d0[4]}, [r0], r0 @ encoding: [0xa0,0xf9,0x80,0x00] +# CHECK: vld1.8 {d0[5]}, [r0], r0 @ encoding: [0xa0,0xf9,0xa0,0x00] +# CHECK: vld1.8 {d0[6]}, [r0], r0 @ encoding: [0xa0,0xf9,0xc0,0x00] +# CHECK: vld1.8 {d0[7]}, [r0], r0 @ encoding: [0xa0,0xf9,0xe0,0x00] + +0xa0 0xf9 0x00 0x04 +0xa0 0xf9 0x10 0x04 +0xa0 0xf9 0x40 0x04 +0xa0 0xf9 0x50 0x04 +0xa0 0xf9 0x80 0x04 +0xa0 0xf9 0x90 0x04 +0xa0 0xf9 0xc0 0x04 +0xa0 0xf9 0xd0 0x04 + +# CHECK: vld1.16 {d0[0]}, [r0], r0 @ encoding: [0xa0,0xf9,0x00,0x04] +# CHECK: vld1.16 {d0[0]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0x10,0x04] +# CHECK: vld1.16 {d0[1]}, [r0], r0 @ encoding: [0xa0,0xf9,0x40,0x04] +# CHECK: vld1.16 {d0[1]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0x50,0x04] +# CHECK: vld1.16 {d0[2]}, [r0], r0 @ encoding: [0xa0,0xf9,0x80,0x04] +# CHECK: vld1.16 {d0[2]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0x90,0x04] +# CHECK: vld1.16 {d0[3]}, [r0], r0 @ encoding: [0xa0,0xf9,0xc0,0x04] +# CHECK: vld1.16 {d0[3]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0xd0,0x04] + +0xa0 0xf9 0x00 0x08 +0xa0 0xf9 0x30 0x08 +0xa0 0xf9 0x80 0x08 +0xa0 0xf9 0xb0 0x08 + +# CHECK: vld1.32 {d0[0]}, [r0], r0 @ encoding: [0xa0,0xf9,0x00,0x08] +# CHECK: vld1.32 {d0[0]}, [r0, :32], r0 @ encoding: [0xa0,0xf9,0x30,0x08] +# CHECK: vld1.32 {d0[1]}, [r0], r0 @ encoding: [0xa0,0xf9,0x80,0x08] +# CHECK: vld1.32 {d0[1]}, [r0, :32], r0 @ encoding: [0xa0,0xf9,0xb0,0x08] + +0xa0 0xf9 0x1f 0x04 +0xa0 0xf9 0x8f 0x00 + +# CHECK: vld1.16 {d0[0]}, [r0, :16] @ encoding: [0xa0,0xf9,0x1f,0x04] +# CHECK: vld1.8 {d0[4]}, [r0] @ encoding: [0xa0,0xf9,0x8f,0x00] + +0xa0 0xf9 0x1d 0x04 +0xa0 0xf9 0x8d 0x00 + +# CHECK: vld1.16 {d0[0]}, [r0, :16]! @ encoding: [0xa0,0xf9,0x1d,0x04] +# CHECK: vld1.8 {d0[4]}, [r0]! 
@ encoding: [0xa0,0xf9,0x8d,0x00] + +0xa5 0xf9 0x10 0x04 +0xa5 0xf9 0x1a 0x04 +0xae 0xf9 0x1a 0x04 +0xa5 0xf9 0x1a 0x94 + +# CHECK: vld1.16 {d0[0]}, [r5, :16], r0 @ encoding: [0xa5,0xf9,0x10,0x04] +# CHECK: vld1.16 {d0[0]}, [r5, :16], r10 @ encoding: [0xa5,0xf9,0x1a,0x04] +# CHECK: vld1.16 {d0[0]}, [lr, :16], r10 @ encoding: [0xae,0xf9,0x1a,0x04] +# CHECK: vld1.16 {d9[0]}, [r5, :16], r10 @ encoding: [0xa5,0xf9,0x1a,0x94] + +0xa0 0xf9 0x20 0x0b +0xa0 0xf9 0x20 0x07 +0xa0 0xf9 0x20 0x03 + +# CHECK: vld4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0, :128], r0 @ encoding: [0xa0,0xf9,0x20,0x0b] +# CHECK: vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r0 @ encoding: [0xa0,0xf9,0x20,0x07] +# CHECK: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r0], r0 @ encoding: [0xa0,0xf9,0x20,0x03] diff --git a/test/MC/Disassembler/ARM/neont-VST-reencoding.txt b/test/MC/Disassembler/ARM/neont-VST-reencoding.txt new file mode 100644 index 0000000..eb3722c --- /dev/null +++ b/test/MC/Disassembler/ARM/neont-VST-reencoding.txt @@ -0,0 +1,77 @@ +# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s | FileCheck %s + +0x80 0xf9 0x00 0x00 +0x81 0xf9 0x21 0x10 +0x81 0xf9 0x42 0x10 +0x81 0xf9 0x61 0x20 +0x82 0xf9 0x82 0x20 +0x82 0xf9 0xa1 0x10 +0x82 0xf9 0xc2 0x20 +0x83 0xf9 0xe3 0x30 + +# CHECK: vst1.8 {d0[0]}, [r0], r0 @ encoding: [0x80,0xf9,0x00,0x00] +# CHECK: vst1.8 {d1[1]}, [r1], r1 @ encoding: [0x81,0xf9,0x21,0x10] +# CHECK: vst1.8 {d1[2]}, [r1], r2 @ encoding: [0x81,0xf9,0x42,0x10] +# CHECK: vst1.8 {d2[3]}, [r1], r1 @ encoding: [0x81,0xf9,0x61,0x20] +# CHECK: vst1.8 {d2[4]}, [r2], r2 @ encoding: [0x82,0xf9,0x82,0x20] +# CHECK: vst1.8 {d1[5]}, [r2], r1 @ encoding: [0x82,0xf9,0xa1,0x10] +# CHECK: vst1.8 {d2[6]}, [r2], r2 @ encoding: [0x82,0xf9,0xc2,0x20] +# CHECK: vst1.8 {d3[7]}, [r3], r3 @ encoding: [0x83,0xf9,0xe3,0x30] + +0x80 0xf9 0x00 0x04 +0xc3 0xf9 0x13 0x04 +0xc4 0xf9 0x43 0x04 +0xc5 0xf9 0x55 0x04 +0xc6 0xf9 0x85 0x04 +0xc7 0xf9 0x95 0x74 +0xc8 0xf9 0xc7 0x84 +0xc9 0xf9 0xd9 0x94 + +# CHECK: vst1.16 {d0[0]}, [r0], r0 @ encoding: [0x80,0xf9,0x00,0x04] +# CHECK: vst1.16 {d16[0]}, [r3, :16], r3 @ encoding: [0xc3,0xf9,0x13,0x04] +# CHECK: vst1.16 {d16[1]}, [r4], r3 @ encoding: [0xc4,0xf9,0x43,0x04] +# CHECK: vst1.16 {d16[1]}, [r5, :16], r5 @ encoding: [0xc5,0xf9,0x55,0x04] +# CHECK: vst1.16 {d16[2]}, [r6], r5 @ encoding: [0xc6,0xf9,0x85,0x04] +# CHECK: vst1.16 {d23[2]}, [r7, :16], r5 @ encoding: [0xc7,0xf9,0x95,0x74] +# CHECK: vst1.16 {d24[3]}, [r8], r7 @ encoding: [0xc8,0xf9,0xc7,0x84] +# CHECK: vst1.16 {d25[3]}, [r9, :16], r9 @ encoding: [0xc9,0xf9,0xd9,0x94] + +0x8a 0xf9 0x01 0xa8 +0xcb 0xf9 0x32 0x18 +0x8c 0xf9 0x83 0xb8 +0xcd 0xf9 0xb4 0x28 + +# CHECK: vst1.32 {d10[0]}, [r10], r1 @ encoding: [0x8a,0xf9,0x01,0xa8] +# CHECK: vst1.32 {d17[0]}, [r11, :32], r2 @ encoding: [0xcb,0xf9,0x32,0x18] +# CHECK: vst1.32 {d11[1]}, [r12], r3 @ encoding: [0x8c,0xf9,0x83,0xb8] +# CHECK: vst1.32 {d18[1]}, [sp, :32], r4 @ encoding: [0xcd,0xf9,0xb4,0x28] + +0x81 0xf9 0x1f 0x44 +0x82 0xf9 0x8f 0x30 + +# CHECK: vst1.16 {d4[0]}, [r1, :16] @ encoding: [0x81,0xf9,0x1f,0x44] +# CHECK: vst1.8 {d3[4]}, [r2] @ encoding: [0x82,0xf9,0x8f,0x30] + +0x83 0xf9 0x1d 0x24 +0x84 0xf9 0x8d 0x10 + +# CHECK: vst1.16 {d2[0]}, [r3, :16]! @ encoding: [0x83,0xf9,0x1d,0x24] +# CHECK: vst1.8 {d1[4]}, [r4]! 
@ encoding: [0x84,0xf9,0x8d,0x10] + +0x85 0xf9 0x10 0x04 +0x85 0xf9 0x1a 0x74 +0x8e 0xf9 0x1a 0x84 +0x85 0xf9 0x1a 0x94 + +# CHECK: vst1.16 {d0[0]}, [r5, :16], r0 @ encoding: [0x85,0xf9,0x10,0x04] +# CHECK: vst1.16 {d7[0]}, [r5, :16], r10 @ encoding: [0x85,0xf9,0x1a,0x74] +# CHECK: vst1.16 {d8[0]}, [lr, :16], r10 @ encoding: [0x8e,0xf9,0x1a,0x84] +# CHECK: vst1.16 {d9[0]}, [r5, :16], r10 @ encoding: [0x85,0xf9,0x1a,0x94] + +0x81 0xf9 0x24 0x0b +0x82 0xf9 0x25 0x07 +0x83 0xf9 0x26 0x03 + +# CHECK: vst4.32 {d0[0], d1[0], d2[0], d3[0]}, [r1, :128], r4 @ encoding: [0x81,0xf9,0x24,0x0b] +# CHECK: vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r2], r5 @ encoding: [0x82,0xf9,0x25,0x07] +# CHECK: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r3], r6 @ encoding: [0x83,0xf9,0x26,0x03] diff --git a/test/MC/ELF/cfi-reg.s b/test/MC/ELF/cfi-reg.s new file mode 100644 index 0000000..fd68d6d --- /dev/null +++ b/test/MC/ELF/cfi-reg.s @@ -0,0 +1,18 @@ +// RUN: llvm-mc -triple x86_64-pc-linux-gnu %s -o - | FileCheck %s +// PR13754 + +f: + .cfi_startproc + nop + .cfi_offset 6, -16 + nop + .cfi_offset %rsi, -16 + nop + .cfi_offset rbx, -16 + nop + .cfi_endproc + +// CHECK: f: +// CHECK: .cfi_offset %rbp, -16 +// CHECK: .cfi_offset %rsi, -16 +// CHECK: .cfi_offset %rbx, -16 diff --git a/test/MC/ELF/lcomm.s b/test/MC/ELF/lcomm.s new file mode 100644 index 0000000..ae8d0ba --- /dev/null +++ b/test/MC/ELF/lcomm.s @@ -0,0 +1,21 @@ +// RUN: llvm-mc -triple i386-pc-linux-gnu %s -filetype=obj -o - | elf-dump | FileCheck %s + +.lcomm A, 5 +.lcomm B, 32 << 20 + +// CHECK: (('st_name', 0x00000001) # 'A' +// CHECK: ('st_value', 0x00000000) +// CHECK: ('st_size', 0x00000005) +// CHECK: ('st_bind', 0x0) +// CHECK: ('st_type', 0x1) +// CHECK: ('st_other', 0x00) +// CHECK: ('st_shndx', 0x0003) +// CHECK: ), +// CHECK: (('st_name', 0x00000003) # 'B' +// CHECK: ('st_value', 0x00000005) +// CHECK: ('st_size', 0x02000000) +// CHECK: ('st_bind', 0x0) +// CHECK: ('st_type', 0x1) +// CHECK: ('st_other', 0x00) +// CHECK: ('st_shndx', 0x0003) +// CHECK: ), diff --git a/test/MC/Mips/do_switch.ll b/test/MC/Mips/do_switch.ll new file mode 100644 index 0000000..7eda1b4 --- /dev/null +++ b/test/MC/Mips/do_switch.ll @@ -0,0 +1,39 @@ +; This test case will cause an internal EK_GPRel64BlockAddress to be +; produced. This was not handled for direct object and an assertion +; to occur. 
This is a variation on test case test/CodeGen/Mips/do_switch.ll + +; RUN: llc < %s -filetype=obj -march=mips -relocation-model=static + +; RUN: llc < %s -filetype=obj -march=mips -relocation-model=pic + +; RUN: llc < %s -filetype=obj -march=mips64 -relocation-model=pic -mcpu=mips64 -mattr=n64 + +define i32 @main() nounwind readnone { +entry: + %x = alloca i32, align 4 ; <i32*> [#uses=2] + store volatile i32 2, i32* %x, align 4 + %0 = load volatile i32* %x, align 4 ; <i32> [#uses=1] + + switch i32 %0, label %bb4 [ + i32 0, label %bb5 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + ] + +bb1: ; preds = %entry + ret i32 2 + +bb2: ; preds = %entry + ret i32 0 + +bb3: ; preds = %entry + ret i32 3 + +bb4: ; preds = %entry + ret i32 4 + +bb5: ; preds = %entry + ret i32 1 +} + diff --git a/test/MC/Mips/elf-N64.ll b/test/MC/Mips/elf-N64.ll index 23ec53a..ae6de78 100644 --- a/test/MC/Mips/elf-N64.ll +++ b/test/MC/Mips/elf-N64.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 %s -o - | elf-dump --dump-section-data | FileCheck %s +; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 -disable-mips-delay-filler %s -o - | elf-dump --dump-section-data | FileCheck %s ; Check for N64 relocation production. ; diff --git a/test/MC/Mips/higher_highest.ll b/test/MC/Mips/higher_highest.ll index 81a89e3..0c66522 100644 --- a/test/MC/Mips/higher_highest.ll +++ b/test/MC/Mips/higher_highest.ll @@ -1,5 +1,8 @@ -; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 -force-mips-long-branch -filetype=obj < %s -o - | elf-dump --dump-section-data | FileCheck %s - +; DISABLE: llc -march=mips64el -mcpu=mips64 -mattr=n64 -force-mips-long-branch -filetype=obj < %s -o - | elf-dump --dump-section-data | FileCheck %s +; RUN: false +; XFAIL: * +; Disabled because currently we don't have a way to generate these relocations. +; ; Check that the R_MIPS_HIGHER and R_MIPS_HIGHEST relocations were created. ; CHECK: ('r_type', 0x1d) diff --git a/test/MC/Mips/mips-alu-instructions.s b/test/MC/Mips/mips-alu-instructions.s new file mode 100644 index 0000000..2997782 --- /dev/null +++ b/test/MC/Mips/mips-alu-instructions.s @@ -0,0 +1,100 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s +# Check that the assembler can handle the documented syntax +# for arithmetic and logical instructions. 
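# Several of the inputs below are assembler aliases rather than distinct
# opcodes; as the CHECK lines show, they are accepted and encoded as their
# canonical forms, for example:
#   not  $7, $8   ->  nor $7, $8, $zero
#   neg  $6, $7   ->  sub $6, $zero, $7
#   move $7, $8   ->  add $7, $8, $zero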
+# CHECK: .section __TEXT,__text,regular,pure_instructions +#------------------------------------------------------------------------------ +# Logical instructions +#------------------------------------------------------------------------------ +# CHECK: and $9, $6, $7 # encoding: [0x24,0x48,0xc7,0x00] +# CHECK: andi $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x30] +# CHECK: andi $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x30] +# CHECK: clo $6, $7 # encoding: [0x21,0x30,0xe6,0x70] +# CHECK: clz $6, $7 # encoding: [0x20,0x30,0xe6,0x70] +# CHECK: ins $19, $9, 6, 7 # encoding: [0x84,0x61,0x33,0x7d] +# CHECK: nor $9, $6, $7 # encoding: [0x27,0x48,0xc7,0x00] +# CHECK: or $3, $3, $5 # encoding: [0x25,0x18,0x65,0x00] +# CHECK: ori $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x34] +# CHECK: rotr $9, $6, 7 # encoding: [0xc2,0x49,0x26,0x00] +# CHECK: rotrv $9, $6, $7 # encoding: [0x46,0x48,0xe6,0x00] +# CHECK: sll $4, $3, 7 # encoding: [0xc0,0x21,0x03,0x00] +# CHECK: sllv $2, $3, $5 # encoding: [0x04,0x10,0xa3,0x00] +# CHECK: slt $3, $3, $5 # encoding: [0x2a,0x18,0x65,0x00] +# CHECK: slti $3, $3, 103 # encoding: [0x67,0x00,0x63,0x28] +# CHECK: slti $3, $3, 103 # encoding: [0x67,0x00,0x63,0x28] +# CHECK: sltiu $3, $3, 103 # encoding: [0x67,0x00,0x63,0x2c] +# CHECK: sltu $3, $3, $5 # encoding: [0x2b,0x18,0x65,0x00] +# CHECK: sra $4, $3, 7 # encoding: [0xc3,0x21,0x03,0x00] +# CHECK: srav $2, $3, $5 # encoding: [0x07,0x10,0xa3,0x00] +# CHECK: srl $4, $3, 7 # encoding: [0xc2,0x21,0x03,0x00] +# CHECK: srlv $2, $3, $5 # encoding: [0x06,0x10,0xa3,0x00] +# CHECK: xor $3, $3, $5 # encoding: [0x26,0x18,0x65,0x00] +# CHECK: xori $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x38] +# CHECK: xori $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x38] +# CHECK: wsbh $6, $7 # encoding: [0xa0,0x30,0x07,0x7c] +# CHECK: nor $7, $8, $zero # encoding: [0x27,0x38,0x00,0x01] + and $9, $6, $7 + and $9, $6, 17767 + andi $9, $6, 17767 + clo $6, $7 + clz $6, $7 + ins $19, $9, 6,7 + nor $9, $6, $7 + or $3, $3, $5 + ori $9, $6, 17767 + rotr $9, $6, 7 + rotrv $9, $6, $7 + sll $4, $3, 7 + sllv $2, $3, $5 + slt $3, $3, $5 + slt $3, $3, 103 + slti $3, $3, 103 + sltiu $3, $3, 103 + sltu $3, $3, $5 + sra $4, $3, 7 + srav $2, $3, $5 + srl $4, $3, 7 + srlv $2, $3, $5 + xor $3, $3, $5 + xor $9, $6, 17767 + xori $9, $6, 17767 + wsbh $6, $7 + not $7 ,$8 + +#------------------------------------------------------------------------------ +# Arithmetic instructions +#------------------------------------------------------------------------------ + +# CHECK: add $9, $6, $7 # encoding: [0x20,0x48,0xc7,0x00] +# CHECK: addi $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x20] +# CHECK: addiu $9, $6, -15001 # encoding: [0x67,0xc5,0xc9,0x24] +# CHECK: addi $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x20] +# CHECK: addiu $9, $6, -15001 # encoding: [0x67,0xc5,0xc9,0x24] +# CHECK: addu $9, $6, $7 # encoding: [0x21,0x48,0xc7,0x00] +# CHECK: madd $6, $7 # encoding: [0x00,0x00,0xc7,0x70] +# CHECK: maddu $6, $7 # encoding: [0x01,0x00,0xc7,0x70] +# CHECK: msub $6, $7 # encoding: [0x04,0x00,0xc7,0x70] +# CHECK: msubu $6, $7 # encoding: [0x05,0x00,0xc7,0x70] +# CHECK: mult $3, $5 # encoding: [0x18,0x00,0x65,0x00] +# CHECK: multu $3, $5 # encoding: [0x19,0x00,0x65,0x00] +# CHECK: sub $9, $6, $7 # encoding: [0x22,0x48,0xc7,0x00] +# CHECK: subu $4, $3, $5 # encoding: [0x23,0x20,0x65,0x00] +# CHECK: sub $6, $zero, $7 # encoding: [0x22,0x30,0x07,0x00] +# CHECK: subu $6, $zero, $7 # encoding: [0x23,0x30,0x07,0x00] +# CHECK: add $7, $8, $zero # encoding: [0x20,0x38,0x00,0x01] + add 
$9,$6,$7 + add $9,$6,17767 + addu $9,$6,-15001 + addi $9,$6,17767 + addiu $9,$6,-15001 + addu $9,$6,$7 + madd $6,$7 + maddu $6,$7 + msub $6,$7 + msubu $6,$7 + mult $3,$5 + multu $3,$5 + sub $9,$6,$7 + subu $4,$3,$5 + neg $6,$7 + negu $6,$7 + move $7,$8 diff --git a/test/MC/Mips/mips-fpu-instructions.s b/test/MC/Mips/mips-fpu-instructions.s new file mode 100644 index 0000000..ce8024d --- /dev/null +++ b/test/MC/Mips/mips-fpu-instructions.s @@ -0,0 +1,162 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s +# Check that the assembler can handle the documented syntax +# for FPU instructions. +# CHECK: .section __TEXT,__text,regular,pure_instructions +#------------------------------------------------------------------------------ +# FP aritmetic instructions +#------------------------------------------------------------------------------ + +# CHECK: abs.d $f12, $f14 # encoding: [0x05,0x73,0x20,0x46] +# CHECK: abs.s $f6, $f7 # encoding: [0x85,0x39,0x00,0x46] +# CHECK: add.d $f8, $f12, $f14 # encoding: [0x00,0x62,0x2e,0x46] +# CHECK: add.s $f9, $f6, $f7 # encoding: [0x40,0x32,0x07,0x46] +# CHECK: floor.w.d $f12, $f14 # encoding: [0x0f,0x73,0x20,0x46] +# CHECK: floor.w.s $f6, $f7 # encoding: [0x8f,0x39,0x00,0x46] +# CHECK: ceil.w.d $f12, $f14 # encoding: [0x0e,0x73,0x20,0x46] +# CHECK: ceil.w.s $f6, $f7 # encoding: [0x8e,0x39,0x00,0x46] +# CHECK: mul.d $f8, $f12, $f14 # encoding: [0x02,0x62,0x2e,0x46] +# CHECK: mul.s $f9, $f6, $f7 # encoding: [0x42,0x32,0x07,0x46] +# CHECK: neg.d $f12, $f14 # encoding: [0x07,0x73,0x20,0x46] +# CHECK: neg.s $f6, $f7 # encoding: [0x87,0x39,0x00,0x46] +# CHECK: round.w.d $f12, $f14 # encoding: [0x0c,0x73,0x20,0x46] +# CHECK: round.w.s $f6, $f7 # encoding: [0x8c,0x39,0x00,0x46] +# CHECK: sqrt.d $f12, $f14 # encoding: [0x04,0x73,0x20,0x46] +# CHECK: sqrt.s $f6, $f7 # encoding: [0x84,0x39,0x00,0x46] +# CHECK: sub.d $f8, $f12, $f14 # encoding: [0x01,0x62,0x2e,0x46] +# CHECK: sub.s $f9, $f6, $f7 # encoding: [0x41,0x32,0x07,0x46] +# CHECK: trunc.w.d $f12, $f14 # encoding: [0x0d,0x73,0x20,0x46] +# CHECK: trunc.w.s $f6, $f7 # encoding: [0x8d,0x39,0x00,0x46] + + abs.d $f12,$f14 + abs.s $f6,$f7 + add.d $f8,$f12,$f14 + add.s $f9,$f6,$f7 + floor.w.d $f12,$f14 + floor.w.s $f6,$f7 + ceil.w.d $f12,$f14 + ceil.w.s $f6,$f7 + mul.d $f8,$f12,$f14 + mul.s $f9,$f6, $f7 + neg.d $f12,$f14 + neg.s $f6,$f7 + round.w.d $f12,$f14 + round.w.s $f6,$f7 + sqrt.d $f12,$f14 + sqrt.s $f6,$f7 + sub.d $f8,$f12,$f14 + sub.s $f9,$f6,$f7 + trunc.w.d $f12,$f14 + trunc.w.s $f6,$f7 + +#------------------------------------------------------------------------------ +# FP compare instructions +#------------------------------------------------------------------------------ + +# CHECK: c.eq.d $f12, $f14 # encoding: [0x32,0x60,0x2e,0x46] +# CHECK: c.eq.s $f6, $f7 # encoding: [0x32,0x30,0x07,0x46] +# CHECK: c.f.d $f12, $f14 # encoding: [0x30,0x60,0x2e,0x46] +# CHECK: c.f.s $f6, $f7 # encoding: [0x30,0x30,0x07,0x46] +# CHECK: c.le.d $f12, $f14 # encoding: [0x3e,0x60,0x2e,0x46] +# CHECK: c.le.s $f6, $f7 # encoding: [0x3e,0x30,0x07,0x46] +# CHECK: c.lt.d $f12, $f14 # encoding: [0x3c,0x60,0x2e,0x46] +# CHECK: c.lt.s $f6, $f7 # encoding: [0x3c,0x30,0x07,0x46] +# CHECK: c.nge.d $f12, $f14 # encoding: [0x3d,0x60,0x2e,0x46] +# CHECK: c.nge.s $f6, $f7 # encoding: [0x3d,0x30,0x07,0x46] +# CHECK: c.ngl.d $f12, $f14 # encoding: [0x3b,0x60,0x2e,0x46] +# CHECK: c.ngl.s $f6, $f7 # encoding: [0x3b,0x30,0x07,0x46] +# CHECK: c.ngle.d $f12, $f14 # encoding: [0x39,0x60,0x2e,0x46] +# CHECK: 
c.ngle.s $f6, $f7 # encoding: [0x39,0x30,0x07,0x46] +# CHECK: c.ngt.d $f12, $f14 # encoding: [0x3f,0x60,0x2e,0x46] +# CHECK: c.ngt.s $f6, $f7 # encoding: [0x3f,0x30,0x07,0x46] +# CHECK: c.ole.d $f12, $f14 # encoding: [0x36,0x60,0x2e,0x46] +# CHECK: c.ole.s $f6, $f7 # encoding: [0x36,0x30,0x07,0x46] +# CHECK: c.olt.d $f12, $f14 # encoding: [0x34,0x60,0x2e,0x46] +# CHECK: c.olt.s $f6, $f7 # encoding: [0x34,0x30,0x07,0x46] +# CHECK: c.seq.d $f12, $f14 # encoding: [0x3a,0x60,0x2e,0x46] +# CHECK: c.seq.s $f6, $f7 # encoding: [0x3a,0x30,0x07,0x46] +# CHECK: c.sf.d $f12, $f14 # encoding: [0x38,0x60,0x2e,0x46] +# CHECK: c.sf.s $f6, $f7 # encoding: [0x38,0x30,0x07,0x46] +# CHECK: c.ueq.d $f12, $f14 # encoding: [0x33,0x60,0x2e,0x46] +# CHECK: c.ueq.s $f28, $f18 # encoding: [0x33,0xe0,0x12,0x46] +# CHECK: c.ule.d $f12, $f14 # encoding: [0x37,0x60,0x2e,0x46] +# CHECK: c.ule.s $f6, $f7 # encoding: [0x37,0x30,0x07,0x46] +# CHECK: c.ult.d $f12, $f14 # encoding: [0x35,0x60,0x2e,0x46] +# CHECK: c.ult.s $f6, $f7 # encoding: [0x35,0x30,0x07,0x46] +# CHECK: c.un.d $f12, $f14 # encoding: [0x31,0x60,0x2e,0x46] +# CHECK: c.un.s $f6, $f7 # encoding: [0x31,0x30,0x07,0x46] + + c.eq.d $f12,$f14 + c.eq.s $f6,$f7 + c.f.d $f12,$f14 + c.f.s $f6,$f7 + c.le.d $f12,$f14 + c.le.s $f6,$f7 + c.lt.d $f12,$f14 + c.lt.s $f6,$f7 + c.nge.d $f12,$f14 + c.nge.s $f6,$f7 + c.ngl.d $f12,$f14 + c.ngl.s $f6,$f7 + c.ngle.d $f12,$f14 + c.ngle.s $f6,$f7 + c.ngt.d $f12,$f14 + c.ngt.s $f6,$f7 + c.ole.d $f12,$f14 + c.ole.s $f6,$f7 + c.olt.d $f12,$f14 + c.olt.s $f6,$f7 + c.seq.d $f12,$f14 + c.seq.s $f6,$f7 + c.sf.d $f12,$f14 + c.sf.s $f6,$f7 + c.ueq.d $f12,$f14 + c.ueq.s $f28,$f18 + c.ule.d $f12,$f14 + c.ule.s $f6,$f7 + c.ult.d $f12,$f14 + c.ult.s $f6,$f7 + c.un.d $f12,$f14 + c.un.s $f6,$f7 + +#------------------------------------------------------------------------------ +# FP convert instructions +#------------------------------------------------------------------------------ +# CHECK: cvt.d.s $f6, $f7 # encoding: [0xa1,0x39,0x00,0x46] +# CHECK: cvt.d.w $f12, $f14 # encoding: [0x21,0x73,0x80,0x46] +# CHECK: cvt.s.d $f12, $f14 # encoding: [0x20,0x73,0x20,0x46] +# CHECK: cvt.s.w $f6, $f7 # encoding: [0xa0,0x39,0x80,0x46] +# CHECK: cvt.w.d $f12, $f14 # encoding: [0x24,0x73,0x20,0x46] +# CHECK: cvt.w.s $f6, $f7 # encoding: [0xa4,0x39,0x00,0x46] + + cvt.d.s $f6,$f7 + cvt.d.w $f12,$f14 + cvt.s.d $f12,$f14 + cvt.s.w $f6,$f7 + cvt.w.d $f12,$f14 + cvt.w.s $f6,$f7 + +#------------------------------------------------------------------------------ +# FP move instructions +#------------------------------------------------------------------------------ + +# CHECK: cfc1 $6, $fcc0 # encoding: [0x00,0x00,0x46,0x44] +# CHECK: mfc1 $6, $f7 # encoding: [0x00,0x38,0x06,0x44] +# CHECK: mfhi $5 # encoding: [0x10,0x28,0x00,0x00] +# CHECK: mflo $5 # encoding: [0x12,0x28,0x00,0x00] +# CHECK: mov.d $f6, $f8 # encoding: [0x86,0x41,0x20,0x46] +# CHECK: mov.s $f6, $f7 # encoding: [0x86,0x39,0x00,0x46] +# CHECK: mtc1 $6, $f7 # encoding: [0x00,0x38,0x86,0x44] +# CHECK: mthi $7 # encoding: [0x11,0x00,0xe0,0x00] +# CHECK: mtlo $7 # encoding: [0x13,0x00,0xe0,0x00] +# CHECK: swc1 $f9, 9158($7) # encoding: [0xc6,0x23,0xe9,0xe4] + + cfc1 $a2,$0 + mfc1 $a2,$f7 + mfhi $a1 + mflo $a1 + mov.d $f6,$f8 + mov.s $f6,$f7 + mtc1 $a2,$f7 + mthi $a3 + mtlo $a3 + swc1 $f9,9158($a3) diff --git a/test/MC/Mips/mips-jump-instructions.s b/test/MC/Mips/mips-jump-instructions.s new file mode 100644 index 0000000..998be41 --- /dev/null +++ b/test/MC/Mips/mips-jump-instructions.s @@ -0,0 +1,72 @@ +# 
RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s +# Check that the assembler can handle the documented syntax +# for jumps and branches. +# CHECK: .section __TEXT,__text,regular,pure_instructions +#------------------------------------------------------------------------------ +# Branch instructions +#------------------------------------------------------------------------------ +# CHECK: b 1332 # encoding: [0x34,0x05,0x00,0x10] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: bc1f 1332 # encoding: [0x34,0x05,0x00,0x45] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: bc1t 1332 # encoding: [0x34,0x05,0x01,0x45] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: beq $9, $6, 1332 # encoding: [0x34,0x05,0x26,0x11] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: bgez $6, 1332 # encoding: [0x34,0x05,0xc1,0x04] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: bgezal $6, 1332 # encoding: [0x34,0x05,0xd1,0x04] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: bgtz $6, 1332 # encoding: [0x34,0x05,0xc0,0x1c] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: blez $6, 1332 # encoding: [0x34,0x05,0xc0,0x18] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: bne $9, $6, 1332 # encoding: [0x34,0x05,0x26,0x15] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: bal 1332 # encoding: [0x34,0x05,0x00,0x04] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] + b 1332 + nop + bc1f 1332 + nop + bc1t 1332 + nop + beq $9,$6,1332 + nop + bgez $6,1332 + nop + bgezal $6,1332 + nop + bgtz $6,1332 + nop + blez $6,1332 + nop + bne $9,$6,1332 + nop + bal 1332 + nop + +end_of_code: +#------------------------------------------------------------------------------ +# Jump instructions +#------------------------------------------------------------------------------ +# CHECK: j 1328 # encoding: [0x30,0x05,0x00,0x08] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: jal 1328 # encoding: [0x30,0x05,0x00,0x0c] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: jalr $6 # encoding: [0x09,0xf8,0xc0,0x00] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: jr $7 # encoding: [0x08,0x00,0xe0,0x00] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: jr $7 # encoding: [0x08,0x00,0xe0,0x00] + + + j 1328 + nop + jal 1328 + nop + jalr $6 + nop + jr $7 + nop + j $7 diff --git a/test/MC/Mips/mips-memory-instructions.s b/test/MC/Mips/mips-memory-instructions.s new file mode 100644 index 0000000..b5f1267 --- /dev/null +++ b/test/MC/Mips/mips-memory-instructions.s @@ -0,0 +1,45 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s +# Check that the assembler can handle the documented syntax +# for loads and stores. 
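# The inputs below also cover a couple of syntactic conveniences: a bare
# "($reg)" address is accepted and encoded with an offset of 0 (see the
# "sw $7, ($5)" and "lw $7, ($7)" lines), and named registers such as $sp are
# accepted alongside numbered ones (see "lw $2, 16($sp)").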
+# CHECK: .section __TEXT,__text,regular,pure_instructions +#------------------------------------------------------------------------------ +# Memory store instructions +#------------------------------------------------------------------------------ +# CHECK: sb $4, 16($5) # encoding: [0x10,0x00,0xa4,0xa0] +# CHECK: sc $4, 16($5) # encoding: [0x10,0x00,0xa4,0xe0] +# CHECK: sh $4, 16($5) # encoding: [0x10,0x00,0xa4,0xa4] +# CHECK: sw $4, 16($5) # encoding: [0x10,0x00,0xa4,0xac] +# CHECK: sw $7, 0($5) # encoding: [0x00,0x00,0xa7,0xac] +# CHECK: swc1 $f2, 16($5) # encoding: [0x10,0x00,0xa2,0xe4] +# CHECK: swl $4, 16($5) # encoding: [0x10,0x00,0xa4,0xa8] + sb $4, 16($5) + sc $4, 16($5) + sh $4, 16($5) + sw $4, 16($5) + sw $7, ($5) + swc1 $f2, 16($5) + swl $4, 16($5) + +#------------------------------------------------------------------------------ +# Memory load instructions +#------------------------------------------------------------------------------ + +# CHECK: lb $4, 4($5) # encoding: [0x04,0x00,0xa4,0x80] +# CHECK: lw $4, 4($5) # encoding: [0x04,0x00,0xa4,0x8c] +# CHECK: lbu $4, 4($5) # encoding: [0x04,0x00,0xa4,0x90] +# CHECK: lh $4, 4($5) # encoding: [0x04,0x00,0xa4,0x84] +# CHECK: lhu $4, 4($5) # encoding: [0x04,0x00,0xa4,0x94] +# CHECK: ll $4, 4($5) # encoding: [0x04,0x00,0xa4,0xc0] +# CHECK: lw $4, 4($5) # encoding: [0x04,0x00,0xa4,0x8c] +# CHECK: lw $7, 0($7) # encoding: [0x00,0x00,0xe7,0x8c] +# CHECK: lw $2, 16($sp) # encoding: [0x10,0x00,0xa2,0x8f] + + lb $4, 4($5) + lw $4, 4($5) + lbu $4, 4($5) + lh $4, 4($5) + lhu $4, 4($5) + ll $4, 4($5) + lw $4, 4($5) + lw $7, ($7) + lw $2, 16($sp) diff --git a/test/MC/Mips/mips-relocations.s b/test/MC/Mips/mips-relocations.s new file mode 100644 index 0000000..ff71c75 --- /dev/null +++ b/test/MC/Mips/mips-relocations.s @@ -0,0 +1,41 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s +# Check that the assembler can handle the documented syntax +# for relocations. 
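# Each %hi/%lo/%got/%call16/%tprel-style operator below leaves its immediate
# as a fixup (the A,A bytes in the encodings), recorded with the matching
# fixup_Mips_* kind shown in the comments; the object writer later resolves
# these fixups into the corresponding MIPS relocations.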
+# CHECK: .section __TEXT,__text,regular,pure_instructions +# CHECK: lui $2, %hi(_gp_disp) # encoding: [A,A,0x02,0x3c] +# CHECK: # fixup A - offset: 0, value: _gp_disp@ABS_HI, kind: fixup_Mips_HI16 +# CHECK: addiu $2, $2, %lo(_gp_disp) # encoding: [A,A,0x42,0x24] +# CHECK: # fixup A - offset: 0, value: _gp_disp@ABS_LO, kind: fixup_Mips_LO16 +# CHECK: lw $25, %call16(strchr)($gp) # encoding: [A,A,0x99,0x8f] +# CHECK: # fixup A - offset: 0, value: strchr@GOT_CALL, kind: fixup_Mips_CALL16 +# CHECK: lw $3, %got(loop_1)($2) # encoding: [A,A,0x43,0x8c] +# CHECK: # fixup A - offset: 0, value: loop_1@GOT, kind: fixup_Mips_GOT_Local +# CHECK: lui $2, %dtprel_hi(_gp_disp) # encoding: [A,A,0x02,0x3c] +# CHECK: # fixup A - offset: 0, value: _gp_disp@DTPREL_HI, kind: fixup_Mips_DTPREL_HI +# CHECK: addiu $2, $2, %dtprel_hi(_gp_disp) # encoding: [A,A,0x42,0x24] +# CHECK: # fixup A - offset: 0, value: _gp_disp@DTPREL_HI, kind: fixup_Mips_DTPREL_HI +# CHECK: lw $3, %got(loop_1)($2) # encoding: [A,A,0x43,0x8c] +# CHECK: # fixup A - offset: 0, value: loop_1@GOT, kind: fixup_Mips_GOT_Local +# CHECK: lw $4, %got_disp(loop_2)($3) # encoding: [A,A,0x64,0x8c] +# CHECK: # fixup A - offset: 0, value: loop_2@GOT_DISP, kind: fixup_Mips_GOT_DISP +# CHECK: lw $5, %got_page(loop_3)($4) # encoding: [A,A,0x85,0x8c] +# CHECK: # fixup A - offset: 0, value: loop_3@GOT_PAGE, kind: fixup_Mips_GOT_PAGE +# CHECK: lw $6, %got_ofst(loop_4)($5) # encoding: [A,A,0xa6,0x8c] +# CHECK: # fixup A - offset: 0, value: loop_4@GOT_OFST, kind: fixup_Mips_GOT_OFST +# CHECK: lui $2, %tprel_hi(_gp_disp) # encoding: [A,A,0x02,0x3c] +# CHECK: # fixup A - offset: 0, value: _gp_disp@TPREL_HI, kind: fixup_Mips_TPREL_HI +# CHECK: addiu $2, $2, %tprel_lo(_gp_disp) # encoding: [A,A,0x42,0x24] +# CHECK: # fixup A - offset: 0, value: _gp_disp@TPREL_LO, kind: fixup_Mips_TPREL_LO + + lui $2, %hi(_gp_disp) + addiu $2, $2, %lo(_gp_disp) + lw $25, %call16(strchr)($gp) + lw $3, %got(loop_1)($2) + lui $2, %dtprel_hi(_gp_disp) + addiu $2, $2, %dtprel_hi(_gp_disp) + lw $3, %got(loop_1)($2) + lw $4, %got_disp(loop_2)($3) + lw $5, %got_page(loop_3)($4) + lw $6, %got_ofst(loop_4)($5) + lui $2, %tprel_hi(_gp_disp) + addiu $2, $2, %tprel_lo(_gp_disp) diff --git a/test/MC/Mips/mips64extins.ll b/test/MC/Mips/mips64extins.ll new file mode 100644 index 0000000..ebe8f86 --- /dev/null +++ b/test/MC/Mips/mips64extins.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 -mattr=n64 %s -o - \ +; RUN: | llvm-objdump -disassemble -triple mips64el -mattr +mips64r2 - \ +; RUN: | FileCheck %s + +define i64 @dext(i64 %i) nounwind readnone { +entry: +; CHECK: dext ${{[0-9]+}}, ${{[0-9]+}}, 5, 10 + %shr = lshr i64 %i, 5 + %and = and i64 %shr, 1023 + ret i64 %and +} + +define i64 @dextu(i64 %i) nounwind readnone { +entry: +; CHECK: dextu ${{[0-9]+}}, ${{[0-9]+}}, 2, 6 + %shr = lshr i64 %i, 34 + %and = and i64 %shr, 63 + ret i64 %and +} + +define i64 @dextm(i64 %i) nounwind readnone { +entry: +; CHECK: dextm ${{[0-9]+}}, ${{[0-9]+}}, 5, 2 + %shr = lshr i64 %i, 5 + %and = and i64 %shr, 17179869183 + ret i64 %and +} + +define i64 @dins(i64 %i, i64 %j) nounwind readnone { +entry: +; CHECK: dins ${{[0-9]+}}, ${{[0-9]+}}, 8, 10 + %shl2 = shl i64 %j, 8 + %and = and i64 %shl2, 261888 + %and3 = and i64 %i, -261889 + %or = or i64 %and3, %and + ret i64 %or +} + +define i64 @dinsm(i64 %i, i64 %j) nounwind readnone { +entry: +; CHECK: dinsm ${{[0-9]+}}, ${{[0-9]+}}, 10, 1 + %shl4 = shl i64 %j, 10 + %and = and i64 %shl4, 8796093021184 + %and5 = and i64 %i, -8796093021185 + %or 
= or i64 %and5, %and + ret i64 %or +} + +define i64 @dinsu(i64 %i, i64 %j) nounwind readnone { +entry: +; CHECK: dinsu ${{[0-9]+}}, ${{[0-9]+}}, 8, 13 + %shl4 = shl i64 %j, 40 + %and = and i64 %shl4, 9006099743113216 + %and5 = and i64 %i, -9006099743113217 + %or = or i64 %and5, %and + ret i64 %or +} diff --git a/test/MC/Mips/mips64shift.ll b/test/MC/Mips/mips64shift.ll index 7817b96..99cac7b 100644 --- a/test/MC/Mips/mips64shift.ll +++ b/test/MC/Mips/mips64shift.ll @@ -1,5 +1,8 @@ -; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 %s -o - | llvm-objdump -disassemble -triple mips64el - | FileCheck %s +; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 -disable-mips-delay-filler %s -o - \ +; RUN: | llvm-objdump -disassemble -triple mips64el - | FileCheck %s +; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 %s -o - \ +; RUN: | llvm-objdump -disassemble -triple mips64el - | FileCheck %s define i64 @f3(i64 %a0) nounwind readnone { entry: diff --git a/test/MC/Mips/mips_directives.s b/test/MC/Mips/mips_directives.s new file mode 100644 index 0000000..f9d8460 --- /dev/null +++ b/test/MC/Mips/mips_directives.s @@ -0,0 +1,10 @@ +# RUN: llvm-mc -triple mips-unknown-unknown %s + +$BB0_2: + .frame $sp,0,$ra + .mask 0x00000000,0 + .fmask 0x00000000,0 + .set noreorder + .set nomacro +$JTI0_0: + .gpword ($BB0_2) diff --git a/test/MC/Mips/multi-64bit-func.ll b/test/MC/Mips/multi-64bit-func.ll index 6e0d784..83577aa 100644 --- a/test/MC/Mips/multi-64bit-func.ll +++ b/test/MC/Mips/multi-64bit-func.ll @@ -1,8 +1,8 @@ ; There is no real check here. If the test doesn't ; assert it passes. -; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 < %s +; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 -disable-mips-delay-filler < %s ; Run it again without extra nop in delay slot -; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 -enable-mips-delay-filler < %s +; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 < %s define i32 @bosco1(i32 %x) nounwind readnone { entry: diff --git a/test/MC/X86/intel-syntax-2.s b/test/MC/X86/intel-syntax-2.s index ca4afc3..c377e1a 100644 --- a/test/MC/X86/intel-syntax-2.s +++ b/test/MC/X86/intel-syntax-2.s @@ -1,7 +1,9 @@ -// RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=att %s | FileCheck %s .intel_syntax _test: // CHECK: movl $257, -4(%rsp) mov DWORD PTR [RSP - 4], 257 - + .att_syntax +// CHECK: movl $257, -4(%rsp) + movl $257, -4(%rsp)
\ No newline at end of file diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s index 6a2d5bb..03cb62e 100644 --- a/test/MC/X86/x86-64.s +++ b/test/MC/X86/x86-64.s @@ -1164,6 +1164,10 @@ xsetbv // CHECK: xsetbv # encoding: [0x0f,0x01,0xd1] // CHECK: encoding: [0x66,0x48,0x0f,0x6e,0xc7] movd %rdi,%xmm0 +// CHECK: movd %xmm0, %rax +// CHECK: encoding: [0x66,0x48,0x0f,0x7e,0xc0] + movd %xmm0, %rax + // CHECK: movntil %eax, (%rdi) // CHECK: encoding: [0x0f,0xc3,0x07] // CHECK: movntil diff --git a/test/Makefile b/test/Makefile index 9ddfabf..ae7a674 100644 --- a/test/Makefile +++ b/test/Makefile @@ -131,7 +131,7 @@ lit.site.cfg: FORCE @$(ECHOPATH) s=@SHLIBDIR@=$(SharedLibDir)=g >> lit.tmp @$(ECHOPATH) s=@SHLIBEXT@=$(SHLIBEXT)=g >> lit.tmp @$(ECHOPATH) s=@PYTHON_EXECUTABLE@=python=g >> lit.tmp - @$(ECHOPATH) s,@OCAMLOPT@,$(OCAMLOPT) -cc \\\\\"$(CXX_FOR_OCAMLOPT)\\\\\" -I $(LibDir)/ocaml,g >> lit.tmp + @$(ECHOPATH) s=@OCAMLOPT@=$(OCAMLOPT) -cc $(subst *,'\\\"',*$(subst =,"\\=",$(CXX_FOR_OCAMLOPT))*) -I $(LibDir)/ocaml=g >> lit.tmp @$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp @$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp @$(ECHOPATH) s=@TARGETS_TO_BUILD@=$(TARGETS_TO_BUILD)=g >> lit.tmp diff --git a/test/Object/Inputs/dext-test.elf-mips64r2 b/test/Object/Inputs/dext-test.elf-mips64r2 Binary files differnew file mode 100644 index 0000000..59dbaef --- /dev/null +++ b/test/Object/Inputs/dext-test.elf-mips64r2 diff --git a/test/Object/Inputs/relocations.elf-x86-64 b/test/Object/Inputs/relocations.elf-x86-64 Binary files differnew file mode 100644 index 0000000..6e340c7 --- /dev/null +++ b/test/Object/Inputs/relocations.elf-x86-64 diff --git a/test/Object/Mips/feature.test b/test/Object/Mips/feature.test new file mode 100644 index 0000000..e8da609 --- /dev/null +++ b/test/Object/Mips/feature.test @@ -0,0 +1,11 @@ +RUN: llvm-objdump -disassemble -triple mips64el -mattr +mips64r2 %p/../Inputs/dext-test.elf-mips64r2 \ +RUN: | FileCheck %s + +CHECK: Disassembly of section .text: +CHECK: .text: +CHECK: 0: 08 00 e0 03 jr $ra +CHECK: 4: 43 49 82 7c dext $2, $4, 5, 10 +CHECK: 8: 08 00 e0 03 jr $ra +CHECK: c: 83 28 82 7c dext $2, $4, 2, 6 +CHECK: 10: 08 00 e0 03 jr $ra +CHECK: 14: 43 09 82 7c dext $2, $4, 5, 2 diff --git a/test/Object/Mips/lit.local.cfg b/test/Object/Mips/lit.local.cfg new file mode 100644 index 0000000..1499317 --- /dev/null +++ b/test/Object/Mips/lit.local.cfg @@ -0,0 +1,5 @@ +config.suffixes = ['.test'] + +targets = set(config.root.targets_to_build.split()) +if not 'Mips' in targets: + config.unsupported = True diff --git a/test/Object/objdump-relocations.test b/test/Object/objdump-relocations.test index a394a23..6d35a26 100644 --- a/test/Object/objdump-relocations.test +++ b/test/Object/objdump-relocations.test @@ -9,6 +9,9 @@ RUN: | FileCheck %s -check-prefix ELF-x86-64 RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-hexagon \ RUN: | FileCheck %s -check-prefix ELF-hexagon +RUN: llvm-objdump -r %p/Inputs/relocations.elf-x86-64 \ +RUN: | FileCheck %s -check-prefix ELF-complex-x86-64 + COFF-i386: .text COFF-i386: IMAGE_REL_I386_DIR32 L_.str COFF-i386: IMAGE_REL_I386_REL32 _puts @@ -36,3 +39,13 @@ ELF-hexagon: R_HEX_HI16 puts ELF-hexagon: R_HEX_LO16 puts ELF-hexagon: R_HEX_B15_PCREL testf ELF-hexagon: R_HEX_B22_PCREL puts + +ELF-complex-x86-64: .text +ELF-complex-x86-64-NEXT: R_X86_64_8 .data-4 +ELF-complex-x86-64-NEXT: R_X86_64_16 .data-4 +ELF-complex-x86-64-NEXT: R_X86_64_32 .data-4 +ELF-complex-x86-64-NEXT: R_X86_64_32S .data-4 
+ELF-complex-x86-64-NEXT: R_X86_64_64 .data-4 +ELF-complex-x86-64-NEXT: R_X86_64_PC32 .data-4-P +ELF-complex-x86-64-NEXT: R_X86_64_32 .data+0 +ELF-complex-x86-64-NEXT: R_X86_64_32 .data+4 diff --git a/test/TableGen/if.td b/test/TableGen/if.td index 18de368..1d8d623 100644 --- a/test/TableGen/if.td +++ b/test/TableGen/if.td @@ -3,15 +3,59 @@ // Support for an `!if' operator as part of a `let' statement. // CHECK: class C -// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, ?, ?, ?, !if({ C:x{2} }, 0, 1), !if({ C:x{2} }, 1, 1), !if({ C:x{2} }, 0, 0), !if({ C:x{1} }, C:y{3}, 0), !if({ C:x{1} }, C:y{2}, 1), !if({ C:x{0} }, C:y{3}, C:z), !if({ C:x{0} }, C:y{2}, C:y{2}), !if({ C:x{0} }, C:y{1}, C:y{1}), !if({ C:x{0} }, C:y{0}, C:y{0}) }; +// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, !if({ C:y{3} }, 1, !if({ C:y{2} }, { C:x{0} }, !if({ C:y{1} }, { C:x{1} }, !if({ C:y{0} }, { C:x{2} }, ?)))){0}, !if({ C:x{2} }, { C:y{3}, C:y{2} }, !if({ C:x{1} }, { C:y{2}, C:y{1} }, !if({ C:x{0} }, { C:y{1}, C:y{0} }, ?))){1}, !if({ C:x{2} }, { C:y{3}, C:y{2} }, !if({ C:x{1} }, { C:y{2}, C:y{1} }, !if({ C:x{0} }, { C:y{1}, C:y{0} }, ?))){0}, !if({ C:x{2} }, 2, 6){2}, !if({ C:x{2} }, 2, 6){1}, !if({ C:x{2} }, 2, 6){0}, !if({ C:x{1} }, { C:y{3}, C:y{2} }, { 0, 1 }){1}, !if({ C:x{1} }, { C:y{3}, C:y{2} }, { 0, 1 }){0}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){3}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){2}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){1}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){0} }; class C<bits<3> x, bits<4> y, bit z> { bits<16> n; + let n{11} = !if(y{3}, 1, + !if(y{2}, x{0}, + !if(y{1}, x{1}, + !if(y{0}, x{2}, ?)))); + let n{10-9}= !if(x{2}, y{3-2}, + !if(x{1}, y{2-1}, + !if(x{0}, y{1-0}, ?))); let n{8-6} = !if(x{2}, 0b010, 0b110); let n{5-4} = !if(x{1}, y{3-2}, {0, 1}); let n{3-0} = !if(x{0}, y{3-0}, {z, y{2}, y{1}, y{0}}); } +def C1 : C<{1, 0, 1}, {0, 1, 0, 1}, 0>; +def C2 : C<{0, 1, 0}, {1, 0, 1, 0}, 1>; +def C3 : C<{0, 0, 0}, {1, 0, 1, 0}, 0>; +def C4 : C<{0, 0, 0}, {0, 0, 0, 0}, 0>; + +// CHECK: def C1 +// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1 }; +// CHECK: def C2 +// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0 }; +// CHECK: def C3 +// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, 1, ?, ?, 1, 1, 0, 0, 1, 0, 0, 1, 0 }; +// CHECK: def C4 +// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, ?, ?, ?, 1, 1, 0, 0, 1, 0, 0, 0, 0 }; + +class S<int s> { + bits<2> val = !if(!eq(s, 8), {0, 0}, + !if(!eq(s, 16), 0b01, + !if(!eq(s, 32), 2, + !if(!eq(s, 64), {1, 1}, ?)))); +} + +def D8 : S<8>; +def D16 : S<16>; +def D32 : S<32>; +def D64 : S<64>; +def D128: S<128>; +// CHECK: def D128 +// CHECK-NEXT: bits<2> val = { ?, ? 
}; +// CHECK: def D16 +// CHECK-NEXT: bits<2> val = { 0, 1 }; +// CHECK: def D32 +// CHECK-NEXT: bits<2> val = { 1, 0 }; +// CHECK: def D64 +// CHECK-NEXT: bits<2> val = { 1, 1 }; +// CHECK: def D8 +// CHECK-NEXT: bits<2> val = { 0, 0 }; + // CHECK: def One // CHECK-NEXT: list<int> first = [1, 2, 3]; // CHECK-NEXT: list<int> rest = [1, 2, 3]; diff --git a/test/TableGen/list-element-bitref.td b/test/TableGen/list-element-bitref.td new file mode 100644 index 0000000..5f3e3da --- /dev/null +++ b/test/TableGen/list-element-bitref.td @@ -0,0 +1,15 @@ +// RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak + +class C<list<bits<8>> L> { + bits<2> V0 = L[0]{1-0}; + bits<2> V1 = L[1]{3-2}; + string V2 = !if(L[0]{0}, "Odd", "Even"); +} + +def c0 : C<[0b0101, 0b1010]>; + +// CHECK: def c0 +// CHECk-NEXT: bits<2> V0 = { 0, 1 }; +// CHECk-NEXT: bits<2> V1 = { 1, 0 }; +// CHECk-NEXT: string V2 = "Odd"; diff --git a/test/TableGen/pr8330.td b/test/TableGen/pr8330.td new file mode 100644 index 0000000..7779b63 --- /dev/null +++ b/test/TableGen/pr8330.td @@ -0,0 +1,29 @@ +// RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak + +class Or4<bits<8> Val> { + bits<8> V = {Val{7}, Val{6}, Val{5}, Val{4}, Val{3}, 1, Val{1}, Val{0} }; +} + +class Whatev<bits<8> x>; + +class Whatever<bits<8> x> { + bits<8> W = {x{0}, x{1}, x{2}, x{3}, x{4}, x{5}, x{6}, x{7} }; +} + +multiclass X<bits<8> BaseOpc> { + def bar : Whatev<Or4<BaseOpc>.V >; +} + +multiclass Y<bits<8> BaseOpc> { + def foo : Whatever<Or4<BaseOpc>.V >; +} + +defm a : X<4>; + +// CHECK: def abar + +defm b : Y<8>; + +// CHECK: def bfoo +// CHECK-NEXT: bits<8> W = { 0, 0, 1, 1, 0, 0, 0, 0 }; diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll index 7a8cdd5..e0eb90a 100644 --- a/test/Transforms/DeadStoreElimination/simple.ll +++ b/test/Transforms/DeadStoreElimination/simple.ll @@ -310,3 +310,17 @@ define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind { store i32 %c, i32* %4, align 4 ret void } + +; Check another case like PR13547 where strdup is not like malloc. 
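; Editor's note: illustrative sketch, not part of the committed test. The point of @test25 below is that strdup, unlike malloc, reads the string it is given, so the `store i8 0` that precedes the call is observable through the copy and must not be treated as a dead store:
;   store i8 0, i8* %p.4              ; truncates the string that strdup will copy
;   %q = call i8* @strdup(i8* %p)     ; reads %p, including the zero byte just stored
;   store i8 %tmp, i8* %p.4           ; restores the original byte afterwards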
+; CHECK: @test25 +; CHECK: load i8 +; CHECK: store i8 0 +; CHECK: store i8 %tmp +define i8* @test25(i8* %p) nounwind { + %p.4 = getelementptr i8* %p, i64 4 + %tmp = load i8* %p.4, align 1 + store i8 0, i8* %p.4, align 1 + %q = call i8* @strdup(i8* %p) nounwind optsize + store i8 %tmp, i8* %p.4, align 1 + ret i8* %q +} diff --git a/test/Transforms/GVN/malloc-load-removal.ll b/test/Transforms/GVN/malloc-load-removal.ll new file mode 100644 index 0000000..66b6929 --- /dev/null +++ b/test/Transforms/GVN/malloc-load-removal.ll @@ -0,0 +1,31 @@ +; RUN: opt -S -basicaa -gvn < %s | FileCheck %s +; RUN: opt -S -basicaa -gvn -disable-simplify-libcalls < %s | FileCheck %s -check-prefix=CHECK_NO_LIBCALLS +; PR13694 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +declare i8* @malloc(i64) nounwind + +define noalias i8* @test() nounwind uwtable ssp { +entry: + %call = tail call i8* @malloc(i64 100) nounwind + %0 = load i8* %call, align 1 + %tobool = icmp eq i8 %0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + store i8 0, i8* %call, align 1 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i8* %call + +; CHECK: @test +; CHECK-NOT: load +; CHECK-NOT: icmp + +; CHECK_NO_LIBCALLS: @test +; CHECK_NO_LIBCALLS: load +; CHECK_NO_LIBCALLS: icmp +} diff --git a/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll b/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll new file mode 100644 index 0000000..4efaf8c --- /dev/null +++ b/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll @@ -0,0 +1,57 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s + +; rdar://12182093 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; CHECK: @udiv400 +; CHECK: udiv i32 %x, 400 +; CHECK: ret +define i32 @udiv400(i32 %x) { +entry: + %div = lshr i32 %x, 2 + %div1 = udiv i32 %div, 100 + ret i32 %div1 +} + + +; CHECK: @udiv400_no +; CHECK: ashr +; CHECK: div +; CHECK: ret +define i32 @udiv400_no(i32 %x) { +entry: + %div = ashr i32 %x, 2 + %div1 = udiv i32 %div, 100 + ret i32 %div1 +} + +; CHECK: @sdiv400_yes +; CHECK: udiv i32 %x, 400 +; CHECK: ret +define i32 @sdiv400_yes(i32 %x) { +entry: + %div = lshr i32 %x, 2 + ; The sign bits of both operands are zero (i.e. we can prove they are + ; unsigned inputs), turn this into a udiv. + ; Next, optimize this just like sdiv. 
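; Editor's note: illustrative sketch, not part of the committed test. The fold being checked composes the two divisions: `lshr i32 %x, 2` clears the sign bit, so %div is %x divided by 4 as an unsigned value, the sdiv below behaves like a udiv, and the pair collapses into a single division:
;   (%x u>> 2) /u 100  ==  (%x /u 4) /u 100  ==  %x /u 400
; which is exactly the `udiv i32 %x, 400` that the CHECK lines above expect.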
+ %div1 = sdiv i32 %div, 100 + ret i32 %div1 +} + + +; CHECK: @udiv_i80 +; CHECK: udiv i80 %x, 400 +; CHECK: ret +define i80 @udiv_i80(i80 %x) { + %div = lshr i80 %x, 2 + %div1 = udiv i80 %div, 100 + ret i80 %div1 +} + +define i32 @no_crash_notconst_udiv(i32 %x, i32 %notconst) { + %div = lshr i32 %x, %notconst + %div1 = udiv i32 %div, 100 + ret i32 %div1 +} diff --git a/test/Transforms/InstCombine/fold-vector-select.ll b/test/Transforms/InstCombine/fold-vector-select.ll index 3f22522..2cb970b 100644 --- a/test/Transforms/InstCombine/fold-vector-select.ll +++ b/test/Transforms/InstCombine/fold-vector-select.ll @@ -1,13 +1,148 @@ ; RUN: opt < %s -instcombine -S | not grep select -define void @foo(<4 x i32> *%A, <4 x i32> *%B, <4 x i32> *%C, <4 x i32> *%D) { - %r = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> zeroinitializer - %g = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 6, i32 9, i32 1> - %b = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 7, i32 1, i32 4, i32 9> - %a = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 8, i32 5> - store <4 x i32> %r, <4 x i32>* %A - store <4 x i32> %g, <4 x i32>* %B - store <4 x i32> %b, <4 x i32>* %C - store <4 x i32> %a, <4 x i32>* %D +define void @foo(<4 x i32> *%A, <4 x i32> *%B, <4 x i32> *%C, <4 x i32> *%D, + <4 x i32> *%E, <4 x i32> *%F, <4 x i32> *%G, <4 x i32> *%H, + <4 x i32> *%I, <4 x i32> *%J, <4 x i32> *%K, <4 x i32> *%L, + <4 x i32> *%M, <4 x i32> *%N, <4 x i32> *%O, <4 x i32> *%P, + <4 x i32> *%Q, <4 x i32> *%R, <4 x i32> *%S, <4 x i32> *%T, + <4 x i32> *%U, <4 x i32> *%V, <4 x i32> *%W, <4 x i32> *%X, + <4 x i32> *%Y, <4 x i32> *%Z, <4 x i32> *%BA, <4 x i32> *%BB, + <4 x i32> *%BC, <4 x i32> *%BD, <4 x i32> *%BE, <4 x i32> *%BF, + <4 x i32> *%BG, <4 x i32> *%BH, <4 x i32> *%BI, <4 x i32> *%BJ, + <4 x i32> *%BK, <4 x i32> *%BL, <4 x i32> *%BM, <4 x i32> *%BN, + <4 x i32> *%BO, <4 x i32> *%BP, <4 x i32> *%BQ, <4 x i32> *%BR, + <4 x i32> *%BS, <4 x i32> *%BT, <4 x i32> *%BU, <4 x i32> *%BV, + <4 x i32> *%BW, <4 x i32> *%BX, <4 x i32> *%BY, <4 x i32> *%BZ, + <4 x i32> *%CA, <4 x i32> *%CB, <4 x i32> *%CC, <4 x i32> *%CD, + <4 x i32> *%CE, <4 x i32> *%CF, <4 x i32> *%CG, <4 x i32> *%CH, + <4 x i32> *%CI, <4 x i32> *%CJ, <4 x i32> *%CK, <4 x i32> *%CL) { + %a = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 9, i32 87, i32 57, i32 8> + %b = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 44, i32 99, i32 49, i32 29> + %c = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 15, i32 18, i32 53, i32 84> + %d = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 29, i32 82, i32 45, i32 16> + %e = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 11, i32 15, i32 32, i32 99> + %f = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 19, i32 86, i32 29, i32 33> + %g = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 44, i32 10, i32 26, i32 45> + %h = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 88, i32 70, i32 90, i32 48> + %i = select <4 x i1> <i1 false, i1 
false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 30, i32 53, i32 42, i32 12> + %j = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 46, i32 24, i32 93, i32 26> + %k = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 33, i32 99, i32 15, i32 57> + %l = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 51, i32 60, i32 60, i32 50> + %m = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 50, i32 12, i32 7, i32 45> + %n = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 15, i32 65, i32 36, i32 36> + %o = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 54, i32 0, i32 17, i32 78> + %p = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 56, i32 13, i32 64, i32 48> + %q = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> <i32 52, i32 69, i32 88, i32 11>, <4 x i32> zeroinitializer + %r = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> <i32 5, i32 87, i32 68, i32 14>, <4 x i32> zeroinitializer + %s = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> <i32 47, i32 17, i32 66, i32 63>, <4 x i32> zeroinitializer + %t = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> <i32 64, i32 25, i32 73, i32 81>, <4 x i32> zeroinitializer + %u = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> <i32 51, i32 41, i32 61, i32 63>, <4 x i32> zeroinitializer + %v = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> <i32 39, i32 59, i32 17, i32 0>, <4 x i32> zeroinitializer + %w = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> <i32 91, i32 99, i32 97, i32 29>, <4 x i32> zeroinitializer + %x = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> <i32 89, i32 45, i32 89, i32 10>, <4 x i32> zeroinitializer + %y = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> <i32 25, i32 70, i32 21, i32 27>, <4 x i32> zeroinitializer + %z = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> <i32 40, i32 12, i32 27, i32 88>, <4 x i32> zeroinitializer + %ba = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> <i32 36, i32 35, i32 90, i32 23>, <4 x i32> zeroinitializer + %bb = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> <i32 83, i32 3, i32 64, i32 82>, <4 x i32> zeroinitializer + %bc = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> <i32 15, i32 72, i32 2, i32 54>, <4 x i32> zeroinitializer + %bd = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> <i32 32, i32 47, i32 100, i32 84>, <4 x i32> zeroinitializer + %be = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> <i32 92, i32 57, i32 82, i32 1>, <4 x i32> zeroinitializer + %bf = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> <i32 42, i32 14, i32 22, i32 89>, <4 x i32> zeroinitializer + %bg = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> <i32 33, i32 10, i32 67, i32 66>, <4 x i32> <i32 42, i32 91, i32 47, i32 40> + %bh = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> <i32 8, i32 13, i32 48, i32 0>, <4 x i32> <i32 84, i32 66, i32 87, i32 84> + %bi = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> <i32 85, i32 96, i32 1, 
i32 94>, <4 x i32> <i32 54, i32 57, i32 7, i32 92> + %bj = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> <i32 55, i32 21, i32 92, i32 68>, <4 x i32> <i32 51, i32 61, i32 62, i32 39> + %bk = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> <i32 42, i32 18, i32 77, i32 74>, <4 x i32> <i32 82, i32 33, i32 30, i32 7> + %bl = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> <i32 80, i32 92, i32 61, i32 84>, <4 x i32> <i32 43, i32 89, i32 92, i32 6> + %bm = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> <i32 49, i32 14, i32 62, i32 62>, <4 x i32> <i32 35, i32 33, i32 92, i32 59> + %bn = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> <i32 3, i32 97, i32 49, i32 18>, <4 x i32> <i32 56, i32 64, i32 19, i32 75> + %bo = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> <i32 91, i32 57, i32 0, i32 1>, <4 x i32> <i32 43, i32 63, i32 64, i32 11> + %bp = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> <i32 41, i32 65, i32 18, i32 11>, <4 x i32> <i32 86, i32 26, i32 31, i32 3> + %bq = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> <i32 31, i32 46, i32 32, i32 68>, <4 x i32> <i32 100, i32 59, i32 62, i32 6> + %br = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> <i32 76, i32 67, i32 87, i32 7>, <4 x i32> <i32 63, i32 48, i32 97, i32 24> + %bs = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> <i32 83, i32 89, i32 19, i32 4>, <4 x i32> <i32 21, i32 2, i32 40, i32 21> + %bt = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> <i32 45, i32 76, i32 81, i32 100>, <4 x i32> <i32 65, i32 26, i32 100, i32 46> + %bu = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> <i32 16, i32 75, i32 31, i32 17>, <4 x i32> <i32 37, i32 66, i32 86, i32 65> + %bv = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> <i32 13, i32 25, i32 43, i32 59>, <4 x i32> <i32 82, i32 78, i32 60, i32 52> + %bw = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %bx = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %by = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %bz = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %ca = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %cb = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %cc = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %cd = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %ce = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %cf = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %cg = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %ch = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %ci = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %cj = select <4 x i1> 
<i1 true, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %ck = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %cl = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + store <4 x i32> %a, <4 x i32>* %A + store <4 x i32> %b, <4 x i32>* %B + store <4 x i32> %c, <4 x i32>* %C + store <4 x i32> %d, <4 x i32>* %D + store <4 x i32> %e, <4 x i32>* %E + store <4 x i32> %f, <4 x i32>* %F + store <4 x i32> %g, <4 x i32>* %G + store <4 x i32> %h, <4 x i32>* %H + store <4 x i32> %i, <4 x i32>* %I + store <4 x i32> %j, <4 x i32>* %J + store <4 x i32> %k, <4 x i32>* %K + store <4 x i32> %l, <4 x i32>* %L + store <4 x i32> %m, <4 x i32>* %M + store <4 x i32> %n, <4 x i32>* %N + store <4 x i32> %o, <4 x i32>* %O + store <4 x i32> %p, <4 x i32>* %P + store <4 x i32> %q, <4 x i32>* %Q + store <4 x i32> %r, <4 x i32>* %R + store <4 x i32> %s, <4 x i32>* %S + store <4 x i32> %t, <4 x i32>* %T + store <4 x i32> %u, <4 x i32>* %U + store <4 x i32> %v, <4 x i32>* %V + store <4 x i32> %w, <4 x i32>* %W + store <4 x i32> %x, <4 x i32>* %X + store <4 x i32> %y, <4 x i32>* %Y + store <4 x i32> %z, <4 x i32>* %Z + store <4 x i32> %ba, <4 x i32>* %BA + store <4 x i32> %bb, <4 x i32>* %BB + store <4 x i32> %bc, <4 x i32>* %BC + store <4 x i32> %bd, <4 x i32>* %BD + store <4 x i32> %be, <4 x i32>* %BE + store <4 x i32> %bf, <4 x i32>* %BF + store <4 x i32> %bg, <4 x i32>* %BG + store <4 x i32> %bh, <4 x i32>* %BH + store <4 x i32> %bi, <4 x i32>* %BI + store <4 x i32> %bj, <4 x i32>* %BJ + store <4 x i32> %bk, <4 x i32>* %BK + store <4 x i32> %bl, <4 x i32>* %BL + store <4 x i32> %bm, <4 x i32>* %BM + store <4 x i32> %bn, <4 x i32>* %BN + store <4 x i32> %bo, <4 x i32>* %BO + store <4 x i32> %bp, <4 x i32>* %BP + store <4 x i32> %bq, <4 x i32>* %BQ + store <4 x i32> %br, <4 x i32>* %BR + store <4 x i32> %bs, <4 x i32>* %BS + store <4 x i32> %bt, <4 x i32>* %BT + store <4 x i32> %bu, <4 x i32>* %BU + store <4 x i32> %bv, <4 x i32>* %BV + store <4 x i32> %bw, <4 x i32>* %BW + store <4 x i32> %bx, <4 x i32>* %BX + store <4 x i32> %by, <4 x i32>* %BY + store <4 x i32> %bz, <4 x i32>* %BZ + store <4 x i32> %ca, <4 x i32>* %CA + store <4 x i32> %cb, <4 x i32>* %CB + store <4 x i32> %cc, <4 x i32>* %CC + store <4 x i32> %cd, <4 x i32>* %CD + store <4 x i32> %ce, <4 x i32>* %CE + store <4 x i32> %cf, <4 x i32>* %CF + store <4 x i32> %cg, <4 x i32>* %CG + store <4 x i32> %ch, <4 x i32>* %CH + store <4 x i32> %ci, <4 x i32>* %CI + store <4 x i32> %cj, <4 x i32>* %CJ + store <4 x i32> %ck, <4 x i32>* %CK + store <4 x i32> %cl, <4 x i32>* %CL ret void } diff --git a/test/Transforms/InstCombine/udiv-simplify-bug-1.ll b/test/Transforms/InstCombine/udiv-simplify-bug-1.ll index d95e8f8..74f2fdd 100644 --- a/test/Transforms/InstCombine/udiv-simplify-bug-1.ll +++ b/test/Transforms/InstCombine/udiv-simplify-bug-1.ll @@ -6,9 +6,9 @@ ; The udiv instructions shouldn't be optimized away, and the ; sext instructions should be optimized to zext. 
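; Editor's note: illustrative sketch, not part of the committed test. After `lshr i32 %x, 30` the value %y can only be 0..3, so the division result also fits in two bits and its sign bit is known zero; that is what lets the trailing sext be strengthened to zext, while the division by the runtime value %g has to stay:
;   %y = lshr i32 %x, 30          ; %y is in [0, 3]
;   %r = udiv i32 %y, %g          ; not foldable: %g is no longer the constant 3
;   %z = zext i32 %r to i64       ; the original sext, safely turned into a zext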
-define i64 @bar(i32 %x) nounwind { +define i64 @bar(i32 %x, i32 %g) nounwind { %y = lshr i32 %x, 30 - %r = udiv i32 %y, 3 + %r = udiv i32 %y, %g %z = sext i32 %r to i64 ret i64 %z } diff --git a/test/Transforms/JumpThreading/select.ll b/test/Transforms/JumpThreading/select.ll index 8a81857..9676efe 100644 --- a/test/Transforms/JumpThreading/select.ll +++ b/test/Transforms/JumpThreading/select.ll @@ -121,3 +121,39 @@ L4: call void @quux() br label %L0 } + +; Make sure the edge value of %0 from entry to L2 includes 0 and L3 is +; reachable. +; CHECK: test_switch_default +; CHECK: entry: +; CHECK: load +; CHECK: switch +; CHECK: [[THREADED:[A-Za-z.0-9]+]]: +; CHECK: store +; CHECK: br +; CHECK: L2: +; CHECK: icmp +define void @test_switch_default(i32* nocapture %status) nounwind { +entry: + %0 = load i32* %status, align 4 + switch i32 %0, label %L2 [ + i32 5061, label %L1 + i32 0, label %L2 + ] + +L1: + store i32 10025, i32* %status, align 4 + br label %L2 + +L2: + %1 = load i32* %status, align 4 + %cmp57.i = icmp eq i32 %1, 0 + br i1 %cmp57.i, label %L3, label %L4 + +L3: + store i32 10000, i32* %status, align 4 + br label %L4 + +L4: + ret void +} diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll index 6f28d53..98f9334 100644 --- a/test/Transforms/LICM/hoisting.ll +++ b/test/Transforms/LICM/hoisting.ll @@ -29,7 +29,7 @@ Out: ; preds = %LoopTail } -declare void @foo2(i32) +declare void @foo2(i32) nounwind ;; It is ok and desirable to hoist this potentially trapping instruction. @@ -64,3 +64,29 @@ Out: ; preds = %Loop %C = sub i32 %A, %B ; <i32> [#uses=1] ret i32 %C } + +; CHECK: @test4 +; CHECK: call +; CHECK: sdiv +; CHECK: ret +define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ] + call void @foo_may_call_exit(i32 0) + %div = sdiv i32 %x, %y + %add = add nsw i32 %n.01, %div + %inc = add nsw i32 %i.02, 1 + %cmp = icmp slt i32 %inc, 10000 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %n.0.lcssa = phi i32 [ %add, %for.body ] + ret i32 %n.0.lcssa +} + +declare void @foo_may_call_exit(i32) + diff --git a/test/Transforms/LoopRotate/multiple-exits.ll b/test/Transforms/LoopRotate/multiple-exits.ll new file mode 100644 index 0000000..675d71f --- /dev/null +++ b/test/Transforms/LoopRotate/multiple-exits.ll @@ -0,0 +1,236 @@ +; RUN: opt -S -loop-rotate < %s -verify-loop-info -verify-dom-info | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; PR7447 +define i32 @test1([100 x i32]* nocapture %a) nounwind readonly { +entry: + br label %for.cond + +for.cond: ; preds = %for.cond1, %entry + %sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.cond1 ] + %i.0 = phi i1 [ true, %entry ], [ false, %for.cond1 ] + br i1 %i.0, label %for.cond1, label %return + +for.cond1: ; preds = %for.cond, %land.rhs + %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.0, %for.cond ] + %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond ] + %cmp2 = icmp ult i32 %i.1, 100 + br i1 %cmp2, label %land.rhs, label %for.cond + +land.rhs: ; preds = %for.cond1 + %conv = zext i32 %i.1 to i64 + %arrayidx = getelementptr inbounds [100 x i32]* %a, i64 0, i64 %conv + %0 = load i32* %arrayidx, align 4 + %add = add i32 %0, %sum.1 + 
%cmp4 = icmp ugt i32 %add, 1000 + %inc = add i32 %i.1, 1 + br i1 %cmp4, label %return, label %for.cond1 + +return: ; preds = %for.cond, %land.rhs + %retval.0 = phi i32 [ 1000, %land.rhs ], [ %sum.0, %for.cond ] + ret i32 %retval.0 + +; CHECK: @test1 +; CHECK: for.cond1.preheader: +; CHECK: %sum.04 = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.cond.loopexit ] +; CHECK: br label %for.cond1 + +; CHECK: for.cond1: +; CHECK: %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.04, %for.cond1.preheader ] +; CHECK: %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond1.preheader ] +; CHECK: %cmp2 = icmp ult i32 %i.1, 100 +; CHECK: br i1 %cmp2, label %land.rhs, label %for.cond.loopexit +} + +define void @test2(i32 %x) nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ] + %cmp = icmp eq i32 %i.0, %x + br i1 %cmp, label %return.loopexit, label %for.body + +for.body: ; preds = %for.cond + %call = tail call i32 @foo(i32 %i.0) nounwind + %tobool = icmp eq i32 %call, 0 + br i1 %tobool, label %if.end, label %a + +if.end: ; preds = %for.body + %call1 = tail call i32 @foo(i32 42) nounwind + %inc = add i32 %i.0, 1 + br label %for.cond + +a: ; preds = %for.body + %call2 = tail call i32 @bar(i32 1) nounwind + br label %return + +return.loopexit: ; preds = %for.cond + br label %return + +return: ; preds = %return.loopexit, %a + ret void + +; CHECK: @test2 +; CHECK: if.end: +; CHECK: %inc = add i32 %i.02, 1 +; CHECK: %cmp = icmp eq i32 %inc, %x +; CHECK: br i1 %cmp, label %for.cond.return.loopexit_crit_edge, label %for.body +} + +declare i32 @foo(i32) + +declare i32 @bar(i32) + +@_ZTIi = external constant i8* + +; Verify dominators. +define void @test3(i32 %x) { +entry: + %cmp2 = icmp eq i32 0, %x + br i1 %cmp2, label %try.cont.loopexit, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.inc + %i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + invoke void @_Z3fooi(i32 %i.03) + to label %for.inc unwind label %lpad + +for.inc: ; preds = %for.body + %inc = add i32 %i.03, 1 + %cmp = icmp eq i32 %inc, %x + br i1 %cmp, label %for.cond.try.cont.loopexit_crit_edge, label %for.body + +lpad: ; preds = %for.body + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + %1 = extractvalue { i8*, i32 } %0, 0 + %2 = extractvalue { i8*, i32 } %0, 1 + %3 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind + %matches = icmp eq i32 %2, %3 + br i1 %matches, label %catch, label %eh.resume + +catch: ; preds = %lpad + %4 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind + br i1 true, label %invoke.cont2.loopexit, label %for.body.i.lr.ph + +for.body.i.lr.ph: ; preds = %catch + br label %for.body.i + +for.body.i: ; preds = %for.body.i.lr.ph, %for.inc.i + %i.0.i1 = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc.i, %for.inc.i ] + invoke void @_Z3fooi(i32 %i.0.i1) + to label %for.inc.i unwind label %lpad.i + +for.inc.i: ; preds = %for.body.i + %inc.i = add i32 %i.0.i1, 1 + %cmp.i = icmp eq i32 %inc.i, 0 + br i1 %cmp.i, label %for.cond.i.invoke.cont2.loopexit_crit_edge, label %for.body.i + +lpad.i: ; preds = %for.body.i + %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + %6 = extractvalue { i8*, i32 } %5, 0 + %7 = extractvalue { i8*, i32 } %5, 1 + %matches.i = icmp eq i32 %7, %3 + br i1 %matches.i, label 
%catch.i, label %lpad1.body + +catch.i: ; preds = %lpad.i + %8 = tail call i8* @__cxa_begin_catch(i8* %6) nounwind + invoke void @test3(i32 0) + to label %invoke.cont2.i unwind label %lpad1.i + +invoke.cont2.i: ; preds = %catch.i + tail call void @__cxa_end_catch() nounwind + br label %invoke.cont2 + +lpad1.i: ; preds = %catch.i + %9 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + %10 = extractvalue { i8*, i32 } %9, 0 + %11 = extractvalue { i8*, i32 } %9, 1 + tail call void @__cxa_end_catch() nounwind + br label %lpad1.body + +for.cond.i.invoke.cont2.loopexit_crit_edge: ; preds = %for.inc.i + br label %invoke.cont2.loopexit + +invoke.cont2.loopexit: ; preds = %for.cond.i.invoke.cont2.loopexit_crit_edge, %catch + br label %invoke.cont2 + +invoke.cont2: ; preds = %invoke.cont2.loopexit, %invoke.cont2.i + tail call void @__cxa_end_catch() nounwind + br label %try.cont + +for.cond.try.cont.loopexit_crit_edge: ; preds = %for.inc + br label %try.cont.loopexit + +try.cont.loopexit: ; preds = %for.cond.try.cont.loopexit_crit_edge, %entry + br label %try.cont + +try.cont: ; preds = %try.cont.loopexit, %invoke.cont2 + ret void + +lpad1.body: ; preds = %lpad1.i, %lpad.i + %exn.slot.0.i = phi i8* [ %10, %lpad1.i ], [ %6, %lpad.i ] + %ehselector.slot.0.i = phi i32 [ %11, %lpad1.i ], [ %7, %lpad.i ] + tail call void @__cxa_end_catch() nounwind + br label %eh.resume + +eh.resume: ; preds = %lpad1.body, %lpad + %exn.slot.0 = phi i8* [ %exn.slot.0.i, %lpad1.body ], [ %1, %lpad ] + %ehselector.slot.0 = phi i32 [ %ehselector.slot.0.i, %lpad1.body ], [ %2, %lpad ] + %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0 + %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1 + resume { i8*, i32 } %lpad.val5 +} + +declare void @_Z3fooi(i32) + +declare i32 @__gxx_personality_v0(...) + +declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +define void @test4() nounwind uwtable { +entry: + br label %"7" + +"3": ; preds = %"7" + br i1 undef, label %"31", label %"4" + +"4": ; preds = %"3" + %. = select i1 undef, float 0x3F50624DE0000000, float undef + %0 = add i32 %1, 1 + br label %"7" + +"7": ; preds = %"4", %entry + %1 = phi i32 [ %0, %"4" ], [ 0, %entry ] + %2 = icmp slt i32 %1, 100 + br i1 %2, label %"3", label %"8" + +"8": ; preds = %"7" + br i1 undef, label %"9", label %"31" + +"9": ; preds = %"8" + br label %"33" + +"27": ; preds = %"31" + unreachable + +"31": ; preds = %"8", %"3" + br i1 undef, label %"27", label %"32" + +"32": ; preds = %"31" + br label %"33" + +"33": ; preds = %"32", %"9" + ret void +} diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll index 0a7ba5d..7b64b1b 100644 --- a/test/Transforms/ObjCARC/basic.ll +++ b/test/Transforms/ObjCARC/basic.ll @@ -1,4 +1,4 @@ -; RUN: opt -objc-arc -S < %s | FileCheck %s +; RUN: opt -basicaa -objc-arc -S < %s | FileCheck %s target datalayout = "e-p:64:64:64" @@ -1498,7 +1498,7 @@ define i8* @test49(i8* %p) nounwind { } ; Do delete retain+release with intervening stores of the -; address value; +; address value. 
; CHECK: define void @test50( ; CHECK-NOT: @objc_ diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll index a618a21..32be03e 100644 --- a/test/Transforms/ObjCARC/nested.ll +++ b/test/Transforms/ObjCARC/nested.ll @@ -16,6 +16,10 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind declare void @use(i8*) declare void @objc_release(i8*) +declare i8* @def() +declare void @__crasher_block_invoke(i8* nocapture) +declare i8* @objc_retainBlock(i8*) +declare void @__crasher_block_invoke1(i8* nocapture) !0 = metadata !{} @@ -279,11 +283,13 @@ forcoll.empty: ret void } -; Delete a nested retain+release pair. +; TODO: Delete a nested retain+release pair. +; The optimizer currently can't do this, because it isn't sophisticated enough in +; reasoning about nesting. ; CHECK: define void @test6( ; CHECK: call i8* @objc_retain -; CHECK-NOT: @objc_retain +; CHECK: @objc_retain ; CHECK: } define void @test6() nounwind { entry: @@ -345,11 +351,13 @@ forcoll.empty: ret void } -; Delete a nested retain+release pair. +; TODO: Delete a nested retain+release pair. +; The optimizer currently can't do this, because it isn't sophisticated enough in +; reasoning about nesting. ; CHECK: define void @test7( ; CHECK: call i8* @objc_retain -; CHECK-NOT: @objc_retain +; CHECK: @objc_retain ; CHECK: } define void @test7() nounwind { entry: @@ -553,12 +561,12 @@ forcoll.empty: ret void } -; Like test9, but without a split backedge. This we can optimize. +; Like test9, but without a split backedge. TODO: optimize this. ; CHECK: define void @test9b( ; CHECK: call i8* @objc_retain ; CHECK: call i8* @objc_retain -; CHECK-NOT: @objc_retain +; CHECK: @objc_retain ; CHECK: } define void @test9b() nounwind { entry: @@ -687,12 +695,12 @@ forcoll.empty: ret void } -; Like test10, but without a split backedge. This we can optimize. +; Like test10, but without a split backedge. TODO: optimize this. ; CHECK: define void @test10b( ; CHECK: call i8* @objc_retain ; CHECK: call i8* @objc_retain -; CHECK-NOT: @objc_retain +; CHECK: @objc_retain ; CHECK: } define void @test10b() nounwind { entry: @@ -751,3 +759,64 @@ forcoll.empty: call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0 ret void } + +; Pointers to strong pointers can obscure provenance relationships. Be conservative +; in the face of escaping pointers. rdar://12150909.
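; Editor's note: illustrative sketch, not part of the committed test. In @test11 below, a retained pointer is stored into the strong slot of a stack block and the block is then passed to @use, so unknown code may load and release that pointer; once it has escaped through another pointer like this, the retain/release pairs cannot be safely removed. Reduced shape of the pattern (names simplified):
;   %r = tail call i8* @objc_retain(i8* %call)
;   store i8* %r, i8** %slot                  ; strong field inside the stack block
;   %blk = call i8* @objc_retainBlock(i8* %block)
;   call void @use(i8* %blk)                  ; the block, and the slot, escape here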
+ +%struct.__block_d = type { i64, i64 } + +@_NSConcreteStackBlock = external global i8* +@__block_d_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* } +@__block_d_tmp5 = external hidden constant { i64, i64, i8*, i8*, i8*, i8* } + +; CHECK: define void @test11( +; CHECK: tail call i8* @objc_retain(i8* %call) nounwind +; CHECK: tail call i8* @objc_retain(i8* %call) nounwind +; CHECK: call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0 +; CHECK: } +define void @test11() { +entry: + %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>, align 8 + %block9 = alloca <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>, align 8 + %call = call i8* @def(), !clang.arc.no_objc_arc_exceptions !0 + %foo = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 5 + %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 0 + store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8 + %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 1 + store i32 1107296256, i32* %block.flags, align 8 + %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 2 + store i32 0, i32* %block.reserved, align 4 + %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 3 + store i8* bitcast (void (i8*)* @__crasher_block_invoke to i8*), i8** %block.invoke, align 8 + %block.d = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 4 + store %struct.__block_d* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_d_tmp to %struct.__block_d*), %struct.__block_d** %block.d, align 8 + %foo2 = tail call i8* @objc_retain(i8* %call) nounwind + store i8* %foo2, i8** %foo, align 8 + %foo4 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block to i8* + %foo5 = call i8* @objc_retainBlock(i8* %foo4) nounwind + call void @use(i8* %foo5), !clang.arc.no_objc_arc_exceptions !0 + call void @objc_release(i8* %foo5) nounwind + %strongdestroy = load i8** %foo, align 8 + call void @objc_release(i8* %strongdestroy) nounwind, !clang.imprecise_release !0 + %foo10 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 5 + %block.isa11 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 0 + store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa11, align 8 + %block.flags12 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 1 + store i32 1107296256, i32* %block.flags12, align 8 + %block.reserved13 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 2 + store i32 0, i32* %block.reserved13, align 4 + %block.invoke14 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 3 + store i8* bitcast (void (i8*)* @__crasher_block_invoke1 to i8*), i8** %block.invoke14, align 8 + %block.d15 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 4 + store %struct.__block_d* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_d_tmp5 to %struct.__block_d*), %struct.__block_d** %block.d15, align 8 + %foo18 = call i8* @objc_retain(i8* %call) nounwind + store i8* %call, i8** %foo10, align 8 + %foo20 = bitcast <{ i8*, i32, i32, i8*, 
%struct.__block_d*, i8* }>* %block9 to i8* + %foo21 = call i8* @objc_retainBlock(i8* %foo20) nounwind + call void @use(i8* %foo21), !clang.arc.no_objc_arc_exceptions !0 + call void @objc_release(i8* %foo21) nounwind + %strongdestroy25 = load i8** %foo10, align 8 + call void @objc_release(i8* %strongdestroy25) nounwind, !clang.imprecise_release !0 + call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0 + ret void +} diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll b/test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll new file mode 100644 index 0000000..53d5448 --- /dev/null +++ b/test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll @@ -0,0 +1,37 @@ +; RUN: opt -simplifycfg -S -o - < %s | FileCheck %s + +; This test case was written to trigger an incorrect assert statement in +; -simplifycfg. Thus we don't actually want to check the output, just that +; -simplifycfg ran successfully. Thus we only check that the function still +; exists, and that it still calls foo(). +; +; NOTE: There are some obviously dead blocks and missing branch weight +; metadata. Both of these features were key to triggering the assert. +; Additionally, the not-taken weight of the branch with a weight had to +; be 0 to trigger the assert. + +declare void @foo() nounwind uwtable + +define void @func(i32 %A) nounwind uwtable { +; CHECK: define void @func +entry: + %cmp11 = icmp eq i32 %A, 1 + br i1 %cmp11, label %if.then, label %if.else, !prof !0 + +if.then: + call void @foo() +; CHECK: call void @foo() + br label %if.else + +if.else: + %cmp17 = icmp eq i32 %A, 2 + br i1 %cmp17, label %if.then2, label %if.end + +if.then2: + br label %if.end + +if.end: + ret void +} + +!0 = metadata !{metadata !"branch_weights", i32 1, i32 0} diff --git a/test/Transforms/SimplifyCFG/switch_create.ll b/test/Transforms/SimplifyCFG/switch_create.ll index 546cc75..b28e4a4 100644 --- a/test/Transforms/SimplifyCFG/switch_create.ll +++ b/test/Transforms/SimplifyCFG/switch_create.ll @@ -141,8 +141,9 @@ UnifiedReturnBlock: ; preds = %shortcirc_done.4, %shortcirc_next.4 ret i1 %UnifiedRetVal ; CHECK: @test6 -; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14 -; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6 +; CHECK: %switch.tableidx = sub i32 %tmp.2.i, 14 +; CHECK: %0 = icmp ult i32 %switch.tableidx, 6 +; CHECK: select i1 %0, i1 true, i1 false } define void @test7(i8 zeroext %c, i32 %x) nounwind ssp noredzone { diff --git a/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll new file mode 100644 index 0000000..414da93 --- /dev/null +++ b/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll @@ -0,0 +1,140 @@ +; RUN: opt < %s -simplifycfg -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; The table for @f +; CHECK: @switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1] + +; The int table for @h +; CHECK: @switch.table1 = private unnamed_addr constant [4 x i8] c"*\09X\05" + +; The float table for @h +; CHECK: @switch.table2 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000] + +; The table for @foostring +; CHECK: @switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds 
([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)] + +; A simple int-to-int selection switch. +; It is dense enough to be replaced by table lookup. +; The result is directly by a ret from an otherwise empty bb, +; so we return early, directly from the lookup bb. + +define i32 @f(i32 %c) nounwind uwtable readnone { +entry: + switch i32 %c, label %sw.default [ + i32 42, label %return + i32 43, label %sw.bb1 + i32 44, label %sw.bb2 + i32 45, label %sw.bb3 + i32 46, label %sw.bb4 + i32 47, label %sw.bb5 + i32 48, label %sw.bb6 + ] + +sw.bb1: br label %return +sw.bb2: br label %return +sw.bb3: br label %return +sw.bb4: br label %return +sw.bb5: br label %return +sw.bb6: br label %return +sw.default: br label %return +return: + %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ] + ret i32 %retval.0 + +; CHECK: @f +; CHECK: entry: +; CHECK-NEXT: %switch.tableidx = sub i32 %c, 42 +; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 7 +; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return +; CHECK: switch.lookup: +; CHECK-NEXT: %switch.gep = getelementptr inbounds [7 x i32]* @switch.table, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.load = load i32* %switch.gep +; CHECK-NEXT: ret i32 %switch.load +; CHECK: return: +; CHECK-NEXT: ret i32 15 +} + +; A switch used to initialize two variables, an i8 and a float. + +declare void @dummy(i8 signext, float) +define void @h(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %sw.epilog + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb1: br label %sw.epilog +sw.bb2: br label %sw.epilog +sw.bb3: br label %sw.epilog +sw.default: br label %sw.epilog + +sw.epilog: + %a.0 = phi i8 [ 7, %sw.default ], [ 5, %sw.bb3 ], [ 88, %sw.bb2 ], [ 9, %sw.bb1 ], [ 42, %entry ] + %b.0 = phi float [ 0x4023FAE140000000, %sw.default ], [ 0x4001AE1480000000, %sw.bb3 ], [ 0x4012449BA0000000, %sw.bb2 ], [ 0x3FF3BE76C0000000, %sw.bb1 ], [ 0x40091EB860000000, %entry ] + call void @dummy(i8 signext %a.0, float %b.0) + ret void + +; CHECK: @h +; CHECK: entry: +; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0 +; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4 +; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog +; CHECK: switch.lookup: +; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8]* @switch.table1, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.load = load i8* %switch.gep +; CHECK-NEXT: %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table2, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.load2 = load float* %switch.gep1 +; CHECK-NEXT: br label %sw.epilog +; CHECK: sw.epilog: +; CHECK-NEXT: %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ] +; CHECK-NEXT: %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ] +; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0) +; CHECK-NEXT: ret void +} + + +; Switch used to return a string. 
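; Editor's note: illustrative sketch, not part of the committed test. Each lookup table checked in this file is guarded by the same rebase-and-bounds-check shape that the CHECK lines for @f above spell out (cases 42..48, a 7-entry table, default 15):
;   %switch.tableidx = sub i32 %c, 42         ; rebase the lowest case to index 0
;   %0 = icmp ult i32 %switch.tableidx, 7     ; index must be inside the table
;   ; taken: getelementptr/load from @switch.table at %switch.tableidx
;   ; not taken: fall through to the default result (ret i32 15)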
+ +@.str = private unnamed_addr constant [4 x i8] c"foo\00", align 1 +@.str1 = private unnamed_addr constant [4 x i8] c"bar\00", align 1 +@.str2 = private unnamed_addr constant [4 x i8] c"baz\00", align 1 +@.str3 = private unnamed_addr constant [4 x i8] c"qux\00", align 1 +@.str4 = private unnamed_addr constant [6 x i8] c"error\00", align 1 + +define i8* @foostring(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %return + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb1: br label %return +sw.bb2: br label %return +sw.bb3: br label %return +sw.default: br label %return + +return: + %retval.0 = phi i8* [ getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0), %sw.default ], + [ getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ], + [ getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ], + [ getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ], + [ getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), %entry ] + ret i8* %retval.0 + +; CHECK: @foostring +; CHECK: entry: +; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0 +; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4 +; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return +; CHECK: switch.lookup: +; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.load = load i8** %switch.gep +; CHECK-NEXT: ret i8* %switch.load +} diff --git a/test/Transforms/SimplifyLibCalls/double-float-shrink.ll b/test/Transforms/SimplifyLibCalls/double-float-shrink.ll new file mode 100644 index 0000000..b4ab8b4 --- /dev/null +++ b/test/Transforms/SimplifyLibCalls/double-float-shrink.ll @@ -0,0 +1,333 @@ +; RUN: opt < %s -simplify-libcalls -enable-double-float-shrink -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define float @acos_test(float %f) nounwind readnone { +; CHECK: acos_test + %conv = fpext float %f to double + %call = call double @acos(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @acosf(float %f) +} + +define double @acos_test2(float %f) nounwind readnone { +; CHECK: acos_test2 + %conv = fpext float %f to double + %call = call double @acos(double %conv) + ret double %call +; CHECK: call double @acos(double %conv) +} + +define float @acosh_test(float %f) nounwind readnone { +; CHECK: acosh_test + %conv = fpext float %f to double + %call = call double @acosh(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @acoshf(float %f) +} + +define double @acosh_test2(float %f) nounwind readnone { +; CHECK: acosh_test2 + %conv = fpext float %f to double + %call = call double @acosh(double %conv) + ret double %call +; CHECK: call double @acosh(double %conv) +} + +define float @asin_test(float %f) nounwind readnone { +; CHECK: asin_test + %conv = fpext float %f to double + %call = call double @asin(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @asinf(float %f) +} + +define double @asin_test2(float %f) nounwind readnone { +; CHECK: asin_test2 + %conv = fpext float %f to double + %call = call double @asin(double %conv) + ret double %call +; CHECK: call double @asin(double %conv) +} + +define float @asinh_test(float %f) nounwind readnone { +; CHECK: asinh_test + %conv = 
fpext float %f to double + %call = call double @asinh(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @asinhf(float %f) +} + +define double @asinh_test2(float %f) nounwind readnone { +; CHECK: asinh_test2 + %conv = fpext float %f to double + %call = call double @asinh(double %conv) + ret double %call +; CHECK: call double @asinh(double %conv) +} + +define float @atan_test(float %f) nounwind readnone { +; CHECK: atan_test + %conv = fpext float %f to double + %call = call double @atan(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @atanf(float %f) +} + +define double @atan_test2(float %f) nounwind readnone { +; CHECK: atan_test2 + %conv = fpext float %f to double + %call = call double @atan(double %conv) + ret double %call +; CHECK: call double @atan(double %conv) +} +define float @atanh_test(float %f) nounwind readnone { +; CHECK: atanh_test + %conv = fpext float %f to double + %call = call double @atanh(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @atanhf(float %f) +} + +define double @atanh_test2(float %f) nounwind readnone { +; CHECK: atanh_test2 + %conv = fpext float %f to double + %call = call double @atanh(double %conv) + ret double %call +; CHECK: call double @atanh(double %conv) +} +define float @cbrt_test(float %f) nounwind readnone { +; CHECK: cbrt_test + %conv = fpext float %f to double + %call = call double @cbrt(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @cbrtf(float %f) +} + +define double @cbrt_test2(float %f) nounwind readnone { +; CHECK: cbrt_test2 + %conv = fpext float %f to double + %call = call double @cbrt(double %conv) + ret double %call +; CHECK: call double @cbrt(double %conv) +} +define float @exp_test(float %f) nounwind readnone { +; CHECK: exp_test + %conv = fpext float %f to double + %call = call double @exp(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @expf(float %f) +} + +define double @exp_test2(float %f) nounwind readnone { +; CHECK: exp_test2 + %conv = fpext float %f to double + %call = call double @exp(double %conv) + ret double %call +; CHECK: call double @exp(double %conv) +} +define float @expm1_test(float %f) nounwind readnone { +; CHECK: expm1_test + %conv = fpext float %f to double + %call = call double @expm1(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @expm1f(float %f) +} + +define double @expm1_test2(float %f) nounwind readnone { +; CHECK: expm1_test2 + %conv = fpext float %f to double + %call = call double @expm1(double %conv) + ret double %call +; CHECK: call double @expm1(double %conv) +} +define float @exp10_test(float %f) nounwind readnone { +; CHECK: exp10_test + %conv = fpext float %f to double + %call = call double @exp10(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @exp10f(float %f) +} + +define double @exp10_test2(float %f) nounwind readnone { +; CHECK: exp10_test2 + %conv = fpext float %f to double + %call = call double @exp10(double %conv) + ret double %call +; CHECK: call double @exp10(double %conv) +} +define float @log_test(float %f) nounwind readnone { +; CHECK: log_test + %conv = fpext float %f to double + %call = call double @log(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @logf(float %f) +} + +define double @log_test2(float %f) nounwind 
readnone { +; CHECK: log_test2 + %conv = fpext float %f to double + %call = call double @log(double %conv) + ret double %call +; CHECK: call double @log(double %conv) +} +define float @log10_test(float %f) nounwind readnone { +; CHECK: log10_test + %conv = fpext float %f to double + %call = call double @log10(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @log10f(float %f) +} + +define double @log10_test2(float %f) nounwind readnone { +; CHECK: log10_test2 + %conv = fpext float %f to double + %call = call double @log10(double %conv) + ret double %call +; CHECK: call double @log10(double %conv) +} +define float @log1p_test(float %f) nounwind readnone { +; CHECK: log1p_test + %conv = fpext float %f to double + %call = call double @log1p(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @log1pf(float %f) +} + +define double @log1p_test2(float %f) nounwind readnone { +; CHECK: log1p_test2 + %conv = fpext float %f to double + %call = call double @log1p(double %conv) + ret double %call +; CHECK: call double @log1p(double %conv) +} +define float @log2_test(float %f) nounwind readnone { +; CHECK: log2_test + %conv = fpext float %f to double + %call = call double @log2(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @log2f(float %f) +} + +define double @log2_test2(float %f) nounwind readnone { +; CHECK: log2_test2 + %conv = fpext float %f to double + %call = call double @log2(double %conv) + ret double %call +; CHECK: call double @log2(double %conv) +} +define float @logb_test(float %f) nounwind readnone { +; CHECK: logb_test + %conv = fpext float %f to double + %call = call double @logb(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @logbf(float %f) +} + +define double @logb_test2(float %f) nounwind readnone { +; CHECK: logb_test2 + %conv = fpext float %f to double + %call = call double @logb(double %conv) + ret double %call +; CHECK: call double @logb(double %conv) +} +define float @sin_test(float %f) nounwind readnone { +; CHECK: sin_test + %conv = fpext float %f to double + %call = call double @sin(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @sinf(float %f) +} + +define double @sin_test2(float %f) nounwind readnone { +; CHECK: sin_test2 + %conv = fpext float %f to double + %call = call double @sin(double %conv) + ret double %call +; CHECK: call double @sin(double %conv) +} +define float @sqrt_test(float %f) nounwind readnone { +; CHECK: sqrt_test + %conv = fpext float %f to double + %call = call double @sqrt(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @sqrtf(float %f) +} + +define double @sqrt_test2(float %f) nounwind readnone { +; CHECK: sqrt_test2 + %conv = fpext float %f to double + %call = call double @sqrt(double %conv) + ret double %call +; CHECK: call double @sqrt(double %conv) +} +define float @tan_test(float %f) nounwind readnone { +; CHECK: tan_test + %conv = fpext float %f to double + %call = call double @tan(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @tanf(float %f) +} + +define double @tan_test2(float %f) nounwind readnone { +; CHECK: tan_test2 + %conv = fpext float %f to double + %call = call double @tan(double %conv) + ret double %call +; CHECK: call double @tan(double %conv) +} +define float @tanh_test(float %f) nounwind readnone { +; CHECK: 
tanh_test
+ %conv = fpext float %f to double
+ %call = call double @tanh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @tanhf(float %f)
+}
+
+define double @tanh_test2(float %f) nounwind readnone {
+; CHECK: tanh_test2
+ %conv = fpext float %f to double
+ %call = call double @tanh(double %conv)
+ ret double %call
+; CHECK: call double @tanh(double %conv)
+}
+
+declare double @tanh(double) nounwind readnone
+declare double @tan(double) nounwind readnone
+declare double @sqrt(double) nounwind readnone
+declare double @sin(double) nounwind readnone
+declare double @log2(double) nounwind readnone
+declare double @log1p(double) nounwind readnone
+declare double @log10(double) nounwind readnone
+declare double @log(double) nounwind readnone
+declare double @logb(double) nounwind readnone
+declare double @exp10(double) nounwind readnone
+declare double @expm1(double) nounwind readnone
+declare double @exp(double) nounwind readnone
+declare double @cbrt(double) nounwind readnone
+declare double @atanh(double) nounwind readnone
+declare double @atan(double) nounwind readnone
+declare double @acos(double) nounwind readnone
+declare double @acosh(double) nounwind readnone
+declare double @asin(double) nounwind readnone
+declare double @asinh(double) nounwind readnone
diff --git a/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll b/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
new file mode 100644
index 0000000..aecb887
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
@@ -0,0 +1,179 @@
+; RUN: opt -S -simplify-libcalls -instcombine %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @test1(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %x to double
+ %2 = call double @ceil(double %1) nounwind readnone
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test1
+; CHECK-NEXT: %ceilf = call float @ceilf(float %x)
+; CHECK-NEXT: fcmp oeq float %ceilf, %y
+}
+
+define i32 @test2(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %x to double
+ %2 = call double @fabs(double %1) nounwind readnone
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test2
+; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
+; CHECK-NEXT: fcmp oeq float %fabsf, %y
+}
+
+define i32 @test3(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %x to double
+ %2 = call double @floor(double %1) nounwind readnone
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test3
+; CHECK-NEXT: %floorf = call float @floorf(float %x)
+; CHECK-NEXT: fcmp oeq float %floorf, %y
+}
+
+define i32 @test4(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %x to double
+ %2 = call double @nearbyint(double %1) nounwind
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test4
+; CHECK-NEXT: %nearbyintf = call float @nearbyintf(float %x)
+; CHECK-NEXT: fcmp oeq float %nearbyintf, %y
+}
+
+define i32 @test5(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %x to double
+ %2 = call double @rint(double %1) nounwind
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test5
+; CHECK-NEXT: %rintf = call float @rintf(float %x)
+; CHECK-NEXT: fcmp oeq float %rintf, %y
+}
+
+define i32 @test6(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %x to double
+ %2 = call double @round(double %1) nounwind readnone
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test6
+; CHECK-NEXT: %roundf = call float @roundf(float %x)
+; CHECK-NEXT: fcmp oeq float %roundf, %y
+}
+
+define i32 @test7(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %x to double
+ %2 = call double @trunc(double %1) nounwind
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test7
+; CHECK-NEXT: %truncf = call float @truncf(float %x)
+; CHECK-NEXT: fcmp oeq float %truncf, %y
+}
+
+define i32 @test8(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @ceil(double %2) nounwind readnone
+ %4 = fcmp oeq double %1, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test8
+; CHECK-NEXT: %ceilf = call float @ceilf(float %x)
+; CHECK-NEXT: fcmp oeq float %ceilf, %y
+}
+
+define i32 @test9(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @fabs(double %2) nounwind readnone
+ %4 = fcmp oeq double %1, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test9
+; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
+; CHECK-NEXT: fcmp oeq float %fabsf, %y
+}
+
+define i32 @test10(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @floor(double %2) nounwind readnone
+ %4 = fcmp oeq double %1, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test10
+; CHECK-NEXT: %floorf = call float @floorf(float %x)
+; CHECK-NEXT: fcmp oeq float %floorf, %y
+}
+
+define i32 @test11(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @nearbyint(double %2) nounwind
+ %4 = fcmp oeq double %1, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test11
+; CHECK-NEXT: %nearbyintf = call float @nearbyintf(float %x)
+; CHECK-NEXT: fcmp oeq float %nearbyintf, %y
+}
+
+define i32 @test12(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @rint(double %2) nounwind
+ %4 = fcmp oeq double %1, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test12
+; CHECK-NEXT: %rintf = call float @rintf(float %x)
+; CHECK-NEXT: fcmp oeq float %rintf, %y
+}
+
+define i32 @test13(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @round(double %2) nounwind readnone
+ %4 = fcmp oeq double %1, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test13
+; CHECK-NEXT: %roundf = call float @roundf(float %x)
+; CHECK-NEXT: fcmp oeq float %roundf, %y
+}
+
+define i32 @test14(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @trunc(double %2) nounwind
+ %4 = fcmp oeq double %1, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test14
+; CHECK-NEXT: %truncf = call float @truncf(float %x)
+; CHECK-NEXT: fcmp oeq float %truncf, %y
+}
+
+declare double @fabs(double) nounwind readnone
+declare double @ceil(double) nounwind readnone
+declare double @floor(double) nounwind readnone
+declare double @nearbyint(double) nounwind readnone
+declare double @rint(double) nounwind readnone
+declare double @round(double) nounwind readnone
+declare double @trunc(double) nounwind readnone