6 files changed, 101 insertions, 31 deletions
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 7f299aa..0198908 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -8,8 +8,9 @@
 define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) {
 entry:
 ; Make sure to use base-updating stores for saving callee-saved registers.
+; CHECK: push
 ; CHECK-NOT: sub sp
-; CHECK: vpush 
+; CHECK: push
 	%predicted_block = alloca [4 x [4 x i32]], align 4		; <[4 x [4 x i32]]*> [#uses=1]
 	br label %cond_next489
 
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll
index 707f051..75428ac 100644
--- a/test/CodeGen/ARM/ifcvt10.ll
+++ b/test/CodeGen/ARM/ifcvt10.ll
@@ -4,27 +4,40 @@
 ; micro-coded and would have long issue latency even if predicated on
 ; false predicate.
 
-%0 = type { float, float, float, float }
-%pln = type { %vec, float }
-%vec = type { [4 x float] }
-
-define arm_aapcs_vfpcc float @aaa(%vec* nocapture %ustart, %vec* nocapture %udir, %vec* nocapture %vstart, %vec* nocapture %vdir, %vec* %upoint, %vec* %vpoint) {
-; CHECK: aaa:
-; CHECK: vldr.32
-; CHECK-NOT: vldrne
-; CHECK-NOT: vpopne
-; CHECK-NOT: popne
-; CHECK: vpop
-; CHECK: pop
+define void @t(double %a, double %b, double %c, double %d, i32* nocapture %solutions, double* nocapture %x) nounwind {
 entry:
-  br i1 undef, label %bb81, label %bb48
+; CHECK: t:
+; CHECK: vpop {d8}
+; CHECK-NOT: vpopne
+; CHECK: ldmia sp!, {r7, pc}
+; CHECK: vpop {d8}
+; CHECK: ldmia sp!, {r7, pc}
+  br i1 undef, label %if.else, label %if.then
 
-bb48:                                             ; preds = %entry
-  %0 = call arm_aapcs_vfpcc  %0 @bbb(%pln* undef, %vec* %vstart, %vec* undef) nounwind ; <%0> [#uses=0]
-  ret float 0.000000e+00
+if.then:                                          ; preds = %entry
+  %mul73 = fmul double undef, 0.000000e+00
+  %sub76 = fsub double %mul73, undef
+  store double %sub76, double* undef, align 4
+  %call88 = tail call double @cos(double 0.000000e+00) nounwind ; the only call on this path
+  %mul89 = fmul double undef, %call88             ; the @cos result feeds the second store below
+  %sub92 = fsub double %mul89, undef
+  store double %sub92, double* undef, align 4
+  ret void
 
-bb81:                                             ; preds = %entry
-  ret float 0.000000e+00
+if.else:                                          ; preds = %entry
+  %tmp101 = tail call double @llvm.pow.f64(double undef, double 0x3FD5555555555555) ; exponent constant is approximately 1/3
+  %add112 = fadd double %tmp101, undef
+  %mul118 = fmul double %add112, undef            ; %mul118 has no use in this function
+  store double 0.000000e+00, double* %x, align 4  ; the only store through %x in this function
+  ret void
 }
 
-declare arm_aapcs_vfpcc %0 @bbb(%pln* nocapture, %vec* nocapture, %vec* nocapture) nounwind
+declare double @acos(double)
+
+declare double @sqrt(double) readnone
+
+declare double @cos(double) readnone
+
+declare double @fabs(double)
+
+declare double @llvm.pow.f64(double, double) nounwind readonly
diff --git a/test/CodeGen/ARM/ifcvt11.ll b/test/CodeGen/ARM/ifcvt11.ll
new file mode 100644
index 0000000..63f8557
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt11.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; rdar://8598427
+; Adjust if-converter heuristics to avoid predicating vmrs, which can cause a
+; significant regression.
+
+%struct.xyz_t = type { double, double, double }
+
+define i32 @effie(i32 %tsets, %struct.xyz_t* nocapture %p, i32 %a, i32 %b, i32 %c) nounwind readonly noinline {
+; CHECK: effie:
+entry:
+  %0 = icmp sgt i32 %tsets, 0                     ; the loop is entered only when %tsets > 0
+  br i1 %0, label %bb.nph, label %bb6
+
+bb.nph:                                           ; preds = %entry
+  %1 = add nsw i32 %b, %a
+  %2 = add nsw i32 %1, %c                         ; %2 = %a + %b + %c, computed once before the loop
+  br label %bb
+
+bb:                                               ; preds = %bb4, %bb.nph
+; CHECK: vcmpe.f64
+; CHECK: vmrs apsr_nzcv, fpscr
+  %r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ]  ; running result, 0 on loop entry
+  %n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ]   ; element index, 0 on loop entry
+  %scevgep10 = getelementptr inbounds %struct.xyz_t* %p, i32 %n.08, i32 0 ; address of field 0 of element %n.08
+  %scevgep11 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 1 ; address of field 1 of element %n.08
+  %3 = load double* %scevgep10, align 4
+  %4 = load double* %scevgep11, align 4
+  %5 = fcmp uge double %3, %4                     ; first FP compare: field 0 vs field 1
+  br i1 %5, label %bb3, label %bb1
+
+bb1:                                              ; preds = %bb
+; CHECK-NOT: it
+; CHECK-NOT: vcmpemi
+; CHECK-NOT: vmrsmi
+; CHECK: vcmpe.f64
+; CHECK: vmrs apsr_nzcv, fpscr
+  %scevgep12 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 2 ; address of field 2 of element %n.08
+  %6 = load double* %scevgep12, align 4
+  %7 = fcmp uge double %3, %6                     ; second FP compare: field 0 vs field 2
+  br i1 %7, label %bb3, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %8 = add nsw i32 %2, %r.19                      ; both compares false: result += %a + %b + %c
+  br label %bb4
+
+bb3:                                              ; preds = %bb1, %bb
+  %9 = add nsw i32 %r.19, 1                       ; either compare true: result += 1
+  br label %bb4
+
+bb4:                                              ; preds = %bb3, %bb2
+  %r.0 = phi i32 [ %9, %bb3 ], [ %8, %bb2 ]
+  %10 = add nsw i32 %n.08, 1                      ; advance to the next element
+  %exitcond = icmp eq i32 %10, %tsets             ; leave the loop once the index reaches %tsets
+  br i1 %exitcond, label %bb6, label %bb
+
+bb6:                                              ; preds = %bb4, %entry
+  %r.1.lcssa = phi i32 [ 0, %entry ], [ %r.0, %bb4 ] ; 0 when the loop was skipped
+  ret i32 %r.1.lcssa
+}
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
index 3bc8fee..52e40b2 100644
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
@@ -4,14 +4,14 @@
 ; constant offset addressing, so that each of the following stores
 ; uses the same register.
 
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-128]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-96]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-64]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-32]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #32]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #64]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #96]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE:[a-z0-9]+]], #-128]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE]], #-96]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE]], #-64]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE]], #-32]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE]]]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE]], #32]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE]], #64]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE]], #96]
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 3909554..24eb3a8 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -271,7 +271,6 @@ define arm_aapcs_vfpcc i32 @t10() nounwind {
 entry:
 ; CHECK: t10:
 ; CHECK: vmov.i32 q9, #0x3F000000
-; CHECK: vmov d0, d17
 ; CHECK: vmla.f32 q8, q8, d0[0]
   %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
   %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
index 080341c..9ed6a01 100644
--- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
+++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
@@ -23,8 +23,6 @@ entry:
   %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2]
 ; Constant pool load followed by add.
 ; Then clobber the loaded register, not the sum.
-; CHECK: vldr.64
-; CHECK: vadd.f64
 ; CHECK: vldr.64 [[LDR:d.*]],
 ; CHECK: LPC0_0:
 ; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]]