diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) {
; Make sure to use base-updating stores for saving callee-saved registers.
+; CHECK: push
; CHECK-NOT: sub sp
-; CHECK: vpush
+; CHECK: push
%predicted_block = alloca [4 x [4 x i32]], align 4 ; <[4 x [4 x i32]]*> [#uses=1]
br label %cond_next489
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll
--- a/test/CodeGen/ARM/ifcvt10.ll
+++ b/test/CodeGen/ARM/ifcvt10.ll
; micro-coded and would have long issue latency even if predicated on
; false predicate.
-%0 = type { float, float, float, float }
-%pln = type { %vec, float }
-%vec = type { [4 x float] }
-define arm_aapcs_vfpcc float @aaa(%vec* nocapture %ustart, %vec* nocapture %udir, %vec* nocapture %vstart, %vec* nocapture %vdir, %vec* %upoint, %vec* %vpoint) {
-; CHECK: aaa:
-; CHECK: vldr.32
-; CHECK-NOT: vldrne
-; CHECK-NOT: vpopne
-; CHECK-NOT: popne
-; CHECK: vpop
-; CHECK: pop
+define void @t(double %a, double %b, double %c, double %d, i32* nocapture %solutions, double* nocapture %x) nounwind {
- br i1 undef, label %bb81, label %bb48
+; CHECK: t:
+; CHECK: vpop {d8}
+; CHECK-NOT: vpopne
+; CHECK: ldmia sp!, {r7, pc}
+; CHECK: vpop {d8}
+; CHECK: ldmia sp!, {r7, pc}
+ br i1 undef, label %if.else, label %if.then
-bb48: ; preds = %entry
- %0 = call arm_aapcs_vfpcc %0 @bbb(%pln* undef, %vec* %vstart, %vec* undef) nounwind ; <%0> [#uses=0]
- ret float 0.000000e+00
+if.then: ; preds = %entry
+ %mul73 = fmul double undef, 0.000000e+00
+ %sub76 = fsub double %mul73, undef
+ store double %sub76, double* undef, align 4
+ %call88 = tail call double @cos(double 0.000000e+00) nounwind
+ %mul89 = fmul double undef, %call88
+ %sub92 = fsub double %mul89, undef
+ store double %sub92, double* undef, align 4
+ ret void
-bb81: ; preds = %entry
- ret float 0.000000e+00
+if.else: ; preds = %entry
+ %tmp101 = tail call double @llvm.pow.f64(double undef, double 0x3FD5555555555555)
+ %add112 = fadd double %tmp101, undef
+ %mul118 = fmul double %add112, undef
+ store double 0.000000e+00, double* %x, align 4
+ ret void
-declare arm_aapcs_vfpcc %0 @bbb(%pln* nocapture, %vec* nocapture, %vec* nocapture) nounwind
+declare double @acos(double)
+declare double @sqrt(double) readnone
+declare double @cos(double) readnone
+declare double @fabs(double)
+declare double @llvm.pow.f64(double, double) nounwind readonly
diff --git a/test/CodeGen/ARM/ifcvt11.ll b/test/CodeGen/ARM/ifcvt11.ll
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt11.ll
+; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; rdar://8598427
+; Adjust the if-converter heuristics to avoid predicating vmrs, which can
+; cause a significant regression.
+%struct.xyz_t = type { double, double, double }
+define i32 @effie(i32 %tsets, %struct.xyz_t* nocapture %p, i32 %a, i32 %b, i32 %c) nounwind readonly noinline {
+; CHECK: effie:
+ %0 = icmp sgt i32 %tsets, 0
+ br i1 %0, label %bb.nph, label %bb6
+bb.nph: ; preds = %entry
+ %1 = add nsw i32 %b, %a
+ %2 = add nsw i32 %1, %c
+ br label %bb
+bb: ; preds = %bb4, %bb.nph
+; CHECK: vcmpe.f64
+; CHECK: vmrs apsr_nzcv, fpscr
+ %r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ]
+ %n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ]
+ %scevgep10 = getelementptr inbounds %struct.xyz_t* %p, i32 %n.08, i32 0
+ %scevgep11 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 1
+ %3 = load double* %scevgep10, align 4
+ %4 = load double* %scevgep11, align 4
+ %5 = fcmp uge double %3, %4
+ br i1 %5, label %bb3, label %bb1
+bb1: ; preds = %bb
+; CHECK-NOT: it
+; CHECK-NOT: vcmpemi
+; CHECK-NOT: vmrsmi
+; CHECK: vcmpe.f64
+; CHECK: vmrs apsr_nzcv, fpscr
+ %scevgep12 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 2
+ %6 = load double* %scevgep12, align 4
+ %7 = fcmp uge double %3, %6
+ br i1 %7, label %bb3, label %bb2
+bb2: ; preds = %bb1
+ %8 = add nsw i32 %2, %r.19
+ br label %bb4
+bb3: ; preds = %bb1, %bb
+ %9 = add nsw i32 %r.19, 1
+ br label %bb4
+bb4: ; preds = %bb3, %bb2
+ %r.0 = phi i32 [ %9, %bb3 ], [ %8, %bb2 ]
+ %10 = add nsw i32 %n.08, 1
+ %exitcond = icmp eq i32 %10, %tsets
+ br i1 %exitcond, label %bb6, label %bb
+bb6: ; preds = %bb4, %entry
+ %r.1.lcssa = phi i32 [ 0, %entry ], [ %r.0, %bb4 ]
+ ret i32 %r.1.lcssa
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
; constant offset addressing, so that each of the following stores
; uses the same register.
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-128]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-96]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-64]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-32]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #32]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #64]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #96]
+; Capture the base register from the first store and require every later
+; store to reuse it, so the test verifies the shared base register without
+; depending on which register the allocator happens to pick.
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE:[a-z0-9]+]], #-128]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE]], #-96]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE]], #-64]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE]], #-32]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE]]]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE]], #32]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE]], #64]
+; CHECK: vstr.32 s{{.*}}, {{\[}}[[BASE]], #96]
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
; CHECK: t10:
; CHECK: vmov.i32 q9, #0x3F000000
-; CHECK: vmov d0, d17
; CHECK: vmla.f32 q8, q8, d0[0]
%0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
%1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
--- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
+++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
%4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2]
; Constant pool load followed by add.
; Then clobber the loaded register, not the sum.
-; CHECK: vldr.64
-; CHECK: vadd.f64
; CHECK: vldr.64 [[LDR:d.*]],
; CHECK: LPC0_0:
; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]]