diff options
author | Evan Cheng <evan.cheng@apple.com> | 2010-11-03 00:45:17 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2010-11-03 00:45:17 +0000 |
commit | 8239daf7c83a65a189c352cce3191cdc3bbfe151 (patch) | |
tree | 84b82c2cf503208d1f67007255f2f56fdb383c55 /test/CodeGen | |
parent | 41957f6eb2271e5f1981b32a873d1b58217c6411 (diff) | |
download | external_llvm-8239daf7c83a65a189c352cce3191cdc3bbfe151.zip external_llvm-8239daf7c83a65a189c352cce3191cdc3bbfe151.tar.gz external_llvm-8239daf7c83a65a189c352cce3191cdc3bbfe151.tar.bz2 |
Two sets of changes. Sorry they are intermingled.
1. Fix pre-ra scheduler so it doesn't try to push instructions above calls to
"optimize for latency". Call instructions don't have the right latency and
this is more likely to introduce spills.
2. Fix if-converter cost function. For ARM, it should use instruction latencies,
not # of micro-ops since multi-latency instructions are completely executed
even when the predicate is false. Also, some instructions will be "slower"
when they are predicated due to the register def becoming implicit input.
rdar://8598427
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118135 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll | 3 | ||||
-rw-r--r-- | test/CodeGen/ARM/ifcvt10.ll | 51 | ||||
-rw-r--r-- | test/CodeGen/ARM/ifcvt11.ll | 59 | ||||
-rw-r--r-- | test/CodeGen/ARM/lsr-on-unrolled-loops.ll | 16 | ||||
-rw-r--r-- | test/CodeGen/ARM/reg_sequence.ll | 1 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll | 2 |
6 files changed, 101 insertions, 31 deletions
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll index 7f299aa..0198908 100644 --- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll +++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll @@ -8,8 +8,9 @@ define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) { entry: ; Make sure to use base-updating stores for saving callee-saved registers. +; CHECK: push ; CHECK-NOT: sub sp -; CHECK: vpush +; CHECK: push %predicted_block = alloca [4 x [4 x i32]], align 4 ; <[4 x [4 x i32]]*> [#uses=1] br label %cond_next489 diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll index 707f051..75428ac 100644 --- a/test/CodeGen/ARM/ifcvt10.ll +++ b/test/CodeGen/ARM/ifcvt10.ll @@ -4,27 +4,40 @@ ; micro-coded and would have long issue latency even if predicated on ; false predicate. -%0 = type { float, float, float, float } -%pln = type { %vec, float } -%vec = type { [4 x float] } - -define arm_aapcs_vfpcc float @aaa(%vec* nocapture %ustart, %vec* nocapture %udir, %vec* nocapture %vstart, %vec* nocapture %vdir, %vec* %upoint, %vec* %vpoint) { -; CHECK: aaa: -; CHECK: vldr.32 -; CHECK-NOT: vldrne -; CHECK-NOT: vpopne -; CHECK-NOT: popne -; CHECK: vpop -; CHECK: pop +define void @t(double %a, double %b, double %c, double %d, i32* nocapture %solutions, double* nocapture %x) nounwind { entry: - br i1 undef, label %bb81, label %bb48 +; CHECK: t: +; CHECK: vpop {d8} +; CHECK-NOT: vpopne +; CHECK: ldmia sp!, {r7, pc} +; CHECK: vpop {d8} +; CHECK: ldmia sp!, {r7, pc} + br i1 undef, label %if.else, label %if.then -bb48: ; preds = %entry - %0 = call arm_aapcs_vfpcc %0 @bbb(%pln* undef, %vec* %vstart, %vec* undef) nounwind ; <%0> [#uses=0] - ret float 0.000000e+00 +if.then: ; preds = %entry + %mul73 = fmul double undef, 0.000000e+00 + %sub76 = fsub double %mul73, undef + store double %sub76, double* undef, align 4 + %call88 = tail call double @cos(double 0.000000e+00) nounwind + %mul89 = fmul double undef, %call88 
+ %sub92 = fsub double %mul89, undef + store double %sub92, double* undef, align 4 + ret void -bb81: ; preds = %entry - ret float 0.000000e+00 +if.else: ; preds = %entry + %tmp101 = tail call double @llvm.pow.f64(double undef, double 0x3FD5555555555555) + %add112 = fadd double %tmp101, undef + %mul118 = fmul double %add112, undef + store double 0.000000e+00, double* %x, align 4 + ret void } -declare arm_aapcs_vfpcc %0 @bbb(%pln* nocapture, %vec* nocapture, %vec* nocapture) nounwind +declare double @acos(double) + +declare double @sqrt(double) readnone + +declare double @cos(double) readnone + +declare double @fabs(double) + +declare double @llvm.pow.f64(double, double) nounwind readonly diff --git a/test/CodeGen/ARM/ifcvt11.ll b/test/CodeGen/ARM/ifcvt11.ll new file mode 100644 index 0000000..63f8557 --- /dev/null +++ b/test/CodeGen/ARM/ifcvt11.ll @@ -0,0 +1,59 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s +; rdar://8598427 +; Adjust if-converter heuristics to avoid predicating vmrs which can cause +; significant regression. 
+ +%struct.xyz_t = type { double, double, double } + +define i32 @effie(i32 %tsets, %struct.xyz_t* nocapture %p, i32 %a, i32 %b, i32 %c) nounwind readonly noinline { +; CHECK: effie: +entry: + %0 = icmp sgt i32 %tsets, 0 + br i1 %0, label %bb.nph, label %bb6 + +bb.nph: ; preds = %entry + %1 = add nsw i32 %b, %a + %2 = add nsw i32 %1, %c + br label %bb + +bb: ; preds = %bb4, %bb.nph +; CHECK: vcmpe.f64 +; CHECK: vmrs apsr_nzcv, fpscr + %r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ] + %n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ] + %scevgep10 = getelementptr inbounds %struct.xyz_t* %p, i32 %n.08, i32 0 + %scevgep11 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 1 + %3 = load double* %scevgep10, align 4 + %4 = load double* %scevgep11, align 4 + %5 = fcmp uge double %3, %4 + br i1 %5, label %bb3, label %bb1 + +bb1: ; preds = %bb +; CHECK-NOT: it +; CHECK-NOT: vcmpemi +; CHECK-NOT: vmrsmi +; CHECK: vcmpe.f64 +; CHECK: vmrs apsr_nzcv, fpscr + %scevgep12 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 2 + %6 = load double* %scevgep12, align 4 + %7 = fcmp uge double %3, %6 + br i1 %7, label %bb3, label %bb2 + +bb2: ; preds = %bb1 + %8 = add nsw i32 %2, %r.19 + br label %bb4 + +bb3: ; preds = %bb1, %bb + %9 = add nsw i32 %r.19, 1 + br label %bb4 + +bb4: ; preds = %bb3, %bb2 + %r.0 = phi i32 [ %9, %bb3 ], [ %8, %bb2 ] + %10 = add nsw i32 %n.08, 1 + %exitcond = icmp eq i32 %10, %tsets + br i1 %exitcond, label %bb6, label %bb + +bb6: ; preds = %bb4, %entry + %r.1.lcssa = phi i32 [ 0, %entry ], [ %r.0, %bb4 ] + ret i32 %r.1.lcssa +} diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll index 3bc8fee..52e40b2 100644 --- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll +++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll @@ -4,14 +4,14 @@ ; constant offset addressing, so that each of the following stores ; uses the same register. 
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-128] -; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-96] -; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-64] -; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-32] -; CHECK: vstr.32 s{{.*}}, [r{{.*}}] -; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #32] -; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #64] -; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #96] +; CHECK: vstr.32 s{{.*}}, [lr, #-128] +; CHECK: vstr.32 s{{.*}}, [lr, #-96] +; CHECK: vstr.32 s{{.*}}, [lr, #-64] +; CHECK: vstr.32 s{{.*}}, [lr, #-32] +; CHECK: vstr.32 s{{.*}}, [lr] +; CHECK: vstr.32 s{{.*}}, [lr, #32] +; CHECK: vstr.32 s{{.*}}, [lr, #64] +; CHECK: vstr.32 s{{.*}}, [lr, #96] target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 3909554..24eb3a8 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ b/test/CodeGen/ARM/reg_sequence.ll @@ -271,7 +271,6 @@ define arm_aapcs_vfpcc i32 @t10() nounwind { entry: ; CHECK: t10: ; CHECK: vmov.i32 q9, #0x3F000000 -; CHECK: vmov d0, d17 ; CHECK: vmla.f32 q8, q8, d0[0] %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1] diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll index 080341c..9ed6a01 100644 --- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll +++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll @@ -23,8 +23,6 @@ entry: %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2] ; Constant pool load followed by add. ; Then clobber the loaded register, not the sum. -; CHECK: vldr.64 -; CHECK: vadd.f64 ; CHECK: vldr.64 [[LDR:d.*]], ; CHECK: LPC0_0: ; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]] |