aboutsummaryrefslogtreecommitdiffstats
path: root/test/CodeGen/ARM/fpcmp-opt.ll
diff options
context:
space:
mode:
author: Evan Cheng <evan.cheng@apple.com> 2010-07-13 19:27:42 +0000
committer: Evan Cheng <evan.cheng@apple.com> 2010-07-13 19:27:42 +0000
commit: 218977b53eb215e5534db2f727d109ab18817cc1 (patch)
tree: 045e03459bb634046cbc898c34d0f0ee4efd5e23 /test/CodeGen/ARM/fpcmp-opt.ll
parent: 7a415999625f9791a8a7eea2027e628e29de15c0 (diff)
download: external_llvm-218977b53eb215e5534db2f727d109ab18817cc1.zip
external_llvm-218977b53eb215e5534db2f727d109ab18817cc1.tar.gz
external_llvm-218977b53eb215e5534db2f727d109ab18817cc1.tar.bz2
Extend the r107852 optimization, which turns some fp compares into code sequences using only i32 operations. It now optimizes some f64 compares when fp compare is exceptionally slow (e.g. cortex-a8). It also catches comparisons against 0.0.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108258 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/ARM/fpcmp-opt.ll')
-rw-r--r-- test/CodeGen/ARM/fpcmp-opt.ll 71
1 files changed, 62 insertions, 9 deletions
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index 8016033..6875288 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -1,16 +1,24 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s
; rdar://7461510
define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
entry:
-; CHECK: t1:
-; CHECK-NOT: vldr
-; CHECK: ldr
-; CHECK: ldr
-; CHECK: cmp r0, r1
-; CHECK-NOT: vcmpe.f32
-; CHECK-NOT: vmrs
-; CHECK: beq
+; FINITE: t1:
+; FINITE-NOT: vldr
+; FINITE: ldr
+; FINITE: ldr
+; FINITE: cmp r0, r1
+; FINITE-NOT: vcmpe.f32
+; FINITE-NOT: vmrs
+; FINITE: beq
+
+; NAN: t1:
+; NAN: vldr.32 s0,
+; NAN: vldr.32 s1,
+; NAN: vcmpe.f32 s1, s0
+; NAN: vmrs apsr_nzcv, fpscr
+; NAN: beq
%0 = load float* %a
%1 = load float* %b
%2 = fcmp une float %0, %1
@@ -25,5 +33,50 @@ bb2:
ret i32 %4
}
+define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
+entry:
+; FINITE: t2:
+; FINITE-NOT: vldr
+; FINITE: ldrd r0, [r0]
+; FINITE: cmp r0, #0
+; FINITE: cmpeq r1, #0
+; FINITE-NOT: vcmpe.f32
+; FINITE-NOT: vmrs
+; FINITE: bne
+ %0 = load double* %a
+ %1 = fcmp oeq double %0, 0.000000e+00
+ br i1 %1, label %bb1, label %bb2
+
+bb1:
+ %2 = call i32 @bar()
+ ret i32 %2
+
+bb2:
+ %3 = call i32 @foo()
+ ret i32 %3
+}
+
+define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
+entry:
+; FINITE: t3:
+; FINITE-NOT: vldr
+; FINITE: ldr r0, [r0]
+; FINITE: cmp r0, #0
+; FINITE-NOT: vcmpe.f32
+; FINITE-NOT: vmrs
+; FINITE: bne
+ %0 = load float* %a
+ %1 = fcmp oeq float %0, 0.000000e+00
+ br i1 %1, label %bb1, label %bb2
+
+bb1:
+ %2 = call i32 @bar()
+ ret i32 %2
+
+bb2:
+ %3 = call i32 @foo()
+ ret i32 %3
+}
+
declare i32 @bar()
declare i32 @foo()