diff options
author | Evan Cheng <evan.cheng@apple.com> | 2011-02-23 02:24:55 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2011-02-23 02:24:55 +0000 |
commit | e573fb32556ba8430ccd723f71233cdf56b76340 (patch) | |
tree | 5a0d83b9a67c56dad00cf86a21ec6ed04e3bbfd5 /test/CodeGen | |
parent | d2a50734234a80893ad71da90d9f32032c47e000 (diff) | |
download | external_llvm-e573fb32556ba8430ccd723f71233cdf56b76340.zip external_llvm-e573fb32556ba8430ccd723f71233cdf56b76340.tar.gz external_llvm-e573fb32556ba8430ccd723f71233cdf56b76340.tar.bz2 |
More fcopysign correctness and performance fixes.
The previous codegen for the slow path (when values are in VFP / NEON
registers) was incorrect if the source is NaN.
The new codegen uses NEON vbsl instruction to copy the sign bit. e.g.
vmov.i32 d1, #0x80000000
vbsl d1, d2, d0
If NEON is not available, it uses integer instructions to copy the sign bit.
rdar://9034702
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126295 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/ARM/fcopysign.ll | 17 |
1 files changed, 8 insertions, 9 deletions
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index 1050cd2..d30e3eb 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -9,9 +9,8 @@ entry:
 ; SOFT: bfi r0, r1, #31, #1
 
 ; HARD: test1:
-; HARD: vabs.f32 d0, d0
-; HARD: cmp r0, #0
-; HARD: vneglt.f32 s0, s0
+; HARD: vmov.i32 [[REG1:(d[0-9]+)]], #0x80000000
+; HARD: vbsl [[REG1]], d2, d0
   %0 = tail call float @copysignf(float %x, float %y) nounwind
   ret float %0
 }
@@ -23,9 +22,9 @@ entry:
 ; SOFT: bfi r1, r2, #31, #1
 
 ; HARD: test2:
-; HARD: vabs.f64 d0, d0
-; HARD: cmp r1, #0
-; HARD: vneglt.f64 d0, d0
+; HARD: vmov.i32 [[REG2:(d[0-9]+)]], #0x80000000
+; HARD: vshl.i64 [[REG2]], [[REG2]], #32
+; HARD: vbsl [[REG2]], d1, d0
   %0 = tail call double @copysign(double %x, double %y) nounwind
   ret double %0
 }
@@ -33,9 +32,9 @@ entry:
 define double @test3(double %x, double %y, double %z) nounwind {
 entry:
 ; SOFT: test3:
-; SOFT: vabs.f64
-; SOFT: cmp {{.*}}, #0
-; SOFT: vneglt.f64
+; SOFT: vmov.i32 [[REG3:(d[0-9]+)]], #0x80000000
+; SOFT: vshl.i64 [[REG3]], [[REG3]], #32
+; SOFT: vbsl [[REG3]],
   %0 = fmul double %x, %y
   %1 = tail call double @copysign(double %0, double %z) nounwind
   ret double %1