author     Renato Golin <renato.golin@linaro.org>   2013-03-21 18:47:47 +0000
committer  Renato Golin <renato.golin@linaro.org>   2013-03-21 18:47:47 +0000
commit     3382a840747c42c4a98eac802ee7b347a8ded1e4 (patch)
tree       de4c0a5050772a0b2c7d3b0f1e771814d0f24fe9 /test/CodeGen
parent     b55b00b4d4b79daec2be43a2a6cd0f5891370296 (diff)
Avoid NEON SP-FP unless unsafe-math or Darwin
NEON is not IEEE 754 compliant, so we should avoid lowering single-precision
floating-point operations with NEON unless unsafe-math is turned on. The
equivalent VFP instructions are IEEE 754 compliant, but on some cores they are
much slower, so some targets (such as the Swift CPU and Darwin) still request
NEON lowering to be on by default.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177651 91177308-0d34-0410-b5e6-96231b3b80d8
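
For illustration, a minimal standalone sketch (not part of this commit) distilled from the
fmuls.ll changes below: with a plain cortex-a8 target the single-precision multiply is expected
to stay on VFP s-registers, and only move to a NEON d-register when --enable-unsafe-fp-math is
passed. The function name @mul_sp and the SAFE/UNSAFE check prefixes are made up for this sketch.

; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=SAFE
; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFE
define float @mul_sp(float %a, float %b) {
entry:
  ; Default (IEEE-compliant) math: the multiply should be lowered to VFP, e.g. vmul.f32 s0, s0, s1
  ; SAFE: vmul.f32 s
  ; Unsafe math: NEON may be used instead, e.g. vmul.f32 d0, d0, d1
  ; UNSAFE: vmul.f32 d
  %r = fmul float %a, %b
  ret float %r
}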
Diffstat (limited to 'test/CodeGen')
-rw-r--r--   test/CodeGen/ARM/fadds.ll        |  7
-rw-r--r--   test/CodeGen/ARM/fmuls.ll        |  7
-rw-r--r--   test/CodeGen/ARM/fnegs.ll        | 11
-rw-r--r--   test/CodeGen/ARM/fnmscs.ll       | 21
-rw-r--r--   test/CodeGen/ARM/fp_convert.ll   |  3
-rw-r--r--   test/CodeGen/ARM/fsubs.ll        |  4
-rw-r--r--   test/CodeGen/ARM/neon-spfp.ll    | 76
-rw-r--r--   test/CodeGen/ARM/neon_minmax.ll  |  2
8 files changed, 120 insertions, 11 deletions
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index 48ef5ed..5aabc66 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
 
 define float @test(float %a, float %b) {
@@ -18,6 +19,8 @@ entry:
 ; NFP0: vadd.f32 s
 
 ; CORTEXA8: test:
-; CORTEXA8: vadd.f32 d
+; CORTEXA8: vadd.f32 s
+; CORTEXA8U: test:
+; CORTEXA8U: vadd.f32 d
 ; CORTEXA9: test:
-; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}}
+; CORTEXA9: vadd.f32 s
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index 1566a92..9569880 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
 
 define float @test(float %a, float %b) {
@@ -18,9 +19,11 @@ entry:
 ; NFP0: vmul.f32 s
 
 ; CORTEXA8: test:
-; CORTEXA8: vmul.f32 d
+; CORTEXA8: vmul.f32 s
+; CORTEXA8U: test:
+; CORTEXA8U: vmul.f32 d
 ; CORTEXA9: test:
-; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}}
+; CORTEXA9: vmul.f32 s
 
 ; VFP2: test2
 define float @test2(float %a) nounwind {
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
index 418b598..30549b0 100644
--- a/test/CodeGen/ARM/fnegs.ll
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
 
 define float @test1(float* %a) {
@@ -22,7 +23,10 @@ entry:
 ; NFP0: vneg.f32 s{{.*}}, s{{.*}}
 
 ; CORTEXA8: test1:
-; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
+; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}}
+
+; CORTEXA8U: test1:
+; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}}
 
 ; CORTEXA9: test1:
 ; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
@@ -46,7 +50,10 @@ entry:
 ; NFP0: vneg.f32 s{{.*}}, s{{.*}}
 
 ; CORTEXA8: test2:
-; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
+; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}}
+
+; CORTEXA8U: test2:
+; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}}
 
 ; CORTEXA9: test2:
 ; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 9ce9b7a..b5b4211 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=A8U
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8
 
 define float @t1(float %acc, float %a, float %b) nounwind {
@@ -11,9 +12,13 @@ entry:
 ; NEON: t1:
 ; NEON: vnmla.f32
 
+; A8U: t1:
+; A8U: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
+; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+
 ; A8: t1:
 ; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
-; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
 	%0 = fmul float %a, %b
 	%1 = fsub float -0.0, %0
 	%2 = fsub float %1, %acc
@@ -28,9 +33,13 @@ entry:
 ; NEON: t2:
 ; NEON: vnmla.f32
 
+; A8U: t2:
+; A8U: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
+; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+
 ; A8: t2:
 ; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
-; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
 	%0 = fmul float %a, %b
 	%1 = fmul float -1.0, %0
 	%2 = fsub float %1, %acc
@@ -45,6 +54,10 @@ entry:
 ; NEON: t3:
 ; NEON: vnmla.f64
 
+; A8U: t3:
+; A8U: vnmul.f64 d
+; A8U: vsub.f64 d
+
 ; A8: t3:
 ; A8: vnmul.f64 d
 ; A8: vsub.f64 d
@@ -62,6 +75,10 @@ entry:
 ; NEON: t4:
 ; NEON: vnmla.f64
 
+; A8U: t4:
+; A8U: vnmul.f64 d
+; A8U: vsub.f64 d
+
 ; A8: t4:
 ; A8: vnmul.f64 d
 ; A8: vsub.f64 d
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index 44298b9..51f3890 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2
 
 define i32 @test1(float %a, float %b) {
diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll
index f039e74..3013e56 100644
--- a/test/CodeGen/ARM/fsubs.ll
+++ b/test/CodeGen/ARM/fsubs.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NFP1U
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
 
@@ -9,5 +10,6 @@ entry:
 }
 
 ; VFP2: vsub.f32 s
-; NFP1: vsub.f32 d
+; NFP1U: vsub.f32 d
+; NFP1: vsub.f32 s
 ; NFP0: vsub.f32 s
diff --git a/test/CodeGen/ARM/neon-spfp.ll b/test/CodeGen/ARM/neon-spfp.ll
new file mode 100644
index 0000000..c00f0d1
--- /dev/null
+++ b/test/CodeGen/ARM/neon-spfp.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 | FileCheck %s -check-prefix=LINUXA5
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=LINUXA8
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 | FileCheck %s -check-prefix=LINUXA9
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 | FileCheck %s -check-prefix=LINUXA15
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift | FileCheck %s -check-prefix=LINUXSWIFT
+
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA5
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA8
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA9
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA15
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFESWIFT
+
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a5 | FileCheck %s -check-prefix=DARWINA5
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=DARWINA8
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=DARWINA9
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a15 | FileCheck %s -check-prefix=DARWINA15
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=swift | FileCheck %s -check-prefix=DARWINSWIFT
+
+; This test makes sure we're not lowering VMUL.f32 D* (aka. NEON) for single-prec. FP ops, since
+; NEON is not fully IEEE 754 compliant, unless unsafe-math is selected.
+
+@.str = private unnamed_addr constant [12 x i8] c"S317\09%.5g \0A\00", align 1
+
+; CHECK-LINUXA5: main:
+; CHECK-LINUXA8: main:
+; CHECK-LINUXA9: main:
+; CHECK-LINUXA15: main:
+; CHECK-LINUXSWIFT: main:
+; CHECK-UNSAFEA5: main:
+; CHECK-UNSAFEA8: main:
+; CHECK-UNSAFEA9: main:
+; CHECK-UNSAFEA15: main:
+; CHECK-UNSAFESWIFT: main:
+; CHECK-DARWINA5: main:
+; CHECK-DARWINA8: main:
+; CHECK-DARWINA9: main:
+; CHECK-DARWINA15: main:
+; CHECK-DARWINSWIFT: main:
+define i32 @main() {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %q.03 = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
+  %mul = fmul float %q.03, 0x3FEFAE1480000000
+; CHECK-LINUXA5: vmul.f32 s{{[0-9]*}}
+; CHECK-LINUXA8: vmul.f32 s{{[0-9]*}}
+; CHECK-LINUXA9: vmul.f32 s{{[0-9]*}}
+; CHECK-LINUXA15: vmul.f32 s{{[0-9]*}}
+; Swift is *always* unsafe
+; CHECK-LINUXSWIFT: vmul.f32 d{{[0-9]*}}
+
+; CHECK-UNSAFEA5: vmul.f32 d{{[0-9]*}}
+; CHECK-UNSAFEA8: vmul.f32 d{{[0-9]*}}
+; A9 and A15 don't need this
+; CHECK-UNSAFEA9: vmul.f32 s{{[0-9]*}}
+; CHECK-UNSAFEA15: vmul.f32 s{{[0-9]*}}
+; CHECK-UNSAFESWIFT: vmul.f32 d{{[0-9]*}}
+
+; CHECK-DARWINA5: vmul.f32 d{{[0-9]*}}
+; CHECK-DARWINA8: vmul.f32 d{{[0-9]*}}
+; CHECK-DARWINA9: vmul.f32 s{{[0-9]*}}
+; CHECK-DARWINA15: vmul.f32 s{{[0-9]*}}
+; CHECK-DARWINSWIFT: vmul.f32 d{{[0-9]*}}
+  %conv = fpext float %mul to double
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), double %conv) #1
+  %inc = add nsw i32 %i.04, 1
+  %exitcond = icmp eq i32 %inc, 16000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...)
diff --git a/test/CodeGen/ARM/neon_minmax.ll b/test/CodeGen/ARM/neon_minmax.ll
index d301c6a..0a7c8b2 100644
--- a/test/CodeGen/ARM/neon_minmax.ll
+++ b/test/CodeGen/ARM/neon_minmax.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
 
 define float @fmin_ole(float %x) nounwind {
 ;CHECK: fmin_ole: