diff options
Diffstat (limited to 'test/CodeGen/NVPTX/fp-contract.ll')
-rw-r--r-- | test/CodeGen/NVPTX/fp-contract.ll | 33 |
1 files changed, 33 insertions, 0 deletions
diff --git a/test/CodeGen/NVPTX/fp-contract.ll b/test/CodeGen/NVPTX/fp-contract.ll
new file mode 100644
index 0000000..3f68b18
--- /dev/null
+++ b/test/CodeGen/NVPTX/fp-contract.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefix=DEFAULT
+
+target triple = "nvptx64-unknown-cuda"
+
+;; Make sure we are generating proper instruction sequences for fused ops
+;; If fusion is allowed, we try to form fma.rn at the PTX level, and emit
+;; add.f32 otherwise. Without an explicit rounding mode on add.f32, ptxas
+;; is free to fuse with a multiply if it is able. If fusion is not allowed,
+;; we do not form fma.rn at the PTX level and explicitly generate add.rn
+;; for all adds to prevent ptxas from fusing the ops.
+
+;; FAST-LABEL: @t0
+;; DEFAULT-LABEL: @t0
+define float @t0(float %a, float %b, float %c) {
+;; FAST: fma.rn.f32
+;; DEFAULT: mul.rn.f32
+;; DEFAULT: add.rn.f32
+  %v0 = fmul float %a, %b
+  %v1 = fadd float %v0, %c
+  ret float %v1
+}
+
+;; FAST-LABEL: @t1
+;; DEFAULT-LABEL: @t1
+define float @t1(float %a, float %b) {
+;; We cannot form an fma here, but make sure we explicitly emit add.rn.f32
+;; to prevent ptxas from fusing this with anything else.
+;; FAST: add.f32
+;; DEFAULT: add.rn.f32
+  %v1 = fadd float %a, %b
+  ret float %v1
+}
|