diff options
author | Justin Holewinski <jholewinski@nvidia.com> | 2013-06-28 17:58:13 +0000 |
---|---|---|
committer | Justin Holewinski <jholewinski@nvidia.com> | 2013-06-28 17:58:13 +0000 |
commit | f51a22e815a7494cbe1aba25d275015a70b5c31d (patch) | |
tree | 1abd574a3cced1781dd5add580a196854a952d02 /lib/Target | |
parent | ac78a0645ddd2046fb66237ba4cfadffa2d367d7 (diff) | |
download | external_llvm-f51a22e815a7494cbe1aba25d275015a70b5c31d.zip external_llvm-f51a22e815a7494cbe1aba25d275015a70b5c31d.tar.gz external_llvm-f51a22e815a7494cbe1aba25d275015a70b5c31d.tar.bz2 |
[NVPTX] Add (1.0 / sqrt(x)) => rsqrt(x) generation when allowable by FP flags
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185178 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/NVPTX/NVPTXInstrInfo.td | 13 |
1 files changed, 12 insertions, 1 deletions
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 553a6ba..b6f7526 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -136,7 +136,8 @@ def hasLDG : Predicate<"Subtarget.hasLDG()">;
 def hasLDU : Predicate<"Subtarget.hasLDU()">;
 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
 
-def doF32FTZ : Predicate<"UseF32FTZ">;
+def doF32FTZ : Predicate<"UseF32FTZ==1">;
+def doNoF32FTZ : Predicate<"UseF32FTZ==0">;
 def doFMAF32 : Predicate<"doFMAF32">;
 def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">;
 
@@ -765,6 +766,16 @@ def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
                         (fdiv Float32Regs:$a, fpimm:$b))]>,
                       Requires<[reqPTX20]>;
 
+//
+// F32 rsqrt
+//
+
+def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b),
+                       "rsqrt.approx.f32 \t$dst, $b;", []>;
+
+def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
+         (RSQRTF32approx1r Float32Regs:$b)>,
+         Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>;
 
 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
    def rrr : NVPTXInst<(outs Float32Regs:$dst),