summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
diff options
context:
space:
mode:
author: Kenneth Graunke <kenneth@whitecape.org> 2016-01-27 12:21:04 -0800
committer: Kenneth Graunke <kenneth@whitecape.org> 2016-04-04 11:35:16 -0700
commit: 65fbc43d54403905e3eaea02372b5a364dc1d773 (patch)
tree: 612a74b4914f251f97f63fb1b46ef252b59c2228 /src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
parent: 3aa51e02d6659c8f2e34a4153ae140a4f90cc51b (diff)
downloadexternal_mesa3d-65fbc43d54403905e3eaea02372b5a364dc1d773.zip
external_mesa3d-65fbc43d54403905e3eaea02372b5a364dc1d773.tar.gz
external_mesa3d-65fbc43d54403905e3eaea02372b5a364dc1d773.tar.bz2
i965: Add an INTEL_PRECISE_TRIG=1 option to fix SIN/COS output range.
The SIN and COS instructions on Intel hardware can produce values slightly outside of the [-1.0, 1.0] range for a small set of values. Obviously, this can break everyone's expectations about trig functions.

According to an internal presentation, the COS instruction can produce a value up to 1.000027 for inputs in the range (0.08296, 0.09888). One suggested workaround is to multiply by 0.99997, scaling down the amplitude slightly. Apparently this also minimizes the error function, reducing the maximum error from 0.00006 to about 0.00003.

When enabled, fixes 16 dEQP precision tests, dEQP-GLES31.functional.shaders.builtin_functions.precision.{cos,sin}.{highp,mediump}_compute.{scalar,vec2,vec3,vec4}, at the cost of making every sin and cos call more expensive (about twice the number of cycles on recent hardware). Enabling this option has been shown to reduce GPUTest Volplosion performance by about 10%.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vec4_nir.cpp')
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp  16
1 files changed, 14 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index ee6929b..6c8fd06 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1101,12 +1101,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_fsin:
- inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
+ if (!compiler->precise_trig) {
+ inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
+ } else {
+ src_reg tmp = src_reg(this, glsl_type::vec4_type);
+ inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]);
+ inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));
+ }
inst->saturate = instr->dest.saturate;
break;
case nir_op_fcos:
- inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
+ if (!compiler->precise_trig) {
+ inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
+ } else {
+ src_reg tmp = src_reg(this, glsl_type::vec4_type);
+ inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]);
+ inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));
+ }
inst->saturate = instr->dest.saturate;
break;