summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZack Rusin <zackr@vmware.com>2013-07-11 12:16:06 -0400
committerZack Rusin <zackr@vmware.com>2013-07-11 20:19:04 -0400
commit00cd455bd50c6b16b2f72a6d2726de3d3818c7f5 (patch)
tree2b13c67bd84020a1bad285c3678b497a571ae1a4
parenta171812d27afb1a52c5d81deaa6027f30bc102e8 (diff)
downloadexternal_mesa3d-00cd455bd50c6b16b2f72a6d2726de3d3818c7f5.zip
external_mesa3d-00cd455bd50c6b16b2f72a6d2726de3d3818c7f5.tar.gz
external_mesa3d-00cd455bd50c6b16b2f72a6d2726de3d3818c7f5.tar.bz2
gallium: fixup definitions of the rsq and sqrt
GLSL spec says that rsq is undefined for src<=0, but the D3D10 spec says it needs to be a NaN, so let's stop taking an absolute value of the source, which completely breaks that behavior. For the gl program we can simply insert an extra abs instruction, which produces the desired behavior there.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c5
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c16
-rw-r--r--src/gallium/docs/source/tgsi.rst6
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.c6
4 files changed, 15 insertions, 18 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 68bd124..e99c8ef 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -633,8 +633,6 @@ rsq_emit(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
- emit_data->args[0] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
- emit_data->args[0]);
if (bld_base->rsq_action.emit) {
bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
} else {
@@ -1349,9 +1347,6 @@ rcp_emit_cpu(
}
/* Reciprical squareroot (CPU Only) */
-
-/* This is not the same as TGSI_OPCODE_RSQ, which requres the argument to be
- * greater than or equal to 0 */
static void
recip_sqrt_emit_cpu(
const struct lp_build_tgsi_action * action,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index bed0852..1f8e62d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -339,20 +339,20 @@ micro_rsq(union tgsi_exec_channel *dst,
assert(src->f[2] != 0.0f);
assert(src->f[3] != 0.0f);
#endif
- dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
- dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
- dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
- dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
+ dst->f[0] = 1.0f / sqrtf(src->f[0]);
+ dst->f[1] = 1.0f / sqrtf(src->f[1]);
+ dst->f[2] = 1.0f / sqrtf(src->f[2]);
+ dst->f[3] = 1.0f / sqrtf(src->f[3]);
}
static void
micro_sqrt(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
{
- dst->f[0] = sqrtf(fabsf(src->f[0]));
- dst->f[1] = sqrtf(fabsf(src->f[1]));
- dst->f[2] = sqrtf(fabsf(src->f[2]));
- dst->f[3] = sqrtf(fabsf(src->f[3]));
+ dst->f[0] = sqrtf(src->f[0]);
+ dst->f[1] = sqrtf(src->f[1]);
+ dst->f[2] = sqrtf(src->f[2]);
+ dst->f[3] = sqrtf(src->f[3]);
}
static void
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 13daa62..4d26c46 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -94,16 +94,16 @@ This instruction replicates its result.
.. opcode:: RSQ - Reciprocal Square Root
-This instruction replicates its result.
+This instruction replicates its result. The results are undefined for src <= 0.
.. math::
- dst = \frac{1}{\sqrt{|src.x|}}
+ dst = \frac{1}{\sqrt{src.x}}
.. opcode:: SQRT - Square Root
-This instruction replicates its result.
+This instruction replicates its result. The results are undefined for src < 0.
.. math::
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 7f30697..67c6f53 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -615,8 +615,6 @@ translate_opcode( unsigned op )
return TGSI_OPCODE_RCP;
case OPCODE_RET:
return TGSI_OPCODE_RET;
- case OPCODE_RSQ:
- return TGSI_OPCODE_RSQ;
case OPCODE_SCS:
return TGSI_OPCODE_SCS;
case OPCODE_SEQ:
@@ -756,6 +754,10 @@ compile_instruction(
emit_ddy( t, dst[0], &inst->SrcReg[0] );
break;
+ case OPCODE_RSQ:
+ ureg_RSQ( ureg, dst[0], ureg_abs(src[0]) );
+ break;
+
default:
ureg_insn( ureg,
translate_opcode( inst->Opcode ),