summary refs log tree commit diff stats
path: root/src/glsl/lower_instructions.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/glsl/lower_instructions.cpp')
-rw-r--r-- src/glsl/lower_instructions.cpp 65
1 files changed, 39 insertions, 26 deletions
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 6842853..09afe55 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -36,7 +36,7 @@
* - EXP_TO_EXP2
* - POW_TO_EXP2
* - LOG_TO_LOG2
- * - MOD_TO_FRACT
+ * - MOD_TO_FLOOR
* - LDEXP_TO_ARITH
* - BITFIELD_INSERT_TO_BFM_BFI
* - CARRY_TO_ARITH
@@ -77,14 +77,17 @@
* Many older GPUs don't have an x**y instruction. For these GPUs, convert
* x**y to 2**(y * log2(x)).
*
- * MOD_TO_FRACT:
+ * MOD_TO_FLOOR:
* -------------
- * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
+ * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1))
*
* Many GPUs don't have a MOD instruction (945 and 965 included), and
* if we have to break it down like this anyway, it gives an
* opportunity to do things like constant fold the (1.0 / op1) easily.
*
+ * Note: this used to be implemented as op1 * fract(op0 / op1), but that
+ * implementation had significant precision errors.
+ *
* LDEXP_TO_ARITH:
* -------------
* Converts ir_binop_ldexp to arithmetic and bit operations.
@@ -136,7 +139,7 @@ private:
void sub_to_add_neg(ir_expression *);
void div_to_mul_rcp(ir_expression *);
void int_div_to_mul_rcp(ir_expression *);
- void mod_to_fract(ir_expression *);
+ void mod_to_floor(ir_expression *);
void exp_to_exp2(ir_expression *);
void pow_to_exp2(ir_expression *);
void log_to_log2(ir_expression *);
@@ -276,22 +279,29 @@ lower_instructions_visitor::log_to_log2(ir_expression *ir)
}
void
-lower_instructions_visitor::mod_to_fract(ir_expression *ir)
+lower_instructions_visitor::mod_to_floor(ir_expression *ir)
{
- ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
- ir_var_temporary);
- this->base_ir->insert_before(temp);
-
- ir_assignment *const assign =
- new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
- ir->operands[1], NULL);
-
- this->base_ir->insert_before(assign);
+ ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x",
+ ir_var_temporary);
+ ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y",
+ ir_var_temporary);
+ this->base_ir->insert_before(x);
+ this->base_ir->insert_before(y);
+
+ ir_assignment *const assign_x =
+ new(ir) ir_assignment(new(ir) ir_dereference_variable(x),
+ ir->operands[0], NULL);
+ ir_assignment *const assign_y =
+ new(ir) ir_assignment(new(ir) ir_dereference_variable(y),
+ ir->operands[1], NULL);
+
+ this->base_ir->insert_before(assign_x);
+ this->base_ir->insert_before(assign_y);
ir_expression *const div_expr =
- new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
- ir->operands[0],
- new(ir) ir_dereference_variable(temp));
+ new(ir) ir_expression(ir_binop_div, x->type,
+ new(ir) ir_dereference_variable(x),
+ new(ir) ir_dereference_variable(y));
/* Don't generate new IR that would need to be lowered in an additional
* pass.
@@ -299,14 +309,17 @@ lower_instructions_visitor::mod_to_fract(ir_expression *ir)
if (lowering(DIV_TO_MUL_RCP))
div_to_mul_rcp(div_expr);
- ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
- ir->operands[0]->type,
- div_expr,
- NULL);
+ ir_expression *const floor_expr =
+ new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
- ir->operation = ir_binop_mul;
- ir->operands[0] = new(ir) ir_dereference_variable(temp);
- ir->operands[1] = expr;
+ ir_expression *const mul_expr =
+ new(ir) ir_expression(ir_binop_mul,
+ new(ir) ir_dereference_variable(y),
+ floor_expr);
+
+ ir->operation = ir_binop_sub;
+ ir->operands[0] = new(ir) ir_dereference_variable(x);
+ ir->operands[1] = mul_expr;
this->progress = true;
}
@@ -535,8 +548,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
break;
case ir_binop_mod:
- if (lowering(MOD_TO_FRACT) && ir->type->is_float())
- mod_to_fract(ir);
+ if (lowering(MOD_TO_FLOOR) && ir->type->is_float())
+ mod_to_floor(ir);
break;
case ir_binop_pow: