summary | refs | log | tree | commit | diff | stats
path: root/src/glsl/lower_instructions.cpp
diff options
context:
space:
mode:
author Kenneth Graunke <kenneth@whitecape.org> 2012-12-01 23:49:26 -0800
committer Matt Turner <mattst88@gmail.com> 2013-02-28 13:18:59 -0800
commit 93066ce1299a7be8f670e527f249940c635605b4 (patch)
tree 1e7c958b7feb9a605a6c9f482bc68798990c89f4 /src/glsl/lower_instructions.cpp
parent 18281d60889c7bb0ef14d2aa8a080cdaead7adb3 (diff)
download external_mesa3d-93066ce1299a7be8f670e527f249940c635605b4.zip
external_mesa3d-93066ce1299a7be8f670e527f249940c635605b4.tar.gz
external_mesa3d-93066ce1299a7be8f670e527f249940c635605b4.tar.bz2
glsl: Convert mix() to use a new ir_triop_lrp opcode.
Many GPUs have an instruction to do linear interpolation which is more efficient than simply performing the algebra necessary (two multiplies, an add, and a subtract).

Pattern matching or peepholing this is more desirable, but can be tricky. By using an opcode, we can at least make shaders which use the mix() built-in get the more efficient behavior.

Currently, all consumers lower ir_triop_lrp. Subsequent patches will actually generate different code.

v2 [mattst88]:
- Add LRP_TO_ARITH flag to ir_to_mesa.cpp. Will be removed in a subsequent patch and ir_triop_lrp translated directly.

v3 [mattst88]:
- Move changes from the next patch to opt_algebraic.cpp to accept 3-src operations.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Diffstat (limited to 'src/glsl/lower_instructions.cpp')
-rw-r--r-- src/glsl/lower_instructions.cpp 35
1 files changed, 35 insertions, 0 deletions
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index a8ef765..1ce7b7c 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -37,6 +37,7 @@
* - POW_TO_EXP2
* - LOG_TO_LOG2
* - MOD_TO_FRACT
+ * - LRP_TO_ARITH
*
* SUB_TO_ADD_NEG:
* ---------------
@@ -79,13 +80,20 @@
* Many GPUs don't have a MOD instruction (945 and 965 included), and
* if we have to break it down like this anyway, it gives an
* opportunity to do things like constant fold the (1.0 / op1) easily.
+ *
+ * LRP_TO_ARITH:
+ * -------------
+ * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2).
*/
#include "main/core.h" /* for M_LOG2E */
#include "glsl_types.h"
#include "ir.h"
+#include "ir_builder.h"
#include "ir_optimization.h"
+using namespace ir_builder;
+
class lower_instructions_visitor : public ir_hierarchical_visitor {
public:
lower_instructions_visitor(unsigned lower)
@@ -105,6 +113,7 @@ private:
void exp_to_exp2(ir_expression *);
void pow_to_exp2(ir_expression *);
void log_to_log2(ir_expression *);
+ void lrp_to_arith(ir_expression *);
};
/**
@@ -268,6 +277,27 @@ lower_instructions_visitor::mod_to_fract(ir_expression *ir)
this->progress = true;
}
+void
+lower_instructions_visitor::lrp_to_arith(ir_expression *ir)
+{
+ /* (lrp x y a) -> x*(1-a) + y*a
+  *
+  * Rewrites the given ir_triop_lrp expression in place: the node itself
+  * becomes an ir_binop_add whose two operands are the products built
+  * below, with x, y and a being operands 0, 1 and 2 respectively.
+  */
+
+ /* Save op2
+  *
+  * The interpolation factor is referenced by both products, so it is
+  * first assigned to a fresh temporary; the declaration and the
+  * assignment are inserted immediately before base_ir.
+  */
+ ir_variable *temp = new(ir) ir_variable(ir->operands[2]->type, "lrp_factor",
+ ir_var_temporary);
+ this->base_ir->insert_before(temp);
+ this->base_ir->insert_before(assign(temp, ir->operands[2]));
+
+ ir_constant *one = new(ir) ir_constant(1.0f); /* the 1.0 in (1 - a) */
+
+ ir->operation = ir_binop_add;
+ ir->operands[0] = mul(ir->operands[0], sub(one, temp)); /* x * (1 - a) */
+ ir->operands[1] = mul(ir->operands[1], temp); /* y * a */
+ ir->operands[2] = NULL; /* the add is binary; clear the third slot */
+
+ this->progress = true; /* record that this pass changed the IR */
+}
+
ir_visitor_status
lower_instructions_visitor::visit_leave(ir_expression *ir)
{
@@ -304,6 +334,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
pow_to_exp2(ir);
break;
+ case ir_triop_lrp:
+ if (lowering(LRP_TO_ARITH))
+ lrp_to_arith(ir);
+ break;
+
default:
return visit_continue;
}