glsl: Convert mix() to use a new ir_triop_lrp opcode.

Many GPUs have an instruction to do linear interpolation which is more efficient than simply performing the algebra necessary (two multiplies, an add, and a subtract). Pattern matching or peepholing this is more desirable, but can be tricky. By using an opcode, we can at least make shaders which use the mix() built-in get the more efficient behavior. Currently, all consumers lower ir_triop_lrp. Subsequent patches will actually generate different code. v2 [mattst88]: - Add LRP_TO_ARITH flag to ir_to_mesa.cpp. Will be removed in a subsequent patch and ir_triop_lrp translated directly. v3 [mattst88]: - Move changes from the next patch to opt_algebraic.cpp to accept 3-src operations. Reviewed-by: Matt Turner <mattst88@gmail.com> Reviewed-by: Eric Anholt <eric@anholt.net> Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
author: Kenneth Graunke <kenneth@whitecape.org> 2012-12-01 23:49:26 -0800
committer: Matt Turner <mattst88@gmail.com> 2013-02-28 13:18:59 -0800
commit: 93066ce1299a7be8f670e527f249940c635605b4 (patch)
tree: 1e7c958b7feb9a605a6c9f482bc68798990c89f4 /src/glsl/lower_instructions.cpp
parent: 18281d60889c7bb0ef14d2aa8a080cdaead7adb3 (diff)
download: external_mesa3d-93066ce1299a7be8f670e527f249940c635605b4.zip
external_mesa3d-93066ce1299a7be8f670e527f249940c635605b4.tar.gz
external_mesa3d-93066ce1299a7be8f670e527f249940c635605b4.tar.bz2
1 files changed, 35 insertions, 0 deletions
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index a8ef765..1ce7b7c 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -37,6 +37,7 @@
  * - POW_TO_EXP2
  * - LOG_TO_LOG2
  * - MOD_TO_FRACT
+ * - LRP_TO_ARITH
  *
  * SUB_TO_ADD_NEG:
  * ---------------
@@ -79,13 +80,20 @@
  * Many GPUs don't have a MOD instruction (945 and 965 included), and
  * if we have to break it down like this anyway, it gives an
  * opportunity to do things like constant fold the (1.0 / op1) easily.
+ *
+ * LRP_TO_ARITH:
+ * -------------
+ * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2).
  */
 
 #include "main/core.h" /* for M_LOG2E */
 #include "glsl_types.h"
 #include "ir.h"
+#include "ir_builder.h"
 #include "ir_optimization.h"
 
+using namespace ir_builder;
+
 class lower_instructions_visitor : public ir_hierarchical_visitor {
 public:
    lower_instructions_visitor(unsigned lower)
@@ -105,6 +113,7 @@ private:
    void exp_to_exp2(ir_expression *);
    void pow_to_exp2(ir_expression *);
    void log_to_log2(ir_expression *);
+   void lrp_to_arith(ir_expression *);
 };
 
 /**
@@ -268,6 +277,27 @@ lower_instructions_visitor::mod_to_fract(ir_expression *ir)
    this->progress = true;
 }
 
+void
+lower_instructions_visitor::lrp_to_arith(ir_expression *ir)
+{
+   /* Rewrite ir in place as an add: (lrp x y a) -> x*(1-a) + y*a */
+
+   /* Save op2 in a temporary set before base_ir; it is used twice below. */
+   ir_variable *temp = new(ir) ir_variable(ir->operands[2]->type, "lrp_factor",
+					   ir_var_temporary);
+   this->base_ir->insert_before(temp);
+   this->base_ir->insert_before(assign(temp, ir->operands[2]));
+
+   ir_constant *one = new(ir) ir_constant(1.0f);
+   /* Reuse this node as the add; its third operand slot is cleared. */
+   ir->operation = ir_binop_add;
+   ir->operands[0] = mul(ir->operands[0], sub(one, temp));
+   ir->operands[1] = mul(ir->operands[1], temp);
+   ir->operands[2] = NULL;
+
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -304,6 +334,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
 	 pow_to_exp2(ir);
       break;
 
+   case ir_triop_lrp:
+      if (lowering(LRP_TO_ARITH))
+	 lrp_to_arith(ir);
+      break;
+
    default:
       return visit_continue;
    }
author	Kenneth Graunke <kenneth@whitecape.org>	2012-12-01 23:49:26 -0800
committer	Matt Turner <mattst88@gmail.com>	2013-02-28 13:18:59 -0800
commit	93066ce1299a7be8f670e527f249940c635605b4 (patch)
tree	1e7c958b7feb9a605a6c9f482bc68798990c89f4 /src/glsl/lower_instructions.cpp
parent	18281d60889c7bb0ef14d2aa8a080cdaead7adb3 (diff)
download	external_mesa3d-93066ce1299a7be8f670e527f249940c635605b4.zip external_mesa3d-93066ce1299a7be8f670e527f249940c635605b4.tar.gz external_mesa3d-93066ce1299a7be8f670e527f249940c635605b4.tar.bz2