summary | refs | log | tree | commit | diff | stats
path: root/src/glsl/opt_algebraic.cpp
diff options
context:
space:
mode:
author: Kenneth Graunke <kenneth@whitecape.org> 2014-01-05 22:57:01 -0800
committer: Kenneth Graunke <kenneth@whitecape.org> 2014-01-07 12:54:57 -0800
commit: 847bc36a38d42967ad6bf0492fe90a4892d9d799 (patch)
tree: da704ff271f8302bd2d25333d9b1118ffd8cf37b /src/glsl/opt_algebraic.cpp
parent: 5e3fd6a9dbd62ca5cd9965282fac01a34d23733e (diff)
download: external_mesa3d-847bc36a38d42967ad6bf0492fe90a4892d9d799.zip
external_mesa3d-847bc36a38d42967ad6bf0492fe90a4892d9d799.tar.gz
external_mesa3d-847bc36a38d42967ad6bf0492fe90a4892d9d799.tar.bz2
glsl: Optimize pow(2, x) --> exp2(x).
On Haswell, POW takes 24 cycles, while EXP2 only takes 14. Plus, using POW requires putting 2.0 in a register, while EXP2 doesn't. I believe that EXP2 will be faster than POW on basically all GPUs, so it makes sense to optimize it.

Looking at the savage2 subset of shader-db:

total instructions in shared programs: 113225 -> 113179 (-0.04%)
instructions in affected programs: 2139 -> 2093 (-2.15%)
instances of 'math pow': 795 -> 749 (-6.14%)
instances of 'math exp': 389 -> 435 (11.8%)

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Diffstat (limited to 'src/glsl/opt_algebraic.cpp')
-rw-r--r-- src/glsl/opt_algebraic.cpp 11
1 files changed, 11 insertions, 0 deletions
diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp
index 5e885f7..332f0b7 100644
--- a/src/glsl/opt_algebraic.cpp
+++ b/src/glsl/opt_algebraic.cpp
@@ -88,6 +88,12 @@ is_vec_one(ir_constant *ir)
}
static inline bool
+is_vec_two(ir_constant *ir)
+{
+ return (ir == NULL) ? false : ir->is_value(2.0, 2);
+}
+
+static inline bool
is_vec_negative_one(ir_constant *ir)
{
return (ir == NULL) ? false : ir->is_negative_one();
@@ -420,6 +426,11 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
/* 1^x == 1 */
if (is_vec_one(op_const[0]))
return op_const[0];
+
+ /* pow(2,x) == exp2(x) */
+ if (is_vec_two(op_const[0]))
+ return expr(ir_unop_exp2, ir->operands[1]);
+
break;
case ir_unop_rcp: