summary refs log tree commit diff stats
path: root/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2012-08-01 19:35:18 -0700
committer Eric Anholt <eric@anholt.net> 2012-12-14 16:06:35 -0800
commit c9e48e5b083b6cf97ecdb2d17c874ea631203b06 (patch)
tree 5c29717096b968c3ba3fa292a52d19f90f63dea3 /src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
parent 471af25fc57dc43a8277b4b17ec82547287621d0 (diff)
download external_mesa3d-c9e48e5b083b6cf97ecdb2d17c874ea631203b06.zip
external_mesa3d-c9e48e5b083b6cf97ecdb2d17c874ea631203b06.tar.gz
external_mesa3d-c9e48e5b083b6cf97ecdb2d17c874ea631203b06.tar.bz2
i965: Generalize VS compute-to-MRF for compute-to-another-GRF, too.
No statistically significant performance difference on glbenchmark 2.7 (n=60). It reduces cycles spent in the vertex shader by 3.3% +/- 0.8% (n=5), but that's only about 0.3% of all cycles spent according to the fixed shader_time.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Diffstat (limited to 'src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp')
-rw-r--r-- src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp 58
1 files changed, 56 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
index fa9c155..45be376 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -70,7 +70,7 @@ _register_coalesce(vec4_visitor *v, const char *func)
v->dump_instructions();
}
- v->opt_compute_to_mrf();
+ v->opt_register_coalesce();
if (print) {
printf("%s: instructions after:\n", func);
@@ -78,7 +78,7 @@ _register_coalesce(vec4_visitor *v, const char *func)
}
}
-TEST_F(register_coalesce_test, test_easy_success)
+TEST_F(register_coalesce_test, test_compute_to_mrf)
{
src_reg something = src_reg(v, glsl_type::float_type);
dst_reg temp = dst_reg(v, glsl_type::float_type);
@@ -143,3 +143,57 @@ TEST_F(register_coalesce_test, test_dp4_mrf)
EXPECT_EQ(dp4->dst.file, MRF);
EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
}
+
+TEST_F(register_coalesce_test, test_dp4_grf)
+{
+ src_reg some_src_1 = src_reg(v, glsl_type::vec4_type);
+ src_reg some_src_2 = src_reg(v, glsl_type::vec4_type);
+ dst_reg init;
+
+ dst_reg to = dst_reg(v, glsl_type::vec4_type);
+ dst_reg temp = dst_reg(v, glsl_type::float_type);
+
+ vec4_instruction *dp4 = v->emit(v->DP4(temp, some_src_1, some_src_2));
+ to.writemask = WRITEMASK_Y;
+ v->emit(v->MOV(to, src_reg(temp)));
+
+ /* Consume the result with a MOV of `to` into MRF 0; if nothing read it, the
+ * automatic dead code elimination would remove all of our instructions.
+ */
+ src_reg src = src_reg(to);
+ src.negate = true;
+ v->emit(v->MOV(dst_reg(MRF, 0), src));
+
+ register_coalesce(v);
+
+ EXPECT_EQ(dp4->dst.reg, to.reg);
+ EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
+}
+
+TEST_F(register_coalesce_test, test_channel_mul_grf)
+{
+ src_reg some_src_1 = src_reg(v, glsl_type::vec4_type);
+ src_reg some_src_2 = src_reg(v, glsl_type::vec4_type);
+ dst_reg init;
+
+ dst_reg to = dst_reg(v, glsl_type::vec4_type);
+ dst_reg temp = dst_reg(v, glsl_type::float_type);
+
+ vec4_instruction *mul = v->emit(v->MUL(temp, some_src_1, some_src_2));
+ to.writemask = WRITEMASK_Y;
+ v->emit(v->MOV(to, src_reg(temp)));
+
+ /* Consume the result with a MOV of `to` into MRF 0; if nothing read it, the
+ * automatic dead code elimination would remove all of our instructions.
+ */
+ src_reg src = src_reg(to);
+ src.negate = true;
+ v->emit(v->MOV(dst_reg(MRF, 0), src));
+
+ register_coalesce(v);
+
+ /* The reswizzling code doesn't support this path yet, so check that nothing
+ * bad happened to a scalar non-DP[234]: the MUL must not now write to.reg.
+ */
+ EXPECT_NE(mul->dst.reg, to.reg);
+}