summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2013-11-19 13:07:12 -0800
committerEric Anholt <eric@anholt.net>2013-11-22 16:36:27 -0800
commit46cf80fb366cb14827724a7fea004e81400cc602 (patch)
tree122db915622540c2c2cd09a163a9c2b9c2027c82 /src/mesa/drivers
parent09db4940eede4236b47e1328503a719719f5c981 (diff)
downloadexternal_mesa3d-46cf80fb366cb14827724a7fea004e81400cc602.zip
external_mesa3d-46cf80fb366cb14827724a7fea004e81400cc602.tar.gz
external_mesa3d-46cf80fb366cb14827724a7fea004e81400cc602.tar.bz2
i965/fs: Make the first pre-allocation heuristic be the post heuristic.
I recently made us try two different things that tried to reduce register pressure so that we would be more likely to allocate successfully. But now that we have the logic for trying two, we can make the first thing we try be the normal, not-prioritizing-register-pressure heuristic. This means one less scheduling pass in the common case of that heuristic not producing spills, plus the best schedule we know how to produce, if that one happens to succeed. This is important, because our register allocation produces a lot of possibly avoidable dependencies for the post-register-allocation schedule, despite ra_set_allocate_round_robin(). GLB2.7: 1.04127% +/- 0.732461% fps improvement (n=31) nexuiz: No difference (n=5) lightsmark: 0.838512% +/- 0.300147% fps improvement (n=86) minecraft apitrace: No difference (n=15) Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp62
-rw-r--r--src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp4
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h1
3 files changed, 44 insertions, 23 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d004521..261f906 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3192,6 +3192,7 @@ fs_visitor::run()
{
sanity_param_count = fp->Base.Parameters->NumParameters;
uint32_t orig_nr_params = c->prog_data.nr_params;
+ bool allocated_without_spills;
assign_binding_table_offsets();
@@ -3276,27 +3277,45 @@ fs_visitor::run()
assign_curb_setup();
assign_urb_setup();
- schedule_instructions(SCHEDULE_PRE_NON_LIFO);
+ static enum instruction_scheduler_mode pre_modes[] = {
+ SCHEDULE_PRE,
+ SCHEDULE_PRE_NON_LIFO,
+ SCHEDULE_PRE_LIFO,
+ };
- if (0)
- assign_regs_trivial();
- else {
- if (!assign_regs(false)) {
- /* Try a non-spilling register allocation again with a different
- * scheduling heuristic.
- */
- schedule_instructions(SCHEDULE_PRE_LIFO);
- if (!assign_regs(false)) {
- if (dispatch_width == 16) {
- fail("Failure to register allocate. Reduce number of "
- "live scalar values to avoid this.");
- } else {
- while (!assign_regs(true)) {
- if (failed)
- break;
- }
- }
- }
+ /* Try each scheduling heuristic to see if it can successfully register
+ * allocate without spilling. They should be ordered by decreasing
+ * performance but increasing likelihood of allocating.
+ */
+ for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
+ schedule_instructions(pre_modes[i]);
+
+ if (0) {
+ assign_regs_trivial();
+ allocated_without_spills = true;
+ } else {
+ allocated_without_spills = assign_regs(false);
+ }
+ if (allocated_without_spills)
+ break;
+ }
+
+ if (!allocated_without_spills) {
+ /* We assume that any spilling is worse than just dropping back to
+ * SIMD8. There's probably actually some intermediate point where
+ * SIMD16 with a couple of spills is still better.
+ */
+ if (dispatch_width == 16) {
+ fail("Failure to register allocate. Reduce number of "
+ "live scalar values to avoid this.");
+ }
+
+ /* Since we're out of heuristics, just go spill registers until we
+ * get an allocation.
+ */
+ while (!assign_regs(true)) {
+ if (failed)
+ break;
}
}
}
@@ -3311,7 +3330,8 @@ fs_visitor::run()
if (failed)
return false;
- schedule_instructions(SCHEDULE_POST);
+ if (!allocated_without_spills)
+ schedule_instructions(SCHEDULE_POST);
if (dispatch_width == 8) {
c->prog_data.reg_blocks = brw_register_blocks(grf_used);
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 1050d67..baf67fb 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -1140,10 +1140,10 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
{
schedule_node *chosen = NULL;
- if (post_reg_alloc) {
+ if (mode == SCHEDULE_PRE || mode == SCHEDULE_POST) {
int chosen_time = 0;
- /* Of the instructions closest ready to execute or the closest to
+ /* Of the instructions ready to execute or the closest to
* being ready, choose the oldest one.
*/
foreach_list(node, &instructions) {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index ae7823e..ff5af93 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -60,6 +60,7 @@ public:
};
enum instruction_scheduler_mode {
+ SCHEDULE_PRE,
SCHEDULE_PRE_NON_LIFO,
SCHEDULE_PRE_LIFO,
SCHEDULE_POST,