summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
diff options
context:
space:
mode:
author: Francisco Jerez <currojerez@riseup.net> 2016-05-18 22:13:52 -0700
committer: Francisco Jerez <currojerez@riseup.net> 2016-05-27 23:29:04 -0700
commitecd7a7255aa1d6c313ead14e1b472c073c7111ac (patch)
treebb4b0b92bad47f89218304954818ae63f5da6c3e /src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
parent0fec265373f269d116f6d4de900b208fffabe2a1 (diff)
downloadexternal_mesa3d-ecd7a7255aa1d6c313ead14e1b472c073c7111ac.zip
external_mesa3d-ecd7a7255aa1d6c313ead14e1b472c073c7111ac.tar.gz
external_mesa3d-ecd7a7255aa1d6c313ead14e1b472c073c7111ac.tar.bz2
i965/fs: Keep track of flag dependencies with byte granularity during scheduling.
This prevents false dependencies from being created between instructions that write disjoint 8-bit portions of the flag register. It should also make sure that the scheduler considers dependencies between instructions that write or read multiple flag subregisters at once (e.g. 32-wide predication or conditional mods).
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp 41
1 file changed, 31 insertions, 10 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 6d6a19d..8afdc25 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -914,7 +914,7 @@ fs_instruction_scheduler::calculate_deps()
*/
schedule_node *last_grf_write[grf_count * 16];
schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)];
- schedule_node *last_conditional_mod[2] = { NULL, NULL };
+ schedule_node *last_conditional_mod[4] = {};
schedule_node *last_accumulator_write = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
* GRFs, so they can be tracked separately. We don't really write
@@ -968,8 +968,13 @@ fs_instruction_scheduler::calculate_deps()
}
}
- if (inst->reads_flag()) {
- add_dep(last_conditional_mod[inst->flag_subreg], n);
+ if (const unsigned mask = inst->flags_read(v->devinfo)) {
+ assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
+
+ for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) {
+ if (mask & (1 << i))
+ add_dep(last_conditional_mod[i], n);
+ }
}
if (inst->reads_accumulator_implicitly()) {
@@ -1023,9 +1028,15 @@ fs_instruction_scheduler::calculate_deps()
}
}
- if (inst->writes_flag()) {
- add_dep(last_conditional_mod[inst->flag_subreg], n, 0);
- last_conditional_mod[inst->flag_subreg] = n;
+ if (const unsigned mask = inst->flags_written()) {
+ assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
+
+ for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) {
+ if (mask & (1 << i)) {
+ add_dep(last_conditional_mod[i], n, 0);
+ last_conditional_mod[i] = n;
+ }
+ }
}
if (inst->writes_accumulator_implicitly(v->devinfo) &&
@@ -1080,8 +1091,13 @@ fs_instruction_scheduler::calculate_deps()
}
}
- if (inst->reads_flag()) {
- add_dep(n, last_conditional_mod[inst->flag_subreg]);
+ if (const unsigned mask = inst->flags_read(v->devinfo)) {
+ assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
+
+ for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) {
+ if (mask & (1 << i))
+ add_dep(n, last_conditional_mod[i]);
+ }
}
if (inst->reads_accumulator_implicitly()) {
@@ -1132,8 +1148,13 @@ fs_instruction_scheduler::calculate_deps()
}
}
- if (inst->writes_flag()) {
- last_conditional_mod[inst->flag_subreg] = n;
+ if (const unsigned mask = inst->flags_written()) {
+ assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
+
+ for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) {
+ if (mask & (1 << i))
+ last_conditional_mod[i] = n;
+ }
}
if (inst->writes_accumulator_implicitly(v->devinfo)) {