summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
diff options
context:
space:
mode:
author Francisco Jerez <currojerez@riseup.net> 2015-02-11 18:15:44 +0200
committer Francisco Jerez <currojerez@riseup.net> 2015-02-19 14:09:12 +0200
commit 6c34fd20beb74e009778870a8e30811b393f745c (patch)
tree d04b93eec4c9ce12cbdb66807f8248904eeaf539 /src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
parent 35a77a148f8b7ef03fe3b31d63719e0bfdf4b783 (diff)
downloadexternal_mesa3d-6c34fd20beb74e009778870a8e30811b393f745c.zip
external_mesa3d-6c34fd20beb74e009778870a8e30811b393f745c.tar.gz
external_mesa3d-6c34fd20beb74e009778870a8e30811b393f745c.tar.bz2
i965/vec4: Calculate register allocation q values manually.
This fixes a regression in the running time of Piglit introduced by commit 78e9043475d4bed8b50f7e413963c960fa0935bb, which increased the number of register allocation classes set up by the VEC4 back-end from 2 to 16. The algorithm used by ra_set_finalize() to calculate them is unnecessarily expensive; do it manually like the FS back-end does. Reported-by: Mark Janes <mark.a.janes@intel.com> Reviewed-by: Matt Turner <mattst88@gmail.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp')
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp18
1 files changed, 17 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 46f0bfd..a286f8a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -129,10 +129,13 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen)
* between them and the base GRF registers (and also each other).
*/
int reg = 0;
+ unsigned *q_values[MAX_VGRF_SIZE];
for (int i = 0; i < class_count; i++) {
int class_reg_count = base_reg_count - (class_sizes[i] - 1);
screen->vec4_reg_set.classes[i] = ra_alloc_reg_class(screen->vec4_reg_set.regs);
+ q_values[i] = new unsigned[MAX_VGRF_SIZE];
+
for (int j = 0; j < class_reg_count; j++) {
ra_class_add_reg(screen->vec4_reg_set.regs, screen->vec4_reg_set.classes[i], reg);
@@ -146,10 +149,23 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen)
reg++;
}
+
+ for (int j = 0; j < class_count; j++) {
+ /* Calculate the q values manually because the algorithm used by
+ * ra_set_finalize() to do it has higher complexity affecting the
+ * start-up time of some applications. q(i, j) is just the maximum
+ * number of registers from class i a register from class j can
+ * conflict with.
+ */
+ q_values[i][j] = class_sizes[i] + class_sizes[j] - 1;
+ }
}
assert(reg == ra_reg_count);
- ra_set_finalize(screen->vec4_reg_set.regs, NULL);
+ ra_set_finalize(screen->vec4_reg_set.regs, q_values);
+
+ for (int i = 0; i < MAX_VGRF_SIZE; i++)
+ delete[] q_values[i];
}
void