diff options
author | Francisco Jerez <currojerez@riseup.net> | 2015-02-11 18:15:44 +0200 |
---|---|---|
committer | Francisco Jerez <currojerez@riseup.net> | 2015-02-19 14:09:12 +0200 |
commit | 6c34fd20beb74e009778870a8e30811b393f745c (patch) | |
tree | d04b93eec4c9ce12cbdb66807f8248904eeaf539 /src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | |
parent | 35a77a148f8b7ef03fe3b31d63719e0bfdf4b783 (diff) | |
download | external_mesa3d-6c34fd20beb74e009778870a8e30811b393f745c.zip external_mesa3d-6c34fd20beb74e009778870a8e30811b393f745c.tar.gz external_mesa3d-6c34fd20beb74e009778870a8e30811b393f745c.tar.bz2 |
i965/vec4: Calculate register allocation q values manually.
This fixes a regression in the running time of Piglit introduced by
commit 78e9043475d4bed8b50f7e413963c960fa0935bb, which increased the
number of register allocation classes set up by the VEC4 back-end
from 2 to 16. The algorithm used by ra_set_finalize() to calculate the
q values is unnecessarily expensive, so calculate them manually like the
FS back-end does.
Reported-by: Mark Janes <mark.a.janes@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 18 |
1 file changed, 17 insertions, 1 deletion
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 46f0bfd..a286f8a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -129,10 +129,13 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen) * between them and the base GRF registers (and also each other). */ int reg = 0; + unsigned *q_values[MAX_VGRF_SIZE]; for (int i = 0; i < class_count; i++) { int class_reg_count = base_reg_count - (class_sizes[i] - 1); screen->vec4_reg_set.classes[i] = ra_alloc_reg_class(screen->vec4_reg_set.regs); + q_values[i] = new unsigned[MAX_VGRF_SIZE]; + for (int j = 0; j < class_reg_count; j++) { ra_class_add_reg(screen->vec4_reg_set.regs, screen->vec4_reg_set.classes[i], reg); @@ -146,10 +149,23 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen) reg++; } + + for (int j = 0; j < class_count; j++) { + /* Calculate the q values manually because the algorithm used by + * ra_set_finalize() to do it has higher complexity affecting the + * start-up time of some applications. q(i, j) is just the maximum + * number of registers from class i a register from class j can + * conflict with. + */ + q_values[i][j] = class_sizes[i] + class_sizes[j] - 1; + } } assert(reg == ra_reg_count); - ra_set_finalize(screen->vec4_reg_set.regs, NULL); + ra_set_finalize(screen->vec4_reg_set.regs, q_values); + + for (int i = 0; i < MAX_VGRF_SIZE; i++) + delete[] q_values[i]; } void |