diff options
author | Eric Anholt <eric@anholt.net> | 2011-01-18 00:19:48 -0800 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2011-01-18 10:17:40 -0800 |
commit | 7cf648da632595d1997d50a9fa92701ade61ff63 (patch) | |
tree | 097554fdcc250ba6428e26860b6970d0ec2ddcaa /src/mesa/program | |
parent | 58c988ada56114b56477983f66b4039219f1a82c (diff) | |
download | external_mesa3d-7cf648da632595d1997d50a9fa92701ade61ff63.zip external_mesa3d-7cf648da632595d1997d50a9fa92701ade61ff63.tar.gz external_mesa3d-7cf648da632595d1997d50a9fa92701ade61ff63.tar.bz2 |
ra: Add an adjacency list to trade space for time in ra_simplify().
This was recommended in the original paper, but I figued "make it run"
before "make it fast". Now we make it fast. Reduces the runtime of
glsl-fs-convolution-1 by 12.7% +/- 0.6% (n=5).
Diffstat (limited to 'src/mesa/program')
-rw-r--r-- | src/mesa/program/register_allocate.c | 35 |
1 files changed, 21 insertions, 14 deletions
diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c index 3d8ccb3..5de929e 100644 --- a/src/mesa/program/register_allocate.c +++ b/src/mesa/program/register_allocate.c @@ -71,6 +71,7 @@ struct ra_class { struct ra_node { GLboolean *adjacency; + unsigned int *adjacency_list; unsigned int class; unsigned int adjacency_count; unsigned int reg; @@ -204,6 +205,14 @@ ra_set_finalize(struct ra_regs *regs) } } +static void +ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2) +{ + g->nodes[n1].adjacency[n2] = GL_TRUE; + g->nodes[n1].adjacency_list[g->nodes[n1].adjacency_count] = n2; + g->nodes[n1].adjacency_count++; +} + struct ra_graph * ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count) { @@ -219,7 +228,9 @@ ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count) for (i = 0; i < count; i++) { g->nodes[i].adjacency = talloc_zero_array(g, GLboolean, count); - g->nodes[i].adjacency[i] = GL_TRUE; + g->nodes[i].adjacency_list = talloc_array(g, unsigned int, count); + g->nodes[i].adjacency_count = 0; + ra_add_node_adjacency(g, i, i); g->nodes[i].reg = ~0; } @@ -237,13 +248,10 @@ void ra_add_node_interference(struct ra_graph *g, unsigned int n1, unsigned int n2) { - if (g->nodes[n1].adjacency[n2]) - return; - - g->nodes[n1].adjacency[n2] = GL_TRUE; - g->nodes[n2].adjacency_count++; - g->nodes[n2].adjacency[n1] = GL_TRUE; - g->nodes[n2].adjacency_count++; + if (!g->nodes[n1].adjacency[n2]) { + ra_add_node_adjacency(g, n1, n2); + ra_add_node_adjacency(g, n2, n1); + } } static GLboolean pq_test(struct ra_graph *g, unsigned int n) @@ -252,13 +260,12 @@ static GLboolean pq_test(struct ra_graph *g, unsigned int n) unsigned int q = 0; int n_class = g->nodes[n].class; - for (j = 0; j < g->count; j++) { - if (j == n || g->nodes[j].in_stack) - continue; + for (j = 0; j < g->nodes[n].adjacency_count; j++) { + unsigned int n2 = g->nodes[n].adjacency_list[j]; + unsigned int n2_class = g->nodes[n2].class; - if (g->nodes[n].adjacency[j]) { - unsigned int j_class = g->nodes[j].class; - q += g->regs->classes[n_class]->q[j_class]; + if (n != n2 && !g->nodes[n2].in_stack) { + q += g->regs->classes[n_class]->q[n2_class]; } } |