summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2011-01-18 00:19:48 -0800
committerEric Anholt <eric@anholt.net>2011-01-18 10:17:40 -0800
commit7cf648da632595d1997d50a9fa92701ade61ff63 (patch)
tree097554fdcc250ba6428e26860b6970d0ec2ddcaa /src
parent58c988ada56114b56477983f66b4039219f1a82c (diff)
downloadexternal_mesa3d-7cf648da632595d1997d50a9fa92701ade61ff63.zip
external_mesa3d-7cf648da632595d1997d50a9fa92701ade61ff63.tar.gz
external_mesa3d-7cf648da632595d1997d50a9fa92701ade61ff63.tar.bz2
ra: Add an adjacency list to trade space for time in ra_simplify().
This was recommended in the original paper, but I figued "make it run" before "make it fast". Now we make it fast. Reduces the runtime of glsl-fs-convolution-1 by 12.7% +/- 0.6% (n=5).
Diffstat (limited to 'src')
-rw-r--r--src/mesa/program/register_allocate.c35
1 files changed, 21 insertions, 14 deletions
diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c
index 3d8ccb3..5de929e 100644
--- a/src/mesa/program/register_allocate.c
+++ b/src/mesa/program/register_allocate.c
@@ -71,6 +71,7 @@ struct ra_class {
struct ra_node {
GLboolean *adjacency;
+ unsigned int *adjacency_list;
unsigned int class;
unsigned int adjacency_count;
unsigned int reg;
@@ -204,6 +205,14 @@ ra_set_finalize(struct ra_regs *regs)
}
}
+static void
+ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2)
+{
+ g->nodes[n1].adjacency[n2] = GL_TRUE;
+ g->nodes[n1].adjacency_list[g->nodes[n1].adjacency_count] = n2;
+ g->nodes[n1].adjacency_count++;
+}
+
struct ra_graph *
ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
{
@@ -219,7 +228,9 @@ ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
for (i = 0; i < count; i++) {
g->nodes[i].adjacency = talloc_zero_array(g, GLboolean, count);
- g->nodes[i].adjacency[i] = GL_TRUE;
+ g->nodes[i].adjacency_list = talloc_array(g, unsigned int, count);
+ g->nodes[i].adjacency_count = 0;
+ ra_add_node_adjacency(g, i, i);
g->nodes[i].reg = ~0;
}
@@ -237,13 +248,10 @@ void
ra_add_node_interference(struct ra_graph *g,
unsigned int n1, unsigned int n2)
{
- if (g->nodes[n1].adjacency[n2])
- return;
-
- g->nodes[n1].adjacency[n2] = GL_TRUE;
- g->nodes[n2].adjacency_count++;
- g->nodes[n2].adjacency[n1] = GL_TRUE;
- g->nodes[n2].adjacency_count++;
+ if (!g->nodes[n1].adjacency[n2]) {
+ ra_add_node_adjacency(g, n1, n2);
+ ra_add_node_adjacency(g, n2, n1);
+ }
}
static GLboolean pq_test(struct ra_graph *g, unsigned int n)
@@ -252,13 +260,12 @@ static GLboolean pq_test(struct ra_graph *g, unsigned int n)
unsigned int q = 0;
int n_class = g->nodes[n].class;
- for (j = 0; j < g->count; j++) {
- if (j == n || g->nodes[j].in_stack)
- continue;
+ for (j = 0; j < g->nodes[n].adjacency_count; j++) {
+ unsigned int n2 = g->nodes[n].adjacency_list[j];
+ unsigned int n2_class = g->nodes[n2].class;
- if (g->nodes[n].adjacency[j]) {
- unsigned int j_class = g->nodes[j].class;
- q += g->regs->classes[n_class]->q[j_class];
+ if (n != n2 && !g->nodes[n2].in_stack) {
+ q += g->regs->classes[n_class]->q[n2_class];
}
}