summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2016-10-05 09:07:46 -0700
committer: Eric Anholt <eric@anholt.net> 2016-10-06 18:09:24 -0700
commit: d4ae5ca823227214dd1f536e5f4058bede20b2dd (patch)
tree: f020f96d93ff05007d78f954100852b82faaae5f
parent: 06cc3dfda49b557b177ea6a5bc4fb87e087df21a (diff)
download: external_mesa3d-d4ae5ca823227214dd1f536e5f4058bede20b2dd.zip
external_mesa3d-d4ae5ca823227214dd1f536e5f4058bede20b2dd.tar.gz
external_mesa3d-d4ae5ca823227214dd1f536e5f4058bede20b2dd.tar.bz2
vc4: Fix live intervals analysis for screening defs in if statements.
If a conditional assignment is only conditioned on the exec mask, that's still screening off the value in the executed channels (and, since we're not storing to the unexecuted channels, we don't care what's in there). Fixes a bunch of extra register pressure on Processing's Ribbons demo, which is failing to allocate.
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 5
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 7
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir_live_variables.c 13
3 files changed, 20 insertions, 5 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index a91e620..81ac070 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -169,8 +169,11 @@ ntq_store_dest(struct vc4_compile *c, nir_dest *dest, int chan,
* channel is active.
*/
if (c->execute.file != QFILE_NULL) {
+ struct qinst *mov;
+
qir_SF(c, c->execute);
- qir_MOV_cond(c, QPU_COND_ZS, qregs[chan], result);
+ mov = qir_MOV_cond(c, QPU_COND_ZS, qregs[chan], result);
+ mov->cond_is_exec_mask = true;
} else {
qir_MOV_dest(c, qregs[chan], result);
}
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 3665216..4d41c42 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -195,6 +195,7 @@ struct qinst {
struct qreg dst;
struct qreg *src;
bool sf;
+ bool cond_is_exec_mask;
uint8_t cond;
};
@@ -835,11 +836,13 @@ qir_ROT_MUL(struct vc4_compile *c, struct qreg val, uint32_t rot)
QPU_SMALL_IMM_MUL_ROT + rot)));
}
-static inline void
+static inline struct qinst *
qir_MOV_cond(struct vc4_compile *c, uint8_t cond,
struct qreg dest, struct qreg src)
{
- qir_MOV_dest(c, dest, src)->cond = cond;
+ struct qinst *mov = qir_MOV_dest(c, dest, src);
+ mov->cond = cond;
+ return mov;
}
static inline struct qinst *
diff --git a/src/gallium/drivers/vc4/vc4_qir_live_variables.c b/src/gallium/drivers/vc4/vc4_qir_live_variables.c
index eac350a..beefb0d 100644
--- a/src/gallium/drivers/vc4/vc4_qir_live_variables.c
+++ b/src/gallium/drivers/vc4/vc4_qir_live_variables.c
@@ -113,8 +113,17 @@ qir_setup_def(struct vc4_compile *c, struct qblock *block, int ip,
if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
return;
- /* Easy, common case: unconditional full register update. */
- if (inst->cond == QPU_COND_ALWAYS && !inst->dst.pack) {
+ /* Easy, common case: unconditional full register update.
+ *
+ * We treat conditioning on the exec mask as the same as not being
+ * conditional. This makes sure that if the register gets set on
+ * either side of an if, it is treated as being screened off before
+ * the if. Otherwise, if there was no intervening def, its live
+ * interval doesn't extend back to the start of the program, and if too
+ * many registers did that we'd fail to register allocate.
+ */
+ if ((inst->cond == QPU_COND_ALWAYS ||
+ inst->cond_is_exec_mask) && !inst->dst.pack) {
BITSET_SET(block->def, var);
return;
}