summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/vc4/vc4_qir.h
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2015-12-22 13:37:36 -0800
committerEric Anholt <eric@anholt.net>2016-01-06 12:39:51 -0800
commit71db7d3dc577e48da3689fd66989ec3b0a069089 (patch)
tree450fb776d1617cbdec4160d81a87480599c7dbb1 /src/gallium/drivers/vc4/vc4_qir.h
parent0a89f307f95de3a3357d834f36c60fe803895f8a (diff)
downloadexternal_mesa3d-71db7d3dc577e48da3689fd66989ec3b0a069089.zip
external_mesa3d-71db7d3dc577e48da3689fd66989ec3b0a069089.tar.gz
external_mesa3d-71db7d3dc577e48da3689fd66989ec3b0a069089.tar.bz2
vc4: Replace the SSA-style SEL operators with conditional MOVs.
I'm moving away from QIR being SSA (since NIR is doing lots of SSA optimization for us now) and instead having QIR just be QPU operations with virtual registers. By making our SELs be composed of two MOVs, we could potentially coalesce the registers for the MOV's src and dst and eliminate the MOV. total instructions in shared programs: 88448 -> 88028 (-0.47%) instructions in affected programs: 39845 -> 39425 (-1.05%) total estimated cycles in shared programs: 246306 -> 245762 (-0.22%) estimated cycles in affected programs: 162887 -> 162343 (-0.33%)
Diffstat (limited to 'src/gallium/drivers/vc4/vc4_qir.h')
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h50
1 files changed, 18 insertions, 32 deletions
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index b0fbb4c..9dad80d 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -93,23 +93,6 @@ enum qop {
QOP_XOR,
QOP_NOT,
- /* Note: Orderings of these compares must be the same as in
- * qpu_defines.h. Selects the src[0] if the ns flag bit is set,
- * otherwise 0. */
- QOP_SEL_X_0_ZS,
- QOP_SEL_X_0_ZC,
- QOP_SEL_X_0_NS,
- QOP_SEL_X_0_NC,
- QOP_SEL_X_0_CS,
- QOP_SEL_X_0_CC,
- /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */
- QOP_SEL_X_Y_ZS,
- QOP_SEL_X_Y_ZC,
- QOP_SEL_X_Y_NS,
- QOP_SEL_X_Y_NC,
- QOP_SEL_X_Y_CS,
- QOP_SEL_X_Y_CC,
-
QOP_FTOI,
QOP_ITOF,
QOP_RCP,
@@ -170,6 +153,7 @@ struct qinst {
struct qreg dst;
struct qreg *src;
bool sf;
+ uint8_t cond;
};
enum qstage {
@@ -463,9 +447,11 @@ void qir_schedule_instructions(struct vc4_compile *c);
void qir_reorder_uniforms(struct vc4_compile *c);
void qir_emit(struct vc4_compile *c, struct qinst *inst);
-static inline void qir_emit_nodef(struct vc4_compile *c, struct qinst *inst)
+static inline struct qinst *
+qir_emit_nodef(struct vc4_compile *c, struct qinst *inst)
{
list_addtail(&inst->link, &c->instructions);
+ return inst;
}
struct qreg qir_get_temp(struct vc4_compile *c);
@@ -536,11 +522,12 @@ qir_##name(struct vc4_compile *c, struct qreg a) \
qir_emit(c, qir_inst(QOP_##name, t, a, c->undef)); \
return t; \
} \
-static inline void \
+static inline struct qinst * \
qir_##name##_dest(struct vc4_compile *c, struct qreg dest, \
struct qreg a) \
{ \
- qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, c->undef)); \
+ return qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, \
+ c->undef)); \
}
#define QIR_ALU2(name) \
@@ -592,18 +579,6 @@ QIR_ALU2(V8MAX)
QIR_ALU2(V8ADDS)
QIR_ALU2(V8SUBS)
QIR_ALU2(MUL24)
-QIR_ALU1(SEL_X_0_ZS)
-QIR_ALU1(SEL_X_0_ZC)
-QIR_ALU1(SEL_X_0_NS)
-QIR_ALU1(SEL_X_0_NC)
-QIR_ALU1(SEL_X_0_CS)
-QIR_ALU1(SEL_X_0_CC)
-QIR_ALU2(SEL_X_Y_ZS)
-QIR_ALU2(SEL_X_Y_ZC)
-QIR_ALU2(SEL_X_Y_NS)
-QIR_ALU2(SEL_X_Y_NC)
-QIR_ALU2(SEL_X_Y_CS)
-QIR_ALU2(SEL_X_Y_CC)
QIR_ALU2(FMIN)
QIR_ALU2(FMAX)
QIR_ALU2(FMINABS)
@@ -648,6 +623,17 @@ QIR_NODST_1(TLB_STENCIL_SETUP)
QIR_NODST_1(MS_MASK)
static inline struct qreg
+qir_SEL(struct vc4_compile *c, uint8_t cond, struct qreg src0, struct qreg src1)
+{
+ struct qreg t = qir_get_temp(c);
+ struct qinst *a = qir_MOV_dest(c, t, src0);
+ struct qinst *b = qir_MOV_dest(c, t, src1);
+ a->cond = cond;
+ b->cond = cond ^ 1;
+ return t;
+}
+
+static inline struct qreg
qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
{
struct qreg t = qir_FMOV(c, src);