summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/vc4/vc4_qpu.c
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2014-12-16 11:58:58 -0800
committer: Eric Anholt <eric@anholt.net> 2014-12-16 13:45:41 -0800
commit: 1f0e1060503e9e700c22a07fa050c47ef5257a40 (patch)
tree: 65cc0af21837c1bdd4be36d4c0dc0acae0a3ac87 /src/gallium/drivers/vc4/vc4_qpu.c
parent: f96bd9673edd79e4304d8e60a4cb4a0119b12a28 (diff)
download: external_mesa3d-1f0e1060503e9e700c22a07fa050c47ef5257a40.zip
external_mesa3d-1f0e1060503e9e700c22a07fa050c47ef5257a40.tar.gz
external_mesa3d-1f0e1060503e9e700c22a07fa050c47ef5257a40.tar.bz2
vc4: Add support for turning add-based MOVs to muls for pairing.
total instructions in shared programs: 43053 -> 40795 (-5.24%)
instructions in affected programs: 37996 -> 35738 (-5.94%)
Diffstat (limited to 'src/gallium/drivers/vc4/vc4_qpu.c')
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu.c | 51
1 files changed, 49 insertions, 2 deletions
diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c
index 0d9f5ec..52c06ae 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -338,6 +338,46 @@ try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b)
return true;
}
+static bool /* true: *inst was rewritten in place; false: *inst is left unmodified */
+convert_mov(uint64_t *inst) /* re-encodes an add-op MOV (OR x, x) as a mul-op V8MIN x, x */
+{
+ uint32_t add_a = QPU_GET_FIELD(*inst, QPU_ADD_A); /* input mux of the candidate MOV */
+ uint32_t waddr_add = QPU_GET_FIELD(*inst, QPU_WADDR_ADD); /* write address used by the add op */
+ uint32_t cond_add = QPU_GET_FIELD(*inst, QPU_COND_ADD); /* condition code used by the add op */
+
+ /* Is it a MOV?  Here that means an add-op OR whose A and B input muxes are identical. */
+ if (QPU_GET_FIELD(*inst, QPU_OP_ADD) != QPU_A_OR ||
+ (add_a != QPU_GET_FIELD(*inst, QPU_ADD_B))) {
+ return false;
+ }
+
+ if (QPU_GET_FIELD(*inst, QPU_SIG) != QPU_SIG_NONE) /* any signal other than NONE: do not convert */
+ return false;
+
+ /* We could maybe support this in the .8888 and .8a-.8d cases. */
+ if (*inst & QPU_PM) /* PM bit set: do not convert (see the note above) */
+ return false;
+
+ *inst = QPU_UPDATE_FIELD(*inst, QPU_A_NOP, QPU_OP_ADD); /* the add op becomes a NOP ... */
+ *inst = QPU_UPDATE_FIELD(*inst, QPU_M_V8MIN, QPU_OP_MUL); /* ... and the mul op takes over; V8MIN x, x presumably yields x -- TODO confirm */
+
+ *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_A); /* both mul inputs read the old add input mux */
+ *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_B);
+ *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_A); /* add inputs are reset to the R0 mux */
+ *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_B);
+
+ *inst = QPU_UPDATE_FIELD(*inst, waddr_add, QPU_WADDR_MUL); /* the mul op now writes the old add destination */
+ *inst = QPU_UPDATE_FIELD(*inst, QPU_W_NOP, QPU_WADDR_ADD); /* the add op no longer writes anywhere */
+
+ *inst = QPU_UPDATE_FIELD(*inst, cond_add, QPU_COND_MUL); /* the mul op inherits the old add condition */
+ *inst = QPU_UPDATE_FIELD(*inst, QPU_COND_NEVER, QPU_COND_ADD); /* the add op is conditioned on NEVER */
+
+ if (!qpu_waddr_ignores_ws(waddr_add)) /* NOTE(review): presumably WS picks which regfile each ALU writes, so moving the write to the mul op needs the bit flipped -- confirm */
+ *inst ^= QPU_WS;
+
+ return true;
+}
+
uint64_t
qpu_merge_inst(uint64_t a, uint64_t b)
{
@@ -345,8 +385,15 @@ qpu_merge_inst(uint64_t a, uint64_t b)
bool ok = true;
if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&
- QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP)
- return 0;
+ QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) {
+ if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP ||
+ QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP ||
+ !(convert_mov(&a) || convert_mov(&b))) {
+ return 0;
+ } else {
+ merge = a | b;
+ }
+ }
if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP &&
QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)