summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/vc4
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2016-08-25 12:15:29 -0700
committer: Eric Anholt <eric@anholt.net> 2016-08-25 17:24:11 -0700
commit: 074f1f3c0c2cd15213a62eb7f589423ece6391c8 (patch)
tree: a9539881a030d76ae91c997df80ddd3fecc505e1 /src/gallium/drivers/vc4
parent: 3da4e38f4864f0ae4eeeea588a000204042de5ea (diff)
download: external_mesa3d-074f1f3c0c2cd15213a62eb7f589423ece6391c8.zip
external_mesa3d-074f1f3c0c2cd15213a62eb7f589423ece6391c8.tar.gz
external_mesa3d-074f1f3c0c2cd15213a62eb7f589423ece6391c8.tar.bz2
vc4: Add support for the 2-bit LOAD_IMM variants.
Extracted and fixed up from a patch by jonasarrow on github. This ended up not getting used for ddx/ddy, but seems like it might still be useful.
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.c 2
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 26
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu.c 14
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu.h 2
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_defines.h 6
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_emit.c 8
6 files changed, 58 insertions, 0 deletions
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 0919d32..9b4a28e 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -83,6 +83,8 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
[QOP_LOAD_IMM] = { "load_imm", 0, 1 },
+ [QOP_LOAD_IMM_U2] = { "load_imm_u2", 0, 1 },
+ [QOP_LOAD_IMM_I2] = { "load_imm_i2", 0, 1 },
[QOP_BRANCH] = { "branch", 0, 0, true },
[QOP_UNIFORMS_RESET] = { "uniforms_reset", 0, 2, true },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 9e61200..90cc138 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -156,8 +156,18 @@ enum qop {
*/
QOP_TEX_RESULT,
+ /* 32-bit immediate loaded to each SIMD channel */
QOP_LOAD_IMM,
+ /* 32-bit immediate divided into 16 2-bit unsigned int values and
+ * loaded to each corresponding SIMD channel.
+ */
+ QOP_LOAD_IMM_U2,
+ /* 32-bit immediate divided into 16 2-bit signed int values and
+ * loaded to each corresponding SIMD channel.
+ */
+ QOP_LOAD_IMM_I2,
+
/* Jumps to block->successor[0] if the qinst->cond (as a
* QPU_COND_BRANCH_*) passes, or block->successor[1] if not. Note
* that block->successor[1] may be unset if the condition is ALWAYS.
@@ -796,6 +806,22 @@ qir_LOAD_IMM(struct vc4_compile *c, uint32_t val)
qir_reg(QFILE_LOAD_IMM, val), c->undef));
}
+static inline struct qreg
+qir_LOAD_IMM_U2(struct vc4_compile *c, uint32_t val)
+{
+ return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_U2, c->undef,
+ qir_reg(QFILE_LOAD_IMM, val),
+ c->undef));
+}
+
+static inline struct qreg
+qir_LOAD_IMM_I2(struct vc4_compile *c, uint32_t val)
+{
+ return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_I2, c->undef,
+ qir_reg(QFILE_LOAD_IMM, val),
+ c->undef));
+}
+
static inline void
qir_MOV_cond(struct vc4_compile *c, uint8_t cond,
struct qreg dest, struct qreg src)
diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c
index cf74c42..d022d10 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -165,6 +165,20 @@ qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)
}
uint64_t
+qpu_load_imm_u2(struct qpu_reg dst, uint32_t val)
+{
+ return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_U2,
+ QPU_LOAD_IMM_MODE);
+}
+
+uint64_t
+qpu_load_imm_i2(struct qpu_reg dst, uint32_t val)
+{
+ return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_I2,
+ QPU_LOAD_IMM_MODE);
+}
+
+uint64_t
qpu_branch(uint32_t cond, uint32_t target)
{
uint64_t inst = 0;
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h
index a0aac15..437e4f5 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -143,6 +143,8 @@ uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst,
struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST;
uint64_t qpu_merge_inst(uint64_t a, uint64_t b) ATTRIBUTE_CONST;
uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
+uint64_t qpu_load_imm_u2(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
+uint64_t qpu_load_imm_i2(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
uint64_t qpu_branch(uint32_t cond, uint32_t target) ATTRIBUTE_CONST;
uint64_t qpu_set_sig(uint64_t inst, uint32_t sig) ATTRIBUTE_CONST;
uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
diff --git a/src/gallium/drivers/vc4/vc4_qpu_defines.h b/src/gallium/drivers/vc4/vc4_qpu_defines.h
index 3ca5aba..e6ca345 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_defines.h
+++ b/src/gallium/drivers/vc4/vc4_qpu_defines.h
@@ -246,6 +246,12 @@ enum qpu_unpack {
#define QPU_UNPACK_SHIFT 57
#define QPU_UNPACK_MASK QPU_MASK(59, 57)
+#define QPU_LOAD_IMM_MODE_SHIFT 57
+#define QPU_LOAD_IMM_MODE_MASK QPU_MASK(59, 57)
+# define QPU_LOAD_IMM_MODE_U32 0
+# define QPU_LOAD_IMM_MODE_I2 1
+# define QPU_LOAD_IMM_MODE_U2 3
+
/**
* If set, the pack field means PACK_MUL or R4 packing, instead of normal
* regfile a packing.
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 77aa4f6..f5a5b8a 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -428,6 +428,14 @@ vc4_generate_code_block(struct vc4_compile *c,
queue(block, qpu_load_imm_ui(dst, qinst->src[0].index));
break;
+ case QOP_LOAD_IMM_U2:
+ queue(block, qpu_load_imm_u2(dst, qinst->src[0].index));
+ break;
+
+ case QOP_LOAD_IMM_I2:
+ queue(block, qpu_load_imm_i2(dst, qinst->src[0].index));
+ break;
+
case QOP_MS_MASK:
src[1] = qpu_ra(QPU_R_MS_REV_FLAGS);
fixup_raddr_conflict(block, dst, &src[0], &src[1],