summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorRob Clark <robclark@freedesktop.org>2015-04-06 10:48:11 -0400
committerRob Clark <robclark@freedesktop.org>2015-04-11 11:39:46 -0400
commitf0e9a632a12798bd727799e396cde665bd960665 (patch)
tree2c34e7378cd03c5424cec08873d76aaea4ae96bc /src/gallium/drivers
parentf59613561694cc4a4b81db8a73f8afe893dbacac (diff)
downloadexternal_mesa3d-f0e9a632a12798bd727799e396cde665bd960665.zip
external_mesa3d-f0e9a632a12798bd727799e396cde665bd960665.tar.gz
external_mesa3d-f0e9a632a12798bd727799e396cde665bd960665.tar.bz2
freedreno/ir3/cp: support to swap mad src's
For a normal MAD (i.e. not MADSH), if the first source is a GPR and the second source is a const, we can swap the first two sources to avoid needing a mov instruction. This gives back the biggest advantage the TGSI f/e had over the NIR f/e for common shaders, since the TGSI f/e had this logic in the f/e.

Note that doing this in the copy-prop step has the advantage that it will also work for cases like:

  MOV TEMP[b], CONST[x]
  MAD TEMP[d], TEMP[a], TEMP[b], TEMP[c]

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/freedreno/ir3/instr-a3xx.h 13
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.h 4
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_cp.c 32
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_depth.c 3
4 files changed, 43 insertions, 9 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
index 4d75d77..98637c7 100644
--- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
+++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
@@ -676,9 +676,7 @@ static inline bool is_mad(opc_t opc)
{
switch (opc) {
case OPC_MAD_U16:
- case OPC_MADSH_U16:
case OPC_MAD_S16:
- case OPC_MADSH_M16:
case OPC_MAD_U24:
case OPC_MAD_S24:
case OPC_MAD_F16:
@@ -689,4 +687,15 @@ static inline bool is_mad(opc_t opc)
}
}
+static inline bool is_madsh(opc_t opc)
+{
+ switch (opc) {
+ case OPC_MADSH_U16:
+ case OPC_MADSH_M16:
+ return true;
+ default:
+ return false;
+ }
+}
+
#endif /* INSTR_A3XX_H_ */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index f424f73..1a8bead 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -540,10 +540,10 @@ static inline bool reg_gpr(struct ir3_register *r)
return true;
}
-/* some cat2 instructions (ie. those which are not float can embed an
+/* some cat2 instructions (ie. those which are not float) can embed an
* immediate:
*/
-static inline bool ir3_cat2_immed(opc_t opc)
+static inline bool ir3_cat2_int(opc_t opc)
{
switch (opc) {
case OPC_ADD_U:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
index 3eb85f6..77bfbc5 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
@@ -115,7 +115,7 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n,
case 2:
valid_flags = ir3_cat2_absneg(instr->opc) | IR3_REG_CONST;
- if (ir3_cat2_immed(instr->opc))
+ if (ir3_cat2_int(instr->opc))
valid_flags |= IR3_REG_IMMED;
if (flags & ~valid_flags)
@@ -199,6 +199,15 @@ static void combine_flags(unsigned *dstflags, unsigned srcflags)
static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, unsigned *flags);
+/* the "plain" MAD's (ie. the ones that don't shift first src prior to
+ * multiply) can swap their first two srcs if src[0] is !CONST and
+ * src[1] is CONST:
+ */
+static bool is_valid_mad(struct ir3_instruction *instr)
+{
+ return (instr->category == 3) && is_mad(instr->opc);
+}
+
/**
* Handle cp for a given src register. This additionally handles
* the cases of collapsing immedate/const (which replace the src
@@ -255,8 +264,23 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
combine_flags(&new_flags, reg->flags);
- if (!valid_flags(instr, n, new_flags))
- return;
+ if (!valid_flags(instr, n, new_flags)) {
+ /* special case for "normal" mad instructions, we can
+ * try swapping the first two args if that fits better.
+ */
+ if ((n == 1) && is_valid_mad(instr) &&
+ !(instr->regs[0 + 1]->flags & IR3_REG_CONST) &&
+ valid_flags(instr, 0, new_flags)) {
+ /* swap src[0] and src[1]: */
+ struct ir3_register *tmp;
+ tmp = instr->regs[0 + 1];
+ instr->regs[0 + 1] = instr->regs[1 + 1];
+ instr->regs[1 + 1] = tmp;
+ n = 0;
+ } else {
+ return;
+ }
+ }
/* Here we handle the special case of mov from
* CONST and/or RELATIV. These need to be handled
@@ -305,7 +329,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
debug_assert((instr->category == 6) ||
((instr->category == 2) &&
- ir3_cat2_immed(instr->opc)));
+ ir3_cat2_int(instr->opc)));
if (new_flags & IR3_REG_SABS)
iim_val = abs(iim_val);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
index 0cda62b..9e1f45d 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
@@ -74,7 +74,8 @@ int ir3_delayslots(struct ir3_instruction *assigner,
if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer)) {
return 6;
} else if ((consumer->category == 3) &&
- is_mad(consumer->opc) && (n == 2)) {
+ (is_mad(consumer->opc) || is_madsh(consumer->opc)) &&
+ (n == 2)) {
/* special case, 3rd src to cat3 not required on first cycle */
return 1;
} else {