summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r300/compiler/radeon_optimize.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r300/compiler/radeon_optimize.c')
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_optimize.c170
1 files changed, 169 insertions, 1 deletions
diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c
index 39dcb21..7df9681 100644
--- a/src/gallium/drivers/r300/compiler/radeon_optimize.c
+++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c
@@ -30,7 +30,9 @@
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
+#include "radeon_list.h"
#include "radeon_swizzle.h"
+#include "radeon_variable.h"
struct src_clobbered_reads_cb_data {
rc_register_file File;
@@ -519,7 +521,8 @@ static int is_presub_candidate(
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
|| inst->U.I.SaturateMode
- || inst->U.I.WriteALUResult) {
+ || inst->U.I.WriteALUResult
+ || inst->U.I.Omod) {
return 0;
}
@@ -658,6 +661,156 @@ static int peephole_add_presub_inv(
return 0;
}
+struct peephole_mul_cb_data {
+ struct rc_dst_register * Writer;
+ unsigned int Clobbered;
+};
+
+static void omod_filter_reader_cb(
+ void * userdata,
+ struct rc_instruction * inst,
+ rc_register_file file,
+ unsigned int index,
+ unsigned int mask)
+{
+ struct peephole_mul_cb_data * d = userdata;
+ if (rc_src_reads_dst_mask(file, mask, index,
+ d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) {
+
+ d->Clobbered = 1;
+ }
+}
+
+static int peephole_mul_omod(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst_mul,
+ struct rc_list * var_list)
+{
+ unsigned int chan, swz, i;
+ int const_index = -1;
+ int temp_index = -1;
+ float const_value;
+ rc_omod_op omod_op = RC_OMOD_DISABLE;
+ struct rc_list * writer_list;
+ struct rc_variable * var;
+ struct peephole_mul_cb_data cb_data;
+
+ for (i = 0; i < 2; i++) {
+ unsigned int j;
+ if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
+ && inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
+ return 0;
+ }
+ if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ if (temp_index != -1) {
+ /* The instruction has two temp sources */
+ return 0;
+ } else {
+ temp_index = i;
+ continue;
+ }
+ }
+ /* If we get this far Src[i] must be a constant src */
+ if (inst_mul->U.I.SrcReg[i].Negate) {
+ return 0;
+ }
+ /* The constant src needs to read from the same swizzle */
+ swz = RC_SWIZZLE_UNUSED;
+ chan = 0;
+ for (j = 0; j < 4; j++) {
+ unsigned int j_swz =
+ GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
+ if (j_swz == RC_SWIZZLE_UNUSED) {
+ continue;
+ }
+ if (swz == RC_SWIZZLE_UNUSED) {
+ swz = j_swz;
+ chan = j;
+ } else if (j_swz != swz) {
+ return 0;
+ }
+ }
+
+ if (const_index != -1) {
+ /* The instruction has two constant sources */
+ return 0;
+ } else {
+ const_index = i;
+ }
+ }
+
+ if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
+ inst_mul->U.I.SrcReg[const_index].Index)) {
+ return 0;
+ }
+ const_value = rc_get_constant_value(c,
+ inst_mul->U.I.SrcReg[const_index].Index,
+ inst_mul->U.I.SrcReg[const_index].Swizzle,
+ inst_mul->U.I.SrcReg[const_index].Negate,
+ chan);
+
+ if (const_value == 2.0f) {
+ omod_op = RC_OMOD_MUL_2;
+ } else if (const_value == 4.0f) {
+ omod_op = RC_OMOD_MUL_4;
+ } else if (const_value == 8.0f) {
+ omod_op = RC_OMOD_MUL_8;
+ } else if (const_value == (1.0f / 2.0f)) {
+ omod_op = RC_OMOD_DIV_2;
+ } else if (const_value == (1.0f / 4.0f)) {
+ omod_op = RC_OMOD_DIV_4;
+ } else if (const_value == (1.0f / 8.0f)) {
+ omod_op = RC_OMOD_DIV_8;
+ } else {
+ return 0;
+ }
+
+ writer_list = rc_variable_list_get_writers_one_reader(var_list,
+ RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
+
+ if (!writer_list) {
+ return 0;
+ }
+
+ cb_data.Clobbered = 0;
+ cb_data.Writer = &inst_mul->U.I.DstReg;
+ for (var = writer_list->Item; var; var = var->Friend) {
+ struct rc_instruction * inst;
+ const struct rc_opcode_info * info = rc_get_opcode_info(
+ var->Inst->U.I.Opcode);
+ if (info->HasTexture) {
+ return 0;
+ }
+ if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
+ return 0;
+ }
+ for (inst = inst_mul->Prev; inst != var->Inst;
+ inst = inst->Prev) {
+ rc_for_all_reads_mask(inst, omod_filter_reader_cb,
+ &cb_data);
+ if (cb_data.Clobbered) {
+ break;
+ }
+ }
+ }
+
+ if (cb_data.Clobbered) {
+ return 0;
+ }
+
+ /* Rewrite the instructions */
+ for (var = writer_list->Item; var; var = var->Friend) {
+ struct rc_variable * writer = writer_list->Item;
+ writer->Inst->U.I.Omod = omod_op;
+ writer->Inst->U.I.DstReg = inst_mul->U.I.DstReg;
+ writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
+ }
+
+ rc_remove_instruction(inst_mul);
+
+ return 1;
+}
+
/**
* @return
* 0 if inst is still part of the program.
@@ -683,6 +836,7 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
void rc_optimize(struct radeon_compiler * c, void *user)
{
struct rc_instruction * inst = c->Program.Instructions.Next;
+ struct rc_list * var_list;
while(inst != &c->Program.Instructions) {
struct rc_instruction * cur = inst;
inst = inst->Next;
@@ -697,4 +851,18 @@ void rc_optimize(struct radeon_compiler * c, void *user)
/* cur may no longer be part of the program */
}
}
+
+ if (!c->has_omod) {
+ return;
+ }
+
+ inst = c->Program.Instructions.Next;
+ while(inst != &c->Program.Instructions) {
+ struct rc_instruction * cur = inst;
+ inst = inst->Next;
+ if (cur->U.I.Opcode == RC_OPCODE_MUL) {
+ var_list = rc_get_variables(c);
+ peephole_mul_omod(c, cur, var_list);
+ }
+ }
}