summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/i915/i915_fpc_optimize.c
diff options
context:
space:
mode:
authorStéphane Marchesin <marcheu@chromium.org>2011-07-04 20:21:37 -0700
committerStéphane Marchesin <marcheu@chromium.org>2011-07-06 02:49:48 -0700
commit2bc5e0e97ba7b6c32f6ff90cb90448173d74b89b (patch)
tree8421bf310dfd85869d0f63c950957099a3b7f9b9 /src/gallium/drivers/i915/i915_fpc_optimize.c
parentcc78eb63cdc0022684ca6816f258ea4492431916 (diff)
downloadexternal_mesa3d-2bc5e0e97ba7b6c32f6ff90cb90448173d74b89b.zip
external_mesa3d-2bc5e0e97ba7b6c32f6ff90cb90448173d74b89b.tar.gz
external_mesa3d-2bc5e0e97ba7b6c32f6ff90cb90448173d74b89b.tar.bz2
i915g: introduce the tiny shader optimizer.
Diffstat (limited to 'src/gallium/drivers/i915/i915_fpc_optimize.c')
-rw-r--r--src/gallium/drivers/i915/i915_fpc_optimize.c182
1 files changed, 182 insertions, 0 deletions
diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c
new file mode 100644
index 0000000..5c60d95
--- /dev/null
+++ b/src/gallium/drivers/i915/i915_fpc_optimize.c
@@ -0,0 +1,182 @@
+/**************************************************************************
+ *
+ * Copyright 2011 The Chromium OS authors.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_fpc.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+
+static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
+{
+ return (d1->Register.File == d2->Register.File &&
+ d1->Register.Indirect == d2->Register.Indirect &&
+ d1->Register.Dimension == d2->Register.Dimension &&
+ d1->Register.Index == d2->Register.Index);
+}
+
+static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
+{
+ return (d1->Register.File == d2->Register.File &&
+ d1->Register.Indirect == d2->Register.Indirect &&
+ d1->Register.Dimension == d2->Register.Dimension &&
+ d1->Register.Index == d2->Register.Index &&
+ d1->Register.Absolute == d2->Register.Absolute &&
+ d1->Register.Negate == d2->Register.Negate);
+}
+
+
+/*
+ * Optimize away things like:
+ * MUL OUT[0].xyz, TEMP[1], TEMP[2]
+ * MOV OUT[0].w, TEMP[2]
+ * into:
+ * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
+ * This is useful for optimizing texenv.
+ */
+static void i915_fpc_optimize_mov_after_mul(union i915_full_token* current, union i915_full_token* next)
+{
+ if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
+ next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
+ current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL &&
+ next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
+ current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ &&
+ next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W &&
+ same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) &&
+ same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) )
+ {
+ next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
+ current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+ current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_ONE;
+ return;
+ }
+
+ if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
+ next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
+ current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL &&
+ next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
+ current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ &&
+ next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W &&
+ same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) &&
+ same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) )
+ {
+ next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
+ current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+ current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_ONE;
+ return;
+ }
+}
+
+static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
+{
+ o->File = i->File;
+ o->Indirect = i->Indirect;
+ o->Dimension = i->Dimension;
+ o->Index = i->Index;
+ o->SwizzleX = i->SwizzleX;
+ o->SwizzleY = i->SwizzleY;
+ o->SwizzleZ = i->SwizzleZ;
+ o->SwizzleW = i->SwizzleW;
+ o->Absolute = i->Absolute;
+ o->Negate = i->Negate;
+}
+
+static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
+{
+ o->File = i->File;
+ o->WriteMask = i->WriteMask;
+ o->Indirect = i->Indirect;
+ o->Dimension = i->Dimension;
+ o->Index = i->Index;
+}
+
+static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
+{
+ memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
+ memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
+
+ copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
+
+ copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
+ copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
+ copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
+}
+
+static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
+{
+ if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+ memcpy(o, i, sizeof(*o));
+ else
+ copy_instruction(&o->FullInstruction, &i->FullInstruction);
+
+}
+
+struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
+{
+ struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
+ out_tokens->NumTokens = 0;
+ struct tgsi_parse_context parse;
+ int i = 0;
+
+ /* Count the tokens */
+ tgsi_parse_init( &parse, tokens );
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+ tgsi_parse_token( &parse );
+ out_tokens->NumTokens++;
+ }
+ tgsi_parse_free (&parse);
+
+ /* Allocate our tokens */
+ out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
+
+ tgsi_parse_init( &parse, tokens );
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+ tgsi_parse_token( &parse );
+ copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
+
+ if (i > 0)
+ i915_fpc_optimize_mov_after_mul(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
+
+ i++;
+ }
+ tgsi_parse_free (&parse);
+
+ return out_tokens;
+}
+
+void i915_optimize_free(struct i915_token_list* tokens)
+{
+ free(tokens->Tokens);
+ free(tokens);
+}
+
+