summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/i915
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/i915')
-rw-r--r--src/gallium/drivers/i915/Makefile1
-rw-r--r--src/gallium/drivers/i915/SConscript1
-rw-r--r--src/gallium/drivers/i915/i915_batch.h18
-rw-r--r--src/gallium/drivers/i915/i915_clear.c5
-rw-r--r--src/gallium/drivers/i915/i915_context.h2
-rw-r--r--src/gallium/drivers/i915/i915_flush.c1
-rw-r--r--src/gallium/drivers/i915/i915_fpc.h88
-rw-r--r--src/gallium/drivers/i915/i915_fpc_emit.c1
-rw-r--r--src/gallium/drivers/i915/i915_fpc_optimize.c259
-rw-r--r--src/gallium/drivers/i915/i915_fpc_translate.c196
-rw-r--r--src/gallium/drivers/i915/i915_prim_emit.c2
-rw-r--r--src/gallium/drivers/i915/i915_prim_vbuf.c3
-rw-r--r--src/gallium/drivers/i915/i915_state.c2
-rw-r--r--src/gallium/drivers/i915/i915_state_emit.c97
-rw-r--r--src/gallium/drivers/i915/i915_winsys.h6
15 files changed, 527 insertions, 155 deletions
diff --git a/src/gallium/drivers/i915/Makefile b/src/gallium/drivers/i915/Makefile
index 7781247..36197fb 100644
--- a/src/gallium/drivers/i915/Makefile
+++ b/src/gallium/drivers/i915/Makefile
@@ -27,6 +27,7 @@ C_SOURCES = \
i915_resource_buffer.c \
i915_fpc_emit.c \
i915_fpc_translate.c \
+ i915_fpc_optimize.c \
i915_surface.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/i915/SConscript b/src/gallium/drivers/i915/SConscript
index 9837060..76f5970 100644
--- a/src/gallium/drivers/i915/SConscript
+++ b/src/gallium/drivers/i915/SConscript
@@ -14,6 +14,7 @@ i915 = env.ConvenienceLibrary(
'i915_flush.c',
'i915_fpc_emit.c',
'i915_fpc_translate.c',
+ 'i915_fpc_optimize.c',
'i915_prim_emit.c',
'i915_prim_vbuf.c',
'i915_query.c',
diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h
index ce2691b..a1f8bca 100644
--- a/src/gallium/drivers/i915/i915_batch.h
+++ b/src/gallium/drivers/i915/i915_batch.h
@@ -29,6 +29,7 @@
#define I915_BATCH_H
#include "i915_batchbuffer.h"
+#include "i915_context.h"
#define BEGIN_BATCH(dwords) \
@@ -49,11 +50,26 @@
#define FLUSH_BATCH(fence) \
i915_flush(i915, fence)
-
/************************************************************************
* i915_flush.c
*/
void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence);
+/*
+ * Add num_vertex to the number of vertices queued since the last flush and
+ * decide whether the batch should be flushed now:
+ *  - 256 or fewer vertices queued: never flush.
+ *  - more than 4096 vertices queued: always flush.
+ *  - anything in between: flush only if the winsys reports the current
+ *    color buffer as not busy.
+ */
+static INLINE void i915_flush_heuristically(struct i915_context* i915,
+                                            int num_vertex)
+{
+   struct i915_winsys *iws = i915->iws;
+   int queued;
+
+   i915->vertices_since_last_flush += num_vertex;
+   queued = i915->vertices_since_last_flush;
+
+   if (queued <= 256)
+      return;
+
+   /* The busy query is skipped once the upper limit has been exceeded. */
+   if (queued > 4096 || !iws->buffer_is_busy(iws, i915->current.cbuf_bo))
+      FLUSH_BATCH(NULL);
+}
+
#endif
diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c
index fcb208d..e1d6a74 100644
--- a/src/gallium/drivers/i915/i915_clear.c
+++ b/src/gallium/drivers/i915/i915_clear.c
@@ -120,6 +120,11 @@ i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba,
OUT_BATCH_F(desty + height);
OUT_BATCH_F(destx);
OUT_BATCH_F(desty);
+
+ /* Flush after clear, it's expected to be a costly operation.
+ * This is not required, just a heuristic.
+ */
+ FLUSH_BATCH(NULL);
}
/**
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index c964208..8486235 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -264,6 +264,8 @@ struct i915_context {
struct util_slab_mempool transfer_pool;
struct util_slab_mempool texture_transfer_pool;
+ int vertices_since_last_flush;
+
/** blitter/hw-clear */
struct blitter_context* blitter;
diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c
index b4e8114..6d76afa 100644
--- a/src/gallium/drivers/i915/i915_flush.c
+++ b/src/gallium/drivers/i915/i915_flush.c
@@ -77,4 +77,5 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence)
i915->static_dirty = ~0;
/* kernel emits flushes in between batchbuffers */
i915->flush_dirty = 0;
+ i915->vertices_since_last_flush = 0;
}
diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h
index 509395c..b760bc4 100644
--- a/src/gallium/drivers/i915/i915_fpc.h
+++ b/src/gallium/drivers/i915/i915_fpc.h
@@ -33,7 +33,9 @@
#include "i915_context.h"
#include "i915_reg.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
#define I915_PROGRAM_SIZE 192
@@ -207,4 +209,90 @@ extern void
i915_program_error(struct i915_fp_compile *p, const char *msg, ...);
+/*======================================================================
+ * i915_fpc_optimize.c
+ */
+
+
+/*
+ * Source operand as stored in the i915 token list.
+ *
+ * The fields are filled from a struct tgsi_src_register by copy_src_reg()
+ * in i915_fpc_optimize.c. The swizzle fields are 3 bits wide, which is
+ * enough to hold the additional TGSI_SWIZZLE_ZERO (4) and TGSI_SWIZZLE_ONE
+ * (5) values defined in this header.
+ */
+struct i915_src_register
+{
+ unsigned File : 4; /* TGSI_FILE_ */
+ unsigned Indirect : 1; /* BOOL */
+ unsigned Dimension : 1; /* BOOL */
+ int Index : 16; /* SINT */
+ unsigned SwizzleX : 3; /* TGSI_SWIZZLE_ */
+ unsigned SwizzleY : 3; /* TGSI_SWIZZLE_ */
+ unsigned SwizzleZ : 3; /* TGSI_SWIZZLE_ */
+ unsigned SwizzleW : 3; /* TGSI_SWIZZLE_ */
+ unsigned Absolute : 1; /* BOOL */
+ unsigned Negate : 1; /* BOOL */
+};
+
+/* Additional swizzle supported in i915 */
+#define TGSI_SWIZZLE_ZERO 4
+#define TGSI_SWIZZLE_ONE 5
+
+/*
+ * Destination operand as stored in the i915 token list.
+ *
+ * The fields are filled from a struct tgsi_dst_register by copy_dst_reg()
+ * in i915_fpc_optimize.c.
+ */
+struct i915_dst_register
+{
+ unsigned File : 4; /* TGSI_FILE_ */
+ unsigned WriteMask : 4; /* TGSI_WRITEMASK_ */
+ unsigned Indirect : 1; /* BOOL */
+ unsigned Dimension : 1; /* BOOL */
+ int Index : 16; /* SINT */
+ unsigned Padding : 6; /* not written by copy_dst_reg() */
+};
+
+
+/*
+ * Full destination operand of an i915_full_instruction.
+ *
+ * Only Register is stored; the members listed in the comment inside the
+ * struct are disabled.
+ */
+struct i915_full_dst_register
+{
+ struct i915_dst_register Register;
+/*
+ struct tgsi_src_register Indirect;
+ struct tgsi_dimension Dimension;
+ struct tgsi_src_register DimIndirect;
+*/
+};
+
+/*
+ * Full source operand of an i915_full_instruction.
+ *
+ * Only Register is stored; the members listed in the comment inside the
+ * struct are disabled.
+ */
+struct i915_full_src_register
+{
+ struct i915_src_register Register;
+/*
+ struct tgsi_src_register Indirect;
+ struct tgsi_dimension Dimension;
+ struct tgsi_src_register DimIndirect;
+*/
+};
+
+/*
+ * Instruction as stored in the i915 token list: the TGSI instruction
+ * header and texture info, plus one destination and three source slots
+ * using the i915 register structs.
+ *
+ * copy_instruction() in i915_fpc_optimize.c fills Dst[0] and Src[0..2]
+ * unconditionally. The Predicate and Label members in the comment inside
+ * the struct are disabled.
+ */
+struct i915_full_instruction
+{
+ struct tgsi_instruction Instruction;
+/*
+ struct tgsi_instruction_predicate Predicate;
+ struct tgsi_instruction_label Label;
+*/
+ struct tgsi_instruction_texture Texture;
+ struct i915_full_dst_register Dst[1];
+ struct i915_full_src_register Src[3];
+};
+
+
+/*
+ * One entry of an i915_token_list.
+ *
+ * Token.Type selects which member is valid. Instructions use the
+ * i915-specific i915_full_instruction; declarations, immediates and
+ * properties use the TGSI structs directly.
+ */
+union i915_full_token
+{
+ struct tgsi_token Token;
+ struct tgsi_full_declaration FullDeclaration;
+ struct tgsi_full_immediate FullImmediate;
+ struct i915_full_instruction FullInstruction;
+ struct tgsi_full_property FullProperty;
+};
+
+/*
+ * Token array produced by i915_optimize() and released with
+ * i915_optimize_free().
+ */
+struct i915_token_list
+{
+ union i915_full_token* Tokens; /* array of NumTokens entries */
+ unsigned NumTokens; /* number of entries in Tokens */
+};
+
+/*
+ * Parse a TGSI token stream into a newly allocated i915_token_list,
+ * applying the driver's peephole optimization to adjacent instructions
+ * while copying. The caller releases the list with i915_optimize_free().
+ */
+extern struct i915_token_list* i915_optimize(const struct tgsi_token *tokens);
+
+/*
+ * Release a token list returned by i915_optimize(), including its token
+ * array.
+ */
+extern void i915_optimize_free(struct i915_token_list* tokens);
+
#endif
diff --git a/src/gallium/drivers/i915/i915_fpc_emit.c b/src/gallium/drivers/i915/i915_fpc_emit.c
index d28595e..c4a42df 100644
--- a/src/gallium/drivers/i915/i915_fpc_emit.c
+++ b/src/gallium/drivers/i915/i915_fpc_emit.c
@@ -369,7 +369,6 @@ i915_emit_const4f(struct i915_fp_compile * p,
// XXX emit swizzle here for 0, 1, -1 and any combination thereof
// we can use swizzle + neg for that
- printf("const %f %f %f %f\n",c0,c1,c2,c3);
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
if (ifs->constant_flags[reg] == 0xf &&
ifs->constants[reg][0] == c0 &&
diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c
new file mode 100644
index 0000000..2b739e9
--- /dev/null
+++ b/src/gallium/drivers/i915/i915_fpc_optimize.c
@@ -0,0 +1,259 @@
+/**************************************************************************
+ *
+ * Copyright 2011 The Chromium OS authors.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_fpc.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+
+/*
+ * Returns TRUE when both destinations name the same register, i.e. File,
+ * Indirect, Dimension and Index all match. Write masks are not compared.
+ */
+static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
+{
+   const struct i915_dst_register *a = &d1->Register;
+   const struct i915_dst_register *b = &d2->Register;
+
+   if (a->File != b->File)
+      return FALSE;
+   if (a->Indirect != b->Indirect)
+      return FALSE;
+   if (a->Dimension != b->Dimension)
+      return FALSE;
+
+   return a->Index == b->Index;
+}
+
+/*
+ * Returns TRUE when both sources name the same register with the same
+ * modifiers, i.e. File, Indirect, Dimension, Index, Absolute and Negate all
+ * match. Swizzles are not compared.
+ */
+static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
+{
+   const struct i915_src_register *a = &d1->Register;
+   const struct i915_src_register *b = &d2->Register;
+
+   if (a->File != b->File)
+      return FALSE;
+   if (a->Indirect != b->Indirect)
+      return FALSE;
+   if (a->Dimension != b->Dimension)
+      return FALSE;
+   if (a->Index != b->Index)
+      return FALSE;
+   if (a->Absolute != b->Absolute)
+      return FALSE;
+
+   return a->Negate == b->Negate;
+}
+
+/*
+ * Returns TRUE when r uses the identity swizzle in every channel selected
+ * by write_mask. Channels outside write_mask are ignored.
+ */
+static boolean is_unswizzled(struct i915_full_src_register* r,
+                             unsigned write_mask)
+{
+   const struct i915_src_register *reg = &r->Register;
+   const boolean x_ok = !(write_mask & TGSI_WRITEMASK_X) ||
+                        reg->SwizzleX == TGSI_SWIZZLE_X;
+   const boolean y_ok = !(write_mask & TGSI_WRITEMASK_Y) ||
+                        reg->SwizzleY == TGSI_SWIZZLE_Y;
+   const boolean z_ok = !(write_mask & TGSI_WRITEMASK_Z) ||
+                        reg->SwizzleZ == TGSI_SWIZZLE_Z;
+   const boolean w_ok = !(write_mask & TGSI_WRITEMASK_W) ||
+                        reg->SwizzleW == TGSI_SWIZZLE_W;
+
+   return x_ok && y_ok && z_ok && w_ok;
+}
+
+/* Returns TRUE for the opcodes this pass treats as commutative: ADD and MUL. */
+static boolean op_commutes(unsigned opcode)
+{
+   switch (opcode) {
+   case TGSI_OPCODE_ADD:
+   case TGSI_OPCODE_MUL:
+      return TRUE;
+   default:
+      return FALSE;
+   }
+}
+
+/*
+ * Returns the swizzle value that selects the neutral element of opcode:
+ * TGSI_SWIZZLE_ZERO for ADD, TGSI_SWIZZLE_ONE for MUL. Any other opcode is
+ * reported through debug_printf and yields TGSI_SWIZZLE_ZERO.
+ */
+static unsigned op_neutral_element(unsigned opcode)
+{
+   switch (opcode) {
+   case TGSI_OPCODE_ADD:
+      return TGSI_SWIZZLE_ZERO;
+   case TGSI_OPCODE_MUL:
+      return TGSI_SWIZZLE_ONE;
+   default:
+      debug_printf("Unknown opcode %d\n",opcode);
+      return TGSI_SWIZZLE_ZERO;
+   }
+}
+
+/*
+ * Rewrites all four swizzle channels of r: channels selected by write_mask
+ * get the value neutral, every other channel is reset to its identity
+ * swizzle.
+ */
+static void set_neutral_element_swizzle(struct i915_full_src_register* r,
+                                        unsigned write_mask,
+                                        unsigned neutral)
+{
+   struct i915_src_register *reg = &r->Register;
+
+   reg->SwizzleX = (write_mask & TGSI_WRITEMASK_X) ? neutral : TGSI_SWIZZLE_X;
+   reg->SwizzleY = (write_mask & TGSI_WRITEMASK_Y) ? neutral : TGSI_SWIZZLE_Y;
+   reg->SwizzleZ = (write_mask & TGSI_WRITEMASK_Z) ? neutral : TGSI_SWIZZLE_Z;
+   reg->SwizzleW = (write_mask & TGSI_WRITEMASK_W) ? neutral : TGSI_SWIZZLE_W;
+}
+
+/*
+ * Optimize away things like:
+ *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
+ *    MOV OUT[0].w, TEMP[2]
+ * into:
+ *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
+ * This is useful for optimizing texenv.
+ *
+ * current is the candidate ALU instruction (ADD or MUL) and next the
+ * candidate MOV that directly follows it. The pair is merged only when:
+ *  - both tokens are instructions with the same Saturate setting,
+ *  - both write the same destination register,
+ *  - the MOV source is the same register as one of the two ALU sources,
+ *  - every source is unswizzled in the channels its instruction writes.
+ *
+ * On a match the MOV becomes a NOP, the ALU source that matches the MOV
+ * source is reset to the identity swizzle, the other ALU source reads the
+ * operation's neutral element in the channels the MOV wrote, and the ALU
+ * write mask is extended by the MOV write mask. Otherwise both tokens are
+ * left untouched.
+ */
+static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next)
+{
+   struct i915_full_instruction *alu = &current->FullInstruction;
+   struct i915_full_instruction *mov = &next->FullInstruction;
+   unsigned alu_mask, mov_mask;
+   unsigned kept, other;
+
+   if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION ||
+       next->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+      return;
+
+   if (!op_commutes(alu->Instruction.Opcode) ||
+       mov->Instruction.Opcode != TGSI_OPCODE_MOV ||
+       alu->Instruction.Saturate != mov->Instruction.Saturate)
+      return;
+
+   /*
+    * The MOV has to target the register the ALU op writes. Comparing the
+    * MOV destination with itself (as was done before) accepted any MOV and
+    * would redirect its result into the ALU destination.
+    */
+   if (!same_dst_reg(&mov->Dst[0], &alu->Dst[0]))
+      return;
+
+   alu_mask = alu->Dst[0].Register.WriteMask;
+   mov_mask = mov->Dst[0].Register.WriteMask;
+
+   if (!is_unswizzled(&alu->Src[0], alu_mask) ||
+       !is_unswizzled(&alu->Src[1], alu_mask) ||
+       !is_unswizzled(&mov->Src[0], mov_mask))
+      return;
+
+   /* Find the ALU source the MOV reads; Src[1] is tried first. */
+   if (same_src_reg(&mov->Src[0], &alu->Src[1]))
+      kept = 1;
+   else if (same_src_reg(&mov->Src[0], &alu->Src[0]))
+      kept = 0;
+   else
+      return;
+   other = 1 - kept;
+
+   mov->Instruction.Opcode = TGSI_OPCODE_NOP;
+
+   /* The shared source is read straight through in all channels. */
+   set_neutral_element_swizzle(&alu->Src[kept], 0, 0);
+   /* The other source contributes the neutral element where the MOV wrote. */
+   set_neutral_element_swizzle(&alu->Src[other], mov_mask,
+                               op_neutral_element(alu->Instruction.Opcode));
+
+   alu->Dst[0].Register.WriteMask = alu_mask | mov_mask;
+}
+
+/*
+ * Copies a TGSI source register into its i915 counterpart, one field at a
+ * time.
+ */
+static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
+{
+   /* register addressing */
+   o->File      = i->File;
+   o->Index     = i->Index;
+   o->Indirect  = i->Indirect;
+   o->Dimension = i->Dimension;
+
+   /* modifiers */
+   o->Negate    = i->Negate;
+   o->Absolute  = i->Absolute;
+
+   /* per-channel swizzle */
+   o->SwizzleW  = i->SwizzleW;
+   o->SwizzleZ  = i->SwizzleZ;
+   o->SwizzleY  = i->SwizzleY;
+   o->SwizzleX  = i->SwizzleX;
+}
+
+/*
+ * Copies a TGSI destination register into its i915 counterpart, one field
+ * at a time. The Padding field of the output is left untouched.
+ */
+static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
+{
+   /* register addressing */
+   o->File      = i->File;
+   o->Index     = i->Index;
+   o->Indirect  = i->Indirect;
+   o->Dimension = i->Dimension;
+
+   /* channels written */
+   o->WriteMask = i->WriteMask;
+}
+
+/*
+ * Converts a parsed TGSI instruction into an i915_full_instruction: the
+ * instruction header and texture info are copied verbatim, the first
+ * destination and all source slots of the output go through the register
+ * copy helpers.
+ */
+static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
+{
+   unsigned s;
+
+   memcpy(&o->Instruction, &i->Instruction, sizeof o->Instruction);
+   memcpy(&o->Texture, &i->Texture, sizeof o->Texture);
+
+   copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
+
+   for (s = 0; s < sizeof o->Src / sizeof o->Src[0]; s++)
+      copy_src_reg(&o->Src[s].Register, &i->Src[s].Register);
+}
+
+/*
+ * Copies one parsed TGSI token into the i915 token union. Instructions are
+ * converted through copy_instruction(); every other token type is copied
+ * byte for byte, sizeof(*o) bytes in total.
+ */
+static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
+{
+   if (i->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
+      copy_instruction(&o->FullInstruction, &i->FullInstruction);
+      return;
+   }
+
+   memcpy(o, i, sizeof(*o));
+}
+
+/*
+ * Builds an i915_token_list from a TGSI token stream.
+ *
+ * The stream is parsed twice: once to count the tokens, once to copy them
+ * into the newly allocated array. While copying, each token is paired with
+ * its predecessor and handed to i915_fpc_optimize_mov_after_alu().
+ *
+ * Returns the new list, to be released with i915_optimize_free(), or NULL
+ * if an allocation fails.
+ */
+struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
+{
+   struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
+   struct tgsi_parse_context parse;
+   unsigned i = 0;
+
+   if (!out_tokens)
+      return NULL;
+
+   out_tokens->NumTokens = 0;
+
+   /* Count the tokens */
+   tgsi_parse_init( &parse, tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+      out_tokens->NumTokens++;
+   }
+   tgsi_parse_free (&parse);
+
+   /* Allocate our tokens */
+   out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
+
+   /* A NULL array is only an error when there is something to store. */
+   if (!out_tokens->Tokens && out_tokens->NumTokens > 0) {
+      FREE(out_tokens);
+      return NULL;
+   }
+
+   tgsi_parse_init( &parse, tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+      copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
+
+      /* Peephole: try to fold this token into the one copied just before. */
+      if (i > 0)
+         i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
+
+      i++;
+   }
+   tgsi_parse_free (&parse);
+
+   return out_tokens;
+}
+
+/*
+ * Releases a token list created by i915_optimize(), including its token
+ * array. The memory comes from MALLOC, so it is returned through the
+ * matching FREE wrapper. A NULL list is ignored.
+ */
+void i915_optimize_free(struct i915_token_list* tokens)
+{
+   if (!tokens)
+      return;
+
+   FREE(tokens->Tokens);
+   FREE(tokens);
+}
+
+
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 0cbd4f2..e19d9be 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -172,7 +172,7 @@ static uint get_mapping(struct i915_fragment_shader* fs, int unit)
*/
static uint
src_vector(struct i915_fp_compile *p,
- const struct tgsi_full_src_register *source,
+ const struct i915_full_src_register *source,
struct i915_fragment_shader* fs)
{
uint index = source->Register.Index;
@@ -287,7 +287,7 @@ src_vector(struct i915_fp_compile *p,
*/
static uint
get_result_vector(struct i915_fp_compile *p,
- const struct tgsi_full_dst_register *dest)
+ const struct i915_full_dst_register *dest)
{
switch (dest->Register.File) {
case TGSI_FILE_OUTPUT:
@@ -316,7 +316,7 @@ get_result_vector(struct i915_fp_compile *p,
* Compute flags for saturation and writemask.
*/
static uint
-get_result_flags(const struct tgsi_full_instruction *inst)
+get_result_flags(const struct i915_full_instruction *inst)
{
const uint writeMask
= inst->Dst[0].Register.WriteMask;
@@ -378,7 +378,7 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex)
*/
static void
emit_tex(struct i915_fp_compile *p,
- const struct tgsi_full_instruction *inst,
+ const struct i915_full_instruction *inst,
uint opcode,
struct i915_fragment_shader* fs)
{
@@ -404,7 +404,7 @@ emit_tex(struct i915_fp_compile *p,
*/
static void
emit_simple_arith(struct i915_fp_compile *p,
- const struct tgsi_full_instruction *inst,
+ const struct i915_full_instruction *inst,
uint opcode, uint numArgs,
struct i915_fragment_shader* fs)
{
@@ -429,11 +429,11 @@ emit_simple_arith(struct i915_fp_compile *p,
/** As above, but swap the first two src regs */
static void
emit_simple_arith_swap2(struct i915_fp_compile *p,
- const struct tgsi_full_instruction *inst,
+ const struct i915_full_instruction *inst,
uint opcode, uint numArgs,
struct i915_fragment_shader* fs)
{
- struct tgsi_full_instruction inst2;
+ struct i915_full_instruction inst2;
assert(numArgs == 2);
@@ -450,13 +450,14 @@ emit_simple_arith_swap2(struct i915_fp_compile *p,
*
* Possible concerns:
*
+ * DDX, DDY -- return 0
* SIN, COS -- could use another taylor step?
* LIT -- results seem a little different to sw mesa
* LOG -- different to mesa on negative numbers, but this is conformant.
*/
static void
i915_translate_instruction(struct i915_fp_compile *p,
- const struct tgsi_full_instruction *inst,
+ const struct i915_full_instruction *inst,
struct i915_fragment_shader *fs)
{
uint writemask;
@@ -727,6 +728,9 @@ i915_translate_instruction(struct i915_fp_compile *p,
emit_simple_arith(p, inst, A0_MUL, 2, fs);
break;
+ case TGSI_OPCODE_NOP:
+ break;
+
case TGSI_OPCODE_POW:
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
@@ -1043,107 +1047,107 @@ i915_translate_instruction(struct i915_fp_compile *p,
}
-/**
- * Translate TGSI fragment shader into i915 hardware instructions.
- * \param p the translation state
- * \param tokens the TGSI token array
- */
-static void
-i915_translate_instructions(struct i915_fp_compile *p,
- const struct tgsi_token *tokens,
- struct i915_fragment_shader *fs)
+static void i915_translate_token(struct i915_fp_compile *p,
+ const union i915_full_token* token,
+ struct i915_fragment_shader *fs)
{
struct i915_fragment_shader *ifs = p->shader;
- struct tgsi_parse_context parse;
-
- tgsi_parse_init( &parse, tokens );
-
- while( !tgsi_parse_end_of_tokens( &parse ) ) {
-
- tgsi_parse_token( &parse );
+ switch( token->Token.Type ) {
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ /*
+ * We only support one cbuf, but we still need to ignore the property
+ * correctly so we don't hit the assert at the end of the switch case.
+ */
+ assert(token->FullProperty.Property.PropertyName ==
+ TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
+ break;
- switch( parse.FullToken.Token.Type ) {
- case TGSI_TOKEN_TYPE_PROPERTY:
- /*
- * We only support one cbuf, but we still need to ignore the property
- * correctly so we don't hit the assert at the end of the switch case.
- */
- assert(parse.FullToken.FullProperty.Property.PropertyName ==
- TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
- break;
- case TGSI_TOKEN_TYPE_DECLARATION:
- if (parse.FullToken.FullDeclaration.Declaration.File
- == TGSI_FILE_CONSTANT) {
- uint i;
- for (i = parse.FullToken.FullDeclaration.Range.First;
- i <= parse.FullToken.FullDeclaration.Range.Last;
- i++) {
- assert(ifs->constant_flags[i] == 0x0);
- ifs->constant_flags[i] = I915_CONSTFLAG_USER;
- ifs->num_constants = MAX2(ifs->num_constants, i + 1);
- }
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ if (token->FullDeclaration.Declaration.File
+ == TGSI_FILE_CONSTANT) {
+ uint i;
+ for (i = token->FullDeclaration.Range.First;
+ i <= token->FullDeclaration.Range.Last;
+ i++) {
+ assert(ifs->constant_flags[i] == 0x0);
+ ifs->constant_flags[i] = I915_CONSTFLAG_USER;
+ ifs->num_constants = MAX2(ifs->num_constants, i + 1);
}
- else if (parse.FullToken.FullDeclaration.Declaration.File
- == TGSI_FILE_TEMPORARY) {
- uint i;
- for (i = parse.FullToken.FullDeclaration.Range.First;
- i <= parse.FullToken.FullDeclaration.Range.Last;
- i++) {
- if (i >= I915_MAX_TEMPORARY)
- debug_printf("Too many temps (%d)\n",i);
- else
- /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
- p->temp_flag |= (1 << i); /* mark temp as used */
- }
+ }
+ else if (token->FullDeclaration.Declaration.File
+ == TGSI_FILE_TEMPORARY) {
+ uint i;
+ for (i = token->FullDeclaration.Range.First;
+ i <= token->FullDeclaration.Range.Last;
+ i++) {
+ if (i >= I915_MAX_TEMPORARY)
+ debug_printf("Too many temps (%d)\n",i);
+ else
+ /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
+ p->temp_flag |= (1 << i); /* mark temp as used */
}
- break;
+ }
+ break;
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- {
- const struct tgsi_full_immediate *imm
- = &parse.FullToken.FullImmediate;
- const uint pos = p->num_immediates++;
- uint j;
- assert( imm->Immediate.NrTokens <= 4 + 1 );
- for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
- p->immediates[pos][j] = imm->u[j].Float;
- }
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ const struct tgsi_full_immediate *imm
+ = &token->FullImmediate;
+ const uint pos = p->num_immediates++;
+ uint j;
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
+ for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
+ p->immediates[pos][j] = imm->u[j].Float;
}
- break;
+ }
+ break;
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- if (p->first_instruction) {
- /* resolve location of immediates */
- uint i, j;
- for (i = 0; i < p->num_immediates; i++) {
- /* find constant slot for this immediate */
- for (j = 0; j < I915_MAX_CONSTANT; j++) {
- if (ifs->constant_flags[j] == 0x0) {
- memcpy(ifs->constants[j],
- p->immediates[i],
- 4 * sizeof(float));
- /*printf("immediate %d maps to const %d\n", i, j);*/
- ifs->constant_flags[j] = 0xf; /* all four comps used */
- p->immediates_map[i] = j;
- ifs->num_constants = MAX2(ifs->num_constants, j + 1);
- break;
- }
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (p->first_instruction) {
+ /* resolve location of immediates */
+ uint i, j;
+ for (i = 0; i < p->num_immediates; i++) {
+ /* find constant slot for this immediate */
+ for (j = 0; j < I915_MAX_CONSTANT; j++) {
+ if (ifs->constant_flags[j] == 0x0) {
+ memcpy(ifs->constants[j],
+ p->immediates[i],
+ 4 * sizeof(float));
+ /*printf("immediate %d maps to const %d\n", i, j);*/
+ ifs->constant_flags[j] = 0xf; /* all four comps used */
+ p->immediates_map[i] = j;
+ ifs->num_constants = MAX2(ifs->num_constants, j + 1);
+ break;
}
}
-
- p->first_instruction = FALSE;
}
- i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs);
- break;
-
- default:
- assert( 0 );
+ p->first_instruction = FALSE;
}
- } /* while */
+ i915_translate_instruction(p, &token->FullInstruction, fs);
+ break;
+
+ default:
+ assert( 0 );
+ }
- tgsi_parse_free (&parse);
+}
+
+/**
+ * Translate a token list into i915 hardware instructions by handing every
+ * token, in order, to i915_translate_token().
+ * \param p       the translation state
+ * \param tokens  the i915 token list to translate
+ * \param fs      the fragment shader passed through to i915_translate_token()
+ */
+static void
+i915_translate_instructions(struct i915_fp_compile *p,
+                            const struct i915_token_list *tokens,
+                            struct i915_fragment_shader *fs)
+{
+   int idx = 0;
+
+   while (idx < tokens->NumTokens) {
+      i915_translate_token(p, &tokens->Tokens[idx], fs);
+      idx++;
+   }
+}
@@ -1302,8 +1306,10 @@ i915_translate_fragment_program( struct i915_context *i915,
p = i915_init_compile(i915, fs);
- i915_translate_instructions(p, tokens, fs);
+ struct i915_token_list* i_tokens = i915_optimize(tokens);
+ i915_translate_instructions(p, i_tokens, fs);
i915_fixup_depth_write(p);
i915_fini_compile(i915, p);
+ i915_optimize_free(i_tokens);
}
diff --git a/src/gallium/drivers/i915/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c
index 85656cd..1acde97 100644
--- a/src/gallium/drivers/i915/i915_prim_emit.c
+++ b/src/gallium/drivers/i915/i915_prim_emit.c
@@ -166,6 +166,8 @@ emit_prim( struct draw_stage *stage,
for (i = 0; i < nr; i++)
emit_hw_vertex(i915, prim->v[i]);
+
+ i915_flush_heuristically(i915, nr);
}
diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c
index 79db3b6..d8ae1de 100644
--- a/src/gallium/drivers/i915/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915/i915_prim_vbuf.c
@@ -487,6 +487,7 @@ draw_arrays_fallback(struct vbuf_render *render,
draw_arrays_generate_indices(render, start, nr, i915_render->fallback);
+ i915_flush_heuristically(i915, nr_indices);
out:
return;
}
@@ -534,6 +535,7 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render,
nr);
OUT_BATCH(start); /* Beginning vertex index */
+ i915_flush_heuristically(i915, nr);
out:
return;
}
@@ -657,6 +659,7 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render,
save_nr_indices,
i915_render->fallback);
+ i915_flush_heuristically(i915, nr_indices);
out:
return;
}
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index f412626..2812de1 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -244,7 +244,7 @@ i915_create_sampler_state(struct pipe_context *pipe,
/* Shadow:
*/
- if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
{
cso->state[0] |= (SS2_SHADOW_ENABLE |
i915_translate_shadow_compare_func(sampler->compare_func));
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 39fb13a..4f44796 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -346,97 +346,80 @@ emit_constants(struct i915_context *i915)
static const struct
{
enum pipe_format format;
- uint hw_shift_R;
- uint hw_shift_G;
- uint hw_shift_B;
- uint hw_shift_A;
+ uint hw_swizzle;
} fixup_formats[] = {
- { PIPE_FORMAT_R8G8B8A8_UNORM, 20, 24, 28, 16 /* BGRA */},
- { PIPE_FORMAT_L8_UNORM, 28, 28, 28, 16 /* RRRA */},
- { PIPE_FORMAT_I8_UNORM, 28, 28, 28, 16 /* RRRA */},
- { PIPE_FORMAT_A8_UNORM, 16, 16, 16, 16 /* AAAA */},
- { PIPE_FORMAT_NONE, 0, 0, 0, 0},
+ { PIPE_FORMAT_R8G8B8A8_UNORM, 0x21030000 /* BGRA */},
+ { PIPE_FORMAT_L8_UNORM, 0x00030000 /* RRRA */},
+ { PIPE_FORMAT_I8_UNORM, 0x00030000 /* RRRA */},
+ { PIPE_FORMAT_A8_UNORM, 0x33330000 /* AAAA */},
+ { PIPE_FORMAT_NONE, 0x00000000},
};
-static boolean need_fixup(struct pipe_surface* p)
+static uint need_target_fixup(struct pipe_surface* p)
{
enum pipe_format f;
-
/* if we don't have a surface bound yet, we don't need to fixup the shader */
if (!p)
- return FALSE;
+ return 0;
f = p->format;
for(int i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++)
if (fixup_formats[i].format == f)
- return TRUE;
+ return 1;
- return FALSE;
+ return 0;
}
-static uint fixup_swizzle(enum pipe_format f, uint v)
+static uint fixup_swizzle(enum pipe_format f)
{
- int i;
-
- for(i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++)
+ for(int i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++)
if (fixup_formats[i].format == f)
- break;
-
- if (fixup_formats[i].format == PIPE_FORMAT_NONE)
- return v;
-
- uint rgba = v & 0xFFFF0000;
+ return fixup_formats[i].hw_swizzle;
- v &= 0xFFFF;
- v |= ((rgba >> fixup_formats[i].hw_shift_R) & 0xF) << 28;
- v |= ((rgba >> fixup_formats[i].hw_shift_G) & 0xF) << 24;
- v |= ((rgba >> fixup_formats[i].hw_shift_B) & 0xF) << 20;
- v |= ((rgba >> fixup_formats[i].hw_shift_A) & 0xF) << 16;
-
- return v;
+ return 0;
}
static void
validate_program(struct i915_context *i915, unsigned *batch_space)
{
- *batch_space = i915->fs->program_len;
+ struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
+ uint additional_size = need_target_fixup(cbuf_surface);
+
+ /* we need more batch space if we want to emulate rgba framebuffers */
+ *batch_space = i915->fs->program_len + 3 * additional_size;
}
static void
emit_program(struct i915_context *i915)
{
struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
- boolean need_format_fixup = need_fixup(cbuf_surface);
- int i;
- int fixup_offset = -1;
+ uint target_fixup = need_target_fixup(cbuf_surface);
+ uint i;
/* we should always have, at least, a pass-through program */
assert(i915->fs->program_len > 0);
- if (need_format_fixup) {
- /* Find where we emit the output color */
- for (i = i915->fs->program_len - 3; i>0; i-=3) {
- uint instr = i915->fs->program[i];
- if ((instr & (REG_NR_MASK << A0_DEST_TYPE_SHIFT)) ==
- (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) ) {
- /* Found it! */
- fixup_offset = i + 1;
- break;
- }
- }
- if (fixup_offset == -1) {
- need_format_fixup = FALSE;
- debug_printf("couldn't find fixup offset\n");
- }
+ {
+ /* first word has the size, we have to adjust that */
+ uint size = (i915->fs->program[0]);
+ size += target_fixup * 3;
+ OUT_BATCH(size);
}
- /* emit the program to the hw */
- for (i = 0; i < i915->fs->program_len; i++) {
- if (need_format_fixup && (i == fixup_offset) ) {
- uint v = fixup_swizzle(cbuf_surface->format, i915->fs->program[i]);
- OUT_BATCH(v);
- } else
- OUT_BATCH(i915->fs->program[i]);
+ /* output the declarations of the program */
+ for (i=1 ; i < i915->fs->program_len; i++)
+ OUT_BATCH(i915->fs->program[i]);
+
+ /* we emit an additional mov with swizzle to fake RGBA framebuffers */
+ if (target_fixup) {
+ /* mov out_color, out_color.zyxw */
+ OUT_BATCH(A0_MOV |
+ (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
+ A0_DEST_CHANNEL_ALL |
+ (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) |
+ (T_DIFFUSE << A0_SRC0_NR_SHIFT));
+ OUT_BATCH(fixup_swizzle(cbuf_surface->format));
+ OUT_BATCH(0);
}
}
diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h
index 21cfdc9..2043860 100644
--- a/src/gallium/drivers/i915/i915_winsys.h
+++ b/src/gallium/drivers/i915/i915_winsys.h
@@ -207,6 +207,12 @@ struct i915_winsys {
void (*buffer_destroy)(struct i915_winsys *iws,
struct i915_winsys_buffer *buffer);
+
+ /**
+ * Check if a buffer is busy.
+ */
+ boolean (*buffer_is_busy)(struct i915_winsys *iws,
+ struct i915_winsys_buffer *buffer);
/*@}*/