diff options
Diffstat (limited to 'src/gallium/drivers/i915')
-rw-r--r-- | src/gallium/drivers/i915/Makefile | 1 | ||||
-rw-r--r-- | src/gallium/drivers/i915/SConscript | 1 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_batch.h | 18 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_clear.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_context.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_flush.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_fpc.h | 88 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_fpc_emit.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_fpc_optimize.c | 259 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_fpc_translate.c | 196 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_prim_emit.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_prim_vbuf.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_state.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_state_emit.c | 97 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_winsys.h | 6 |
15 files changed, 527 insertions, 155 deletions
diff --git a/src/gallium/drivers/i915/Makefile b/src/gallium/drivers/i915/Makefile index 7781247..36197fb 100644 --- a/src/gallium/drivers/i915/Makefile +++ b/src/gallium/drivers/i915/Makefile @@ -27,6 +27,7 @@ C_SOURCES = \ i915_resource_buffer.c \ i915_fpc_emit.c \ i915_fpc_translate.c \ + i915_fpc_optimize.c \ i915_surface.c include ../../Makefile.template diff --git a/src/gallium/drivers/i915/SConscript b/src/gallium/drivers/i915/SConscript index 9837060..76f5970 100644 --- a/src/gallium/drivers/i915/SConscript +++ b/src/gallium/drivers/i915/SConscript @@ -14,6 +14,7 @@ i915 = env.ConvenienceLibrary( 'i915_flush.c', 'i915_fpc_emit.c', 'i915_fpc_translate.c', + 'i915_fpc_optimize.c', 'i915_prim_emit.c', 'i915_prim_vbuf.c', 'i915_query.c', diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h index ce2691b..a1f8bca 100644 --- a/src/gallium/drivers/i915/i915_batch.h +++ b/src/gallium/drivers/i915/i915_batch.h @@ -29,6 +29,7 @@ #define I915_BATCH_H #include "i915_batchbuffer.h" +#include "i915_context.h" #define BEGIN_BATCH(dwords) \ @@ -49,11 +50,26 @@ #define FLUSH_BATCH(fence) \ i915_flush(i915, fence) - /************************************************************************ * i915_flush.c */ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence); +/* + * Flush if the current color buf is idle and we have more than 256 vertices + * queued, or if the current color buf is busy and we have more than 4096 + * vertices queued. 
+ */ +static INLINE void i915_flush_heuristically(struct i915_context* i915, + int num_vertex) +{ + struct i915_winsys *iws = i915->iws; + i915->vertices_since_last_flush += num_vertex; + if ( i915->vertices_since_last_flush > 4096 + || ( i915->vertices_since_last_flush > 256 && + !iws->buffer_is_busy(iws, i915->current.cbuf_bo)) ) + FLUSH_BATCH(NULL); +} + #endif diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c index fcb208d..e1d6a74 100644 --- a/src/gallium/drivers/i915/i915_clear.c +++ b/src/gallium/drivers/i915/i915_clear.c @@ -120,6 +120,11 @@ i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba, OUT_BATCH_F(desty + height); OUT_BATCH_F(destx); OUT_BATCH_F(desty); + + /* Flush after clear, its expected to be a costly operation. + * This is not required, just a heuristic + */ + FLUSH_BATCH(NULL); } /** diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index c964208..8486235 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -264,6 +264,8 @@ struct i915_context { struct util_slab_mempool transfer_pool; struct util_slab_mempool texture_transfer_pool; + int vertices_since_last_flush; + /** blitter/hw-clear */ struct blitter_context* blitter; diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index b4e8114..6d76afa 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -77,4 +77,5 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) i915->static_dirty = ~0; /* kernel emits flushes in between batchbuffers */ i915->flush_dirty = 0; + i915->vertices_since_last_flush = 0; } diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h index 509395c..b760bc4 100644 --- a/src/gallium/drivers/i915/i915_fpc.h +++ b/src/gallium/drivers/i915/i915_fpc.h @@ -33,7 +33,9 @@ #include 
"i915_context.h" #include "i915_reg.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" #define I915_PROGRAM_SIZE 192 @@ -207,4 +209,90 @@ extern void i915_program_error(struct i915_fp_compile *p, const char *msg, ...); +/*====================================================================== + * i915_fpc_optimize.c + */ + + +struct i915_src_register +{ + unsigned File : 4; /* TGSI_FILE_ */ + unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned SwizzleX : 3; /* TGSI_SWIZZLE_ */ + unsigned SwizzleY : 3; /* TGSI_SWIZZLE_ */ + unsigned SwizzleZ : 3; /* TGSI_SWIZZLE_ */ + unsigned SwizzleW : 3; /* TGSI_SWIZZLE_ */ + unsigned Absolute : 1; /* BOOL */ + unsigned Negate : 1; /* BOOL */ +}; + +/* Additional swizzle supported in i915 */ +#define TGSI_SWIZZLE_ZERO 4 +#define TGSI_SWIZZLE_ONE 5 + +struct i915_dst_register +{ + unsigned File : 4; /* TGSI_FILE_ */ + unsigned WriteMask : 4; /* TGSI_WRITEMASK_ */ + unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned Padding : 6; +}; + + +struct i915_full_dst_register +{ + struct i915_dst_register Register; +/* + struct tgsi_src_register Indirect; + struct tgsi_dimension Dimension; + struct tgsi_src_register DimIndirect; +*/ +}; + +struct i915_full_src_register +{ + struct i915_src_register Register; +/* + struct tgsi_src_register Indirect; + struct tgsi_dimension Dimension; + struct tgsi_src_register DimIndirect; +*/ +}; + +struct i915_full_instruction +{ + struct tgsi_instruction Instruction; +/* + struct tgsi_instruction_predicate Predicate; + struct tgsi_instruction_label Label; +*/ + struct tgsi_instruction_texture Texture; + struct i915_full_dst_register Dst[1]; + struct i915_full_src_register Src[3]; +}; + + +union i915_full_token +{ + struct tgsi_token Token; + struct tgsi_full_declaration FullDeclaration; + struct tgsi_full_immediate FullImmediate; + struct i915_full_instruction 
FullInstruction; + struct tgsi_full_property FullProperty; +}; + +struct i915_token_list +{ + union i915_full_token* Tokens; + unsigned NumTokens; +}; + +extern struct i915_token_list* i915_optimize(const struct tgsi_token *tokens); + +extern void i915_optimize_free(struct i915_token_list* tokens); + #endif diff --git a/src/gallium/drivers/i915/i915_fpc_emit.c b/src/gallium/drivers/i915/i915_fpc_emit.c index d28595e..c4a42df 100644 --- a/src/gallium/drivers/i915/i915_fpc_emit.c +++ b/src/gallium/drivers/i915/i915_fpc_emit.c @@ -369,7 +369,6 @@ i915_emit_const4f(struct i915_fp_compile * p, // XXX emit swizzle here for 0, 1, -1 and any combination thereof // we can use swizzle + neg for that - printf("const %f %f %f %f\n",c0,c1,c2,c3); for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { if (ifs->constant_flags[reg] == 0xf && ifs->constants[reg][0] == c0 && diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c new file mode 100644 index 0000000..2b739e9 --- /dev/null +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -0,0 +1,259 @@ +/************************************************************************** + * + * Copyright 2011 The Chromium OS authors. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_fpc.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+
+/*
+ * TRUE when both destinations name the same register (file, indirect and
+ * dimension flags, index). The write masks are deliberately not compared.
+ */
+static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
+{
+   return (d1->Register.File == d2->Register.File &&
+           d1->Register.Indirect == d2->Register.Indirect &&
+           d1->Register.Dimension == d2->Register.Dimension &&
+           d1->Register.Index == d2->Register.Index);
+}
+
+/*
+ * TRUE when both sources name the same register with the same absolute and
+ * negate modifiers. The swizzles are deliberately not compared.
+ */
+static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
+{
+   return (d1->Register.File == d2->Register.File &&
+           d1->Register.Indirect == d2->Register.Indirect &&
+           d1->Register.Dimension == d2->Register.Dimension &&
+           d1->Register.Index == d2->Register.Index &&
+           d1->Register.Absolute == d2->Register.Absolute &&
+           d1->Register.Negate == d2->Register.Negate);
+}
+
+/*
+ * TRUE when every component selected by write_mask uses the identity
+ * swizzle (x reads X, y reads Y, ...). Components outside write_mask are
+ * ignored.
+ */
+static boolean is_unswizzled(struct i915_full_src_register* r,
+                             unsigned write_mask)
+{
+   if ( (write_mask & TGSI_WRITEMASK_X) && r->Register.SwizzleX != TGSI_SWIZZLE_X)
+      return FALSE;
+   if ( (write_mask & TGSI_WRITEMASK_Y) && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
+      return FALSE;
+   if ( (write_mask & TGSI_WRITEMASK_Z) && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
+      return FALSE;
+   if ( (write_mask & TGSI_WRITEMASK_W) && r->Register.SwizzleW != TGSI_SWIZZLE_W)
+      return FALSE;
+   return TRUE;
+}
+
+/* TRUE for the opcodes this pass treats as commutative (ADD and MUL). */
+static boolean op_commutes(unsigned opcode)
+{
+   if (opcode == TGSI_OPCODE_ADD) return TRUE;
+   if (opcode == TGSI_OPCODE_MUL) return TRUE;
+   return FALSE;
+}
+
+/*
+ * Returns the swizzle selecting the neutral element of the operation:
+ * ZERO for ADD, ONE for MUL. Any other opcode is reported and ZERO is
+ * returned.
+ */
+static unsigned op_neutral_element(unsigned opcode)
+{
+   if (opcode == TGSI_OPCODE_ADD)
+      return TGSI_SWIZZLE_ZERO;
+   if (opcode == TGSI_OPCODE_MUL)
+      return TGSI_SWIZZLE_ONE;
+
+   debug_printf("Unknown opcode %d\n",opcode);
+   return TGSI_SWIZZLE_ZERO;
+}
+
+/*
+ * Sets the swizzle to the neutral element for the operation for the bits
+ * of writemask which are set, swizzle to identity otherwise.
+ */
+static void set_neutral_element_swizzle(struct i915_full_src_register* r,
+                                        unsigned write_mask,
+                                        unsigned neutral)
+{
+   if ( write_mask & TGSI_WRITEMASK_X )
+      r->Register.SwizzleX = neutral;
+   else
+      r->Register.SwizzleX = TGSI_SWIZZLE_X;
+
+   if ( write_mask & TGSI_WRITEMASK_Y )
+      r->Register.SwizzleY = neutral;
+   else
+      r->Register.SwizzleY = TGSI_SWIZZLE_Y;
+
+   if ( write_mask & TGSI_WRITEMASK_Z )
+      r->Register.SwizzleZ = neutral;
+   else
+      r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
+
+   if ( write_mask & TGSI_WRITEMASK_W )
+      r->Register.SwizzleW = neutral;
+   else
+      r->Register.SwizzleW = TGSI_SWIZZLE_W;
+}
+
+/*
+ * Folds the MOV "mov" into the preceding ALU instruction "alu".
+ *
+ * "shared" is the ALU source that the MOV also reads; it is reset to the
+ * identity swizzle. "other" is the remaining ALU source; on the components
+ * the MOV writes it is replaced by the neutral element of the operation, so
+ * that those components of the result equal "shared". The MOV becomes a NOP
+ * and its write mask is merged into the ALU write mask.
+ */
+static void fold_mov_into_alu(struct i915_full_instruction* alu,
+                              struct i915_full_instruction* mov,
+                              struct i915_full_src_register* shared,
+                              struct i915_full_src_register* other)
+{
+   const unsigned mov_mask = mov->Dst[0].Register.WriteMask;
+
+   mov->Instruction.Opcode = TGSI_OPCODE_NOP;
+
+   set_neutral_element_swizzle(shared, 0, 0);
+   set_neutral_element_swizzle(other, mov_mask,
+                               op_neutral_element(alu->Instruction.Opcode));
+
+   alu->Dst[0].Register.WriteMask |= mov_mask;
+}
+
+/*
+ * Optimize away things like:
+ *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
+ *    MOV OUT[0].w, TEMP[2]
+ * into:
+ *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
+ * This is useful for optimizing texenv.
+ *
+ * The fold is only done when both tokens are instructions with the same
+ * saturate mode, "current" is ADD or MUL, "next" is a MOV writing the same
+ * destination register as "current", the MOV source is one of the two ALU
+ * sources, and none of the sources is swizzled on the components its
+ * instruction writes.
+ */
+static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next)
+{
+   struct i915_full_instruction *alu, *mov;
+   unsigned alu_mask, mov_mask;
+
+   /* FullInstruction is only meaningful for instruction tokens. */
+   if ( current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION ||
+        next->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION )
+      return;
+
+   alu = &current->FullInstruction;
+   mov = &next->FullInstruction;
+   alu_mask = alu->Dst[0].Register.WriteMask;
+   mov_mask = mov->Dst[0].Register.WriteMask;
+
+   if ( !op_commutes(alu->Instruction.Opcode) ||
+        mov->Instruction.Opcode != TGSI_OPCODE_MOV ||
+        alu->Instruction.Saturate != mov->Instruction.Saturate )
+      return;
+
+   /*
+    * Both instructions must write the same register. This has to compare
+    * the MOV against the ALU instruction: comparing the MOV destination
+    * with itself is always true and lets a MOV to an unrelated register be
+    * folded away.
+    */
+   if ( !same_dst_reg(&mov->Dst[0], &alu->Dst[0]) )
+      return;
+
+   if ( !is_unswizzled(&alu->Src[0], alu_mask) ||
+        !is_unswizzled(&alu->Src[1], alu_mask) ||
+        !is_unswizzled(&mov->Src[0], mov_mask) )
+      return;
+
+   /* The second ALU source is tried first, then the first one. */
+   if ( same_src_reg(&mov->Src[0], &alu->Src[1]) )
+      fold_mov_into_alu(alu, mov, &alu->Src[1], &alu->Src[0]);
+   else if ( same_src_reg(&mov->Src[0], &alu->Src[0]) )
+      fold_mov_into_alu(alu, mov, &alu->Src[0], &alu->Src[1]);
+}
+
+/* Copies a TGSI source register field by field into the i915 layout. */
+static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
+{
+   o->File = i->File;
+   o->Indirect = i->Indirect;
+   o->Dimension = i->Dimension;
+   o->Index = i->Index;
+   o->SwizzleX = i->SwizzleX;
+   o->SwizzleY = i->SwizzleY;
+   o->SwizzleZ = i->SwizzleZ;
+   o->SwizzleW = i->SwizzleW;
+   o->Absolute = i->Absolute;
+   o->Negate = i->Negate;
+}
+
+/* Copies a TGSI destination register field by field into the i915 layout. */
+static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
+{
+   o->File = i->File;
+   o->WriteMask = i->WriteMask;
+   o->Indirect = i->Indirect;
+   o->Dimension = i->Dimension;
+   o->Index = i->Index;
+}
+
+/*
+ * Copies the instruction header, the texture token, the first destination
+ * and the first three sources of a TGSI instruction.
+ */
+static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
+{
+   memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
+   memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
+
+   copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
+
+   copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
+   copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
+   copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
+}
+
+/*
+ * Instruction tokens are converted field by field; every other token type
+ * is copied as raw bytes.
+ */
+static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
+{
+   if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+      memcpy(o, i, sizeof(*o));
+   else
+      copy_instruction(&o->FullInstruction, &i->FullInstruction);
+
+}
+
+/**
+ * Convert a TGSI token stream into an i915 token list, running the
+ * MOV-after-ALU peephole on each pair of adjacent tokens as they are copied.
+ *
+ * The stream is parsed twice: once to count the tokens so the array can be
+ * sized, then again to copy them.
+ *
+ * \param tokens  the TGSI token array
+ * \return a list to be released with i915_optimize_free(), or NULL if the
+ *         list or its token array could not be allocated
+ */
+struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
+{
+   struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
+   struct tgsi_parse_context parse;
+   unsigned i = 0;
+
+   if (!out_tokens)
+      return NULL;
+
+   out_tokens->NumTokens = 0;
+
+   /* Count the tokens */
+   tgsi_parse_init( &parse, tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+      out_tokens->NumTokens++;
+   }
+   tgsi_parse_free (&parse);
+
+   /* Allocate our tokens */
+   out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
+   if (!out_tokens->Tokens && out_tokens->NumTokens > 0) {
+      FREE(out_tokens);
+      return NULL;
+   }
+
+   /* Copy the tokens; the bound keeps the writes inside the array. */
+   tgsi_parse_init( &parse, tokens );
+   while( i < out_tokens->NumTokens && !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+      copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
+
+      if (i > 0)
+         i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
+
+      i++;
+   }
+   tgsi_parse_free (&parse);
+
+   return out_tokens;
+}
+
+/**
+ * Release a list returned by i915_optimize(). A NULL list is ignored.
+ *
+ * The memory comes from MALLOC, so it is released with FREE rather than the
+ * C library free().
+ */
+void i915_optimize_free(struct i915_token_list* tokens)
+{
+   if (!tokens)
+      return;
+
+   FREE(tokens->Tokens);
+   FREE(tokens);
+}
+
+
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 0cbd4f2..e19d9be 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -172,7 +172,7 @@ static uint get_mapping(struct i915_fragment_shader* fs, int unit) */ static uint src_vector(struct i915_fp_compile *p, - const struct tgsi_full_src_register *source, + const struct i915_full_src_register *source, struct i915_fragment_shader* fs) { uint index = source->Register.Index; @@ -287,7 +287,7 @@ src_vector(struct i915_fp_compile *p, */ static uint get_result_vector(struct i915_fp_compile *p, - const struct tgsi_full_dst_register *dest) + const struct i915_full_dst_register *dest) { switch (dest->Register.File) { case TGSI_FILE_OUTPUT: @@ -316,7 +316,7 @@ get_result_vector(struct i915_fp_compile *p, * Compute flags for saturation and writemask. 
*/ static uint -get_result_flags(const struct tgsi_full_instruction *inst) +get_result_flags(const struct i915_full_instruction *inst) { const uint writeMask = inst->Dst[0].Register.WriteMask; @@ -378,7 +378,7 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex) */ static void emit_tex(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, uint opcode, struct i915_fragment_shader* fs) { @@ -404,7 +404,7 @@ emit_tex(struct i915_fp_compile *p, */ static void emit_simple_arith(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, uint opcode, uint numArgs, struct i915_fragment_shader* fs) { @@ -429,11 +429,11 @@ emit_simple_arith(struct i915_fp_compile *p, /** As above, but swap the first two src regs */ static void emit_simple_arith_swap2(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, uint opcode, uint numArgs, struct i915_fragment_shader* fs) { - struct tgsi_full_instruction inst2; + struct i915_full_instruction inst2; assert(numArgs == 2); @@ -450,13 +450,14 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, * * Possible concerns: * + * DDX, DDY -- return 0 * SIN, COS -- could use another taylor step? * LIT -- results seem a little different to sw mesa * LOG -- different to mesa on negative numbers, but this is conformant. 
*/ static void i915_translate_instruction(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, struct i915_fragment_shader *fs) { uint writemask; @@ -727,6 +728,9 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith(p, inst, A0_MUL, 2, fs); break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_POW: src0 = src_vector(p, &inst->Src[0], fs); src1 = src_vector(p, &inst->Src[1], fs); @@ -1043,107 +1047,107 @@ i915_translate_instruction(struct i915_fp_compile *p, } -/** - * Translate TGSI fragment shader into i915 hardware instructions. - * \param p the translation state - * \param tokens the TGSI token array - */ -static void -i915_translate_instructions(struct i915_fp_compile *p, - const struct tgsi_token *tokens, - struct i915_fragment_shader *fs) +static void i915_translate_token(struct i915_fp_compile *p, + const union i915_full_token* token, + struct i915_fragment_shader *fs) { struct i915_fragment_shader *ifs = p->shader; - struct tgsi_parse_context parse; - - tgsi_parse_init( &parse, tokens ); - - while( !tgsi_parse_end_of_tokens( &parse ) ) { - - tgsi_parse_token( &parse ); + switch( token->Token.Type ) { + case TGSI_TOKEN_TYPE_PROPERTY: + /* + * We only support one cbuf, but we still need to ignore the property + * correctly so we don't hit the assert at the end of the switch case. + */ + assert(token->FullProperty.Property.PropertyName == + TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); + break; - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_PROPERTY: - /* - * We only support one cbuf, but we still need to ignore the property - * correctly so we don't hit the assert at the end of the switch case. 
- */ - assert(parse.FullToken.FullProperty.Property.PropertyName == - TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); - break; - case TGSI_TOKEN_TYPE_DECLARATION: - if (parse.FullToken.FullDeclaration.Declaration.File - == TGSI_FILE_CONSTANT) { - uint i; - for (i = parse.FullToken.FullDeclaration.Range.First; - i <= parse.FullToken.FullDeclaration.Range.Last; - i++) { - assert(ifs->constant_flags[i] == 0x0); - ifs->constant_flags[i] = I915_CONSTFLAG_USER; - ifs->num_constants = MAX2(ifs->num_constants, i + 1); - } + case TGSI_TOKEN_TYPE_DECLARATION: + if (token->FullDeclaration.Declaration.File + == TGSI_FILE_CONSTANT) { + uint i; + for (i = token->FullDeclaration.Range.First; + i <= token->FullDeclaration.Range.Last; + i++) { + assert(ifs->constant_flags[i] == 0x0); + ifs->constant_flags[i] = I915_CONSTFLAG_USER; + ifs->num_constants = MAX2(ifs->num_constants, i + 1); } - else if (parse.FullToken.FullDeclaration.Declaration.File - == TGSI_FILE_TEMPORARY) { - uint i; - for (i = parse.FullToken.FullDeclaration.Range.First; - i <= parse.FullToken.FullDeclaration.Range.Last; - i++) { - if (i >= I915_MAX_TEMPORARY) - debug_printf("Too many temps (%d)\n",i); - else - /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ - p->temp_flag |= (1 << i); /* mark temp as used */ - } + } + else if (token->FullDeclaration.Declaration.File + == TGSI_FILE_TEMPORARY) { + uint i; + for (i = token->FullDeclaration.Range.First; + i <= token->FullDeclaration.Range.Last; + i++) { + if (i >= I915_MAX_TEMPORARY) + debug_printf("Too many temps (%d)\n",i); + else + /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ + p->temp_flag |= (1 << i); /* mark temp as used */ } - break; + } + break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - const struct tgsi_full_immediate *imm - = &parse.FullToken.FullImmediate; - const uint pos = p->num_immediates++; - uint j; - assert( imm->Immediate.NrTokens <= 4 + 1 ); - for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { - p->immediates[pos][j] = 
imm->u[j].Float; - } + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm + = &token->FullImmediate; + const uint pos = p->num_immediates++; + uint j; + assert( imm->Immediate.NrTokens <= 4 + 1 ); + for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { + p->immediates[pos][j] = imm->u[j].Float; } - break; + } + break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (p->first_instruction) { - /* resolve location of immediates */ - uint i, j; - for (i = 0; i < p->num_immediates; i++) { - /* find constant slot for this immediate */ - for (j = 0; j < I915_MAX_CONSTANT; j++) { - if (ifs->constant_flags[j] == 0x0) { - memcpy(ifs->constants[j], - p->immediates[i], - 4 * sizeof(float)); - /*printf("immediate %d maps to const %d\n", i, j);*/ - ifs->constant_flags[j] = 0xf; /* all four comps used */ - p->immediates_map[i] = j; - ifs->num_constants = MAX2(ifs->num_constants, j + 1); - break; - } + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (p->first_instruction) { + /* resolve location of immediates */ + uint i, j; + for (i = 0; i < p->num_immediates; i++) { + /* find constant slot for this immediate */ + for (j = 0; j < I915_MAX_CONSTANT; j++) { + if (ifs->constant_flags[j] == 0x0) { + memcpy(ifs->constants[j], + p->immediates[i], + 4 * sizeof(float)); + /*printf("immediate %d maps to const %d\n", i, j);*/ + ifs->constant_flags[j] = 0xf; /* all four comps used */ + p->immediates_map[i] = j; + ifs->num_constants = MAX2(ifs->num_constants, j + 1); + break; } } - - p->first_instruction = FALSE; } - i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs); - break; - - default: - assert( 0 ); + p->first_instruction = FALSE; } - } /* while */ + i915_translate_instruction(p, &token->FullInstruction, fs); + break; + + default: + assert( 0 ); + } - tgsi_parse_free (&parse); +} + +/** + * Translate TGSI fragment shader into i915 hardware instructions. 
+ * \param p the translation state + * \param tokens the TGSI token array + */ +static void +i915_translate_instructions(struct i915_fp_compile *p, + const struct i915_token_list *tokens, + struct i915_fragment_shader *fs) +{ + int i; + for(i = 0; i<tokens->NumTokens; i++) { + i915_translate_token(p, &tokens->Tokens[i], fs); + } } @@ -1302,8 +1306,10 @@ i915_translate_fragment_program( struct i915_context *i915, p = i915_init_compile(i915, fs); - i915_translate_instructions(p, tokens, fs); + struct i915_token_list* i_tokens = i915_optimize(tokens); + i915_translate_instructions(p, i_tokens, fs); i915_fixup_depth_write(p); i915_fini_compile(i915, p); + i915_optimize_free(i_tokens); } diff --git a/src/gallium/drivers/i915/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c index 85656cd..1acde97 100644 --- a/src/gallium/drivers/i915/i915_prim_emit.c +++ b/src/gallium/drivers/i915/i915_prim_emit.c @@ -166,6 +166,8 @@ emit_prim( struct draw_stage *stage, for (i = 0; i < nr; i++) emit_hw_vertex(i915, prim->v[i]); + + i915_flush_heuristically(i915, nr); } diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index 79db3b6..d8ae1de 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -487,6 +487,7 @@ draw_arrays_fallback(struct vbuf_render *render, draw_arrays_generate_indices(render, start, nr, i915_render->fallback); + i915_flush_heuristically(i915, nr_indices); out: return; } @@ -534,6 +535,7 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, nr); OUT_BATCH(start); /* Beginning vertex index */ + i915_flush_heuristically(i915, nr); out: return; } @@ -657,6 +659,7 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render, save_nr_indices, i915_render->fallback); + i915_flush_heuristically(i915, nr_indices); out: return; } diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index f412626..2812de1 100644 --- 
a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -244,7 +244,7 @@ i915_create_sampler_state(struct pipe_context *pipe, /* Shadow: */ - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { cso->state[0] |= (SS2_SHADOW_ENABLE | i915_translate_shadow_compare_func(sampler->compare_func)); diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 39fb13a..4f44796 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -346,97 +346,80 @@ emit_constants(struct i915_context *i915) static const struct { enum pipe_format format; - uint hw_shift_R; - uint hw_shift_G; - uint hw_shift_B; - uint hw_shift_A; + uint hw_swizzle; } fixup_formats[] = { - { PIPE_FORMAT_R8G8B8A8_UNORM, 20, 24, 28, 16 /* BGRA */}, - { PIPE_FORMAT_L8_UNORM, 28, 28, 28, 16 /* RRRA */}, - { PIPE_FORMAT_I8_UNORM, 28, 28, 28, 16 /* RRRA */}, - { PIPE_FORMAT_A8_UNORM, 16, 16, 16, 16 /* AAAA */}, - { PIPE_FORMAT_NONE, 0, 0, 0, 0}, + { PIPE_FORMAT_R8G8B8A8_UNORM, 0x21030000 /* BGRA */}, + { PIPE_FORMAT_L8_UNORM, 0x00030000 /* RRRA */}, + { PIPE_FORMAT_I8_UNORM, 0x00030000 /* RRRA */}, + { PIPE_FORMAT_A8_UNORM, 0x33330000 /* AAAA */}, + { PIPE_FORMAT_NONE, 0x00000000}, }; -static boolean need_fixup(struct pipe_surface* p) +static uint need_target_fixup(struct pipe_surface* p) { enum pipe_format f; - /* if we don't have a surface bound yet, we don't need to fixup the shader */ if (!p) - return FALSE; + return 0; f = p->format; for(int i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++) if (fixup_formats[i].format == f) - return TRUE; + return 1; - return FALSE; + return 0; } -static uint fixup_swizzle(enum pipe_format f, uint v) +static uint fixup_swizzle(enum pipe_format f) { - int i; - - for(i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++) + for(int i=0; fixup_formats[i].format != 
PIPE_FORMAT_NONE; i++) if (fixup_formats[i].format == f) - break; - - if (fixup_formats[i].format == PIPE_FORMAT_NONE) - return v; - - uint rgba = v & 0xFFFF0000; + return fixup_formats[i].hw_swizzle; - v &= 0xFFFF; - v |= ((rgba >> fixup_formats[i].hw_shift_R) & 0xF) << 28; - v |= ((rgba >> fixup_formats[i].hw_shift_G) & 0xF) << 24; - v |= ((rgba >> fixup_formats[i].hw_shift_B) & 0xF) << 20; - v |= ((rgba >> fixup_formats[i].hw_shift_A) & 0xF) << 16; - - return v; + return 0; } static void validate_program(struct i915_context *i915, unsigned *batch_space) { - *batch_space = i915->fs->program_len; + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + uint additional_size = need_target_fixup(cbuf_surface); + + /* we need more batch space if we want to emulate rgba framebuffers */ + *batch_space = i915->fs->program_len + 3 * additional_size; } static void emit_program(struct i915_context *i915) { struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; - boolean need_format_fixup = need_fixup(cbuf_surface); - int i; - int fixup_offset = -1; + uint target_fixup = need_target_fixup(cbuf_surface); + uint i; /* we should always have, at least, a pass-through program */ assert(i915->fs->program_len > 0); - if (need_format_fixup) { - /* Find where we emit the output color */ - for (i = i915->fs->program_len - 3; i>0; i-=3) { - uint instr = i915->fs->program[i]; - if ((instr & (REG_NR_MASK << A0_DEST_TYPE_SHIFT)) == - (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) ) { - /* Found it! 
*/ - fixup_offset = i + 1; - break; - } - } - if (fixup_offset == -1) { - need_format_fixup = FALSE; - debug_printf("couldn't find fixup offset\n"); - } + { + /* first word has the size, we have to adjust that */ + uint size = (i915->fs->program[0]); + size += target_fixup * 3; + OUT_BATCH(size); } - /* emit the program to the hw */ - for (i = 0; i < i915->fs->program_len; i++) { - if (need_format_fixup && (i == fixup_offset) ) { - uint v = fixup_swizzle(cbuf_surface->format, i915->fs->program[i]); - OUT_BATCH(v); - } else - OUT_BATCH(i915->fs->program[i]); + /* output the declarations of the program */ + for (i=1 ; i < i915->fs->program_len; i++) + OUT_BATCH(i915->fs->program[i]); + + /* we emit an additional mov with swizzle to fake RGBA framebuffers */ + if (target_fixup) { + /* mov out_color, out_color.zyxw */ + OUT_BATCH(A0_MOV | + (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | + A0_DEST_CHANNEL_ALL | + (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) | + (T_DIFFUSE << A0_SRC0_NR_SHIFT)); + OUT_BATCH(fixup_swizzle(cbuf_surface->format)); + OUT_BATCH(0); } } diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index 21cfdc9..2043860 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -207,6 +207,12 @@ struct i915_winsys { void (*buffer_destroy)(struct i915_winsys *iws, struct i915_winsys_buffer *buffer); + + /** + * Check if a buffer is busy. + */ + boolean (*buffer_is_busy)(struct i915_winsys *iws, + struct i915_winsys_buffer *buffer); /*@}*/ |