diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
12 files changed, 156 insertions, 21 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 28d132a..8b73409 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -34,8 +34,6 @@ * \author Ben Skeggs <darktama@iinet.net.au> * * \author Jerome Glisse <j.glisse@gmail.com> - * - * \todo FogOption */ #include "r300_fragprog.h" @@ -108,6 +106,7 @@ static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * { switch(opcode) { case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP; + case RC_OPCODE_CND: return R300_ALU_OUTC_CND; case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3; case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4; case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC; @@ -127,6 +126,7 @@ static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler { switch(opcode) { case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP; + case RC_OPCODE_CND: return R300_ALU_OUTA_CND; case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4; case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4; case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2; diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 8ad2175..654f9a0 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -273,6 +273,7 @@ static void ei_mad(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi, unsigned int * inst) { + unsigned int i; /* Remarks about hardware limitations of MAD * (please preserve this comment, as this information is _NOT_ * in the documentation provided by AMD). 
@@ -318,6 +319,23 @@ static void ei_mad(struct r300_vertex_program_code *vp, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + + /* Arguments with constant swizzles still count as a unique + * temporary, so we should make sure these arguments share a + * register index with one of the other arguments. */ + for (i = 0; i < 3; i++) { + unsigned int j; + if (vpi->SrcReg[i].File != RC_FILE_NONE) + continue; + + for (j = 0; j < 3; j++) { + if (i != j) { + vpi->SrcReg[i].Index = + vpi->SrcReg[j].Index; + break; + } + } + } } inst[1] = t_src(vp, &vpi->SrcReg[0]); inst[2] = t_src(vp, &vpi->SrcReg[1]); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 301b444..c7f79bc 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -93,6 +93,7 @@ static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, r { switch(opcode) { case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND; case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; @@ -114,6 +115,7 @@ static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, { switch(opcode) { case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; + case RC_OPCODE_CND: return R500_ALPHA_OP_CND; case RC_OPCODE_COS: return R500_ALPHA_OP_COS; case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; @@ -197,11 +199,14 @@ static void use_temporary(struct r500_fragment_program_code* code, unsigned int static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) { + /* From docs: + * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST. 
+ * MSB = 1 << 7 */ if (!src.Used) - return 0; + return 1 << 7; if (src.File == RC_FILE_CONSTANT) { - return src.Index | 0x100; + return src.Index | R500_RGB_ADDR0_CONST; } else if (src.File == RC_FILE_TEMPORARY) { use_temporary(code, src.Index); return src.Index; @@ -259,7 +264,8 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair } code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; - code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14); + code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11); + code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0; code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); if (inst->Nop) { code->inst[ip].inst0 |= R500_INST_NOP; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index 35360aa..67e6acf 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -132,10 +132,10 @@ typedef enum { struct r300_fragment_program_external_state { struct { /** - * If the sampler is used as a shadow sampler, - * this field contains swizzle depending on the depth texture mode. + * This field contains swizzle for some lowering passes + * (shadow comparison, unorm->snorm conversion) */ - unsigned depth_texture_swizzle:12; + unsigned texture_swizzle:12; /** * If the sampler is used as a shadow sampler, @@ -172,6 +172,12 @@ struct r300_fragment_program_external_state { * and right before texture fetch. The scaling factor is given by * RC_STATE_R300_TEXSCALE_FACTOR. */ unsigned clamp_and_scale_before_fetch : 1; + + /** + * Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM + * in the shader. 
+ */ + unsigned convert_unorm_to_snorm:1; } unit[16]; unsigned frag_clamp:1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 79cd799..b793672 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -483,7 +483,7 @@ void rc_validate_final_shader(struct radeon_compiler *c, void *user) { /* Check the number of constants. */ if (c->Program.Constants.Count > c->max_constants) { - rc_error(c, "Too many constants. Max: 256, Got: %i\n", - c->Program.Constants.Count); + rc_error(c, "Too many constants. Max: %i, Got: %i\n", + c->max_constants, c->Program.Constants.Count); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index c080d5a..d1a7eab 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -704,9 +704,16 @@ static void get_readers_for_single_write( &d->BranchMasks[branch_depth]; if (masks->HasElse) { + /* Abort on read for components that + * were written in the IF block. */ d->ReaderData->AbortOnRead |= masks->IfWriteMask & ~masks->ElseWriteMask; + /* Abort on read for components that + * were written in the ELSE block. 
*/ + d->ReaderData->AbortOnRead |= + masks->ElseWriteMask + & ~d->AliveWriteMask; d->AliveWriteMask = masks->IfWriteMask ^ ((masks->IfWriteMask ^ masks->ElseWriteMask) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index 25afd27..e3e498e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -81,6 +81,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .IsComponentwise = 1 }, { + .Opcode = RC_OPCODE_CND, + .Name = "CND", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { .Opcode = RC_OPCODE_COS, .Name = "COS", .NumSrcRegs = 1, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index 7e66610..b586882 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -56,6 +56,9 @@ typedef enum { /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ RC_OPCODE_CMP, + /** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */ + RC_OPCODE_CND, + /** scalar instruction: dst = cos(src0.x) */ RC_OPCODE_COS, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index c4e6a5e..79898e1 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -509,10 +509,34 @@ static int is_presub_candidate( { const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); unsigned int i; + unsigned int is_constant[2] = {0, 0}; + + assert(inst->U.I.Opcode == RC_OPCODE_ADD); if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode) return 0; + /* If both sources use a constant swizzle, then we can't convert it to + * a presubtract operation. 
In fact for the ADD and SUB presubtract + * operations neither source can contain a constant swizzle. This + * specific case is checked in peephole_add_presub_add() when + * we make sure the swizzles for both sources are equal, so we + * don't need to worry about it here. */ + for (i = 0; i < 2; i++) { + int chan; + for (chan = 0; chan < 4; chan++) { + rc_swizzle swz = + get_swz(inst->U.I.SrcReg[i].Swizzle, chan); + if (swz == RC_SWIZZLE_ONE + || swz == RC_SWIZZLE_ZERO + || swz == RC_SWIZZLE_HALF) { + is_constant[i] = 1; + } + } + } + if (is_constant[0] && is_constant[1]) + return 0; + for(i = 0; i < info->NumSrcRegs; i++) { struct rc_src_register src = inst->U.I.SrcReg[i]; if (src_reads_dst_mask(src, inst->U.I.DstReg)) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index 9e03eb1..2dae56a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -99,6 +99,7 @@ static void classify_instruction(struct rc_sub_instruction * inst, switch(inst->Opcode) { case RC_OPCODE_ADD: case RC_OPCODE_CMP: + case RC_OPCODE_CND: case RC_OPCODE_DDX: case RC_OPCODE_DDY: case RC_OPCODE_FRC: @@ -289,7 +290,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, } if (needalpha) { - pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3); if (pair->Alpha.WriteMask) { pair->Alpha.DestIndex = inst->DstReg.Index; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index 6708b16..d1a435f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -71,7 +71,7 @@ struct rc_pair_instruction_arg { struct rc_pair_sub_instruction { unsigned int Opcode:8; unsigned int 
DestIndex:RC_REGISTER_INDEX_BITS; - unsigned int WriteMask:3; + unsigned int WriteMask:4; unsigned int Target:2; unsigned int OutputWriteMask:3; unsigned int DepthWriteMask:1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index 1cf77d9..cef448e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -32,8 +32,8 @@ /* Series of transformations to be done on textures. */ -static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler, - int tmu) +static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler, + int tmu) { struct rc_src_register reg = { 0, }; @@ -46,6 +46,20 @@ static struct rc_src_register shadow_ambient(struct r300_fragment_program_compil reg.File = RC_FILE_NONE; reg.Swizzle = RC_SWIZZLE_0000; } + + reg.Swizzle = combine_swizzles(reg.Swizzle, + compiler->state.unit[tmu].texture_swizzle); + return reg; +} + +static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler, + int tmu) +{ + struct rc_src_register reg = { 0, }; + + reg.File = RC_FILE_NONE; + reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111, + compiler->state.unit[tmu].texture_swizzle); return reg; } @@ -141,10 +155,9 @@ int radeonTransformTEX( inst->U.I.Opcode = RC_OPCODE_MOV; if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->U.I.SrcReg[0].File = RC_FILE_NONE; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); } else { - inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); + inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); } return 1; @@ -157,9 +170,11 @@ int radeonTransformTEX( /* Save the output register. 
*/ struct rc_dst_register output_reg = inst->U.I.DstReg; + unsigned saturate_mode = inst->U.I.SaturateMode; /* Redirect TEX to a new temp. */ tmp_texsample = rc_find_free_temporary(c); + inst->U.I.SaturateMode = 0; inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = tmp_texsample; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; @@ -235,15 +250,15 @@ int radeonTransformTEX( inst_cmp = rc_insert_new_instruction(c, inst_add); inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.SaturateMode = saturate_mode; inst_cmp->U.I.DstReg = output_reg; inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_cmp->U.I.SrcReg[0].Index = tmp_sum; inst_cmp->U.I.SrcReg[0].Swizzle = combine_swizzles(RC_SWIZZLE_WWWW, - compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle); - inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; - inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; - inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); + compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle); + inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); + inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); assert(tmp_texsample != tmp_sum); } @@ -396,6 +411,7 @@ int radeonTransformTEX( inst->U.I.SrcReg[0].Index = temp; } + /* NPOT -> POT conversion for 3D textures. */ if (inst->U.I.Opcode != RC_OPCODE_KIL && compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) { struct rc_instruction *inst_mov; @@ -425,6 +441,53 @@ int radeonTransformTEX( scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR); } + /* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM. + * Formula: dst = tex > 0.5 ? 
tex*2-2 : tex*2 + */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) { + unsigned two, two_swizzle; + struct rc_instruction *inst_mul, *inst_mad, *inst_cnd; + + two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle); + + inst_mul = rc_insert_new_instruction(c, inst); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */ + inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* the "two" immediate; NOTE(review): it is created as 2.35 while the formula comment says 2 -- confirm whether 2.35 is an intentional fudge factor or a typo */ + inst_mul->U.I.SrcReg[1].Index = two; + inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle; + + inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ + inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* the "two" immediate (multiplier) */ + inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* the "two" immediate again; negated on the next line to form the subtracted term */ + inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW; + + inst_cnd = rc_insert_new_instruction(c, inst_mad); + inst_cnd->U.I.Opcode = RC_OPCODE_CND; + inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode; + inst_cnd->U.I.DstReg = inst->U.I.DstReg; + inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; + inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; + inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index; + inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; + inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output, used as the CND selector (compared against 0.5) */ + + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File 
= RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + /* Cannot write texture to output registers or with saturate (all chips), * or with masks (non-r500). */ if (inst->U.I.Opcode != RC_OPCODE_KIL && |