summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/r300/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c18
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c12
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h12
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c7
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c7
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c24
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c81
12 files changed, 156 insertions, 21 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 28d132a..8b73409 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -34,8 +34,6 @@
* \author Ben Skeggs <darktama@iinet.net.au>
*
* \author Jerome Glisse <j.glisse@gmail.com>
- *
- * \todo FogOption
*/
#include "r300_fragprog.h"
@@ -108,6 +106,7 @@ static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler *
{
switch(opcode) {
case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
+ case RC_OPCODE_CND: return R300_ALU_OUTC_CND;
case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
@@ -127,6 +126,7 @@ static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler
{
switch(opcode) {
case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
+ case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 8ad2175..654f9a0 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -273,6 +273,7 @@ static void ei_mad(struct r300_vertex_program_code *vp,
struct rc_sub_instruction *vpi,
unsigned int * inst)
{
+ unsigned int i;
/* Remarks about hardware limitations of MAD
* (please preserve this comment, as this information is _NOT_
* in the documentation provided by AMD).
@@ -318,6 +319,23 @@ static void ei_mad(struct r300_vertex_program_code *vp,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
+
+ /* Arguments with constant swizzles still count as a unique
+ * temporary, so we should make sure these arguments share a
+ * register index with one of the other arguments. */
+ for (i = 0; i < 3; i++) {
+ unsigned int j;
+ if (vpi->SrcReg[i].File != RC_FILE_NONE)
+ continue;
+
+ for (j = 0; j < 3; j++) {
+ if (i != j) {
+ vpi->SrcReg[i].Index =
+ vpi->SrcReg[j].Index;
+ break;
+ }
+ }
+ }
}
inst[1] = t_src(vp, &vpi->SrcReg[0]);
inst[2] = t_src(vp, &vpi->SrcReg[1]);
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 301b444..c7f79bc 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -93,6 +93,7 @@ static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, r
{
switch(opcode) {
case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
+ case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND;
case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
@@ -114,6 +115,7 @@ static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c,
{
switch(opcode) {
case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
+ case RC_OPCODE_CND: return R500_ALPHA_OP_CND;
case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
@@ -197,11 +199,14 @@ static void use_temporary(struct r500_fragment_program_code* code, unsigned int
static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
{
+ /* From docs:
+ * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
+ * MSB = 1 << 7 */
if (!src.Used)
- return 0;
+ return 1 << 7;
if (src.File == RC_FILE_CONSTANT) {
- return src.Index | 0x100;
+ return src.Index | R500_RGB_ADDR0_CONST;
} else if (src.File == RC_FILE_TEMPORARY) {
use_temporary(code, src.Index);
return src.Index;
@@ -259,7 +264,8 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
}
code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
- code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
+ code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
+ code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
if (inst->Nop) {
code->inst[ip].inst0 |= R500_INST_NOP;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 35360aa..67e6acf 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -132,10 +132,10 @@ typedef enum {
struct r300_fragment_program_external_state {
struct {
/**
- * If the sampler is used as a shadow sampler,
- * this field contains swizzle depending on the depth texture mode.
+ * This field contains swizzle for some lowering passes
+ * (shadow comparison, unorm->snorm conversion)
*/
- unsigned depth_texture_swizzle:12;
+ unsigned texture_swizzle:12;
/**
* If the sampler is used as a shadow sampler,
@@ -172,6 +172,12 @@ struct r300_fragment_program_external_state {
* and right before texture fetch. The scaling factor is given by
* RC_STATE_R300_TEXSCALE_FACTOR. */
unsigned clamp_and_scale_before_fetch : 1;
+
+ /**
+ * Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM
+ * in the shader.
+ */
+ unsigned convert_unorm_to_snorm:1;
} unit[16];
unsigned frag_clamp:1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 79cd799..b793672 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -483,7 +483,7 @@ void rc_validate_final_shader(struct radeon_compiler *c, void *user)
{
/* Check the number of constants. */
if (c->Program.Constants.Count > c->max_constants) {
- rc_error(c, "Too many constants. Max: 256, Got: %i\n",
- c->Program.Constants.Count);
+ rc_error(c, "Too many constants. Max: %i, Got: %i\n",
+ c->max_constants, c->Program.Constants.Count);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
index c080d5a..d1a7eab 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -704,9 +704,16 @@ static void get_readers_for_single_write(
&d->BranchMasks[branch_depth];
if (masks->HasElse) {
+ /* Abort on read for components that
+ * were written in the IF block. */
d->ReaderData->AbortOnRead |=
masks->IfWriteMask
& ~masks->ElseWriteMask;
+ /* Abort on read for components that
+ * were written in the ELSE block. */
+ d->ReaderData->AbortOnRead |=
+ masks->ElseWriteMask
+ & ~d->AliveWriteMask;
d->AliveWriteMask = masks->IfWriteMask
^ ((masks->IfWriteMask ^
masks->ElseWriteMask)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index 25afd27..e3e498e 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -81,6 +81,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.IsComponentwise = 1
},
{
+ .Opcode = RC_OPCODE_CND,
+ .Name = "CND",
+ .NumSrcRegs = 3,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
.Opcode = RC_OPCODE_COS,
.Name = "COS",
.NumSrcRegs = 1,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 7e66610..b586882 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -56,6 +56,9 @@ typedef enum {
/** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
RC_OPCODE_CMP,
+ /** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */
+ RC_OPCODE_CND,
+
/** scalar instruction: dst = cos(src0.x) */
RC_OPCODE_COS,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index c4e6a5e..79898e1 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -509,10 +509,34 @@ static int is_presub_candidate(
{
const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
unsigned int i;
+ unsigned int is_constant[2] = {0, 0};
+
+ assert(inst->U.I.Opcode == RC_OPCODE_ADD);
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode)
return 0;
+ /* If both sources use a constant swizzle, then we can't convert it to
+ * a presubtract operation. In fact for the ADD and SUB presubtract
+ * operations neither source can contain a constant swizzle. This
+ * specific case is checked in peephole_add_presub_add() when
+ * we make sure the swizzles for both sources are equal, so we
+ * don't need to worry about it here. */
+ for (i = 0; i < 2; i++) {
+ int chan;
+ for (chan = 0; chan < 4; chan++) {
+ rc_swizzle swz =
+ get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
+ if (swz == RC_SWIZZLE_ONE
+ || swz == RC_SWIZZLE_ZERO
+ || swz == RC_SWIZZLE_HALF) {
+ is_constant[i] = 1;
+ }
+ }
+ }
+ if (is_constant[0] && is_constant[1])
+ return 0;
+
for(i = 0; i < info->NumSrcRegs; i++) {
struct rc_src_register src = inst->U.I.SrcReg[i];
if (src_reads_dst_mask(src, inst->U.I.DstReg))
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
index 9e03eb1..2dae56a 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -99,6 +99,7 @@ static void classify_instruction(struct rc_sub_instruction * inst,
switch(inst->Opcode) {
case RC_OPCODE_ADD:
case RC_OPCODE_CMP:
+ case RC_OPCODE_CND:
case RC_OPCODE_DDX:
case RC_OPCODE_DDY:
case RC_OPCODE_FRC:
@@ -289,7 +290,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
}
if (needalpha) {
- pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
if (pair->Alpha.WriteMask) {
pair->Alpha.DestIndex = inst->DstReg.Index;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index 6708b16..d1a435f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -71,7 +71,7 @@ struct rc_pair_instruction_arg {
struct rc_pair_sub_instruction {
unsigned int Opcode:8;
unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
- unsigned int WriteMask:3;
+ unsigned int WriteMask:4;
unsigned int Target:2;
unsigned int OutputWriteMask:3;
unsigned int DepthWriteMask:1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
index 1cf77d9..cef448e 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -32,8 +32,8 @@
/* Series of transformations to be done on textures. */
-static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler,
- int tmu)
+static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler,
+ int tmu)
{
struct rc_src_register reg = { 0, };
@@ -46,6 +46,20 @@ static struct rc_src_register shadow_ambient(struct r300_fragment_program_compil
reg.File = RC_FILE_NONE;
reg.Swizzle = RC_SWIZZLE_0000;
}
+
+ reg.Swizzle = combine_swizzles(reg.Swizzle,
+ compiler->state.unit[tmu].texture_swizzle);
+ return reg;
+}
+
+static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler,
+ int tmu)
+{
+ struct rc_src_register reg = { 0, };
+
+ reg.File = RC_FILE_NONE;
+ reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111,
+ compiler->state.unit[tmu].texture_swizzle);
return reg;
}
@@ -141,10 +155,9 @@ int radeonTransformTEX(
inst->U.I.Opcode = RC_OPCODE_MOV;
if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
- inst->U.I.SrcReg[0].File = RC_FILE_NONE;
- inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+ inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
} else {
- inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
+ inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
}
return 1;
@@ -157,9 +170,11 @@ int radeonTransformTEX(
/* Save the output register. */
struct rc_dst_register output_reg = inst->U.I.DstReg;
+ unsigned saturate_mode = inst->U.I.SaturateMode;
/* Redirect TEX to a new temp. */
tmp_texsample = rc_find_free_temporary(c);
+ inst->U.I.SaturateMode = 0;
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst->U.I.DstReg.Index = tmp_texsample;
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
@@ -235,15 +250,15 @@ int radeonTransformTEX(
inst_cmp = rc_insert_new_instruction(c, inst_add);
inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+ inst_cmp->U.I.SaturateMode = saturate_mode;
inst_cmp->U.I.DstReg = output_reg;
inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
inst_cmp->U.I.SrcReg[0].Swizzle =
combine_swizzles(RC_SWIZZLE_WWWW,
- compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle);
- inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
- inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
- inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
+ compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle);
+ inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
+ inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
assert(tmp_texsample != tmp_sum);
}
@@ -396,6 +411,7 @@ int radeonTransformTEX(
inst->U.I.SrcReg[0].Index = temp;
}
+ /* NPOT -> POT conversion for 3D textures. */
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) {
struct rc_instruction *inst_mov;
@@ -425,6 +441,53 @@ int radeonTransformTEX(
scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);
}
+ /* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM.
+ * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2
+ */
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) {
+ unsigned two, two_swizzle;
+ struct rc_instruction *inst_mul, *inst_mad, *inst_cnd;
+
+ two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle);
+
+ inst_mul = rc_insert_new_instruction(c, inst);
+ inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+ inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */
+ inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */
+ inst_mul->U.I.SrcReg[1].Index = two;
+ inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle;
+
+ inst_mad = rc_insert_new_instruction(c, inst_mul);
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
+ inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */
+ inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */
+ inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW;
+
+ inst_cnd = rc_insert_new_instruction(c, inst_mad);
+ inst_cnd->U.I.Opcode = RC_OPCODE_CND;
+ inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode;
+ inst_cnd->U.I.DstReg = inst->U.I.DstReg;
+ inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
+ inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
+ inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index;
+ inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
+ inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
+
+ inst->U.I.SaturateMode = 0;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ }
+
/* Cannot write texture to output registers or with saturate (all chips),
* or with masks (non-r500). */
if (inst->U.I.Opcode != RC_OPCODE_KIL &&