summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r300/compiler
diff options
context:
space:
mode:
authorTom Stellard <tstellar@gmail.com>2011-09-05 06:57:36 -0700
committerTom Stellard <tstellar@gmail.com>2011-09-10 06:36:53 -0700
commit3d32e589879806297258e36ea80aae5044293ca3 (patch)
tree5c4fd44acd9583947515e5afc74fd752c9c92bcb /src/gallium/drivers/r300/compiler
parent2a5cbc5306686a5ad210317843bd0dc7950b6ce9 (diff)
downloadexternal_mesa3d-3d32e589879806297258e36ea80aae5044293ca3.zip
external_mesa3d-3d32e589879806297258e36ea80aae5044293ca3.tar.gz
external_mesa3d-3d32e589879806297258e36ea80aae5044293ca3.tar.bz2
r300/compiler: Implement ROUND
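According to the GLSL spec, the implementor can decide which way to round when the fraction is .5. The r300 compiler will round down. (Note: the lowering in this patch computes floor(src + 0.5), so a fraction of exactly .5 is actually rounded up, toward +infinity.)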
Diffstat (limited to 'src/gallium/drivers/r300/compiler')
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_opcodes.c7
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_opcodes.h3
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_program_alu.c45
3 files changed, 55 insertions, 0 deletions
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
index afd78ad..527db9a 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.c
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
@@ -246,6 +246,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.IsStandardScalar = 1
},
{
+ .Opcode = RC_OPCODE_ROUND,
+ .Name = "ROUND",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
.Opcode = RC_OPCODE_RSQ,
.Name = "RSQ",
.NumSrcRegs = 1,
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
index b586882..0b881c2 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.h
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
@@ -133,6 +133,9 @@ typedef enum {
/** scalar instruction: dst = 1 / src0.x */
RC_OPCODE_RCP,
+ /** vec4 instruction: dst.c = floor(src0.c + 0.5) */
+ RC_OPCODE_ROUND,
+
/** scalar instruction: dst = 1 / sqrt(src0.x) */
RC_OPCODE_RSQ,
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
index e273bc4..dd1dfb3 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -104,6 +104,13 @@ static const struct rc_src_register builtin_one = {
.Index = 0,
.Swizzle = RC_SWIZZLE_1111
};
+
+static const struct rc_src_register builtin_half = { /* source operand used as the .5 addend in transform_ROUND */
+ .File = RC_FILE_NONE,
+ .Index = 0,
+ .Swizzle = RC_SWIZZLE_HHHH /* presumably selects the constant one-half on all four channels -- confirm against the swizzle definitions */
+};
+
static const struct rc_src_register srcreg_undefined = {
.File = RC_FILE_NONE,
.Index = 0,
@@ -416,6 +423,43 @@ static void transform_POW(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
+/* dst = ROUND(src) :
+ * add = src + .5
+ * frac = FRC(add)
+ * dst = add - frac
+ *
+ * According to the GLSL spec, the implementor can decide which way to round
+ * when the fraction is .5. This sequence yields floor(src + .5) (assuming
+ * FRC(x) = x - floor(x)), so a fraction of exactly .5 rounds up, not down.
+ */
+static void transform_ROUND(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ unsigned int mask = inst->U.I.DstReg.WriteMask; /* both temporaries reuse the ROUND destination's write mask */
+ unsigned int frac_index, add_index;
+ struct rc_dst_register frac_dst, add_dst;
+ struct rc_src_register frac_src, add_src;
+
+ /* add = src + .5, written to a newly found free temporary */
+ add_index = rc_find_free_temporary(c);
+ add_dst = dstregtmpmask(add_index, mask);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, add_dst, inst->U.I.SrcReg[0],
+ builtin_half);
+ add_src = srcreg(RC_FILE_TEMPORARY, add_dst.Index);
+
+
+ /* frac = FRC(add), written to a second free temporary */
+ frac_index = rc_find_free_temporary(c);
+ frac_dst = dstregtmpmask(frac_index, mask);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, add_src);
+ frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index);
+
+ /* dst = add - frac, expressed as add + (-frac) into the original destination */
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg,
+ add_src, negate(frac_src));
+ rc_remove_instruction(inst); /* drop the original ROUND now that its replacement sequence has been emitted */
+}
+
static void transform_RSQ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -599,6 +643,7 @@ int radeonTransformALU(
case RC_OPCODE_LIT: transform_LIT(c, inst); return 1;
case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
case RC_OPCODE_POW: transform_POW(c, inst); return 1;
+ case RC_OPCODE_ROUND: transform_ROUND(c, inst); return 1;
case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1;
case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;