summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Tom Stellard <tstellar@gmail.com> 2011-09-20 21:05:55 -0700
committer Tom Stellard <thomas.stellard@amd.com> 2012-04-13 22:24:16 -0400
commit b2df031a959f36743527b9abc89913ce4f895de3 (patch)
tree adf844d0e5fd6708f78c2c4422f367cb4c9df75d
parent 4a269a8dc0170c75ff22af3910786228727ea41e (diff)
download external_mesa3d-b2df031a959f36743527b9abc89913ce4f895de3.zip
external_mesa3d-b2df031a959f36743527b9abc89913ce4f895de3.tar.gz
external_mesa3d-b2df031a959f36743527b9abc89913ce4f895de3.tar.bz2
r300/compiler: Fix nested flow control in r500 vertex shaders
-rw-r--r-- src/gallium/drivers/r300/Makefile.sources 1
-rw-r--r-- src/gallium/drivers/r300/compiler/r3xx_vertprog.c 217
-rw-r--r-- src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c 15
-rw-r--r-- src/gallium/drivers/r300/compiler/radeon_code.h 6
-rw-r--r-- src/gallium/drivers/r300/compiler/radeon_compiler.h 3
-rw-r--r-- src/gallium/drivers/r300/compiler/radeon_opcodes.c 72
-rw-r--r-- src/gallium/drivers/r300/compiler/radeon_opcodes.h 15
-rw-r--r-- src/gallium/drivers/r300/compiler/radeon_program.h 1
-rw-r--r-- src/gallium/drivers/r300/compiler/radeon_program_constants.h 6
-rw-r--r-- src/gallium/drivers/r300/compiler/radeon_program_print.c 6
-rw-r--r-- src/gallium/drivers/r300/compiler/radeon_vert_fc.c 274
11 files changed, 438 insertions, 178 deletions
diff --git a/src/gallium/drivers/r300/Makefile.sources b/src/gallium/drivers/r300/Makefile.sources
index e27b14e..1e7d31b 100644
--- a/src/gallium/drivers/r300/Makefile.sources
+++ b/src/gallium/drivers/r300/Makefile.sources
@@ -46,6 +46,7 @@ C_SOURCES := \
compiler/radeon_optimize.c \
compiler/radeon_remove_constants.c \
compiler/radeon_rename_regs.c \
+ compiler/radeon_vert_fc.c \
compiler/radeon_variable.c \
compiler/r3xx_fragprog.c \
compiler/r300_fragprog.c \
diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
index a8d8ebc..94733d7 100644
--- a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
+++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
@@ -28,17 +28,13 @@
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
+#include "radeon_program.h"
#include "radeon_program_alu.h"
#include "radeon_swizzle.h"
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
#include "radeon_remove_constants.h"
-struct loop {
- int BgnLoop;
-
-};
-
/*
* Take an already-setup and valid source then swizzle it appropriately to
* obtain a constant ZERO or ONE source.
@@ -359,140 +355,13 @@ static void ei_pow(struct r300_vertex_program_code *vp,
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
-static void mark_write(void * userdata, struct rc_instruction * inst,
- rc_register_file file, unsigned int index, unsigned int mask)
-{
- unsigned int * writemasks = userdata;
-
- if (file != RC_FILE_TEMPORARY)
- return;
-
- if (index >= R300_VS_MAX_TEMPS)
- return;
-
- writemasks[index] |= mask;
-}
-
-static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
-{
- return PVS_SRC_OPERAND(compiler->PredicateIndex,
- t_swizzle(RC_SWIZZLE_ZERO),
- t_swizzle(RC_SWIZZLE_ZERO),
- t_swizzle(RC_SWIZZLE_ZERO),
- t_swizzle(RC_SWIZZLE_W),
- t_src_class(RC_FILE_TEMPORARY),
- 0);
-}
-
-static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
- unsigned int hw_opcode, int is_math)
-{
- return PVS_OP_DST_OPERAND(hw_opcode,
- is_math,
- 0,
- compiler->PredicateIndex,
- RC_MASK_W,
- t_dst_class(RC_FILE_TEMPORARY));
-
-}
-
-static void ei_if(struct r300_vertex_program_compiler * compiler,
- struct rc_instruction *rci,
- unsigned int * inst,
- unsigned int branch_depth)
-{
- unsigned int predicate_opcode;
- int is_math = 0;
-
- if (!compiler->Base.is_r500) {
- rc_error(&compiler->Base,"Opcode IF not supported\n");
- return;
- }
-
- /* Reserve a temporary to use as our predicate stack counter, if we
- * don't already have one. */
- if (!compiler->PredicateMask) {
- unsigned int writemasks[RC_REGISTER_MAX_INDEX];
- struct rc_instruction * inst;
- unsigned int i;
- memset(writemasks, 0, sizeof(writemasks));
- for(inst = compiler->Base.Program.Instructions.Next;
- inst != &compiler->Base.Program.Instructions;
- inst = inst->Next) {
- rc_for_all_writes_mask(inst, mark_write, writemasks);
- }
- for(i = 0; i < compiler->Base.max_temp_regs; i++) {
- unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
- /* Only the W component can be used fo the predicate
- * stack counter. */
- if (mask & RC_MASK_W) {
- compiler->PredicateMask = RC_MASK_W;
- compiler->PredicateIndex = i;
- break;
- }
- }
- if (i == compiler->Base.max_temp_regs) {
- rc_error(&compiler->Base, "No free temporary to use for"
- " predicate stack counter.\n");
- return;
- }
- }
- predicate_opcode =
- branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ;
-
- rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
- if (branch_depth == 0) {
- is_math = 1;
- predicate_opcode = ME_PRED_SET_NEQ;
- inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
- inst[2] = 0;
- } else {
- predicate_opcode = VE_PRED_SET_NEQ_PUSH;
- inst[1] = t_pred_src(compiler);
- inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
- }
-
- inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
- inst[3] = 0;
-
-}
-
-static void ei_else(struct r300_vertex_program_compiler * compiler,
- unsigned int * inst)
-{
- if (!compiler->Base.is_r500) {
- rc_error(&compiler->Base,"Opcode ELSE not supported\n");
- return;
- }
- inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
- inst[1] = t_pred_src(compiler);
- inst[2] = 0;
- inst[3] = 0;
-}
-
-static void ei_endif(struct r300_vertex_program_compiler *compiler,
- unsigned int * inst)
-{
- if (!compiler->Base.is_r500) {
- rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
- return;
- }
- inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
- inst[1] = t_pred_src(compiler);
- inst[2] = 0;
- inst[3] = 0;
-}
-
static void translate_vertex_program(struct radeon_compiler *c, void *user)
{
struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
struct rc_instruction *rci;
- struct loop * loops = NULL;
- int current_loop_depth = 0;
- int loops_reserved = 0;
-
- unsigned int branch_depth = 0;
+ unsigned loops[R500_PVS_MAX_LOOP_DEPTH];
+ unsigned loop_depth = 0;
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
@@ -532,12 +401,9 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
- case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
- case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
- case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
@@ -556,37 +422,27 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
case RC_OPCODE_BGNLOOP:
{
- struct loop * l;
-
if ((!compiler->Base.is_r500
- && loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
- || loops_reserved >= R500_VS_MAX_FC_DEPTH) {
+ && loop_depth >= R300_VS_MAX_LOOP_DEPTH)
+ || loop_depth >= R500_PVS_MAX_LOOP_DEPTH) {
rc_error(&compiler->Base,
"Loops are nested too deep.");
return;
}
- memory_pool_array_reserve(&compiler->Base.Pool,
- struct loop, loops, current_loop_depth,
- loops_reserved, 1);
- l = &loops[current_loop_depth++];
- memset(l , 0, sizeof(struct loop));
- l->BgnLoop = (compiler->code->length / 4);
- continue;
+ loops[loop_depth++] = ((compiler->code->length)/ 4) + 1;
+ break;
}
case RC_OPCODE_ENDLOOP:
{
- struct loop * l;
unsigned int act_addr;
unsigned int last_addr;
unsigned int ret_addr;
- assert(loops);
- l = &loops[current_loop_depth - 1];
- act_addr = l->BgnLoop - 1;
+ ret_addr = loops[--loop_depth];
+ act_addr = ret_addr - 1;
last_addr = (compiler->code->length / 4) - 1;
- ret_addr = l->BgnLoop;
- if (loops_reserved >= R300_VS_MAX_FC_OPS) {
+ if (loop_depth >= R300_VS_MAX_FC_OPS) {
rc_error(&compiler->Base,
"Too many flow control instructions.");
return;
@@ -595,7 +451,7 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
compiler->code->fc_op_addrs.r500
[compiler->code->num_fc_ops].lw =
R500_PVS_FC_ACT_ADRS(act_addr)
- | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
+ | R500_PVS_FC_LOOP_CNT_JMP_INST(0x00ff)
;
compiler->code->fc_op_addrs.r500
[compiler->code->num_fc_ops].uw =
@@ -618,26 +474,51 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
compiler->code->num_fc_ops);
compiler->code->num_fc_ops++;
- current_loop_depth--;
- continue;
+
+ break;
}
+ case RC_ME_PRED_SET_CLR:
+ ei_math1(compiler->code, ME_PRED_SET_CLR, vpi, inst);
+ break;
+
+ case RC_ME_PRED_SET_INV:
+ ei_math1(compiler->code, ME_PRED_SET_INV, vpi, inst);
+ break;
+
+ case RC_ME_PRED_SET_POP:
+ ei_math1(compiler->code, ME_PRED_SET_POP, vpi, inst);
+ break;
+
+ case RC_ME_PRED_SET_RESTORE:
+ ei_math1(compiler->code, ME_PRED_SET_RESTORE, vpi, inst);
+ break;
+
+ case RC_ME_PRED_SEQ:
+ ei_math1(compiler->code, ME_PRED_SET_EQ, vpi, inst);
+ break;
+
+ case RC_ME_PRED_SNEQ:
+ ei_math1(compiler->code, ME_PRED_SET_NEQ, vpi, inst);
+ break;
+
+ case RC_VE_PRED_SNEQ_PUSH:
+ ei_vector2(compiler->code, VE_PRED_SET_NEQ_PUSH,
+ vpi, inst);
+ break;
+
default:
rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name);
return;
}
- /* Non-flow control instructions that are inside an if statement
- * need to pay attention to the predicate bit. */
- if (branch_depth
- && vpi->Opcode != RC_OPCODE_IF
- && vpi->Opcode != RC_OPCODE_ELSE
- && vpi->Opcode != RC_OPCODE_ENDIF) {
-
+ if (vpi->DstReg.Pred != RC_PRED_DISABLED) {
inst[0] |= (PVS_DST_PRED_ENABLE_MASK
<< PVS_DST_PRED_ENABLE_SHIFT);
- inst[0] |= (PVS_DST_PRED_SENSE_MASK
+ if (vpi->DstReg.Pred == RC_PRED_SET) {
+ inst[0] |= (PVS_DST_PRED_SENSE_MASK
<< PVS_DST_PRED_SENSE_SHIFT);
+ }
}
/* Update the number of temporaries. */
@@ -650,10 +531,6 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;
- if (compiler->PredicateMask)
- if (compiler->PredicateIndex >= compiler->code->num_temporaries)
- compiler->code->num_temporaries = compiler->PredicateIndex + 1;
-
if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
rc_error(&compiler->Base, "Too many temporaries.\n");
return;
@@ -1018,7 +895,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
struct radeon_compiler_pass vs_list[] = {
/* NAME DUMP PREDICATE FUNCTION PARAM */
{"add artificial outputs", 0, 1, rc_vs_add_artificial_outputs, NULL},
- {"transform loops", 1, 1, rc_transform_loops, NULL},
{"emulate branches", 1, !is_r500, rc_emulate_branches, NULL},
{"emulate negative addressing", 1, 1, rc_emulate_negative_addressing, NULL},
{"native rewrite", 1, is_r500, rc_local_transform, alu_rewrite_r500},
@@ -1030,6 +906,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
{"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts},
{"register allocation", 1, opt, allocate_temporary_registers, NULL},
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
+ {"lower control flow opcodes", 1, is_r500, rc_vert_fc, NULL},
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
{"machine code generation", 0, 1, translate_vertex_program, NULL},
{"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL},
diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c
index 2bc0a87..a41559c8 100644
--- a/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c
+++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c
@@ -190,16 +190,25 @@ void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user)
fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops);
for(i = 0; i < vs->num_fc_ops; i++) {
+ unsigned is_loop = 0;
switch((vs->fc_ops >> (i * 2)) & 0x3 ) {
case 0: fprintf(stderr, "NOP"); break;
case 1: fprintf(stderr, "JUMP"); break;
- case 2: fprintf(stderr, "LOOP"); break;
+ case 2: fprintf(stderr, "LOOP"); is_loop = 1; break;
case 3: fprintf(stderr, "JSR"); break;
}
if (c->Base.is_r500) {
- fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n",
+ fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x "
+ "loop data->0x%08x\n",
vs->fc_op_addrs.r500[i].uw,
- vs->fc_op_addrs.r500[i].lw);
+ vs->fc_op_addrs.r500[i].lw,
+ vs->fc_loop_index[i]);
+ if (is_loop) {
+ fprintf(stderr, "Before = %u First = %u Last = %u\n",
+ vs->fc_op_addrs.r500[i].lw & 0xffff,
+ (vs->fc_op_addrs.r500[i].uw >> 16) & 0xffff,
+ vs->fc_op_addrs.r500[i].uw & 0xffff);
+ }
} else {
fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]);
}
diff --git a/src/gallium/drivers/r300/compiler/radeon_code.h b/src/gallium/drivers/r300/compiler/radeon_code.h
index 4280d66..44d5500 100644
--- a/src/gallium/drivers/r300/compiler/radeon_code.h
+++ b/src/gallium/drivers/r300/compiler/radeon_code.h
@@ -40,6 +40,9 @@
#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4
+/* The r500 maximum depth is not just for loops, but any combination of loops
+ * and subroutine jumps. */
+#define R500_PVS_MAX_LOOP_DEPTH 8
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
@@ -262,9 +265,6 @@ struct rX00_fragment_program_code {
#define R300_VS_MAX_TEMPS 32
/* This is the max for all chipsets (r300-r500) */
#define R300_VS_MAX_FC_OPS 16
-/* The r500 maximum depth is not just for loops, but any combination of loops
- * and subroutine jumps. */
-#define R500_VS_MAX_FC_DEPTH 8
#define R300_VS_MAX_LOOP_DEPTH 1
#define VSF_MAX_INPUTS 32
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h
index e7ccbb7..d42cee9 100644
--- a/src/gallium/drivers/r300/compiler/radeon_compiler.h
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler.h
@@ -137,11 +137,10 @@ struct r300_vertex_program_compiler {
void * UserData;
void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
- int PredicateIndex;
- unsigned int PredicateMask;
};
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+void rc_vert_fc(struct radeon_compiler *compiler, void *user);
void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user);
struct radeon_compiler_pass {
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
index 3b49ad7..9bcb3c9 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.c
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
@@ -437,6 +437,78 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
{
.Opcode = RC_OPCODE_KILP,
.Name = "KILP",
+ },
+ {
+ .Opcode = RC_ME_PRED_SEQ,
+ .Name = "ME_PRED_SEQ",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_ME_PRED_SGT,
+ .Name = "ME_PRED_SGT",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_ME_PRED_SGE,
+ .Name = "ME_PRED_SGE",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_ME_PRED_SNEQ,
+ .Name = "ME_PRED_SNEQ",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_ME_PRED_SET_CLR,
+ .Name = "ME_PRED_SET_CLEAR",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_ME_PRED_SET_INV,
+ .Name = "ME_PRED_SET_INV",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_ME_PRED_SET_POP,
+ .Name = "ME_PRED_SET_POP",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_ME_PRED_SET_RESTORE,
+ .Name = "ME_PRED_SET_RESTORE",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_VE_PRED_SEQ_PUSH,
+ .Name = "VE_PRED_SEQ_PUSH",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_VE_PRED_SGT_PUSH,
+ .Name = "VE_PRED_SGT_PUSH",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_VE_PRED_SGE_PUSH,
+ .Name = "VE_PRED_SGE_PUSH",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_VE_PRED_SNEQ_PUSH,
+ .Name = "VE_PRED_SNEQ_PUSH",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
}
};
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
index 0b881c2..9c4b456 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.h
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
@@ -217,6 +217,21 @@ typedef enum {
/** Stop execution of the shader (GLSL discard) */
RC_OPCODE_KILP,
+ /* Vertex shader CF Instructions */
+ RC_ME_PRED_SEQ,
+ RC_ME_PRED_SGT,
+ RC_ME_PRED_SGE,
+ RC_ME_PRED_SNEQ,
+ RC_ME_PRED_SET_CLR,
+ RC_ME_PRED_SET_INV,
+ RC_ME_PRED_SET_POP,
+ RC_ME_PRED_SET_RESTORE,
+
+ RC_VE_PRED_SEQ_PUSH,
+ RC_VE_PRED_SGT_PUSH,
+ RC_VE_PRED_SGE_PUSH,
+ RC_VE_PRED_SNEQ_PUSH,
+
MAX_RC_OPCODE
} rc_opcode;
diff --git a/src/gallium/drivers/r300/compiler/radeon_program.h b/src/gallium/drivers/r300/compiler/radeon_program.h
index e68be93..67be1b9 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program.h
+++ b/src/gallium/drivers/r300/compiler/radeon_program.h
@@ -58,6 +58,7 @@ struct rc_dst_register {
unsigned int File:3;
unsigned int Index:RC_REGISTER_INDEX_BITS;
unsigned int WriteMask:4;
+ unsigned int Pred:2;
};
struct rc_presub_instruction {
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_constants.h b/src/gallium/drivers/r300/compiler/radeon_program_constants.h
index c07c492..4dbf649 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_constants.h
+++ b/src/gallium/drivers/r300/compiler/radeon_program_constants.h
@@ -203,4 +203,10 @@ static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
#define RC_SOURCE_RGB 0x1
#define RC_SOURCE_ALPHA 0x2
+typedef enum {
+ RC_PRED_DISABLED,
+ RC_PRED_SET,
+ RC_PRED_INV
+} rc_predicate_mode;
+
#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_print.c b/src/gallium/drivers/r300/compiler/radeon_program_print.c
index e3d2104..29a349e 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_print.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_print.c
@@ -329,6 +329,12 @@ static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst,
fprintf(f, ")]");
}
+ if (inst->U.I.DstReg.Pred == RC_PRED_SET) {
+ fprintf(f, " PRED_SET");
+ } else if (inst->U.I.DstReg.Pred == RC_PRED_INV) {
+ fprintf(f, " PRED_INV");
+ }
+
fprintf(f, "\n");
}
diff --git a/src/gallium/drivers/r300/compiler/radeon_vert_fc.c b/src/gallium/drivers/r300/compiler/radeon_vert_fc.c
new file mode 100644
index 0000000..3568b23
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_vert_fc.c
@@ -0,0 +1,274 @@
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_program.h"
+#include "radeon_program_constants.h"
+
+struct vert_fc_state {
+ struct radeon_compiler *C;
+ unsigned BranchDepth;
+ unsigned LoopDepth;
+ unsigned LoopsReserved;
+ int PredStack[R500_PVS_MAX_LOOP_DEPTH];
+ int PredicateReg;
+ unsigned InCFBreak;
+};
+
+static void build_pred_src(
+ struct rc_src_register * src,
+ struct vert_fc_state * fc_state)
+{
+ src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
+ RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
+ src->File = RC_FILE_TEMPORARY;
+ src->Index = fc_state->PredicateReg;
+}
+
+static void build_pred_dst(
+ struct rc_dst_register * dst,
+ struct vert_fc_state * fc_state)
+{
+ dst->WriteMask = RC_MASK_W;
+ dst->File = RC_FILE_TEMPORARY;
+ dst->Index = fc_state->PredicateReg;
+}
+
+static void mark_write(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ unsigned int * writemasks = userdata;
+
+ if (file != RC_FILE_TEMPORARY)
+ return;
+
+ if (index >= R300_VS_MAX_TEMPS)
+ return;
+
+ writemasks[index] |= mask;
+}
+
+static int reserve_predicate_reg(struct vert_fc_state * fc_state)
+{
+ int i;
+ unsigned int writemasks[RC_REGISTER_MAX_INDEX];
+ struct rc_instruction * inst;
+ memset(writemasks, 0, sizeof(writemasks));
+ for(inst = fc_state->C->Program.Instructions.Next;
+ inst != &fc_state->C->Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_writes_mask(inst, mark_write, writemasks);
+ }
+
+ for(i = 0; i < fc_state->C->max_temp_regs; i++) {
+ /* Most of the control flow instructions only write the
+ * W component of the Predicate Register, but
+ * the docs say that ME_PRED_SET_CLR and
+ * ME_PRED_SET_RESTORE write all components of the
+ * register, so we must reserve a register that has
+ * all its components free. */
+ if (!writemasks[i]) {
+ fc_state->PredicateReg = i;
+ break;
+ }
+ }
+ if (i == fc_state->C->max_temp_regs) {
+ rc_error(fc_state->C, "No free temporary to use for"
+ " predicate stack counter.\n");
+ return -1;
+ }
+ return 1;
+}
+
+static void lower_bgnloop(
+ struct rc_instruction * inst,
+ struct vert_fc_state * fc_state)
+{
+ struct rc_instruction * new_inst =
+ rc_insert_new_instruction(fc_state->C, inst->Prev);
+
+ if ((!fc_state->C->is_r500
+ && fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH)
+ || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
+ rc_error(fc_state->C, "Loops are nested too deep.");
+ return;
+ }
+
+ if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
+ if (fc_state->PredicateReg == -1) {
+ if (reserve_predicate_reg(fc_state) == -1) {
+ return;
+ }
+ }
+
+ /* Initialize the predicate bit to true. */
+ new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
+ build_pred_dst(&new_inst->U.I.DstReg, fc_state);
+ new_inst->U.I.SrcReg[0].Index = 0;
+ new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+ } else {
+ fc_state->PredStack[fc_state->LoopDepth] =
+ fc_state->PredicateReg;
+ /* Copy the current predicate value to this loop's
+ * predicate register */
+
+ /* Use the old predicate value for src0 */
+ build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
+
+ /* Reserve this loop's predicate register */
+ if (reserve_predicate_reg(fc_state) == -1) {
+ return;
+ }
+
+ /* Copy the old predicate value to the new register */
+ new_inst->U.I.Opcode = RC_OPCODE_ADD;
+ build_pred_dst(&new_inst->U.I.DstReg, fc_state);
+ new_inst->U.I.SrcReg[1].Index = 0;
+ new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
+ new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
+ }
+
+}
+
+static void lower_brk(
+ struct rc_instruction * inst,
+ struct vert_fc_state * fc_state)
+{
+ if (fc_state->LoopDepth == 1) {
+ inst->U.I.Opcode = RC_OPCODE_RCP;
+ inst->U.I.DstReg.Pred = RC_PRED_INV;
+ inst->U.I.SrcReg[0].Index = 0;
+ inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+ } else {
+ inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
+ inst->U.I.DstReg.Pred = RC_PRED_SET;
+ }
+
+ build_pred_dst(&inst->U.I.DstReg, fc_state);
+}
+
+static void lower_endloop(
+ struct rc_instruction * inst,
+ struct vert_fc_state * fc_state)
+{
+ struct rc_instruction * new_inst =
+ rc_insert_new_instruction(fc_state->C, inst);
+
+ new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
+ build_pred_dst(&new_inst->U.I.DstReg, fc_state);
+ /* Restore the previous predicate register. */
+ fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
+ build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
+}
+
+static void lower_if(
+ struct rc_instruction * inst,
+ struct vert_fc_state * fc_state)
+{
+ /* Reserve a temporary to use as our predicate stack counter, if we
+ * don't already have one. */
+ if (fc_state->PredicateReg == -1) {
+ /* If we are inside a loop, the Predicate Register should
+ * have already been defined. */
+ assert(fc_state->LoopDepth == 0);
+
+ if (reserve_predicate_reg(fc_state) == -1) {
+ return;
+ }
+ }
+
+ if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) {
+ fc_state->InCFBreak = 1;
+ }
+ if ((fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0)
+ || (fc_state->LoopDepth == 1 && fc_state->InCFBreak)) {
+ if (fc_state->InCFBreak) {
+ inst->U.I.Opcode = RC_ME_PRED_SEQ;
+ inst->U.I.DstReg.Pred = RC_PRED_SET;
+ } else {
+ inst->U.I.Opcode = RC_ME_PRED_SNEQ;
+ }
+ } else {
+ unsigned swz;
+ inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
+ memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0],
+ sizeof(inst->U.I.SrcReg[1]));
+ swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
+ /* VE_PRED_SNEQ_PUSH needs the branch condition to be in the
+ * w component */
+ inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
+ RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
+ build_pred_src(&inst->U.I.SrcReg[0], fc_state);
+ }
+ build_pred_dst(&inst->U.I.DstReg, fc_state);
+}
+
+void rc_vert_fc(struct radeon_compiler *c, void *user)
+{
+ struct rc_instruction * inst;
+ struct vert_fc_state fc_state;
+
+ memset(&fc_state, 0, sizeof(fc_state));
+ fc_state.PredicateReg = -1;
+ fc_state.C = c;
+
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+
+ switch (inst->U.I.Opcode) {
+
+ case RC_OPCODE_BGNLOOP:
+ lower_bgnloop(inst, &fc_state);
+ fc_state.LoopDepth++;
+ break;
+
+ case RC_OPCODE_BRK:
+ lower_brk(inst, &fc_state);
+ break;
+
+ case RC_OPCODE_ENDLOOP:
+ if (fc_state.BranchDepth != 0
+ || fc_state.LoopDepth != 1) {
+ lower_endloop(inst, &fc_state);
+ }
+ fc_state.LoopDepth--;
+ /* Skip PRED_RESTORE */
+ inst = inst->Next;
+ break;
+ case RC_OPCODE_IF:
+ lower_if(inst, &fc_state);
+ fc_state.BranchDepth++;
+ break;
+
+ case RC_OPCODE_ELSE:
+ inst->U.I.Opcode = RC_ME_PRED_SET_INV;
+ build_pred_dst(&inst->U.I.DstReg, &fc_state);
+ build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
+ break;
+
+ case RC_OPCODE_ENDIF:
+ if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) {
+ struct rc_instruction * to_delete = inst;
+ inst = inst->Prev;
+ rc_remove_instruction(to_delete);
+ /* XXX: Delete the endif instruction */
+ } else {
+ inst->U.I.Opcode = RC_ME_PRED_SET_POP;
+ build_pred_dst(&inst->U.I.DstReg, &fc_state);
+ build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
+ }
+ fc_state.InCFBreak = 0;
+ fc_state.BranchDepth--;
+ break;
+
+ default:
+ if (fc_state.BranchDepth || fc_state.LoopDepth) {
+ inst->U.I.DstReg.Pred = RC_PRED_SET;
+ }
+ break;
+ }
+ }
+}