diff options
Diffstat (limited to 'src/gallium/drivers/r300/compiler/radeon_pair_schedule.c')
-rw-r--r-- | src/gallium/drivers/r300/compiler/radeon_pair_schedule.c | 328 |
1 files changed, 286 insertions, 42 deletions
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c index ee868cc..e03afb4 100644 --- a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c +++ b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c @@ -32,7 +32,10 @@ #include "radeon_compiler.h" #include "radeon_compiler_util.h" #include "radeon_dataflow.h" +#include "radeon_list.h" +#include "radeon_variable.h" +#include "util/u_debug.h" #define VERBOSE 0 @@ -65,6 +68,17 @@ struct schedule_instruction { * PairedInst references the alpha insturction's dependency information. */ struct schedule_instruction * PairedInst; + + /** This scheduler uses the value of Score to determine which + * instruction to schedule. Instructions with a higher value of Score + * will be scheduled first. */ + int Score; + + /** The number of components that read from a TEX instruction. */ + unsigned TexReadCount; + + /** For TEX instructions a list of readers */ + struct rc_list * TexReaders; }; @@ -131,6 +145,12 @@ struct schedule_state { struct schedule_instruction *ReadyAlpha; struct schedule_instruction *ReadyTEX; /*@}*/ + struct rc_list *PendingTEX; + + void (*CalcScore)(struct schedule_instruction *); + long max_tex_group; + unsigned PrevBlockHasTex:1; + unsigned TEXCount; }; static struct reg_value ** get_reg_valuep(struct schedule_state * s, @@ -147,6 +167,29 @@ static struct reg_value ** get_reg_valuep(struct schedule_state * s, return &s->Temporary[index].Values[chan]; } +static unsigned get_tex_read_count(struct schedule_instruction * sinst) +{ + unsigned tex_read_count = sinst->TexReadCount; + if (sinst->PairedInst) { + tex_read_count += sinst->PairedInst->TexReadCount; + } + return tex_read_count; +} + +#if VERBOSE +static void print_list(struct schedule_instruction * sinst) +{ + struct schedule_instruction * ptr; + for (ptr = sinst; ptr; ptr=ptr->NextReady) { + unsigned tex_read_count = get_tex_read_count(ptr); + unsigned score = sinst->Score; + fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score, + tex_read_count); + } + fprintf(stderr, "\n"); +} +#endif + static void remove_inst_from_list(struct schedule_instruction ** list, struct schedule_instruction * inst) { @@ -172,17 +215,28 @@ static void add_inst_to_list(struct schedule_instruction ** list, struct schedul *list = inst; } -static void add_inst_to_list_end(struct schedule_instruction ** list, +static void add_inst_to_list_score(struct schedule_instruction ** list, struct schedule_instruction * inst) { - if(!*list){ + struct schedule_instruction * temp; + struct schedule_instruction * prev; + if (!*list) { *list = inst; - }else{ - struct schedule_instruction * temp = *list; - while(temp->NextReady){ - temp = temp->NextReady; - } - temp->NextReady = inst; + return; + } + temp = *list; + prev = NULL; + while(temp && inst->Score <= temp->Score) { + prev = temp; + temp = temp->NextReady; + } + + if (!prev) { + inst->NextReady = temp; + *list = inst; + } else { + prev->NextReady = inst; + inst->NextReady = temp; } } @@ -193,13 +247,13 @@ static void instruction_ready(struct schedule_state * s, struct schedule_instruc /* Adding Ready TEX instructions to the end of the "Ready List" helps * us emit TEX instructions in blocks without losing our place. */ if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) - add_inst_to_list_end(&s->ReadyTEX, sinst); + add_inst_to_list_score(&s->ReadyTEX, sinst); else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) - add_inst_to_list(&s->ReadyRGB, sinst); + add_inst_to_list_score(&s->ReadyRGB, sinst); else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) - add_inst_to_list(&s->ReadyAlpha, sinst); + add_inst_to_list_score(&s->ReadyAlpha, sinst); else - add_inst_to_list(&s->ReadyFullALU, sinst); + add_inst_to_list_score(&s->ReadyFullALU, sinst); } static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) @@ -210,6 +264,68 @@ static void decrease_dependencies(struct schedule_state * s, struct schedule_ins instruction_ready(s, sinst); } +/* These functions provide different heuristics for scheduling instructions. + * The default is calc_score_readers. */ + +#if 0 + +static void calc_score_zero(struct schedule_instruction * sinst) +{ + sinst->Score = 0; +} + +static void calc_score_deps(struct schedule_instruction * sinst) +{ + int i; + sinst->Score = 0; + for (i = 0; i < sinst->NumWriteValues; i++) { + struct reg_value * v = sinst->WriteValues[i]; + if (v->NumReaders) { + struct reg_value_reader * r; + for (r = v->Readers; r; r = r->Next) { + if (r->Reader->NumDependencies == 1) { + sinst->Score += 100; + } + sinst->Score += r->Reader->NumDependencies; + } + } + } +} + +#endif + +#define NO_READ_TEX_SCORE (1 << 16) +#define NO_OUTPUT_SCORE (1 << 24) + +static void calc_score_readers(struct schedule_instruction * sinst) +{ + if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) { + sinst->Score = 0; + } else { + sinst->Score = sinst->NumReadValues; + if (sinst->PairedInst) { + sinst->Score += sinst->PairedInst->NumReadValues; + } + if (get_tex_read_count(sinst) == 0) { + sinst->Score |= NO_READ_TEX_SCORE; + } + if (!sinst->Instruction->U.P.RGB.OutputWriteMask && + !sinst->Instruction->U.P.Alpha.OutputWriteMask) { + if (sinst->PairedInst) { + if (!sinst->PairedInst->Instruction->U.P. + RGB.OutputWriteMask + && !sinst->PairedInst->Instruction->U.P. + Alpha.OutputWriteMask) { + sinst->Score |= NO_OUTPUT_SCORE; + } + + } else { + sinst->Score |= NO_OUTPUT_SCORE; + } + } + } +} + /** * This function decreases the dependencies of the next instruction that * wants to write to each of sinst's read values. @@ -222,8 +338,9 @@ static void commit_update_reads(struct schedule_state * s, assert(v->NumReaders > 0); v->NumReaders--; if (!v->NumReaders) { - if (v->Next) + if (v->Next) { decrease_dependencies(s, v->Next->Writer); + } } } if (sinst->PairedInst) { @@ -256,13 +373,33 @@ static void commit_update_writes(struct schedule_state * s, } } +static void notify_sem_wait(struct schedule_state *s) +{ + struct rc_list * pend_ptr; + for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) { + struct rc_list * read_ptr; + struct schedule_instruction * pending = pend_ptr->Item; + for (read_ptr = pending->TexReaders; read_ptr; + read_ptr = read_ptr->Next) { + struct schedule_instruction * reader = read_ptr->Item; + reader->TexReadCount--; + } + } + s->PendingTEX = NULL; +} + static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst) { - DBG("%i: commit\n", sinst->Instruction->IP); + DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score); commit_update_reads(s, sinst); commit_update_writes(s, sinst); + + if (get_tex_read_count(sinst) > 0) { + sinst->Instruction->U.P.SemWait = 1; + notify_sem_wait(s); + } } /** @@ -277,6 +414,7 @@ static void emit_all_tex(struct schedule_state * s, struct rc_instruction * befo struct rc_instruction * inst_begin; assert(s->ReadyTEX); + notify_sem_wait(s); /* Node marker for R300 */ inst_begin = rc_insert_new_instruction(s->C, before->Prev); @@ -308,6 +446,12 @@ static void emit_all_tex(struct schedule_state * s, struct rc_instruction * befo while(readytex){ DBG("%i: commit TEX writes\n", readytex->Instruction->IP); commit_update_writes(s, readytex); + /* Set semaphore bits for last TEX instruction in the block */ + if (!readytex->NextReady) { + readytex->Instruction->U.I.TexSemAcquire = 1; + readytex->Instruction->U.I.TexSemWait = 1; + } + rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex)); readytex = readytex->NextReady; } } @@ -491,6 +635,9 @@ static int destructive_merge_instructions( rgb->ALUResultCompare = alpha->ALUResultCompare; } + /* Copy SemWait */ + rgb->SemWait |= alpha->SemWait; + return 1; } @@ -824,14 +971,14 @@ static void pair_instructions(struct schedule_state * s) && convert_rgb_to_alpha(s, rgb_ptr)) { struct schedule_instruction * pair_ptr; + remove_inst_from_list(&s->ReadyRGB, rgb_ptr); + add_inst_to_list_score(&s->ReadyAlpha, rgb_ptr); + for (pair_ptr = s->ReadyRGB; pair_ptr; pair_ptr = pair_ptr->NextReady) { - if (pair_ptr == rgb_ptr) { - continue; - } if (merge_instructions(&pair_ptr->Instruction->U.P, &rgb_ptr->Instruction->U.P)) { - remove_inst_from_list(&s->ReadyRGB, rgb_ptr); + remove_inst_from_list(&s->ReadyAlpha, rgb_ptr); remove_inst_from_list(&s->ReadyRGB, pair_ptr); pair_ptr->PairedInst = rgb_ptr; @@ -849,6 +996,68 @@ static void pair_instructions(struct schedule_state * s) } } +static void update_max_score( + struct schedule_state * s, + struct schedule_instruction ** list, + int * max_score, + struct schedule_instruction ** max_inst_out, + struct schedule_instruction *** list_out) +{ + struct schedule_instruction * list_ptr; + for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) { + int score; + s->CalcScore(list_ptr); + score = list_ptr->Score; + if (!*max_inst_out || score > *max_score) { + *max_score = score; + *max_inst_out = list_ptr; + *list_out = list; + } + } +} + +static void emit_instruction( + struct schedule_state * s, + struct rc_instruction * before) +{ + int max_score = -1; + struct schedule_instruction * max_inst = NULL; + struct schedule_instruction ** max_list = NULL; + unsigned tex_count = 0; + struct schedule_instruction * tex_ptr; + + pair_instructions(s); +#if VERBOSE + fprintf(stderr, "Full:\n"); + print_list(s->ReadyFullALU); + fprintf(stderr, "RGB:\n"); + print_list(s->ReadyRGB); + fprintf(stderr, "Alpha:\n"); + print_list(s->ReadyAlpha); + fprintf(stderr, "TEX:\n"); + print_list(s->ReadyTEX); +#endif + + for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) { + tex_count++; + } + update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list); + update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list); + update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list); + + if (tex_count >= s->max_tex_group || max_score == -1 + || (s->TEXCount > 0 && tex_count == s->TEXCount)) { + emit_all_tex(s, before); + } else { + + + remove_inst_from_list(max_list, max_inst); + rc_insert_instruction(before->Prev, max_inst->Instruction); + commit_alu_instruction(s, max_inst); + + presub_nop(before->Prev); + } +} /** * Find a good ALU instruction or pair of ALU instruction and emit it. @@ -860,6 +1069,7 @@ static void pair_instructions(struct schedule_state * s) static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before) { struct schedule_instruction * sinst; + int rgb_score = -1, alpha_score = -1; /* Try to merge RGB and Alpha instructions together. */ pair_instructions(s); @@ -871,6 +1081,12 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor commit_alu_instruction(s, sinst); } else { if (s->ReadyRGB) { + rgb_score = s->ReadyRGB->Score; + } + if (s->ReadyAlpha) { + alpha_score = s->ReadyAlpha->Score; + } + if (rgb_score > alpha_score) { sinst = s->ReadyRGB; s->ReadyRGB = s->ReadyRGB->NextReady; } else if (s->ReadyAlpha) { @@ -924,6 +1140,12 @@ static void scan_read(void * data, struct rc_instruction * inst, /* Only update the current instruction's dependencies if the * register it reads from has been written to in this block. */ if ((*v)->Writer) { + if ((*v)->Writer->Instruction->Type == + RC_INSTRUCTION_NORMAL) { + s->Current->TexReadCount++; + rc_list_add(&((*v)->Writer->TexReaders), + rc_list(&s->C->Pool, s->Current)); + } s->Current->NumDependencies++; } } @@ -977,22 +1199,33 @@ static void is_rgb_to_alpha_possible_normal( } -static void schedule_block(struct r300_fragment_program_compiler * c, +static void schedule_block(struct schedule_state * s, struct rc_instruction * begin, struct rc_instruction * end) { - struct schedule_state s; unsigned int ip; - memset(&s, 0, sizeof(s)); - s.C = &c->Base; - /* Scan instructions for data dependencies */ ip = 0; for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { - s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current)); - memset(s.Current, 0, sizeof(struct schedule_instruction)); + s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current)); + memset(s->Current, 0, sizeof(struct schedule_instruction)); + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + if (info->HasTexture) { + s->TEXCount++; + } + } - s.Current->Instruction = inst; + /* XXX: This causes SemWait to be set for all instructions in + * a block if the previous block contained a TEX instruction. + * We can do better here, but it will take a lot of work. */ + if (s->PrevBlockHasTex) { + s->Current->TexReadCount = 1; + } + + s->Current->Instruction = inst; inst->IP = ip++; DBG("%i: Scanning\n", inst->IP); @@ -1001,17 +1234,18 @@ static void schedule_block(struct r300_fragment_program_compiler * c, * counter-intuitive, to account for the case where an * instruction writes to the same register as it reads * from. */ - rc_for_all_writes_chan(inst, &scan_write, &s); - rc_for_all_reads_chan(inst, &scan_read, &s); + rc_for_all_writes_chan(inst, &scan_write, s); + rc_for_all_reads_chan(inst, &scan_read, s); - DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); + DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies); - if (!s.Current->NumDependencies) - instruction_ready(&s, s.Current); + if (!s->Current->NumDependencies) { + instruction_ready(s, s->Current); + } /* Get global readers for possible RGB->Alpha conversion. */ - s.Current->GlobalReaders.ExitOnAbort = 1; - rc_get_readers(s.C, inst, &s.Current->GlobalReaders, + s->Current->GlobalReaders.ExitOnAbort = 1; + rc_get_readers(s->C, inst, &s->Current->GlobalReaders, is_rgb_to_alpha_possible_normal, is_rgb_to_alpha_possible, NULL); } @@ -1021,13 +1255,17 @@ static void schedule_block(struct r300_fragment_program_compiler * c, end->Prev = begin->Prev; /* Schedule instructions back */ - while(!s.C->Error && - (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { - if (s.ReadyTEX) - emit_all_tex(&s, end); + while(!s->C->Error && + (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) { + if (s->C->is_r500) { + emit_instruction(s, end); + } else { + if (s->ReadyTEX) + emit_all_tex(s, end); - while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)) - emit_one_alu(&s, end); + while(!s->C->Error && (s->ReadyFullALU || s->ReadyRGB || s->ReadyAlpha)) + emit_one_alu(s, end); + } } } @@ -1042,13 +1280,14 @@ static int is_controlflow(struct rc_instruction * inst) void rc_pair_schedule(struct radeon_compiler *cc, void *user) { - struct schedule_state s; - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + struct schedule_state s; struct rc_instruction * inst = c->Base.Program.Instructions.Next; memset(&s, 0, sizeof(s)); s.C = &c->Base; + s.CalcScore = calc_score_readers; + s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8); while(inst != &c->Base.Program.Instructions) { struct rc_instruction * first; @@ -1063,6 +1302,11 @@ void rc_pair_schedule(struct radeon_compiler *cc, void *user) inst = inst->Next; DBG("Schedule one block\n"); - schedule_block(c, first, inst); + memset(s.Temporary, 0, sizeof(s.Temporary)); + s.TEXCount = 0; + schedule_block(&s, first, inst); + if (s.PendingTEX) { + s.PrevBlockHasTex = 1; + } } } |