diff options
author | Eric Anholt <eric@anholt.net> | 2012-11-27 14:10:52 -0800 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2012-12-05 14:29:44 -0800 |
commit | 71f06344a0d72a6bd27750ceca571fc016b8de85 (patch) | |
tree | 4a32ebc3e5bff0ad16665a5a0737b2da1c0e0683 /src/mesa/drivers/dri/i965/brw_program.c | |
parent | ef2fbf67d4bd941a9a0e1c6f8515fb4911e05c50 (diff) | |
download | external_mesa3d-71f06344a0d72a6bd27750ceca571fc016b8de85.zip external_mesa3d-71f06344a0d72a6bd27750ceca571fc016b8de85.tar.gz external_mesa3d-71f06344a0d72a6bd27750ceca571fc016b8de85.tar.bz2 |
i965: Add a debug flag for counting cycles spent in each compiled shader.
This can be used for two purposes: Using hand-coded shaders to determine
per-instruction timings, or figuring out which shader to optimize in a
whole application.
Note that this doesn't cover the instructions that set up the message to
the URB/FB write -- we'd need to convert the MRF usage in these
instructions to GRFs so that our offsets/times don't overwrite our
shader outputs.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (v1)
v2: Check the timestamp reset flag in the VS, which is apparently
getting set fairly regularly in the range we watch, resulting in
negative numbers getting added to our 32-bit counter, and thus large
values added to our uint64_t.
v3: Rebase on reladdr changes, removing a new safety check that proved
impossible to satisfy. Add a comment to the AOP defs from Ken's
review, and put them in a slightly more sensible spot.
v4: Check timestamp reset in the FS as well.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_program.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_program.c | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 6bf5a6a..1859041 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -189,3 +189,130 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ) functions->LinkShader = brw_link_shader; } +void +brw_init_shader_time(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + const int max_entries = 4096; + brw->shader_time.bo = drm_intel_bo_alloc(intel->bufmgr, "shader time", + max_entries * 4, 4096); + brw->shader_time.programs = rzalloc_array(brw, struct gl_shader_program *, + max_entries); + brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type, + max_entries); + brw->shader_time.cumulative = rzalloc_array(brw, uint64_t, + max_entries); + brw->shader_time.max_entries = max_entries; +} + +static int +compare_time(const void *a, const void *b) +{ + uint64_t * const *a_val = a; + uint64_t * const *b_val = b; + + /* We don't just subtract because we're turning the value to an int. */ + if (**a_val < **b_val) + return -1; + else if (**a_val == **b_val) + return 0; + else + return 1; +} + +static void +brw_report_shader_time(struct brw_context *brw) +{ + if (!brw->shader_time.bo || !brw->shader_time.num_entries) + return; + + uint64_t *sorted[brw->shader_time.num_entries]; + double total = 0; + for (int i = 0; i < brw->shader_time.num_entries; i++) { + sorted[i] = &brw->shader_time.cumulative[i]; + total += brw->shader_time.cumulative[i]; + } + + if (total == 0) { + printf("No shader time collected yet\n"); + return; + } + + qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time); + + printf("\n"); + printf("type ID cycles spent %% of total\n"); + for (int s = 0; s < brw->shader_time.num_entries; s++) { + /* Work back from the sorted pointers times to a time to print. */ + int i = sorted[s] - brw->shader_time.cumulative; + + int shader_num = -1; + if (brw->shader_time.programs[i]) { + shader_num = brw->shader_time.programs[i]->Name; + } + + switch (brw->shader_time.types[i]) { + case ST_VS: + printf("vs %4d: ", shader_num); + break; + case ST_FS8: + printf("fs8 %4d: ", shader_num); + break; + case ST_FS16: + printf("fs16 %4d: ", shader_num); + break; + default: + printf("other: "); + break; + } + + printf("%16lld (%7.2f Gcycles) %4.1f%%\n", + (long long)brw->shader_time.cumulative[i], + (double)brw->shader_time.cumulative[i] / 1000000000.0, + (double)brw->shader_time.cumulative[i] / total * 100.0); + } +} + +static void +brw_collect_shader_time(struct brw_context *brw) +{ + if (!brw->shader_time.bo) + return; + + /* This probably stalls on the last rendering. We could fix that by + * delaying reading the reports, but it doesn't look like it's a big + * overhead compared to the cost of tracking the time in the first place. + */ + drm_intel_bo_map(brw->shader_time.bo, true); + + uint32_t *times = brw->shader_time.bo->virtual; + + for (int i = 0; i < brw->shader_time.num_entries; i++) { + brw->shader_time.cumulative[i] += times[i]; + } + + /* Zero the BO out to clear it out for our next collection. + */ + memset(times, 0, brw->shader_time.bo->size); + drm_intel_bo_unmap(brw->shader_time.bo); +} + +void +brw_collect_and_report_shader_time(struct brw_context *brw) +{ + brw_collect_shader_time(brw); + + if (brw->shader_time.report_time == 0 || + get_time() - brw->shader_time.report_time >= 1.0) { + brw_report_shader_time(brw); + brw->shader_time.report_time = get_time(); + } +} + +void +brw_destroy_shader_time(struct brw_context *brw) +{ + drm_intel_bo_unreference(brw->shader_time.bo); + brw->shader_time.bo = NULL; +} |