diff options
author | Matt Turner <mattst88@gmail.com> | 2015-07-08 19:00:48 -0700 |
---|---|---|
committer | Matt Turner <mattst88@gmail.com> | 2015-07-15 13:09:22 -0700 |
commit | f11c6f09cf36909ff399353b20195a31cf0f1907 (patch) | |
tree | 7cb18d467c3646f37add23a0e63cb8940bb14a57 /src/mesa/drivers/dri/i965/intel_batchbuffer.c | |
parent | 131573df7aea0b10e97d9d5db0d26d89f8dfef54 (diff) | |
download | external_mesa3d-f11c6f09cf36909ff399353b20195a31cf0f1907.zip external_mesa3d-f11c6f09cf36909ff399353b20195a31cf0f1907.tar.gz external_mesa3d-f11c6f09cf36909ff399353b20195a31cf0f1907.tar.bz2 |
i965: Optimize batchbuffer macros.
Previously, OUT_BATCH was just a macro around an inline function which
does:

    brw->batch.map[brw->batch.used++] = dword;

When making consecutive calls to intel_batchbuffer_emit_dword() the
compiler isn't able to recognize that we're writing consecutive memory
locations or that it doesn't need to write batch.used back to memory
each time.
We can avoid both of these problems by making a local pointer to the
next location in the batch in BEGIN_BATCH().
Cuts 18k from the .text size.
   text     data    bss      dec     hex  filename
4946956   195152  26192  5168300  4edcac  i965_dri.so  before
4928956   195152  26192  5150300  4e965c  i965_dri.so  after
This series (including commit c0433948) improves performance of Synmark
OglBatch7 by 8.01389% +/- 0.63922% (n=83) on Ivybridge.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/mesa/drivers/dri/i965/intel_batchbuffer.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_batchbuffer.c | 19 |
1 file changed, 10 insertions, 9 deletions
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 628a7b7..088ffd2 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -48,6 +48,7 @@ intel_batchbuffer_init(struct brw_context *brw) if (!brw->has_llc) { brw->batch.cpu_map = malloc(BATCH_SZ); brw->batch.map = brw->batch.cpu_map; + brw->batch.map_next = brw->batch.cpu_map; } } @@ -68,10 +69,10 @@ intel_batchbuffer_reset(struct brw_context *brw) drm_intel_bo_map(brw->batch.bo, true); brw->batch.map = brw->batch.bo->virtual; } + brw->batch.map_next = brw->batch.map; brw->batch.reserved_space = BATCH_RESERVED; brw->batch.state_batch_offset = brw->batch.bo->size; - brw->batch.used = 0; brw->batch.needs_sol_reset = false; /* We don't know what ring the new batch will be sent to until we see the @@ -83,7 +84,7 @@ intel_batchbuffer_reset(struct brw_context *brw) void intel_batchbuffer_save_state(struct brw_context *brw) { - brw->batch.saved.used = brw->batch.used; + brw->batch.saved.map_next = brw->batch.map_next; brw->batch.saved.reloc_count = drm_intel_gem_bo_get_reloc_count(brw->batch.bo); } @@ -93,7 +94,7 @@ intel_batchbuffer_reset_to_saved(struct brw_context *brw) { drm_intel_gem_bo_clear_relocs(brw->batch.bo, brw->batch.saved.reloc_count); - brw->batch.used = brw->batch.saved.used; + brw->batch.map_next = brw->batch.saved.map_next; if (USED_BATCH(brw->batch) == 0) brw->batch.ring = UNKNOWN_RING; } @@ -395,13 +396,13 @@ _intel_batchbuffer_flush(struct brw_context *brw, */ uint32_t intel_batchbuffer_reloc(struct brw_context *brw, - drm_intel_bo *buffer, + drm_intel_bo *buffer, uint32_t offset, uint32_t read_domains, uint32_t write_domain, uint32_t delta) { int ret; - ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used, + ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset, buffer, delta, read_domains, write_domain); assert(ret == 0); @@ -416,11 +417,11 @@ intel_batchbuffer_reloc(struct 
brw_context *brw, uint64_t intel_batchbuffer_reloc64(struct brw_context *brw, - drm_intel_bo *buffer, + drm_intel_bo *buffer, uint32_t offset, uint32_t read_domains, uint32_t write_domain, uint32_t delta) { - int ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used, + int ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset, buffer, delta, read_domains, write_domain); assert(ret == 0); @@ -440,8 +441,8 @@ intel_batchbuffer_data(struct brw_context *brw, { assert((bytes & 3) == 0); intel_batchbuffer_require_space(brw, bytes, ring); - memcpy(brw->batch.map + brw->batch.used, data, bytes); - brw->batch.used += bytes >> 2; + memcpy(brw->batch.map_next, data, bytes); + brw->batch.map_next += bytes >> 2; } static void |