path: root/src/mesa/drivers/dri/i965/intel_batchbuffer.c
author     Matt Turner <mattst88@gmail.com>  2015-07-08 19:00:48 -0700
committer  Matt Turner <mattst88@gmail.com>  2015-07-15 13:09:22 -0700
commit     f11c6f09cf36909ff399353b20195a31cf0f1907 (patch)
tree       7cb18d467c3646f37add23a0e63cb8940bb14a57 /src/mesa/drivers/dri/i965/intel_batchbuffer.c
parent     131573df7aea0b10e97d9d5db0d26d89f8dfef54 (diff)
i965: Optimize batchbuffer macros.
Previously OUT_BATCH was just a macro around an inline function which
does

   brw->batch.map[brw->batch.used++] = dword;

When making consecutive calls to intel_batchbuffer_emit_dword() the
compiler isn't able to recognize that we're writing consecutive memory
locations or that it doesn't need to write batch.used back to memory
each time. We can avoid both of these problems by making a local
pointer to the next location in the batch in BEGIN_BATCH().

Cuts 18k from the .text size.

   text     data    bss    dec      hex     filename
   4946956  195152  26192  5168300  4edcac  i965_dri.so before
   4928956  195152  26192  5150300  4e965c  i965_dri.so after

This series (including commit c0433948) improves performance of Synmark
OglBatch7 by 8.01389% +/- 0.63922% (n=83) on Ivybridge.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
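The macros themselves live in intel_batchbuffer.h and are not part of
this diff; the following is a minimal compilable sketch of the
before/after idea with simplified names (struct batch, emit_dword_old,
and __map are illustrative, not the Mesa identifiers):

   #include <stdint.h>

   struct batch {
      uint32_t *map;      /* start of the batchbuffer mapping */
      uint32_t *map_next; /* write cursor into the mapping */
      int used;           /* old scheme: dword count, stored each emit */
   };

   /* Old scheme: every emitted dword reloads and stores batch->used,
    * so the compiler cannot coalesce consecutive writes. */
   static inline void emit_dword_old(struct batch *batch, uint32_t dword)
   {
      batch->map[batch->used++] = dword;
   }

   /* New scheme: BEGIN_BATCH opens a scope with a local cursor the
    * compiler can keep in a register; consecutive OUT_BATCHes become
    * back-to-back stores, and the cursor is written out only once,
    * at ADVANCE_BATCH. */
   #define BEGIN_BATCH(batch)   do { uint32_t *__map = (batch)->map_next
   #define OUT_BATCH(dword)     *__map++ = (dword)
   #define ADVANCE_BATCH(batch) (batch)->map_next = __map; } while (0)

With that shape, a run of OUT_BATCH() calls between BEGIN_BATCH() and
ADVANCE_BATCH() compiles down to adjacent stores plus a single store of
the cursor, which is where the .text savings come from.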
Diffstat (limited to 'src/mesa/drivers/dri/i965/intel_batchbuffer.c')
-rw-r--r--  src/mesa/drivers/dri/i965/intel_batchbuffer.c  |  19
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 628a7b7..088ffd2 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -48,6 +48,7 @@ intel_batchbuffer_init(struct brw_context *brw)
    if (!brw->has_llc) {
       brw->batch.cpu_map = malloc(BATCH_SZ);
       brw->batch.map = brw->batch.cpu_map;
+      brw->batch.map_next = brw->batch.cpu_map;
    }
 }

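For context, the fields this hunk touches live in struct
intel_batchbuffer (defined in brw_context.h, not in this file). Roughly
this shape, abbreviated; a sketch, not the upstream definition:

   struct intel_batchbuffer {
      drm_intel_bo *bo;   /* buffer object handed to the kernel */
      uint32_t *map;      /* CPU mapping of the batch (start) */
      uint32_t *map_next; /* write cursor; replaces the old used count */
      uint32_t *cpu_map;  /* malloc'd staging copy on non-LLC hardware */
      /* ... reserved_space, state_batch_offset, saved state, etc. */
   };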
@@ -68,10 +69,10 @@ intel_batchbuffer_reset(struct brw_context *brw)
       drm_intel_bo_map(brw->batch.bo, true);
       brw->batch.map = brw->batch.bo->virtual;
    }
+   brw->batch.map_next = brw->batch.map;

    brw->batch.reserved_space = BATCH_RESERVED;
    brw->batch.state_batch_offset = brw->batch.bo->size;
-   brw->batch.used = 0;
    brw->batch.needs_sol_reset = false;

    /* We don't know what ring the new batch will be sent to until we see the
@@ -83,7 +84,7 @@ intel_batchbuffer_reset(struct brw_context *brw)
 void
 intel_batchbuffer_save_state(struct brw_context *brw)
 {
-   brw->batch.saved.used = brw->batch.used;
+   brw->batch.saved.map_next = brw->batch.map_next;
    brw->batch.saved.reloc_count =
       drm_intel_gem_bo_get_reloc_count(brw->batch.bo);
 }
@@ -93,7 +94,7 @@ intel_batchbuffer_reset_to_saved(struct brw_context *brw)
 {
    drm_intel_gem_bo_clear_relocs(brw->batch.bo, brw->batch.saved.reloc_count);

-   brw->batch.used = brw->batch.saved.used;
+   brw->batch.map_next = brw->batch.saved.map_next;
    if (USED_BATCH(brw->batch) == 0)
       brw->batch.ring = UNKNOWN_RING;
 }
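With batch.used gone, the dword count is recovered from the cursor via
USED_BATCH(), which is defined in intel_batchbuffer.h rather than here.
Assuming map and map_next are both uint32_t *, pointer subtraction
already yields a dword count, so a definition of roughly this shape
suffices (a sketch, not necessarily the exact upstream macro):

   /* Dwords emitted so far: pointer difference between the write
    * cursor and the start of the mapping. uint32_t * subtraction
    * counts 4-byte elements, i.e. dwords. Needs <stdint.h> for
    * uintptr_t. */
   #define USED_BATCH(batch) ((uintptr_t)((batch).map_next - (batch).map))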
@@ -395,13 +396,13 @@ _intel_batchbuffer_flush(struct brw_context *brw,
  */
 uint32_t
 intel_batchbuffer_reloc(struct brw_context *brw,
-                        drm_intel_bo *buffer,
+                        drm_intel_bo *buffer, uint32_t offset,
                         uint32_t read_domains, uint32_t write_domain,
                         uint32_t delta)
 {
    int ret;

-   ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
+   ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset,
                                  buffer, delta,
                                  read_domains, write_domain);
    assert(ret == 0);
@@ -416,11 +417,11 @@ intel_batchbuffer_reloc(struct brw_context *brw,

 uint64_t
 intel_batchbuffer_reloc64(struct brw_context *brw,
-                          drm_intel_bo *buffer,
+                          drm_intel_bo *buffer, uint32_t offset,
                           uint32_t read_domains, uint32_t write_domain,
                           uint32_t delta)
 {
-   int ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
+   int ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset,
                                      buffer, delta,
                                      read_domains, write_domain);
    assert(ret == 0);
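Dropping batch.used means intel_batchbuffer_reloc() and
intel_batchbuffer_reloc64() can no longer compute the relocation offset
themselves, so callers now pass the byte offset explicitly. A
hypothetical caller sitting at the cursor could derive it from
USED_BATCH(); a sketch (the real call sites are the OUT_RELOC macros in
intel_batchbuffer.h, and reloc_at_cursor is not a Mesa function):

   /* Hypothetical helper: emit a relocation at the current cursor.
    * USED_BATCH() counts dwords, so the byte offset is 4x that. */
   static uint32_t reloc_at_cursor(struct brw_context *brw,
                                   drm_intel_bo *buffer,
                                   uint32_t read_domains,
                                   uint32_t write_domain,
                                   uint32_t delta)
   {
      return intel_batchbuffer_reloc(brw, buffer,
                                     4 * USED_BATCH(brw->batch),
                                     read_domains, write_domain, delta);
   }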
@@ -440,8 +441,8 @@ intel_batchbuffer_data(struct brw_context *brw,
 {
    assert((bytes & 3) == 0);
    intel_batchbuffer_require_space(brw, bytes, ring);
-   memcpy(brw->batch.map + brw->batch.used, data, bytes);
-   brw->batch.used += bytes >> 2;
+   memcpy(brw->batch.map_next, data, bytes);
+   brw->batch.map_next += bytes >> 2;
 }

 static void
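One subtlety in the intel_batchbuffer_data() hunk: map_next is a
uint32_t pointer, so advancing it by bytes >> 2 elements moves the
address forward by exactly `bytes` bytes, matching the preceding
memcpy(). A standalone sketch of the equivalence (advance is an
illustrative name, not part of the driver):

   #include <assert.h>
   #include <stddef.h>
   #include <stdint.h>

   /* Advancing a uint32_t * by (bytes >> 2) elements equals advancing
    * the raw address by `bytes`; dword alignment is asserted, just as
    * intel_batchbuffer_data() does. */
   static void advance(uint32_t **map_next, unsigned bytes)
   {
      assert((bytes & 3) == 0);
      uint32_t *before = *map_next;
      *map_next += bytes >> 2;
      assert((char *)*map_next - (char *)before == (ptrdiff_t)bytes);
   }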