diff options
author | Eric Anholt <eric@anholt.net> | 2011-04-27 13:33:10 -0700 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2011-06-18 16:00:45 -0700 |
commit | c173541d9769d41a85cc899bc49699a3587df4bf (patch) | |
tree | 0b445fd0db1f9eb806b7fe48fa8ac4fced4baa8a /src/mesa/drivers/dri/i965/brw_clip_state.c | |
parent | 962dab948609c97c1c01fde6a27e19307948d302 (diff) | |
download | external_mesa3d-c173541d9769d41a85cc899bc49699a3587df4bf.zip external_mesa3d-c173541d9769d41a85cc899bc49699a3587df4bf.tar.gz external_mesa3d-c173541d9769d41a85cc899bc49699a3587df4bf.tar.bz2 |
i965: Use state streaming on programs, and state base address on gen5+.
There will be a little bit of thrashing of the program cache BO as the
cache warms up, but once the application is in steady state, this
reduces relocations on gen5 and later.
On my T420 laptop, cairogl firefox-talos-gfx performance improves 2.6%
+/- 1.3% (n=6). No statistically significant performance difference
on nexuiz (n=5).
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_clip_state.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_clip_state.c | 19 |
1 files changed, 8 insertions, 11 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 6015c8c..b9efbb7 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -43,11 +43,15 @@ brw_prepare_clip_unit(struct brw_context *brw) clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset); memset(clip, 0, sizeof(*clip)); - /* CACHE_NEW_CLIP_PROG */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG */ clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) / 16 - 1); - /* reloc */ - clip->thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6; + clip->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->clip.state_offset + + offsetof(struct brw_clip_unit_state, thread0), + brw->clip.prog_offset + + (clip->thread0.grf_reg_count << 1)) >> 6; clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; clip->thread1.single_program_flow = 1; @@ -110,14 +114,6 @@ brw_prepare_clip_unit(struct brw_context *brw) clip->viewport_ymin = -1; clip->viewport_ymax = 1; - /* Emit clip program relocation */ - assert(brw->clip.prog_bo); - drm_intel_bo_emit_reloc(intel->batch.bo, - (brw->clip.state_offset + - offsetof(struct brw_clip_unit_state, thread0)), - brw->clip.prog_bo, clip->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); - brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT; } @@ -125,6 +121,7 @@ const struct brw_tracked_state brw_clip_unit = { .dirty = { .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG |