diff options
author | Eric Anholt <eric@anholt.net> | 2011-04-27 13:33:10 -0700 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2011-06-18 16:00:45 -0700 |
commit | c173541d9769d41a85cc899bc49699a3587df4bf (patch) | |
tree | 0b445fd0db1f9eb806b7fe48fa8ac4fced4baa8a /src/mesa/drivers/dri/i965/brw_sf_state.c | |
parent | 962dab948609c97c1c01fde6a27e19307948d302 (diff) | |
download | external_mesa3d-c173541d9769d41a85cc899bc49699a3587df4bf.zip external_mesa3d-c173541d9769d41a85cc899bc49699a3587df4bf.tar.gz external_mesa3d-c173541d9769d41a85cc899bc49699a3587df4bf.tar.bz2 |
i965: Use state streaming on programs, and state base address on gen5+.

There will be a little bit of thrashing of the program cache BO as the
cache warms up, but once the application is in steady state, this
reduces relocations on gen5 and later.

On my T420 laptop, cairogl firefox-talos-gfx performance improves 2.6%
+/- 1.3% (n=6). No statistically significant performance difference
on nexuiz (n=5).
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_sf_state.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_sf_state.c | 15 |
1 files changed, 8 insertions, 7 deletions
```diff
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index 78b22c4..eb3d103 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -133,9 +133,14 @@ static void upload_sf_unit( struct brw_context *brw )
 
    memset(sf, 0, sizeof(*sf));
 
-   /* CACHE_NEW_SF_PROG */
+   /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_SF_PROG */
    sf->thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1;
-   sf->thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
+   sf->thread0.kernel_start_pointer =
+      brw_program_reloc(brw,
+                        brw->sf.state_offset +
+                        offsetof(struct brw_sf_unit_state, thread0),
+                        brw->sf.prog_offset +
+                        (sf->thread0.grf_reg_count << 1)) >> 6;
 
    sf->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 
@@ -282,11 +287,6 @@ static void upload_sf_unit( struct brw_context *brw )
    /* STATE_PREFETCH command description describes this state as being
     * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
     */
-   /* Emit SF program relocation */
-   drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset +
-                                offsetof(struct brw_sf_unit_state, thread0)),
-                           brw->sf.prog_bo, sf->thread0.grf_reg_count << 1,
-                           I915_GEM_DOMAIN_INSTRUCTION, 0);
 
    /* Emit SF viewport relocation */
    drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset +
@@ -308,6 +308,7 @@ const struct brw_tracked_state brw_sf_unit = {
                 _NEW_SCISSOR |
                 _NEW_BUFFERS),
       .brw = (BRW_NEW_BATCH |
+              BRW_NEW_PROGRAM_CACHE |
               BRW_NEW_URB_FENCE),
       .cache = (CACHE_NEW_SF_VP |
                 CACHE_NEW_SF_PROG)
```