summary | refs | log | tree | commit | diff | stats
path: root/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2013-03-14 14:41:37 -0700
committer Eric Anholt <eric@anholt.net> 2013-04-01 16:17:25 -0700
commit 740350c982bd2735b9eb9063c2b91856b6f1ad31 (patch)
tree 4304356b9eefd81c2ab17382a022fb2290cea684 /src/mesa/drivers/dri/i965/brw_wm_surface_state.c
parent 2f41a601455e6e0366e28b6b84871842cb4bd341 (diff)
download external_mesa3d-740350c982bd2735b9eb9063c2b91856b6f1ad31.zip
external_mesa3d-740350c982bd2735b9eb9063c2b91856b6f1ad31.tar.gz
external_mesa3d-740350c982bd2735b9eb9063c2b91856b6f1ad31.tar.bz2
i965: Make the fragment shader pull constants index by dwords, not vec4s.
We want to load vec4s, since loading a vec4 instead of a dword costs basically no additional latency. But for variable-indexed access, the previous requirement of aligned vec4s for a sampler LD was hard to implement.

Note that this change only affects those messages that use the surface format, like sampler LDs, and not the untyped data cache loads we've used in other cases.

No significant performance difference on my GLSL demo with uniforms forced to take the varying pull constants path (n=4).

NOTE: This is a candidate for the 9.1 branch.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_wm_surface_state.c')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_surface_state.c 13
1 files changed, 8 insertions, 5 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index e458da7..a74b2c7 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -913,15 +913,16 @@ brw_update_texture_surface(struct gl_context *ctx,
* Create the constant buffer surface. Vertex/fragment shader constants will be
* read from this buffer with Data Port Read instructions/messages.
*/
-void
+static void
brw_create_constant_surface(struct brw_context *brw,
drm_intel_bo *bo,
uint32_t offset,
uint32_t size,
- uint32_t *out_offset)
+ uint32_t *out_offset,
+ bool dword_pitch)
{
struct intel_context *intel = &brw->intel;
- uint32_t stride = 16;
+ uint32_t stride = dword_pitch ? 4 : 16;
uint32_t elements = ALIGN(size, stride) / stride;
const GLint w = elements - 1;
uint32_t *surf;
@@ -1090,7 +1091,8 @@ brw_upload_wm_pull_constants(struct brw_context *brw)
drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
- &brw->wm.surf_offset[surf_index]);
+ &brw->wm.surf_offset[surf_index],
+ true);
brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
@@ -1443,7 +1445,8 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
*/
intel->vtbl.create_constant_surface(brw, bo, binding->Offset,
bo->size - binding->Offset,
- &surf_offsets[i]);
+ &surf_offsets[i],
+ shader->Type == GL_FRAGMENT_SHADER);
}
if (shader->NumUniformBlocks)