summary refs log tree commit diff stats
path: root/src/mesa/drivers/dri/i965/intel_blit.c
diff options
context:
space:
mode:
author Matt Turner <mattst88@gmail.com> 2015-07-08 19:00:48 -0700
committer Matt Turner <mattst88@gmail.com> 2015-07-15 13:09:22 -0700
commit f11c6f09cf36909ff399353b20195a31cf0f1907 (patch)
tree 7cb18d467c3646f37add23a0e63cb8940bb14a57 /src/mesa/drivers/dri/i965/intel_blit.c
parent 131573df7aea0b10e97d9d5db0d26d89f8dfef54 (diff)
download external_mesa3d-f11c6f09cf36909ff399353b20195a31cf0f1907.zip
external_mesa3d-f11c6f09cf36909ff399353b20195a31cf0f1907.tar.gz
external_mesa3d-f11c6f09cf36909ff399353b20195a31cf0f1907.tar.bz2
i965: Optimize batchbuffer macros.
Previously OUT_BATCH was just a macro around an inline function which
does

   brw->batch.map[brw->batch.used++] = dword;

When making consecutive calls to intel_batchbuffer_emit_dword() the
compiler isn't able to recognize that we're writing consecutive memory
locations or that it doesn't need to write batch.used back to memory
each time.

We can avoid both of these problems by making a local pointer to the
next location in the batch in BEGIN_BATCH().

Cuts 18k from the .text size.

   text     data      bss      dec      hex  filename
4946956   195152    26192  5168300   4edcac  i965_dri.so before
4928956   195152    26192  5150300   4e965c  i965_dri.so after

This series (including commit c0433948) improves performance of Synmark
OglBatch7 by 8.01389% +/- 0.63922% (n=83) on Ivybridge.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/mesa/drivers/dri/i965/intel_blit.c')
-rw-r--r-- src/mesa/drivers/dri/i965/intel_blit.c 19
1 files changed, 10 insertions, 9 deletions
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index bc39053..4fc3fa8 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -176,9 +176,10 @@ get_tr_vertical_align(uint32_t tr_mode, uint32_t cpp, bool is_src) {
* tiling state would leak into other unsuspecting applications (like the X
* server).
*/
-static void
+static uint32_t *
set_blitter_tiling(struct brw_context *brw,
- bool dst_y_tiled, bool src_y_tiled)
+ bool dst_y_tiled, bool src_y_tiled,
+ uint32_t *__map)
{
assert(brw->gen >= 6);
@@ -193,19 +194,19 @@ set_blitter_tiling(struct brw_context *brw,
OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 |
(dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) |
(src_y_tiled ? BCS_SWCTRL_SRC_Y : 0));
+ return __map;
}
+#define SET_BLITTER_TILING(...) __map = set_blitter_tiling(__VA_ARGS__, __map)
-#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) do { \
+#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) \
BEGIN_BATCH_BLT(n + ((dst_y_tiled || src_y_tiled) ? 14 : 0)); \
if (dst_y_tiled || src_y_tiled) \
- set_blitter_tiling(brw, dst_y_tiled, src_y_tiled); \
- } while (0)
+ SET_BLITTER_TILING(brw, dst_y_tiled, src_y_tiled)
-#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) do { \
+#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) \
if (dst_y_tiled || src_y_tiled) \
- set_blitter_tiling(brw, false, false); \
- ADVANCE_BATCH(); \
- } while (0)
+ SET_BLITTER_TILING(brw, false, false); \
+ ADVANCE_BATCH()
static int
blt_pitch(struct intel_mipmap_tree *mt)