diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.c | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 13 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_upload.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs_emit.c | 11 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_urb.c | 128 |
8 files changed, 162 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 849018b..a1bbc96 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -96,7 +96,8 @@ DRIVER_SOURCES = \ gen6_urb.c \ gen6_viewport_state.c \ gen6_vs_state.c \ - gen6_wm_state.c + gen6_wm_state.c \ + gen7_urb.c C_SOURCES = \ $(COMMON_SOURCES) \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 3a7e331..cd72bc5 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -182,9 +182,15 @@ GLboolean brwCreateContext( int api, if (IS_IVB_GT1(intel->intelScreen->deviceID)) { brw->wm_max_threads = 86; brw->vs_max_threads = 36; + brw->urb.size = 128; + brw->urb.max_vs_entries = 512; + brw->urb.max_gs_entries = 192; } else if (IS_IVB_GT2(intel->intelScreen->deviceID)) { brw->wm_max_threads = 86; brw->vs_max_threads = 128; + brw->urb.size = 256; + brw->urb.max_vs_entries = 704; + brw->urb.max_gs_entries = 320; } else { assert(!"Unknown gen7 device."); } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 94108de..b3d297d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -603,6 +603,8 @@ struct brw_context /* gen6: * The length of each URB entry owned by the VS (or GS), as * a number of 1024-bit (128-byte) rows. Should be >= 1. + * + * gen7: Same meaning, but in 512-bit (64-byte) rows. */ GLuint vs_size; GLuint gs_size; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index fd5227d..8135b39 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -875,12 +875,23 @@ #define CMD_VF_STATISTICS_GM45 0x680b #define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */ -#define _3DSTATE_URB 0x7805 /* GEN6+ */ +#define _3DSTATE_URB 0x7805 /* GEN6 */ # define GEN6_URB_VS_SIZE_SHIFT 16 # define GEN6_URB_VS_ENTRIES_SHIFT 0 # define GEN6_URB_GS_ENTRIES_SHIFT 8 # define GEN6_URB_GS_SIZE_SHIFT 0 +#define _3DSTATE_URB_VS 0x7830 /* GEN7+ */ +#define _3DSTATE_URB_HS 0x7831 /* GEN7+ */ +#define _3DSTATE_URB_DS 0x7832 /* GEN7+ */ +#define _3DSTATE_URB_GS 0x7833 /* GEN7+ */ +# define GEN7_URB_ENTRY_SIZE_SHIFT 16 +# define GEN7_URB_STARTING_ADDRESS_SHIFT 25 + +#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */ +#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GEN7+ */ +# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 + #define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ # define GEN6_CC_VIEWPORT_MODIFY (1 << 12) # define GEN6_SF_VIEWPORT_MODIFY (1 << 11) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 8b9e3a4..d67524f 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -111,6 +111,7 @@ extern const struct brw_tracked_state gen6_vs_constants; extern const struct brw_tracked_state gen6_vs_state; extern const struct brw_tracked_state gen6_wm_constants; extern const struct brw_tracked_state gen6_wm_state; +extern const struct brw_tracked_state gen7_urb; /*********************************************************************** * brw_state.c diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 7524b01..9476c2c 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -191,7 +191,7 @@ const struct brw_tracked_state *gen7_atoms[] = &brw_cc_vp, &gen6_viewport_state, /* must do after *_vp stages */ - &gen6_urb, + &gen7_urb, &gen6_blend_state, /* must do before cc unit */ &gen6_color_calc_state, /* must do before cc unit */ &gen6_depth_stencil_state, /* must do before cc unit */ diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 7267dea..7d5eb35 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -432,7 +432,16 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* See emit_vertex_write() for where the VUE's overhead on top of the * attributes comes from. */ - if (intel->gen >= 6) { + if (intel->gen >= 7) { + int header_regs = 2; + if (c->key.nr_userclip) + header_regs += 2; + + /* Each attribute is 16 bytes (1 vec4), so dividing by 4 gives us the + * number of 64-byte (512-bit) units. + */ + c->prog_data.urb_entry_size = (attributes_in_vue + header_regs + 3) / 4; + } else if (intel->gen == 6) { int header_regs = 2; if (c->key.nr_userclip) header_regs += 2; diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c new file mode 100644 index 0000000..3a61469 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -0,0 +1,128 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "main/macros.h" +#include "intel_batchbuffer.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +/** + * The following diagram shows how we partition the URB: + * + * 8kB 8kB Rest of the URB space + * ____-____ ____-____ _________________-_________________ + * / \ / \ / \ + * +-------------------------------------------------------------+ + * | VS Push | FS Push | VS | + * | Constants | Constants | Handles | + * +-------------------------------------------------------------+ + * + * Notably, push constants must be stored at the beginning of the URB + * space, while entries can be stored anywhere. Ivybridge has a maximum + * constant buffer size of 16kB. + * + * Currently we split the constant buffer space evenly between VS and FS. + * This is probably not ideal, but simple. + * + * Ivybridge GT1 has 128kB of URB space. + * Ivybridge GT2 has 256kB of URB space. + * + * See "Volume 2a: 3D Pipeline," section 1.8. + */ +static void +prepare_urb(struct brw_context *brw) +{ + /* Total space for entries is URB size - 16kB for push constants */ + int handle_region_size = (brw->urb.size - 16) * 1024; /* bytes */ + + /* CACHE_NEW_VS_PROG */ + brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1); + + int nr_vs_entries = handle_region_size / (brw->urb.vs_size * 64); + if (nr_vs_entries > brw->urb.max_vs_entries) + nr_vs_entries = brw->urb.max_vs_entries; + + /* According to volume 2a, nr_vs_entries must be a multiple of 8. */ + brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 8); + + /* URB Starting Addresses are specified in multiples of 8kB. */ + brw->urb.vs_start = 2; /* skip over push constants */ +} + +static void +upload_urb(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + assert(brw->urb.nr_vs_entries % 8 == 0); + assert(brw->urb.nr_gs_entries % 8 == 0); + /* GS requirement */ + assert(!brw->gs.prog_bo); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2)); + OUT_BATCH(8); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2)); + OUT_BATCH(8 | 8 << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2)); + OUT_BATCH(brw->urb.nr_vs_entries | + ((brw->urb.vs_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) | + (brw->urb.vs_start << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(); + + /* Allocate the GS, HS, and DS zero space - we don't use them. */ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2)); + OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2)); + OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2)); + OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(); +} + +const struct brw_tracked_state gen7_urb = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG), + }, + .prepare = prepare_urb, + .emit = upload_urb, +}; |