summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChia-I Wu <olvaffe@gmail.com>2015-06-20 23:27:08 +0800
committerChia-I Wu <olvaffe@gmail.com>2015-06-22 12:56:55 +0800
commit57bdcae9e0fbf639014cd375543a8dd356406ac0 (patch)
treef913a713af549534c91ce55441db9954b3e7fd40
parent2bf5a4211ef305d90ca6133ca09c3b79e6088d50 (diff)
downloadexternal_mesa3d-57bdcae9e0fbf639014cd375543a8dd356406ac0.zip
external_mesa3d-57bdcae9e0fbf639014cd375543a8dd356406ac0.tar.gz
external_mesa3d-57bdcae9e0fbf639014cd375543a8dd356406ac0.tar.bz2
ilo: add ilo_state_compute
Replace gen6_idrt_data with ilo_state_compute, which has a bunch of validations and is now preferred.
-rw-r--r--src/gallium/drivers/ilo/Makefile.sources2
-rw-r--r--src/gallium/drivers/ilo/core/ilo_builder_media.h106
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_compute.c435
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_compute.h92
-rw-r--r--src/gallium/drivers/ilo/ilo_render_dynamic.c36
-rw-r--r--src/gallium/drivers/ilo/ilo_render_gen.h3
-rw-r--r--src/gallium/drivers/ilo/ilo_render_media.c3
-rw-r--r--src/gallium/drivers/ilo/ilo_state.h1
8 files changed, 586 insertions, 92 deletions
diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources
index e5a0950..95b6b7a 100644
--- a/src/gallium/drivers/ilo/Makefile.sources
+++ b/src/gallium/drivers/ilo/Makefile.sources
@@ -21,6 +21,8 @@ C_SOURCES := \
core/ilo_image.h \
core/ilo_state_cc.c \
core/ilo_state_cc.h \
+ core/ilo_state_compute.c \
+ core/ilo_state_compute.h \
core/ilo_state_raster.c \
core/ilo_state_raster.h \
core/ilo_state_sampler.c \
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_media.h b/src/gallium/drivers/ilo/core/ilo_builder_media.h
index 7fbe6d4..7197104 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_media.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_media.h
@@ -29,57 +29,30 @@
#define ILO_BUILDER_MEDIA_H
#include "genhw/genhw.h"
-#include "../ilo_shader.h"
#include "intel_winsys.h"
#include "ilo_core.h"
#include "ilo_dev.h"
+#include "ilo_state_compute.h"
#include "ilo_builder.h"
-struct gen6_idrt_data {
- const struct ilo_shader_state *cs;
-
- uint32_t sampler_offset;
- uint32_t binding_table_offset;
-
- unsigned curbe_size;
- unsigned thread_group_size;
-};
-
static inline void
gen6_MEDIA_VFE_STATE(struct ilo_builder *builder,
- unsigned curbe_alloc, bool use_slm)
+ const struct ilo_state_compute *compute)
{
const uint8_t cmd_len = 8;
- const unsigned idrt_alloc =
- ((ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) ? 64 : 32) * 32;
- int max_threads;
uint32_t *dw;
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
-
- max_threads = builder->dev->thread_count;
-
- curbe_alloc = align(curbe_alloc, 32);
- assert(idrt_alloc + curbe_alloc <= builder->dev->urb_size / (use_slm + 1));
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_VFE_STATE) | (cmd_len - 2);
- dw[1] = 0; /* scratch */
-
- dw[2] = (max_threads - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
- 0 << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
- GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
- GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL;
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
- dw[2] |= GEN7_VFE_DW2_GPGPU_MODE;
-
+ /* see compute_set_gen6_MEDIA_VFE_STATE() */
+ dw[1] = compute->vfe[0];
+ dw[2] = compute->vfe[1];
dw[3] = 0;
-
- dw[4] = 0 << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT |
- (curbe_alloc / 32);
-
+ dw[4] = compute->vfe[2];
dw[5] = 0;
dw[6] = 0;
dw[7] = 0;
@@ -194,8 +167,10 @@ gen7_GPGPU_WALKER(struct ilo_builder *builder,
static inline uint32_t
gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder,
- const struct gen6_idrt_data *data,
- int idrt_count)
+ const struct ilo_state_compute *compute,
+ const uint32_t *kernel_offsets,
+ const uint32_t *sampler_offsets,
+ const uint32_t *binding_table_offsets)
{
/*
* From the Sandy Bridge PRM, volume 2 part 2, page 34:
@@ -211,61 +186,26 @@ gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder,
* aligned address of the Interface Descriptor data."
*/
const int state_align = 32;
- const int state_len = (32 / 4) * idrt_count;
+ const int state_len = (32 / 4) * compute->idrt_count;
uint32_t state_offset, *dw;
int i;
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
state_offset = ilo_builder_dynamic_pointer(builder,
ILO_BUILDER_ITEM_INTERFACE_DESCRIPTOR, state_align, state_len, &dw);
- for (i = 0; i < idrt_count; i++) {
- const struct gen6_idrt_data *idrt = &data[i];
- const struct ilo_shader_state *cs = idrt->cs;
- unsigned sampler_count, bt_size, slm_size;
-
- sampler_count =
- ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT);
- assert(sampler_count <= 16);
- sampler_count = (sampler_count + 3) / 4;
-
- bt_size =
- ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT);
- if (bt_size > 31)
- bt_size = 31;
-
- slm_size = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE);
-
- assert(idrt->curbe_size / 32 <= 63);
-
- dw[0] = ilo_shader_get_kernel_offset(idrt->cs);
+ for (i = 0; i < compute->idrt_count; i++) {
+ /* see compute_set_gen6_INTERFACE_DESCRIPTOR_DATA() */
+ dw[0] = compute->idrt[i][0] + kernel_offsets[i];
dw[1] = 0;
- dw[2] = idrt->sampler_offset |
- sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT;
- dw[3] = idrt->binding_table_offset |
- bt_size << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT;
-
- dw[4] = (idrt->curbe_size / 32) << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT |
- 0 << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT;
-
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
- dw[5] = GEN7_IDRT_DW5_ROUNDING_MODE_RTNE;
-
- if (slm_size) {
- assert(slm_size <= 64 * 1024);
- slm_size = util_next_power_of_two((slm_size + 4095) / 4096);
-
- dw[5] |= GEN7_IDRT_DW5_BARRIER_ENABLE |
- slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT |
- idrt->thread_group_size <<
- GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT;
- }
- } else {
- dw[5] = 0;
- }
-
- dw[6] = 0;
+ dw[2] = compute->idrt[i][1] |
+ sampler_offsets[i];
+ dw[3] = compute->idrt[i][2] |
+ binding_table_offsets[i];
+ dw[4] = compute->idrt[i][3];
+ dw[5] = compute->idrt[i][4];
+ dw[6] = compute->idrt[i][5];
dw[7] = 0;
dw += 8;
diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.c b/src/gallium/drivers/ilo/core/ilo_state_compute.c
new file mode 100644
index 0000000..a5fe5e1
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_compute.c
@@ -0,0 +1,435 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_compute.h"
+
+struct compute_urb_configuration {
+ int idrt_entry_count;
+ int curbe_entry_count;
+
+ int urb_entry_count;
+ /* in 256-bit register increments */
+ int urb_entry_size;
+};
+
+static int
+get_gen6_rob_entry_count(const struct ilo_dev *dev)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 2, page 60:
+ *
+ * "ROB has 64KB of storage; 2048 entries."
+ *
+ * From the valid ranges of "CURBE Allocation Size", we can also conclude
+ * that interface entries and CURBE data must be in ROB. And that ROB
+ * should be 16KB, or 512 entries, on Gen7 GT1.
+ */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+ return 2048;
+ else if (ilo_dev_gen(dev) >= ILO_GEN(7))
+ return (dev->gt == 2) ? 2048 : 512;
+ else
+ return (dev->gt == 2) ? 2048 : 1024;
+}
+
+static int
+get_gen6_idrt_entry_count(const struct ilo_dev *dev)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 2, page 21:
+ *
+ * "The first 32 URB entries are reserved for the interface
+ * descriptor..."
+ *
+ * From the Haswell PRM, volume 7, page 836:
+ *
+ * "The first 64 URB entries are reserved for the interface
+ * description..."
+ */
+ return (ilo_dev_gen(dev) >= ILO_GEN(7.5)) ? 64 : 32;
+}
+
+static int
+get_gen6_curbe_entry_count(const struct ilo_dev *dev, uint32_t curbe_size)
+{
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 2, page 21:
+ *
+ * "(CURBE Allocation Size) Specifies the total length allocated for
+ * CURBE, in 256-bit register increments."
+ */
+ const int entry_count = (curbe_size + 31) / 32;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ assert(get_gen6_idrt_entry_count(dev) + entry_count <=
+ get_gen6_rob_entry_count(dev));
+
+ return entry_count;
+}
+
+static bool
+compute_get_gen6_urb_configuration(const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info,
+ struct compute_urb_configuration *urb)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ urb->idrt_entry_count = get_gen6_idrt_entry_count(dev);
+ urb->curbe_entry_count =
+ get_gen6_curbe_entry_count(dev, info->curbe_alloc_size);
+
+ /*
+ * From the Broadwell PRM, volume 2b, page 451:
+ *
+ * "Please note that 0 is not allowed for this field (Number of URB
+ * Entries)."
+ */
+ urb->urb_entry_count = (ilo_dev_gen(dev) >= ILO_GEN(8)) ? 1 : 0;
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 2, page 52:
+ *
+ * "(URB Entry Allocation Size) Specifies the length of each URB entry
+ * used by the unit, in 256-bit register increments - 1."
+ */
+ urb->urb_entry_size = 1;
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 2, page 22:
+ *
+ * "MEDIA_VFE_STATE specifies the amount of CURBE space, the URB handle
+ * size and the number of URB handles. The driver must ensure that
+ * ((URB_handle_size * URB_num_handle) - CURBE - 32) <=
+ * URB_allocation_in_L3."
+ */
+ assert(urb->idrt_entry_count + urb->curbe_entry_count +
+ urb->urb_entry_count * urb->urb_entry_size <=
+ info->cv_urb_alloc_size / 32);
+
+ return true;
+}
+
+static int
+compute_interface_get_gen6_read_end(const struct ilo_dev *dev,
+ const struct ilo_state_compute_interface_info *interface)
+{
+ const int per_thread_read = (interface->curbe_read_length + 31) / 32;
+ const int cross_thread_read =
+ (interface->cross_thread_curbe_read_length + 31) / 32;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ assert(interface->curbe_read_offset % 32 == 0);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 2, page 60:
+ *
+ * "(Constant URB Entry Read Length) [0,63]"
+ */
+ assert(per_thread_read <= 63);
+
+ /* From the Haswell PRM, volume 2d, page 199:
+ *
+ * "(Cross-Thread Constant Data Read Length) [0,127]"
+ */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+ assert(cross_thread_read <= 127);
+ else
+ assert(!cross_thread_read);
+
+ if (per_thread_read || cross_thread_read) {
+ return interface->curbe_read_offset / 32 + cross_thread_read +
+ per_thread_read * interface->thread_group_size;
+ } else {
+ return 0;
+ }
+}
+
+static bool
+compute_validate_gen6(const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info,
+ const struct compute_urb_configuration *urb)
+{
+ int min_curbe_entry_count;
+ uint8_t i;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ assert(info->interface_count <= urb->idrt_entry_count);
+
+ min_curbe_entry_count = 0;
+ for (i = 0; i < info->interface_count; i++) {
+ const int read_end =
+ compute_interface_get_gen6_read_end(dev, &info->interfaces[i]);
+
+ if (min_curbe_entry_count < read_end)
+ min_curbe_entry_count = read_end;
+ }
+
+ assert(min_curbe_entry_count <= urb->curbe_entry_count);
+
+ /*
+ * From the Broadwell PRM, volume 2b, page 452:
+ *
+ * "CURBE Allocation Size should be 0 for GPGPU workloads that uses
+ * indirect instead of CURBE."
+ */
+ if (!min_curbe_entry_count)
+ assert(!urb->curbe_entry_count);
+
+ return true;
+}
+
+static uint8_t
+compute_get_gen6_scratch_space(const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info)
+{
+ uint32_t scratch_size = 0;
+ uint8_t i;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ for (i = 0; i < info->interface_count; i++) {
+ if (scratch_size < info->interfaces[i].scratch_size)
+ scratch_size = info->interfaces[i].scratch_size;
+ }
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ assert(scratch_size <= 2 * 1024 * 1024);
+
+ /* next power of two, starting from 1KB */
+ return (scratch_size > 1024) ?
+ (util_last_bit(scratch_size - 1) - 10): 0;
+ } else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+ assert(scratch_size <= 2 * 1024 * 1024);
+
+ /* next power of two, starting from 2KB */
+ return (scratch_size > 2048) ?
+ (util_last_bit(scratch_size - 1) - 11): 0;
+ } else {
+ assert(scratch_size <= 12 * 1024);
+
+ return (scratch_size > 1024) ?
+ (scratch_size - 1) / 1024 : 0;
+ }
+}
+
+static bool
+compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
+ const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info)
+{
+ struct compute_urb_configuration urb;
+ uint8_t scratch_space;
+
+ uint32_t dw1, dw2, dw4;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (!compute_get_gen6_urb_configuration(dev, info, &urb) ||
+ !compute_validate_gen6(dev, info, &urb))
+ return false;
+
+ scratch_space = compute_get_gen6_scratch_space(dev, info);
+
+ dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
+ urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
+ GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
+ GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5))
+ dw2 |= GEN7_VFE_DW2_GPGPU_MODE;
+
+ assert(urb.urb_entry_size);
+
+ dw4 = (urb.urb_entry_size - 1) << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT |
+ urb.curbe_entry_count << GEN6_VFE_DW4_CURBE_SIZE__SHIFT;
+
+ STATIC_ASSERT(ARRAY_SIZE(compute->vfe) >= 3);
+ compute->vfe[0] = dw1;
+ compute->vfe[1] = dw2;
+ compute->vfe[2] = dw4;
+
+ return true;
+}
+
+static uint8_t
+compute_interface_get_gen6_sampler_count(const struct ilo_dev *dev,
+ const struct ilo_state_compute_interface_info *interface)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+ return (interface->sampler_count <= 12) ?
+ (interface->sampler_count + 3) / 4 : 4;
+}
+
+static uint8_t
+compute_interface_get_gen6_surface_count(const struct ilo_dev *dev,
+ const struct ilo_state_compute_interface_info *interface)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+ return (interface->surface_count <= 31) ? interface->surface_count : 31;
+}
+
+static uint8_t
+compute_interface_get_gen7_slm_size(const struct ilo_dev *dev,
+ const struct ilo_state_compute_interface_info *interface)
+{
+ ILO_DEV_ASSERT(dev, 7, 8);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 2, page 61:
+ *
+ * "The amount is specified in 4k blocks, but only powers of 2 are
+ * allowed: 0, 4k, 8k, 16k, 32k and 64k per half-slice."
+ */
+ assert(interface->slm_size <= 64 * 1024);
+ return (!interface->slm_size) ? 0 : /* 0 stays 0, as in the code this replaces */
+ util_next_power_of_two((interface->slm_size + 4095) / 4096);
+}
+
+static bool
+compute_set_gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_state_compute *compute,
+ const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info)
+{
+ uint8_t i;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ for (i = 0; i < info->interface_count; i++) {
+ const struct ilo_state_compute_interface_info *interface =
+ &info->interfaces[i];
+ uint16_t read_offset, per_thread_read_len, cross_thread_read_len;
+ uint8_t sampler_count, surface_count;
+ uint32_t dw0, dw2, dw3, dw4, dw5, dw6;
+
+ assert(interface->kernel_offset % 64 == 0);
+ assert(interface->thread_group_size);
+
+ read_offset = interface->curbe_read_offset / 32;
+ per_thread_read_len = (interface->curbe_read_length + 31) / 32;
+ cross_thread_read_len =
+ (interface->cross_thread_curbe_read_length + 31) / 32;
+
+ sampler_count =
+ compute_interface_get_gen6_sampler_count(dev, interface);
+ surface_count =
+ compute_interface_get_gen6_surface_count(dev, interface);
+
+ dw0 = interface->kernel_offset;
+ dw2 = sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT;
+ dw3 = surface_count << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT;
+ dw4 = per_thread_read_len << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT |
+ read_offset << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT;
+
+ dw5 = 0;
+ dw6 = 0;
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ const uint8_t slm_size =
+ compute_interface_get_gen7_slm_size(dev, interface);
+
+ dw5 |= GEN7_IDRT_DW5_ROUNDING_MODE_RTNE;
+
+ if (slm_size) {
+ dw5 |= GEN7_IDRT_DW5_BARRIER_ENABLE |
+ slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT;
+ }
+
+ /*
+ * From the Haswell PRM, volume 2d, page 199:
+ *
+ * "(Number of Threads in GPGPU Thread Group) Specifies the
+ * number of threads that are in this thread group. Used to
+ * program the barrier for the number of messages to expect. The
+ * minimum value is 0 (which will disable the barrier), while
+ * the maximum value is the number of threads in a subslice for
+ * local barriers."
+ *
+ * From the Broadwell PRM, volume 2d, page 183:
+ *
+ * "(Number of Threads in GPGPU Thread Group) Specifies the
+ * number of threads that are in this thread group. The minimum
+ * value is 1, while the maximum value is the number of threads
+ * in a subslice for local barriers. See vol1b Configurations
+ * for the number of threads per subslice for different
+ * products. The maximum value for global barriers is limited
+ * by the number of threads in the system, or by 511, whichever
+ * is lower. This field should not be set to 0 even if the
+ * barrier is disabled, since an accurate value is needed for
+ * proper pre-emption."
+ */
+ if (slm_size || ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ dw5 |= interface->thread_group_size <<
+ GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT;
+ }
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+ dw6 |= cross_thread_read_len <<
+ GEN75_IDRT_DW6_CROSS_THREAD_CURBE_READ_LEN__SHIFT;
+ }
+ }
+
+ STATIC_ASSERT(ARRAY_SIZE(compute->idrt[i]) >= 6);
+ compute->idrt[i][0] = dw0;
+ compute->idrt[i][1] = dw2;
+ compute->idrt[i][2] = dw3;
+ compute->idrt[i][3] = dw4;
+ compute->idrt[i][4] = dw5;
+ compute->idrt[i][5] = dw6;
+ }
+
+ return true;
+}
+
+bool
+ilo_state_compute_init(struct ilo_state_compute *compute,
+ const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info)
+{
+ bool ret = true;
+ /* compute and the info->data backing store must arrive zeroed */
+ assert(ilo_is_zeroed(compute, sizeof(*compute)));
+ assert(ilo_is_zeroed(info->data, info->data_size));
+ /* info->data must hold one uint32_t[6] descriptor per interface */
+ assert(ilo_state_compute_data_size(dev, info->interface_count) <=
+ info->data_size);
+ compute->idrt = (uint32_t (*)[6]) info->data;
+ compute->idrt_count = info->interface_count; /* read by gen6_INTERFACE_DESCRIPTOR_DATA() */
+ ret &= compute_set_gen6_MEDIA_VFE_STATE(compute, dev, info);
+ ret &= compute_set_gen6_INTERFACE_DESCRIPTOR_DATA(compute, dev, info);
+
+ assert(ret);
+
+ return ret;
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.h b/src/gallium/drivers/ilo/core/ilo_state_compute.h
new file mode 100644
index 0000000..346f7b6
--- /dev/null
+++ b/src/gallium/drivers/ilo/core/ilo_state_compute.h
@@ -0,0 +1,92 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_COMPUTE_H
+#define ILO_STATE_COMPUTE_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/*
+ * From the Haswell PRM, volume 7, page 836:
+ *
+ * "The first 64 URB entries are reserved for the interface
+ * description..."
+ */
+#define ILO_STATE_COMPUTE_MAX_INTERFACE_COUNT 64
+
+struct ilo_state_compute_interface_info {
+ /* usually 0 unless there are multiple interfaces */
+ uint32_t kernel_offset;
+
+ uint32_t scratch_size;
+
+ uint8_t sampler_count;
+ uint8_t surface_count;
+
+ uint16_t thread_group_size;
+ uint32_t slm_size;
+
+ uint16_t curbe_read_offset;
+ uint16_t curbe_read_length;
+ uint16_t cross_thread_curbe_read_length;
+};
+
+struct ilo_state_compute_info {
+ void *data;
+ size_t data_size;
+
+ const struct ilo_state_compute_interface_info *interfaces;
+ uint8_t interface_count;
+
+ uint32_t cv_urb_alloc_size;
+ uint32_t curbe_alloc_size;
+};
+
+struct ilo_state_compute {
+ uint32_t vfe[3];
+
+ uint32_t (*idrt)[6];
+ uint8_t idrt_count;
+};
+
+static inline size_t
+ilo_state_compute_data_size(const struct ilo_dev *dev,
+ uint8_t interface_count)
+{
+ const struct ilo_state_compute *compute = NULL;
+ return sizeof(compute->idrt[0]) * interface_count;
+}
+
+bool
+ilo_state_compute_init(struct ilo_state_compute *compute,
+ const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info);
+
+#endif /* ILO_STATE_COMPUTE_H */
diff --git a/src/gallium/drivers/ilo/ilo_render_dynamic.c b/src/gallium/drivers/ilo/ilo_render_dynamic.c
index 5618920..3b4c802 100644
--- a/src/gallium/drivers/ilo/ilo_render_dynamic.c
+++ b/src/gallium/drivers/ilo/ilo_render_dynamic.c
@@ -30,6 +30,7 @@
#include "ilo_common.h"
#include "ilo_blitter.h"
+#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_render_gen.h"
@@ -522,20 +523,39 @@ gen6_emit_launch_grid_dynamic_idrt(struct ilo_render *r,
struct ilo_render_launch_grid_session *session)
{
const struct ilo_shader_state *cs = vec->cs;
- struct gen6_idrt_data data;
+ struct ilo_state_compute_interface_info interface;
+ struct ilo_state_compute_info info;
+ uint32_t kernel_offset;
ILO_DEV_ASSERT(r->dev, 7, 7.5);
- memset(&data, 0, sizeof(data));
+ memset(&interface, 0, sizeof(interface));
- data.cs = cs;
- data.sampler_offset = r->state.cs.SAMPLER_STATE;
- data.binding_table_offset = r->state.cs.BINDING_TABLE_STATE;
+ interface.sampler_count =
+ ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT);
+ interface.surface_count =
+ ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT);
+ interface.thread_group_size = session->thread_group_size;
+ interface.slm_size =
+ ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE);
+ interface.curbe_read_length = r->state.cs.PUSH_CONSTANT_BUFFER_size;
- data.curbe_size = r->state.cs.PUSH_CONSTANT_BUFFER_size;
- data.thread_group_size = session->thread_group_size;
+ memset(&info, 0, sizeof(info));
+ info.data = session->compute_data;
+ info.data_size = sizeof(session->compute_data);
+ info.interfaces = &interface;
+ info.interface_count = 1;
+ info.cv_urb_alloc_size = r->dev->urb_size;
+ info.curbe_alloc_size = r->state.cs.PUSH_CONSTANT_BUFFER_size;
+
+ ilo_state_compute_init(&session->compute, r->dev, &info);
+
+ kernel_offset = ilo_shader_get_kernel_offset(cs);
+
+ session->idrt = gen6_INTERFACE_DESCRIPTOR_DATA(r->builder,
+ &session->compute, &kernel_offset,
+ &r->state.cs.SAMPLER_STATE, &r->state.cs.BINDING_TABLE_STATE);
- session->idrt = gen6_INTERFACE_DESCRIPTOR_DATA(r->builder, &data, 1);
session->idrt_size = 32;
}
diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h
index 00c8113..aae4ef2 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen.h
+++ b/src/gallium/drivers/ilo/ilo_render_gen.h
@@ -185,6 +185,9 @@ struct ilo_render_launch_grid_session {
uint32_t idrt;
int idrt_size;
+
+ uint32_t compute_data[6];
+ struct ilo_state_compute compute;
};
int
diff --git a/src/gallium/drivers/ilo/ilo_render_media.c b/src/gallium/drivers/ilo/ilo_render_media.c
index 387920a..a0de002 100644
--- a/src/gallium/drivers/ilo/ilo_render_media.c
+++ b/src/gallium/drivers/ilo/ilo_render_media.c
@@ -30,6 +30,7 @@
#include "core/ilo_builder_mi.h"
#include "core/ilo_builder_render.h"
+#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_render_gen.h"
@@ -206,7 +207,7 @@ ilo_render_emit_launch_grid_commands(struct ilo_render *render,
gen6_state_base_address(render->builder, true);
- gen6_MEDIA_VFE_STATE(render->builder, pcb_size, use_slm);
+ gen6_MEDIA_VFE_STATE(render->builder, &session->compute);
if (pcb_size)
gen6_MEDIA_CURBE_LOAD(render->builder, pcb, pcb_size);
diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h
index e4746d0..537e5db 100644
--- a/src/gallium/drivers/ilo/ilo_state.h
+++ b/src/gallium/drivers/ilo/ilo_state.h
@@ -29,6 +29,7 @@
#define ILO_STATE_H
#include "core/ilo_state_cc.h"
+#include "core/ilo_state_compute.h"
#include "core/ilo_state_raster.h"
#include "core/ilo_state_sampler.h"
#include "core/ilo_state_sbe.h"