summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_compiler.h
diff options
context:
space:
mode:
authorKenneth Graunke <kenneth@whitecape.org>2016-06-08 22:21:22 -0700
committerKenneth Graunke <kenneth@whitecape.org>2016-06-12 00:38:26 -0700
commit87d062a94080373995170f51063a9649c96c6dea (patch)
treea0057a7ebd072ec6104b83d17fb6ec7f396f60c7 /src/mesa/drivers/dri/i965/brw_compiler.h
parent3f48548a6f65fe90b97956c7be73268917c6f2f9 (diff)
downloadexternal_mesa3d-87d062a94080373995170f51063a9649c96c6dea.zip
external_mesa3d-87d062a94080373995170f51063a9649c96c6dea.tar.gz
external_mesa3d-87d062a94080373995170f51063a9649c96c6dea.tar.bz2
i965: Fix shared local memory size for Gen9+.
Skylake changes the representation of shared local memory size:

 Size   | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
 -------------------------------------------------------------------
 Gen7-8 |    0 | none | none |    1 |    2 |     4 |     8 |    16 |
 -------------------------------------------------------------------
 Gen9+  |    0 |    1 |    2 |    3 |    4 |     5 |     6 |     7 |

The old formula would substantially underallocate the amount of space.
This fixes GPU hangs on Skylake when running with full thread counts.

v2: Fix the Vulkan driver too, use a helper function, and fix the table in the comments and commit message.

Cc: "12.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_compiler.h')
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h33
1 files changed, 33 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index f55b7f3..c944eff 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -26,6 +26,7 @@
#include <stdio.h>
#include "brw_device_info.h"
#include "main/mtypes.h"
+#include "main/macros.h"
#ifdef __cplusplus
extern "C" {
@@ -831,6 +832,38 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
unsigned *final_assembly_size,
char **error_str);
+static inline uint32_t
+encode_slm_size(const struct brw_device_info *devinfo, uint32_t bytes)
+{
+ uint32_t slm_size = 0;
+
+ /* Encode a Shared Local Memory size given in bytes (at most 64 kB) as the
+ * INTERFACE_DESCRIPTOR_DATA field value for devinfo->gen, per this table:
+ *
+ * Size | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
+ * -------------------------------------------------------------------
+ * Gen7-8 | 0 | none | none | 1 | 2 | 4 | 8 | 16 |
+ * -------------------------------------------------------------------
+ * Gen9+ | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ */
+ assert(bytes <= 64 * 1024);
+
+ if (bytes > 0) {
+ /* Shared Local Memory Size is specified as powers of two. */
+ slm_size = util_next_power_of_two(bytes);
+
+ if (devinfo->gen >= 9) {
+ /* Use a minimum of 1kB; ffs() is 1-based, so 1024 bytes (bit 10) encodes as 1. */
+ slm_size = ffs(MAX2(slm_size, 1024)) - 10;
+ } else {
+ /* Use a minimum of 4kB; the pre-Gen9 representation counts 4kB units. */
+ slm_size = MAX2(slm_size, 4096) / 4096;
+ }
+ }
+
+ return slm_size;
+}
+
#ifdef __cplusplus
} /* extern "C" */
#endif