diff options
247 files changed, 7061 insertions, 3849 deletions
diff --git a/Makefile.am b/Makefile.am index 9f49ce6..149610c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -32,7 +32,9 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \ --enable-vdpau \ --enable-xa \ --enable-xvmc \ - --with-egl-platforms=x11,wayland,drm + --with-egl-platforms=x11,wayland,drm \ + --with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \ + --with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast ACLOCAL_AMFLAGS = -I m4 @@ -1 +1 @@ -11.0.0-devel +11.1.0-devel diff --git a/configure.ac b/configure.ac index 74e13b3..dd23eca 100644 --- a/configure.ac +++ b/configure.ac @@ -74,7 +74,7 @@ LIBDRM_AMDGPU_REQUIRED=2.4.63 LIBDRM_INTEL_REQUIRED=2.4.61 LIBDRM_NVVIEUX_REQUIRED=2.4.33 LIBDRM_NOUVEAU_REQUIRED=2.4.62 -LIBDRM_FREEDRENO_REQUIRED=2.4.57 +LIBDRM_FREEDRENO_REQUIRED=2.4.64 DRI2PROTO_REQUIRED=2.6 DRI3PROTO_REQUIRED=1.0 PRESENTPROTO_REQUIRED=1.0 @@ -1639,6 +1639,10 @@ if test "x$enable_nine" = xyes; then if test "x$with_gallium_drivers" = xswrast; then AC_MSG_ERROR([nine requires at least one non-swrast gallium driver]) fi + if test $GCC_VERSION_MAJOR -lt 4 -o $GCC_VERSION_MAJOR -eq 4 -a $GCC_VERSION_MINOR -lt 6; then + AC_MSG_ERROR([gcc >= 4.6 is required to build nine]) + fi + if test "x$enable_dri3" = xno; then AC_MSG_WARN([using nine together with wine requires DRI3 enabled system]) fi diff --git a/docs/GL3.txt b/docs/GL3.txt index 54c0c5a..331b2da 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -163,7 +163,7 @@ GL 4.3, GLSL 4.30: GL_ARB_multi_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_program_interface_query DONE (all drivers) GL_ARB_robust_buffer_access_behavior not started - GL_ARB_shader_image_size in progress (Martin Peres) + GL_ARB_shader_image_size DONE (i965) GL_ARB_shader_storage_buffer_object in progress (Iago Toral, Samuel Iglesias) GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe) 
@@ -210,8 +210,8 @@ GLES3.1, GLSL ES 3.1 GL_ARB_framebuffer_no_attachments DONE (i965) GL_ARB_program_interface_query DONE (all drivers) GL_ARB_shader_atomic_counters DONE (i965) - GL_ARB_shader_image_load_store in progress (curro) - GL_ARB_shader_image_size in progress (Martin Peres) + GL_ARB_shader_image_load_store DONE (i965) + GL_ARB_shader_image_size DONE (i965) GL_ARB_shader_storage_buffer_object in progress (Iago Toral, Samuel Iglesias) GL_ARB_shading_language_packing DONE (all drivers) GL_ARB_separate_shader_objects DONE (all drivers) diff --git a/docs/index.html b/docs/index.html index b9e6148..b067256 100644 --- a/docs/index.html +++ b/docs/index.html @@ -16,6 +16,12 @@ <h1>News</h1> +<h2>August 22 2015</h2> +<p> +<a href="relnotes/10.6.5.html">Mesa 10.6.5</a> is released. +This is a bug-fix release. +</p> + <h2>August 11 2015</h2> <p> <a href="relnotes/10.6.4.html">Mesa 10.6.4</a> is released. diff --git a/docs/relnotes.html b/docs/relnotes.html index 39e7f61..2cc4701 100644 --- a/docs/relnotes.html +++ b/docs/relnotes.html @@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release. 
</p> <ul> +<li><a href="relnotes/10.6.5.html">10.6.5 release notes</a> <li><a href="relnotes/10.6.4.html">10.6.4 release notes</a> <li><a href="relnotes/10.6.3.html">10.6.3 release notes</a> <li><a href="relnotes/10.6.2.html">10.6.2 release notes</a> diff --git a/docs/relnotes/10.6.5.html b/docs/relnotes/10.6.5.html new file mode 100644 index 0000000..e7326ff --- /dev/null +++ b/docs/relnotes/10.6.5.html @@ -0,0 +1,124 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html lang="en"> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <title>Mesa Release Notes</title> + <link rel="stylesheet" type="text/css" href="../mesa.css"> +</head> +<body> + +<div class="header"> + <h1>The Mesa 3D Graphics Library</h1> +</div> + +<iframe src="../contents.html"></iframe> +<div class="content"> + +<h1>Mesa 10.6.5 Release Notes / August 22, 2015</h1> + +<p> +Mesa 10.6.5 is a bug fix release which fixes bugs found since the 10.6.4 release. +</p> +<p> +Mesa 10.6.5 implements the OpenGL 3.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 3.3. OpenGL +3.3 is <strong>only</strong> available if requested at context creation +because compatibility contexts are not supported. 
+</p> + + +<h2>SHA256 checksums</h2> +<pre> +afe290fc7af75a25df5ee52396a9f09e5dba85fb3e159304bdda265b8564b0d4 mesa-10.6.5.tar.gz +fb6fac3c85bcfa9d06b8dd439169f23f0c0924a88e44362e738b99b1feff762f mesa-10.6.5.tar.xz +</pre> + + +<h2>New features</h2> +<p>None</p> + +<h2>Bug fixes</h2> + +<p>This list is likely incomplete.</p> + +<ul> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=85252">Bug 85252</a> - Segfault in compiler while processing ternary operator with void arguments</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91570">Bug 91570</a> - Upgrading mesa to 10.6 causes segfault in OpenGL applications with GeForce4 MX 440 / AGP 8X</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91610">Bug 91610</a> - [BSW] GPU hang for spec.shaders.point-vertex-id gl_instanceid divisor</li> + +</ul> + + +<h2>Changes</h2> + +<p>Adam Jackson (1):</p> +<ul> + <li>glx: Fix __glXWireToEvent for BufferSwapComplete</li> +</ul> + +<p>Alex Deucher (2):</p> +<ul> + <li>radeonsi: add new OLAND pci id</li> + <li>radeonsi: properly set the raster_config for KV</li> +</ul> + +<p>Emil Velikov (4):</p> +<ul> + <li>docs: add sha256 checksums for 10.6.4</li> + <li>vc4: add missing nir include, to fix the build</li> + <li>Revert "radeonsi: properly set the raster_config for KV"</li> + <li>Update version to 10.6.5</li> +</ul> + +<p>Frank Binns (1):</p> +<ul> + <li>egl/x11: don't abort when creating a DRI2 drawable fails</li> +</ul> + +<p>Ilia Mirkin (3):</p> +<ul> + <li>nouveau: no need to do tnl wakeup, state updates are always hooked up</li> + <li>gm107/ir: indirect handle goes first on maxwell also</li> + <li>nv50,nvc0: take level into account when doing eng2d multi-layer blits</li> +</ul> + +<p>Jason Ekstrand (4):</p> +<ul> + <li>meta/copy_image: Stash off the scissor</li> + <li>mesa/formats: Only do byteswapping for packed formats</li> + <li>mesa/formats: Fix swizzle flipping for big-endian targets</li> + <li>mesa/formats: Don't flip channels 
of null array formats</li> +</ul> + +<p>Marek Olšák (3):</p> +<ul> + <li>radeonsi: fix polygon offset scale</li> + <li>r600g: fix polygon offset scale</li> + <li>r600g: allow setting geometry shader sampler states</li> +</ul> + +<p>Neil Roberts (1):</p> +<ul> + <li>i965/bdw: Fix setting the instancing state for the SGVS element</li> +</ul> + +<p>Oded Gabbay (2):</p> +<ul> + <li>mesa: clear existing swizzle info before bitwise-OR</li> + <li>mesa/formats: don't byteswap when building array formats</li> +</ul> + +<p>Renaud Gaubert (1):</p> +<ul> + <li>glsl: avoid compiler's segfault when processing operators with void arguments</li> +</ul> + + +</div> +</body> +</html> diff --git a/docs/relnotes/11.0.0.html b/docs/relnotes/11.0.0.html index 75967ac..537b883 100644 --- a/docs/relnotes/11.0.0.html +++ b/docs/relnotes/11.0.0.html @@ -46,9 +46,12 @@ Note: some of the new features are only available with certain drivers. <ul> <li>New hardware support for AMD GCN 1.2 GPUs: Tonga, Iceland, Carrizo, Fiji</li> <li>OpenGL 4.1 on radeonsi, nvc0</li> +<li>OpenGL ES 3.0 on freedreno (a3xx, a4xx)</li> <li>GL_AMD_vertex_shader_viewport_index on radeonsi</li> <li>GL_ARB_conditional_render_inverted on r600, radeonsi</li> +<li>GL_ARB_depth_buffer_float on a4xx</li> <li>GL_ARB_derivative_control on radeonsi</li> +<li>GL_ARB_draw_buffers, GL_ARB_draw_buffers_blend on a4xx</li> <li>GL_ARB_fragment_layer_viewport on radeonsi</li> <li>GL_ARB_framebuffer_no_attachments on i965</li> <li>GL_ARB_get_texture_sub_image for all drivers</li> @@ -56,12 +59,15 @@ Note: some of the new features are only available with certain drivers. 
<li>GL_ARB_gpu_shader_fp64 on llvmpipe, radeonsi</li> <li>GL_ARB_shader_image_load_store on i965</li> <li>GL_ARB_shader_precision on radeonsi, nvc0</li> +<li>GL_ARB_shader_image_size on i965</li> <li>GL_ARB_shader_stencil_export on llvmpipe</li> <li>GL_ARB_shader_subroutine on core profile all drivers</li> <li>GL_ARB_tessellation_shader on nvc0, radeonsi</li> +<li>GL_ARB_transform_feedback2, GL_ARB_transform_feedback_instanced, GL_EXT_transform_feedback on a3xx, a4xx</li> <li>GL_ARB_vertex_attrib_64bit on llvmpipe, radeonsi</li> <li>GL_ARB_viewport_array on radeonsi</li> <li>GL_EXT_depth_bounds_test on radeonsi, nv30, nv50, nvc0</li> +<li>GL_EXT_texture_compression_s3tc on freedreno (a3xx)</li> <li>GL_NV_read_depth (GLES) on all drivers</li> <li>GL_NV_read_depth_stencil (GLES) on all drivers</li> <li>GL_NV_read_stencil (GLES) on all drivers</li> @@ -69,6 +75,7 @@ Note: some of the new features are only available with certain drivers. <li>GL_OES_texture_half_float on all r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe</li> <li>GL_OES_texture_float_linear on all r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe</li> <li>GL_OES_texture_half_float_linear on all r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe</li> +<li>GL_EXT_draw_buffers2 on a4xx</li> <li>GLX_ARB_create_context_robustness on r600, radeonsi</li> <li>EGL_EXT_create_context_robustness on r600, radeonsi</li> <li>EGL_KHR_gl_colorspace on r600, radeonsi, nv50, nvc0</li> diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html new file mode 100644 index 0000000..7f80206 --- /dev/null +++ b/docs/relnotes/11.1.0.html @@ -0,0 +1,60 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html lang="en"> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <title>Mesa Release Notes</title> + <link rel="stylesheet" type="text/css" href="../mesa.css"> +</head> +<body> + +<div class="header"> + 
<h1>The Mesa 3D Graphics Library</h1> +</div> + +<iframe src="../contents.html"></iframe> +<div class="content"> + +<h1>Mesa 11.1.0 Release Notes / TBD</h1> + +<p> +Mesa 11.1.0 is a new development release. +People who are concerned with stability and reliability should stick +with a previous release or wait for Mesa 11.1.1. +</p> +<p> +Mesa 11.1.0 implements the OpenGL 4.1 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.1. OpenGL +4.1 is <strong>only</strong> available if requested at context creation +because compatibility contexts are not supported. +</p> + + +<h2>SHA256 checksums</h2> +<pre> +TBD. +</pre> + + +<h2>New features</h2> + +<p> +Note: some of the new features are only available with certain drivers. +</p> + +<ul> +TBD. +</ul> + +<h2>Bug fixes</h2> + +TBD. + +<h2>Changes</h2> + +TBD. + +</div> +</body> +</html> diff --git a/include/GL/glext.h b/include/GL/glext.h index e5f1d89..907a582 100644 --- a/include/GL/glext.h +++ b/include/GL/glext.h @@ -6,7 +6,7 @@ extern "C" { #endif /* -** Copyright (c) 2013-2014 The Khronos Group Inc. +** Copyright (c) 2013-2015 The Khronos Group Inc. 
** ** Permission is hereby granted, free of charge, to any person obtaining a ** copy of this software and/or associated documentation files (the @@ -33,7 +33,7 @@ extern "C" { ** used to make the header, and the header can be found at ** http://www.opengl.org/registry/ ** -** Khronos $Revision: 29735 $ on $Date: 2015-02-02 19:00:01 -0800 (Mon, 02 Feb 2015) $ +** Khronos $Revision: 31811 $ on $Date: 2015-08-10 17:01:11 +1000 (Mon, 10 Aug 2015) $ */ #if defined(_WIN32) && !defined(APIENTRY) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) @@ -53,7 +53,7 @@ extern "C" { #define GLAPI extern #endif -#define GL_GLEXT_VERSION 20150202 +#define GL_GLEXT_VERSION 20150809 /* Generated C header for: * API: gl @@ -1041,6 +1041,22 @@ typedef unsigned short GLhalf; #define GL_COLOR_ATTACHMENT13 0x8CED #define GL_COLOR_ATTACHMENT14 0x8CEE #define GL_COLOR_ATTACHMENT15 0x8CEF +#define GL_COLOR_ATTACHMENT16 0x8CF0 +#define GL_COLOR_ATTACHMENT17 0x8CF1 +#define GL_COLOR_ATTACHMENT18 0x8CF2 +#define GL_COLOR_ATTACHMENT19 0x8CF3 +#define GL_COLOR_ATTACHMENT20 0x8CF4 +#define GL_COLOR_ATTACHMENT21 0x8CF5 +#define GL_COLOR_ATTACHMENT22 0x8CF6 +#define GL_COLOR_ATTACHMENT23 0x8CF7 +#define GL_COLOR_ATTACHMENT24 0x8CF8 +#define GL_COLOR_ATTACHMENT25 0x8CF9 +#define GL_COLOR_ATTACHMENT26 0x8CFA +#define GL_COLOR_ATTACHMENT27 0x8CFB +#define GL_COLOR_ATTACHMENT28 0x8CFC +#define GL_COLOR_ATTACHMENT29 0x8CFD +#define GL_COLOR_ATTACHMENT30 0x8CFE +#define GL_COLOR_ATTACHMENT31 0x8CFF #define GL_DEPTH_ATTACHMENT 0x8D00 #define GL_STENCIL_ATTACHMENT 0x8D20 #define GL_FRAMEBUFFER 0x8D40 @@ -2859,6 +2875,17 @@ GLAPI void APIENTRY glTextureBarrier (void); #define GL_ARB_ES3_1_compatibility 1 #endif /* GL_ARB_ES3_1_compatibility */ +#ifndef GL_ARB_ES3_2_compatibility +#define GL_ARB_ES3_2_compatibility 1 +#define GL_PRIMITIVE_BOUNDING_BOX_ARB 0x92BE +#define GL_MULTISAMPLE_LINE_WIDTH_RANGE_ARB 0x9381 +#define GL_MULTISAMPLE_LINE_WIDTH_GRANULARITY_ARB 0x9382 +typedef void (APIENTRYP 
PFNGLPRIMITIVEBOUNDINGBOXARBPROC) (GLfloat minX, GLfloat minY, GLfloat minZ, GLfloat minW, GLfloat maxX, GLfloat maxY, GLfloat maxZ, GLfloat maxW); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glPrimitiveBoundingBoxARB (GLfloat minX, GLfloat minY, GLfloat minZ, GLfloat minW, GLfloat maxX, GLfloat maxY, GLfloat maxZ, GLfloat maxW); +#endif +#endif /* GL_ARB_ES3_2_compatibility */ + #ifndef GL_ARB_ES3_compatibility #define GL_ARB_ES3_compatibility 1 #endif /* GL_ARB_ES3_compatibility */ @@ -3272,6 +3299,10 @@ GLAPI GLboolean APIENTRY glIsProgramARB (GLuint program); #define GL_FRAGMENT_SHADER_DERIVATIVE_HINT_ARB 0x8B8B #endif /* GL_ARB_fragment_shader */ +#ifndef GL_ARB_fragment_shader_interlock +#define GL_ARB_fragment_shader_interlock 1 +#endif /* GL_ARB_fragment_shader_interlock */ + #ifndef GL_ARB_framebuffer_no_attachments #define GL_ARB_framebuffer_no_attachments 1 #endif /* GL_ARB_framebuffer_no_attachments */ @@ -3332,6 +3363,91 @@ GLAPI void APIENTRY glFramebufferTextureFaceARB (GLenum target, GLenum attachmen #define GL_ARB_gpu_shader_fp64 1 #endif /* GL_ARB_gpu_shader_fp64 */ +#ifndef GL_ARB_gpu_shader_int64 +#define GL_ARB_gpu_shader_int64 1 +#define GL_INT64_ARB 0x140E +#define GL_INT64_VEC2_ARB 0x8FE9 +#define GL_INT64_VEC3_ARB 0x8FEA +#define GL_INT64_VEC4_ARB 0x8FEB +#define GL_UNSIGNED_INT64_VEC2_ARB 0x8FF5 +#define GL_UNSIGNED_INT64_VEC3_ARB 0x8FF6 +#define GL_UNSIGNED_INT64_VEC4_ARB 0x8FF7 +typedef void (APIENTRYP PFNGLUNIFORM1I64ARBPROC) (GLint location, GLint64 x); +typedef void (APIENTRYP PFNGLUNIFORM2I64ARBPROC) (GLint location, GLint64 x, GLint64 y); +typedef void (APIENTRYP PFNGLUNIFORM3I64ARBPROC) (GLint location, GLint64 x, GLint64 y, GLint64 z); +typedef void (APIENTRYP PFNGLUNIFORM4I64ARBPROC) (GLint location, GLint64 x, GLint64 y, GLint64 z, GLint64 w); +typedef void (APIENTRYP PFNGLUNIFORM1I64VARBPROC) (GLint location, GLsizei count, const GLint64 *value); +typedef void (APIENTRYP PFNGLUNIFORM2I64VARBPROC) (GLint location, GLsizei 
count, const GLint64 *value); +typedef void (APIENTRYP PFNGLUNIFORM3I64VARBPROC) (GLint location, GLsizei count, const GLint64 *value); +typedef void (APIENTRYP PFNGLUNIFORM4I64VARBPROC) (GLint location, GLsizei count, const GLint64 *value); +typedef void (APIENTRYP PFNGLUNIFORM1UI64ARBPROC) (GLint location, GLuint64 x); +typedef void (APIENTRYP PFNGLUNIFORM2UI64ARBPROC) (GLint location, GLuint64 x, GLuint64 y); +typedef void (APIENTRYP PFNGLUNIFORM3UI64ARBPROC) (GLint location, GLuint64 x, GLuint64 y, GLuint64 z); +typedef void (APIENTRYP PFNGLUNIFORM4UI64ARBPROC) (GLint location, GLuint64 x, GLuint64 y, GLuint64 z, GLuint64 w); +typedef void (APIENTRYP PFNGLUNIFORM1UI64VARBPROC) (GLint location, GLsizei count, const GLuint64 *value); +typedef void (APIENTRYP PFNGLUNIFORM2UI64VARBPROC) (GLint location, GLsizei count, const GLuint64 *value); +typedef void (APIENTRYP PFNGLUNIFORM3UI64VARBPROC) (GLint location, GLsizei count, const GLuint64 *value); +typedef void (APIENTRYP PFNGLUNIFORM4UI64VARBPROC) (GLint location, GLsizei count, const GLuint64 *value); +typedef void (APIENTRYP PFNGLGETUNIFORMI64VARBPROC) (GLuint program, GLint location, GLint64 *params); +typedef void (APIENTRYP PFNGLGETUNIFORMUI64VARBPROC) (GLuint program, GLint location, GLuint64 *params); +typedef void (APIENTRYP PFNGLGETNUNIFORMI64VARBPROC) (GLuint program, GLint location, GLsizei bufSize, GLint64 *params); +typedef void (APIENTRYP PFNGLGETNUNIFORMUI64VARBPROC) (GLuint program, GLint location, GLsizei bufSize, GLuint64 *params); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1I64ARBPROC) (GLuint program, GLint location, GLint64 x); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2I64ARBPROC) (GLuint program, GLint location, GLint64 x, GLint64 y); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3I64ARBPROC) (GLuint program, GLint location, GLint64 x, GLint64 y, GLint64 z); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4I64ARBPROC) (GLuint program, GLint location, GLint64 x, GLint64 y, GLint64 z, GLint64 w); 
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1I64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLint64 *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2I64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLint64 *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3I64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLint64 *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4I64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLint64 *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1UI64ARBPROC) (GLuint program, GLint location, GLuint64 x); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2UI64ARBPROC) (GLuint program, GLint location, GLuint64 x, GLuint64 y); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3UI64ARBPROC) (GLuint program, GLint location, GLuint64 x, GLuint64 y, GLuint64 z); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4UI64ARBPROC) (GLuint program, GLint location, GLuint64 x, GLuint64 y, GLuint64 z, GLuint64 w); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1UI64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLuint64 *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2UI64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLuint64 *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3UI64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLuint64 *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4UI64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLuint64 *value); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glUniform1i64ARB (GLint location, GLint64 x); +GLAPI void APIENTRY glUniform2i64ARB (GLint location, GLint64 x, GLint64 y); +GLAPI void APIENTRY glUniform3i64ARB (GLint location, GLint64 x, GLint64 y, GLint64 z); +GLAPI void APIENTRY glUniform4i64ARB (GLint location, GLint64 x, GLint64 y, GLint64 z, GLint64 w); +GLAPI void APIENTRY glUniform1i64vARB (GLint location, GLsizei count, const GLint64 *value); +GLAPI void 
APIENTRY glUniform2i64vARB (GLint location, GLsizei count, const GLint64 *value); +GLAPI void APIENTRY glUniform3i64vARB (GLint location, GLsizei count, const GLint64 *value); +GLAPI void APIENTRY glUniform4i64vARB (GLint location, GLsizei count, const GLint64 *value); +GLAPI void APIENTRY glUniform1ui64ARB (GLint location, GLuint64 x); +GLAPI void APIENTRY glUniform2ui64ARB (GLint location, GLuint64 x, GLuint64 y); +GLAPI void APIENTRY glUniform3ui64ARB (GLint location, GLuint64 x, GLuint64 y, GLuint64 z); +GLAPI void APIENTRY glUniform4ui64ARB (GLint location, GLuint64 x, GLuint64 y, GLuint64 z, GLuint64 w); +GLAPI void APIENTRY glUniform1ui64vARB (GLint location, GLsizei count, const GLuint64 *value); +GLAPI void APIENTRY glUniform2ui64vARB (GLint location, GLsizei count, const GLuint64 *value); +GLAPI void APIENTRY glUniform3ui64vARB (GLint location, GLsizei count, const GLuint64 *value); +GLAPI void APIENTRY glUniform4ui64vARB (GLint location, GLsizei count, const GLuint64 *value); +GLAPI void APIENTRY glGetUniformi64vARB (GLuint program, GLint location, GLint64 *params); +GLAPI void APIENTRY glGetUniformui64vARB (GLuint program, GLint location, GLuint64 *params); +GLAPI void APIENTRY glGetnUniformi64vARB (GLuint program, GLint location, GLsizei bufSize, GLint64 *params); +GLAPI void APIENTRY glGetnUniformui64vARB (GLuint program, GLint location, GLsizei bufSize, GLuint64 *params); +GLAPI void APIENTRY glProgramUniform1i64ARB (GLuint program, GLint location, GLint64 x); +GLAPI void APIENTRY glProgramUniform2i64ARB (GLuint program, GLint location, GLint64 x, GLint64 y); +GLAPI void APIENTRY glProgramUniform3i64ARB (GLuint program, GLint location, GLint64 x, GLint64 y, GLint64 z); +GLAPI void APIENTRY glProgramUniform4i64ARB (GLuint program, GLint location, GLint64 x, GLint64 y, GLint64 z, GLint64 w); +GLAPI void APIENTRY glProgramUniform1i64vARB (GLuint program, GLint location, GLsizei count, const GLint64 *value); +GLAPI void APIENTRY glProgramUniform2i64vARB 
(GLuint program, GLint location, GLsizei count, const GLint64 *value); +GLAPI void APIENTRY glProgramUniform3i64vARB (GLuint program, GLint location, GLsizei count, const GLint64 *value); +GLAPI void APIENTRY glProgramUniform4i64vARB (GLuint program, GLint location, GLsizei count, const GLint64 *value); +GLAPI void APIENTRY glProgramUniform1ui64ARB (GLuint program, GLint location, GLuint64 x); +GLAPI void APIENTRY glProgramUniform2ui64ARB (GLuint program, GLint location, GLuint64 x, GLuint64 y); +GLAPI void APIENTRY glProgramUniform3ui64ARB (GLuint program, GLint location, GLuint64 x, GLuint64 y, GLuint64 z); +GLAPI void APIENTRY glProgramUniform4ui64ARB (GLuint program, GLint location, GLuint64 x, GLuint64 y, GLuint64 z, GLuint64 w); +GLAPI void APIENTRY glProgramUniform1ui64vARB (GLuint program, GLint location, GLsizei count, const GLuint64 *value); +GLAPI void APIENTRY glProgramUniform2ui64vARB (GLuint program, GLint location, GLsizei count, const GLuint64 *value); +GLAPI void APIENTRY glProgramUniform3ui64vARB (GLuint program, GLint location, GLsizei count, const GLuint64 *value); +GLAPI void APIENTRY glProgramUniform4ui64vARB (GLuint program, GLint location, GLsizei count, const GLuint64 *value); +#endif +#endif /* GL_ARB_gpu_shader_int64 */ + #ifndef GL_ARB_half_float_pixel #define GL_ARB_half_float_pixel 1 typedef unsigned short GLhalfARB; @@ -3711,6 +3827,16 @@ GLAPI void APIENTRY glGetQueryObjectuivARB (GLuint id, GLenum pname, GLuint *par #define GL_ARB_occlusion_query2 1 #endif /* GL_ARB_occlusion_query2 */ +#ifndef GL_ARB_parallel_shader_compile +#define GL_ARB_parallel_shader_compile 1 +#define GL_MAX_SHADER_COMPILER_THREADS_ARB 0x91B0 +#define GL_COMPLETION_STATUS_ARB 0x91B1 +typedef void (APIENTRYP PFNGLMAXSHADERCOMPILERTHREADSARBPROC) (GLuint count); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glMaxShaderCompilerThreadsARB (GLuint count); +#endif +#endif /* GL_ARB_parallel_shader_compile */ + #ifndef GL_ARB_pipeline_statistics_query #define 
GL_ARB_pipeline_statistics_query 1 #define GL_VERTICES_SUBMITTED_ARB 0x82EE @@ -3753,6 +3879,10 @@ GLAPI void APIENTRY glPointParameterfvARB (GLenum pname, const GLfloat *params); #define GL_COORD_REPLACE_ARB 0x8862 #endif /* GL_ARB_point_sprite */ +#ifndef GL_ARB_post_depth_coverage +#define GL_ARB_post_depth_coverage 1 +#endif /* GL_ARB_post_depth_coverage */ + #ifndef GL_ARB_program_interface_query #define GL_ARB_program_interface_query 1 #endif /* GL_ARB_program_interface_query */ @@ -3826,6 +3956,26 @@ GLAPI void APIENTRY glGetnMinmaxARB (GLenum target, GLboolean reset, GLenum form #define GL_ARB_robustness_isolation 1 #endif /* GL_ARB_robustness_isolation */ +#ifndef GL_ARB_sample_locations +#define GL_ARB_sample_locations 1 +#define GL_SAMPLE_LOCATION_SUBPIXEL_BITS_ARB 0x933D +#define GL_SAMPLE_LOCATION_PIXEL_GRID_WIDTH_ARB 0x933E +#define GL_SAMPLE_LOCATION_PIXEL_GRID_HEIGHT_ARB 0x933F +#define GL_PROGRAMMABLE_SAMPLE_LOCATION_TABLE_SIZE_ARB 0x9340 +#define GL_SAMPLE_LOCATION_ARB 0x8E50 +#define GL_PROGRAMMABLE_SAMPLE_LOCATION_ARB 0x9341 +#define GL_FRAMEBUFFER_PROGRAMMABLE_SAMPLE_LOCATIONS_ARB 0x9342 +#define GL_FRAMEBUFFER_SAMPLE_LOCATION_PIXEL_GRID_ARB 0x9343 +typedef void (APIENTRYP PFNGLFRAMEBUFFERSAMPLELOCATIONSFVARBPROC) (GLenum target, GLuint start, GLsizei count, const GLfloat *v); +typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERSAMPLELOCATIONSFVARBPROC) (GLuint framebuffer, GLuint start, GLsizei count, const GLfloat *v); +typedef void (APIENTRYP PFNGLEVALUATEDEPTHVALUESARBPROC) (void); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glFramebufferSampleLocationsfvARB (GLenum target, GLuint start, GLsizei count, const GLfloat *v); +GLAPI void APIENTRY glNamedFramebufferSampleLocationsfvARB (GLuint framebuffer, GLuint start, GLsizei count, const GLfloat *v); +GLAPI void APIENTRY glEvaluateDepthValuesARB (void); +#endif +#endif /* GL_ARB_sample_locations */ + #ifndef GL_ARB_sample_shading #define GL_ARB_sample_shading 1 #define GL_SAMPLE_SHADING_ARB 
0x8C36 @@ -3852,14 +4002,26 @@ GLAPI void APIENTRY glMinSampleShadingARB (GLfloat value); #define GL_ARB_separate_shader_objects 1 #endif /* GL_ARB_separate_shader_objects */ +#ifndef GL_ARB_shader_atomic_counter_ops +#define GL_ARB_shader_atomic_counter_ops 1 +#endif /* GL_ARB_shader_atomic_counter_ops */ + #ifndef GL_ARB_shader_atomic_counters #define GL_ARB_shader_atomic_counters 1 #endif /* GL_ARB_shader_atomic_counters */ +#ifndef GL_ARB_shader_ballot +#define GL_ARB_shader_ballot 1 +#endif /* GL_ARB_shader_ballot */ + #ifndef GL_ARB_shader_bit_encoding #define GL_ARB_shader_bit_encoding 1 #endif /* GL_ARB_shader_bit_encoding */ +#ifndef GL_ARB_shader_clock +#define GL_ARB_shader_clock 1 +#endif /* GL_ARB_shader_clock */ + #ifndef GL_ARB_shader_draw_parameters #define GL_ARB_shader_draw_parameters 1 #endif /* GL_ARB_shader_draw_parameters */ @@ -4029,6 +4191,10 @@ GLAPI void APIENTRY glGetShaderSourceARB (GLhandleARB obj, GLsizei maxLength, GL #define GL_ARB_shader_texture_lod 1 #endif /* GL_ARB_shader_texture_lod */ +#ifndef GL_ARB_shader_viewport_layer_array +#define GL_ARB_shader_viewport_layer_array 1 +#endif /* GL_ARB_shader_viewport_layer_array */ + #ifndef GL_ARB_shading_language_100 #define GL_ARB_shading_language_100 1 #define GL_SHADING_LANGUAGE_VERSION_ARB 0x8B8C @@ -4102,12 +4268,20 @@ GLAPI void APIENTRY glNamedBufferPageCommitmentARB (GLuint buffer, GLintptr offs #define GL_MAX_SPARSE_3D_TEXTURE_SIZE_ARB 0x9199 #define GL_MAX_SPARSE_ARRAY_TEXTURE_LAYERS_ARB 0x919A #define GL_SPARSE_TEXTURE_FULL_ARRAY_CUBE_MIPMAPS_ARB 0x91A9 -typedef void (APIENTRYP PFNGLTEXPAGECOMMITMENTARBPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean resident); +typedef void (APIENTRYP PFNGLTEXPAGECOMMITMENTARBPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean commit); #ifdef GL_GLEXT_PROTOTYPES -GLAPI void 
APIENTRY glTexPageCommitmentARB (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean resident); +GLAPI void APIENTRY glTexPageCommitmentARB (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean commit); #endif #endif /* GL_ARB_sparse_texture */ +#ifndef GL_ARB_sparse_texture2 +#define GL_ARB_sparse_texture2 1 +#endif /* GL_ARB_sparse_texture2 */ + +#ifndef GL_ARB_sparse_texture_clamp +#define GL_ARB_sparse_texture_clamp 1 +#endif /* GL_ARB_sparse_texture_clamp */ + #ifndef GL_ARB_stencil_texturing #define GL_ARB_stencil_texturing 1 #endif /* GL_ARB_stencil_texturing */ @@ -4260,6 +4434,12 @@ GLAPI void APIENTRY glGetCompressedTexImageARB (GLenum target, GLint level, void #define GL_DOT3_RGBA_ARB 0x86AF #endif /* GL_ARB_texture_env_dot3 */ +#ifndef GL_ARB_texture_filter_minmax +#define GL_ARB_texture_filter_minmax 1 +#define GL_TEXTURE_REDUCTION_MODE_ARB 0x9366 +#define GL_WEIGHTED_AVERAGE_ARB 0x9367 +#endif /* GL_ARB_texture_filter_minmax */ + #ifndef GL_ARB_texture_float #define GL_ARB_texture_float 1 #define GL_TEXTURE_RED_TYPE_ARB 0x8C10 @@ -4754,6 +4934,11 @@ GLAPI void APIENTRY glBlendBarrierKHR (void); #define GL_KHR_debug 1 #endif /* GL_KHR_debug */ +#ifndef GL_KHR_no_error +#define GL_KHR_no_error 1 +#define GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR 0x00000008 +#endif /* GL_KHR_no_error */ + #ifndef GL_KHR_robust_buffer_access_behavior #define GL_KHR_robust_buffer_access_behavior 1 #endif /* GL_KHR_robust_buffer_access_behavior */ @@ -4896,7 +5081,6 @@ typedef void (APIENTRYP PFNGLPOINTPARAMETERXVOESPROC) (GLenum pname, const GLfix typedef void (APIENTRYP PFNGLPOINTSIZEXOESPROC) (GLfixed size); typedef void (APIENTRYP PFNGLPOLYGONOFFSETXOESPROC) (GLfixed factor, GLfixed units); typedef void (APIENTRYP PFNGLROTATEXOESPROC) (GLfixed angle, GLfixed x, GLfixed y, GLfixed z); -typedef void (APIENTRYP 
PFNGLSAMPLECOVERAGEOESPROC) (GLfixed value, GLboolean invert); typedef void (APIENTRYP PFNGLSCALEXOESPROC) (GLfixed x, GLfixed y, GLfixed z); typedef void (APIENTRYP PFNGLTEXENVXOESPROC) (GLenum target, GLenum pname, GLfixed param); typedef void (APIENTRYP PFNGLTEXENVXVOESPROC) (GLenum target, GLenum pname, const GLfixed *params); @@ -5001,7 +5185,6 @@ GLAPI void APIENTRY glPointParameterxvOES (GLenum pname, const GLfixed *params); GLAPI void APIENTRY glPointSizexOES (GLfixed size); GLAPI void APIENTRY glPolygonOffsetxOES (GLfixed factor, GLfixed units); GLAPI void APIENTRY glRotatexOES (GLfixed angle, GLfixed x, GLfixed y, GLfixed z); -GLAPI void APIENTRY glSampleCoverageOES (GLfixed value, GLboolean invert); GLAPI void APIENTRY glScalexOES (GLfixed x, GLfixed y, GLfixed z); GLAPI void APIENTRY glTexEnvxOES (GLenum target, GLenum pname, GLfixed param); GLAPI void APIENTRY glTexEnvxvOES (GLenum target, GLenum pname, const GLfixed *params); @@ -6715,7 +6898,7 @@ typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBLFORMATEXTPROC) (GLuint vaob typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBBINDINGEXTPROC) (GLuint vaobj, GLuint attribindex, GLuint bindingindex); typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXBINDINGDIVISOREXTPROC) (GLuint vaobj, GLuint bindingindex, GLuint divisor); typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBLOFFSETEXTPROC) (GLuint vaobj, GLuint buffer, GLuint index, GLint size, GLenum type, GLsizei stride, GLintptr offset); -typedef void (APIENTRYP PFNGLTEXTUREPAGECOMMITMENTEXTPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean resident); +typedef void (APIENTRYP PFNGLTEXTUREPAGECOMMITMENTEXTPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean commit); typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBDIVISOREXTPROC) (GLuint vaobj, GLuint index, GLuint divisor); #ifdef 
GL_GLEXT_PROTOTYPES GLAPI void APIENTRY glMatrixLoadfEXT (GLenum mode, const GLfloat *m); @@ -6971,7 +7154,7 @@ GLAPI void APIENTRY glVertexArrayVertexAttribLFormatEXT (GLuint vaobj, GLuint at GLAPI void APIENTRY glVertexArrayVertexAttribBindingEXT (GLuint vaobj, GLuint attribindex, GLuint bindingindex); GLAPI void APIENTRY glVertexArrayVertexBindingDivisorEXT (GLuint vaobj, GLuint bindingindex, GLuint divisor); GLAPI void APIENTRY glVertexArrayVertexAttribLOffsetEXT (GLuint vaobj, GLuint buffer, GLuint index, GLint size, GLenum type, GLsizei stride, GLintptr offset); -GLAPI void APIENTRY glTexturePageCommitmentEXT (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean resident); +GLAPI void APIENTRY glTexturePageCommitmentEXT (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean commit); GLAPI void APIENTRY glVertexArrayVertexAttribDivisorEXT (GLuint vaobj, GLuint index, GLuint divisor); #endif #endif /* GL_EXT_direct_state_access */ @@ -8635,6 +8818,14 @@ GLAPI void APIENTRY glBlendFuncSeparateINGR (GLenum sfactorRGB, GLenum dfactorRG #define GL_INTEL_fragment_shader_ordering 1 #endif /* GL_INTEL_fragment_shader_ordering */ +#ifndef GL_INTEL_framebuffer_CMAA +#define GL_INTEL_framebuffer_CMAA 1 +typedef void (APIENTRYP PFNGLAPPLYFRAMEBUFFERATTACHMENTCMAAINTELPROC) (void); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glApplyFramebufferAttachmentCMAAINTEL (void); +#endif +#endif /* GL_INTEL_framebuffer_CMAA */ + #ifndef GL_INTEL_map_texture #define GL_INTEL_map_texture 1 #define GL_TEXTURE_MEMORY_LAYOUT_INTEL 0x83FF @@ -8939,6 +9130,65 @@ GLAPI void APIENTRY glBlendBarrierNV (void); #define GL_NV_blend_square 1 #endif /* GL_NV_blend_square */ +#ifndef GL_NV_command_list +#define GL_NV_command_list 1 +#define GL_TERMINATE_SEQUENCE_COMMAND_NV 0x0000 +#define GL_NOP_COMMAND_NV 0x0001 +#define 
GL_DRAW_ELEMENTS_COMMAND_NV 0x0002 +#define GL_DRAW_ARRAYS_COMMAND_NV 0x0003 +#define GL_DRAW_ELEMENTS_STRIP_COMMAND_NV 0x0004 +#define GL_DRAW_ARRAYS_STRIP_COMMAND_NV 0x0005 +#define GL_DRAW_ELEMENTS_INSTANCED_COMMAND_NV 0x0006 +#define GL_DRAW_ARRAYS_INSTANCED_COMMAND_NV 0x0007 +#define GL_ELEMENT_ADDRESS_COMMAND_NV 0x0008 +#define GL_ATTRIBUTE_ADDRESS_COMMAND_NV 0x0009 +#define GL_UNIFORM_ADDRESS_COMMAND_NV 0x000A +#define GL_BLEND_COLOR_COMMAND_NV 0x000B +#define GL_STENCIL_REF_COMMAND_NV 0x000C +#define GL_LINE_WIDTH_COMMAND_NV 0x000D +#define GL_POLYGON_OFFSET_COMMAND_NV 0x000E +#define GL_ALPHA_REF_COMMAND_NV 0x000F +#define GL_VIEWPORT_COMMAND_NV 0x0010 +#define GL_SCISSOR_COMMAND_NV 0x0011 +#define GL_FRONT_FACE_COMMAND_NV 0x0012 +typedef void (APIENTRYP PFNGLCREATESTATESNVPROC) (GLsizei n, GLuint *states); +typedef void (APIENTRYP PFNGLDELETESTATESNVPROC) (GLsizei n, const GLuint *states); +typedef GLboolean (APIENTRYP PFNGLISSTATENVPROC) (GLuint state); +typedef void (APIENTRYP PFNGLSTATECAPTURENVPROC) (GLuint state, GLenum mode); +typedef GLuint (APIENTRYP PFNGLGETCOMMANDHEADERNVPROC) (GLenum tokenID, GLuint size); +typedef GLushort (APIENTRYP PFNGLGETSTAGEINDEXNVPROC) (GLenum shadertype); +typedef void (APIENTRYP PFNGLDRAWCOMMANDSNVPROC) (GLenum primitiveMode, GLuint buffer, const GLintptr *indirects, const GLsizei *sizes, GLuint count); +typedef void (APIENTRYP PFNGLDRAWCOMMANDSADDRESSNVPROC) (GLenum primitiveMode, const GLuint64 *indirects, const GLsizei *sizes, GLuint count); +typedef void (APIENTRYP PFNGLDRAWCOMMANDSSTATESNVPROC) (GLuint buffer, const GLintptr *indirects, const GLsizei *sizes, const GLuint *states, const GLuint *fbos, GLuint count); +typedef void (APIENTRYP PFNGLDRAWCOMMANDSSTATESADDRESSNVPROC) (const GLuint64 *indirects, const GLsizei *sizes, const GLuint *states, const GLuint *fbos, GLuint count); +typedef void (APIENTRYP PFNGLCREATECOMMANDLISTSNVPROC) (GLsizei n, GLuint *lists); +typedef void (APIENTRYP 
PFNGLDELETECOMMANDLISTSNVPROC) (GLsizei n, const GLuint *lists); +typedef GLboolean (APIENTRYP PFNGLISCOMMANDLISTNVPROC) (GLuint list); +typedef void (APIENTRYP PFNGLLISTDRAWCOMMANDSSTATESCLIENTNVPROC) (GLuint list, GLuint segment, const void **indirects, const GLsizei *sizes, const GLuint *states, const GLuint *fbos, GLuint count); +typedef void (APIENTRYP PFNGLCOMMANDLISTSEGMENTSNVPROC) (GLuint list, GLuint segments); +typedef void (APIENTRYP PFNGLCOMPILECOMMANDLISTNVPROC) (GLuint list); +typedef void (APIENTRYP PFNGLCALLCOMMANDLISTNVPROC) (GLuint list); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glCreateStatesNV (GLsizei n, GLuint *states); +GLAPI void APIENTRY glDeleteStatesNV (GLsizei n, const GLuint *states); +GLAPI GLboolean APIENTRY glIsStateNV (GLuint state); +GLAPI void APIENTRY glStateCaptureNV (GLuint state, GLenum mode); +GLAPI GLuint APIENTRY glGetCommandHeaderNV (GLenum tokenID, GLuint size); +GLAPI GLushort APIENTRY glGetStageIndexNV (GLenum shadertype); +GLAPI void APIENTRY glDrawCommandsNV (GLenum primitiveMode, GLuint buffer, const GLintptr *indirects, const GLsizei *sizes, GLuint count); +GLAPI void APIENTRY glDrawCommandsAddressNV (GLenum primitiveMode, const GLuint64 *indirects, const GLsizei *sizes, GLuint count); +GLAPI void APIENTRY glDrawCommandsStatesNV (GLuint buffer, const GLintptr *indirects, const GLsizei *sizes, const GLuint *states, const GLuint *fbos, GLuint count); +GLAPI void APIENTRY glDrawCommandsStatesAddressNV (const GLuint64 *indirects, const GLsizei *sizes, const GLuint *states, const GLuint *fbos, GLuint count); +GLAPI void APIENTRY glCreateCommandListsNV (GLsizei n, GLuint *lists); +GLAPI void APIENTRY glDeleteCommandListsNV (GLsizei n, const GLuint *lists); +GLAPI GLboolean APIENTRY glIsCommandListNV (GLuint list); +GLAPI void APIENTRY glListDrawCommandsStatesClientNV (GLuint list, GLuint segment, const void **indirects, const GLsizei *sizes, const GLuint *states, const GLuint *fbos, GLuint count); +GLAPI void 
APIENTRY glCommandListSegmentsNV (GLuint list, GLuint segments); +GLAPI void APIENTRY glCompileCommandListNV (GLuint list); +GLAPI void APIENTRY glCallCommandListNV (GLuint list); +#endif +#endif /* GL_NV_command_list */ + #ifndef GL_NV_compute_program5 #define GL_NV_compute_program5 1 #define GL_COMPUTE_PROGRAM_NV 0x90FB @@ -8971,6 +9221,17 @@ GLAPI void APIENTRY glSubpixelPrecisionBiasNV (GLuint xbits, GLuint ybits); #endif #endif /* GL_NV_conservative_raster */ +#ifndef GL_NV_conservative_raster_dilate +#define GL_NV_conservative_raster_dilate 1 +#define GL_CONSERVATIVE_RASTER_DILATE_NV 0x9379 +#define GL_CONSERVATIVE_RASTER_DILATE_RANGE_NV 0x937A +#define GL_CONSERVATIVE_RASTER_DILATE_GRANULARITY_NV 0x937B +typedef void (APIENTRYP PFNGLCONSERVATIVERASTERPARAMETERFNVPROC) (GLenum pname, GLfloat value); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glConservativeRasterParameterfNV (GLenum pname, GLfloat value); +#endif +#endif /* GL_NV_conservative_raster_dilate */ + #ifndef GL_NV_copy_depth_to_color #define GL_NV_copy_depth_to_color 1 #define GL_DEPTH_STENCIL_TO_RGBA_NV 0x886E @@ -10850,6 +11111,21 @@ GLAPI void APIENTRY glVideoCaptureStreamParameterdvNV (GLuint video_capture_slot #define GL_FORMAT_SUBSAMPLE_244_244_OML 0x8983 #endif /* GL_OML_subsample */ +#ifndef GL_OVR_multiview +#define GL_OVR_multiview 1 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_NUM_VIEWS_OVR 0x9630 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_BASE_VIEW_INDEX_OVR 0x9632 +#define GL_MAX_VIEWS_OVR 0x9631 +typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint baseViewIndex, GLsizei numViews); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glFramebufferTextureMultiviewOVR (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint baseViewIndex, GLsizei numViews); +#endif +#endif /* GL_OVR_multiview */ + +#ifndef GL_OVR_multiview2 +#define GL_OVR_multiview2 1 +#endif /* GL_OVR_multiview2 */ + #ifndef 
GL_PGI_misc_hints #define GL_PGI_misc_hints 1 #define GL_PREFER_DOUBLEBUFFER_HINT_PGI 0x1A1F8 diff --git a/scons/gallium.py b/scons/gallium.py index 51b84d7..46dbf0e 100755 --- a/scons/gallium.py +++ b/scons/gallium.py @@ -300,6 +300,7 @@ def generate(env): # C preprocessor options cppdefines = [] + cppdefines += ['__STDC_LIMIT_MACROS'] if env['build'] in ('debug', 'checked'): cppdefines += ['DEBUG'] else: diff --git a/src/egl/SConscript b/src/egl/SConscript index 1b2a427..f8102db 100644 --- a/src/egl/SConscript +++ b/src/egl/SConscript @@ -15,7 +15,6 @@ env.Append(CPPPATH = [ # parse Makefile.sources egl_sources = env.ParseSourceList('Makefile.sources', 'LIBEGL_C_FILES') -egl_sources.append(env.ParseSourceList('Makefile.sources', 'dri2_backend_core_FILES')) env.Append(CPPDEFINES = [ '_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_HAIKU', diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c index a439a3b..eda5087 100644 --- a/src/egl/drivers/dri2/platform_drm.c +++ b/src/egl/drivers/dri2/platform_drm.c @@ -68,7 +68,7 @@ release_buffer(struct gbm_surface *_surf, struct gbm_bo *bo) { struct gbm_dri_surface *surf = (struct gbm_dri_surface *) _surf; struct dri2_egl_surface *dri2_surf = surf->dri_private; - int i; + unsigned i; for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) { if (dri2_surf->color_buffers[i].bo == bo) { @@ -82,7 +82,7 @@ has_free_buffers(struct gbm_surface *_surf) { struct gbm_dri_surface *surf = (struct gbm_dri_surface *) _surf; struct dri2_egl_surface *dri2_surf = surf->dri_private; - int i; + unsigned i; for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) if (!dri2_surf->color_buffers[i].locked) @@ -189,7 +189,7 @@ dri2_drm_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf) { struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf); - int i; + unsigned i; if (!_eglPutSurface(surf)) return EGL_TRUE; @@ -218,7 +218,7 @@ 
get_back_bo(struct dri2_egl_surface *dri2_surf) struct dri2_egl_display *dri2_dpy = dri2_egl_display(dri2_surf->base.Resource.Display); struct gbm_dri_surface *surf = dri2_surf->gbm_surf; - int i; + unsigned i; if (dri2_surf->back == NULL) { for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) { @@ -414,7 +414,7 @@ dri2_drm_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) { struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw); - int i; + unsigned i; if (dri2_dpy->swrast) { (*dri2_dpy->core->swapBuffers)(dri2_surf->dri_drawable); diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index dabaf1eb..dbc64ba 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -1227,6 +1227,8 @@ dri2_wl_swrast_get_stride_for_format(int format, int w) * Taken from weston shared/os-compatibility.c */ +#ifndef HAVE_MKOSTEMP + static int set_cloexec_or_close(int fd) { @@ -1249,6 +1251,8 @@ err: return -1; } +#endif + /* * Taken from weston shared/os-compatibility.c */ diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 93dfb80..278d5e9 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -24,6 +24,7 @@ #include "util/ralloc.h" #include "glsl/nir/nir.h" +#include "glsl/nir/nir_control_flow.h" #include "glsl/nir/nir_builder.h" #include "glsl/list.h" #include "glsl/shader_enums.h" @@ -307,7 +308,7 @@ ttn_emit_immediate(struct ttn_compile *c) for (i = 0; i < 4; i++) load_const->value.u[i] = tgsi_imm->u[i].Uint; - nir_instr_insert_after_cf_list(b->cf_node_list, &load_const->instr); + nir_builder_instr_insert(b, &load_const->instr); } static nir_src @@ -363,7 +364,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, load->variables[0] = ttn_array_deref(c, load, var, offset, indirect); 
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); - nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); + nir_builder_instr_insert(b, &load->instr); src = nir_src_for_ssa(&load->dest.ssa); @@ -414,7 +415,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, load->num_components = ncomp; nir_ssa_dest_init(&load->instr, &load->dest, ncomp, NULL); - nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); + nir_builder_instr_insert(b, &load->instr); src = nir_src_for_ssa(&load->dest.ssa); break; @@ -476,7 +477,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, srcn++; } nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); - nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); + nir_builder_instr_insert(b, &load->instr); src = nir_src_for_ssa(&load->dest.ssa); break; @@ -552,7 +553,7 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst) load->dest = nir_dest_for_reg(reg); - nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); + nir_builder_instr_insert(b, &load->instr); } else { assert(!tgsi_dst->Indirect); dest.dest.reg.reg = c->temp_regs[index].reg; @@ -667,7 +668,7 @@ ttn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) instr->src[i].src = nir_src_for_ssa(src[i]); instr->dest = dest; - nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr); + nir_builder_instr_insert(b, &instr->instr); } static void @@ -683,7 +684,7 @@ ttn_move_dest_masked(nir_builder *b, nir_alu_dest dest, mov->src[0].src = nir_src_for_ssa(def); for (unsigned i = def->num_components; i < 4; i++) mov->src[0].swizzle[i] = def->num_components - 1; - nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr); + nir_builder_instr_insert(b, &mov->instr); } static void @@ -902,7 +903,7 @@ ttn_kill(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) { nir_intrinsic_instr *discard = nir_intrinsic_instr_create(b->shader, 
nir_intrinsic_discard); - nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr); + nir_builder_instr_insert(b, &discard->instr); } static void @@ -912,7 +913,7 @@ ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) nir_intrinsic_instr *discard = nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if); discard->src[0] = nir_src_for_ssa(cmp); - nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr); + nir_builder_instr_insert(b, &discard->instr); } static void @@ -976,14 +977,14 @@ static void ttn_cont(nir_builder *b) { nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_continue); - nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr); + nir_builder_instr_insert(b, &instr->instr); } static void ttn_brk(nir_builder *b) { nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_break); - nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr); + nir_builder_instr_insert(b, &instr->instr); } static void @@ -1279,7 +1280,7 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) assert(src_number == num_srcs); nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); - nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr); + nir_builder_instr_insert(b, &instr->instr); /* Resolve the writemask on the texture op. 
*/ ttn_move_dest(b, dest, &instr->dest.ssa); @@ -1318,10 +1319,10 @@ ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) txs->src[0].src_type = nir_tex_src_lod; nir_ssa_dest_init(&txs->instr, &txs->dest, 3, NULL); - nir_instr_insert_after_cf_list(b->cf_node_list, &txs->instr); + nir_builder_instr_insert(b, &txs->instr); nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, NULL); - nir_instr_insert_after_cf_list(b->cf_node_list, &qlv->instr); + nir_builder_instr_insert(b, &qlv->instr); ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ); ttn_move_dest_masked(b, dest, &qlv->dest.ssa, TGSI_WRITEMASK_W); @@ -1730,7 +1731,7 @@ ttn_emit_instruction(struct ttn_compile *c) store->variables[0] = ttn_array_deref(c, store, var, offset, indirect); store->src[0] = nir_src_for_reg(dest.dest.reg.reg); - nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr); + nir_builder_instr_insert(b, &store->instr); } } @@ -1759,11 +1760,26 @@ ttn_add_output_stores(struct ttn_compile *c) store->const_index[0] = loc; store->src[0].reg.reg = c->output_regs[loc].reg; store->src[0].reg.base_offset = c->output_regs[loc].offset; - nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr); + nir_builder_instr_insert(b, &store->instr); } } } +static gl_shader_stage +tgsi_processor_to_shader_stage(unsigned processor) +{ + switch (processor) { + case TGSI_PROCESSOR_FRAGMENT: return MESA_SHADER_FRAGMENT; + case TGSI_PROCESSOR_VERTEX: return MESA_SHADER_VERTEX; + case TGSI_PROCESSOR_GEOMETRY: return MESA_SHADER_GEOMETRY; + case TGSI_PROCESSOR_TESS_CTRL: return MESA_SHADER_TESS_CTRL; + case TGSI_PROCESSOR_TESS_EVAL: return MESA_SHADER_TESS_EVAL; + case TGSI_PROCESSOR_COMPUTE: return MESA_SHADER_COMPUTE; + default: + unreachable("invalid TGSI processor"); + }; +} + struct nir_shader * tgsi_to_nir(const void *tgsi_tokens, const nir_shader_compiler_options *options) @@ -1775,7 +1791,12 @@ tgsi_to_nir(const void *tgsi_tokens, int ret; c = rzalloc(NULL, struct ttn_compile); 
- s = nir_shader_create(NULL, options); + + tgsi_scan_shader(tgsi_tokens, &scan); + c->scan = &scan; + + s = nir_shader_create(NULL, tgsi_processor_to_shader_stage(scan.processor), + options); nir_function *func = nir_function_create(s, "main"); nir_function_overload *overload = nir_function_overload_create(func); @@ -1784,9 +1805,6 @@ tgsi_to_nir(const void *tgsi_tokens, nir_builder_init(&c->build, impl); nir_builder_insert_after_cf_list(&c->build, &impl->body); - tgsi_scan_shader(tgsi_tokens, &scan); - c->scan = &scan; - s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1; s->num_uniforms = scan.const_file_max[0] + 1; s->num_outputs = scan.file_max[TGSI_FILE_OUTPUT] + 1; diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index a6675c5..3e3ed5b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -259,7 +259,7 @@ struct translate_ctx struct tgsi_token *tokens_end; struct tgsi_header *header; unsigned processor : 4; - int implied_array_size : 5; + unsigned implied_array_size : 6; unsigned num_immediates; }; @@ -675,6 +675,9 @@ parse_register_dcl( eat_opt_white( &cur ); if (cur[0] == '[') { + bool is_in = *file == TGSI_FILE_INPUT; + bool is_out = *file == TGSI_FILE_OUTPUT; + ++cur; ctx->cur = cur; if (!parse_register_dcl_bracket( ctx, &brackets[1] )) @@ -684,7 +687,11 @@ parse_register_dcl( * input primitive. 
so we want to declare just * the index relevant to the semantics which is in * the second bracket */ - if (ctx->processor == TGSI_PROCESSOR_GEOMETRY && *file == TGSI_FILE_INPUT) { + + /* tessellation has similar constraints to geometry shader */ + if ((ctx->processor == TGSI_PROCESSOR_GEOMETRY && is_in) || + (ctx->processor == TGSI_PROCESSOR_TESS_EVAL && is_in) || + (ctx->processor == TGSI_PROCESSOR_TESS_CTRL && (is_in || is_out))) { brackets[0] = brackets[1]; *num_brackets = 1; } else { @@ -740,6 +747,14 @@ parse_dst_operand( dst->Dimension.Indirect = 0; dst->Dimension.Dimension = 0; dst->Dimension.Index = bracket[0].index; + + if (bracket[0].ind_file != TGSI_FILE_NULL) { + dst->Dimension.Indirect = 1; + dst->DimIndirect.File = bracket[0].ind_file; + dst->DimIndirect.Index = bracket[0].ind_index; + dst->DimIndirect.Swizzle = bracket[0].ind_comp; + dst->DimIndirect.ArrayID = bracket[0].ind_array; + } bracket[0] = bracket[1]; } dst->Register.Index = bracket[0].index; @@ -1623,6 +1638,10 @@ static boolean translate( struct translate_ctx *ctx ) if (!parse_header( ctx )) return FALSE; + if (ctx->processor == TGSI_PROCESSOR_TESS_CTRL || + ctx->processor == TGSI_PROCESSOR_TESS_EVAL) + ctx->implied_array_size = 32; + while (*ctx->cur != '\0') { uint label_val = 0; if (!eat_white( &ctx->cur )) { diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 85206ea..9bba07a 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -104,7 +104,7 @@ struct blitter_context_priv void *fs_resolve_uint[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2]; /* Blend state. */ - void *blend[PIPE_MASK_RGBA+1]; /**< blend state with writemask */ + void *blend[PIPE_MASK_RGBA+1][2]; /**< blend state with writemask */ void *blend_clear[GET_CLEAR_BLEND_STATE_IDX(PIPE_CLEAR_COLOR)+1]; /* Depth stencil alpha state. 
*/ @@ -159,7 +159,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) struct pipe_rasterizer_state rs_state; struct pipe_sampler_state sampler_state; struct pipe_vertex_element velem[2]; - unsigned i; + unsigned i, j; ctx = CALLOC_STRUCT(blitter_context_priv); if (!ctx) @@ -208,8 +208,20 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) memset(&blend, 0, sizeof(blend)); for (i = 0; i <= PIPE_MASK_RGBA; i++) { - blend.rt[0].colormask = i; - ctx->blend[i] = pipe->create_blend_state(pipe, &blend); + for (j = 0; j < 2; j++) { + memset(&blend.rt[0], 0, sizeof(blend.rt[0])); + blend.rt[0].colormask = i; + if (j) { + blend.rt[0].blend_enable = 1; + blend.rt[0].rgb_func = PIPE_BLEND_ADD; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; + blend.rt[0].alpha_func = PIPE_BLEND_ADD; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; + } + ctx->blend[i][j] = pipe->create_blend_state(pipe, &blend); + } } /* depth stencil alpha state objects */ @@ -409,9 +421,10 @@ void util_blitter_destroy(struct blitter_context *blitter) struct pipe_context *pipe = blitter->pipe; int i, j, f; - for (i = 0; i <= PIPE_MASK_RGBA; i++) { - pipe->delete_blend_state(pipe, ctx->blend[i]); - } + for (i = 0; i <= PIPE_MASK_RGBA; i++) + for (j = 0; j < 2; j++) + pipe->delete_blend_state(pipe, ctx->blend[i][j]); + for (i = 0; i < Elements(ctx->blend_clear); i++) { if (ctx->blend_clear[i]) pipe->delete_blend_state(pipe, ctx->blend_clear[i]); @@ -1217,7 +1230,7 @@ static void *get_clear_blend_state(struct blitter_context_priv *ctx, /* Return an existing blend state. */ if (!clear_buffers) - return ctx->blend[0]; + return ctx->blend[0][0]; index = GET_CLEAR_BLEND_STATE_IDX(clear_buffers); @@ -1483,7 +1496,8 @@ void util_blitter_copy_texture(struct blitter_context *blitter, /* Copy. 
*/ util_blitter_blit_generic(blitter, dst_view, &dstbox, src_view, srcbox, src->width0, src->height0, - PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL); + PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, + FALSE); pipe_surface_reference(&dst_view, NULL); pipe_sampler_view_reference(&src_view, NULL); @@ -1496,7 +1510,8 @@ void util_blitter_blit_generic(struct blitter_context *blitter, const struct pipe_box *srcbox, unsigned src_width0, unsigned src_height0, unsigned mask, unsigned filter, - const struct pipe_scissor_state *scissor) + const struct pipe_scissor_state *scissor, + boolean alpha_blend) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->base.pipe; @@ -1550,7 +1565,7 @@ void util_blitter_blit_generic(struct blitter_context *blitter, fb_state.zsbuf = NULL; if (blit_depth || blit_stencil) { - pipe->bind_blend_state(pipe, ctx->blend[0]); + pipe->bind_blend_state(pipe, ctx->blend[0][0]); if (blit_depth && blit_stencil) { pipe->bind_depth_stencil_alpha_state(pipe, @@ -1573,7 +1588,9 @@ void util_blitter_blit_generic(struct blitter_context *blitter, } } else { - pipe->bind_blend_state(pipe, ctx->blend[mask & PIPE_MASK_RGBA]); + unsigned colormask = mask & PIPE_MASK_RGBA; + + pipe->bind_blend_state(pipe, ctx->blend[colormask][alpha_blend]); pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); ctx->bind_fs_state(pipe, blitter_get_fs_texfetch_col(ctx, src->format, src_target, @@ -1786,7 +1803,8 @@ util_blitter_blit(struct blitter_context *blitter, util_blitter_blit_generic(blitter, dst_view, &info->dst.box, src_view, &info->src.box, src->width0, src->height0, info->mask, info->filter, - info->scissor_enable ? &info->scissor : NULL); + info->scissor_enable ? 
&info->scissor : NULL, + info->alpha_blend); pipe_surface_reference(&dst_view, NULL); pipe_sampler_view_reference(&src_view, NULL); @@ -1815,7 +1833,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter, blitter_disable_render_cond(ctx); /* bind states */ - pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA]); + pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA][0]); pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); bind_fs_write_one_cbuf(ctx); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); @@ -1867,7 +1885,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, blitter_disable_render_cond(ctx); /* bind states */ - pipe->bind_blend_state(pipe, ctx->blend[0]); + pipe->bind_blend_state(pipe, ctx->blend[0][0]); if ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) { sr.ref_value[0] = stencil & 0xff; pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil); @@ -1933,8 +1951,8 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter, blitter_disable_render_cond(ctx); /* bind states */ - pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA] : - ctx->blend[0]); + pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA][0] : + ctx->blend[0][0]); pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage); if (cbsurf) bind_fs_write_one_cbuf(ctx); @@ -2187,7 +2205,7 @@ void util_blitter_custom_color(struct blitter_context *blitter, /* bind states */ pipe->bind_blend_state(pipe, custom_blend ? 
custom_blend - : ctx->blend[PIPE_MASK_RGBA]); + : ctx->blend[PIPE_MASK_RGBA][0]); pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); bind_fs_write_one_cbuf(ctx); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index 0cd173d..becdb02 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -246,7 +246,8 @@ void util_blitter_blit_generic(struct blitter_context *blitter, const struct pipe_box *srcbox, unsigned src_width0, unsigned src_height0, unsigned mask, unsigned filter, - const struct pipe_scissor_state *scissor); + const struct pipe_scissor_state *scissor, + boolean alpha_blend); void util_blitter_blit(struct blitter_context *blitter, const struct pipe_blit_info *info); diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 654b5bb..70ed911 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -676,6 +676,9 @@ util_try_blit_via_copy_region(struct pipe_context *ctx, return FALSE; } + if (blit->alpha_blend) + return FALSE; + ctx->resource_copy_region(ctx, blit->dst.resource, blit->dst.level, blit->dst.box.x, blit->dst.box.y, blit->dst.box.z, blit->src.resource, blit->src.level, diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h index c4516ba..dd48956 100644 --- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h +++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 
20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28) Copyright (C) 2013-2015 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index 8e8cf6a..441bfec 100644 --- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28) Copyright (C) 2013-2015 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) @@ -326,6 +326,13 @@ enum a3xx_tex_type { A3XX_TEX_3D = 3, }; +enum a3xx_tex_msaa { + A3XX_TPL1_MSAA1X = 0, + A3XX_TPL1_MSAA2X = 1, + A3XX_TPL1_MSAA4X = 2, + A3XX_TPL1_MSAA8X = 3, +}; + #define A3XX_INT0_RBBM_GPU_IDLE 0x00000001 #define A3XX_INT0_RBBM_AHB_ERROR 0x00000002 #define A3XX_INT0_RBBM_REG_TIMEOUT 
0x00000004 @@ -2652,6 +2659,7 @@ static inline uint32_t A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES(uint32_t val) #define REG_A3XX_VGT_IMMED_DATA 0x000021fd #define REG_A3XX_TEX_SAMP_0 0x00000000 +#define A3XX_TEX_SAMP_0_CLAMPENABLE 0x00000001 #define A3XX_TEX_SAMP_0_MIPFILTER_LINEAR 0x00000002 #define A3XX_TEX_SAMP_0_XY_MAG__MASK 0x0000000c #define A3XX_TEX_SAMP_0_XY_MAG__SHIFT 2 @@ -2695,6 +2703,7 @@ static inline uint32_t A3XX_TEX_SAMP_0_COMPARE_FUNC(enum adreno_compare_func val { return ((val) << A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT) & A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK; } +#define A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF 0x01000000 #define A3XX_TEX_SAMP_0_UNNORM_COORDS 0x80000000 #define REG_A3XX_TEX_SAMP_1 0x00000001 @@ -2750,6 +2759,12 @@ static inline uint32_t A3XX_TEX_CONST_0_MIPLVLS(uint32_t val) { return ((val) << A3XX_TEX_CONST_0_MIPLVLS__SHIFT) & A3XX_TEX_CONST_0_MIPLVLS__MASK; } +#define A3XX_TEX_CONST_0_MSAATEX__MASK 0x00300000 +#define A3XX_TEX_CONST_0_MSAATEX__SHIFT 20 +static inline uint32_t A3XX_TEX_CONST_0_MSAATEX(enum a3xx_tex_msaa val) +{ + return ((val) << A3XX_TEX_CONST_0_MSAATEX__SHIFT) & A3XX_TEX_CONST_0_MSAATEX__MASK; +} #define A3XX_TEX_CONST_0_FMT__MASK 0x1fc00000 #define A3XX_TEX_CONST_0_FMT__SHIFT 22 static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val) @@ -2785,7 +2800,7 @@ static inline uint32_t A3XX_TEX_CONST_1_FETCHSIZE(enum a3xx_tex_fetchsize val) } #define REG_A3XX_TEX_CONST_2 0x00000002 -#define A3XX_TEX_CONST_2_INDX__MASK 0x000000ff +#define A3XX_TEX_CONST_2_INDX__MASK 0x000001ff #define A3XX_TEX_CONST_2_INDX__SHIFT 0 static inline uint32_t A3XX_TEX_CONST_2_INDX(uint32_t val) { @@ -2805,7 +2820,7 @@ static inline uint32_t A3XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val) } #define REG_A3XX_TEX_CONST_3 0x00000003 -#define A3XX_TEX_CONST_3_LAYERSZ1__MASK 0x00007fff +#define A3XX_TEX_CONST_3_LAYERSZ1__MASK 0x0001ffff #define A3XX_TEX_CONST_3_LAYERSZ1__SHIFT 0 static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ1(uint32_t val) { diff 
--git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c index ec87aa9..04cb9b9 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c @@ -262,6 +262,15 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { _T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX), _T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX), _T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX), + + _T(DXT1_RGB, DXT1, NONE, WZYX), + _T(DXT1_SRGB, DXT1, NONE, WZYX), + _T(DXT1_RGBA, DXT1, NONE, WZYX), + _T(DXT1_SRGBA, DXT1, NONE, WZYX), + _T(DXT3_RGBA, DXT3, NONE, WZYX), + _T(DXT3_SRGBA, DXT3, NONE, WZYX), + _T(DXT5_RGBA, DXT5, NONE, WZYX), + _T(DXT5_SRGBA, DXT5, NONE, WZYX), }; enum a3xx_vtx_fmt @@ -301,7 +310,7 @@ fd3_pipe2fetchsize(enum pipe_format format) { if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) format = PIPE_FORMAT_Z32_FLOAT; - switch (util_format_get_blocksizebits(format)) { + switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) { case 8: return TFETCH_1_BYTE; case 16: return TFETCH_2_BYTE; case 32: return TFETCH_4_BYTE; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c index 9c16804..583caaa 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c @@ -73,7 +73,7 @@ fd3_rasterizer_state_create(struct pipe_context *pctx, so->gras_su_poly_offset_scale = A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale); so->gras_su_poly_offset_offset = - A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units); + A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units * 2.0f); so->gras_su_mode_control = A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c index c30658d..2d6ecb2 100644 --- 
a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c @@ -115,6 +115,7 @@ fd3_sampler_state_create(struct pipe_context *pctx, so->texsamp0 = COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) | + COND(!cso->seamless_cube_map, A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF) | COND(miplinear, A3XX_TEX_SAMP_0_MIPFILTER_LINEAR) | A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) | A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) | @@ -239,7 +240,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); /* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */ so->texconst2 = - A3XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp); + A3XX_TEX_CONST_2_PITCH(util_format_get_nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp); switch (prsc->target) { case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h index 563f70a..2e1d712 100644 --- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h +++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03) +- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28) Copyright (C) 2013-2015 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) @@ -162,10 +162,13 @@ enum a4xx_tex_fmt { TFMT4_8_UNORM = 4, TFMT4_8_8_UNORM = 14, TFMT4_8_8_8_8_UNORM = 28, + TFMT4_8_SNORM = 5, TFMT4_8_8_SNORM = 15, TFMT4_8_8_8_8_SNORM = 29, + TFMT4_8_UINT = 6, TFMT4_8_8_UINT = 16, TFMT4_8_8_8_8_UINT = 30, + TFMT4_8_SINT = 7, TFMT4_8_8_SINT = 17, TFMT4_8_8_8_8_SINT = 31, TFMT4_16_UINT = 21, @@ -430,7 +433,7 @@ static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; } #define A4XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00002000 -#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0x007fc000 +#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0xffffc000 #define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 14 static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) { @@ -440,7 +443,7 @@ static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) static inline uint32_t REG_A4XX_RB_MRT_BASE(uint32_t i0) { return 0x000020a6 + 0x5*i0; } static inline uint32_t REG_A4XX_RB_MRT_CONTROL3(uint32_t i0) { return 0x000020a7 + 0x5*i0; } -#define A4XX_RB_MRT_CONTROL3_STRIDE__MASK 0x0001fff8 +#define A4XX_RB_MRT_CONTROL3_STRIDE__MASK 0x03fffff8 #define A4XX_RB_MRT_CONTROL3_STRIDE__SHIFT 3 static inline uint32_t A4XX_RB_MRT_CONTROL3_STRIDE(uint32_t val) { @@ -1460,6 +1463,7 @@ static inline uint32_t A4XX_SP_FS_MRT_REG_MRTFORMAT(enum a4xx_color_fmt val) { return ((val) << A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT) & A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK; } +#define A4XX_SP_FS_MRT_REG_COLOR_SRGB 0x00040000 #define REG_A4XX_SP_CS_CTRL_REG0 0x00002300 diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h index ab7850e..3a1d4b6 100644 --- 
a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h @@ -56,6 +56,7 @@ struct fd4_emit { uint32_t sprite_coord_enable; /* bitmask */ bool sprite_coord_mode; bool rasterflat; + bool no_decode_srgb; /* cached to avoid repeated lookups of same variants: */ struct ir3_shader_variant *vp, *fp; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c index 3e00454..6c9e217 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c @@ -79,9 +79,9 @@ struct fd4_format { static struct fd4_format formats[PIPE_FORMAT_COUNT] = { /* 8-bit */ VT(R8_UNORM, 8_UNORM, R8_UNORM, WZYX), - V_(R8_SNORM, 8_SNORM, NONE, WZYX), - V_(R8_UINT, 8_UINT, NONE, WZYX), - V_(R8_SINT, 8_SINT, NONE, WZYX), + VT(R8_SNORM, 8_SNORM, NONE, WZYX), + VT(R8_UINT, 8_UINT, NONE, WZYX), + VT(R8_SINT, 8_SINT, NONE, WZYX), V_(R8_USCALED, 8_UINT, NONE, WZYX), V_(R8_SSCALED, 8_UINT, NONE, WZYX), @@ -115,8 +115,8 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX), VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX), - VT(R8G8_UINT, 8_8_UINT, NONE, WZYX), - VT(R8G8_SINT, 8_8_SINT, NONE, WZYX), + VT(R8G8_UINT, 8_8_UINT, R8G8_UINT, WZYX), + VT(R8G8_SINT, 8_8_SINT, R8G8_SINT, WZYX), V_(R8G8_USCALED, 8_8_UINT, NONE, WZYX), V_(R8G8_SSCALED, 8_8_SINT, NONE, WZYX), diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c index 81c37f7..3f8bbf3 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c @@ -46,7 +46,8 @@ static void emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, - struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w) + struct pipe_surface **bufs, uint32_t *bases, + uint32_t bin_w, bool decode_srgb) { enum a4xx_tile_mode tile_mode; unsigned i; @@ -60,6 +61,7 @@ emit_mrt(struct fd_ringbuffer 
*ring, unsigned nr_bufs, for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) { enum a4xx_color_fmt format = 0; enum a3xx_color_swap swap = WZYX; + bool srgb = false; struct fd_resource *rsc = NULL; struct fd_resource_slice *slice = NULL; uint32_t stride = 0; @@ -68,10 +70,9 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, if ((i < nr_bufs) && bufs[i]) { struct pipe_surface *psurf = bufs[i]; - enum pipe_format pformat = 0; + enum pipe_format pformat = psurf->format; rsc = fd_resource(psurf->texture); - pformat = psurf->format; /* In case we're drawing to Z32F_S8, the "color" actually goes to * the stencil @@ -86,6 +87,11 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, format = fd4_pipe2color(pformat); swap = fd4_pipe2swap(pformat); + if (decode_srgb) + srgb = util_format_is_srgb(pformat); + else + pformat = util_format_linear(pformat); + debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); offset = fd_resource_offset(rsc, psurf->u.tex.level, @@ -108,7 +114,8 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) | - A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap)); + A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) | + COND(srgb, A4XX_RB_MRT_BUF_INFO_COLOR_SRGB)); if (bin_w || (i >= nr_bufs) || !bufs[i]) { OUT_RING(ring, base); OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride)); @@ -282,7 +289,7 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t *bases, struct fd_ringbuffer *ring = ctx->ring; struct pipe_surface *zsbufs[2]; - emit_mrt(ring, nr_bufs, bufs, bases, bin_w); + emit_mrt(ring, nr_bufs, bufs, bases, bin_w, false); if (bufs[0] && (bufs[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) { /* The gmem_restore_tex logic will put the first buffer's stencil @@ -315,6 +322,7 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) .key = { .half_precision = fd_half_precision(pfb), }, + 
.no_decode_srgb = true, }; unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0}; float x0, y0, x1, y1; @@ -520,7 +528,7 @@ fd4_emit_sysmem_prep(struct fd_context *ctx) OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); - emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true); /* setup scissor/offset for current tile: */ OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1); @@ -677,7 +685,7 @@ fd4_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1)); OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2)); - emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w, true); /* setup scissor/offset for current tile: */ OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 1a6d014..a3d7123 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -450,10 +450,15 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8); for (i = 0; i < 8; i++) { enum a4xx_color_fmt format = 0; - if (i < nr) + bool srgb = false; + if (i < nr) { format = fd4_emit_format(bufs[i]); + if (bufs[i] && !emit->no_decode_srgb) + srgb = util_format_is_srgb(bufs[i]->format); + } OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid[i]) | A4XX_SP_FS_MRT_REG_MRTFORMAT(format) | + COND(srgb, A4XX_SP_FS_MRT_REG_COLOR_SRGB) | COND(emit->key.half_precision, A4XX_SP_FS_MRT_REG_HALF_PRECISION)); } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c index d2bc5fe..213b29c 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c +++ 
b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c @@ -187,9 +187,9 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size); break; case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_CUBE_ARRAY: /* ?? not sure about _CUBE_ARRAY */ + case PIPE_TEXTURE_CUBE_ARRAY: so->texconst3 = - A4XX_TEX_CONST_3_DEPTH(1) | + A4XX_TEX_CONST_3_DEPTH(prsc->array_size / 6) | A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size); break; case PIPE_TEXTURE_3D: diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h index 00b6acb..29944b7 100644 --- a/src/gallium/drivers/freedreno/adreno_common.xml.h +++ b/src/gallium/drivers/freedreno/adreno_common.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28) Copyright (C) 2013-2015 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h index 98a90e2..432dce3 100644 --- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h +++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from 
are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28) Copyright (C) 2013-2015 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 8e6d431..0b6b9fb 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -131,11 +131,13 @@ static void fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, unsigned flags) { + struct fd_ringbuffer *ring = fd_context(pctx)->ring; + fd_context_render(pctx); if (fence) { fd_screen_fence_ref(pctx->screen, fence, NULL); - *fence = fd_fence_create(pctx); + *fence = fd_fence_create(pctx, fd_ringbuffer_timestamp(ring)); } } diff --git a/src/gallium/drivers/freedreno/freedreno_fence.c b/src/gallium/drivers/freedreno/freedreno_fence.c index 04a9fea..5125f09 100644 --- a/src/gallium/drivers/freedreno/freedreno_fence.c +++ b/src/gallium/drivers/freedreno/freedreno_fence.c @@ -50,35 +50,18 @@ fd_screen_fence_ref(struct pipe_screen *pscreen, *ptr = pfence; } -/* TODO we need to spiff out libdrm_freedreno a bit to allow passing - * the timeout.. and maybe a better way to check if fence has been - * signaled. 
The current implementation is a bit lame for now to - * avoid bumping libdrm version requirement. - */ - -boolean fd_screen_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence) -{ - uint32_t timestamp = fd_ringbuffer_timestamp(fence->ctx->ring); - - /* TODO util helper for compare w/ rollover? */ - return timestamp >= fence->timestamp; -} - boolean fd_screen_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, uint64_t timeout) { - if (!timeout) - return fd_screen_fence_signalled(screen, fence); - - if (fd_pipe_wait(fence->screen->pipe, fence->timestamp)) + if (fd_pipe_wait_timeout(fence->screen->pipe, fence->timestamp, timeout)) return false; return true; } -struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx) +struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx, + uint32_t timestamp) { struct pipe_fence_handle *fence; struct fd_context *ctx = fd_context(pctx); @@ -91,7 +74,7 @@ struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx) fence->ctx = ctx; fence->screen = ctx->screen; - fence->timestamp = fd_ringbuffer_timestamp(ctx->ring); + fence->timestamp = timestamp; return fence; } diff --git a/src/gallium/drivers/freedreno/freedreno_fence.h b/src/gallium/drivers/freedreno/freedreno_fence.h index e36bcc4..06c314a 100644 --- a/src/gallium/drivers/freedreno/freedreno_fence.h +++ b/src/gallium/drivers/freedreno/freedreno_fence.h @@ -34,11 +34,10 @@ void fd_screen_fence_ref(struct pipe_screen *pscreen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *pfence); -boolean fd_screen_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *pfence); boolean fd_screen_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *pfence, uint64_t timeout); -struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx); +struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx, + uint32_t timestamp); #endif /* FREEDRENO_FENCE_H_ */ 
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index 709ad4e..98de096 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -222,7 +222,7 @@ fd_resource_transfer_map(struct pipe_context *pctx, ptrans->level = level; ptrans->usage = usage; ptrans->box = *box; - ptrans->stride = slice->pitch * rsc->cpp; + ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp; ptrans->layer_stride = slice->size0; if (usage & PIPE_TRANSFER_READ) @@ -375,9 +375,11 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment) for (level = 0; level <= prsc->last_level; level++) { struct fd_resource_slice *slice = fd_resource_slice(rsc, level); + uint32_t blocks; slice->pitch = width = align(width, 32); slice->offset = size; + blocks = util_format_get_nblocks(prsc->format, width, height); /* 1d array and 2d array textures must all have the same layer size * for each miplevel on a3xx. 3d textures can have different layer * sizes for high levels, but the hw auto-sizer is buggy (or at least @@ -387,9 +389,9 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment) if (prsc->target == PIPE_TEXTURE_3D && ( level == 1 || (level > 1 && rsc->slices[level - 1].size0 > 0xf000))) - slice->size0 = align(slice->pitch * height * rsc->cpp, alignment); + slice->size0 = align(blocks * rsc->cpp, alignment); else if (level == 0 || rsc->layer_first || alignment == 1) - slice->size0 = align(slice->pitch * height * rsc->cpp, alignment); + slice->size0 = align(blocks * rsc->cpp, alignment); else slice->size0 = rsc->slices[level - 1].size0; @@ -459,7 +461,6 @@ fd_resource_create(struct pipe_screen *pscreen, if (is_a4xx(fd_screen(pscreen))) { switch (tmpl->target) { case PIPE_TEXTURE_3D: - /* TODO 3D_ARRAY? 
*/ rsc->layer_first = false; break; default: diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index b55f5b3..86e9a21 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -163,7 +163,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MULTISAMPLE: case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_CUBE_MAP_ARRAY: case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: case PIPE_CAP_START_INSTANCE: case PIPE_CAP_COMPUTE: @@ -176,6 +175,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: return is_a3xx(screen) || is_a4xx(screen); case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: @@ -191,8 +191,13 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 16383; case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return is_a3xx(screen); + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_CUBE_MAP_ARRAY: + return is_a4xx(screen); + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; @@ -202,7 +207,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return is_ir3(screen) ? 130 : 120; /* Unsupported features. 
*/ - case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: @@ -230,8 +234,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: - case PIPE_CAP_TEXTURE_FLOAT_LINEAR: - case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_DEPTH_BOUNDS_TEST: return 0; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 0ab3345..071901a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1636,6 +1636,11 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) coord[i] = ir3_SHL_B(b, coord[i], 0, lod, 0); } + /* the array coord for cube arrays needs 0.5 added to it */ + if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array && + opc != OPC_ISAML) + coord[3] = ir3_ADD_F(b, coord[3], 0, create_immed(b, fui(0.5)), 0); + /* * lay out the first argument in the proper order: * - actual coordinates first @@ -1759,6 +1764,12 @@ emit_tex_txs(struct ir3_compile *ctx, nir_tex_instr *tex) tex_info(tex, &flags, &coords); + /* Actually we want the number of dimensions, not coordinates. This + * distinction only matters for cubes. 
+ */ + if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) + coords = 2; + dst = get_dst(ctx, &tex->dest, 4); compile_assert(ctx, tex->num_srcs == 1); @@ -2301,7 +2312,7 @@ emit_instructions(struct ir3_compile *ctx) ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs); /* Create inputs in first block: */ - ctx->block = get_block(ctx, fxn->start_block); + ctx->block = get_block(ctx, nir_start_block(fxn)); ctx->in_block = ctx->block; list_addtail(&ctx->block->node, &ctx->ir->block_list); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c index dc9e462..bed7b7b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c @@ -29,6 +29,7 @@ #include "ir3_nir.h" #include "glsl/nir/nir_builder.h" +#include "glsl/nir/nir_control_flow.h" /* Based on nir_opt_peephole_select, and hacked up to more aggressively * flatten anything that can be flattened diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index eaf3b3c..8801839 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -189,7 +189,7 @@ ir3_ra_alloc_reg_set(void *memctx) } /* allocate the reg-set.. 
*/ - set->regs = ra_alloc_reg_set(set, ra_reg_count); + set->regs = ra_alloc_reg_set(set, ra_reg_count, true); set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count); set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count); diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index 24e0156..b2a639c 100644 --- a/src/gallium/drivers/i915/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c @@ -120,7 +120,8 @@ i915_surface_copy_render(struct pipe_context *pipe, util_blitter_blit_generic(i915->blitter, dst_view, &dstbox, src_view, src_box, src_width0, src_height0, - PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL); + PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, + FALSE); return; fallback: diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 3fae3bc..9346ea3 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -121,7 +121,8 @@ NV50_CODEGEN_SOURCES := \ codegen/nv50_ir_target_nv50.cpp \ codegen/nv50_ir_target_nv50.h \ codegen/nv50_ir_util.cpp \ - codegen/nv50_ir_util.h + codegen/nv50_ir_util.h \ + codegen/unordered_set.h NVC0_CODEGEN_SOURCES := \ codegen/nv50_ir_emit_gk110.cpp \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 3ddaeaf..ba1b085 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -29,8 +29,8 @@ #include <deque> #include <list> #include <vector> -#include <tr1/unordered_set> +#include "codegen/unordered_set.h" #include "codegen/nv50_ir_util.h" #include "codegen/nv50_ir_graph.h" @@ -585,10 +585,10 @@ public: static inline Value *get(Iterator&); - std::tr1::unordered_set<ValueRef *> uses; + unordered_set<ValueRef *> uses; std::list<ValueDef *> defs; - typedef std::tr1::unordered_set<ValueRef *>::iterator UseIterator; - typedef 
std::tr1::unordered_set<ValueRef *>::const_iterator UseCIterator; + typedef unordered_set<ValueRef *>::iterator UseIterator; + typedef unordered_set<ValueRef *>::const_iterator UseCIterator; typedef std::list<ValueDef *>::iterator DefIterator; typedef std::list<ValueDef *>::const_iterator DefCIterator; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index f06056f..8f15429 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -933,6 +933,7 @@ CodeEmitterGK110::emitCVT(const Instruction *i) code[0] |= typeSizeofLog2(dType) << 10; code[0] |= typeSizeofLog2(i->sType) << 12; + code[1] |= i->subOp << 12; if (isSignedIntType(dType)) code[0] |= 0x4000; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index ef5c87d..6e22788 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -818,6 +818,7 @@ CodeEmitterGM107::emitI2F() emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); emitCC (0x2f); emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); + emitField(0x29, 2, insn->subOp); emitRND (0x27, rnd, -1); emitField(0x0d, 1, isSignedType(insn->sType)); emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); @@ -850,6 +851,7 @@ CodeEmitterGM107::emitI2I() emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); emitCC (0x2f); emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); + emitField(0x29, 2, insn->subOp); emitField(0x0d, 1, isSignedType(insn->sType)); emitField(0x0c, 1, isSignedType(insn->dType)); emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index f607f3b..6bf5219 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -1020,6 +1020,10 @@ CodeEmitterNVC0::emitCVT(Instruction *i) code[0] |= util_logbase2(typeSizeof(dType)) << 20; code[0] |= util_logbase2(typeSizeof(i->sType)) << 23; + // for 8/16 source types, the byte/word is in subOp. word 1 is + // represented as 2. + code[1] |= i->subOp << 0x17; + if (sat) code[0] |= 0x20; if (abs) @@ -2614,11 +2618,12 @@ private: int imul; // integer MUL to MUL delay 3 } res; struct ScoreData { - int r[64]; + int r[256]; int p[8]; int c; } rd, wr; int base; + int regs; void rebase(const int base) { @@ -2627,7 +2632,7 @@ private: return; this->base = 0; - for (int i = 0; i < 64; ++i) { + for (int i = 0; i < regs; ++i) { rd.r[i] += delta; wr.r[i] += delta; } @@ -2646,16 +2651,17 @@ private: res.imul += delta; res.tex += delta; } - void wipe() + void wipe(int regs) { memset(&rd, 0, sizeof(rd)); memset(&wr, 0, sizeof(wr)); memset(&res, 0, sizeof(res)); + this->regs = regs; } int getLatest(const ScoreData& d) const { int max = 0; - for (int i = 0; i < 64; ++i) + for (int i = 0; i < regs; ++i) if (d.r[i] > max) max = d.r[i]; for (int i = 0; i < 8; ++i) @@ -2690,7 +2696,7 @@ private: } void setMax(const RegScores *that) { - for (int i = 0; i < 64; ++i) { + for (int i = 0; i < regs; ++i) { rd.r[i] = MAX2(rd.r[i], that->rd.r[i]); wr.r[i] = MAX2(wr.r[i], that->wr.r[i]); } @@ -2711,7 +2717,7 @@ private: } void print(int cycle) { - for (int i = 0; i < 64; ++i) { + for (int i = 0; i < regs; ++i) { if (rd.r[i] > cycle) INFO("rd $r%i @ %i\n", i, rd.r[i]); if (wr.r[i] > cycle) @@ -2806,9 +2812,10 @@ SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const bool SchedDataCalculator::visit(Function *func) { + int regs = targ->getFileSize(FILE_GPR) + 1; scoreBoards.resize(func->cfg.getSize()); for (size_t i = 0; i < 
scoreBoards.size(); ++i) - scoreBoards[i].wipe(); + scoreBoards[i].wipe(regs); return true; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 4847a0f..f153674 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -2990,9 +2990,15 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_UBFE: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); - src1 = fetchSrc(1, c); - src2 = fetchSrc(2, c); - mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1); + if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE && + tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) { + src1 = loadImm(NULL, tgsi.getSrc(2).getValueU32(c, info) << 8 | + tgsi.getSrc(1).getValueU32(c, info)); + } else { + src1 = fetchSrc(1, c); + src2 = fetchSrc(2, c); + mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1); + } mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1); } break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp index 1f3fce2..420cc4e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp @@ -193,100 +193,16 @@ GM107LoweringPass::visit(Instruction *i) checkPredicate(i); switch (i->op) { - case OP_TEX: - case OP_TXB: - case OP_TXL: - case OP_TXF: - case OP_TXG: - return handleTEX(i->asTex()); - case OP_TXD: - return handleTXD(i->asTex()); - case OP_TXLQ: - return handleTXLQ(i->asTex()); - case OP_TXQ: - return handleTXQ(i->asTex()); - case OP_EX2: - bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0)); - i->setSrc(0, i->getDef(0)); - break; - case OP_POW: - return handlePOW(i); - case OP_DIV: - return handleDIV(i); - case OP_MOD: - return handleMOD(i); - case OP_SQRT: - return 
handleSQRT(i); - case OP_EXPORT: - return handleEXPORT(i); case OP_PFETCH: return handlePFETCH(i); - case OP_EMIT: - case OP_RESTART: - return handleOUT(i); - case OP_RDSV: - return handleRDSV(i); - case OP_WRSV: - return handleWRSV(i); - case OP_LOAD: - if (i->src(0).getFile() == FILE_SHADER_INPUT) { - if (prog->getType() == Program::TYPE_COMPUTE) { - i->getSrc(0)->reg.file = FILE_MEMORY_CONST; - i->getSrc(0)->reg.fileIndex = 0; - } else - if (prog->getType() == Program::TYPE_GEOMETRY && - i->src(0).isIndirect(0)) { - // XXX: this assumes vec4 units - Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), - i->getIndirect(0, 0), bld.mkImm(4)); - i->setIndirect(0, 0, ptr); - i->op = OP_VFETCH; - } else { - i->op = OP_VFETCH; - assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP - } - } else if (i->src(0).getFile() == FILE_MEMORY_CONST) { - if (i->src(0).isIndirect(1)) { - Value *ptr; - if (i->src(0).isIndirect(0)) - ptr = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(), - i->getIndirect(0, 1), bld.mkImm(0x1010), - i->getIndirect(0, 0)); - else - ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), - i->getIndirect(0, 1), bld.mkImm(16)); - i->setIndirect(0, 1, NULL); - i->setIndirect(0, 0, ptr); - i->subOp = NV50_IR_SUBOP_LDC_IS; - } - } - break; - case OP_ATOM: - { - const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL; - handleATOM(i); - handleCasExch(i, cctl); - } - break; - case OP_SULDB: - case OP_SULDP: - case OP_SUSTB: - case OP_SUSTP: - case OP_SUREDB: - case OP_SUREDP: - handleSurfaceOpNVE4(i->asTex()); - break; case OP_DFDX: case OP_DFDY: - handleDFDX(i); - break; + return handleDFDX(i); case OP_POPCNT: - handlePOPCNT(i); - break; + return handlePOPCNT(i); default: - break; + return NVC0LoweringPass::visit(i); } - return true; } } // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index c3c302d..b1f4065 100644 --- 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -224,7 +224,7 @@ NVC0LegalizePostRA::findFirstUses( const Instruction *texi, const Instruction *insn, std::list<TexUse> &uses, - std::tr1::unordered_set<const Instruction *>& visited) + unordered_set<const Instruction *>& visited) { for (int d = 0; insn->defExists(d); ++d) { Value *v = insn->getDef(d); @@ -323,7 +323,7 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn) if (!uses) return false; for (size_t i = 0; i < texes.size(); ++i) { - std::tr1::unordered_set<const Instruction *> visited; + unordered_set<const Instruction *> visited; findFirstUses(texes[i], texes[i], uses[i], visited); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 260e101..2ce52e5 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include <tr1/unordered_set> - #include "codegen/nv50_ir.h" #include "codegen/nv50_ir_build_util.h" @@ -73,7 +71,7 @@ private: inline bool insnDominatedBy(const Instruction *, const Instruction *) const; void findFirstUses(const Instruction *tex, const Instruction *def, std::list<TexUse>&, - std::tr1::unordered_set<const Instruction *>&); + unordered_set<const Instruction *>&); void findOverwritingDefs(const Instruction *tex, Instruction *insn, const BasicBlock *term, std::list<TexUse>&); @@ -111,10 +109,11 @@ protected: void checkPredicate(Instruction *); + virtual bool visit(Instruction *); + private: virtual bool visit(Function *); virtual bool visit(BasicBlock *); - virtual bool visit(Instruction *); void readTessCoord(LValue *dst, int c); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index cea96dc..b01ef41 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1023,27 +1023,53 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) case OP_AND: { - CmpInstruction *cmp = i->getSrc(t)->getInsn()->asCmp(); - if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1) - return; - if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32)) - return; - if (imm0.reg.data.f32 != 1.0) - return; - if (i->getSrc(t)->getInsn()->dType != TYPE_U32) - return; + Instruction *src = i->getSrc(t)->getInsn(); + ImmediateValue imm1; + if (imm0.reg.data.u32 == 0) { + i->op = OP_MOV; + i->setSrc(0, new_ImmediateValue(prog, 0u)); + i->src(0).mod = Modifier(0); + i->setSrc(1, NULL); + } else if (imm0.reg.data.u32 == ~0U) { + i->op = i->src(t).mod.getOp(); + if (t) { + i->setSrc(0, i->getSrc(t)); + i->src(0).mod = i->src(t).mod; + } + i->setSrc(1, NULL); + } else if (src->asCmp()) { + CmpInstruction *cmp = src->asCmp(); + if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1) + return; + if 
(!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32)) + return; + if (imm0.reg.data.f32 != 1.0) + return; + if (cmp->dType != TYPE_U32) + return; - i->getSrc(t)->getInsn()->dType = TYPE_F32; - if (i->src(t).mod != Modifier(0)) { - assert(i->src(t).mod == Modifier(NV50_IR_MOD_NOT)); - i->src(t).mod = Modifier(0); - cmp->setCond = inverseCondCode(cmp->setCond); - } - i->op = OP_MOV; - i->setSrc(s, NULL); - if (t) { - i->setSrc(0, i->getSrc(t)); - i->setSrc(t, NULL); + cmp->dType = TYPE_F32; + if (i->src(t).mod != Modifier(0)) { + assert(i->src(t).mod == Modifier(NV50_IR_MOD_NOT)); + i->src(t).mod = Modifier(0); + cmp->setCond = inverseCondCode(cmp->setCond); + } + i->op = OP_MOV; + i->setSrc(s, NULL); + if (t) { + i->setSrc(0, i->getSrc(t)); + i->setSrc(t, NULL); + } + } else if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32) && + src->op == OP_SHR && + src->src(1).getImmediate(imm1) && + i->src(t).mod == Modifier(0) && + util_is_power_of_two(imm0.reg.data.u32 + 1)) { + // low byte = offset, high byte = width + uint32_t ext = (util_last_bit(imm0.reg.data.u32) << 8) | imm1.reg.data.u32; + i->op = OP_EXTBF; + i->setSrc(0, src->getSrc(0)); + i->setSrc(1, new_ImmediateValue(prog, ext)); } } break; @@ -1106,6 +1132,84 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->op = OP_MOV; break; } + case OP_CVT: { + Storage res; + + // TODO: handle 64-bit values properly + if (typeSizeof(i->dType) == 8 || typeSizeof(i->sType) == 8) + return; + + // TODO: handle single byte/word extractions + if (i->subOp) + return; + + bld.setPosition(i, true); /* make sure bld is init'ed */ + +#define CASE(type, dst, fmin, fmax, imin, imax, umin, umax) \ + case type: \ + switch (i->sType) { \ + case TYPE_F32: \ + res.data.dst = util_iround(i->saturate ? \ + CLAMP(imm0.reg.data.f32, fmin, fmax) : \ + imm0.reg.data.f32); \ + break; \ + case TYPE_S32: \ + res.data.dst = i->saturate ? 
\ + CLAMP(imm0.reg.data.s32, imin, imax) : \ + imm0.reg.data.s32; \ + break; \ + case TYPE_U32: \ + res.data.dst = i->saturate ? \ + CLAMP(imm0.reg.data.u32, umin, umax) : \ + imm0.reg.data.u32; \ + break; \ + case TYPE_S16: \ + res.data.dst = i->saturate ? \ + CLAMP(imm0.reg.data.s16, imin, imax) : \ + imm0.reg.data.s16; \ + break; \ + case TYPE_U16: \ + res.data.dst = i->saturate ? \ + CLAMP(imm0.reg.data.u16, umin, umax) : \ + imm0.reg.data.u16; \ + break; \ + default: return; \ + } \ + i->setSrc(0, bld.mkImm(res.data.dst)); \ + break + + switch(i->dType) { + CASE(TYPE_U16, u16, 0, UINT16_MAX, 0, UINT16_MAX, 0, UINT16_MAX); + CASE(TYPE_S16, s16, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, 0, INT16_MAX); + CASE(TYPE_U32, u32, 0, UINT32_MAX, 0, INT32_MAX, 0, UINT32_MAX); + CASE(TYPE_S32, s32, INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX, 0, INT32_MAX); + case TYPE_F32: + switch (i->sType) { + case TYPE_F32: + res.data.f32 = i->saturate ? + CLAMP(imm0.reg.data.f32, 0.0f, 1.0f) : + imm0.reg.data.f32; + break; + case TYPE_U16: res.data.f32 = (float) imm0.reg.data.u16; break; + case TYPE_U32: res.data.f32 = (float) imm0.reg.data.u32; break; + case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break; + case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.f32)); + break; + default: + return; + } +#undef CASE + + i->setType(i->dType); /* Remove i->sType, which we don't need anymore */ + i->op = OP_MOV; + i->saturate = 0; + i->src(0).mod = Modifier(0); /* Clear the already applied modifier */ + break; + } default: return; } @@ -1212,7 +1316,8 @@ private: void handleRCP(Instruction *); void handleSLCT(Instruction *); void handleLOGOP(Instruction *); - void handleCVT(Instruction *); + void handleCVT_NEG(Instruction *); + void handleCVT_EXTBF(Instruction *); void handleSUCLAMP(Instruction *); BuildUtil bld; @@ -1463,12 +1568,12 @@ AlgebraicOpt::handleLOGOP(Instruction *logop) // nv50: // 
F2I(NEG(I2F(ABS(SET)))) void -AlgebraicOpt::handleCVT(Instruction *cvt) +AlgebraicOpt::handleCVT_NEG(Instruction *cvt) { + Instruction *insn = cvt->getSrc(0)->getInsn(); if (cvt->sType != TYPE_F32 || cvt->dType != TYPE_S32 || cvt->src(0).mod != Modifier(0)) return; - Instruction *insn = cvt->getSrc(0)->getInsn(); if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32) return; if (insn->src(0).mod != Modifier(0)) @@ -1498,6 +1603,104 @@ AlgebraicOpt::handleCVT(Instruction *cvt) delete_Instruction(prog, cvt); } +// Some shaders extract packed bytes out of words and convert them to +// e.g. float. The Fermi+ CVT instruction can extract those directly, as can +// nv50 for word sizes. +// +// CVT(EXTBF(x, byte/word)) +// CVT(AND(bytemask, x)) +// CVT(AND(bytemask, SHR(x, 8/16/24))) +// CVT(SHR(x, 16/24)) +void +AlgebraicOpt::handleCVT_EXTBF(Instruction *cvt) +{ + Instruction *insn = cvt->getSrc(0)->getInsn(); + ImmediateValue imm; + Value *arg = NULL; + unsigned width, offset; + if ((cvt->sType != TYPE_U32 && cvt->sType != TYPE_S32) || !insn) + return; + if (insn->op == OP_EXTBF && insn->src(1).getImmediate(imm)) { + width = (imm.reg.data.u32 >> 8) & 0xff; + offset = imm.reg.data.u32 & 0xff; + arg = insn->getSrc(0); + + if (width != 8 && width != 16) + return; + if (width == 8 && offset & 0x7) + return; + if (width == 16 && offset & 0xf) + return; + } else if (insn->op == OP_AND) { + int s; + if (insn->src(0).getImmediate(imm)) + s = 0; + else if (insn->src(1).getImmediate(imm)) + s = 1; + else + return; + + if (imm.reg.data.u32 == 0xff) + width = 8; + else if (imm.reg.data.u32 == 0xffff) + width = 16; + else + return; + + arg = insn->getSrc(!s); + Instruction *shift = arg->getInsn(); + offset = 0; + if (shift && shift->op == OP_SHR && + shift->sType == cvt->sType && + shift->src(1).getImmediate(imm) && + ((width == 8 && (imm.reg.data.u32 & 0x7) == 0) || + (width == 16 && (imm.reg.data.u32 & 0xf) == 0))) { + arg = shift->getSrc(0); + offset = imm.reg.data.u32; + } + 
} else if (insn->op == OP_SHR && + insn->sType == cvt->sType && + insn->src(1).getImmediate(imm)) { + arg = insn->getSrc(0); + if (imm.reg.data.u32 == 24) { + width = 8; + offset = 24; + } else if (imm.reg.data.u32 == 16) { + width = 16; + offset = 16; + } else { + return; + } + } + + if (!arg) + return; + + // Irrespective of what came earlier, we can undo a shift on the argument + // by adjusting the offset. + Instruction *shift = arg->getInsn(); + if (shift && shift->op == OP_SHL && + shift->src(1).getImmediate(imm) && + ((width == 8 && (imm.reg.data.u32 & 0x7) == 0) || + (width == 16 && (imm.reg.data.u32 & 0xf) == 0)) && + imm.reg.data.u32 <= offset) { + arg = shift->getSrc(0); + offset -= imm.reg.data.u32; + } + + // The unpackSnorm lowering still leaves a few shifts behind, but it's too + // annoying to detect them. + + if (width == 8) { + cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U8 : TYPE_S8; + } else { + assert(width == 16); + cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U16 : TYPE_S16; + } + cvt->setSrc(0, arg); + cvt->subOp = offset >> 3; +} + // SUCLAMP dst, (ADD b imm), k, 0 -> SUCLAMP dst, b, k, imm (if imm fits s6) void AlgebraicOpt::handleSUCLAMP(Instruction *insn) @@ -1568,7 +1771,9 @@ AlgebraicOpt::visit(BasicBlock *bb) handleLOGOP(i); break; case OP_CVT: - handleCVT(i); + handleCVT_NEG(i); + if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32)) + handleCVT_EXTBF(i); break; case OP_SUCLAMP: handleSUCLAMP(i); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 78bc97f..0cd21cf 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -25,7 +25,6 @@ #include <stack> #include <limits> -#include <tr1/unordered_set> namespace nv50_ir { @@ -1551,7 +1550,7 @@ SpillCodeInserter::run(const std::list<ValuePair>& lst) // Keep track of which instructions to delete later. 
Deleting them // inside the loop is unsafe since a single instruction may have // multiple destinations that all need to be spilled (like OP_SPLIT). - std::tr1::unordered_set<Instruction *> to_del; + unordered_set<Instruction *> to_del; for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end(); ++d) { @@ -1593,7 +1592,7 @@ SpillCodeInserter::run(const std::list<ValuePair>& lst) } } - for (std::tr1::unordered_set<Instruction *>::const_iterator it = to_del.begin(); + for (unordered_set<Instruction *>::const_iterator it = to_del.begin(); it != to_del.end(); ++it) delete_Instruction(func->getProgram(), *it); } diff --git a/src/gallium/drivers/nouveau/codegen/unordered_set.h b/src/gallium/drivers/nouveau/codegen/unordered_set.h new file mode 100644 index 0000000..8ef6d46 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/unordered_set.h @@ -0,0 +1,48 @@ +#ifndef __NV50_UNORDERED_SET_H__ +#define __NV50_UNORDERED_SET_H__ + +#if (__cplusplus >= 201103L) || defined(ANDROID) +#include <unordered_set> +#else +#include <tr1/unordered_set> +#endif + +namespace nv50_ir { + +#if __cplusplus >= 201103L +using std::unordered_set; +#elif !defined(ANDROID) +using std::tr1::unordered_set; +#else // Android release before lollipop +using std::isfinite; +typedef std::tr1::unordered_set<void *> voidptr_unordered_set; + +template <typename V> +class unordered_set : public voidptr_unordered_set { + public: + typedef voidptr_unordered_set _base; + typedef _base::iterator _biterator; + typedef _base::const_iterator const_biterator; + + class iterator : public _biterator { + public: + iterator(const _biterator & i) : _biterator(i) {} + V operator*() const { return reinterpret_cast<V>(*_biterator(*this)); } + }; + class const_iterator : public const_biterator { + public: + const_iterator(const iterator & i) : const_biterator(i) {} + const_iterator(const const_biterator & i) : const_biterator(i) {} + const V operator*() const { return reinterpret_cast<const 
V>(*const_biterator(*this)); } + }; + + iterator begin() { return _base::begin(); } + iterator end() { return _base::end(); } + const_iterator begin() const { return _base::begin(); } + const_iterator end() const { return _base::end(); } +}; +#endif + +} // namespace nv50_ir + +#endif // __NV50_UNORDERED_SET_H__ diff --git a/src/gallium/drivers/nouveau/nouveau_compiler.c b/src/gallium/drivers/nouveau/nouveau_compiler.c index 8660498..495450b 100644 --- a/src/gallium/drivers/nouveau/nouveau_compiler.c +++ b/src/gallium/drivers/nouveau/nouveau_compiler.c @@ -190,6 +190,10 @@ main(int argc, char *argv[]) type = PIPE_SHADER_GEOMETRY; else if (!strncmp(text, "COMP", 4)) type = PIPE_SHADER_COMPUTE; + else if (!strncmp(text, "TESS_CTRL", 9)) + type = PIPE_SHADER_TESS_CTRL; + else if (!strncmp(text, "TESS_EVAL", 9)) + type = PIPE_SHADER_TESS_EVAL; else { _debug_printf("Unrecognized TGSI header\n"); return 1; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index 9505a0b..410e631 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -117,7 +117,6 @@ nv50_blend_state_create(struct pipe_context *pipe, struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj); int i; bool emit_common_func = cso->rt[0].blend_enable; - uint32_t ms; if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) { SB_BEGIN_3D(so, BLEND_INDEPENDENT, 1); @@ -189,15 +188,6 @@ nv50_blend_state_create(struct pipe_context *pipe, SB_DATA (so, nv50_colormask(cso->rt[0].colormask)); } - ms = 0; - if (cso->alpha_to_coverage) - ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE; - if (cso->alpha_to_one) - ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE; - - SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1); - SB_DATA (so, ms); - assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); return so; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c 
b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index 985603d..b304a17 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -1,4 +1,6 @@ +#include "util/u_format.h" + #include "nv50/nv50_context.h" #include "nv50/nv50_defs.xml.h" @@ -314,6 +316,25 @@ nv50_validate_derived_2(struct nv50_context *nv50) } static void +nv50_validate_derived_3(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct pipe_framebuffer_state *fb = &nv50->framebuffer; + uint32_t ms = 0; + + if ((!fb->nr_cbufs || !fb->cbufs[0] || + !util_format_is_pure_integer(fb->cbufs[0]->format)) && nv50->blend) { + if (nv50->blend->pipe.alpha_to_coverage) + ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE; + if (nv50->blend->pipe.alpha_to_one) + ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE; + } + + BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1); + PUSH_DATA (push, ms); +} + +static void nv50_validate_clip(struct nv50_context *nv50) { struct nouveau_pushbuf *push = nv50->base.pushbuf; @@ -474,6 +495,7 @@ static struct state_validate { { nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, { nv50_validate_derived_2, NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER }, + { nv50_validate_derived_3, NV50_NEW_BLEND | NV50_NEW_FRAMEBUFFER }, { nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, { nv50_constbufs_validate, NV50_NEW_CONSTBUF }, diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h index cf75d1e..4b1d00c 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h @@ -19,7 +19,7 @@ struct nv50_blend_stateobj { struct pipe_blend_state pipe; int size; - uint32_t state[84]; // TODO: allocate less if !independent_blend_enable + uint32_t state[82]; // TODO: allocate less if 
!independent_blend_enable }; struct nv50_rasterizer_stateobj { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index b1ae016..64348b3 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -68,6 +68,10 @@ nv50_2d_format(enum pipe_format format, bool dst, bool dst_src_equal) return NV50_SURFACE_FORMAT_R16_UNORM; case 4: return NV50_SURFACE_FORMAT_BGRA8_UNORM; + case 8: + return NV50_SURFACE_FORMAT_RGBA16_FLOAT; + case 16: + return NV50_SURFACE_FORMAT_RGBA32_FLOAT; default: return 0; } @@ -1003,6 +1007,8 @@ nv50_blitctx_prepare_state(struct nv50_blitctx *blit) /* zsa state */ BEGIN_NV04(push, NV50_3D(DEPTH_TEST_ENABLE), 1); PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(DEPTH_BOUNDS_EN), 1); + PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_3D(STENCIL_ENABLE), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_3D(ALPHA_TEST_ENABLE), 1); @@ -1387,18 +1393,24 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info) PUSH_DATA (push, info->dst.box.z + i); } else { const unsigned z = info->dst.box.z + i; + const uint64_t address = dst->base.address + + dst->level[info->dst.level].offset + + z * dst->layer_stride; BEGIN_NV04(push, NV50_2D(DST_ADDRESS_HIGH), 2); - PUSH_DATAh(push, dst->base.address + z * dst->layer_stride); - PUSH_DATA (push, dst->base.address + z * dst->layer_stride); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); } if (src->layout_3d) { /* not possible because of depth tiling */ assert(0); } else { const unsigned z = info->src.box.z + i; + const uint64_t address = src->base.address + + src->level[info->src.level].offset + + z * src->layer_stride; BEGIN_NV04(push, NV50_2D(SRC_ADDRESS_HIGH), 2); - PUSH_DATAh(push, src->base.address + z * src->layer_stride); - PUSH_DATA (push, src->base.address + z * src->layer_stride); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); } BEGIN_NV04(push, 
NV50_2D(BLIT_SRC_Y_INT), 1); /* trigger */ PUSH_DATA (push, srcy >> 32); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 84f8db6..7a15a11 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -132,6 +132,9 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0) pipe_resource_reference(res, NULL); } util_dynarray_fini(&nvc0->global_residents); + + if (nvc0->tcp_empty) + nvc0->base.pipe.delete_tcs_state(&nvc0->base.pipe, nvc0->tcp_empty); } static void @@ -306,13 +309,6 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) pipe->memory_barrier = nvc0_memory_barrier; pipe->get_sample_position = nvc0_context_get_sample_position; - if (!screen->cur_ctx) { - nvc0->state = screen->save_state; - screen->cur_ctx = nvc0; - nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx); - } - screen->base.pushbuf->kick_notify = nvc0_default_kick_notify; - nvc0_init_query_functions(nvc0); nvc0_init_surface_functions(nvc0); nvc0_init_state_functions(nvc0); @@ -326,6 +322,21 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) /* shader builtin library is per-screen, but we need a context for m2mf */ nvc0_program_library_upload(nvc0); + nvc0_program_init_tcp_empty(nvc0); + if (!nvc0->tcp_empty) + goto out_err; + /* set the empty tctl prog on next draw in case one is never set */ + nvc0->dirty |= NVC0_NEW_TCTLPROG; + + /* now that there are no more opportunities for errors, set the current + * context if there isn't already one. 
+ */ + if (!screen->cur_ctx) { + nvc0->state = screen->save_state; + screen->cur_ctx = nvc0; + nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx); + } + screen->base.pushbuf->kick_notify = nvc0_default_kick_notify; /* add permanently resident buffers to bufctxts */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index f449942..df1a891 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -128,6 +128,8 @@ struct nvc0_context { struct nvc0_program *fragprog; struct nvc0_program *compprog; + struct nvc0_program *tcp_empty; + struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS]; uint16_t constbuf_dirty[6]; uint16_t constbuf_valid[6]; @@ -227,6 +229,7 @@ void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); void nvc0_program_library_upload(struct nvc0_context *); uint32_t nvc0_program_symbol_offset(const struct nvc0_program *, uint32_t label); +void nvc0_program_init_tcp_empty(struct nvc0_context *); /* nvc0_query.c */ void nvc0_init_query_functions(struct nvc0_context *); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 507a250..12f1bb7 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -22,6 +22,8 @@ #include "pipe/p_defines.h" +#include "tgsi/tgsi_ureg.h" + #include "nvc0/nvc0_context.h" #include "codegen/nv50_ir_driver.h" @@ -799,3 +801,18 @@ nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label) return prog->code_base + base + syms[i].offset; return prog->code_base; /* no symbols or symbol not found */ } + +void +nvc0_program_init_tcp_empty(struct nvc0_context *nvc0) +{ + struct ureg_program *ureg; + + ureg = ureg_create(TGSI_PROCESSOR_TESS_CTRL); + if (!ureg) + return; + + ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT, 1); + ureg_END(ureg); + + 
nvc0->tcp_empty = ureg_create_shader_and_destroy(ureg, &nvc0->base.pipe); +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 8aa127a..8f8ac2d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -148,8 +148,13 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0) BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1); PUSH_DATA (push, tp->num_gprs); } else { - BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1); + tp = nvc0->tcp_empty; + /* not a whole lot we can do to handle this failure */ + if (!nvc0_program_validate(nvc0, tp)) + assert(!"unable to validate empty tcp"); + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2); PUSH_DATA (push, 0x20); + PUSH_DATA (push, tp->code_base); } nvc0_program_update_context_state(nvc0, tp, 1); } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 2a33857..ee29912 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -90,7 +90,6 @@ nvc0_blend_state_create(struct pipe_context *pipe, struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj); int i; int r; /* reference */ - uint32_t ms; uint8_t blend_en = 0; bool indep_masks = false; bool indep_funcs = false; @@ -176,15 +175,6 @@ nvc0_blend_state_create(struct pipe_context *pipe, } } - ms = 0; - if (cso->alpha_to_coverage) - ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE; - if (cso->alpha_to_one) - ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE; - - SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1); - SB_DATA (so, ms); - assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); return so; } @@ -234,7 +224,7 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, SB_IMMED_3D(so, MULTISAMPLE_ENABLE, cso->multisample); SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth); - if (cso->line_smooth) + if (cso->line_smooth || 
cso->multisample) SB_BEGIN_3D(so, LINE_WIDTH_SMOOTH, 1); else SB_BEGIN_3D(so, LINE_WIDTH_ALIASED, 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index ce1119c..47bd66d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -1,4 +1,5 @@ +#include "util/u_format.h" #include "util/u_math.h" #include "nvc0/nvc0_context.h" @@ -555,6 +556,25 @@ nvc0_validate_derived_2(struct nvc0_context *nvc0) } static void +nvc0_validate_derived_3(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_framebuffer_state *fb = &nvc0->framebuffer; + uint32_t ms = 0; + + if ((!fb->nr_cbufs || !fb->cbufs[0] || + !util_format_is_pure_integer(fb->cbufs[0]->format)) && nvc0->blend) { + if (nvc0->blend->pipe.alpha_to_coverage) + ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE; + if (nvc0->blend->pipe.alpha_to_one) + ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE; + } + + BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_CTRL), 1); + PUSH_DATA (push, ms); +} + +static void nvc0_validate_tess_state(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; @@ -628,6 +648,7 @@ static struct state_validate { { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA | NVC0_NEW_RASTERIZER }, { nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER }, + { nvc0_validate_derived_3, NVC0_NEW_BLEND | NVC0_NEW_FRAMEBUFFER }, { nvc0_validate_clip, NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER | NVC0_NEW_VERTPROG | NVC0_NEW_TEVLPROG | diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h index 18fcc12..8bc33c6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h @@ -17,7 +17,7 @@ struct nvc0_blend_stateobj { struct pipe_blend_state pipe; int size; - uint32_t state[72]; + uint32_t 
state[70]; }; struct nvc0_rasterizer_stateobj { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 51a6f93..dbdf292 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -887,6 +887,7 @@ nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit) /* zsa state */ IMMED_NVC0(push, NVC0_3D(DEPTH_TEST_ENABLE), 0); + IMMED_NVC0(push, NVC0_3D(DEPTH_BOUNDS_EN), 0); IMMED_NVC0(push, NVC0_3D(STENCIL_ENABLE), 0); IMMED_NVC0(push, NVC0_3D(ALPHA_TEST_ENABLE), 0); @@ -1336,18 +1337,24 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) PUSH_DATA (push, info->dst.box.z + i); } else { const unsigned z = info->dst.box.z + i; + const uint64_t address = dst->base.address + + dst->level[info->dst.level].offset + + z * dst->layer_stride; BEGIN_NVC0(push, NVC0_2D(DST_ADDRESS_HIGH), 2); - PUSH_DATAh(push, dst->base.address + z * dst->layer_stride); - PUSH_DATA (push, dst->base.address + z * dst->layer_stride); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); } if (src->layout_3d) { /* not possible because of depth tiling */ assert(0); } else { const unsigned z = info->src.box.z + i; + const uint64_t address = src->base.address + + src->level[info->src.level].offset + + z * src->layer_stride; BEGIN_NVC0(push, NVC0_2D(SRC_ADDRESS_HIGH), 2); - PUSH_DATAh(push, src->base.address + z * src->layer_stride); - PUSH_DATA (push, src->base.address + z * src->layer_stride); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); } BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_Y_INT), 1); /* trigger */ PUSH_DATA (push, srcy >> 32); diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c index 14f93fb..e8f4087 100644 --- a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c +++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c @@ -693,7 +693,8 @@ void 
rc_init_regalloc_state(struct rc_regalloc_state *s) }; /* Allocate the main ra data structure */ - s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW); + s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW, + true); /* Create the register classes */ for (i = 0; i < RC_REG_CLASS_COUNT; i++) { diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 6ea8f24..b8cc316 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -667,7 +667,8 @@ static void r300_resource_copy_region(struct pipe_context *pipe, r300_blitter_begin(r300, R300_COPY); util_blitter_blit_generic(r300->blitter, dst_view, &dstbox, src_view, src_box, src_width0, src_height0, - PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL); + PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, + FALSE); r300_blitter_end(r300); pipe_surface_reference(&dst_view, NULL); diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index b0002c3..22a0950 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -732,7 +732,8 @@ void r600_resource_copy_region(struct pipe_context *ctx, r600_blitter_begin(ctx, R600_COPY_TEXTURE); util_blitter_blit_generic(rctx->blitter, dst_view, &dstbox, src_view, src_box, src_width0, src_height0, - PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL); + PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, + FALSE); r600_blitter_end(ctx); pipe_surface_reference(&dst_view, NULL); diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h index 381f06d..fdbe1c0 100644 --- a/src/gallium/drivers/r600/r600_isa.h +++ b/src/gallium/drivers/r600/r600_isa.h @@ -262,7 +262,7 @@ static const struct alu_op_info alu_op_table[] = { {"PRED_SETNE_PUSH_INT", 2, { 0x4D, 0x4D },{ AF_VS, AF_VS, AF_VS, AF_VS}, AF_PRED_PUSH | AF_CC_NE | AF_INT_CMP }, {"PRED_SETLT_PUSH_INT", 2, { 0x4E, 0x4E },{ AF_VS, AF_VS, AF_VS, 
AF_VS}, AF_PRED_PUSH | AF_CC_LT | AF_INT_CMP }, {"PRED_SETLE_PUSH_INT", 2, { 0x4F, 0x4F },{ AF_VS, AF_VS, AF_VS, AF_VS}, AF_PRED_PUSH | AF_CC_LE | AF_INT_CMP }, - {"FLT_TO_INT", 1, { 0x6B, 0x50 },{ AF_S, AF_S, AF_VS, AF_VS}, AF_INT_DST | AF_CVT }, + {"FLT_TO_INT", 1, { 0x6B, 0x50 },{ AF_S, AF_S, AF_V, AF_V}, AF_INT_DST | AF_CVT }, {"BFREV_INT", 1, { -1, 0x51 },{ 0, 0, AF_VS, AF_VS}, AF_INT_DST }, {"ADDC_UINT", 2, { -1, 0x52 },{ 0, 0, AF_VS, AF_VS}, AF_UINT_DST }, {"SUBB_UINT", 2, { -1, 0x53 },{ 0, 0, AF_VS, AF_VS}, AF_UINT_DST }, diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 9b66105..384ba80 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -90,7 +90,7 @@ struct r600_context; struct r600_bytecode; -struct r600_shader_key; +union r600_shader_key; /* This is an atom containing GPU commands that never change. * This is supposed to be copied directly into the CS. */ @@ -643,7 +643,7 @@ void r600_resource_copy_region(struct pipe_context *ctx, /* r600_shader.c */ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, - struct r600_shader_key key); + union r600_shader_key key); void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8d1f95a..4c4b600 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -62,7 +62,7 @@ The compiler must issue the source argument to slots z, y, and x static int r600_shader_from_tgsi(struct r600_context *rctx, struct r600_pipe_shader *pipeshader, - struct r600_shader_key key); + union r600_shader_key key); static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr, @@ -133,7 +133,7 @@ static int store_shader(struct pipe_context *ctx, int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, - struct 
r600_shader_key key) + union r600_shader_key key) { struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_shader_selector *sel = shader->selector; @@ -141,7 +141,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens); unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB); unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); - unsigned export_shader = key.vs_as_es; + unsigned export_shader = key.vs.as_es; shader->shader.bc.isa = rctx->isa; @@ -1802,7 +1802,7 @@ static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind) static int r600_shader_from_tgsi(struct r600_context *rctx, struct r600_pipe_shader *pipeshader, - struct r600_shader_key key) + union r600_shader_key key) { struct r600_screen *rscreen = rctx->screen; struct r600_shader *shader = &pipeshader->shader; @@ -1816,7 +1816,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, unsigned opcode; int i, j, k, r = 0; int next_param_base = 0, next_clip_base; - int max_color_exports = MAX2(key.nr_cbufs, 1); + int max_color_exports = MAX2(key.ps.nr_cbufs, 1); /* Declarations used by llvm code */ bool use_llvm = false; bool indirect_gprs; @@ -1830,8 +1830,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.shader = shader; ctx.native_integers = true; - shader->vs_as_gs_a = key.vs_as_gs_a; - shader->vs_as_es = key.vs_as_es; + shader->vs_as_gs_a = key.vs.as_gs_a; + shader->vs_as_es = key.vs.as_es; r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, rscreen->has_compressed_msaa_texturing); @@ -1844,9 +1844,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, shader->processor_type = ctx.type; ctx.bc->type = shader->processor_type; - ring_outputs = key.vs_as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY); + ring_outputs = key.vs.as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY); - if (key.vs_as_es) { + if (key.vs.as_es) { ctx.gs_for_vs = 
&rctx->gs_shader->current->shader; } else { ctx.gs_for_vs = NULL; @@ -1866,7 +1866,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, shader->nr_ps_color_exports = 0; shader->nr_ps_max_color_exports = 0; - shader->two_side = key.color_two_side; + shader->two_side = key.ps.color_two_side; /* register allocations */ /* Values [0,127] correspond to GPR[0..127]. @@ -1970,7 +1970,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, shader->fs_write_all = FALSE; if (shader->vs_as_gs_a) - vs_add_primid_output(&ctx, key.vs_prim_id_out); + vs_add_primid_output(&ctx, key.vs.prim_id_out); while (!tgsi_parse_end_of_tokens(&ctx.parse)) { tgsi_parse_token(&ctx.parse); @@ -2091,7 +2091,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, radeon_llvm_ctx.chip_class = ctx.bc->chip_class; radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN); radeon_llvm_ctx.stream_outputs = &so; - radeon_llvm_ctx.alpha_to_one = key.alpha_to_one; + radeon_llvm_ctx.alpha_to_one = key.ps.alpha_to_one; radeon_llvm_ctx.has_compressed_msaa_texturing = ctx.bc->has_compressed_msaa_texturing; mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); @@ -2270,7 +2270,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, convert_edgeflag_to_int(&ctx); if (ring_outputs) { - if (key.vs_as_es) + if (key.vs.as_es) emit_gs_ring_writes(&ctx, FALSE); } else { /* Export output */ @@ -2386,7 +2386,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, j--; continue; } - output[j].swizzle_w = key.alpha_to_one ? 5 : 3; + output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3; output[j].array_base = shader->output[i].sid; output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; shader->nr_ps_color_exports++; @@ -2399,7 +2399,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, output[j].swizzle_x = 0; output[j].swizzle_y = 1; output[j].swizzle_z = 2; - output[j].swizzle_w = key.alpha_to_one ? 
5 : 3; + output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3; output[j].burst_count = 1; output[j].array_base = k; output[j].op = CF_OP_EXPORT; @@ -6151,10 +6151,10 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]); if (r) return r; - r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[2]); + r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]); if (r) return r; - r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[2], &ctx->src[1]); + r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]); if (r) return r; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 5d05c81..927bac5 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -95,13 +95,17 @@ struct r600_shader { struct r600_shader_array * arrays; }; -struct r600_shader_key { - unsigned color_two_side:1; - unsigned alpha_to_one:1; - unsigned nr_cbufs:4; - unsigned vs_as_es:1; - unsigned vs_as_gs_a:1; - unsigned vs_prim_id_out:8; +union r600_shader_key { + struct { + unsigned nr_cbufs:4; + unsigned color_two_side:1; + unsigned alpha_to_one:1; + } ps; + struct { + unsigned prim_id_out:8; + unsigned as_es:1; /* export shader */ + unsigned as_gs_a:1; + } vs; }; struct r600_shader_array { @@ -122,7 +126,7 @@ struct r600_pipe_shader { unsigned flatshade; unsigned pa_cl_vs_out_cntl; unsigned nr_ps_color_outputs; - struct r600_shader_key key; + union r600_shader_key key; unsigned db_shader_control; unsigned ps_depth_export; unsigned enabled_stream_buffers_mask; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index aa4a8d0..a05dd83 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -702,29 +702,39 @@ void 
r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom) } /* Compute the key for the hw shader variant */ -static inline struct r600_shader_key r600_shader_selector_key(struct pipe_context * ctx, +static inline union r600_shader_key r600_shader_selector_key(struct pipe_context * ctx, struct r600_pipe_shader_selector * sel) { struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_shader_key key; + union r600_shader_key key; memset(&key, 0, sizeof(key)); - if (sel->type == PIPE_SHADER_FRAGMENT) { - key.color_two_side = rctx->rasterizer && rctx->rasterizer->two_side; - key.alpha_to_one = rctx->alpha_to_one && - rctx->rasterizer && rctx->rasterizer->multisample_enable && - !rctx->framebuffer.cb0_is_integer; - key.nr_cbufs = rctx->framebuffer.state.nr_cbufs; - /* Dual-source blending only makes sense with nr_cbufs == 1. */ - if (key.nr_cbufs == 1 && rctx->dual_src_blend) - key.nr_cbufs = 2; - } else if (sel->type == PIPE_SHADER_VERTEX) { - key.vs_as_es = (rctx->gs_shader != NULL); + switch (sel->type) { + case PIPE_SHADER_VERTEX: { + key.vs.as_es = (rctx->gs_shader != NULL); if (rctx->ps_shader->current->shader.gs_prim_id_input && !rctx->gs_shader) { - key.vs_as_gs_a = true; - key.vs_prim_id_out = rctx->ps_shader->current->shader.input[rctx->ps_shader->current->shader.ps_prim_id_input].spi_sid; + key.vs.as_gs_a = true; + key.vs.prim_id_out = rctx->ps_shader->current->shader.input[rctx->ps_shader->current->shader.ps_prim_id_input].spi_sid; } + break; + } + case PIPE_SHADER_GEOMETRY: + break; + case PIPE_SHADER_FRAGMENT: { + key.ps.color_two_side = rctx->rasterizer && rctx->rasterizer->two_side; + key.ps.alpha_to_one = rctx->alpha_to_one && + rctx->rasterizer && rctx->rasterizer->multisample_enable && + !rctx->framebuffer.cb0_is_integer; + key.ps.nr_cbufs = rctx->framebuffer.state.nr_cbufs; + /* Dual-source blending only makes sense with nr_cbufs == 1. 
*/ + if (key.ps.nr_cbufs == 1 && rctx->dual_src_blend) + key.ps.nr_cbufs = 2; + break; } + default: + assert(0); + } + return key; } @@ -734,7 +744,7 @@ static int r600_shader_select(struct pipe_context *ctx, struct r600_pipe_shader_selector* sel, bool *dirty) { - struct r600_shader_key key; + union r600_shader_key key; struct r600_pipe_shader * shader = NULL; int r; diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 16ee541..81f3f45 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -209,8 +209,6 @@ static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family) static unsigned calc_ctx_size(struct ruvd_decoder *dec) { - unsigned width_in_mb, height_in_mb, ctx_size; - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); @@ -223,8 +221,7 @@ static unsigned calc_ctx_size(struct ruvd_decoder *dec) width = align (width, 16); height = align (height, 16); - ctx_size = ((width + 255) / 16)*((height + 255) / 16) * 16 * max_references + 52 * 1024; - return ctx_size; + return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; } /* calculate size of reference picture buffer */ diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 48972bd..b7450b6 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -586,7 +586,8 @@ void si_resource_copy_region(struct pipe_context *ctx, si_blitter_begin(ctx, SI_COPY); util_blitter_blit_generic(sctx->blitter, dst_view, &dstbox, src_view, src_box, src_width0, src_height0, - PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL); + PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, + FALSE); si_blitter_end(ctx); pipe_surface_reference(&dst_view, NULL); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c 
index 4288e9b..fa6c15a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2277,7 +2277,7 @@ static void tex_fetch_args( unsigned sampler_index; unsigned num_deriv_channels = 0; bool has_offset = HAVE_LLVM >= 0x0305 ? inst->Texture.NumOffsets > 0 : false; - LLVMValueRef res_ptr, samp_ptr; + LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL; sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1; sampler_index = emit_data->inst->Src[sampler_src].Register.Index; @@ -2293,9 +2293,19 @@ static void tex_fetch_args( samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER); samp_ptr = build_indexed_load_const(si_shader_ctx, samp_ptr, ind_index); + + if (target == TGSI_TEXTURE_2D_MSAA || + target == TGSI_TEXTURE_2D_ARRAY_MSAA) { + ind_index = LLVMBuildAdd(gallivm->builder, ind_index, + lp_build_const_int32(gallivm, + SI_FMASK_TEX_OFFSET), ""); + fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE); + fmask_ptr = build_indexed_load_const(si_shader_ctx, res_ptr, ind_index); + } } else { res_ptr = si_shader_ctx->resources[sampler_index]; samp_ptr = si_shader_ctx->samplers[sampler_index]; + fmask_ptr = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index]; } if (target == TGSI_TEXTURE_BUFFER) { @@ -2493,7 +2503,7 @@ static void tex_fetch_args( txf_emit_data.dst_type = LLVMVectorType( LLVMInt32TypeInContext(gallivm->context), 4); txf_emit_data.args[0] = lp_build_gather_values(gallivm, txf_address, txf_count); - txf_emit_data.args[1] = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index]; + txf_emit_data.args[1] = fmask_ptr; txf_emit_data.args[2] = lp_build_const_int32(gallivm, inst.Texture.Texture); txf_emit_data.arg_count = 3; @@ -2524,8 +2534,7 @@ static void tex_fetch_args( * resource descriptor is 0 (invalid), */ LLVMValueRef fmask_desc = - LLVMBuildBitCast(gallivm->builder, - si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index], + 
LLVMBuildBitCast(gallivm->builder, fmask_ptr, LLVMVectorType(uint_bld->elem_type, 8), ""); LLVMValueRef fmask_word1 = @@ -3973,7 +3982,7 @@ static void si_dump_key(unsigned shader, union si_shader_key *key) fprintf(stderr, " es_enabled_outputs = 0x%"PRIx64"\n", key->vs.es_enabled_outputs); fprintf(stderr, " as_es = %u\n", key->vs.as_es); - fprintf(stderr, " as_es = %u\n", key->vs.as_ls); + fprintf(stderr, " as_ls = %u\n", key->vs.as_ls); break; case PIPE_SHADER_TESS_CTRL: diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 654c46f..3a63af8 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -270,6 +270,7 @@ struct vc4_context { struct ra_regs *regs; unsigned int reg_class_any; + unsigned int reg_class_a_or_b_or_acc; unsigned int reg_class_r4_or_a; unsigned int reg_class_a; diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c index 7978ea1..5b43583 100644 --- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c +++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c @@ -143,15 +143,6 @@ qir_opt_algebraic(struct vc4_compile *c) case QOP_SEL_X_Y_ZC: case QOP_SEL_X_Y_NS: case QOP_SEL_X_Y_NC: - if (qir_reg_equals(inst->src[0], inst->src[1])) { - /* Turn "dst = (sf == x) ? a : a)" into - * "dst = a" - */ - replace_with_mov(c, inst, inst->src[1]); - progress = true; - break; - } - if (is_zero(c, inst->src[1])) { /* Replace references to a 0 uniform value * with the SEL_X_0 equivalent. 
@@ -207,6 +198,7 @@ qir_opt_algebraic(struct vc4_compile *c) /* FADD(a, FSUB(0, b)) -> FSUB(a, b) */ if (inst->src[1].file == QFILE_TEMP && + c->defs[inst->src[1].index] && c->defs[inst->src[1].index]->op == QOP_FSUB) { struct qinst *fsub = c->defs[inst->src[1].index]; if (is_zero(c, fsub->src[0])) { @@ -221,6 +213,7 @@ qir_opt_algebraic(struct vc4_compile *c) /* FADD(FSUB(0, b), a) -> FSUB(a, b) */ if (inst->src[0].file == QFILE_TEMP && + c->defs[inst->src[0].index] && c->defs[inst->src[0].index]->op == QOP_FSUB) { struct qinst *fsub = c->defs[inst->src[0].index]; if (is_zero(c, fsub->src[0])) { @@ -236,18 +229,20 @@ qir_opt_algebraic(struct vc4_compile *c) break; case QOP_FMUL: - if (replace_x_0_with_0(c, inst, 0) || - replace_x_0_with_0(c, inst, 1) || - fmul_replace_one(c, inst, 0) || - fmul_replace_one(c, inst, 1)) { + if (!inst->dst.pack && + (replace_x_0_with_0(c, inst, 0) || + replace_x_0_with_0(c, inst, 1) || + fmul_replace_one(c, inst, 0) || + fmul_replace_one(c, inst, 1))) { progress = true; break; } break; case QOP_MUL24: - if (replace_x_0_with_0(c, inst, 0) || - replace_x_0_with_0(c, inst, 1)) { + if (!inst->dst.pack && + (replace_x_0_with_0(c, inst, 0) || + replace_x_0_with_0(c, inst, 1))) { progress = true; break; } @@ -280,6 +275,14 @@ qir_opt_algebraic(struct vc4_compile *c) } break; + case QOP_RCP: + if (is_1f(c, inst->src[0])) { + replace_with_mov(c, inst, inst->src[0]); + progress = true; + break; + } + break; + default: break; } diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c index a755de9..fd2539a 100644 --- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c +++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c @@ -39,21 +39,27 @@ qir_opt_copy_propagation(struct vc4_compile *c) { bool progress = false; bool debug = false; - struct qreg *movs = calloc(c->num_temps, sizeof(struct qreg)); list_for_each_entry(struct qinst, inst, &c->instructions, link) { for (int i = 0; i < 
qir_get_op_nsrc(inst->op); i++) { int index = inst->src[i].index; if (inst->src[i].file == QFILE_TEMP && - (movs[index].file == QFILE_TEMP || - movs[index].file == QFILE_UNIF)) { + c->defs[index] && + c->defs[index]->op == QOP_MOV && + (c->defs[index]->src[0].file == QFILE_TEMP || + c->defs[index]->src[0].file == QFILE_UNIF)) { + /* If it has a pack, it shouldn't be an SSA + * def. + */ + assert(!c->defs[index]->dst.pack); + if (debug) { fprintf(stderr, "Copy propagate: "); qir_dump_inst(c, inst); fprintf(stderr, "\n"); } - inst->src[i] = movs[index]; + inst->src[i] = c->defs[index]->src[0]; if (debug) { fprintf(stderr, "to: "); @@ -64,14 +70,6 @@ qir_opt_copy_propagation(struct vc4_compile *c) progress = true; } } - - if (inst->op == QOP_MOV && - inst->dst.file == QFILE_TEMP && - inst->src[0].file != QFILE_VPM) { - movs[inst->dst.index] = inst->src[0]; - } } - - free(movs); return progress; } diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c index e04f028..f2cdf8f 100644 --- a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c +++ b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c @@ -68,7 +68,7 @@ qir_opt_vpm_writes(struct vc4_compile *c) continue; struct qinst *inst = c->defs[temp]; - if (qir_is_multi_instruction(inst)) + if (!inst || qir_is_multi_instruction(inst)) continue; if (qir_depends_on_flags(inst) || inst->sf) @@ -79,22 +79,6 @@ qir_opt_vpm_writes(struct vc4_compile *c) continue; } - /* A QOP_TEX_RESULT destination is r4, so we can't move - * accesses to it past another QOP_TEX_RESULT which would - * update it. - */ - int src; - for (src = 0; src < qir_get_op_nsrc(inst->op); src++) { - if (inst->src[src].file == QFILE_TEMP) { - if (c->defs[inst->src[src].index]->op == - QOP_TEX_RESULT) { - break; - } - } - } - if (src != qir_get_op_nsrc(inst->op)) - continue; - /* Move the generating instruction to the end of the program * to maintain the order of the VPM writes. 
*/ diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 13c4721..e002983 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -818,6 +818,72 @@ declare_uniform_range(struct vc4_compile *c, uint32_t start, uint32_t size) c->ubo_ranges[array_id].used = false; } +static bool +ntq_src_is_only_ssa_def_user(nir_src *src) +{ + if (!src->is_ssa) + return false; + + if (!list_empty(&src->ssa->if_uses)) + return false; + + return (src->ssa->uses.next == &src->use_link && + src->ssa->uses.next->next == &src->ssa->uses); +} + +/** + * In general, emits a nir_pack_unorm_4x8 as a series of MOVs with the pack + * bit set. + * + * However, as an optimization, it tries to find the instructions generating + * the sources to be packed and just emit the pack flag there, if possible. + */ +static void +ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr) +{ + struct qreg result = qir_get_temp(c); + struct nir_alu_instr *vec4 = NULL; + + /* If packing from a vec4 op (as expected), identify it so that we can + * peek back at what generated its sources. 
+ */ + if (instr->src[0].src.is_ssa && + instr->src[0].src.ssa->parent_instr->type == nir_instr_type_alu && + nir_instr_as_alu(instr->src[0].src.ssa->parent_instr)->op == + nir_op_vec4) { + vec4 = nir_instr_as_alu(instr->src[0].src.ssa->parent_instr); + } + + for (int i = 0; i < 4; i++) { + int swiz = instr->src[0].swizzle[i]; + struct qreg src; + if (vec4) { + src = ntq_get_src(c, vec4->src[swiz].src, + vec4->src[swiz].swizzle[0]); + } else { + src = ntq_get_src(c, instr->src[0].src, swiz); + } + + if (vec4 && + ntq_src_is_only_ssa_def_user(&vec4->src[swiz].src) && + src.file == QFILE_TEMP && + c->defs[src.index] && + qir_is_mul(c->defs[src.index]) && + !c->defs[src.index]->dst.pack) { + struct qinst *rewrite = c->defs[src.index]; + c->defs[src.index] = NULL; + rewrite->dst = result; + rewrite->dst.pack = QPU_PACK_MUL_8A + i; + continue; + } + + qir_PACK_8_F(c, result, src, i); + } + + struct qreg *dest = ntq_get_dest(c, &instr->dest.dest); + *dest = result; +} + static void ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) { @@ -839,17 +905,7 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) } if (instr->op == nir_op_pack_unorm_4x8) { - struct qreg result; - for (int i = 0; i < 4; i++) { - struct qreg src = ntq_get_src(c, instr->src[0].src, - instr->src[0].swizzle[i]); - if (i == 0) - result = qir_PACK_8888_F(c, src); - else - result = qir_PACK_8_F(c, result, src, i); - } - struct qreg *dest = ntq_get_dest(c, &instr->dest.dest); - *dest = result; + ntq_emit_pack_unorm_4x8(c, instr); return; } @@ -1130,20 +1186,24 @@ emit_frag_end(struct vc4_compile *c) static void emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w) { - struct qreg xyi[2]; + struct qreg packed = qir_get_temp(c); for (int i = 0; i < 2; i++) { struct qreg scale = qir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, 0); - xyi[i] = qir_FTOI(c, qir_FMUL(c, - qir_FMUL(c, - c->outputs[c->output_position_index + i], - scale), - rcp_w)); + struct qreg packed_chan = packed; + 
packed_chan.pack = QPU_PACK_A_16A + i; + + qir_FTOI_dest(c, packed_chan, + qir_FMUL(c, + qir_FMUL(c, + c->outputs[c->output_position_index + i], + scale), + rcp_w)); } - qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1])); + qir_VPM_WRITE(c, packed); } static void diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 254140a..9d93071 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -71,12 +71,11 @@ static const struct qir_op_info qir_op_info[] = { [QOP_RSQ] = { "rsq", 1, 1, false, true }, [QOP_EXP2] = { "exp2", 1, 2, false, true }, [QOP_LOG2] = { "log2", 1, 2, false, true }, - [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1, false, true }, - [QOP_PACK_8A_F] = { "pack_8a_f", 1, 2, false, true }, - [QOP_PACK_8B_F] = { "pack_8b_f", 1, 2, false, true }, - [QOP_PACK_8C_F] = { "pack_8c_f", 1, 2, false, true }, - [QOP_PACK_8D_F] = { "pack_8d_f", 1, 2, false, true }, - [QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true }, + [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1 }, + [QOP_PACK_8A_F] = { "pack_8a_f", 1, 1 }, + [QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 }, + [QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 }, + [QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 }, [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true }, [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true }, [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true }, @@ -169,6 +168,18 @@ qir_is_multi_instruction(struct qinst *inst) } bool +qir_is_mul(struct qinst *inst) +{ + switch (inst->op) { + case QOP_FMUL: + case QOP_MUL24: + return true; + default: + return false; + } +} + +bool qir_is_tex(struct qinst *inst) { return inst->op >= QOP_TEX_S && inst->op <= QOP_TEX_DIRECT; @@ -273,6 +284,14 @@ qir_dump_inst(struct vc4_compile *c, struct qinst *inst) inst->sf ? 
".sf" : ""); qir_print_reg(c, inst->dst, true); + if (inst->dst.pack) { + if (inst->dst.pack) { + if (qir_is_mul(inst)) + vc4_qpu_disasm_pack_mul(stderr, inst->dst.pack); + else + vc4_qpu_disasm_pack_a(stderr, inst->dst.pack); + } + } for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { fprintf(stderr, ", "); qir_print_reg(c, inst->src[i], false); @@ -348,7 +367,7 @@ qir_emit(struct vc4_compile *c, struct qinst *inst) if (inst->dst.file == QFILE_TEMP) c->defs[inst->dst.index] = inst; - list_addtail(&inst->link, &c->instructions); + qir_emit_nodef(c, inst); } bool @@ -389,8 +408,11 @@ qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst) struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg) { - while (reg.file == QFILE_TEMP && c->defs[reg.index]->op == QOP_MOV) + while (reg.file == QFILE_TEMP && + c->defs[reg.index] && + c->defs[reg.index]->op == QOP_MOV) { reg = c->defs[reg.index]->src[0]; + } return reg; } diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index cade795..a2b21fa 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -58,6 +58,7 @@ enum qfile { struct qreg { enum qfile file; uint32_t index; + int pack; }; enum qop { @@ -104,7 +105,6 @@ enum qop { QOP_LOG2, QOP_VW_SETUP, QOP_VR_SETUP, - QOP_PACK_SCALED, QOP_PACK_8888_F, QOP_PACK_8A_F, QOP_PACK_8B_F, @@ -444,13 +444,20 @@ struct qreg qir_uniform(struct vc4_compile *c, enum quniform_contents contents, uint32_t data); void qir_reorder_uniforms(struct vc4_compile *c); + void qir_emit(struct vc4_compile *c, struct qinst *inst); +static inline void qir_emit_nodef(struct vc4_compile *c, struct qinst *inst) +{ + list_addtail(&inst->link, &c->instructions); +} + struct qreg qir_get_temp(struct vc4_compile *c); int qir_get_op_nsrc(enum qop qop); bool qir_reg_equals(struct qreg a, struct qreg b); bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); bool qir_has_side_effect_reads(struct vc4_compile 
*c, struct qinst *inst); bool qir_is_multi_instruction(struct qinst *inst); +bool qir_is_mul(struct qinst *inst); bool qir_is_tex(struct qinst *inst); bool qir_depends_on_flags(struct qinst *inst); bool qir_writes_r4(struct qinst *inst); @@ -509,6 +516,12 @@ qir_##name(struct vc4_compile *c, struct qreg a) \ struct qreg t = qir_get_temp(c); \ qir_emit(c, qir_inst(QOP_##name, t, a, c->undef)); \ return t; \ +} \ +static inline void \ +qir_##name##_dest(struct vc4_compile *c, struct qreg dest, \ + struct qreg a) \ +{ \ + qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, c->undef)); \ } #define QIR_ALU2(name) \ @@ -518,6 +531,12 @@ qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ struct qreg t = qir_get_temp(c); \ qir_emit(c, qir_inst(QOP_##name, t, a, b)); \ return t; \ +} \ +static inline void \ +qir_##name##_dest(struct vc4_compile *c, struct qreg dest, \ + struct qreg a, struct qreg b) \ +{ \ + qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, b)); \ } #define QIR_NODST_1(name) \ @@ -534,6 +553,14 @@ qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ qir_emit(c, qir_inst(QOP_##name, c->undef, a, b)); \ } +#define QIR_PACK(name) \ +static inline struct qreg \ +qir_##name(struct vc4_compile *c, struct qreg dest, struct qreg a) \ +{ \ + qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, c->undef)); \ + return dest; \ +} + QIR_ALU1(MOV) QIR_ALU2(FADD) QIR_ALU2(FSUB) @@ -570,12 +597,11 @@ QIR_ALU1(RCP) QIR_ALU1(RSQ) QIR_ALU1(EXP2) QIR_ALU1(LOG2) -QIR_ALU2(PACK_SCALED) QIR_ALU1(PACK_8888_F) -QIR_ALU2(PACK_8A_F) -QIR_ALU2(PACK_8B_F) -QIR_ALU2(PACK_8C_F) -QIR_ALU2(PACK_8D_F) +QIR_PACK(PACK_8A_F) +QIR_PACK(PACK_8B_F) +QIR_PACK(PACK_8C_F) +QIR_PACK(PACK_8D_F) QIR_ALU1(VARY_ADD_C) QIR_NODST_2(TEX_S) QIR_NODST_2(TEX_T) @@ -627,11 +653,12 @@ qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i) } static inline struct qreg -qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan) +qir_PACK_8_F(struct vc4_compile *c, 
struct qreg dest, struct qreg val, int chan) { - struct qreg t = qir_get_temp(c); - qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val)); - return t; + qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, dest, val, c->undef)); + if (dest.file == QFILE_TEMP) + c->defs[dest.index] = NULL; + return dest; } static inline struct qreg diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h index fbb90ba..0719d28 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.h +++ b/src/gallium/drivers/vc4/vc4_qpu.h @@ -24,6 +24,7 @@ #ifndef VC4_QPU_H #define VC4_QPU_H +#include <stdio.h> #include <stdint.h> #include "util/u_math.h" @@ -206,6 +207,12 @@ void vc4_qpu_disasm(const uint64_t *instructions, int num_instructions); void +vc4_qpu_disasm_pack_mul(FILE *out, uint32_t pack); + +void +vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack); + +void vc4_qpu_validate(uint64_t *insts, uint32_t num_inst); #endif /* VC4_QPU_H */ diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c index 00aeb30..0879787 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c +++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c @@ -245,6 +245,18 @@ get_special_write_desc(int reg, bool is_a) return special_write[reg]; } +void +vc4_qpu_disasm_pack_mul(FILE *out, uint32_t pack) +{ + fprintf(out, ".%s", DESC(qpu_pack_mul, pack)); +} + +void +vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack) +{ + fprintf(out, "%s", DESC(qpu_pack_a, pack)); +} + static void print_alu_dst(uint64_t inst, bool is_mul) { @@ -263,9 +275,9 @@ print_alu_dst(uint64_t inst, bool is_mul) fprintf(stderr, "%s%d?", file, waddr); if (is_mul && (inst & QPU_PM)) { - fprintf(stderr, ".%s", DESC(qpu_pack_mul, pack)); + vc4_qpu_disasm_pack_mul(stderr, pack); } else if (is_a && !(inst & QPU_PM)) { - fprintf(stderr, "%s", DESC(qpu_pack_a, pack)); + vc4_qpu_disasm_pack_a(stderr, pack); } } diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index f324056..adf3a8b 
100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -179,10 +179,9 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) static const struct { uint32_t op; - bool is_mul; } translate[] = { -#define A(name) [QOP_##name] = {QPU_A_##name, false} -#define M(name) [QOP_##name] = {QPU_M_##name, true} +#define A(name) [QOP_##name] = {QPU_A_##name} +#define M(name) [QOP_##name] = {QPU_M_##name} A(FADD), A(FSUB), A(FMIN), @@ -336,28 +335,12 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QOP_PACK_8B_F: case QOP_PACK_8C_F: case QOP_PACK_8D_F: - /* If dst doesn't happen to already contain src[0], - * then we have to move it in. - */ - if (qinst->src[0].file != QFILE_NULL && - (src[0].mux != dst.mux || src[0].addr != dst.addr)) { - /* Don't overwrite src1 while setting up - * the dst! - */ - if (dst.mux == src[1].mux && - dst.addr == src[1].addr) { - queue(c, qpu_m_MOV(qpu_rb(31), src[1])); - src[1] = qpu_rb(31); - } - - queue(c, qpu_m_MOV(dst, src[0])); - } - - queue(c, qpu_m_MOV(dst, src[1])); - *last_inst(c) |= QPU_PM; - *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A + - qinst->op - QOP_PACK_8A_F, - QPU_PACK); + queue(c, + qpu_m_MOV(dst, src[0]) | + QPU_PM | + QPU_SET_FIELD(QPU_PACK_MUL_8A + + qinst->op - QOP_PACK_8A_F, + QPU_PACK)); break; case QOP_FRAG_X: @@ -419,24 +402,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) queue(c, qpu_a_FADD(dst, src[0], qpu_r5())); break; - case QOP_PACK_SCALED: { - uint64_t a = (qpu_a_MOV(dst, src[0]) | - QPU_SET_FIELD(QPU_PACK_A_16A, - QPU_PACK)); - uint64_t b = (qpu_a_MOV(dst, src[1]) | - QPU_SET_FIELD(QPU_PACK_A_16B, - QPU_PACK)); - - if (dst.mux == src[1].mux && dst.addr == src[1].addr) { - queue(c, b); - queue(c, a); - } else { - queue(c, a); - queue(c, b); - } - break; - } - case QOP_TEX_S: case QOP_TEX_T: case QOP_TEX_R: @@ -529,14 +494,24 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) 
fixup_raddr_conflict(c, dst, &src[0], &src[1]); - if (translate[qinst->op].is_mul) { + if (qir_is_mul(qinst)) { queue(c, qpu_m_alu2(translate[qinst->op].op, dst, src[0], src[1])); + if (qinst->dst.pack) { + *last_inst(c) |= QPU_PM; + *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack, + QPU_PACK); + } } else { queue(c, qpu_a_alu2(translate[qinst->op].op, dst, src[0], src[1])); + if (qinst->dst.pack) { + assert(dst.mux == QPU_MUX_A); + *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack, + QPU_PACK); + } } break; diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index a29db1f..3ced50f 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -113,9 +113,10 @@ vc4_alloc_reg_set(struct vc4_context *vc4) if (vc4->regs) return; - vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs)); + vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs), true); vc4->reg_class_any = ra_alloc_reg_class(vc4->regs); + vc4->reg_class_a_or_b_or_acc = ra_alloc_reg_class(vc4->regs); vc4->reg_class_r4_or_a = ra_alloc_reg_class(vc4->regs); vc4->reg_class_a = ra_alloc_reg_class(vc4->regs); for (uint32_t i = 0; i < ARRAY_SIZE(vc4_regs); i++) { @@ -130,10 +131,12 @@ vc4_alloc_reg_set(struct vc4_context *vc4) */ if (vc4_regs[i].mux == QPU_MUX_R4) { ra_class_add_reg(vc4->regs, vc4->reg_class_r4_or_a, i); + ra_class_add_reg(vc4->regs, vc4->reg_class_any, i); continue; } ra_class_add_reg(vc4->regs, vc4->reg_class_any, i); + ra_class_add_reg(vc4->regs, vc4->reg_class_a_or_b_or_acc, i); } for (uint32_t i = AB_INDEX; i < AB_INDEX + 64; i += 2) { @@ -177,7 +180,8 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) uint8_t class_bits[c->num_temps]; struct qpu_reg *temp_registers = calloc(c->num_temps, sizeof(*temp_registers)); - memset(def, 0, sizeof(def)); + for (int i = 0; i < ARRAY_SIZE(def); i++) + def[i] = ~0; memset(use, 0, sizeof(use)); /* If things aren't ever 
written (undefined values), just read from @@ -196,7 +200,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) uint32_t ip = 0; list_for_each_entry(struct qinst, inst, &c->instructions, link) { if (inst->dst.file == QFILE_TEMP) { - def[inst->dst.index] = ip; + def[inst->dst.index] = MIN2(ip, def[inst->dst.index]); use[inst->dst.index] = ip; } @@ -267,17 +271,33 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2); break; - case QOP_PACK_SCALED: - /* The pack flags require an A-file dst register. */ - class_bits[inst->dst.index] &= CLASS_BIT_A; - break; - default: break; } + if (inst->dst.pack && !qir_is_mul(inst)) { + /* The non-MUL pack flags require an A-file dst + * register. + */ + class_bits[inst->dst.index] &= CLASS_BIT_A; + } + if (qir_src_needs_a_file(inst)) { - class_bits[inst->src[0].index] &= CLASS_BIT_A; + switch (inst->op) { + case QOP_UNPACK_8A_F: + case QOP_UNPACK_8B_F: + case QOP_UNPACK_8C_F: + case QOP_UNPACK_8D_F: + /* Special case: these can be done as R4 + * unpacks, as well. 
+ */ + class_bits[inst->src[0].index] &= (CLASS_BIT_A | + CLASS_BIT_R4); + break; + default: + class_bits[inst->src[0].index] &= CLASS_BIT_A; + break; + } } ip++; } @@ -287,9 +307,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) switch (class_bits[i]) { case CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4: - case CLASS_BIT_A | CLASS_BIT_B_OR_ACC: ra_set_node_class(g, node, vc4->reg_class_any); break; + case CLASS_BIT_A | CLASS_BIT_B_OR_ACC: + ra_set_node_class(g, node, vc4->reg_class_a_or_b_or_acc); + break; case CLASS_BIT_A | CLASS_BIT_R4: ra_set_node_class(g, node, vc4->reg_class_r4_or_a); break; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 1e493f4..266ebba 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -663,6 +663,7 @@ struct pipe_blit_info boolean render_condition_enable; /**< whether the blit should honor the current render condition */ + boolean alpha_blend; /* dst.rgb = src.rgb * src.a + dst.rgb * (1 - src.a) */ }; diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 63c3f8e..7c23a27 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -346,6 +346,15 @@ namespace { // Kernel metadata + struct kernel_arg_md { + llvm::StringRef type_name; + llvm::StringRef access_qual; + kernel_arg_md(llvm::StringRef type_name_, llvm::StringRef access_qual_): + type_name(type_name_), access_qual(access_qual_) {} + }; + +#if HAVE_LLVM >= 0x0306 + const llvm::MDNode * get_kernel_metadata(const llvm::Function *kernel_func) { auto mod = kernel_func->getParent(); @@ -356,12 +365,8 @@ namespace { const llvm::MDNode *kernel_node = nullptr; for (unsigned i = 0; i < kernels_node->getNumOperands(); ++i) { -#if HAVE_LLVM >= 0x0306 auto func = llvm::mdconst::dyn_extract<llvm::Function>( -#else - auto func = 
llvm::dyn_cast<llvm::Function>( -#endif - kernels_node->getOperand(i)->getOperand(0)); + kernels_node->getOperand(i)->getOperand(0)); if (func == kernel_func) { kernel_node = kernels_node->getOperand(i); break; @@ -387,13 +392,6 @@ namespace { return node; } - struct kernel_arg_md { - llvm::StringRef type_name; - llvm::StringRef access_qual; - kernel_arg_md(llvm::StringRef type_name_, llvm::StringRef access_qual_): - type_name(type_name_), access_qual(access_qual_) {} - }; - std::vector<kernel_arg_md> get_kernel_arg_md(const llvm::Function *kernel_func) { auto num_args = kernel_func->getArgumentList().size(); @@ -415,6 +413,17 @@ namespace { return res; } +#else + + std::vector<kernel_arg_md> + get_kernel_arg_md(const llvm::Function *kernel_func) { + return std::vector<kernel_arg_md>( + kernel_func->getArgumentList().size(), + kernel_arg_md("", "")); + } + +#endif // HAVE_LLVM >= 0x0306 + std::vector<module::argument> get_kernel_args(const llvm::Module *mod, const std::string &kernel_name, const clang::LangAS::Map &address_spaces) { diff --git a/src/gallium/state_trackers/nine/adapter9.c b/src/gallium/state_trackers/nine/adapter9.c index c5ffcb1..69e0fa2 100644 --- a/src/gallium/state_trackers/nine/adapter9.c +++ b/src/gallium/state_trackers/nine/adapter9.c @@ -545,7 +545,7 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, /*D3DDEVCAPS_RTPATCHES |*/ /*D3DDEVCAPS_RTPATCHHANDLEZERO |*/ /*D3DDEVCAPS_SEPARATETEXTUREMEMORIES |*/ - /*D3DDEVCAPS_TEXTURENONLOCALVIDMEM |*/ + D3DDEVCAPS_TEXTURENONLOCALVIDMEM | /* D3DDEVCAPS_TEXTURESYSTEMMEMORY |*/ D3DDEVCAPS_TEXTUREVIDEOMEMORY | D3DDEVCAPS_TLVERTEXSYSTEMMEMORY | @@ -561,32 +561,32 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, D3DPMISCCAPS_TSSARGTEMP | D3DPMISCCAPS_BLENDOP | D3DPIPECAP(INDEP_BLEND_ENABLE, D3DPMISCCAPS_INDEPENDENTWRITEMASKS) | - /*D3DPMISCCAPS_PERSTAGECONSTANT |*/ + /*D3DPMISCCAPS_PERSTAGECONSTANT |*/ /* TODO */ /*D3DPMISCCAPS_POSTBLENDSRGBCONVERT |*/ /* TODO */ 
D3DPMISCCAPS_FOGANDSPECULARALPHA | D3DPIPECAP(BLEND_EQUATION_SEPARATE, D3DPMISCCAPS_SEPARATEALPHABLEND) | D3DPIPECAP(MIXED_COLORBUFFER_FORMATS, D3DPMISCCAPS_MRTINDEPENDENTBITDEPTHS) | D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING | - /*D3DPMISCCAPS_FOGVERTEXCLAMPED*/0; + D3DPMISCCAPS_FOGVERTEXCLAMPED; if (!screen->get_param(screen, PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION)) pCaps->PrimitiveMiscCaps |= D3DPMISCCAPS_CLIPTLVERTS; pCaps->RasterCaps = D3DPIPECAP(ANISOTROPIC_FILTER, D3DPRASTERCAPS_ANISOTROPY) | - /*D3DPRASTERCAPS_COLORPERSPECTIVE |*/ + D3DPRASTERCAPS_COLORPERSPECTIVE | D3DPRASTERCAPS_DITHER | D3DPRASTERCAPS_DEPTHBIAS | - /*D3DPRASTERCAPS_FOGRANGE |*/ - /*D3DPRASTERCAPS_FOGTABLE |*/ - /*D3DPRASTERCAPS_FOGVERTEX |*/ + D3DPRASTERCAPS_FOGRANGE | + D3DPRASTERCAPS_FOGTABLE | + D3DPRASTERCAPS_FOGVERTEX | D3DPRASTERCAPS_MIPMAPLODBIAS | D3DPRASTERCAPS_MULTISAMPLE_TOGGLE | D3DPRASTERCAPS_SCISSORTEST | D3DPRASTERCAPS_SLOPESCALEDEPTHBIAS | /*D3DPRASTERCAPS_WBUFFER |*/ - /*D3DPRASTERCAPS_WFOG |*/ + D3DPRASTERCAPS_WFOG | /*D3DPRASTERCAPS_ZBUFFERLESSHSR |*/ - /*D3DPRASTERCAPS_ZFOG |*/ + D3DPRASTERCAPS_ZFOG | D3DPRASTERCAPS_ZTEST; pCaps->ZCmpCaps = D3DPCMPCAPS_NEVER | @@ -697,15 +697,12 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, pCaps->MaxAnisotropy = (DWORD)screen->get_paramf(screen, PIPE_CAPF_MAX_TEXTURE_ANISOTROPY); - pCaps->MaxVertexW = 1.0f; /* XXX */ - pCaps->GuardBandLeft = screen->get_paramf(screen, - PIPE_CAPF_GUARD_BAND_LEFT); - pCaps->GuardBandTop = screen->get_paramf(screen, - PIPE_CAPF_GUARD_BAND_TOP); - pCaps->GuardBandRight = screen->get_paramf(screen, - PIPE_CAPF_GUARD_BAND_RIGHT); - pCaps->GuardBandBottom = screen->get_paramf(screen, - PIPE_CAPF_GUARD_BAND_BOTTOM); + /* Values for GeForce 9600 GT */ + pCaps->MaxVertexW = 1e10f; + pCaps->GuardBandLeft = -1e9f; + pCaps->GuardBandTop = -1e9f; + pCaps->GuardBandRight = 1e9f; + pCaps->GuardBandBottom = 1e9f; pCaps->ExtentsAdjust = 0.0f; pCaps->StencilCaps = @@ -724,8 +721,6 @@ 
NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, /*D3DFVFCAPS_DONOTSTRIPELEMENTS |*/ D3DFVFCAPS_PSIZE; - /* XXX: Some of these are probably not in SM2.0 so cap them when I figure - * them out. For now leave them all enabled. */ pCaps->TextureOpCaps = D3DTEXOPCAPS_DISABLE | D3DTEXOPCAPS_SELECTARG1 | D3DTEXOPCAPS_SELECTARG2 | @@ -796,7 +791,8 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, pCaps->MaxVertexShaderConst = NINE_MAX_CONST_F; pCaps->PixelShaderVersion = D3DPS_VERSION(3,0); - pCaps->PixelShader1xMaxValue = 8.0f; /* XXX: wine */ + /* Value for GeForce 9600 GT */ + pCaps->PixelShader1xMaxValue = 65504.f; pCaps->DevCaps2 = D3DDEVCAPS2_STREAMOFFSET | D3DDEVCAPS2_VERTEXELEMENTSCANSHARESTREAMOFFSET | diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c index 17a8f44..d13138b 100644 --- a/src/gallium/state_trackers/nine/basetexture9.c +++ b/src/gallium/state_trackers/nine/basetexture9.c @@ -57,7 +57,8 @@ NineBaseTexture9_ctor( struct NineBaseTexture9 *This, user_assert(!(Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL)) || Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL); user_assert(!(Usage & D3DUSAGE_DYNAMIC) || - Pool != D3DPOOL_MANAGED, D3DERR_INVALIDCALL); + !(Pool == D3DPOOL_MANAGED || + Pool == D3DPOOL_SCRATCH), D3DERR_INVALIDCALL); hr = NineResource9_ctor(&This->base, pParams, initResource, alloc, Type, Pool, Usage); if (FAILED(hr)) @@ -85,6 +86,9 @@ NineBaseTexture9_ctor( struct NineBaseTexture9 *This, util_format_has_depth(util_format_description(This->base.info.format)); list_inithead(&This->list); + list_inithead(&This->list2); + if (Pool == D3DPOOL_MANAGED) + list_add(&This->list2, &This->base.base.device->managed_textures); return D3D_OK; } @@ -98,7 +102,9 @@ NineBaseTexture9_dtor( struct NineBaseTexture9 *This ) pipe_sampler_view_reference(&This->view[1], NULL); if (This->list.prev != NULL && This->list.next != NULL) - list_del(&This->list), + list_del(&This->list); + if 
(This->list2.prev != NULL && This->list2.next != NULL) + list_del(&This->list2); NineResource9_dtor(&This->base); } @@ -153,6 +159,8 @@ NineBaseTexture9_SetAutoGenFilterType( struct NineBaseTexture9 *This, user_assert(FilterType != D3DTEXF_NONE, D3DERR_INVALIDCALL); This->mipfilter = FilterType; + This->dirty_mip = TRUE; + NineBaseTexture9_GenerateMipSubLevels(This); return D3D_OK; } @@ -310,14 +318,12 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) tex->dirty_box.width, tex->dirty_box.height, tex->dirty_box.depth); if (tex->dirty_box.width) { - for (l = 0; l <= last_level; ++l) { + for (l = min_level_dirty; l <= last_level; ++l) { u_box_minify_2d(&box, &tex->dirty_box, l); - NineVolume9_AddDirtyRegion(tex->volumes[l], &tex->dirty_box); + NineVolume9_UploadSelf(tex->volumes[l], &box); } memset(&tex->dirty_box, 0, sizeof(tex->dirty_box)); } - for (l = min_level_dirty; l <= last_level; ++l) - NineVolume9_UploadSelf(tex->volumes[l]); } else { assert(!"invalid texture type"); } @@ -361,8 +367,7 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) box.width = u_minify(This->base.info.width0, l); box.height = u_minify(This->base.info.height0, l); box.depth = u_minify(This->base.info.depth0, l); - NineVolume9_AddDirtyRegion(tex->volumes[l], &box); - NineVolume9_UploadSelf(tex->volumes[l]); + NineVolume9_UploadSelf(tex->volumes[l], &box); } } else { assert(!"invalid texture type"); @@ -381,8 +386,7 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) void WINAPI NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This ) { - struct pipe_resource *resource = This->base.resource; - + struct pipe_resource *resource; unsigned base_level = 0; unsigned last_level = This->base.info.last_level - This->managed.lod; unsigned first_layer = 0; @@ -405,6 +409,8 @@ NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This ) last_layer = util_max_layer(This->view[0]->texture, base_level); + resource = This->base.resource; + 
util_gen_mipmap(This->pipe, resource, resource->format, base_level, last_level, first_layer, last_layer, filter); @@ -530,6 +536,11 @@ NineBaseTexture9_UpdateSamplerView( struct NineBaseTexture9 *This, swizzle[2] = PIPE_SWIZZLE_RED; swizzle[3] = PIPE_SWIZZLE_RED; } + } else if (resource->format == PIPE_FORMAT_RGTC2_UNORM) { + swizzle[0] = PIPE_SWIZZLE_GREEN; + swizzle[1] = PIPE_SWIZZLE_RED; + swizzle[2] = PIPE_SWIZZLE_ONE; + swizzle[3] = PIPE_SWIZZLE_ONE; } else if (resource->format != PIPE_FORMAT_A8_UNORM && resource->format != PIPE_FORMAT_RGTC1_UNORM) { /* exceptions: @@ -578,6 +589,21 @@ NineBaseTexture9_PreLoad( struct NineBaseTexture9 *This ) NineBaseTexture9_UploadSelf(This); } +void +NineBaseTexture9_UnLoad( struct NineBaseTexture9 *This ) +{ + if (This->base.pool != D3DPOOL_MANAGED || + This->managed.lod_resident == -1) + return; + + pipe_resource_reference(&This->base.resource, NULL); + This->managed.lod_resident = -1; + This->managed.dirty = TRUE; + + /* If the texture is bound, we have to re-upload it */ + BASETEX_REGISTER_UPDATE(This); +} + #ifdef DEBUG void NineBaseTexture9_Dump( struct NineBaseTexture9 *This ) diff --git a/src/gallium/state_trackers/nine/basetexture9.h b/src/gallium/state_trackers/nine/basetexture9.h index 9d6fb0c..b19a621 100644 --- a/src/gallium/state_trackers/nine/basetexture9.h +++ b/src/gallium/state_trackers/nine/basetexture9.h @@ -30,7 +30,8 @@ struct NineBaseTexture9 { struct NineResource9 base; - struct list_head list; + struct list_head list; /* for update_textures */ + struct list_head list2; /* for managed_textures */ /* g3d */ struct pipe_context *pipe; @@ -94,6 +95,9 @@ NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This ); void WINAPI NineBaseTexture9_PreLoad( struct NineBaseTexture9 *This ); +void +NineBaseTexture9_UnLoad( struct NineBaseTexture9 *This ); + /* For D3DPOOL_MANAGED only (after SetLOD change): */ HRESULT NineBaseTexture9_CreatePipeResource( struct NineBaseTexture9 *This, diff --git 
a/src/gallium/state_trackers/nine/cubetexture9.c b/src/gallium/state_trackers/nine/cubetexture9.c index edea1f2..abba263 100644 --- a/src/gallium/state_trackers/nine/cubetexture9.c +++ b/src/gallium/state_trackers/nine/cubetexture9.c @@ -43,7 +43,7 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This, struct pipe_screen *screen = pParams->device->screen; enum pipe_format pf; unsigned i, l, f, offset, face_size = 0; - unsigned *level_offsets; + unsigned *level_offsets = NULL; D3DSURFACE_DESC sfdesc; void *p; HRESULT hr; @@ -70,6 +70,13 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This, if (Format == D3DFMT_ATI1 || Format == D3DFMT_ATI2) return D3DERR_INVALIDCALL; + if (compressed_format(Format)) { + const unsigned w = util_format_get_blockwidth(pf); + const unsigned h = util_format_get_blockheight(pf); + + user_assert(!(EdgeLength % w) && !(EdgeLength % h), D3DERR_INVALIDCALL); + } + info->screen = pParams->device->screen; info->target = PIPE_TEXTURE_CUBE; info->format = pf; @@ -106,7 +113,7 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This, face_size = nine_format_get_size_and_offsets(pf, level_offsets, EdgeLength, EdgeLength, info->last_level); - This->managed_buffer = MALLOC(6 * face_size); + This->managed_buffer = align_malloc(6 * face_size, 32); if (!This->managed_buffer) return E_OUTOFMEMORY; } @@ -150,8 +157,12 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This, } } - for (i = 0; i < 6; ++i) /* width = 0 means empty, depth stays 1 */ + for (i = 0; i < 6; ++i) { + /* Textures start initially dirty */ + This->dirty_rect[i].width = EdgeLength; + This->dirty_rect[i].height = EdgeLength; This->dirty_rect[i].depth = 1; + } return D3D_OK; } @@ -259,13 +270,17 @@ NineCubeTexture9_AddDirtyRect( struct NineCubeTexture9 *This, user_assert(FaceType < 6, D3DERR_INVALIDCALL); if (This->base.base.pool != D3DPOOL_MANAGED) { - if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP) + if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP) { This->base.dirty_mip = 
TRUE; + BASETEX_REGISTER_UPDATE(&This->base); + } return D3D_OK; } - This->base.managed.dirty = TRUE; - BASETEX_REGISTER_UPDATE(&This->base); + if (This->base.base.pool == D3DPOOL_MANAGED) { + This->base.managed.dirty = TRUE; + BASETEX_REGISTER_UPDATE(&This->base); + } if (!pDirtyRect) { u_box_origin_2d(This->base.base.info.width0, diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 55948cb..99197a4 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -119,48 +119,6 @@ NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset ) This, (IDirect3DSurface9 *)This->swapchains[0]->zsbuf); } -void -NineDevice9_RestoreNonCSOState( struct NineDevice9 *This, unsigned mask ) -{ - struct pipe_context *pipe = This->pipe; - - DBG("This=%p mask=%u\n", This, mask); - - if (mask & 0x1) { - struct pipe_constant_buffer cb; - cb.buffer_offset = 0; - - if (This->prefer_user_constbuf) { - cb.buffer = NULL; - cb.user_buffer = This->state.vs_const_f; - } else { - cb.buffer = This->constbuf_vs; - cb.user_buffer = NULL; - } - cb.buffer_size = This->vs_const_size; - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb); - - if (This->prefer_user_constbuf) { - cb.user_buffer = This->state.ps_const_f; - } else { - cb.buffer = This->constbuf_ps; - } - cb.buffer_size = This->ps_const_size; - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb); - } - - if (mask & 0x2) { - struct pipe_poly_stipple stipple; - memset(&stipple, ~0, sizeof(stipple)); - pipe->set_polygon_stipple(pipe, &stipple); - } - - This->state.changed.group = NINE_STATE_ALL; - This->state.changed.vtxbuf = (1ULL << This->caps.MaxStreams) - 1; - This->state.changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1; - This->state.changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK; -} - #define GET_PCAP(n) pScreen->get_param(pScreen, PIPE_CAP_##n) HRESULT NineDevice9_ctor( struct NineDevice9 *This, 
@@ -186,6 +144,7 @@ NineDevice9_ctor( struct NineDevice9 *This, if (FAILED(hr)) { return hr; } list_inithead(&This->update_textures); + list_inithead(&This->managed_textures); This->screen = pScreen; This->caps = *pCaps; @@ -341,16 +300,19 @@ NineDevice9_ctor( struct NineDevice9 *This, This->state.vs_const_f = CALLOC(This->vs_const_size, 1); This->state.ps_const_f = CALLOC(This->ps_const_size, 1); This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1); + This->state.ps_lconstf_temp = CALLOC(This->ps_const_size,1); if (!This->state.vs_const_f || !This->state.ps_const_f || - !This->state.vs_lconstf_temp) + !This->state.vs_lconstf_temp || !This->state.ps_lconstf_temp) return E_OUTOFMEMORY; if (strstr(pScreen->get_name(pScreen), "AMD") || strstr(pScreen->get_name(pScreen), "ATI")) { - This->prefer_user_constbuf = TRUE; This->driver_bugs.buggy_barycentrics = TRUE; } + /* Disable NV path for now, needs some fixes */ + This->prefer_user_constbuf = TRUE; + tmpl.target = PIPE_BUFFER; tmpl.format = PIPE_FORMAT_R8_UNORM; tmpl.height0 = 1; @@ -376,6 +338,8 @@ NineDevice9_ctor( struct NineDevice9 *This, { struct pipe_resource tmplt; struct pipe_sampler_view templ; + struct pipe_sampler_state samp; + memset(&samp, 0, sizeof(samp)); tmplt.target = PIPE_TEXTURE_2D; tmplt.width0 = 1; @@ -404,22 +368,39 @@ NineDevice9_ctor( struct NineDevice9 *This, templ.swizzle_a = PIPE_SWIZZLE_ONE; templ.target = This->dummy_texture->target; - This->dummy_sampler = This->pipe->create_sampler_view(This->pipe, This->dummy_texture, &templ); - if (!This->dummy_sampler) + This->dummy_sampler_view = This->pipe->create_sampler_view(This->pipe, This->dummy_texture, &templ); + if (!This->dummy_sampler_view) return D3DERR_DRIVERINTERNALERROR; + + samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + samp.max_lod = 15.0f; + samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + samp.min_img_filter = PIPE_TEX_FILTER_NEAREST; + 
samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + samp.compare_mode = PIPE_TEX_COMPARE_NONE; + samp.compare_func = PIPE_FUNC_LEQUAL; + samp.normalized_coords = 1; + samp.seamless_cube_map = 1; + This->dummy_sampler_state = samp; } /* Allocate upload helper for drivers that suck (from st pov ;). */ - { - unsigned bind = 0; - This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS); - This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS); + This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS); + This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS); + This->driver_caps.user_cbufs = GET_PCAP(USER_CONSTANT_BUFFERS); + + if (!This->driver_caps.user_vbufs) + This->vertex_uploader = u_upload_create(This->pipe, 65536, 4, PIPE_BIND_VERTEX_BUFFER); + if (!This->driver_caps.user_ibufs) + This->index_uploader = u_upload_create(This->pipe, 128 * 1024, 4, PIPE_BIND_INDEX_BUFFER); + if (!This->driver_caps.user_cbufs) { + unsigned alignment = GET_PCAP(CONSTANT_BUFFER_OFFSET_ALIGNMENT); - if (!This->driver_caps.user_vbufs) bind |= PIPE_BIND_VERTEX_BUFFER; - if (!This->driver_caps.user_ibufs) bind |= PIPE_BIND_INDEX_BUFFER; - if (bind) - This->upload = u_upload_create(This->pipe, 1 << 20, 4, bind); + This->constbuf_uploader = u_upload_create(This->pipe, This->vs_const_size, + alignment, PIPE_BIND_CONSTANT_BUFFER); } This->driver_caps.window_space_position_support = GET_PCAP(TGSI_VS_WINDOW_SPACE_POSITION); @@ -429,10 +410,15 @@ NineDevice9_ctor( struct NineDevice9 *This, nine_ff_init(This); /* initialize fixed function code */ NineDevice9_SetDefaultState(This, FALSE); - NineDevice9_RestoreNonCSOState(This, ~0); + + { + struct pipe_poly_stipple stipple; + memset(&stipple, ~0, sizeof(stipple)); + This->pipe->set_polygon_stipple(This->pipe, &stipple); + } This->update = &This->state; - nine_update_state(This, ~0); + nine_update_state(This); ID3DPresentGroup_Release(This->present); @@ -452,12 +438,16 @@ NineDevice9_dtor( struct NineDevice9 *This ) 
nine_ff_fini(This); nine_state_clear(&This->state, TRUE); - if (This->upload) - u_upload_destroy(This->upload); + if (This->vertex_uploader) + u_upload_destroy(This->vertex_uploader); + if (This->index_uploader) + u_upload_destroy(This->index_uploader); + if (This->constbuf_uploader) + u_upload_destroy(This->constbuf_uploader); nine_bind(&This->record, NULL); - pipe_sampler_view_reference(&This->dummy_sampler, NULL); + pipe_sampler_view_reference(&This->dummy_sampler_view, NULL); pipe_resource_reference(&This->dummy_texture, NULL); pipe_resource_reference(&This->constbuf_vs, NULL); pipe_resource_reference(&This->constbuf_ps, NULL); @@ -465,6 +455,7 @@ NineDevice9_dtor( struct NineDevice9 *This ) FREE(This->state.vs_const_f); FREE(This->state.ps_const_f); FREE(This->state.vs_lconstf_temp); + FREE(This->state.ps_lconstf_temp); if (This->swapchains) { for (i = 0; i < This->nswapchains; ++i) @@ -547,10 +538,14 @@ NineDevice9_GetAvailableTextureMem( struct NineDevice9 *This ) HRESULT WINAPI NineDevice9_EvictManagedResources( struct NineDevice9 *This ) { - /* We don't really need to do anything here, but might want to free up - * the GPU virtual address space by killing pipe_resources. 
- */ - STUB(D3D_OK); + struct NineBaseTexture9 *tex; + + DBG("This=%p\n", This); + LIST_FOR_EACH_ENTRY(tex, &This->managed_textures, list2) { + NineBaseTexture9_UnLoad(tex); + } + + return D3D_OK; } HRESULT WINAPI @@ -599,11 +594,11 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This, UINT YHotSpot, IDirect3DSurface9 *pCursorBitmap ) { - /* TODO: hardware cursor */ struct NineSurface9 *surf = NineSurface9(pCursorBitmap); struct pipe_context *pipe = This->pipe; struct pipe_box box; struct pipe_transfer *transfer; + BOOL hw_cursor; void *ptr; DBG_FLAG(DBG_SWAPCHAIN, "This=%p XHotSpot=%u YHotSpot=%u " @@ -611,8 +606,15 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This, user_assert(pCursorBitmap, D3DERR_INVALIDCALL); - This->cursor.w = MIN2(surf->desc.Width, This->cursor.image->width0); - This->cursor.h = MIN2(surf->desc.Height, This->cursor.image->height0); + if (This->swapchains[0]->params.Windowed) { + This->cursor.w = MIN2(surf->desc.Width, 32); + This->cursor.h = MIN2(surf->desc.Height, 32); + hw_cursor = 1; /* always use hw cursor for windowed mode */ + } else { + This->cursor.w = MIN2(surf->desc.Width, This->cursor.image->width0); + This->cursor.h = MIN2(surf->desc.Height, This->cursor.image->height0); + hw_cursor = This->cursor.w == 32 && This->cursor.h == 32; + } u_box_origin_2d(This->cursor.w, This->cursor.h, &box); @@ -643,16 +645,21 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This, lock.pBits, lock.Pitch, This->cursor.w, This->cursor.h); - if (!This->cursor.software && - This->cursor.w == 32 && This->cursor.h == 32) - ID3DPresent_SetCursor(This->swapchains[0]->present, - lock.pBits, &This->cursor.hotspot, - This->cursor.visible); + if (hw_cursor) + hw_cursor = ID3DPresent_SetCursor(This->swapchains[0]->present, + lock.pBits, + &This->cursor.hotspot, + This->cursor.visible) == D3D_OK; NineSurface9_UnlockRect(surf); } pipe->transfer_unmap(pipe, transfer); + /* hide cursor if we emulate it */ + if (!hw_cursor) + 
ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, FALSE); + This->cursor.software = !hw_cursor; + return D3D_OK; } @@ -670,7 +677,7 @@ NineDevice9_SetCursorPosition( struct NineDevice9 *This, This->cursor.pos.y = Y; if (!This->cursor.software) - ID3DPresent_SetCursorPos(swap->present, &This->cursor.pos); + This->cursor.software = ID3DPresent_SetCursorPos(swap->present, &This->cursor.pos) != D3D_OK; } BOOL WINAPI @@ -683,7 +690,7 @@ NineDevice9_ShowCursor( struct NineDevice9 *This, This->cursor.visible = bShow && (This->cursor.hotspot.x != -1); if (!This->cursor.software) - ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, bShow); + This->cursor.software = ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, bShow) != D3D_OK; return old; } @@ -752,8 +759,8 @@ NineDevice9_Reset( struct NineDevice9 *This, for (i = 0; i < This->nswapchains; ++i) { D3DPRESENT_PARAMETERS *params = &pPresentationParameters[i]; hr = NineSwapChain9_Resize(This->swapchains[i], params, NULL); - if (FAILED(hr)) - return (hr == D3DERR_OUTOFVIDEOMEMORY) ? 
hr : D3DERR_DEVICELOST; + if (hr != D3D_OK) + return hr; } nine_pipe_context_clear(This); @@ -1108,6 +1115,13 @@ create_zs_or_rt_surface(struct NineDevice9 *This, default: break; } + if (compressed_format(Format)) { + const unsigned w = util_format_get_blockwidth(templ.format); + const unsigned h = util_format_get_blockheight(templ.format); + + user_assert(!(Width % w) && !(Height % h), D3DERR_INVALIDCALL); + } + if (Pool == D3DPOOL_DEFAULT && Format != D3DFMT_NULL) { /* resource_create doesn't return an error code, so check format here */ user_assert(templ.format != PIPE_FORMAT_NONE, D3DERR_INVALIDCALL); @@ -1173,6 +1187,8 @@ NineDevice9_UpdateSurface( struct NineDevice9 *This, { struct NineSurface9 *dst = NineSurface9(pDestinationSurface); struct NineSurface9 *src = NineSurface9(pSourceSurface); + int copy_width, copy_height; + RECT destRect; DBG("This=%p pSourceSurface=%p pDestinationSurface=%p " "pSourceRect=%p pDestPoint=%p\n", This, @@ -1184,13 +1200,75 @@ NineDevice9_UpdateSurface( struct NineDevice9 *This, if (pDestPoint) DBG("pDestPoint = (%u,%u)\n", pDestPoint->x, pDestPoint->y); + user_assert(dst && src, D3DERR_INVALIDCALL); + user_assert(dst->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL); user_assert(src->base.pool == D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL); user_assert(dst->desc.MultiSampleType == D3DMULTISAMPLE_NONE, D3DERR_INVALIDCALL); user_assert(src->desc.MultiSampleType == D3DMULTISAMPLE_NONE, D3DERR_INVALIDCALL); - return NineSurface9_CopySurface(dst, src, pDestPoint, pSourceRect); + user_assert(!src->lock_count, D3DERR_INVALIDCALL); + user_assert(!dst->lock_count, D3DERR_INVALIDCALL); + + user_assert(dst->desc.Format == src->desc.Format, D3DERR_INVALIDCALL); + user_assert(!depth_stencil_format(dst->desc.Format), D3DERR_INVALIDCALL); + + if (pSourceRect) { + copy_width = pSourceRect->right - pSourceRect->left; + copy_height = pSourceRect->bottom - pSourceRect->top; + + user_assert(pSourceRect->left >= 0 && + copy_width > 0 && + 
pSourceRect->right <= src->desc.Width && + pSourceRect->top >= 0 && + copy_height > 0 && + pSourceRect->bottom <= src->desc.Height, + D3DERR_INVALIDCALL); + } else { + copy_width = src->desc.Width; + copy_height = src->desc.Height; + } + + destRect.right = copy_width; + destRect.bottom = copy_height; + + if (pDestPoint) { + user_assert(pDestPoint->x >= 0 && pDestPoint->y >= 0, + D3DERR_INVALIDCALL); + destRect.right += pDestPoint->x; + destRect.bottom += pDestPoint->y; + } + + user_assert(destRect.right <= dst->desc.Width && + destRect.bottom <= dst->desc.Height, + D3DERR_INVALIDCALL); + + if (compressed_format(dst->desc.Format)) { + const unsigned w = util_format_get_blockwidth(dst->base.info.format); + const unsigned h = util_format_get_blockheight(dst->base.info.format); + + if (pDestPoint) { + user_assert(!(pDestPoint->x % w) && !(pDestPoint->y % h), + D3DERR_INVALIDCALL); + } + + if (pSourceRect) { + user_assert(!(pSourceRect->left % w) && !(pSourceRect->top % h), + D3DERR_INVALIDCALL); + } + if (!(copy_width == src->desc.Width && + copy_width == dst->desc.Width && + copy_height == src->desc.Height && + copy_height == dst->desc.Height)) { + user_assert(!(copy_width % w) && !(copy_height % h), + D3DERR_INVALIDCALL); + } + } + + NineSurface9_CopyMemToDefault(dst, src, pDestPoint, pSourceRect); + + return D3D_OK; } HRESULT WINAPI @@ -1202,6 +1280,7 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This, struct NineBaseTexture9 *srcb = NineBaseTexture9(pSourceTexture); unsigned l, m; unsigned last_level = dstb->base.info.last_level; + RECT rect; DBG("This=%p pSourceTexture=%p pDestinationTexture=%p\n", This, pSourceTexture, pDestinationTexture); @@ -1227,10 +1306,6 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This, user_assert(dstb->base.type == srcb->base.type, D3DERR_INVALIDCALL); - /* TODO: We can restrict the update to the dirty portions of the source. - * Yes, this seems silly, but it's what MSDN says ... 
- */ - /* Find src level that matches dst level 0: */ user_assert(srcb->base.info.width0 >= dstb->base.info.width0 && srcb->base.info.height0 >= dstb->base.info.height0 && @@ -1254,9 +1329,25 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This, struct NineTexture9 *dst = NineTexture9(dstb); struct NineTexture9 *src = NineTexture9(srcb); - for (l = 0; l <= last_level; ++l, ++m) - NineSurface9_CopySurface(dst->surfaces[l], - src->surfaces[m], NULL, NULL); + if (src->dirty_rect.width == 0) + return D3D_OK; + + pipe_box_to_rect(&rect, &src->dirty_rect); + for (l = 0; l < m; ++l) + rect_minify_inclusive(&rect); + + for (l = 0; l <= last_level; ++l, ++m) { + fit_rect_format_inclusive(dst->base.base.info.format, + &rect, + dst->surfaces[l]->desc.Width, + dst->surfaces[l]->desc.Height); + NineSurface9_CopyMemToDefault(dst->surfaces[l], + src->surfaces[m], + (POINT *)&rect, + &rect); + rect_minify_inclusive(&rect); + } + u_box_origin_2d(0, 0, &src->dirty_rect); } else if (dstb->base.type == D3DRTYPE_CUBETEXTURE) { struct NineCubeTexture9 *dst = NineCubeTexture9(dstb); @@ -1265,10 +1356,25 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This, /* GPUs usually have them stored as arrays of mip-mapped 2D textures. 
*/ for (z = 0; z < 6; ++z) { + if (src->dirty_rect[z].width == 0) + continue; + + pipe_box_to_rect(&rect, &src->dirty_rect[z]); + for (l = 0; l < m; ++l) + rect_minify_inclusive(&rect); + for (l = 0; l <= last_level; ++l, ++m) { - NineSurface9_CopySurface(dst->surfaces[l * 6 + z], - src->surfaces[m * 6 + z], NULL, NULL); + fit_rect_format_inclusive(dst->base.base.info.format, + &rect, + dst->surfaces[l * 6 + z]->desc.Width, + dst->surfaces[l * 6 + z]->desc.Height); + NineSurface9_CopyMemToDefault(dst->surfaces[l * 6 + z], + src->surfaces[m * 6 + z], + (POINT *)&rect, + &rect); + rect_minify_inclusive(&rect); } + u_box_origin_2d(0, 0, &src->dirty_rect[z]); m -= l; } } else @@ -1276,9 +1382,12 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This, struct NineVolumeTexture9 *dst = NineVolumeTexture9(dstb); struct NineVolumeTexture9 *src = NineVolumeTexture9(srcb); + if (src->dirty_box.width == 0) + return D3D_OK; for (l = 0; l <= last_level; ++l, ++m) - NineVolume9_CopyVolume(dst->volumes[l], - src->volumes[m], 0, 0, 0, NULL); + NineVolume9_CopyMemToDefault(dst->volumes[l], + src->volumes[m], 0, 0, 0, NULL); + u_box_3d(0, 0, 0, 0, 0, 0, &src->dirty_box); } else{ assert(!"invalid texture type"); } @@ -1308,7 +1417,12 @@ NineDevice9_GetRenderTargetData( struct NineDevice9 *This, user_assert(dst->desc.MultiSampleType < 2, D3DERR_INVALIDCALL); user_assert(src->desc.MultiSampleType < 2, D3DERR_INVALIDCALL); - return NineSurface9_CopySurface(dst, src, NULL, NULL); + user_assert(src->desc.Width == dst->desc.Width, D3DERR_INVALIDCALL); + user_assert(src->desc.Height == dst->desc.Height, D3DERR_INVALIDCALL); + + NineSurface9_CopyDefaultToMem(dst, src); + + return D3D_OK; } HRESULT WINAPI @@ -1448,6 +1562,7 @@ NineDevice9_StretchRect( struct NineDevice9 *This, blit.filter = Filter == D3DTEXF_LINEAR ? PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST; blit.scissor_enable = FALSE; + blit.alpha_blend = FALSE; /* If both of a src and dst dimension are negative, flip them. 
*/ if (blit.dst.box.width < 0 && blit.src.box.width < 0) { @@ -1464,8 +1579,12 @@ NineDevice9_StretchRect( struct NineDevice9 *This, user_assert(!scaled || dst != src, D3DERR_INVALIDCALL); user_assert(!scaled || - !NineSurface9_IsOffscreenPlain(dst) || + !NineSurface9_IsOffscreenPlain(dst), D3DERR_INVALIDCALL); + user_assert(!NineSurface9_IsOffscreenPlain(dst) || NineSurface9_IsOffscreenPlain(src), D3DERR_INVALIDCALL); + user_assert(NineSurface9_IsOffscreenPlain(dst) || + dst->desc.Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL), + D3DERR_INVALIDCALL); user_assert(!scaled || (!util_format_is_compressed(dst->base.info.format) && !util_format_is_compressed(src->base.info.format)), @@ -1561,11 +1680,8 @@ NineDevice9_ColorFill( struct NineDevice9 *This, } d3dcolor_to_pipe_color_union(&rgba, color); - fallback = - !This->screen->is_format_supported(This->screen, surf->base.info.format, - surf->base.info.target, - surf->base.info.nr_samples, - PIPE_BIND_RENDER_TARGET); + fallback = !(surf->base.info.bind & PIPE_BIND_RENDER_TARGET); + if (!fallback) { psurf = NineSurface9_GetSurface(surf, 0); if (!psurf) @@ -1774,7 +1890,7 @@ NineDevice9_Clear( struct NineDevice9 *This, return D3D_OK; d3dcolor_to_pipe_color_union(&rgba, Color); - nine_update_state(This, NINE_STATE_FB); + nine_update_state_framebuffer(This); rect.x1 = This->state.viewport.X; rect.y1 = This->state.viewport.Y; @@ -2012,8 +2128,10 @@ NineDevice9_SetLight( struct NineDevice9 *This, return E_OUTOFMEMORY; state->ff.num_lights = N; - for (; n < Index; ++n) + for (; n < Index; ++n) { + memset(&state->ff.light[n], 0, sizeof(D3DLIGHT9)); state->ff.light[n].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID; + } } state->ff.light[Index] = *pLight; @@ -2508,6 +2626,7 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This, DWORD Value ) { struct nine_state *state = This->update; + int bumpmap_index = -1; DBG("Stage=%u Type=%u Value=%08x\n", Stage, Type, Value); nine_dump_D3DTSS_value(DBG_FF, Type, Value); @@ 
-2516,6 +2635,36 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This, user_assert(Type < Elements(state->ff.tex_stage[0]), D3DERR_INVALIDCALL); state->ff.tex_stage[Stage][Type] = Value; + switch (Type) { + case D3DTSS_BUMPENVMAT00: + bumpmap_index = 4 * Stage; + break; + case D3DTSS_BUMPENVMAT10: + bumpmap_index = 4 * Stage + 1; + break; + case D3DTSS_BUMPENVMAT01: + bumpmap_index = 4 * Stage + 2; + break; + case D3DTSS_BUMPENVMAT11: + bumpmap_index = 4 * Stage + 3; + break; + case D3DTSS_BUMPENVLSCALE: + bumpmap_index = 4 * 8 + 2 * Stage; + break; + case D3DTSS_BUMPENVLOFFSET: + bumpmap_index = 4 * 8 + 2 * Stage + 1; + break; + case D3DTSS_TEXTURETRANSFORMFLAGS: + state->changed.group |= NINE_STATE_PS1X_SHADER; + break; + default: + break; + } + + if (bumpmap_index >= 0) { + state->bumpmap_vars[bumpmap_index] = Value; + state->changed.group |= NINE_STATE_PS_CONST; + } state->changed.group |= NINE_STATE_FF_PSSTAGES; state->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32); @@ -2560,12 +2709,11 @@ NineDevice9_SetSamplerState( struct NineDevice9 *This, if (Sampler >= D3DDMAPSAMPLER) Sampler = Sampler - D3DDMAPSAMPLER + NINE_MAX_SAMPLERS_PS; - state->samp[Sampler][Type] = Value; - state->changed.group |= NINE_STATE_SAMPLER; - state->changed.sampler[Sampler] |= 1 << Type; - - if (Type == D3DSAMP_SRGBTEXTURE) - state->changed.srgb = TRUE; + if (state->samp[Sampler][Type] != Value || unlikely(This->is_recording)) { + state->samp[Sampler][Type] = Value; + state->changed.group |= NINE_STATE_SAMPLER; + state->changed.sampler[Sampler] |= 1 << Type; + } return D3D_OK; } @@ -2724,7 +2872,7 @@ NineDevice9_DrawPrimitive( struct NineDevice9 *This, DBG("iface %p, PrimitiveType %u, StartVertex %u, PrimitiveCount %u\n", This, PrimitiveType, StartVertex, PrimitiveCount); - nine_update_state(This, ~0); + nine_update_state(This); init_draw_info(&info, This, PrimitiveType, PrimitiveCount); info.indexed = FALSE; @@ -2757,7 +2905,7 @@ NineDevice9_DrawIndexedPrimitive( 
struct NineDevice9 *This, user_assert(This->state.idxbuf, D3DERR_INVALIDCALL); user_assert(This->state.vdecl, D3DERR_INVALIDCALL); - nine_update_state(This, ~0); + nine_update_state(This); init_draw_info(&info, This, PrimitiveType, PrimitiveCount); info.indexed = TRUE; @@ -2789,7 +2937,7 @@ NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This, user_assert(pVertexStreamZeroData && VertexStreamZeroStride, D3DERR_INVALIDCALL); - nine_update_state(This, ~0); + nine_update_state(This); init_draw_info(&info, This, PrimitiveType, PrimitiveCount); info.indexed = FALSE; @@ -2803,13 +2951,16 @@ NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This, vtxbuf.buffer = NULL; vtxbuf.user_buffer = pVertexStreamZeroData; - if (!This->driver_caps.user_vbufs) - u_upload_data(This->upload, + if (!This->driver_caps.user_vbufs) { + u_upload_data(This->vertex_uploader, 0, (info.max_index + 1) * VertexStreamZeroStride, /* XXX */ vtxbuf.user_buffer, &vtxbuf.buffer_offset, &vtxbuf.buffer); + u_upload_unmap(This->vertex_uploader); + vtxbuf.user_buffer = NULL; + } This->pipe->set_vertex_buffers(This->pipe, 0, 1, &vtxbuf); @@ -2851,7 +3002,7 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This, user_assert(IndexDataFormat == D3DFMT_INDEX16 || IndexDataFormat == D3DFMT_INDEX32, D3DERR_INVALIDCALL); - nine_update_state(This, ~0); + nine_update_state(This); init_draw_info(&info, This, PrimitiveType, PrimitiveCount); info.indexed = TRUE; @@ -2872,23 +3023,28 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This, if (!This->driver_caps.user_vbufs) { const unsigned base = info.min_index * VertexStreamZeroStride; - u_upload_data(This->upload, + u_upload_data(This->vertex_uploader, base, (info.max_index - info.min_index + 1) * VertexStreamZeroStride, /* XXX */ (const uint8_t *)vbuf.user_buffer + base, &vbuf.buffer_offset, &vbuf.buffer); + u_upload_unmap(This->vertex_uploader); /* Won't be used: */ vbuf.buffer_offset -= base; + vbuf.user_buffer = NULL; } - if 
(!This->driver_caps.user_ibufs) - u_upload_data(This->upload, + if (!This->driver_caps.user_ibufs) { + u_upload_data(This->index_uploader, 0, info.count * ibuf.index_size, ibuf.user_buffer, &ibuf.offset, &ibuf.buffer); + u_upload_unmap(This->index_uploader); + ibuf.user_buffer = NULL; + } This->pipe->set_vertex_buffers(This->pipe, 0, 1, &vbuf); This->pipe->set_index_buffer(This->pipe, &ibuf); @@ -2935,7 +3091,7 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This, if (!screen->get_param(screen, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS)) STUB(D3DERR_INVALIDCALL); - nine_update_state(This, ~0); + nine_update_state(This); /* TODO: Create shader with stream output. */ STUB(D3DERR_INVALIDCALL); @@ -3105,6 +3261,13 @@ NineDevice9_SetVertexShader( struct NineDevice9 *This, DBG("This=%p pShader=%p\n", This, pShader); + if (!This->is_recording && state->vs == (struct NineVertexShader9*)pShader) + return D3D_OK; + + /* ff -> non-ff: commit back non-ff constants */ + if (!state->vs && pShader) + state->commit |= NINE_STATE_COMMIT_CONST_VS; + nine_bind(&state->vs, pShader); state->changed.group |= NINE_STATE_VS; @@ -3139,6 +3302,12 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This, return D3D_OK; user_assert(pConstantData, D3DERR_INVALIDCALL); + if (!This->is_recording) { + if (!memcmp(&state->vs_const_f[StartRegister * 4], pConstantData, + Vector4fCount * 4 * sizeof(state->vs_const_f[0]))) + return D3D_OK; + } + memcpy(&state->vs_const_f[StartRegister * 4], pConstantData, Vector4fCount * 4 * sizeof(state->vs_const_f[0])); @@ -3188,6 +3357,11 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This, user_assert(pConstantData, D3DERR_INVALIDCALL); if (This->driver_caps.vs_integer) { + if (!This->is_recording) { + if (!memcmp(&state->vs_const_i[StartRegister][0], pConstantData, + Vector4iCount * sizeof(state->vs_const_i[0]))) + return D3D_OK; + } memcpy(&state->vs_const_i[StartRegister][0], pConstantData, Vector4iCount * sizeof(state->vs_const_i[0])); @@ 
-3252,6 +3426,16 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This, user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL); user_assert(pConstantData, D3DERR_INVALIDCALL); + if (!This->is_recording) { + bool noChange = true; + for (i = 0; i < BoolCount; i++) { + if (!!state->vs_const_b[StartRegister + i] != !!pConstantData[i]) + noChange = false; + } + if (noChange) + return D3D_OK; + } + for (i = 0; i < BoolCount; i++) state->vs_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0; @@ -3433,6 +3617,13 @@ NineDevice9_SetPixelShader( struct NineDevice9 *This, DBG("This=%p pShader=%p\n", This, pShader); + if (!This->is_recording && state->ps == (struct NinePixelShader9*)pShader) + return D3D_OK; + + /* ff -> non-ff: commit back non-ff constants */ + if (!state->ps && pShader) + state->commit |= NINE_STATE_COMMIT_CONST_PS; + nine_bind(&state->ps, pShader); state->changed.group |= NINE_STATE_PS; @@ -3473,6 +3664,12 @@ NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This, return D3D_OK; user_assert(pConstantData, D3DERR_INVALIDCALL); + if (!This->is_recording) { + if (!memcmp(&state->ps_const_f[StartRegister * 4], pConstantData, + Vector4fCount * 4 * sizeof(state->ps_const_f[0]))) + return D3D_OK; + } + memcpy(&state->ps_const_f[StartRegister * 4], pConstantData, Vector4fCount * 4 * sizeof(state->ps_const_f[0])); @@ -3522,6 +3719,11 @@ NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This, user_assert(pConstantData, D3DERR_INVALIDCALL); if (This->driver_caps.ps_integer) { + if (!This->is_recording) { + if (!memcmp(&state->ps_const_i[StartRegister][0], pConstantData, + Vector4iCount * sizeof(state->ps_const_i[0]))) + return D3D_OK; + } memcpy(&state->ps_const_i[StartRegister][0], pConstantData, Vector4iCount * sizeof(state->ps_const_i[0])); @@ -3585,6 +3787,16 @@ NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This, user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL); 
user_assert(pConstantData, D3DERR_INVALIDCALL); + if (!This->is_recording) { + bool noChange = true; + for (i = 0; i < BoolCount; i++) { + if (!!state->ps_const_b[StartRegister + i] != !!pConstantData[i]) + noChange = false; + } + if (noChange) + return D3D_OK; + } + for (i = 0; i < BoolCount; i++) state->ps_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0; diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h index 7460745..98d9c4d 100644 --- a/src/gallium/state_trackers/nine/device9.h +++ b/src/gallium/state_trackers/nine/device9.h @@ -69,6 +69,7 @@ struct NineDevice9 struct nine_state state; /* device state */ struct list_head update_textures; + struct list_head managed_textures; boolean is_recording; boolean in_scene; @@ -83,7 +84,8 @@ struct NineDevice9 uint16_t max_ps_const_f; struct pipe_resource *dummy_texture; - struct pipe_sampler_view *dummy_sampler; + struct pipe_sampler_view *dummy_sampler_view; + struct pipe_sampler_state dummy_sampler_state; struct gen_mipmap_state *gen_mipmap; @@ -113,6 +115,7 @@ struct NineDevice9 struct { boolean user_vbufs; boolean user_ibufs; + boolean user_cbufs; boolean window_space_position_support; boolean vs_integer; boolean ps_integer; @@ -122,7 +125,9 @@ struct NineDevice9 boolean buggy_barycentrics; } driver_bugs; - struct u_upload_mgr *upload; + struct u_upload_mgr *vertex_uploader; + struct u_upload_mgr *index_uploader; + struct u_upload_mgr *constbuf_uploader; struct nine_range_pool range_pool; @@ -180,10 +185,6 @@ NineDevice9_GetCSO( struct NineDevice9 *This ); const D3DCAPS9 * NineDevice9_GetCaps( struct NineDevice9 *This ); -/* Mask: 0x1 = constant buffers, 0x2 = stipple */ -void -NineDevice9_RestoreNonCSOState( struct NineDevice9 *This, unsigned mask ); - /*** Direct3D public ***/ HRESULT WINAPI diff --git a/src/gallium/state_trackers/nine/nine_ff.c b/src/gallium/state_trackers/nine/nine_ff.c index 8a53f0d..fe8933b 100644 --- 
a/src/gallium/state_trackers/nine/nine_ff.c +++ b/src/gallium/state_trackers/nine/nine_ff.c @@ -22,6 +22,7 @@ #include "tgsi/tgsi_dump.h" #include "util/u_box.h" #include "util/u_hash_table.h" +#include "util/u_upload_mgr.h" #define NINE_TGSI_LAZY_DEVS 1 @@ -30,13 +31,6 @@ #define NINE_FF_NUM_VS_CONST 256 #define NINE_FF_NUM_PS_CONST 24 -#define NINED3DTSS_TCI_DISABLE 0 -#define NINED3DTSS_TCI_PASSTHRU 1 -#define NINED3DTSS_TCI_CAMERASPACENORMAL 2 -#define NINED3DTSS_TCI_CAMERASPACEPOSITION 3 -#define NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR 4 -#define NINED3DTSS_TCI_SPHEREMAP 5 - struct fvec4 { float x, y, z, w; @@ -63,16 +57,20 @@ struct nine_ff_vs_key uint32_t fog_range : 1; uint32_t color0in_one : 1; uint32_t color1in_one : 1; - uint32_t pad1 : 8; - uint32_t tc_gen : 24; /* 8 * 3 bits */ - uint32_t pad2 : 8; - uint32_t tc_idx : 24; + uint32_t fog : 1; + uint32_t pad1 : 7; + uint32_t tc_dim_input: 16; /* 8 * 2 bits */ + uint32_t pad2 : 16; + uint32_t tc_dim_output: 24; /* 8 * 3 bits */ uint32_t pad3 : 8; - uint32_t tc_dim : 24; /* 8 * 3 bits */ + uint32_t tc_gen : 24; /* 8 * 3 bits */ uint32_t pad4 : 8; + uint32_t tc_idx : 24; + uint32_t pad5 : 8; + uint32_t passthrough; }; - uint64_t value64[2]; /* don't forget to resize VertexShader9.ff_key */ - uint32_t value32[4]; + uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */ + uint32_t value32[6]; }; }; @@ -106,15 +104,18 @@ struct nine_ff_ps_key uint32_t alphaarg2 : 3; uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */ uint32_t textarget : 2; /* 1D/2D/3D/CUBE */ - uint32_t projected : 1; + uint32_t pad : 1; /* that's 32 bit exactly */ } ts[8]; - uint32_t fog : 1; /* for vFog with programmable VS */ + uint32_t projected : 16; + uint32_t fog : 1; /* for vFog coming from VS */ uint32_t fog_mode : 2; - uint32_t specular : 1; /* 9 32-bit words with this */ + uint32_t specular : 1; + uint32_t pad1 : 12; /* 9 32-bit words with this */ uint8_t colorarg_b4[3]; uint8_t colorarg_b5[3]; uint8_t 
alphaarg_b4[3]; /* 11 32-bit words plus a byte */ + uint8_t pad2[3]; }; uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */ uint32_t value32[12]; @@ -222,7 +223,6 @@ static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override) * CONST[28].x___ RS.FogEnd * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart) * CONST[28].__z_ RS.FogDensity - * CONST[29] RS.FogColor * CONST[30].x___ TWEENFACTOR * @@ -334,16 +334,15 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) { const struct nine_ff_vs_key *key = vs->key; struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_VERTEX); - struct ureg_dst oPos, oCol[2], oTex[8], oPsz, oFog; - struct ureg_dst rCol[2]; /* oCol if no fog, TEMP otherwise */ + struct ureg_dst oPos, oCol[2], oPsz, oFog; struct ureg_dst rVtx, rNrm; struct ureg_dst r[8]; struct ureg_dst AR; - struct ureg_dst tmp, tmp_x, tmp_z; + struct ureg_dst tmp, tmp_x, tmp_y, tmp_z; unsigned i, c; unsigned label[32], l = 0; unsigned num_r = 8; - boolean need_rNrm = key->lighting || key->pointscale; + boolean need_rNrm = key->lighting || key->pointscale || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL); boolean need_rVtx = key->lighting || key->fog_mode; const unsigned texcoord_sn = get_texcoord_sn(device->screen); @@ -406,9 +405,9 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) if (key->vertexpointsize) vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE); - if (key->vertexblend_indexed) + if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES); - if (key->vertexblend) + if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT); if (key->vertextween) { vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1)); @@ -420,19 +419,16 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) oPos = 
ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */ oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0)); oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1)); + if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { + oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 0); + oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X); + } if (key->vertexpointsize || key->pointscale) { oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0, TGSI_WRITEMASK_X, 0, 1); oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X); } - if (key->fog_mode) { - /* We apply fog to the vertex colors, oFog is for programmable shaders only ? - */ - oFog = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_FOG, 0, - TGSI_WRITEMASK_X, 0, 1); - oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X); - } /* Declare TEMPs: */ @@ -440,18 +436,11 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) r[i] = ureg_DECL_local_temporary(ureg); tmp = r[0]; tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); + tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); if (key->lighting || key->vertexblend) AR = ureg_DECL_address(ureg); - if (key->fog_mode) { - rCol[0] = r[2]; - rCol[1] = r[3]; - } else { - rCol[0] = oCol[0]; - rCol[1] = oCol[1]; - } - rVtx = ureg_writemask(r[1], TGSI_WRITEMASK_XYZ); rNrm = ureg_writemask(r[2], TGSI_WRITEMASK_XYZ); @@ -560,8 +549,6 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_CLAMP(ureg, oPsz, vs->aPsz, _XXXX(cPsz1), _YYYY(cPsz1)); #endif } else if (key->pointscale) { - struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); - struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27); @@ -582,72 +569,85 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) #endif } - /* Texture coordinate generation: - * XXX: D3DTTFF_PROJECTED, 
transform matrix - */ for (i = 0; i < 8; ++i) { - struct ureg_dst dst[5]; - struct ureg_src src; - unsigned c; + struct ureg_dst oTex, input_coord, transformed, t; + unsigned c, writemask; const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7; const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7; - const unsigned dim = (key->tc_dim >> (i * 3)) & 0x7; + unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3); + const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7; + /* No texture output of index s */ if (tci == NINED3DTSS_TCI_DISABLE) continue; - oTex[i] = ureg_DECL_output(ureg, texcoord_sn, i); - - if (tci == NINED3DTSS_TCI_PASSTHRU) - vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx)); - - if (!dim) { - dst[c = 4] = oTex[i]; - } else { - dst[4] = r[5]; - src = ureg_src(dst[4]); - for (c = 0; c < (dim - 1); ++c) - dst[c] = ureg_writemask(tmp, (1 << dim) - 1); - dst[c] = ureg_writemask(oTex[i], (1 << dim) - 1); - } + oTex = ureg_DECL_output(ureg, texcoord_sn, i); + input_coord = r[5]; + transformed = r[6]; + /* Get the coordinate */ switch (tci) { case NINED3DTSS_TCI_PASSTHRU: - ureg_MOV(ureg, dst[4], vs->aTex[idx]); + /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx * + * Else the idx is used only to determine wrapping mode. 
*/ + vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx)); + ureg_MOV(ureg, input_coord, vs->aTex[idx]); break; case NINED3DTSS_TCI_CAMERASPACENORMAL: - assert(dim <= 3); - ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rNrm)); - ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); + ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rNrm)); + ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); + dim_input = 4; break; case NINED3DTSS_TCI_CAMERASPACEPOSITION: - ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rVtx)); - ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); + ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx)); + ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); + dim_input = 4; break; case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: tmp.WriteMask = TGSI_WRITEMASK_XYZ; ureg_DP3(ureg, tmp_x, ureg_src(rVtx), ureg_src(rNrm)); ureg_MUL(ureg, tmp, ureg_src(rNrm), _X(tmp)); ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); - ureg_SUB(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rVtx), ureg_src(tmp)); - ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); + ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx), ureg_src(tmp)); + ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); + dim_input = 4; tmp.WriteMask = TGSI_WRITEMASK_XYZW; break; case NINED3DTSS_TCI_SPHEREMAP: assert(!"TODO"); break; default: + assert(0); break; } - if (!dim) - continue; - dst[c].WriteMask = ~dst[c].WriteMask; - if (dst[c].WriteMask) - ureg_MOV(ureg, dst[c], src); /* store untransformed components */ - dst[c].WriteMask = ~dst[c].WriteMask; - if (dim > 0) ureg_MUL(ureg, dst[0], _XXXX(src), _CONST(128 + i * 4)); - if (dim > 1) ureg_MAD(ureg, dst[1], 
_YYYY(src), _CONST(129 + i * 4), ureg_src(tmp)); - if (dim > 2) ureg_MAD(ureg, dst[2], _ZZZZ(src), _CONST(130 + i * 4), ureg_src(tmp)); - if (dim > 3) ureg_MAD(ureg, dst[3], _WWWW(src), _CONST(131 + i * 4), ureg_src(tmp)); + + /* Apply the transformation */ + /* dim_output == 0 => do not transform the components. + * XYZRHW also disables transformation */ + if (!dim_output || key->position_t) { + transformed = input_coord; + writemask = TGSI_WRITEMASK_XYZW; + } else { + for (c = 0; c < dim_output; c++) { + t = ureg_writemask(transformed, 1 << c); + switch (dim_input) { + /* dim_input = 1 2 3: -> we add trailing 1 to input*/ + case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c))); + break; + case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); + ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c))); + break; + case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); + ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c))); + break; + case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break; + default: + assert(0); + } + } + writemask = (1 << dim_output) - 1; + } + + ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed)); } /* === Lighting: @@ -692,8 +692,6 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) * specular += light.specular * atten * powFact; */ if (key->lighting) { - struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); - struct ureg_dst rAtt = ureg_writemask(r[1], TGSI_WRITEMASK_W); struct ureg_dst rHit = ureg_writemask(r[3], TGSI_WRITEMASK_XYZ); struct ureg_dst rMid = ureg_writemask(r[4], TGSI_WRITEMASK_XYZ); @@ -851,22 +849,22 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE); ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W ), vs->mtlA, 
vs->mtlE); } - ureg_MAD(ureg, rCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp)); - ureg_MUL(ureg, rCol[1], ureg_src(rS), vs->mtlS); + ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp)); + ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS); } else /* COLOR */ if (key->darkness) { if (key->mtl_emissive == 0 && key->mtl_ambient == 0) { - ureg_MAD(ureg, rCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19)); + ureg_MAD(ureg, oCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19)); } else { - ureg_MAD(ureg, ureg_writemask(rCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE); + ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE); ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), vs->mtlA, vs->mtlE); - ureg_ADD(ureg, ureg_writemask(rCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp)); + ureg_ADD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp)); } - ureg_MUL(ureg, rCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS); + ureg_MUL(ureg, oCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS); } else { - ureg_MOV(ureg, rCol[0], vs->aCol[0]); - ureg_MOV(ureg, rCol[1], vs->aCol[1]); + ureg_MOV(ureg, oCol[0], vs->aCol[0]); + ureg_MOV(ureg, oCol[1], vs->aCol[1]); } /* === Process fog. 
@@ -874,10 +872,6 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) * exp(x) = ex2(log2(e) * x) */ if (key->fog_mode) { - /* Fog doesn't affect alpha, TODO: combine with light code output */ - ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), _W(rCol[0])); - ureg_MOV(ureg, ureg_writemask(oCol[1], TGSI_WRITEMASK_W), _W(rCol[1])); - if (key->position_t) { ureg_MOV(ureg, ureg_saturate(tmp_x), ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W)); } else @@ -905,10 +899,58 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28))); } ureg_MOV(ureg, oFog, _X(tmp)); - ureg_LRP(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _X(tmp), ureg_src(rCol[0]), _CONST(29)); - ureg_LRP(ureg, ureg_writemask(oCol[1], TGSI_WRITEMASK_XYZ), _X(tmp), ureg_src(rCol[1]), _CONST(29)); + } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) { + ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W)); } + if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) { + struct ureg_src input; + struct ureg_dst output; + input = vs->aWgt; + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 18); + ureg_MOV(ureg, output, input); + } + if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) { + struct ureg_src input; + struct ureg_dst output; + input = vs->aInd; + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19); + ureg_MOV(ureg, output, input); + } + if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) { + struct ureg_src input; + struct ureg_dst output; + input = vs->aNrm; + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20); + ureg_MOV(ureg, output, input); + } + if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) { + struct ureg_src input; + struct ureg_dst output; + input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT); + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21); + ureg_MOV(ureg, output, input); + } + if (key->passthrough & 
(1 << NINE_DECLUSAGE_BINORMAL)) { + struct ureg_src input; + struct ureg_dst output; + input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL); + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22); + ureg_MOV(ureg, output, input); + } + if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { + struct ureg_src input; + struct ureg_dst output; + input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG); + input = ureg_scalar(input, TGSI_SWIZZLE_X); + output = oFog; + ureg_MOV(ureg, output, input); + } + if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) { + (void) 0; /* TODO: replace z of position output ? */ + } + + if (key->position_t && device->driver_caps.window_space_position_support) ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); @@ -1270,10 +1312,18 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) if (key->ts[s].colorop == D3DTOP_BUMPENVMAP || key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE) { } - if (key->ts[s].projected) - ureg_TXP(ureg, ps.rTex, target, ps.vT[s], ps.s[s]); - else + if (key->projected & (3 << (s *2))) { + unsigned dim = 1 + ((key->projected >> (2 * s)) & 3); + if (dim == 4) + ureg_TXP(ureg, ps.rTex, target, ps.vT[s], ps.s[s]); + else { + ureg_RCP(ureg, ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X), ureg_scalar(ps.vT[s], dim-1)); + ureg_MUL(ureg, ps.rTmp, _XXXX(ps.rTmpSrc), ps.vT[s]); + ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]); + } + } else { ureg_TEX(ureg, ps.rTex, target, ps.vT[s], ps.s[s]); + } } if (s == 0 && @@ -1316,6 +1366,10 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) colorarg[2] != alphaarg[2]) dst.WriteMask = TGSI_WRITEMASK_XYZ; + /* Special DOTPRODUCT behaviour (see wine tests) */ + if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3) + dst.WriteMask = TGSI_WRITEMASK_XYZW; + if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]); if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]); if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]); @@ 
-1406,12 +1460,18 @@ nine_ff_get_vs(struct NineDevice9 *device) else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) { s = usage / NINE_DECLUSAGE_COUNT; if (s < 8) - input_texture_coord[s] = 1; + input_texture_coord[s] = nine_decltype_get_dim(state->vdecl->decls[i].Type); else DBG("FF given texture coordinate >= 8. Ignoring\n"); - } + } else if (usage < NINE_DECLUSAGE_NONE) + key.passthrough |= 1 << usage; } } + /* ff vs + ps 3.0: some elements are passed to the ps (wine test). + * We do restrict to indices 0 */ + key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) | + (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) | + (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE)); if (!key.vertexpointsize) key.pointscale = !!state->rs[D3DRS_POINTSCALEENABLE]; @@ -1427,6 +1487,7 @@ nine_ff_get_vs(struct NineDevice9 *device) key.mtl_specular = state->rs[D3DRS_SPECULARMATERIALSOURCE]; key.mtl_emissive = state->rs[D3DRS_EMISSIVEMATERIALSOURCE]; } + key.fog = !!state->rs[D3DRS_FOGENABLE]; key.fog_mode = state->rs[D3DRS_FOGENABLE] ? 
state->rs[D3DRS_FOGVERTEXMODE] : 0; if (key.fog_mode) key.fog_range = !key.position_t && state->rs[D3DRS_RANGEFOGENABLE]; @@ -1448,7 +1509,7 @@ nine_ff_get_vs(struct NineDevice9 *device) for (s = 0; s < 8; ++s) { unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1; - unsigned dim = MIN2(state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7, 4); + unsigned dim; if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU) gen = NINED3DTSS_TCI_PASSTHRU; @@ -1458,7 +1519,14 @@ nine_ff_get_vs(struct NineDevice9 *device) key.tc_gen |= gen << (s * 3); key.tc_idx |= (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7) << (s * 3); - key.tc_dim |= dim << (s * 3); + key.tc_dim_input |= ((input_texture_coord[s]-1) & 0x3) << (s * 2); + + dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7; + if (dim > 4) + dim = input_texture_coord[s]; + if (dim == 1) /* NV behaviour */ + dim = 0; + key.tc_dim_output |= dim << (s * 3); } vs = util_hash_table_get(device->ff.ht_vs, &key); @@ -1473,6 +1541,7 @@ nine_ff_get_vs(struct NineDevice9 *device) memcpy(&vs->ff_key, &key, sizeof(vs->ff_key)); err = util_hash_table_set(device->ff.ht_vs, &vs->ff_key, vs); + (void)err; assert(err == PIPE_OK); device->ff.num_vs++; NineUnknown_ConvertRefToBind(NineUnknown(vs)); @@ -1543,8 +1612,6 @@ nine_ff_get_ps(struct NineDevice9 *device) } key.ts[s].resultarg = state->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP; - key.ts[s].projected = !!(state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & D3DTTFF_PROJECTED); - if (state->texture[s]) { switch (state->texture[s]->base.type) { case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break; @@ -1558,10 +1625,14 @@ nine_ff_get_ps(struct NineDevice9 *device) key.ts[s].textarget = 1; } } + + key.projected = nine_ff_get_projected_key(state); + for (; s < 8; ++s) key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE; if (state->rs[D3DRS_FOGENABLE]) key.fog_mode = state->rs[D3DRS_FOGTABLEMODE]; + key.fog = !!state->rs[D3DRS_FOGENABLE]; ps = 
util_hash_table_get(device->ff.ht_ps, &key); if (ps) @@ -1573,6 +1644,7 @@ nine_ff_get_ps(struct NineDevice9 *device) memcpy(&ps->ff_key, &key, sizeof(ps->ff_key)); err = util_hash_table_set(device->ff.ht_ps, &ps->ff_key, ps); + (void)err; assert(err == PIPE_OK); device->ff.num_ps++; NineUnknown_ConvertRefToBind(NineUnknown(ps)); @@ -1689,7 +1761,6 @@ nine_ff_load_point_and_fog_params(struct NineDevice9 *device) if (isinf(dst[28].y)) dst[28].y = 0.0f; dst[28].z = asfloat(state->rs[D3DRS_FOGDENSITY]); - d3dcolor_to_rgba(&dst[29].x, state->rs[D3DRS_FOGCOLOR]); } static void @@ -1703,7 +1774,7 @@ nine_ff_load_tex_matrices(struct NineDevice9 *device) return; for (s = 0; s < 8; ++s) { if (IS_D3DTS_DIRTY(state, TEXTURE0 + s)) - M[32 + s] = *nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE); + nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE)); } } @@ -1762,28 +1833,22 @@ nine_ff_load_viewport_info(struct NineDevice9 *device) void nine_ff_update(struct NineDevice9 *device) { - struct pipe_context *pipe = device->pipe; struct nine_state *state = &device->state; + struct pipe_constant_buffer cb; DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps); /* NOTE: the only reference belongs to the hash table */ - if (!device->state.vs) + if (!device->state.vs) { device->ff.vs = nine_ff_get_vs(device); - if (!device->state.ps) + device->state.changed.group |= NINE_STATE_VS; + } + if (!device->state.ps) { device->ff.ps = nine_ff_get_ps(device); + device->state.changed.group |= NINE_STATE_PS; + } if (!device->state.vs) { - if (device->state.ff.clobber.vs_const) { - device->state.ff.clobber.vs_const = FALSE; - device->state.changed.group |= - NINE_STATE_FF_VSTRANSF | - NINE_STATE_FF_MATERIAL | - NINE_STATE_FF_LIGHTING | - NINE_STATE_FF_OTHER; - device->state.ff.changed.transform[0] |= 0xff000c; - device->state.ff.changed.transform[8] |= 0xff; - } nine_ff_load_vs_transforms(device); nine_ff_load_tex_matrices(device); 
nine_ff_load_lights(device); @@ -1792,57 +1857,45 @@ nine_ff_update(struct NineDevice9 *device) memset(state->ff.changed.transform, 0, sizeof(state->ff.changed.transform)); - device->state.changed.group |= NINE_STATE_VS; - device->state.changed.group |= NINE_STATE_VS_CONST; - - if (device->prefer_user_constbuf) { - struct pipe_context *pipe = device->pipe; - struct pipe_constant_buffer cb; - cb.buffer_offset = 0; - cb.buffer = NULL; - cb.user_buffer = device->ff.vs_const; - cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float); - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb); - } else { - struct pipe_box box; - u_box_1d(0, NINE_FF_NUM_VS_CONST * 4 * sizeof(float), &box); - pipe->transfer_inline_write(pipe, device->constbuf_vs, 0, - 0, &box, - device->ff.vs_const, 0, 0); - nine_ranges_insert(&device->state.changed.vs_const_f, 0, NINE_FF_NUM_VS_CONST, - &device->range_pool); + cb.buffer_offset = 0; + cb.buffer = NULL; + cb.user_buffer = device->ff.vs_const; + cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float); + + if (!device->driver_caps.user_cbufs) { + u_upload_data(device->constbuf_uploader, + 0, + cb.buffer_size, + cb.user_buffer, + &cb.buffer_offset, + &cb.buffer); + u_upload_unmap(device->constbuf_uploader); + cb.user_buffer = NULL; } + state->pipe.cb_vs_ff = cb; + state->commit |= NINE_STATE_COMMIT_CONST_VS; } if (!device->state.ps) { - if (device->state.ff.clobber.ps_const) { - device->state.ff.clobber.ps_const = FALSE; - device->state.changed.group |= - NINE_STATE_FF_PSSTAGES | - NINE_STATE_FF_OTHER; - } nine_ff_load_ps_params(device); - device->state.changed.group |= NINE_STATE_PS; - device->state.changed.group |= NINE_STATE_PS_CONST; - - if (device->prefer_user_constbuf) { - struct pipe_context *pipe = device->pipe; - struct pipe_constant_buffer cb; - cb.buffer_offset = 0; - cb.buffer = NULL; - cb.user_buffer = device->ff.ps_const; - cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float); - pipe->set_constant_buffer(pipe, 
PIPE_SHADER_FRAGMENT, 0, &cb); - } else { - struct pipe_box box; - u_box_1d(0, NINE_FF_NUM_PS_CONST * 4 * sizeof(float), &box); - pipe->transfer_inline_write(pipe, device->constbuf_ps, 0, - 0, &box, - device->ff.ps_const, 0, 0); - nine_ranges_insert(&device->state.changed.ps_const_f, 0, NINE_FF_NUM_PS_CONST, - &device->range_pool); + cb.buffer_offset = 0; + cb.buffer = NULL; + cb.user_buffer = device->ff.ps_const; + cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float); + + if (!device->driver_caps.user_cbufs) { + u_upload_data(device->constbuf_uploader, + 0, + cb.buffer_size, + cb.user_buffer, + &cb.buffer_offset, + &cb.buffer); + u_upload_unmap(device->constbuf_uploader); + cb.user_buffer = NULL; } + state->pipe.cb_ps_ff = cb; + state->commit |= NINE_STATE_COMMIT_CONST_PS; } device->state.changed.group &= ~NINE_STATE_FF; diff --git a/src/gallium/state_trackers/nine/nine_ff.h b/src/gallium/state_trackers/nine/nine_ff.h index 7cefa65..9c33c76 100644 --- a/src/gallium/state_trackers/nine/nine_ff.h +++ b/src/gallium/state_trackers/nine/nine_ff.h @@ -3,6 +3,7 @@ #define _NINE_FF_H_ #include "device9.h" +#include "vertexdeclaration9.h" boolean nine_ff_init(struct NineDevice9 *); void nine_ff_fini(struct NineDevice9 *); @@ -29,4 +30,84 @@ nine_d3d_matrix_inverse_3x3(D3DMATRIX *, const D3DMATRIX *); void nine_d3d_matrix_transpose(D3DMATRIX *, const D3DMATRIX *); +#define NINED3DTSS_TCI_DISABLE 0 +#define NINED3DTSS_TCI_PASSTHRU 1 +#define NINED3DTSS_TCI_CAMERASPACENORMAL 2 +#define NINED3DTSS_TCI_CAMERASPACEPOSITION 3 +#define NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR 4 +#define NINED3DTSS_TCI_SPHEREMAP 5 + +static inline unsigned +nine_decltype_get_dim(BYTE type) +{ + switch (type) { + case D3DDECLTYPE_FLOAT1: return 1; + case D3DDECLTYPE_FLOAT2: return 2; + case D3DDECLTYPE_FLOAT3: return 3; + case D3DDECLTYPE_FLOAT4: return 4; + case D3DDECLTYPE_D3DCOLOR: return 1; + case D3DDECLTYPE_UBYTE4: return 4; + case D3DDECLTYPE_SHORT2: return 2; + case 
D3DDECLTYPE_SHORT4: return 4; + case D3DDECLTYPE_UBYTE4N: return 4; + case D3DDECLTYPE_SHORT2N: return 2; + case D3DDECLTYPE_SHORT4N: return 4; + case D3DDECLTYPE_USHORT2N: return 2; + case D3DDECLTYPE_USHORT4N: return 4; + case D3DDECLTYPE_UDEC3: return 3; + case D3DDECLTYPE_DEC3N: return 3; + case D3DDECLTYPE_FLOAT16_2: return 2; + case D3DDECLTYPE_FLOAT16_4: return 4; + default: + assert(!"Implementation error !"); + } + return 0; +} + +static inline uint16_t +nine_ff_get_projected_key(struct nine_state *state) +{ + unsigned s, i; + uint16_t projected = 0; + char input_texture_coord[8]; + memset(&input_texture_coord, 0, sizeof(input_texture_coord)); + + if (state->vdecl) { + for (i = 0; i < state->vdecl->nelems; i++) { + uint16_t usage = state->vdecl->usage_map[i]; + if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) { + s = usage / NINE_DECLUSAGE_COUNT; + if (s < 8) + input_texture_coord[s] = nine_decltype_get_dim(state->vdecl->decls[i].Type); + } + } + } + + for (s = 0; s < 8; ++s) { + unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1; + unsigned dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7; + unsigned proj = !!(state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & D3DTTFF_PROJECTED); + + if (!state->vs) { + if (dim > 4) + dim = input_texture_coord[s]; + + if (!dim && gen == NINED3DTSS_TCI_PASSTHRU) + dim = input_texture_coord[s]; + else if (!dim) + dim = 4; + + if (dim == 1) /* NV behaviour */ + proj = 0; + if (dim > input_texture_coord[s] && gen == NINED3DTSS_TCI_PASSTHRU) + proj = 0; + } else { + dim = 4; + } + if (proj) + projected |= (dim-1) << (2 * s); + } + return projected; +} + #endif /* _NINE_FF_H_ */ diff --git a/src/gallium/state_trackers/nine/nine_pipe.c b/src/gallium/state_trackers/nine/nine_pipe.c index 4cf37b9..2be30f7 100644 --- a/src/gallium/state_trackers/nine/nine_pipe.c +++ b/src/gallium/state_trackers/nine/nine_pipe.c @@ -27,7 +27,8 @@ #include "cso_cache/cso_context.h" void 
-nine_convert_dsa_state(struct cso_context *ctx, const DWORD *rs) +nine_convert_dsa_state(struct pipe_depth_stencil_alpha_state *dsa_state, + const DWORD *rs) { struct pipe_depth_stencil_alpha_state dsa; @@ -65,16 +66,15 @@ nine_convert_dsa_state(struct cso_context *ctx, const DWORD *rs) dsa.alpha.ref_value = (float)rs[D3DRS_ALPHAREF] / 255.0f; } - cso_set_depth_stencil_alpha(ctx, &dsa); + *dsa_state = dsa; } -/* TODO: Keep a static copy in device so we don't have to memset every time ? */ void -nine_convert_rasterizer_state(struct cso_context *ctx, const DWORD *rs) +nine_convert_rasterizer_state(struct pipe_rasterizer_state *rast_state, const DWORD *rs) { struct pipe_rasterizer_state rast; - memset(&rast, 0, sizeof(rast)); /* memcmp safety */ + memset(&rast, 0, sizeof(rast)); rast.flatshade = rs[D3DRS_SHADEMODE] == D3DSHADE_FLAT; /* rast.light_twoside = 0; */ @@ -92,7 +92,7 @@ nine_convert_rasterizer_state(struct cso_context *ctx, const DWORD *rs) /* rast.poly_stipple_enable = 0; */ /* rast.point_smooth = 0; */ rast.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT; - rast.point_quad_rasterization = !!rs[D3DRS_POINTSPRITEENABLE]; + rast.point_quad_rasterization = 1; rast.point_size_per_vertex = rs[NINED3DRS_VSPOINTSIZE]; rast.multisample = !!rs[D3DRS_MULTISAMPLEANTIALIAS]; rast.line_smooth = !!rs[D3DRS_ANTIALIASEDLINEENABLE]; @@ -110,12 +110,28 @@ nine_convert_rasterizer_state(struct cso_context *ctx, const DWORD *rs) /* rast.line_stipple_pattern = 0; */ rast.sprite_coord_enable = rs[D3DRS_POINTSPRITEENABLE] ? 0xff : 0x00; rast.line_width = 1.0f; - rast.point_size = rs[NINED3DRS_VSPOINTSIZE] ? 
1.0f : asfloat(rs[D3DRS_POINTSIZE]); /* XXX: D3DRS_POINTSIZE_MIN/MAX */ - rast.offset_units = asfloat(rs[D3DRS_DEPTHBIAS]) * asfloat(rs[NINED3DRS_ZBIASSCALE]); + if (rs[NINED3DRS_VSPOINTSIZE]) { + rast.point_size = 1.0f; + } else { + rast.point_size = CLAMP(asfloat(rs[D3DRS_POINTSIZE]), + asfloat(rs[D3DRS_POINTSIZE_MIN]), + asfloat(rs[D3DRS_POINTSIZE_MAX])); + } + /* offset_units has the ogl/d3d11 meaning. + * d3d9: offset = scale * dz + bias + * ogl/d3d11: offset = scale * dz + r * bias + * with r implementation dependant and is supposed to be + * the smallest value the depth buffer format can hold. + * In practice on current and past hw it seems to be 2^-23 + * for all formats except float formats where it varies depending + * on the content. + * For now use 1 << 23, but in the future perhaps add a way in gallium + * to get r for the format or get the gallium behaviour */ + rast.offset_units = asfloat(rs[D3DRS_DEPTHBIAS]) * (float)(1 << 23); rast.offset_scale = asfloat(rs[D3DRS_SLOPESCALEDEPTHBIAS]); /* rast.offset_clamp = 0.0f; */ - cso_set_rasterizer(ctx, &rast); + *rast_state = rast; } static inline void @@ -137,7 +153,7 @@ nine_convert_blend_state_fixup(struct pipe_blend_state *blend, const DWORD *rs) } void -nine_convert_blend_state(struct cso_context *ctx, const DWORD *rs) +nine_convert_blend_state(struct pipe_blend_state *blend_state, const DWORD *rs) { struct pipe_blend_state blend; @@ -181,7 +197,7 @@ nine_convert_blend_state(struct cso_context *ctx, const DWORD *rs) /* blend.force_srgb = !!rs[D3DRS_SRGBWRITEENABLE]; */ - cso_set_blend(ctx, &blend); + *blend_state = blend; } void @@ -239,8 +255,8 @@ nine_pipe_context_clear(struct NineDevice9 *This) cso_set_samplers(cso, PIPE_SHADER_VERTEX, 0, NULL); cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 0, NULL); - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 0, NULL); - pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0, 0, NULL); + cso_set_sampler_views(cso, PIPE_SHADER_VERTEX, 0, NULL); + 
cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 0, NULL); pipe->set_vertex_buffers(pipe, 0, This->caps.MaxStreams, NULL); pipe->set_index_buffer(pipe, NULL); diff --git a/src/gallium/state_trackers/nine/nine_pipe.h b/src/gallium/state_trackers/nine/nine_pipe.h index 43a7737..8611786 100644 --- a/src/gallium/state_trackers/nine/nine_pipe.h +++ b/src/gallium/state_trackers/nine/nine_pipe.h @@ -27,6 +27,7 @@ #include "pipe/p_format.h" #include "pipe/p_screen.h" #include "pipe/p_state.h" /* pipe_box */ +#include "util/macros.h" #include "util/u_rect.h" #include "util/u_format.h" #include "nine_helpers.h" @@ -36,9 +37,9 @@ struct cso_context; extern const enum pipe_format nine_d3d9_to_pipe_format_map[120]; extern const D3DFORMAT nine_pipe_to_d3d9_format_map[PIPE_FORMAT_COUNT]; -void nine_convert_dsa_state(struct cso_context *, const DWORD *); -void nine_convert_rasterizer_state(struct cso_context *, const DWORD *); -void nine_convert_blend_state(struct cso_context *, const DWORD *); +void nine_convert_dsa_state(struct pipe_depth_stencil_alpha_state *, const DWORD *); +void nine_convert_rasterizer_state(struct pipe_rasterizer_state *, const DWORD *); +void nine_convert_blend_state(struct pipe_blend_state *, const DWORD *); void nine_convert_sampler_state(struct cso_context *, int idx, const DWORD *); void nine_pipe_context_clear(struct NineDevice9 *); @@ -81,6 +82,49 @@ rect_to_pipe_box(struct pipe_box *dst, const RECT *src) dst->depth = 1; } +static inline void +pipe_box_to_rect(RECT *dst, const struct pipe_box *src) +{ + dst->left = src->x; + dst->right = src->x + src->width; + dst->top = src->y; + dst->bottom = src->y + src->height; +} + +static inline void +rect_minify_inclusive(RECT *rect) +{ + rect->left = rect->left >> 2; + rect->top = rect->top >> 2; + rect->right = DIV_ROUND_UP(rect->right, 2); + rect->bottom = DIV_ROUND_UP(rect->bottom, 2); +} + +/* We suppose: + * 0 <= rect->left < rect->right + * 0 <= rect->top < rect->bottom + */ +static inline void 
+fit_rect_format_inclusive(enum pipe_format format, RECT *rect, int width, int height) +{ + const unsigned w = util_format_get_blockwidth(format); + const unsigned h = util_format_get_blockheight(format); + + if (util_format_is_compressed(format)) { + rect->left = rect->left - rect->left % w; + rect->top = rect->top - rect->top % h; + rect->right = (rect->right % w) == 0 ? + rect->right : + rect->right - (rect->right % w) + w; + rect->bottom = (rect->bottom % h) == 0 ? + rect->bottom : + rect->bottom - (rect->bottom % h) + h; + } + + rect->right = MIN2(rect->right, width); + rect->bottom = MIN2(rect->bottom, height); +} + static inline boolean rect_to_pipe_box_clamp(struct pipe_box *dst, const RECT *src) { @@ -164,6 +208,23 @@ pipe_to_d3d9_format(enum pipe_format format) return nine_pipe_to_d3d9_format_map[format]; } +/* ATI1 and ATI2 are not officially compressed in d3d9 */ +static inline boolean +compressed_format( D3DFORMAT fmt ) +{ + switch (fmt) { + case D3DFMT_DXT1: + case D3DFMT_DXT2: + case D3DFMT_DXT3: + case D3DFMT_DXT4: + case D3DFMT_DXT5: + return TRUE; + default: + break; + } + return FALSE; +} + static inline boolean depth_stencil_format( D3DFORMAT fmt ) { diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 754f5af..28f2787 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -89,6 +89,15 @@ static inline const char *d3dsio_to_string(unsigned opcode); #define NINE_SWIZZLE4(x,y,z,w) \ TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w +#define NINE_CONSTANT_SRC(index) \ + ureg_src_register(TGSI_FILE_CONSTANT, index) + +#define NINE_APPLY_SWIZZLE(src, s) \ + ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s)) + +#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \ + NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s) + #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT) #define NINED3DSPDM_PARTIALP 
(D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT) #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT) @@ -444,6 +453,9 @@ struct shader_translator BYTE minor; } version; unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */ + unsigned num_constf_allowed; + unsigned num_consti_allowed; + unsigned num_constb_allowed; boolean native_integers; boolean inline_subroutines; @@ -505,7 +517,6 @@ struct shader_translator #define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX) #define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT) -#define NINE_MAX_CONST_F_SHADER (tx->processor == TGSI_PROCESSOR_VERTEX ? NINE_MAX_CONST_F : NINE_MAX_CONST_F_PS3) #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;} @@ -528,7 +539,7 @@ static boolean tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index) { INT i; - if (index < 0 || index >= NINE_MAX_CONST_F_SHADER) { + if (index < 0 || index >= tx->num_constf_allowed) { tx->failure = TRUE; return FALSE; } @@ -543,7 +554,7 @@ tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index) static boolean tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index) { - if (index < 0 || index >= NINE_MAX_CONST_I) { + if (index < 0 || index >= tx->num_consti_allowed) { tx->failure = TRUE; return FALSE; } @@ -554,7 +565,7 @@ tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index) static boolean tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index) { - if (index < 0 || index >= NINE_MAX_CONST_B) { + if (index < 0 || index >= tx->num_constb_allowed) { tx->failure = TRUE; return FALSE; } @@ -568,9 +579,7 @@ tx_set_lconstf(struct shader_translator *tx, INT index, float f[4]) { unsigned n; - FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_F_SHADER) - if (IS_VS && index >= NINE_MAX_CONST_F_SHADER) - WARN("lconstf index %i too high, indirect access won't work\n", index); + FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed) for (n = 0; 
n < tx->num_lconstf; ++n) if (tx->lconstf[n].idx == index) @@ -592,7 +601,7 @@ tx_set_lconstf(struct shader_translator *tx, INT index, float f[4]) static void tx_set_lconsti(struct shader_translator *tx, INT index, int i[4]) { - FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_I) + FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed) tx->lconsti[index].idx = index; tx->lconsti[index].reg = tx->native_integers ? ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) : @@ -601,7 +610,7 @@ tx_set_lconsti(struct shader_translator *tx, INT index, int i[4]) static void tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b) { - FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_B) + FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed) tx->lconstb[index].idx = index; tx->lconstb[index].reg = tx->native_integers ? ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) : @@ -672,6 +681,54 @@ tx_pred_alloc(struct shader_translator *tx, INT idx) tx->regs.p = ureg_DECL_predicate(tx->ureg); } +/* NOTE: It's not very clear on which ps1.1-ps1.3 instructions + * the projection should be applied on the texture. It doesn't + * apply on texkill. + * The doc is very imprecise here (it says the projection is done + * before rasterization, thus in vs, which seems wrong since ps instructions + * are affected differently) + * For now we only apply to the ps TEX instruction and TEXBEM. 
+ * Perhaps some other instructions would need it */ +static inline void +apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst, + struct ureg_src src, INT idx) +{ + struct ureg_dst tmp; + unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); + + /* no projection */ + if (dim == 1) { + ureg_MOV(tx->ureg, dst, src); + } else { + tmp = tx_scratch_scalar(tx); + ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1)); + ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src); + } +} + +static inline void +TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst, + unsigned target, struct ureg_src src0, + struct ureg_src src1, INT idx) +{ + unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); + struct ureg_dst tmp; + + /* dim == 1: no projection + * Looks like must be disabled when it makes no + * sense according the texture dimensions + */ + if (dim == 1 || dim <= target) { + ureg_TEX(tx->ureg, dst, target, src0, src1); + } else if (dim == 4) { + ureg_TXP(tx->ureg, dst, target, src0, src1); + } else { + tmp = tx_scratch(tx); + apply_ps1x_projection(tx, tmp, src0, idx); + ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1); + } +} + static inline void tx_texcoord_alloc(struct shader_translator *tx, INT idx) { @@ -1086,9 +1143,18 @@ _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) assert(param->idx >= 0 && param->idx < 4); assert(!param->rel); tx->info->rt_mask |= 1 << param->idx; - if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) - tx->regs.oCol[param->idx] = - ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx); + if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) { + /* ps < 3: oCol[0] will have fog blending afterward + * vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */ + if (!IS_VS && tx->version.major < 3 && param->idx == 0) { + tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg); + } else if (IS_VS && tx->version.major < 3 && param->idx == 1) { + tx->regs.oCol[1] = 
ureg_DECL_temporary(tx->ureg); + } else { + tx->regs.oCol[param->idx] = + ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx); + } + } dst = tx->regs.oCol[param->idx]; if (IS_VS && tx->version.major < 3) dst = ureg_saturate(dst); @@ -1824,7 +1890,7 @@ sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem, sem->Index = 0; break; default: - assert(!"Invalid DECLUSAGE."); + unreachable(!"Invalid DECLUSAGE."); break; } } @@ -2135,12 +2201,79 @@ DECL_SPECIAL(TEXKILL) DECL_SPECIAL(TEXBEM) { - STUB(D3DERR_INVALIDCALL); -} + struct ureg_program *ureg = tx->ureg; + struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); + struct ureg_dst tmp, tmp2, texcoord; + struct ureg_src sample, m00, m01, m10, m11; + struct ureg_src bumpenvlscale, bumpenvloffset; + const int m = tx->insn.dst[0].idx; + const int n = tx->insn.src[0].idx; -DECL_SPECIAL(TEXBEML) -{ - STUB(D3DERR_INVALIDCALL); + assert(tx->version.major == 1); + + sample = ureg_DECL_sampler(ureg, m); + tx->info->sampler_mask |= 1 << m; + + tx_texcoord_alloc(tx, m); + + tmp = tx_scratch(tx); + tmp2 = tx_scratch(tx); + texcoord = tx_scratch(tx); + /* + * Bump-env-matrix: + * 00 is X + * 01 is Y + * 10 is Z + * 11 is W + */ + nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2); + m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X); + m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y); + m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z); + m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W); + + /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */ + if (m % 2 == 0) { + bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X); + bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y); + } else { + bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z); + bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W); + } + + apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m); + + /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */ + ureg_MAD(ureg, ureg_writemask(tmp, 
TGSI_WRITEMASK_X), m00, + NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord)); + /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, + NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y), + NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); + + /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, + NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord)); + /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, + NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y), + NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); + + /* Now the texture coordinates are in tmp.xy */ + + if (tx->insn.opcode == D3DSIO_TEXBEM) { + ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); + } else if (tx->insn.opcode == D3DSIO_TEXBEML) { + /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */ + ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); + ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z), + bumpenvlscale, bumpenvloffset); + ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2)); + } + + tx->info->bumpenvmat_needed = 1; + + return D3D_OK; } DECL_SPECIAL(TEXREG2AR) @@ -2421,7 +2554,43 @@ DECL_SPECIAL(TEXDEPTH) DECL_SPECIAL(BEM) { - STUB(D3DERR_INVALIDCALL); + struct ureg_program *ureg = tx->ureg; + struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); + struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); + struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); + struct ureg_src m00, m01, m10, m11; + const int m = tx->insn.dst[0].idx; + struct ureg_dst tmp; + /* + * Bump-env-matrix: + * 00 is X + * 01 is Y + * 10 is Z + * 11 is W + */ + nine_info_mark_const_f_used(tx->info, 8 + m); + m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X); + m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + 
m, Y); + m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z); + m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W); + /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, + NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X)); + /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, + NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); + + /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, + NINE_APPLY_SWIZZLE(src1, X), src0); + /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, + NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); + ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp)); + + tx->info->bumpenvmat_needed = 1; + + return D3D_OK; } DECL_SPECIAL(TEXLD) @@ -2482,7 +2651,7 @@ DECL_SPECIAL(TEX) src[1] = ureg_DECL_sampler(ureg, s); tx->info->sampler_mask |= 1 << s; - ureg_TEX(ureg, dst, t, src[0], src[1]); + TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s); return D3D_OK; } @@ -2616,7 +2785,7 @@ struct sm1_op_info inst_table[] = _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)), _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)), _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), - _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)), + _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)), _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)), _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)), @@ -3023,6 +3192,8 @@ tx_ctor(struct shader_translator *tx, struct nine_shader_info *info) info->lconstf.data 
= NULL; info->lconstf.ranges = NULL; + info->bumpenvmat_needed = 0; + for (i = 0; i < Elements(tx->regs.rL); ++i) { tx->regs.rL[i] = ureg_dst_undef(); } @@ -3074,6 +3245,57 @@ tgsi_processor_from_type(unsigned shader_type) } } +static void +shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col) +{ + struct ureg_program *ureg = tx->ureg; + struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); + struct ureg_src fog_end, fog_coeff, fog_density; + struct ureg_src fog_vs, depth, fog_color; + struct ureg_dst fog_factor; + + if (!tx->info->fog_enable) { + ureg_MOV(ureg, oCol0, src_col); + return; + } + + if (tx->info->fog_mode != D3DFOG_NONE) + depth = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, + TGSI_INTERPOLATE_LINEAR), + TGSI_SWIZZLE_Z); + + nine_info_mark_const_f_used(tx->info, 33); + fog_color = NINE_CONSTANT_SRC(32); + fog_factor = tx_scratch_scalar(tx); + + if (tx->info->fog_mode == D3DFOG_LINEAR) { + fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X); + fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y); + ureg_SUB(ureg, fog_factor, fog_end, depth); + ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff); + } else if (tx->info->fog_mode == D3DFOG_EXP) { + fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X); + ureg_MUL(ureg, fog_factor, depth, fog_density); + ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f)); + ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor)); + } else if (tx->info->fog_mode == D3DFOG_EXP2) { + fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X); + ureg_MUL(ureg, fog_factor, depth, fog_density); + ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor)); + ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f)); + ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor)); + } else { + fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, + TGSI_INTERPOLATE_PERSPECTIVE), + 
TGSI_SWIZZLE_X); + ureg_MOV(ureg, fog_factor, fog_vs); + } + + ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ), + tx_src_scalar(fog_factor), src_col, fog_color); + ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col); +} + #define GET_CAP(n) device->screen->get_param( \ device->screen, PIPE_CAP_##n) #define GET_SHADER_CAP(n) device->screen->get_shader_param( \ @@ -3123,6 +3345,24 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) tx->texcoord_sn = tx->want_texcoord ? TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC; + if (IS_VS) { + tx->num_constf_allowed = NINE_MAX_CONST_F; + } else if (tx->version.major < 2) {/* IS_PS v1 */ + tx->num_constf_allowed = 8; + } else if (tx->version.major == 2) {/* IS_PS v2 */ + tx->num_constf_allowed = 32; + } else {/* IS_PS v3 */ + tx->num_constf_allowed = NINE_MAX_CONST_F_PS3; + } + + if (tx->version.major < 2) { + tx->num_consti_allowed = 0; + tx->num_constb_allowed = 0; + } else { + tx->num_consti_allowed = NINE_MAX_CONST_I; + tx->num_constb_allowed = NINE_MAX_CONST_B; + } + /* VS must always write position. Declare it here to make it the 1st output. * (Some drivers like nv50 are buggy and rely on that.) 
*/ @@ -3145,10 +3385,26 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) goto out; } - if (IS_PS && (tx->version.major < 2) && tx->num_temp) { - ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0), - ureg_src(tx->regs.r[0])); - info->rt_mask |= 0x1; + if (IS_PS && tx->version.major < 3) { + if (tx->version.major < 2) { + assert(tx->num_temp); /* there must be color output */ + info->rt_mask |= 0x1; + shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0])); + } else { + shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0])); + } + } + + if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) { + tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0); + ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f)); + } + + /* vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */ + if (IS_VS && tx->version.major < 3 && !ureg_dst_is_undef(tx->regs.oCol[1])) { + struct ureg_dst dst = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 1); + ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oCol[1])); + ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 0.0f)); } if (info->position_t) @@ -3233,6 +3489,7 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) info->const_int_slots > 0 ? 
max_const_f + info->const_int_slots : info->const_float_slots; + info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */ for (s = 0; s < slot_max; s++) diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h index ec256c1..41577ac 100644 --- a/src/gallium/state_trackers/nine/nine_shader.h +++ b/src/gallium/state_trackers/nine/nine_shader.h @@ -59,6 +59,10 @@ struct nine_shader_info uint16_t sampler_mask_shadow; /* in, which samplers use depth compare */ uint8_t rt_mask; /* out, which render targets are being written */ + uint8_t fog_enable; + uint8_t fog_mode; + uint16_t projected; /* ps 1.1 to 1.3 */ + unsigned const_i_base; /* in vec4 (16 byte) units */ unsigned const_b_base; /* in vec4 (16 byte) units */ unsigned const_used_size; @@ -68,6 +72,7 @@ struct nine_shader_info unsigned const_bool_slots; struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by user */ + uint8_t bumpenvmat_needed; }; static inline void @@ -137,4 +142,48 @@ nine_shader_variants_free(struct nine_shader_variant *list) } } +struct nine_shader_variant64 +{ + struct nine_shader_variant64 *next; + void *cso; + uint64_t key; +}; + +static inline void * +nine_shader_variant_get64(struct nine_shader_variant64 *list, uint64_t key) +{ + while (list->key != key && list->next) + list = list->next; + if (list->key == key) + return list->cso; + return NULL; +} + +static inline boolean +nine_shader_variant_add64(struct nine_shader_variant64 *list, + uint64_t key, void *cso) +{ + while (list->next) { + assert(list->key != key); + list = list->next; + } + list->next = MALLOC_STRUCT(nine_shader_variant64); + if (!list->next) + return FALSE; + list->next->next = NULL; + list->next->key = key; + list->next->cso = cso; + return TRUE; +} + +static inline void +nine_shader_variants_free64(struct nine_shader_variant64 *list) +{ + while (list->next) { + struct nine_shader_variant64 *ptr = list->next; + list->next = ptr->next; + 
FREE(ptr); + } +} + #endif /* _NINE_SHADER_H_ */ diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 6c83585..558d07a 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -33,352 +33,36 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "cso_cache/cso_context.h" +#include "util/u_upload_mgr.h" #include "util/u_math.h" #define DBG_CHANNEL DBG_DEVICE -static uint32_t -update_framebuffer(struct NineDevice9 *device) -{ - struct pipe_context *pipe = device->pipe; - struct nine_state *state = &device->state; - struct pipe_framebuffer_state *fb = &device->state.fb; - unsigned i; - struct NineSurface9 *rt0 = state->rt[0]; - unsigned w = rt0->desc.Width; - unsigned h = rt0->desc.Height; - D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType; - unsigned mask = state->ps ? state->ps->rt_mask : 1; - const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0; - - DBG("\n"); - - state->rt_mask = 0x0; - fb->nr_cbufs = 0; - - /* all render targets must have the same size and the depth buffer must be - * bigger. Multisample has to match, according to spec. But some apps do - * things wrong there, and no error is returned. The behaviour they get - * apparently is that depth buffer is disabled if it doesn't match. - * Surely the same for render targets. */ - - /* Special case: D3DFMT_NULL is used to bound no real render target, - * but render to depth buffer. We have to not take into account the render - * target info. 
TODO: know what should happen when there are several render targers - * and the first one is D3DFMT_NULL */ - if (rt0->desc.Format == D3DFMT_NULL && state->ds) { - w = state->ds->desc.Width; - h = state->ds->desc.Height; - nr_samples = state->ds->desc.MultiSampleType; - } - - for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) { - struct NineSurface9 *rt = state->rt[i]; - - if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) && - rt->desc.Width == w && rt->desc.Height == h && - rt->desc.MultiSampleType == nr_samples) { - fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB); - state->rt_mask |= 1 << i; - fb->nr_cbufs = i + 1; - - if (unlikely(rt->desc.Usage & D3DUSAGE_AUTOGENMIPMAP)) { - assert(rt->texture == D3DRTYPE_TEXTURE || - rt->texture == D3DRTYPE_CUBETEXTURE); - NineBaseTexture9(rt->base.base.container)->dirty_mip = TRUE; - } - } else { - /* Color outputs must match RT slot, - * drivers will have to handle NULL entries for GL, too. - */ - fb->cbufs[i] = NULL; - } - } - - if (state->ds && state->ds->desc.Width >= w && - state->ds->desc.Height >= h && - state->ds->desc.MultiSampleType == nr_samples) { - fb->zsbuf = NineSurface9_GetSurface(state->ds, 0); - } else { - fb->zsbuf = NULL; - } - - fb->width = w; - fb->height = h; - - pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? 
*/ - - if (fb->zsbuf) { - DWORD scale; - switch (fb->zsbuf->format) { - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - scale = fui(1.0f); - break; - case PIPE_FORMAT_Z16_UNORM: - scale = fui((float)(1 << 16)); - break; - default: - scale = fui((float)(1 << 24)); - break; - } - if (state->rs[NINED3DRS_ZBIASSCALE] != scale) { - state->rs[NINED3DRS_ZBIASSCALE] = scale; - state->changed.group |= NINE_STATE_RASTERIZER; - } - } - - return state->changed.group; -} - -static void -update_viewport(struct NineDevice9 *device) -{ - struct pipe_context *pipe = device->pipe; - const D3DVIEWPORT9 *vport = &device->state.viewport; - struct pipe_viewport_state pvport; - - /* D3D coordinates are: - * -1 .. +1 for X,Y and - * 0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz) - */ - pvport.scale[0] = (float)vport->Width * 0.5f; - pvport.scale[1] = (float)vport->Height * -0.5f; - pvport.scale[2] = vport->MaxZ - vport->MinZ; - pvport.translate[0] = (float)vport->Width * 0.5f + (float)vport->X; - pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y; - pvport.translate[2] = vport->MinZ; - - /* We found R600 and SI cards have some imprecision - * on the barycentric coordinates used for interpolation. - * Some shaders rely on having something precise. - * We found that the proprietary driver has the imprecision issue, - * except when the render target width and height are powers of two. - * It is using some sort of workaround for these cases - * which covers likely all the cases the applications rely - * on something precise. - * We haven't found the workaround, but it seems like it's better - * for applications if the imprecision is biased towards infinity - * instead of -infinity (which is what measured). So shift slightly - * the viewport: not enough to change rasterization result (in particular - * for multisampling), but enough to make the imprecision biased - * towards infinity. 
We do this shift only if render target width and - * height are powers of two. - * Solves 'red shadows' bug on UE3 games. - */ - if (device->driver_bugs.buggy_barycentrics && - ((vport->Width & (vport->Width-1)) == 0) && - ((vport->Height & (vport->Height-1)) == 0)) { - pvport.translate[0] -= 1.0f / 128.0f; - pvport.translate[1] -= 1.0f / 128.0f; - } - - pipe->set_viewport_states(pipe, 0, 1, &pvport); -} - -static inline void -update_scissor(struct NineDevice9 *device) -{ - struct pipe_context *pipe = device->pipe; - - pipe->set_scissor_states(pipe, 0, 1, &device->state.scissor); -} +/* State preparation only */ static inline void -update_blend(struct NineDevice9 *device) +prepare_blend(struct NineDevice9 *device) { - nine_convert_blend_state(device->cso, device->state.rs); + nine_convert_blend_state(&device->state.pipe.blend, device->state.rs); + device->state.commit |= NINE_STATE_COMMIT_BLEND; } static inline void -update_dsa(struct NineDevice9 *device) +prepare_dsa(struct NineDevice9 *device) { - nine_convert_dsa_state(device->cso, device->state.rs); + nine_convert_dsa_state(&device->state.pipe.dsa, device->state.rs); + device->state.commit |= NINE_STATE_COMMIT_DSA; } static inline void -update_rasterizer(struct NineDevice9 *device) +prepare_rasterizer(struct NineDevice9 *device) { - nine_convert_rasterizer_state(device->cso, device->state.rs); + nine_convert_rasterizer_state(&device->state.pipe.rast, device->state.rs); + device->state.commit |= NINE_STATE_COMMIT_RASTERIZER; } -/* Loop through VS inputs and pick the vertex elements with the declared - * usage from the vertex declaration, then insert the instance divisor from - * the stream source frequency setting. 
- */ static void -update_vertex_elements(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - const struct NineVertexDeclaration9 *vdecl = device->state.vdecl; - const struct NineVertexShader9 *vs; - unsigned n, b, i; - int index; - char vdecl_index_map[16]; /* vs->num_inputs <= 16 */ - char used_streams[device->caps.MaxStreams]; - int dummy_vbo_stream = -1; - BOOL need_dummy_vbo = FALSE; - struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; - - state->stream_usage_mask = 0; - memset(vdecl_index_map, -1, 16); - memset(used_streams, 0, device->caps.MaxStreams); - vs = device->state.vs ? device->state.vs : device->ff.vs; - - if (vdecl) { - for (n = 0; n < vs->num_inputs; ++n) { - DBG("looking up input %u (usage %u) from vdecl(%p)\n", - n, vs->input_map[n].ndecl, vdecl); - - for (i = 0; i < vdecl->nelems; i++) { - if (vdecl->usage_map[i] == vs->input_map[n].ndecl) { - vdecl_index_map[n] = i; - used_streams[vdecl->elems[i].vertex_buffer_index] = 1; - break; - } - } - if (vdecl_index_map[n] < 0) - need_dummy_vbo = TRUE; - } - } else { - /* No vertex declaration. 
Likely will never happen in practice, - * but we need not crash on this */ - need_dummy_vbo = TRUE; - } - - if (need_dummy_vbo) { - for (i = 0; i < device->caps.MaxStreams; i++ ) { - if (!used_streams[i]) { - dummy_vbo_stream = i; - break; - } - } - } - /* there are less vertex shader inputs than stream slots, - * so if we need a slot for the dummy vbo, we should have found one */ - assert (!need_dummy_vbo || dummy_vbo_stream != -1); - - for (n = 0; n < vs->num_inputs; ++n) { - index = vdecl_index_map[n]; - if (index >= 0) { - ve[n] = vdecl->elems[index]; - b = ve[n].vertex_buffer_index; - state->stream_usage_mask |= 1 << b; - /* XXX wine just uses 1 here: */ - if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA) - ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF; - } else { - /* if the vertex declaration is incomplete compared to what the - * vertex shader needs, we bind a dummy vbo with 0 0 0 0. - * This is not precised by the spec, but is the behaviour - * tested on win */ - ve[n].vertex_buffer_index = dummy_vbo_stream; - ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - ve[n].src_offset = 0; - ve[n].instance_divisor = 0; - } - } - - if (state->dummy_vbo_bound_at != dummy_vbo_stream) { - if (state->dummy_vbo_bound_at >= 0) - state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at; - if (dummy_vbo_stream >= 0) { - state->changed.vtxbuf |= 1 << dummy_vbo_stream; - state->vbo_bound_done = FALSE; - } - state->dummy_vbo_bound_at = dummy_vbo_stream; - } - - cso_set_vertex_elements(device->cso, vs->num_inputs, ve); - - state->changed.stream_freq = 0; -} - -static inline uint32_t -update_shader_variant_keys(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - uint32_t mask = 0; - uint32_t vs_key = state->samplers_shadow; - uint32_t ps_key = state->samplers_shadow; - - vs_key = (vs_key & NINE_VS_SAMPLERS_MASK) >> NINE_SAMPLER_VS(0); - ps_key = (ps_key & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0); - - if (state->vs) vs_key &= 
state->vs->sampler_mask; - if (state->ps) { - if (unlikely(state->ps->byte_code.version < 0x20)) { - /* no depth textures, but variable targets */ - uint32_t m = state->ps->sampler_mask; - ps_key = 0; - while (m) { - int s = ffs(m) - 1; - m &= ~(1 << s); - ps_key |= (state->texture[s] ? state->texture[s]->pstype : 1) << (s * 2); - } - } else { - ps_key &= state->ps->sampler_mask; - } - } - - if (state->vs && state->vs_key != vs_key) { - state->vs_key = vs_key; - mask |= NINE_STATE_VS; - } - if (state->ps && state->ps_key != ps_key) { - state->ps_key = ps_key; - mask |= NINE_STATE_PS; - } - return mask; -} - -static inline uint32_t -update_vs(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - struct NineVertexShader9 *vs = state->vs; - uint32_t changed_group = 0; - - /* likely because we dislike FF */ - if (likely(vs)) { - state->cso.vs = NineVertexShader9_GetVariant(vs, state->vs_key); - } else { - vs = device->ff.vs; - state->cso.vs = vs->variant.cso; - } - device->pipe->bind_vs_state(device->pipe, state->cso.vs); - - if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) { - state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size; - changed_group |= NINE_STATE_RASTERIZER; - } - - if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask) - /* Bound dummy sampler. */ - changed_group |= NINE_STATE_SAMPLER; - return changed_group; -} - -static inline uint32_t -update_ps(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - struct NinePixelShader9 *ps = state->ps; - uint32_t changed_group = 0; - - if (likely(ps)) { - state->cso.ps = NinePixelShader9_GetVariant(ps, state->ps_key); - } else { - ps = device->ff.ps; - state->cso.ps = ps->variant.cso; - } - device->pipe->bind_fs_state(device->pipe, state->cso.ps); - - if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask) - /* Bound dummy sampler. 
*/ - changed_group |= NINE_STATE_SAMPLER; - return changed_group; -} +prepare_ps_constants_userbuf(struct NineDevice9 *device); #define DO_UPLOAD_CONST_F(buf,p,c,d) \ do { \ @@ -391,7 +75,7 @@ update_ps(struct NineDevice9 *device) /* OK, this is a bit ugly ... */ static void -update_constants(struct NineDevice9 *device, unsigned shader_type) +upload_constants(struct NineDevice9 *device, unsigned shader_type) { struct pipe_context *pipe = device->pipe; struct pipe_resource *buf; @@ -438,10 +122,17 @@ update_constants(struct NineDevice9 *device, unsigned shader_type) lconstf_ranges = device->state.vs->lconstf.ranges; lconstf_data = device->state.vs->lconstf.data; - device->state.ff.clobber.vs_const = TRUE; device->state.changed.group &= ~NINE_STATE_VS_CONST; } else { DBG("PS\n"); + /* features only implemented on the userbuf path */ + if (device->state.ps->bumpenvmat_needed || ( + device->state.ps->byte_code.version < 0x30 && + device->state.rs[D3DRS_FOGENABLE])) { + device->prefer_user_constbuf = TRUE; + prepare_ps_constants_userbuf(device); + return; + } buf = device->constbuf_ps; const_f = device->state.ps_const_f; @@ -464,7 +155,6 @@ update_constants(struct NineDevice9 *device, unsigned shader_type) lconstf_ranges = NULL; lconstf_data = NULL; - device->state.ff.clobber.ps_const = TRUE; device->state.changed.group &= ~NINE_STATE_PS_CONST; } @@ -524,10 +214,9 @@ update_constants(struct NineDevice9 *device, unsigned shader_type) } static void -update_vs_constants_userbuf(struct NineDevice9 *device) +prepare_vs_constants_userbuf(struct NineDevice9 *device) { struct nine_state *state = &device->state; - struct pipe_context *pipe = device->pipe; struct pipe_constant_buffer cb; cb.buffer = NULL; cb.buffer_offset = 0; @@ -567,7 +256,18 @@ update_vs_constants_userbuf(struct NineDevice9 *device) cb.user_buffer = dst; } - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb); + if (!device->driver_caps.user_cbufs) { + u_upload_data(device->constbuf_uploader, + 0, + 
cb.buffer_size, + cb.user_buffer, + &cb.buffer_offset, + &cb.buffer); + u_upload_unmap(device->constbuf_uploader); + cb.user_buffer = NULL; + } + + state->pipe.cb_vs = cb; if (device->state.changed.vs_const_f) { struct nine_range *r = device->state.changed.vs_const_f; @@ -578,22 +278,19 @@ update_vs_constants_userbuf(struct NineDevice9 *device) device->state.changed.vs_const_f = NULL; } state->changed.group &= ~NINE_STATE_VS_CONST; + state->commit |= NINE_STATE_COMMIT_CONST_VS; } static void -update_ps_constants_userbuf(struct NineDevice9 *device) +prepare_ps_constants_userbuf(struct NineDevice9 *device) { struct nine_state *state = &device->state; - struct pipe_context *pipe = device->pipe; struct pipe_constant_buffer cb; cb.buffer = NULL; cb.buffer_offset = 0; cb.buffer_size = device->state.ps->const_used_size; cb.user_buffer = device->state.ps_const_f; - if (!cb.buffer_size) - return; - if (state->changed.ps_const_i) { int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f]; memcpy(idst, state->ps_const_i, sizeof(state->ps_const_i)); @@ -606,7 +303,47 @@ update_ps_constants_userbuf(struct NineDevice9 *device) state->changed.ps_const_b = 0; } - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb); + /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */ + if (device->state.ps->bumpenvmat_needed) { + memcpy(device->state.ps_lconstf_temp, cb.user_buffer, cb.buffer_size); + memcpy(&device->state.ps_lconstf_temp[4 * 8], &device->state.bumpmap_vars, sizeof(device->state.bumpmap_vars)); + + cb.user_buffer = device->state.ps_lconstf_temp; + } + + if (state->ps->byte_code.version < 0x30 && + state->rs[D3DRS_FOGENABLE]) { + float *dst = &state->ps_lconstf_temp[4 * 32]; + if (cb.user_buffer != state->ps_lconstf_temp) { + memcpy(state->ps_lconstf_temp, cb.user_buffer, cb.buffer_size); + cb.user_buffer = state->ps_lconstf_temp; + } + + d3dcolor_to_rgba(dst, state->rs[D3DRS_FOGCOLOR]); + if 
(state->rs[D3DRS_FOGTABLEMODE] == D3DFOG_LINEAR) { + dst[4] = asfloat(state->rs[D3DRS_FOGEND]); + dst[5] = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART])); + } else if (state->rs[D3DRS_FOGTABLEMODE] != D3DFOG_NONE) { + dst[4] = asfloat(state->rs[D3DRS_FOGDENSITY]); + } + cb.buffer_size = 4 * 4 * 34; + } + + if (!cb.buffer_size) + return; + + if (!device->driver_caps.user_cbufs) { + u_upload_data(device->constbuf_uploader, + 0, + cb.buffer_size, + cb.user_buffer, + &cb.buffer_offset, + &cb.buffer); + u_upload_unmap(device->constbuf_uploader); + cb.user_buffer = NULL; + } + + state->pipe.cb_ps = cb; if (device->state.changed.ps_const_f) { struct nine_range *r = device->state.changed.ps_const_f; @@ -617,6 +354,286 @@ update_ps_constants_userbuf(struct NineDevice9 *device) device->state.changed.ps_const_f = NULL; } state->changed.group &= ~NINE_STATE_PS_CONST; + state->commit |= NINE_STATE_COMMIT_CONST_PS; +} + +static inline uint32_t +prepare_vs(struct NineDevice9 *device, uint8_t shader_changed) +{ + struct nine_state *state = &device->state; + struct NineVertexShader9 *vs = state->vs; + uint32_t changed_group = 0; + int has_key_changed = 0; + + if (likely(vs)) + has_key_changed = NineVertexShader9_UpdateKey(vs, state); + + if (!shader_changed && !has_key_changed) + return 0; + + /* likely because we dislike FF */ + if (likely(vs)) { + state->cso.vs = NineVertexShader9_GetVariant(vs); + } else { + vs = device->ff.vs; + state->cso.vs = vs->ff_cso; + } + + if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) { + state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size; + changed_group |= NINE_STATE_RASTERIZER; + } + + if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask) + /* Bound dummy sampler. 
*/ + changed_group |= NINE_STATE_SAMPLER; + + state->commit |= NINE_STATE_COMMIT_VS; + return changed_group; +} + +static inline uint32_t +prepare_ps(struct NineDevice9 *device, uint8_t shader_changed) +{ + struct nine_state *state = &device->state; + struct NinePixelShader9 *ps = state->ps; + uint32_t changed_group = 0; + int has_key_changed = 0; + + if (likely(ps)) + has_key_changed = NinePixelShader9_UpdateKey(ps, state); + + if (!shader_changed && !has_key_changed) + return 0; + + if (likely(ps)) { + state->cso.ps = NinePixelShader9_GetVariant(ps); + } else { + ps = device->ff.ps; + state->cso.ps = ps->ff_cso; + } + + if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask) + /* Bound dummy sampler. */ + changed_group |= NINE_STATE_SAMPLER; + + state->commit |= NINE_STATE_COMMIT_PS; + return changed_group; +} + +/* State preparation incremental */ + +/* State preparation + State commit */ + +static uint32_t +update_framebuffer(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + struct nine_state *state = &device->state; + struct pipe_framebuffer_state *fb = &device->state.fb; + unsigned i; + struct NineSurface9 *rt0 = state->rt[0]; + unsigned w = rt0->desc.Width; + unsigned h = rt0->desc.Height; + D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType; + unsigned mask = state->ps ? state->ps->rt_mask : 1; + const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0; + + DBG("\n"); + + state->rt_mask = 0x0; + fb->nr_cbufs = 0; + + /* all render targets must have the same size and the depth buffer must be + * bigger. Multisample has to match, according to spec. But some apps do + * things wrong there, and no error is returned. The behaviour they get + * apparently is that depth buffer is disabled if it doesn't match. + * Surely the same for render targets. */ + + /* Special case: D3DFMT_NULL is used to bound no real render target, + * but render to depth buffer. 
We have to not take into account the render + * target info. TODO: know what should happen when there are several render targers + * and the first one is D3DFMT_NULL */ + if (rt0->desc.Format == D3DFMT_NULL && state->ds) { + w = state->ds->desc.Width; + h = state->ds->desc.Height; + nr_samples = state->ds->desc.MultiSampleType; + } + + for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) { + struct NineSurface9 *rt = state->rt[i]; + + if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) && + rt->desc.Width == w && rt->desc.Height == h && + rt->desc.MultiSampleType == nr_samples) { + fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB); + state->rt_mask |= 1 << i; + fb->nr_cbufs = i + 1; + + if (unlikely(rt->desc.Usage & D3DUSAGE_AUTOGENMIPMAP)) { + assert(rt->texture == D3DRTYPE_TEXTURE || + rt->texture == D3DRTYPE_CUBETEXTURE); + NineBaseTexture9(rt->base.base.container)->dirty_mip = TRUE; + } + } else { + /* Color outputs must match RT slot, + * drivers will have to handle NULL entries for GL, too. + */ + fb->cbufs[i] = NULL; + } + } + + if (state->ds && state->ds->desc.Width >= w && + state->ds->desc.Height >= h && + state->ds->desc.MultiSampleType == nr_samples) { + fb->zsbuf = NineSurface9_GetSurface(state->ds, 0); + } else { + fb->zsbuf = NULL; + } + + fb->width = w; + fb->height = h; + + pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */ + + return state->changed.group; +} + +static void +update_viewport(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + const D3DVIEWPORT9 *vport = &device->state.viewport; + struct pipe_viewport_state pvport; + + /* D3D coordinates are: + * -1 .. +1 for X,Y and + * 0 .. 
+1 for Z (we use pipe_rasterizer_state.clip_halfz) + */ + pvport.scale[0] = (float)vport->Width * 0.5f; + pvport.scale[1] = (float)vport->Height * -0.5f; + pvport.scale[2] = vport->MaxZ - vport->MinZ; + pvport.translate[0] = (float)vport->Width * 0.5f + (float)vport->X; + pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y; + pvport.translate[2] = vport->MinZ; + + /* We found R600 and SI cards have some imprecision + * on the barycentric coordinates used for interpolation. + * Some shaders rely on having something precise. + * We found that the proprietary driver has the imprecision issue, + * except when the render target width and height are powers of two. + * It is using some sort of workaround for these cases + * which covers likely all the cases the applications rely + * on something precise. + * We haven't found the workaround, but it seems like it's better + * for applications if the imprecision is biased towards infinity + * instead of -infinity (which is what measured). So shift slightly + * the viewport: not enough to change rasterization result (in particular + * for multisampling), but enough to make the imprecision biased + * towards infinity. We do this shift only if render target width and + * height are powers of two. + * Solves 'red shadows' bug on UE3 games. + */ + if (device->driver_bugs.buggy_barycentrics && + ((vport->Width & (vport->Width-1)) == 0) && + ((vport->Height & (vport->Height-1)) == 0)) { + pvport.translate[0] -= 1.0f / 128.0f; + pvport.translate[1] -= 1.0f / 128.0f; + } + + pipe->set_viewport_states(pipe, 0, 1, &pvport); +} + +/* Loop through VS inputs and pick the vertex elements with the declared + * usage from the vertex declaration, then insert the instance divisor from + * the stream source frequency setting. 
+ */ +static void +update_vertex_elements(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + const struct NineVertexDeclaration9 *vdecl = device->state.vdecl; + const struct NineVertexShader9 *vs; + unsigned n, b, i; + int index; + char vdecl_index_map[16]; /* vs->num_inputs <= 16 */ + char used_streams[device->caps.MaxStreams]; + int dummy_vbo_stream = -1; + BOOL need_dummy_vbo = FALSE; + struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; + + state->stream_usage_mask = 0; + memset(vdecl_index_map, -1, 16); + memset(used_streams, 0, device->caps.MaxStreams); + vs = device->state.vs ? device->state.vs : device->ff.vs; + + if (vdecl) { + for (n = 0; n < vs->num_inputs; ++n) { + DBG("looking up input %u (usage %u) from vdecl(%p)\n", + n, vs->input_map[n].ndecl, vdecl); + + for (i = 0; i < vdecl->nelems; i++) { + if (vdecl->usage_map[i] == vs->input_map[n].ndecl) { + vdecl_index_map[n] = i; + used_streams[vdecl->elems[i].vertex_buffer_index] = 1; + break; + } + } + if (vdecl_index_map[n] < 0) + need_dummy_vbo = TRUE; + } + } else { + /* No vertex declaration. 
Likely will never happen in practice, + * but we need not crash on this */ + need_dummy_vbo = TRUE; + } + + if (need_dummy_vbo) { + for (i = 0; i < device->caps.MaxStreams; i++ ) { + if (!used_streams[i]) { + dummy_vbo_stream = i; + break; + } + } + } + /* there are less vertex shader inputs than stream slots, + * so if we need a slot for the dummy vbo, we should have found one */ + assert (!need_dummy_vbo || dummy_vbo_stream != -1); + + for (n = 0; n < vs->num_inputs; ++n) { + index = vdecl_index_map[n]; + if (index >= 0) { + ve[n] = vdecl->elems[index]; + b = ve[n].vertex_buffer_index; + state->stream_usage_mask |= 1 << b; + /* XXX wine just uses 1 here: */ + if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA) + ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF; + } else { + /* if the vertex declaration is incomplete compared to what the + * vertex shader needs, we bind a dummy vbo with 0 0 0 0. + * This is not precised by the spec, but is the behaviour + * tested on win */ + ve[n].vertex_buffer_index = dummy_vbo_stream; + ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[n].src_offset = 0; + ve[n].instance_divisor = 0; + } + } + + if (state->dummy_vbo_bound_at != dummy_vbo_stream) { + if (state->dummy_vbo_bound_at >= 0) + state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at; + if (dummy_vbo_stream >= 0) { + state->changed.vtxbuf |= 1 << dummy_vbo_stream; + state->vbo_bound_done = FALSE; + } + state->dummy_vbo_bound_at = dummy_vbo_stream; + } + + cso_set_vertex_elements(device->cso, vs->num_inputs, ve); + + state->changed.stream_freq = 0; } static void @@ -627,7 +644,6 @@ update_vertex_buffers(struct NineDevice9 *device) struct pipe_vertex_buffer dummy_vtxbuf; uint32_t mask = state->changed.vtxbuf; unsigned i; - unsigned start; DBG("mask=%x\n", mask); @@ -656,27 +672,6 @@ update_vertex_buffers(struct NineDevice9 *device) state->changed.vtxbuf = 0; } -static inline void -update_index_buffer(struct NineDevice9 *device) -{ - struct 
pipe_context *pipe = device->pipe; - if (device->state.idxbuf) - pipe->set_index_buffer(pipe, &device->state.idxbuf->buffer); - else - pipe->set_index_buffer(pipe, NULL); -} - -/* TODO: only go through dirty textures */ -static void -validate_textures(struct NineDevice9 *device) -{ - struct NineBaseTexture9 *tex, *ptr; - LIST_FOR_EACH_ENTRY_SAFE(tex, ptr, &device->update_textures, list) { - list_delinit(&tex->list); - NineBaseTexture9_Validate(tex); - } -} - static inline boolean update_sampler_derived(struct nine_state *state, unsigned s) { @@ -706,20 +701,16 @@ update_sampler_derived(struct nine_state *state, unsigned s) static void update_textures_and_samplers(struct NineDevice9 *device) { - struct pipe_context *pipe = device->pipe; struct nine_state *state = &device->state; struct pipe_sampler_view *view[NINE_MAX_SAMPLERS]; - struct pipe_sampler_state samp; unsigned num_textures; unsigned i; - boolean commit_views; boolean commit_samplers; uint16_t sampler_mask = state->ps ? state->ps->sampler_mask : device->ff.ps->sampler_mask; /* TODO: Can we reduce iterations here ? */ - commit_views = FALSE; commit_samplers = FALSE; state->bound_samplers_mask_ps = 0; for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_PS; ++i) { @@ -749,26 +740,12 @@ update_textures_and_samplers(struct NineDevice9 *device) * unbind dummy sampler directly when they are not needed * anymore, but they're going to be removed as long as texture * or sampler states are changed. 
*/ - view[i] = device->dummy_sampler; + view[i] = device->dummy_sampler_view; num_textures = i + 1; - memset(&samp, 0, sizeof(samp)); - samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - samp.max_lod = 15.0f; - samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.min_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.compare_mode = PIPE_TEX_COMPARE_NONE; - samp.compare_func = PIPE_FUNC_LEQUAL; - samp.normalized_coords = 1; - samp.seamless_cube_map = 1; - cso_single_sampler(device->cso, PIPE_SHADER_FRAGMENT, - s - NINE_SAMPLER_PS(0), &samp); + s - NINE_SAMPLER_PS(0), &device->dummy_sampler_state); - commit_views = TRUE; commit_samplers = TRUE; state->changed.sampler[s] = ~0; } @@ -776,16 +753,11 @@ update_textures_and_samplers(struct NineDevice9 *device) state->bound_samplers_mask_ps |= (1 << s); } - commit_views |= (state->changed.texture & NINE_PS_SAMPLERS_MASK) != 0; - commit_views |= state->changed.srgb; - if (commit_views) - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, - num_textures, view); + cso_set_sampler_views(device->cso, PIPE_SHADER_FRAGMENT, num_textures, view); if (commit_samplers) cso_single_sampler_done(device->cso, PIPE_SHADER_FRAGMENT); - commit_views = FALSE; commit_samplers = FALSE; sampler_mask = state->vs ? state->vs->sampler_mask : 0; state->bound_samplers_mask_vs = 0; @@ -816,76 +788,170 @@ update_textures_and_samplers(struct NineDevice9 *device) * unbind dummy sampler directly when they are not needed * anymore, but they're going to be removed as long as texture * or sampler states are changed. 
*/ - view[i] = device->dummy_sampler; + view[i] = device->dummy_sampler_view; num_textures = i + 1; - memset(&samp, 0, sizeof(samp)); - samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - samp.max_lod = 15.0f; - samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.min_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.compare_mode = PIPE_TEX_COMPARE_NONE; - samp.compare_func = PIPE_FUNC_LEQUAL; - samp.normalized_coords = 1; - samp.seamless_cube_map = 1; - cso_single_sampler(device->cso, PIPE_SHADER_VERTEX, - s - NINE_SAMPLER_VS(0), &samp); + s - NINE_SAMPLER_VS(0), &device->dummy_sampler_state); - commit_views = TRUE; commit_samplers = TRUE; state->changed.sampler[s] = ~0; } state->bound_samplers_mask_vs |= (1 << s); } - commit_views |= (state->changed.texture & NINE_VS_SAMPLERS_MASK) != 0; - commit_views |= state->changed.srgb; - if (commit_views) - pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0, - num_textures, view); + + cso_set_sampler_views(device->cso, PIPE_SHADER_VERTEX, num_textures, view); if (commit_samplers) cso_single_sampler_done(device->cso, PIPE_SHADER_VERTEX); - state->changed.srgb = FALSE; state->changed.texture = 0; } +/* State commit only */ + +static inline void +commit_blend(struct NineDevice9 *device) +{ + cso_set_blend(device->cso, &device->state.pipe.blend); +} + +static inline void +commit_dsa(struct NineDevice9 *device) +{ + cso_set_depth_stencil_alpha(device->cso, &device->state.pipe.dsa); +} + +static inline void +commit_scissor(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + + pipe->set_scissor_states(pipe, 0, 1, &device->state.scissor); +} + +static inline void +commit_rasterizer(struct NineDevice9 *device) +{ + cso_set_rasterizer(device->cso, &device->state.pipe.rast); +} -#define NINE_STATE_FREQ_GROUP_0 \ - (NINE_STATE_FB | \ - NINE_STATE_VIEWPORT | \ - NINE_STATE_SCISSOR | \ 
- NINE_STATE_BLEND | \ - NINE_STATE_DSA | \ - NINE_STATE_RASTERIZER | \ - NINE_STATE_VS | \ - NINE_STATE_PS | \ - NINE_STATE_BLEND_COLOR | \ - NINE_STATE_STENCIL_REF | \ +static inline void +commit_index_buffer(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + if (device->state.idxbuf) + pipe->set_index_buffer(pipe, &device->state.idxbuf->buffer); + else + pipe->set_index_buffer(pipe, NULL); +} + +static inline void +commit_vs_constants(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + + if (unlikely(!device->state.vs)) + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs_ff); + else + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs); +} + +static inline void +commit_ps_constants(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + + if (unlikely(!device->state.ps)) + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &device->state.pipe.cb_ps_ff); + else + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &device->state.pipe.cb_ps); +} + +static inline void +commit_vs(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + + device->pipe->bind_vs_state(device->pipe, state->cso.vs); +} + + +static inline void +commit_ps(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + + device->pipe->bind_fs_state(device->pipe, state->cso.ps); +} +/* State Update */ + +#define NINE_STATE_SHADER_CHANGE_VS \ + (NINE_STATE_VS | \ + NINE_STATE_TEXTURE | \ + NINE_STATE_FOG_SHADER) + +#define NINE_STATE_SHADER_CHANGE_PS \ + (NINE_STATE_PS | \ + NINE_STATE_TEXTURE | \ + NINE_STATE_FOG_SHADER | \ + NINE_STATE_PS1X_SHADER) + +#define NINE_STATE_FREQUENT \ + (NINE_STATE_RASTERIZER | \ + NINE_STATE_TEXTURE | \ + NINE_STATE_SAMPLER | \ + NINE_STATE_VS_CONST | \ + NINE_STATE_PS_CONST) + +#define NINE_STATE_COMMON \ + (NINE_STATE_FB | \ + NINE_STATE_BLEND | \ + NINE_STATE_DSA | \ + 
NINE_STATE_VIEWPORT | \ + NINE_STATE_VDECL | \ + NINE_STATE_IDXBUF) + +#define NINE_STATE_RARE \ + (NINE_STATE_SCISSOR | \ + NINE_STATE_BLEND_COLOR | \ + NINE_STATE_STENCIL_REF | \ NINE_STATE_SAMPLE_MASK) -#define NINE_STATE_FREQ_GROUP_1 ~NINE_STATE_FREQ_GROUP_0 -#define NINE_STATE_SHADER_VARIANT_GROUP \ - (NINE_STATE_TEXTURE | \ - NINE_STATE_VS | \ - NINE_STATE_PS) +/* TODO: only go through dirty textures */ +static void +validate_textures(struct NineDevice9 *device) +{ + struct NineBaseTexture9 *tex, *ptr; + LIST_FOR_EACH_ENTRY_SAFE(tex, ptr, &device->update_textures, list) { + list_delinit(&tex->list); + NineBaseTexture9_Validate(tex); + } +} + +void +nine_update_state_framebuffer(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + + validate_textures(device); + + if (state->changed.group & NINE_STATE_FB) + update_framebuffer(device); + + state->changed.group &= ~NINE_STATE_FB; +} boolean -nine_update_state(struct NineDevice9 *device, uint32_t mask) +nine_update_state(struct NineDevice9 *device) { struct pipe_context *pipe = device->pipe; struct nine_state *state = &device->state; uint32_t group; - DBG("changed state groups: %x | %x\n", - state->changed.group & NINE_STATE_FREQ_GROUP_0, - state->changed.group & NINE_STATE_FREQ_GROUP_1); + DBG("changed state groups: %x\n", state->changed.group); /* NOTE: We may want to use the cso cache for everything, or let * NineDevice9.RestoreNonCSOState actually set the states, then we wouldn't @@ -896,35 +962,79 @@ nine_update_state(struct NineDevice9 *device, uint32_t mask) validate_textures(device); /* may clobber state */ /* ff_update may change VS/PS dirty bits */ - if ((mask & NINE_STATE_FF) && unlikely(!state->vs || !state->ps)) + if (unlikely(!state->vs || !state->ps)) nine_ff_update(device); - group = state->changed.group & mask; + group = state->changed.group; - if (group & NINE_STATE_SHADER_VARIANT_GROUP) - group |= update_shader_variant_keys(device); + if (group & 
(NINE_STATE_SHADER_CHANGE_VS | NINE_STATE_SHADER_CHANGE_PS)) { + if (group & NINE_STATE_SHADER_CHANGE_VS) + group |= prepare_vs(device, (group & NINE_STATE_VS) != 0); /* may set NINE_STATE_RASTERIZER and NINE_STATE_SAMPLER*/ + if (group & NINE_STATE_SHADER_CHANGE_PS) + group |= prepare_ps(device, (group & NINE_STATE_PS) != 0); + } - if (group & NINE_STATE_FREQ_GROUP_0) { + if (group & (NINE_STATE_COMMON | NINE_STATE_VS)) { if (group & NINE_STATE_FB) - group = update_framebuffer(device) & mask; + group |= update_framebuffer(device); /* may set NINE_STATE_RASTERIZER */ + if (group & NINE_STATE_BLEND) + prepare_blend(device); + if (group & NINE_STATE_DSA) + prepare_dsa(device); if (group & NINE_STATE_VIEWPORT) update_viewport(device); - if (group & NINE_STATE_SCISSOR) - update_scissor(device); - - if (group & NINE_STATE_DSA) - update_dsa(device); - if (group & NINE_STATE_BLEND) - update_blend(device); - - if (group & NINE_STATE_VS) - group |= update_vs(device); + if ((group & (NINE_STATE_VDECL | NINE_STATE_VS)) || + state->changed.stream_freq & ~1) + update_vertex_elements(device); + if (group & NINE_STATE_IDXBUF) + commit_index_buffer(device); + } + if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS))) { if (group & NINE_STATE_RASTERIZER) - update_rasterizer(device); + prepare_rasterizer(device); + if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER)) + update_textures_and_samplers(device); + if (device->prefer_user_constbuf) { + if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs) + prepare_vs_constants_userbuf(device); + if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps) + prepare_ps_constants_userbuf(device); + } else { + if ((group & NINE_STATE_VS_CONST) && state->vs) + upload_constants(device, PIPE_SHADER_VERTEX); + if ((group & NINE_STATE_PS_CONST) && state->ps) + upload_constants(device, PIPE_SHADER_FRAGMENT); + } + } - if (group & NINE_STATE_PS) - group |= update_ps(device); + if (state->changed.vtxbuf) + 
update_vertex_buffers(device); + + if (state->commit & NINE_STATE_COMMIT_BLEND) + commit_blend(device); + if (state->commit & NINE_STATE_COMMIT_DSA) + commit_dsa(device); + if (state->commit & NINE_STATE_COMMIT_RASTERIZER) + commit_rasterizer(device); + if (state->commit & NINE_STATE_COMMIT_CONST_VS) + commit_vs_constants(device); + if (state->commit & NINE_STATE_COMMIT_CONST_PS) + commit_ps_constants(device); + if (state->commit & NINE_STATE_COMMIT_VS) + commit_vs(device); + if (state->commit & NINE_STATE_COMMIT_PS) + commit_ps(device); + + state->commit = 0; + + if (unlikely(state->changed.ucp)) { + pipe->set_clip_state(pipe, &state->clip); + state->changed.ucp = 0; + } + if (unlikely(group & NINE_STATE_RARE)) { + if (group & NINE_STATE_SCISSOR) + commit_scissor(device); if (group & NINE_STATE_BLEND_COLOR) { struct pipe_blend_color color; d3dcolor_to_rgba(&color.color[0], state->rs[D3DRS_BLENDFACTOR]); @@ -941,38 +1051,7 @@ nine_update_state(struct NineDevice9 *device, uint32_t mask) } } - if (state->changed.ucp) { - pipe->set_clip_state(pipe, &state->clip); - state->changed.ucp = 0; - } - - if (group & (NINE_STATE_FREQ_GROUP_1 | NINE_STATE_VS)) { - if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER)) - update_textures_and_samplers(device); - - if (group & NINE_STATE_IDXBUF) - update_index_buffer(device); - - if ((group & (NINE_STATE_VDECL | NINE_STATE_VS)) || - state->changed.stream_freq & ~1) - update_vertex_elements(device); - - if (device->prefer_user_constbuf) { - if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs) - update_vs_constants_userbuf(device); - if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps) - update_ps_constants_userbuf(device); - } else { - if ((group & NINE_STATE_VS_CONST) && state->vs) - update_constants(device, PIPE_SHADER_VERTEX); - if ((group & NINE_STATE_PS_CONST) && state->ps) - update_constants(device, PIPE_SHADER_FRAGMENT); - } - } - if (state->changed.vtxbuf) - update_vertex_buffers(device); - - 
device->state.changed.group &= ~mask | + device->state.changed.group &= (NINE_STATE_FF | NINE_STATE_VS_CONST | NINE_STATE_PS_CONST); DBG("finished\n"); @@ -980,6 +1059,7 @@ nine_update_state(struct NineDevice9 *device, uint32_t mask) return TRUE; } +/* State defaults */ static const DWORD nine_render_state_defaults[NINED3DRS_LAST + 1] = { @@ -1134,6 +1214,18 @@ static const DWORD nine_samp_state_defaults[NINED3DSAMP_LAST + 1] = [NINED3DSAMP_MINLOD] = 0, [NINED3DSAMP_SHADOW] = 0 }; + +void nine_state_restore_non_cso(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + + state->changed.group = NINE_STATE_ALL; + state->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1; + state->changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1; + state->changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK; + state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS; +} + void nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, boolean is_reset) @@ -1152,6 +1244,7 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, } state->ff.tex_stage[0][D3DTSS_COLOROP] = D3DTOP_MODULATE; state->ff.tex_stage[0][D3DTSS_ALPHAOP] = D3DTOP_SELECTARG1; + memset(&state->bumpmap_vars, 0, sizeof(state->bumpmap_vars)); for (s = 0; s < Elements(state->samp); ++s) { memcpy(&state->samp[s], nine_samp_state_defaults, @@ -1170,6 +1263,9 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, /* Set changed flags to initialize driver. 
*/ state->changed.group = NINE_STATE_ALL; + state->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1; + state->changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1; + state->changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK; state->ff.changed.transform[0] = ~0; state->ff.changed.transform[D3DTS_WORLD / 32] |= 1 << (D3DTS_WORLD % 32); @@ -1186,6 +1282,23 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, state->dummy_vbo_bound_at = -1; state->vbo_bound_done = FALSE; } + + if (!device->prefer_user_constbuf) { + /* fill cb_vs and cb_ps for the non user constbuf path */ + struct pipe_constant_buffer cb; + + cb.buffer_offset = 0; + cb.buffer_size = device->vs_const_size; + cb.buffer = device->constbuf_vs; + cb.user_buffer = NULL; + state->pipe.cb_vs = cb; + + cb.buffer_size = device->ps_const_size; + cb.buffer = device->constbuf_ps; + state->pipe.cb_ps = cb; + + state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS; + } } void @@ -1353,15 +1466,15 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = [D3DRS_ZFUNC] = NINE_STATE_DSA, [D3DRS_ALPHAREF] = NINE_STATE_DSA, [D3DRS_ALPHAFUNC] = NINE_STATE_DSA, - [D3DRS_DITHERENABLE] = NINE_STATE_RASTERIZER, + [D3DRS_DITHERENABLE] = NINE_STATE_BLEND, [D3DRS_ALPHABLENDENABLE] = NINE_STATE_BLEND, - [D3DRS_FOGENABLE] = NINE_STATE_FF_OTHER, + [D3DRS_FOGENABLE] = NINE_STATE_FF_OTHER | NINE_STATE_FOG_SHADER | NINE_STATE_PS_CONST, [D3DRS_SPECULARENABLE] = NINE_STATE_FF_LIGHTING, - [D3DRS_FOGCOLOR] = NINE_STATE_FF_OTHER, - [D3DRS_FOGTABLEMODE] = NINE_STATE_FF_OTHER, - [D3DRS_FOGSTART] = NINE_STATE_FF_OTHER, - [D3DRS_FOGEND] = NINE_STATE_FF_OTHER, - [D3DRS_FOGDENSITY] = NINE_STATE_FF_OTHER, + [D3DRS_FOGCOLOR] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST, + [D3DRS_FOGTABLEMODE] = NINE_STATE_FF_OTHER | NINE_STATE_FOG_SHADER | NINE_STATE_PS_CONST, + [D3DRS_FOGSTART] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST, + [D3DRS_FOGEND] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST, + 
[D3DRS_FOGDENSITY] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST, [D3DRS_RANGEFOGENABLE] = NINE_STATE_FF_OTHER, [D3DRS_STENCILENABLE] = NINE_STATE_DSA, [D3DRS_STENCILFAIL] = NINE_STATE_DSA, @@ -1394,7 +1507,7 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = [D3DRS_VERTEXBLEND] = NINE_STATE_FF_OTHER, [D3DRS_CLIPPLANEENABLE] = NINE_STATE_RASTERIZER, [D3DRS_POINTSIZE] = NINE_STATE_RASTERIZER, - [D3DRS_POINTSIZE_MIN] = NINE_STATE_MISC_CONST, + [D3DRS_POINTSIZE_MIN] = NINE_STATE_RASTERIZER, [D3DRS_POINTSPRITEENABLE] = NINE_STATE_RASTERIZER, [D3DRS_POINTSCALEENABLE] = NINE_STATE_FF_OTHER, [D3DRS_POINTSCALE_A] = NINE_STATE_FF_OTHER, @@ -1404,7 +1517,7 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = [D3DRS_MULTISAMPLEMASK] = NINE_STATE_SAMPLE_MASK, [D3DRS_PATCHEDGESTYLE] = NINE_STATE_UNHANDLED, [D3DRS_DEBUGMONITORTOKEN] = NINE_STATE_UNHANDLED, - [D3DRS_POINTSIZE_MAX] = NINE_STATE_MISC_CONST, + [D3DRS_POINTSIZE_MAX] = NINE_STATE_RASTERIZER, [D3DRS_INDEXEDVERTEXBLENDENABLE] = NINE_STATE_FF_OTHER, [D3DRS_COLORWRITEENABLE] = NINE_STATE_BLEND, [D3DRS_TWEENFACTOR] = NINE_STATE_FF_OTHER, @@ -1446,6 +1559,8 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = [D3DRS_BLENDOPALPHA] = NINE_STATE_BLEND }; +/* Misc */ + D3DMATRIX * nine_state_access_transform(struct nine_state *state, D3DTRANSFORMSTATETYPE t, boolean alloc) @@ -1601,4 +1716,3 @@ const char *nine_d3drs_to_string(DWORD State) return "(invalid)"; } } - diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h index 2bf3f63..b34da70 100644 --- a/src/gallium/state_trackers/nine/nine_state.h +++ b/src/gallium/state_trackers/nine/nine_state.h @@ -33,8 +33,7 @@ #define NINED3DRS_VSPOINTSIZE (D3DRS_BLENDOPALPHA + 1) #define NINED3DRS_RTMASK (D3DRS_BLENDOPALPHA + 2) -#define NINED3DRS_ZBIASSCALE (D3DRS_BLENDOPALPHA + 3) -#define NINED3DRS_ALPHACOVERAGE (D3DRS_BLENDOPALPHA + 4) +#define NINED3DRS_ALPHACOVERAGE (D3DRS_BLENDOPALPHA + 3) #define 
D3DRS_LAST D3DRS_BLENDOPALPHA #define NINED3DRS_LAST NINED3DRS_ALPHACOVERAGE /* 213 */ @@ -67,17 +66,26 @@ #define NINE_STATE_BLEND_COLOR (1 << 16) #define NINE_STATE_STENCIL_REF (1 << 17) #define NINE_STATE_SAMPLE_MASK (1 << 18) -#define NINE_STATE_MISC_CONST (1 << 19) -#define NINE_STATE_FF (0x1f << 20) -#define NINE_STATE_FF_VS (0x17 << 20) -#define NINE_STATE_FF_PS (0x18 << 20) -#define NINE_STATE_FF_LIGHTING (1 << 20) -#define NINE_STATE_FF_MATERIAL (1 << 21) -#define NINE_STATE_FF_VSTRANSF (1 << 22) -#define NINE_STATE_FF_PSSTAGES (1 << 23) -#define NINE_STATE_FF_OTHER (1 << 24) -#define NINE_STATE_ALL 0x1ffffff -#define NINE_STATE_UNHANDLED (1 << 25) +#define NINE_STATE_FF (0x1f << 19) +#define NINE_STATE_FF_VS (0x17 << 19) +#define NINE_STATE_FF_PS (0x18 << 19) +#define NINE_STATE_FF_LIGHTING (1 << 19) +#define NINE_STATE_FF_MATERIAL (1 << 20) +#define NINE_STATE_FF_VSTRANSF (1 << 21) +#define NINE_STATE_FF_PSSTAGES (1 << 22) +#define NINE_STATE_FF_OTHER (1 << 23) +#define NINE_STATE_FOG_SHADER (1 << 24) +#define NINE_STATE_PS1X_SHADER (1 << 25) +#define NINE_STATE_ALL 0x3ffffff +#define NINE_STATE_UNHANDLED (1 << 26) + +#define NINE_STATE_COMMIT_DSA (1 << 0) +#define NINE_STATE_COMMIT_RASTERIZER (1 << 1) +#define NINE_STATE_COMMIT_BLEND (1 << 2) +#define NINE_STATE_COMMIT_CONST_VS (1 << 3) +#define NINE_STATE_COMMIT_CONST_PS (1 << 4) +#define NINE_STATE_COMMIT_VS (1 << 5) +#define NINE_STATE_COMMIT_PS (1 << 6) #define NINE_MAX_SIMULTANEOUS_RENDERTARGETS 4 @@ -94,6 +102,8 @@ NINE_MAX_CONST_I * 4 * sizeof(int)) +#define NINE_MAX_TEXTURE_STAGES 8 + #define NINE_MAX_LIGHTS 65536 #define NINE_MAX_LIGHTS_ACTIVE 8 @@ -124,7 +134,6 @@ struct nine_state uint16_t vs_const_b; /* NINE_MAX_CONST_B == 16 */ uint16_t ps_const_b; uint8_t ucp; - boolean srgb; } changed; struct NineSurface9 *rt[NINE_MAX_SIMULTANEOUS_RENDERTARGETS]; @@ -143,13 +152,13 @@ struct nine_state int vs_const_i[NINE_MAX_CONST_I][4]; BOOL vs_const_b[NINE_MAX_CONST_B]; float *vs_lconstf_temp; - 
uint32_t vs_key; struct NinePixelShader9 *ps; float *ps_const_f; int ps_const_i[NINE_MAX_CONST_I][4]; BOOL ps_const_b[NINE_MAX_CONST_B]; - uint32_t ps_key; + float *ps_lconstf_temp; + uint32_t bumpmap_vars[6 * NINE_MAX_TEXTURE_STAGES]; struct { void *vs; @@ -184,13 +193,9 @@ struct nine_state struct { struct { uint32_t group; - uint32_t tex_stage[NINE_MAX_SAMPLERS][(NINED3DTSS_COUNT + 31) / 32]; + uint32_t tex_stage[NINE_MAX_TEXTURE_STAGES][(NINED3DTSS_COUNT + 31) / 32]; uint32_t transform[(NINED3DTS_COUNT + 31) / 32]; } changed; - struct { - boolean vs_const; - boolean ps_const; - } clobber; D3DMATRIX *transform; /* access only via nine_state_access_transform */ unsigned num_transforms; @@ -205,8 +210,19 @@ struct nine_state D3DMATERIAL9 material; - DWORD tex_stage[NINE_MAX_SAMPLERS][NINED3DTSS_COUNT]; + DWORD tex_stage[NINE_MAX_TEXTURE_STAGES][NINED3DTSS_COUNT]; } ff; + + uint32_t commit; + struct { + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_rasterizer_state rast; + struct pipe_blend_state blend; + struct pipe_constant_buffer cb_vs; + struct pipe_constant_buffer cb_ps; + struct pipe_constant_buffer cb_vs_ff; + struct pipe_constant_buffer cb_ps_ff; + } pipe; }; /* map D3DRS -> NINE_STATE_x @@ -220,8 +236,10 @@ extern const uint32_t nine_render_states_vertex[(NINED3DRS_COUNT + 31) / 32]; struct NineDevice9; -boolean nine_update_state(struct NineDevice9 *, uint32_t group_mask); +void nine_update_state_framebuffer(struct NineDevice9 *); +boolean nine_update_state(struct NineDevice9 *); +void nine_state_restore_non_cso(struct NineDevice9 *device); void nine_state_set_defaults(struct NineDevice9 *, const D3DCAPS9 *, boolean is_reset); void nine_state_clear(struct nine_state *, const boolean device); diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c index 3f176a3..42bc349 100644 --- a/src/gallium/state_trackers/nine/pixelshader9.c +++ b/src/gallium/state_trackers/nine/pixelshader9.c @@ -46,7 
+46,7 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This, return hr; if (cso) { - This->variant.cso = cso; + This->ff_cso = cso; return D3D_OK; } device = This->base.device; @@ -57,6 +57,8 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This, info.const_b_base = NINE_CONST_B_BASE(device->max_ps_const_f) / 16; info.sampler_mask_shadow = 0x0; info.sampler_ps1xtypes = 0x0; + info.fog_enable = 0; + info.projected = 0; hr = nine_translate_shader(device, &info); if (FAILED(hr)) @@ -69,9 +71,13 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This, This->byte_code.size = info.byte_size; This->variant.cso = info.cso; + This->last_cso = info.cso; + This->last_key = 0; + This->sampler_mask = info.sampler_mask; This->rt_mask = info.rt_mask; This->const_used_size = info.const_used_size; + This->bumpenvmat_needed = info.bumpenvmat_needed; /* no constant relative addressing for ps */ assert(info.lconstf.data == NULL); assert(info.lconstf.ranges == NULL); @@ -82,11 +88,12 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This, void NinePixelShader9_dtor( struct NinePixelShader9 *This ) { - DBG("This=%p cso=%p\n", This, This->variant.cso); + DBG("This=%p\n", This); if (This->base.device) { struct pipe_context *pipe = This->base.device->pipe; - struct nine_shader_variant *var = &This->variant; + struct nine_shader_variant64 *var = &This->variant; + do { if (var->cso) { if (This->base.device->state.cso.ps == var->cso) @@ -95,8 +102,14 @@ NinePixelShader9_dtor( struct NinePixelShader9 *This ) } var = var->next; } while (var); + + if (This->ff_cso) { + if (This->ff_cso == This->base.device->state.cso.ps) + pipe->bind_fs_state(pipe, NULL); + pipe->delete_fs_state(pipe, This->ff_cso); + } } - nine_shader_variants_free(&This->variant); + nine_shader_variants_free64(&This->variant); FREE((void *)This->byte_code.tokens); /* const_cast */ @@ -124,10 +137,16 @@ NinePixelShader9_GetFunction( struct NinePixelShader9 *This, } void * -NinePixelShader9_GetVariant( struct 
NinePixelShader9 *This, - uint32_t key ) +NinePixelShader9_GetVariant( struct NinePixelShader9 *This ) { - void *cso = nine_shader_variant_get(&This->variant, key); + void *cso; + uint64_t key; + + key = This->next_key; + if (key == This->last_key) + return This->last_cso; + + cso = nine_shader_variant_get64(&This->variant, key); if (!cso) { struct NineDevice9 *device = This->base.device; struct nine_shader_info info; @@ -139,13 +158,20 @@ NinePixelShader9_GetVariant( struct NinePixelShader9 *This, info.byte_code = This->byte_code.tokens; info.sampler_mask_shadow = key & 0xffff; info.sampler_ps1xtypes = key; + info.fog_enable = device->state.rs[D3DRS_FOGENABLE]; + info.fog_mode = device->state.rs[D3DRS_FOGTABLEMODE]; + info.projected = (key >> 48) & 0xffff; hr = nine_translate_shader(This->base.device, &info); if (FAILED(hr)) return NULL; - nine_shader_variant_add(&This->variant, key, info.cso); + nine_shader_variant_add64(&This->variant, key, info.cso); cso = info.cso; } + + This->last_key = key; + This->last_cso = cso; + return cso; } diff --git a/src/gallium/state_trackers/nine/pixelshader9.h b/src/gallium/state_trackers/nine/pixelshader9.h index 6dad1d1..e09009f 100644 --- a/src/gallium/state_trackers/nine/pixelshader9.h +++ b/src/gallium/state_trackers/nine/pixelshader9.h @@ -25,13 +25,16 @@ #include "iunknown.h" #include "nine_shader.h" +#include "nine_state.h" +#include "basetexture9.h" +#include "nine_ff.h" struct nine_lconstf; struct NinePixelShader9 { struct NineUnknown base; - struct nine_shader_variant variant; + struct nine_shader_variant64 variant; struct { const DWORD *tokens; @@ -41,11 +44,17 @@ struct NinePixelShader9 unsigned const_used_size; /* in bytes */ + uint8_t bumpenvmat_needed; uint16_t sampler_mask; - uint16_t sampler_mask_shadow; uint8_t rt_mask; uint64_t ff_key[6]; + void *ff_cso; + + uint64_t last_key; + void *last_cso; + + uint64_t next_key; }; static inline struct NinePixelShader9 * NinePixelShader9( void *data ) @@ -53,9 +62,49 @@ 
NinePixelShader9( void *data ) return (struct NinePixelShader9 *)data; } +static inline BOOL +NinePixelShader9_UpdateKey( struct NinePixelShader9 *ps, + struct nine_state *state ) +{ + uint16_t samplers_shadow; + uint32_t samplers_ps1_types; + uint16_t projected; + uint64_t key; + BOOL res; + + if (unlikely(ps->byte_code.version < 0x20)) { + /* no depth textures, but variable targets */ + uint32_t m = ps->sampler_mask; + samplers_ps1_types = 0; + while (m) { + int s = ffs(m) - 1; + m &= ~(1 << s); + samplers_ps1_types |= (state->texture[s] ? state->texture[s]->pstype : 1) << (s * 2); + } + key = samplers_ps1_types; + } else { + samplers_shadow = (uint16_t)((state->samplers_shadow & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0)); + key = samplers_shadow & ps->sampler_mask; + } + + if (ps->byte_code.version < 0x30) { + key |= ((uint64_t)state->rs[D3DRS_FOGENABLE]) << 32; + key |= ((uint64_t)state->rs[D3DRS_FOGTABLEMODE]) << 33; + } + + if (unlikely(ps->byte_code.version < 0x14)) { + projected = nine_ff_get_projected_key(state); + key |= ((uint64_t) projected) << 48; + } + + res = ps->last_key != key; + if (res) + ps->next_key = key; + return res; +} + void * -NinePixelShader9_GetVariant( struct NinePixelShader9 *vs, - uint32_t key ); +NinePixelShader9_GetVariant( struct NinePixelShader9 *ps ); /*** public ***/ diff --git a/src/gallium/state_trackers/nine/resource9.c b/src/gallium/state_trackers/nine/resource9.c index bbc8320..6d91533 100644 --- a/src/gallium/state_trackers/nine/resource9.c +++ b/src/gallium/state_trackers/nine/resource9.c @@ -161,20 +161,22 @@ NineResource9_GetPrivateData( struct NineResource9 *This, DWORD *pSizeOfData ) { struct pheader *header; + DWORD sizeofdata; DBG("This=%p refguid=%p pData=%p pSizeOfData=%p\n", This, refguid, pData, pSizeOfData); - user_assert(pSizeOfData, E_POINTER); - header = util_hash_table_get(This->pdata, refguid); if (!header) { return D3DERR_NOTFOUND; } + user_assert(pSizeOfData, E_POINTER); + sizeofdata = *pSizeOfData; 
+ *pSizeOfData = header->size; + if (!pData) { - *pSizeOfData = header->size; return D3D_OK; } - if (*pSizeOfData < header->size) { + if (sizeofdata < header->size) { return D3DERR_MOREDATA; } @@ -206,10 +208,13 @@ DWORD WINAPI NineResource9_SetPriority( struct NineResource9 *This, DWORD PriorityNew ) { - DWORD prev = This->priority; - + DWORD prev; DBG("This=%p, PriorityNew=%d\n", This, PriorityNew); + if (This->pool != D3DPOOL_MANAGED || This->type == D3DRTYPE_SURFACE) + return 0; + + prev = This->priority; This->priority = PriorityNew; return prev; } @@ -217,6 +222,9 @@ NineResource9_SetPriority( struct NineResource9 *This, DWORD WINAPI NineResource9_GetPriority( struct NineResource9 *This ) { + if (This->pool != D3DPOOL_MANAGED || This->type == D3DRTYPE_SURFACE) + return 0; + return This->priority; } diff --git a/src/gallium/state_trackers/nine/stateblock9.c b/src/gallium/state_trackers/nine/stateblock9.c index 032b9ff..6d6e1be 100644 --- a/src/gallium/state_trackers/nine/stateblock9.c +++ b/src/gallium/state_trackers/nine/stateblock9.c @@ -251,7 +251,7 @@ nine_state_copy_common(struct nine_state *dst, dst->ff.material = src->ff.material; if (mask->changed.group & NINE_STATE_FF_PSSTAGES) { - for (s = 0; s < NINE_MAX_SAMPLERS; ++s) { + for (s = 0; s < NINE_MAX_TEXTURE_STAGES; ++s) { for (i = 0; i < NINED3DTSS_COUNT; ++i) if (mask->ff.changed.tex_stage[s][i / 32] & (1 << (i % 32))) dst->ff.tex_stage[s][i] = src->ff.tex_stage[s][i]; diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c index 7533cb3..14c1ce9 100644 --- a/src/gallium/state_trackers/nine/surface9.c +++ b/src/gallium/state_trackers/nine/surface9.c @@ -104,11 +104,11 @@ NineSurface9_ctor( struct NineSurface9 *This, /* Ram buffer with no parent. 
Has to allocate the resource itself */ if (!pResource && !pContainer) { assert(!user_buffer); - This->data = MALLOC( + This->data = align_malloc( nine_format_get_level_alloc_size(This->base.info.format, pDesc->Width, pDesc->Height, - 0)); + 0), 32); if (!This->data) return E_OUTOFMEMORY; } @@ -273,7 +273,7 @@ NineSurface9_AddDirtyRect( struct NineSurface9 *This, This->texture == D3DRTYPE_CUBETEXTURE || This->texture == D3DRTYPE_TEXTURE); - if (This->base.pool != D3DPOOL_MANAGED) + if (This->base.pool == D3DPOOL_DEFAULT) return; /* Add a dirty rect to level 0 of the parent texture */ @@ -287,7 +287,7 @@ NineSurface9_AddDirtyRect( struct NineSurface9 *This, NineTexture9(This->base.base.container); NineTexture9_AddDirtyRect(tex, &dirty_rect); - } else { /* This->texture == D3DRTYPE_CUBETEXTURE */ + } else if (This->texture == D3DRTYPE_CUBETEXTURE) { struct NineCubeTexture9 *ctex = NineCubeTexture9(This->base.base.container); @@ -323,6 +323,13 @@ NineSurface9_LockRect( struct NineSurface9 *This, nine_D3DLOCK_to_str(Flags)); NineSurface9_Dump(This); + /* check if it's already locked */ + user_assert(This->lock_count == 0, D3DERR_INVALIDCALL); + + /* set pBits to NULL after lock_count check */ + user_assert(pLockedRect, E_POINTER); + pLockedRect->pBits = NULL; + #ifdef NINE_STRICT user_assert(This->base.pool != D3DPOOL_DEFAULT || (resource && (resource->flags & NINE_RESOURCE_FLAG_LOCKABLE)), @@ -337,19 +344,17 @@ NineSurface9_LockRect( struct NineSurface9 *This, user_assert(!((Flags & D3DLOCK_DISCARD) && (Flags & D3DLOCK_READONLY)), D3DERR_INVALIDCALL); - /* check if it's already locked */ - user_assert(This->lock_count == 0, D3DERR_INVALIDCALL); - user_assert(pLockedRect, E_POINTER); - user_assert(This->desc.MultiSampleType == D3DMULTISAMPLE_NONE, D3DERR_INVALIDCALL); - if (pRect && This->base.pool == D3DPOOL_DEFAULT && - util_format_is_compressed(This->base.info.format)) { + if (pRect && This->desc.Pool == D3DPOOL_DEFAULT && + compressed_format (This->desc.Format)) { 
const unsigned w = util_format_get_blockwidth(This->base.info.format); const unsigned h = util_format_get_blockheight(This->base.info.format); - user_assert(!(pRect->left % w) && !(pRect->right % w) && - !(pRect->top % h) && !(pRect->bottom % h), + user_assert((pRect->left == 0 && pRect->right == This->desc.Width && + pRect->top == 0 && pRect->bottom == This->desc.Height) || + (!(pRect->left % w) && !(pRect->right % w) && + !(pRect->top % h) && !(pRect->bottom % h)), D3DERR_INVALIDCALL); } @@ -363,13 +368,9 @@ NineSurface9_LockRect( struct NineSurface9 *This, usage |= PIPE_TRANSFER_DONTBLOCK; if (pRect) { + /* Windows XP accepts invalid locking rectangles, Windows 7 rejects + * them. Use Windows XP behaviour for now. */ rect_to_pipe_box(&box, pRect); - if (u_box_clip_2d(&box, &box, This->desc.Width, - This->desc.Height) < 0) { - DBG("pRect clipped by Width=%u Height=%u\n", - This->desc.Width, This->desc.Height); - return D3DERR_INVALIDCALL; - } } else { u_box_origin_2d(This->desc.Width, This->desc.Height, &box); } @@ -463,140 +464,92 @@ IDirect3DSurface9Vtbl NineSurface9_vtable = { (void *)NineSurface9_ReleaseDC }; -HRESULT -NineSurface9_CopySurface( struct NineSurface9 *This, - struct NineSurface9 *From, - const POINT *pDestPoint, - const RECT *pSourceRect ) +/* When this function is called, we have already checked + * The copy regions fit the surfaces */ +void +NineSurface9_CopyMemToDefault( struct NineSurface9 *This, + struct NineSurface9 *From, + const POINT *pDestPoint, + const RECT *pSourceRect ) { struct pipe_context *pipe = This->pipe; struct pipe_resource *r_dst = This->base.resource; - struct pipe_resource *r_src = From->base.resource; - struct pipe_transfer *transfer; - struct pipe_box src_box; struct pipe_box dst_box; - uint8_t *p_dst; const uint8_t *p_src; + int src_x, src_y, dst_x, dst_y, copy_width, copy_height; - DBG("This=%p From=%p pDestPoint=%p pSourceRect=%p\n", - This, From, pDestPoint, pSourceRect); - - assert(This->base.pool != 
D3DPOOL_MANAGED && - From->base.pool != D3DPOOL_MANAGED); + assert(This->base.pool == D3DPOOL_DEFAULT && + From->base.pool == D3DPOOL_SYSTEMMEM); - user_assert(This->desc.Format == From->desc.Format, D3DERR_INVALIDCALL); + if (pDestPoint) { + dst_x = pDestPoint->x; + dst_y = pDestPoint->y; + } else { + dst_x = 0; + dst_y = 0; + } - dst_box.x = pDestPoint ? pDestPoint->x : 0; - dst_box.y = pDestPoint ? pDestPoint->y : 0; + if (pSourceRect) { + src_x = pSourceRect->left; + src_y = pSourceRect->top; + copy_width = pSourceRect->right - pSourceRect->left; + copy_height = pSourceRect->bottom - pSourceRect->top; + } else { + src_x = 0; + src_y = 0; + copy_width = From->desc.Width; + copy_height = From->desc.Height; + } - user_assert(dst_box.x >= 0 && - dst_box.y >= 0, D3DERR_INVALIDCALL); + u_box_2d_zslice(dst_x, dst_y, This->layer, + copy_width, copy_height, &dst_box); - dst_box.z = This->layer; - src_box.z = From->layer; + p_src = NineSurface9_GetSystemMemPointer(From, src_x, src_y); - dst_box.depth = 1; - src_box.depth = 1; + pipe->transfer_inline_write(pipe, r_dst, This->level, + 0, /* WRITE|DISCARD are implicit */ + &dst_box, p_src, From->stride, 0); - if (pSourceRect) { - /* make sure it doesn't range outside the source surface */ - user_assert(pSourceRect->left >= 0 && - pSourceRect->right <= From->desc.Width && - pSourceRect->top >= 0 && - pSourceRect->bottom <= From->desc.Height, - D3DERR_INVALIDCALL); - if (rect_to_pipe_box_xy_only_clamp(&src_box, pSourceRect)) - return D3D_OK; - } else { - src_box.x = 0; - src_box.y = 0; - src_box.width = From->desc.Width; - src_box.height = From->desc.Height; - } + NineSurface9_MarkContainerDirty(This); +} - /* limits */ - dst_box.width = This->desc.Width - dst_box.x; - dst_box.height = This->desc.Height - dst_box.y; +void +NineSurface9_CopyDefaultToMem( struct NineSurface9 *This, + struct NineSurface9 *From ) +{ + struct pipe_context *pipe = This->pipe; + struct pipe_resource *r_src = From->base.resource; + struct 
pipe_transfer *transfer; + struct pipe_box src_box; + uint8_t *p_dst; + const uint8_t *p_src; - user_assert(src_box.width <= dst_box.width && - src_box.height <= dst_box.height, D3DERR_INVALIDCALL); + assert(This->base.pool == D3DPOOL_SYSTEMMEM && + From->base.pool == D3DPOOL_DEFAULT); - dst_box.width = src_box.width; - dst_box.height = src_box.height; + assert(This->desc.Width == From->desc.Width); + assert(This->desc.Height == From->desc.Height); - /* check source block align for compressed textures */ - if (util_format_is_compressed(From->base.info.format) && - ((src_box.width != From->desc.Width) || - (src_box.height != From->desc.Height))) { - const unsigned w = util_format_get_blockwidth(From->base.info.format); - const unsigned h = util_format_get_blockheight(From->base.info.format); - user_assert(!(src_box.width % w) && - !(src_box.height % h), - D3DERR_INVALIDCALL); - } + u_box_origin_2d(This->desc.Width, This->desc.Height, &src_box); + src_box.z = From->layer; - /* check destination block align for compressed textures */ - if (util_format_is_compressed(This->base.info.format) && - ((dst_box.width != This->desc.Width) || - (dst_box.height != This->desc.Height) || - dst_box.x != 0 || - dst_box.y != 0)) { - const unsigned w = util_format_get_blockwidth(This->base.info.format); - const unsigned h = util_format_get_blockheight(This->base.info.format); - user_assert(!(dst_box.x % w) && !(dst_box.width % w) && - !(dst_box.y % h) && !(dst_box.height % h), - D3DERR_INVALIDCALL); - } + p_src = pipe->transfer_map(pipe, r_src, From->level, + PIPE_TRANSFER_READ, + &src_box, &transfer); + p_dst = NineSurface9_GetSystemMemPointer(This, 0, 0); - if (r_dst && r_src) { - pipe->resource_copy_region(pipe, - r_dst, This->level, - dst_box.x, dst_box.y, dst_box.z, - r_src, From->level, - &src_box); - } else - if (r_dst) { - p_src = NineSurface9_GetSystemMemPointer(From, src_box.x, src_box.y); - - pipe->transfer_inline_write(pipe, r_dst, This->level, - 0, /* WRITE|DISCARD are 
implicit */ - &dst_box, p_src, From->stride, 0); - } else - if (r_src) { - p_dst = NineSurface9_GetSystemMemPointer(This, 0, 0); - - p_src = pipe->transfer_map(pipe, r_src, From->level, - PIPE_TRANSFER_READ, - &src_box, &transfer); - if (!p_src) - return D3DERR_DRIVERINTERNALERROR; - - util_copy_rect(p_dst, This->base.info.format, - This->stride, dst_box.x, dst_box.y, - dst_box.width, dst_box.height, - p_src, - transfer->stride, src_box.x, src_box.y); - - pipe->transfer_unmap(pipe, transfer); - } else { - p_dst = NineSurface9_GetSystemMemPointer(This, 0, 0); - p_src = NineSurface9_GetSystemMemPointer(From, 0, 0); - - util_copy_rect(p_dst, This->base.info.format, - This->stride, dst_box.x, dst_box.y, - dst_box.width, dst_box.height, - p_src, - From->stride, src_box.x, src_box.y); - } + assert (p_src && p_dst); - if (This->base.pool == D3DPOOL_DEFAULT) - NineSurface9_MarkContainerDirty(This); - if (!r_dst && This->base.resource) - NineSurface9_AddDirtyRect(This, &dst_box); + util_copy_rect(p_dst, This->base.info.format, + This->stride, 0, 0, + This->desc.Width, This->desc.Height, + p_src, + transfer->stride, 0, 0); - return D3D_OK; + pipe->transfer_unmap(pipe, transfer); } + /* Gladly, rendering to a MANAGED surface is not permitted, so we will * never have to do the reverse, i.e. download the surface. 
*/ diff --git a/src/gallium/state_trackers/nine/surface9.h b/src/gallium/state_trackers/nine/surface9.h index 73092ab..76156ae 100644 --- a/src/gallium/state_trackers/nine/surface9.h +++ b/src/gallium/state_trackers/nine/surface9.h @@ -125,11 +125,15 @@ HRESULT NineSurface9_UploadSelf( struct NineSurface9 *This, const struct pipe_box *damaged ); -HRESULT -NineSurface9_CopySurface( struct NineSurface9 *This, - struct NineSurface9 *From, - const POINT *pDestPoint, - const RECT *pSourceRect ); +void +NineSurface9_CopyMemToDefault( struct NineSurface9 *This, + struct NineSurface9 *From, + const POINT *pDestPoint, + const RECT *pSourceRect ); + +void +NineSurface9_CopyDefaultToMem( struct NineSurface9 *This, + struct NineSurface9 *From ); static inline boolean NineSurface9_IsOffscreenPlain (struct NineSurface9 *This ) diff --git a/src/gallium/state_trackers/nine/swapchain9.c b/src/gallium/state_trackers/nine/swapchain9.c index a62e6ad..3f5be26 100644 --- a/src/gallium/state_trackers/nine/swapchain9.c +++ b/src/gallium/state_trackers/nine/swapchain9.c @@ -184,7 +184,9 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This, /* Note: It is the role of the backend to fill if necessary * BackBufferWidth and BackBufferHeight */ - ID3DPresent_SetPresentParameters(This->present, pParams, This->mode); + hr = ID3DPresent_SetPresentParameters(This->present, pParams, This->mode); + if (hr != D3D_OK) + return hr; /* When we have flip behaviour, d3d9 expects we get back the screen buffer when we flip. * Here we don't get back the initial content of the screen. 
To emulate the behaviour @@ -575,9 +577,10 @@ handle_draw_cursor_and_hud( struct NineSwapChain9 *This, struct pipe_resource *r blit.filter = PIPE_TEX_FILTER_NEAREST; blit.scissor_enable = FALSE; - ID3DPresent_GetCursorPos(This->present, &device->cursor.pos); - - /* NOTE: blit messes up when box.x + box.width < 0, fix driver */ + /* NOTE: blit messes up when box.x + box.width < 0, fix driver + * NOTE2: device->cursor.pos contains coordinates relative to the screen. + * This happens to be also the position of the cursor when we are fullscreen. + * We don't use sw cursor for Windowed mode */ blit.dst.box.x = MAX2(device->cursor.pos.x, 0) - device->cursor.hotspot.x; blit.dst.box.y = MAX2(device->cursor.pos.y, 0) - device->cursor.hotspot.y; blit.dst.box.width = blit.src.box.width; @@ -587,13 +590,14 @@ handle_draw_cursor_and_hud( struct NineSwapChain9 *This, struct pipe_resource *r blit.src.box.width, blit.src.box.height, blit.dst.box.x, blit.dst.box.y); + blit.alpha_blend = TRUE; This->pipe->blit(This->pipe, &blit); } if (device->hud && resource) { hud_draw(device->hud, resource); /* XXX: no offset */ /* HUD doesn't clobber stipple */ - NineDevice9_RestoreNonCSOState(device, ~0x2); + nine_state_restore_non_cso(device); } } @@ -704,6 +708,7 @@ present( struct NineSwapChain9 *This, blit.mask = PIPE_MASK_RGBA; blit.filter = PIPE_TEX_FILTER_NEAREST; blit.scissor_enable = FALSE; + blit.alpha_blend = FALSE; This->pipe->blit(This->pipe, &blit); } @@ -835,7 +840,7 @@ NineSwapChain9_Present( struct NineSwapChain9 *This, ID3DPresent_WaitBufferReleased(This->present, This->present_handles[0]); This->base.device->state.changed.group |= NINE_STATE_FB; - nine_update_state(This->base.device, NINE_STATE_FB); + nine_update_state_framebuffer(This->base.device); return hr; } @@ -856,6 +861,8 @@ NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This, DBG("GetFrontBufferData: This=%p pDestSurface=%p\n", This, pDestSurface); + user_assert(dest_surface->base.pool == 
D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL); + width = dest_surface->desc.Width; height = dest_surface->desc.Height; @@ -870,7 +877,7 @@ NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This, desc.MultiSampleQuality = 0; desc.Width = width; desc.Height = height; - /* NineSurface9_CopySurface needs same format. */ + /* NineSurface9_CopyDefaultToMem needs same format. */ desc.Format = dest_surface->desc.Format; desc.Usage = D3DUSAGE_RENDERTARGET; hr = NineSurface9_new(pDevice, NineUnknown(This), temp_resource, NULL, 0, @@ -883,7 +890,7 @@ NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This, ID3DPresent_FrontBufferCopy(This->present, temp_handle); - NineSurface9_CopySurface(dest_surface, temp_surface, NULL, NULL); + NineSurface9_CopyDefaultToMem(dest_surface, temp_surface); ID3DPresent_DestroyD3DWindowBuffer(This->present, temp_handle); NineUnknown_Destroy(NineUnknown(temp_surface)); diff --git a/src/gallium/state_trackers/nine/texture9.c b/src/gallium/state_trackers/nine/texture9.c index 5900e76..bc325c1 100644 --- a/src/gallium/state_trackers/nine/texture9.c +++ b/src/gallium/state_trackers/nine/texture9.c @@ -101,6 +101,13 @@ NineTexture9_ctor( struct NineTexture9 *This, if (Format != D3DFMT_NULL && pf == PIPE_FORMAT_NONE) return D3DERR_INVALIDCALL; + if (compressed_format(Format)) { + const unsigned w = util_format_get_blockwidth(pf); + const unsigned h = util_format_get_blockheight(pf); + + user_assert(!(Width % w) && !(Height % h), D3DERR_INVALIDCALL); + } + info->screen = screen; info->target = PIPE_TEXTURE_2D; info->format = pf; @@ -152,10 +159,10 @@ NineTexture9_ctor( struct NineTexture9 *This, * apps access sublevels of texture even if they locked only first * level) */ level_offsets = alloca(sizeof(unsigned) * (info->last_level + 1)); - user_buffer = MALLOC( + user_buffer = align_malloc( nine_format_get_size_and_offsets(pf, level_offsets, Width, Height, - info->last_level)); + info->last_level), 32); This->managed_buffer = user_buffer; if 
(!This->managed_buffer) return E_OUTOFMEMORY; @@ -202,6 +209,9 @@ NineTexture9_ctor( struct NineTexture9 *This, return hr; } + /* Textures start initially dirty */ + This->dirty_rect.width = Width; + This->dirty_rect.height = Height; This->dirty_rect.depth = 1; /* widht == 0 means empty, depth stays 1 */ if (pSharedHandle && !*pSharedHandle) {/* Pool == D3DPOOL_SYSTEMMEM */ @@ -219,7 +229,8 @@ NineTexture9_dtor( struct NineTexture9 *This ) if (This->surfaces) { /* The surfaces should have 0 references and be unbound now. */ for (l = 0; l <= This->base.base.info.last_level; ++l) - NineUnknown_Destroy(&This->surfaces[l]->base.base); + if (This->surfaces[l]) + NineUnknown_Destroy(&This->surfaces[l]->base.base); FREE(This->surfaces); } @@ -295,18 +306,22 @@ NineTexture9_AddDirtyRect( struct NineTexture9 *This, pDirtyRect ? pDirtyRect->left : 0, pDirtyRect ? pDirtyRect->top : 0, pDirtyRect ? pDirtyRect->right : 0, pDirtyRect ? pDirtyRect->bottom : 0); - /* Tracking dirty regions on DEFAULT or SYSTEMMEM resources is pointless, + /* Tracking dirty regions on DEFAULT resources is pointless, * because we always write to the final storage. Just marked it dirty in * case we need to generate mip maps. 
*/ - if (This->base.base.pool != D3DPOOL_MANAGED) { - if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP) + if (This->base.base.pool == D3DPOOL_DEFAULT) { + if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP) { This->base.dirty_mip = TRUE; + BASETEX_REGISTER_UPDATE(&This->base); + } return D3D_OK; } - This->base.managed.dirty = TRUE; - BASETEX_REGISTER_UPDATE(&This->base); + if (This->base.base.pool == D3DPOOL_MANAGED) { + This->base.managed.dirty = TRUE; + BASETEX_REGISTER_UPDATE(&This->base); + } if (!pDirtyRect) { u_box_origin_2d(This->base.base.info.width0, diff --git a/src/gallium/state_trackers/nine/vertexshader9.c b/src/gallium/state_trackers/nine/vertexshader9.c index bbd5ce9..fdfb79a 100644 --- a/src/gallium/state_trackers/nine/vertexshader9.c +++ b/src/gallium/state_trackers/nine/vertexshader9.c @@ -48,9 +48,10 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This, return hr; if (cso) { - This->variant.cso = cso; + This->ff_cso = cso; return D3D_OK; } + device = This->base.device; info.type = PIPE_SHADER_VERTEX; @@ -59,6 +60,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This, info.const_b_base = NINE_CONST_B_BASE(device->max_vs_const_f) / 16; info.sampler_mask_shadow = 0x0; info.sampler_ps1xtypes = 0x0; + info.fog_enable = 0; hr = nine_translate_shader(device, &info); if (FAILED(hr)) @@ -71,6 +73,9 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This, This->byte_code.size = info.byte_size; This->variant.cso = info.cso; + This->last_cso = info.cso; + This->last_key = 0; + This->const_used_size = info.const_used_size; This->lconstf = info.lconstf; This->sampler_mask = info.sampler_mask; @@ -87,11 +92,12 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This, void NineVertexShader9_dtor( struct NineVertexShader9 *This ) { - DBG("This=%p cso=%p\n", This, This->variant.cso); + DBG("This=%p\n", This); if (This->base.device) { struct pipe_context *pipe = This->base.device->pipe; struct nine_shader_variant *var = &This->variant; + do { if 
(var->cso) { if (This->base.device->state.cso.vs == var->cso) @@ -100,6 +106,12 @@ NineVertexShader9_dtor( struct NineVertexShader9 *This ) } var = var->next; } while (var); + + if (This->ff_cso) { + if (This->ff_cso == This->base.device->state.cso.vs) + pipe->bind_vs_state(pipe, NULL); + pipe->delete_vs_state(pipe, This->ff_cso); + } } nine_shader_variants_free(&This->variant); @@ -130,10 +142,16 @@ NineVertexShader9_GetFunction( struct NineVertexShader9 *This, } void * -NineVertexShader9_GetVariant( struct NineVertexShader9 *This, - uint32_t key ) +NineVertexShader9_GetVariant( struct NineVertexShader9 *This ) { - void *cso = nine_shader_variant_get(&This->variant, key); + void *cso; + uint32_t key; + + key = This->next_key; + if (key == This->last_key) + return This->last_cso; + + cso = nine_shader_variant_get(&This->variant, key); if (!cso) { struct NineDevice9 *device = This->base.device; struct nine_shader_info info; @@ -144,6 +162,7 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This, info.const_b_base = NINE_CONST_B_BASE(device->max_vs_const_f) / 16; info.byte_code = This->byte_code.tokens; info.sampler_mask_shadow = key & 0xf; + info.fog_enable = device->state.rs[D3DRS_FOGENABLE]; hr = nine_translate_shader(This->base.device, &info); if (FAILED(hr)) @@ -151,6 +170,10 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This, nine_shader_variant_add(&This->variant, key, info.cso); cso = info.cso; } + + This->last_key = key; + This->last_cso = cso; + return cso; } diff --git a/src/gallium/state_trackers/nine/vertexshader9.h b/src/gallium/state_trackers/nine/vertexshader9.h index 66c602c..15c3f4f 100644 --- a/src/gallium/state_trackers/nine/vertexshader9.h +++ b/src/gallium/state_trackers/nine/vertexshader9.h @@ -25,6 +25,7 @@ #include "iunknown.h" #include "nine_shader.h" +#include "nine_state.h" struct NineVertexShader9 { @@ -43,7 +44,6 @@ struct NineVertexShader9 } byte_code; uint8_t sampler_mask; - uint8_t sampler_mask_shadow; boolean 
position_t; /* if true, disable vport transform */ boolean point_size; /* if true, set rasterizer.point_size_per_vertex to 1 */ @@ -54,7 +54,13 @@ struct NineVertexShader9 const struct pipe_stream_output_info *so; - uint64_t ff_key[2]; + uint64_t ff_key[3]; + void *ff_cso; + + uint32_t last_key; + void *last_cso; + + uint32_t next_key; }; static inline struct NineVertexShader9 * NineVertexShader9( void *data ) @@ -62,9 +68,29 @@ NineVertexShader9( void *data ) return (struct NineVertexShader9 *)data; } +static inline BOOL +NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs, + struct nine_state *state ) +{ + uint8_t samplers_shadow; + uint32_t key; + BOOL res; + + samplers_shadow = (uint8_t)((state->samplers_shadow & NINE_VS_SAMPLERS_MASK) >> NINE_SAMPLER_VS(0)); + samplers_shadow &= vs->sampler_mask; + key = samplers_shadow; + + if (vs->byte_code.version < 0x30) + key |= state->rs[D3DRS_FOGENABLE] << 8; + + res = vs->last_key != key; + if (res) + vs->next_key = key; + return res; +} + void * -NineVertexShader9_GetVariant( struct NineVertexShader9 *vs, - uint32_t key ); +NineVertexShader9_GetVariant( struct NineVertexShader9 *vs ); /*** public ***/ diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c index 4dfc559..0b90056 100644 --- a/src/gallium/state_trackers/nine/volume9.c +++ b/src/gallium/state_trackers/nine/volume9.c @@ -23,6 +23,7 @@ #include "device9.h" #include "volume9.h" #include "basetexture9.h" /* for marking dirty */ +#include "volumetexture9.h" #include "nine_helpers.h" #include "nine_pipe.h" #include "nine_dump.h" @@ -43,7 +44,7 @@ NineVolume9_AllocateData( struct NineVolume9 *This ) DBG("(%p(This=%p),level=%u) Allocating 0x%x bytes of system memory.\n", This->base.container, This, This->level, size); - This->data = (uint8_t *)MALLOC(size); + This->data = (uint8_t *)align_malloc(size, 32); if (!This->data) return E_OUTOFMEMORY; return D3D_OK; @@ -182,47 +183,23 @@ NineVolume9_GetDesc( struct 
NineVolume9 *This, return D3D_OK; } -static inline boolean -NineVolume9_IsDirty(struct NineVolume9 *This) -{ - return This->dirty_box[0].width != 0; -} - inline void NineVolume9_AddDirtyRegion( struct NineVolume9 *This, const struct pipe_box *box ) { - struct pipe_box cover_a, cover_b; - float vol[2]; + D3DBOX dirty_region; + struct NineVolumeTexture9 *tex = NineVolumeTexture9(This->base.container); if (!box) { - u_box_3d(0, 0, 0, This->desc.Width, This->desc.Height, - This->desc.Depth, &This->dirty_box[0]); - memset(&This->dirty_box[1], 0, sizeof(This->dirty_box[1])); - return; - } - if (!This->dirty_box[0].width) { - This->dirty_box[0] = *box; - return; - } - - u_box_union_3d(&cover_a, &This->dirty_box[0], box); - vol[0] = u_box_volume_3d(&cover_a); - - if (This->dirty_box[1].width == 0) { - vol[1] = u_box_volume_3d(&This->dirty_box[0]); - if (vol[0] > (vol[1] * 1.5f)) - This->dirty_box[1] = *box; - else - This->dirty_box[0] = cover_a; + NineVolumeTexture9_AddDirtyBox(tex, NULL); } else { - u_box_union_3d(&cover_b, &This->dirty_box[1], box); - vol[1] = u_box_volume_3d(&cover_b); - - if (vol[0] > vol[1]) - This->dirty_box[1] = cover_b; - else - This->dirty_box[0] = cover_a; + dirty_region.Left = box->x << This->level_actual; + dirty_region.Top = box->y << This->level_actual; + dirty_region.Front = box->z << This->level_actual; + dirty_region.Right = dirty_region.Left + (box->width << This->level_actual); + dirty_region.Bottom = dirty_region.Top + (box->height << This->level_actual); + dirty_region.Back = dirty_region.Front + (box->depth << This->level_actual); + NineVolumeTexture9_AddDirtyBox(tex, &dirty_region); } } @@ -254,21 +231,26 @@ NineVolume9_LockBox( struct NineVolume9 *This, pBox ? pBox->Front : 0, pBox ? 
pBox->Back : 0, nine_D3DLOCK_to_str(Flags)); + /* check if it's already locked */ + user_assert(This->lock_count == 0, D3DERR_INVALIDCALL); + + /* set pBits to NULL after lock_count check */ + user_assert(pLockedVolume, E_POINTER); + pLockedVolume->pBits = NULL; + user_assert(This->desc.Pool != D3DPOOL_DEFAULT || (This->desc.Usage & D3DUSAGE_DYNAMIC), D3DERR_INVALIDCALL); user_assert(!((Flags & D3DLOCK_DISCARD) && (Flags & D3DLOCK_READONLY)), D3DERR_INVALIDCALL); - user_assert(This->lock_count == 0, D3DERR_INVALIDCALL); - user_assert(pLockedVolume, E_POINTER); - - if (pBox && This->desc.Pool == D3DPOOL_DEFAULT && - util_format_is_compressed(This->info.format)) { + if (pBox && compressed_format (This->desc.Format)) { /* For volume all pools are checked */ const unsigned w = util_format_get_blockwidth(This->info.format); const unsigned h = util_format_get_blockheight(This->info.format); - user_assert(!(pBox->Left % w) && !(pBox->Right % w) && - !(pBox->Top % h) && !(pBox->Bottom % h), + user_assert((pBox->Left == 0 && pBox->Right == This->desc.Width && + pBox->Top == 0 && pBox->Bottom == This->desc.Height) || + (!(pBox->Left % w) && !(pBox->Right % w) && + !(pBox->Top % h) && !(pBox->Bottom % h)), D3DERR_INVALIDCALL); } @@ -312,8 +294,7 @@ NineVolume9_LockBox( struct NineVolume9 *This, if (!(Flags & (D3DLOCK_NO_DIRTY_UPDATE | D3DLOCK_READONLY))) { NineVolume9_MarkContainerDirty(This); - if (This->desc.Pool == D3DPOOL_MANAGED) - NineVolume9_AddDirtyRegion(This, &box); + NineVolume9_AddDirtyRegion(This, &box); } ++This->lock_count; @@ -333,42 +314,31 @@ NineVolume9_UnlockBox( struct NineVolume9 *This ) return D3D_OK; } - +/* When this function is called, we have already checked + * The copy regions fit the volumes */ HRESULT -NineVolume9_CopyVolume( struct NineVolume9 *This, - struct NineVolume9 *From, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_box *pSrcBox ) +NineVolume9_CopyMemToDefault( struct NineVolume9 *This, + struct NineVolume9 *From, + 
unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_box *pSrcBox ) { struct pipe_context *pipe = This->pipe; struct pipe_resource *r_dst = This->resource; - struct pipe_resource *r_src = From->resource; - struct pipe_transfer *transfer; struct pipe_box src_box; struct pipe_box dst_box; - uint8_t *p_dst; const uint8_t *p_src; DBG("This=%p From=%p dstx=%u dsty=%u dstz=%u pSrcBox=%p\n", This, From, dstx, dsty, dstz, pSrcBox); - assert(This->desc.Pool != D3DPOOL_MANAGED && - From->desc.Pool != D3DPOOL_MANAGED); - user_assert(This->desc.Format == From->desc.Format, D3DERR_INVALIDCALL); + assert(This->desc.Pool == D3DPOOL_DEFAULT && + From->desc.Pool == D3DPOOL_SYSTEMMEM); dst_box.x = dstx; dst_box.y = dsty; dst_box.z = dstz; if (pSrcBox) { - /* make sure it doesn't range outside the source volume */ - user_assert(pSrcBox->x >= 0 && - (pSrcBox->width - pSrcBox->x) <= From->desc.Width && - pSrcBox->y >= 0 && - (pSrcBox->height - pSrcBox->y) <= From->desc.Height && - pSrcBox->z >= 0 && - (pSrcBox->depth - pSrcBox->z) <= From->desc.Depth, - D3DERR_INVALIDCALL); src_box = *pSrcBox; } else { src_box.x = 0; @@ -378,101 +348,54 @@ NineVolume9_CopyVolume( struct NineVolume9 *This, src_box.height = From->desc.Height; src_box.depth = From->desc.Depth; } - /* limits */ - dst_box.width = This->desc.Width - dst_box.x; - dst_box.height = This->desc.Height - dst_box.y; - dst_box.depth = This->desc.Depth - dst_box.z; - - user_assert(src_box.width <= dst_box.width && - src_box.height <= dst_box.height && - src_box.depth <= dst_box.depth, D3DERR_INVALIDCALL); dst_box.width = src_box.width; dst_box.height = src_box.height; dst_box.depth = src_box.depth; - if (r_dst && r_src) { - pipe->resource_copy_region(pipe, - r_dst, This->level, - dst_box.x, dst_box.y, dst_box.z, - r_src, From->level, - &src_box); - } else - if (r_dst) { - p_src = NineVolume9_GetSystemMemPointer(From, - src_box.x, src_box.y, src_box.z); - - pipe->transfer_inline_write(pipe, r_dst, This->level, - 0, /* 
WRITE|DISCARD are implicit */ - &dst_box, p_src, - From->stride, From->layer_stride); - } else - if (r_src) { - p_dst = NineVolume9_GetSystemMemPointer(This, 0, 0, 0); - p_src = pipe->transfer_map(pipe, r_src, From->level, - PIPE_TRANSFER_READ, - &src_box, &transfer); - if (!p_src) - return D3DERR_DRIVERINTERNALERROR; - - util_copy_box(p_dst, This->info.format, - This->stride, This->layer_stride, - dst_box.x, dst_box.y, dst_box.z, - dst_box.width, dst_box.height, dst_box.depth, - p_src, - transfer->stride, transfer->layer_stride, - src_box.x, src_box.y, src_box.z); + p_src = NineVolume9_GetSystemMemPointer(From, + src_box.x, src_box.y, src_box.z); - pipe->transfer_unmap(pipe, transfer); - } else { - p_dst = NineVolume9_GetSystemMemPointer(This, 0, 0, 0); - p_src = NineVolume9_GetSystemMemPointer(From, 0, 0, 0); - - util_copy_box(p_dst, This->info.format, - This->stride, This->layer_stride, - dst_box.x, dst_box.y, dst_box.z, - dst_box.width, dst_box.height, dst_box.depth, - p_src, - From->stride, From->layer_stride, - src_box.x, src_box.y, src_box.z); - } + pipe->transfer_inline_write(pipe, r_dst, This->level, + 0, /* WRITE|DISCARD are implicit */ + &dst_box, p_src, + From->stride, From->layer_stride); - if (This->desc.Pool == D3DPOOL_DEFAULT) - NineVolume9_MarkContainerDirty(This); - if (!r_dst && This->resource) - NineVolume9_AddDirtyRegion(This, &dst_box); + NineVolume9_MarkContainerDirty(This); return D3D_OK; } HRESULT -NineVolume9_UploadSelf( struct NineVolume9 *This ) +NineVolume9_UploadSelf( struct NineVolume9 *This, + const struct pipe_box *damaged ) { struct pipe_context *pipe = This->pipe; struct pipe_resource *res = This->resource; + struct pipe_box box; uint8_t *ptr; - unsigned i; - DBG("This=%p dirty=%i data=%p res=%p\n", This, NineVolume9_IsDirty(This), + DBG("This=%p damaged=%p data=%p res=%p\n", This, damaged, This->data, res); assert(This->desc.Pool == D3DPOOL_MANAGED); - - if (!NineVolume9_IsDirty(This)) - return D3D_OK; assert(res); - for (i = 0; 
i < Elements(This->dirty_box); ++i) { - const struct pipe_box *box = &This->dirty_box[i]; - if (box->width == 0) - break; - ptr = NineVolume9_GetSystemMemPointer(This, box->x, box->y, box->z); - - pipe->transfer_inline_write(pipe, res, This->level, - 0, - box, ptr, This->stride, This->layer_stride); + if (damaged) { + box = *damaged; + } else { + box.x = 0; + box.y = 0; + box.z = 0; + box.width = This->desc.Width; + box.height = This->desc.Height; + box.depth = This->desc.Depth; } - NineVolume9_ClearDirtyRegion(This); + + ptr = NineVolume9_GetSystemMemPointer(This, box.x, box.y, box.z); + + pipe->transfer_inline_write(pipe, res, This->level, 0, &box, + ptr, This->stride, This->layer_stride); return D3D_OK; } diff --git a/src/gallium/state_trackers/nine/volume9.h b/src/gallium/state_trackers/nine/volume9.h index fae2431..26ca8a3 100644 --- a/src/gallium/state_trackers/nine/volume9.h +++ b/src/gallium/state_trackers/nine/volume9.h @@ -50,8 +50,6 @@ struct NineVolume9 struct pipe_transfer *transfer; unsigned lock_count; - struct pipe_box dirty_box[2]; - struct pipe_context *pipe; /* for [GS]etPrivateData/FreePrivateData */ @@ -85,20 +83,15 @@ void NineVolume9_AddDirtyRegion( struct NineVolume9 *This, const struct pipe_box *box ); -static inline void -NineVolume9_ClearDirtyRegion( struct NineVolume9 *This ) -{ - memset(&This->dirty_box, 0, sizeof(This->dirty_box)); -} - HRESULT -NineVolume9_CopyVolume( struct NineVolume9 *This, - struct NineVolume9 *From, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_box *pSrcBox ); +NineVolume9_CopyMemToDefault( struct NineVolume9 *This, + struct NineVolume9 *From, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_box *pSrcBox ); HRESULT -NineVolume9_UploadSelf( struct NineVolume9 *This ); +NineVolume9_UploadSelf( struct NineVolume9 *This, + const struct pipe_box *damaged ); /*** Direct3D public ***/ diff --git a/src/gallium/state_trackers/nine/volumetexture9.c 
b/src/gallium/state_trackers/nine/volumetexture9.c index 1193e12..e5b2b53 100644 --- a/src/gallium/state_trackers/nine/volumetexture9.c +++ b/src/gallium/state_trackers/nine/volumetexture9.c @@ -64,6 +64,13 @@ NineVolumeTexture9_ctor( struct NineVolumeTexture9 *This, if (Format == D3DFMT_ATI1 || Format == D3DFMT_ATI2) return D3DERR_INVALIDCALL; + if (compressed_format(Format)) { + const unsigned w = util_format_get_blockwidth(pf); + const unsigned h = util_format_get_blockheight(pf); + /* Compressed formats are not compressed on depth component */ + user_assert(!(Width % w) && !(Height % h), D3DERR_INVALIDCALL); + } + info->screen = pParams->device->screen; info->target = PIPE_TEXTURE_3D; info->format = pf; @@ -116,6 +123,9 @@ NineVolumeTexture9_ctor( struct NineVolumeTexture9 *This, return hr; } + /* Textures start initially dirty */ + NineVolumeTexture9_AddDirtyBox(This, NULL); + return D3D_OK; } @@ -193,12 +203,14 @@ NineVolumeTexture9_AddDirtyBox( struct NineVolumeTexture9 *This, { DBG("This=%p pDirtybox=%p\n", This, pDirtyBox); - if (This->base.base.pool != D3DPOOL_MANAGED) { + if (This->base.base.pool == D3DPOOL_DEFAULT) { return D3D_OK; } - This->base.managed.dirty = TRUE; - BASETEX_REGISTER_UPDATE(&This->base); + if (This->base.base.pool == D3DPOOL_MANAGED) { + This->base.managed.dirty = TRUE; + BASETEX_REGISTER_UPDATE(&This->base); + } if (!pDirtyBox) { This->dirty_box.x = 0; diff --git a/src/gallium/targets/d3dadapter9/Makefile.am b/src/gallium/targets/d3dadapter9/Makefile.am index fe5b0b1..e26ca33 100644 --- a/src/gallium/targets/d3dadapter9/Makefile.am +++ b/src/gallium/targets/d3dadapter9/Makefile.am @@ -54,6 +54,7 @@ pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = d3d.pc d3dadapter9_la_SOURCES = \ + description.c \ getproc.c \ drm.c diff --git a/src/gallium/targets/d3dadapter9/description.c b/src/gallium/targets/d3dadapter9/description.c new file mode 100644 index 0000000..c0a8678 --- /dev/null +++ b/src/gallium/targets/d3dadapter9/description.c 
@@ -0,0 +1,324 @@ +/* + * Copyright 2015 Patrick Rudolph <siro@das-labor.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include <string.h> +#include "adapter9.h" + +#define DBG_CHANNEL DBG_ADAPTER + +/* prototypes */ +void +d3d_match_vendor_id( D3DADAPTER_IDENTIFIER9* drvid, + unsigned fallback_ven, + unsigned fallback_dev, + const char* fallback_name ); +void d3d_fill_driver_version(D3DADAPTER_IDENTIFIER9* drvid); +void d3d_fill_cardname(D3DADAPTER_IDENTIFIER9* drvid); + +enum d3d_vendor_id +{ + HW_VENDOR_SOFTWARE = 0x0000, + HW_VENDOR_AMD = 0x1002, + HW_VENDOR_NVIDIA = 0x10de, + HW_VENDOR_VMWARE = 0x15ad, + HW_VENDOR_INTEL = 0x8086, +}; + +struct card_lookup_table { + const char *mesaname; + const char *d3d9name; +} +cards_amd[] = { + {"HAWAII", "AMD Radeon R9 290"}, + {"KAVERI", "AMD Radeon(TM) R7 Graphics"}, + {"KABINI", "AMD Radeon HD 8400 / R3 Series"}, + {"BONAIRE", "AMD Radeon HD 8770"}, + {"OLAND", "AMD Radeon HD 8670"}, + {"HAINAN", "AMD Radeon HD 8600M Series"}, + {"TAHITI", "AMD Radeon HD 7900 Series"}, + {"PITCAIRN", "AMD Radeon HD 7800 Series"}, + {"CAPE VERDE", "AMD Radeon HD 7700 Series"}, + {"ARUBA", "AMD Radeon HD 7660D"}, + {"CAYMAN", "AMD Radeon HD 6900 Series"}, + {"BARTS", "AMD Radeon HD 6800 Series"}, + {"TURKS", "AMD Radeon HD 6600 Series"}, + {"SUMO2", "AMD Radeon HD 6410D"}, + {"SUMO", "AMD Radeon HD 6550D"}, + {"CAICOS", "AMD Radeon HD 6400 Series"}, + {"PALM", "AMD Radeon HD 6300 series Graphics"}, + {"HEMLOCK", "ATI Radeon HD 5900 Series"}, + {"CYPRESS", "ATI Radeon HD 5800 Series"}, + {"JUNIPER", "ATI Radeon HD 5700 Series"}, + {"REDWOOD", "ATI Radeon HD 5600 Series"}, + {"CEDAR", "ATI Radeon HD 5500 Series"}, + {"R700", "ATI Radeon HD 4800 Series"}, + {"RV790", "ATI Radeon HD 4800 Series"}, + {"RV770", "ATI Radeon HD 4800 Series"}, + {"RV740", "ATI Radeon HD 4700 Series"}, + {"RV730", "ATI Radeon HD 4600 Series"}, + {"RV710", "ATI Radeon HD 4350"}, + {"RS880", "ATI Mobility Radeon HD 4200"}, + {"RS780", "ATI Radeon HD 3200 Graphics"}, + {"R680", "ATI Radeon HD 2900 XT"}, + {"R600", "ATI Radeon HD 2900 XT"}, + {"RV670", "ATI Radeon HD 2900 XT"}, 
+ {"RV635", "ATI Mobility Radeon HD 2600"}, + {"RV630", "ATI Mobility Radeon HD 2600"}, + {"RV620", "ATI Mobility Radeon HD 2350"}, + {"RV610", "ATI Mobility Radeon HD 2350"}, + {"R580", "ATI Radeon X1600 Series"}, + {"R520", "ATI Radeon X1600 Series"}, + {"RV570", "ATI Radeon X1600 Series"}, + {"RV560", "ATI Radeon X1600 Series"}, + {"RV535", "ATI Radeon X1600 Series"}, + {"RV530", "ATI Radeon X1600 Series"}, + {"RV516", "ATI Radeon X700 SE"}, + {"RV515", "ATI Radeon X700 SE"}, + {"R481", "ATI Radeon X700 SE"}, + {"R480", "ATI Radeon X700 SE"}, + {"R430", "ATI Radeon X700 SE"}, + {"R423", "ATI Radeon X700 SE"}, + {"R420", "ATI Radeon X700 SE"}, + {"R410", "ATI Radeon X700 SE"}, + {"RV410", "ATI Radeon X700 SE"}, + {"RS740", "ATI RADEON XPRESS 200M Series"}, + {"RS690", "ATI RADEON XPRESS 200M Series"}, + {"RS600", "ATI RADEON XPRESS 200M Series"}, + {"RS485", "ATI RADEON XPRESS 200M Series"}, + {"RS482", "ATI RADEON XPRESS 200M Series"}, + {"RS480", "ATI RADEON XPRESS 200M Series"}, + {"RS400", "ATI RADEON XPRESS 200M Series"}, + {"R360", "ATI Radeon 9500"}, + {"R350", "ATI Radeon 9500"}, + {"R300", "ATI Radeon 9500"}, + {"RV370", "ATI Radeon 9500"}, + {"RV360", "ATI Radeon 9500"}, + {"RV351", "ATI Radeon 9500"}, + {"RV350", "ATI Radeon 9500"}, +}, +cards_nvidia[] = +{ + {"NV124", "NVIDIA GeForce GTX 970"}, + {"NV117", "NVIDIA GeForce GTX 750"}, + {"NVF1", "NVIDIA GeForce GTX 780 Ti"}, + {"NVF0", "NVIDIA GeForce GTX 780"}, + {"NVE6", "NVIDIA GeForce GTX 770M"}, + {"NVE4", "NVIDIA GeForce GTX 680"}, + {"NVD9", "NVIDIA GeForce GT 520"}, + {"NVCF", "NVIDIA GeForce GTX 550 Ti"}, + {"NVCE", "NVIDIA GeForce GTX 560"}, + {"NVC8", "NVIDIA GeForce GTX 570"}, + {"NVC4", "NVIDIA GeForce GTX 460"}, + {"NVC3", "NVIDIA GeForce GT 440"}, + {"NVC1", "NVIDIA GeForce GT 420"}, + {"NVC0", "NVIDIA GeForce GTX 480"}, + {"NVAF", "NVIDIA GeForce GT 320M"}, + {"NVAC", "NVIDIA GeForce 8200"}, + {"NVAA", "NVIDIA GeForce 8200"}, + {"NVA8", "NVIDIA GeForce 210"}, + {"NVA5", "NVIDIA GeForce 
GT 220"}, + {"NVA3", "NVIDIA GeForce GT 240"}, + {"NVA0", "NVIDIA GeForce GTX 280"}, + {"NV98", "NVIDIA GeForce 9200"}, + {"NV96", "NVIDIA GeForce 9400 GT"}, + {"NV94", "NVIDIA GeForce 9600 GT"}, + {"NV92", "NVIDIA GeForce 9800 GT"}, + {"NV86", "NVIDIA GeForce 8500 GT"}, + {"NV84", "NVIDIA GeForce 8600 GT"}, + {"NV50", "NVIDIA GeForce 8800 GTX"}, + {"NV68", "NVIDIA GeForce 6200"}, + {"NV67", "NVIDIA GeForce 6200"}, + {"NV63", "NVIDIA GeForce 6200"}, + {"NV4E", "NVIDIA GeForce 6200"}, + {"NV4C", "NVIDIA GeForce 6200"}, + {"NV4B", "NVIDIA GeForce 7600 GT"}, + {"NV4A", "NVIDIA GeForce 6200"}, + {"NV49", "NVIDIA GeForce 7800 GT"}, + {"NV47", "NVIDIA GeForce 7800 GT"}, + {"NV46", "NVIDIA GeForce Go 7400",}, + {"NV45", "NVIDIA GeForce 6800"}, + {"NV44", "NVIDIA GeForce 6200"}, + {"NV43", "NVIDIA GeForce 6600 GT"}, + {"NV42", "NVIDIA GeForce 6800"}, + {"NV41", "NVIDIA GeForce 6800"}, + {"NV40", "NVIDIA GeForce 6800"}, + {"NV38", "NVIDIA GeForce FX 5800"}, + {"NV36", "NVIDIA GeForce FX 5800"}, + {"NV35", "NVIDIA GeForce FX 5800"}, + {"NV34", "NVIDIA GeForce FX 5200"}, + {"NV31", "NVIDIA GeForce FX 5600"}, + {"NV30", "NVIDIA GeForce FX 5800"}, + {"nv28", "NVIDIA GeForce4 Ti 4200"}, + {"nv25", "NVIDIA GeForce4 Ti 4200"}, + {"nv20", "NVIDIA GeForce3"}, + {"nv1F", "NVIDIA GeForce4 MX 460"}, + {"nv1A", "NVIDIA GeForce2 GTS/GeForce2 Pro"}, + {"nv18", "NVIDIA GeForce4 MX 460"}, + {"nv17", "NVIDIA GeForce4 MX 460"}, + {"nv16", "NVIDIA GeForce2 GTS/GeForce2 Pro"}, + {"nv15", "NVIDIA GeForce2 GTS/GeForce2 Pro"}, + {"nv11", "NVIDIA GeForce2 MX/MX 400"}, + {"nv10", "NVIDIA GeForce 256"}, +}, +cards_vmware[] = +{ + {"SVGA3D", "VMware SVGA 3D (Microsoft Corporation - WDDM)"}, +}, +cards_intel[] = +{ + {"Haswell Mobile", "Intel(R) Haswell Mobile"}, + {"Ivybridge Server", "Intel(R) Ivybridge Server"}, + {"Ivybridge Mobile", "Intel(R) Ivybridge Mobile"}, + {"Ivybridge Desktop", "Intel(R) Ivybridge Desktop"}, + {"Sandybridge Server", "Intel(R) Sandybridge Server"}, + {"Sandybridge Mobile", 
"Intel(R) Sandybridge Mobile"}, + {"Sandybridge Desktop", "Intel(R) Sandybridge Desktop"}, + {"Ironlake Mobile", "Intel(R) Ironlake Mobile"}, + {"Ironlake Desktop", "Intel(R) Ironlake Desktop"}, + {"B43", "Intel(R) B43"}, + {"G41", "Intel(R) G41"}, + {"G45", "Intel(R) G45/G43"}, + {"Q45", "Intel(R) Q45/Q43"}, + {"Integrated Graphics Device", "Intel(R) Integrated Graphics Device"}, + {"GM45", "Mobile Intel(R) GM45 Express Chipset Family"}, + {"965GME", "Intel(R) 965GME"}, + {"965GM", "Mobile Intel(R) 965 Express Chipset Family"}, + {"946GZ", "Intel(R) 946GZ"}, + {"965G", "Intel(R) 965G"}, + {"965Q", "Intel(R) 965Q"}, + {"Pineview M", "Intel(R) IGD"}, + {"Pineview G", "Intel(R) IGD"}, + {"IGD", "Intel(R) IGD"}, + {"Q33", "Intel(R) Q33"}, + {"G33", "Intel(R) G33"}, + {"Q35", "Intel(R) Q35"}, + {"945GME", "Intel(R) 945GME"}, + {"945GM", "Mobile Intel(R) 945GM Express Chipset Family"}, + {"945G", "Intel(R) 945G"}, + {"915GM", "Mobile Intel(R) 915GM/GMS,910GML Express Chipset Family"}, + {"E7221G", "Intel(R) E7221G"}, + {"915G", "Intel(R) 82915G/GV/910GL Express Chipset Family"}, + {"865G", "Intel(R) 82865G Graphics Controller"}, + {"845G", "Intel(R) 845G"}, + {"855GM", "Intel(R) 82852/82855 GM/GME Graphics Controller"}, + {"830M", "Intel(R) 82830M Graphics Controller"}, +}; + +/* override VendorId, DeviceId and Description for unknown vendors */ +void +d3d_match_vendor_id( D3DADAPTER_IDENTIFIER9* drvid, + unsigned fallback_ven, + unsigned fallback_dev, + const char* fallback_name ) +{ + if (drvid->VendorId == HW_VENDOR_INTEL || + drvid->VendorId == HW_VENDOR_VMWARE || + drvid->VendorId == HW_VENDOR_AMD || + drvid->VendorId == HW_VENDOR_NVIDIA) + return; + + DBG("unknown vendor 0x4%x, emulating 0x4%x\n", drvid->VendorId, fallback_ven); + drvid->VendorId = fallback_ven; + drvid->DeviceId = fallback_dev; + strncpy(drvid->Description, fallback_name, sizeof(drvid->Description)); +} + +/* fill in driver name and version */ +void d3d_fill_driver_version(D3DADAPTER_IDENTIFIER9* 
drvid) { + switch (drvid->VendorId) { + case HW_VENDOR_INTEL: + drvid->DriverVersionLowPart = 0x000A0682; + drvid->DriverVersionHighPart = 0x0006000F; + strncpy(drvid->Driver, "igdumd32.dll", sizeof(drvid->Driver)); + break; + case HW_VENDOR_VMWARE: + drvid->DriverVersionLowPart = 0x0001046E; + drvid->DriverVersionHighPart = 0x0006000E; + strncpy(drvid->Driver, "vm3dum.dll", sizeof(drvid->Driver)); + break; + case HW_VENDOR_AMD: + drvid->DriverVersionLowPart = 0x000A0500; + drvid->DriverVersionHighPart = 0x00060011; + strncpy(drvid->Driver, "atiumdag.dll", sizeof(drvid->Driver)); + break; + case HW_VENDOR_NVIDIA: + drvid->DriverVersionLowPart = 0x000D0FD4; + drvid->DriverVersionHighPart = 0x00060012; + strncpy(drvid->Driver, "nvd3dum.dll", sizeof(drvid->Driver)); + break; + default: + break; + } +} + +/* try to match the device name and override it with Windows like device names */ +void d3d_fill_cardname(D3DADAPTER_IDENTIFIER9* drvid) { + unsigned i; + switch (drvid->VendorId) { + case HW_VENDOR_INTEL: + for (i = 0; i < sizeof(cards_intel) / sizeof(cards_intel[0]); i++) { + if (strstr(drvid->Description, cards_intel[i].mesaname)) { + strncpy(drvid->Description, cards_intel[i].d3d9name, sizeof(drvid->Description)); + return; + } + } + /* use a fall-back if nothing matches */ + DBG("Unknown card name %s!\n", drvid->DeviceName); + strncpy(drvid->Description, cards_intel[0].d3d9name, sizeof(drvid->Description)); + break; + case HW_VENDOR_VMWARE: + for (i = 0; i < sizeof(cards_vmware) / sizeof(cards_vmware[0]); i++) { + if (strstr(drvid->Description, cards_vmware[i].mesaname)) { + strncpy(drvid->Description, cards_vmware[i].d3d9name, sizeof(drvid->Description)); + return; + } + } + /* use a fall-back if nothing matches */ + DBG("Unknown card name %s!\n", drvid->DeviceName); + strncpy(drvid->Description, cards_vmware[0].d3d9name, sizeof(drvid->Description)); + break; + case HW_VENDOR_AMD: + for (i = 0; i < sizeof(cards_amd) / sizeof(cards_amd[0]); i++) { + if 
(strstr(drvid->Description, cards_amd[i].mesaname)) { + strncpy(drvid->Description, cards_amd[i].d3d9name, sizeof(drvid->Description)); + return; + } + } + /* use a fall-back if nothing matches */ + DBG("Unknown card name %s!\n", drvid->DeviceName); + strncpy(drvid->Description, cards_amd[0].d3d9name, sizeof(drvid->Description)); + break; + case HW_VENDOR_NVIDIA: + for (i = 0; i < sizeof(cards_nvidia) / sizeof(cards_nvidia[0]); i++) { + if (strstr(drvid->Description, cards_nvidia[i].mesaname)) { + strncpy(drvid->Description, cards_nvidia[i].d3d9name, sizeof(drvid->Description)); + return; + } + } + /* use a fall-back if nothing matches */ + DBG("Unknown card name %s!\n", drvid->DeviceName); + strncpy(drvid->Description, cards_nvidia[0].d3d9name, sizeof(drvid->Description)); + break; + default: + break; + } +} diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c index 680f516..fabc820 100644 --- a/src/gallium/targets/d3dadapter9/drm.c +++ b/src/gallium/targets/d3dadapter9/drm.c @@ -46,12 +46,6 @@ #define DBG_CHANNEL DBG_ADAPTER -#define VERSION_DWORD(hi, lo) \ - ((DWORD)( \ - ((DWORD)((hi) & 0xFFFF) << 16) | \ - (DWORD)((lo) & 0xFFFF) \ - )) - const char __driConfigOptionsNine[] = DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE @@ -63,12 +57,21 @@ DRI_CONF_BEGIN DRI_CONF_SECTION_END DRI_CONF_END; -/* Regarding os versions, we should not define our own as that would simply be - * weird. Defaulting to Win2k/XP seems sane considering the origin of D3D9. The - * driver also defaults to being a generic D3D9 driver, which of course only - * matters if you're actually using the DDI. 
*/ -#define VERSION_HIGH VERSION_DWORD(0x0006, 0x000E) /* winxp, d3d9 */ -#define VERSION_LOW VERSION_DWORD(0x0000, 0x0001) /* version, build */ +/* define fallback value here: NVIDIA GeForce GTX 970 */ +#define FALLBACK_NAME "NV124" +#define FALLBACK_DEVID 0x13C2 +#define FALLBACK_VENID 0x10de + +/* prototypes */ +void +d3d_match_vendor_id( D3DADAPTER_IDENTIFIER9* drvid, + unsigned fallback_ven, + unsigned fallback_dev, + const char* fallback_name ); + +void d3d_fill_driver_version(D3DADAPTER_IDENTIFIER9* drvid); + +void d3d_fill_cardname(D3DADAPTER_IDENTIFIER9* drvid); struct d3dadapter9drm_context { @@ -152,9 +155,9 @@ get_bus_info( int fd, *subsysid = 0; *revision = 0; } else { - DBG("Unable to detect card. Fake GTX 680.\n"); - *vendorid = 0x10de; /* NV GTX 680 */ - *deviceid = 0x1180; + DBG("Unable to detect card. Faking %s\n", FALLBACK_NAME); + *vendorid = FALLBACK_VENID; + *deviceid = FALLBACK_DEVID; *subsysid = 0; *revision = 0; } @@ -169,33 +172,23 @@ read_descriptor( struct d3dadapter9_context *ctx, memset(drvid, 0, sizeof(*drvid)); get_bus_info(fd, &drvid->VendorId, &drvid->DeviceId, &drvid->SubSysId, &drvid->Revision); + snprintf(drvid->DeviceName, sizeof(drvid->DeviceName), + "Gallium 0.4 with %s", ctx->hal->get_vendor(ctx->hal)); + strncpy(drvid->Description, ctx->hal->get_name(ctx->hal), + sizeof(drvid->Description)); + + /* choose fall-back vendor if necessary to allow + * the following functions to return sane results */ + d3d_match_vendor_id(drvid, FALLBACK_VENID, FALLBACK_DEVID, FALLBACK_NAME); + /* fill in driver name and version info */ + d3d_fill_driver_version(drvid); + /* override Description field with Windows like names */ + d3d_fill_cardname(drvid); + + /* this driver isn't WHQL certified */ + drvid->WHQLLevel = 0; - strncpy(drvid->Driver, "libd3dadapter9.so", sizeof(drvid->Driver)); - strncpy(drvid->DeviceName, ctx->hal->get_name(ctx->hal), 32); - snprintf(drvid->Description, sizeof(drvid->Description), - "Gallium 0.4 with %s", 
ctx->hal->get_vendor(ctx->hal)); - - drvid->DriverVersionLowPart = VERSION_LOW; - drvid->DriverVersionHighPart = VERSION_HIGH; - - /* To make a pseudo-real GUID we use the PCI bus data and some string */ - drvid->DeviceIdentifier.Data1 = drvid->VendorId; - drvid->DeviceIdentifier.Data2 = drvid->DeviceId; - drvid->DeviceIdentifier.Data3 = drvid->SubSysId; - memcpy(drvid->DeviceIdentifier.Data4, "Gallium3D", 8); - - drvid->WHQLLevel = 1; /* This fakes WHQL validaion */ - - /* XXX Fake NVIDIA binary driver on Windows. - * - * OS version: 4=95/98/NT4, 5=2000, 6=2000/XP, 7=Vista, 8=Win7 - */ - strncpy(drvid->Driver, "nvd3dum.dll", sizeof(drvid->Driver)); - strncpy(drvid->Description, "NVIDIA GeForce GTX 680", sizeof(drvid->Description)); - drvid->DriverVersionLowPart = VERSION_DWORD(12, 6658); /* minor, build */ - drvid->DriverVersionHighPart = VERSION_DWORD(6, 15); /* OS, major */ - drvid->SubSysId = 0; - drvid->Revision = 0; + /* this value is fixed */ drvid->DeviceIdentifier.Data1 = 0xaeb2cdd4; drvid->DeviceIdentifier.Data2 = 0x6e41; drvid->DeviceIdentifier.Data3 = 0x43ea; @@ -207,7 +200,6 @@ read_descriptor( struct d3dadapter9_context *ctx, drvid->DeviceIdentifier.Data4[5] = 0x76; drvid->DeviceIdentifier.Data4[6] = 0x07; drvid->DeviceIdentifier.Data4[7] = 0x81; - drvid->WHQLLevel = 0; } static HRESULT WINAPI diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index 7168e1d..a33d7f8 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -77,8 +77,8 @@ gallium_DRIVERS += libmesa_pipe_r600 LOCAL_CFLAGS += -DGALLIUM_R600 endif ifneq ($(filter radeonsi,$(MESA_GPU_DRIVERS)),) -gallium_DRIVERS += libmesa_pipe_radeonsi -LOCAL_SHARED_LIBRARIES += libLLVM +gallium_DRIVERS += libmesa_pipe_radeonsi libmesa_winsys_amdgpu +LOCAL_SHARED_LIBRARIES += libLLVM libdrm_amdgpu LOCAL_CFLAGS += -DGALLIUM_RADEONSI endif gallium_DRIVERS += libmesa_winsys_radeon libmesa_pipe_radeon diff --git 
a/src/gallium/winsys/amdgpu/drm/Android.mk b/src/gallium/winsys/amdgpu/drm/Android.mk index 7d507aa..5773234 100644 --- a/src/gallium/winsys/amdgpu/drm/Android.mk +++ b/src/gallium/winsys/amdgpu/drm/Android.mk @@ -30,6 +30,16 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := $(C_SOURCES) +LOCAL_CFLAGS := \ + $(AMDGPU_CFLAGS) \ + -DBRAHMA_BUILD=1 + +LOCAL_C_INCLUDES := \ + $(LOCAL_PATH)/addrlib \ + $(LOCAL_PATH)/addrlib/core \ + $(LOCAL_PATH)/addrlib/inc/chip/r800 \ + $(LOCAL_PATH)/addrlib/r800/chip + LOCAL_SHARED_LIBRARIES := libdrm libdrm_amdgpu LOCAL_MODULE := libmesa_winsys_amdgpu diff --git a/src/gallium/winsys/amdgpu/drm/Makefile.sources b/src/gallium/winsys/amdgpu/drm/Makefile.sources index 6b33841..2363004 100644 --- a/src/gallium/winsys/amdgpu/drm/Makefile.sources +++ b/src/gallium/winsys/amdgpu/drm/Makefile.sources @@ -11,9 +11,7 @@ C_SOURCES := \ addrlib/core/addrobject.h \ addrlib/inc/chip/r800/si_gb_reg.h \ addrlib/inc/lnx_common_defs.h \ - addrlib/r800/chip/si_ci_merged_enum.h \ addrlib/r800/chip/si_ci_vi_merged_enum.h \ - addrlib/r800/chip/si_enum.h \ addrlib/r800/ciaddrlib.cpp \ addrlib/r800/ciaddrlib.h \ addrlib/r800/egbaddrlib.cpp \ diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index 0842259..12c6b62 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -77,8 +77,8 @@ struct amdgpu_cs { int buffer_indices_hashlist[512]; - unsigned used_vram; - unsigned used_gart; + uint64_t used_vram; + uint64_t used_gart; unsigned max_dependencies; }; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 7a267f9..f04a696 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -97,22 +97,17 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc, { int i; - csc->buf = MALLOC(ws->ib_max_size); - if (!csc->buf) - return FALSE; csc->fd = 
ws->fd; csc->nrelocs = 512; csc->relocs_bo = (struct radeon_bo**) CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*)); if (!csc->relocs_bo) { - FREE(csc->buf); return FALSE; } csc->relocs = (struct drm_radeon_cs_reloc*) CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc)); if (!csc->relocs) { - FREE(csc->buf); FREE(csc->relocs_bo); return FALSE; } @@ -165,7 +160,6 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc) radeon_cs_context_cleanup(csc); FREE(csc->relocs_bo); FREE(csc->relocs); - FREE(csc->buf); } @@ -206,7 +200,7 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx, cs->cst = &cs->csc2; cs->base.buf = cs->csc->buf; cs->base.ring_type = ring_type; - cs->base.max_dw = ws->ib_max_size / 4; + cs->base.max_dw = ARRAY_SIZE(cs->csc->buf); p_atomic_inc(&ws->num_cs); return &cs->base; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index ab15494..6ceb8e9 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -30,7 +30,7 @@ #include "radeon_drm_bo.h" struct radeon_cs_context { - uint32_t *buf; + uint32_t buf[16 * 1024]; int fd; struct drm_radeon_cs cs; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index b70bbaa..f7784fb 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -395,20 +395,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) } ws->info.r600_virtual_address = FALSE; - ws->ib_max_size = 64 * 1024; - if (ws->info.drm_minor >= 13) { + uint32_t ib_vm_max_size; + ws->info.r600_virtual_address = TRUE; if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL, &ws->va_start)) ws->info.r600_virtual_address = FALSE; - - if (radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL, - &ws->ib_max_size)) - ws->ib_max_size *= 4; /* the kernel returns the size in 
dwords */ - else + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL, + &ib_vm_max_size)) ws->info.r600_virtual_address = FALSE; - radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL, &ws->va_unmap_working); } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h index c1a8d6a..308b5bd 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h @@ -73,7 +73,6 @@ struct radeon_drm_winsys { enum radeon_generation gen; struct radeon_info info; - uint32_t ib_max_size; uint32_t va_start; uint32_t va_unmap_working; uint32_t accel_working2; diff --git a/src/gallium/winsys/sw/kms-dri/Makefile.am b/src/gallium/winsys/sw/kms-dri/Makefile.am index 7f26b1b..8162553 100644 --- a/src/gallium/winsys/sw/kms-dri/Makefile.am +++ b/src/gallium/winsys/sw/kms-dri/Makefile.am @@ -31,5 +31,3 @@ AM_CFLAGS = \ noinst_LTLIBRARIES = libswkmsdri.la libswkmsdri_la_SOURCES = $(C_SOURCES) - -EXTRA_DIST = SConscript diff --git a/src/gbm/main/backend.c b/src/gbm/main/backend.c index 4929d73..37ec9c1 100644 --- a/src/gbm/main/backend.c +++ b/src/gbm/main/backend.c @@ -65,7 +65,7 @@ static const struct backend_desc * find_backend(const char *name) { const struct backend_desc *backend = NULL; - int i; + unsigned i; for (i = 0; i < ARRAY_SIZE(backends); ++i) { if (strcmp(backends[i].name, name) == 0) { @@ -82,7 +82,7 @@ _gbm_create_device(int fd) { const struct gbm_backend *backend = NULL; struct gbm_device *dev = NULL; - int i; + unsigned i; const char *b; b = getenv("GBM_BACKEND"); diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index b491ad4..da38e35 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -25,6 +25,9 @@ NIR_FILES = \ nir/nir_array.h \ nir/nir_builder.h \ nir/nir_constant_expressions.h \ + nir/nir_control_flow.c \ + nir/nir_control_flow.h \ + nir/nir_control_flow_private.h \ nir/nir_dominance.c \ 
nir/nir_from_ssa.c \ nir/nir_intrinsics.c \ diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp index 27e84d1..ae399f0 100644 --- a/src/glsl/ast_array_index.cpp +++ b/src/glsl/ast_array_index.cpp @@ -290,6 +290,21 @@ _mesa_ast_array_index_to_hir(void *mem_ctx, "1.30 and later"); } } + + /* From page 27 of the GLSL ES 3.1 specification: + * + * "When aggregated into arrays within a shader, images can only be + * indexed with a constant integral expression." + * + * On the other hand the desktop GL specification extension allows + * non-constant indexing of image arrays, but behavior is left undefined + * in cases where the indexing expression is not dynamically uniform. + */ + if (state->es_shader && array->type->without_array()->is_image()) { + _mesa_glsl_error(&loc, state, + "image arrays indexed with non-constant " + "expressions are forbidden in GLSL ES."); + } } /* After performing all of the error checking, generate the IR for the diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index fa2c09d..981438d 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -2099,10 +2099,10 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state, static bool validate_binding_qualifier(struct _mesa_glsl_parse_state *state, YYLTYPE *loc, - ir_variable *var, + const glsl_type *type, const ast_type_qualifier *qual) { - if (var->data.mode != ir_var_uniform && var->data.mode != ir_var_shader_storage) { + if (!qual->flags.q.uniform && !qual->flags.q.buffer) { _mesa_glsl_error(loc, state, "the \"binding\" qualifier only applies to uniforms and " "shader storage buffer objects"); @@ -2115,10 +2115,11 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, } const struct gl_context *const ctx = state->ctx; - unsigned elements = var->type->is_array() ? var->type->length : 1; + unsigned elements = type->is_array() ? 
type->length : 1; unsigned max_index = qual->binding + elements - 1; + const glsl_type *base_type = type->without_array(); - if (var->type->is_interface()) { + if (base_type->is_interface()) { /* UBOs. From page 60 of the GLSL 4.20 specification: * "If the binding point for any uniform block instance is less than zero, * or greater than or equal to the implementation-dependent maximum @@ -2129,7 +2130,7 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, * * The implementation-dependent maximum is GL_MAX_UNIFORM_BUFFER_BINDINGS. */ - if (var->data.mode == ir_var_uniform && + if (qual->flags.q.uniform && max_index >= ctx->Const.MaxUniformBufferBindings) { _mesa_glsl_error(loc, state, "layout(binding = %d) for %d UBOs exceeds " "the maximum number of UBO binding points (%d)", @@ -2137,6 +2138,7 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, ctx->Const.MaxUniformBufferBindings); return false; } + /* SSBOs. From page 67 of the GLSL 4.30 specification: * "If the binding point for any uniform or shader storage block instance * is less than zero, or greater than or equal to the @@ -2146,7 +2148,7 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, * N, all elements of the array from binding through binding + N – 1 must * be within this range." */ - if (var->data.mode == ir_var_shader_storage && + if (qual->flags.q.buffer && max_index >= ctx->Const.MaxShaderStorageBufferBindings) { _mesa_glsl_error(loc, state, "layout(binding = %d) for %d SSBOs exceeds " "the maximum number of SSBO binding points (%d)", @@ -2154,8 +2156,7 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, ctx->Const.MaxShaderStorageBufferBindings); return false; } - } else if (var->type->is_sampler() || - (var->type->is_array() && var->type->fields.array->is_sampler())) { + } else if (base_type->is_sampler()) { /* Samplers. 
From page 63 of the GLSL 4.20 specification: * "If the binding is less than zero, or greater than or equal to the * implementation-dependent maximum supported number of units, a @@ -2172,7 +2173,7 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, return false; } - } else if (var->type->contains_atomic()) { + } else if (base_type->contains_atomic()) { assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS); if (unsigned(qual->binding) >= ctx->Const.MaxAtomicBufferBindings) { _mesa_glsl_error(loc, state, "layout(binding = %d) exceeds the " @@ -2182,10 +2183,19 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, return false; } + } else if (state->is_version(420, 310) && base_type->is_image()) { + assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS); + if (max_index >= ctx->Const.MaxImageUnits) { + _mesa_glsl_error(loc, state, "Image binding %d exceeds the " + " maximum number of image units (%d)", max_index, + ctx->Const.MaxImageUnits); + return false; + } + } else { _mesa_glsl_error(loc, state, "the \"binding\" qualifier only applies to uniform " - "blocks, samplers, atomic counters, or arrays thereof"); + "blocks, opaque variables, or arrays thereof"); return false; } @@ -2446,14 +2456,38 @@ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual, var->data.image_format = qual->image_format; } else { - if (var->data.mode == ir_var_uniform && !qual->flags.q.write_only) { - _mesa_glsl_error(loc, state, "uniforms not qualified with " - "`writeonly' must have a format layout " - "qualifier"); + if (var->data.mode == ir_var_uniform) { + if (state->es_shader) { + _mesa_glsl_error(loc, state, "all image uniforms " + "must have a format layout qualifier"); + + } else if (!qual->flags.q.write_only) { + _mesa_glsl_error(loc, state, "image uniforms not qualified with " + "`writeonly' must have a format layout " + "qualifier"); + } } var->data.image_format = GL_NONE; } + + /* From page 70 of the GLSL ES 3.1 
specification: + * + * "Except for image variables qualified with the format qualifiers + * r32f, r32i, and r32ui, image variables must specify either memory + * qualifier readonly or the memory qualifier writeonly." + */ + if (state->es_shader && + var->data.image_format != GL_R32F && + var->data.image_format != GL_R32I && + var->data.image_format != GL_R32UI && + !var->data.image_read_only && + !var->data.image_write_only) { + _mesa_glsl_error(loc, state, "image variables of format other than " + "r32f, r32i or r32ui must be qualified `readonly' or " + "`writeonly'"); + } + } else if (qual->flags.q.read_only || qual->flags.q.write_only || qual->flags.q.coherent || @@ -2759,7 +2793,7 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, } if (qual->flags.q.explicit_binding && - validate_binding_qualifier(state, loc, var, qual)) { + validate_binding_qualifier(state, loc, var->type, qual)) { var->data.explicit_binding = true; var->data.binding = qual->binding; } @@ -3293,7 +3327,7 @@ handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state, validate_layout_qualifier_vertex_count(state, loc, var, num_vertices, &state->tcs_output_size, - "geometry shader input"); + "tessellation control shader output"); } /** @@ -3390,7 +3424,7 @@ validate_identifier(const char *identifier, YYLTYPE loc, static bool precision_qualifier_allowed(const glsl_type *type) { - /* Precision qualifiers apply to floating point, integer and sampler + /* Precision qualifiers apply to floating point, integer and opaque * types. 
* * Section 4.5.2 (Precision Qualifiers) of the GLSL 1.30 spec says: @@ -3420,7 +3454,7 @@ precision_qualifier_allowed(const glsl_type *type) return type->is_float() || type->is_integer() || type->is_record() - || type->is_sampler(); + || type->contains_opaque(); } ir_rvalue * @@ -4131,7 +4165,7 @@ ast_declarator_list::hir(exec_list *instructions, _mesa_glsl_error(&loc, state, "precision qualifiers apply only to floating point" - ", integer and sampler types"); + ", integer and opaque types"); } /* From section 4.1.7 of the GLSL 4.40 spec: @@ -5439,6 +5473,8 @@ is_valid_default_precision_type(const struct glsl_type *const type) /* "int" and "float" are valid, but vectors and matrices are not. */ return type->vector_elements == 1 && type->matrix_columns == 1; case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: return true; default: return false; @@ -5487,7 +5523,7 @@ ast_type_specifier::hir(exec_list *instructions, if (!is_valid_default_precision_type(type)) { _mesa_glsl_error(&loc, state, "default precision statements apply only to " - "float, int, and sampler types"); + "float, int, and opaque types"); return NULL; } @@ -6067,6 +6103,8 @@ ast_interface_block::hir(exec_list *instructions, num_variables, packing, this->block_name); + if (this->layout.flags.q.explicit_binding) + validate_binding_qualifier(state, &loc, block_type, &this->layout); if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) { YYLTYPE loc = this->get_location(); @@ -6197,6 +6235,10 @@ ast_interface_block::hir(exec_list *instructions, "not allowed"); } + if (this->layout.flags.q.explicit_binding) + validate_binding_qualifier(state, &loc, block_array_type, + &this->layout); + var = new(state) ir_variable(block_array_type, this->instance_name, var_mode); diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index 2175c66..1bc3de4 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -136,6 +136,13 
@@ v140(const _mesa_glsl_parse_state *state) } static bool +v400_fs_only(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 0) && + state->stage == MESA_SHADER_FRAGMENT; +} + +static bool es31(const _mesa_glsl_parse_state *state) { return state->is_version(0, 310); @@ -270,6 +277,13 @@ texture_array(const _mesa_glsl_parse_state *state) static bool texture_multisample(const _mesa_glsl_parse_state *state) { + return state->is_version(150, 310) || + state->ARB_texture_multisample_enable; +} + +static bool +texture_multisample_array(const _mesa_glsl_parse_state *state) +{ return state->is_version(150, 0) || state->ARB_texture_multisample_enable; } @@ -394,11 +408,25 @@ shader_trinary_minmax(const _mesa_glsl_parse_state *state) static bool shader_image_load_store(const _mesa_glsl_parse_state *state) { + return (state->is_version(420, 310) || + state->ARB_shader_image_load_store_enable); +} + +static bool +shader_image_atomic(const _mesa_glsl_parse_state *state) +{ return (state->is_version(420, 0) || state->ARB_shader_image_load_store_enable); } static bool +shader_image_size(const _mesa_glsl_parse_state *state) +{ + return state->is_version(430, 310) || + state->ARB_shader_image_size_enable; +} + +static bool gs_streams(const _mesa_glsl_parse_state *state) { return gpu_shader5(state) && gs_only(state); @@ -492,13 +520,19 @@ private: /** Create a new function and add the given signatures. 
*/ void add_function(const char *name, ...); + typedef ir_function_signature *(builtin_builder::*image_prototype_ctr)(const glsl_type *image_type, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags); + enum image_function_flags { IMAGE_FUNCTION_EMIT_STUB = (1 << 0), IMAGE_FUNCTION_RETURNS_VOID = (1 << 1), IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2), IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3), IMAGE_FUNCTION_READ_ONLY = (1 << 4), - IMAGE_FUNCTION_WRITE_ONLY = (1 << 5) + IMAGE_FUNCTION_WRITE_ONLY = (1 << 5), + IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6) }; /** @@ -507,6 +541,7 @@ private: */ void add_image_function(const char *name, const char *intrinsic_name, + image_prototype_ctr prototype, unsigned num_arguments, unsigned flags); @@ -663,7 +698,7 @@ private: const glsl_type *stream_type); B0(barrier) - B2(textureQueryLod); + BA2(textureQueryLod); B1(textureQueryLevels); B1(dFdx); B1(dFdy); @@ -708,7 +743,12 @@ private: const char *intrinsic_name, unsigned num_arguments, unsigned flags); - ir_function_signature *_image(const glsl_type *image_type, + ir_function_signature *_image_size_prototype(const glsl_type *image_type, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags); + ir_function_signature *_image(image_prototype_ctr prototype, + const glsl_type *image_type, const char *intrinsic_name, unsigned num_arguments, unsigned flags); @@ -1367,9 +1407,9 @@ builtin_builder::create_builtins() _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::isampler2DMS_type), _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::usampler2DMS_type), - _textureSize(texture_multisample, glsl_type::ivec3_type, glsl_type::sampler2DMSArray_type), - _textureSize(texture_multisample, glsl_type::ivec3_type, glsl_type::isampler2DMSArray_type), - _textureSize(texture_multisample, glsl_type::ivec3_type, glsl_type::usampler2DMSArray_type), + _textureSize(texture_multisample_array, glsl_type::ivec3_type, 
glsl_type::sampler2DMSArray_type), + _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::isampler2DMSArray_type), + _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::usampler2DMSArray_type), NULL); add_function("texture", @@ -1632,9 +1672,9 @@ builtin_builder::create_builtins() _texelFetch(texture_multisample, glsl_type::ivec4_type, glsl_type::isampler2DMS_type, glsl_type::ivec2_type), _texelFetch(texture_multisample, glsl_type::uvec4_type, glsl_type::usampler2DMS_type, glsl_type::ivec2_type), - _texelFetch(texture_multisample, glsl_type::vec4_type, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type), - _texelFetch(texture_multisample, glsl_type::ivec4_type, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type), - _texelFetch(texture_multisample, glsl_type::uvec4_type, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type), + _texelFetch(texture_multisample_array, glsl_type::vec4_type, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type), + _texelFetch(texture_multisample_array, glsl_type::ivec4_type, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type), + _texelFetch(texture_multisample_array, glsl_type::uvec4_type, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type), NULL); add_function("texelFetchOffset", @@ -1944,40 +1984,77 @@ builtin_builder::create_builtins() add_function("barrier", _barrier(), NULL); add_function("textureQueryLOD", - _textureQueryLod(glsl_type::sampler1D_type, glsl_type::float_type), - _textureQueryLod(glsl_type::isampler1D_type, glsl_type::float_type), - _textureQueryLod(glsl_type::usampler1D_type, glsl_type::float_type), - - _textureQueryLod(glsl_type::sampler2D_type, glsl_type::vec2_type), - _textureQueryLod(glsl_type::isampler2D_type, glsl_type::vec2_type), - _textureQueryLod(glsl_type::usampler2D_type, glsl_type::vec2_type), - - _textureQueryLod(glsl_type::sampler3D_type, glsl_type::vec3_type), - _textureQueryLod(glsl_type::isampler3D_type, glsl_type::vec3_type), 
- _textureQueryLod(glsl_type::usampler3D_type, glsl_type::vec3_type), - - _textureQueryLod(glsl_type::samplerCube_type, glsl_type::vec3_type), - _textureQueryLod(glsl_type::isamplerCube_type, glsl_type::vec3_type), - _textureQueryLod(glsl_type::usamplerCube_type, glsl_type::vec3_type), - - _textureQueryLod(glsl_type::sampler1DArray_type, glsl_type::float_type), - _textureQueryLod(glsl_type::isampler1DArray_type, glsl_type::float_type), - _textureQueryLod(glsl_type::usampler1DArray_type, glsl_type::float_type), - - _textureQueryLod(glsl_type::sampler2DArray_type, glsl_type::vec2_type), - _textureQueryLod(glsl_type::isampler2DArray_type, glsl_type::vec2_type), - _textureQueryLod(glsl_type::usampler2DArray_type, glsl_type::vec2_type), - - _textureQueryLod(glsl_type::samplerCubeArray_type, glsl_type::vec3_type), - _textureQueryLod(glsl_type::isamplerCubeArray_type, glsl_type::vec3_type), - _textureQueryLod(glsl_type::usamplerCubeArray_type, glsl_type::vec3_type), - - _textureQueryLod(glsl_type::sampler1DShadow_type, glsl_type::float_type), - _textureQueryLod(glsl_type::sampler2DShadow_type, glsl_type::vec2_type), - _textureQueryLod(glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), - _textureQueryLod(glsl_type::sampler1DArrayShadow_type, glsl_type::float_type), - _textureQueryLod(glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type), - _textureQueryLod(glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::sampler1D_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler1D_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler1D_type, glsl_type::float_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler2D_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler2D_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler2D_type, glsl_type::vec2_type), + + 
_textureQueryLod(texture_query_lod, glsl_type::sampler3D_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler3D_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _textureQueryLod(texture_query_lod, glsl_type::samplerCube_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler1DArray_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler1DArray_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler1DArray_type, glsl_type::float_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler2DArray_type, glsl_type::vec2_type), + + _textureQueryLod(texture_query_lod, glsl_type::samplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::isamplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::usamplerCubeArray_type, glsl_type::vec3_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler1DShadow_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::sampler1DArrayShadow_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type), + NULL); + + 
add_function("textureQueryLod", + _textureQueryLod(v400_fs_only, glsl_type::sampler1D_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler1D_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler1D_type, glsl_type::float_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler2D_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler2D_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler3D_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler3D_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _textureQueryLod(v400_fs_only, glsl_type::samplerCube_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler1DArray_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler1DArray_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler1DArray_type, glsl_type::float_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler2DArray_type, glsl_type::vec2_type), + + _textureQueryLod(v400_fs_only, glsl_type::samplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::isamplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::usamplerCubeArray_type, glsl_type::vec3_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler1DShadow_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, 
glsl_type::sampler2DShadow_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::sampler1DArrayShadow_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type), NULL); add_function("textureQueryLevels", @@ -2552,6 +2629,7 @@ builtin_builder::add_function(const char *name, ...) void builtin_builder::add_image_function(const char *name, const char *intrinsic_name, + image_prototype_ctr prototype, unsigned num_arguments, unsigned flags) { @@ -2590,12 +2668,13 @@ builtin_builder::add_image_function(const char *name, glsl_type::uimage2DMS_type, glsl_type::uimage2DMSArray_type }; + ir_function *f = new(mem_ctx) ir_function(name); for (unsigned i = 0; i < ARRAY_SIZE(types); ++i) { if (types[i]->sampler_type != GLSL_TYPE_FLOAT || (flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE)) - f->add_signature(_image(types[i], intrinsic_name, + f->add_signature(_image(prototype, types[i], intrinsic_name, num_arguments, flags)); } @@ -2608,43 +2687,60 @@ builtin_builder::add_image_functions(bool glsl) const unsigned flags = (glsl ? IMAGE_FUNCTION_EMIT_STUB : 0); add_image_function(glsl ? "imageLoad" : "__intrinsic_image_load", - "__intrinsic_image_load", 0, - (flags | IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE | + "__intrinsic_image_load", + &builtin_builder::_image_prototype, 0, + (flags | IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | IMAGE_FUNCTION_READ_ONLY)); add_image_function(glsl ? 
"imageStore" : "__intrinsic_image_store", - "__intrinsic_image_store", 1, + "__intrinsic_image_store", + &builtin_builder::_image_prototype, 1, (flags | IMAGE_FUNCTION_RETURNS_VOID | IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | IMAGE_FUNCTION_WRITE_ONLY)); + const unsigned atom_flags = flags | IMAGE_FUNCTION_AVAIL_ATOMIC; + add_image_function(glsl ? "imageAtomicAdd" : "__intrinsic_image_atomic_add", - "__intrinsic_image_atomic_add", 1, flags); + "__intrinsic_image_atomic_add", + &builtin_builder::_image_prototype, 1, atom_flags); add_image_function(glsl ? "imageAtomicMin" : "__intrinsic_image_atomic_min", - "__intrinsic_image_atomic_min", 1, flags); + "__intrinsic_image_atomic_min", + &builtin_builder::_image_prototype, 1, atom_flags); add_image_function(glsl ? "imageAtomicMax" : "__intrinsic_image_atomic_max", - "__intrinsic_image_atomic_max", 1, flags); + "__intrinsic_image_atomic_max", + &builtin_builder::_image_prototype, 1, atom_flags); add_image_function(glsl ? "imageAtomicAnd" : "__intrinsic_image_atomic_and", - "__intrinsic_image_atomic_and", 1, flags); + "__intrinsic_image_atomic_and", + &builtin_builder::_image_prototype, 1, atom_flags); add_image_function(glsl ? "imageAtomicOr" : "__intrinsic_image_atomic_or", - "__intrinsic_image_atomic_or", 1, flags); + "__intrinsic_image_atomic_or", + &builtin_builder::_image_prototype, 1, atom_flags); add_image_function(glsl ? "imageAtomicXor" : "__intrinsic_image_atomic_xor", - "__intrinsic_image_atomic_xor", 1, flags); + "__intrinsic_image_atomic_xor", + &builtin_builder::_image_prototype, 1, atom_flags); add_image_function((glsl ? "imageAtomicExchange" : "__intrinsic_image_atomic_exchange"), - "__intrinsic_image_atomic_exchange", 1, flags); + "__intrinsic_image_atomic_exchange", + &builtin_builder::_image_prototype, 1, atom_flags); add_image_function((glsl ? 
"imageAtomicCompSwap" : "__intrinsic_image_atomic_comp_swap"), - "__intrinsic_image_atomic_comp_swap", 2, flags); + "__intrinsic_image_atomic_comp_swap", + &builtin_builder::_image_prototype, 2, atom_flags); + + add_image_function(glsl ? "imageSize" : "__intrinsic_image_size", + "__intrinsic_image_size", + &builtin_builder::_image_size_prototype, 1, + flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE); } ir_variable * @@ -4314,13 +4410,14 @@ builtin_builder::_barrier() } ir_function_signature * -builtin_builder::_textureQueryLod(const glsl_type *sampler_type, +builtin_builder::_textureQueryLod(builtin_available_predicate avail, + const glsl_type *sampler_type, const glsl_type *coord_type) { ir_variable *s = in_var(sampler_type, "sampler"); ir_variable *coord = in_var(coord_type, "coord"); /* The sampler and coordinate always exist; add optional parameters later. */ - MAKE_SIG(glsl_type::vec2_type, texture_query_lod, 2, s, coord); + MAKE_SIG(glsl_type::vec2_type, avail, 2, s, coord); ir_texture *tex = new(mem_ctx) ir_texture(ir_lod); tex->coordinate = var_ref(coord); @@ -4787,8 +4884,10 @@ builtin_builder::_image_prototype(const glsl_type *image_type, ir_variable *coord = in_var( glsl_type::ivec(image_type->coordinate_components()), "coord"); - ir_function_signature *sig = new_sig( - ret_type, shader_image_load_store, 2, image, coord); + const builtin_available_predicate avail = + (flags & IMAGE_FUNCTION_AVAIL_ATOMIC ? shader_image_atomic : + shader_image_load_store); + ir_function_signature *sig = new_sig(ret_type, avail, 2, image, coord); /* Sample index for multisample images. 
*/ if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) @@ -4818,13 +4917,55 @@ builtin_builder::_image_prototype(const glsl_type *image_type, } ir_function_signature * -builtin_builder::_image(const glsl_type *image_type, +builtin_builder::_image_size_prototype(const glsl_type *image_type, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags) +{ + const glsl_type *ret_type; + unsigned num_components = image_type->coordinate_components(); + + /* From the ARB_shader_image_size extension: + * "Cube images return the dimensions of one face." + */ + if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && + !image_type->sampler_array) { + num_components = 2; + } + + /* FIXME: Add the highp precision qualifier for GLES 3.10 when it is + * supported by mesa. + */ + ret_type = glsl_type::get_instance(GLSL_TYPE_INT, num_components, 1); + + ir_variable *image = in_var(image_type, "image"); + ir_function_signature *sig = new_sig(ret_type, shader_image_size, 1, image); + + /* Set the maximal set of qualifiers allowed for this image + * built-in. Function calls with arguments having fewer + * qualifiers than present in the prototype are allowed by the + * spec, but not with more, i.e. this will make the compiler + * accept everything that needs to be accepted, and reject cases + * like loads from write-only or stores to read-only images. 
+ */ + image->data.image_read_only = true; + image->data.image_write_only = true; + image->data.image_coherent = true; + image->data.image_volatile = true; + image->data.image_restrict = true; + + return sig; +} + +ir_function_signature * +builtin_builder::_image(image_prototype_ctr prototype, + const glsl_type *image_type, const char *intrinsic_name, unsigned num_arguments, unsigned flags) { - ir_function_signature *sig = _image_prototype(image_type, intrinsic_name, - num_arguments, flags); + ir_function_signature *sig = (this->*prototype)(image_type, intrinsic_name, + num_arguments, flags); if (flags & IMAGE_FUNCTION_EMIT_STUB) { ir_factory body(&sig->body, mem_ctx); diff --git a/src/glsl/builtin_types.cpp b/src/glsl/builtin_types.cpp index ffbc5e6..9cf198f 100644 --- a/src/glsl/builtin_types.cpp +++ b/src/glsl/builtin_types.cpp @@ -182,7 +182,7 @@ const static struct builtin_type_versions { T(samplerCubeArray, 400, 999) T(sampler2DRect, 140, 999) T(samplerBuffer, 140, 999) - T(sampler2DMS, 150, 999) + T(sampler2DMS, 150, 310) T(sampler2DMSArray, 150, 999) T(isampler1D, 130, 999) @@ -194,7 +194,7 @@ const static struct builtin_type_versions { T(isamplerCubeArray, 400, 999) T(isampler2DRect, 140, 999) T(isamplerBuffer, 140, 999) - T(isampler2DMS, 150, 999) + T(isampler2DMS, 150, 310) T(isampler2DMSArray, 150, 999) T(usampler1D, 130, 999) @@ -206,7 +206,7 @@ const static struct builtin_type_versions { T(usamplerCubeArray, 400, 999) T(usampler2DRect, 140, 999) T(usamplerBuffer, 140, 999) - T(usampler2DMS, 150, 999) + T(usampler2DMS, 150, 310) T(usampler2DMSArray, 150, 999) T(sampler1DShadow, 110, 999) @@ -220,40 +220,40 @@ const static struct builtin_type_versions { T(struct_gl_DepthRangeParameters, 110, 100) T(image1D, 420, 999) - T(image2D, 420, 999) - T(image3D, 420, 999) + T(image2D, 420, 310) + T(image3D, 420, 310) T(image2DRect, 420, 999) - T(imageCube, 420, 999) + T(imageCube, 420, 310) T(imageBuffer, 420, 999) T(image1DArray, 420, 999) - T(image2DArray, 420, 
999) + T(image2DArray, 420, 310) T(imageCubeArray, 420, 999) T(image2DMS, 420, 999) T(image2DMSArray, 420, 999) T(iimage1D, 420, 999) - T(iimage2D, 420, 999) - T(iimage3D, 420, 999) + T(iimage2D, 420, 310) + T(iimage3D, 420, 310) T(iimage2DRect, 420, 999) - T(iimageCube, 420, 999) + T(iimageCube, 420, 310) T(iimageBuffer, 420, 999) T(iimage1DArray, 420, 999) - T(iimage2DArray, 420, 999) + T(iimage2DArray, 420, 310) T(iimageCubeArray, 420, 999) T(iimage2DMS, 420, 999) T(iimage2DMSArray, 420, 999) T(uimage1D, 420, 999) - T(uimage2D, 420, 999) - T(uimage3D, 420, 999) + T(uimage2D, 420, 310) + T(uimage3D, 420, 310) T(uimage2DRect, 420, 999) - T(uimageCube, 420, 999) + T(uimageCube, 420, 310) T(uimageBuffer, 420, 999) T(uimage1DArray, 420, 999) - T(uimage2DArray, 420, 999) + T(uimage2DArray, 420, 310) T(uimageCubeArray, 420, 999) T(uimage2DMS, 420, 999) T(uimage2DMSArray, 420, 999) - T(atomic_uint, 420, 999) + T(atomic_uint, 420, 310) }; static const glsl_type *const deprecated_types[] = { diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp index 53d3500..dd7804f 100644 --- a/src/glsl/builtin_variables.cpp +++ b/src/glsl/builtin_variables.cpp @@ -744,25 +744,31 @@ builtin_variable_generator::generate_constants() */ } - if (state->is_version(420, 0) || + if (state->is_version(420, 310) || state->ARB_shader_image_load_store_enable) { add_const("gl_MaxImageUnits", state->Const.MaxImageUnits); - add_const("gl_MaxCombinedImageUnitsAndFragmentOutputs", - state->Const.MaxCombinedImageUnitsAndFragmentOutputs); - add_const("gl_MaxImageSamples", - state->Const.MaxImageSamples); add_const("gl_MaxVertexImageUniforms", state->Const.MaxVertexImageUniforms); - add_const("gl_MaxTessControlImageUniforms", 0); - add_const("gl_MaxTessEvaluationImageUniforms", 0); - add_const("gl_MaxGeometryImageUniforms", - state->Const.MaxGeometryImageUniforms); add_const("gl_MaxFragmentImageUniforms", state->Const.MaxFragmentImageUniforms); 
add_const("gl_MaxCombinedImageUniforms", state->Const.MaxCombinedImageUniforms); + if (!state->es_shader) { + add_const("gl_MaxCombinedImageUnitsAndFragmentOutputs", + state->Const.MaxCombinedShaderOutputResources); + add_const("gl_MaxImageSamples", + state->Const.MaxImageSamples); + add_const("gl_MaxGeometryImageUniforms", + state->Const.MaxGeometryImageUniforms); + } + + if (state->is_version(450, 310)) { + add_const("gl_MaxCombinedShaderOutputResources", + state->Const.MaxCombinedShaderOutputResources); + } + if (state->is_version(400, 0) || state->ARB_tessellation_shader_enable) { add_const("gl_MaxTessControlImageUniforms", diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y index dd5ec2a..18e50af 100644 --- a/src/glsl/glcpp/glcpp-parse.y +++ b/src/glsl/glcpp/glcpp-parse.y @@ -2478,6 +2478,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio if (extensions->ARB_shader_image_load_store) add_builtin_define(parser, "GL_ARB_shader_image_load_store", 1); + if (extensions->ARB_shader_image_size) + add_builtin_define(parser, "GL_ARB_shader_image_size", 1); + if (extensions->ARB_derivative_control) add_builtin_define(parser, "GL_ARB_derivative_control", 1); diff --git a/src/glsl/glsl_lexer.ll b/src/glsl/glsl_lexer.ll index efa0bb6..24998c1 100644 --- a/src/glsl/glsl_lexer.ll +++ b/src/glsl/glsl_lexer.ll @@ -343,9 +343,10 @@ usampler2DArray KEYWORD(130, 300, 130, 300, USAMPLER2DARRAY); /* additional keywords in ARB_texture_multisample, included in GLSL 1.50 */ /* these are reserved but not defined in GLSL 3.00 */ -sampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, SAMPLER2DMS); -isampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMS); -usampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, USAMPLER2DMS); + /* [iu]sampler2DMS are defined in GLSL ES 3.10 */ +sampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 
310, yyextra->ARB_texture_multisample_enable, SAMPLER2DMS); +isampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMS); +usampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, USAMPLER2DMS); sampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, SAMPLER2DMSARRAY); isampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMSARRAY); usampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, USAMPLER2DMSARRAY); @@ -368,35 +369,35 @@ precise KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_gpu_shader5_enable, PRECI /* keywords available with ARB_shader_image_load_store */ image1D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE1D); -image2D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2D); -image3D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE3D); +image2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE2D); +image3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE3D); image2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DRECT); -imageCube KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGECUBE); +imageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGECUBE); imageBuffer KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGEBUFFER); image1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE1DARRAY); -image2DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DARRAY); +image2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, 
yyextra->ARB_shader_image_load_store_enable, IMAGE2DARRAY); imageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGECUBEARRAY); image2DMS KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DMS); image2DMSArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DMSARRAY); iimage1D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE1D); -iimage2D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2D); -iimage3D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE3D); +iimage2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE2D); +iimage3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE3D); iimage2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DRECT); -iimageCube KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBE); +iimageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBE); iimageBuffer KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGEBUFFER); iimage1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE1DARRAY); -iimage2DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DARRAY); +iimage2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DARRAY); iimageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBEARRAY); iimage2DMS KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DMS); iimage2DMSArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DMSARRAY); uimage1D 
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE1D); -uimage2D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2D); -uimage3D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE3D); +uimage2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE2D); +uimage3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE3D); uimage2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DRECT); -uimageCube KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBE); +uimageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBE); uimageBuffer KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGEBUFFER); uimage1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE1DARRAY); -uimage2DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DARRAY); +uimage2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DARRAY); uimageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBEARRAY); uimage2DMS KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DMS); uimage2DMSArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DMSARRAY); @@ -405,11 +406,11 @@ image2DShadow KEYWORD(130, 300, 0, 0, IMAGE2DSHADOW); image1DArrayShadow KEYWORD(130, 300, 0, 0, IMAGE1DARRAYSHADOW); image2DArrayShadow KEYWORD(130, 300, 0, 0, IMAGE2DARRAYSHADOW); -coherent KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, COHERENT); -volatile KEYWORD_WITH_ALT(110, 100, 420, 0, yyextra->ARB_shader_image_load_store_enable, VOLATILE); 
-restrict KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, RESTRICT); -readonly KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, READONLY); -writeonly KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, WRITEONLY); +coherent KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, COHERENT); +volatile KEYWORD_WITH_ALT(110, 100, 420, 310, yyextra->ARB_shader_image_load_store_enable, VOLATILE); +restrict KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, RESTRICT); +readonly KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, READONLY); +writeonly KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, WRITEONLY); atomic_uint KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT); diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index 97648c1..e1b3908 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -1258,56 +1258,65 @@ layout_qualifier_id: /* Layout qualifiers for ARB_shader_image_load_store. */ if (state->ARB_shader_image_load_store_enable || - state->is_version(420, 0)) { + state->is_version(420, 310)) { if (!$$.flags.i) { static const struct { const char *name; GLenum format; glsl_base_type base_type; + /** Minimum desktop GLSL version required for the image + * format. Use 130 if already present in the original + * ARB extension. + */ + unsigned required_glsl; + /** Minimum GLSL ES version required for the image format. 
*/ + unsigned required_essl; } map[] = { - { "rgba32f", GL_RGBA32F, GLSL_TYPE_FLOAT }, - { "rgba16f", GL_RGBA16F, GLSL_TYPE_FLOAT }, - { "rg32f", GL_RG32F, GLSL_TYPE_FLOAT }, - { "rg16f", GL_RG16F, GLSL_TYPE_FLOAT }, - { "r11f_g11f_b10f", GL_R11F_G11F_B10F, GLSL_TYPE_FLOAT }, - { "r32f", GL_R32F, GLSL_TYPE_FLOAT }, - { "r16f", GL_R16F, GLSL_TYPE_FLOAT }, - { "rgba32ui", GL_RGBA32UI, GLSL_TYPE_UINT }, - { "rgba16ui", GL_RGBA16UI, GLSL_TYPE_UINT }, - { "rgb10_a2ui", GL_RGB10_A2UI, GLSL_TYPE_UINT }, - { "rgba8ui", GL_RGBA8UI, GLSL_TYPE_UINT }, - { "rg32ui", GL_RG32UI, GLSL_TYPE_UINT }, - { "rg16ui", GL_RG16UI, GLSL_TYPE_UINT }, - { "rg8ui", GL_RG8UI, GLSL_TYPE_UINT }, - { "r32ui", GL_R32UI, GLSL_TYPE_UINT }, - { "r16ui", GL_R16UI, GLSL_TYPE_UINT }, - { "r8ui", GL_R8UI, GLSL_TYPE_UINT }, - { "rgba32i", GL_RGBA32I, GLSL_TYPE_INT }, - { "rgba16i", GL_RGBA16I, GLSL_TYPE_INT }, - { "rgba8i", GL_RGBA8I, GLSL_TYPE_INT }, - { "rg32i", GL_RG32I, GLSL_TYPE_INT }, - { "rg16i", GL_RG16I, GLSL_TYPE_INT }, - { "rg8i", GL_RG8I, GLSL_TYPE_INT }, - { "r32i", GL_R32I, GLSL_TYPE_INT }, - { "r16i", GL_R16I, GLSL_TYPE_INT }, - { "r8i", GL_R8I, GLSL_TYPE_INT }, - { "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT }, - { "rgb10_a2", GL_RGB10_A2, GLSL_TYPE_FLOAT }, - { "rgba8", GL_RGBA8, GLSL_TYPE_FLOAT }, - { "rg16", GL_RG16, GLSL_TYPE_FLOAT }, - { "rg8", GL_RG8, GLSL_TYPE_FLOAT }, - { "r16", GL_R16, GLSL_TYPE_FLOAT }, - { "r8", GL_R8, GLSL_TYPE_FLOAT }, - { "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT }, - { "rgba8_snorm", GL_RGBA8_SNORM, GLSL_TYPE_FLOAT }, - { "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT }, - { "rg8_snorm", GL_RG8_SNORM, GLSL_TYPE_FLOAT }, - { "r16_snorm", GL_R16_SNORM, GLSL_TYPE_FLOAT }, - { "r8_snorm", GL_R8_SNORM, GLSL_TYPE_FLOAT } + { "rgba32f", GL_RGBA32F, GLSL_TYPE_FLOAT, 130, 310 }, + { "rgba16f", GL_RGBA16F, GLSL_TYPE_FLOAT, 130, 310 }, + { "rg32f", GL_RG32F, GLSL_TYPE_FLOAT, 130, 0 }, + { "rg16f", GL_RG16F, GLSL_TYPE_FLOAT, 130, 0 }, + { "r11f_g11f_b10f", 
GL_R11F_G11F_B10F, GLSL_TYPE_FLOAT, 130, 0 }, + { "r32f", GL_R32F, GLSL_TYPE_FLOAT, 130, 310 }, + { "r16f", GL_R16F, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgba32ui", GL_RGBA32UI, GLSL_TYPE_UINT, 130, 310 }, + { "rgba16ui", GL_RGBA16UI, GLSL_TYPE_UINT, 130, 310 }, + { "rgb10_a2ui", GL_RGB10_A2UI, GLSL_TYPE_UINT, 130, 0 }, + { "rgba8ui", GL_RGBA8UI, GLSL_TYPE_UINT, 130, 310 }, + { "rg32ui", GL_RG32UI, GLSL_TYPE_UINT, 130, 0 }, + { "rg16ui", GL_RG16UI, GLSL_TYPE_UINT, 130, 0 }, + { "rg8ui", GL_RG8UI, GLSL_TYPE_UINT, 130, 0 }, + { "r32ui", GL_R32UI, GLSL_TYPE_UINT, 130, 310 }, + { "r16ui", GL_R16UI, GLSL_TYPE_UINT, 130, 0 }, + { "r8ui", GL_R8UI, GLSL_TYPE_UINT, 130, 0 }, + { "rgba32i", GL_RGBA32I, GLSL_TYPE_INT, 130, 310 }, + { "rgba16i", GL_RGBA16I, GLSL_TYPE_INT, 130, 310 }, + { "rgba8i", GL_RGBA8I, GLSL_TYPE_INT, 130, 310 }, + { "rg32i", GL_RG32I, GLSL_TYPE_INT, 130, 0 }, + { "rg16i", GL_RG16I, GLSL_TYPE_INT, 130, 0 }, + { "rg8i", GL_RG8I, GLSL_TYPE_INT, 130, 0 }, + { "r32i", GL_R32I, GLSL_TYPE_INT, 130, 310 }, + { "r16i", GL_R16I, GLSL_TYPE_INT, 130, 0 }, + { "r8i", GL_R8I, GLSL_TYPE_INT, 130, 0 }, + { "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgb10_a2", GL_RGB10_A2, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgba8", GL_RGBA8, GLSL_TYPE_FLOAT, 130, 310 }, + { "rg16", GL_RG16, GLSL_TYPE_FLOAT, 130, 0 }, + { "rg8", GL_RG8, GLSL_TYPE_FLOAT, 130, 0 }, + { "r16", GL_R16, GLSL_TYPE_FLOAT, 130, 0 }, + { "r8", GL_R8, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgba8_snorm", GL_RGBA8_SNORM, GLSL_TYPE_FLOAT, 130, 310 }, + { "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, + { "rg8_snorm", GL_RG8_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, + { "r16_snorm", GL_R16_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, + { "r8_snorm", GL_R8_SNORM, GLSL_TYPE_FLOAT, 130, 0 } }; for (unsigned i = 0; i < ARRAY_SIZE(map); i++) { - if (match_layout_qualifier($1, map[i].name, state) == 0) { + if (state->is_version(map[i].required_glsl, + map[i].required_essl) && 
+ match_layout_qualifier($1, map[i].name, state) == 0) { $$.flags.q.explicit_image_format = 1; $$.image_format = map[i].format; $$.image_base_type = map[i].base_type; @@ -1521,11 +1530,10 @@ layout_qualifier_id: "invalid %s of %d specified", local_size_qualifiers[i], $3); YYERROR; - } else if (!state->is_version(430, 0) && - !state->ARB_compute_shader_enable) { + } else if (!state->has_compute_shader()) { _mesa_glsl_error(& @3, state, "%s qualifier requires GLSL 4.30 or " - "ARB_compute_shader", + "GLSL ES 3.10 or ARB_compute_shader", local_size_qualifiers[i]); YYERROR; } else { diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index ae2f356..ca772e8 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -143,7 +143,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->Const.MaxComputeWorkGroupSize[i] = ctx->Const.MaxComputeWorkGroupSize[i]; this->Const.MaxImageUnits = ctx->Const.MaxImageUnits; - this->Const.MaxCombinedImageUnitsAndFragmentOutputs = ctx->Const.MaxCombinedImageUnitsAndFragmentOutputs; + this->Const.MaxCombinedShaderOutputResources = ctx->Const.MaxCombinedShaderOutputResources; this->Const.MaxImageSamples = ctx->Const.MaxImageSamples; this->Const.MaxVertexImageUniforms = ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms; this->Const.MaxTessControlImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms; @@ -601,6 +601,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(ARB_shader_atomic_counters, true, false, ARB_shader_atomic_counters), EXT(ARB_shader_bit_encoding, true, false, ARB_shader_bit_encoding), EXT(ARB_shader_image_load_store, true, false, ARB_shader_image_load_store), + EXT(ARB_shader_image_size, true, false, ARB_shader_image_size), EXT(ARB_shader_precision, true, false, ARB_shader_precision), EXT(ARB_shader_stencil_export, true, false, ARB_shader_stencil_export), 
EXT(ARB_shader_storage_buffer_object, true, false, ARB_shader_storage_buffer_object), diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index eb325f0..e2145be 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -236,6 +236,11 @@ struct _mesa_glsl_parse_state { return ARB_shading_language_420pack_enable || is_version(420, 0); } + bool has_compute_shader() const + { + return ARB_compute_shader_enable || is_version(430, 310); + } + void process_version_directive(YYLTYPE *locp, int version, const char *ident); @@ -390,7 +395,7 @@ struct _mesa_glsl_parse_state { /* ARB_shader_image_load_store */ unsigned MaxImageUnits; - unsigned MaxCombinedImageUnitsAndFragmentOutputs; + unsigned MaxCombinedShaderOutputResources; unsigned MaxImageSamples; unsigned MaxVertexImageUniforms; unsigned MaxTessControlImageUniforms; @@ -495,6 +500,8 @@ struct _mesa_glsl_parse_state { bool ARB_shader_bit_encoding_warn; bool ARB_shader_image_load_store_enable; bool ARB_shader_image_load_store_warn; + bool ARB_shader_image_size_enable; + bool ARB_shader_image_size_warn; bool ARB_shader_precision_enable; bool ARB_shader_precision_warn; bool ARB_shader_stencil_export_enable; diff --git a/src/glsl/link_uniform_initializers.cpp b/src/glsl/link_uniform_initializers.cpp index c482fbf..f238513 100644 --- a/src/glsl/link_uniform_initializers.cpp +++ b/src/glsl/link_uniform_initializers.cpp @@ -101,8 +101,13 @@ copy_constant_to_storage(union gl_constant_value *storage, } } +/** + * Initialize an opaque uniform from the value of an explicit binding + * qualifier specified in the shader. Atomic counters are different because + * they have no storage and should be handled elsewhere. 
+ */ void -set_sampler_binding(gl_shader_program *prog, const char *name, int binding) +set_opaque_binding(gl_shader_program *prog, const char *name, int binding) { struct gl_uniform_storage *const storage = get_storage(prog->UniformStorage, prog->NumUniformStorage, name); @@ -128,11 +133,20 @@ set_sampler_binding(gl_shader_program *prog, const char *name, int binding) for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { gl_shader *shader = prog->_LinkedShaders[sh]; - if (shader && storage->sampler[sh].active) { - for (unsigned i = 0; i < elements; i++) { - unsigned index = storage->sampler[sh].index + i; + if (shader) { + if (storage->type->base_type == GLSL_TYPE_SAMPLER && + storage->sampler[sh].active) { + for (unsigned i = 0; i < elements; i++) { + const unsigned index = storage->sampler[sh].index + i; + shader->SamplerUnits[index] = storage->storage[i].i; + } - shader->SamplerUnits[index] = storage->storage[i].i; + } else if (storage->type->base_type == GLSL_TYPE_IMAGE && + storage->image[sh].active) { + for (unsigned i = 0; i < elements; i++) { + const unsigned index = storage->image[sh].index + i; + shader->ImageUnits[index] = storage->storage[i].i; + } } } } @@ -268,8 +282,9 @@ link_set_uniform_initializers(struct gl_shader_program *prog, if (var->data.explicit_binding) { const glsl_type *const type = var->type; - if (type->without_array()->is_sampler()) { - linker::set_sampler_binding(prog, var->name, var->data.binding); + if (type->without_array()->is_sampler() || + type->without_array()->is_image()) { + linker::set_opaque_binding(prog, var->name, var->data.binding); } else if (var->is_in_buffer_block()) { const glsl_type *const iface_type = var->get_interface_type(); diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index a7cd820..47f7d25 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2900,7 +2900,7 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog) linker_error(prog, "Too many combined image uniforms\n"); 
if (total_image_units + fragment_outputs > - ctx->Const.MaxCombinedImageUnitsAndFragmentOutputs) + ctx->Const.MaxCombinedShaderOutputResources) linker_error(prog, "Too many combined image uniforms and fragment outputs\n"); } diff --git a/src/glsl/lower_subroutine.cpp b/src/glsl/lower_subroutine.cpp index b29912a..c1aed61 100644 --- a/src/glsl/lower_subroutine.cpp +++ b/src/glsl/lower_subroutine.cpp @@ -98,7 +98,7 @@ lower_subroutine_visitor::visit_leave(ir_call *ir) else last_branch = if_tree(equal(subr_to_int(var), lc), new_call, last_branch); - if (s > 0) + if (return_deref && s > 0) return_deref = return_deref->clone(mem_ctx, NULL); } if (last_branch) diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 27dabd3..af97da9 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -26,6 +26,7 @@ */ #include "glsl_to_nir.h" +#include "nir_control_flow.h" #include "ir_visitor.h" #include "ir_hierarchical_visitor.h" #include "ir.h" @@ -43,7 +44,7 @@ namespace { class nir_visitor : public ir_visitor { public: - nir_visitor(nir_shader *shader, struct gl_shader *sh, gl_shader_stage stage); + nir_visitor(nir_shader *shader, gl_shader *sh); ~nir_visitor(); virtual void visit(ir_variable *); @@ -86,7 +87,6 @@ private: struct gl_shader *sh; nir_shader *shader; - gl_shader_stage stage; nir_function_impl *impl; exec_list *cf_node_list; nir_instr *result; /* result of the expression tree last visited */ @@ -133,9 +133,9 @@ private: nir_shader * glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options) { - nir_shader *shader = nir_shader_create(NULL, options); + nir_shader *shader = nir_shader_create(NULL, sh->Stage, options); - nir_visitor v1(shader, sh, sh->Stage); + nir_visitor v1(shader, sh); nir_function_visitor v2(&v1); v2.run(sh->ir); visit_exec_list(sh->ir, &v1); @@ -143,13 +143,11 @@ glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options) return shader; } 
-nir_visitor::nir_visitor(nir_shader *shader, struct gl_shader *sh, - gl_shader_stage stage) +nir_visitor::nir_visitor(nir_shader *shader, gl_shader *sh) { this->supports_ints = shader->options->native_integers; this->shader = shader; this->sh = sh; - this->stage = stage; this->is_global = true; this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); @@ -266,7 +264,7 @@ nir_visitor::visit(ir_variable *ir) break; case ir_var_shader_in: - if (stage == MESA_SHADER_FRAGMENT && + if (shader->stage == MESA_SHADER_FRAGMENT && ir->data.location == VARYING_SLOT_FACE) { /* For whatever reason, GLSL IR makes gl_FrontFacing an input */ var->data.location = SYSTEM_VALUE_FRONT_FACE; @@ -646,6 +644,8 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_image_atomic_comp_swap; } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier") == 0) { op = nir_intrinsic_memory_barrier; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) { + op = nir_intrinsic_image_size; } else { unreachable("not reached"); } @@ -671,7 +671,8 @@ nir_visitor::visit(ir_call *ir) case nir_intrinsic_image_atomic_or: case nir_intrinsic_image_atomic_xor: case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_comp_swap: { + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_size: { nir_ssa_undef_instr *instr_undef = nir_ssa_undef_instr_create(shader, 1); nir_instr_insert_after_cf_list(this->cf_node_list, @@ -686,6 +687,17 @@ nir_visitor::visit(ir_call *ir) instr->variables[0] = evaluate_deref(&instr->instr, image); param = param->get_next(); + /* Set the intrinsic destination. */ + if (ir->return_deref) { + const nir_intrinsic_info *info = + &nir_intrinsic_infos[instr->intrinsic]; + nir_ssa_dest_init(&instr->instr, &instr->dest, + info->dest_components, NULL); + } + + if (op == nir_intrinsic_image_size) + break; + /* Set the address argument, extending the coordinate vector to four * components. 
*/ @@ -726,11 +738,6 @@ nir_visitor::visit(ir_call *ir) instr->src[3] = evaluate_rvalue((ir_dereference *)param); param = param->get_next(); } - - /* Set the intrinsic destination. */ - if (ir->return_deref) - nir_ssa_dest_init(&instr->instr, &instr->dest, - ir->return_deref->type->vector_elements, NULL); break; } case nir_intrinsic_memory_barrier: diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 2f7cbae..77cc4f0 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -26,10 +26,13 @@ */ #include "nir.h" +#include "nir_control_flow_private.h" #include <assert.h> nir_shader * -nir_shader_create(void *mem_ctx, const nir_shader_compiler_options *options) +nir_shader_create(void *mem_ctx, + gl_shader_stage stage, + const nir_shader_compiler_options *options) { nir_shader *shader = ralloc(mem_ctx, nir_shader); @@ -49,6 +52,8 @@ nir_shader_create(void *mem_ctx, const nir_shader_compiler_options *options) shader->num_outputs = 0; shader->num_uniforms = 0; + shader->stage = stage; + return shader; } @@ -180,11 +185,6 @@ nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, void *mem_ctx) dest->saturate = src->saturate; } -static inline void -block_add_pred(nir_block *block, nir_block *pred) -{ - _mesa_set_add(block->predecessors, pred); -} static void cf_init(nir_cf_node *node, nir_cf_node_type type) @@ -194,45 +194,6 @@ cf_init(nir_cf_node *node, nir_cf_node_type type) node->type = type; } -static void -link_blocks(nir_block *pred, nir_block *succ1, nir_block *succ2) -{ - pred->successors[0] = succ1; - block_add_pred(succ1, pred); - - pred->successors[1] = succ2; - if (succ2 != NULL) - block_add_pred(succ2, pred); -} - -static void -unlink_blocks(nir_block *pred, nir_block *succ) -{ - if (pred->successors[0] == succ) { - pred->successors[0] = pred->successors[1]; - pred->successors[1] = NULL; - } else { - assert(pred->successors[1] == succ); - pred->successors[1] = NULL; - } - - struct set_entry *entry = _mesa_set_search(succ->predecessors, pred); - 
- assert(entry); - - _mesa_set_remove(succ->predecessors, entry); -} - -static void -unlink_block_successors(nir_block *block) -{ - if (block->successors[0] != NULL) - unlink_blocks(block, block->successors[0]); - if (block->successors[1] != NULL) - unlink_blocks(block, block->successors[1]); -} - - nir_function_impl * nir_function_impl_create(nir_function_overload *overload) { @@ -262,14 +223,12 @@ nir_function_impl_create(nir_function_overload *overload) nir_block *end_block = nir_block_create(mem_ctx); start_block->cf_node.parent = &impl->cf_node; end_block->cf_node.parent = &impl->cf_node; - impl->start_block = start_block; impl->end_block = end_block; exec_list_push_tail(&impl->body, &start_block->cf_node.node); start_block->successors[0] = end_block; - block_add_pred(end_block, start_block); - + _mesa_set_add(end_block->predecessors, start_block); return impl; } @@ -335,7 +294,7 @@ nir_loop_create(void *mem_ctx) body->cf_node.parent = &loop->cf_node; body->successors[0] = body; - block_add_pred(body, body); + _mesa_set_add(body->predecessors, body); return loop; } @@ -646,250 +605,6 @@ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref) return load; } -/** - * \name Control flow modification - * - * These functions modify the control flow tree while keeping the control flow - * graph up-to-date. The invariants respected are: - * 1. Each then statement, else statement, or loop body must have at least one - * control flow node. - * 2. Each if-statement and loop must have one basic block before it and one - * after. - * 3. Two basic blocks cannot be directly next to each other. - * 4. If a basic block has a jump instruction, there must be only one and it - * must be at the end of the block. - * 5. The CFG must always be connected - this means that we must insert a fake - * CFG edge for loops with no break statement. 
- * - * The purpose of the second one is so that we have places to insert code during - * GCM, as well as eliminating the possibility of critical edges. - */ -/*@{*/ - -static void -link_non_block_to_block(nir_cf_node *node, nir_block *block) -{ - if (node->type == nir_cf_node_if) { - /* - * We're trying to link an if to a block after it; this just means linking - * the last block of the then and else branches. - */ - - nir_if *if_stmt = nir_cf_node_as_if(node); - - nir_cf_node *last_then = nir_if_last_then_node(if_stmt); - assert(last_then->type == nir_cf_node_block); - nir_block *last_then_block = nir_cf_node_as_block(last_then); - - nir_cf_node *last_else = nir_if_last_else_node(if_stmt); - assert(last_else->type == nir_cf_node_block); - nir_block *last_else_block = nir_cf_node_as_block(last_else); - - if (exec_list_is_empty(&last_then_block->instr_list) || - nir_block_last_instr(last_then_block)->type != nir_instr_type_jump) { - unlink_block_successors(last_then_block); - link_blocks(last_then_block, block, NULL); - } - - if (exec_list_is_empty(&last_else_block->instr_list) || - nir_block_last_instr(last_else_block)->type != nir_instr_type_jump) { - unlink_block_successors(last_else_block); - link_blocks(last_else_block, block, NULL); - } - } else { - assert(node->type == nir_cf_node_loop); - - /* - * We can only get to this codepath if we're inserting a new loop, or - * at least a loop with no break statements; we can't insert break - * statements into a loop when we haven't inserted it into the CFG - * because we wouldn't know which block comes after the loop - * and therefore, which block should be the successor of the block with - * the break). Therefore, we need to insert a fake edge (see invariant - * #5). 
- */ - - nir_loop *loop = nir_cf_node_as_loop(node); - - nir_cf_node *last = nir_loop_last_cf_node(loop); - assert(last->type == nir_cf_node_block); - nir_block *last_block = nir_cf_node_as_block(last); - - last_block->successors[1] = block; - block_add_pred(block, last_block); - } -} - -static void -link_block_to_non_block(nir_block *block, nir_cf_node *node) -{ - if (node->type == nir_cf_node_if) { - /* - * We're trying to link a block to an if after it; this just means linking - * the block to the first block of the then and else branches. - */ - - nir_if *if_stmt = nir_cf_node_as_if(node); - - nir_cf_node *first_then = nir_if_first_then_node(if_stmt); - assert(first_then->type == nir_cf_node_block); - nir_block *first_then_block = nir_cf_node_as_block(first_then); - - nir_cf_node *first_else = nir_if_first_else_node(if_stmt); - assert(first_else->type == nir_cf_node_block); - nir_block *first_else_block = nir_cf_node_as_block(first_else); - - unlink_block_successors(block); - link_blocks(block, first_then_block, first_else_block); - } else { - /* - * For similar reasons as the corresponding case in - * link_non_block_to_block(), don't worry about if the loop header has - * any predecessors that need to be unlinked. - */ - - assert(node->type == nir_cf_node_loop); - - nir_loop *loop = nir_cf_node_as_loop(node); - - nir_cf_node *loop_header = nir_loop_first_cf_node(loop); - assert(loop_header->type == nir_cf_node_block); - nir_block *loop_header_block = nir_cf_node_as_block(loop_header); - - unlink_block_successors(block); - link_blocks(block, loop_header_block, NULL); - } - -} - -/** - * Takes a basic block and inserts a new empty basic block before it, making its - * predecessors point to the new block. This essentially splits the block into - * an empty header and a body so that another non-block CF node can be inserted - * between the two. Note that this does *not* link the two basic blocks, so - * some kind of cleanup *must* be performed after this call. 
- */ - -static nir_block * -split_block_beginning(nir_block *block) -{ - nir_block *new_block = nir_block_create(ralloc_parent(block)); - new_block->cf_node.parent = block->cf_node.parent; - exec_node_insert_node_before(&block->cf_node.node, &new_block->cf_node.node); - - struct set_entry *entry; - set_foreach(block->predecessors, entry) { - nir_block *pred = (nir_block *) entry->key; - - unlink_blocks(pred, block); - link_blocks(pred, new_block, NULL); - } - - return new_block; -} - -static void -rewrite_phi_preds(nir_block *block, nir_block *old_pred, nir_block *new_pred) -{ - nir_foreach_instr_safe(block, instr) { - if (instr->type != nir_instr_type_phi) - break; - - nir_phi_instr *phi = nir_instr_as_phi(instr); - nir_foreach_phi_src(phi, src) { - if (src->pred == old_pred) { - src->pred = new_pred; - break; - } - } - } -} - -/** - * Moves the successors of source to the successors of dest, leaving both - * successors of source NULL. - */ - -static void -move_successors(nir_block *source, nir_block *dest) -{ - nir_block *succ1 = source->successors[0]; - nir_block *succ2 = source->successors[1]; - - if (succ1) { - unlink_blocks(source, succ1); - rewrite_phi_preds(succ1, source, dest); - } - - if (succ2) { - unlink_blocks(source, succ2); - rewrite_phi_preds(succ2, source, dest); - } - - unlink_block_successors(dest); - link_blocks(dest, succ1, succ2); -} - -static nir_block * -split_block_end(nir_block *block) -{ - nir_block *new_block = nir_block_create(ralloc_parent(block)); - new_block->cf_node.parent = block->cf_node.parent; - exec_node_insert_after(&block->cf_node.node, &new_block->cf_node.node); - - move_successors(block, new_block); - - return new_block; -} - -/** - * Inserts a non-basic block between two basic blocks and links them together. 
- */ - -static void -insert_non_block(nir_block *before, nir_cf_node *node, nir_block *after) -{ - node->parent = before->cf_node.parent; - exec_node_insert_after(&before->cf_node.node, &node->node); - link_block_to_non_block(before, node); - link_non_block_to_block(node, after); -} - -/** - * Inserts a non-basic block before a basic block. - */ - -static void -insert_non_block_before_block(nir_cf_node *node, nir_block *block) -{ - /* split off the beginning of block into new_block */ - nir_block *new_block = split_block_beginning(block); - - /* insert our node in between new_block and block */ - insert_non_block(new_block, node, block); -} - -static void -insert_non_block_after_block(nir_block *block, nir_cf_node *node) -{ - /* split off the end of block into new_block */ - nir_block *new_block = split_block_end(block); - - /* insert our node in between block and new_block */ - insert_non_block(block, node, new_block); -} - -/* walk up the control flow tree to find the innermost enclosed loop */ -static nir_loop * -nearest_loop(nir_cf_node *node) -{ - while (node->type != nir_cf_node_loop) { - node = node->parent; - } - - return nir_cf_node_as_loop(node); -} - nir_function_impl * nir_cf_node_get_function(nir_cf_node *node) { @@ -900,384 +615,6 @@ nir_cf_node_get_function(nir_cf_node *node) return nir_cf_node_as_function(node); } -/* - * update the CFG after a jump instruction has been added to the end of a block - */ - -static void -handle_jump(nir_block *block) -{ - nir_instr *instr = nir_block_last_instr(block); - nir_jump_instr *jump_instr = nir_instr_as_jump(instr); - - unlink_block_successors(block); - - nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); - nir_metadata_preserve(impl, nir_metadata_none); - - if (jump_instr->type == nir_jump_break || - jump_instr->type == nir_jump_continue) { - nir_loop *loop = nearest_loop(&block->cf_node); - - if (jump_instr->type == nir_jump_continue) { - nir_cf_node *first_node = 
nir_loop_first_cf_node(loop); - assert(first_node->type == nir_cf_node_block); - nir_block *first_block = nir_cf_node_as_block(first_node); - link_blocks(block, first_block, NULL); - } else { - nir_cf_node *after = nir_cf_node_next(&loop->cf_node); - assert(after->type == nir_cf_node_block); - nir_block *after_block = nir_cf_node_as_block(after); - link_blocks(block, after_block, NULL); - - /* If we inserted a fake link, remove it */ - nir_cf_node *last = nir_loop_last_cf_node(loop); - assert(last->type == nir_cf_node_block); - nir_block *last_block = nir_cf_node_as_block(last); - if (last_block->successors[1] != NULL) - unlink_blocks(last_block, after_block); - } - } else { - assert(jump_instr->type == nir_jump_return); - link_blocks(block, impl->end_block, NULL); - } -} - -static void -handle_remove_jump(nir_block *block, nir_jump_type type) -{ - unlink_block_successors(block); - - if (exec_node_is_tail_sentinel(block->cf_node.node.next)) { - nir_cf_node *parent = block->cf_node.parent; - if (parent->type == nir_cf_node_if) { - nir_cf_node *next = nir_cf_node_next(parent); - assert(next->type == nir_cf_node_block); - nir_block *next_block = nir_cf_node_as_block(next); - - link_blocks(block, next_block, NULL); - } else { - assert(parent->type == nir_cf_node_loop); - nir_loop *loop = nir_cf_node_as_loop(parent); - - nir_cf_node *head = nir_loop_first_cf_node(loop); - assert(head->type == nir_cf_node_block); - nir_block *head_block = nir_cf_node_as_block(head); - - link_blocks(block, head_block, NULL); - } - } else { - nir_cf_node *next = nir_cf_node_next(&block->cf_node); - if (next->type == nir_cf_node_if) { - nir_if *next_if = nir_cf_node_as_if(next); - - nir_cf_node *first_then = nir_if_first_then_node(next_if); - assert(first_then->type == nir_cf_node_block); - nir_block *first_then_block = nir_cf_node_as_block(first_then); - - nir_cf_node *first_else = nir_if_first_else_node(next_if); - assert(first_else->type == nir_cf_node_block); - nir_block 
*first_else_block = nir_cf_node_as_block(first_else); - - link_blocks(block, first_then_block, first_else_block); - } else { - assert(next->type == nir_cf_node_loop); - nir_loop *next_loop = nir_cf_node_as_loop(next); - - nir_cf_node *first = nir_loop_first_cf_node(next_loop); - assert(first->type == nir_cf_node_block); - nir_block *first_block = nir_cf_node_as_block(first); - - link_blocks(block, first_block, NULL); - } - } - - if (type == nir_jump_break) { - nir_loop *loop = nearest_loop(&block->cf_node); - - nir_cf_node *next = nir_cf_node_next(&loop->cf_node); - assert(next->type == nir_cf_node_block); - nir_block *next_block = nir_cf_node_as_block(next); - - if (next_block->predecessors->entries == 0) { - /* insert fake link */ - nir_cf_node *last = nir_loop_last_cf_node(loop); - assert(last->type == nir_cf_node_block); - nir_block *last_block = nir_cf_node_as_block(last); - - last_block->successors[1] = next_block; - block_add_pred(next_block, last_block); - } - } - - nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); - nir_metadata_preserve(impl, nir_metadata_none); -} - -/** - * Inserts a basic block before another by merging the instructions. - * - * @param block the target of the insertion - * @param before the block to be inserted - must not have been inserted before - * @param has_jump whether \before has a jump instruction at the end - */ - -static void -insert_block_before_block(nir_block *block, nir_block *before, bool has_jump) -{ - assert(!has_jump || exec_list_is_empty(&block->instr_list)); - - foreach_list_typed(nir_instr, instr, node, &before->instr_list) { - instr->block = block; - } - - exec_list_prepend(&block->instr_list, &before->instr_list); - - if (has_jump) - handle_jump(block); -} - -/** - * Inserts a basic block after another by merging the instructions. 
- * - * @param block the target of the insertion - * @param after the block to be inserted - must not have been inserted before - * @param has_jump whether \after has a jump instruction at the end - */ - -static void -insert_block_after_block(nir_block *block, nir_block *after, bool has_jump) -{ - foreach_list_typed(nir_instr, instr, node, &after->instr_list) { - instr->block = block; - } - - exec_list_append(&block->instr_list, &after->instr_list); - - if (has_jump) - handle_jump(block); -} - -static void -update_if_uses(nir_cf_node *node) -{ - if (node->type != nir_cf_node_if) - return; - - nir_if *if_stmt = nir_cf_node_as_if(node); - - if_stmt->condition.parent_if = if_stmt; - if (if_stmt->condition.is_ssa) { - list_addtail(&if_stmt->condition.use_link, - &if_stmt->condition.ssa->if_uses); - } else { - list_addtail(&if_stmt->condition.use_link, - &if_stmt->condition.reg.reg->if_uses); - } -} - -void -nir_cf_node_insert_after(nir_cf_node *node, nir_cf_node *after) -{ - update_if_uses(after); - - if (after->type == nir_cf_node_block) { - /* - * either node or the one after it must be a basic block, by invariant #2; - * in either case, just merge the blocks together. - */ - nir_block *after_block = nir_cf_node_as_block(after); - - bool has_jump = !exec_list_is_empty(&after_block->instr_list) && - nir_block_last_instr(after_block)->type == nir_instr_type_jump; - - if (node->type == nir_cf_node_block) { - insert_block_after_block(nir_cf_node_as_block(node), after_block, - has_jump); - } else { - nir_cf_node *next = nir_cf_node_next(node); - assert(next->type == nir_cf_node_block); - nir_block *next_block = nir_cf_node_as_block(next); - - insert_block_before_block(next_block, after_block, has_jump); - } - } else { - if (node->type == nir_cf_node_block) { - insert_non_block_after_block(nir_cf_node_as_block(node), after); - } else { - /* - * We have to insert a non-basic block after a non-basic block. 
Since - * every non-basic block has a basic block after it, this is equivalent - * to inserting a non-basic block before a basic block. - */ - - nir_cf_node *next = nir_cf_node_next(node); - assert(next->type == nir_cf_node_block); - nir_block *next_block = nir_cf_node_as_block(next); - - insert_non_block_before_block(after, next_block); - } - } - - nir_function_impl *impl = nir_cf_node_get_function(node); - nir_metadata_preserve(impl, nir_metadata_none); -} - -void -nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before) -{ - update_if_uses(before); - - if (before->type == nir_cf_node_block) { - nir_block *before_block = nir_cf_node_as_block(before); - - bool has_jump = !exec_list_is_empty(&before_block->instr_list) && - nir_block_last_instr(before_block)->type == nir_instr_type_jump; - - if (node->type == nir_cf_node_block) { - insert_block_before_block(nir_cf_node_as_block(node), before_block, - has_jump); - } else { - nir_cf_node *prev = nir_cf_node_prev(node); - assert(prev->type == nir_cf_node_block); - nir_block *prev_block = nir_cf_node_as_block(prev); - - insert_block_after_block(prev_block, before_block, has_jump); - } - } else { - if (node->type == nir_cf_node_block) { - insert_non_block_before_block(before, nir_cf_node_as_block(node)); - } else { - /* - * We have to insert a non-basic block before a non-basic block. This - * is equivalent to inserting a non-basic block after a basic block. 
- */ - - nir_cf_node *prev_node = nir_cf_node_prev(node); - assert(prev_node->type == nir_cf_node_block); - nir_block *prev_block = nir_cf_node_as_block(prev_node); - - insert_non_block_after_block(prev_block, before); - } - } - - nir_function_impl *impl = nir_cf_node_get_function(node); - nir_metadata_preserve(impl, nir_metadata_none); -} - -void -nir_cf_node_insert_begin(struct exec_list *list, nir_cf_node *node) -{ - nir_cf_node *begin = exec_node_data(nir_cf_node, list->head, node); - nir_cf_node_insert_before(begin, node); -} - -void -nir_cf_node_insert_end(struct exec_list *list, nir_cf_node *node) -{ - nir_cf_node *end = exec_node_data(nir_cf_node, list->tail_pred, node); - nir_cf_node_insert_after(end, node); -} - -/** - * Stitch two basic blocks together into one. The aggregate must have the same - * predecessors as the first and the same successors as the second. - */ - -static void -stitch_blocks(nir_block *before, nir_block *after) -{ - /* - * We move after into before, so we have to deal with up to 2 successors vs. - * possibly a large number of predecessors. - * - * TODO: special case when before is empty and after isn't? 
- */ - - move_successors(after, before); - - foreach_list_typed(nir_instr, instr, node, &after->instr_list) { - instr->block = before; - } - - exec_list_append(&before->instr_list, &after->instr_list); - exec_node_remove(&after->cf_node.node); -} - -static void -remove_defs_uses(nir_instr *instr); - -static void -cleanup_cf_node(nir_cf_node *node) -{ - switch (node->type) { - case nir_cf_node_block: { - nir_block *block = nir_cf_node_as_block(node); - /* We need to walk the instructions and clean up defs/uses */ - nir_foreach_instr(block, instr) - remove_defs_uses(instr); - break; - } - - case nir_cf_node_if: { - nir_if *if_stmt = nir_cf_node_as_if(node); - foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list) - cleanup_cf_node(child); - foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list) - cleanup_cf_node(child); - - list_del(&if_stmt->condition.use_link); - break; - } - - case nir_cf_node_loop: { - nir_loop *loop = nir_cf_node_as_loop(node); - foreach_list_typed(nir_cf_node, child, node, &loop->body) - cleanup_cf_node(child); - break; - } - case nir_cf_node_function: { - nir_function_impl *impl = nir_cf_node_as_function(node); - foreach_list_typed(nir_cf_node, child, node, &impl->body) - cleanup_cf_node(child); - break; - } - default: - unreachable("Invalid CF node type"); - } -} - -void -nir_cf_node_remove(nir_cf_node *node) -{ - nir_function_impl *impl = nir_cf_node_get_function(node); - nir_metadata_preserve(impl, nir_metadata_none); - - if (node->type == nir_cf_node_block) { - /* - * Basic blocks can't really be removed by themselves, since they act as - * padding between the non-basic blocks. So all we do here is empty the - * block of instructions. - * - * TODO: could we assert here? 
- */ - exec_list_make_empty(&nir_cf_node_as_block(node)->instr_list); - } else { - nir_cf_node *before = nir_cf_node_prev(node); - assert(before->type == nir_cf_node_block); - nir_block *before_block = nir_cf_node_as_block(before); - - nir_cf_node *after = nir_cf_node_next(node); - assert(after->type == nir_cf_node_block); - nir_block *after_block = nir_cf_node_as_block(after); - - exec_node_remove(&node->node); - stitch_blocks(before_block, after_block); - } - - cleanup_cf_node(node); -} - static bool add_use_cb(nir_src *src, void *state) { @@ -1348,7 +685,7 @@ nir_instr_insert_after(nir_instr *instr, nir_instr *after) exec_node_insert_after(&instr->node, &after->node); if (after->type == nir_instr_type_jump) - handle_jump(after->block); + nir_handle_add_jump(after->block); } void @@ -1362,7 +699,7 @@ nir_instr_insert_before_block(nir_block *block, nir_instr *before) exec_list_push_head(&block->instr_list, &before->node); if (before->type == nir_instr_type_jump) - handle_jump(block); + nir_handle_add_jump(block); } void @@ -1378,7 +715,7 @@ nir_instr_insert_after_block(nir_block *block, nir_instr *after) exec_list_push_tail(&block->instr_list, &after->node); if (after->type == nir_instr_type_jump) - handle_jump(block); + nir_handle_add_jump(block); } void @@ -1456,7 +793,7 @@ void nir_instr_remove(nir_instr *instr) if (instr->type == nir_instr_type_jump) { nir_jump_instr *jump_instr = nir_instr_as_jump(instr); - handle_remove_jump(instr->block, jump_instr->type); + nir_handle_remove_jump(instr->block, jump_instr->type); } } diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 70af06e..f78596d 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1122,6 +1122,8 @@ typedef struct { #define nir_foreach_phi_src(phi, entry) \ foreach_list_typed(nir_phi_src, entry, node, &(phi)->srcs) +#define nir_foreach_phi_src_safe(phi, entry) \ + foreach_list_typed_safe(nir_phi_src, entry, node, &(phi)->srcs) typedef struct { nir_instr instr; @@ -1330,7 +1332,7 @@ 
typedef struct { struct exec_list body; /** < list of nir_cf_node */ - nir_block *start_block, *end_block; + nir_block *end_block; /** list for all local variables in the function */ struct exec_list locals; @@ -1357,6 +1359,12 @@ typedef struct { nir_metadata valid_metadata; } nir_function_impl; +static inline nir_block * +nir_start_block(nir_function_impl *impl) +{ + return (nir_block *) exec_list_get_head(&impl->body); +} + static inline nir_cf_node * nir_cf_node_next(nir_cf_node *node) { @@ -1488,8 +1496,8 @@ typedef struct nir_shader { */ unsigned num_inputs, num_uniforms, num_outputs; - /** the number of uniforms that are only accessed directly */ - unsigned num_direct_uniforms; + /** The shader stage, such as MESA_SHADER_VERTEX. */ + gl_shader_stage stage; } nir_shader; #define nir_foreach_overload(shader, overload) \ @@ -1498,6 +1506,7 @@ typedef struct nir_shader { &(func)->overload_list) nir_shader *nir_shader_create(void *mem_ctx, + gl_shader_stage stage, const nir_shader_compiler_options *options); /** creates a register, including assigning it an index and adding it to the list */ @@ -1521,21 +1530,6 @@ nir_loop *nir_loop_create(void *mem_ctx); nir_function_impl *nir_cf_node_get_function(nir_cf_node *node); -/** puts a control flow node immediately after another control flow node */ -void nir_cf_node_insert_after(nir_cf_node *node, nir_cf_node *after); - -/** puts a control flow node immediately before another control flow node */ -void nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before); - -/** puts a control flow node at the beginning of a list from an if, loop, or function */ -void nir_cf_node_insert_begin(struct exec_list *list, nir_cf_node *node); - -/** puts a control flow node at the end of a list from an if, loop, or function */ -void nir_cf_node_insert_end(struct exec_list *list, nir_cf_node *node); - -/** removes a control flow node, doing any cleanup necessary */ -void nir_cf_node_remove(nir_cf_node *node); - /** requests that 
the given pieces of metadata be generated */ void nir_metadata_require(nir_function_impl *impl, nir_metadata required); /** dirties all but the preserved metadata */ @@ -1660,15 +1654,10 @@ void nir_lower_locals_to_regs(nir_shader *shader); void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, - bool is_scalar); -void nir_assign_var_locations_direct_first(nir_shader *shader, - struct exec_list *var_list, - unsigned *direct_size, - unsigned *size, - bool is_scalar); - -void nir_lower_io(nir_shader *shader, bool is_scalar); + int (*type_size)(const struct glsl_type *)); +void nir_lower_io(nir_shader *shader, + int (*type_size)(const struct glsl_type *)); void nir_lower_vars_to_ssa(nir_shader *shader); void nir_remove_dead_variables(nir_shader *shader); @@ -1680,8 +1669,7 @@ void nir_lower_load_const_to_scalar(nir_shader *shader); void nir_lower_phis_to_scalar(nir_shader *shader); void nir_lower_samplers(nir_shader *shader, - const struct gl_shader_program *shader_program, - gl_shader_stage stage); + const struct gl_shader_program *shader_program); void nir_lower_samplers_for_vk(nir_shader *shader); void nir_lower_system_values(nir_shader *shader); diff --git a/src/glsl/nir/nir_control_flow.c b/src/glsl/nir/nir_control_flow.c new file mode 100644 index 0000000..5c03375 --- /dev/null +++ b/src/glsl/nir/nir_control_flow.c @@ -0,0 +1,769 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in 
all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir_control_flow_private.h" + +/** + * \name Control flow modification + * + * These functions modify the control flow tree while keeping the control flow + * graph up-to-date. The invariants respected are: + * 1. Each then statement, else statement, or loop body must have at least one + * control flow node. + * 2. Each if-statement and loop must have one basic block before it and one + * after. + * 3. Two basic blocks cannot be directly next to each other. + * 4. If a basic block has a jump instruction, there must be only one and it + * must be at the end of the block. + * 5. The CFG must always be connected - this means that we must insert a fake + * CFG edge for loops with no break statement. + * + * The purpose of the second one is so that we have places to insert code during + * GCM, as well as eliminating the possibility of critical edges. 
+ */ +/*@{*/ + +static bool +block_ends_in_jump(nir_block *block) +{ + return !exec_list_is_empty(&block->instr_list) && + nir_block_last_instr(block)->type == nir_instr_type_jump; +} + +static inline void +block_add_pred(nir_block *block, nir_block *pred) +{ + _mesa_set_add(block->predecessors, pred); +} + +static void +link_blocks(nir_block *pred, nir_block *succ1, nir_block *succ2) +{ + pred->successors[0] = succ1; + if (succ1 != NULL) + block_add_pred(succ1, pred); + + pred->successors[1] = succ2; + if (succ2 != NULL) + block_add_pred(succ2, pred); +} + +static void +unlink_blocks(nir_block *pred, nir_block *succ) +{ + if (pred->successors[0] == succ) { + pred->successors[0] = pred->successors[1]; + pred->successors[1] = NULL; + } else { + assert(pred->successors[1] == succ); + pred->successors[1] = NULL; + } + + struct set_entry *entry = _mesa_set_search(succ->predecessors, pred); + + assert(entry); + + _mesa_set_remove(succ->predecessors, entry); +} + +static void +unlink_block_successors(nir_block *block) +{ + if (block->successors[0] != NULL) + unlink_blocks(block, block->successors[0]); + if (block->successors[1] != NULL) + unlink_blocks(block, block->successors[1]); +} + +static void +link_non_block_to_block(nir_cf_node *node, nir_block *block) +{ + if (node->type == nir_cf_node_if) { + /* + * We're trying to link an if to a block after it; this just means linking + * the last block of the then and else branches. 
+ */ + + nir_if *if_stmt = nir_cf_node_as_if(node); + + nir_cf_node *last_then = nir_if_last_then_node(if_stmt); + assert(last_then->type == nir_cf_node_block); + nir_block *last_then_block = nir_cf_node_as_block(last_then); + + nir_cf_node *last_else = nir_if_last_else_node(if_stmt); + assert(last_else->type == nir_cf_node_block); + nir_block *last_else_block = nir_cf_node_as_block(last_else); + + if (!block_ends_in_jump(last_then_block)) { + unlink_block_successors(last_then_block); + link_blocks(last_then_block, block, NULL); + } + + if (!block_ends_in_jump(last_else_block)) { + unlink_block_successors(last_else_block); + link_blocks(last_else_block, block, NULL); + } + } else { + assert(node->type == nir_cf_node_loop); + + /* + * We can only get to this codepath if we're inserting a new loop, or + * at least a loop with no break statements; we can't insert break + * statements into a loop when we haven't inserted it into the CFG + * because we wouldn't know which block comes after the loop + * and therefore, which block should be the successor of the block with + * the break). Therefore, we need to insert a fake edge (see invariant + * #5). + */ + + nir_loop *loop = nir_cf_node_as_loop(node); + + nir_cf_node *last = nir_loop_last_cf_node(loop); + assert(last->type == nir_cf_node_block); + nir_block *last_block = nir_cf_node_as_block(last); + + last_block->successors[1] = block; + block_add_pred(block, last_block); + } +} + +static void +link_block_to_non_block(nir_block *block, nir_cf_node *node) +{ + if (node->type == nir_cf_node_if) { + /* + * We're trying to link a block to an if after it; this just means linking + * the block to the first block of the then and else branches. 
+ */ + + nir_if *if_stmt = nir_cf_node_as_if(node); + + nir_cf_node *first_then = nir_if_first_then_node(if_stmt); + assert(first_then->type == nir_cf_node_block); + nir_block *first_then_block = nir_cf_node_as_block(first_then); + + nir_cf_node *first_else = nir_if_first_else_node(if_stmt); + assert(first_else->type == nir_cf_node_block); + nir_block *first_else_block = nir_cf_node_as_block(first_else); + + unlink_block_successors(block); + link_blocks(block, first_then_block, first_else_block); + } else { + /* + * For similar reasons as the corresponding case in + * link_non_block_to_block(), don't worry about if the loop header has + * any predecessors that need to be unlinked. + */ + + assert(node->type == nir_cf_node_loop); + + nir_loop *loop = nir_cf_node_as_loop(node); + + nir_cf_node *loop_header = nir_loop_first_cf_node(loop); + assert(loop_header->type == nir_cf_node_block); + nir_block *loop_header_block = nir_cf_node_as_block(loop_header); + + unlink_block_successors(block); + link_blocks(block, loop_header_block, NULL); + } + +} + +/** + * Takes a basic block and inserts a new empty basic block before it, making its + * predecessors point to the new block. This essentially splits the block into + * an empty header and a body so that another non-block CF node can be inserted + * between the two. Note that this does *not* link the two basic blocks, so + * some kind of cleanup *must* be performed after this call. 
+ */ + +static nir_block * +split_block_beginning(nir_block *block) +{ + nir_block *new_block = nir_block_create(ralloc_parent(block)); + new_block->cf_node.parent = block->cf_node.parent; + exec_node_insert_node_before(&block->cf_node.node, &new_block->cf_node.node); + + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + + unlink_blocks(pred, block); + link_blocks(pred, new_block, NULL); + } + + /* Any phi nodes must stay part of the new block, or else their + * sources will be messed up. This will reverse the order of the phi's, but + * order shouldn't matter. + */ + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + exec_node_remove(&instr->node); + instr->block = new_block; + exec_list_push_head(&new_block->instr_list, &instr->node); + } + + return new_block; +} + +static void +rewrite_phi_preds(nir_block *block, nir_block *old_pred, nir_block *new_pred) +{ + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_foreach_phi_src(phi, src) { + if (src->pred == old_pred) { + src->pred = new_pred; + break; + } + } + } +} + +static void +insert_phi_undef(nir_block *block, nir_block *pred) +{ + nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(ralloc_parent(phi), + phi->dest.ssa.num_components); + nir_instr_insert_before_cf_list(&impl->body, &undef->instr); + nir_phi_src *src = ralloc(phi, nir_phi_src); + src->pred = pred; + src->src.parent_instr = &phi->instr; + src->src.is_ssa = true; + src->src.ssa = &undef->def; + + list_addtail(&src->src.use_link, &undef->def.uses); + + exec_list_push_tail(&phi->srcs, &src->node); + } +} + +/** + * Moves the successors of 
source to the successors of dest, leaving both + * successors of source NULL. + */ + +static void +move_successors(nir_block *source, nir_block *dest) +{ + nir_block *succ1 = source->successors[0]; + nir_block *succ2 = source->successors[1]; + + if (succ1) { + unlink_blocks(source, succ1); + rewrite_phi_preds(succ1, source, dest); + } + + if (succ2) { + unlink_blocks(source, succ2); + rewrite_phi_preds(succ2, source, dest); + } + + unlink_block_successors(dest); + link_blocks(dest, succ1, succ2); +} + +/* Given a basic block with no successors that has been inserted into the + * control flow tree, gives it the successors it would normally have assuming + * it doesn't end in a jump instruction. Also inserts phi sources with undefs + * if necessary. + */ +static void +block_add_normal_succs(nir_block *block) +{ + if (exec_node_is_tail_sentinel(block->cf_node.node.next)) { + nir_cf_node *parent = block->cf_node.parent; + if (parent->type == nir_cf_node_if) { + nir_cf_node *next = nir_cf_node_next(parent); + assert(next->type == nir_cf_node_block); + nir_block *next_block = nir_cf_node_as_block(next); + + link_blocks(block, next_block, NULL); + } else { + assert(parent->type == nir_cf_node_loop); + nir_loop *loop = nir_cf_node_as_loop(parent); + + nir_cf_node *head = nir_loop_first_cf_node(loop); + assert(head->type == nir_cf_node_block); + nir_block *head_block = nir_cf_node_as_block(head); + + link_blocks(block, head_block, NULL); + insert_phi_undef(head_block, block); + } + } else { + nir_cf_node *next = nir_cf_node_next(&block->cf_node); + if (next->type == nir_cf_node_if) { + nir_if *next_if = nir_cf_node_as_if(next); + + nir_cf_node *first_then = nir_if_first_then_node(next_if); + assert(first_then->type == nir_cf_node_block); + nir_block *first_then_block = nir_cf_node_as_block(first_then); + + nir_cf_node *first_else = nir_if_first_else_node(next_if); + assert(first_else->type == nir_cf_node_block); + nir_block *first_else_block = 
nir_cf_node_as_block(first_else); + + link_blocks(block, first_then_block, first_else_block); + } else { + assert(next->type == nir_cf_node_loop); + nir_loop *next_loop = nir_cf_node_as_loop(next); + + nir_cf_node *first = nir_loop_first_cf_node(next_loop); + assert(first->type == nir_cf_node_block); + nir_block *first_block = nir_cf_node_as_block(first); + + link_blocks(block, first_block, NULL); + insert_phi_undef(first_block, block); + } + } +} + +static nir_block * +split_block_end(nir_block *block) +{ + nir_block *new_block = nir_block_create(ralloc_parent(block)); + new_block->cf_node.parent = block->cf_node.parent; + exec_node_insert_after(&block->cf_node.node, &new_block->cf_node.node); + + if (block_ends_in_jump(block)) { + /* Figure out what successor block would've had if it didn't have a jump + * instruction, and make new_block have that successor. + */ + block_add_normal_succs(new_block); + } else { + move_successors(block, new_block); + } + + return new_block; +} + +static nir_block * +split_block_before_instr(nir_instr *instr) +{ + assert(instr->type != nir_instr_type_phi); + nir_block *new_block = split_block_beginning(instr->block); + + nir_foreach_instr_safe(instr->block, cur_instr) { + if (cur_instr == instr) + break; + + exec_node_remove(&cur_instr->node); + cur_instr->block = new_block; + exec_list_push_tail(&new_block->instr_list, &cur_instr->node); + } + + return new_block; +} + +/* Splits a basic block at the point specified by the cursor. The "before" and + * "after" arguments are filled out with the blocks resulting from the split + * if non-NULL. Note that the "beginning" of the block is actually interpreted + * as before the first non-phi instruction, and it's illegal to split a block + * before a phi instruction. 
+ */ + +static void +split_block_cursor(nir_cursor cursor, + nir_block **_before, nir_block **_after) +{ + nir_block *before, *after; + switch (cursor.option) { + case nir_cursor_before_block: + after = cursor.block; + before = split_block_beginning(cursor.block); + break; + + case nir_cursor_after_block: + before = cursor.block; + after = split_block_end(cursor.block); + break; + + case nir_cursor_before_instr: + after = cursor.instr->block; + before = split_block_before_instr(cursor.instr); + break; + + case nir_cursor_after_instr: + /* We lower this to split_block_before_instr() so that we can keep the + * after-a-jump-instr case contained to split_block_end(). + */ + if (nir_instr_is_last(cursor.instr)) { + before = cursor.instr->block; + after = split_block_end(cursor.instr->block); + } else { + after = cursor.instr->block; + before = split_block_before_instr(nir_instr_next(cursor.instr)); + } + break; + } + + if (_before) + *_before = before; + if (_after) + *_after = after; +} + +/** + * Inserts a non-basic block between two basic blocks and links them together. 
+ */ + +static void +insert_non_block(nir_block *before, nir_cf_node *node, nir_block *after) +{ + node->parent = before->cf_node.parent; + exec_node_insert_after(&before->cf_node.node, &node->node); + link_block_to_non_block(before, node); + link_non_block_to_block(node, after); +} + +/* walk up the control flow tree to find the innermost enclosed loop */ +static nir_loop * +nearest_loop(nir_cf_node *node) +{ + while (node->type != nir_cf_node_loop) { + node = node->parent; + } + + return nir_cf_node_as_loop(node); +} + +/* + * update the CFG after a jump instruction has been added to the end of a block + */ + +void +nir_handle_add_jump(nir_block *block) +{ + nir_instr *instr = nir_block_last_instr(block); + nir_jump_instr *jump_instr = nir_instr_as_jump(instr); + + unlink_block_successors(block); + + nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); + nir_metadata_preserve(impl, nir_metadata_none); + + if (jump_instr->type == nir_jump_break || + jump_instr->type == nir_jump_continue) { + nir_loop *loop = nearest_loop(&block->cf_node); + + if (jump_instr->type == nir_jump_continue) { + nir_cf_node *first_node = nir_loop_first_cf_node(loop); + assert(first_node->type == nir_cf_node_block); + nir_block *first_block = nir_cf_node_as_block(first_node); + link_blocks(block, first_block, NULL); + } else { + nir_cf_node *after = nir_cf_node_next(&loop->cf_node); + assert(after->type == nir_cf_node_block); + nir_block *after_block = nir_cf_node_as_block(after); + link_blocks(block, after_block, NULL); + + /* If we inserted a fake link, remove it */ + nir_cf_node *last = nir_loop_last_cf_node(loop); + assert(last->type == nir_cf_node_block); + nir_block *last_block = nir_cf_node_as_block(last); + if (last_block->successors[1] != NULL) + unlink_blocks(last_block, after_block); + } + } else { + assert(jump_instr->type == nir_jump_return); + link_blocks(block, impl->end_block, NULL); + } +} + +static void +remove_phi_src(nir_block *block, nir_block *pred) 
+{ + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_foreach_phi_src_safe(phi, src) { + if (src->pred == pred) { + list_del(&src->src.use_link); + exec_node_remove(&src->node); + } + } + } +} + +/* Removes the successor of a block with a jump, and inserts a fake edge for + * infinite loops. Note that the jump to be eliminated may be free-floating. + */ + +static +void unlink_jump(nir_block *block, nir_jump_type type) +{ + if (block->successors[0]) + remove_phi_src(block->successors[0], block); + if (block->successors[1]) + remove_phi_src(block->successors[1], block); + + if (type == nir_jump_break) { + nir_block *next = block->successors[0]; + + if (next->predecessors->entries == 1) { + nir_loop *loop = + nir_cf_node_as_loop(nir_cf_node_prev(&next->cf_node)); + + /* insert fake link */ + nir_cf_node *last = nir_loop_last_cf_node(loop); + assert(last->type == nir_cf_node_block); + nir_block *last_block = nir_cf_node_as_block(last); + + last_block->successors[1] = next; + block_add_pred(next, last_block); + } + } + + unlink_block_successors(block); +} + +void +nir_handle_remove_jump(nir_block *block, nir_jump_type type) +{ + unlink_jump(block, type); + + block_add_normal_succs(block); + + nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); + nir_metadata_preserve(impl, nir_metadata_none); +} + +static void +update_if_uses(nir_cf_node *node) +{ + if (node->type != nir_cf_node_if) + return; + + nir_if *if_stmt = nir_cf_node_as_if(node); + + if_stmt->condition.parent_if = if_stmt; + if (if_stmt->condition.is_ssa) { + list_addtail(&if_stmt->condition.use_link, + &if_stmt->condition.ssa->if_uses); + } else { + list_addtail(&if_stmt->condition.use_link, + &if_stmt->condition.reg.reg->if_uses); + } +} + +/** + * Stitch two basic blocks together into one. The aggregate must have the same + * predecessors as the first and the same successors as the second. 
+ */ + +static void +stitch_blocks(nir_block *before, nir_block *after) +{ + /* + * We move after into before, so we have to deal with up to 2 successors vs. + * possibly a large number of predecessors. + * + * TODO: special case when before is empty and after isn't? + */ + + if (block_ends_in_jump(before)) { + assert(exec_list_is_empty(&after->instr_list)); + if (after->successors[0]) + remove_phi_src(after->successors[0], after); + if (after->successors[1]) + remove_phi_src(after->successors[1], after); + unlink_block_successors(after); + exec_node_remove(&after->cf_node.node); + } else { + move_successors(after, before); + + foreach_list_typed(nir_instr, instr, node, &after->instr_list) { + instr->block = before; + } + + exec_list_append(&before->instr_list, &after->instr_list); + exec_node_remove(&after->cf_node.node); + } +} + +void +nir_cf_node_insert(nir_cursor cursor, nir_cf_node *node) +{ + nir_block *before, *after; + + split_block_cursor(cursor, &before, &after); + + if (node->type == nir_cf_node_block) { + nir_block *block = nir_cf_node_as_block(node); + exec_node_insert_after(&before->cf_node.node, &block->cf_node.node); + block->cf_node.parent = before->cf_node.parent; + /* stitch_blocks() assumes that any block that ends with a jump has + * already been setup with the correct successors, so we need to set + * up jumps here as the block is being inserted. 
+ */ + if (block_ends_in_jump(block)) + nir_handle_add_jump(block); + + stitch_blocks(block, after); + stitch_blocks(before, block); + } else { + update_if_uses(node); + insert_non_block(before, node, after); + } +} + +static bool +replace_ssa_def_uses(nir_ssa_def *def, void *void_impl) +{ + nir_function_impl *impl = void_impl; + void *mem_ctx = ralloc_parent(impl); + + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(mem_ctx, def->num_components); + nir_instr_insert_before_cf_list(&impl->body, &undef->instr); + nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def), mem_ctx); + return true; +} + +static void +cleanup_cf_node(nir_cf_node *node, nir_function_impl *impl) +{ + switch (node->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(node); + /* We need to walk the instructions and clean up defs/uses */ + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_jump) { + nir_jump_type jump_type = nir_instr_as_jump(instr)->type; + unlink_jump(block, jump_type); + } else { + nir_foreach_ssa_def(instr, replace_ssa_def_uses, impl); + nir_instr_remove(instr); + } + } + break; + } + + case nir_cf_node_if: { + nir_if *if_stmt = nir_cf_node_as_if(node); + foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list) + cleanup_cf_node(child, impl); + foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list) + cleanup_cf_node(child, impl); + + list_del(&if_stmt->condition.use_link); + break; + } + + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(node); + foreach_list_typed(nir_cf_node, child, node, &loop->body) + cleanup_cf_node(child, impl); + break; + } + case nir_cf_node_function: { + nir_function_impl *impl = nir_cf_node_as_function(node); + foreach_list_typed(nir_cf_node, child, node, &impl->body) + cleanup_cf_node(child, impl); + break; + } + default: + unreachable("Invalid CF node type"); + } +} + +void +nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end) +{ 
+ nir_block *block_begin, *block_end, *block_before, *block_after; + + /* In the case where begin points to an instruction in some basic block and + * end points to the end of the same basic block, we rely on the fact that + * splitting on an instruction moves earlier instructions into a new basic + * block. If the later instructions were moved instead, then the end cursor + * would be pointing to the same place that begin used to point to, which + * is obviously not what we want. + */ + split_block_cursor(begin, &block_before, &block_begin); + split_block_cursor(end, &block_end, &block_after); + + extracted->impl = nir_cf_node_get_function(&block_begin->cf_node); + exec_list_make_empty(&extracted->list); + + nir_cf_node *cf_node = &block_begin->cf_node; + nir_cf_node *cf_node_end = &block_end->cf_node; + while (true) { + nir_cf_node *next = nir_cf_node_next(cf_node); + + exec_node_remove(&cf_node->node); + cf_node->parent = NULL; + exec_list_push_tail(&extracted->list, &cf_node->node); + + if (cf_node == cf_node_end) + break; + + cf_node = next; + } + + stitch_blocks(block_before, block_after); +} + +void +nir_cf_reinsert(nir_cf_list *cf_list, nir_cursor cursor) +{ + nir_block *before, *after; + + split_block_cursor(cursor, &before, &after); + + foreach_list_typed_safe(nir_cf_node, node, node, &cf_list->list) { + exec_node_remove(&node->node); + node->parent = before->cf_node.parent; + exec_node_insert_node_before(&after->cf_node.node, &node->node); + } + + stitch_blocks(before, + nir_cf_node_as_block(nir_cf_node_next(&before->cf_node))); + stitch_blocks(nir_cf_node_as_block(nir_cf_node_prev(&after->cf_node)), + after); +} + +void +nir_cf_delete(nir_cf_list *cf_list) +{ + foreach_list_typed(nir_cf_node, node, node, &cf_list->list) { + cleanup_cf_node(node, cf_list->impl); + } +} diff --git a/src/glsl/nir/nir_control_flow.h b/src/glsl/nir/nir_control_flow.h new file mode 100644 index 0000000..5efd41c --- /dev/null +++ b/src/glsl/nir/nir_control_flow.h @@ -0,0 
+1,251 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +/** NIR Control Flow Modification + * + * This file contains various API's that make modifying control flow in NIR, + * while maintaining the invariants checked by the validator, much easier. + * There are two parts to this: + * + * 1. Inserting control flow (if's and loops) in various places, for creating + * IR either from scratch or as part of some lowering pass. + * 2. Taking existing pieces of the IR and either moving them around or + * deleting them. + */ + +/* Helper struct for representing a point to extract/insert. Helps reduce the + * combinatorial explosion of possible points to extract. 
+ */ + +typedef enum { + nir_cursor_before_block, + nir_cursor_after_block, + nir_cursor_before_instr, + nir_cursor_after_instr, +} nir_cursor_option; + +typedef struct { + nir_cursor_option option; + union { + nir_block *block; + nir_instr *instr; + }; +} nir_cursor; + +static inline nir_cursor +nir_before_block(nir_block *block) +{ + nir_cursor cursor; + cursor.option = nir_cursor_before_block; + cursor.block = block; + return cursor; +} + +static inline nir_cursor +nir_after_block(nir_block *block) +{ + nir_cursor cursor; + cursor.option = nir_cursor_after_block; + cursor.block = block; + return cursor; +} + +static inline nir_cursor +nir_before_instr(nir_instr *instr) +{ + nir_cursor cursor; + cursor.option = nir_cursor_before_instr; + cursor.instr = instr; + return cursor; +} + +static inline nir_cursor +nir_after_instr(nir_instr *instr) +{ + nir_cursor cursor; + cursor.option = nir_cursor_after_instr; + cursor.instr = instr; + return cursor; +} + +static inline nir_cursor +nir_before_cf_node(nir_cf_node *node) +{ + if (node->type == nir_cf_node_block) + return nir_before_block(nir_cf_node_as_block(node)); + + return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node))); +} + +static inline nir_cursor +nir_after_cf_node(nir_cf_node *node) +{ + if (node->type == nir_cf_node_block) + return nir_after_block(nir_cf_node_as_block(node)); + + return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node))); +} + +static inline nir_cursor +nir_before_cf_list(struct exec_list *cf_list) +{ + nir_cf_node *first_node = exec_node_data(nir_cf_node, + exec_list_get_head(cf_list), node); + return nir_before_cf_node(first_node); +} + +static inline nir_cursor +nir_after_cf_list(struct exec_list *cf_list) +{ + nir_cf_node *last_node = exec_node_data(nir_cf_node, + exec_list_get_tail(cf_list), node); + return nir_after_cf_node(last_node); +} + +/** Control flow insertion. 
*/ + +/** puts a control flow node where the cursor is */ +void nir_cf_node_insert(nir_cursor cursor, nir_cf_node *node); + +/** puts a control flow node immediately after another control flow node */ +static inline void +nir_cf_node_insert_after(nir_cf_node *node, nir_cf_node *after) +{ + nir_cf_node_insert(nir_after_cf_node(node), after); +} + +/** puts a control flow node immediately before another control flow node */ +static inline void +nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before) +{ + nir_cf_node_insert(nir_before_cf_node(node), before); +} + +/** puts a control flow node at the beginning of a list from an if, loop, or function */ +static inline void +nir_cf_node_insert_begin(struct exec_list *list, nir_cf_node *node) +{ + nir_cf_node_insert(nir_before_cf_list(list), node); +} + +/** puts a control flow node at the end of a list from an if, loop, or function */ +static inline void +nir_cf_node_insert_end(struct exec_list *list, nir_cf_node *node) +{ + nir_cf_node_insert(nir_after_cf_list(list), node); +} + + +/** Control flow motion. + * + * These functions let you take a part of a control flow list (basically + * equivalent to a series of statements in GLSL) and "extract" it from the IR, + * so that it's a free-floating piece of IR that can be either re-inserted + * somewhere else or deleted entirely. A few notes on using it: + * + * 1. Phi nodes are considered attached to the piece of control flow that + * their sources come from. There are three places where phi nodes can + * occur, which are the three places where a block can have multiple + * predecessors: + * + * 1) After an if statement, if neither branch ends in a jump. + * 2) After a loop, if there are multiple break's. + * 3) At the beginning of a loop. + * + * For #1, the phi node is considered to be part of the if, and for #2 and + * #3 the phi node is considered to be part of the loop. 
This allows us to + * keep phi's intact, but it means that phi nodes cannot be separated from + * the control flow they come from. For example, extracting an if without + * extracting all the phi nodes after it is not allowed, and neither is + * extracting only some of the phi nodes at the beginning of a block. It + * also means that extracting from the beginning of a basic block actually + * means extracting from the first non-phi instruction, since there's no + * situation where extracting phi nodes without extracting what comes + * before them makes any sense. + * + * 2. Phi node sources are guaranteed to remain valid, meaning that they still + * correspond one-to-one with the predecessors of the basic block they're + * part of. In addition, the original sources will be preserved unless they + * correspond to a break or continue that was deleted. However, no attempt + * is made to ensure that SSA form is maintained. In particular, it is + * *not* guaranteed that definitions of SSA values will dominate all their + * uses after all is said and done. Either the caller must ensure that this + * is the case, or it must insert extra phi nodes to restore SSA. + * + * 3. It is invalid to move a piece of IR with a break/continue outside of the + * loop it references. Doing this will result in invalid + * successors/predecessors and phi node sources. + * + * 4. It is invalid to move a piece of IR from one function implementation to + * another. + * + * 5. Extracting a control flow list will leave lots of dangling references to + * and from other pieces of the IR. It also leaves things in a not 100% + * consistent state. This means that some things (e.g. inserting + * instructions) might not work reliably on the extracted control flow. It + * also means that extracting control flow without re-inserting it or + * deleting it is a Bad Thing (tm). 
+ */ + +typedef struct { + struct exec_list list; + nir_function_impl *impl; /* for cleaning up if the list is deleted */ +} nir_cf_list; + +void nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end); + +void nir_cf_reinsert(nir_cf_list *cf_list, nir_cursor cursor); + +void nir_cf_delete(nir_cf_list *cf_list); + +static inline void +nir_cf_list_extract(nir_cf_list *extracted, struct exec_list *cf_list) +{ + nir_cf_extract(extracted, nir_before_cf_list(cf_list), + nir_after_cf_list(cf_list)); +} + +/** removes a control flow node, doing any cleanup necessary */ +static inline void +nir_cf_node_remove(nir_cf_node *node) +{ + nir_cf_list list; + nir_cf_extract(&list, nir_before_cf_node(node), nir_after_cf_node(node)); + nir_cf_delete(&list); +} + +#ifdef __cplusplus +} +#endif diff --git a/src/glsl/nir/nir_control_flow_private.h b/src/glsl/nir/nir_control_flow_private.h new file mode 100644 index 0000000..f32b57a --- /dev/null +++ b/src/glsl/nir/nir_control_flow_private.h @@ -0,0 +1,37 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir_control_flow.h" + +#pragma once + +/* Internal control-flow modification functions used when inserting/removing + * instructions. + */ + +void nir_handle_add_jump(nir_block *block); +void nir_handle_remove_jump(nir_block *block, nir_jump_type type); diff --git a/src/glsl/nir/nir_dominance.c b/src/glsl/nir/nir_dominance.c index 2f50db1..af4caae 100644 --- a/src/glsl/nir/nir_dominance.c +++ b/src/glsl/nir/nir_dominance.c @@ -42,7 +42,7 @@ static bool init_block_cb(nir_block *block, void *_state) { dom_state *state = (dom_state *) _state; - if (block == state->impl->start_block) + if (block == nir_start_block(state->impl)) block->imm_dom = block; else block->imm_dom = NULL; @@ -78,7 +78,7 @@ static bool calc_dominance_cb(nir_block *block, void *_state) { dom_state *state = (dom_state *) _state; - if (block == state->impl->start_block) + if (block == nir_start_block(state->impl)) return true; nir_block *new_idom = NULL; @@ -209,12 +209,13 @@ nir_calc_dominance_impl(nir_function_impl *impl) nir_foreach_block(impl, calc_dom_frontier_cb, &state); - impl->start_block->imm_dom = NULL; + nir_block *start_block = nir_start_block(impl); + start_block->imm_dom = NULL; calc_dom_children(impl); unsigned dfs_index = 0; - calc_dfs_indicies(impl->start_block, &dfs_index); + calc_dfs_indicies(start_block, &dfs_index); } void diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 6486130..1f24f9f 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -123,6 +123,8 @@ INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 
1, 0, 0) INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) #define SYSTEM_VALUE(name, components) \ INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \ @@ -139,12 +141,18 @@ SYSTEM_VALUE(sample_mask_in, 1) SYSTEM_VALUE(invocation_id, 1) /* - * The last index is the base address to load from. Indirect loads have an - * additional register input, which is added to the constant address to - * compute the final address to load from. For UBO's (and SSBO's), the first - * source is the (possibly constant) UBO buffer index and the indirect (if it - * exists) is the second source, and the first index is the descriptor set - * index. + * The format of the indices depends on the type of the load. For uniforms, + * the first index is the base address and the second index is an offset that + * should be added to the base address. (This way you can determine in the + * back-end which variable is being accessed even in an array.) For inputs, + * the one and only index corresponds to the attribute slot. UBO loads + * have two indices, the first of which is the descriptor set and the second + * is the base address to load from. + * + * UBO loads have a (possibly constant) source which is the UBO buffer index. + * For each type of load, the _indirect variant has one additional source + * (the second in the case of UBO's) that is an indirect to be added to + * the constant address or base offset to compute the final offset. * * For vector backends, the address is in terms of one vec4, and so each array * element is +4 scalar components from the previous array element. For scalar @@ -152,14 +160,14 @@ SYSTEM_VALUE(invocation_id, 1) * elements begin immediately after the previous array element. 
*/ -#define LOAD(name, extra_srcs, extra_indices, flags) \ - INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 1 + extra_indices, flags) \ +#define LOAD(name, extra_srcs, indices, flags) \ + INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, indices, flags) \ INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \ - true, 0, 0, 1 + extra_indices, flags) + true, 0, 0, indices, flags) -LOAD(uniform, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(input, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* LOAD(ssbo, 1, 0) */ /* diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 6a4494d..c9697e7 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -32,103 +32,17 @@ */ #include "nir.h" +#include "nir_builder.h" struct lower_io_state { + nir_builder builder; void *mem_ctx; - bool is_scalar; + int (*type_size)(const struct glsl_type *type); }; -static int -type_size_vec4(const struct glsl_type *type) -{ - unsigned int i; - int size; - - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - if (glsl_type_is_matrix(type)) { - return glsl_get_matrix_columns(type); - } else { - return 1; - } - case GLSL_TYPE_ARRAY: - return type_size_vec4(glsl_get_array_element(type)) * glsl_get_length(type); - case GLSL_TYPE_STRUCT: - size = 0; - for (i = 0; i < glsl_get_length(type); i++) { - size += type_size_vec4(glsl_get_struct_field(type, i)); - } - return size; - case GLSL_TYPE_SUBROUTINE: - return 1; - case GLSL_TYPE_SAMPLER: - return 0; - case GLSL_TYPE_ATOMIC_UINT: - return 0; - case GLSL_TYPE_IMAGE: - case 
GLSL_TYPE_VOID: - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_ERROR: - case GLSL_TYPE_INTERFACE: - unreachable("not reached"); - } - - return 0; -} - -static unsigned -type_size_scalar(const struct glsl_type *type) -{ - unsigned int size, i; - - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - return glsl_get_components(type); - case GLSL_TYPE_ARRAY: - return type_size_scalar(glsl_get_array_element(type)) * glsl_get_length(type); - case GLSL_TYPE_STRUCT: - size = 0; - for (i = 0; i < glsl_get_length(type); i++) { - size += type_size_scalar(glsl_get_struct_field(type, i)); - } - return size; - case GLSL_TYPE_SUBROUTINE: - return 1; - case GLSL_TYPE_SAMPLER: - return 0; - case GLSL_TYPE_ATOMIC_UINT: - return 0; - case GLSL_TYPE_INTERFACE: - return 0; - case GLSL_TYPE_IMAGE: - return 0; - case GLSL_TYPE_FUNCTION: - case GLSL_TYPE_VOID: - case GLSL_TYPE_ERROR: - case GLSL_TYPE_DOUBLE: - unreachable("not reached"); - } - - return 0; -} - -static unsigned -type_size(const struct glsl_type *type, bool is_scalar) -{ - if (is_scalar) - return type_size_scalar(type); - else - return type_size_vec4(type); -} - void -nir_assign_var_locations(struct exec_list *var_list, unsigned *size, bool is_scalar) +nir_assign_var_locations(struct exec_list *var_list, unsigned *size, + int (*type_size)(const struct glsl_type *)) { unsigned location = 0; @@ -142,7 +56,7 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size, bool is_sca continue; var->data.driver_location = location; - location += type_size(var->type, is_scalar); + location += type_size(var->type); } *size = location; @@ -162,80 +76,6 @@ deref_has_indirect(nir_deref_var *deref) return false; } -static bool -mark_indirect_uses_block(nir_block *block, void *void_state) -{ - struct set *indirect_set = void_state; - - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr 
*intrin = nir_instr_as_intrinsic(instr); - - for (unsigned i = 0; - i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) { - if (deref_has_indirect(intrin->variables[i])) - _mesa_set_add(indirect_set, intrin->variables[i]->var); - } - } - - return true; -} - -/* Identical to nir_assign_var_locations_packed except that it assigns - * locations to the variables that are used 100% directly first and then - * assigns locations to variables that are used indirectly. - */ -void -nir_assign_var_locations_direct_first(nir_shader *shader, - struct exec_list *var_list, - unsigned *direct_size, - unsigned *size, - bool is_scalar) -{ - struct set *indirect_set = _mesa_set_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - nir_foreach_overload(shader, overload) { - if (overload->impl) - nir_foreach_block(overload->impl, mark_indirect_uses_block, - indirect_set); - } - - unsigned location = 0; - - foreach_list_typed(nir_variable, var, node, var_list) { - if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) && - var->interface_type != NULL) - continue; - - if (_mesa_set_search(indirect_set, var)) - continue; - - var->data.driver_location = location; - location += type_size(var->type, is_scalar); - } - - *direct_size = location; - - foreach_list_typed(nir_variable, var, node, var_list) { - if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) && - var->interface_type != NULL) - continue; - - if (!_mesa_set_search(indirect_set, var)) - continue; - - var->data.driver_location = location; - location += type_size(var->type, is_scalar); - } - - *size = location; - - _mesa_set_destroy(indirect_set, NULL); -} - static unsigned get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect, struct lower_io_state *state) @@ -243,6 +83,9 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect, bool found_indirect = false; unsigned base_offset = 0; + nir_builder *b = 
&state->builder; + nir_builder_insert_before_instr(b, instr); + nir_deref *tail = &deref->deref; while (tail->child != NULL) { const struct glsl_type *parent_type = tail->type; @@ -250,56 +93,56 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect, if (tail->deref_type == nir_deref_type_array) { nir_deref_array *deref_array = nir_deref_as_array(tail); - unsigned size = type_size(tail->type, state->is_scalar); + unsigned size = state->type_size(tail->type); base_offset += size * deref_array->base_offset; if (deref_array->deref_array_type == nir_deref_array_type_indirect) { - nir_load_const_instr *load_const = - nir_load_const_instr_create(state->mem_ctx, 1); - load_const->value.u[0] = size; - nir_instr_insert_before(instr, &load_const->instr); - - nir_alu_instr *mul = nir_alu_instr_create(state->mem_ctx, - nir_op_imul); - mul->src[0].src.is_ssa = true; - mul->src[0].src.ssa = &load_const->def; - nir_src_copy(&mul->src[1].src, &deref_array->indirect, - state->mem_ctx); - mul->dest.write_mask = 1; - nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL); - nir_instr_insert_before(instr, &mul->instr); + nir_ssa_def *mul = + nir_imul(b, nir_imm_int(b, size), + nir_ssa_for_src(b, deref_array->indirect, 1)); if (found_indirect) { - nir_alu_instr *add = nir_alu_instr_create(state->mem_ctx, - nir_op_iadd); - add->src[0].src = *indirect; - add->src[1].src.is_ssa = true; - add->src[1].src.ssa = &mul->dest.dest.ssa; - add->dest.write_mask = 1; - nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL); - nir_instr_insert_before(instr, &add->instr); - - indirect->is_ssa = true; - indirect->ssa = &add->dest.dest.ssa; + indirect->ssa = + nir_iadd(b, nir_ssa_for_src(b, *indirect, 1), mul); } else { - indirect->is_ssa = true; - indirect->ssa = &mul->dest.dest.ssa; - found_indirect = true; + indirect->ssa = mul; } + indirect->is_ssa = true; + found_indirect = true; } } else if (tail->deref_type == nir_deref_type_struct) { nir_deref_struct *deref_struct = 
nir_deref_as_struct(tail); - for (unsigned i = 0; i < deref_struct->index; i++) - base_offset += type_size(glsl_get_struct_field(parent_type, i), - state->is_scalar); + for (unsigned i = 0; i < deref_struct->index; i++) { + base_offset += + state->type_size(glsl_get_struct_field(parent_type, i)); + } } } return base_offset; } +static nir_intrinsic_op +load_op(nir_variable_mode mode, bool has_indirect) +{ + nir_intrinsic_op op; + switch (mode) { + case nir_var_shader_in: + op = has_indirect ? nir_intrinsic_load_input_indirect : + nir_intrinsic_load_input; + break; + case nir_var_uniform: + op = has_indirect ? nir_intrinsic_load_uniform_indirect : + nir_intrinsic_load_uniform; + break; + default: + unreachable("Unknown variable mode"); + } + return op; +} + static bool nir_lower_io_block(nir_block *block, void *void_state) { @@ -319,31 +162,22 @@ nir_lower_io_block(nir_block *block, void *void_state) bool has_indirect = deref_has_indirect(intrin->variables[0]); - /* Figure out the opcode */ - nir_intrinsic_op load_op; - switch (mode) { - case nir_var_shader_in: - load_op = has_indirect ? nir_intrinsic_load_input_indirect : - nir_intrinsic_load_input; - break; - case nir_var_uniform: - load_op = has_indirect ? 
nir_intrinsic_load_uniform_indirect : - nir_intrinsic_load_uniform; - break; - default: - unreachable("Unknown variable mode"); - } - - nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx, - load_op); + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(state->mem_ctx, + load_op(mode, has_indirect)); load->num_components = intrin->num_components; nir_src indirect; unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr, &indirect, state); - offset += intrin->variables[0]->var->data.driver_location; - load->const_index[0] = offset; + unsigned location = intrin->variables[0]->var->data.driver_location; + if (mode == nir_var_uniform) { + load->const_index[0] = location; + load->const_index[1] = offset; + } else { + load->const_index[0] = location + offset; + } if (has_indirect) load->src[0] = indirect; @@ -406,12 +240,13 @@ nir_lower_io_block(nir_block *block, void *void_state) } static void -nir_lower_io_impl(nir_function_impl *impl, bool is_scalar) +nir_lower_io_impl(nir_function_impl *impl, int(*type_size)(const struct glsl_type *)) { struct lower_io_state state; + nir_builder_init(&state.builder, impl); state.mem_ctx = ralloc_parent(impl); - state.is_scalar = is_scalar; + state.type_size = type_size; nir_foreach_block(impl, nir_lower_io_block, &state); @@ -420,10 +255,10 @@ nir_lower_io_impl(nir_function_impl *impl, bool is_scalar) } void -nir_lower_io(nir_shader *shader, bool is_scalar) +nir_lower_io(nir_shader *shader, int(*type_size)(const struct glsl_type *)) { nir_foreach_overload(shader, overload) { if (overload->impl) - nir_lower_io_impl(overload->impl, is_scalar); + nir_lower_io_impl(overload->impl, type_size); } } diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp index 9a9cdd1..438caac 100644 --- a/src/glsl/nir/nir_lower_samplers.cpp +++ b/src/glsl/nir/nir_lower_samplers.cpp @@ -192,12 +192,12 @@ lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_progr } 
extern "C" void -nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_program, - gl_shader_stage stage) +nir_lower_samplers(nir_shader *shader, + const struct gl_shader_program *shader_program) { nir_foreach_overload(shader, overload) { if (overload->impl) - lower_impl(overload->impl, shader_program, stage); + lower_impl(overload->impl, shader_program, shader->stage); } } diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c index ccb8f99..4ff2166 100644 --- a/src/glsl/nir/nir_lower_vars_to_ssa.c +++ b/src/glsl/nir/nir_lower_vars_to_ssa.c @@ -935,7 +935,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) nir_foreach_block(impl, register_variable_uses_block, &state); insert_phi_nodes(&state); - rename_variables_block(impl->start_block, &state); + rename_variables_block(nir_start_block(impl), &state); nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index d7c1740..226e0a8 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -113,6 +113,8 @@ optimizations = [ (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'), (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'), (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'), + (('fne', ('fneg', a), a), ('fne', a, 0.0)), + (('feq', ('fneg', a), a), ('feq', a, 0.0)), # Emulating booleans (('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))), (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))), diff --git a/src/glsl/nir/nir_opt_gcm.c b/src/glsl/nir/nir_opt_gcm.c index 44068bf..5b412ee 100644 --- a/src/glsl/nir/nir_opt_gcm.c +++ b/src/glsl/nir/nir_opt_gcm.c @@ -256,7 +256,7 @@ gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state) /* Start with the instruction at the top. As we iterate over the * sources, it will get moved down as needed. 
*/ - instr->block = state->impl->start_block; + instr->block = nir_start_block(state->impl); state->instr = instr; nir_foreach_src(instr, gcm_schedule_early_src, state); diff --git a/src/glsl/nir/nir_opt_peephole_select.c b/src/glsl/nir/nir_opt_peephole_select.c index 6620e5d..26ec4ed 100644 --- a/src/glsl/nir/nir_opt_peephole_select.c +++ b/src/glsl/nir/nir_opt_peephole_select.c @@ -26,6 +26,7 @@ */ #include "nir.h" +#include "nir_control_flow.h" /* * Implements a small peephole optimization that looks for diff --git a/src/glsl/nir/nir_spirv.h b/src/glsl/nir/nir_spirv.h index 3254f10..1f09174 100644 --- a/src/glsl/nir/nir_spirv.h +++ b/src/glsl/nir/nir_spirv.h @@ -37,6 +37,7 @@ extern "C" { #endif nir_shader *spirv_to_nir(const uint32_t *words, size_t word_count, + gl_shader_stage stage, const nir_shader_compiler_options *options); #ifdef __cplusplus diff --git a/src/glsl/nir/nir_to_ssa.c b/src/glsl/nir/nir_to_ssa.c index a3c35fa..b089df7 100644 --- a/src/glsl/nir/nir_to_ssa.c +++ b/src/glsl/nir/nir_to_ssa.c @@ -516,7 +516,7 @@ nir_convert_to_ssa_impl(nir_function_impl *impl) rewrite_state state; init_rewrite_state(impl, &state); - rewrite_block(impl->start_block, &state); + rewrite_block(nir_start_block(impl), &state); remove_unused_regs(impl, &state); diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c index dc79941..9938c0e 100644 --- a/src/glsl/nir/nir_validate.c +++ b/src/glsl/nir/nir_validate.c @@ -75,6 +75,9 @@ typedef struct { /* the current if statement being validated */ nir_if *if_stmt; + /* the current loop being visited */ + nir_loop *loop; + /* the parent of the current cf node being visited */ nir_cf_node *parent_node; @@ -594,9 +597,85 @@ validate_block(nir_block *block, validate_state *state) } } + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + const nir_block *pred = entry->key; + assert(pred->successors[0] == block || + pred->successors[1] == block); + } + if (!exec_list_is_empty(&block->instr_list) && 
- nir_block_last_instr(block)->type == nir_instr_type_jump) + nir_block_last_instr(block)->type == nir_instr_type_jump) { assert(block->successors[1] == NULL); + nir_jump_instr *jump = nir_instr_as_jump(nir_block_last_instr(block)); + switch (jump->type) { + case nir_jump_break: { + nir_block *after = + nir_cf_node_as_block(nir_cf_node_next(&state->loop->cf_node)); + assert(block->successors[0] == after); + break; + } + + case nir_jump_continue: { + nir_block *first = + nir_cf_node_as_block(nir_loop_first_cf_node(state->loop)); + assert(block->successors[0] == first); + break; + } + + case nir_jump_return: + assert(block->successors[0] == state->impl->end_block); + break; + + default: + unreachable("bad jump type"); + } + } else { + nir_cf_node *next = nir_cf_node_next(&block->cf_node); + if (next == NULL) { + switch (state->parent_node->type) { + case nir_cf_node_loop: { + nir_block *first = + nir_cf_node_as_block(nir_loop_first_cf_node(state->loop)); + assert(block->successors[0] == first); + /* due to the hack for infinite loops, block->successors[1] may + * point to the block after the loop. 
+ */ + break; + } + + case nir_cf_node_if: { + nir_block *after = + nir_cf_node_as_block(nir_cf_node_next(state->parent_node)); + assert(block->successors[0] == after); + assert(block->successors[1] == NULL); + break; + } + + case nir_cf_node_function: + assert(block->successors[0] == state->impl->end_block); + assert(block->successors[1] == NULL); + break; + + default: + unreachable("unknown control flow node type"); + } + } else { + if (next->type == nir_cf_node_if) { + nir_if *if_stmt = nir_cf_node_as_if(next); + assert(&block->successors[0]->cf_node == + nir_if_first_then_node(if_stmt)); + assert(&block->successors[1]->cf_node == + nir_if_first_else_node(if_stmt)); + } else { + assert(next->type == nir_cf_node_loop); + nir_loop *loop = nir_cf_node_as_loop(next); + assert(&block->successors[0]->cf_node == + nir_loop_first_cf_node(loop)); + assert(block->successors[1] == NULL); + } + } + } } static void @@ -608,12 +687,6 @@ validate_if(nir_if *if_stmt, validate_state *state) nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node); assert(prev_node->type == nir_cf_node_block); - nir_block *prev_block = nir_cf_node_as_block(prev_node); - assert(&prev_block->successors[0]->cf_node == - nir_if_first_then_node(if_stmt)); - assert(&prev_block->successors[1]->cf_node == - nir_if_first_else_node(if_stmt)); - assert(!exec_node_is_tail_sentinel(if_stmt->cf_node.node.next)); nir_cf_node *next_node = nir_cf_node_next(&if_stmt->cf_node); assert(next_node->type == nir_cf_node_block); @@ -647,10 +720,6 @@ validate_loop(nir_loop *loop, validate_state *state) nir_cf_node *prev_node = nir_cf_node_prev(&loop->cf_node); assert(prev_node->type == nir_cf_node_block); - nir_block *prev_block = nir_cf_node_as_block(prev_node); - assert(&prev_block->successors[0]->cf_node == nir_loop_first_cf_node(loop)); - assert(prev_block->successors[1] == NULL); - assert(!exec_node_is_tail_sentinel(loop->cf_node.node.next)); nir_cf_node *next_node = nir_cf_node_next(&loop->cf_node); 
assert(next_node->type == nir_cf_node_block); @@ -659,6 +728,8 @@ validate_loop(nir_loop *loop, validate_state *state) nir_cf_node *old_parent = state->parent_node; state->parent_node = &loop->cf_node; + nir_loop *old_loop = state->loop; + state->loop = loop; exec_list_validate(&loop->body); foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) { @@ -666,6 +737,7 @@ validate_loop(nir_loop *loop, validate_state *state) } state->parent_node = old_parent; + state->loop = old_loop; } static void @@ -921,6 +993,7 @@ init_validate_state(validate_state *state) state->regs_found = NULL; state->var_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + state->loop = NULL; } static void diff --git a/src/glsl/nir/spirv2nir.c b/src/glsl/nir/spirv2nir.c index 0eed23f..a7b8c0f 100644 --- a/src/glsl/nir/spirv2nir.c +++ b/src/glsl/nir/spirv2nir.c @@ -49,6 +49,6 @@ int main(int argc, char **argv) const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); assert(map != NULL); - nir_shader *shader = spirv_to_nir(map, word_count, NULL); + nir_shader *shader = spirv_to_nir(map, word_count, MESA_SHADER_FRAGMENT, NULL); nir_print_shader(shader, stderr); } diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 65a995c..771637e 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -27,6 +27,7 @@ #include "spirv_to_nir_private.h" #include "nir_vla.h" +#include "nir_control_flow.h" static struct vtn_ssa_value * vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, @@ -2927,6 +2928,7 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, nir_shader * spirv_to_nir(const uint32_t *words, size_t word_count, + gl_shader_stage stage, const nir_shader_compiler_options *options) { const uint32_t *word_end = words + word_count; @@ -2942,7 +2944,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, words+= 5; - nir_shader *shader = nir_shader_create(NULL, options); + nir_shader 
*shader = nir_shader_create(NULL, stage, options); /* Initialize the stn_builder object */ struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); diff --git a/src/glsl/opt_constant_propagation.cpp b/src/glsl/opt_constant_propagation.cpp index 10be8e8..5221417 100644 --- a/src/glsl/opt_constant_propagation.cpp +++ b/src/glsl/opt_constant_propagation.cpp @@ -110,6 +110,8 @@ public: virtual ir_visitor_status visit_enter(class ir_if *); void add_constant(ir_assignment *ir); + void constant_folding(ir_rvalue **rvalue); + void constant_propagation(ir_rvalue **rvalue); void kill(ir_variable *ir, unsigned write_mask); void handle_if_block(exec_list *instructions); void handle_rvalue(ir_rvalue **rvalue); @@ -132,8 +134,38 @@ public: void -ir_constant_propagation_visitor::handle_rvalue(ir_rvalue **rvalue) -{ +ir_constant_propagation_visitor::constant_folding(ir_rvalue **rvalue) { + + if (*rvalue == NULL || (*rvalue)->ir_type == ir_type_constant) + return; + + /* Note that we visit rvalues on leaving. So if an expression has a + * non-constant operand, no need to go looking down it to find if it's + * constant. This cuts the time of this pass down drastically. + */ + ir_expression *expr = (*rvalue)->as_expression(); + if (expr) { + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + if (!expr->operands[i]->as_constant()) + return; + } + } + + /* Ditto for swizzles. 
*/ + ir_swizzle *swiz = (*rvalue)->as_swizzle(); + if (swiz && !swiz->val->as_constant()) + return; + + ir_constant *constant = (*rvalue)->constant_expression_value(); + if (constant) { + *rvalue = constant; + this->progress = true; + } +} + +void +ir_constant_propagation_visitor::constant_propagation(ir_rvalue **rvalue) { + if (this->in_assignee || !*rvalue) return; @@ -216,6 +248,13 @@ ir_constant_propagation_visitor::handle_rvalue(ir_rvalue **rvalue) this->progress = true; } +void +ir_constant_propagation_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + constant_propagation(rvalue); + constant_folding(rvalue); +} + ir_visitor_status ir_constant_propagation_visitor::visit_enter(ir_function_signature *ir) { @@ -243,6 +282,8 @@ ir_constant_propagation_visitor::visit_enter(ir_function_signature *ir) ir_visitor_status ir_constant_propagation_visitor::visit_leave(ir_assignment *ir) { + constant_folding(&ir->rhs); + if (this->in_assignee) return visit_continue; diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h index 3fef7c4..0c64af0 100644 --- a/src/glsl/shader_enums.h +++ b/src/glsl/shader_enums.h @@ -48,158 +48,6 @@ typedef enum #define MESA_SHADER_STAGES (MESA_SHADER_COMPUTE + 1) /** - * Bitflags for system values. - */ -#define SYSTEM_BIT_SAMPLE_ID ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_ID) -#define SYSTEM_BIT_SAMPLE_POS ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_POS) -#define SYSTEM_BIT_SAMPLE_MASK_IN ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_MASK_IN) -/** - * If the gl_register_file is PROGRAM_SYSTEM_VALUE, the register index will be - * one of these values. If a NIR variable's mode is nir_var_system_value, it - * will be one of these values. - */ -typedef enum -{ - /** - * \name Vertex shader system values - */ - /*@{*/ - /** - * OpenGL-style vertex ID. 
- * - * Section 2.11.7 (Shader Execution), subsection Shader Inputs, of the - * OpenGL 3.3 core profile spec says: - * - * "gl_VertexID holds the integer index i implicitly passed by - * DrawArrays or one of the other drawing commands defined in section - * 2.8.3." - * - * Section 2.8.3 (Drawing Commands) of the same spec says: - * - * "The commands....are equivalent to the commands with the same base - * name (without the BaseVertex suffix), except that the ith element - * transferred by the corresponding draw call will be taken from - * element indices[i] + basevertex of each enabled array." - * - * Additionally, the overview in the GL_ARB_shader_draw_parameters spec - * says: - * - * "In unextended GL, vertex shaders have inputs named gl_VertexID and - * gl_InstanceID, which contain, respectively the index of the vertex - * and instance. The value of gl_VertexID is the implicitly passed - * index of the vertex being processed, which includes the value of - * baseVertex, for those commands that accept it." - * - * gl_VertexID gets basevertex added in. This differs from DirectX where - * SV_VertexID does \b not get basevertex added in. - * - * \note - * If all system values are available, \c SYSTEM_VALUE_VERTEX_ID will be - * equal to \c SYSTEM_VALUE_VERTEX_ID_ZERO_BASE plus - * \c SYSTEM_VALUE_BASE_VERTEX. - * - * \sa SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, SYSTEM_VALUE_BASE_VERTEX - */ - SYSTEM_VALUE_VERTEX_ID, - - /** - * Instanced ID as supplied to gl_InstanceID - * - * Values assigned to gl_InstanceID always begin with zero, regardless of - * the value of baseinstance. - * - * Section 11.1.3.9 (Shader Inputs) of the OpenGL 4.4 core profile spec - * says: - * - * "gl_InstanceID holds the integer instance number of the current - * primitive in an instanced draw call (see section 10.5)." - * - * Through a big chain of pseudocode, section 10.5 describes that - * baseinstance is not counted by gl_InstanceID. 
In that section, notice - * - * "If an enabled vertex attribute array is instanced (it has a - * non-zero divisor as specified by VertexAttribDivisor), the element - * index that is transferred to the GL, for all vertices, is given by - * - * floor(instance/divisor) + baseinstance - * - * If an array corresponding to an attribute required by a vertex - * shader is not enabled, then the corresponding element is taken from - * the current attribute state (see section 10.2)." - * - * Note that baseinstance is \b not included in the value of instance. - */ - SYSTEM_VALUE_INSTANCE_ID, - - /** - * DirectX-style vertex ID. - * - * Unlike \c SYSTEM_VALUE_VERTEX_ID, this system value does \b not include - * the value of basevertex. - * - * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_BASE_VERTEX - */ - SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, - - /** - * Value of \c basevertex passed to \c glDrawElementsBaseVertex and similar - * functions. - * - * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE - */ - SYSTEM_VALUE_BASE_VERTEX, - /*@}*/ - - /** - * \name Geometry shader system values - */ - /*@{*/ - SYSTEM_VALUE_INVOCATION_ID, /**< (Also in Tessellation Control shader) */ - /*@}*/ - - /** - * \name Fragment shader system values - */ - /*@{*/ - SYSTEM_VALUE_FRONT_FACE, /**< (not done yet) */ - SYSTEM_VALUE_SAMPLE_ID, - SYSTEM_VALUE_SAMPLE_POS, - SYSTEM_VALUE_SAMPLE_MASK_IN, - /*@}*/ - - /** - * \name Tessellation Evaluation shader system values - */ - /*@{*/ - SYSTEM_VALUE_TESS_COORD, - SYSTEM_VALUE_VERTICES_IN, /**< Tessellation vertices in input patch */ - SYSTEM_VALUE_PRIMITIVE_ID, /**< (currently not used by GS) */ - SYSTEM_VALUE_TESS_LEVEL_OUTER, /**< TES input */ - SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */ - /*@}*/ - - SYSTEM_VALUE_MAX /**< Number of values */ -} gl_system_value; - - -/** - * The possible interpolation qualifiers that can be applied to a fragment - * shader input in GLSL. 
- * - * Note: INTERP_QUALIFIER_NONE must be 0 so that memsetting the - * gl_fragment_program data structure to 0 causes the default behavior. - */ -enum glsl_interp_qualifier -{ - INTERP_QUALIFIER_NONE = 0, - INTERP_QUALIFIER_SMOOTH, - INTERP_QUALIFIER_FLAT, - INTERP_QUALIFIER_NOPERSPECTIVE, - INTERP_QUALIFIER_COUNT /**< Number of interpolation qualifiers */ -}; - - -/** * Indexes for vertex program attributes. * GL_NV_vertex_program aliases generic attributes over the conventional * attributes. In GL_ARB_vertex_program shader the aliasing is optional. @@ -305,7 +153,6 @@ typedef enum BITFIELD64_RANGE(VERT_ATTRIB_GENERIC(0), VERT_ATTRIB_GENERIC_MAX) /*@}*/ - /** * Indexes for vertex shader outputs, geometry shader inputs/outputs, and * fragment shader inputs. @@ -346,9 +193,6 @@ typedef enum VARYING_SLOT_TESS_LEVEL_OUTER, /* Only appears as TCS output. */ VARYING_SLOT_TESS_LEVEL_INNER, /* Only appears as TCS output. */ VARYING_SLOT_VAR0, /* First generic varying slot */ - VARYING_SLOT_MAX = VARYING_SLOT_VAR0 + MAX_VARYING, - VARYING_SLOT_PATCH0 = VARYING_SLOT_MAX, - VARYING_SLOT_TESS_MAX = VARYING_SLOT_PATCH0 + MAX_VARYING } gl_varying_slot; @@ -383,9 +227,161 @@ typedef enum #define VARYING_BIT_VIEWPORT BITFIELD64_BIT(VARYING_SLOT_VIEWPORT) #define VARYING_BIT_FACE BITFIELD64_BIT(VARYING_SLOT_FACE) #define VARYING_BIT_PNTC BITFIELD64_BIT(VARYING_SLOT_PNTC) +#define VARYING_BIT_TESS_LEVEL_OUTER BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_OUTER) +#define VARYING_BIT_TESS_LEVEL_INNER BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_INNER) #define VARYING_BIT_VAR(V) BITFIELD64_BIT(VARYING_SLOT_VAR0 + (V)) /*@}*/ +/** + * Bitflags for system values. + */ +#define SYSTEM_BIT_SAMPLE_ID ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_ID) +#define SYSTEM_BIT_SAMPLE_POS ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_POS) +#define SYSTEM_BIT_SAMPLE_MASK_IN ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_MASK_IN) +/** + * If the gl_register_file is PROGRAM_SYSTEM_VALUE, the register index will be + * one of these values. 
If a NIR variable's mode is nir_var_system_value, it + * will be one of these values. + */ +typedef enum +{ + /** + * \name Vertex shader system values + */ + /*@{*/ + /** + * OpenGL-style vertex ID. + * + * Section 2.11.7 (Shader Execution), subsection Shader Inputs, of the + * OpenGL 3.3 core profile spec says: + * + * "gl_VertexID holds the integer index i implicitly passed by + * DrawArrays or one of the other drawing commands defined in section + * 2.8.3." + * + * Section 2.8.3 (Drawing Commands) of the same spec says: + * + * "The commands....are equivalent to the commands with the same base + * name (without the BaseVertex suffix), except that the ith element + * transferred by the corresponding draw call will be taken from + * element indices[i] + basevertex of each enabled array." + * + * Additionally, the overview in the GL_ARB_shader_draw_parameters spec + * says: + * + * "In unextended GL, vertex shaders have inputs named gl_VertexID and + * gl_InstanceID, which contain, respectively the index of the vertex + * and instance. The value of gl_VertexID is the implicitly passed + * index of the vertex being processed, which includes the value of + * baseVertex, for those commands that accept it." + * + * gl_VertexID gets basevertex added in. This differs from DirectX where + * SV_VertexID does \b not get basevertex added in. + * + * \note + * If all system values are available, \c SYSTEM_VALUE_VERTEX_ID will be + * equal to \c SYSTEM_VALUE_VERTEX_ID_ZERO_BASE plus + * \c SYSTEM_VALUE_BASE_VERTEX. + * + * \sa SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, SYSTEM_VALUE_BASE_VERTEX + */ + SYSTEM_VALUE_VERTEX_ID, + + /** + * Instanced ID as supplied to gl_InstanceID + * + * Values assigned to gl_InstanceID always begin with zero, regardless of + * the value of baseinstance. 
+ * + * Section 11.1.3.9 (Shader Inputs) of the OpenGL 4.4 core profile spec + * says: + * + * "gl_InstanceID holds the integer instance number of the current + * primitive in an instanced draw call (see section 10.5)." + * + * Through a big chain of pseudocode, section 10.5 describes that + * baseinstance is not counted by gl_InstanceID. In that section, notice + * + * "If an enabled vertex attribute array is instanced (it has a + * non-zero divisor as specified by VertexAttribDivisor), the element + * index that is transferred to the GL, for all vertices, is given by + * + * floor(instance/divisor) + baseinstance + * + * If an array corresponding to an attribute required by a vertex + * shader is not enabled, then the corresponding element is taken from + * the current attribute state (see section 10.2)." + * + * Note that baseinstance is \b not included in the value of instance. + */ + SYSTEM_VALUE_INSTANCE_ID, + + /** + * DirectX-style vertex ID. + * + * Unlike \c SYSTEM_VALUE_VERTEX_ID, this system value does \b not include + * the value of basevertex. + * + * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_BASE_VERTEX + */ + SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, + + /** + * Value of \c basevertex passed to \c glDrawElementsBaseVertex and similar + * functions. 
+ * + * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE + */ + SYSTEM_VALUE_BASE_VERTEX, + /*@}*/ + + /** + * \name Geometry shader system values + */ + /*@{*/ + SYSTEM_VALUE_INVOCATION_ID, /**< (Also in Tessellation Control shader) */ + /*@}*/ + + /** + * \name Fragment shader system values + */ + /*@{*/ + SYSTEM_VALUE_FRONT_FACE, /**< (not done yet) */ + SYSTEM_VALUE_SAMPLE_ID, + SYSTEM_VALUE_SAMPLE_POS, + SYSTEM_VALUE_SAMPLE_MASK_IN, + /*@}*/ + + /** + * \name Tessellation Evaluation shader system values + */ + /*@{*/ + SYSTEM_VALUE_TESS_COORD, + SYSTEM_VALUE_VERTICES_IN, /**< Tessellation vertices in input patch */ + SYSTEM_VALUE_PRIMITIVE_ID, /**< (currently not used by GS) */ + SYSTEM_VALUE_TESS_LEVEL_OUTER, /**< TES input */ + SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */ + /*@}*/ + + SYSTEM_VALUE_MAX /**< Number of values */ +} gl_system_value; + + +/** + * The possible interpolation qualifiers that can be applied to a fragment + * shader input in GLSL. + * + * Note: INTERP_QUALIFIER_NONE must be 0 so that memsetting the + * gl_fragment_program data structure to 0 causes the default behavior. + */ +enum glsl_interp_qualifier +{ + INTERP_QUALIFIER_NONE = 0, + INTERP_QUALIFIER_SMOOTH, + INTERP_QUALIFIER_FLAT, + INTERP_QUALIFIER_NOPERSPECTIVE, + INTERP_QUALIFIER_COUNT /**< Number of interpolation qualifiers */ +}; /** * Fragment program results @@ -405,8 +401,6 @@ typedef enum * any are written, FRAG_RESULT_COLOR will not be written. 
*/ FRAG_RESULT_DATA0 = 4, - FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS) } gl_frag_result; - #endif /* SHADER_ENUMS_H */ diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c index 63c8de3..eedcd46 100644 --- a/src/glx/dri_common.c +++ b/src/glx/dri_common.c @@ -253,8 +253,7 @@ __ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_RGB, bindToTextureRgb), static int scalarEqual(struct glx_config *mode, unsigned int attrib, unsigned int value) { - unsigned int glxValue; - int i; + unsigned glxValue, i; for (i = 0; i < ARRAY_SIZE(attribMap); i++) if (attribMap[i].attrib == attrib) { diff --git a/src/glx/dri_common_query_renderer.c b/src/glx/dri_common_query_renderer.c index b3e107d..ebeedc9 100644 --- a/src/glx/dri_common_query_renderer.c +++ b/src/glx/dri_common_query_renderer.c @@ -56,7 +56,7 @@ static const struct { static int dri2_convert_glx_query_renderer_attribs(int attribute) { - int i; + unsigned i; for (i = 0; i < ARRAY_SIZE(query_renderer_map); i++) if (query_renderer_map[i].glx_attrib == attribute) diff --git a/src/glx/glxext.c b/src/glx/glxext.c index fdc24d4..dc87fb9 100644 --- a/src/glx/glxext.c +++ b/src/glx/glxext.c @@ -138,6 +138,9 @@ __glXWireToEvent(Display *dpy, XEvent *event, xEvent *wire) if (!glxDraw) return False; + aevent->serial = _XSetLastRequestRead(dpy, (xGenericReply *) wire); + aevent->send_event = (awire->type & 0x80) != 0; + aevent->display = dpy; aevent->event_type = awire->event_type; aevent->drawable = glxDraw->xDrawable; aevent->ust = ((CARD64)awire->ust_hi << 32) | awire->ust_lo; diff --git a/src/mapi/glapi/gen/GL4x.xml b/src/mapi/glapi/gen/GL4x.xml index 94ddfb7..dee5027 100644 --- a/src/mapi/glapi/gen/GL4x.xml +++ b/src/mapi/glapi/gen/GL4x.xml @@ -44,4 +44,10 @@ <enum name="DEPTH_STENCIL_TEXTURE_MODE" value="0x90EA"/> </category> +<category name="4.5"> + <function name="MemoryBarrierByRegion" es2="3.1"> + <param name="barriers" type="GLbitfield"/> + </function> +</category> + </OpenGLAPI> diff --git 
a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am index 7d9d1a6..86a9243 100644 --- a/src/mapi/glapi/gen/Makefile.am +++ b/src/mapi/glapi/gen/Makefile.am @@ -154,6 +154,7 @@ API_XML = \ ARB_shader_image_load_store.xml \ ARB_shader_subroutine.xml \ ARB_sync.xml \ + ARB_tessellation_shader.xml \ ARB_texture_barrier.xml \ ARB_texture_buffer_object.xml \ ARB_texture_buffer_range.xml \ diff --git a/src/mesa/drivers/dri/common/drirc b/src/mesa/drivers/dri/common/drirc index 97d961b..bb840ea 100644 --- a/src/mesa/drivers/dri/common/drirc +++ b/src/mesa/drivers/dri/common/drirc @@ -53,10 +53,12 @@ TODO: document the other workarounds. <application name="Unigine OilRush (32-bit)" executable="OilRush_x86"> <option name="disable_blend_func_extended" value="true" /> + <option name="allow_glsl_extension_directive_midshader" value="true" /> </application> <application name="Unigine OilRush (64-bit)" executable="OilRush_x64"> <option name="disable_blend_func_extended" value="true" /> + <option name="allow_glsl_extension_directive_midshader" value="true" /> </application> <application name="Savage 2" executable="savage2.bin"> diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index b51b263..43d90d9 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -452,7 +452,7 @@ int driGetConfigAttrib(const __DRIconfig *config, unsigned int attrib, unsigned int *value) { - int i; + unsigned i; for (i = 0; i < ARRAY_SIZE(attribMap); i++) if (attribMap[i].attrib == attrib) diff --git a/src/mesa/drivers/dri/i915/intel_pixel_read.c b/src/mesa/drivers/dri/i915/intel_pixel_read.c index 149e921..e6fa8f2 100644 --- a/src/mesa/drivers/dri/i915/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i915/intel_pixel_read.c @@ -91,7 +91,7 @@ do_blit_readpixels(struct gl_context * ctx, if (ctx->_ImageTransferState || !_mesa_format_matches_format_and_type(irb->mt->format, format, type, - false)) { + false, NULL)) { DBG("%s 
- bad format for blit\n", __func__); return false; } diff --git a/src/mesa/drivers/dri/i915/intel_tex_image.c b/src/mesa/drivers/dri/i915/intel_tex_image.c index 0a213e9..5ab60d1 100644 --- a/src/mesa/drivers/dri/i915/intel_tex_image.c +++ b/src/mesa/drivers/dri/i915/intel_tex_image.c @@ -134,7 +134,7 @@ try_pbo_upload(struct gl_context *ctx, } if (!_mesa_format_matches_format_and_type(intelImage->mt->format, - format, type, false)) { + format, type, false, NULL)) { DBG("%s: format mismatch (upload to %s with format 0x%x, type 0x%x)\n", __func__, _mesa_get_format_name(intelImage->mt->format), format, type); diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c b/src/mesa/drivers/dri/i965/brw_conditional_render.c index 6d37c3b..122a4ec 100644 --- a/src/mesa/drivers/dri/i965/brw_conditional_render.c +++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c @@ -56,6 +56,12 @@ set_predicate_for_result(struct brw_context *brw, assert(query->bo != NULL); + /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM + * command when loading the values into the predicate source registers for + * conditional rendering. 
+ */ + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); + brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query->bo, diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 328662d..0ee5ab2 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -514,7 +514,7 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms = BRW_MAX_IMAGES; ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; - ctx->Const.MaxCombinedImageUnitsAndFragmentOutputs = + ctx->Const.MaxCombinedShaderOutputResources = MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS; ctx->Const.MaxImageSamples = 0; ctx->Const.MaxCombinedImageUniforms = 3 * BRW_MAX_IMAGES; diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index e092ef4..e5de420 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -383,7 +383,7 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) brw_render_cache_set_add_bo(brw, stencil_irb->mt->bo); } - for (int i = 0; i < fb->_NumColorDrawBuffers; i++) { + for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { struct intel_renderbuffer *irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); @@ -626,7 +626,7 @@ brw_draw_init(struct brw_context *brw) void brw_draw_destroy(struct brw_context *brw) { - int i; + unsigned i; for (i = 0; i < brw->vb.nr_buffers; i++) { drm_intel_bo_unreference(brw->vb.buffers[i].bo); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index cbfd585..21d8f1e 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -395,7 +395,8 @@ brw_prepare_vertices(struct brw_context *brw) GLuint interleaved = 0; unsigned int min_index = brw->vb.min_index + brw->basevertex; unsigned int max_index = brw->vb.max_index + brw->basevertex; - int delta, 
i, j; + unsigned i; + int delta, j; struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; GLuint nr_uploads = 0; @@ -418,10 +419,10 @@ brw_prepare_vertices(struct brw_context *brw) /* Accumulate the list of enabled arrays. */ brw->vb.nr_enabled = 0; while (vs_inputs) { - GLuint i = ffsll(vs_inputs) - 1; - struct brw_vertex_element *input = &brw->vb.inputs[i]; + GLuint index = ffsll(vs_inputs) - 1; + struct brw_vertex_element *input = &brw->vb.inputs[index]; - vs_inputs &= ~BITFIELD64_BIT(i); + vs_inputs &= ~BITFIELD64_BIT(index); brw->vb.enabled[brw->vb.nr_enabled++] = input; } @@ -438,7 +439,7 @@ brw_prepare_vertices(struct brw_context *brw) if (_mesa_is_bufferobj(glarray->BufferObj)) { struct intel_buffer_object *intel_buffer = intel_buffer_object(glarray->BufferObj); - int k; + unsigned k; /* If we have a VB set to be uploaded for this buffer object * already, reuse that VB state so that we emit fewer @@ -792,21 +793,6 @@ brw_emit_vertices(struct brw_context *brw) ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } - if (brw->gen >= 6 && gen6_edgeflag_input) { - uint32_t format = - brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); - - OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - GEN6_VE0_EDGE_FLAG_ENABLE | - (format << BRW_VE0_FORMAT_SHIFT) | - (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } - if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) { uint32_t dw0 = 0, dw1 = 0; uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0; @@ -847,6 +833,21 @@ brw_emit_vertices(struct brw_context *brw) OUT_BATCH(dw1); } + if (brw->gen >= 6 && gen6_edgeflag_input) { + uint32_t format = + brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); + 
+ OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | + GEN6_VE0_VALID | + GEN6_VE0_EDGE_FLAG_ENABLE | + (format << BRW_VE0_FORMAT_SHIFT) | + (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); + OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); + } + ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 0e091dd..159f716 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -455,8 +455,8 @@ fs_reg::component_size(unsigned width) const return MAX2(width * stride, 1) * type_sz(type); } -int -fs_visitor::type_size(const struct glsl_type *type) +extern "C" int +type_size_scalar(const struct glsl_type *type) { unsigned int size, i; @@ -467,11 +467,11 @@ fs_visitor::type_size(const struct glsl_type *type) case GLSL_TYPE_BOOL: return type->components(); case GLSL_TYPE_ARRAY: - return type_size(type->fields.array) * type->length; + return type_size_scalar(type->fields.array) * type->length; case GLSL_TYPE_STRUCT: size = 0; for (i = 0; i < type->length; i++) { - size += type_size(type->fields.structure[i].type); + size += type_size_scalar(type->fields.structure[i].type); } return size; case GLSL_TYPE_SAMPLER: @@ -907,7 +907,7 @@ fs_reg fs_visitor::vgrf(const glsl_type *const type) { int reg_width = dispatch_width / 8; - return fs_reg(GRF, alloc.allocate(type_size(type) * reg_width), + return fs_reg(GRF, alloc.allocate(type_size_scalar(type) * reg_width), brw_type_for_base_type(type)); } @@ -944,15 +944,17 @@ fs_visitor::import_uniforms(fs_visitor *v) } void -fs_visitor::setup_vector_uniform_values(const gl_constant_value *values, unsigned n) +fs_visitor::setup_vec4_uniform_value(unsigned param_offset, + const gl_constant_value *values, + unsigned n) { 
static const gl_constant_value zero = { 0 }; for (unsigned i = 0; i < n; ++i) - stage_prog_data->param[uniforms++] = &values[i]; + stage_prog_data->param[param_offset + i] = &values[i]; for (unsigned i = n; i < 4; ++i) - stage_prog_data->param[uniforms++] = &zero; + stage_prog_data->param[param_offset + i] = &zero; } fs_reg * @@ -1769,21 +1771,21 @@ fs_visitor::compact_virtual_grfs() return progress; } -/* - * Implements array access of uniforms by inserting a - * PULL_CONSTANT_LOAD instruction. +/** + * Assign UNIFORM file registers to either push constants or pull constants. * - * Unlike temporary GRF array access (where we don't support it due to - * the difficulty of doing relative addressing on instruction - * destinations), we could potentially do array access of uniforms - * that were loaded in GRF space as push constants. In real-world - * usage we've seen, though, the arrays being used are always larger - * than we could load as push constants, so just always move all - * uniform array access out to a pull constant buffer. + * We allow a fragment shader to have more than the specified minimum + * maximum number of fragment shader uniform components (64). If + * there are too many of these, they'd fill up all of register space. + * So, this will push some of them out to the pull constant buffer and + * update the program to load them. We also use pull constants for all + * indirect constant loads because we don't support indirect accesses in + * registers yet. */ void -fs_visitor::move_uniform_array_access_to_pull_constants() +fs_visitor::assign_constant_locations() { + /* Only the first compile (SIMD8 mode) gets to decide on locations. */ if (dispatch_width != 8) return; @@ -1820,23 +1822,6 @@ fs_visitor::move_uniform_array_access_to_pull_constants() } } } -} - -/** - * Assign UNIFORM file registers to either push constants or pull constants. 
- * - * We allow a fragment shader to have more than the specified minimum - * maximum number of fragment shader uniform components (64). If - * there are too many of these, they'd fill up all of register space. - * So, this will push some of them out to the pull constant buffer and - * update the program to load them. - */ -void -fs_visitor::assign_constant_locations() -{ - /* Only the first compile (SIMD8 mode) gets to decide on locations. */ - if (dispatch_width != 8) - return; /* Find which UNIFORM registers are still in use. */ bool is_live[uniforms]; @@ -4823,7 +4808,6 @@ fs_visitor::optimize() split_virtual_grfs(); - move_uniform_array_access_to_pull_constants(); assign_constant_locations(); demote_pull_constants(); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 975183e..31f39fe 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -112,7 +112,6 @@ public: void swizzle_result(ir_texture_opcode op, int dest_components, fs_reg orig_val, uint32_t sampler); - int type_size(const struct glsl_type *type); fs_inst *get_instruction_generating_reg(fs_inst *start, fs_inst *end, const fs_reg &reg); @@ -147,7 +146,6 @@ public: void spill_reg(int spill_reg); void split_virtual_grfs(); bool compact_virtual_grfs(); - void move_uniform_array_access_to_pull_constants(); void assign_constant_locations(); void demote_pull_constants(); void invalidate_live_intervals(); @@ -291,8 +289,9 @@ public: struct brw_reg interp_reg(int location, int channel); - virtual void setup_vector_uniform_values(const gl_constant_value *values, - unsigned n); + virtual void setup_vec4_uniform_value(unsigned param_offset, + const gl_constant_value *values, + unsigned n); int implied_mrf_writes(fs_inst *inst); @@ -318,9 +317,6 @@ public: /** Number of uniform variable components visited. 
*/ unsigned uniforms; - /** Total number of direct uniforms we can get from NIR */ - unsigned num_direct_uniforms; - /** Byte-offset for the next available spot in the scratch space buffer. */ unsigned last_scratch; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 93a36cc..6272b61 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -131,7 +131,7 @@ fs_visitor::nir_setup_outputs(nir_shader *shader) switch (stage) { case MESA_SHADER_VERTEX: - for (int i = 0; i < ALIGN(type_size(var->type), 4) / 4; i++) { + for (int i = 0; i < ALIGN(type_size_scalar(var->type), 4) / 4; i++) { int output = var->data.location + i; this->outputs[output] = offset(reg, bld, 4 * i); this->output_components[output] = vector_elements; @@ -175,19 +175,9 @@ fs_visitor::nir_setup_outputs(nir_shader *shader) void fs_visitor::nir_setup_uniforms(nir_shader *shader) { - num_direct_uniforms = shader->num_direct_uniforms; - if (dispatch_width != 8) return; - /* We split the uniform register file in half. The first half is - * entirely direct uniforms. The second half is indirect. - */ - if (num_direct_uniforms > 0) - param_size[0] = num_direct_uniforms; - if (shader->num_uniforms > num_direct_uniforms) - param_size[num_direct_uniforms] = shader->num_uniforms - num_direct_uniforms; - uniforms = shader->num_uniforms; if (shader_prog) { @@ -200,15 +190,19 @@ fs_visitor::nir_setup_uniforms(nir_shader *shader) nir_setup_builtin_uniform(var); else nir_setup_uniform(var); + + param_size[var->data.driver_location] = type_size_scalar(var->type); } } else { - /* prog_to_nir doesn't create uniform variables; set param up directly. */ + /* prog_to_nir only creates a single giant uniform variable so we can + * just set param up directly. 
*/ for (unsigned p = 0; p < prog->Parameters->NumParameters; p++) { for (unsigned int i = 0; i < 4; i++) { stage_prog_data->param[4 * p + i] = &prog->Parameters->ParameterValues[p][i]; } } + param_size[0] = prog->Parameters->NumParameters * 4; } } @@ -239,15 +233,7 @@ fs_visitor::nir_setup_uniform(nir_variable *var) } if (storage->type->is_image()) { - /* Images don't get a valid location assigned by nir_lower_io() - * because their size is driver-specific, so we need to allocate - * space for them here at the end of the parameter array. - */ - var->data.driver_location = uniforms; - param_size[uniforms] = - BRW_IMAGE_PARAM_SIZE * MAX2(storage->array_elements, 1); - - setup_image_uniform_values(storage); + setup_image_uniform_values(index, storage); } else { unsigned slots = storage->type->component_slots(); if (storage->array_elements) @@ -1406,6 +1392,51 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } + case nir_intrinsic_image_size: { + /* Get the referenced image variable and type. */ + const nir_variable *var = instr->variables[0]->var; + const glsl_type *type = var->type->without_array(); + + /* Get the size of the image. */ + const fs_reg image = get_nir_image_deref(instr->variables[0]); + const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET); + + /* For 1DArray image types, the array index is stored in the Z component. + * Fix this by swizzling the Z component to the Y component. + */ + const bool is_1d_array_image = + type->sampler_dimensionality == GLSL_SAMPLER_DIM_1D && + type->sampler_array; + + /* For CubeArray images, we should count the number of cubes instead + * of the number of faces. Fix it by dividing the (Z component) by 6. + */ + const bool is_cube_array_image = + type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && + type->sampler_array; + + /* Copy all the components. 
*/ + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; + for (unsigned c = 0; c < info->dest_components; ++c) { + if ((int)c >= type->coordinate_components()) { + bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), + fs_reg(1)); + } else if (c == 1 && is_1d_array_image) { + bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), + offset(size, bld, 2)); + } else if (c == 2 && is_cube_array_image) { + bld.emit(SHADER_OPCODE_INT_QUOTIENT, + offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), + offset(size, bld, c), fs_reg(6)); + } else { + bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), + offset(size, bld, c)); + } + } + + break; + } + case nir_intrinsic_load_front_face: bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), *emit_frontfacing_interpolation()); @@ -1467,21 +1498,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr has_indirect = true; /* fallthrough */ case nir_intrinsic_load_uniform: { - unsigned index = instr->const_index[0]; - - fs_reg uniform_reg; - if (index < num_direct_uniforms) { - uniform_reg = fs_reg(UNIFORM, 0); - } else { - uniform_reg = fs_reg(UNIFORM, num_direct_uniforms); - index -= num_direct_uniforms; - } + fs_reg uniform_reg(UNIFORM, instr->const_index[0]); + uniform_reg.reg_offset = instr->const_index[1]; for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg src = offset(retype(uniform_reg, dest.type), bld, index); + fs_reg src = offset(retype(uniform_reg, dest.type), bld, j); if (has_indirect) src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); - index++; bld.MOV(dest, src); dest = offset(dest, bld, 1); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index b70895e..6eb9889 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -156,7 +156,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width) } 
uint8_t *ra_reg_to_grf = ralloc_array(compiler, uint8_t, ra_reg_count); - struct ra_regs *regs = ra_alloc_reg_set(compiler, ra_reg_count); + struct ra_regs *regs = ra_alloc_reg_set(compiler, ra_reg_count, false); if (devinfo->gen >= 6) ra_set_allocate_round_robin(regs); int *classes = ralloc_array(compiler, int, class_count); @@ -232,7 +232,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width) for (int base_reg = j; base_reg < j + (class_sizes[i] + 1) / 2; base_reg++) { - ra_add_transitive_reg_conflict(regs, base_reg, reg); + ra_add_reg_conflict(regs, base_reg, reg); } reg++; @@ -246,7 +246,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width) for (int base_reg = j; base_reg < j + class_sizes[i]; base_reg++) { - ra_add_transitive_reg_conflict(regs, base_reg, reg); + ra_add_reg_conflict(regs, base_reg, reg); } reg++; @@ -255,6 +255,12 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width) } assert(reg == ra_reg_count); + /* Applying transitivity to all of the base registers gives us the + * appropriate register conflict relationships everywhere. + */ + for (int reg = 0; reg < base_reg_count; reg++) + ra_make_reg_conflicts_transitive(regs, reg); + /* Add a special class for aligned pairs, which we'll put delta_xy * in on Gen <= 6 so that we can do PLN. */ diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index e9d9467..2751152 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -878,7 +878,8 @@ brw_upload_invariant_state(struct brw_context *brw) { const bool is_965 = brw->gen == 4 && !brw->is_g4x; - brw_select_pipeline(brw, BRW_RENDER_PIPELINE); + brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE); + brw->last_pipeline = BRW_RENDER_PIPELINE; if (brw->gen < 6) { /* Disable depth offset clamping. 
*/ diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 79e31d8..0276d47 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -22,6 +22,7 @@ */ #include "brw_nir.h" +#include "brw_shader.h" #include "glsl/glsl_parser_extras.h" #include "glsl/nir/glsl_to_nir.h" #include "program/prog_to_nir.h" @@ -130,22 +131,24 @@ brw_process_nir(nir_shader *nir, nir_optimize(nir, is_scalar); if (is_scalar) { - nir_assign_var_locations_direct_first(nir, &nir->uniforms, - &nir->num_direct_uniforms, - &nir->num_uniforms, - is_scalar); - nir_assign_var_locations(&nir->outputs, &nir->num_outputs, is_scalar); + nir_assign_var_locations(&nir->uniforms, + &nir->num_uniforms, + type_size_scalar); + nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_scalar); + nir_assign_var_locations(&nir->outputs, &nir->num_outputs, type_size_scalar); + nir_lower_io(nir, type_size_scalar); } else { nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, - is_scalar); + type_size_vec4); + + nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_vec4); foreach_list_typed(nir_variable, var, node, &nir->outputs) var->data.driver_location = var->data.location; - } - nir_assign_var_locations(&nir->inputs, &nir->num_inputs, is_scalar); - nir_lower_io(nir, is_scalar); + nir_lower_io(nir, type_size_vec4); + } nir_validate_shader(nir); @@ -153,7 +156,7 @@ brw_process_nir(nir_shader *nir, nir_validate_shader(nir); if (shader_prog) { - nir_lower_samplers(nir, shader_prog, stage); + nir_lower_samplers(nir, shader_prog); } else { nir_lower_samplers_for_vk(nir); } diff --git a/src/mesa/drivers/dri/i965/brw_primitive_restart.c b/src/mesa/drivers/dri/i965/brw_primitive_restart.c index 6ed79d7..c8d9002 100644 --- a/src/mesa/drivers/dri/i965/brw_primitive_restart.c +++ b/src/mesa/drivers/dri/i965/brw_primitive_restart.c @@ -91,7 +91,7 @@ can_cut_index_handle_prims(struct gl_context *ctx, return false; } - for 
(int i = 0; i < nr_prims; i++) { + for (unsigned i = 0; i < nr_prims; i++) { switch (prim[i].mode) { case GL_POINTS: case GL_LINES: diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index d6b012c..a8e5aba 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -66,20 +66,11 @@ brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx) void brw_write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx) { - uint32_t flags; - - flags = (PIPE_CONTROL_WRITE_DEPTH_COUNT | - PIPE_CONTROL_DEPTH_STALL); - - /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM - * command when loading the values into the predicate source registers for - * conditional rendering. - */ - if (brw->predicate.supported) - flags |= PIPE_CONTROL_FLUSH_ENABLE; - - brw_emit_pipe_control_write(brw, flags, query_bo, - idx * sizeof(uint64_t), 0, 0); + brw_emit_pipe_control_write(brw, + PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_DEPTH_STALL, + query_bo, idx * sizeof(uint64_t), + 0, 0); } /** diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 67b8dde..0007e5c 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -122,7 +122,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true; compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].OptimizeForAOS = true; - if (compiler->scalar_vs || brw_env_var_as_boolean("INTEL_USE_NIR", false)) { + if (compiler->scalar_vs || brw_env_var_as_boolean("INTEL_USE_NIR", true)) { if (compiler->scalar_vs) { /* If we're using the scalar backend for vertex shaders, we need to * configure these accordingly. 
@@ -135,7 +135,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions = nir_options; } - if (brw_env_var_as_boolean("INTEL_USE_NIR", false)) { + if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) { compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].NirOptions = nir_options; } @@ -1421,7 +1421,8 @@ backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_ } void -backend_shader::setup_image_uniform_values(const gl_uniform_storage *storage) +backend_shader::setup_image_uniform_values(unsigned param_offset, + const gl_uniform_storage *storage) { const unsigned stage = _mesa_program_enum_to_shader_stage(prog->Target); @@ -1432,18 +1433,19 @@ backend_shader::setup_image_uniform_values(const gl_uniform_storage *storage) /* Upload the brw_image_param structure. The order is expected to match * the BRW_IMAGE_PARAM_*_OFFSET defines. */ - setup_vector_uniform_values( + setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, (const gl_constant_value *)&param->surface_idx, 1); - setup_vector_uniform_values( + setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_OFFSET_OFFSET, (const gl_constant_value *)param->offset, 2); - setup_vector_uniform_values( + setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SIZE_OFFSET, (const gl_constant_value *)param->size, 3); - setup_vector_uniform_values( + setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_STRIDE_OFFSET, (const gl_constant_value *)param->stride, 4); - setup_vector_uniform_values( + setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_TILING_OFFSET, (const gl_constant_value *)param->tiling, 3); - setup_vector_uniform_values( + setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, (const gl_constant_value *)param->swizzling, 2); + param_offset += BRW_IMAGE_PARAM_SIZE; brw_mark_surface_used( stage_prog_data, diff --git a/src/mesa/drivers/dri/i965/brw_shader.h 
b/src/mesa/drivers/dri/i965/brw_shader.h index 2cc97f2..ccccf4d 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -270,9 +270,11 @@ public: virtual void invalidate_live_intervals() = 0; - virtual void setup_vector_uniform_values(const gl_constant_value *values, - unsigned n) = 0; - void setup_image_uniform_values(const gl_uniform_storage *storage); + virtual void setup_vec4_uniform_value(unsigned param_offset, + const gl_constant_value *values, + unsigned n) = 0; + void setup_image_uniform_values(unsigned param_offset, + const gl_uniform_storage *storage); }; uint32_t brw_texture_offset(int *offsets, unsigned num_components); @@ -307,6 +309,9 @@ bool brw_cs_precompile(struct gl_context *ctx, struct gl_shader_program *shader_prog, struct gl_program *prog); +int type_size_scalar(const struct glsl_type *type); +int type_size_vec4(const struct glsl_type *type); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 5effb4c..e817ecf 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -208,7 +208,7 @@ brw_lookup_prog(const struct brw_cache *cache, const void *data, unsigned data_size) { const struct brw_context *brw = cache->brw; - int i; + unsigned i; const struct brw_cache_item *item; for (i = 0; i < cache->size; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index b8b0393..e96732a 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -555,7 +555,7 @@ brw_miptree_layout_texture_array(struct brw_context *brw, if (mt->compressed) img_height /= mt->align_h; - for (int q = 0; q < mt->level[level].depth; q++) { + for (unsigned q = 0; q < mt->level[level].depth; q++) { if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) { intel_miptree_set_image_offset(mt, level, q, 0, q * 
img_height); } else { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 341c516..673a29e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -177,8 +177,9 @@ public: void fail(const char *msg, ...); void setup_uniform_clipplane_values(gl_clip_plane *clip_planes); - virtual void setup_vector_uniform_values(const gl_constant_value *values, - unsigned n); + virtual void setup_vec4_uniform_value(unsigned param_offset, + const gl_constant_value *values, + unsigned n); void setup_uniform_values(ir_variable *ir); void setup_builtin_uniform_values(ir_variable *ir); int setup_uniforms(int payload_reg); @@ -409,7 +410,6 @@ public: void visit_atomic_counter_intrinsic(ir_call *ir); - int type_size(const struct glsl_type *type); bool is_high_sampler(src_reg sampler); virtual void emit_nir_code(); @@ -447,7 +447,6 @@ public: dst_reg *nir_locals; dst_reg *nir_ssa_values; src_reg *nir_inputs; - unsigned *nir_uniform_driver_location; dst_reg *nir_system_values; protected: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp index d85fb6f..8a8dd57 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp @@ -44,7 +44,7 @@ vec4_gs_visitor::nir_setup_inputs(nir_shader *shader) */ assert(var->type->length > 0); int length = var->type->length; - int size = type_size(var->type) / length; + int size = type_size_vec4(var->type) / length; for (int i = 0; i < length; i++) { int location = var->data.location + i * BRW_VARYING_SLOT_COUNT; for (int j = 0; j < size; j++) { @@ -55,7 +55,7 @@ vec4_gs_visitor::nir_setup_inputs(nir_shader *shader) } } } else { - int size = type_size(var->type); + int size = type_size_vec4(var->type); for (int i = 0; i < size; i++) { src_reg src = src_reg(ATTR, var->data.location + i, var->type); src = retype(src, brw_type_for_base_type(var->type)); diff --git 
a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index fd3d556..d5a24d8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -119,7 +119,7 @@ vec4_visitor::nir_setup_inputs(nir_shader *shader) foreach_list_typed(nir_variable, var, node, &shader->inputs) { int offset = var->data.driver_location; - unsigned size = type_size(var->type); + unsigned size = type_size_vec4(var->type); for (unsigned i = 0; i < size; i++) { src_reg src = src_reg(ATTR, var->data.location + i, var->type); nir_inputs[offset + i] = src; @@ -132,20 +132,17 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader) { uniforms = 0; - nir_uniform_driver_location = - rzalloc_array(mem_ctx, unsigned, this->uniform_array_size); - if (shader_prog) { foreach_list_typed(nir_variable, var, node, &shader->uniforms) { /* UBO's, atomics and samplers don't take up space in the uniform file */ if (var->interface_type != NULL || var->type->contains_atomic() || - type_size(var->type) == 0) { + type_size_vec4(var->type) == 0) { continue; } assert(uniforms < uniform_array_size); - this->uniform_size[uniforms] = type_size(var->type); + this->uniform_size[uniforms] = type_size_vec4(var->type); if (strncmp(var->name, "gl_", 3) == 0) nir_setup_builtin_uniform(var); @@ -161,7 +158,7 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader) strcmp(var->name, "parameters") == 0); assert(uniforms < uniform_array_size); - this->uniform_size[uniforms] = type_size(var->type); + this->uniform_size[uniforms] = type_size_vec4(var->type); struct gl_program_parameter_list *plist = prog->Parameters; for (unsigned p = 0; p < plist->NumParameters; p++) { @@ -182,7 +179,6 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader) stage_prog_data->param[uniforms * 4 + i] = &zero; } - nir_uniform_driver_location[uniforms] = var->data.driver_location; uniforms++; } } @@ -230,7 +226,6 @@ vec4_visitor::nir_setup_uniform(nir_variable *var) 
stage_prog_data->param[uniforms * 4 + i] = &zero; } - nir_uniform_driver_location[uniforms] = var->data.driver_location; uniforms++; } } @@ -263,7 +258,6 @@ vec4_visitor::nir_setup_builtin_uniform(nir_variable *var) (var->type->is_scalar() || var->type->is_vector() || var->type->is_matrix() ? var->type->vector_elements : 4); - nir_uniform_driver_location[uniforms] = var->data.driver_location; uniforms++; } } @@ -458,13 +452,28 @@ vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr) dst_reg reg = dst_reg(GRF, alloc.allocate(1)); reg.type = BRW_REGISTER_TYPE_F; + unsigned remaining = brw_writemask_for_size(instr->def.num_components); + /* @FIXME: consider emitting vector operations to save some MOVs in * cases where the components are representable in 8 bits. - * By now, we emit a MOV for each component. + * For now, we emit a MOV for each distinct value. */ - for (unsigned i = 0; i < instr->def.num_components; ++i) { - reg.writemask = 1 << i; + for (unsigned i = 0; i < instr->def.num_components; i++) { + unsigned writemask = 1 << i; + + if ((remaining & writemask) == 0) + continue; + + for (unsigned j = i; j < instr->def.num_components; j++) { + if (instr->value.u[i] == instr->value.u[j]) { + writemask |= 1 << j; + } + } + + reg.writemask = writemask; emit(MOV(reg, src_reg(instr->value.f[i]))); + + remaining &= ~writemask; } /* Set final writemask */ @@ -555,24 +564,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) has_indirect = true; /* fallthrough */ case nir_intrinsic_load_uniform: { - int uniform = instr->const_index[0]; - dest = get_nir_dest(instr->dest); - if (has_indirect) { - /* Split addressing into uniform and offset */ - int offset = uniform - nir_uniform_driver_location[uniform]; - assert(offset >= 0); + src = src_reg(dst_reg(UNIFORM, instr->const_index[0])); + src.reg_offset = instr->const_index[1]; - uniform -= offset; - assert(uniform >= 0); - - src = src_reg(dst_reg(UNIFORM, uniform)); - src.reg_offset = offset; + if 
(has_indirect) { src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1); src.reladdr = new(mem_ctx) src_reg(tmp); - } else { - src = src_reg(dst_reg(UNIFORM, uniform)); } emit(MOV(dest, src)); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 617c988..62ed708 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -115,7 +115,7 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler) ralloc_free(compiler->vec4_reg_set.ra_reg_to_grf); compiler->vec4_reg_set.ra_reg_to_grf = ralloc_array(compiler, uint8_t, ra_reg_count); ralloc_free(compiler->vec4_reg_set.regs); - compiler->vec4_reg_set.regs = ra_alloc_reg_set(compiler, ra_reg_count); + compiler->vec4_reg_set.regs = ra_alloc_reg_set(compiler, ra_reg_count, false); if (compiler->devinfo->gen >= 6) ra_set_allocate_round_robin(compiler->vec4_reg_set.regs); ralloc_free(compiler->vec4_reg_set.classes); @@ -140,7 +140,7 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler) for (int base_reg = j; base_reg < j + class_sizes[i]; base_reg++) { - ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg); + ra_add_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg); } reg++; @@ -158,6 +158,9 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler) } assert(reg == ra_reg_count); + for (int reg = 0; reg < base_reg_count; reg++) + ra_make_reg_conflicts_transitive(compiler->vec4_reg_set.regs, reg); + ra_set_finalize(compiler->vec4_reg_set.regs, q_values); for (int i = 0; i < MAX_VGRF_SIZE; i++) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 20b628e..499f628 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -597,8 +597,8 @@ vec4_visitor::visit_instructions(const exec_list *list) * This method is useful to calculate how much 
register space is needed to * store a particular type. */ -int -vec4_visitor::type_size(const struct glsl_type *type) +extern "C" int +type_size_vec4(const struct glsl_type *type) { unsigned int i; int size; @@ -620,11 +620,11 @@ vec4_visitor::type_size(const struct glsl_type *type) } case GLSL_TYPE_ARRAY: assert(type->length > 0); - return type_size(type->fields.array) * type->length; + return type_size_vec4(type->fields.array) * type->length; case GLSL_TYPE_STRUCT: size = 0; for (i = 0; i < type->length; i++) { - size += type_size(type->fields.structure[i].type); + size += type_size_vec4(type->fields.structure[i].type); } return size; case GLSL_TYPE_SUBROUTINE: @@ -655,7 +655,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) init(); this->file = GRF; - this->reg = v->alloc.allocate(v->type_size(type)); + this->reg = v->alloc.allocate(type_size_vec4(type)); if (type->is_array() || type->is_record()) { this->swizzle = BRW_SWIZZLE_NOOP; @@ -673,7 +673,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size) init(); this->file = GRF; - this->reg = v->alloc.allocate(v->type_size(type) * size); + this->reg = v->alloc.allocate(type_size_vec4(type) * size); this->swizzle = BRW_SWIZZLE_NOOP; @@ -685,7 +685,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) init(); this->file = GRF; - this->reg = v->alloc.allocate(v->type_size(type)); + this->reg = v->alloc.allocate(type_size_vec4(type)); if (type->is_array() || type->is_record()) { this->writemask = WRITEMASK_XYZW; @@ -697,18 +697,21 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) } void -vec4_visitor::setup_vector_uniform_values(const gl_constant_value *values, - unsigned n) +vec4_visitor::setup_vec4_uniform_value(unsigned param_offset, + const gl_constant_value *values, + unsigned n) { static const gl_constant_value zero = { 0 }; + assert(param_offset % 4 == 0); + for (unsigned i = 0; i < n; ++i) - 
stage_prog_data->param[4 * uniforms + i] = &values[i]; + stage_prog_data->param[param_offset + i] = &values[i]; for (unsigned i = n; i < 4; ++i) - stage_prog_data->param[4 * uniforms + i] = &zero; + stage_prog_data->param[param_offset + i] = &zero; - uniform_vector_size[uniforms++] = n; + uniform_vector_size[param_offset / 4] = n; } /* Our support for uniforms is piggy-backed on the struct @@ -744,9 +747,12 @@ vec4_visitor::setup_uniform_values(ir_variable *ir) storage->type->matrix_columns); const unsigned vector_size = storage->type->vector_elements; - for (unsigned s = 0; s < vector_count; s++) - setup_vector_uniform_values(&storage->storage[s * vector_size], - vector_size); + for (unsigned s = 0; s < vector_count; s++) { + setup_vec4_uniform_value(uniforms * 4, + &storage->storage[s * vector_size], + vector_size); + uniforms++; + } } } @@ -1070,7 +1076,7 @@ vec4_visitor::visit(ir_variable *ir) assert(ir->data.location != -1); reg = new(mem_ctx) dst_reg(this, ir->type); - for (int i = 0; i < type_size(ir->type); i++) { + for (int i = 0; i < type_size_vec4(ir->type); i++) { output_reg[ir->data.location + i] = *reg; output_reg[ir->data.location + i].reg_offset = i; output_reg_annotation[ir->data.location + i] = ir->name; @@ -1092,14 +1098,14 @@ vec4_visitor::visit(ir_variable *ir) * Some uniforms, such as samplers and atomic counters, have no actual * storage, so we should ignore them. */ - if (ir->is_in_buffer_block() || type_size(ir->type) == 0) + if (ir->is_in_buffer_block() || type_size_vec4(ir->type) == 0) return; /* Track how big the whole uniform variable is, in case we need to put a * copy of its data into pull constants for array access. 
*/ assert(this->uniforms < uniform_array_size); - this->uniform_size[this->uniforms] = type_size(ir->type); + this->uniform_size[this->uniforms] = type_size_vec4(ir->type); if (!strncmp(ir->name, "gl_", 3)) { setup_builtin_uniform_values(ir); @@ -2052,7 +2058,7 @@ vec4_visitor::compute_array_stride(ir_dereference_array *ir) /* Under normal circumstances array elements are stored consecutively, so * the stride is equal to the size of the array element. */ - return type_size(ir->type); + return type_size_vec4(ir->type); } @@ -2121,7 +2127,7 @@ vec4_visitor::visit(ir_dereference_record *ir) for (i = 0; i < struct_type->length; i++) { if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) break; - offset += type_size(struct_type->fields.structure[i].type); + offset += type_size_vec4(struct_type->fields.structure[i].type); } /* If the type is smaller than a vec4, replicate the last channel out. */ @@ -2330,7 +2336,7 @@ vec4_visitor::visit(ir_assignment *ir) emit_bool_to_cond_code(ir->condition, &predicate); } - for (i = 0; i < type_size(ir->lhs->type); i++) { + for (i = 0; i < type_size_vec4(ir->lhs->type); i++) { vec4_instruction *inst = emit(MOV(dst, src)); inst->predicate = predicate; diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 72e37d4..fd7e56e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -56,7 +56,7 @@ brw_upload_pull_constants(struct brw_context *brw, const struct brw_stage_prog_data *prog_data, bool dword_pitch) { - int i; + unsigned i; uint32_t surf_index = prog_data->binding_table.pull_constants_start; if (!prog_data->nr_pull_params) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 0cd4390..cd0b56b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -47,7 +47,7 @@ 
brw_color_buffer_write_enabled(struct brw_context *brw) struct gl_context *ctx = &brw->ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ const struct gl_fragment_program *fp = brw->fragment_program; - int i; + unsigned i; /* _NEW_BUFFERS */ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index f13a97c..8213f4e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -898,7 +898,7 @@ brw_upload_ubo_surfaces(struct brw_context *brw, uint32_t *surf_offsets = &stage_state->surf_offset[prog_data->binding_table.ubo_start]; - for (int i = 0; i < shader->NumUniformBlocks; i++) { + for (unsigned i = 0; i < shader->NumUniformBlocks; i++) { struct gl_uniform_buffer_binding *binding; struct intel_buffer_object *intel_bo; @@ -958,7 +958,7 @@ brw_upload_abo_surfaces(struct brw_context *brw, uint32_t *surf_offsets = &stage_state->surf_offset[prog_data->binding_table.abo_start]; - for (int i = 0; i < prog->NumAtomicBuffers; i++) { + for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) { struct gl_atomic_buffer_binding *binding = &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding]; struct intel_buffer_object *intel_bo = @@ -1117,7 +1117,7 @@ update_texture_image_param(struct brw_context *brw, minify(mt->logical_depth0, u->Level) : mt->logical_depth0); - intel_miptree_get_image_offset(mt, u->Level, u->Layer, + intel_miptree_get_image_offset(mt, u->Level, u->_Layer, &param->offset[0], &param->offset[1]); @@ -1202,7 +1202,7 @@ update_image_surface(struct brw_context *brw, access != GL_READ_ONLY); } else { - const unsigned min_layer = obj->MinLayer + u->Layer; + const unsigned min_layer = obj->MinLayer + u->_Layer; const unsigned min_level = obj->MinLevel + u->Level; const unsigned num_layers = (!u->Layered ? 1 : obj->Target == GL_TEXTURE_CUBE_MAP ?
6 : diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 35d10ef..6653a6d 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -68,7 +68,7 @@ gen6_upload_push_constants(struct brw_context *brw, _mesa_load_state_parameters(ctx, prog->Parameters); gl_constant_value *param; - int i; + unsigned i; param = brw_state_batch(brw, type, prog_data->nr_params * sizeof(gl_constant_value), diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c index 41573a8..8cd2fc4 100644 --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c @@ -116,7 +116,7 @@ gen7_upload_3dstate_so_decl_list(struct brw_context *brw, /* Construct the list of SO_DECLs to be emitted. The formatting of the * command is feels strange -- each dword pair contains a SO_DECL per stream. */ - for (int i = 0; i < linked_xfb_info->NumOutputs; i++) { + for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) { int buffer = linked_xfb_info->Outputs[i].OutputBuffer; uint16_t decl = 0; int varying = linked_xfb_info->Outputs[i].OutputRegister; diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c index 1af90ec..1b48643 100644 --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c @@ -40,16 +40,25 @@ gen8_emit_vertices(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; uint32_t mocs_wb = brw->gen >= 9 ? 
SKL_MOCS_WB : BDW_MOCS_WB; + bool uses_edge_flag; brw_prepare_vertices(brw); brw_prepare_shader_draw_parameters(brw); + uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL); + if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) { unsigned vue = brw->vb.nr_enabled; - WARN_ONCE(brw->vs.prog_data->inputs_read & VERT_BIT_EDGEFLAG, - "Using VID/IID with edgeflags, need to reorder the " - "vertex attributes"); + /* The element for the edge flags must always be last, so we have to + * insert the SGVS before it in that case. + */ + if (uses_edge_flag) { + assert(vue > 0); + vue--; + } + WARN_ONCE(vue >= 33, "Trying to insert VID/IID past 33rd vertex element, " "need to reorder the vertex attrbutes."); @@ -74,7 +83,7 @@ gen8_emit_vertices(struct brw_context *brw) BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); - OUT_BATCH(brw->vb.nr_buffers | GEN8_VF_INSTANCING_ENABLE); + OUT_BATCH(vue | GEN8_VF_INSTANCING_ENABLE); OUT_BATCH(0); ADVANCE_BATCH(); } else { @@ -138,7 +147,18 @@ gen8_emit_vertices(struct brw_context *brw) ADVANCE_BATCH(); } - unsigned nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid; + /* Normally we don't need an element for the SGVS attribute because the + * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an + * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if the + * vertex ID is used then it needs an element for the base vertex buffer. + * Additionally if there is an edge flag element then the SGVS can't be + * inserted past that so we need a dummy element to ensure that the edge + * flag is the last one. + */ + bool needs_sgvs_element = (brw->vs.prog_data->uses_vertexid || + (brw->vs.prog_data->uses_instanceid && + uses_edge_flag)); + unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element; /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, * presumably for VertexID/InstanceID. 
@@ -192,6 +212,24 @@ gen8_emit_vertices(struct brw_context *brw) (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); } + if (needs_sgvs_element) { + if (brw->vs.prog_data->uses_vertexid) { + OUT_BATCH(GEN6_VE0_VALID | + brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT | + BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT); + OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); + } else { + OUT_BATCH(GEN6_VE0_VALID); + OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); + } + } + if (gen6_edgeflag_input) { uint32_t format = brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); @@ -206,25 +244,26 @@ gen8_emit_vertices(struct brw_context *brw) (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); } - - if (brw->vs.prog_data->uses_vertexid) { - OUT_BATCH(GEN6_VE0_VALID | - brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT | - BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } ADVANCE_BATCH(); - for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { + for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) { const struct brw_vertex_element *input = brw->vb.enabled[i]; const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer]; + unsigned element_index; + + /* The edge flag element is reordered to be the last one in the code + * above so we need to compensate 
for that in the element indices used + * below. + */ + if (input == gen6_edgeflag_input) + element_index = nr_elements - 1; + else + element_index = j++; BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); - OUT_BATCH(i | (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0)); + OUT_BATCH(element_index | + (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0)); OUT_BATCH(buffer->step_rate); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 3bc28a1..1a246d3 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -331,6 +331,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_gpu_shader5 = true; ctx->Extensions.ARB_shader_atomic_counters = true; ctx->Extensions.ARB_shader_image_load_store = true; + ctx->Extensions.ARB_shader_image_size = true; ctx->Extensions.ARB_texture_compression_bptc = true; ctx->Extensions.ARB_texture_view = true; diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index 72648b0..64d57e8 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -662,7 +662,7 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) struct intel_renderbuffer *stencilRb = intel_get_renderbuffer(fb, BUFFER_STENCIL); struct intel_mipmap_tree *depth_mt = NULL, *stencil_mt = NULL; - int i; + unsigned i; DBG("%s() on fb %p (%s)\n", __func__, fb, (fb == ctx->DrawBuffer ? 
"drawbuffer" : @@ -797,7 +797,7 @@ intel_blit_framebuffer_with_blitter(struct gl_context *ctx, intel_prepare_render(brw); if (mask & GL_COLOR_BUFFER_BIT) { - GLint i; + unsigned i; struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer; struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb); diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index a164c69..5911b44 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -122,7 +122,7 @@ aub_dump_bmp(struct gl_context *ctx) { struct gl_framebuffer *fb = ctx->DrawBuffer; - for (int i = 0; i < fb->_NumColorDrawBuffers; i++) { + for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { struct intel_renderbuffer *irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); @@ -1219,7 +1219,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen) __DRIconfig **configs = NULL; /* Generate singlesample configs without accumulation buffer. */ - for (int i = 0; i < ARRAY_SIZE(formats); i++) { + for (unsigned i = 0; i < ARRAY_SIZE(formats); i++) { __DRIconfig **new_configs; int num_depth_stencil_bits = 2; @@ -1256,7 +1256,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen) /* Generate the minimum possible set of configs that include an * accumulation buffer. */ - for (int i = 0; i < ARRAY_SIZE(formats); i++) { + for (unsigned i = 0; i < ARRAY_SIZE(formats); i++) { __DRIconfig **new_configs; if (formats[i] == MESA_FORMAT_B5G6R5_UNORM) { @@ -1288,7 +1288,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen) * supported. Singlebuffer configs are not supported because no one wants * them. 
*/ - for (int i = 0; i < ARRAY_SIZE(formats); i++) { + for (unsigned i = 0; i < ARRAY_SIZE(formats); i++) { if (devinfo->gen < 6) break; diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 1cdea93..e17b41c 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -866,8 +866,8 @@ _mesa_init_buffer_objects( struct gl_context *ctx ) _mesa_reference_buffer_object(ctx, &ctx->AtomicBufferBindings[i].BufferObject, ctx->Shared->NullBufferObj); - ctx->AtomicBufferBindings[i].Offset = -1; - ctx->AtomicBufferBindings[i].Size = -1; + ctx->AtomicBufferBindings[i].Offset = 0; + ctx->AtomicBufferBindings[i].Size = 0; } } diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 888c461..be542dd 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -402,10 +402,6 @@ one_time_init( struct gl_context *ctx ) PACKAGE_VERSION, __DATE__, __TIME__); } #endif - -#ifdef DEBUG - _mesa_test_formats(); -#endif } /* per-API one-time init */ diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index d934d19..4a3c231 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -50,6 +50,7 @@ enum { ES1 = 1 << API_OPENGLES, ES2 = 1 << API_OPENGLES2, ES3 = 1 << (API_OPENGL_LAST + 1), + ES31 = 1 << (API_OPENGL_LAST + 2), }; /** @@ -152,6 +153,7 @@ static const struct extension extension_table[] = { { "GL_ARB_shader_atomic_counters", o(ARB_shader_atomic_counters), GL, 2011 }, { "GL_ARB_shader_bit_encoding", o(ARB_shader_bit_encoding), GL, 2010 }, { "GL_ARB_shader_image_load_store", o(ARB_shader_image_load_store), GL, 2011 }, + { "GL_ARB_shader_image_size", o(ARB_shader_image_size), GL, 2012 }, { "GL_ARB_shader_objects", o(dummy_true), GL, 2002 }, { "GL_ARB_shader_precision", o(ARB_shader_precision), GL, 2010 }, { "GL_ARB_shader_stencil_export", o(ARB_shader_stencil_export), GL, 2009 }, @@ -773,6 +775,8 @@ _mesa_make_extension_string(struct gl_context *ctx) unsigned api_set = (1 << ctx->API); if 
(_mesa_is_gles3(ctx)) api_set |= ES3; + if (_mesa_is_gles31(ctx)) + api_set |= ES31; /* Check if the MESA_EXTENSION_MAX_YEAR env var is set */ { @@ -854,6 +858,8 @@ _mesa_get_extension_count(struct gl_context *ctx) unsigned api_set = (1 << ctx->API); if (_mesa_is_gles3(ctx)) api_set |= ES3; + if (_mesa_is_gles31(ctx)) + api_set |= ES31; /* only count once */ if (ctx->Extensions.Count != 0) @@ -880,6 +886,8 @@ _mesa_get_enabled_extension(struct gl_context *ctx, GLuint index) unsigned api_set = (1 << ctx->API); if (_mesa_is_gles3(ctx)) api_set |= ES3; + if (_mesa_is_gles31(ctx)) + api_set |= ES31; base = (GLboolean*) &ctx->Extensions; n = 0; diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 8418340..07db195 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2033,6 +2033,16 @@ renderbuffer_storage(struct gl_context *ctx, struct gl_renderbuffer *rb, */ sample_count_error = _mesa_check_sample_count(ctx, GL_RENDERBUFFER, internalFormat, samples); + + /* Section 2.5 (GL Errors) of OpenGL 3.0 specification, page 16: + * + * "If a negative number is provided where an argument of type sizei or + * sizeiptr is specified, the error INVALID VALUE is generated." 
+ */ + if (samples < 0) { + sample_count_error = GL_INVALID_VALUE; + } + if (sample_count_error != GL_NO_ERROR) { _mesa_error(ctx, sample_count_error, "%s(samples)", func); return; diff --git a/src/mesa/main/format_info.py b/src/mesa/main/format_info.py index 3bae57e..839d407 100644 --- a/src/mesa/main/format_info.py +++ b/src/mesa/main/format_info.py @@ -98,14 +98,6 @@ def get_gl_data_type(fmat): else: assert False -def get_mesa_layout(fmat): - if fmat.layout == 'array': - return 'MESA_FORMAT_LAYOUT_ARRAY' - elif fmat.layout == 'packed': - return 'MESA_FORMAT_LAYOUT_PACKED' - else: - return 'MESA_FORMAT_LAYOUT_OTHER' - def get_channel_bits(fmat, chan_name): if fmat.is_compressed(): # These values are pretty-much bogus, but OpenGL requires that we @@ -179,7 +171,7 @@ for fmat in formats: print ' {' print ' {0},'.format(fmat.name) print ' "{0}",'.format(fmat.name) - print ' {0},'.format(get_mesa_layout(fmat)) + print ' {0},'.format('MESA_FORMAT_LAYOUT_' + fmat.layout.upper()) print ' {0},'.format(get_gl_base_format(fmat)) print ' {0},'.format(get_gl_data_type(fmat)) @@ -188,6 +180,8 @@ for fmat in formats: bits = [ get_channel_bits(fmat, name) for name in ['l', 'i', 'z', 's']] print ' {0},'.format(', '.join(map(str, bits))) + print ' {0:d},'.format(fmat.colorspace == 'srgb') + print ' {0}, {1}, {2},'.format(fmat.block_width, fmat.block_height, int(fmat.block_size() / 8)) diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c index 810bb16..5fdabd5 100644 --- a/src/mesa/main/format_utils.c +++ b/src/mesa/main/format_utils.c @@ -602,7 +602,7 @@ _mesa_format_to_array(mesa_format format, GLenum *type, int *num_components, *normalized = !_mesa_is_format_integer(format); - _mesa_format_to_type_and_comps(format, type, &format_components); + _mesa_uncompressed_format_to_type_and_comps(format, type, &format_components); switch (_mesa_get_format_layout(format)) { case MESA_FORMAT_LAYOUT_ARRAY: diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c 
index d7b2bae..8dd07d8 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -65,6 +65,8 @@ struct gl_format_info GLubyte DepthBits; GLubyte StencilBits; + bool IsSRGBFormat; + /** * To describe compressed formats. If not compressed, Width=Height=1. */ @@ -81,6 +83,7 @@ static const struct gl_format_info * _mesa_get_format_info(mesa_format format) { const struct gl_format_info *info = &format_info[format]; + STATIC_ASSERT(ARRAY_SIZE(format_info) == MESA_FORMAT_COUNT); assert(info->Name == format); return info; } @@ -188,6 +191,12 @@ _mesa_get_format_max_bits(mesa_format format) * The return value will be one of: * MESA_FORMAT_LAYOUT_ARRAY * MESA_FORMAT_LAYOUT_PACKED + * MESA_FORMAT_LAYOUT_S3TC + * MESA_FORMAT_LAYOUT_RGTC + * MESA_FORMAT_LAYOUT_FXT1 + * MESA_FORMAT_LAYOUT_ETC1 + * MESA_FORMAT_LAYOUT_ETC2 + * MESA_FORMAT_LAYOUT_BPTC * MESA_FORMAT_LAYOUT_OTHER */ extern enum mesa_format_layout @@ -562,30 +571,8 @@ _mesa_is_format_color_format(mesa_format format) GLenum _mesa_get_format_color_encoding(mesa_format format) { - /* XXX this info should be encoded in gl_format_info */ - switch (format) { - case MESA_FORMAT_BGR_SRGB8: - case MESA_FORMAT_A8B8G8R8_SRGB: - case MESA_FORMAT_B8G8R8A8_SRGB: - case MESA_FORMAT_A8R8G8B8_SRGB: - case MESA_FORMAT_R8G8B8A8_SRGB: - case MESA_FORMAT_L_SRGB8: - case MESA_FORMAT_L8A8_SRGB: - case MESA_FORMAT_A8L8_SRGB: - case MESA_FORMAT_SRGB_DXT1: - case MESA_FORMAT_SRGBA_DXT1: - case MESA_FORMAT_SRGBA_DXT3: - case MESA_FORMAT_SRGBA_DXT5: - case MESA_FORMAT_R8G8B8X8_SRGB: - case MESA_FORMAT_ETC2_SRGB8: - case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC: - case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1: - case MESA_FORMAT_B8G8R8X8_SRGB: - case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM: - return GL_SRGB; - default: - return GL_LINEAR; - } + const struct gl_format_info *info = _mesa_get_format_info(format); + return info->IsSRGBFormat ? 
GL_SRGB : GL_LINEAR; } @@ -878,124 +865,13 @@ _mesa_format_row_stride(mesa_format format, GLsizei width) } -/** - * Debug/test: check that all formats are handled in the - * _mesa_format_to_type_and_comps() function. When new pixel formats - * are added to Mesa, that function needs to be updated. - * This is a no-op after the first call. - */ -static void -check_format_to_type_and_comps(void) -{ - mesa_format f; - - for (f = MESA_FORMAT_NONE + 1; f < MESA_FORMAT_COUNT; f++) { - GLenum datatype = 0; - GLuint comps = 0; - /* This function will emit a problem/warning if the format is - * not handled. - */ - _mesa_format_to_type_and_comps(f, &datatype, &comps); - } -} /** - * Do sanity checking of the format info table. + * Return datatype and number of components per texel for the given + * uncompressed mesa_format. Only used for mipmap generation code. */ void -_mesa_test_formats(void) -{ - GLuint i; - - STATIC_ASSERT(ARRAY_SIZE(format_info) == MESA_FORMAT_COUNT); - - for (i = 0; i < MESA_FORMAT_COUNT; i++) { - const struct gl_format_info *info = _mesa_get_format_info(i); - assert(info); - - assert(info->Name == i); - - if (info->Name == MESA_FORMAT_NONE) - continue; - - if (info->BlockWidth == 1 && info->BlockHeight == 1) { - if (info->RedBits > 0) { - GLuint t = info->RedBits + info->GreenBits - + info->BlueBits + info->AlphaBits; - assert(t / 8 <= info->BytesPerBlock); - (void) t; - } - } - - assert(info->DataType == GL_UNSIGNED_NORMALIZED || - info->DataType == GL_SIGNED_NORMALIZED || - info->DataType == GL_UNSIGNED_INT || - info->DataType == GL_INT || - info->DataType == GL_FLOAT || - /* Z32_FLOAT_X24S8 has DataType of GL_NONE */ - info->DataType == GL_NONE); - - if (info->BaseFormat == GL_RGB) { - assert(info->RedBits > 0); - assert(info->GreenBits > 0); - assert(info->BlueBits > 0); - assert(info->AlphaBits == 0); - assert(info->LuminanceBits == 0); - assert(info->IntensityBits == 0); - } - else if (info->BaseFormat == GL_RGBA) { - assert(info->RedBits > 0); - 
assert(info->GreenBits > 0); - assert(info->BlueBits > 0); - assert(info->AlphaBits > 0); - assert(info->LuminanceBits == 0); - assert(info->IntensityBits == 0); - } - else if (info->BaseFormat == GL_RG) { - assert(info->RedBits > 0); - assert(info->GreenBits > 0); - assert(info->BlueBits == 0); - assert(info->AlphaBits == 0); - assert(info->LuminanceBits == 0); - assert(info->IntensityBits == 0); - } - else if (info->BaseFormat == GL_RED) { - assert(info->RedBits > 0); - assert(info->GreenBits == 0); - assert(info->BlueBits == 0); - assert(info->AlphaBits == 0); - assert(info->LuminanceBits == 0); - assert(info->IntensityBits == 0); - } - else if (info->BaseFormat == GL_LUMINANCE) { - assert(info->RedBits == 0); - assert(info->GreenBits == 0); - assert(info->BlueBits == 0); - assert(info->AlphaBits == 0); - assert(info->LuminanceBits > 0); - assert(info->IntensityBits == 0); - } - else if (info->BaseFormat == GL_INTENSITY) { - assert(info->RedBits == 0); - assert(info->GreenBits == 0); - assert(info->BlueBits == 0); - assert(info->AlphaBits == 0); - assert(info->LuminanceBits == 0); - assert(info->IntensityBits > 0); - } - } - - check_format_to_type_and_comps(); -} - - - -/** - * Return datatype and number of components per texel for the given mesa_format. - * Only used for mipmap generation code. 
- */ -void -_mesa_format_to_type_and_comps(mesa_format format, +_mesa_uncompressed_format_to_type_and_comps(mesa_format format, GLenum *datatype, GLuint *comps) { switch (format) { @@ -1229,44 +1105,6 @@ _mesa_format_to_type_and_comps(mesa_format format, *comps = 2; return; - case MESA_FORMAT_RGB_FXT1: - case MESA_FORMAT_RGBA_FXT1: - case MESA_FORMAT_RGB_DXT1: - case MESA_FORMAT_RGBA_DXT1: - case MESA_FORMAT_RGBA_DXT3: - case MESA_FORMAT_RGBA_DXT5: - case MESA_FORMAT_SRGB_DXT1: - case MESA_FORMAT_SRGBA_DXT1: - case MESA_FORMAT_SRGBA_DXT3: - case MESA_FORMAT_SRGBA_DXT5: - case MESA_FORMAT_R_RGTC1_UNORM: - case MESA_FORMAT_R_RGTC1_SNORM: - case MESA_FORMAT_RG_RGTC2_UNORM: - case MESA_FORMAT_RG_RGTC2_SNORM: - case MESA_FORMAT_L_LATC1_UNORM: - case MESA_FORMAT_L_LATC1_SNORM: - case MESA_FORMAT_LA_LATC2_UNORM: - case MESA_FORMAT_LA_LATC2_SNORM: - case MESA_FORMAT_ETC1_RGB8: - case MESA_FORMAT_ETC2_RGB8: - case MESA_FORMAT_ETC2_SRGB8: - case MESA_FORMAT_ETC2_RGBA8_EAC: - case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC: - case MESA_FORMAT_ETC2_R11_EAC: - case MESA_FORMAT_ETC2_RG11_EAC: - case MESA_FORMAT_ETC2_SIGNED_R11_EAC: - case MESA_FORMAT_ETC2_SIGNED_RG11_EAC: - case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1: - case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1: - case MESA_FORMAT_BPTC_RGBA_UNORM: - case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM: - case MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT: - case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT: - /* XXX generate error instead? 
*/ - *datatype = GL_UNSIGNED_BYTE; - *comps = 0; - return; - case MESA_FORMAT_RGBA_FLOAT32: *datatype = GL_FLOAT; *comps = 4; @@ -1564,14 +1402,12 @@ _mesa_format_to_type_and_comps(mesa_format format, case MESA_FORMAT_COUNT: assert(0); return; - - case MESA_FORMAT_NONE: - /* For debug builds, warn if any formats are not handled */ -#ifdef DEBUG default: -#endif - _mesa_problem(NULL, "bad format %s in _mesa_format_to_type_and_comps", + /* Warn if any formats are not handled */ + _mesa_problem(NULL, "bad format %s in _mesa_uncompressed_format_to_type_and_comps", _mesa_get_format_name(format)); + assert(format == MESA_FORMAT_NONE || + _mesa_is_format_compressed(format)); *datatype = 0; *comps = 1; } @@ -1584,20 +1420,26 @@ _mesa_format_to_type_and_comps(mesa_format format, * \param format the user-specified image format * \param type the user-specified image datatype * \param swapBytes typically the current pixel pack/unpack byteswap state + * \param[out] error GL_NO_ERROR if format is an expected input. + * GL_INVALID_ENUM if format is an unexpected input. * \return GL_TRUE if the formats match, GL_FALSE otherwise. */ GLboolean _mesa_format_matches_format_and_type(mesa_format mesa_format, GLenum format, GLenum type, - GLboolean swapBytes) + GLboolean swapBytes, GLenum *error) { const GLboolean littleEndian = _mesa_little_endian(); + if (error) + *error = GL_NO_ERROR; /* Note: When reading a GL format/type combination, the format lists channel * assignments from most significant channel in the type to least * significant. A type with _REV indicates that the assignments are * swapped, so they are listed from least significant to most significant. * + * Compressed formats will fall through and return GL_FALSE. + * * For sanity, please keep this switch statement ordered the same as the * enums in formats.h. 
*/ @@ -1858,26 +1700,6 @@ _mesa_format_matches_format_and_type(mesa_format mesa_format, case MESA_FORMAT_S_UINT8: return format == GL_STENCIL_INDEX && type == GL_UNSIGNED_BYTE; - case MESA_FORMAT_SRGB_DXT1: - case MESA_FORMAT_SRGBA_DXT1: - case MESA_FORMAT_SRGBA_DXT3: - case MESA_FORMAT_SRGBA_DXT5: - return GL_FALSE; - - case MESA_FORMAT_RGB_FXT1: - case MESA_FORMAT_RGBA_FXT1: - case MESA_FORMAT_RGB_DXT1: - case MESA_FORMAT_RGBA_DXT1: - case MESA_FORMAT_RGBA_DXT3: - case MESA_FORMAT_RGBA_DXT5: - return GL_FALSE; - - case MESA_FORMAT_BPTC_RGBA_UNORM: - case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM: - case MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT: - case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT: - return GL_FALSE; - case MESA_FORMAT_RGBA_FLOAT32: return format == GL_RGBA && type == GL_FLOAT && !swapBytes; case MESA_FORMAT_RGBA_FLOAT16: @@ -2074,31 +1896,6 @@ _mesa_format_matches_format_and_type(mesa_format mesa_format, return format == GL_RGBA && type == GL_UNSIGNED_SHORT && !swapBytes; - case MESA_FORMAT_R_RGTC1_UNORM: - case MESA_FORMAT_R_RGTC1_SNORM: - case MESA_FORMAT_RG_RGTC2_UNORM: - case MESA_FORMAT_RG_RGTC2_SNORM: - return GL_FALSE; - - case MESA_FORMAT_L_LATC1_UNORM: - case MESA_FORMAT_L_LATC1_SNORM: - case MESA_FORMAT_LA_LATC2_UNORM: - case MESA_FORMAT_LA_LATC2_SNORM: - return GL_FALSE; - - case MESA_FORMAT_ETC1_RGB8: - case MESA_FORMAT_ETC2_RGB8: - case MESA_FORMAT_ETC2_SRGB8: - case MESA_FORMAT_ETC2_RGBA8_EAC: - case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC: - case MESA_FORMAT_ETC2_R11_EAC: - case MESA_FORMAT_ETC2_RG11_EAC: - case MESA_FORMAT_ETC2_SIGNED_R11_EAC: - case MESA_FORMAT_ETC2_SIGNED_RG11_EAC: - case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1: - case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1: - return GL_FALSE; - case MESA_FORMAT_A_SNORM8: return format == GL_ALPHA && type == GL_BYTE; case MESA_FORMAT_L_SNORM8: @@ -2181,8 +1978,11 @@ _mesa_format_matches_format_and_type(mesa_format mesa_format, case MESA_FORMAT_B8G8R8X8_SRGB: case MESA_FORMAT_X8R8G8B8_SRGB: return 
GL_FALSE; + default: + assert(_mesa_is_format_compressed(format)); + if (error) + *error = GL_INVALID_ENUM; } - return GL_FALSE; } diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h index d938e6a..4936fa0 100644 --- a/src/mesa/main/formats.h +++ b/src/mesa/main/formats.h @@ -64,6 +64,12 @@ extern "C" { enum mesa_format_layout { MESA_FORMAT_LAYOUT_ARRAY, MESA_FORMAT_LAYOUT_PACKED, + MESA_FORMAT_LAYOUT_S3TC, + MESA_FORMAT_LAYOUT_RGTC, + MESA_FORMAT_LAYOUT_FXT1, + MESA_FORMAT_LAYOUT_ETC1, + MESA_FORMAT_LAYOUT_ETC2, + MESA_FORMAT_LAYOUT_BPTC, MESA_FORMAT_LAYOUT_OTHER, }; @@ -659,7 +665,7 @@ extern GLint _mesa_format_row_stride(mesa_format format, GLsizei width); extern void -_mesa_format_to_type_and_comps(mesa_format format, +_mesa_uncompressed_format_to_type_and_comps(mesa_format format, GLenum *datatype, GLuint *comps); extern void @@ -680,7 +686,7 @@ _mesa_format_has_color_component(mesa_format format, int component); GLboolean _mesa_format_matches_format_and_type(mesa_format mesa_format, GLenum format, GLenum type, - GLboolean swapBytes); + GLboolean swapBytes, GLenum *error); #ifdef __cplusplus } diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 7dc92f1..517c391 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -806,7 +806,7 @@ descriptor=[ [ "MAX_VERTEX_ATTRIB_BINDINGS", "CONTEXT_ENUM(Const.MaxVertexAttribBindings), NO_EXTRA" ], # GL_ARB_shader_image_load_store - [ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedImageUnitsAndFragmentOutputs), extra_ARB_shader_image_load_store" ], + [ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedShaderOutputResources), extra_ARB_shader_image_load_store" ], [ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store" ], [ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), 
extra_ARB_shader_image_load_store_and_geometry_shader"], diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index 1e22f93..2bf5902 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -1886,7 +1886,7 @@ generate_mipmap_uncompressed(struct gl_context *ctx, GLenum target, GLenum datatype; GLuint comps; - _mesa_format_to_type_and_comps(srcImage->TexFormat, &datatype, &comps); + _mesa_uncompressed_format_to_type_and_comps(srcImage->TexFormat, &datatype, &comps); for (level = texObj->BaseLevel; level < maxLevel; level++) { /* generate image[level+1] from image[level] */ diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 83f3717..4883cbc 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -94,7 +94,10 @@ struct vbo_context; #define PRIM_OUTSIDE_BEGIN_END (PRIM_MAX + 1) #define PRIM_UNKNOWN (PRIM_MAX + 2) - +#define VARYING_SLOT_MAX (VARYING_SLOT_VAR0 + MAX_VARYING) +#define VARYING_SLOT_PATCH0 (VARYING_SLOT_MAX) +#define VARYING_SLOT_TESS_MAX (VARYING_SLOT_PATCH0 + MAX_VARYING) +#define FRAG_RESULT_MAX (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS) /** * Determine if the given gl_varying_slot appears in the fragment shader. 
@@ -117,7 +120,6 @@ _mesa_varying_slot_in_fs(gl_varying_slot slot) } } - /** * Indexes for all renderbuffers */ @@ -3571,7 +3573,7 @@ struct gl_constants /* GL_ARB_shader_image_load_store */ GLuint MaxImageUnits; - GLuint MaxCombinedImageUnitsAndFragmentOutputs; + GLuint MaxCombinedShaderOutputResources; GLuint MaxImageSamples; GLuint MaxCombinedImageUniforms; @@ -3656,6 +3658,7 @@ struct gl_extensions GLboolean ARB_shader_atomic_counters; GLboolean ARB_shader_bit_encoding; GLboolean ARB_shader_image_load_store; + GLboolean ARB_shader_image_size; GLboolean ARB_shader_precision; GLboolean ARB_shader_stencil_export; GLboolean ARB_shader_storage_buffer_object; @@ -4073,10 +4076,16 @@ struct gl_image_unit GLboolean _Valid; /** + * Layer of the texture object bound to this unit as specified by the + * application. + */ + GLuint Layer; + + /** * Layer of the texture object bound to this unit, or zero if the * whole level is bound. */ - GLuint Layer; + GLuint _Layer; /** * Access allowed to this texture image. Either \c GL_READ_ONLY, diff --git a/src/mesa/main/multisample.c b/src/mesa/main/multisample.c index 09e6154..e7783ea 100644 --- a/src/mesa/main/multisample.c +++ b/src/mesa/main/multisample.c @@ -150,15 +150,6 @@ GLenum _mesa_check_sample_count(struct gl_context *ctx, GLenum target, GLenum internalFormat, GLsizei samples) { - /* Section 2.5 (GL Errors) of OpenGL 3.0 specification, page 16: - * - * "If a negative number is provided where an argument of type sizei or - * sizeiptr is specified, the error INVALID VALUE is generated." 
- */ - if (samples < 0) { - return GL_INVALID_VALUE; - } - /* Section 4.4 (Framebuffer objects), page 198 of the OpenGL ES 3.0.0 * specification says: * diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index d826ecf..1277944 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -201,7 +201,7 @@ readpixels_can_use_memcpy(const struct gl_context *ctx, GLenum format, GLenum ty /* The Mesa format must match the input format and type. */ if (!_mesa_format_matches_format_and_type(rb->Format, format, type, - packing->SwapBytes)) { + packing->SwapBytes, NULL)) { return GL_FALSE; } diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index f9a7d13..b227c17 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -1995,55 +1995,6 @@ _mesa_use_shader_program(struct gl_context *ctx, GLenum type, } -static GLuint -_mesa_create_shader_program(struct gl_context* ctx, GLboolean separate, - GLenum type, GLsizei count, - const GLchar* const *strings) -{ - const GLuint shader = create_shader(ctx, type); - GLuint program = 0; - - if (shader) { - _mesa_ShaderSource(shader, count, strings, NULL); - - compile_shader(ctx, shader); - - program = create_shader_program(ctx); - if (program) { - struct gl_shader_program *shProg; - struct gl_shader *sh; - GLint compiled = GL_FALSE; - - shProg = _mesa_lookup_shader_program(ctx, program); - sh = _mesa_lookup_shader(ctx, shader); - - shProg->SeparateShader = separate; - - get_shaderiv(ctx, shader, GL_COMPILE_STATUS, &compiled); - if (compiled) { - attach_shader(ctx, program, shader); - link_program(ctx, program); - detach_shader(ctx, program, shader); - -#if 0 - /* Possibly... 
*/ - if (active-user-defined-varyings-in-linked-program) { - append-error-to-info-log; - shProg->LinkStatus = GL_FALSE; - } -#endif - } - if (sh->InfoLog) - ralloc_strcat(&shProg->InfoLog, sh->InfoLog); - } - - delete_shader(ctx, shader); - } - - return program; -} - - /** * Copy program-specific data generated by linking from the gl_shader_program * object to a specific gl_program object. @@ -2111,7 +2062,56 @@ _mesa_CreateShaderProgramv(GLenum type, GLsizei count, { GET_CURRENT_CONTEXT(ctx); - return _mesa_create_shader_program(ctx, GL_TRUE, type, count, strings); + const GLuint shader = create_shader(ctx, type); + GLuint program = 0; + + /* + * According to OpenGL 4.5 and OpenGL ES 3.1 standards, section 7.3: + * GL_INVALID_VALUE should be generated if count < 0 + */ + if (count < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, "glCreateShaderProgram (count < 0)"); + return program; + } + + if (shader) { + _mesa_ShaderSource(shader, count, strings, NULL); + + compile_shader(ctx, shader); + + program = create_shader_program(ctx); + if (program) { + struct gl_shader_program *shProg; + struct gl_shader *sh; + GLint compiled = GL_FALSE; + + shProg = _mesa_lookup_shader_program(ctx, program); + sh = _mesa_lookup_shader(ctx, shader); + + shProg->SeparateShader = GL_TRUE; + + get_shaderiv(ctx, shader, GL_COMPILE_STATUS, &compiled); + if (compiled) { + attach_shader(ctx, program, shader); + link_program(ctx, program); + detach_shader(ctx, program, shader); + +#if 0 + /* Possibly... 
*/ + if (active-user-defined-varyings-in-linked-program) { + append-error-to-info-log; + shProg->LinkStatus = GL_FALSE; + } +#endif + } + if (sh->InfoLog) + ralloc_strcat(&shProg->InfoLog, sh->InfoLog); + } + + delete_shader(ctx, shader); + } + + return program; } diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c index a348cdb..c4bba84 100644 --- a/src/mesa/main/shaderimage.c +++ b/src/mesa/main/shaderimage.c @@ -331,17 +331,88 @@ get_image_format_class(mesa_format format) } } +/** + * Return whether an image format should be supported based on the current API + * version of the context. + */ +static bool +is_image_format_supported(const struct gl_context *ctx, GLenum format) +{ + switch (format) { + /* Formats supported on both desktop and ES GL, c.f. table 8.27 of the + * OpenGL ES 3.1 specification. + */ + case GL_RGBA32F: + case GL_RGBA16F: + case GL_R32F: + case GL_RGBA32UI: + case GL_RGBA16UI: + case GL_RGBA8UI: + case GL_R32UI: + case GL_RGBA32I: + case GL_RGBA16I: + case GL_RGBA8I: + case GL_R32I: + case GL_RGBA8: + case GL_RGBA8_SNORM: + return true; + + /* Formats supported on unextended desktop GL and the original + * ARB_shader_image_load_store extension, c.f. table 3.21 of the OpenGL 4.2 + * specification. + */ + case GL_RG32F: + case GL_RG16F: + case GL_R11F_G11F_B10F: + case GL_R16F: + case GL_RGB10_A2UI: + case GL_RG32UI: + case GL_RG16UI: + case GL_RG8UI: + case GL_R16UI: + case GL_R8UI: + case GL_RG32I: + case GL_RG16I: + case GL_RG8I: + case GL_R16I: + case GL_R8I: + case GL_RGBA16: + case GL_RGB10_A2: + case GL_RG16: + case GL_RG8: + case GL_R16: + case GL_R8: + case GL_RGBA16_SNORM: + case GL_RG16_SNORM: + case GL_RG8_SNORM: + case GL_R16_SNORM: + case GL_R8_SNORM: + return _mesa_is_desktop_gl(ctx); + + default: + return false; + } +} + +struct gl_image_unit +_mesa_default_image_unit(struct gl_context *ctx) +{ + const GLenum format = _mesa_is_desktop_gl(ctx) ? 
GL_R8 : GL_R32UI; + const struct gl_image_unit u = { + .Access = GL_READ_ONLY, + .Format = format, + ._ActualFormat = _mesa_get_shader_image_format(format) + }; + return u; +} + void _mesa_init_image_units(struct gl_context *ctx) { unsigned i; - for (i = 0; i < ARRAY_SIZE(ctx->ImageUnits); ++i) { - struct gl_image_unit *u = &ctx->ImageUnits[i]; - u->Access = GL_READ_ONLY; - u->Format = GL_R8; - u->_ActualFormat = _mesa_get_shader_image_format(u->Format); - } + for (i = 0; i < ARRAY_SIZE(ctx->ImageUnits); ++i) + ctx->ImageUnits[i] = _mesa_default_image_unit(ctx); } static GLboolean @@ -362,7 +433,7 @@ validate_image_unit(struct gl_context *ctx, struct gl_image_unit *u) return GL_FALSE; if (_mesa_tex_target_is_layered(t->Target) && - u->Layer >= _mesa_get_texture_layers(t, u->Level)) + u->_Layer >= _mesa_get_texture_layers(t, u->Level)) return GL_FALSE; if (t->Target == GL_TEXTURE_BUFFER) { @@ -370,7 +441,7 @@ validate_image_unit(struct gl_context *ctx, struct gl_image_unit *u) } else { struct gl_texture_image *img = (t->Target == GL_TEXTURE_CUBE_MAP ? - t->Image[u->Layer][u->Level] : + t->Image[u->_Layer][u->Level] : t->Image[0][u->Level]); if (!img || img->Border || img->NumSamples > ctx->Const.MaxImageSamples) @@ -442,7 +513,7 @@ validate_bind_image_texture(struct gl_context *ctx, GLuint unit, return GL_FALSE; } - if (!_mesa_get_shader_image_format(format)) { + if (!is_image_format_supported(ctx, format)) { _mesa_error(ctx, GL_INVALID_VALUE, "glBindImageTexture(format)"); return GL_FALSE; } @@ -475,6 +546,18 @@ _mesa_BindImageTexture(GLuint unit, GLuint texture, GLint level, return; } + /* From section 8.22 "Texture Image Loads and Stores" of the OpenGL ES + * 3.1 spec: + * + * "An INVALID_OPERATION error is generated if texture is not the name + * of an immutable texture object." 
+ */ + if (_mesa_is_gles(ctx) && !t->Immutable) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glBindImageTexture(!immutable)"); + return; + } + _mesa_reference_texobj(&u->TexObj, t); } else { _mesa_reference_texobj(&u->TexObj, NULL); @@ -488,7 +571,8 @@ _mesa_BindImageTexture(GLuint unit, GLuint texture, GLint level, if (u->TexObj && _mesa_tex_target_is_layered(u->TexObj->Target)) { u->Layered = layered; - u->Layer = (layered ? 0 : layer); + u->Layer = layer; + u->_Layer = (u->Layered ? 0 : u->Layer); } else { u->Layered = GL_FALSE; u->Layer = 0; @@ -599,7 +683,7 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures) tex_format = image->InternalFormat; } - if (_mesa_get_shader_image_format(tex_format) == MESA_FORMAT_NONE) { + if (!is_image_format_supported(ctx, tex_format)) { /* The ARB_multi_bind spec says: * * "An INVALID_OPERATION error is generated if the internal @@ -619,7 +703,7 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures) _mesa_reference_texobj(&u->TexObj, texObj); u->Level = 0; u->Layered = _mesa_tex_target_is_layered(texObj->Target); - u->Layer = 0; + u->_Layer = u->Layer = 0; u->Access = GL_READ_WRITE; u->Format = tex_format; u->_ActualFormat = _mesa_get_shader_image_format(tex_format); @@ -629,7 +713,7 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures) _mesa_reference_texobj(&u->TexObj, NULL); u->Level = 0; u->Layered = GL_FALSE; - u->Layer = 0; + u->_Layer = u->Layer = 0; u->Access = GL_READ_ONLY; u->Format = GL_R8; u->_ActualFormat = MESA_FORMAT_R_UNORM8; @@ -653,3 +737,43 @@ _mesa_MemoryBarrier(GLbitfield barriers) if (ctx->Driver.MemoryBarrier) ctx->Driver.MemoryBarrier(ctx, barriers); } + +void GLAPIENTRY +_mesa_MemoryBarrierByRegion(GLbitfield barriers) +{ + GET_CURRENT_CONTEXT(ctx); + + GLbitfield all_allowed_bits = GL_ATOMIC_COUNTER_BARRIER_BIT | + GL_FRAMEBUFFER_BARRIER_BIT | + GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | + GL_SHADER_STORAGE_BARRIER_BIT | + 
GL_TEXTURE_FETCH_BARRIER_BIT | + GL_UNIFORM_BARRIER_BIT; + + if (ctx->Driver.MemoryBarrier) { + /* From section 7.11.2 of the OpenGL ES 3.1 specification: + * + * "When barriers is ALL_BARRIER_BITS, shader memory accesses will be + * synchronized relative to all these barrier bits, but not to other + * barrier bits specific to MemoryBarrier." + * + * That is, if barriers is the special value GL_ALL_BARRIER_BITS, then all + * barriers allowed by glMemoryBarrierByRegion should be activated." + */ + if (barriers == GL_ALL_BARRIER_BITS) + return ctx->Driver.MemoryBarrier(ctx, all_allowed_bits); + + /* From section 7.11.2 of the OpenGL ES 3.1 specification: + * + * "An INVALID_VALUE error is generated if barriers is not the special + * value ALL_BARRIER_BITS, and has any bits set other than those + * described above." + */ + if ((barriers & ~all_allowed_bits) != 0) { + _mesa_error(ctx, GL_INVALID_VALUE, + "glMemoryBarrierByRegion(unsupported barrier bit"); + } + + ctx->Driver.MemoryBarrier(ctx, barriers); + } +} diff --git a/src/mesa/main/shaderimage.h b/src/mesa/main/shaderimage.h index 33d8a1e..bbe088a 100644 --- a/src/mesa/main/shaderimage.h +++ b/src/mesa/main/shaderimage.h @@ -43,6 +43,12 @@ mesa_format _mesa_get_shader_image_format(GLenum format); /** + * Get a single image unit struct with the default state. + */ +struct gl_image_unit +_mesa_default_image_unit(struct gl_context *ctx); + +/** * Initialize a context's shader image units to the default state. 
*/ void @@ -68,6 +74,9 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures); void GLAPIENTRY _mesa_MemoryBarrier(GLbitfield barriers); +void GLAPIENTRY +_mesa_MemoryBarrierByRegion(GLbitfield barriers); + #ifdef __cplusplus } #endif diff --git a/src/mesa/main/tests/Makefile.am b/src/mesa/main/tests/Makefile.am index 251474d..9467f3b 100644 --- a/src/mesa/main/tests/Makefile.am +++ b/src/mesa/main/tests/Makefile.am @@ -27,6 +27,7 @@ AM_CPPFLAGS += -DHAVE_SHARED_GLAPI main_test_SOURCES += \ dispatch_sanity.cpp \ + mesa_formats.cpp \ program_state_string.cpp main_test_LDADD += \ diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index af89d2c..59107eb 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -851,6 +851,9 @@ const struct function common_desktop_functions_possible[] = { // { "glTextureStorage2DMultisampleEXT", 43, -1 }, // XXX: Add to xml // { "glTextureStorage3DMultisampleEXT", 43, -1 }, // XXX: Add to xml +/* GL 4.5 */ + { "glMemoryBarrierByRegion", 45, -1 }, + /* GL_ARB_internalformat_query */ { "glGetInternalformativ", 30, -1 }, @@ -1739,6 +1742,9 @@ const struct function gl_core_functions_possible[] = { // { "glTextureStorage2DMultisampleEXT", 43, -1 }, // XXX: Add to xml // { "glTextureStorage3DMultisampleEXT", 43, -1 }, // XXX: Add to xml +/* GL 4.5 */ + { "glMemoryBarrierByRegion", 45, -1 }, + /* GL_ARB_direct_state_access */ { "glCreateTransformFeedbacks", 45, -1 }, { "glTransformFeedbackBufferBase", 45, -1 }, @@ -2461,8 +2467,7 @@ const struct function gles31_functions_possible[] = { { "glGetBooleani_v", 31, -1 }, { "glMemoryBarrier", 31, -1 }, - // FINISHME: This function has not been implemented yet. 
- // { "glMemoryBarrierByRegion", 31, -1 }, + { "glMemoryBarrierByRegion", 31, -1 }, { "glTexStorage2DMultisample", 31, -1 }, { "glGetMultisamplefv", 31, -1 }, diff --git a/src/mesa/main/tests/mesa_formats.cpp b/src/mesa/main/tests/mesa_formats.cpp new file mode 100644 index 0000000..5356cd9 --- /dev/null +++ b/src/mesa/main/tests/mesa_formats.cpp @@ -0,0 +1,139 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \name mesa_formats.cpp + * + * Verify that all mesa formats are handled in certain functions and that + * the format info table is sane. + * + */ + +#include <gtest/gtest.h> + +#include "main/formats.h" +#include "main/glformats.h" + +/** + * Debug/test: check that all uncompressed formats are handled in the + * _mesa_uncompressed_format_to_type_and_comps() function. 
When new pixel + * formats are added to Mesa, that function needs to be updated. + */ +TEST(MesaFormatsTest, FormatTypeAndComps) +{ + for (int fi = MESA_FORMAT_NONE + 1; fi < MESA_FORMAT_COUNT; ++fi) { + mesa_format f = (mesa_format) fi; + SCOPED_TRACE(_mesa_get_format_name(f)); + + /* This function will emit a problem/warning if the format is + * not handled. + */ + if (!_mesa_is_format_compressed(f)) { + GLenum datatype = 0; + GLenum error = 0; + GLuint comps = 0; + + /* If the datatype is zero, the format was not handled */ + _mesa_uncompressed_format_to_type_and_comps(f, &datatype, &comps); + EXPECT_NE(datatype, (GLenum)0); + + /* If the error isn't NO_ERROR, the format was not handled. + * Use an arbitrary GLenum format. */ + _mesa_format_matches_format_and_type(f, GL_RG, datatype, + GL_FALSE, &error); + EXPECT_EQ((GLenum)GL_NO_ERROR, error); + } + + } +} + +/** + * Do sanity checking of the format info table. + */ +TEST(MesaFormatsTest, FormatSanity) +{ + for (int fi = 0; fi < MESA_FORMAT_COUNT; ++fi) { + mesa_format f = (mesa_format) fi; + SCOPED_TRACE(_mesa_get_format_name(f)); + GLenum datatype = _mesa_get_format_datatype(f); + GLint r = _mesa_get_format_bits(f, GL_RED_BITS); + GLint g = _mesa_get_format_bits(f, GL_GREEN_BITS); + GLint b = _mesa_get_format_bits(f, GL_BLUE_BITS); + GLint a = _mesa_get_format_bits(f, GL_ALPHA_BITS); + GLint l = _mesa_get_format_bits(f, GL_TEXTURE_LUMINANCE_SIZE); + GLint i = _mesa_get_format_bits(f, GL_TEXTURE_INTENSITY_SIZE); + + /* Note: Z32_FLOAT_X24S8 has datatype of GL_NONE */ + EXPECT_TRUE(datatype == GL_NONE || + datatype == GL_UNSIGNED_NORMALIZED || + datatype == GL_SIGNED_NORMALIZED || + datatype == GL_UNSIGNED_INT || + datatype == GL_INT || + datatype == GL_FLOAT); + + if (r > 0 && !_mesa_is_format_compressed(f)) { + GLint bytes = _mesa_get_format_bytes(f); + EXPECT_LE((r+g+b+a) / 8, bytes); + } + + /* Determines if the base format has a channel [rgba] or property [li]. 
+ * > indicates existence + * == indicates non-existence + */ + #define HAS_PROP(rop,gop,bop,aop,lop,iop) \ + do { \ + EXPECT_TRUE(r rop 0); \ + EXPECT_TRUE(g gop 0); \ + EXPECT_TRUE(b bop 0); \ + EXPECT_TRUE(a aop 0); \ + EXPECT_TRUE(l lop 0); \ + EXPECT_TRUE(i iop 0); \ + } while(0) + + switch (_mesa_get_format_base_format(f)) { + case GL_RGBA: + HAS_PROP(>,>,>,>,==,==); + break; + case GL_RGB: + HAS_PROP(>,>,>,==,==,==); + break; + case GL_RG: + HAS_PROP(>,>,==,==,==,==); + break; + case GL_RED: + HAS_PROP(>,==,==,==,==,==); + break; + case GL_LUMINANCE: + HAS_PROP(==,==,==,==,>,==); + break; + case GL_INTENSITY: + HAS_PROP(==,==,==,==,==,>); + break; + default: + break; + } + + #undef HAS_PROP + + } +} diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index 0fd1a36..edfb036 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -586,34 +586,16 @@ _mesa_compressed_image_address(GLint col, GLint row, GLint img, compressed_fetch_func _mesa_get_compressed_fetch_func(mesa_format format) { - switch (format) { - case MESA_FORMAT_RGB_DXT1: - case MESA_FORMAT_RGBA_DXT1: - case MESA_FORMAT_RGBA_DXT3: - case MESA_FORMAT_RGBA_DXT5: - case MESA_FORMAT_SRGB_DXT1: - case MESA_FORMAT_SRGBA_DXT1: - case MESA_FORMAT_SRGBA_DXT3: - case MESA_FORMAT_SRGBA_DXT5: + switch (_mesa_get_format_layout(format)) { + case MESA_FORMAT_LAYOUT_S3TC: return _mesa_get_dxt_fetch_func(format); - case MESA_FORMAT_RGB_FXT1: - case MESA_FORMAT_RGBA_FXT1: + case MESA_FORMAT_LAYOUT_FXT1: return _mesa_get_fxt_fetch_func(format); - case MESA_FORMAT_R_RGTC1_UNORM: - case MESA_FORMAT_L_LATC1_UNORM: - case MESA_FORMAT_R_RGTC1_SNORM: - case MESA_FORMAT_L_LATC1_SNORM: - case MESA_FORMAT_RG_RGTC2_UNORM: - case MESA_FORMAT_LA_LATC2_UNORM: - case MESA_FORMAT_RG_RGTC2_SNORM: - case MESA_FORMAT_LA_LATC2_SNORM: + case MESA_FORMAT_LAYOUT_RGTC: return _mesa_get_compressed_rgtc_func(format); - case MESA_FORMAT_ETC1_RGB8: + case MESA_FORMAT_LAYOUT_ETC1: return 
_mesa_get_etc_fetch_func(format); - case MESA_FORMAT_BPTC_RGBA_UNORM: - case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM: - case MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT: - case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT: + case MESA_FORMAT_LAYOUT_BPTC: return _mesa_get_bptc_fetch_func(format); default: return NULL; diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index c0ccce3..3c1e166 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -651,7 +651,7 @@ get_tex_memcpy(struct gl_context *ctx, texBaseFormat == texImage->_BaseFormat) { memCopy = _mesa_format_matches_format_and_type(texImage->TexFormat, format, type, - ctx->Pack.SwapBytes); + ctx->Pack.SwapBytes, NULL); } if (depth > 1) { diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 3a556a6..274ecad 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1785,18 +1785,36 @@ compressedteximage_only_format(const struct gl_context *ctx, GLenum format) } +/* Writes to a GL error pointer if non-null and returns whether or not the + * error is GL_NO_ERROR */ +static bool +write_error(GLenum *err_ptr, GLenum error) +{ + if (err_ptr) + *err_ptr = error; + + return error == GL_NO_ERROR; +} + /** * Helper function to determine whether a target and specific compression - * format are supported. + * format are supported. The error parameter returns GL_NO_ERROR if the + * target can be compressed. Otherwise it returns either GL_INVALID_OPERATION + * or GL_INVALID_ENUM, whichever is more appropriate. 
*/ GLboolean _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target, - GLenum intFormat) + GLenum intFormat, GLenum *error) { + GLboolean target_can_be_compresed = GL_FALSE; + mesa_format format = _mesa_glenum_to_compressed_format(intFormat); + enum mesa_format_layout layout = _mesa_get_format_layout(format); + switch (target) { case GL_TEXTURE_2D: case GL_PROXY_TEXTURE_2D: - return GL_TRUE; /* true for any compressed format so far */ + target_can_be_compresed = GL_TRUE; /* true for any compressed format so far */ + break; case GL_PROXY_TEXTURE_CUBE_MAP: case GL_TEXTURE_CUBE_MAP: case GL_TEXTURE_CUBE_MAP_POSITIVE_X: @@ -1805,26 +1823,46 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target, case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - return ctx->Extensions.ARB_texture_cube_map; + target_can_be_compresed = ctx->Extensions.ARB_texture_cube_map; + break; case GL_PROXY_TEXTURE_2D_ARRAY_EXT: case GL_TEXTURE_2D_ARRAY_EXT: - return ctx->Extensions.EXT_texture_array; + target_can_be_compresed = ctx->Extensions.EXT_texture_array; + break; case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY: case GL_TEXTURE_CUBE_MAP_ARRAY: - return ctx->Extensions.ARB_texture_cube_map_array; + /* From section 3.8.6, page 146 of OpenGL ES 3.0 spec: + * + * "The ETC2/EAC texture compression algorithm supports only + * two-dimensional images. If internalformat is an ETC2/EAC format, + * glCompressedTexImage3D will generate an INVALID_OPERATION error if + * target is not TEXTURE_2D_ARRAY." + * + * This should also be applicable for glTexStorage3D(). Other available + * targets for these functions are: TEXTURE_3D and TEXTURE_CUBE_MAP_ARRAY. 
+ */ + if (layout == MESA_FORMAT_LAYOUT_ETC2 && _mesa_is_gles3(ctx)) + return write_error(error, GL_INVALID_OPERATION); + + target_can_be_compresed = ctx->Extensions.ARB_texture_cube_map_array; + break; case GL_TEXTURE_3D: - switch (intFormat) { - case GL_COMPRESSED_RGBA_BPTC_UNORM: - case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM: - case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT: - case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT: - return ctx->Extensions.ARB_texture_compression_bptc; - default: - return GL_FALSE; + + /* See ETC2/EAC comment in switch case GL_TEXTURE_CUBE_MAP_ARRAY. */ + if (layout == MESA_FORMAT_LAYOUT_ETC2 && _mesa_is_gles3(ctx)) + return write_error(error, GL_INVALID_OPERATION); + + if (layout == MESA_FORMAT_LAYOUT_BPTC) { + target_can_be_compresed = ctx->Extensions.ARB_texture_compression_bptc; + break; } + + break; default: - return GL_FALSE; + break; } + return write_error(error, + target_can_be_compresed ? GL_NO_ERROR : GL_INVALID_ENUM); } @@ -2284,8 +2322,9 @@ texture_error_check( struct gl_context *ctx, /* additional checks for compressed textures */ if (_mesa_is_compressed_format(ctx, internalFormat)) { - if (!_mesa_target_can_be_compressed(ctx, target, internalFormat)) { - _mesa_error(ctx, GL_INVALID_ENUM, + GLenum err; + if (!_mesa_target_can_be_compressed(ctx, target, internalFormat, &err)) { + _mesa_error(ctx, err, "glTexImage%dD(target can't be compressed)", dimensions); return GL_TRUE; } @@ -2340,16 +2379,8 @@ compressed_texture_error_check(struct gl_context *ctx, GLint dimensions, GLenum error = GL_NO_ERROR; char *reason = ""; /* no error */ - if (!_mesa_target_can_be_compressed(ctx, target, internalFormat)) { + if (!_mesa_target_can_be_compressed(ctx, target, internalFormat, &error)) { reason = "target"; - /* From section 3.8.6, page 146 of OpenGL ES 3.0 spec: - * - * "The ETC2/EAC texture compression algorithm supports only - * two-dimensional images. 
If internalformat is an ETC2/EAC format, - * CompressedTexImage3D will generate an INVALID_OPERATION error if - * target is not TEXTURE_2D_ARRAY." - */ - error = _mesa_is_desktop_gl(ctx) ? GL_INVALID_ENUM : GL_INVALID_OPERATION; goto error; } @@ -2813,9 +2844,10 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, } if (_mesa_is_compressed_format(ctx, internalFormat)) { - if (!_mesa_target_can_be_compressed(ctx, target, internalFormat)) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glCopyTexImage%dD(target)", dimensions); + GLenum err; + if (!_mesa_target_can_be_compressed(ctx, target, internalFormat, &err)) { + _mesa_error(ctx, err, + "glCopyTexImage%dD(target can't be compressed)", dimensions); return GL_TRUE; } if (compressedteximage_only_format(ctx, internalFormat)) { @@ -5569,10 +5601,13 @@ static GLboolean is_renderable_texture_format(struct gl_context *ctx, GLenum internalformat) { /* Everything that is allowed for renderbuffers, - * except for a base format of GL_STENCIL_INDEX. + * except for a base format of GL_STENCIL_INDEX, unless supported. 
*/ GLenum baseFormat = _mesa_base_fbo_format(ctx, internalformat); - return baseFormat != 0 && baseFormat != GL_STENCIL_INDEX; + if (ctx->Extensions.ARB_texture_stencil8) + return baseFormat != 0; + else + return baseFormat != 0 && baseFormat != GL_STENCIL_INDEX; } @@ -5596,13 +5631,13 @@ check_multisample_target(GLuint dims, GLenum target, bool dsa) static void -_mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims, - struct gl_texture_object *texObj, - GLenum target, GLsizei samples, - GLint internalformat, GLsizei width, - GLsizei height, GLsizei depth, - GLboolean fixedsamplelocations, - GLboolean immutable, const char *func) +texture_image_multisample(struct gl_context *ctx, GLuint dims, + struct gl_texture_object *texObj, + GLenum target, GLsizei samples, + GLint internalformat, GLsizei width, + GLsizei height, GLsizei depth, + GLboolean fixedsamplelocations, + GLboolean immutable, const char *func) { struct gl_texture_image *texImage; GLboolean sizeOK, dimensionsOK, samplesOK; @@ -5616,6 +5651,11 @@ _mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims, return; } + if (samples < 1) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(samples < 1)", func); + return; + } + if (!check_multisample_target(dims, target, dsa)) { if (dsa) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(target)", func); @@ -5763,10 +5803,10 @@ _mesa_TexImage2DMultisample(GLenum target, GLsizei samples, if (!texObj) return; - _mesa_texture_image_multisample(ctx, 2, texObj, target, samples, - internalformat, width, height, 1, - fixedsamplelocations, GL_FALSE, - "glTexImage2DMultisample"); + texture_image_multisample(ctx, 2, texObj, target, samples, + internalformat, width, height, 1, + fixedsamplelocations, GL_FALSE, + "glTexImage2DMultisample"); } @@ -5783,12 +5823,26 @@ _mesa_TexImage3DMultisample(GLenum target, GLsizei samples, if (!texObj) return; - _mesa_texture_image_multisample(ctx, 3, texObj, target, samples, - internalformat, width, height, depth, - 
fixedsamplelocations, GL_FALSE, - "glTexImage3DMultisample"); + texture_image_multisample(ctx, 3, texObj, target, samples, + internalformat, width, height, depth, + fixedsamplelocations, GL_FALSE, + "glTexImage3DMultisample"); } +static bool +valid_texstorage_ms_parameters(GLsizei width, GLsizei height, GLsizei depth, + GLsizei samples, unsigned dims) +{ + GET_CURRENT_CONTEXT(ctx); + + if (!_mesa_valid_tex_storage_dim(width, height, depth)) { + _mesa_error(ctx, GL_INVALID_VALUE, + "glTexStorage%uDMultisample(width=%d,height=%d,depth=%d)", + dims, width, height, depth); + return false; + } + return true; +} void GLAPIENTRY _mesa_TexStorage2DMultisample(GLenum target, GLsizei samples, @@ -5802,10 +5856,13 @@ _mesa_TexStorage2DMultisample(GLenum target, GLsizei samples, if (!texObj) return; - _mesa_texture_image_multisample(ctx, 2, texObj, target, samples, - internalformat, width, height, 1, - fixedsamplelocations, GL_TRUE, - "glTexStorage2DMultisample"); + if (!valid_texstorage_ms_parameters(width, height, 1, samples, 2)) + return; + + texture_image_multisample(ctx, 2, texObj, target, samples, + internalformat, width, height, 1, + fixedsamplelocations, GL_TRUE, + "glTexStorage2DMultisample"); } void GLAPIENTRY @@ -5821,10 +5878,13 @@ _mesa_TexStorage3DMultisample(GLenum target, GLsizei samples, if (!texObj) return; - _mesa_texture_image_multisample(ctx, 3, texObj, target, samples, - internalformat, width, height, depth, - fixedsamplelocations, GL_TRUE, - "glTexStorage3DMultisample"); + if (!valid_texstorage_ms_parameters(width, height, depth, samples, 3)) + return; + + texture_image_multisample(ctx, 3, texObj, target, samples, + internalformat, width, height, depth, + fixedsamplelocations, GL_TRUE, + "glTexStorage3DMultisample"); } void GLAPIENTRY @@ -5841,10 +5901,13 @@ _mesa_TextureStorage2DMultisample(GLuint texture, GLsizei samples, if (!texObj) return; - _mesa_texture_image_multisample(ctx, 2, texObj, texObj->Target, samples, - internalformat, width, height, 1, 
- fixedsamplelocations, GL_TRUE, - "glTextureStorage2DMultisample"); + if (!valid_texstorage_ms_parameters(width, height, 1, samples, 2)) + return; + + texture_image_multisample(ctx, 2, texObj, texObj->Target, samples, + internalformat, width, height, 1, + fixedsamplelocations, GL_TRUE, + "glTextureStorage2DMultisample"); } void GLAPIENTRY @@ -5862,8 +5925,11 @@ _mesa_TextureStorage3DMultisample(GLuint texture, GLsizei samples, if (!texObj) return; - _mesa_texture_image_multisample(ctx, 3, texObj, texObj->Target, samples, - internalformat, width, height, depth, - fixedsamplelocations, GL_TRUE, - "glTextureStorage3DMultisample"); + if (!valid_texstorage_ms_parameters(width, height, depth, samples, 3)) + return; + + texture_image_multisample(ctx, 3, texObj, texObj->Target, samples, + internalformat, width, height, depth, + fixedsamplelocations, GL_TRUE, + "glTextureStorage3DMultisample"); } diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h index bf729da..a4736b5 100644 --- a/src/mesa/main/teximage.h +++ b/src/mesa/main/teximage.h @@ -133,7 +133,7 @@ _mesa_test_proxy_teximage(struct gl_context *ctx, GLenum target, GLint level, extern GLboolean _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target, - GLenum intFormat); + GLenum intFormat, GLenum *error); extern GLuint _mesa_tex_target_to_face(GLenum target); diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index cd7cfd6..c5d83e1 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -37,6 +37,7 @@ #include "hash.h" #include "imports.h" #include "macros.h" +#include "shaderimage.h" #include "teximage.h" #include "texobj.h" #include "texstate.h" @@ -1411,8 +1412,10 @@ unbind_texobj_from_image_units(struct gl_context *ctx, for (i = 0; i < ctx->Const.MaxImageUnits; i++) { struct gl_image_unit *unit = &ctx->ImageUnits[i]; - if (texObj == unit->TexObj) + if (texObj == unit->TexObj) { _mesa_reference_texobj(&unit->TexObj, NULL); + *unit = 
_mesa_default_image_unit(ctx); + } } } @@ -1742,10 +1745,10 @@ _mesa_BindTexture( GLenum target, GLuint texName ) * texture object will be decremented. It'll be deleted if the * count hits zero. */ -void -_mesa_bind_texture_unit(struct gl_context *ctx, - GLuint unit, - struct gl_texture_object *texObj) +static void +bind_texture_unit(struct gl_context *ctx, + GLuint unit, + struct gl_texture_object *texObj) { struct gl_texture_unit *texUnit; @@ -1834,7 +1837,7 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture) } assert(valid_texture_object(texObj)); - _mesa_bind_texture_unit(ctx, unit, texObj); + bind_texture_unit(ctx, unit, texObj); } diff --git a/src/mesa/main/texobj.h b/src/mesa/main/texobj.h index ec5ccb2..690878c 100644 --- a/src/mesa/main/texobj.h +++ b/src/mesa/main/texobj.h @@ -209,10 +209,6 @@ extern void _mesa_delete_nameless_texture(struct gl_context *ctx, struct gl_texture_object *texObj); -extern void -_mesa_bind_texture_unit(struct gl_context *ctx, - GLuint unit, - struct gl_texture_object *texObj); /*@}*/ diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index c0611c3..16739f1 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1562,6 +1562,19 @@ invalid_pname: _mesa_enum_to_string(pname)); } +static bool +valid_tex_level_parameteriv_target(struct gl_context *ctx, GLenum target, + bool dsa) +{ + const char *suffix = dsa ? 
"ture" : ""; + if (!legal_get_tex_level_parameter_target(ctx, target, dsa)) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetTex%sLevelParameter[if]v(target=%s)", suffix, + _mesa_enum_to_string(target)); + return false; + } + return true; +} /** * This isn't exposed to the rest of the driver because it is a part of the @@ -1585,13 +1598,6 @@ get_tex_level_parameteriv(struct gl_context *ctx, return; } - if (!legal_get_tex_level_parameter_target(ctx, target, dsa)) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glGetTex%sLevelParameter[if]v(target=%s)", suffix, - _mesa_enum_to_string(target)); - return; - } - maxLevels = _mesa_max_texture_levels(ctx, target); assert(maxLevels != 0); @@ -1619,6 +1625,9 @@ _mesa_GetTexLevelParameterfv( GLenum target, GLint level, GLint iparam; GET_CURRENT_CONTEXT(ctx); + if (!valid_tex_level_parameteriv_target(ctx, target, false)) + return; + texObj = _mesa_get_current_tex_object(ctx, target); if (!texObj) return; @@ -1636,6 +1645,9 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, struct gl_texture_object *texObj; GET_CURRENT_CONTEXT(ctx); + if (!valid_tex_level_parameteriv_target(ctx, target, false)) + return; + texObj = _mesa_get_current_tex_object(ctx, target); if (!texObj) return; @@ -1657,6 +1669,9 @@ _mesa_GetTextureLevelParameterfv(GLuint texture, GLint level, if (!texObj) return; + if (!valid_tex_level_parameteriv_target(ctx, texObj->Target, true)) + return; + get_tex_level_parameteriv(ctx, texObj, texObj->Target, level, pname, &iparam, true); @@ -1675,6 +1690,9 @@ _mesa_GetTextureLevelParameteriv(GLuint texture, GLint level, if (!texObj) return; + if (!valid_tex_level_parameteriv_target(ctx, texObj->Target, true)) + return; + get_tex_level_parameteriv(ctx, texObj, texObj->Target, level, pname, params, true); } @@ -1890,6 +1908,12 @@ get_tex_parameterfv(struct gl_context *ctx, *params = (GLfloat) obj->Sampler.sRGBDecode; break; + case GL_IMAGE_FORMAT_COMPATIBILITY_TYPE: + if (!ctx->Extensions.ARB_shader_image_load_store) + 
goto invalid_pname; + *params = (GLfloat) obj->ImageFormatCompatibilityType; + break; + default: goto invalid_pname; } diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c index 4a2cc60..c53bb29 100644 --- a/src/mesa/main/texstorage.c +++ b/src/mesa/main/texstorage.c @@ -189,6 +189,20 @@ clear_texture_fields(struct gl_context *ctx, } +/** + * Update/re-validate framebuffer object. + */ +static void +update_fbo_texture(struct gl_context *ctx, struct gl_texture_object *texObj) +{ + const unsigned numFaces = _mesa_num_tex_faces(texObj->Target); + for (int level = 0; level < ARRAY_SIZE(texObj->Image[0]); level++) { + for (unsigned face = 0; face < numFaces; face++) + _mesa_update_fbo_texture(ctx, texObj, face, level); + } +} + + GLboolean _mesa_is_legal_tex_storage_format(struct gl_context *ctx, GLenum internalformat) { @@ -287,29 +301,21 @@ tex_storage_error_check(struct gl_context *ctx, * order to allow meta functions to use legacy formats. */ /* size check */ - if (width < 1 || height < 1 || depth < 1) { + if (!_mesa_valid_tex_storage_dim(width, height, depth)) { _mesa_error(ctx, GL_INVALID_VALUE, "glTex%sStorage%uD(width, height or depth < 1)", suffix, dims); return GL_TRUE; } - /* From section 3.8.6, page 146 of OpenGL ES 3.0 spec: - * - * "The ETC2/EAC texture compression algorithm supports only - * two-dimensional images. If internalformat is an ETC2/EAC format, - * CompressedTexImage3D will generate an INVALID_OPERATION error if - * target is not TEXTURE_2D_ARRAY." - * - * This should also be applicable for glTexStorage3D(). - */ - if (_mesa_is_compressed_format(ctx, internalformat) - && !_mesa_target_can_be_compressed(ctx, target, internalformat)) { - _mesa_error(ctx, _mesa_is_desktop_gl(ctx)? 
- GL_INVALID_ENUM : GL_INVALID_OPERATION, + if (_mesa_is_compressed_format(ctx, internalformat)) { + GLenum err; + if (!_mesa_target_can_be_compressed(ctx, target, internalformat, &err)) { + _mesa_error(ctx, err, "glTex%sStorage%dD(internalformat = %s)", suffix, dims, _mesa_enum_to_string(internalformat)); - return GL_TRUE; + return GL_TRUE; + } } /* levels check */ @@ -446,6 +452,7 @@ _mesa_texture_storage(struct gl_context *ctx, GLuint dims, _mesa_set_texture_view_state(ctx, texObj, target, levels); + update_fbo_texture(ctx, texObj); } } diff --git a/src/mesa/main/texstorage.h b/src/mesa/main/texstorage.h index 6f5495f..033ecb7 100644 --- a/src/mesa/main/texstorage.h +++ b/src/mesa/main/texstorage.h @@ -38,6 +38,27 @@ _mesa_texture_storage(struct gl_context *ctx, GLuint dims, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, bool dsa); +/** + * Texture width, height and depth check shared with the + * multisample variants of TexStorage functions. + * + * From OpenGL 4.5 Core spec, page 260 (section 8.19) + * + * "An INVALID_VALUE error is generated if width, height, depth + * or levels are less than 1, for commands with the corresponding + * parameters." + * + * (referring to TextureStorage* commands, these also match values + * specified for OpenGL ES 3.1.) + */ +static inline bool +_mesa_valid_tex_storage_dim(GLsizei width, GLsizei height, GLsizei depth) +{ + if (width < 1 || height < 1 || depth < 1) + return false; + return true; +} + /*@}*/ /** diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 37c0569..fc83310 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -863,7 +863,7 @@ _mesa_texstore_can_use_memcpy(struct gl_context *ctx, /* The Mesa format must match the input format and type. 
*/ if (!_mesa_format_matches_format_and_type(dstFormat, srcFormat, srcType, - srcPacking->SwapBytes)) { + srcPacking->SwapBytes, NULL)) { return GL_FALSE; } diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index 036530e..1026618 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -319,24 +319,31 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location, return; } + if ((uni->type->base_type == GLSL_TYPE_DOUBLE && + returnType != GLSL_TYPE_DOUBLE) || + (uni->type->base_type != GLSL_TYPE_DOUBLE && + returnType == GLSL_TYPE_DOUBLE)) { + _mesa_error( ctx, GL_INVALID_OPERATION, + "glGetnUniform*vARB(incompatible uniform types)"); + return; + } { unsigned elements = (uni->type->is_sampler()) ? 1 : uni->type->components(); + const int dmul = uni->type->base_type == GLSL_TYPE_DOUBLE ? 2 : 1; /* Calculate the source base address *BEFORE* modifying elements to * account for the size of the user's buffer. */ const union gl_constant_value *const src = - &uni->storage[offset * elements]; + &uni->storage[offset * elements * dmul]; assert(returnType == GLSL_TYPE_FLOAT || returnType == GLSL_TYPE_INT || - returnType == GLSL_TYPE_UINT); - /* The three (currently) supported types all have the same size, - * which is of course the same as their union. That'll change - * with glGetUniformdv()... 
- */ - unsigned bytes = sizeof(src[0]) * elements; + returnType == GLSL_TYPE_UINT || returnType == GLSL_TYPE_DOUBLE); + + /* doubles have a different size than the other 3 types */ + unsigned bytes = sizeof(src[0]) * elements * dmul; if (bufSize < 0 || bytes > (unsigned) bufSize) { _mesa_error( ctx, GL_INVALID_OPERATION, "glGetnUniform*vARB(out of bounds: bufSize is %d," @@ -677,9 +684,11 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg, match = (basicType != GLSL_TYPE_DOUBLE); break; case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_IMAGE: match = (basicType == GLSL_TYPE_INT); break; + case GLSL_TYPE_IMAGE: + match = (basicType == GLSL_TYPE_INT && _mesa_is_desktop_gl(ctx)); + break; default: match = (basicType == uni->type->base_type); break; diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index ff1df72..10819e2 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -888,16 +888,7 @@ _mesa_GetnUniformdvARB(GLuint program, GLint location, { GET_CURRENT_CONTEXT(ctx); - (void) program; - (void) location; - (void) bufSize; - (void) params; - - /* _mesa_get_uniform(ctx, program, location, bufSize, GLSL_TYPE_DOUBLE, params); - */ - _mesa_error(ctx, GL_INVALID_OPERATION, "glGetUniformdvARB" - "(GL_ARB_gpu_shader_fp64 not implemented)"); } void GLAPIENTRY diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index d54f934..d96b7bc 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -33,6 +33,7 @@ #include "prog_instruction.h" #include "prog_parameter.h" #include "prog_print.h" +#include "program.h" /** * \file prog_to_nir.c @@ -142,7 +143,7 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]); nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); - nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); + nir_builder_instr_insert(b, &load->instr); src.src 
= nir_src_for_ssa(&load->dest.ssa); break; @@ -166,6 +167,8 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) } /* FALLTHROUGH */ case PROGRAM_STATE_VAR: { + assert(c->parameters != NULL); + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); @@ -200,7 +203,7 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) deref_arr->base_offset = prog_src->Index; } - nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); + nir_builder_instr_insert(b, &load->instr); src.src = nir_src_for_ssa(&load->dest.ssa); break; @@ -250,7 +253,7 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) mov->dest.write_mask = 0x1; mov->src[0] = src; mov->src[0].swizzle[0] = swizzle; - nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr); + nir_builder_instr_insert(b, &mov->instr); chans[i] = &mov->dest.dest.ssa; } @@ -278,7 +281,7 @@ ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) instr->src[i].src = nir_src_for_ssa(src[i]); instr->dest = dest; - nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr); + nir_builder_instr_insert(b, &instr->instr); } static void @@ -297,7 +300,7 @@ ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest, mov->src[0].src = nir_src_for_ssa(def); for (unsigned i = def->num_components; i < 4; i++) mov->src[0].swizzle[i] = def->num_components - 1; - nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr); + nir_builder_instr_insert(b, &mov->instr); } static void @@ -558,7 +561,7 @@ ptn_kil(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) nir_intrinsic_instr *discard = nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if); discard->src[0] = nir_src_for_ssa(cmp); - nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr); + nir_builder_instr_insert(b, &discard->instr); } static void @@ -685,7 +688,7 @@ ptn_tex(nir_builder 
*b, nir_alu_dest dest, nir_ssa_def **src, assert(src_number == num_srcs); nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); - nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr); + nir_builder_instr_insert(b, &instr->instr); /* Resolve the writemask on the texture op. */ ptn_move_dest(b, dest, &instr->dest.ssa); @@ -941,7 +944,7 @@ ptn_add_output_stores(struct ptn_compile *c) } else { store->src[0].reg.reg = c->output_regs[var->data.location]; } - nir_instr_insert_after_cf_list(c->build.cf_node_list, &store->instr); + nir_builder_instr_insert(b, &store->instr); } } @@ -985,7 +988,7 @@ setup_registers_and_variables(struct ptn_compile *c) load_x->num_components = 1; load_x->variables[0] = nir_deref_var_create(load_x, var); nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, NULL); - nir_instr_insert_after_cf_list(b->cf_node_list, &load_x->instr); + nir_builder_instr_insert(b, &load_x->instr); nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0), nir_imm_float(b, 0.0), nir_imm_float(b, 1.0)); @@ -1001,7 +1004,7 @@ setup_registers_and_variables(struct ptn_compile *c) store->num_components = 4; store->variables[0] = nir_deref_var_create(store, fullvar); store->src[0] = nir_src_for_ssa(f001); - nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr); + nir_builder_instr_insert(b, &store->instr); /* Insert the real input into the list so the driver has real * inputs, but set c->input_vars[i] to the temporary so we use @@ -1079,22 +1082,25 @@ prog_to_nir(const struct gl_program *prog, { struct ptn_compile *c; struct nir_shader *s; + gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target); c = rzalloc(NULL, struct ptn_compile); if (!c) return NULL; - s = nir_shader_create(NULL, options); + s = nir_shader_create(NULL, stage, options); if (!s) goto fail; c->prog = prog; - c->parameters = rzalloc(s, nir_variable); - c->parameters->type = glsl_array_type(glsl_vec4_type(), - prog->Parameters->NumParameters); - 
c->parameters->name = "parameters"; - c->parameters->data.read_only = true; - c->parameters->data.mode = nir_var_uniform; - exec_list_push_tail(&s->uniforms, &c->parameters->node); + if (prog->Parameters->NumParameters > 0) { + c->parameters = rzalloc(s, nir_variable); + c->parameters->type = + glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters); + c->parameters->name = "parameters"; + c->parameters->data.read_only = true; + c->parameters->data.mode = nir_var_uniform; + exec_list_push_tail(&s->uniforms, &c->parameters->node); + } nir_function *func = nir_function_create(s, "main"); nir_function_overload *overload = nir_function_overload_create(func); @@ -1102,7 +1108,7 @@ prog_to_nir(const struct gl_program *prog, c->build.shader = s; c->build.impl = impl; - c->build.cf_node_list = &impl->body; + nir_builder_insert_after_cf_list(&c->build, &impl->body); setup_registers_and_variables(c); if (unlikely(c->error)) diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index 1396906..4fdef7f 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -192,6 +192,7 @@ st_BlitFramebuffer(struct gl_context *ctx, blit.filter = pFilter; blit.render_condition_enable = TRUE; + blit.alpha_blend = FALSE; if (mask & GL_COLOR_BUFFER_BIT) { struct gl_renderbuffer_attachment *srcAtt = diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 18ea43f..6ff6cf6 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -139,7 +139,7 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y, * in which case the memcpy-based fast path will likely be used and * we don't have to blit. 
*/ if (_mesa_format_matches_format_and_type(rb->Format, format, - type, pack->SwapBytes)) { + type, pack->SwapBytes, NULL)) { goto fallback; } diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 715d69c..93335ae 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -695,7 +695,7 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims, * in which case the memcpy-based fast path will likely be used and * we don't have to blit. */ if (_mesa_format_matches_format_and_type(texImage->TexFormat, format, - type, unpack->SwapBytes)) { + type, unpack->SwapBytes, NULL)) { goto fallback; } @@ -963,7 +963,7 @@ st_GetTexSubImage(struct gl_context * ctx, /* See if the texture format already matches the format and type, * in which case the memcpy-based fast path will be used. */ if (_mesa_format_matches_format_and_type(texImage->TexFormat, format, - type, ctx->Pack.SwapBytes)) { + type, ctx->Pack.SwapBytes, NULL)) { goto fallback; } @@ -1071,6 +1071,8 @@ st_GetTexSubImage(struct gl_context * ctx, /* From now on, we need the gallium representation of dimensions. */ if (gl_target == GL_TEXTURE_1D_ARRAY) { + zoffset = yoffset; + yoffset = 0; depth = height; height = 1; } @@ -1114,7 +1116,7 @@ st_GetTexSubImage(struct gl_context * ctx, /* copy/pack data into user buffer */ if (_mesa_format_matches_format_and_type(mesa_format, format, type, - ctx->Pack.SwapBytes)) { + ctx->Pack.SwapBytes, NULL)) { /* memcpy */ const uint bytesPerRow = width * util_format_get_blocksize(dst_format); GLuint row, slice; @@ -1871,6 +1873,31 @@ st_TextureView(struct gl_context *ctx, return GL_TRUE; } +/* HACK: this is only enough for the most basic uses of CopyImage. Must fix + * before actually exposing the extension. 
+ */ +static void +st_CopyImageSubData(struct gl_context *ctx, + struct gl_texture_image *src_image, + int src_x, int src_y, int src_z, + struct gl_texture_image *dst_image, + int dst_x, int dst_y, int dst_z, + int src_width, int src_height) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct st_texture_image *src = st_texture_image(src_image); + struct st_texture_image *dst = st_texture_image(dst_image); + + struct pipe_box box; + + u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &box); + pipe->resource_copy_region(pipe, dst->pt, dst_image->Level, + dst_x, dst_y, dst_z, + src->pt, src_image->Level, + &box); +} + void st_init_texture_functions(struct dd_function_table *functions) @@ -1903,4 +1930,6 @@ st_init_texture_functions(struct dd_function_table *functions) functions->AllocTextureStorage = st_AllocTextureStorage; functions->TextureView = st_TextureView; + + functions->CopyImageSubData = st_CopyImageSubData; } diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index db7b5b7..db74184 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -1917,7 +1917,7 @@ st_choose_matching_format(struct st_context *st, unsigned bind, } if (_mesa_format_matches_format_and_type(mesa_format, format, type, - swapBytes)) { + swapBytes, NULL)) { enum pipe_format format = st_mesa_format_to_pipe_format(st, mesa_format); diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 6f00727..cba9881 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -2816,7 +2816,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) */ glsl_to_tgsi_instruction *inst, *new_inst; inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); - new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); + new_inst = emit_asm(ir, inst->op, l, inst->src[0], 
inst->src[1], inst->src[2], inst->src[3]); new_inst->saturate = inst->saturate; inst->dead_mask = inst->dst[0].writemask; } else { @@ -4402,12 +4402,12 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, * new visitor. */ foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) { glsl_to_tgsi_instruction *newinst; - st_src_reg src_regs[3]; + st_src_reg src_regs[4]; if (inst->dst[0].file == PROGRAM_OUTPUT) prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index); - for (int i = 0; i < 3; i++) { + for (int i = 0; i < 4; i++) { src_regs[i] = inst->src[i]; if (src_regs[i].file == PROGRAM_INPUT && src_regs[i].index == VARYING_SLOT_COL0) { @@ -4418,7 +4418,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); } - newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]); + newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]); newinst->tex_target = inst->tex_target; newinst->sampler_array_size = inst->sampler_array_size; } @@ -4487,18 +4487,18 @@ get_bitmap_visitor(struct st_fragment_program *fp, * new visitor. 
*/ foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) { glsl_to_tgsi_instruction *newinst; - st_src_reg src_regs[3]; + st_src_reg src_regs[4]; if (inst->dst[0].file == PROGRAM_OUTPUT) prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index); - for (int i = 0; i < 3; i++) { + for (int i = 0; i < 4; i++) { src_regs[i] = inst->src[i]; if (src_regs[i].file == PROGRAM_INPUT) prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); } - newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]); + newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]); newinst->tex_target = inst->tex_target; newinst->sampler_array_size = inst->sampler_array_size; } diff --git a/src/mesa/swrast/s_drawpix.c b/src/mesa/swrast/s_drawpix.c index dc6827e..5393d50 100644 --- a/src/mesa/swrast/s_drawpix.c +++ b/src/mesa/swrast/s_drawpix.c @@ -242,7 +242,7 @@ fast_draw_rgba_pixels(struct gl_context *ctx, GLint x, GLint y, } if (_mesa_format_matches_format_and_type(rb->Format, format, type, - ctx->Unpack.SwapBytes)) { + ctx->Unpack.SwapBytes, NULL)) { fast_draw_generic_pixels(ctx, rb, x, y, width, height, format, type, &unpack, pixels); return GL_TRUE; diff --git a/src/util/bitset.h b/src/util/bitset.h index febcdde..c452819 100644 --- a/src/util/bitset.h +++ b/src/util/bitset.h @@ -96,4 +96,40 @@ __bitset_ffs(const BITSET_WORD *x, int n) #define BITSET_FFS(x) __bitset_ffs(x, ARRAY_SIZE(x)) +static inline unsigned +__bitset_next_set(unsigned i, BITSET_WORD *tmp, + BITSET_WORD *set, unsigned size) +{ + unsigned bit, word; + + /* NOTE: The initial conditions for this function are very specific. At + * the start of the loop, the tmp variable must be set to *set and the + * initial i value set to 0. This way, if there is a bit set in the first + * word, we ignore the i-value and just grab that bit (so 0 is ok, even + * though 0 may be returned). 
If the first word is 0, then the value of + * `word` will be 0 and we will go on to look at the second word. + */ + word = BITSET_BITWORD(i); + while (*tmp == 0) { + word++; + + if (word >= BITSET_WORDS(size)) + return size; + + *tmp = set[word]; + } + + /* Find the next set bit in the non-zero word */ + bit = ffs(*tmp) - 1; + + /* Unset the bit */ + *tmp &= ~(1ull << bit); + + return word * BITSET_WORDBITS + bit; +} + +#define BITSET_FOREACH_SET(__i, __tmp, __set, __size) \ + for (__tmp = *(__set), __i = 0; \ + (__i = __bitset_next_set(__i, &__tmp, __set, __size)) < __size;) + #endif diff --git a/src/util/register_allocate.c b/src/util/register_allocate.c index 436e008..8af93c0 100644 --- a/src/util/register_allocate.c +++ b/src/util/register_allocate.c @@ -183,7 +183,7 @@ struct ra_graph { * using ralloc_free(). */ struct ra_regs * -ra_alloc_reg_set(void *mem_ctx, unsigned int count) +ra_alloc_reg_set(void *mem_ctx, unsigned int count, bool need_conflict_lists) { unsigned int i; struct ra_regs *regs; @@ -197,9 +197,15 @@ ra_alloc_reg_set(void *mem_ctx, unsigned int count) BITSET_WORDS(count)); BITSET_SET(regs->regs[i].conflicts, i); - regs->regs[i].conflict_list = ralloc_array(regs->regs, unsigned int, 4); - regs->regs[i].conflict_list_size = 4; - regs->regs[i].conflict_list[0] = i; + if (need_conflict_lists) { + regs->regs[i].conflict_list = ralloc_array(regs->regs, + unsigned int, 4); + regs->regs[i].conflict_list_size = 4; + regs->regs[i].conflict_list[0] = i; + } else { + regs->regs[i].conflict_list = NULL; + regs->regs[i].conflict_list_size = 0; + } regs->regs[i].num_conflicts = 1; } @@ -227,12 +233,14 @@ ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2) { struct ra_reg *reg1 = ®s->regs[r1]; - if (reg1->conflict_list_size == reg1->num_conflicts) { - reg1->conflict_list_size *= 2; - reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list, - unsigned int, reg1->conflict_list_size); + if (reg1->conflict_list) { + if 
(reg1->conflict_list_size == reg1->num_conflicts) { + reg1->conflict_list_size *= 2; + reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list, + unsigned int, reg1->conflict_list_size); + } + reg1->conflict_list[reg1->num_conflicts++] = r2; } - reg1->conflict_list[reg1->num_conflicts++] = r2; BITSET_SET(reg1->conflicts, r2); } @@ -255,7 +263,7 @@ ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2) */ void ra_add_transitive_reg_conflict(struct ra_regs *regs, - unsigned int base_reg, unsigned int reg) + unsigned int base_reg, unsigned int reg) { unsigned int i; @@ -266,13 +274,37 @@ ra_add_transitive_reg_conflict(struct ra_regs *regs, } } +/** + * Makes every conflict on the given register transitive. In other words, + * every register that conflicts with r will now conflict with every other + * register conflicting with r. + * + * This can simplify code for setting up multiple register classes + * which are aggregates of some base hardware registers, compared to + * explicitly using ra_add_reg_conflict. 
+ */ +void +ra_make_reg_conflicts_transitive(struct ra_regs *regs, unsigned int r) +{ + struct ra_reg *reg = ®s->regs[r]; + BITSET_WORD tmp; + int c; + + BITSET_FOREACH_SET(c, tmp, reg->conflicts, regs->count) { + struct ra_reg *other = ®s->regs[c]; + unsigned i; + for (i = 0; i < BITSET_WORDS(regs->count); i++) + other->conflicts[i] |= reg->conflicts[i]; + } +} + unsigned int ra_alloc_reg_class(struct ra_regs *regs) { struct ra_class *class; regs->classes = reralloc(regs->regs, regs->classes, struct ra_class *, - regs->class_count + 1); + regs->class_count + 1); class = rzalloc(regs, struct ra_class); regs->classes[regs->class_count] = class; @@ -319,7 +351,7 @@ ra_set_finalize(struct ra_regs *regs, unsigned int **q_values) for (b = 0; b < regs->class_count; b++) { for (c = 0; c < regs->class_count; c++) { regs->classes[b]->q[c] = q_values[b][c]; - } + } } } else { /* Compute, for each class B and C, how many regs of B an @@ -410,14 +442,14 @@ ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count) void ra_set_node_class(struct ra_graph *g, - unsigned int n, unsigned int class) + unsigned int n, unsigned int class) { g->nodes[n].class = class; } void ra_add_node_interference(struct ra_graph *g, - unsigned int n1, unsigned int n2) + unsigned int n1, unsigned int n2) { if (!BITSET_TEST(g->nodes[n1].adjacency, n2)) { ra_add_node_adjacency(g, n1, n2); @@ -445,7 +477,7 @@ decrement_q(struct ra_graph *g, unsigned int n) if (n != n2 && !g->nodes[n2].in_stack) { assert(g->nodes[n2].q_total >= g->regs->classes[n2_class]->q[n_class]); - g->nodes[n2].q_total -= g->regs->classes[n2_class]->q[n_class]; + g->nodes[n2].q_total -= g->regs->classes[n2_class]->q[n_class]; } } } diff --git a/src/util/register_allocate.h b/src/util/register_allocate.h index 61f182e..628d2bb 100644 --- a/src/util/register_allocate.h +++ b/src/util/register_allocate.h @@ -44,13 +44,15 @@ struct ra_regs; * registers, such as aligned register pairs that conflict with the * two real 
registers from which they are composed. */ -struct ra_regs *ra_alloc_reg_set(void *mem_ctx, unsigned int count); +struct ra_regs *ra_alloc_reg_set(void *mem_ctx, unsigned int count, + bool need_conflict_lists); void ra_set_allocate_round_robin(struct ra_regs *regs); unsigned int ra_alloc_reg_class(struct ra_regs *regs); void ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2); void ra_add_transitive_reg_conflict(struct ra_regs *regs, unsigned int base_reg, unsigned int reg); +void ra_make_reg_conflicts_transitive(struct ra_regs *regs, unsigned int reg); void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg); void ra_set_num_conflicts(struct ra_regs *regs, unsigned int class_a, unsigned int class_b, unsigned int num_conflicts); diff --git a/src/util/rounding.h b/src/util/rounding.h index 7b5608b..afb38fb 100644 --- a/src/util/rounding.h +++ b/src/util/rounding.h @@ -24,9 +24,8 @@ #ifndef _ROUNDING_H #define _ROUNDING_H -#include "c99_compat.h" // inline +#include "c99_math.h" -#include <math.h> #include <limits.h> #include <stdint.h> diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 2dbf59f..4cbf98a 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -1007,7 +1007,7 @@ anv_compile_shader_spirv(struct anv_compiler *compiler, mesa_shader->Program->nir = spirv_to_nir((uint32_t *)shader->module->data, shader->module->size / 4, - glsl_options->NirOptions); + stage_info[stage].stage, glsl_options->NirOptions); nir_validate_shader(mesa_shader->Program->nir); brw_process_nir(mesa_shader->Program->nir, |