summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorTim Rowley <timothy.o.rowley@intel.com>2016-02-16 17:27:28 -0600
committerTim Rowley <timothy.o.rowley@intel.com>2016-03-02 18:38:41 -0600
commit2b2d3680bf164ec4f8b50436b96c3fc195318ea5 (patch)
tree94a91efdf29f8baf830a7124d49c1d6f9d2647ff /src/gallium
parent2eec41f6f1b85b43e38721661f4b21bd982c6a46 (diff)
downloadexternal_mesa3d-2b2d3680bf164ec4f8b50436b96c3fc195318ea5.zip
external_mesa3d-2b2d3680bf164ec4f8b50436b96c3fc195318ea5.tar.gz
external_mesa3d-2b2d3680bf164ec4f8b50436b96c3fc195318ea5.tar.bz2
gallium/swr: add OpenSWR driver
OpenSWR is a new software rasterizer for x86 processors designed for high performance and high scalability on visualization workloads. Acked-by: Roland Scheidegger <sroland@vmware.com> Acked-by: Jose Fonseca <jfonseca@vmware.com>
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/swr/swr_clear.cpp142
-rw-r--r--src/gallium/drivers/swr/swr_context.cpp407
-rw-r--r--src/gallium/drivers/swr/swr_context.h182
-rw-r--r--src/gallium/drivers/swr/swr_context_llvm.h124
-rw-r--r--src/gallium/drivers/swr/swr_draw.cpp271
-rw-r--r--src/gallium/drivers/swr/swr_fence.cpp143
-rw-r--r--src/gallium/drivers/swr/swr_fence.h70
-rw-r--r--src/gallium/drivers/swr/swr_loader.cpp67
-rw-r--r--src/gallium/drivers/swr/swr_memory.h99
-rw-r--r--src/gallium/drivers/swr/swr_public.h46
-rw-r--r--src/gallium/drivers/swr/swr_query.cpp334
-rw-r--r--src/gallium/drivers/swr/swr_query.h46
-rw-r--r--src/gallium/drivers/swr/swr_resource.h97
-rw-r--r--src/gallium/drivers/swr/swr_scratch.cpp116
-rw-r--r--src/gallium/drivers/swr/swr_scratch.h63
-rw-r--r--src/gallium/drivers/swr/swr_screen.cpp746
-rw-r--r--src/gallium/drivers/swr/swr_screen.h52
-rw-r--r--src/gallium/drivers/swr/swr_shader.cpp591
-rw-r--r--src/gallium/drivers/swr/swr_shader.h60
-rw-r--r--src/gallium/drivers/swr/swr_state.cpp1370
-rw-r--r--src/gallium/drivers/swr/swr_state.h307
-rw-r--r--src/gallium/drivers/swr/swr_tex_sample.cpp338
-rw-r--r--src/gallium/drivers/swr/swr_tex_sample.h47
23 files changed, 5718 insertions, 0 deletions
diff --git a/src/gallium/drivers/swr/swr_clear.cpp b/src/gallium/drivers/swr/swr_clear.cpp
new file mode 100644
index 0000000..9027f84
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_clear.cpp
@@ -0,0 +1,142 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "swr_context.h"
+#include "swr_query.h"
+
+/*
+ * pipe_context::clear hook: clear the bound attachments selected by
+ * 'buffers' to the given color, depth and stencil values.
+ */
+static void
+swr_clear(struct pipe_context *pipe,
+          unsigned buffers,
+          const union pipe_color_union *color,
+          double depth,
+          unsigned stencil)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   struct pipe_framebuffer_state *fb = &ctx->framebuffer;
+
+   /* Nothing to do when swr_check_render_cond() rejects rendering. */
+   if (!swr_check_render_cond(pipe))
+      return;
+
+   /* Bring the derived state up to date before touching the rasterizer. */
+   if (ctx->dirty)
+      swr_update_derived(ctx);
+
+   /* Build the SWR clear mask from the requested buffers, keeping only
+    * attachments that are actually bound. */
+   UINT mask = 0;
+
+   /* XXX SWR currently only clears SWR_ATTACHMENT_COLOR0, don't bother
+    * checking others yet. */
+#if 0
+   if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
+      UINT i;
+      for (i = 0; i < fb->nr_cbufs; ++i)
+         if (fb->cbufs[i])
+            mask |= (SWR_CLEAR_COLOR0 << i);
+   }
+#else
+   if (buffers & PIPE_CLEAR_COLOR && fb->cbufs[0])
+      mask |= SWR_CLEAR_COLOR;
+#endif
+
+   if (fb->zsbuf) {
+      if (buffers & PIPE_CLEAR_DEPTH)
+         mask |= SWR_CLEAR_DEPTH;
+      if (buffers & PIPE_CLEAR_STENCIL)
+         mask |= SWR_CLEAR_STENCIL;
+   }
+
+   /* XXX HACK, override clear color alpha. On ubuntu, clears are
+    * transparent. */
+#if 0
+   ((union pipe_color_union *)color)->f[3] = 1.0; /* cast off your const'd-ness */
+#endif
+
+   /* Scissor affects clear, viewport should not: program a viewport that
+    * spans the whole framebuffer.  SWR_NEW_VIEWPORT is flagged as well,
+    * presumably so the regular viewport is re-applied by a later
+    * swr_update_derived() (the original comment says "then restore it"). */
+   ctx->dirty |= SWR_NEW_VIEWPORT;
+   SWR_VIEWPORT vp = {0};
+   vp.width = fb->width;
+   vp.height = fb->height;
+   SwrSetViewports(ctx->swrContext, 1, &vp, NULL);
+
+   /* Push the draw context to the rasterizer, then issue the clear. */
+   swr_update_draw_context(ctx);
+   SwrClearRenderTarget(ctx->swrContext, mask, color->f, depth, stencil);
+}
+
+
+#if 0 // XXX: disabled stubs. These hooks were never seen being called; it is
+ // unclear how they get invoked or whether they are needed. Docs?
+static void
+swr_clear_render_target(struct pipe_context *pipe, struct pipe_surface *ps,
+ const union pipe_color_union *color,
+ unsigned x, unsigned y, unsigned w, unsigned h)
+{
+ struct swr_context *ctx = swr_context(pipe);
+ fprintf(stderr, "SWR swr_clear_render_target!\n");
+
+ ctx->dirty |= SWR_NEW_FRAMEBUFFER | SWR_NEW_SCISSOR; /* stub: nothing is cleared, state is only flagged dirty */
+}
+
+static void
+swr_clear_depth_stencil(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned buffers, double depth, unsigned stencil,
+ unsigned x, unsigned y, unsigned w, unsigned h)
+{
+ struct swr_context *ctx = swr_context(pipe);
+ fprintf(stderr, "SWR swr_clear_depth_stencil!\n");
+
+ ctx->dirty |= SWR_NEW_FRAMEBUFFER | SWR_NEW_SCISSOR; /* stub: nothing is cleared, state is only flagged dirty */
+}
+
+static void
+swr_clear_buffer(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
+{
+ fprintf(stderr, "SWR swr_clear_buffer!\n");
+ struct swr_context *ctx = swr_context(pipe);
+ struct swr_resource *buf = swr_resource(res);
+ union pipe_color_union color; /* unused in this stub */
+ enum pipe_format dst_fmt; /* unused in this stub */
+ unsigned width, height, elements; /* unused in this stub */
+
+ assert(res->target == PIPE_BUFFER);
+ assert(buf);
+ assert(size % data_size == 0);
+
+ SWR_SURFACE_STATE &swr_buffer = buf->swr; /* unused in this stub */
+
+ ctx->dirty |= SWR_NEW_FRAMEBUFFER | SWR_NEW_SCISSOR; /* stub: the buffer is not written, state is only flagged dirty */
+}
+#endif
+
+
+void
+swr_clear_init(struct pipe_context *pipe) /* install the clear entry points on 'pipe' */
+{
+ pipe->clear = swr_clear; /* the only clear hook currently wired up */
+#if 0 // XXX: the hooks below are disabled stubs; they were never seen being
+ // called and it is unclear whether they are needed. Docs?
+ pipe->clear_render_target = swr_clear_render_target;
+ pipe->clear_depth_stencil = swr_clear_depth_stencil;
+ pipe->clear_buffer = swr_clear_buffer;
+#endif
+}
diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp
new file mode 100644
index 0000000..0e7ebb7
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -0,0 +1,407 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+extern "C" {
+#include "util/u_transfer.h"
+#include "util/u_surface.h"
+}
+
+#include "swr_context.h"
+#include "swr_memory.h"
+#include "swr_screen.h"
+#include "swr_resource.h"
+#include "swr_scratch.h"
+#include "swr_query.h"
+
+#include "api.h"
+#include "backend.h"
+
+/*
+ * pipe_context::create_surface hook: allocate a pipe_surface describing a
+ * view of resource 'pt' as requested by 'surf_tmpl'.  The surface takes a
+ * reference on 'pt'.  Returns NULL if the allocation fails.
+ */
+static struct pipe_surface *
+swr_create_surface(struct pipe_context *pipe,
+                   struct pipe_resource *pt,
+                   const struct pipe_surface *surf_tmpl)
+{
+   struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface);
+   if (!surface)
+      return NULL;
+
+   pipe_reference_init(&surface->reference, 1);
+   pipe_resource_reference(&surface->texture, pt);
+   surface->context = pipe;
+   surface->format = surf_tmpl->format;
+
+   if (pt->target == PIPE_BUFFER) {
+      /* setting width as number of elements should get us correct
+       * renderbuffer width */
+      surface->width = surf_tmpl->u.buf.last_element
+                       - surf_tmpl->u.buf.first_element + 1;
+      surface->height = pt->height0;
+      surface->u.buf.first_element = surf_tmpl->u.buf.first_element;
+      surface->u.buf.last_element = surf_tmpl->u.buf.last_element;
+      assert(surface->u.buf.first_element <= surface->u.buf.last_element);
+      /* NOTE(review): with width == last - first + 1 this only holds when
+       * first_element is 0 -- confirm whether that is intended. */
+      assert(surface->u.buf.last_element < surface->width);
+      return surface;
+   }
+
+   /* Texture surface: dimensions are those of the selected mip level. */
+   assert(surf_tmpl->u.tex.level <= pt->last_level);
+   surface->width = u_minify(pt->width0, surf_tmpl->u.tex.level);
+   surface->height = u_minify(pt->height0, surf_tmpl->u.tex.level);
+   surface->u.tex.level = surf_tmpl->u.tex.level;
+   surface->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+   surface->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+
+   const bool layered =
+      surface->u.tex.first_layer != surface->u.tex.last_layer;
+   if (layered) {
+      debug_printf("creating surface with multiple layers, rendering "
+                   "to first layer only\n");
+   }
+
+   return surface;
+}
+
+/*
+ * pipe_context::surface_destroy hook: resolve any hot tiles still held for
+ * the surface's resource, then drop the resource reference and free 'surf'.
+ */
+static void
+swr_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surf)
+{
+   assert(surf->texture);
+   struct pipe_resource *texture = surf->texture;
+
+   const unsigned target_binds = PIPE_BIND_RENDER_TARGET
+                                 | PIPE_BIND_DEPTH_STENCIL
+                                 | PIPE_BIND_DISPLAY_TARGET;
+
+   /* If the surface being destroyed is a current render target, call
+    * StoreTiles to resolve the hotTile state.
+    * NOTE(review): the original comment also promises to set the attachment
+    * to NULL afterwards, but no code here does that. */
+   if (texture->bind & target_binds) {
+      struct swr_context *ctx = swr_context(pipe);
+      struct swr_resource *spr = swr_resource(texture);
+      SWR_SURFACE_STATE *attachments = ctx->swrDC.renderTargets;
+
+      for (uint32_t att = 0; att < SWR_NUM_ATTACHMENTS; att++) {
+         if (attachments[att].pBaseAddress != spr->swr.pBaseAddress)
+            continue;
+
+         swr_store_render_target(ctx, att, SWR_TILE_RESOLVED);
+
+         /* Mesa thinks depth/stencil are fused, so we'll never get an
+          * explicit resource for stencil.  So, if checking depth, then
+          * also check for stencil. */
+         if (spr->has_stencil && (att == SWR_ATTACHMENT_DEPTH)) {
+            swr_store_render_target(
+               ctx, SWR_ATTACHMENT_STENCIL, SWR_TILE_RESOLVED);
+         }
+
+         SwrWaitForIdle(ctx->swrContext);
+         break; /* only the first matching attachment is handled */
+      }
+   }
+
+   pipe_resource_reference(&surf->texture, NULL);
+   FREE(surf);
+}
+
+
+/*
+ * pipe_context::transfer_map hook.
+ *
+ * Gives the CPU direct access to 'resource' at mip 'level'.  If the resource
+ * is currently attached as a render target, its hot tiles are stored and the
+ * rasterizer is idled first.  For the packed depth/stencil formats the
+ * stencil plane kept in spr->secondary is copied into the primary surface
+ * before the pointer is handed out.
+ *
+ * 'usage' is currently ignored.  On success *transfer receives a newly
+ * allocated pipe_transfer (freed by swr_transfer_unmap) and the returned
+ * pointer addresses the texel selected by 'box'.  Returns NULL, leaving
+ * *transfer untouched, if the transfer cannot be allocated.
+ */
+static void *
+swr_transfer_map(struct pipe_context *pipe,
+                 struct pipe_resource *resource,
+                 unsigned level,
+                 unsigned usage,
+                 const struct pipe_box *box,
+                 struct pipe_transfer **transfer)
+{
+   /* Validate the arguments before 'resource' is dereferenced for the first
+    * time; previously resource->format was read ahead of the assert. */
+   assert(resource);
+   assert(level <= resource->last_level);
+
+   struct swr_resource *spr = swr_resource(resource);
+   struct pipe_transfer *pt;
+   enum pipe_format format = resource->format;
+
+   /*
+    * If mapping any attached rendertarget, store tiles and wait for idle
+    * before giving CPU access to the surface.
+    * (set postStoreTileState to SWR_TILE_INVALID so tiles are reloaded)
+    */
+   if (resource->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL
+                         | PIPE_BIND_DISPLAY_TARGET)) {
+      struct swr_context *ctx = swr_context(pipe);
+      swr_draw_context *pDC = &ctx->swrDC;
+      SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
+      for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
+         if (renderTargets[i].pBaseAddress == spr->swr.pBaseAddress) {
+            swr_store_render_target(ctx, i, SWR_TILE_INVALID);
+            /*
+             * Mesa thinks depth/stencil are fused, so we'll never get an
+             * explicit map for stencil.  So, if mapping depth, then also
+             * store tile for stencil.
+             */
+            if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH))
+               swr_store_render_target(
+                  ctx, SWR_ATTACHMENT_STENCIL, SWR_TILE_INVALID);
+            SwrWaitForIdle(ctx->swrContext);
+            break;
+         }
+   }
+
+   pt = CALLOC_STRUCT(pipe_transfer);
+   if (!pt)
+      return NULL;
+   pipe_resource_reference(&pt->resource, resource);
+   pt->level = level;
+   pt->box = *box;
+   pt->stride = spr->row_stride[level];
+   pt->layer_stride = spr->img_stride[level];
+
+   /* if we're mapping the depth/stencil, copy in stencil */
+   if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT
+       && spr->has_stencil) {
+      /* 4 bytes per pixel, stencil lives in byte 3 */
+      for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) {
+         spr->swr.pBaseAddress[4 * i + 3] = spr->secondary.pBaseAddress[i];
+      }
+   } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
+              && spr->has_stencil) {
+      /* 8 bytes per pixel, stencil lives in byte 4 */
+      for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) {
+         spr->swr.pBaseAddress[8 * i + 4] = spr->secondary.pBaseAddress[i];
+      }
+   }
+
+   /* Byte offset of the box origin within the selected mip level. */
+   unsigned offset = box->z * pt->layer_stride + box->y * pt->stride
+                     + box->x * util_format_get_blocksize(format);
+
+   *transfer = pt;
+
+   return spr->swr.pBaseAddress + offset + spr->mip_offsets[level];
+}
+
+/*
+ * pipe_context::transfer_unmap hook: write the stencil bytes of a packed
+ * depth/stencil resource back to its separate stencil plane, then release
+ * the transfer and its resource reference.
+ */
+static void
+swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer)
+{
+   assert(transfer->resource);
+
+   /*
+    * XXX TODO: use fences and come up with a real resource manager.
+    *
+    * If this resource has been mapped/unmapped, it's probably in use.  Tag
+    * it with this context so we'll know to check dependencies when it's
+    * deleted.
+    */
+   struct swr_resource *spr = swr_resource(transfer->resource);
+   spr->bound_to_context = (void *)pipe;
+
+   /* if we're mapping the depth/stencil, copy out stencil */
+   unsigned pixel_bytes = 0;  /* size of one packed pixel; 0 = nothing to do */
+   unsigned stencil_byte = 0; /* position of the stencil byte in a pixel */
+
+   if (spr->has_stencil) {
+      if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
+         pixel_bytes = 4;
+         stencil_byte = 3;
+      } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
+         pixel_bytes = 8;
+         stencil_byte = 4;
+      }
+   }
+
+   if (pixel_bytes != 0) {
+      for (unsigned px = 0; px < spr->alignedWidth * spr->alignedHeight;
+           px++) {
+         spr->secondary.pBaseAddress[px] =
+            spr->swr.pBaseAddress[pixel_bytes * px + stencil_byte];
+      }
+   }
+
+   pipe_resource_reference(&transfer->resource, NULL);
+   FREE(transfer);
+}
+
+
+/*
+ * pipe_context::resource_copy_region hook.  Buffer-to-buffer and
+ * texture-to-texture copies go through util_resource_copy_region(); mixed
+ * buffer/texture copies are not handled and only log a debug message.
+ */
+static void
+swr_resource_copy(struct pipe_context *pipe,
+                  struct pipe_resource *dst,
+                  unsigned dst_level,
+                  unsigned dstx,
+                  unsigned dsty,
+                  unsigned dstz,
+                  struct pipe_resource *src,
+                  unsigned src_level,
+                  const struct pipe_box *src_box)
+{
+   const bool dst_is_buffer = (dst->target == PIPE_BUFFER);
+   const bool src_is_buffer = (src->target == PIPE_BUFFER);
+
+   if (dst_is_buffer != src_is_buffer) {
+      debug_printf("unhandled swr_resource_copy\n");
+      return;
+   }
+
+   util_resource_copy_region(
+      pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box);
+}
+
+
+/*
+ * pipe_context::blit hook.  Tries a plain region copy first and otherwise
+ * falls back to the util_blitter helper, after saving the state that the
+ * blitter needs to have saved.  Color resolves and stencil blits are not
+ * supported.
+ */
+static void
+swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   struct blitter_context *blitter = ctx->blitter;
+
+   /* Private copy: the stencil bit of the mask may be cleared below. */
+   struct pipe_blit_info info = *blit_info;
+
+   if (info.render_condition_enable && !swr_check_render_cond(pipe))
+      return;
+
+   /* Multisample -> single-sample blit of a non-depth/stencil,
+    * non-pure-integer format would need a color resolve. */
+   const bool needs_color_resolve =
+      info.src.resource->nr_samples > 1
+      && info.dst.resource->nr_samples <= 1
+      && !util_format_is_depth_or_stencil(info.src.resource->format)
+      && !util_format_is_pure_integer(info.src.resource->format);
+   if (needs_color_resolve) {
+      debug_printf("swr: color resolve unimplemented\n");
+      return;
+   }
+
+   if (util_try_blit_via_copy_region(pipe, &info))
+      return; /* done */
+
+   if (info.mask & PIPE_MASK_S) {
+      debug_printf("swr: cannot blit stencil, skipping\n");
+      info.mask &= ~PIPE_MASK_S;
+   }
+
+   if (!util_blitter_is_blit_supported(blitter, &info)) {
+      debug_printf("swr: blit unsupported %s -> %s\n",
+                   util_format_short_name(info.src.resource->format),
+                   util_format_short_name(info.dst.resource->format));
+      return;
+   }
+
+   /* XXX turn off occlusion and streamout queries */
+
+   /* Vertex stage state */
+   util_blitter_save_vertex_buffer_slot(blitter, ctx->vertex_buffer);
+   util_blitter_save_vertex_elements(blitter, (void *)ctx->velems);
+   util_blitter_save_vertex_shader(blitter, (void *)ctx->vs);
+   /*util_blitter_save_geometry_shader(blitter, (void*)ctx->gs);*/
+   util_blitter_save_so_targets(
+      blitter,
+      ctx->num_so_targets,
+      (struct pipe_stream_output_target **)ctx->so_targets);
+
+   /* Rasterizer state */
+   util_blitter_save_rasterizer(blitter, (void *)ctx->rasterizer);
+   util_blitter_save_viewport(blitter, &ctx->viewport);
+   util_blitter_save_scissor(blitter, &ctx->scissor);
+
+   /* Fragment stage and output-merger state */
+   util_blitter_save_fragment_shader(blitter, ctx->fs);
+   util_blitter_save_blend(blitter, (void *)ctx->blend);
+   util_blitter_save_depth_stencil_alpha(blitter,
+                                         (void *)ctx->depth_stencil);
+   util_blitter_save_stencil_ref(blitter, &ctx->stencil_ref);
+   util_blitter_save_sample_mask(blitter, ctx->sample_mask);
+   util_blitter_save_framebuffer(blitter, &ctx->framebuffer);
+   util_blitter_save_fragment_sampler_states(
+      blitter,
+      ctx->num_samplers[PIPE_SHADER_FRAGMENT],
+      (void **)ctx->samplers[PIPE_SHADER_FRAGMENT]);
+   util_blitter_save_fragment_sampler_views(
+      blitter,
+      ctx->num_sampler_views[PIPE_SHADER_FRAGMENT],
+      ctx->sampler_views[PIPE_SHADER_FRAGMENT]);
+
+   /* Conditional rendering state */
+   util_blitter_save_render_condition(blitter,
+                                      ctx->render_cond_query,
+                                      ctx->render_cond_cond,
+                                      ctx->render_cond_mode);
+
+   util_blitter_blit(blitter, &info);
+}
+
+
+/*
+ * pipe_context::destroy hook: release everything owned by the context and
+ * then the context itself.  Also used by swr_create_context() on its
+ * failure path, so the blitter and the SWR context may still be NULL here.
+ */
+static void
+swr_destroy(struct pipe_context *pipe)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (ctx->blitter) {
+      util_blitter_destroy(ctx->blitter);
+   }
+
+   if (ctx->swrContext) {
+      SwrDestroyContext(ctx->swrContext);
+   }
+
+   /* Allocated with 'new' in swr_create_context(). */
+   delete ctx->blendJIT;
+
+   swr_destroy_scratch_buffers(ctx);
+
+   FREE(ctx);
+}
+
+
+/*
+ * pipe_context::render_condition hook: record the conditional-rendering
+ * query together with its condition and mode in the context.
+ */
+static void
+swr_render_condition(struct pipe_context *pipe,
+                     struct pipe_query *query,
+                     boolean condition,
+                     uint mode)
+{
+   struct swr_context *swr = swr_context(pipe);
+
+   swr->render_cond_query = query;
+   swr->render_cond_cond = condition;
+   swr->render_cond_mode = mode;
+}
+
+
+/*
+ * Create an SWR pipe_context for 'screen'.
+ *
+ * Allocates the driver context, creates the underlying SWR rasterizer
+ * context, installs the pipe_context entry points and creates the blitter
+ * helper.  'priv' is stored in pipe.priv; 'flags' is currently unused.
+ *
+ * Returns the new context, or NULL if any allocation or creation step
+ * fails.
+ */
+struct pipe_context *
+swr_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
+{
+   struct swr_context *ctx = CALLOC_STRUCT(swr_context);
+   /* Bail out before touching the context if the allocation failed;
+    * previously a NULL ctx was dereferenced on the next line. */
+   if (!ctx)
+      return NULL;
+
+   ctx->blendJIT =
+      new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>;
+
+   SWR_CREATECONTEXT_INFO createInfo;
+   createInfo.driver = GL;
+   createInfo.privateStateSize = sizeof(swr_draw_context);
+   createInfo.maxSubContexts = 0;
+   createInfo.pfnLoadTile = swr_LoadHotTile;
+   createInfo.pfnStoreTile = swr_StoreHotTile;
+   createInfo.pfnClearTile = swr_StoreHotTileClear;
+   ctx->swrContext = SwrCreateContext(&createInfo);
+
+   /* Init Load/Store/ClearTiles Tables */
+   swr_InitMemoryModule();
+
+   InitBackendFuncTables();
+
+   if (ctx->swrContext == NULL)
+      goto fail;
+
+   ctx->pipe.screen = screen;
+   ctx->pipe.destroy = swr_destroy;
+   ctx->pipe.priv = priv;
+   ctx->pipe.create_surface = swr_create_surface;
+   ctx->pipe.surface_destroy = swr_surface_destroy;
+   ctx->pipe.transfer_map = swr_transfer_map;
+   ctx->pipe.transfer_unmap = swr_transfer_unmap;
+
+   ctx->pipe.transfer_flush_region = u_default_transfer_flush_region;
+   ctx->pipe.transfer_inline_write = u_default_transfer_inline_write;
+
+   ctx->pipe.resource_copy_region = swr_resource_copy;
+   ctx->pipe.render_condition = swr_render_condition;
+
+   swr_state_init(&ctx->pipe);
+   swr_clear_init(&ctx->pipe);
+   swr_draw_init(&ctx->pipe);
+   swr_query_init(&ctx->pipe);
+
+   /* The blitter is created last, after the entry points above are set. */
+   ctx->pipe.blit = swr_blit;
+   ctx->blitter = util_blitter_create(&ctx->pipe);
+   if (!ctx->blitter) {
+      goto fail;
+   }
+
+   swr_init_scratch_buffers(ctx);
+
+   return &ctx->pipe;
+
+fail:
+   /* Should really validate the init steps and fail gracefully */
+   swr_destroy(&ctx->pipe);
+   return NULL;
+}
diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h
new file mode 100644
index 0000000..73a8e8d
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -0,0 +1,182 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_CONTEXT_H
+#define SWR_CONTEXT_H
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_blitter.h"
+#include "jit_api.h"
+#include "swr_state.h"
+#include <unordered_map>
+
+#define SWR_NEW_BLEND (1 << 0) /* SWR_NEW_*: one bit per state group, OR-ed into swr_context::dirty */
+#define SWR_NEW_RASTERIZER (1 << 1)
+#define SWR_NEW_DEPTH_STENCIL_ALPHA (1 << 2)
+#define SWR_NEW_SAMPLER (1 << 3)
+#define SWR_NEW_SAMPLER_VIEW (1 << 4)
+#define SWR_NEW_VS (1 << 5)
+#define SWR_NEW_FS (1 << 6)
+#define SWR_NEW_VSCONSTANTS (1 << 7)
+#define SWR_NEW_FSCONSTANTS (1 << 8)
+#define SWR_NEW_VERTEX (1 << 9)
+#define SWR_NEW_STIPPLE (1 << 10)
+#define SWR_NEW_SCISSOR (1 << 11)
+#define SWR_NEW_VIEWPORT (1 << 12)
+#define SWR_NEW_FRAMEBUFFER (1 << 13)
+#define SWR_NEW_CLIP (1 << 14)
+#define SWR_NEW_SO (1 << 15)
+#define SWR_NEW_ALL 0x0000ffff /* bits 0..15, i.e. every SWR_NEW_* flag above; extend when adding a flag */
+
+namespace std
+{
+/* Lets BLEND_COMPILE_STATE be used as an unordered_map key: the hash is a
+ * CRC32 over the raw bytes of the state. */
+template <> struct hash<BLEND_COMPILE_STATE> {
+   std::size_t operator()(const BLEND_COMPILE_STATE &state) const
+   {
+      return util_hash_crc32(&state, sizeof(BLEND_COMPILE_STATE));
+   }
+};
+}
+
+struct swr_jit_texture { /* mirrored member-for-member by Gen_swr_jit_texture() in swr_context_llvm.h; keep order in sync */
+ uint32_t width; // same as number of elements
+ uint32_t height;
+ uint32_t depth; // doubles as array size
+ uint32_t first_level;
+ uint32_t last_level;
+ const void *base_ptr;
+ uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS]; // one entry per mip level
+ uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS]; // one entry per mip level
+ uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; // one entry per mip level
+};
+
+struct swr_jit_sampler { /* mirrored member-for-member by Gen_swr_jit_sampler() in swr_context_llvm.h; keep order in sync */
+ float min_lod;
+ float max_lod;
+ float lod_bias;
+ float border_color[4]; // four components; presumably RGBA -- TODO confirm
+};
+
+struct swr_draw_context { /* SWR private per-draw state; mirrored by Gen_swr_draw_context() in swr_context_llvm.h, keep order in sync */
+ const float *constantVS[PIPE_MAX_CONSTANT_BUFFERS];
+ unsigned num_constantsVS[PIPE_MAX_CONSTANT_BUFFERS];
+ const float *constantFS[PIPE_MAX_CONSTANT_BUFFERS];
+ unsigned num_constantsFS[PIPE_MAX_CONSTANT_BUFFERS];
+
+ swr_jit_texture texturesVS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+ swr_jit_sampler samplersVS[PIPE_MAX_SAMPLERS];
+ swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+ swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS];
+
+ SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS]; /* indexed by attachment, e.g. SWR_ATTACHMENT_DEPTH */
+};
+
+struct swr_context { /* driver context; swr_context() casts a pipe_context pointer to this type */
+ struct pipe_context pipe; /**< base class */ /* must stay the first member for that cast */
+
+ HANDLE swrContext; /* handle returned by SwrCreateContext() */
+
+ /** Constant state objects */
+ struct swr_blend_state *blend;
+ struct pipe_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+ struct pipe_depth_stencil_alpha_state *depth_stencil;
+ struct pipe_rasterizer_state *rasterizer;
+
+ struct swr_vertex_shader *vs;
+ struct swr_fragment_shader *fs;
+ struct swr_vertex_element_state *velems;
+
+ /** Other rendering state */
+ struct pipe_blend_color blend_color;
+ struct pipe_stencil_ref stencil_ref;
+ struct pipe_clip_state clip;
+ struct pipe_constant_buffer
+ constants[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_poly_stipple poly_stipple;
+ struct pipe_scissor_state scissor;
+ struct pipe_sampler_view *
+ sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
+
+ struct pipe_viewport_state viewport;
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ struct pipe_index_buffer index_buffer;
+
+ struct blitter_context *blitter; /* created by util_blitter_create() in swr_create_context() */
+
+ /** Conditional query object and mode */
+ struct pipe_query *render_cond_query; /* these three are set by swr_render_condition() */
+ uint render_cond_mode;
+ boolean render_cond_cond;
+ unsigned active_queries;
+
+ unsigned num_vertex_buffers;
+ unsigned num_samplers[PIPE_SHADER_TYPES];
+ unsigned num_sampler_views[PIPE_SHADER_TYPES];
+
+ unsigned sample_mask;
+
+ // streamout
+ pipe_stream_output_target *so_targets[MAX_SO_STREAMS];
+ uint32_t num_so_targets;
+
+ /* Temp storage for user_buffer constants */
+ struct swr_scratch_buffers *scratch;
+
+ // blend jit functions
+ std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC> *blendJIT; /* new'ed in swr_create_context(), deleted in swr_destroy() */
+
+ /* Derived SWR API DrawState */
+ struct swr_derived_state derived;
+
+ /* SWR private state - draw context */
+ struct swr_draw_context swrDC; /* copied to the SWR private state by swr_update_draw_context() */
+
+ unsigned dirty; /**< Mask of SWR_NEW_x flags */
+};
+
+/* Downcast a pipe_context to the swr_context that embeds it as its first
+ * member. */
+static INLINE struct swr_context *
+swr_context(struct pipe_context *pipe)
+{
+   struct swr_context *ctx = reinterpret_cast<struct swr_context *>(pipe);
+   return ctx;
+}
+
+/* Copy the driver's shadow draw context (ctx->swrDC) into the private
+ * state block returned by SwrGetPrivateContextState(). */
+static INLINE void
+swr_update_draw_context(struct swr_context *ctx)
+{
+   void *private_state = SwrGetPrivateContextState(ctx->swrContext);
+
+   memcpy(private_state, &ctx->swrDC, sizeof(ctx->swrDC));
+}
+
+struct pipe_context *swr_create_context(struct pipe_screen *, void *priv, unsigned flags); /* swr_context.cpp; returns NULL on failure */
+
+void swr_state_init(struct pipe_context *pipe); /* called from swr_create_context() */
+
+void swr_clear_init(struct pipe_context *pipe); /* swr_clear.cpp; installs pipe->clear */
+
+void swr_draw_init(struct pipe_context *pipe); /* called from swr_create_context() */
+
+void swr_finish(struct pipe_context *pipe);
+#endif
diff --git a/src/gallium/drivers/swr/swr_context_llvm.h b/src/gallium/drivers/swr/swr_context_llvm.h
new file mode 100644
index 0000000..58da813
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_context_llvm.h
@@ -0,0 +1,124 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#pragma once
+
+//////////////////////////////////////////////////////////////////////////
+/// Build the LLVM struct type that mirrors swr_jit_texture.  The member
+/// order must match the swr_jit_texture_* indices defined below.
+INLINE static StructType *
+Gen_swr_jit_texture(JitManager *pShG)
+{
+   LLVMContext &ctx = pShG->mContext;
+
+   Type *const i32Ty = Type::getInt32Ty(ctx);
+   Type *const perLevelTy = ArrayType::get(i32Ty, PIPE_MAX_TEXTURE_LEVELS);
+
+   const std::vector<Type *> members = {
+      i32Ty,                                     // width
+      i32Ty,                                     // height
+      i32Ty,                                     // depth
+      i32Ty,                                     // first_level
+      i32Ty,                                     // last_level
+      PointerType::get(Type::getInt8Ty(ctx), 0), // base_ptr
+      perLevelTy,                                // row_stride
+      perLevelTy,                                // img_stride
+      perLevelTy,                                // mip_offsets
+   };
+
+   return StructType::get(ctx, members, false);
+}
+
+/// Member indices into the struct type built by Gen_swr_jit_texture().
+static const UINT swr_jit_texture_width = 0;
+static const UINT swr_jit_texture_height = 1;
+static const UINT swr_jit_texture_depth = 2;
+static const UINT swr_jit_texture_first_level = 3;
+static const UINT swr_jit_texture_last_level = 4;
+static const UINT swr_jit_texture_base_ptr = 5;
+static const UINT swr_jit_texture_row_stride = 6;
+static const UINT swr_jit_texture_img_stride = 7;
+static const UINT swr_jit_texture_mip_offsets = 8;
+
+//////////////////////////////////////////////////////////////////////////
+/// Build the LLVM struct type that mirrors swr_jit_sampler.  The member
+/// order must match the swr_jit_sampler_* indices defined below.
+INLINE static StructType *
+Gen_swr_jit_sampler(JitManager *pShG)
+{
+   LLVMContext &ctx = pShG->mContext;
+
+   Type *const floatTy = Type::getFloatTy(ctx);
+
+   const std::vector<Type *> members = {
+      floatTy,                    // min_lod
+      floatTy,                    // max_lod
+      floatTy,                    // lod_bias
+      ArrayType::get(floatTy, 4), // border_color
+   };
+
+   return StructType::get(ctx, members, false);
+}
+
+/// Member indices into the struct type built by Gen_swr_jit_sampler().
+static const UINT swr_jit_sampler_min_lod = 0;
+static const UINT swr_jit_sampler_max_lod = 1;
+static const UINT swr_jit_sampler_lod_bias = 2;
+static const UINT swr_jit_sampler_border_color = 3;
+
+//////////////////////////////////////////////////////////////////////////
+/// Build the LLVM struct type that mirrors swr_draw_context.  The member
+/// order must match the swr_draw_context_* indices defined below.
+INLINE static StructType *
+Gen_swr_draw_context(JitManager *pShG)
+{
+   LLVMContext &ctx = pShG->mContext;
+
+   // Per-constant-buffer arrays, shared by the VS and FS members.
+   Type *const constPtrArrayTy = ArrayType::get(
+      PointerType::get(Type::getFloatTy(ctx), 0), PIPE_MAX_CONSTANT_BUFFERS);
+   Type *const constCountArrayTy =
+      ArrayType::get(Type::getInt32Ty(ctx), PIPE_MAX_CONSTANT_BUFFERS);
+
+   // Texture and sampler arrays, shared by the VS and FS members.
+   Type *const textureArrayTy = ArrayType::get(
+      Gen_swr_jit_texture(pShG), PIPE_MAX_SHADER_SAMPLER_VIEWS);
+   Type *const samplerArrayTy =
+      ArrayType::get(Gen_swr_jit_sampler(pShG), PIPE_MAX_SAMPLERS);
+
+   const std::vector<Type *> members = {
+      constPtrArrayTy,   // constantVS
+      constCountArrayTy, // num_constantsVS
+      constPtrArrayTy,   // constantFS
+      constCountArrayTy, // num_constantsFS
+      textureArrayTy,    // texturesVS
+      samplerArrayTy,    // samplersVS
+      textureArrayTy,    // texturesFS
+      samplerArrayTy,    // samplersFS
+      ArrayType::get(Gen_SWR_SURFACE_STATE(pShG),
+                     SWR_NUM_ATTACHMENTS), // renderTargets
+   };
+
+   return StructType::get(ctx, members, false);
+}
+
+/// Member indices into the struct type built by Gen_swr_draw_context().
+static const UINT swr_draw_context_constantVS = 0;
+static const UINT swr_draw_context_num_constantsVS = 1;
+static const UINT swr_draw_context_constantFS = 2;
+static const UINT swr_draw_context_num_constantsFS = 3;
+static const UINT swr_draw_context_texturesVS = 4;
+static const UINT swr_draw_context_samplersVS = 5;
+static const UINT swr_draw_context_texturesFS = 6;
+static const UINT swr_draw_context_samplersFS = 7;
+static const UINT swr_draw_context_renderTargets = 8;
diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp
new file mode 100644
index 0000000..a775bd2
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_draw.cpp
@@ -0,0 +1,271 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "swr_screen.h"
+#include "swr_context.h"
+#include "swr_resource.h"
+#include "swr_fence.h"
+#include "swr_query.h"
+#include "jit_api.h"
+
+#include "util/u_draw.h"
+#include "util/u_prim.h"
+
+/*
+ * Convert mesa PIPE_PRIM_X to SWR enum PRIMITIVE_TOPOLOGY
+ *
+ * PIPE_PRIM_POLYGON is currently drawn as a triangle fan.  Any value that is
+ * not handled asserts in debug builds and yields TOP_UNKNOWN otherwise.
+ */
+static INLINE enum PRIMITIVE_TOPOLOGY
+swr_convert_prim_topology(const unsigned mode)
+{
+   switch (mode) {
+   case PIPE_PRIM_POINTS:
+      return TOP_POINT_LIST;
+   case PIPE_PRIM_LINES:
+      return TOP_LINE_LIST;
+   case PIPE_PRIM_LINE_LOOP:
+      return TOP_LINE_LOOP;
+   case PIPE_PRIM_LINE_STRIP:
+      return TOP_LINE_STRIP;
+   case PIPE_PRIM_TRIANGLES:
+      return TOP_TRIANGLE_LIST;
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      return TOP_TRIANGLE_STRIP;
+   case PIPE_PRIM_TRIANGLE_FAN:
+      return TOP_TRIANGLE_FAN;
+   case PIPE_PRIM_QUADS:
+      return TOP_QUAD_LIST;
+   case PIPE_PRIM_QUAD_STRIP:
+      return TOP_QUAD_STRIP;
+   case PIPE_PRIM_POLYGON:
+      return TOP_TRIANGLE_FAN; /* XXX TOP_POLYGON; */
+   case PIPE_PRIM_LINES_ADJACENCY:
+      return TOP_LINE_LIST_ADJ;
+   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+      return TOP_LISTSTRIP_ADJ;
+   case PIPE_PRIM_TRIANGLES_ADJACENCY:
+      return TOP_TRI_LIST_ADJ;
+   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+      return TOP_TRI_STRIP_ADJ;
+   default:
+      assert(0 && "Unknown topology");
+      return TOP_UNKNOWN;
+   }
+}
+
+
+/*
+ * Draw vertex arrays, with optional indexing, optional instancing.
+ *
+ * In order: honour the render condition, hand indirect draws over to
+ * util_draw_indirect, refresh derived state and the draw context, make sure
+ * the stream-out and fetch JIT functions for the current state exist, then
+ * issue the indexed or non-indexed instanced draw.
+ */
+static void
+swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+ struct swr_context *ctx = swr_context(pipe);
+
+ if (!swr_check_render_cond(pipe))
+ return;
+
+ if (info->indirect) {
+ util_draw_indirect(pipe, info);
+ return;
+ }
+
+ /* Update derived state, pass draw info to update function */
+ if (ctx->dirty)
+ swr_update_derived(ctx, info);
+
+ swr_update_draw_context(ctx);
+
+ /* Stream output: the stream-out function is compiled on first use for each
+ * primitive mode and cached in soFunc[info->mode]. */
+ if (ctx->vs->pipe.stream_output.num_outputs) {
+ if (!ctx->vs->soFunc[info->mode]) {
+ STREAMOUT_COMPILE_STATE state = {0};
+ struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;
+
+ state.numVertsPerPrim = u_vertices_per_prim(info->mode);
+
+ /* offsets[] tracks, per output buffer, the position reached by the
+ * declarations emitted so far; it is compared against dst_offset and
+ * advanced by num_components. */
+ uint32_t offsets[MAX_SO_STREAMS] = {0};
+ uint32_t num = 0;
+
+ for (uint32_t i = 0; i < so->num_outputs; i++) {
+ assert(so->output[i].stream == 0); // @todo
+ uint32_t output_buffer = so->output[i].output_buffer;
+ if (so->output[i].dst_offset != offsets[output_buffer]) {
+ // hole - need to fill
+ /* One mask bit per skipped component between the running offset
+ * and this output's dst_offset. */
+ state.stream.decl[num].bufferIndex = output_buffer;
+ state.stream.decl[num].hole = true;
+ state.stream.decl[num].componentMask =
+ (1 << (so->output[i].dst_offset - offsets[output_buffer]))
+ - 1;
+ num++;
+ offsets[output_buffer] = so->output[i].dst_offset;
+ }
+
+ state.stream.decl[num].bufferIndex = output_buffer;
+ /* NOTE(review): the "- 1" presumably skips a slot reserved for
+ * position - confirm against the SWR attribute layout. */
+ state.stream.decl[num].attribSlot = so->output[i].register_index - 1;
+ state.stream.decl[num].componentMask =
+ ((1 << so->output[i].num_components) - 1)
+ << so->output[i].start_component;
+ state.stream.decl[num].hole = false;
+ num++;
+
+ offsets[output_buffer] += so->output[i].num_components;
+ }
+
+ state.stream.numDecls = num;
+
+ HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;
+ ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);
+ debug_printf("so shader %p\n", ctx->vs->soFunc[info->mode]);
+ /* Compile failure is only caught by this assert. */
+ assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");
+ }
+
+ SwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);
+ }
+
+ /* The fetch shader is rebuilt when it does not exist yet or when the
+ * primitive-restart settings of this draw differ from the cached ones. */
+ struct swr_vertex_element_state *velems = ctx->velems;
+ if (!velems->fsFunc
+ || (velems->fsState.cutIndex != info->restart_index)
+ || (velems->fsState.bEnableCutIndex != info->primitive_restart)) {
+
+ velems->fsState.cutIndex = info->restart_index;
+ velems->fsState.bEnableCutIndex = info->primitive_restart;
+
+ /* Create Fetch Shader */
+ HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
+ velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);
+
+ debug_printf("fetch shader %p\n", velems->fsFunc);
+ assert(velems->fsFunc && "Error: FetchShader = NULL");
+ }
+
+ SwrSetFetchFunc(ctx->swrContext, velems->fsFunc);
+
+ if (info->indexed)
+ SwrDrawIndexedInstanced(ctx->swrContext,
+ swr_convert_prim_topology(info->mode),
+ info->count,
+ info->instance_count,
+ info->start,
+ info->index_bias,
+ info->start_instance);
+ else
+ SwrDrawInstanced(ctx->swrContext,
+ swr_convert_prim_topology(info->mode),
+ info->count,
+ info->instance_count,
+ info->start,
+ info->start_instance);
+}
+
+
+/*
+ * pipe_context::flush hook.
+ *
+ * Stores colour buffer 0 back to its surface when it is a display target,
+ * submits the screen-wide flush fence, and, when the caller asked for one,
+ * returns a reference to that fence through *fence.  The flags argument is
+ * not used here.
+ */
+static void
+swr_flush(struct pipe_context *pipe,
+ struct pipe_fence_handle **fence,
+ unsigned flags)
+{
+ struct swr_context *ctx = swr_context(pipe);
+ struct swr_screen *screen = swr_screen(pipe->screen);
+
+ /* If the current renderTarget is the display surface, store tiles back to
+ * the surface, in
+ * preparation for present (swr_flush_frontbuffer)
+ */
+ struct pipe_surface *cb = ctx->framebuffer.cbufs[0];
+ if (cb && swr_resource(cb->texture)->display_target) {
+ swr_store_render_target(ctx, SWR_ATTACHMENT_COLOR0, SWR_TILE_RESOLVED);
+ /* Record which context last stored into this display target. */
+ swr_resource(cb->texture)->bound_to_context = (void*)pipe;
+ }
+
+ // SwrStoreTiles is asynchronous, always submit the "flush" fence.
+ // flush_frontbuffer needs it.
+ swr_fence_submit(ctx, screen->flush_fence);
+
+ if (fence)
+ swr_fence_reference(pipe->screen, fence, screen->flush_fence);
+}
+
+/*
+ * Flush the context and block until the fence handed back by the flush has
+ * completed, then drop the temporary fence reference.
+ */
+void
+swr_finish(struct pipe_context *pipe)
+{
+   struct pipe_screen *p_screen = &swr_screen(pipe->screen)->base;
+   struct pipe_fence_handle *fh = NULL;
+
+   swr_flush(pipe, &fh, 0);
+   swr_fence_finish(p_screen, fh, 0);
+   swr_fence_reference(p_screen, &fh, NULL);
+}
+
+
+/*
+ * Store SWR HotTiles back to RenderTarget surface.
+ *
+ * ctx             - context whose draw context holds the render targets
+ * attachment      - index into swrDC.renderTargets, also passed to
+ *                   SwrStoreTiles as the attachment to store
+ * post_tile_state - tile state handed to SwrStoreTiles
+ *
+ * Nothing happens when the attachment has no base address.  The viewport and
+ * scissor enable are overridden only for the duration of the store and are
+ * put back from ctx->derived afterwards.
+ */
+void
+swr_store_render_target(struct swr_context *ctx,
+ uint32_t attachment,
+ enum SWR_TILE_STATE post_tile_state)
+{
+ struct swr_draw_context *pDC = &ctx->swrDC;
+ struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment];
+
+ /* Only proceed if there's a valid surface to store to */
+ if (renderTarget->pBaseAddress) {
+ /* Set viewport to full renderTarget width/height and disable scissor
+ * before StoreTiles */
+ boolean change_viewport =
+ (ctx->derived.vp.x != 0.0f || ctx->derived.vp.y != 0.0f
+ || ctx->derived.vp.width != renderTarget->width
+ || ctx->derived.vp.height != renderTarget->height);
+ if (change_viewport) {
+ /* Temporary viewport: zero-initialised, so the origin is (0, 0). */
+ SWR_VIEWPORT vp = {0};
+ vp.width = renderTarget->width;
+ vp.height = renderTarget->height;
+ SwrSetViewports(ctx->swrContext, 1, &vp, NULL);
+ }
+
+ /* Remember the scissor enable so it can be restored after the store. */
+ boolean scissor_enable = ctx->derived.rastState.scissorEnable;
+ if (scissor_enable) {
+ ctx->derived.rastState.scissorEnable = FALSE;
+ SwrSetRastState(ctx->swrContext, &ctx->derived.rastState);
+ }
+
+ swr_update_draw_context(ctx);
+ SwrStoreTiles(ctx->swrContext,
+ (enum SWR_RENDERTARGET_ATTACHMENT)attachment,
+ post_tile_state);
+
+ /* Restore viewport and scissor enable */
+ if (change_viewport)
+ SwrSetViewports(ctx->swrContext, 1, &ctx->derived.vp, &ctx->derived.vpm);
+ if (scissor_enable) {
+ ctx->derived.rastState.scissorEnable = scissor_enable;
+ SwrSetRastState(ctx->swrContext, &ctx->derived.rastState);
+ }
+ }
+}
+
+
+/* Install this file's draw_vbo and flush hooks on the pipe context. */
+void
+swr_draw_init(struct pipe_context *pipe)
+{
+ pipe->draw_vbo = swr_draw_vbo;
+ pipe->flush = swr_flush;
+}
diff --git a/src/gallium/drivers/swr/swr_fence.cpp b/src/gallium/drivers/swr/swr_fence.cpp
new file mode 100644
index 0000000..f97ea22
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_fence.cpp
@@ -0,0 +1,143 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+#include "os/os_time.h"
+
+#include "swr_context.h"
+#include "swr_screen.h"
+#include "swr_fence.h"
+
+#if defined(PIPE_CC_MSVC) // portable thread yield
+ #define sched_yield SwitchToThread
+#endif
+/*
+ * Fence callback, called by back-end thread on completion of all rendering up
+ * to SwrSync call.
+ *
+ * userData  - the swr_fence that was submitted
+ * userData2 - the value fence->write had when this sync was submitted
+ * userData3 - unused
+ */
+static void
+swr_sync_cb(UINT64 userData, UINT64 userData2, UINT64 userData3)
+{
+   struct swr_fence *fence = (struct swr_fence *)userData;
+
+   /* Publish the value captured at submit time rather than the live write
+    * counter: the same fence may have been submitted again since, and
+    * copying the current counter would report those later submissions as
+    * complete before their own callbacks have run.  Never move backwards. */
+   if (fence->read < userData2)
+      fence->read = userData2;
+}
+
+/*
+ * Submit an existing fence.
+ *
+ * Bumps the fence's write counter and queues a sync whose callback will
+ * advance the read counter to that same value.
+ */
+void
+swr_fence_submit(struct swr_context *ctx, struct pipe_fence_handle *fh)
+{
+   struct swr_fence *fence = swr_fence(fh);
+
+   fence->write++;
+   SwrSync(ctx->swrContext, swr_sync_cb, (UINT64)fence, fence->write, 0);
+}
+
+/*
+ * Create a new fence object.
+ *
+ * Returns NULL when the allocation fails.  A new fence starts with a
+ * reference count of one and with its read and write counters both zero.
+ */
+struct pipe_fence_handle *
+swr_fence_create()
+{
+   static int fence_id = 0;
+   struct swr_fence *fence = CALLOC_STRUCT(swr_fence);
+   if (!fence)
+      return NULL;
+
+   /* CALLOC_STRUCT hands back zeroed memory, so no extra memset is needed. */
+   pipe_reference_init(&fence->reference, 1);
+   fence->id = fence_id++;
+
+   return (struct pipe_fence_handle *)fence;
+}
+
+/** Destroy a fence. Called when refcount hits zero. Releases the memory only. */
+static void
+swr_fence_destroy(struct swr_fence *fence)
+{
+ FREE(fence);
+}
+
+/**
+ * Set ptr = fence, with reference counting
+ *
+ * screen - screen the fences belong to
+ * ptr    - slot to update; its previous fence loses a reference
+ * f      - fence to store in *ptr (may be NULL); it gains a reference
+ *
+ * When the previous fence drops to zero references it is destroyed, after
+ * waiting for any sync still outstanding on it.
+ */
+void
+swr_fence_reference(struct pipe_screen *screen,
+                    struct pipe_fence_handle **ptr,
+                    struct pipe_fence_handle *f)
+{
+   struct swr_fence *fence = swr_fence(f);
+   struct swr_fence *old;
+
+   if (likely(ptr)) {
+      old = swr_fence(*ptr);
+      *ptr = f;
+   } else {
+      old = NULL;
+   }
+
+   /* Hand NULL through explicitly instead of forming a member address from a
+    * NULL fence pointer. */
+   if (pipe_reference(old ? &old->reference : NULL,
+                      fence ? &fence->reference : NULL)) {
+      /* The sync callback runs on a back-end thread and writes into the
+       * fence, so let outstanding submissions retire before freeing it. */
+      swr_fence_finish(screen, (struct pipe_fence_handle *)old, 0);
+      swr_fence_destroy(old);
+   }
+}
+
+/*
+ * Wait for the fence to finish.
+ *
+ * Yields the CPU in a loop until the fence's read counter matches its write
+ * counter.  The timeout argument is ignored: the call always waits for
+ * completion and always returns TRUE.
+ */
+boolean
+swr_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence_handle,
+ uint64_t timeout)
+{
+ struct swr_fence *fence = swr_fence(fence_handle);
+
+ while (!swr_is_fence_done(fence))
+ sched_yield();
+
+ return TRUE;
+}
+
+
+/* pipe_screen::get_timestamp hook: returns os_time_get_nano(); the screen
+ * argument is not used. */
+uint64_t
+swr_get_timestamp(struct pipe_screen *screen)
+{
+ return os_time_get_nano();
+}
+
+
+/*
+ * Install the fence and timestamp hooks on the screen and create the
+ * screen's flush fence.  The result of swr_fence_create is stored without
+ * being checked for NULL.
+ */
+void
+swr_fence_init(struct pipe_screen *p_screen)
+{
+ p_screen->fence_reference = swr_fence_reference;
+ p_screen->fence_finish = swr_fence_finish;
+
+ p_screen->get_timestamp = swr_get_timestamp;
+
+ /*
+ * Create persistent "flush" fence, submitted when swr_flush is called.
+ */
+ struct swr_screen *screen = swr_screen(p_screen);
+ screen->flush_fence = swr_fence_create();
+}
diff --git a/src/gallium/drivers/swr/swr_fence.h b/src/gallium/drivers/swr/swr_fence.h
new file mode 100644
index 0000000..257b240
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_fence.h
@@ -0,0 +1,70 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_FENCE_H
+#define SWR_FENCE_H
+
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+
+struct pipe_screen;
+
+/*
+ * Driver fence object.  The fence counts as done when read == write
+ * (see swr_is_fence_done).
+ */
+struct swr_fence {
+ struct pipe_reference reference;
+
+ uint64_t read; /* updated by the sync callback on completion */
+ uint64_t write; /* incremented on every swr_fence_submit */
+
+ unsigned id; /* Just for reference */
+};
+
+
+/* Cast an opaque pipe_fence_handle to the driver's swr_fence. */
+static inline struct swr_fence *
+swr_fence(struct pipe_fence_handle *fence)
+{
+ return (struct swr_fence *)fence;
+}
+
+/* A fence is done when its read counter has caught up with its write
+ * counter; a fence that was never submitted therefore reads as done. */
+static INLINE boolean
+swr_is_fence_done(struct swr_fence *fence)
+{
+ return (fence->read == fence->write);
+}
+
+
+void swr_fence_init(struct pipe_screen *screen);
+
+struct pipe_fence_handle *swr_fence_create();
+
+void swr_fence_reference(struct pipe_screen *screen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *f);
+
+boolean swr_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence_handle,
+ uint64_t timeout);
+
+void
+swr_fence_submit(struct swr_context *ctx, struct pipe_fence_handle *fence);
+
+uint64_t swr_get_timestamp(struct pipe_screen *screen);
+
+#endif
diff --git a/src/gallium/drivers/swr/swr_loader.cpp b/src/gallium/drivers/swr/swr_loader.cpp
new file mode 100644
index 0000000..2113c37
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_loader.cpp
@@ -0,0 +1,67 @@
+/****************************************************************************
+ * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "util/u_cpu_detect.h"
+#include "util/u_dl.h"
+#include "swr_public.h"
+
+#include <stdio.h>
+#include <dlfcn.h>
+
+typedef pipe_screen *(*screen_create_proc)(struct sw_winsys *winsys);
+
+/*
+ * Loader entry point.  Picks the SWR back-end library matching the CPU
+ * (AVX2 first, then AVX), resolves its swr_create_screen symbol and
+ * forwards the winsys to it.  Any failure is reported on stderr and
+ * terminates the process.
+ */
+struct pipe_screen *
+swr_create_screen(struct sw_winsys *winsys)
+{
+   util_dl_library *lib = nullptr;
+
+   fprintf(stderr, "SWR detected ");
+
+   util_cpu_detect();
+
+   /* Neither instruction set available: nothing to load. */
+   if (!util_cpu_caps.has_avx2 && !util_cpu_caps.has_avx) {
+      fprintf(stderr, "no AVX/AVX2 support. Aborting!\n");
+      exit(-1);
+   }
+
+   if (util_cpu_caps.has_avx2) {
+      fprintf(stderr, "AVX2\n");
+      lib = util_dl_open("libswrAVX2.so");
+   } else {
+      fprintf(stderr, "AVX\n");
+      lib = util_dl_open("libswrAVX.so");
+   }
+
+   if (!lib) {
+      fprintf(stderr, "SWR library load failure: %s\n", util_dl_error());
+      exit(-1);
+   }
+
+   screen_create_proc create_fn =
+      (screen_create_proc)util_dl_get_proc_address(lib, "swr_create_screen");
+   if (!create_fn) {
+      fprintf(stderr, "SWR library search failure: %s\n", util_dl_error());
+      exit(-1);
+   }
+
+   return create_fn(winsys);
+}
diff --git a/src/gallium/drivers/swr/swr_memory.h b/src/gallium/drivers/swr/swr_memory.h
new file mode 100644
index 0000000..d116781
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_memory.h
@@ -0,0 +1,99 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#pragma once
+
+void LoadHotTile(
+ SWR_SURFACE_STATE *pSrcSurface,
+ SWR_FORMAT dstFormat,
+ SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+ UINT x, UINT y, uint32_t renderTargetArrayIndex,
+ BYTE *pDstHotTile);
+
+void StoreHotTile(
+ SWR_SURFACE_STATE *pDstSurface,
+ SWR_FORMAT srcFormat,
+ SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+ UINT x, UINT y, uint32_t renderTargetArrayIndex,
+ BYTE *pSrcHotTile);
+
+void StoreHotTileClear(
+ SWR_SURFACE_STATE *pDstSurface,
+ SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+ UINT x,
+ UINT y,
+ const float* pClearColor);
+
+/* Load-tile hook: treats the private context handle as a swr_draw_context,
+ * picks the render target for the given attachment as the source surface and
+ * forwards everything to LoadHotTile. */
+INLINE void
+swr_LoadHotTile(HANDLE hPrivateContext,
+                SWR_FORMAT dstFormat,
+                SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+                UINT x, UINT y,
+                uint32_t renderTargetArrayIndex, BYTE* pDstHotTile)
+{
+   swr_draw_context *drawCtx = (swr_draw_context *)hPrivateContext;
+
+   LoadHotTile(&drawCtx->renderTargets[renderTargetIndex],
+               dstFormat,
+               renderTargetIndex,
+               x,
+               y,
+               renderTargetArrayIndex,
+               pDstHotTile);
+}
+
+/* Store-tile hook: treats the private context handle as a swr_draw_context,
+ * picks the render target for the given attachment as the destination
+ * surface and forwards everything to StoreHotTile. */
+INLINE void
+swr_StoreHotTile(HANDLE hPrivateContext,
+                 SWR_FORMAT srcFormat,
+                 SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+                 UINT x, UINT y,
+                 uint32_t renderTargetArrayIndex, BYTE* pSrcHotTile)
+{
+   swr_draw_context *drawCtx = (swr_draw_context *)hPrivateContext;
+
+   StoreHotTile(&drawCtx->renderTargets[renderTargetIndex],
+                srcFormat,
+                renderTargetIndex,
+                x,
+                y,
+                renderTargetArrayIndex,
+                pSrcHotTile);
+}
+
+/* Clear-store hook: treats the private context handle as a swr_draw_context,
+ * picks the render target for the given attachment as the destination
+ * surface and forwards the clear colour to StoreHotTileClear. */
+INLINE void
+swr_StoreHotTileClear(HANDLE hPrivateContext,
+                      SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+                      UINT x,
+                      UINT y,
+                      const float* pClearColor)
+{
+   swr_draw_context *drawCtx = (swr_draw_context *)hPrivateContext;
+
+   StoreHotTileClear(&drawCtx->renderTargets[renderTargetIndex],
+                     renderTargetIndex,
+                     x,
+                     y,
+                     pClearColor);
+}
+
+/* Table initialisers, declared here and called from swr_InitMemoryModule. */
+void InitSimLoadTilesTable();
+void InitSimStoreTilesTable();
+void InitSimClearTilesTable();
+
+/* Init Load/Store/ClearTiles Tables */
+/* Calls the three initialisers in load, store, clear order. */
+INLINE void swr_InitMemoryModule()
+{
+ InitSimLoadTilesTable();
+ InitSimStoreTilesTable();
+ InitSimClearTilesTable();
+}
diff --git a/src/gallium/drivers/swr/swr_public.h b/src/gallium/drivers/swr/swr_public.h
new file mode 100644
index 0000000..0814c3b
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_public.h
@@ -0,0 +1,46 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_PUBLIC_H
+#define SWR_PUBLIC_H
+
+struct pipe_screen;
+struct sw_winsys;
+struct sw_displaytarget;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct pipe_screen *swr_create_screen(struct sw_winsys *winsys);
+
+struct sw_winsys *swr_get_winsys(struct pipe_screen *pipe);
+
+struct sw_displaytarget *swr_get_displaytarget(struct pipe_resource *resource);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp
new file mode 100644
index 0000000..2510b3a
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_query.cpp
@@ -0,0 +1,334 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "os/os_time.h"
+#include "swr_context.h"
+#include "swr_fence.h"
+#include "swr_query.h"
+#include "swr_screen.h"
+#include "swr_state.h"
+
+
+/* Cast an opaque pipe_query handle to the driver's swr_query. */
+static struct swr_query *
+swr_query(struct pipe_query *p)
+{
+ return (struct swr_query *)p;
+}
+
+/*
+ * Allocate a zeroed query object and record its type and index.
+ * Returns NULL when the allocation fails.
+ */
+static struct pipe_query *
+swr_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
+{
+   assert(type < PIPE_QUERY_TYPES);
+   assert(index < MAX_SO_STREAMS);
+
+   struct swr_query *query = CALLOC_STRUCT(swr_query);
+   if (!query)
+      return NULL;
+
+   query->type = type;
+   query->index = index;
+
+   return (struct pipe_query *)query;
+}
+
+
+/*
+ * Free a query.  If the query holds a fence that is not done yet, the fence
+ * is submitted and waited on before the reference is dropped.
+ */
+static void
+swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct swr_query *pq = swr_query(q);
+
+ if (pq->fence) {
+ if (!swr_is_fence_done(swr_fence(pq->fence))) {
+ swr_fence_submit(swr_context(pipe), pq->fence);
+ swr_fence_finish(pipe->screen, pq->fence, 0);
+ }
+ swr_fence_reference(pipe->screen, &pq->fence, NULL);
+ }
+
+ FREE(pq);
+}
+
+
+// XXX Create a fence callback, rather than stalling SwrWaitForIdle
+/*
+ * Take a snapshot of the counters relevant to pq->type and write it to
+ * *pq->result (swr_begin_query points that at pq->start, swr_end_query at
+ * pq->end).
+ *
+ * Timer-style and GPU_FINISHED queries are answered without touching the
+ * core counters and return early, so they never reach the SwrEnableStats
+ * call at the bottom.  All other types read SwrCore stats and stall until
+ * the core is idle.
+ */
+static void
+swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
+{
+ struct swr_context *ctx = swr_context(pipe);
+
+ assert(pq->result);
+ union pipe_query_result *result = pq->result;
+ boolean enable_stats = pq->enable_stats;
+ SWR_STATS swr_stats = {0};
+
+ /* Retire any fence still attached to the query before sampling. */
+ if (pq->fence) {
+ if (!swr_is_fence_done(swr_fence(pq->fence))) {
+ swr_fence_submit(ctx, pq->fence);
+ swr_fence_finish(pipe->screen, pq->fence, 0);
+ }
+ swr_fence_reference(pipe->screen, &pq->fence, NULL);
+ }
+
+ /*
+ * These queries don't need SWR Stats enabled in the core
+ * Set and return.
+ */
+ switch (pq->type) {
+ case PIPE_QUERY_TIMESTAMP:
+ case PIPE_QUERY_TIME_ELAPSED:
+ result->u64 = swr_get_timestamp(pipe->screen);
+ return;
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ /* nothing to do here */
+ return;
+ break;
+ case PIPE_QUERY_GPU_FINISHED:
+ result->b = TRUE; /* XXX TODO Add an api func to SWR to compare drawId
+ vs LastRetiredId? */
+ return;
+ break;
+ default:
+ /* Any query that needs SwrCore stats */
+ break;
+ }
+
+ /*
+ * All other results are collected from SwrCore counters
+ */
+
+ /* XXX, Should turn this into a fence callback and skip the stall */
+ SwrGetStats(ctx->swrContext, &swr_stats);
+ /* SwrGetStats returns immediately, wait for collection */
+ SwrWaitForIdle(ctx->swrContext);
+
+ switch (pq->type) {
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ result->u64 = swr_stats.DepthPassCount;
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ result->u64 = swr_stats.IaPrimitives;
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ result->u64 = swr_stats.SoNumPrimsWritten[pq->index];
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
+ struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
+ so_stats->num_primitives_written =
+ swr_stats.SoNumPrimsWritten[pq->index];
+ so_stats->primitives_storage_needed =
+ swr_stats.SoPrimStorageNeeded[pq->index];
+ } break;
+ case PIPE_QUERY_PIPELINE_STATISTICS: {
+ struct pipe_query_data_pipeline_statistics *p_stats =
+ &result->pipeline_statistics;
+ p_stats->ia_vertices = swr_stats.IaVertices;
+ p_stats->ia_primitives = swr_stats.IaPrimitives;
+ p_stats->vs_invocations = swr_stats.VsInvocations;
+ p_stats->gs_invocations = swr_stats.GsInvocations;
+ p_stats->gs_primitives = swr_stats.GsPrimitives;
+ /* NOTE(review): c_invocations is filled from CPrimitives, the same
+ * counter as c_primitives below - confirm whether SWR_STATS has a
+ * dedicated clipper-invocation counter that should be used instead. */
+ p_stats->c_invocations = swr_stats.CPrimitives;
+ p_stats->c_primitives = swr_stats.CPrimitives;
+ p_stats->ps_invocations = swr_stats.PsInvocations;
+ p_stats->hs_invocations = swr_stats.HsInvocations;
+ p_stats->ds_invocations = swr_stats.DsInvocations;
+ p_stats->cs_invocations = swr_stats.CsInvocations;
+ } break;
+ default:
+ assert(0 && "Unsupported query");
+ break;
+ }
+
+ /* Only change stat collection if there are no active queries */
+ if (ctx->active_queries == 0)
+ SwrEnableStats(ctx->swrContext, enable_stats);
+}
+
+
+/*
+ * pipe_context::get_query_result hook.
+ *
+ * pipe   - context the query belongs to
+ * q      - query to read
+ * wait   - when FALSE and the query's fence has not completed, return FALSE
+ *          instead of blocking
+ * result - receives the value, computed from the start/end snapshots taken
+ *          by swr_begin_query / swr_end_query
+ *
+ * Returns TRUE when *result has been filled in.
+ */
+static boolean
+swr_get_query_result(struct pipe_context *pipe,
+                     struct pipe_query *q,
+                     boolean wait,
+                     union pipe_query_result *result)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   struct swr_query *pq = swr_query(q);
+
+   if (pq->fence) {
+      if (!swr_is_fence_done(swr_fence(pq->fence))) {
+         swr_fence_submit(ctx, pq->fence);
+         if (!wait)
+            return FALSE;
+         swr_fence_finish(pipe->screen, pq->fence, 0);
+      }
+      swr_fence_reference(pipe->screen, &pq->fence, NULL);
+   }
+
+   /* XXX: Need to handle counter rollover */
+
+   switch (pq->type) {
+   /* Booleans */
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      result->b = pq->end.u64 != pq->start.u64 ? TRUE : FALSE;
+      break;
+   case PIPE_QUERY_GPU_FINISHED:
+      result->b = pq->end.b;
+      break;
+   /* Counters */
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIME_ELAPSED:
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      result->u64 = pq->end.u64 - pq->start.u64;
+      break;
+   /* Structures */
+   case PIPE_QUERY_SO_STATISTICS: {
+      struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
+      struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
+      struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
+      so_stats->num_primitives_written =
+         end->num_primitives_written - start->num_primitives_written;
+      so_stats->primitives_storage_needed =
+         end->primitives_storage_needed - start->primitives_storage_needed;
+   } break;
+   case PIPE_QUERY_TIMESTAMP_DISJOINT: {
+      /* os_time_get_nano returns nanoseconds */
+      result->timestamp_disjoint.frequency = UINT64_C(1000000000);
+      result->timestamp_disjoint.disjoint = FALSE;
+   } break;
+   case PIPE_QUERY_PIPELINE_STATISTICS: {
+      struct pipe_query_data_pipeline_statistics *p_stats =
+         &result->pipeline_statistics;
+      struct pipe_query_data_pipeline_statistics *start =
+         &pq->start.pipeline_statistics;
+      struct pipe_query_data_pipeline_statistics *end =
+         &pq->end.pipeline_statistics;
+      p_stats->ia_vertices = end->ia_vertices - start->ia_vertices;
+      p_stats->ia_primitives = end->ia_primitives - start->ia_primitives;
+      p_stats->vs_invocations = end->vs_invocations - start->vs_invocations;
+      p_stats->gs_invocations = end->gs_invocations - start->gs_invocations;
+      p_stats->gs_primitives = end->gs_primitives - start->gs_primitives;
+      p_stats->c_invocations = end->c_invocations - start->c_invocations;
+      p_stats->c_primitives = end->c_primitives - start->c_primitives;
+      p_stats->ps_invocations = end->ps_invocations - start->ps_invocations;
+      p_stats->hs_invocations = end->hs_invocations - start->hs_invocations;
+      p_stats->ds_invocations = end->ds_invocations - start->ds_invocations;
+      p_stats->cs_invocations = end->cs_invocations - start->cs_invocations;
+   } break;
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
+      struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
+      struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
+      uint64_t num_primitives_written =
+         end->num_primitives_written - start->num_primitives_written;
+      uint64_t primitives_storage_needed =
+         end->primitives_storage_needed - start->primitives_storage_needed;
+      /* Overflow means more storage was needed than was actually written. */
+      result->b = primitives_storage_needed > num_primitives_written;
+   } break;
+   default:
+      assert(0 && "Unsupported query");
+      break;
+   }
+
+   return TRUE;
+}
+
+/*
+ * pipe_context::begin_query hook: clears both snapshots, records the start
+ * snapshot with stats collection requested, and counts the query as active.
+ * Always returns true.
+ */
+static boolean
+swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct swr_context *ctx = swr_context(pipe);
+ struct swr_query *pq = swr_query(q);
+
+ /* Initialize Results */
+ memset(&pq->start, 0, sizeof(pq->start));
+ memset(&pq->end, 0, sizeof(pq->end));
+
+ /* Gather start stats and enable SwrCore counters */
+ pq->result = &pq->start;
+ pq->enable_stats = TRUE;
+ swr_gather_stats(pipe, pq);
+ /* Incremented after gathering, so swr_gather_stats still sees the count
+ * without this query. */
+ ctx->active_queries++;
+
+ /* override start timestamp to 0 for TIMESTAMP query */
+ if (pq->type == PIPE_QUERY_TIMESTAMP)
+ pq->start.u64 = 0;
+
+ return true;
+}
+
+/*
+ * pipe_context::end_query hook: removes the query from the active count and
+ * records the end snapshot with stats collection no longer requested.
+ */
+static void
+swr_end_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct swr_context *ctx = swr_context(pipe);
+ struct swr_query *pq = swr_query(q);
+
+ assert(ctx->active_queries
+ && "swr_end_query, there are no active queries!");
+ /* Decremented before gathering, so swr_gather_stats sees the count without
+ * this query. */
+ ctx->active_queries--;
+
+ /* Gather end stats and disable SwrCore counters */
+ pq->result = &pq->end;
+ pq->enable_stats = FALSE;
+ swr_gather_stats(pipe, pq);
+}
+
+
+/*
+ * Evaluate the context's conditional-rendering predicate.
+ *
+ * Returns TRUE when drawing should go ahead: no predicate query is set, the
+ * query result is not available, or the result (tested for zero) matches
+ * ctx->render_cond_cond.
+ */
+boolean
+swr_check_render_cond(struct pipe_context *pipe)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   union pipe_query_result result;
+   boolean b, wait;
+
+   if (!ctx->render_cond_query)
+      return TRUE; /* no query predicate, draw normally */
+
+   wait = (ctx->render_cond_mode == PIPE_RENDER_COND_WAIT
+           || ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT);
+
+   /* Use a full, zeroed result union: get_query_result may write only the
+    * boolean member (leaving the remaining bytes untouched) or a structure
+    * larger than 64 bits, neither of which is safe with a bare uint64_t. */
+   memset(&result, 0, sizeof(result));
+
+   b = pipe->get_query_result(pipe, ctx->render_cond_query, wait, &result);
+   if (b)
+      return (!result.u64 == ctx->render_cond_cond);
+   else
+      return TRUE;
+}
+
+/* Install the query hooks on the pipe context and reset the count of active
+ * queries. */
+void
+swr_query_init(struct pipe_context *pipe)
+{
+ struct swr_context *ctx = swr_context(pipe);
+
+ pipe->create_query = swr_create_query;
+ pipe->destroy_query = swr_destroy_query;
+ pipe->begin_query = swr_begin_query;
+ pipe->end_query = swr_end_query;
+ pipe->get_query_result = swr_get_query_result;
+
+ ctx->active_queries = 0;
+}
diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h
new file mode 100644
index 0000000..836d07b
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_query.h
@@ -0,0 +1,46 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_QUERY_H
+#define SWR_QUERY_H
+
+
+#include <limits.h>
+
+struct swr_query {
+ unsigned type; /* PIPE_QUERY_* */
+ unsigned index;
+
+ union pipe_query_result *result; /* snapshot being gathered: points at start or end */
+ union pipe_query_result start; /* stats gathered by swr_begin_query */
+ union pipe_query_result end; /* stats gathered by swr_end_query */
+
+ struct pipe_fence_handle *fence;
+
+ boolean enable_stats; /* set TRUE by swr_begin_query, FALSE by swr_end_query */
+};
+
+extern void swr_query_init(struct pipe_context *pipe);
+
+extern boolean swr_check_render_cond(struct pipe_context *pipe);
+#endif
diff --git a/src/gallium/drivers/swr/swr_resource.h b/src/gallium/drivers/swr/swr_resource.h
new file mode 100644
index 0000000..87a27ac
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_resource.h
@@ -0,0 +1,97 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_RESOURCE_H
+#define SWR_RESOURCE_H
+
+#include "pipe/p_state.h"
+#include "api.h"
+
+struct sw_displaytarget;
+
+struct swr_resource {
+ struct pipe_resource base; /* first member: swr_resource() casts pipe_resource* to swr_resource* */
+
+ bool has_depth; /* from util_format_has_depth() on the resource format */
+ bool has_stencil; /* from util_format_has_stencil() on the resource format */
+
+ UINT alignedWidth; /* level-0 width, macrotile-aligned for render/depth targets */
+ UINT alignedHeight; /* level-0 height, macrotile-aligned for render/depth targets */
+
+ SWR_SURFACE_STATE swr;
+ SWR_SURFACE_STATE secondary; // for faking depth/stencil merged formats
+
+ struct sw_displaytarget *display_target; /* winsys buffer; non-NULL only for displayable surfaces */
+
+ unsigned row_stride[PIPE_MAX_TEXTURE_LEVELS]; /* per level: alignedWidth * format Bpp */
+ unsigned img_stride[PIPE_MAX_TEXTURE_LEVELS]; /* per level: row_stride * alignedHeight */
+ unsigned mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; /* per level: offset of the level from the base address */
+
+ /* Opaque pointer to swr_context to mark resource in use */
+ void *bound_to_context;
+};
+
+
+/* Downcast a pipe_resource pointer to the swr_resource it is the base of. */
+static INLINE struct swr_resource *
+swr_resource(struct pipe_resource *resource)
+{
+   struct swr_resource *swr_res = (struct swr_resource *)resource;
+
+   return swr_res;
+}
+
+/*
+ * Tell whether a resource is a texture (any PIPE_TEXTURE_* target) rather
+ * than a PIPE_BUFFER.  Unknown targets assert and are reported as FALSE.
+ */
+static INLINE boolean
+swr_resource_is_texture(const struct pipe_resource *resource)
+{
+   boolean is_texture = FALSE;
+
+   switch (resource->target) {
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_3D:
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      is_texture = TRUE;
+      break;
+   case PIPE_BUFFER:
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   return is_texture;
+}
+
+
+/*
+ * Return the base address of a buffer resource's storage.
+ * Asserts that the resource is not a texture.
+ */
+static INLINE void *
+swr_resource_data(struct pipe_resource *resource)
+{
+   assert(!swr_resource_is_texture(resource));
+
+   return swr_resource(resource)->swr.pBaseAddress;
+}
+
+
+void swr_store_render_target(struct swr_context *ctx,
+ uint32_t attachment,
+ enum SWR_TILE_STATE post_tile_state);
+#endif
diff --git a/src/gallium/drivers/swr/swr_scratch.cpp b/src/gallium/drivers/swr/swr_scratch.cpp
new file mode 100644
index 0000000..e6c448c
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_scratch.cpp
@@ -0,0 +1,116 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "util/u_memory.h"
+#include "swr_context.h"
+#include "swr_scratch.h"
+#include "api.h"
+
+
+/*
+ * Copy size bytes from user_buffer into scratch memory and return a pointer
+ * to the copy.
+ *
+ * Copies of 2048 bytes or more are placed in memory obtained from
+ * SwrAllocDrawContextMemory.  Smaller copies are carved out of the ring
+ * described by space: the ring is (re)allocated to hold
+ * KNOB_MAX_DRAWS_IN_FLIGHT copies of the largest small size seen so far, and
+ * the head wraps back to the base when the next copy would reach the end.
+ *
+ * Returns NULL, without copying, if scratch memory could not be obtained.
+ */
+void *
+swr_copy_to_scratch_space(struct swr_context *ctx,
+                          struct swr_scratch_space *space,
+                          const void *user_buffer,
+                          unsigned int size)
+{
+   void *ptr;
+   assert(space);
+   assert(user_buffer);
+   assert(size);
+
+   if (size >= 2048) { /* XXX TODO create KNOB_ for this */
+      /* Use per draw SwrAllocDrawContextMemory for larger copies */
+      ptr = SwrAllocDrawContextMemory(ctx->swrContext, size, 4);
+   } else {
+      /* Allocate enough so that MAX_DRAWS_IN_FLIGHT sets fit. */
+      unsigned int max_size_in_flight = size * KNOB_MAX_DRAWS_IN_FLIGHT;
+
+      /* Need to grow space */
+      if (max_size_in_flight > space->current_size) {
+         /* Must idle the pipeline, this is infrequent */
+         SwrWaitForIdle(ctx->swrContext);
+
+         if (space->base)
+            align_free(space->base);
+
+         space->base = (BYTE *)align_malloc(max_size_in_flight, 4);
+         space->head = (void *)space->base;
+
+         if (!space->base) {
+            /*
+             * Record an empty ring so the next call retries the allocation
+             * instead of writing through a NULL base.
+             */
+            space->current_size = 0;
+            return NULL;
+         }
+
+         /* Only publish the new size once the storage really exists. */
+         space->current_size = max_size_in_flight;
+      }
+
+      /* Wrap */
+      if (((BYTE *)space->head + size)
+          >= ((BYTE *)space->base + space->current_size)) {
+         /*
+          * TODO XXX: Should add a fence on wrap.  Assumption is that
+          * current_space >> size, and there are at least MAX_DRAWS_IN_FLIGHT
+          * draws in scratch.  So fence would always be met on wrap.  A fence
+          * would ensure that first frame in buffer is done before wrapping.
+          * If fence ever needs to be waited on, can increase buffer size.
+          * So far in testing, this hasn't been necessary.
+          */
+         space->head = space->base;
+      }
+
+      ptr = space->head;
+      space->head = (BYTE *)space->head + size;
+   }
+
+   /* Copy user_buffer to scratch */
+   if (ptr)
+      memcpy(ptr, user_buffer, size);
+
+   return ptr;
+}
+
+
+/* Allocate the context's scratch-buffer bookkeeping with CALLOC_STRUCT. */
+void
+swr_init_scratch_buffers(struct swr_context *ctx)
+{
+   ctx->scratch = CALLOC_STRUCT(swr_scratch_buffers);
+}
+
+/*
+ * Release every scratch ring that was allocated for the context, then the
+ * swr_scratch_buffers structure itself.  Does nothing if ctx->scratch is
+ * NULL.
+ */
+void
+swr_destroy_scratch_buffers(struct swr_context *ctx)
+{
+   struct swr_scratch_buffers *bufs = ctx->scratch;
+
+   if (!bufs)
+      return;
+
+   if (bufs->vs_constants.base)
+      align_free(bufs->vs_constants.base);
+
+   if (bufs->fs_constants.base)
+      align_free(bufs->fs_constants.base);
+
+   if (bufs->vertex_buffer.base)
+      align_free(bufs->vertex_buffer.base);
+
+   if (bufs->index_buffer.base)
+      align_free(bufs->index_buffer.base);
+
+   FREE(bufs);
+}
diff --git a/src/gallium/drivers/swr/swr_scratch.h b/src/gallium/drivers/swr/swr_scratch.h
new file mode 100644
index 0000000..74218d6
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_scratch.h
@@ -0,0 +1,63 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_SCRATCH_H
+#define SWR_SCRATCH_H
+
+struct swr_scratch_space {
+ void *head; /* where the next copy is placed; advanced per copy, reset to base on wrap */
+ unsigned int current_size; /* size in bytes of the allocation at base */
+ /* TODO XXX: Add a fence for wrap condition. */
+
+ void *base; /* start of the ring allocation (align_malloc / align_free) */
+};
+
+struct swr_scratch_buffers { /* one scratch ring per kind of transient data; lives in ctx->scratch */
+ struct swr_scratch_space vs_constants;
+ struct swr_scratch_space fs_constants;
+ struct swr_scratch_space vertex_buffer;
+ struct swr_scratch_space index_buffer;
+};
+
+
+/*
+ * swr_copy_to_scratch_space
+ * Copies size bytes of user_buffer into the scratch ring buffer.
+ * Used to store temporary data such as client arrays and constants.
+ *
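+ * Inputs:
+ * ctx context whose SWR core is used for large copies and for idling
+ * space ptr to scratch pool (vs_constants, fs_constants, vertex_buffer, index_buffer)
+ * user_buffer data to copy into scratch space
+ * size number of bytes to copy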
+ * Returns:
+ * pointer to data copied to scratch space.
+ */
+void *swr_copy_to_scratch_space(struct swr_context *ctx,
+ struct swr_scratch_space *space,
+ const void *user_buffer,
+ unsigned int size);
+
+void swr_init_scratch_buffers(struct swr_context *ctx);
+void swr_destroy_scratch_buffers(struct swr_context *ctx);
+
+#endif
diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
new file mode 100644
index 0000000..f0d48cd
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -0,0 +1,746 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "pipe/p_screen.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_cpu_detect.h"
+
+#include "state_tracker/sw_winsys.h"
+
+extern "C" {
+#include "gallivm/lp_bld_limits.h"
+}
+
+#include "swr_public.h"
+#include "swr_screen.h"
+#include "swr_context.h"
+#include "swr_resource.h"
+#include "swr_fence.h"
+#include "gen_knobs.h"
+
+#include "jit_api.h"
+
+#include <stdio.h>
+
+/* MSVC case-insensitive compare */
+#if defined(PIPE_CC_MSVC)
+ #define strcasecmp lstrcmpiA
+#endif
+
+/*
+ * Max texture sizes
+ * XXX Check max texture size values against core and sampler.
+ */
+#define SWR_MAX_TEXTURE_SIZE (4 * 1024 * 1024 * 1024ULL) /* 4GB */
+#define SWR_MAX_TEXTURE_2D_LEVELS 14 /* 8K x 8K for now */
+#define SWR_MAX_TEXTURE_3D_LEVELS 12 /* 2K x 2K x 2K for now */
+#define SWR_MAX_TEXTURE_CUBE_LEVELS 14 /* 8K x 8K for now */
+#define SWR_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */
+
+/* pipe_screen::get_name hook: the driver's name string. */
+static const char *
+swr_get_name(struct pipe_screen *screen)
+{
+   const char *driver_name = "SWR";
+
+   return driver_name;
+}
+
+/* pipe_screen::get_vendor hook: the driver's vendor string. */
+static const char *
+swr_get_vendor(struct pipe_screen *screen)
+{
+   const char *vendor_name = "Intel Corporation";
+
+   return vendor_name;
+}
+
+/*
+ * pipe_screen::is_format_supported hook.
+ *
+ * Rejects unknown formats and anything with more than one sample, defers
+ * displayable bindings to the winsys, and for render-target / depth-stencil
+ * bindings requires a matching colorspace and an SWR equivalent of the
+ * format.  Returns TRUE when every requested binding is acceptable.
+ */
+static boolean
+swr_is_format_supported(struct pipe_screen *screen,
+                        enum pipe_format format,
+                        enum pipe_texture_target target,
+                        unsigned sample_count,
+                        unsigned bind)
+{
+   const unsigned displayable_binds =
+      PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED;
+   struct sw_winsys *ws = swr_screen(screen)->winsys;
+   const struct util_format_description *desc;
+
+   assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D
+          || target == PIPE_TEXTURE_1D_ARRAY
+          || target == PIPE_TEXTURE_2D
+          || target == PIPE_TEXTURE_2D_ARRAY
+          || target == PIPE_TEXTURE_RECT
+          || target == PIPE_TEXTURE_3D
+          || target == PIPE_TEXTURE_CUBE
+          || target == PIPE_TEXTURE_CUBE_ARRAY);
+
+   desc = util_format_description(format);
+   if (!desc)
+      return FALSE;
+
+   if (sample_count > 1)
+      return FALSE;
+
+   if ((bind & displayable_binds)
+       && !ws->is_displaytarget_format_supported(ws, bind, format))
+      return FALSE;
+
+   if (bind & PIPE_BIND_RENDER_TARGET) {
+      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS
+          || mesa_to_swr_format(format) == (SWR_FORMAT)-1)
+         return FALSE;
+
+      /*
+       * Although possible, it is unnatural to render into compressed or YUV
+       * surfaces. So disable these here to avoid going into weird paths
+       * inside the state trackers.
+       */
+      if (desc->block.width != 1 || desc->block.height != 1)
+         return FALSE;
+   }
+
+   if (bind & PIPE_BIND_DEPTH_STENCIL) {
+      if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS
+          || mesa_to_swr_format(format) == (SWR_FORMAT)-1)
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+static int /* pipe_screen::get_param hook: value of an integer PIPE_CAP_* capability.
+ * Caps not listed in the switch are logged with debug_printf and reported as 0.
+ */
+swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
+{
+ switch (param) { /* no default label: unlisted caps reach the debug message after the switch */
+ case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_SM3:
+ return 1;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 0;
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return PIPE_MAX_COLOR_BUFS;
+ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+ case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+ return 1;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ return 1;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ return 1;
+ case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return SWR_MAX_TEXTURE_2D_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return SWR_MAX_TEXTURE_3D_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return SWR_MAX_TEXTURE_CUBE_LEVELS;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ return 1;
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ return 1;
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ return 0; // Don't support lower left frag coord.
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return 1;
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ return 1;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ return MAX_SO_STREAMS;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+ case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ return MAX_ATTRIBUTES;
+ case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+ case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+ return 1024;
+ case PIPE_CAP_MAX_VERTEX_STREAMS:
+ return 1;
+ case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+ return 2048;
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ return 1;
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return 1;
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ case PIPE_CAP_START_INSTANCE:
+ return 1;
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+ return SWR_MAX_TEXTURE_ARRAY_LAYERS;
+ case PIPE_CAP_MIN_TEXEL_OFFSET:
+ return -8;
+ case PIPE_CAP_MAX_TEXEL_OFFSET:
+ return 7;
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ return 1;
+ case PIPE_CAP_TEXTURE_BARRIER:
+ return 0;
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: /* draw module */
+ case PIPE_CAP_VERTEX_COLOR_CLAMPED: /* draw module */
+ return 1;
+ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+ return 1;
+ case PIPE_CAP_GLSL_FEATURE_LEVEL:
+ return 330;
+ case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ return 0;
+ case PIPE_CAP_COMPUTE:
+ return 0;
+ case PIPE_CAP_USER_VERTEX_BUFFERS:
+ case PIPE_CAP_USER_INDEX_BUFFERS:
+ case PIPE_CAP_USER_CONSTANT_BUFFERS:
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+ return 1;
+ case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+ return 16;
+ case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+ case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ return 0;
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return 64;
+ case PIPE_CAP_QUERY_TIMESTAMP:
+ return 1;
+ case PIPE_CAP_CUBE_MAP_ARRAY:
+ return 0;
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+ return 65536;
+ case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ return 0;
+ case PIPE_CAP_TGSI_TEXCOORD:
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ return 0;
+ case PIPE_CAP_MAX_VIEWPORTS:
+ return 1;
+ case PIPE_CAP_ENDIANNESS:
+ return PIPE_ENDIAN_NATIVE;
+ case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+ case PIPE_CAP_TEXTURE_GATHER_SM5:
+ return 0;
+ case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+ return 1;
+ case PIPE_CAP_TEXTURE_QUERY_LOD:
+ case PIPE_CAP_SAMPLE_SHADING:
+ case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+ case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+ case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
+ return 0;
+ case PIPE_CAP_FAKE_SW_MSAA:
+ return 1;
+ case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+ case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+ return 0;
+ case PIPE_CAP_DRAW_INDIRECT:
+ return 1;
+
+ case PIPE_CAP_VENDOR_ID:
+ return 0xFFFFFFFF;
+ case PIPE_CAP_DEVICE_ID:
+ return 0xFFFFFFFF;
+ case PIPE_CAP_ACCELERATED:
+ return 0;
+ case PIPE_CAP_VIDEO_MEMORY: {
+ /* XXX: Do we want to return the full amount of system memory ? */
+ uint64_t system_memory;
+
+ if (!os_get_total_physical_memory(&system_memory))
+ return 0;
+
+ return (int)(system_memory >> 20);
+ }
+ case PIPE_CAP_UMA:
+ return 1;
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ return 1;
+ case PIPE_CAP_CLIP_HALFZ:
+ return 1;
+ case PIPE_CAP_VERTEXID_NOBASE:
+ return 0;
+ case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+ return 1;
+ case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+ return 0;
+ case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ return 0; // xxx
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ return 0;
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ return 0;
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ return 0; // xxx
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ return 1;
+ case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_GENERATE_MIPMAP:
+ case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
+ return 0;
+ }
+
+ /* should only get here on unhandled cases */
+ debug_printf("Unexpected PIPE_CAP %d query\n", param);
+ return 0;
+}
+
+/*
+ * pipe_screen::get_shader_param hook.  Vertex and fragment shader caps come
+ * from gallivm; every other shader stage reports 0.
+ */
+static int
+swr_get_shader_param(struct pipe_screen *screen,
+                     unsigned shader,
+                     enum pipe_shader_cap param)
+{
+   switch (shader) {
+   case PIPE_SHADER_VERTEX:
+   case PIPE_SHADER_FRAGMENT:
+      return gallivm_get_shader_param(param);
+   default:
+      // Todo: geometry, tesselation, compute
+      return 0;
+   }
+}
+
+
+/*
+ * pipe_screen::get_paramf hook: value of a floating-point PIPE_CAPF_*
+ * capability.  Caps not listed are logged and reported as 0.0.
+ */
+static float
+swr_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
+{
+   switch (param) {
+   case PIPE_CAPF_MAX_LINE_WIDTH:
+   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+   case PIPE_CAPF_MAX_POINT_WIDTH:
+      return 255.0; /* arbitrary */
+   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+   case PIPE_CAPF_GUARD_BAND_LEFT:
+   case PIPE_CAPF_GUARD_BAND_TOP:
+   case PIPE_CAPF_GUARD_BAND_RIGHT:
+   case PIPE_CAPF_GUARD_BAND_BOTTOM:
+      return 0.0;
+   }
+
+   /* should only get here on unhandled cases */
+   debug_printf("Unexpected PIPE_CAPF %d query\n", param);
+   return 0.0;
+}
+
+/*
+ * Translate a gallium pipe_format into the matching SWR_FORMAT.
+ *
+ * The SWR format table is searched first for an entry whose name equals the
+ * gallium short name (case-insensitively); a short list of explicit
+ * exceptions is tried next.  Returns (SWR_FORMAT)-1 when there is no match.
+ */
+SWR_FORMAT
+mesa_to_swr_format(enum pipe_format format)
+{
+   const SWR_FORMAT unsupported = (SWR_FORMAT)-1;
+   const struct util_format_description *desc =
+      util_format_description(format);
+
+   if (!desc)
+      return unsupported;
+
+   // more robust check would be comparing all attributes of the formats
+   // luckily format names are mostly standardized
+   for (int idx = 0; idx < NUM_SWR_FORMATS; idx++) {
+      const SWR_FORMAT candidate = (SWR_FORMAT)idx;
+      const SWR_FORMAT_INFO &info = GetFormatInfo(candidate);
+
+      if (strcasecmp(desc->short_name, info.name) == 0)
+         return candidate;
+   }
+
+   // ... with some exceptions
+   switch (format) {
+   case PIPE_FORMAT_R8G8B8A8_SRGB:
+      return R8G8B8A8_UNORM_SRGB;
+   case PIPE_FORMAT_B8G8R8A8_SRGB:
+      return B8G8R8A8_UNORM_SRGB;
+   case PIPE_FORMAT_I8_UNORM:
+      return R8_UNORM;
+   case PIPE_FORMAT_Z16_UNORM:
+      return R16_UNORM;
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      return R24_UNORM_X8_TYPELESS;
+   case PIPE_FORMAT_Z32_FLOAT:
+      return R32_FLOAT;
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      return R32_FLOAT_X8X24_TYPELESS;
+   case PIPE_FORMAT_L8A8_UNORM:
+      return R8G8_UNORM;
+   default:
+      break;
+   }
+
+   debug_printf("asked to convert unsupported format %s\n",
+                desc->name);
+   return unsupported;
+}
+
+/*
+ * Back a displayable resource with a winsys display target.
+ *
+ * Creates the display target at the resource's aligned size, records it and
+ * its mapped address in res, and zero-fills the surface when the map
+ * succeeds.  Returns FALSE if the winsys cannot create the target.
+ */
+static boolean
+swr_displaytarget_layout(struct swr_screen *screen, struct swr_resource *res)
+{
+   struct sw_winsys *ws = screen->winsys;
+   UINT row_pitch;
+
+   struct sw_displaytarget *target =
+      ws->displaytarget_create(ws,
+                               res->base.bind,
+                               res->base.format,
+                               res->alignedWidth,
+                               res->alignedHeight,
+                               64, NULL,
+                               &row_pitch);
+   if (!target)
+      return FALSE;
+
+   void *pixels = ws->displaytarget_map(ws, target, 0);
+
+   res->swr.pBaseAddress = (uint8_t*) pixels;
+   res->display_target = target;
+
+   /* Clear the display target surface */
+   if (pixels != NULL)
+      memset(pixels, 0, res->alignedHeight * row_pitch);
+
+   ws->displaytarget_unmap(ws, target);
+
+   return TRUE;
+}
+
+/*
+ * Fill in the SWR surface state and per-mip-level layout of a resource and,
+ * when allocate is set, allocate its backing storage.
+ *
+ * For every mip level this records row_stride, img_stride and mip_offsets in
+ * res; render-target and depth-stencil surfaces are padded to the macrotile
+ * size.  Combined depth+stencil formats additionally get a separate R8_UINT
+ * "secondary" surface for the stencil data.
+ *
+ * Returns FALSE if the resource exceeds SWR_MAX_TEXTURE_SIZE, if its size
+ * cannot be represented in the unsigned mip_offsets, or if an allocation
+ * fails (nothing is left allocated in that case).
+ */
+static boolean
+swr_texture_layout(struct swr_screen *screen,
+                   struct swr_resource *res,
+                   boolean allocate)
+{
+   struct pipe_resource *pt = &res->base;
+
+   pipe_format fmt = pt->format;
+   const struct util_format_description *desc = util_format_description(fmt);
+
+   res->has_depth = util_format_has_depth(desc);
+   res->has_stencil = util_format_has_stencil(desc);
+
+   if (res->has_stencil && !res->has_depth)
+      fmt = PIPE_FORMAT_R8_UINT;
+
+   res->swr.width = pt->width0;
+   res->swr.height = pt->height0;
+   res->swr.depth = pt->depth0;
+   res->swr.type = swr_convert_target_type(pt->target);
+   res->swr.tileMode = SWR_TILE_NONE;
+   res->swr.format = mesa_to_swr_format(fmt);
+   res->swr.numSamples = (1 << pt->nr_samples);
+
+   SWR_FORMAT_INFO finfo = GetFormatInfo(res->swr.format);
+
+   /*
+    * Accumulate in 64 bits: with a 32-bit accumulator the sum wraps and the
+    * comparison against the (larger than 32-bit) size limit can never fire.
+    */
+   uint64_t total_size = 0;
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
+   unsigned depth = pt->depth0;
+   unsigned layers = pt->array_size;
+
+   for (int level = 0; level <= pt->last_level; level++) {
+      unsigned alignedWidth, alignedHeight;
+      unsigned num_slices;
+
+      if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) {
+         alignedWidth = align(width, KNOB_MACROTILE_X_DIM);
+         alignedHeight = align(height, KNOB_MACROTILE_Y_DIM);
+      } else {
+         alignedWidth = width;
+         alignedHeight = height;
+      }
+
+      if (level == 0) {
+         res->alignedWidth = alignedWidth;
+         res->alignedHeight = alignedHeight;
+      }
+
+      res->row_stride[level] = alignedWidth * finfo.Bpp;
+      res->img_stride[level] = res->row_stride[level] * alignedHeight;
+      /* Lossless: the running total is kept within unsigned range below. */
+      res->mip_offsets[level] = (unsigned)total_size;
+
+      if (pt->target == PIPE_TEXTURE_3D)
+         num_slices = depth;
+      else if (pt->target == PIPE_TEXTURE_1D_ARRAY
+               || pt->target == PIPE_TEXTURE_2D_ARRAY
+               || pt->target == PIPE_TEXTURE_CUBE
+               || pt->target == PIPE_TEXTURE_CUBE_ARRAY)
+         num_slices = layers;
+      else
+         num_slices = 1;
+
+      total_size += (uint64_t)res->img_stride[level] * num_slices;
+      if (total_size > SWR_MAX_TEXTURE_SIZE || total_size > (uint64_t)~0u)
+         return FALSE;
+
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
+   }
+
+   res->swr.halign = res->alignedWidth;
+   res->swr.valign = res->alignedHeight;
+   res->swr.pitch = res->row_stride[0];
+
+   if (allocate) {
+      res->swr.pBaseAddress =
+         (BYTE *)_aligned_malloc((size_t)total_size, 64);
+      if (!res->swr.pBaseAddress && total_size)
+         return FALSE;
+
+      if (res->has_depth && res->has_stencil) {
+         /*
+          * Set the stencil format before asking for its format info; the
+          * pitch must be derived from R8_UINT, not from whatever value the
+          * zero-initialised format field happens to name.
+          */
+         res->secondary.format = R8_UINT;
+         SWR_FORMAT_INFO stencil_info =
+            GetFormatInfo(res->secondary.format);
+
+         res->secondary.width = pt->width0;
+         res->secondary.height = pt->height0;
+         res->secondary.depth = pt->depth0;
+         res->secondary.type = SURFACE_2D;
+         res->secondary.tileMode = SWR_TILE_NONE;
+         res->secondary.numSamples = (1 << pt->nr_samples);
+         res->secondary.pitch = res->alignedWidth * stencil_info.Bpp;
+
+         res->secondary.pBaseAddress = (BYTE *)_aligned_malloc(
+            res->alignedHeight * res->secondary.pitch, 64);
+         if (!res->secondary.pBaseAddress) {
+            /* Do not leave a half-allocated resource behind. */
+            _aligned_free(res->swr.pBaseAddress);
+            res->swr.pBaseAddress = NULL;
+            return FALSE;
+         }
+      }
+   }
+
+   return TRUE;
+}
+
+/*
+ * pipe_screen::can_create_resource hook: run the layout computation on a
+ * zeroed scratch resource, without allocating, and report whether it
+ * succeeds.
+ */
+static boolean
+swr_can_create_resource(struct pipe_screen *screen,
+                        const struct pipe_resource *templat)
+{
+   struct swr_resource probe;
+
+   memset(&probe, 0, sizeof(probe));
+   probe.base = *templat;
+
+   boolean fits = swr_texture_layout(swr_screen(screen), &probe, false);
+   return fits;
+}
+
+/*
+ * pipe_screen::resource_create hook.
+ *
+ * Allocates an swr_resource from the template.  Displayable textures get
+ * their layout computed without allocation and are then backed by a winsys
+ * display target; all other textures and buffers are allocated by
+ * swr_texture_layout.  Returns NULL on any failure.
+ */
+static struct pipe_resource *
+swr_resource_create(struct pipe_screen *_screen,
+                    const struct pipe_resource *templat)
+{
+   struct swr_screen *screen = swr_screen(_screen);
+   struct swr_resource *res = CALLOC_STRUCT(swr_resource);
+   if (!res)
+      return NULL;
+
+   res->base = *templat;
+   pipe_reference_init(&res->base.reference, 1);
+   res->base.screen = &screen->base;
+
+   if (swr_resource_is_texture(&res->base)) {
+      if (res->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT
+                            | PIPE_BIND_SHARED)) {
+         /* displayable surface
+          * first call swr_texture_layout without allocating to finish
+          * filling out the SWR_SURFACE_STATE in res; a layout failure must
+          * not be ignored, the display target is sized from its results */
+         if (!swr_texture_layout(screen, res, false))
+            goto fail;
+         if (!swr_displaytarget_layout(screen, res))
+            goto fail;
+      } else {
+         /* texture map */
+         if (!swr_texture_layout(screen, res, true))
+            goto fail;
+      }
+   } else {
+      /* other data (vertex buffer, const buffer, etc) */
+      assert(util_format_get_blocksize(templat->format) == 1);
+      assert(templat->height0 == 1);
+      assert(templat->depth0 == 1);
+      assert(templat->last_level == 0);
+
+      /* Easiest to just call swr_texture_layout, as it sets up
+       * SWR_SURFACE_STATE in res */
+      if (!swr_texture_layout(screen, res, true))
+         goto fail;
+   }
+
+   return &res->base;
+
+fail:
+   FREE(res);
+   return NULL;
+}
+
+/*
+ * pipe_screen::resource_destroy hook: release a resource's storage (or hand
+ * its display target back to the winsys) and free the swr_resource.
+ */
+static void
+swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt)
+{
+   struct swr_resource *spr = swr_resource(pt);
+   struct sw_displaytarget *dt = spr->display_target;
+
+   /*
+    * If this resource is attached to a context it may still be in use, check
+    * dependencies before freeing
+    * XXX TODO: don't use SwrWaitForIdle, use fences and come up with a real
+    * resource manager.
+    * XXX It's happened that we get a swr_destroy prior to freeing the
+    * framebuffer resource.  Don't wait on it.
+    */
+   if (spr->bound_to_context && !dt) {
+      struct swr_context *owner =
+         swr_context((pipe_context *)spr->bound_to_context);
+      // XXX, don't SwrWaitForIdle!!! Use a fence.
+      SwrWaitForIdle(owner->swrContext);
+   }
+
+   /*
+    * Free resource primary surface.  If resource is display target, winsys
+    * manages the buffer and will free it on displaytarget_destroy.
+    */
+   if (dt) {
+      struct sw_winsys *ws = swr_screen(p_screen)->winsys;
+      ws->displaytarget_destroy(ws, dt);
+   } else {
+      _aligned_free(spr->swr.pBaseAddress);
+   }
+
+   _aligned_free(spr->secondary.pBaseAddress);
+
+   FREE(spr);
+}
+
+
+/*
+ * pipe_screen::flush_frontbuffer hook: wait for the screen's flush fence,
+ * signal end-of-frame to the SWR core, and ask the winsys to display the
+ * resource's display target.
+ */
+static void
+swr_flush_frontbuffer(struct pipe_screen *p_screen,
+                      struct pipe_resource *resource,
+                      unsigned level,
+                      unsigned layer,
+                      void *context_private,
+                      struct pipe_box *sub_box)
+{
+   struct swr_screen *screen = swr_screen(p_screen);
+   struct sw_winsys *winsys = screen->winsys;
+   struct swr_resource *res = swr_resource(resource);
+
+   /* Ensure fence set at flush is finished, before reading frame buffer */
+   swr_fence_finish(p_screen, screen->flush_fence, 0);
+
+   /*
+    * SwrEndFrame operates on the SWR core handle (ctx->swrContext), as the
+    * other Swr* entry points in this driver do, not on the gallium context
+    * wrapper.  A resource that was never bound to a context has no core to
+    * notify.
+    */
+   if (res->bound_to_context) {
+      struct swr_context *ctx =
+         swr_context((pipe_context *)res->bound_to_context);
+      SwrEndFrame(ctx->swrContext);
+   }
+
+   assert(res->display_target);
+   if (res->display_target)
+      winsys->displaytarget_display(
+         winsys, res->display_target, context_private, sub_box);
+}
+
+
+/*
+ * pipe_screen::destroy hook: finish and release the flush fence, destroy the
+ * JIT context, destroy the winsys if it provides a destroy hook, and free
+ * the screen.
+ */
+static void
+swr_destroy_screen(struct pipe_screen *p_screen)
+{
+   struct swr_screen *swr_scr = swr_screen(p_screen);
+   struct sw_winsys *ws = swr_scr->winsys;
+
+   fprintf(stderr, "SWR destroy screen!\n");
+
+   swr_fence_finish(p_screen, swr_scr->flush_fence, 0);
+   swr_fence_reference(p_screen, &swr_scr->flush_fence, NULL);
+
+   JitDestroyContext(swr_scr->hJitMgr);
+
+   if (ws->destroy != NULL)
+      ws->destroy(ws);
+
+   FREE(swr_scr);
+}
+
+/*
+ * Create the SWR pipe_screen on top of the given software winsys: install
+ * the screen hooks, create the JIT context and initialise fences.
+ * Returns NULL if the screen structure cannot be allocated.
+ */
+PUBLIC
+struct pipe_screen *
+swr_create_screen(struct sw_winsys *winsys)
+{
+   struct swr_screen *swr_scr = CALLOC_STRUCT(swr_screen);
+   struct pipe_screen *pscreen;
+
+   if (!swr_scr)
+      return NULL;
+
+   pscreen = &swr_scr->base;
+
+   /* Apply the driver's value only when the environment does not set it. */
+   if (getenv("KNOB_MAX_PRIMS_PER_DRAW") == NULL)
+      g_GlobalKnobs.MAX_PRIMS_PER_DRAW.Value(49152);
+
+   swr_scr->winsys = winsys;
+
+   pscreen->get_name = swr_get_name;
+   pscreen->get_vendor = swr_get_vendor;
+   pscreen->get_param = swr_get_param;
+   pscreen->get_shader_param = swr_get_shader_param;
+   pscreen->get_paramf = swr_get_paramf;
+   pscreen->is_format_supported = swr_is_format_supported;
+
+   pscreen->context_create = swr_create_context;
+   pscreen->destroy = swr_destroy_screen;
+
+   pscreen->can_create_resource = swr_can_create_resource;
+   pscreen->resource_create = swr_resource_create;
+   pscreen->resource_destroy = swr_resource_destroy;
+
+   pscreen->flush_frontbuffer = swr_flush_frontbuffer;
+
+   swr_scr->hJitMgr = JitCreateContext(KNOB_SIMD_WIDTH, KNOB_ARCH_STR);
+
+   swr_fence_init(pscreen);
+
+   return pscreen;
+}
+
+/* Return the software winsys the screen was created with. */
+struct sw_winsys *
+swr_get_winsys(struct pipe_screen *pipe)
+{
+   struct swr_screen *swr_scr = (struct swr_screen *)pipe;
+
+   return swr_scr->winsys;
+}
+
+/* Return the resource's display_target field. */
+struct sw_displaytarget *
+swr_get_displaytarget(struct pipe_resource *resource)
+{
+   struct swr_resource *swr_res = (struct swr_resource *)resource;
+
+   return swr_res->display_target;
+}
diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h
new file mode 100644
index 0000000..a96dc44
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_screen.h
@@ -0,0 +1,52 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_SCREEN_H
+#define SWR_SCREEN_H
+
+#include "pipe/p_screen.h"
+#include "pipe/p_defines.h"
+#include "api.h"
+
+struct sw_winsys;
+/* Driver-private screen. 'base' is the first member, so a pipe_screen
+ * pointer can be cast to swr_screen (see the swr_screen() helper). */
+struct swr_screen {
+ struct pipe_screen base;
+
+ struct pipe_fence_handle *flush_fence; // finished and released when the screen is destroyed
+
+ struct sw_winsys *winsys; // winsys passed to swr_create_screen()
+
+ HANDLE hJitMgr; // from JitCreateContext(); shader compilation casts it to JitManager *
+};
+/* Downcast a pipe_screen to the swr_screen that embeds it as first member. */
+static INLINE struct swr_screen *
+swr_screen(struct pipe_screen *pipe)
+{
+ return (struct swr_screen *)pipe;
+}
+
+SWR_FORMAT
+mesa_to_swr_format(enum pipe_format format);
+
+#endif
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
new file mode 100644
index 0000000..ff16d0f
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -0,0 +1,591 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "JitManager.h"
+#include "state.h"
+#include "state_llvm.h"
+#include "builder.h"
+
+#include "llvm-c/Core.h"
+#include "llvm/Support/CBindingWrapping.h"
+
+#include "tgsi/tgsi_strings.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_struct.h"
+#include "gallivm/lp_bld_tgsi.h"
+
+#include "swr_context.h"
+#include "swr_context_llvm.h"
+#include "swr_state.h"
+#include "swr_screen.h"
+
+/* Two JIT keys are equal exactly when their raw bytes are identical. */
+bool operator==(const swr_jit_key &lhs, const swr_jit_key &rhs)
+{
+   return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
+}
+
+/**
+ * Fill 'key' with the state that determines the generated fragment shader:
+ * colour buffer count, two-sided lighting, the bound vertex shader's output
+ * semantics, and the static sampler / texture state of every sampler and
+ * sampler view the fragment shader declares.
+ *
+ * The key is compared with memcmp() and hashed over its raw bytes, so it is
+ * zeroed first: padding and the slots skipped by the file_mask tests below
+ * must not carry stale data, otherwise equal state could yield unequal keys.
+ */
+void
+swr_generate_fs_key(struct swr_jit_key &key,
+                    struct swr_context *ctx,
+                    swr_fragment_shader *swr_fs)
+{
+   memset(&key, 0, sizeof(key));
+
+   key.nr_cbufs = ctx->framebuffer.nr_cbufs;
+   key.light_twoside = ctx->rasterizer->light_twoside;
+   memcpy(&key.vs_output_semantic_name,
+          &ctx->vs->info.base.output_semantic_name,
+          sizeof(key.vs_output_semantic_name));
+   memcpy(&key.vs_output_semantic_idx,
+          &ctx->vs->info.base.output_semantic_index,
+          sizeof(key.vs_output_semantic_idx));
+
+   key.nr_samplers = swr_fs->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
+
+   // Unsigned shifts: bit 31 of the mask must be testable without overflow.
+   for (unsigned i = 0; i < key.nr_samplers; i++) {
+      if (swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER] & (1u << i)) {
+         lp_sampler_static_sampler_state(
+            &key.sampler[i].sampler_state,
+            ctx->samplers[PIPE_SHADER_FRAGMENT][i]);
+      }
+   }
+
+   /*
+    * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
+    * are dx10-style? Can't really have mixed opcodes, at least not
+    * if we want to skip the holes here (without rescanning tgsi).
+    */
+   if (swr_fs->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
+      key.nr_sampler_views =
+         swr_fs->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
+      for (unsigned i = 0; i < key.nr_sampler_views; i++) {
+         if (swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << i)) {
+            lp_sampler_static_texture_state(
+               &key.sampler[i].texture_state,
+               ctx->sampler_views[PIPE_SHADER_FRAGMENT][i]);
+         }
+      }
+   } else {
+      // No sampler-view declarations: views are taken one-to-one from the
+      // sampler declarations.
+      key.nr_sampler_views = key.nr_samplers;
+      for (unsigned i = 0; i < key.nr_sampler_views; i++) {
+         if (swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER] & (1u << i)) {
+            lp_sampler_static_texture_state(
+               &key.sampler[i].texture_state,
+               ctx->sampler_views[PIPE_SHADER_FRAGMENT][i]);
+         }
+      }
+   }
+}
+/* Builder specialisation used to compile gallium shaders. Constructing one
+ * calls SetupNewModule() on the JIT manager before any code is emitted. */
+struct BuilderSWR : public Builder {
+ BuilderSWR(JitManager *pJitMgr)
+ : Builder(pJitMgr)
+ {
+ pJitMgr->SetupNewModule();
+ }
+
+ PFN_VERTEX_FUNC
+ CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs); // TGSI vertex shader -> JIT'd function
+ PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_key &key); // ctx->fs -> JIT'd pixel kernel
+};
+/**
+ * Generate LLVM IR for the vertex shader in swr_vs, JIT-compile it and
+ * return the resulting entry point.
+ *
+ * Steps, in order: compute swr_vs->linkageMask, create an LLVM function
+ * "VS" taking (swr_draw_context *, SWR_VS_CONTEXT *), load the input
+ * channels the shader uses, have lp_build_tgsi_soa() translate the TGSI
+ * tokens, then store every output channel the shader wrote into the
+ * context's pVout. 'ctx' is not used by the body.
+ */
+PFN_VERTEX_FUNC
+BuilderSWR::CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs)
+{
+ swr_vs->linkageMask = 0;
+ // One linkage bit per declared output, except outputs tagged POSITION.
+ for (unsigned i = 0; i < swr_vs->info.base.num_outputs; i++) {
+ switch (swr_vs->info.base.output_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ break;
+ default:
+ swr_vs->linkageMask |= (1 << i);
+ break;
+ }
+ }
+
+ // tgsi_dump(swr_vs->pipe.tokens, 0);
+
+ struct gallivm_state *gallivm =
+ gallivm_create("VS", wrap(&JM()->mContext));
+ gallivm->module = wrap(JM()->mpCurrentModule); // gallivm emits into the JIT manager's current module
+
+ LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; // not zeroed: only used channels are written below
+ LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+
+ memset(outputs, 0, sizeof(outputs)); // a NULL entry later means "channel not written"
+
+ AttrBuilder attrBuilder;
+ attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
+ AttributeSet attrSet = AttributeSet::get(
+ JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
+
+ std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
+ PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
+ FunctionType *vsFuncType =
+ FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
+
+ // create new vertex shader function
+ auto pFunction = Function::Create(vsFuncType,
+ GlobalValue::ExternalLinkage,
+ "VS",
+ JM()->mpCurrentModule);
+ pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
+
+ BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
+ IRB()->SetInsertPoint(block);
+ LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); // both builders start at the same entry block
+
+ auto argitr = pFunction->arg_begin();
+ Value *hPrivateData = &*argitr++; // first argument: swr_draw_context *
+ hPrivateData->setName("hPrivateData");
+ Value *pVsCtx = &*argitr++; // second argument: SWR_VS_CONTEXT *
+ pVsCtx->setName("vsCtx");
+
+ Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)});
+
+ consts_ptr->setName("vs_constants");
+ Value *const_sizes_ptr =
+ GEP(hPrivateData, {0, swr_draw_context_num_constantsVS});
+ const_sizes_ptr->setName("num_vs_constants");
+
+ Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
+ // Load only the input channels flagged in the shader's input usage mask.
+ for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
+ const unsigned mask = swr_vs->info.base.input_usage_mask[attrib];
+ for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
+ if (mask & (1 << channel)) {
+ inputs[attrib][channel] =
+ wrap(LOAD(vtxInput, {0, 0, attrib, channel}));
+ }
+ }
+ }
+
+ struct lp_bld_tgsi_system_values system_values;
+ memset(&system_values, 0, sizeof(system_values));
+ system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
+ system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
+ // NOTE(review): the vector type below is hard-coded as 32 * 8 rather than derived from JM()->mVWidth - confirm.
+ lp_build_tgsi_soa(gallivm,
+ swr_vs->pipe.tokens,
+ lp_type_float_vec(32, 32 * 8),
+ NULL, // mask
+ wrap(consts_ptr),
+ wrap(const_sizes_ptr),
+ &system_values,
+ inputs,
+ outputs,
+ NULL, // wrap(hPrivateData), (sampler context)
+ NULL, // thread data
+ NULL, // sampler
+ &swr_vs->info.base,
+ NULL); // geometry shader face
+
+ IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); // resume where gallivm stopped emitting
+
+ Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
+
+ for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
+ for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
+ if (!outputs[attrib][channel]) // channel never written by the shader
+ continue;
+
+ Value *val = LOAD(unwrap(outputs[attrib][channel]));
+
+ uint32_t outSlot = attrib;
+ if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE)
+ outSlot = VERTEX_POINT_SIZE_SLOT; // point size is stored in its dedicated slot
+ STORE(val, vtxOutput, {0, 0, outSlot, channel});
+ }
+ }
+
+ RET_VOID();
+
+ gallivm_verify_function(gallivm, wrap(pFunction));
+ gallivm_compile_module(gallivm);
+
+ // lp_debug_dump_value(func);
+
+ PFN_VERTEX_FUNC pFunc =
+ (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
+
+ debug_printf("vert shader %p\n", pFunc);
+ assert(pFunc && "Error: VertShader = NULL"); // no check remains when asserts are compiled out
+
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR >= 5)
+ JM()->mIsModuleFinalized = true;
+#endif
+
+ return pFunc;
+}
+
+/* Compile a vertex shader with a fresh BuilderSWR bound to the JIT manager
+ * owned by the context's screen. */
+PFN_VERTEX_FUNC
+swr_compile_vs(struct pipe_context *ctx, swr_vertex_shader *swr_vs)
+{
+   auto *pJitMgr =
+      reinterpret_cast<JitManager *>(swr_screen(ctx->screen)->hJitMgr);
+
+   BuilderSWR builder(pJitMgr);
+   return builder.CompileVS(ctx, swr_vs);
+}
+
+/*
+ * Look up the output in 'info' whose semantic is name[index] and return its
+ * array position minus one (position is not part of the linkage). A COLOR
+ * lookup that finds nothing is retried against BCOLOR with the same index.
+ * Returns 0xFFFFFFFF when no output matches.
+ */
+static unsigned
+locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
+{
+   // Pass 0 searches for the requested semantic; pass 1 (COLOR only) is the
+   // BCOLOR fallback.
+   const ubyte wanted[2] = {name, TGSI_SEMANTIC_BCOLOR};
+   const int passes = (name == TGSI_SEMANTIC_COLOR) ? 2 : 1;
+
+   for (int pass = 0; pass < passes; pass++) {
+      for (int slot = 0; slot < PIPE_MAX_SHADER_OUTPUTS; slot++) {
+         if (info->output_semantic_name[slot] != wanted[pass])
+            continue;
+         if (info->output_semantic_index[slot] != index)
+            continue;
+
+         return slot - 1; // position is not part of the linkage
+      }
+   }
+
+   return 0xFFFFFFFF;
+}
+/**
+ * Generate LLVM IR for the fragment shader bound in ctx->fs, JIT-compile it
+ * and return the pixel kernel.
+ *
+ * For every used FS input this emits either a system-value load (FACE,
+ * POSITION, PRIMID) or an interpolation of the matching VS output located
+ * with locate_linkage(); it records constantMask / pointSpriteMask on the
+ * shader as a side effect. lp_build_tgsi_soa() then translates the TGSI
+ * body, and the POSITION / COLOR outputs are stored back into the
+ * SWR_PS_CONTEXT, together with the active mask when the shader uses kill.
+ */
+PFN_PIXEL_KERNEL
+BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_key &key)
+{
+ struct swr_fragment_shader *swr_fs = ctx->fs;
+
+ // tgsi_dump(swr_fs->pipe.tokens, 0);
+
+ struct gallivm_state *gallivm =
+ gallivm_create("FS", wrap(&JM()->mContext));
+ gallivm->module = wrap(JM()->mpCurrentModule); // gallivm emits into the JIT manager's current module
+
+ LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
+ LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+
+ memset(inputs, 0, sizeof(inputs));
+ memset(outputs, 0, sizeof(outputs)); // a NULL entry later means "channel not written"
+
+ struct lp_build_sampler_soa *sampler = NULL;
+
+ AttrBuilder attrBuilder;
+ attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
+ AttributeSet attrSet = AttributeSet::get(
+ JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
+
+ std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
+ PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
+ FunctionType *funcType =
+ FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
+
+ auto pFunction = Function::Create(funcType,
+ GlobalValue::ExternalLinkage,
+ "FS",
+ JM()->mpCurrentModule);
+ pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
+
+ BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
+ IRB()->SetInsertPoint(block);
+ LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); // both builders start at the same entry block
+
+ auto args = pFunction->arg_begin();
+ Value *hPrivateData = &*args++; // first argument: swr_draw_context *
+ hPrivateData->setName("hPrivateData");
+ Value *pPS = &*args++; // second argument: SWR_PS_CONTEXT *
+ pPS->setName("psCtx");
+
+ Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS});
+ consts_ptr->setName("fs_constants");
+ Value *const_sizes_ptr =
+ GEP(hPrivateData, {0, swr_draw_context_num_constantsFS});
+ const_sizes_ptr->setName("num_fs_constants");
+
+ // xxx should check for flat shading versus interpolation
+
+
+ // load *pAttribs, *pPerspAttribs
+ Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs");
+ Value *pPerspAttribs =
+ LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs");
+
+ swr_fs->constantMask = 0;
+ swr_fs->pointSpriteMask = 0;
+
+ for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
+ const unsigned mask = swr_fs->info.base.input_usage_mask[attrib];
+ const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib];
+ const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib];
+
+ if (!mask) // input not read by the shader
+ continue;
+
+ // load i,j
+ Value *vi = nullptr, *vj = nullptr; // stay null if interpLoc matches none of the cases below
+ switch (interpLoc) {
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i");
+ vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j");
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i");
+ vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j");
+ break;
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i");
+ vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j");
+ break;
+ }
+
+ // load/compute w
+ Value *vw = nullptr, *pAttribs;
+ if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE) {
+ pAttribs = pPerspAttribs;
+ switch (interpLoc) {
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}));
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid}));
+ break;
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample}));
+ break;
+ }
+ } else {
+ pAttribs = pRawAttribs;
+ vw = VIMMED1(1.f);
+ }
+ // NOTE(review): vw is still null here for a perspective input whose interpLoc hit no case above - confirm that cannot happen.
+ vw->setName("w");
+
+ ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib];
+ ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib];
+
+ if (semantic_name == TGSI_SEMANTIC_FACE) {
+ Value *ff =
+ UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty);
+ ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f)); // frontFace * 2 - 1
+ ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace");
+
+ inputs[attrib][0] = wrap(ff);
+ inputs[attrib][1] = wrap(VIMMED1(0.0f));
+ inputs[attrib][2] = wrap(VIMMED1(0.0f));
+ inputs[attrib][3] = wrap(VIMMED1(1.0f));
+ continue;
+ } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord
+ inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX"));
+ inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY"));
+ inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ"));
+ inputs[attrib][3] =
+ wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW"));
+ continue;
+ } else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
+ Value *primID = LOAD(pPS, {0, SWR_PS_CONTEXT_primID}, "primID");
+ inputs[attrib][0] = wrap(VECTOR_SPLAT(JM()->mVWidth, primID));
+ inputs[attrib][1] = wrap(VIMMED1(0));
+ inputs[attrib][2] = wrap(VIMMED1(0));
+ inputs[attrib][3] = wrap(VIMMED1(0));
+ continue;
+ }
+ // Any other input must come from a vertex shader output.
+ unsigned linkedAttrib =
+ locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
+ if (linkedAttrib == 0xFFFFFFFF) {
+ // not found - check for point sprite
+ if (ctx->rasterizer->sprite_coord_enable) {
+ linkedAttrib = ctx->vs->info.base.num_outputs - 1;
+ swr_fs->pointSpriteMask |= (1 << linkedAttrib);
+ } else {
+ fprintf(stderr,
+ "Missing %s[%d]\n",
+ tgsi_semantic_names[semantic_name],
+ semantic_idx);
+ assert(0 && "attribute linkage not found"); // with asserts compiled out, linkedAttrib stays 0xFFFFFFFF below
+ }
+ }
+
+ if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
+ swr_fs->constantMask |= 1 << linkedAttrib;
+ }
+
+ for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
+ if (mask & (1 << channel)) {
+ Value *indexA = C(linkedAttrib * 12 + channel); // 12 floats per attribute: three groups of four channels
+ Value *indexB = C(linkedAttrib * 12 + channel + 4);
+ Value *indexC = C(linkedAttrib * 12 + channel + 8);
+
+ if ((semantic_name == TGSI_SEMANTIC_COLOR)
+ && ctx->rasterizer->light_twoside) {
+ unsigned bcolorAttrib = locate_linkage(
+ TGSI_SEMANTIC_BCOLOR, semantic_idx, &ctx->vs->info.base); // NOTE(review): a 0xFFFFFFFF "not found" result is not checked
+
+ unsigned diff = 12 * (bcolorAttrib - linkedAttrib);
+
+ Value *back =
+ XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace");
+
+ Value *offset = MUL(back, C(diff)); // 0 when front-facing, distance to the BCOLOR data otherwise
+ offset->setName("offset");
+
+ indexA = ADD(indexA, offset);
+ indexB = ADD(indexB, offset);
+ indexC = ADD(indexC, offset);
+
+ if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
+ swr_fs->constantMask |= 1 << bcolorAttrib;
+ }
+ }
+
+ Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA)));
+ Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB)));
+ Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC)));
+
+ if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
+ inputs[attrib][channel] = wrap(va); // flat: take the first value as is
+ } else {
+ Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj); // k = 1 - i - j
+
+ vc = FMUL(vk, vc);
+
+ Value *interp = FMUL(va, vi);
+ Value *interp1 = FMUL(vb, vj);
+ interp = FADD(interp, interp1);
+ interp = FADD(interp, vc); // a*i + b*j + c*k
+ if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE)
+ interp = FMUL(interp, vw);
+ inputs[attrib][channel] = wrap(interp);
+ }
+ }
+ }
+ }
+
+ sampler = swr_sampler_soa_create(key.sampler);
+
+ struct lp_bld_tgsi_system_values system_values;
+ memset(&system_values, 0, sizeof(system_values));
+
+ struct lp_build_mask_context mask; // only initialised, and only used, when the shader uses kill
+
+ if (swr_fs->info.base.uses_kill) {
+ Value *mask_val = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask");
+ lp_build_mask_begin(
+ &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val));
+ }
+ // NOTE(review): the vector type is hard-coded as 32 * 8 rather than derived from JM()->mVWidth - confirm.
+ lp_build_tgsi_soa(gallivm,
+ swr_fs->pipe.tokens,
+ lp_type_float_vec(32, 32 * 8),
+ swr_fs->info.base.uses_kill ? &mask : NULL, // mask
+ wrap(consts_ptr),
+ wrap(const_sizes_ptr),
+ &system_values,
+ inputs,
+ outputs,
+ wrap(hPrivateData),
+ NULL, // thread data
+ sampler, // sampler
+ &swr_fs->info.base,
+ NULL); // geometry shader face
+
+ IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); // resume where gallivm stopped emitting
+
+ for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs;
+ attrib++) {
+ switch (swr_fs->info.base.output_semantic_name[attrib]) {
+ case TGSI_SEMANTIC_POSITION: {
+ // write z
+ LLVMValueRef outZ =
+ LLVMBuildLoad(gallivm->builder, outputs[attrib][2], ""); // NOTE(review): no NULL check on this channel, unlike COLOR below
+ STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ});
+ break;
+ }
+ case TGSI_SEMANTIC_COLOR: {
+ for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
+ if (!outputs[attrib][channel]) // channel never written by the shader
+ continue;
+
+ LLVMValueRef out =
+ LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], "");
+ if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
+ for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) { // replicate the colour to every bound colour buffer
+ STORE(unwrap(out),
+ pPS,
+ {0, SWR_PS_CONTEXT_shaded, rt, channel});
+ }
+ } else {
+ STORE(unwrap(out),
+ pPS,
+ {0,
+ SWR_PS_CONTEXT_shaded,
+ swr_fs->info.base.output_semantic_index[attrib],
+ channel});
+ }
+ }
+ break;
+ }
+ default: {
+ fprintf(stderr,
+ "unknown output from FS %s[%d]\n",
+ tgsi_semantic_names[swr_fs->info.base
+ .output_semantic_name[attrib]],
+ swr_fs->info.base.output_semantic_index[attrib]);
+ break;
+ }
+ }
+ }
+
+ LLVMValueRef mask_result = 0;
+ if (swr_fs->info.base.uses_kill) {
+ mask_result = lp_build_mask_end(&mask);
+ }
+
+ IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
+
+ if (swr_fs->info.base.uses_kill) {
+ STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask}); // write the post-kill mask back to the context
+ }
+
+ RET_VOID();
+
+ gallivm_verify_function(gallivm, wrap(pFunction));
+
+ gallivm_compile_module(gallivm);
+
+ PFN_PIXEL_KERNEL kernel =
+ (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction));
+ debug_printf("frag shader %p\n", kernel);
+ assert(kernel && "Error: FragShader = NULL"); // no check remains when asserts are compiled out
+
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR >= 5)
+ JM()->mIsModuleFinalized = true;
+#endif
+
+ return kernel;
+}
+
+/* Compile the fragment shader bound in ctx for the given key, using a fresh
+ * BuilderSWR bound to the JIT manager owned by the context's screen. */
+PFN_PIXEL_KERNEL
+swr_compile_fs(struct swr_context *ctx, swr_jit_key &key)
+{
+   auto *pJitMgr =
+      reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr);
+
+   BuilderSWR builder(pJitMgr);
+   return builder.CompileFS(ctx, key);
+}
diff --git a/src/gallium/drivers/swr/swr_shader.h b/src/gallium/drivers/swr/swr_shader.h
new file mode 100644
index 0000000..e22a7c4
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_shader.h
@@ -0,0 +1,60 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#pragma once
+
+/* Forward declarations. The class-key matches how these types are defined
+ * and used elsewhere (struct), avoiding mismatched-tag diagnostics. */
+struct swr_vertex_shader;
+struct swr_fragment_shader;
+struct swr_jit_key;
+
+PFN_VERTEX_FUNC
+swr_compile_vs(struct pipe_context *ctx, swr_vertex_shader *swr_vs);
+
+PFN_PIXEL_KERNEL
+swr_compile_fs(struct swr_context *ctx, swr_jit_key &key);
+
+void swr_generate_fs_key(struct swr_jit_key &key,
+ struct swr_context *ctx,
+ swr_fragment_shader *swr_fs);
+/* Fragment shader variant key, filled by swr_generate_fs_key(). It is hashed
+ * and compared as raw bytes, so every byte of the struct is significant. */
+struct swr_jit_key {
+ unsigned nr_cbufs; // ctx->framebuffer.nr_cbufs
+ unsigned light_twoside; // rasterizer light_twoside flag
+ ubyte vs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; // copied from the bound VS
+ ubyte vs_output_semantic_idx[PIPE_MAX_SHADER_OUTPUTS]; // copied from the bound VS
+ unsigned nr_samplers; // highest declared FS sampler + 1
+ unsigned nr_sampler_views; // highest declared sampler view + 1, else nr_samplers
+ struct swr_sampler_static_state sampler[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+};
+
+namespace std
+{
+/* Lets swr_jit_key serve as a key in std hash containers: the hash is a
+ * CRC32 over the key's raw bytes, matching operator==, which memcmp()s the
+ * same bytes. */
+template <> struct hash<swr_jit_key> {
+   std::size_t operator()(const swr_jit_key &k) const
+   {
+      return util_hash_crc32(&k, sizeof(k));
+   }
+};
+} // namespace std
+
+bool operator==(const swr_jit_key &lhs, const swr_jit_key &rhs);
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
new file mode 100644
index 0000000..49035b5
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -0,0 +1,1370 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "common/os.h"
+#include "jit_api.h"
+#include "JitManager.h"
+#include "state_llvm.h"
+
+#include "gallivm/lp_bld_tgsi.h"
+#include "util/u_format.h"
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_helpers.h"
+#include "util/u_framebuffer.h"
+
+#include "swr_state.h"
+#include "swr_context.h"
+#include "swr_context_llvm.h"
+#include "swr_screen.h"
+#include "swr_resource.h"
+#include "swr_tex_sample.h"
+#include "swr_scratch.h"
+#include "swr_shader.h"
+
+/* These should be pulled out into separate files as necessary
+ * Just initializing everything here to get going. */
+
+/**
+ * Create a blend state object from a gallium blend template.
+ *
+ * Keeps a copy of the template in state->pipe and pre-translates it, per
+ * render target, into the SWR blend compile state plus the per-target write
+ * disable bits. When independent blending is off, targets other than 0
+ * reuse target 0's compile state.
+ *
+ * Returns the new state, or NULL if the allocation fails.
+ */
+static void *
+swr_create_blend_state(struct pipe_context *pipe,
+                       const struct pipe_blend_state *blend)
+{
+   struct swr_blend_state *state = CALLOC_STRUCT(swr_blend_state);
+
+   // The allocation can fail; do not memcpy into a NULL state.
+   if (!state)
+      return NULL;
+
+   memcpy(&state->pipe, blend, sizeof(*blend));
+
+   struct pipe_blend_state *pipe_blend = &state->pipe;
+
+   for (int target = 0;
+        target < std::min(SWR_NUM_RENDERTARGETS, PIPE_MAX_COLOR_BUFS);
+        target++) {
+
+      struct pipe_rt_blend_state *rt_blend = &pipe_blend->rt[target];
+      SWR_RENDER_TARGET_BLEND_STATE &blendState =
+         state->blendState.renderTarget[target];
+      RENDER_TARGET_BLEND_COMPILE_STATE &compileState =
+         state->compileState[target];
+
+      // Without independent blending every target shares target 0's
+      // compile state. NOTE(review): blendState (the write-disable bits) is
+      // left zeroed for these targets - confirm that is intended.
+      if (target != 0 && !pipe_blend->independent_blend_enable) {
+         memcpy(&compileState,
+                &state->compileState[0],
+                sizeof(RENDER_TARGET_BLEND_COMPILE_STATE));
+         continue;
+      }
+
+      compileState.blendEnable = rt_blend->blend_enable;
+      if (compileState.blendEnable) {
+         compileState.sourceAlphaBlendFactor =
+            swr_convert_blend_factor(rt_blend->alpha_src_factor);
+         compileState.destAlphaBlendFactor =
+            swr_convert_blend_factor(rt_blend->alpha_dst_factor);
+         compileState.sourceBlendFactor =
+            swr_convert_blend_factor(rt_blend->rgb_src_factor);
+         compileState.destBlendFactor =
+            swr_convert_blend_factor(rt_blend->rgb_dst_factor);
+
+         compileState.colorBlendFunc =
+            swr_convert_blend_func(rt_blend->rgb_func);
+         compileState.alphaBlendFunc =
+            swr_convert_blend_func(rt_blend->alpha_func);
+      }
+      compileState.logicOpEnable = state->pipe.logicop_enable;
+      if (compileState.logicOpEnable) {
+         compileState.logicOpFunc =
+            swr_convert_logic_op(state->pipe.logicop_func);
+      }
+
+      // The gallium colormask enables channels; SWR stores the inverse.
+      blendState.writeDisableRed =
+         (rt_blend->colormask & PIPE_MASK_R) ? 0 : 1;
+      blendState.writeDisableGreen =
+         (rt_blend->colormask & PIPE_MASK_G) ? 0 : 1;
+      blendState.writeDisableBlue =
+         (rt_blend->colormask & PIPE_MASK_B) ? 0 : 1;
+      blendState.writeDisableAlpha =
+         (rt_blend->colormask & PIPE_MASK_A) ? 0 : 1;
+
+      // Nothing is written at all, so blending is switched off.
+      if (rt_blend->colormask == 0)
+         compileState.blendEnable = false;
+   }
+
+   return state;
+}
+
+/* Make 'blend' the current blend state; rebinding the state that is already
+ * bound changes nothing and raises no dirty flag. */
+static void
+swr_bind_blend_state(struct pipe_context *pipe, void *blend)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (ctx->blend != blend) {
+      ctx->blend = (swr_blend_state *)blend;
+      ctx->dirty |= SWR_NEW_BLEND;
+   }
+}
+/* Free a blend state object allocated by swr_create_blend_state(); 'pipe' is unused. */
+static void
+swr_delete_blend_state(struct pipe_context *pipe, void *blend)
+{
+ FREE(blend);
+}
+/* Copy the blend colour into the context and flag blend state as dirty. */
+static void
+swr_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *color)
+{
+ struct swr_context *ctx = swr_context(pipe);
+
+ ctx->blend_color = *color; // stored by value
+
+ ctx->dirty |= SWR_NEW_BLEND;
+}
+/* Copy the stencil reference values into the context and flag the
+ * depth/stencil/alpha state as dirty. */
+static void
+swr_set_stencil_ref(struct pipe_context *pipe,
+ const struct pipe_stencil_ref *ref)
+{
+ struct swr_context *ctx = swr_context(pipe);
+
+ ctx->stencil_ref = *ref; // stored by value
+
+ ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
+}
+
+/* Create a depth/stencil/alpha state object: a mem_dup() copy of the
+ * template, returned as is (whatever mem_dup() yields is passed through). */
+static void *
+swr_create_depth_stencil_state(
+   struct pipe_context *pipe,
+   const struct pipe_depth_stencil_alpha_state *depth_stencil)
+{
+   return (pipe_depth_stencil_alpha_state *)mem_dup(depth_stencil,
+                                                    sizeof *depth_stencil);
+}
+
+/* Make 'depth_stencil' the current depth/stencil/alpha state; rebinding the
+ * state that is already bound changes nothing and raises no dirty flag. */
+static void
+swr_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   pipe_depth_stencil_alpha_state *incoming =
+      (pipe_depth_stencil_alpha_state *)depth_stencil;
+
+   if (ctx->depth_stencil != incoming) {
+      ctx->depth_stencil = incoming;
+      ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
+   }
+}
+/* Free a state object created by swr_create_depth_stencil_state(); 'pipe' is unused. */
+static void
+swr_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
+{
+ FREE(depth);
+}
+
+
+/* Create a rasterizer state object: a mem_dup() copy of the template,
+ * returned as is (whatever mem_dup() yields is passed through). */
+static void *
+swr_create_rasterizer_state(struct pipe_context *pipe,
+                            const struct pipe_rasterizer_state *rast)
+{
+   return (pipe_rasterizer_state *)mem_dup(rast, sizeof *rast);
+}
+
+/* Make 'handle' the current rasterizer state; rebinding the state that is
+ * already bound changes nothing and raises no dirty flag. */
+static void
+swr_bind_rasterizer_state(struct pipe_context *pipe, void *handle)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   pipe_rasterizer_state *incoming = (pipe_rasterizer_state *)handle;
+
+   if (ctx->rasterizer != incoming) {
+      ctx->rasterizer = incoming;
+      ctx->dirty |= SWR_NEW_RASTERIZER;
+   }
+}
+/* Free a state object created by swr_create_rasterizer_state(); 'pipe' is unused. */
+static void
+swr_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer)
+{
+ FREE(rasterizer);
+}
+
+
+/* Create a sampler state object: a mem_dup() copy of the template,
+ * returned as is (whatever mem_dup() yields is passed through). */
+static void *
+swr_create_sampler_state(struct pipe_context *pipe,
+                         const struct pipe_sampler_state *sampler)
+{
+   return (pipe_sampler_state *)mem_dup(sampler, sizeof *sampler);
+}
+
+/*
+ * Bind 'num' sampler states for shader stage 'shader' into slots
+ * [start, start + num) and flag sampler state as dirty.
+ *
+ * NOTE(review): num_samplers is set to 'num' regardless of 'start', so it
+ * does not reflect the highest bound slot when start != 0 - confirm that
+ * readers of num_samplers expect this.
+ */
+static void
+swr_bind_sampler_states(struct pipe_context *pipe,
+                        unsigned shader,
+                        unsigned start,
+                        unsigned num,
+                        void **samplers)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   assert(shader < PIPE_SHADER_TYPES);
+   assert(start + num <= Elements(ctx->samplers[shader]));
+
+   /* set the new samplers */
+   ctx->num_samplers[shader] = num;
+   for (unsigned slot = start; slot < start + num; slot++)
+      ctx->samplers[shader][slot] = (pipe_sampler_state *)samplers[slot - start];
+
+   ctx->dirty |= SWR_NEW_SAMPLER;
+}
+/* Free a state object created by swr_create_sampler_state(); 'pipe' is unused. */
+static void
+swr_delete_sampler_state(struct pipe_context *pipe, void *sampler)
+{
+ FREE(sampler);
+}
+
+
+/*
+ * Create a sampler view for 'texture' from the template 'templ'.
+ * The view starts with a reference count of 1, takes its own reference on
+ * the texture and records 'pipe' as its owning context.
+ * Returns NULL if the allocation fails.
+ */
+static struct pipe_sampler_view *
+swr_create_sampler_view(struct pipe_context *pipe,
+                        struct pipe_resource *texture,
+                        const struct pipe_sampler_view *templ)
+{
+   struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
+
+   if (!view)
+      return NULL;
+
+   *view = *templ;
+   view->reference.count = 1;
+   // Clear the pointer copied from the template before taking a real
+   // reference on the texture.
+   view->texture = NULL;
+   pipe_resource_reference(&view->texture, texture);
+   view->context = pipe;
+
+   return view;
+}
+
+/*
+ * Bind 'num' sampler views for shader stage 'shader' into slots
+ * [start, start + num), releasing whatever was bound there before, and flag
+ * sampler views as dirty.
+ *
+ * NOTE(review): num_sampler_views is set to 'num' regardless of 'start' -
+ * confirm that readers of it expect this.
+ */
+static void
+swr_set_sampler_views(struct pipe_context *pipe,
+                      unsigned shader,
+                      unsigned start,
+                      unsigned num,
+                      struct pipe_sampler_view **views)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
+
+   assert(shader < PIPE_SHADER_TYPES);
+   assert(start + num <= Elements(ctx->sampler_views[shader]));
+
+   /* set the new sampler views */
+   ctx->num_sampler_views[shader] = num;
+   for (uint idx = 0; idx < num; idx++) {
+      struct pipe_sampler_view **slot =
+         &ctx->sampler_views[shader][start + idx];
+
+      /* Note: we're using pipe_sampler_view_release() here to work around
+       * a possible crash when the old view belongs to another context that
+       * was already destroyed.
+       */
+      pipe_sampler_view_release(pipe, slot);
+      pipe_sampler_view_reference(slot, views[idx]);
+   }
+
+   ctx->dirty |= SWR_NEW_SAMPLER_VIEW;
+}
+/* Destroy a sampler view: drop its texture reference, then free the view; 'pipe' is unused. */
+static void
+swr_sampler_view_destroy(struct pipe_context *pipe,
+ struct pipe_sampler_view *view)
+{
+ pipe_resource_reference(&view->texture, NULL); // releases the reference taken at creation
+ FREE(view);
+}
+
+/*
+ * pipe_context::create_vs_state hook.
+ *
+ * Builds a swr_vertex_shader from the given shader state: duplicates the
+ * TGSI tokens, copies the stream-output description, gathers TGSI info,
+ * compiles the shader with swr_compile_vs(), and derives the SWR
+ * stream-output state (per-stream attribute masks and entry counts).
+ *
+ * Returns the new shader object, or NULL if its allocation failed.
+ */
+static void *
+swr_create_vs_state(struct pipe_context *pipe,
+                    const struct pipe_shader_state *vs)
+{
+   struct swr_vertex_shader *shader =
+      (swr_vertex_shader *)CALLOC_STRUCT(swr_vertex_shader);
+
+   if (shader == NULL)
+      return NULL;
+
+   shader->pipe.tokens = tgsi_dup_tokens(vs->tokens);
+   shader->pipe.stream_output = vs->stream_output;
+
+   lp_build_tgsi_info(vs->tokens, &shader->info);
+
+   shader->func = swr_compile_vs(pipe, shader);
+
+   shader->soState = {0};
+
+   pipe_stream_output_info *so_info = &shader->pipe.stream_output;
+
+   /* Nothing more to derive when the shader has no stream outputs. */
+   if (!so_info->num_outputs)
+      return shader;
+
+   shader->soState.soEnable = true;
+   // soState.rasterizerDisable set on state dirty
+   // soState.streamToRasterizer not used
+
+   /* Mark, per stream, the attribute written by each output. */
+   for (uint32_t out = 0; out < so_info->num_outputs; out++) {
+      shader->soState.streamMasks[so_info->output[out].stream] |=
+         1 << (so_info->output[out].register_index - 1);
+   }
+
+   /* The entry count of a stream is the number of bits set in its mask. */
+   for (uint32_t stream = 0; stream < MAX_SO_STREAMS; stream++) {
+      shader->soState.streamNumEntries[stream] =
+         _mm_popcnt_u32(shader->soState.streamMasks[stream]);
+   }
+
+   return shader;
+}
+
+/*
+ * pipe_context::bind_vs_state hook.
+ *
+ * Makes `vs` the current vertex shader and flags SWR_NEW_VS.  Rebinding the
+ * shader that is already bound is a no-op.
+ */
+static void
+swr_bind_vs_state(struct pipe_context *pipe, void *vs)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (ctx->vs != vs) {
+      ctx->vs = (swr_vertex_shader *)vs;
+      ctx->dirty |= SWR_NEW_VS;
+   }
+}
+
+/*
+ * pipe_context::delete_vs_state hook.
+ *
+ * Frees the shader's duplicated TGSI tokens and then the shader object.
+ */
+static void
+swr_delete_vs_state(struct pipe_context *pipe, void *vs)
+{
+   struct swr_vertex_shader *shader = (swr_vertex_shader *)vs;
+
+   (void)pipe; /* unused */
+
+   FREE((void *)shader->pipe.tokens);
+   FREE(vs);
+}
+
+/*
+ * pipe_context::create_fs_state hook.
+ *
+ * Creates a swr_fragment_shader holding a duplicate of the TGSI tokens and
+ * the gathered TGSI info.  No code is compiled here.
+ *
+ * The object is allocated with C++ `new` and must be released with `delete`.
+ *
+ * NOTE(review): a plain `new` expression reports failure by throwing rather
+ * than returning NULL, so the NULL check below is not expected to trigger.
+ */
+static void *
+swr_create_fs_state(struct pipe_context *pipe,
+                    const struct pipe_shader_state *fs)
+{
+   struct swr_fragment_shader *shader = new swr_fragment_shader;
+
+   if (shader == NULL)
+      return NULL;
+
+   shader->pipe.tokens = tgsi_dup_tokens(fs->tokens);
+   lp_build_tgsi_info(fs->tokens, &shader->info);
+
+   return shader;
+}
+
+
+/*
+ * pipe_context::bind_fs_state hook.
+ *
+ * Makes `fs` the current fragment shader and flags SWR_NEW_FS.  Rebinding
+ * the shader that is already bound is a no-op.
+ */
+static void
+swr_bind_fs_state(struct pipe_context *pipe, void *fs)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (ctx->fs != fs) {
+      ctx->fs = (swr_fragment_shader *)fs;
+      ctx->dirty |= SWR_NEW_FS;
+   }
+}
+
+/*
+ * pipe_context::delete_fs_state hook.
+ *
+ * Frees the duplicated TGSI tokens, then destroys the shader object with
+ * `delete`.
+ */
+static void
+swr_delete_fs_state(struct pipe_context *pipe, void *fs)
+{
+   struct swr_fragment_shader *shader = (swr_fragment_shader *)fs;
+
+   (void)pipe; /* unused */
+
+   FREE((void *)shader->pipe.tokens);
+   delete shader;
+}
+
+
+/*
+ * pipe_context::set_constant_buffer hook.
+ *
+ * Copies the constant buffer binding `cb` (which may be NULL) into slot
+ * `index` of shader stage `shader` and flags the matching constants as
+ * dirty: vertex and geometry stages raise SWR_NEW_VSCONSTANTS, the fragment
+ * stage raises SWR_NEW_FSCONSTANTS, other stages raise nothing.
+ */
+static void
+swr_set_constant_buffer(struct pipe_context *pipe,
+                        uint shader,
+                        uint index,
+                        struct pipe_constant_buffer *cb)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   struct pipe_resource *constants = NULL;
+
+   if (cb)
+      constants = cb->buffer;
+
+   assert(shader < PIPE_SHADER_TYPES);
+   assert(index < Elements(ctx->constants[shader]));
+
+   /* note: reference counting */
+   util_copy_constant_buffer(&ctx->constants[shader][index], cb);
+
+   switch (shader) {
+   case PIPE_SHADER_VERTEX:
+   case PIPE_SHADER_GEOMETRY:
+      ctx->dirty |= SWR_NEW_VSCONSTANTS;
+      break;
+   case PIPE_SHADER_FRAGMENT:
+      ctx->dirty |= SWR_NEW_FSCONSTANTS;
+      break;
+   default:
+      break;
+   }
+
+   /* When the binding carries a user buffer, release the local resource
+    * pointer taken from cb->buffer above. */
+   if (cb && cb->user_buffer) {
+      pipe_resource_reference(&constants, NULL);
+   }
+}
+
+
+/*
+ * pipe_context::create_vertex_elements_state hook.
+ *
+ * Translates `num_elements` gallium vertex element descriptions into the
+ * SWR fetch-shader layout and accumulates, per vertex buffer, the summed
+ * byte size (Bpp) of the elements sourced from it.
+ *
+ * Returns the new state object, or NULL if its allocation failed.
+ */
+static void *
+swr_create_vertex_elements_state(struct pipe_context *pipe,
+                                 unsigned num_elements,
+                                 const struct pipe_vertex_element *attribs)
+{
+   struct swr_vertex_element_state *velems;
+
+   assert(num_elements <= PIPE_MAX_ATTRIBS);
+
+   velems = CALLOC_STRUCT(swr_vertex_element_state);
+   if (!velems)
+      return velems;
+
+   velems->fsState.numAttribs = num_elements;
+
+   for (unsigned attr = 0; attr < num_elements; attr++) {
+      // XXX: we should do this keyed on the VS usage info
+
+      const struct pipe_vertex_element *elem = &attribs[attr];
+      const struct util_format_description *desc =
+         util_format_description(elem->src_format);
+      const auto swr_format = mesa_to_swr_format(elem->src_format);
+      auto &layout = velems->fsState.layout[attr];
+
+      layout.AlignedByteOffset = elem->src_offset;
+      layout.Format = swr_format;
+      layout.StreamIndex = elem->vertex_buffer_index;
+      layout.InstanceEnable = elem->instance_divisor != 0;
+
+      /* Channels present in the source format are fetched; absent x/y/z
+       * channels read as 0 and an absent w channel reads as 1.0. */
+      layout.ComponentControl0 =
+         desc->channel[0].type != UTIL_FORMAT_TYPE_VOID
+            ? ComponentControl::StoreSrc
+            : ComponentControl::Store0;
+      layout.ComponentControl1 =
+         desc->channel[1].type != UTIL_FORMAT_TYPE_VOID
+            ? ComponentControl::StoreSrc
+            : ComponentControl::Store0;
+      layout.ComponentControl2 =
+         desc->channel[2].type != UTIL_FORMAT_TYPE_VOID
+            ? ComponentControl::StoreSrc
+            : ComponentControl::Store0;
+      layout.ComponentControl3 =
+         desc->channel[3].type != UTIL_FORMAT_TYPE_VOID
+            ? ComponentControl::StoreSrc
+            : ComponentControl::Store1Fp;
+
+      layout.ComponentPacking = ComponentEnable::XYZW;
+      layout.InstanceDataStepRate = elem->instance_divisor;
+
+      /* Calculate the pitch of each stream */
+      const SWR_FORMAT_INFO &swr_desc = GetFormatInfo(swr_format);
+      velems->stream_pitch[elem->vertex_buffer_index] += swr_desc.Bpp;
+   }
+
+   return velems;
+}
+
+/*
+ * pipe_context::bind_vertex_elements_state hook.
+ *
+ * Makes `velems` the current vertex element state and flags SWR_NEW_VERTEX.
+ */
+static void
+swr_bind_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   ctx->velems = (struct swr_vertex_element_state *)velems;
+   ctx->dirty |= SWR_NEW_VERTEX;
+}
+
+/*
+ * pipe_context::delete_vertex_elements_state hook.
+ *
+ * Frees the vertex element state object.
+ */
+static void
+swr_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   (void)pipe; /* unused */
+
+   /* XXX Need to destroy fetch shader? */
+   FREE(velems);
+}
+
+
+/*
+ * pipe_context::set_vertex_buffers hook.
+ *
+ * Hands the new bindings to util_set_vertex_buffers_count(), which updates
+ * the context's vertex buffer array and count, then flags SWR_NEW_VERTEX.
+ */
+static void
+swr_set_vertex_buffers(struct pipe_context *pipe,
+                       unsigned start_slot,
+                       unsigned num_elements,
+                       const struct pipe_vertex_buffer *buffers)
+{
+   struct swr_context *swr = swr_context(pipe);
+
+   assert(num_elements <= PIPE_MAX_ATTRIBS);
+
+   util_set_vertex_buffers_count(
+      swr->vertex_buffer, &swr->num_vertex_buffers,
+      buffers, start_slot, num_elements);
+
+   swr->dirty |= SWR_NEW_VERTEX;
+}
+
+
+/*
+ * pipe_context::set_index_buffer hook.
+ *
+ * Copies *ib into the context, or zeroes the stored index buffer when `ib`
+ * is NULL, and flags SWR_NEW_VERTEX.
+ */
+static void
+swr_set_index_buffer(struct pipe_context *pipe,
+                     const struct pipe_index_buffer *ib)
+{
+   struct swr_context *swr = swr_context(pipe);
+
+   if (!ib)
+      memset(&swr->index_buffer, 0, sizeof(swr->index_buffer));
+   else
+      memcpy(&swr->index_buffer, ib, sizeof(swr->index_buffer));
+
+   swr->dirty |= SWR_NEW_VERTEX;
+}
+
+/*
+ * pipe_context::set_polygon_stipple hook.
+ *
+ * Stores a copy of the stipple pattern and flags SWR_NEW_STIPPLE.
+ */
+static void
+swr_set_polygon_stipple(struct pipe_context *pipe,
+                        const struct pipe_poly_stipple *stipple)
+{
+   struct swr_context *swr = swr_context(pipe);
+
+   swr->dirty |= SWR_NEW_STIPPLE;
+   swr->poly_stipple = *stipple; /* struct copy */
+}
+
+/*
+ * pipe_context::set_clip_state hook.
+ *
+ * Stores a copy of the clip state and flags SWR_NEW_CLIP.
+ */
+static void
+swr_set_clip_state(struct pipe_context *pipe,
+                   const struct pipe_clip_state *clip)
+{
+   struct swr_context *swr = swr_context(pipe);
+
+   /* XXX Unimplemented, but prevents crash */
+   swr->clip = *clip;
+
+   swr->dirty |= SWR_NEW_CLIP;
+}
+
+
+/*
+ * pipe_context::set_scissor_states hook.
+ *
+ * Stores a copy of the first scissor rectangle in `scissor` and flags
+ * SWR_NEW_SCISSOR.  `start_slot` and `num_viewports` are not consulted.
+ */
+static void
+swr_set_scissor_states(struct pipe_context *pipe,
+                       unsigned start_slot,
+                       unsigned num_viewports,
+                       const struct pipe_scissor_state *scissor)
+{
+   struct swr_context *swr = swr_context(pipe);
+
+   (void)start_slot;    /* unused */
+   (void)num_viewports; /* unused */
+
+   swr->scissor = scissor[0];
+   swr->dirty |= SWR_NEW_SCISSOR;
+}
+
+/*
+ * pipe_context::set_viewport_states hook.
+ *
+ * Stores a copy of the first viewport in `vpt` and flags SWR_NEW_VIEWPORT.
+ * `start_slot` and `num_viewports` are not consulted.
+ */
+static void
+swr_set_viewport_states(struct pipe_context *pipe,
+                        unsigned start_slot,
+                        unsigned num_viewports,
+                        const struct pipe_viewport_state *vpt)
+{
+   struct swr_context *swr = swr_context(pipe);
+
+   (void)start_slot;    /* unused */
+   (void)num_viewports; /* unused */
+
+   swr->viewport = vpt[0];
+   swr->dirty |= SWR_NEW_VIEWPORT;
+}
+
+
+/*
+ * pipe_context::set_framebuffer_state hook.
+ *
+ * When `fb` differs from the stored framebuffer state, takes references on
+ * the new color and depth/stencil surfaces, drops references on color
+ * surfaces that are no longer bound, records the new dimensions and flags
+ * SWR_NEW_FRAMEBUFFER.  An unchanged framebuffer is ignored.
+ */
+static void
+swr_set_framebuffer_state(struct pipe_context *pipe,
+                          const struct pipe_framebuffer_state *fb)
+{
+   struct swr_context *swr = swr_context(pipe);
+   struct pipe_framebuffer_state *cur = &swr->framebuffer;
+
+   boolean changed = !util_framebuffer_state_equal(cur, fb);
+
+   assert(fb->width <= KNOB_GUARDBAND_WIDTH);
+   assert(fb->height <= KNOB_GUARDBAND_HEIGHT);
+
+   if (!changed)
+      return;
+
+   /* Reference the newly bound color buffers ... */
+   for (unsigned n = 0; n < fb->nr_cbufs; ++n)
+      pipe_surface_reference(&cur->cbufs[n], fb->cbufs[n]);
+
+   /* ... and release whatever was bound beyond the new count. */
+   for (unsigned n = fb->nr_cbufs; n < cur->nr_cbufs; ++n)
+      pipe_surface_reference(&cur->cbufs[n], NULL);
+
+   cur->nr_cbufs = fb->nr_cbufs;
+
+   cur->width = fb->width;
+   cur->height = fb->height;
+
+   pipe_surface_reference(&cur->zsbuf, fb->zsbuf);
+
+   swr->dirty |= SWR_NEW_FRAMEBUFFER;
+}
+
+
+/*
+ * pipe_context::set_sample_mask hook.
+ *
+ * Records a changed sample mask and flags SWR_NEW_RASTERIZER; setting the
+ * mask that is already stored is a no-op.
+ */
+static void
+swr_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
+{
+   struct swr_context *swr = swr_context(pipe);
+
+   if (swr->sample_mask == sample_mask)
+      return;
+
+   swr->sample_mask = sample_mask;
+   swr->dirty |= SWR_NEW_RASTERIZER;
+}
+
+
+/*
+ * Translate dirty gallium state held in `ctx` into SWR state and hand it to
+ * the SWR core through the Swr* setters.
+ *
+ * Each section below is guarded by the dirty flags it depends on.  Linkage,
+ * frontend state and backend state are programmed unconditionally at the
+ * end.  On return ctx->dirty holds only the flags that must be revalidated
+ * on the next call (user-memory vertex/index buffers re-raise
+ * SWR_NEW_VERTEX).
+ *
+ * ctx          context whose state is validated
+ * p_draw_info  draw parameters when called for a draw, or NULL; when NULL a
+ *              zero-initialised pipe_draw_info is used instead
+ */
+void
+swr_update_derived(struct swr_context *ctx,
+                   const struct pipe_draw_info *p_draw_info)
+{
+   /* Any state that requires dirty flags to be re-triggered sets this mask */
+   /* For example, user_buffer vertex and index buffers. */
+   unsigned post_update_dirty_flags = 0;
+
+   /* Render Targets */
+   if (ctx->dirty & SWR_NEW_FRAMEBUFFER) {
+      struct pipe_framebuffer_state *fb = &ctx->framebuffer;
+      SWR_SURFACE_STATE *new_attachment[SWR_NUM_ATTACHMENTS] = {0};
+      UINT i;
+
+      /* colorbuffer targets */
+      if (fb->nr_cbufs)
+         for (i = 0; i < fb->nr_cbufs; ++i)
+            if (fb->cbufs[i]) {
+               struct swr_resource *colorBuffer =
+                  swr_resource(fb->cbufs[i]->texture);
+               new_attachment[SWR_ATTACHMENT_COLOR0 + i] = &colorBuffer->swr;
+            }
+
+      /* depth/stencil target */
+      if (fb->zsbuf) {
+         struct swr_resource *depthStencilBuffer =
+            swr_resource(fb->zsbuf->texture);
+         if (depthStencilBuffer->has_depth) {
+            new_attachment[SWR_ATTACHMENT_DEPTH] = &depthStencilBuffer->swr;
+
+            /* With both depth and stencil, stencil uses the secondary
+             * surface of the resource. */
+            if (depthStencilBuffer->has_stencil)
+               new_attachment[SWR_ATTACHMENT_STENCIL] =
+                  &depthStencilBuffer->secondary;
+
+         } else if (depthStencilBuffer->has_stencil)
+            new_attachment[SWR_ATTACHMENT_STENCIL] = &depthStencilBuffer->swr;
+      }
+
+      /* Make the attachment updates */
+      swr_draw_context *pDC = &ctx->swrDC;
+      SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
+      for (i = 0; i < SWR_NUM_ATTACHMENTS; i++) {
+         void *new_base = nullptr;
+         if (new_attachment[i])
+            new_base = new_attachment[i]->pBaseAddress;
+
+         /* StoreTile for changed target */
+         if (renderTargets[i].pBaseAddress != new_base) {
+            if (renderTargets[i].pBaseAddress) {
+               enum SWR_TILE_STATE post_state = (new_attachment[i]
+                  ? SWR_TILE_INVALID : SWR_TILE_RESOLVED);
+               swr_store_render_target(ctx, i, post_state);
+            }
+
+            /* Make new attachment */
+            if (new_attachment[i])
+               renderTargets[i] = *new_attachment[i];
+            else
+               if (renderTargets[i].pBaseAddress)
+                  renderTargets[i] = {0};
+         }
+      }
+   }
+
+   /* Raster state */
+   if (ctx->dirty & (SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
+      pipe_rasterizer_state *rasterizer = ctx->rasterizer;
+      pipe_framebuffer_state *fb = &ctx->framebuffer;
+
+      SWR_RASTSTATE *rastState = &ctx->derived.rastState;
+      rastState->cullMode = swr_convert_cull_mode(rasterizer->cull_face);
+      rastState->frontWinding = rasterizer->front_ccw
+         ? SWR_FRONTWINDING_CCW
+         : SWR_FRONTWINDING_CW;
+      rastState->scissorEnable = rasterizer->scissor;
+      /* Non-positive point size / line width fall back to 1.0 */
+      rastState->pointSize = rasterizer->point_size > 0.0f
+         ? rasterizer->point_size
+         : 1.0f;
+      rastState->lineWidth = rasterizer->line_width > 0.0f
+         ? rasterizer->line_width
+         : 1.0f;
+
+      rastState->pointParam = rasterizer->point_size_per_vertex;
+
+      rastState->pointSpriteEnable = rasterizer->sprite_coord_enable;
+      rastState->pointSpriteTopOrigin =
+         rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT;
+
+      /* XXX TODO: Add multisample */
+      rastState->msaaRastEnable = false;
+      rastState->rastMode = SWR_MSAA_RASTMODE_OFF_PIXEL;
+      rastState->sampleCount = SWR_MULTISAMPLE_1X;
+      rastState->bForcedSampleCount = false;
+
+      /* Polygon offset applies according to the front-face fill mode */
+      bool do_offset = false;
+      switch (rasterizer->fill_front) {
+      case PIPE_POLYGON_MODE_FILL:
+         do_offset = rasterizer->offset_tri;
+         break;
+      case PIPE_POLYGON_MODE_LINE:
+         do_offset = rasterizer->offset_line;
+         break;
+      case PIPE_POLYGON_MODE_POINT:
+         do_offset = rasterizer->offset_point;
+         break;
+      }
+
+      if (do_offset) {
+         rastState->depthBias = rasterizer->offset_units;
+         rastState->slopeScaledDepthBias = rasterizer->offset_scale;
+         rastState->depthBiasClamp = rasterizer->offset_clamp;
+      } else {
+         rastState->depthBias = 0;
+         rastState->slopeScaledDepthBias = 0;
+         rastState->depthBiasClamp = 0;
+      }
+      struct pipe_surface *zb = fb->zsbuf;
+      if (zb && swr_resource(zb->texture)->has_depth)
+         rastState->depthFormat = swr_resource(zb->texture)->swr.format;
+
+      rastState->depthClipEnable = rasterizer->depth_clip;
+
+      SwrSetRastState(ctx->swrContext, rastState);
+   }
+
+   /* Scissor */
+   if (ctx->dirty & SWR_NEW_SCISSOR) {
+      pipe_scissor_state *scissor = &ctx->scissor;
+      BBOX bbox(scissor->miny, scissor->maxy,
+                scissor->minx, scissor->maxx);
+      SwrSetScissorRects(ctx->swrContext, 1, &bbox);
+   }
+
+   /* Viewport */
+   if (ctx->dirty & (SWR_NEW_VIEWPORT | SWR_NEW_FRAMEBUFFER
+                     | SWR_NEW_RASTERIZER)) {
+      pipe_viewport_state *state = &ctx->viewport;
+      pipe_framebuffer_state *fb = &ctx->framebuffer;
+      pipe_rasterizer_state *rasterizer = ctx->rasterizer;
+
+      SWR_VIEWPORT *vp = &ctx->derived.vp;
+      SWR_VIEWPORT_MATRIX *vpm = &ctx->derived.vpm;
+
+      /* Note: vp->width / vp->height are assigned translate + scale here,
+       * i.e. the far edge of the viewport. */
+      vp->x = state->translate[0] - state->scale[0];
+      vp->width = state->translate[0] + state->scale[0];
+      vp->y = state->translate[1] - fabs(state->scale[1]);
+      vp->height = state->translate[1] + fabs(state->scale[1]);
+      if (rasterizer->clip_halfz == 0) {
+         vp->minZ = state->translate[2] - state->scale[2];
+         vp->maxZ = state->translate[2] + state->scale[2];
+      } else {
+         vp->minZ = state->translate[2];
+         vp->maxZ = state->translate[2] + state->scale[2];
+      }
+
+      vpm->m00 = state->scale[0];
+      vpm->m11 = state->scale[1];
+      vpm->m22 = state->scale[2];
+      vpm->m30 = state->translate[0];
+      vpm->m31 = state->translate[1];
+      vpm->m32 = state->translate[2];
+
+      /* Now that the matrix is calculated, clip the view coords to screen
+       * size.  OpenGL allows for -ve x,y in the viewport.
+       */
+      vp->x = std::max(vp->x, 0.0f);
+      vp->y = std::max(vp->y, 0.0f);
+      vp->width = std::min(vp->width, (float)fb->width);
+      vp->height = std::min(vp->height, (float)fb->height);
+
+      SwrSetViewports(ctx->swrContext, 1, vp, vpm);
+   }
+
+   /* Set vertex & index buffers */
+   /* (using draw info if called by swr_draw_vbo) */
+   if (ctx->dirty & SWR_NEW_VERTEX) {
+      uint32_t size, pitch, max_vertex, partial_inbounds;
+      const uint8_t *p_data;
+
+      /* If being called by swr_draw_vbo, copy draw details */
+      struct pipe_draw_info info = {0};
+      if (p_draw_info)
+         info = *p_draw_info;
+
+      /* vertex buffers */
+      SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
+      for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
+         pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
+
+         pitch = vb->stride;
+         if (!vb->user_buffer) {
+            /* VBO
+             * size is based on buffer->width0 rather than info.max_index
+             * to prevent having to validate VBO on each draw */
+            size = vb->buffer->width0;
+            if (pitch) {
+               max_vertex = size / pitch;
+               partial_inbounds = size % pitch;
+            } else {
+               /* pitch = 0 means a constant value: there is a single
+                * element and no partial trailing vertex.  Guarding here
+                * also avoids an integer division by zero.
+                * NOTE(review): assumes the fetch path reads element 0 for
+                * every index when pitch is 0 - confirm. */
+               max_vertex = 1;
+               partial_inbounds = 0;
+            }
+
+            p_data = (const uint8_t *)swr_resource_data(vb->buffer)
+               + vb->buffer_offset;
+         } else {
+            /* Client buffer
+             * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
+             * revalidate on each draw */
+            post_update_dirty_flags |= SWR_NEW_VERTEX;
+
+            if (pitch) {
+               size = (info.max_index - info.min_index + 1) * pitch;
+            } else {
+               /* pitch = 0, means constant value
+                * set size to 1 vertex */
+               size = ctx->velems->stream_pitch[i];
+            }
+
+            max_vertex = info.max_index + 1;
+            partial_inbounds = 0;
+
+            /* Copy only needed vertices to scratch space */
+            size = AlignUp(size, 4);
+            const void *ptr = (const uint8_t *) vb->user_buffer
+               + info.min_index * pitch;
+            ptr = swr_copy_to_scratch_space(
+               ctx, &ctx->scratch->vertex_buffer, ptr, size);
+            /* Rebase so that index min_index addresses the start of the
+             * scratch copy. */
+            p_data = (const uint8_t *)ptr - info.min_index * pitch;
+         }
+
+         swrVertexBuffers[i] = {0};
+         swrVertexBuffers[i].index = i;
+         swrVertexBuffers[i].pitch = pitch;
+         swrVertexBuffers[i].pData = p_data;
+         swrVertexBuffers[i].size = size;
+         swrVertexBuffers[i].maxVertex = max_vertex;
+         swrVertexBuffers[i].partialInboundsSize = partial_inbounds;
+      }
+
+      SwrSetVertexBuffers(
+         ctx->swrContext, ctx->num_vertex_buffers, swrVertexBuffers);
+
+      /* index buffer, if required (info passed in by swr_draw_vbo) */
+      SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */
+      if (info.indexed) {
+         pipe_index_buffer *ib = &ctx->index_buffer;
+
+         pitch = ib->index_size ? ib->index_size : sizeof(uint32_t);
+         index_type = swr_convert_index_type(pitch);
+
+         if (!ib->user_buffer) {
+            /* VBO
+             * size is based on buffer->width0 rather than info.count
+             * to prevent having to validate VBO on each draw */
+            size = ib->buffer->width0;
+            p_data =
+               (const uint8_t *)swr_resource_data(ib->buffer) + ib->offset;
+         } else {
+            /* Client buffer
+             * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
+             * revalidate on each draw */
+            post_update_dirty_flags |= SWR_NEW_VERTEX;
+
+            size = info.count * pitch;
+            size = AlignUp(size, 4);
+
+            /* Copy indices to scratch space */
+            const void *ptr = ib->user_buffer;
+            ptr = swr_copy_to_scratch_space(
+               ctx, &ctx->scratch->index_buffer, ptr, size);
+            p_data = (const uint8_t *)ptr;
+         }
+
+         SWR_INDEX_BUFFER_STATE swrIndexBuffer;
+         swrIndexBuffer.format = swr_convert_index_type(ib->index_size);
+         swrIndexBuffer.pIndices = p_data;
+         swrIndexBuffer.size = size;
+
+         SwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer);
+      }
+
+      /* A change of index type invalidates the cached fetch function */
+      struct swr_vertex_element_state *velems = ctx->velems;
+      if (velems && velems->fsState.indexType != index_type) {
+         velems->fsFunc = NULL;
+         velems->fsState.indexType = index_type;
+      }
+   }
+
+   /* VertexShader */
+   if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_FRAMEBUFFER)) {
+      SwrSetVertexFunc(ctx->swrContext, ctx->vs->func);
+   }
+
+   /* FragmentShader: look up (or compile and cache) the variant matching
+    * the current key.  `key` is only initialised inside this block; the
+    * sampler and sampler-view sections below read it, and every flag that
+    * enables them is also part of this condition. */
+   swr_jit_key key;
+   if (ctx->dirty & (SWR_NEW_FS | SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW
+                     | SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
+      memset(&key, 0, sizeof(key));
+      swr_generate_fs_key(key, ctx, ctx->fs);
+      auto search = ctx->fs->map.find(key);
+      PFN_PIXEL_KERNEL func;
+      if (search != ctx->fs->map.end()) {
+         func = search->second;
+      } else {
+         func = swr_compile_fs(ctx, key);
+         ctx->fs->map.insert(std::make_pair(key, func));
+      }
+      SWR_PS_STATE psState = {0};
+      psState.pfnPixelShader = func;
+      psState.killsPixel = ctx->fs->info.base.uses_kill;
+      psState.inputCoverage = SWR_INPUT_COVERAGE_NORMAL;
+      psState.writesODepth = ctx->fs->info.base.writes_z;
+      psState.usesSourceDepth = ctx->fs->info.base.reads_z;
+      psState.shadingRate = SWR_SHADING_RATE_PIXEL; // XXX
+      psState.numRenderTargets = ctx->framebuffer.nr_cbufs;
+      psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE; // XXX msaa
+      uint32_t barycentricsMask = 0;
+#if 0
+      // when we switch to mesa-master
+      if (ctx->fs->info.base.uses_persp_center ||
+          ctx->fs->info.base.uses_linear_center)
+         barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
+      if (ctx->fs->info.base.uses_persp_centroid ||
+          ctx->fs->info.base.uses_linear_centroid)
+         barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
+      if (ctx->fs->info.base.uses_persp_sample ||
+          ctx->fs->info.base.uses_linear_sample)
+         barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
+#else
+      for (unsigned i = 0; i < ctx->fs->info.base.num_inputs; i++) {
+         switch (ctx->fs->info.base.input_interpolate_loc[i]) {
+         case TGSI_INTERPOLATE_LOC_CENTER:
+            barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
+            break;
+         case TGSI_INTERPOLATE_LOC_CENTROID:
+            barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
+            break;
+         case TGSI_INTERPOLATE_LOC_SAMPLE:
+            barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
+            break;
+         }
+      }
+#endif
+      psState.barycentricsMask = barycentricsMask;
+      psState.usesUAV = false; // XXX
+      psState.forceEarlyZ = false;
+      SwrSetPixelShaderState(ctx->swrContext, &psState);
+   }
+
+   /* JIT sampler state */
+   if (ctx->dirty & SWR_NEW_SAMPLER) {
+      swr_draw_context *pDC = &ctx->swrDC;
+
+      for (unsigned i = 0; i < key.nr_samplers; i++) {
+         const struct pipe_sampler_state *sampler =
+            ctx->samplers[PIPE_SHADER_FRAGMENT][i];
+
+         if (sampler) {
+            pDC->samplersFS[i].min_lod = sampler->min_lod;
+            pDC->samplersFS[i].max_lod = sampler->max_lod;
+            pDC->samplersFS[i].lod_bias = sampler->lod_bias;
+            COPY_4V(pDC->samplersFS[i].border_color, sampler->border_color.f);
+         }
+      }
+   }
+
+   /* JIT sampler view state */
+   if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
+      swr_draw_context *pDC = &ctx->swrDC;
+
+      for (unsigned i = 0; i < key.nr_sampler_views; i++) {
+         struct pipe_sampler_view *view =
+            ctx->sampler_views[PIPE_SHADER_FRAGMENT][i];
+
+         if (view) {
+            struct pipe_resource *res = view->texture;
+            struct swr_resource *swr_res = swr_resource(res);
+            struct swr_jit_texture *jit_tex = &pDC->texturesFS[i];
+            memset(jit_tex, 0, sizeof(*jit_tex));
+            jit_tex->width = res->width0;
+            jit_tex->height = res->height0;
+            jit_tex->depth = res->depth0;
+            jit_tex->first_level = view->u.tex.first_level;
+            jit_tex->last_level = view->u.tex.last_level;
+            jit_tex->base_ptr = swr_res->swr.pBaseAddress;
+
+            for (unsigned level = jit_tex->first_level;
+                 level <= jit_tex->last_level;
+                 level++) {
+               jit_tex->row_stride[level] = swr_res->row_stride[level];
+               jit_tex->img_stride[level] = swr_res->img_stride[level];
+               jit_tex->mip_offsets[level] = swr_res->mip_offsets[level];
+            }
+         }
+      }
+   }
+
+   /* VertexShader Constants */
+   if (ctx->dirty & SWR_NEW_VSCONSTANTS) {
+      swr_draw_context *pDC = &ctx->swrDC;
+
+      for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
+         const pipe_constant_buffer *cb =
+            &ctx->constants[PIPE_SHADER_VERTEX][i];
+         pDC->num_constantsVS[i] = cb->buffer_size;
+         if (cb->buffer)
+            /* Point at the resource's storage, not at the pipe_resource
+             * struct itself. */
+            pDC->constantVS[i] =
+               (const float *)((const BYTE *)swr_resource_data(cb->buffer)
+                               + cb->buffer_offset);
+         else {
+            /* Need to copy these constants to scratch space */
+            if (cb->user_buffer && cb->buffer_size) {
+               const void *ptr =
+                  ((const BYTE *)cb->user_buffer + cb->buffer_offset);
+               uint32_t size = AlignUp(cb->buffer_size, 4);
+               ptr = swr_copy_to_scratch_space(
+                  ctx, &ctx->scratch->vs_constants, ptr, size);
+               pDC->constantVS[i] = (const float *)ptr;
+            }
+         }
+      }
+   }
+
+   /* FragmentShader Constants */
+   if (ctx->dirty & SWR_NEW_FSCONSTANTS) {
+      swr_draw_context *pDC = &ctx->swrDC;
+
+      for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
+         const pipe_constant_buffer *cb =
+            &ctx->constants[PIPE_SHADER_FRAGMENT][i];
+         pDC->num_constantsFS[i] = cb->buffer_size;
+         if (cb->buffer)
+            /* Point at the resource's storage, not at the pipe_resource
+             * struct itself. */
+            pDC->constantFS[i] =
+               (const float *)((const BYTE *)swr_resource_data(cb->buffer)
+                               + cb->buffer_offset);
+         else {
+            /* Need to copy these constants to scratch space */
+            if (cb->user_buffer && cb->buffer_size) {
+               const void *ptr =
+                  ((const BYTE *)cb->user_buffer + cb->buffer_offset);
+               uint32_t size = AlignUp(cb->buffer_size, 4);
+               ptr = swr_copy_to_scratch_space(
+                  ctx, &ctx->scratch->fs_constants, ptr, size);
+               pDC->constantFS[i] = (const float *)ptr;
+            }
+         }
+      }
+   }
+
+   /* Depth/stencil state */
+   if (ctx->dirty & (SWR_NEW_DEPTH_STENCIL_ALPHA | SWR_NEW_FRAMEBUFFER)) {
+      struct pipe_depth_state *depth = &(ctx->depth_stencil->depth);
+      struct pipe_stencil_state *stencil = ctx->depth_stencil->stencil;
+      SWR_DEPTH_STENCIL_STATE depthStencilState = {{0}};
+
+      /* XXX, incomplete.  Need to flesh out stencil & alpha test state
+      struct pipe_stencil_state *front_stencil =
+      ctx->depth_stencil.stencil[0];
+      struct pipe_stencil_state *back_stencil = ctx->depth_stencil.stencil[1];
+      struct pipe_alpha_state alpha;
+      */
+      if (stencil[0].enabled) {
+         depthStencilState.stencilWriteEnable = 1;
+         depthStencilState.stencilTestEnable = 1;
+         depthStencilState.stencilTestFunc =
+            swr_convert_depth_func(stencil[0].func);
+
+         depthStencilState.stencilPassDepthPassOp =
+            swr_convert_stencil_op(stencil[0].zpass_op);
+         depthStencilState.stencilPassDepthFailOp =
+            swr_convert_stencil_op(stencil[0].zfail_op);
+         depthStencilState.stencilFailOp =
+            swr_convert_stencil_op(stencil[0].fail_op);
+         depthStencilState.stencilWriteMask = stencil[0].writemask;
+         depthStencilState.stencilTestMask = stencil[0].valuemask;
+         depthStencilState.stencilRefValue = ctx->stencil_ref.ref_value[0];
+      }
+      if (stencil[1].enabled) {
+         depthStencilState.doubleSidedStencilTestEnable = 1;
+
+         depthStencilState.backfaceStencilTestFunc =
+            swr_convert_depth_func(stencil[1].func);
+
+         depthStencilState.backfaceStencilPassDepthPassOp =
+            swr_convert_stencil_op(stencil[1].zpass_op);
+         depthStencilState.backfaceStencilPassDepthFailOp =
+            swr_convert_stencil_op(stencil[1].zfail_op);
+         depthStencilState.backfaceStencilFailOp =
+            swr_convert_stencil_op(stencil[1].fail_op);
+         depthStencilState.backfaceStencilWriteMask = stencil[1].writemask;
+         depthStencilState.backfaceStencilTestMask = stencil[1].valuemask;
+
+         depthStencilState.backfaceStencilRefValue =
+            ctx->stencil_ref.ref_value[1];
+      }
+
+      depthStencilState.depthTestEnable = depth->enabled;
+      depthStencilState.depthTestFunc = swr_convert_depth_func(depth->func);
+      depthStencilState.depthWriteEnable = depth->writemask;
+      SwrSetDepthStencilState(ctx->swrContext, &depthStencilState);
+   }
+
+   /* Blend State */
+   if (ctx->dirty & (SWR_NEW_BLEND |
+                     SWR_NEW_FRAMEBUFFER |
+                     SWR_NEW_DEPTH_STENCIL_ALPHA)) {
+      struct pipe_framebuffer_state *fb = &ctx->framebuffer;
+
+      SWR_BLEND_STATE blendState;
+      memcpy(&blendState, &ctx->blend->blendState, sizeof(blendState));
+      blendState.constantColor[0] = ctx->blend_color.color[0];
+      blendState.constantColor[1] = ctx->blend_color.color[1];
+      blendState.constantColor[2] = ctx->blend_color.color[2];
+      blendState.constantColor[3] = ctx->blend_color.color[3];
+
+      /* Pass the raw bit pattern of the float alpha reference.  memcpy is
+       * used instead of a pointer cast to stay within the aliasing rules. */
+      uint32_t alpha_ref_bits;
+      memcpy(&alpha_ref_bits, &ctx->depth_stencil->alpha.ref_value,
+             sizeof(alpha_ref_bits));
+      blendState.alphaTestReference = alpha_ref_bits;
+
+      // XXX MSAA
+      blendState.sampleMask = 0;
+      blendState.sampleCount = SWR_MULTISAMPLE_1X;
+
+      /* If there are no color buffers bound, disable writes on RT0
+       * and skip loop */
+      if (fb->nr_cbufs == 0) {
+         blendState.renderTarget[0].writeDisableRed = 1;
+         blendState.renderTarget[0].writeDisableGreen = 1;
+         blendState.renderTarget[0].writeDisableBlue = 1;
+         blendState.renderTarget[0].writeDisableAlpha = 1;
+         SwrSetBlendFunc(ctx->swrContext, 0, NULL);
+      }
+      else
+         for (int target = 0;
+              target < std::min(SWR_NUM_RENDERTARGETS,
+                                PIPE_MAX_COLOR_BUFS);
+              target++) {
+            if (!fb->cbufs[target])
+               continue;
+
+            struct swr_resource *colorBuffer =
+               swr_resource(fb->cbufs[target]->texture);
+
+            BLEND_COMPILE_STATE compileState;
+            memset(&compileState, 0, sizeof(compileState));
+            compileState.format = colorBuffer->swr.format;
+            memcpy(&compileState.blendState,
+                   &ctx->blend->compileState[target],
+                   sizeof(compileState.blendState));
+
+            /* No blending and no logic op: no blend function is needed */
+            if (compileState.blendState.blendEnable == false &&
+                compileState.blendState.logicOpEnable == false) {
+               SwrSetBlendFunc(ctx->swrContext, target, NULL);
+               continue;
+            }
+
+            compileState.desc.alphaTestEnable =
+               ctx->depth_stencil->alpha.enabled;
+            compileState.desc.independentAlphaBlendEnable =
+               ctx->blend->pipe.independent_blend_enable;
+            compileState.desc.alphaToCoverageEnable =
+               ctx->blend->pipe.alpha_to_coverage;
+            compileState.desc.sampleMaskEnable = 0; // XXX
+            compileState.desc.numSamples = 1; // XXX
+
+            compileState.alphaTestFunction =
+               swr_convert_depth_func(ctx->depth_stencil->alpha.func);
+            compileState.alphaTestFormat = ALPHA_TEST_FLOAT32; // xxx
+
+            /* Reuse a cached blend function for this compile state, or
+             * JIT-compile and cache a new one. */
+            PFN_BLEND_JIT_FUNC func = NULL;
+            auto search = ctx->blendJIT->find(compileState);
+            if (search != ctx->blendJIT->end()) {
+               func = search->second;
+            } else {
+               HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
+               func = JitCompileBlend(hJitMgr, compileState);
+               debug_printf("BLEND shader %p\n", func);
+               assert(func && "Error: BlendShader = NULL");
+
+               ctx->blendJIT->insert(std::make_pair(compileState, func));
+            }
+            SwrSetBlendFunc(ctx->swrContext, target, func);
+         }
+
+      SwrSetBlendState(ctx->swrContext, &blendState);
+   }
+
+   if (ctx->dirty & SWR_NEW_STIPPLE) {
+      /* XXX What to do with this one??? SWR doesn't stipple */
+   }
+
+   /* Stream output state and buffers */
+   if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_SO | SWR_NEW_RASTERIZER)) {
+      ctx->vs->soState.rasterizerDisable =
+         ctx->rasterizer->rasterizer_discard;
+      SwrSetSoState(ctx->swrContext, &ctx->vs->soState);
+
+      pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output;
+
+      for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
+         SWR_STREAMOUT_BUFFER buffer = {0};
+         if (!ctx->so_targets[i])
+            continue;
+         buffer.enable = true;
+         buffer.pBuffer =
+            (uint32_t *)swr_resource_data(ctx->so_targets[i]->buffer);
+         /* gallium sizes/offsets are shifted right by 2 (divided by 4) */
+         buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
+         buffer.pitch = stream_output->stride[i];
+         buffer.streamOffset = ctx->so_targets[i]->buffer_offset >> 2;
+
+         SwrSetSoBuffers(ctx->swrContext, &buffer, i);
+      }
+   }
+
+   /* With point sprites enabled, one extra linkage bit is set just past
+    * the vertex shader's outputs. */
+   uint32_t linkage = ctx->vs->linkageMask;
+   if (ctx->rasterizer->sprite_coord_enable)
+      linkage |= (1 << ctx->vs->info.base.num_outputs);
+
+   SwrSetLinkage(ctx->swrContext, linkage, NULL);
+
+   // set up frontend state
+   SWR_FRONTEND_STATE feState = {0};
+   SwrSetFrontendState(ctx->swrContext, &feState);
+
+   // set up backend state
+   SWR_BACKEND_STATE backendState = {0};
+   backendState.numAttributes = 1;
+   backendState.numComponents[0] = 4;
+   backendState.constantInterpolationMask = ctx->fs->constantMask;
+   backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask;
+
+   SwrSetBackendState(ctx->swrContext, &backendState);
+
+   /* Everything is validated; keep only the flags that asked to be
+    * re-triggered. */
+   ctx->dirty = post_update_dirty_flags;
+}
+
+/*
+ * pipe_context::create_stream_output_target hook.
+ *
+ * Allocates a stream-output target with a reference count of 1 that holds
+ * a reference on `buffer` and records the given offset and size.
+ *
+ * Returns the new target, or NULL if the allocation failed.
+ */
+static struct pipe_stream_output_target *
+swr_create_so_target(struct pipe_context *pipe,
+                     struct pipe_resource *buffer,
+                     unsigned buffer_offset,
+                     unsigned buffer_size)
+{
+   struct pipe_stream_output_target *so_target =
+      CALLOC_STRUCT(pipe_stream_output_target);
+
+   if (so_target == NULL)
+      return NULL;
+
+   so_target->context = pipe;
+   so_target->reference.count = 1;
+   pipe_resource_reference(&so_target->buffer, buffer);
+   so_target->buffer_offset = buffer_offset;
+   so_target->buffer_size = buffer_size;
+
+   return so_target;
+}
+
+/*
+ * pipe_context::stream_output_target_destroy hook.
+ *
+ * Drops the target's reference on its buffer and frees the target.
+ */
+static void
+swr_destroy_so_target(struct pipe_context *pipe,
+                      struct pipe_stream_output_target *target)
+{
+   (void)pipe; /* unused */
+
+   pipe_resource_reference(&target->buffer, NULL);
+   FREE(target);
+}
+
+/*
+ * pipe_context::set_stream_output_targets hook.
+ *
+ * Binds the first `num_targets` entries of `targets` as the context's
+ * stream-output targets, releases any previously bound targets beyond that
+ * count, and flags SWR_NEW_SO.  `offsets` is not consulted.
+ */
+static void
+swr_set_so_targets(struct pipe_context *pipe,
+                   unsigned num_targets,
+                   struct pipe_stream_output_target **targets,
+                   const unsigned *offsets)
+{
+   struct swr_context *swr = swr_context(pipe);
+   uint32_t i;
+
+   assert(num_targets < MAX_SO_STREAMS);
+
+   for (i = 0; i < num_targets; i++) {
+      pipe_so_target_reference(
+         (struct pipe_stream_output_target **)&swr->so_targets[i],
+         targets[i]);
+   }
+
+   /* Unbind the targets left over from the previous, larger binding */
+   for (/* fall-through */; i < swr->num_so_targets; i++) {
+      pipe_so_target_reference(
+         (struct pipe_stream_output_target **)&swr->so_targets[i], NULL);
+   }
+
+   swr->num_so_targets = num_targets;
+
+   /* OR the flag in: a plain assignment would discard every other dirty
+    * flag that is still waiting to be validated. */
+   swr->dirty |= SWR_NEW_SO;
+}
+
+
+/*
+ * Install this file's state functions into the pipe_context vtable.
+ */
+void
+swr_state_init(struct pipe_context *pipe)
+{
+   /* Object creation */
+   pipe->create_blend_state = swr_create_blend_state;
+   pipe->create_depth_stencil_alpha_state = swr_create_depth_stencil_state;
+   pipe->create_rasterizer_state = swr_create_rasterizer_state;
+   pipe->create_sampler_state = swr_create_sampler_state;
+   pipe->create_sampler_view = swr_create_sampler_view;
+   pipe->create_vs_state = swr_create_vs_state;
+   pipe->create_fs_state = swr_create_fs_state;
+   pipe->create_vertex_elements_state = swr_create_vertex_elements_state;
+   pipe->create_stream_output_target = swr_create_so_target;
+
+   /* Object binding */
+   pipe->bind_blend_state = swr_bind_blend_state;
+   pipe->bind_depth_stencil_alpha_state = swr_bind_depth_stencil_state;
+   pipe->bind_rasterizer_state = swr_bind_rasterizer_state;
+   pipe->bind_sampler_states = swr_bind_sampler_states;
+   pipe->bind_vs_state = swr_bind_vs_state;
+   pipe->bind_fs_state = swr_bind_fs_state;
+   pipe->bind_vertex_elements_state = swr_bind_vertex_elements_state;
+
+   /* Object destruction */
+   pipe->delete_blend_state = swr_delete_blend_state;
+   pipe->delete_depth_stencil_alpha_state = swr_delete_depth_stencil_state;
+   pipe->delete_rasterizer_state = swr_delete_rasterizer_state;
+   pipe->delete_sampler_state = swr_delete_sampler_state;
+   pipe->sampler_view_destroy = swr_sampler_view_destroy;
+   pipe->delete_vs_state = swr_delete_vs_state;
+   pipe->delete_fs_state = swr_delete_fs_state;
+   pipe->delete_vertex_elements_state = swr_delete_vertex_elements_state;
+   pipe->stream_output_target_destroy = swr_destroy_so_target;
+
+   /* Parameter-style state setters */
+   pipe->set_sampler_views = swr_set_sampler_views;
+   pipe->set_constant_buffer = swr_set_constant_buffer;
+   pipe->set_vertex_buffers = swr_set_vertex_buffers;
+   pipe->set_index_buffer = swr_set_index_buffer;
+   pipe->set_polygon_stipple = swr_set_polygon_stipple;
+   pipe->set_clip_state = swr_set_clip_state;
+   pipe->set_scissor_states = swr_set_scissor_states;
+   pipe->set_viewport_states = swr_set_viewport_states;
+   pipe->set_framebuffer_state = swr_set_framebuffer_state;
+   pipe->set_blend_color = swr_set_blend_color;
+   pipe->set_stencil_ref = swr_set_stencil_ref;
+   pipe->set_sample_mask = swr_set_sample_mask;
+   pipe->set_stream_output_targets = swr_set_so_targets;
+}
diff --git a/src/gallium/drivers/swr/swr_state.h b/src/gallium/drivers/swr/swr_state.h
new file mode 100644
index 0000000..a2b4d80
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_state.h
@@ -0,0 +1,307 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_STATE_H
+#define SWR_STATE_H
+
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "gallivm/lp_bld_tgsi.h"
+#include "util/u_hash.h"
+#include "api.h"
+#include "swr_tex_sample.h"
+#include "swr_shader.h"
+#include <unordered_map>
+
+/* skeleton */
+/*
+ * Vertex shader object: the gallium shader state together with the TGSI
+ * info, a vertex function pointer and the stream-output state/functions
+ * kept alongside it.
+ */
+struct swr_vertex_shader {
+ struct pipe_shader_state pipe;
+ struct lp_tgsi_info info;
+ /* NOTE(review): meaning of the mask bits is not visible here -- confirm */
+ unsigned linkageMask;
+ PFN_VERTEX_FUNC func;
+ SWR_STREAMOUT_STATE soState;
+ /* Sized by PIPE_PRIM_MAX; presumably indexed by primitive type -- confirm */
+ PFN_SO_FUNC soFunc[PIPE_PRIM_MAX];
+};
+
+/*
+ * Fragment shader object: the gallium shader state, its TGSI info, two
+ * bit masks and a table of pixel kernel function pointers.
+ */
+struct swr_fragment_shader {
+ struct pipe_shader_state pipe;
+ struct lp_tgsi_info info;
+ /* NOTE(review): bit meanings of both masks are not visible here -- confirm */
+ uint32_t constantMask;
+ uint32_t pointSpriteMask;
+ /* Pixel kernel function pointers, looked up by swr_jit_key */
+ std::unordered_map<swr_jit_key, PFN_PIXEL_KERNEL> map;
+};
+
+/*
+ * Vertex element state: the fetch compile state, the fetch function
+ * pointer and a per-attribute pitch table.
+ */
+struct swr_vertex_element_state {
+ FETCH_COMPILE_STATE fsState;
+ PFN_FETCH_FUNC fsFunc;
+ /* Sized by PIPE_MAX_ATTRIBS; presumably one pitch per vertex stream -- confirm */
+ uint32_t stream_pitch[PIPE_MAX_ATTRIBS];
+};
+
+/*
+ * Blend state: the gallium blend state next to the SWR blend state and
+ * one blend compile state per color buffer slot.
+ */
+struct swr_blend_state {
+ struct pipe_blend_state pipe;
+ SWR_BLEND_STATE blendState;
+ /* One entry per possible color buffer (PIPE_MAX_COLOR_BUFS) */
+ RENDER_TARGET_BLEND_COMPILE_STATE compileState[PIPE_MAX_COLOR_BUFS];
+};
+
+/*
+ * Derived SWR API DrawState
+ * For convenience of making simple changes without re-deriving state.
+ *
+ * Holds SWR-side copies of the rasterizer state, the viewport and the
+ * viewport matrix.
+ */
+struct swr_derived_state {
+ SWR_RASTSTATE rastState; /* SWR rasterizer state */
+ SWR_VIEWPORT vp; /* SWR viewport */
+ SWR_VIEWPORT_MATRIX vpm; /* SWR viewport matrix */
+};
+
+/*
+ * Update the derived state of the given context.
+ * The draw info argument is optional and defaults to nullptr.
+ * NOTE(review): the definition is not visible here; presumably it
+ * translates the bound gallium state into SWR state -- confirm.
+ */
+void swr_update_derived(struct swr_context *,
+ const struct pipe_draw_info * = nullptr);
+
+/*
+ * Conversion functions: Convert mesa state defines to SWR.
+ */
+
+/**
+ * Translate a gallium PIPE_LOGICOP_* value into the matching SWR_LOGIC_OP.
+ *
+ * \param op  one of the PIPE_LOGICOP_* values
+ * \return the same-named LOGICOP_* enumerant.  An unrecognized value trips
+ *         the assert; when asserts are compiled out LOGICOP_NOOP is
+ *         returned instead.
+ */
+static INLINE SWR_LOGIC_OP
+swr_convert_logic_op(const UINT op)
+{
+   switch (op) {
+   case PIPE_LOGICOP_CLEAR:
+      return LOGICOP_CLEAR;
+   case PIPE_LOGICOP_NOR:
+      return LOGICOP_NOR;
+   case PIPE_LOGICOP_AND_INVERTED:
+      /* Was mapped to LOGICOP_CLEAR, which silently zeroed the target. */
+      return LOGICOP_AND_INVERTED;
+   case PIPE_LOGICOP_COPY_INVERTED:
+      return LOGICOP_COPY_INVERTED;
+   case PIPE_LOGICOP_AND_REVERSE:
+      return LOGICOP_AND_REVERSE;
+   case PIPE_LOGICOP_INVERT:
+      return LOGICOP_INVERT;
+   case PIPE_LOGICOP_XOR:
+      return LOGICOP_XOR;
+   case PIPE_LOGICOP_NAND:
+      return LOGICOP_NAND;
+   case PIPE_LOGICOP_AND:
+      return LOGICOP_AND;
+   case PIPE_LOGICOP_EQUIV:
+      return LOGICOP_EQUIV;
+   case PIPE_LOGICOP_NOOP:
+      return LOGICOP_NOOP;
+   case PIPE_LOGICOP_OR_INVERTED:
+      return LOGICOP_OR_INVERTED;
+   case PIPE_LOGICOP_COPY:
+      return LOGICOP_COPY;
+   case PIPE_LOGICOP_OR_REVERSE:
+      return LOGICOP_OR_REVERSE;
+   case PIPE_LOGICOP_OR:
+      return LOGICOP_OR;
+   case PIPE_LOGICOP_SET:
+      return LOGICOP_SET;
+   default:
+      assert(0 && "Unsupported logic op");
+      return LOGICOP_NOOP;
+   }
+}
+
+/**
+ * Translate a gallium PIPE_STENCIL_OP_* value into an SWR_STENCILOP.
+ *
+ * The plain gallium INCR/DECR ops map to the SWR saturating variants and
+ * the gallium *_WRAP ops map to the plain SWR INCR/DECR.
+ *
+ * \param op  one of the PIPE_STENCIL_OP_* values
+ * \return the SWR stencil op; an unrecognized value asserts and falls
+ *         back to STENCILOP_KEEP.
+ */
+static INLINE SWR_STENCILOP
+swr_convert_stencil_op(const UINT op)
+{
+   SWR_STENCILOP swr_op = STENCILOP_KEEP;
+
+   switch (op) {
+   case PIPE_STENCIL_OP_KEEP:
+      swr_op = STENCILOP_KEEP;
+      break;
+   case PIPE_STENCIL_OP_ZERO:
+      swr_op = STENCILOP_ZERO;
+      break;
+   case PIPE_STENCIL_OP_REPLACE:
+      swr_op = STENCILOP_REPLACE;
+      break;
+   case PIPE_STENCIL_OP_INCR:
+      swr_op = STENCILOP_INCRSAT;
+      break;
+   case PIPE_STENCIL_OP_DECR:
+      swr_op = STENCILOP_DECRSAT;
+      break;
+   case PIPE_STENCIL_OP_INCR_WRAP:
+      swr_op = STENCILOP_INCR;
+      break;
+   case PIPE_STENCIL_OP_DECR_WRAP:
+      swr_op = STENCILOP_DECR;
+      break;
+   case PIPE_STENCIL_OP_INVERT:
+      swr_op = STENCILOP_INVERT;
+      break;
+   default:
+      assert(0 && "Unsupported stencil op");
+      break;
+   }
+
+   return swr_op;
+}
+
+/**
+ * Pick the SWR index buffer format for a given index size in bytes.
+ *
+ * \param index_size  size of one index, compared against the sizes of
+ *                    unsigned char, unsigned short and unsigned int
+ * \return R8_UINT, R16_UINT or R32_UINT; any other size asserts and
+ *         falls back to R32_UINT.
+ */
+static INLINE SWR_FORMAT
+swr_convert_index_type(const UINT index_size)
+{
+   if (index_size == sizeof(unsigned char))
+      return R8_UINT;
+
+   if (index_size == sizeof(unsigned short))
+      return R16_UINT;
+
+   if (index_size == sizeof(unsigned int))
+      return R32_UINT;
+
+   assert(0 && "Unsupported index type");
+   return R32_UINT;
+}
+
+
+/**
+ * Translate a gallium PIPE_FUNC_* comparison into an SWR_ZFUNCTION.
+ *
+ * \param pipe_func  one of the PIPE_FUNC_* values
+ * \return the matching ZFUNC_* value; an unrecognized value asserts and
+ *         falls back to ZFUNC_ALWAYS.
+ */
+static INLINE SWR_ZFUNCTION
+swr_convert_depth_func(const UINT pipe_func)
+{
+   SWR_ZFUNCTION zfunc = ZFUNC_ALWAYS;
+
+   switch (pipe_func) {
+   case PIPE_FUNC_NEVER:
+      zfunc = ZFUNC_NEVER;
+      break;
+   case PIPE_FUNC_LESS:
+      zfunc = ZFUNC_LT;
+      break;
+   case PIPE_FUNC_EQUAL:
+      zfunc = ZFUNC_EQ;
+      break;
+   case PIPE_FUNC_LEQUAL:
+      zfunc = ZFUNC_LE;
+      break;
+   case PIPE_FUNC_GREATER:
+      zfunc = ZFUNC_GT;
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      zfunc = ZFUNC_NE;
+      break;
+   case PIPE_FUNC_GEQUAL:
+      zfunc = ZFUNC_GE;
+      break;
+   case PIPE_FUNC_ALWAYS:
+      zfunc = ZFUNC_ALWAYS;
+      break;
+   default:
+      assert(0 && "Unsupported depth func");
+      break;
+   }
+
+   return zfunc;
+}
+
+
+/**
+ * Translate a gallium PIPE_FACE_* cull selection into an SWR_CULLMODE.
+ *
+ * \param cull_face  one of the PIPE_FACE_* values
+ * \return the matching SWR_CULLMODE_* value; an unrecognized value
+ *         asserts and falls back to SWR_CULLMODE_NONE.
+ */
+static INLINE SWR_CULLMODE
+swr_convert_cull_mode(const UINT cull_face)
+{
+   SWR_CULLMODE mode = SWR_CULLMODE_NONE;
+
+   switch (cull_face) {
+   case PIPE_FACE_NONE:
+      mode = SWR_CULLMODE_NONE;
+      break;
+   case PIPE_FACE_FRONT:
+      mode = SWR_CULLMODE_FRONT;
+      break;
+   case PIPE_FACE_BACK:
+      mode = SWR_CULLMODE_BACK;
+      break;
+   case PIPE_FACE_FRONT_AND_BACK:
+      mode = SWR_CULLMODE_BOTH;
+      break;
+   default:
+      assert(0 && "Invalid cull mode");
+      break;
+   }
+
+   return mode;
+}
+
+/**
+ * Translate a gallium PIPE_BLEND_* equation into an SWR_BLEND_OP.
+ *
+ * \param blend_func  one of the PIPE_BLEND_* values
+ * \return the matching BLENDOP_* value; an unrecognized value asserts
+ *         and falls back to BLENDOP_ADD.
+ */
+static INLINE SWR_BLEND_OP
+swr_convert_blend_func(const UINT blend_func)
+{
+   SWR_BLEND_OP blend_op = BLENDOP_ADD;
+
+   switch (blend_func) {
+   case PIPE_BLEND_ADD:
+      blend_op = BLENDOP_ADD;
+      break;
+   case PIPE_BLEND_SUBTRACT:
+      blend_op = BLENDOP_SUBTRACT;
+      break;
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      blend_op = BLENDOP_REVSUBTRACT;
+      break;
+   case PIPE_BLEND_MIN:
+      blend_op = BLENDOP_MIN;
+      break;
+   case PIPE_BLEND_MAX:
+      blend_op = BLENDOP_MAX;
+      break;
+   default:
+      assert(0 && "Invalid blend func");
+      break;
+   }
+
+   return blend_op;
+}
+
+/**
+ * Translate a gallium PIPE_BLENDFACTOR_* value into an SWR_BLEND_FACTOR.
+ *
+ * \param blend_factor  one of the PIPE_BLENDFACTOR_* values
+ * \return the same-named BLENDFACTOR_* value; an unrecognized value
+ *         asserts and falls back to BLENDFACTOR_ONE.
+ */
+static INLINE SWR_BLEND_FACTOR
+swr_convert_blend_factor(const UINT blend_factor)
+{
+   SWR_BLEND_FACTOR factor = BLENDFACTOR_ONE;
+
+   switch (blend_factor) {
+   case PIPE_BLENDFACTOR_ONE:
+      factor = BLENDFACTOR_ONE;
+      break;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+      factor = BLENDFACTOR_SRC_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      factor = BLENDFACTOR_SRC_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      factor = BLENDFACTOR_DST_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+      factor = BLENDFACTOR_DST_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      factor = BLENDFACTOR_SRC_ALPHA_SATURATE;
+      break;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+      factor = BLENDFACTOR_CONST_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      factor = BLENDFACTOR_CONST_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+      factor = BLENDFACTOR_SRC1_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+      factor = BLENDFACTOR_SRC1_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_ZERO:
+      factor = BLENDFACTOR_ZERO;
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+      factor = BLENDFACTOR_INV_SRC_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      factor = BLENDFACTOR_INV_SRC_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      factor = BLENDFACTOR_INV_DST_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+      factor = BLENDFACTOR_INV_DST_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      factor = BLENDFACTOR_INV_CONST_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      factor = BLENDFACTOR_INV_CONST_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      factor = BLENDFACTOR_INV_SRC1_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      factor = BLENDFACTOR_INV_SRC1_ALPHA;
+      break;
+   default:
+      assert(0 && "Invalid blend factor");
+      break;
+   }
+
+   return factor;
+}
+
+/**
+ * Translate a gallium texture target into an SWR surface type.
+ *
+ * Array targets share the surface type of their non-array counterpart,
+ * and PIPE_TEXTURE_RECT is treated as a 2D surface.
+ *
+ * \param target  the gallium texture target
+ * \return the SWR surface type; an unrecognized target asserts and
+ *         falls back to SURFACE_NULL.
+ */
+static INLINE enum SWR_SURFACE_TYPE
+swr_convert_target_type(const enum pipe_texture_target target)
+{
+   enum SWR_SURFACE_TYPE surface = SURFACE_NULL;
+
+   switch (target) {
+   case PIPE_BUFFER:
+      surface = SURFACE_BUFFER;
+      break;
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      surface = SURFACE_1D;
+      break;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_RECT:
+      surface = SURFACE_2D;
+      break;
+   case PIPE_TEXTURE_3D:
+      surface = SURFACE_3D;
+      break;
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      surface = SURFACE_CUBE;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   return surface;
+}
+#endif
diff --git a/src/gallium/drivers/swr/swr_tex_sample.cpp b/src/gallium/drivers/swr/swr_tex_sample.cpp
new file mode 100644
index 0000000..8e01e32
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_tex_sample.cpp
@@ -0,0 +1,338 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Largely a copy of llvmpipe's lp_tex_sample.c
+ */
+
+/**
+ * Texture sampling code generation
+ *
+ * This file is nothing more than ugly glue between three largely independent
+ * entities:
+ * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa)
+ * - texture sampling code generation (i.e., lp_build_sample_soa)
+ * - SWR driver
+ *
+ * All interesting code is in the functions mentioned above. There is really
+ * nothing to see here.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "state.h"
+#include "JitManager.h"
+#include "state_llvm.h"
+
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_sample.h"
+#include "gallivm/lp_bld_tgsi.h"
+#include "util/u_memory.h"
+
+#include "swr_tex_sample.h"
+#include "swr_context_llvm.h"
+
+
+/**
+ * This provides the bridge between the sampler/texture state stored in
+ * the draw context at runtime and the sampler code generator.  It
+ * supplies the texture layout information the code generator asks for
+ * through the callbacks in the embedded lp_sampler_dynamic_state.
+ *
+ * NOTE(review): this comment was inherited from llvmpipe, where the
+ * runtime structures are lp_jit_context and lp_jit_texture; here the
+ * accessors index the context with swr_draw_context_* and
+ * swr_jit_texture_* / swr_jit_sampler_* constants.
+ */
+struct swr_sampler_dynamic_state {
+ /* Must stay first: the struct is handed out as a pointer to 'base' */
+ struct lp_sampler_dynamic_state base;
+
+ /* Static state array supplied by the creator; not freed by this file */
+ const struct swr_sampler_static_state *static_state;
+};
+
+
+/**
+ * This is the bridge between our sampler and the TGSI translator.
+ *
+ * The emit callbacks receive a pointer to 'base' and cast it back to
+ * this struct, so 'base' has to remain the first member.
+ */
+struct swr_sampler_soa {
+ struct lp_build_sampler_soa base;
+
+ struct swr_sampler_dynamic_state dynamic_state;
+};
+
+
+/**
+ * Emit code addressing (and optionally loading) one member of a texture
+ * entry in the swr_draw_context_texturesFS array of the draw context.
+ *
+ * \param base          unused here; part of the callback signature
+ * \param gallivm       gallivm state whose builder receives the code
+ * \param context_ptr   LLVM pointer to the draw context
+ * \param texture_unit  index into the texture array
+ * \param member_index  index of the member inside the texture entry
+ * \param member_name   name used only to label the resulting LLVM value
+ * \param emit_load     if TRUE, emit the LLVM load instruction to actually
+ *                      fetch the field's value.  Otherwise, just emit the
+ *                      GEP code to address the field.
+ * \return the loaded value or the member pointer, depending on emit_load
+ *
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+swr_texture_member(const struct lp_sampler_dynamic_state *base,
+                   struct gallivm_state *gallivm,
+                   LLVMValueRef context_ptr,
+                   unsigned texture_unit,
+                   unsigned member_index,
+                   const char *member_name,
+                   boolean emit_load)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef gep_path[4];
+   LLVMValueRef member_ptr;
+   LLVMValueRef result;
+
+   assert(texture_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+
+   /* GEP path: context[0] . textures [unit] . member */
+   gep_path[0] = lp_build_const_int32(gallivm, 0);
+   gep_path[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesFS);
+   gep_path[2] = lp_build_const_int32(gallivm, texture_unit);
+   gep_path[3] = lp_build_const_int32(gallivm, member_index);
+
+   member_ptr =
+      LLVMBuildGEP(builder, context_ptr, gep_path, Elements(gep_path), "");
+
+   /* Either dereference the member or hand back its address. */
+   result = emit_load ? LLVMBuildLoad(builder, member_ptr, "") : member_ptr;
+
+   lp_build_name(result, "context.texture%u.%s", texture_unit, member_name);
+
+   return result;
+}
+
+
+/**
+ * Helper macro to instantiate the functions that generate the code to
+ * fetch the members of a texture entry to fulfill the sampler code
+ * generator requests.
+ *
+ * SWR_TEXTURE_MEMBER(_name, _emit_load) defines a static function
+ * swr_texture_<_name>() that forwards to swr_texture_member() with the
+ * member index swr_jit_texture_<_name>, the stringified name as label
+ * and _emit_load as the load/address selector.
+ *
+ * This complexity is the price we have to pay to keep the texture
+ * sampler code generator a reusable module without dependencies to
+ * swr internals.
+ */
+#define SWR_TEXTURE_MEMBER(_name, _emit_load) \
+ static LLVMValueRef swr_texture_##_name( \
+ const struct lp_sampler_dynamic_state *base, \
+ struct gallivm_state *gallivm, \
+ LLVMValueRef context_ptr, \
+ unsigned texture_unit) \
+ { \
+ return swr_texture_member(base, \
+ gallivm, \
+ context_ptr, \
+ texture_unit, \
+ swr_jit_texture_##_name, \
+ #_name, \
+ _emit_load); \
+ }
+
+
+/*
+ * Texture member accessors.  Members instantiated with TRUE are loaded
+ * and returned by value; those instantiated with FALSE are returned as
+ * a pointer to the member (GEP only, no load).
+ */
+SWR_TEXTURE_MEMBER(width, TRUE)
+SWR_TEXTURE_MEMBER(height, TRUE)
+SWR_TEXTURE_MEMBER(depth, TRUE)
+SWR_TEXTURE_MEMBER(first_level, TRUE)
+SWR_TEXTURE_MEMBER(last_level, TRUE)
+SWR_TEXTURE_MEMBER(base_ptr, TRUE)
+SWR_TEXTURE_MEMBER(row_stride, FALSE)
+SWR_TEXTURE_MEMBER(img_stride, FALSE)
+SWR_TEXTURE_MEMBER(mip_offsets, FALSE)
+
+
+/**
+ * Emit code addressing (and optionally loading) one member of a sampler
+ * entry in the swr_draw_context_samplersFS array of the draw context.
+ *
+ * \param base          unused here; part of the callback signature
+ * \param gallivm       gallivm state whose builder receives the code
+ * \param context_ptr   LLVM pointer to the draw context
+ * \param sampler_unit  index into the sampler array
+ * \param member_index  index of the member inside the sampler entry
+ * \param member_name   name used only to label the resulting LLVM value
+ * \param emit_load     if TRUE, emit the LLVM load instruction to actually
+ *                      fetch the field's value.  Otherwise, just emit the
+ *                      GEP code to address the field.
+ * \return the loaded value or the member pointer, depending on emit_load
+ *
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+swr_sampler_member(const struct lp_sampler_dynamic_state *base,
+                   struct gallivm_state *gallivm,
+                   LLVMValueRef context_ptr,
+                   unsigned sampler_unit,
+                   unsigned member_index,
+                   const char *member_name,
+                   boolean emit_load)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef gep_path[4];
+   LLVMValueRef member_ptr;
+   LLVMValueRef result;
+
+   assert(sampler_unit < PIPE_MAX_SAMPLERS);
+
+   /* GEP path: context[0] . samplers [unit] . member */
+   gep_path[0] = lp_build_const_int32(gallivm, 0);
+   gep_path[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersFS);
+   gep_path[2] = lp_build_const_int32(gallivm, sampler_unit);
+   gep_path[3] = lp_build_const_int32(gallivm, member_index);
+
+   member_ptr =
+      LLVMBuildGEP(builder, context_ptr, gep_path, Elements(gep_path), "");
+
+   /* Either dereference the member or hand back its address. */
+   result = emit_load ? LLVMBuildLoad(builder, member_ptr, "") : member_ptr;
+
+   lp_build_name(result, "context.sampler%u.%s", sampler_unit, member_name);
+
+   return result;
+}
+
+
+/**
+ * Helper macro to instantiate the sampler member accessors.
+ *
+ * SWR_SAMPLER_MEMBER(_name, _emit_load) defines a static function
+ * swr_sampler_<_name>() that forwards to swr_sampler_member() with the
+ * member index swr_jit_sampler_<_name>, the stringified name as label
+ * and _emit_load as the load/address selector.
+ */
+#define SWR_SAMPLER_MEMBER(_name, _emit_load) \
+ static LLVMValueRef swr_sampler_##_name( \
+ const struct lp_sampler_dynamic_state *base, \
+ struct gallivm_state *gallivm, \
+ LLVMValueRef context_ptr, \
+ unsigned sampler_unit) \
+ { \
+ return swr_sampler_member(base, \
+ gallivm, \
+ context_ptr, \
+ sampler_unit, \
+ swr_jit_sampler_##_name, \
+ #_name, \
+ _emit_load); \
+ }
+
+
+/*
+ * Sampler member accessors.  The LOD members are loaded and returned by
+ * value; border_color is returned as a pointer to the member (no load).
+ */
+SWR_SAMPLER_MEMBER(min_lod, TRUE)
+SWR_SAMPLER_MEMBER(max_lod, TRUE)
+SWR_SAMPLER_MEMBER(lod_bias, TRUE)
+SWR_SAMPLER_MEMBER(border_color, FALSE)
+
+
+/**
+ * Destroy callback: release the sampler object itself.
+ * The static state it points to is not touched here.
+ */
+static void
+swr_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+ FREE(sampler);
+}
+
+
+/**
+ * Fetch filtered values from texture.
+ * The 'texel' parameter returns four vectors corresponding to R, G, B, A.
+ *
+ * The texture and sampler static states are taken from the same
+ * static_state array, indexed by params->texture_index and
+ * params->sampler_index respectively.
+ *
+ * \param base     the lp_build_sampler_soa embedded in a swr_sampler_soa
+ * \param gallivm  gallivm state receiving the generated code
+ * \param params   sampling parameters, forwarded to lp_build_sample_soa()
+ */
+static void
+swr_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
+                                 struct gallivm_state *gallivm,
+                                 const struct lp_sampler_params *params)
+{
+   struct swr_sampler_soa *soa = (struct swr_sampler_soa *)base;
+   const unsigned tex_slot = params->texture_index;
+   const unsigned samp_slot = params->sampler_index;
+
+   assert(samp_slot < PIPE_MAX_SAMPLERS);
+   assert(tex_slot < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+
+#if 0
+   lp_build_sample_nop(gallivm, params->type, params->coords, params->texel);
+#else
+   const struct swr_sampler_static_state *statics =
+      soa->dynamic_state.static_state;
+
+   lp_build_sample_soa(&statics[tex_slot].texture_state,
+                       &statics[samp_slot].sampler_state,
+                       &soa->dynamic_state.base,
+                       gallivm,
+                       params);
+#endif
+}
+
+/**
+ * Fetch the texture size.
+ *
+ * Looks up the static texture state for texture_unit and forwards every
+ * argument to lp_build_size_query_soa().
+ *
+ * \param base          the lp_build_sampler_soa embedded in a swr_sampler_soa
+ * \param explicit_lod  optional
+ * \param sizes_out     receives the results of the size query
+ */
+static void
+swr_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base,
+                                struct gallivm_state *gallivm,
+                                struct lp_type type,
+                                unsigned texture_unit,
+                                unsigned target,
+                                LLVMValueRef context_ptr,
+                                boolean is_sviewinfo,
+                                enum lp_sampler_lod_property lod_property,
+                                LLVMValueRef explicit_lod, /* optional */
+                                LLVMValueRef *sizes_out)
+{
+   struct swr_sampler_soa *soa = (struct swr_sampler_soa *)base;
+   const struct swr_sampler_static_state *unit_state;
+
+   assert(texture_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+
+   unit_state = &soa->dynamic_state.static_state[texture_unit];
+
+   lp_build_size_query_soa(gallivm,
+                           &unit_state->texture_state,
+                           &soa->dynamic_state.base,
+                           type,
+                           texture_unit,
+                           target,
+                           context_ptr,
+                           is_sviewinfo,
+                           lod_property,
+                           explicit_lod,
+                           sizes_out);
+}
+
+
+/**
+ * Allocate a sampler code generator bound to the given static state.
+ *
+ * Installs the destroy/emit callbacks and the texture and sampler member
+ * accessors, and stores the static_state pointer without copying it.
+ *
+ * \param static_state  static sampler/texture state array; only the
+ *                      pointer is kept
+ * \return the embedded lp_build_sampler_soa, or NULL if allocation fails.
+ *         It is released through its destroy callback.
+ */
+struct lp_build_sampler_soa *
+swr_sampler_soa_create(const struct swr_sampler_static_state *static_state)
+{
+   struct swr_sampler_soa *sampler = CALLOC_STRUCT(swr_sampler_soa);
+   struct lp_sampler_dynamic_state *dyn;
+
+   if (!sampler)
+      return NULL;
+
+   /* TGSI translator entry points */
+   sampler->base.destroy = swr_sampler_soa_destroy;
+   sampler->base.emit_tex_sample = swr_sampler_soa_emit_fetch_texel;
+   sampler->base.emit_size_query = swr_sampler_soa_emit_size_query;
+
+   dyn = &sampler->dynamic_state.base;
+
+   /* Texture member accessors */
+   dyn->width = swr_texture_width;
+   dyn->height = swr_texture_height;
+   dyn->depth = swr_texture_depth;
+   dyn->first_level = swr_texture_first_level;
+   dyn->last_level = swr_texture_last_level;
+   dyn->base_ptr = swr_texture_base_ptr;
+   dyn->row_stride = swr_texture_row_stride;
+   dyn->img_stride = swr_texture_img_stride;
+   dyn->mip_offsets = swr_texture_mip_offsets;
+
+   /* Sampler member accessors */
+   dyn->min_lod = swr_sampler_min_lod;
+   dyn->max_lod = swr_sampler_max_lod;
+   dyn->lod_bias = swr_sampler_lod_bias;
+   dyn->border_color = swr_sampler_border_color;
+
+   sampler->dynamic_state.static_state = static_state;
+
+   return &sampler->base;
+}
diff --git a/src/gallium/drivers/swr/swr_tex_sample.h b/src/gallium/drivers/swr/swr_tex_sample.h
new file mode 100644
index 0000000..f5c368c
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_tex_sample.h
@@ -0,0 +1,47 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#pragma once
+
+#include "gallivm/lp_bld.h"
+
+/**
+ * Static (compile-time) sampler and texture state for one slot.
+ * The sampler code generator is given an array of these.
+ */
+struct swr_sampler_static_state {
+ /*
+ * These attributes are effectively interleaved for more sane key handling.
+ * However, there might be lots of null space if the amount of samplers and
+ * textures isn't the same.
+ */
+ struct lp_static_sampler_state sampler_state;
+ struct lp_static_texture_state texture_state;
+};
+
+/**
+ * Pure-LLVM texture sampling code generator.
+ *
+ * Creates a sampler code generator for the given static state array.
+ * NOTE(review): ownership of 'key' and of the returned object is not
+ * stated in this header -- confirm against the definition.
+ */
+struct lp_build_sampler_soa *
+swr_sampler_soa_create(const struct swr_sampler_static_state *key);