diff options
author | Brian Paul <brianp@vmware.com> | 2010-01-22 12:17:02 -0700 |
---|---|---|
committer | Brian Paul <brianp@vmware.com> | 2010-01-22 12:17:02 -0700 |
commit | cd8614b0287dc5a69725ec4ee0208fad61f7789e (patch) | |
tree | 3ee089b8384e7a60c5c3a3cc87f2a633bd724bbe /src/gallium | |
parent | 2b20b604277e3cdf7afb2431b50dbb05da12ff1c (diff) | |
parent | 64871747bb7b611ffe429fbf1724bd98ee25dd84 (diff) | |
download | external_mesa3d-cd8614b0287dc5a69725ec4ee0208fad61f7789e.zip external_mesa3d-cd8614b0287dc5a69725ec4ee0208fad61f7789e.tar.gz external_mesa3d-cd8614b0287dc5a69725ec4ee0208fad61f7789e.tar.bz2 |
Merge branch 'mesa_7_7_branch'
Conflicts:
src/gallium/auxiliary/draw/draw_context.c
src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
src/gallium/auxiliary/pipebuffer/Makefile
src/gallium/auxiliary/pipebuffer/SConscript
src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
src/gallium/auxiliary/tgsi/tgsi_scan.c
src/gallium/drivers/i915/i915_surface.c
src/gallium/drivers/i915/i915_texture.c
src/gallium/drivers/llvmpipe/lp_setup.c
src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
src/gallium/drivers/llvmpipe/lp_texture.c
src/gallium/drivers/softpipe/sp_prim_vbuf.c
src/gallium/state_trackers/xorg/xorg_dri2.c
src/gallium/winsys/drm/intel/gem/intel_drm_api.c
src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
src/gallium/winsys/drm/radeon/core/radeon_drm.c
src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c
src/mesa/state_tracker/st_cb_clear.c
Diffstat (limited to 'src/gallium')
109 files changed, 3007 insertions, 842 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 8f937e3..9b6babc 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -48,12 +48,10 @@ C_SOURCES = \ draw/draw_vs_sse.c \ indices/u_indices_gen.c \ indices/u_unfilled_gen.c \ - pipebuffer/pb_buffer_fenced.c \ pipebuffer/pb_buffer_malloc.c \ pipebuffer/pb_bufmgr_alt.c \ pipebuffer/pb_bufmgr_cache.c \ pipebuffer/pb_bufmgr_debug.c \ - pipebuffer/pb_bufmgr_fenced.c \ pipebuffer/pb_bufmgr_mm.c \ pipebuffer/pb_bufmgr_ondemand.c \ pipebuffer/pb_bufmgr_pool.c \ diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index e90dfc5..fb1bc05 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -34,7 +34,6 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "draw_context.h" -#include "draw_vbuf.h" #include "draw_vs.h" #include "draw_gs.h" #include "draw_pt.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 1c6d657..11d6485 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -106,10 +106,9 @@ void draw_pipeline_destroy( struct draw_context *draw ) - - - - +/** + * Build primitive to render a point with vertex at v0. + */ static void do_point( struct draw_context *draw, const char *v0 ) { @@ -123,6 +122,10 @@ static void do_point( struct draw_context *draw, } +/** + * Build primitive to render a line with vertices at v0, v1. + * \param flags bitmask of DRAW_PIPE_EDGE_x, DRAW_PIPE_RESET_STIPPLE + */ static void do_line( struct draw_context *draw, ushort flags, const char *v0, @@ -139,6 +142,10 @@ static void do_line( struct draw_context *draw, } +/** + * Build primitive to render a triangle with vertices at v0, v1, v2. 
+ * \param flags bitmask of DRAW_PIPE_EDGE_x, DRAW_PIPE_RESET_STIPPLE + */ static void do_triangle( struct draw_context *draw, ushort flags, char *v0, @@ -157,7 +164,10 @@ static void do_triangle( struct draw_context *draw, } - +/* + * Set up macros for draw_pt_decompose.h template code. + * This code uses vertex indexes / elements. + */ #define QUAD(i0,i1,i2,i3) \ do_triangle( draw, \ ( DRAW_PIPE_RESET_STIPPLE | \ @@ -175,16 +185,16 @@ static void do_triangle( struct draw_context *draw, #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ - elts[i0], /* flags */ \ + elts[i0], /* flags */ \ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * elts[i1], \ - verts + stride * elts[i2]) + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK) ); #define LINE(flags,i0,i1) \ do_line( draw, \ - elts[i0], \ + elts[i0], \ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * elts[i1]) + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK) ); #define POINT(i0) \ do_point( draw, \ @@ -213,7 +223,9 @@ static void do_triangle( struct draw_context *draw, -/* Code to run the pipeline on a fairly arbitary collection of vertices. +/** + * Code to run the pipeline on a fairly arbitary collection of vertices. + * For drawing indexed primitives. * * Vertex headers must be pre-initialized with the * UNDEFINED_VERTEX_ID, this code will cause that id to become @@ -243,6 +255,12 @@ void draw_pipeline_run( struct draw_context *draw, draw->pipeline.vertex_count = 0; } + + +/* + * Set up macros for draw_pt_decompose.h template code. + * This code is for non-indexed rendering (no elts). + */ #define QUAD(i0,i1,i2,i3) \ do_triangle( draw, \ ( DRAW_PIPE_RESET_STIPPLE | \ @@ -293,6 +311,10 @@ void draw_pipeline_run( struct draw_context *draw, #include "draw_pt_decompose.h" + +/* + * For drawing non-indexed primitives. 
+ */ void draw_pipeline_run_linear( struct draw_context *draw, unsigned prim, struct vertex_header *vertices, diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index a5ddec5..f5ed32d 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -33,7 +33,6 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" -#include "draw/draw_vs.h" #include "tgsi/tgsi_dump.h" #include "util/u_math.h" #include "util/u_prim.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 55e7a7b..252be50 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -30,7 +30,6 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" -#include "draw/draw_vertex.h" #include "draw/draw_pt.h" #include "translate/translate.h" #include "translate/translate_cache.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 734c05f..c5dfbcf 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -40,7 +40,6 @@ #include "draw/draw_pt.h" #include "draw/draw_vs.h" -#include "translate/translate.h" struct fetch_shade_emit; diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 5515182..9728d5c 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -30,7 +30,6 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" -#include "draw/draw_vertex.h" #include "draw/draw_pt.h" struct pt_post_vs { diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 9f40030..b87a465 100644 --- 
a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -38,7 +38,6 @@ #include "draw/draw_vertex.h" #include "draw/draw_vs.h" #include "translate/translate.h" -#include "translate/translate_cache.h" /* A first pass at incorporating vertex fetch/emit functionality into */ diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile new file mode 100644 index 0000000..21d25d2 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -0,0 +1,18 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = pipebuffer + +C_SOURCES = \ + pb_buffer_fenced.c \ + pb_buffer_malloc.c \ + pb_bufmgr_alt.c \ + pb_bufmgr_cache.c \ + pb_bufmgr_debug.c \ + pb_bufmgr_mm.c \ + pb_bufmgr_ondemand.c \ + pb_bufmgr_pool.c \ + pb_bufmgr_slab.c \ + pb_validate.c + +include ../../Makefile.template diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript new file mode 100644 index 0000000..a074a55 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -0,0 +1,18 @@ +Import('*') + +pipebuffer = env.ConvenienceLibrary( + target = 'pipebuffer', + source = [ + 'pb_buffer_fenced.c', + 'pb_buffer_malloc.c', + 'pb_bufmgr_alt.c', + 'pb_bufmgr_cache.c', + 'pb_bufmgr_debug.c', + 'pb_bufmgr_mm.c', + 'pb_bufmgr_ondemand.c', + 'pb_bufmgr_pool.c', + 'pb_bufmgr_slab.c', + 'pb_validate.c', + ]) + +auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index a4b78f1..1ac424d 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2007-2009 VMware, Inc. + * Copyright 2007-2010 VMware, Inc. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -28,9 +29,9 @@ /** * \file * Implementation of fenced buffers. - * - * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com> - * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com> + * + * \author Jose Fonseca <jfonseca-at-vmware-dot-com> + * \author Thomas Hellström <thellstrom-at-vmware-dot-com> */ @@ -50,6 +51,7 @@ #include "pb_buffer.h" #include "pb_buffer_fenced.h" +#include "pb_bufmgr.h" @@ -59,32 +61,79 @@ #define SUPER(__derived) (&(__derived)->base) -struct fenced_buffer_list +struct fenced_manager { - pipe_mutex mutex; - + struct pb_manager base; + struct pb_manager *provider; struct pb_fence_ops *ops; - - pb_size numDelayed; - struct list_head delayed; - -#ifdef DEBUG - pb_size numUnfenced; + + /** + * Maximum buffer size that can be safely allocated. + */ + pb_size max_buffer_size; + + /** + * Maximum cpu memory we can allocate before we start waiting for the + * GPU to idle. + */ + pb_size max_cpu_total_size; + + /** + * Following members are mutable and protected by this mutex. + */ + pipe_mutex mutex; + + /** + * Fenced buffer list. + * + * All fenced buffers are placed in this listed, ordered from the oldest + * fence to the newest fence. + */ + struct list_head fenced; + pb_size num_fenced; + struct list_head unfenced; -#endif + pb_size num_unfenced; + + /** + * How much temporary CPU memory is being used to hold unvalidated buffers. + */ + pb_size cpu_total_size; }; /** + * Fenced buffer. + * * Wrapper around a pipe buffer which adds fencing and reference counting. */ struct fenced_buffer { + /* + * Immutable members. + */ + struct pb_buffer base; - + struct fenced_manager *mgr; + + /* + * Following members are mutable and protected by fenced_manager::mutex. + */ + + struct list_head head; + + /** + * Buffer with storage. 
+ */ struct pb_buffer *buffer; + pb_size size; + struct pb_desc desc; - /* FIXME: protect access with mutex */ + /** + * Temporary CPU storage data. Used when there isn't enough GPU memory to + * store the buffer. + */ + void *data; /** * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current @@ -93,15 +142,22 @@ struct fenced_buffer unsigned flags; unsigned mapcount; + struct pb_validate *vl; unsigned validation_flags; - struct pipe_fence_handle *fence; - struct list_head head; - struct fenced_buffer_list *list; + struct pipe_fence_handle *fence; }; +static INLINE struct fenced_manager * +fenced_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct fenced_manager *)mgr; +} + + static INLINE struct fenced_buffer * fenced_buffer(struct pb_buffer *buf) { @@ -110,221 +166,569 @@ fenced_buffer(struct pb_buffer *buf) } -static INLINE void -_fenced_buffer_add(struct fenced_buffer *fenced_buf) -{ - struct fenced_buffer_list *fenced_list = fenced_buf->list; +static void +fenced_buffer_destroy_cpu_storage_locked(struct fenced_buffer *fenced_buf); - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); - assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); - assert(fenced_buf->fence); +static enum pipe_error +fenced_buffer_create_cpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf); +static void +fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf); + +static enum pipe_error +fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf, + boolean wait); + +static enum pipe_error +fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf); + +static enum pipe_error +fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf); + + +/** + * Dump the fenced buffer list. + * + * Useful to understand failures to allocate buffers. 
+ */ +static void +fenced_manager_dump_locked(struct fenced_manager *fenced_mgr) +{ #ifdef DEBUG - LIST_DEL(&fenced_buf->head); - assert(fenced_list->numUnfenced); - --fenced_list->numUnfenced; + struct pb_fence_ops *ops = fenced_mgr->ops; + struct list_head *curr, *next; + struct fenced_buffer *fenced_buf; + + debug_printf("%10s %7s %8s %7s %10s %s\n", + "buffer", "size", "refcount", "storage", "fence", "signalled"); + + curr = fenced_mgr->unfenced.next; + next = curr->next; + while(curr != &fenced_mgr->unfenced) { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(!fenced_buf->fence); + debug_printf("%10p %7u %8u %7s\n", + (void *) fenced_buf, + fenced_buf->base.base.size, + p_atomic_read(&fenced_buf->base.base.reference.count), + fenced_buf->buffer ? "gpu" : (fenced_buf->data ? "cpu" : "none")); + curr = next; + next = curr->next; + } + + curr = fenced_mgr->fenced.next; + next = curr->next; + while(curr != &fenced_mgr->fenced) { + int signaled; + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(fenced_buf->buffer); + signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); + debug_printf("%10p %7u %8u %7s %10p %s\n", + (void *) fenced_buf, + fenced_buf->base.base.size, + p_atomic_read(&fenced_buf->base.base.reference.count), + "gpu", + (void *) fenced_buf->fence, + signaled == 0 ? "y" : "n"); + curr = next; + next = curr->next; + } +#else + (void)fenced_mgr; #endif - LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed); - ++fenced_list->numDelayed; } -/** - * Actually destroy the buffer. 
- */ static INLINE void -_fenced_buffer_destroy(struct fenced_buffer *fenced_buf) +fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) { - struct fenced_buffer_list *fenced_list = fenced_buf->list; - assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); + assert(!fenced_buf->fence); -#ifdef DEBUG assert(fenced_buf->head.prev); assert(fenced_buf->head.next); LIST_DEL(&fenced_buf->head); - assert(fenced_list->numUnfenced); - --fenced_list->numUnfenced; -#else - (void)fenced_list; -#endif - pb_reference(&fenced_buf->buffer, NULL); + assert(fenced_mgr->num_unfenced); + --fenced_mgr->num_unfenced; + + fenced_buffer_destroy_gpu_storage_locked(fenced_buf); + fenced_buffer_destroy_cpu_storage_locked(fenced_buf); + FREE(fenced_buf); } +/** + * Add the buffer to the fenced list. + * + * Reference count should be incremented before calling this function. + */ static INLINE void -_fenced_buffer_remove(struct fenced_buffer_list *fenced_list, - struct fenced_buffer *fenced_buf) +fenced_buffer_add_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); + assert(fenced_buf->fence); + + p_atomic_inc(&fenced_buf->base.base.reference.count); + + LIST_DEL(&fenced_buf->head); + assert(fenced_mgr->num_unfenced); + --fenced_mgr->num_unfenced; + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->fenced); + ++fenced_mgr->num_fenced; +} + + +/** + * Remove the buffer from the fenced list, and potentially destroy the buffer + * if the reference count reaches zero. + * + * Returns TRUE if the buffer was detroyed. 
+ */ +static INLINE boolean +fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) { - struct pb_fence_ops *ops = fenced_list->ops; + struct pb_fence_ops *ops = fenced_mgr->ops; assert(fenced_buf->fence); - assert(fenced_buf->list == fenced_list); - + assert(fenced_buf->mgr == fenced_mgr); + ops->fence_reference(ops, &fenced_buf->fence, NULL); fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; - + assert(fenced_buf->head.prev); assert(fenced_buf->head.next); - + LIST_DEL(&fenced_buf->head); - assert(fenced_list->numDelayed); - --fenced_list->numDelayed; - -#ifdef DEBUG - LIST_ADDTAIL(&fenced_buf->head, &fenced_list->unfenced); - ++fenced_list->numUnfenced; -#endif - - /** - * FIXME!!! - */ + assert(fenced_mgr->num_fenced); + --fenced_mgr->num_fenced; + + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->unfenced); + ++fenced_mgr->num_unfenced; + + if (p_atomic_dec_zero(&fenced_buf->base.base.reference.count)) { + fenced_buffer_destroy_locked(fenced_mgr, fenced_buf); + return TRUE; + } - if(!pipe_is_referenced(&fenced_buf->base.base.reference)) - _fenced_buffer_destroy(fenced_buf); + return FALSE; } +/** + * Wait for the fence to expire, and remove it from the fenced list. + * + * This function will release and re-aquire the mutex, so any copy of mutable + * state must be discarded after calling it. 
+ */ static INLINE enum pipe_error -_fenced_buffer_finish(struct fenced_buffer *fenced_buf) +fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) { - struct fenced_buffer_list *fenced_list = fenced_buf->list; - struct pb_fence_ops *ops = fenced_list->ops; + struct pb_fence_ops *ops = fenced_mgr->ops; + enum pipe_error ret = PIPE_ERROR; #if 0 debug_warning("waiting for GPU"); #endif + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->fence); + if(fenced_buf->fence) { - if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) { - return PIPE_ERROR; + struct pipe_fence_handle *fence = NULL; + int finished; + boolean proceed; + + ops->fence_reference(ops, &fence, fenced_buf->fence); + + pipe_mutex_unlock(fenced_mgr->mutex); + + finished = ops->fence_finish(ops, fenced_buf->fence, 0); + + pipe_mutex_lock(fenced_mgr->mutex); + + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + + /* + * Only proceed if the fence object didn't change in the meanwhile. + * Otherwise assume the work has been already carried out by another + * thread that re-aquired the lock before us. + */ + proceed = fence == fenced_buf->fence ? TRUE : FALSE; + + ops->fence_reference(ops, &fence, NULL); + + if(proceed && finished == 0) { + /* + * Remove from the fenced list + */ + + boolean destroyed; + + destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf); + + /* TODO: remove consequents buffers with the same fence? */ + + assert(!destroyed); + + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; + + ret = PIPE_OK; } - /* Remove from the fenced list */ - /* TODO: remove consequents */ - _fenced_buffer_remove(fenced_list, fenced_buf); } - fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; - return PIPE_OK; + return ret; } /** - * Free as many fenced buffers from the list head as possible. + * Remove as many fenced buffers from the fenced list as possible. 
+ * + * Returns TRUE if at least one buffer was removed. */ -static void -_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, - int wait) +static boolean +fenced_manager_check_signalled_locked(struct fenced_manager *fenced_mgr, + boolean wait) { - struct pb_fence_ops *ops = fenced_list->ops; + struct pb_fence_ops *ops = fenced_mgr->ops; struct list_head *curr, *next; struct fenced_buffer *fenced_buf; struct pb_buffer *pb_buf; struct pipe_fence_handle *prev_fence = NULL; + boolean ret = FALSE; - curr = fenced_list->delayed.next; + curr = fenced_mgr->fenced.next; next = curr->next; - while(curr != &fenced_list->delayed) { + while(curr != &fenced_mgr->fenced) { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); if(fenced_buf->fence != prev_fence) { int signaled; - if (wait) + + if (wait) { signaled = ops->fence_finish(ops, fenced_buf->fence, 0); - else + + /* + * Don't return just now. Instead preemptively check if the + * following buffers' fences already expired, without further waits. + */ + wait = FALSE; + } + else { signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); - if (signaled != 0) - break; + } + + if (signaled != 0) { + return ret; + } + prev_fence = fenced_buf->fence; } else { + /* This buffer's fence object is identical to the previous buffer's + * fence object, so no need to check the fence again. + */ assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); } - _fenced_buffer_remove(fenced_list, fenced_buf); + fenced_buffer_remove_locked(fenced_mgr, fenced_buf); + + ret = TRUE; + + curr = next; + next = curr->next; + } + + return ret; +} + + +/** + * Try to free some GPU memory by backing it up into CPU memory. + * + * Returns TRUE if at least one buffer was freed. 
+ */ +static boolean +fenced_manager_free_gpu_storage_locked(struct fenced_manager *fenced_mgr) +{ + struct list_head *curr, *next; + struct fenced_buffer *fenced_buf; - curr = next; + curr = fenced_mgr->unfenced.next; + next = curr->next; + while(curr != &fenced_mgr->unfenced) { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + + /* + * We can only move storage if the buffer is not mapped and not + * validated. + */ + if(fenced_buf->buffer && + !fenced_buf->mapcount && + !fenced_buf->vl) { + enum pipe_error ret; + + ret = fenced_buffer_create_cpu_storage_locked(fenced_mgr, fenced_buf); + if(ret == PIPE_OK) { + ret = fenced_buffer_copy_storage_to_cpu_locked(fenced_buf); + if(ret == PIPE_OK) { + fenced_buffer_destroy_gpu_storage_locked(fenced_buf); + return TRUE; + } + fenced_buffer_destroy_cpu_storage_locked(fenced_buf); + } + } + + curr = next; next = curr->next; } + + return FALSE; } +/** + * Destroy CPU storage for this buffer. + */ static void -fenced_buffer_destroy(struct pb_buffer *buf) +fenced_buffer_destroy_cpu_storage_locked(struct fenced_buffer *fenced_buf) { - struct fenced_buffer *fenced_buf = fenced_buffer(buf); - struct fenced_buffer_list *fenced_list = fenced_buf->list; + if(fenced_buf->data) { + align_free(fenced_buf->data); + fenced_buf->data = NULL; + assert(fenced_buf->mgr->cpu_total_size >= fenced_buf->size); + fenced_buf->mgr->cpu_total_size -= fenced_buf->size; + } +} - pipe_mutex_lock(fenced_list->mutex); - assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); - if (fenced_buf->fence) { - struct pb_fence_ops *ops = fenced_list->ops; - if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { - struct list_head *curr, *prev; - curr = &fenced_buf->head; - prev = curr->prev; - do { - fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); - _fenced_buffer_remove(fenced_list, fenced_buf); - curr = prev; - prev = curr->prev; - } while (curr != 
&fenced_list->delayed); - } - else { - /* delay destruction */ + +/** + * Create CPU storage for this buffer. + */ +static enum pipe_error +fenced_buffer_create_cpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + assert(!fenced_buf->data); + if(fenced_buf->data) + return PIPE_OK; + + if (fenced_mgr->cpu_total_size + fenced_buf->size > fenced_mgr->max_cpu_total_size) + return PIPE_ERROR_OUT_OF_MEMORY; + + fenced_buf->data = align_malloc(fenced_buf->size, fenced_buf->desc.alignment); + if(!fenced_buf->data) + return PIPE_ERROR_OUT_OF_MEMORY; + + fenced_mgr->cpu_total_size += fenced_buf->size; + + return PIPE_OK; +} + + +/** + * Destroy the GPU storage. + */ +static void +fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf) +{ + if(fenced_buf->buffer) { + pb_reference(&fenced_buf->buffer, NULL); + } +} + + +/** + * Try to create GPU storage for this buffer. + * + * This function is a shorthand around pb_manager::create_buffer for + * fenced_buffer_create_gpu_storage_locked()'s benefit. + */ +static INLINE boolean +fenced_buffer_try_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + struct pb_manager *provider = fenced_mgr->provider; + + assert(!fenced_buf->buffer); + + fenced_buf->buffer = provider->create_buffer(fenced_mgr->provider, + fenced_buf->size, + &fenced_buf->desc); + return fenced_buf->buffer ? TRUE : FALSE; +} + + +/** + * Create GPU storage for this buffer. + */ +static enum pipe_error +fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf, + boolean wait) +{ + assert(!fenced_buf->buffer); + + /* + * Check for signaled buffers before trying to allocate. 
+ */ + fenced_manager_check_signalled_locked(fenced_mgr, FALSE); + + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf); + + /* + * Keep trying while there is some sort of progress: + * - fences are expiring, + * - or buffers are being being swapped out from GPU memory into CPU memory. + */ + while(!fenced_buf->buffer && + (fenced_manager_check_signalled_locked(fenced_mgr, FALSE) || + fenced_manager_free_gpu_storage_locked(fenced_mgr))) { + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf); + } + + if(!fenced_buf->buffer && wait) { + /* + * Same as before, but this time around, wait to free buffers if + * necessary. + */ + while(!fenced_buf->buffer && + (fenced_manager_check_signalled_locked(fenced_mgr, TRUE) || + fenced_manager_free_gpu_storage_locked(fenced_mgr))) { + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf); } } - else { - _fenced_buffer_destroy(fenced_buf); + + if(!fenced_buf->buffer) { + if(0) + fenced_manager_dump_locked(fenced_mgr); + + /* give up */ + return PIPE_ERROR_OUT_OF_MEMORY; } - pipe_mutex_unlock(fenced_list->mutex); + + return PIPE_OK; +} + + +static enum pipe_error +fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf) +{ + uint8_t *map; + + assert(fenced_buf->data); + assert(fenced_buf->buffer); + + map = pb_map(fenced_buf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + if(!map) + return PIPE_ERROR; + + memcpy(map, fenced_buf->data, fenced_buf->size); + + pb_unmap(fenced_buf->buffer); + + return PIPE_OK; +} + + +static enum pipe_error +fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf) +{ + const uint8_t *map; + + assert(fenced_buf->data); + assert(fenced_buf->buffer); + + map = pb_map(fenced_buf->buffer, PIPE_BUFFER_USAGE_CPU_READ); + if(!map) + return PIPE_ERROR; + + memcpy(fenced_buf->data, map, fenced_buf->size); + + pb_unmap(fenced_buf->buffer); + + return PIPE_OK; +} + + +static void +fenced_buffer_destroy(struct pb_buffer *buf) +{ + struct 
fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + + assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); + + pipe_mutex_lock(fenced_mgr->mutex); + + fenced_buffer_destroy_locked(fenced_mgr, fenced_buf); + + pipe_mutex_unlock(fenced_mgr->mutex); } static void * -fenced_buffer_map(struct pb_buffer *buf, +fenced_buffer_map(struct pb_buffer *buf, unsigned flags) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); - struct fenced_buffer_list *fenced_list = fenced_buf->list; - struct pb_fence_ops *ops = fenced_list->ops; - void *map; + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + struct pb_fence_ops *ops = fenced_mgr->ops; + void *map = NULL; + + pipe_mutex_lock(fenced_mgr->mutex); assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE)); - - /* Serialize writes */ - if((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) || - ((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) { - if(flags & PIPE_BUFFER_USAGE_DONTBLOCK) { - /* Don't wait for the GPU to finish writing */ - if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) - _fenced_buffer_remove(fenced_list, fenced_buf); - else - return NULL; + + /* + * Serialize writes. + */ + while((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) || + ((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && + (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) { + + /* + * Don't wait for the GPU to finish accessing it, if blocking is forbidden. + */ + if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) && + ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { + goto done; } - else { - /* Wait for the GPU to finish writing */ - _fenced_buffer_finish(fenced_buf); + + if (flags & PIPE_BUFFER_USAGE_UNSYNCHRONIZED) { + break; } + + /* + * Wait for the GPU to finish accessing. This will release and re-acquire + * the mutex, so all copies of mutable state must be discarded. 
+ */ + fenced_buffer_finish_locked(fenced_mgr, fenced_buf); } -#if 0 - /* Check for CPU write access (read is OK) */ - if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { - /* this is legal -- just for debugging */ - debug_warning("concurrent CPU writes"); + if(fenced_buf->buffer) { + map = pb_map(fenced_buf->buffer, flags); } -#endif - - map = pb_map(fenced_buf->buffer, flags); + else { + assert(fenced_buf->data); + map = fenced_buf->data; + } + if(map) { ++fenced_buf->mapcount; fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE; } +done: + pipe_mutex_unlock(fenced_mgr->mutex); + return map; } @@ -333,13 +737,20 @@ static void fenced_buffer_unmap(struct pb_buffer *buf) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + + pipe_mutex_lock(fenced_mgr->mutex); + assert(fenced_buf->mapcount); if(fenced_buf->mapcount) { - pb_unmap(fenced_buf->buffer); + if (fenced_buf->buffer) + pb_unmap(fenced_buf->buffer); --fenced_buf->mapcount; if(!fenced_buf->mapcount) fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; } + + pipe_mutex_unlock(fenced_mgr->mutex); } @@ -349,48 +760,72 @@ fenced_buffer_validate(struct pb_buffer *buf, unsigned flags) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; enum pipe_error ret; - + + pipe_mutex_lock(fenced_mgr->mutex); + if(!vl) { /* invalidate */ fenced_buf->vl = NULL; fenced_buf->validation_flags = 0; - return PIPE_OK; + ret = PIPE_OK; + goto done; } - + assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; - /* Buffer cannot be validated in two different lists */ - if(fenced_buf->vl && fenced_buf->vl != vl) - return PIPE_ERROR_RETRY; - -#if 0 - /* Do not validate if buffer is still mapped */ - if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { - /* TODO: wait for the thread that mapped the buffer 
to unmap it */ - return PIPE_ERROR_RETRY; + /* Buffer cannot be validated in two different lists */ + if(fenced_buf->vl && fenced_buf->vl != vl) { + ret = PIPE_ERROR_RETRY; + goto done; } - /* Final sanity checking */ - assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); - assert(!fenced_buf->mapcount); -#endif if(fenced_buf->vl == vl && (fenced_buf->validation_flags & flags) == flags) { /* Nothing to do -- buffer already validated */ - return PIPE_OK; + ret = PIPE_OK; + goto done; + } + + /* + * Create and update GPU storage. + */ + if(!fenced_buf->buffer) { + assert(!fenced_buf->mapcount); + + ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, TRUE); + if(ret != PIPE_OK) { + goto done; + } + + ret = fenced_buffer_copy_storage_to_gpu_locked(fenced_buf); + if(ret != PIPE_OK) { + fenced_buffer_destroy_gpu_storage_locked(fenced_buf); + goto done; + } + + if(fenced_buf->mapcount) { + debug_printf("warning: validating a buffer while it is still mapped\n"); + } + else { + fenced_buffer_destroy_cpu_storage_locked(fenced_buf); + } } - + ret = pb_validate(fenced_buf->buffer, vl, flags); if (ret != PIPE_OK) - return ret; - + goto done; + fenced_buf->vl = vl; fenced_buf->validation_flags |= flags; - - return PIPE_OK; + +done: + pipe_mutex_unlock(fenced_mgr->mutex); + + return ret; } @@ -398,36 +833,37 @@ static void fenced_buffer_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence) { - struct fenced_buffer *fenced_buf; - struct fenced_buffer_list *fenced_list; - struct pb_fence_ops *ops; + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + struct pb_fence_ops *ops = fenced_mgr->ops; - fenced_buf = fenced_buffer(buf); - fenced_list = fenced_buf->list; - ops = fenced_list->ops; - - if(fence == fenced_buf->fence) { - /* Nothing to do */ - return; - } + pipe_mutex_lock(fenced_mgr->mutex); - assert(fenced_buf->vl); - assert(fenced_buf->validation_flags); - - 
pipe_mutex_lock(fenced_list->mutex); - if (fenced_buf->fence) - _fenced_buffer_remove(fenced_list, fenced_buf); - if (fence) { - ops->fence_reference(ops, &fenced_buf->fence, fence); - fenced_buf->flags |= fenced_buf->validation_flags; - _fenced_buffer_add(fenced_buf); + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + assert(fenced_buf->buffer); + + if(fence != fenced_buf->fence) { + assert(fenced_buf->vl); + assert(fenced_buf->validation_flags); + + if (fenced_buf->fence) { + boolean destroyed; + destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf); + assert(!destroyed); + } + if (fence) { + ops->fence_reference(ops, &fenced_buf->fence, fence); + fenced_buf->flags |= fenced_buf->validation_flags; + fenced_buffer_add_locked(fenced_mgr, fenced_buf); + } + + pb_fence(fenced_buf->buffer, fence); + + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; } - pipe_mutex_unlock(fenced_list->mutex); - - pb_fence(fenced_buf->buffer, fence); - fenced_buf->vl = NULL; - fenced_buf->validation_flags = 0; + pipe_mutex_unlock(fenced_mgr->mutex); } @@ -437,11 +873,29 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf, pb_size *offset) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); - pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + + pipe_mutex_lock(fenced_mgr->mutex); + + /* + * This should only be called when the buffer is validated. Typically + * when processing relocations. 
+ */ + assert(fenced_buf->vl); + assert(fenced_buf->buffer); + + if(fenced_buf->buffer) + pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); + else { + *base_buf = buf; + *offset = 0; + } + + pipe_mutex_unlock(fenced_mgr->mutex); } -static const struct pb_vtbl +static const struct pb_vtbl fenced_buffer_vtbl = { fenced_buffer_destroy, fenced_buffer_map, @@ -452,147 +906,166 @@ fenced_buffer_vtbl = { }; -struct pb_buffer * -fenced_buffer_create(struct fenced_buffer_list *fenced_list, - struct pb_buffer *buffer) +/** + * Wrap a buffer in a fenced buffer. + */ +static struct pb_buffer * +fenced_bufmgr_create_buffer(struct pb_manager *mgr, + pb_size size, + const struct pb_desc *desc) { - struct fenced_buffer *buf; - - if(!buffer) - return NULL; - - buf = CALLOC_STRUCT(fenced_buffer); - if(!buf) { - pb_reference(&buffer, NULL); - return NULL; + struct fenced_manager *fenced_mgr = fenced_manager(mgr); + struct fenced_buffer *fenced_buf; + enum pipe_error ret; + + /* + * Don't stall the GPU, waste time evicting buffers, or waste memory + * trying to create a buffer that will most likely never fit into the + * graphics aperture. 
+ */ + if(size > fenced_mgr->max_buffer_size) { + goto no_buffer; } - - pipe_reference_init(&buf->base.base.reference, 1); - buf->base.base.alignment = buffer->base.alignment; - buf->base.base.usage = buffer->base.usage; - buf->base.base.size = buffer->base.size; - - buf->base.vtbl = &fenced_buffer_vtbl; - buf->buffer = buffer; - buf->list = fenced_list; - -#ifdef DEBUG - pipe_mutex_lock(fenced_list->mutex); - LIST_ADDTAIL(&buf->head, &fenced_list->unfenced); - ++fenced_list->numUnfenced; - pipe_mutex_unlock(fenced_list->mutex); -#endif - return &buf->base; -} + fenced_buf = CALLOC_STRUCT(fenced_buffer); + if(!fenced_buf) + goto no_buffer; + pipe_reference_init(&fenced_buf->base.base.reference, 1); + fenced_buf->base.base.alignment = desc->alignment; + fenced_buf->base.base.usage = desc->usage; + fenced_buf->base.base.size = size; + fenced_buf->size = size; + fenced_buf->desc = *desc; -struct fenced_buffer_list * -fenced_buffer_list_create(struct pb_fence_ops *ops) -{ - struct fenced_buffer_list *fenced_list; + fenced_buf->base.vtbl = &fenced_buffer_vtbl; + fenced_buf->mgr = fenced_mgr; - fenced_list = CALLOC_STRUCT(fenced_buffer_list); - if (!fenced_list) - return NULL; + pipe_mutex_lock(fenced_mgr->mutex); + + /* + * Try to create GPU storage without stalling, + */ + ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, FALSE); - fenced_list->ops = ops; + /* + * Attempt to use CPU memory to avoid stalling the GPU. + */ + if(ret != PIPE_OK) { + ret = fenced_buffer_create_cpu_storage_locked(fenced_mgr, fenced_buf); + } - LIST_INITHEAD(&fenced_list->delayed); - fenced_list->numDelayed = 0; - -#ifdef DEBUG - LIST_INITHEAD(&fenced_list->unfenced); - fenced_list->numUnfenced = 0; -#endif + /* + * Create GPU storage, waiting for some to be available. + */ + if(ret != PIPE_OK) { + ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, TRUE); + } + + /* + * Give up. 
+ */ + if(ret != PIPE_OK) { + goto no_storage; + } - pipe_mutex_init(fenced_list->mutex); + assert(fenced_buf->buffer || fenced_buf->data); - return fenced_list; -} + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->unfenced); + ++fenced_mgr->num_unfenced; + pipe_mutex_unlock(fenced_mgr->mutex); + return &fenced_buf->base; -void -fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, - int wait) -{ - pipe_mutex_lock(fenced_list->mutex); - _fenced_buffer_list_check_free(fenced_list, wait); - pipe_mutex_unlock(fenced_list->mutex); +no_storage: + pipe_mutex_unlock(fenced_mgr->mutex); + FREE(fenced_buf); +no_buffer: + return NULL; } -#ifdef DEBUG -void -fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) +static void +fenced_bufmgr_flush(struct pb_manager *mgr) { - struct pb_fence_ops *ops = fenced_list->ops; - struct list_head *curr, *next; - struct fenced_buffer *fenced_buf; + struct fenced_manager *fenced_mgr = fenced_manager(mgr); - pipe_mutex_lock(fenced_list->mutex); + pipe_mutex_lock(fenced_mgr->mutex); + while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE)) + ; + pipe_mutex_unlock(fenced_mgr->mutex); - debug_printf("%10s %7s %7s %10s %s\n", - "buffer", "size", "refcount", "fence", "signalled"); - - curr = fenced_list->unfenced.next; - next = curr->next; - while(curr != &fenced_list->unfenced) { - fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - assert(!fenced_buf->fence); - debug_printf("%10p %7u %7u\n", - (void *) fenced_buf, - fenced_buf->base.base.size, - p_atomic_read(&fenced_buf->base.base.reference.count)); - curr = next; - next = curr->next; - } - - curr = fenced_list->delayed.next; - next = curr->next; - while(curr != &fenced_list->delayed) { - int signaled; - fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); - debug_printf("%10p %7u %7u %10p %s\n", - (void *) fenced_buf, - fenced_buf->base.base.size, - 
p_atomic_read(&fenced_buf->base.base.reference.count), - (void *) fenced_buf->fence, - signaled == 0 ? "y" : "n"); - curr = next; - next = curr->next; - } - - pipe_mutex_unlock(fenced_list->mutex); + assert(fenced_mgr->provider->flush); + if(fenced_mgr->provider->flush) + fenced_mgr->provider->flush(fenced_mgr->provider); } -#endif -void -fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) +static void +fenced_bufmgr_destroy(struct pb_manager *mgr) { - pipe_mutex_lock(fenced_list->mutex); + struct fenced_manager *fenced_mgr = fenced_manager(mgr); + + pipe_mutex_lock(fenced_mgr->mutex); /* Wait on outstanding fences */ - while (fenced_list->numDelayed) { - pipe_mutex_unlock(fenced_list->mutex); + while (fenced_mgr->num_fenced) { + pipe_mutex_unlock(fenced_mgr->mutex); #if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) sched_yield(); #endif - _fenced_buffer_list_check_free(fenced_list, 1); - pipe_mutex_lock(fenced_list->mutex); + pipe_mutex_lock(fenced_mgr->mutex); + while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE)) + ; } #ifdef DEBUG - /*assert(!fenced_list->numUnfenced);*/ + /*assert(!fenced_mgr->num_unfenced);*/ #endif - - pipe_mutex_unlock(fenced_list->mutex); - - fenced_list->ops->destroy(fenced_list->ops); - - FREE(fenced_list); + + pipe_mutex_unlock(fenced_mgr->mutex); + pipe_mutex_destroy(fenced_mgr->mutex); + + if(fenced_mgr->provider) + fenced_mgr->provider->destroy(fenced_mgr->provider); + + fenced_mgr->ops->destroy(fenced_mgr->ops); + + FREE(fenced_mgr); } +struct pb_manager * +fenced_bufmgr_create(struct pb_manager *provider, + struct pb_fence_ops *ops, + pb_size max_buffer_size, + pb_size max_cpu_total_size) +{ + struct fenced_manager *fenced_mgr; + + if(!provider) + return NULL; + + fenced_mgr = CALLOC_STRUCT(fenced_manager); + if (!fenced_mgr) + return NULL; + + fenced_mgr->base.destroy = fenced_bufmgr_destroy; + fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer; + 
fenced_mgr->base.flush = fenced_bufmgr_flush; + + fenced_mgr->provider = provider; + fenced_mgr->ops = ops; + fenced_mgr->max_buffer_size = max_buffer_size; + fenced_mgr->max_cpu_total_size = max_cpu_total_size; + + LIST_INITHEAD(&fenced_mgr->fenced); + fenced_mgr->num_fenced = 0; + + LIST_INITHEAD(&fenced_mgr->unfenced); + fenced_mgr->num_unfenced = 0; + + pipe_mutex_init(fenced_mgr->mutex); + + return &fenced_mgr->base; +} diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h index 034ca1e..0372f81 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h @@ -98,43 +98,6 @@ struct pb_fence_ops }; -/** - * Create a fenced buffer list. - * - * See also fenced_bufmgr_create for a more convenient way to use this. - */ -struct fenced_buffer_list * -fenced_buffer_list_create(struct pb_fence_ops *ops); - - -/** - * Walk the fenced buffer list to check and free signalled buffers. - */ -void -fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, - int wait); - - -#ifdef DEBUG -void -fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list); -#endif - - -void -fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list); - - -/** - * Wrap a buffer in a fenced buffer. - * - * NOTE: this will not increase the buffer reference count. 
- */ -struct pb_buffer * -fenced_buffer_create(struct fenced_buffer_list *fenced, - struct pb_buffer *buffer); - - #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 8c8d713..0666991 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -175,7 +175,9 @@ struct pb_fence_ops; */ struct pb_manager * fenced_bufmgr_create(struct pb_manager *provider, - struct pb_fence_ops *ops); + struct pb_fence_ops *ops, + pb_size max_buffer_size, + pb_size max_cpu_total_size); struct pb_manager * diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 6e3214c..8f74180 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -371,6 +371,9 @@ pb_debug_manager_create_buffer(struct pb_manager *_mgr, struct pb_desc real_desc; pb_size real_size; + assert(size); + assert(desc->alignment); + buf = CALLOC_STRUCT(pb_debug_buffer); if(!buf) return NULL; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c deleted file mode 100644 index 97dd142..0000000 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ /dev/null @@ -1,152 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ - -/** - * \file - * A buffer manager that wraps buffers in fenced buffers. 
- * - * \author Jose Fonseca <jrfonseca@tungstengraphics.dot.com> - */ - - -#include "util/u_debug.h" -#include "util/u_memory.h" - -#include "pb_buffer.h" -#include "pb_buffer_fenced.h" -#include "pb_bufmgr.h" - - -struct fenced_pb_manager -{ - struct pb_manager base; - - struct pb_manager *provider; - - struct fenced_buffer_list *fenced_list; -}; - - -static INLINE struct fenced_pb_manager * -fenced_pb_manager(struct pb_manager *mgr) -{ - assert(mgr); - return (struct fenced_pb_manager *)mgr; -} - - -static struct pb_buffer * -fenced_bufmgr_create_buffer(struct pb_manager *mgr, - pb_size size, - const struct pb_desc *desc) -{ - struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr); - struct pb_buffer *buf; - struct pb_buffer *fenced_buf; - - /* check for free buffers before allocating new ones */ - fenced_buffer_list_check_free(fenced_mgr->fenced_list, 0); - - buf = fenced_mgr->provider->create_buffer(fenced_mgr->provider, size, desc); - if(!buf) { - /* try harder to get a buffer */ - fenced_buffer_list_check_free(fenced_mgr->fenced_list, 1); - - buf = fenced_mgr->provider->create_buffer(fenced_mgr->provider, size, desc); - if(!buf) { -#if 0 - fenced_buffer_list_dump(fenced_mgr->fenced_list); -#endif - - /* give up */ - return NULL; - } - } - - fenced_buf = fenced_buffer_create(fenced_mgr->fenced_list, buf); - if(!fenced_buf) { - pb_reference(&buf, NULL); - } - - return fenced_buf; -} - - -static void -fenced_bufmgr_flush(struct pb_manager *mgr) -{ - struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr); - - fenced_buffer_list_check_free(fenced_mgr->fenced_list, TRUE); - - assert(fenced_mgr->provider->flush); - if(fenced_mgr->provider->flush) - fenced_mgr->provider->flush(fenced_mgr->provider); -} - - -static void -fenced_bufmgr_destroy(struct pb_manager *mgr) -{ - struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr); - - fenced_buffer_list_destroy(fenced_mgr->fenced_list); - - if(fenced_mgr->provider) - 
fenced_mgr->provider->destroy(fenced_mgr->provider); - - FREE(fenced_mgr); -} - - -struct pb_manager * -fenced_bufmgr_create(struct pb_manager *provider, - struct pb_fence_ops *ops) -{ - struct fenced_pb_manager *fenced_mgr; - - if(!provider) - return NULL; - - fenced_mgr = CALLOC_STRUCT(fenced_pb_manager); - if (!fenced_mgr) - return NULL; - - fenced_mgr->base.destroy = fenced_bufmgr_destroy; - fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer; - fenced_mgr->base.flush = fenced_bufmgr_flush; - - fenced_mgr->provider = provider; - fenced_mgr->fenced_list = fenced_buffer_list_create(ops); - if(!fenced_mgr->fenced_list) { - FREE(fenced_mgr); - return NULL; - } - - return &fenced_mgr->base; -} diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index ce40c0c..903afc7 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -39,7 +39,6 @@ #include "util/u_debug.h" #include "pb_buffer.h" -#include "pb_buffer_fenced.h" #include "pb_validate.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index a6cc773..b9be8dc 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -101,12 +101,10 @@ tgsi_scan_shader(const struct tgsi_token *tokens, src->Register.File == TGSI_FILE_SYSTEM_VALUE) { const int ind = src->Register.Index; if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) { - if (src->Register.SwizzleX == TGSI_SWIZZLE_X) { - info->uses_fogcoord = TRUE; - } - else if (src->Register.SwizzleX == TGSI_SWIZZLE_Y) { - info->uses_frontfacing = TRUE; - } + info->uses_fogcoord = TRUE; + } + else if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FACE) { + info->uses_frontfacing = TRUE; } } } diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 9725890..236f1e4 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ 
b/src/gallium/auxiliary/util/u_blit.c @@ -226,8 +226,8 @@ setup_vertex_data_tex(struct blit_state *ctx, offset = get_next_slot( ctx ); - pipe_buffer_write(ctx->pipe->screen, ctx->vbuf, - offset, sizeof(ctx->vertices), ctx->vertices); + pipe_buffer_write_nooverlap(ctx->pipe->screen, ctx->vbuf, + offset, sizeof(ctx->vertices), ctx->vertices); return offset; } diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 7602379..5426c91 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -1411,8 +1411,8 @@ set_vertex_data(struct gen_mipmap_state *ctx, offset = get_next_slot( ctx ); - pipe_buffer_write(ctx->pipe->screen, ctx->vbuf, - offset, sizeof(ctx->vertices), ctx->vertices); + pipe_buffer_write_nooverlap(ctx->pipe->screen, ctx->vbuf, + offset, sizeof(ctx->vertices), ctx->vertices); return offset; } diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index 975ee89..55a6537 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -85,7 +85,9 @@ my_buffer_write(struct pipe_screen *screen, map = pipe_buffer_map_range(screen, buf, offset, size, PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_FLUSH_EXPLICIT); + PIPE_BUFFER_USAGE_FLUSH_EXPLICIT | + PIPE_BUFFER_USAGE_DISCARD | + PIPE_BUFFER_USAGE_UNSYNCHRONIZED); if (map == NULL) return PIPE_ERROR_OUT_OF_MEMORY; diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c index 90530f2..0d0859f 100644 --- a/src/gallium/drivers/i915/i915_clear.c +++ b/src/gallium/drivers/i915/i915_clear.c @@ -32,7 +32,6 @@ #include "util/u_clear.h" #include "i915_context.h" -#include "i915_state.h" /** diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 89feead..4c5ff37 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ 
-29,12 +29,9 @@ #include "i915_state.h" #include "i915_screen.h" #include "i915_batch.h" -#include "i915_texture.h" -#include "i915_reg.h" #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_memory.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index c6e6d6f..237654d 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -29,7 +29,6 @@ #include "i915_context.h" #include "i915_debug.h" #include "i915_batch.h" -#include "pipe/internal/p_winsys_screen.h" #include "util/u_debug.h" diff --git a/src/gallium/drivers/i915/i915_debug_fp.c b/src/gallium/drivers/i915/i915_debug_fp.c index 9c5b117..f9c40d8 100644 --- a/src/gallium/drivers/i915/i915_debug_fp.c +++ b/src/gallium/drivers/i915/i915_debug_fp.c @@ -29,7 +29,6 @@ #include "i915_reg.h" #include "i915_debug.h" #include "pipe/internal/p_winsys_screen.h" -#include "util/u_memory.h" static void diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 0fab6e1..06949c1 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -30,7 +30,6 @@ #include "draw/draw_context.h" -#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -38,7 +37,6 @@ #include "i915_context.h" #include "i915_reg.h" -#include "i915_state.h" #include "i915_state_inlines.h" #include "i915_fpc.h" diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 03dd509..f5b0e9f 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -33,7 +33,6 @@ #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" -#include "i915_fpc.h" diff --git 
a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c index cbac417..e5c6d87 100644 --- a/src/gallium/drivers/i915/i915_state_sampler.c +++ b/src/gallium/drivers/i915/i915_state_sampler.c @@ -27,7 +27,6 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "util/u_memory.h" #include "i915_state_inlines.h" #include "i915_context.h" diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index c693eb3..13684aa 100644 --- a/src/gallium/drivers/i915/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c @@ -27,14 +27,10 @@ #include "i915_context.h" #include "i915_blit.h" -#include "i915_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_inlines.h" #include "pipe/internal/p_winsys_screen.h" #include "util/u_format.h" -#include "util/u_tile.h" -#include "util/u_rect.h" /* Assumes all values are within bounds -- no checking at this level - diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c index 50a9e19..441bc4f 100644 --- a/src/gallium/drivers/i915/i915_texture.c +++ b/src/gallium/drivers/i915/i915_texture.c @@ -41,7 +41,6 @@ #include "i915_context.h" #include "i915_texture.h" -#include "i915_debug.h" #include "i915_screen.h" #include "intel_winsys.h" diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c index 2b4bc5c..7245730 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -35,7 +35,6 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" -#include "lp_bld_arit.h" #include "lp_bld_logic.h" #include "lp_bld_flow.h" #include "lp_bld_debug.h" diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index eea6b5d..1aee9b3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -56,7 +56,6 @@ 
#include "lp_bld_intr.h" #include "lp_bld_logic.h" #include "lp_bld_pack.h" -#include "lp_bld_debug.h" #include "lp_bld_arit.h" diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c index 9511299..5d5ca7a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -71,7 +71,6 @@ #include "pipe/p_state.h" #include "lp_bld_type.h" -#include "lp_bld_const.h" #include "lp_bld_arit.h" #include "lp_bld_blend.h" diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index 9935209..ebf554c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -63,11 +63,9 @@ #include "util/u_debug.h" #include "util/u_math.h" -#include "util/u_cpu_detect.h" #include "lp_bld_type.h" #include "lp_bld_const.h" -#include "lp_bld_intr.h" #include "lp_bld_arit.h" #include "lp_bld_pack.h" #include "lp_bld_conv.h" diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index 10e82f1..dfa080b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -38,7 +38,6 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" -#include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_format.h" diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index fb1eda4..85e3b1b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -47,13 +47,11 @@ #include "tgsi/tgsi_exec.h" #include "lp_bld_type.h" #include "lp_bld_const.h" -#include "lp_bld_intr.h" #include "lp_bld_arit.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_flow.h" #include "lp_bld_tgsi.h" -#include "lp_bld_debug.h" #define LP_MAX_TEMPS 256 diff --git 
a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index c152b44..a031619 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -33,8 +33,6 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" #include "util/u_prim.h" #include "lp_buffer.h" diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index cd8381f..6c81012 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -37,8 +37,6 @@ #include "lp_surface.h" #include "lp_state.h" #include "lp_tile_cache.h" -#include "lp_tex_cache.h" -#include "lp_winsys.h" void diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 4ef0783..9e0118c 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -39,7 +39,6 @@ #include "util/u_cpu_detect.h" #include "lp_screen.h" #include "lp_bld_intr.h" -#include "lp_bld_misc.h" #include "lp_jit.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 9f4bbef..70e2a08 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -62,7 +62,6 @@ #include "util/u_memory.h" #include "util/u_format.h" #include "util/u_debug_dump.h" -#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "tgsi/tgsi_dump.h" @@ -85,7 +84,6 @@ #include "lp_context.h" #include "lp_buffer.h" #include "lp_state.h" -#include "lp_quad.h" #include "lp_tex_sample.h" #include "lp_debug.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index e37ff04..3b08b0d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ 
b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -30,7 +30,6 @@ #include "lp_context.h" #include "lp_state.h" -#include "lp_surface.h" #include "lp_tile_cache.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index 1a17631..57ac25e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -31,7 +31,6 @@ #include "lp_context.h" #include "lp_state.h" -#include "lp_surface.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 6c29e8d..7b65bab 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -38,7 +38,6 @@ #include "lp_bld_type.h" -#include "lp_bld_arit.h" #include "lp_bld_blend.h" #include "lp_bld_debug.h" #include "lp_test.h" diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c index a6d9a2c..8094625 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c @@ -38,7 +38,6 @@ #include "util/u_format.h" #include "util/u_math.h" #include "lp_context.h" -#include "lp_surface.h" #include "lp_texture.h" #include "lp_tex_cache.h" diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c new file mode 100644 index 0000000..d1f5d95 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -0,0 +1,1712 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture sampling + * + * Authors: + * Brian Paul + */ + +#include "lp_context.h" +#include "lp_quad.h" +#include "lp_texture.h" +#include "lp_tex_sample.h" +#include "lp_tex_cache.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + + +/* + * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes + * see 1-pixel bands of improperly weighted linear-filtered textures. + * The tests/texwrap.c demo is a good test. + * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. + * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). 
+ */ +#define FRAC(f) ((f) - util_ifloor(f)) + + +/** + * Linear interpolation macro + */ +static INLINE float +lerp(float a, float v0, float v1) +{ + return v0 + a * (v1 - v0); +} + + +/** + * Do 2D/biliner interpolation of float values. + * v00, v10, v01 and v11 are typically four texture samples in a square/box. + * a and b are the horizontal and vertical interpolants. + * It's important that this function is inlined when compiled with + * optimization! If we find that's not true on some systems, convert + * to a macro. + */ +static INLINE float +lerp_2d(float a, float b, + float v00, float v10, float v01, float v11) +{ + const float temp0 = lerp(a, v00, v10); + const float temp1 = lerp(a, v01, v11); + return lerp(b, temp0, temp1); +} + + +/** + * As above, but 3D interpolation of 8 values. + */ +static INLINE float +lerp_3d(float a, float b, float c, + float v000, float v100, float v010, float v110, + float v001, float v101, float v011, float v111) +{ + const float temp0 = lerp_2d(a, b, v000, v100, v010, v110); + const float temp1 = lerp_2d(a, b, v001, v101, v011, v111); + return lerp(c, temp0, temp1); +} + + + +/** + * If A is a signed integer, A % B doesn't give the right value for A < 0 + * (in terms of texture repeat). Just casting to unsigned fixes that. + */ +#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) + + +/** + * Apply texture coord wrapping mode and return integer texture indexes + * for a vector of four texcoords (S or T or P). 
+ * \param wrapMode PIPE_TEX_WRAP_x
+ * \param s the incoming texcoords
+ * \param size the texture image size
+ * \param icoord returns the integer texcoords
+ * \return integer texture index
+ */
+static INLINE void
+nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
+                   int icoord[4])
+{
+   uint ch;   /* NOTE(review): the function is declared void; the four
+               * results are delivered only through icoord[], so the
+               * "\return" line in the header comment does not describe an
+               * actual return value.
+               */
+   switch (wrapMode) {
+   case PIPE_TEX_WRAP_REPEAT:
+      /* s limited to [0,1) */
+      /* i limited to [0,size-1] */
+      for (ch = 0; ch < 4; ch++) {
+         int i = util_ifloor(s[ch] * size);
+         icoord[ch] = REMAINDER(i, size);
+      }
+      return;
+   case PIPE_TEX_WRAP_CLAMP:
+      /* s limited to [0,1] */
+      /* i limited to [0,size-1] */
+      for (ch = 0; ch < 4; ch++) {
+         if (s[ch] <= 0.0F)
+            icoord[ch] = 0;
+         else if (s[ch] >= 1.0F)
+            icoord[ch] = size - 1;
+         else
+            icoord[ch] = util_ifloor(s[ch] * size);
+      }
+      return;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      {
+         /* s limited to [min,max], i.e. half a texel in from each edge */
+         /* i limited to [0, size-1] */
+         const float min = 1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            if (s[ch] < min)
+               icoord[ch] = 0;
+            else if (s[ch] > max)
+               icoord[ch] = size - 1;
+            else
+               icoord[ch] = util_ifloor(s[ch] * size);
+         }
+      }
+      return;
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      {
+         /* s limited to [min,max], i.e. half a texel outside each edge */
+         /* i limited to [-1, size]; -1 and size are outside the image */
+         const float min = -1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            if (s[ch] <= min)
+               icoord[ch] = -1;
+            else if (s[ch] >= max)
+               icoord[ch] = size;
+            else
+               icoord[ch] = util_ifloor(s[ch] * size);
+         }
+      }
+      return;
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:   /* odd integer periods are reflected */
+      {
+         const float min = 1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            const int flr = util_ifloor(s[ch]);
+            float u;
+            if (flr & 1)
+               u = 1.0F - (s[ch] - (float) flr);
+            else
+               u = s[ch] - (float) flr;
+            if (u < min)
+               icoord[ch] = 0;
+            else if (u > max)
+               icoord[ch] = size - 1;
+            else
+               icoord[ch] = util_ifloor(u * size);
+         }
+      }
+      return;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:   /* mirror about 0 via fabsf(), then clamp */
+      for (ch = 0; ch < 4; ch++) {
+         /* s limited to [0,1] */
+         /* i limited to [0,size-1] */
+         const float u = fabsf(s[ch]);
+         if (u <= 0.0F)
+            icoord[ch] = 0;
+         else if (u >= 1.0F)
+            icoord[ch] = size - 1;
+         else
+            icoord[ch] = util_ifloor(u * size);
+      }
+      return;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+      {
+         /* s limited to [min,max] */
+         /* i limited to [0, size-1] */
+         const float min = 1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            const float u = fabsf(s[ch]);
+            if (u < min)
+               icoord[ch] = 0;
+            else if (u > max)
+               icoord[ch] = size - 1;
+            else
+               icoord[ch] = util_ifloor(u * size);
+         }
+      }
+      return;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+      {
+         /* |s| limited to [min,max] */
+         /* i limited to [-1, size] by the assignments below.  Since
+          * u = fabsf(s) is never negative and min is negative, the
+          * "u < min" test cannot succeed, so in practice i is in [0, size].
+          */
+         const float min = -1.0F / (2.0F * size);
+         const float max = 1.0F - min;
+         for (ch = 0; ch < 4; ch++) {
+            const float u = fabsf(s[ch]);
+            if (u < min)
+               icoord[ch] = -1;
+            else if (u > max)
+               icoord[ch] = size;
+            else
+               icoord[ch] = util_ifloor(u * size);
+         }
+      }
+      return;
+   default:
+      assert(0);
+   }
+}
+
+
+/**
+ * Used to compute texel locations for linear sampling for four texcoords.
+ * \param wrapMode PIPE_TEX_WRAP_x + * \param s the texcoords + * \param size the texture image size + * \param icoord0 returns first texture indexes + * \param icoord1 returns second texture indexes (usually icoord0 + 1) + * \param w returns blend factor/weight between texture indexes + * \param icoord returns the computed integer texture coords + */ +static INLINE void +linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + + switch (wrapMode) { + case PIPE_TEX_WRAP_REPEAT: + for (ch = 0; ch < 4; ch++) { + float u = s[ch] * size - 0.5F; + icoord0[ch] = REMAINDER(util_ifloor(u), size); + icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_CLAMP: + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.0F, 1.0F); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.0F, 1.0F); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + { + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], min, max); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + } + break;; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + for (ch = 0; ch < 4; ch++) { + const int flr = util_ifloor(s[ch]); + float u; + if (flr & 1) + u = 1.0F - (s[ch] - (float) flr); + else + u = s[ch] - (float) flr; + u = u * size - 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; 
+ w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + { + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u <= min) + u = min * size; + else if (u >= max) + u = max * size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + } + break;; + default: + assert(0); + } +} + + +/** + * For RECT textures / unnormalized texcoords + * Only a subset of wrap modes supported. + */ +static INLINE void +nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + switch (wrapMode) { + case PIPE_TEX_WRAP_CLAMP: + for (ch = 0; ch < 4; ch++) { + int i = util_ifloor(s[ch]); + icoord[ch]= CLAMP(i, 0, (int) size-1); + } + return; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + /* fall-through */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + for (ch = 0; ch < 4; ch++) { + icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); + } + return; + default: + assert(0); + } +} + + +/** + * For RECT textures / unnormalized texcoords. + * Only a subset of wrap modes supported. 
+ */
+static INLINE void
+linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
+                         int icoord0[4], int icoord1[4], float w[4])
+{
+   uint ch;   /* Outputs per channel: icoord0/icoord1 are the two texel
+               * indexes to blend and w is FRAC() of the shifted coordinate.
+               * Wrap modes other than the three handled below hit assert(0).
+               */
+   switch (wrapMode) {
+   case PIPE_TEX_WRAP_CLAMP:
+      for (ch = 0; ch < 4; ch++) {
+         /* Not exactly what the spec says, but it matches NVIDIA output */
+         float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         w[ch] = FRAC(u);
+      }
+      return;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      /* fall-through */
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      for (ch = 0; ch < 4; ch++) {
+         float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
+         u -= 0.5F;
+         icoord0[ch] = util_ifloor(u);
+         icoord1[ch] = icoord0[ch] + 1;
+         if (icoord1[ch] > (int) size - 1)
+            icoord1[ch] = size - 1;
+         w[ch] = FRAC(u);
+      }
+      break;
+   default:
+      assert(0);
+   }
+}
+
+/**
+ * Pick the cube map face for direction (rx, ry, rz) and compute the 2D
+ * coordinate within that face.
+ *
+ * The X faces are chosen when |rx| is strictly greater than both other
+ * magnitudes, otherwise the Y faces when |ry| is strictly greatest,
+ * otherwise the Z faces (so ties fall through to Z).
+ *
+ * \param newS returns (sc / ma + 1) / 2
+ * \param newT returns (tc / ma + 1) / 2
+ * \return the PIPE_TEX_FACE_x value selected below
+ */
+static unsigned
+choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
+{
+   /*
+      major axis
+      direction     target                             sc     tc    ma
+      ----------    -------------------------------    ---    ---   ---
+       +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
+       -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
+       +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
+       -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
+       +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
+       -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
+   */
+   const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
+   unsigned face;
+   float sc, tc, ma;   /* ma is stored as the absolute value of the major axis */
+
+   if (arx > ary && arx > arz) {
+      if (rx >= 0.0F) {
+         face = PIPE_TEX_FACE_POS_X;
+         sc = -rz;
+         tc = -ry;
+         ma = arx;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_X;
+         sc = rz;
+         tc = -ry;
+         ma = arx;
+      }
+   }
+   else if (ary > arx && ary > arz) {
+      if (ry >= 0.0F) {
+         face = PIPE_TEX_FACE_POS_Y;
+         sc = rx;
+         tc = rz;
+         ma = ary;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_Y;
+         sc = rx;
+         tc = -rz;
+         ma = ary;
+      }
+   }
+   else {
+      if (rz > 0.0F) {
+         face = PIPE_TEX_FACE_POS_Z;
+         sc = rx;
+         tc = -ry;
+         ma = arz;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_Z;
+         sc = -rx;
+         tc = -ry;
+         ma = arz;
+      }
+   }
+
+   *newS = ( sc / ma + 1.0F ) * 0.5F;   /* NOTE(review): for rx == ry == rz == 0
+                                         * this path has ma == 0, so both
+                                         * divisions divide by zero.
+                                         */
+   *newT = ( tc / ma + 1.0F ) * 0.5F;
+
+   return face;
+}
+
+
+/**
+ * Examine the quad's texture coordinates to compute the partial
+ * derivatives w.r.t X and Y, then compute lambda (level of detail).
+ *
+ * This is only done for fragment shaders, not vertex shaders.
+ *
+ * \param s  quad of S coords; must be non-NULL (asserted)
+ * \param t  quad of T coords, or NULL to leave T out of the computation
+ * \param p  quad of P coords, or NULL to leave P out of the computation
+ * \param lodbias  bias added to the result together with sampler->lod_bias
+ * \return lodbias unchanged when samp->processor is TGSI_PROCESSOR_VERTEX;
+ *         otherwise log2 of the largest scaled derivative plus the biases,
+ *         clamped to [sampler->min_lod, sampler->max_lod]
+ */
+static float
+compute_lambda(struct tgsi_sampler *tgsi_sampler,
+               const float s[QUAD_SIZE],
+               const float t[QUAD_SIZE],
+               const float p[QUAD_SIZE],
+               float lodbias)
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+   float rho, lambda;
+
+   if (samp->processor == TGSI_PROCESSOR_VERTEX)
+      return lodbias;
+
+   assert(sampler->normalized_coords);
+
+   assert(s);
+   {
+      /* derivatives are differences between quad neighbours, scaled by
+       * the level-0 width
+       */
+      float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT];
+      float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT];
+      dsdx = fabsf(dsdx);
+      dsdy = fabsf(dsdy);
+      rho = MAX2(dsdx, dsdy) * texture->width[0];
+   }
+   if (t) {
+      float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
+      float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT];
+      float max;
+      dtdx = fabsf(dtdx);
+      dtdy = fabsf(dtdy);
+      max = MAX2(dtdx, dtdy) * texture->height[0];
+      rho = MAX2(rho, max);
+   }
+   if (p) {
+      float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT];
+      float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT];
+      float max;
+      dpdx = fabsf(dpdx);
+      dpdy = fabsf(dpdy);
+      max = MAX2(dpdx, dpdy) * texture->depth[0];
+      rho = MAX2(rho, max);
+   }
+
+   lambda = util_fast_log2(rho);
+   lambda += lodbias + sampler->lod_bias;
+   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
+
+   return lambda;
+}
+
+
+/**
+ * Do several things here:
+ * 1. Compute lambda from the texcoords, if needed
+ * 2. Determine if we're minifying or magnifying
+ * 3. If minifying, choose mipmap levels
+ * 4. 
Return image filter to use within mipmap images
+ * \param level0 Returns first mipmap level to sample from
+ * \param level1 Returns second mipmap level to sample from
+ * \param levelBlend Returns blend factor between levels, in [0,1]
+ * \param imgFilter Returns either the min or mag filter, depending on lambda
+ */
+static void
+choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler,
+                     const float s[QUAD_SIZE],
+                     const float t[QUAD_SIZE],
+                     const float p[QUAD_SIZE],
+                     float lodbias,
+                     unsigned *level0, unsigned *level1, float *levelBlend,
+                     unsigned *imgFilter)
+{   /* NOTE: *levelBlend is written only in the linear-mipfilter
+     * minification branch at the bottom; on every other path it keeps
+     * whatever value the caller stored, and level0 == level1 there.
+     */
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+
+   if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+      /* no mipmap selection needed: both levels are min_lod clamped to
+       * [0, last_level]
+       */
+      *level0 = *level1 = CLAMP((int) sampler->min_lod,
+                                0, (int) texture->last_level);
+
+      if (sampler->min_img_filter != sampler->mag_img_filter) {
+         /* non-mipmapped texture, but still need to determine if doing
+          * minification or magnification.
+          */
+         float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
+         if (lambda <= 0.0) {
+            *imgFilter = sampler->mag_img_filter;
+         }
+         else {
+            *imgFilter = sampler->min_img_filter;
+         }
+      }
+      else {
+         /* min and mag filters are identical, so lambda is not needed */
+         *imgFilter = sampler->mag_img_filter;
+      }
+   }
+   else {
+      float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
+
+      if (lambda <= 0.0) { /* XXX threshold depends on the filter */
+         /* magnifying */
+         *imgFilter = sampler->mag_img_filter;
+         *level0 = *level1 = 0;
+      }
+      else {
+         /* minifying */
+         *imgFilter = sampler->min_img_filter;
+
+         /* choose mipmap level(s) and compute the blend factor between them */
+         if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
+            /* Nearest mipmap level: lambda rounded to nearest integer */
+            const int lvl = (int) (lambda + 0.5);
+            *level0 =
+            *level1 = CLAMP(lvl, 0, (int) texture->last_level);
+         }
+         else {
+            /* Linear interpolation between mipmap levels */
+            const int lvl = (int) lambda;
+            *level0 = CLAMP(lvl, 0, (int) texture->last_level);
+            *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
+            *levelBlend = FRAC(lambda);   /* blending weight between levels;
+                                           * the only store to *levelBlend */
+         }
+      }
+   }
+}
+
+
+/**
+ * Get a texel from a texture, using the texture tile cache.
+ *
+ * \param face the cube face in 0..5
+ * \param level the mipmap level
+ * \param x the x coord of texel within 2D image
+ * \param y the y coord of texel within 2D image
+ * \param z which slice of a 3D texture
+ * \param rgba the quad to put the texel/color into
+ * \param j which element of the rgba quad to write to
+ *
+ * XXX maybe move this into lp_tile_cache.c and merge with the
+ * lp_get_cached_tile_tex() function. Also, get 4 texels instead of 1...
+ */
+static void
+get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler,
+                  unsigned face, unsigned level, int x, int y,
+                  const uint8_t *out[4])
+{   /* NOTE(review): the header comment just above lists z/rgba/j, which
+     * are parameters of get_texel(), not of this function.  This function
+     * looks up the cached tile containing (x, y) in slice 0 and returns
+     * pointers to the 2x2 texels (x,y), (x+1,y), (x,y+1), (x+1,y+1) in out[].
+     * There is no check here that x+1 / y+1 stay inside the tile.
+     */
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+
+   const struct llvmpipe_cached_tex_tile *tile
+      = lp_get_cached_tex_tile(samp->cache,
+                               tex_tile_address(x, y, 0, face, level));
+
+   /* reduce image coordinates to coordinates within the tile */
+   y %= TEX_TILE_SIZE;
+   x %= TEX_TILE_SIZE;
+
+   out[0] = &tile->color[y  ][x  ][0];
+   out[1] = &tile->color[y  ][x+1][0];
+   out[2] = &tile->color[y+1][x  ][0];
+   out[3] = &tile->color[y+1][x+1][0];
+}
+/**
+ * Return a pointer to the first channel of texel (x, y) of the given
+ * face/level (slice 0), fetched through the texture tile cache.
+ */
+static INLINE const uint8_t *
+get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler,
+                 unsigned face, unsigned level, int x, int y)
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+
+   const struct llvmpipe_cached_tex_tile *tile
+      = lp_get_cached_tex_tile(samp->cache,
+                               tex_tile_address(x, y, 0, face, level));
+
+   y %= TEX_TILE_SIZE;
+   x %= TEX_TILE_SIZE;
+
+   return &tile->color[y][x][0];
+}
+
+/**
+ * Multi-tile variant of get_texel_quad_2d(): fetch the four texels
+ * (x0,y0), (x1,y0), (x0,y1), (x1,y1) one at a time, each with its own
+ * tile lookup.  out[] uses the same ordering as get_texel_quad_2d().
+ */
+static void
+get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler,
+                     unsigned face, unsigned level,
+                     int x0, int y0,
+                     int x1, int y1,
+                     const uint8_t *out[4])
+{
+   unsigned i;
+
+   for (i = 0; i < 4; i++) {
+      /* bit 0 of i selects the x coordinate, bit 1 the y coordinate */
+      unsigned tx = (i & 1) ? x1 : x0;
+      unsigned ty = (i >> 1) ? y1 : y0;
+
+      out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty );
+   }
+}
+/**
+ * Fetch one texel as floats into column j of rgba.
+ * Coordinates outside the level's width/height/depth yield the sampler's
+ * border color; otherwise the texel is read from the tile cache and each
+ * channel is converted with ubyte_to_float().
+ */
+static void
+get_texel(const struct tgsi_sampler *tgsi_sampler,
+          unsigned face, unsigned level, int x, int y, int z,
+          float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   const struct pipe_sampler_state *sampler = samp->sampler;
+
+   if (x < 0 || x >= (int) texture->width[level] ||
+       y < 0 || y >= (int) texture->height[level] ||
+       z < 0 || z >= (int) texture->depth[level]) {
+      rgba[0][j] = sampler->border_color[0];
+      rgba[1][j] = sampler->border_color[1];
+      rgba[2][j] = sampler->border_color[2];
+      rgba[3][j] = sampler->border_color[3];
+   }
+   else {
+      const unsigned tx = x % TEX_TILE_SIZE;
+      const unsigned ty = y % TEX_TILE_SIZE;
+      const struct llvmpipe_cached_tex_tile *tile;
+
+      tile = lp_get_cached_tex_tile(samp->cache,
+                                    tex_tile_address(x, y, z, face, level));
+
+      rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]);
+      rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]);
+      rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]);
+      rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]);
+      if (0)   /* debug output, compiled in but never executed */
+      {
+         debug_printf("Get texel %f %f %f %f from %s\n",
+                      rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
+                      pf_name(texture->format));
+      }
+   }
+}
+
+
+/**
+ * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
+ * When we sampled the depth texture, the depth value was put into all
+ * RGBA channels. We look at the red channel here.
+ * \param rgba quad of (depth) texel values
+ * \param p texture 'P' components for four pixels in quad
+ * \param j which pixel in the quad to test [0..3]
+ */
+static INLINE void
+shadow_compare(const struct pipe_sampler_state *sampler,
+               float rgba[NUM_CHANNELS][QUAD_SIZE],
+               const float p[QUAD_SIZE],
+               uint j)
+{
+   int k;   /* 1 if the comparison passes, 0 otherwise */
+   switch (sampler->compare_func) {
+   case PIPE_FUNC_LESS:
+      k = p[j] < rgba[0][j];
+      break;
+   case PIPE_FUNC_LEQUAL:
+      k = p[j] <= rgba[0][j];
+      break;
+   case PIPE_FUNC_GREATER:
+      k = p[j] > rgba[0][j];
+      break;
+   case PIPE_FUNC_GEQUAL:
+      k = p[j] >= rgba[0][j];
+      break;
+   case PIPE_FUNC_EQUAL:
+      k = p[j] == rgba[0][j];
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      k = p[j] != rgba[0][j];
+      break;
+   case PIPE_FUNC_ALWAYS:
+      k = 1;
+      break;
+   case PIPE_FUNC_NEVER:
+      k = 0;
+      break;
+   default:
+      k = 0;
+      assert(0);
+      break;
+   }
+
+   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
+   rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k;
+   rgba[3][j] = 1.0F;
+}
+
+
+/**
+ * As above, but do four z/texture comparisons.
+ * Element i of p is compared with rgba[0][i] for i = 0..3, and the
+ * fraction of passing comparisons (0, 0.25, ... 1) is written to the
+ * R, G and B rows of all four columns; the A row is set to 1.
+ */
+static INLINE void
+shadow_compare4(const struct pipe_sampler_state *sampler,
+                float rgba[NUM_CHANNELS][QUAD_SIZE],
+                const float p[QUAD_SIZE])
+{
+   int j, k0, k1, k2, k3;
+   float val;
+
+   /* compare four texcoords vs. four texture samples */
+   switch (sampler->compare_func) {
+   case PIPE_FUNC_LESS:
+      k0 = p[0] < rgba[0][0];
+      k1 = p[1] < rgba[0][1];
+      k2 = p[2] < rgba[0][2];
+      k3 = p[3] < rgba[0][3];
+      break;
+   case PIPE_FUNC_LEQUAL:
+      k0 = p[0] <= rgba[0][0];
+      k1 = p[1] <= rgba[0][1];
+      k2 = p[2] <= rgba[0][2];
+      k3 = p[3] <= rgba[0][3];
+      break;
+   case PIPE_FUNC_GREATER:
+      k0 = p[0] > rgba[0][0];
+      k1 = p[1] > rgba[0][1];
+      k2 = p[2] > rgba[0][2];
+      k3 = p[3] > rgba[0][3];
+      break;
+   case PIPE_FUNC_GEQUAL:
+      k0 = p[0] >= rgba[0][0];
+      k1 = p[1] >= rgba[0][1];
+      k2 = p[2] >= rgba[0][2];
+      k3 = p[3] >= rgba[0][3];
+      break;
+   case PIPE_FUNC_EQUAL:
+      k0 = p[0] == rgba[0][0];
+      k1 = p[1] == rgba[0][1];
+      k2 = p[2] == rgba[0][2];
+      k3 = p[3] == rgba[0][3];
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      k0 = p[0] != rgba[0][0];
+      k1 = p[1] != rgba[0][1];
+      k2 = p[2] != rgba[0][2];
+      k3 = p[3] != rgba[0][3];
+      break;
+   case PIPE_FUNC_ALWAYS:
+      k0 = k1 = k2 = k3 = 1;
+      break;
+   case PIPE_FUNC_NEVER:
+      k0 = k1 = k2 = k3 = 0;
+      break;
+   default:
+      k0 = k1 = k2 = k3 = 0;
+      assert(0);
+      break;
+   }
+
+   /* convert four pass/fail values to an intensity in [0,1] */
+   val = 0.25F * (k0 + k1 + k2 + k3);
+
+   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
+   for (j = 0; j < 4; j++) {
+      rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
+      rgba[3][j] = 1.0F;
+   }
+}
+
+
+/**
+ * Fast path: bilinear filtering with REPEAT wrap on face 0 of mipmap
+ * level samp->level.  The level's size is taken as
+ * 1 << (samp->xpot - level) by 1 << (samp->ypot - level) and wrapping
+ * is done by masking with (size - 1).  p and lodbias are not used.
+ */
+static void
+lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
+                                    const float s[QUAD_SIZE],
+                                    const float t[QUAD_SIZE],
+                                    const float p[QUAD_SIZE],
+                                    float lodbias,
+                                    float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   unsigned j;
+   unsigned level = samp->level;
+   unsigned xpot = 1 << (samp->xpot - level);
+   unsigned ypot = 1 << (samp->ypot - level);
+   unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
+   unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
+
+   for (j = 0; j < QUAD_SIZE; j++) {
+      int c;
+
+      float u = s[j] * xpot - 0.5F;
+      float v = t[j] * ypot - 0.5F;
+
+      int uflr = util_ifloor(u);
+      int vflr = util_ifloor(v);
+
+      float xw = u - (float)uflr;   /* fractional parts are the lerp weights */
+      float yw = v - (float)vflr;
+
+      int x0 = uflr & (xpot - 1);
+      int y0 = vflr & (ypot - 1);
+
+      const uint8_t *tx[4];
+
+
+      /* Can we fetch all four at once:
+       */
+      if (x0 < xmax && y0 < ymax)
+      {
+         get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx);
+      }
+      else
+      {
+         /* the +1 neighbours are wrapped and fetched texel by texel */
+         unsigned x1 = (x0 + 1) & (xpot - 1);
+         unsigned y1 = (y0 + 1) & (ypot - 1);
+         get_texel_quad_2d_mt(tgsi_sampler, 0, level,
+                              x0, y0, x1, y1, tx);
+      }
+
+
+      /* interpolate R, G, B, A */
+      for (c = 0; c < 4; c++) {
+         rgba[c][j] = lerp_2d(xw, yw,
+                              ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]),
+                              ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c]));
+      }
+   }
+}
+
+/**
+ * Fast path: nearest filtering with REPEAT wrap on face 0 of mipmap
+ * level samp->level, with the same size/mask scheme as the linear
+ * variant.  p and lodbias are not used.
+ */
+static void
+lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
+                                     const float s[QUAD_SIZE],
+                                     const float t[QUAD_SIZE],
+                                     const float p[QUAD_SIZE],
+                                     float lodbias,
+                                     float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   unsigned j;
+   unsigned level = samp->level;
+   unsigned xpot = 1 << (samp->xpot - level);
+   unsigned ypot = 1 << (samp->ypot - level);
+
+   for (j = 0; j < QUAD_SIZE; j++) {
+      int c;
+
+      float u = s[j] * xpot;
+      float v = t[j] * ypot;
+
+      int uflr = util_ifloor(u);
+      int vflr = util_ifloor(v);
+
+      int x0 = uflr & (xpot - 1);
+      int y0 = vflr & (ypot - 1);
+
+      const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
+
+      for (c = 0; c < 4; c++) {
+         rgba[c][j] = ubyte_to_float(out[c]);
+      }
+   }
+}
+
+/**
+ * Fast path: nearest filtering on face 0 of mipmap level samp->level,
+ * with the integer coordinate clamped to [0, size-1] in each direction.
+ * p and lodbias are not used.
+ */
+static void
+lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
+                                    const float s[QUAD_SIZE],
+                                    const float t[QUAD_SIZE],
+                                    const float p[QUAD_SIZE],
+                                    float lodbias,
+                                    float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   unsigned j;
+   unsigned level = samp->level;
+   unsigned xpot = 1 << (samp->xpot - level);
+   unsigned ypot = 1 << (samp->ypot - level);
+
+   for (j = 0; j < QUAD_SIZE; j++) {
+      int c;
+
+      float u = s[j] * xpot;
+      float v = t[j] * ypot;
+
+      int x0, y0;
+      const uint8_t *out;
+
+      x0 = util_ifloor(u);
+      if (x0 < 0)
+         x0 = 0;
+      else if (x0 > xpot - 1)
+         x0 = xpot - 1;
+
+      y0 = util_ifloor(v);
+      if (y0 < 0)
+         y0 = 0;
+      else if (y0 > ypot - 1)
+         y0 = ypot - 1;
+
+      out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
+
+      for (c = 0; c < 4; c++) {
+         rgba[c][j] = ubyte_to_float(out[c]);
+      }
+   }
+}
+
+/**
+ * Fast path: bilinear filtering within a level plus linear blending
+ * between two adjacent levels, REPEAT wrap.  Computes lambda once for
+ * the quad and calls lp_get_samples_2d_linear_repeat_POT() for one or
+ * two levels.  Side effect: overwrites samp->level.
+ */
+static void
+lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
+                                               const float s[QUAD_SIZE],
+                                               const float t[QUAD_SIZE],
+                                               const float p[QUAD_SIZE],
+                                               float lodbias,
+                                               float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
+   const struct pipe_texture *texture = samp->texture;
+   int level0;
+   float lambda;
+
+   lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
+   level0 = (int)lambda;
+
+   if (lambda < 0.0) {
+      /* magnification: sample the base level only */
+      samp->level = 0;
+      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
+                                           s, t, p, 0, rgba );
+   }
+   else if (level0 >= texture->last_level) {
+      /* at or beyond the smallest level: sample the last level only */
+      samp->level = texture->last_level;
+      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
+                                           s, t, p, 0, rgba );
+   }
+   else {
+      float levelBlend = lambda - level0;   /* fractional part of lambda */
+      float rgba0[4][4];
+      float rgba1[4][4];
+      int c,j;
+
+      samp->level = level0;
+      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
+                                           s, t, p, 0, rgba0 );
+
+      samp->level = level0+1;
+      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
+                                           s, t, p, 0, rgba1 );
+
+      for (j = 0; j < QUAD_SIZE; j++) {
+         for (c = 0; c < 4; c++) {
+            rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
+         }
+      }
+   }
+}
+
+/**
+ * Common code for sampling 1D/2D/cube textures.
+ * Could probably extend for 3D...
+ */ +static void +lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE], + const unsigned faces[4]) +{ + const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + unsigned level0, level1, j, imgFilter; + int width, height; + float levelBlend = 0.0F; + + choose_mipmap_levels(tgsi_sampler, s, t, p, + lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + assert(sampler->normalized_coords); + + width = texture->width[level0]; + height = texture->height[level0]; + + assert(width > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + { + int x[4], y[4]; + nearest_texcoord_4(sampler->wrap_s, s, width, x); + nearest_texcoord_4(sampler->wrap_t, t, height, y); + + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(sampler, rgba, p, j); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + unsigned c; + x[j] /= 2; + y[j] /= 2; + get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0, + rgba2, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + shadow_compare(sampler, rgba2, p, j); + } + + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + { + int x0[4], y0[4], x1[4], y1[4]; + float xw[4], yw[4]; /* weights */ + + linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); + linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); + + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4]; /* texels */ + int c; + get_texel(tgsi_sampler, faces[j], level0, x0[j], 
y0[j], 0, tx, 0); + get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); + get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); + get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare4(sampler, tx, p); + } + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw[j], yw[j], + tx[c][0], tx[c][1], + tx[c][2], tx[c][3]); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + + /* XXX: This is incorrect -- will often end up with (x0 + * == x1 && y0 == y1), meaning that we fetch the same + * texel four times and linearly interpolate between + * identical values. The correct approach would be to + * call linear_texcoord again for the second level. + */ + x0[j] /= 2; + y0[j] /= 2; + x1[j] /= 2; + y1[j] /= 2; + get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0); + get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1); + get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2); + get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + shadow_compare4(sampler, tx, p); + } + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba2[c][j] = lerp_2d(xw[j], yw[j], + tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + } + break; + default: + assert(0); + } +} + + +static INLINE void +lp_get_samples_1d(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + static const unsigned faces[4] = {0, 0, 0, 0}; + static const float tzero[4] = {0, 0, 0, 0}; + lp_get_samples_2d_common(sampler, s, tzero, NULL, + lodbias, rgba, faces); +} + + +static 
INLINE void +lp_get_samples_2d(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + static const unsigned faces[4] = {0, 0, 0, 0}; + lp_get_samples_2d_common(sampler, s, t, p, + lodbias, rgba, faces); +} + + +static INLINE void +lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + /* get/map pipe_surfaces corresponding to 3D tex slices */ + unsigned level0, level1, j, imgFilter; + int width, height, depth; + float levelBlend; + const uint face = 0; + + choose_mipmap_levels(tgsi_sampler, s, t, p, + lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + assert(sampler->normalized_coords); + + width = texture->width[level0]; + height = texture->height[level0]; + depth = texture->depth[level0]; + + assert(width > 0); + assert(height > 0); + assert(depth > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + { + int x[4], y[4], z[4]; + nearest_texcoord_4(sampler->wrap_s, s, width, x); + nearest_texcoord_4(sampler->wrap_t, t, height, y); + nearest_texcoord_4(sampler->wrap_r, p, depth, z); + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j); + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + unsigned c; + x[j] /= 2; + y[j] /= 2; + z[j] /= 2; + get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j); + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]); + } + } + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + { + int x0[4], 
x1[4], y0[4], y1[4], z0[4], z1[4]; + float xw[4], yw[4], zw[4]; /* interpolation weights */ + linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); + linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); + linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw); + + for (j = 0; j < QUAD_SIZE; j++) { + int c; + float tx0[4][4], tx1[4][4]; + get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0); + get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1); + get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2); + get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3); + get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0); + get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1); + get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2); + get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3); + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], + tx0[c][0], tx0[c][1], + tx0[c][2], tx0[c][3], + tx1[c][0], tx1[c][1], + tx1[c][2], tx1[c][3]); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + x0[j] /= 2; + y0[j] /= 2; + z0[j] /= 2; + x1[j] /= 2; + y1[j] /= 2; + z1[j] /= 2; + get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0); + get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1); + get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2); + get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3); + get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0); + get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1); + get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2); + get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3); + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j], + tx0[c][0], tx0[c][1], + 
tx0[c][2], tx0[c][3], + tx1[c][0], tx1[c][1], + tx1[c][2], tx1[c][3]); + } + + /* blend mipmap levels */ + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + } + break; + default: + assert(0); + } +} + + +static void +lp_get_samples_cube(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + unsigned faces[QUAD_SIZE], j; + float ssss[4], tttt[4]; + for (j = 0; j < QUAD_SIZE; j++) { + faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); + } + lp_get_samples_2d_common(sampler, ssss, tttt, NULL, + lodbias, rgba, faces); +} + + +static void +lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + const uint face = 0; + unsigned level0, level1, j, imgFilter; + int width, height; + float levelBlend; + + choose_mipmap_levels(tgsi_sampler, s, t, p, + lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + /* texture RECTS cannot be mipmapped */ + assert(level0 == level1); + + width = texture->width[level0]; + height = texture->height[level0]; + + assert(width > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + { + int x[4], y[4]; + nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x); + nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y); + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(sampler, rgba, p, j); + } + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + { + int x0[4], 
y0[4], x1[4], y1[4]; + float xw[4], yw[4]; /* weights */ + linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw); + linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw); + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4]; /* texels */ + int c; + get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0); + get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1); + get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2); + get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare4(sampler, tx, p); + } + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw[j], yw[j], + tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + } + } + break; + default: + assert(0); + } +} + + +/** + * Error condition handler + */ +static INLINE void +lp_get_samples_null(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + int i,j; + + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) + rgba[i][j] = 1.0; +} + +/** + * Called via tgsi_sampler::get_samples() when using a sampler for the + * first time. Determine the actual sampler function, link it in and + * call it. + */ +void +lp_get_samples(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + + /* Default to the 'undefined' case: + */ + tgsi_sampler->get_samples = lp_get_samples_null; + + if (!texture) { + assert(0); /* is this legal?? 
*/ + goto out; + } + + if (!sampler->normalized_coords) { + assert (texture->target == PIPE_TEXTURE_2D); + tgsi_sampler->get_samples = lp_get_samples_rect; + goto out; + } + + switch (texture->target) { + case PIPE_TEXTURE_1D: + tgsi_sampler->get_samples = lp_get_samples_1d; + break; + case PIPE_TEXTURE_2D: + tgsi_sampler->get_samples = lp_get_samples_2d; + break; + case PIPE_TEXTURE_3D: + tgsi_sampler->get_samples = lp_get_samples_3d; + break; + case PIPE_TEXTURE_CUBE: + tgsi_sampler->get_samples = lp_get_samples_cube; + break; + default: + assert(0); + break; + } + + /* Do this elsewhere: + */ + samp->xpot = util_unsigned_logbase2( samp->texture->width[0] ); + samp->ypot = util_unsigned_logbase2( samp->texture->height[0] ); + + /* Try to hook in a faster sampler. Ultimately we'll have to + * code-generate these. Luckily most of this looks like it is + * orthogonal state within the sampler. + */ + if (texture->target == PIPE_TEXTURE_2D && + sampler->min_img_filter == sampler->mag_img_filter && + sampler->wrap_s == sampler->wrap_t && + sampler->compare_mode == FALSE && + sampler->normalized_coords) + { + if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + samp->level = CLAMP((int) sampler->min_lod, + 0, (int) texture->last_level); + + if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { + switch (sampler->min_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT; + break; + case PIPE_TEX_FILTER_LINEAR: + tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT; + break; + default: + break; + } + } + else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) { + switch (sampler->min_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT; + break; + default: + break; + } + } + } + else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { + switch (sampler->min_img_filter) { + case 
PIPE_TEX_FILTER_LINEAR: + tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT; + break; + default: + break; + } + } + } + } + else if (0) { + _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n", + texture->target, PIPE_TEXTURE_2D, + sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE, + sampler->min_img_filter, sampler->mag_img_filter, + sampler->wrap_s, sampler->wrap_t, + sampler->compare_mode, FALSE, + sampler->normalized_coords, TRUE); + } + +out: + tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); +} + + +void PIPE_CDECL +lp_fetch_texel_soa( struct tgsi_sampler **samplers, + uint32_t unit, + float *store ) +{ + struct tgsi_sampler *sampler = samplers[unit]; + +#if 0 + uint j; + + debug_printf("%s sampler: %p (%p) store: %p\n", + __FUNCTION__, + sampler, *sampler, + store ); + + debug_printf("lodbias %f\n", store[12]); + + for (j = 0; j < 4; j++) + debug_printf("sample %d texcoord %f %f\n", + j, + store[0+j], + store[4+j]); +#endif + + { + float rgba[NUM_CHANNELS][QUAD_SIZE]; + sampler->get_samples(sampler, + &store[0], + &store[4], + &store[8], + 0.0f, /*store[12], lodbias */ + rgba); + memcpy(store, rgba, sizeof rgba); + } + +#if 0 + for (j = 0; j < 4; j++) + debug_printf("sample %d result %f %f %f %f\n", + j, + store[0+j], + store[4+j], + store[8+j], + store[12+j]); +#endif +} + + +#include "lp_bld_type.h" +#include "lp_bld_intr.h" +#include "lp_bld_tgsi.h" + + +struct lp_c_sampler_soa +{ + struct lp_build_sampler_soa base; + + LLVMValueRef context_ptr; + + LLVMValueRef samplers_ptr; + + /** Coords/texels store */ + LLVMValueRef store_ptr; +}; + + +static void +lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +static void +lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, + LLVMBuilderRef builder, + struct lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + 
LLVMValueRef *texel) +{ + struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler; + LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); + LLVMValueRef args[3]; + unsigned i; + + if(!sampler->samplers_ptr) + sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); + + if(!sampler->store_ptr) + sampler->store_ptr = LLVMBuildArrayAlloca(builder, + vec_type, + LLVMConstInt(LLVMInt32Type(), 4, 0), + "texel_store"); + + for (i = 0; i < num_coords; i++) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + LLVMBuildStore(builder, coords[i], coord_ptr); + } + + args[0] = sampler->samplers_ptr; + args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); + args[2] = sampler->store_ptr; + + lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); + + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); + } +} + + +struct lp_build_sampler_soa * +lp_c_sampler_soa_create(LLVMValueRef context_ptr) +{ + struct lp_c_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_c_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_c_sampler_soa_destroy; + sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; + sampler->context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c index d2a6ae2..5138ccf 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -44,7 +44,6 @@ #include "pipe/p_shader_tokens.h" #include "lp_bld_debug.h" #include "lp_bld_type.h" -#include "lp_bld_intr.h" #include "lp_bld_sample.h" #include "lp_bld_tgsi.h" #include "lp_state.h" diff --git 
a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 2c13502..039539d 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -42,7 +42,6 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_texture.h" -#include "lp_tex_cache.h" #include "lp_screen.h" #include "lp_winsys.h" diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index 7a1ecf5..971d933 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -38,8 +38,6 @@ #include "util/u_tile.h" #include "util/u_rect.h" #include "lp_context.h" -#include "lp_surface.h" -#include "lp_texture.h" #include "lp_tile_soa.h" #include "lp_tile_cache.h" diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 8e01793..73e075f 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -43,7 +43,6 @@ #include "sp_surface.h" #include "sp_tile_cache.h" #include "sp_tex_tile_cache.h" -#include "sp_texture.h" #include "sp_winsys.h" #include "sp_query.h" diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 75dac81..e8952bf 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -34,11 +34,9 @@ #include "draw/draw_context.h" #include "sp_flush.h" #include "sp_context.h" -#include "sp_surface.h" #include "sp_state.h" #include "sp_tile_cache.h" #include "sp_tex_tile_cache.h" -#include "sp_winsys.h" void diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 5812d1e..98c08ea 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -526,7 +526,8 @@ static void sp_vbuf_destroy(struct vbuf_render *vbr) { struct softpipe_vbuf_render *cvbr = 
softpipe_vbuf_render(vbr); - align_free(cvbr->vertex_buffer); + if(cvbr->vertex_buffer) + align_free(cvbr->vertex_buffer); sp_setup_destroy_context(cvbr->setup); FREE(cvbr); } diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index d9babe8..3b8c2d5 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -35,7 +35,6 @@ #include "util/u_memory.h" #include "sp_context.h" #include "sp_quad.h" -#include "sp_surface.h" #include "sp_tile_cache.h" #include "sp_quad_pipe.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 0ca86c4..a981775 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -30,11 +30,11 @@ */ #include "pipe/p_defines.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "tgsi/tgsi_scan.h" #include "sp_context.h" #include "sp_quad.h" -#include "sp_surface.h" #include "sp_quad_pipe.h" #include "sp_tile_cache.h" #include "sp_state.h" /* for sp_fragment_shader */ @@ -651,6 +651,20 @@ static unsigned mask_count[16] = +/** helper to get number of Z buffer bits */ +static unsigned +get_depth_bits(struct quad_stage *qs) +{ + struct pipe_surface *zsurf = qs->softpipe->framebuffer.zsbuf; + if (zsurf) + return util_format_get_component_bits(zsurf->format, + UTIL_FORMAT_COLORSPACE_ZS, 0); + else + return 0; +} + + + static void depth_test_quads_fallback(struct quad_stage *qs, struct quad_header *quads[], @@ -666,7 +680,7 @@ depth_test_quads_fallback(struct quad_stage *qs, nr = alpha_test_quads(qs, quads, nr); } - if (qs->softpipe->framebuffer.zsbuf && + if (get_depth_bits(qs) > 0 && (qs->softpipe->depth_stencil->depth.enabled || qs->softpipe->depth_stencil->stencil[0].enabled)) { @@ -884,7 +898,7 @@ choose_depth_test(struct quad_stage *qs, boolean alpha = qs->softpipe->depth_stencil->alpha.enabled; - boolean 
depth = (qs->softpipe->framebuffer.zsbuf && + boolean depth = (get_depth_bits(qs) > 0 && qs->softpipe->depth_stencil->depth.enabled); unsigned depthfunc = qs->softpipe->depth_stencil->depth.func; @@ -895,7 +909,6 @@ choose_depth_test(struct quad_stage *qs, boolean occlusion = qs->softpipe->active_query_count; - if (!alpha && !depth && !stencil) { diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 1e7533d..e799df1 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -45,8 +45,6 @@ #include "sp_state.h" #include "sp_quad.h" #include "sp_quad_pipe.h" -#include "sp_texture.h" -#include "sp_tex_sample.h" struct quad_shade_stage diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 3da7536..f6c3a2b 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -41,7 +41,6 @@ #include "draw/draw_private.h" #include "draw/draw_vertex.h" #include "pipe/p_shader_tokens.h" -#include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index f615410..3946678 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -30,7 +30,6 @@ #include "sp_context.h" #include "sp_state.h" -#include "sp_surface.h" #include "sp_tile_cache.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 46b6991..b491d92 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -31,7 +31,6 @@ #include "sp_context.h" #include "sp_state.h" -#include "sp_surface.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c 
b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index e50a76a..50242d5 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -37,7 +37,6 @@ #include "util/u_tile.h" #include "util/u_math.h" #include "sp_context.h" -#include "sp_surface.h" #include "sp_texture.h" #include "sp_tex_tile_cache.h" diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 45e80c5..a5fff91 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -38,7 +38,6 @@ #include "util/u_memory.h" #include "sp_context.h" -#include "sp_state.h" #include "sp_texture.h" #include "sp_screen.h" #include "sp_winsys.h" diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c index 75492df..6b6ebc9 100644 --- a/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/src/gallium/drivers/svga/svga_draw_arrays.c @@ -26,7 +26,6 @@ #include "svga_cmd.h" #include "pipe/p_inlines.h" -#include "util/u_prim.h" #include "indices/u_indices.h" #include "svga_hw_reg.h" diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c index 167d817..022b444 100644 --- a/src/gallium/drivers/svga/svga_draw_elements.c +++ b/src/gallium/drivers/svga/svga_draw_elements.c @@ -24,7 +24,6 @@ **********************************************************/ #include "pipe/p_inlines.h" -#include "util/u_prim.h" #include "util/u_upload_mgr.h" #include "indices/u_indices.h" diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c index 855d228..3ad3f97 100644 --- a/src/gallium/drivers/svga/svga_pipe_blend.c +++ b/src/gallium/drivers/svga/svga_pipe_blend.c @@ -29,7 +29,6 @@ #include "util/u_memory.h" #include "svga_context.h" -#include "svga_state.h" #include "svga_hw_reg.h" diff --git a/src/gallium/drivers/svga/svga_pipe_constants.c 
b/src/gallium/drivers/svga/svga_pipe_constants.c index ca2c7c4..93022f3 100644 --- a/src/gallium/drivers/svga/svga_pipe_constants.c +++ b/src/gallium/drivers/svga/svga_pipe_constants.c @@ -30,9 +30,6 @@ #include "tgsi/tgsi_parse.h" #include "svga_context.h" -#include "svga_state.h" -#include "svga_hw_reg.h" -#include "svga_cmd.h" /*********************************************************************** * Constant buffers diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c index df636c0..34e60cb 100644 --- a/src/gallium/drivers/svga/svga_pipe_depthstencil.c +++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c @@ -29,7 +29,6 @@ #include "util/u_memory.h" #include "svga_context.h" -#include "svga_state.h" #include "svga_hw_reg.h" diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 0f24ef4..4e0c499 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -33,7 +33,6 @@ #include "svga_hw_reg.h" #include "svga_context.h" #include "svga_screen.h" -#include "svga_winsys.h" #include "svga_draw.h" #include "svga_state.h" #include "svga_swtnl.h" diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c index 0becb07..3eb1033 100644 --- a/src/gallium/drivers/svga/svga_pipe_flush.c +++ b/src/gallium/drivers/svga/svga_pipe_flush.c @@ -28,13 +28,8 @@ #include "svga_screen_texture.h" #include "svga_context.h" #include "svga_winsys.h" -#include "svga_draw.h" #include "svga_debug.h" -#include "svga_hw_reg.h" - - - static void svga_flush( struct pipe_context *pipe, unsigned flags, diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c index 5f1213e..32f07fb 100644 --- a/src/gallium/drivers/svga/svga_pipe_fs.c +++ b/src/gallium/drivers/svga/svga_pipe_fs.c @@ -32,11 +32,9 @@ #include "svga_screen.h" #include "svga_context.h" -#include 
"svga_state.h" #include "svga_tgsi.h" #include "svga_hw_reg.h" #include "svga_cmd.h" -#include "svga_draw.h" #include "svga_debug.h" diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c index 58cb1e6..8cf1f2e 100644 --- a/src/gallium/drivers/svga/svga_pipe_misc.c +++ b/src/gallium/drivers/svga/svga_pipe_misc.c @@ -27,12 +27,6 @@ #include "svga_context.h" #include "svga_screen_texture.h" -#include "svga_state.h" -#include "svga_winsys.h" - -#include "svga_hw_reg.h" - - static void svga_set_scissor_state( struct pipe_context *pipe, diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c index 01336b0..08283e3 100644 --- a/src/gallium/drivers/svga/svga_pipe_query.c +++ b/src/gallium/drivers/svga/svga_pipe_query.c @@ -32,7 +32,6 @@ #include "svga_screen.h" #include "svga_screen_buffer.h" #include "svga_winsys.h" -#include "svga_draw.h" #include "svga_debug.h" diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c index b03f8eb..9ea11aa 100644 --- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -30,7 +30,6 @@ #include "util/u_memory.h" #include "svga_context.h" -#include "svga_state.h" #include "svga_hw_reg.h" diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c index 460a101..161c66d 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -32,9 +32,6 @@ #include "svga_context.h" #include "svga_screen_texture.h" -#include "svga_state.h" - -#include "svga_hw_reg.h" #include "svga_debug.h" diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index 42f290d..0bf43fa 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -32,10 +32,6 @@ #include "svga_screen.h" 
#include "svga_screen_buffer.h" #include "svga_context.h" -#include "svga_state.h" -#include "svga_winsys.h" - -#include "svga_hw_reg.h" static void svga_set_vertex_buffers(struct pipe_context *pipe, diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c index 7e6ab57..c4ac530 100644 --- a/src/gallium/drivers/svga/svga_pipe_vs.c +++ b/src/gallium/drivers/svga/svga_pipe_vs.c @@ -33,7 +33,6 @@ #include "svga_screen.h" #include "svga_context.h" -#include "svga_state.h" #include "svga_tgsi.h" #include "svga_hw_reg.h" #include "svga_cmd.h" diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index fc1b3c9..cd1ed7b 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -33,10 +33,8 @@ #include "svga_screen.h" #include "svga_screen_texture.h" #include "svga_screen_buffer.h" -#include "svga_cmd.h" #include "svga_debug.h" -#include "svga_hw_reg.h" #include "svga3d_shaderdefs.h" @@ -393,8 +391,6 @@ svga_screen_create(struct svga_winsys_screen *sws) pipe_mutex_init(svgascreen->tex_mutex); pipe_mutex_init(svgascreen->swc_mutex); - LIST_INITHEAD(&svgascreen->cached_buffers); - svga_screen_cache_init(svgascreen); return screen; diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h index b94ca7f..a009b60 100644 --- a/src/gallium/drivers/svga/svga_screen.h +++ b/src/gallium/drivers/svga/svga_screen.h @@ -68,12 +68,6 @@ struct svga_screen pipe_mutex tex_mutex; pipe_mutex swc_mutex; /* Protects the use of swc and dirty_buffers */ - /** - * List of buffers with cached GMR. 
Ordered from the most recently used to - * the least recently used - */ - struct list_head cached_buffers; - struct svga_host_surface_cache cache; }; diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c index 58a1aba..430a697 100644 --- a/src/gallium/drivers/svga/svga_screen_buffer.c +++ b/src/gallium/drivers/svga/svga_screen_buffer.c @@ -113,68 +113,9 @@ svga_buffer_destroy_hw_storage(struct svga_screen *ss, struct svga_buffer *sbuf) if(sbuf->hw.buf) { sws->buffer_destroy(sws, sbuf->hw.buf); sbuf->hw.buf = NULL; - assert(sbuf->head.prev && sbuf->head.next); - LIST_DEL(&sbuf->head); -#ifdef DEBUG - sbuf->head.next = sbuf->head.prev = NULL; -#endif } } -static INLINE enum pipe_error -svga_buffer_backup(struct svga_screen *ss, struct svga_buffer *sbuf) -{ - if (sbuf->hw.buf && sbuf->hw.num_ranges) { - void *src; - - if (!sbuf->swbuf) - sbuf->swbuf = align_malloc(sbuf->base.size, sbuf->base.alignment); - if (!sbuf->swbuf) - return PIPE_ERROR_OUT_OF_MEMORY; - - src = ss->sws->buffer_map(ss->sws, sbuf->hw.buf, - PIPE_BUFFER_USAGE_CPU_READ); - if (!src) - return PIPE_ERROR; - - memcpy(sbuf->swbuf, src, sbuf->base.size); - ss->sws->buffer_unmap(ss->sws, sbuf->hw.buf); - } - - return PIPE_OK; -} - -/** - * Try to make GMR space available by freeing the hardware storage of - * unmapped - */ -boolean -svga_buffer_free_cached_hw_storage(struct svga_screen *ss) -{ - struct list_head *curr; - struct svga_buffer *sbuf; - enum pipe_error ret = PIPE_OK; - - curr = ss->cached_buffers.prev; - - /* free the least recently used buffer's hw storage which is not mapped */ - do { - if(curr == &ss->cached_buffers) - return FALSE; - - sbuf = LIST_ENTRY(struct svga_buffer, curr, head); - - curr = curr->prev; - if (sbuf->map.count == 0) - ret = svga_buffer_backup(ss, sbuf); - - } while(sbuf->map.count != 0 || ret != PIPE_OK); - - svga_buffer_destroy_hw_storage(ss, sbuf); - - return TRUE; -} - struct svga_winsys_buffer * 
svga_winsys_buffer_create( struct svga_screen *ss, unsigned alignment, @@ -195,12 +136,6 @@ svga_winsys_buffer_create( struct svga_screen *ss, svga_screen_flush(ss, NULL); buf = sws->buffer_create(sws, alignment, usage, size); - SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "evicting buffers to find %d bytes GMR\n", - size); - - /* Try evicing all buffer storage */ - while(!buf && svga_buffer_free_cached_hw_storage(ss)) - buf = sws->buffer_create(sws, alignment, usage, size); } return buf; @@ -226,8 +161,6 @@ svga_buffer_create_hw_storage(struct svga_screen *ss, return PIPE_ERROR_OUT_OF_MEMORY; assert(!sbuf->needs_flush); - assert(!sbuf->head.prev && !sbuf->head.next); - LIST_ADD(&sbuf->head, &ss->cached_buffers); } return PIPE_OK; @@ -311,7 +244,6 @@ static void svga_buffer_upload_flush(struct svga_context *svga, struct svga_buffer *sbuf) { - struct svga_screen *ss = svga_screen(svga->pipe.screen); SVGA3dCopyBox *boxes; unsigned i; @@ -348,13 +280,16 @@ svga_buffer_upload_flush(struct svga_context *svga, assert(sbuf->head.prev && sbuf->head.next); LIST_DEL(&sbuf->head); +#ifdef DEBUG + sbuf->head.next = sbuf->head.prev = NULL; +#endif sbuf->needs_flush = FALSE; - /* XXX: do we care about cached_buffers any more ?*/ - LIST_ADD(&sbuf->head, &ss->cached_buffers); sbuf->hw.svga = NULL; sbuf->hw.boxes = NULL; + sbuf->host_written = TRUE; + /* Decrement reference count */ pipe_reference(&(sbuf->base.reference), NULL); sbuf = NULL; @@ -437,17 +372,17 @@ svga_buffer_map_range( struct pipe_screen *screen, } else { if(!sbuf->hw.buf) { - struct svga_winsys_surface *handle = sbuf->handle; - if(svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK) return NULL; /* Populate the hardware storage if the host surface pre-existed */ - if((usage & PIPE_BUFFER_USAGE_CPU_READ) && handle) { + if(sbuf->host_written) { SVGA3dSurfaceDMAFlags flags; enum pipe_error ret; struct pipe_fence_handle *fence = NULL; + assert(sbuf->handle); + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "dma from sid %p (buffer), bytes %u - 
%u\n", sbuf->handle, 0, sbuf->base.size); @@ -478,17 +413,6 @@ svga_buffer_map_range( struct pipe_screen *screen, sws->fence_reference(sws, &fence, NULL); } } - else { - if((usage & PIPE_BUFFER_USAGE_CPU_READ) && !sbuf->needs_flush) { - /* We already had the hardware storage but we would have to issue - * a download if we hadn't, so move the buffer to the begginning - * of the LRU list. - */ - assert(sbuf->head.prev && sbuf->head.next); - LIST_DEL(&sbuf->head); - LIST_ADD(&sbuf->head, &ss->cached_buffers); - } - } map = sws->buffer_map(sws, sbuf->hw.buf, usage); } @@ -572,10 +496,8 @@ svga_buffer_destroy( struct pipe_buffer *buf ) assert(!sbuf->needs_flush); - if(sbuf->handle) { - SVGA_DBG(DEBUG_DMA, "release sid %p sz %d\n", sbuf->handle, sbuf->base.size); - svga_screen_surface_destroy(ss, &sbuf->key, &sbuf->handle); - } + if(sbuf->handle) + svga_buffer_destroy_host_surface(ss, sbuf); if(sbuf->hw.buf) svga_buffer_destroy_hw_storage(ss, sbuf); @@ -595,6 +517,9 @@ svga_buffer_create(struct pipe_screen *screen, struct svga_screen *ss = svga_screen(screen); struct svga_buffer *sbuf; + assert(size); + assert(alignment); + sbuf = CALLOC_STRUCT(svga_buffer); if(!sbuf) goto error1; @@ -755,8 +680,7 @@ svga_buffer_handle(struct svga_context *svga, assert(sbuf->hw.svga == svga); sbuf->needs_flush = TRUE; - assert(sbuf->head.prev && sbuf->head.next); - LIST_DEL(&sbuf->head); + assert(!sbuf->head.prev && !sbuf->head.next); LIST_ADDTAIL(&sbuf->head, &svga->dirty_buffers); } diff --git a/src/gallium/drivers/svga/svga_screen_buffer.h b/src/gallium/drivers/svga/svga_screen_buffer.h index 5d7af5a..448ac10 100644 --- a/src/gallium/drivers/svga/svga_screen_buffer.h +++ b/src/gallium/drivers/svga/svga_screen_buffer.h @@ -135,6 +135,11 @@ struct svga_buffer */ struct svga_winsys_surface *handle; + /** + * Whether the host has been ever written. 
+ */ + boolean host_written; + struct { unsigned count; boolean writing; @@ -178,9 +183,6 @@ svga_buffer_handle(struct svga_context *svga, void svga_context_flush_buffers(struct svga_context *svga); -boolean -svga_buffer_free_cached_hw_storage(struct svga_screen *ss); - struct svga_winsys_buffer * svga_winsys_buffer_create(struct svga_screen *ss, unsigned alignment, diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c index 2224c2d..0d69007 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.c +++ b/src/gallium/drivers/svga/svga_screen_texture.c @@ -306,11 +306,19 @@ svga_texture_create(struct pipe_screen *screen, tex->key.numFaces = 1; } + tex->key.cachable = 1; + if(templat->tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE; - if(templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) + if(templat->tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + tex->key.cachable = 0; + } + + if(templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) { tex->key.flags |= SVGA3D_SURFACE_HINT_SCANOUT; + tex->key.cachable = 0; + } /* * XXX: Never pass the SVGA3D_SURFACE_HINT_RENDERTARGET hint. 
Mesa cannot @@ -333,8 +341,6 @@ svga_texture_create(struct pipe_screen *screen, if(tex->key.format == SVGA3D_FORMAT_INVALID) goto error2; - tex->key.cachable = 1; - SVGA_DBG(DEBUG_DMA, "surface_create for texture\n", tex->handle); tex->handle = svga_screen_surface_create(svgascreen, &tex->key); if (tex->handle) @@ -416,6 +422,62 @@ svga_texture_blanket(struct pipe_screen * screen, } +struct pipe_texture * +svga_screen_texture_wrap_surface(struct pipe_screen *screen, + struct pipe_texture *base, + enum SVGA3dSurfaceFormat format, + struct svga_winsys_surface *srf) +{ + struct svga_texture *tex; + assert(screen); + + /* Only supports one type */ + if (base->target != PIPE_TEXTURE_2D || + base->last_level != 0 || + base->depth0 != 1) { + return NULL; + } + + if (!srf) + return NULL; + + if (svga_translate_format(base->format) != format) { + unsigned f1 = svga_translate_format(base->format); + unsigned f2 = format; + + /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */ + if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) || + (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) || + (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) ) ) { + debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2); + return NULL; + } + } + + tex = CALLOC_STRUCT(svga_texture); + if (!tex) + return NULL; + + tex->base = *base; + + + if (format == 1) + tex->base.format = PIPE_FORMAT_X8R8G8B8_UNORM; + else if (format == 2) + tex->base.format = PIPE_FORMAT_A8R8G8B8_UNORM; + + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + SVGA_DBG(DEBUG_DMA, "wrap surface sid %p\n", srf); + + tex->key.cachable = 0; + tex->handle = srf; + + return &tex->base; +} + + static void svga_texture_destroy(struct pipe_texture *pt) { diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c index cfdcae4..eda1aef 100644 --- a/src/gallium/drivers/svga/svga_state_framebuffer.c +++ 
b/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -32,8 +32,6 @@ #include "svga_cmd.h" #include "svga_debug.h" -#include "svga_hw_reg.h" - /*********************************************************************** * Hardware state update diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c index 8b6803a..2f9adae 100644 --- a/src/gallium/drivers/svga/svga_state_rss.c +++ b/src/gallium/drivers/svga/svga_state_rss.c @@ -31,9 +31,6 @@ #include "svga_state.h" #include "svga_cmd.h" -#include "svga_hw_reg.h" - - struct rs_queue { unsigned rs_count; diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c index b313794..b3c9687 100644 --- a/src/gallium/drivers/svga/svga_state_tss.c +++ b/src/gallium/drivers/svga/svga_state_tss.c @@ -33,8 +33,6 @@ #include "svga_state.h" #include "svga_cmd.h" -#include "svga_hw_reg.h" - void svga_cleanup_tss_binding(struct svga_context *svga) { diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c index b4f757a..aafb3e2 100644 --- a/src/gallium/drivers/svga/svga_swtnl_backend.c +++ b/src/gallium/drivers/svga/svga_swtnl_backend.c @@ -31,7 +31,6 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" -#include "util/u_simple_shaders.h" #include "svga_context.h" #include "svga_state.h" @@ -87,13 +86,13 @@ svga_vbuf_render_allocate_vertices( struct vbuf_render *render, if (!svga_render->vbuf) { svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size); svga_render->vbuf = pipe_buffer_create(screen, - 0, + 16, PIPE_BUFFER_USAGE_VERTEX, svga_render->vbuf_size); if(!svga_render->vbuf) { svga_context_flush(svga, NULL); svga_render->vbuf = pipe_buffer_create(screen, - 0, + 16, PIPE_BUFFER_USAGE_VERTEX, svga_render->vbuf_size); assert(svga_render->vbuf); @@ -123,7 +122,9 @@ svga_vbuf_render_map_vertices( struct vbuf_render *render ) char *ptr = (char*)pipe_buffer_map(screen, 
svga_render->vbuf, PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_FLUSH_EXPLICIT); + PIPE_BUFFER_USAGE_FLUSH_EXPLICIT | + PIPE_BUFFER_USAGE_DISCARD | + PIPE_BUFFER_USAGE_UNSYNCHRONIZED); return ptr + svga_render->vbuf_offset; } @@ -259,14 +260,14 @@ svga_vbuf_render_draw( struct vbuf_render *render, if (!svga_render->ibuf) { svga_render->ibuf_size = MAX2(size, svga_render->ibuf_alloc_size); svga_render->ibuf = pipe_buffer_create(screen, - 0, + 2, PIPE_BUFFER_USAGE_VERTEX, svga_render->ibuf_size); svga_render->ibuf_offset = 0; } - pipe_buffer_write(screen, svga_render->ibuf, - svga_render->ibuf_offset, 2 * nr_indices, indices); + pipe_buffer_write_nooverlap(screen, svga_render->ibuf, + svga_render->ibuf_offset, 2 * nr_indices, indices); /* off to hardware */ diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c index 7655121..0ae58c7 100644 --- a/src/gallium/drivers/svga/svga_swtnl_draw.c +++ b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -27,7 +27,6 @@ #include "draw/draw_vbuf.h" #include "pipe/p_inlines.h" #include "pipe/p_state.h" -#include "util/u_memory.h" #include "svga_context.h" #include "svga_swtnl.h" diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c index 94b6ccc..fe03e20 100644 --- a/src/gallium/drivers/svga/svga_swtnl_state.c +++ b/src/gallium/drivers/svga/svga_swtnl_state.c @@ -27,7 +27,6 @@ #include "draw/draw_vbuf.h" #include "pipe/p_inlines.h" #include "pipe/p_state.h" -#include "util/u_memory.h" #include "svga_context.h" #include "svga_swtnl.h" diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c index 23b3ace..1ae9906 100644 --- a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c @@ -29,9 +29,6 @@ #include "util/u_memory.h" #include "svga_tgsi_emit.h" -#include "svga_context.h" - - static boolean ps20_input( struct svga_shader_emitter *emit, 
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c index d1c7336..43fc0d3 100644 --- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c @@ -29,7 +29,6 @@ #include "util/u_memory.h" #include "svga_tgsi_emit.h" -#include "svga_context.h" static boolean translate_vs_ps_semantic( struct tgsi_declaration_semantic semantic, unsigned *usage, diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h index 59f299c..27b99fe 100644 --- a/src/gallium/drivers/svga/svga_winsys.h +++ b/src/gallium/drivers/svga/svga_winsys.h @@ -296,4 +296,10 @@ svga_screen_buffer_from_texture(struct pipe_texture *texture, struct pipe_buffer **buffer, unsigned *stride); +struct pipe_texture * +svga_screen_texture_wrap_surface(struct pipe_screen *screen, + struct pipe_texture *base, + enum SVGA3dSurfaceFormat format, + struct svga_winsys_surface *srf); + #endif /* SVGA_WINSYS_H_ */ diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c index 48d1c40..e7ca3a8 100644 --- a/src/gallium/drivers/trace/tr_drm.c +++ b/src/gallium/drivers/trace/tr_drm.c @@ -173,6 +173,7 @@ trace_drm_create(struct drm_api *api) if (!tr_api) goto error; + tr_api->base.driver_name = api->driver_name; tr_api->base.create_screen = trace_drm_create_screen; tr_api->base.create_context = trace_drm_create_context; tr_api->base.texture_from_shared_handle = trace_drm_texture_from_shared_handle; diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index 5fbd62a..72f5c1d 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -63,13 +63,6 @@ pipe_buffer_map(struct pipe_screen *screen, if(screen->buffer_map_range) { unsigned offset = 0; unsigned length = buf->size; - - /* XXX: Actually we should be using/detecting DISCARD - * instead of assuming that WRITE implies discard */ - if((usage & 
PIPE_BUFFER_USAGE_CPU_WRITE) && - !(usage & PIPE_BUFFER_USAGE_DISCARD)) - usage |= PIPE_BUFFER_USAGE_CPU_READ; - return screen->buffer_map_range(screen, buf, offset, length, usage); } else @@ -126,7 +119,39 @@ pipe_buffer_write(struct pipe_screen *screen, map = pipe_buffer_map_range(screen, buf, offset, size, PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_FLUSH_EXPLICIT); + PIPE_BUFFER_USAGE_FLUSH_EXPLICIT | + PIPE_BUFFER_USAGE_DISCARD); + assert(map); + if(map) { + memcpy((uint8_t *)map + offset, data, size); + pipe_buffer_flush_mapped_range(screen, buf, offset, size); + pipe_buffer_unmap(screen, buf); + } +} + +/** + * Special case for writing non-overlapping ranges. + * + * We can avoid GPU/CPU synchronization when writing range that has never + * been written before. + */ +static INLINE void +pipe_buffer_write_nooverlap(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, unsigned size, + const void *data) +{ + void *map; + + assert(offset < buf->size); + assert(offset + size <= buf->size); + assert(size); + + map = pipe_buffer_map_range(screen, buf, offset, size, + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_FLUSH_EXPLICIT | + PIPE_BUFFER_USAGE_DISCARD | + PIPE_BUFFER_USAGE_UNSYNCHRONIZED); assert(map); if(map) { memcpy((uint8_t *)map + offset, data, size); diff --git a/src/gallium/include/state_tracker/drm_api.h b/src/gallium/include/state_tracker/drm_api.h index bb92892..b248a81 100644 --- a/src/gallium/include/state_tracker/drm_api.h +++ b/src/gallium/include/state_tracker/drm_api.h @@ -31,6 +31,11 @@ struct drm_api const char *name; /** + * Kernel driver name, as accepted by drmOpenByName. 
+ */ + const char *driver_name; + + /** * Special buffer functions */ /*@{*/ diff --git a/src/gallium/state_trackers/dri/dri_context.c b/src/gallium/state_trackers/dri/dri_context.c index f2e5f3f..07f0554 100644 --- a/src/gallium/state_trackers/dri/dri_context.c +++ b/src/gallium/state_trackers/dri/dri_context.c @@ -101,6 +101,12 @@ dri_destroy_context(__DRIcontext * cPriv) { struct dri_context *ctx = dri_context(cPriv); + /* note: we are freeing values and nothing more because + * driParseConfigFiles allocated values only - the rest + * is owned by screen optionCache. + */ + FREE(ctx->optionCache.values); + /* No particular reason to wait for command completion before * destroying a context, but it is probably worthwhile flushing it * to avoid having to add code elsewhere to cope with flushing a diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index 0fdfa96..28fd8de 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -123,11 +123,12 @@ dri_get_buffers(__DRIdrawable * dPriv) struct dri_drawable *drawable = dri_drawable(dPriv); struct pipe_surface *surface = NULL; - struct pipe_screen *screen = dri_screen(drawable->sPriv)->pipe_screen; + struct dri_screen *st_screen = dri_screen(drawable->sPriv); + struct pipe_screen *screen = st_screen->pipe_screen; __DRIbuffer *buffers = NULL; __DRIscreen *dri_screen = drawable->sPriv; __DRIdrawable *dri_drawable = drawable->dPriv; - struct drm_api *api = ((struct dri_screen*)(dri_screen->private))->api; + struct drm_api *api = st_screen->api; boolean have_depth = FALSE; int i, count; @@ -180,7 +181,9 @@ dri_get_buffers(__DRIdrawable * dPriv) switch (buffers[i].attachment) { case __DRI_BUFFER_FRONT_LEFT: - continue; + if (!st_screen->auto_fake_front) + continue; + /* fallthrough */ case __DRI_BUFFER_FAKE_FRONT_LEFT: index = ST_SURFACE_FRONT_LEFT; format = drawable->color_format; @@ -373,8 +376,8 @@ 
dri_create_buffer(__DRIscreen * sPriv, /* TODO incase of double buffer visual, delay fake creation */ i = 0; drawable->attachments[i++] = __DRI_BUFFER_FRONT_LEFT; - drawable->attachments[i++] = __DRI_BUFFER_FAKE_FRONT_LEFT; - + if (!screen->auto_fake_front) + drawable->attachments[i++] = __DRI_BUFFER_FAKE_FRONT_LEFT; if (visual->doubleBufferMode) drawable->attachments[i++] = __DRI_BUFFER_BACK_LEFT; if (visual->depthBits && visual->stencilBits) diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index d8c0543..cdc8eb1 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -292,6 +292,8 @@ dri_init_screen2(__DRIscreen * sPriv) { struct dri_screen *screen; struct drm_create_screen_arg arg; + const __DRIdri2LoaderExtension *dri2_ext = + sPriv->dri2.loader; screen = CALLOC_STRUCT(dri_screen); if (!screen) @@ -317,6 +319,9 @@ dri_init_screen2(__DRIscreen * sPriv) driParseOptionInfo(&screen->optionCache, __driConfigOptions, __driNConfigOptions); + screen->auto_fake_front = dri2_ext->base.version >= 3 && + dri2_ext->getBuffersWithFormat != NULL; + return dri_fill_in_modes(screen, 32); fail: return NULL; @@ -326,8 +331,18 @@ static void dri_destroy_screen(__DRIscreen * sPriv) { struct dri_screen *screen = dri_screen(sPriv); + int i; screen->pipe_screen->destroy(screen->pipe_screen); + + for (i = 0; i < (1 << screen->optionCache.tableSize); ++i) { + FREE(screen->optionCache.info[i].name); + FREE(screen->optionCache.info[i].ranges); + } + + FREE(screen->optionCache.info); + FREE(screen->optionCache.values); + FREE(screen); sPriv->private = NULL; } diff --git a/src/gallium/state_trackers/dri/dri_screen.h b/src/gallium/state_trackers/dri/dri_screen.h index 03387a0..75a0ee4 100644 --- a/src/gallium/state_trackers/dri/dri_screen.h +++ b/src/gallium/state_trackers/dri/dri_screen.h @@ -59,6 +59,7 @@ struct dri_screen struct pipe_screen *pipe_screen; boolean d_depth_bits_last; 
boolean sd_depth_bits_last; + boolean auto_fake_front; }; /** cast wrapper */ diff --git a/src/gallium/state_trackers/wgl/stw_pixelformat.c b/src/gallium/state_trackers/wgl/stw_pixelformat.c index 54cc361..7d4c243 100644 --- a/src/gallium/state_trackers/wgl/stw_pixelformat.c +++ b/src/gallium/state_trackers/wgl/stw_pixelformat.c @@ -95,8 +95,6 @@ stw_pf_depth_stencil[] = { { PIPE_FORMAT_Z24X8_UNORM, {24, 0} }, { PIPE_FORMAT_X8Z24_UNORM, {24, 0} }, { PIPE_FORMAT_Z16_UNORM, {16, 0} }, - /* pure stencil */ - { PIPE_FORMAT_S8_UNORM, { 0, 8} }, /* combined depth-stencil */ { PIPE_FORMAT_S8Z24_UNORM, {24, 8} }, { PIPE_FORMAT_Z24S8_UNORM, {24, 8} } @@ -220,7 +218,8 @@ stw_pixelformat_init( void ) const struct stw_pf_color_info *color = &stw_pf_color[j]; if(!screen->is_format_supported(screen, color->format, PIPE_TEXTURE_2D, - PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) + PIPE_TEXTURE_USAGE_RENDER_TARGET | + PIPE_TEXTURE_USAGE_DISPLAY_TARGET, 0)) continue; for(k = 0; k < Elements(stw_pf_doublebuffer); ++k) { diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c index 1c248a6..0324441 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.c +++ b/src/gallium/state_trackers/xorg/xorg_composite.c @@ -4,10 +4,7 @@ #include "xorg_exa_tgsi.h" #include "cso_cache/cso_context.h" -#include "util/u_draw_quad.h" -#include "util/u_math.h" -#include "pipe/p_inlines.h" /*XXX also in Xrender.h but the including it here breaks compilition */ #define XFixedToDouble(f) (((double) (f)) / 65536.) diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index fd82f4f..e6a89c7 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -44,9 +44,12 @@ #include "util/u_rect.h" /* Make all the #if cases in the code esier to read */ -/* XXX can it be set to 1? 
*/ #ifndef DRI2INFOREC_VERSION -#define DRI2INFOREC_VERSION 0 +#define DRI2INFOREC_VERSION 1 +#endif + +#if DRI2INFOREC_VERSION == 2 +static Bool set_format_in_do_create_buffer; #endif typedef struct { @@ -147,7 +150,9 @@ dri2_do_create_buffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int form buffer->driverPrivate = private; buffer->flags = 0; /* not tiled */ #if DRI2INFOREC_VERSION == 2 - ((DRI2Buffer2Ptr)buffer)->format = 0; + /* ABI forwards/backwards compatibility */ + if (set_format_in_do_create_buffer) + ((DRI2Buffer2Ptr)buffer)->format = 0; #elif DRI2INFOREC_VERSION >= 3 buffer->format = 0; #endif @@ -211,7 +216,9 @@ dri2_destroy_buffer(DrawablePtr pDraw, DRI2Buffer2Ptr buffer) xfree(buffer); } -#else /* DRI2INFOREC_VERSION < 2 */ +#endif /* DRI2INFOREC_VERSION >= 2 */ + +#if DRI2INFOREC_VERSION <= 2 static DRI2BufferPtr dri2_create_buffers(DrawablePtr pDraw, unsigned int *attachments, int count) @@ -261,7 +268,7 @@ dri2_destroy_buffers(DrawablePtr pDraw, DRI2BufferPtr buffers, int count) } } -#endif /* DRI2INFOREC_VERSION >= 2 */ +#endif /* DRI2INFOREC_VERSION <= 2 */ static void dri2_copy_region(DrawablePtr pDraw, RegionPtr pRegion, @@ -369,12 +376,17 @@ xorg_dri2_init(ScreenPtr pScreen) ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; modesettingPtr ms = modesettingPTR(pScrn); DRI2InfoRec dri2info; + int major, minor; + + if (xf86LoaderCheckSymbol("DRI2Version")) { + DRI2Version(&major, &minor); + } else { + /* Assume version 1.0 */ + major = 1; + minor = 0; + } -#if DRI2INFOREC_VERSION >= 2 dri2info.version = DRI2INFOREC_VERSION; -#else - dri2info.version = 1; -#endif dri2info.fd = ms->fd; dri2info.driverName = pScrn->driverName; @@ -383,7 +395,22 @@ xorg_dri2_init(ScreenPtr pScreen) #if DRI2INFOREC_VERSION >= 2 dri2info.CreateBuffer = dri2_create_buffer; dri2info.DestroyBuffer = dri2_destroy_buffer; -#else +#endif + + /* For X servers in the 1.6.x series there were two DRI2 versions. 
+ * This allows us to build one binary that works on both servers. + */ +#if DRI2INFOREC_VERSION == 2 + if (minor == 0) { + set_format_in_do_create_buffer = FALSE; + dri2info.CreateBuffers = dri2_create_buffers; + dri2info.DestroyBuffers = dri2_destroy_buffers; + } else + set_format_in_do_create_buffer = FALSE; +#endif + + /* For version 1 set these unconditionally. */ +#if DRI2INFOREC_VERSION == 1 dri2info.CreateBuffers = dri2_create_buffers; dri2info.DestroyBuffers = dri2_destroy_buffers; #endif diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index b02fe68..41bfcd0 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ -45,7 +45,6 @@ #include "miscstruct.h" #include "dixstruct.h" #include "xf86xv.h" -#include <X11/extensions/Xv.h> #ifndef XSERVER_LIBPCIACCESS #error "libpciaccess needed" #endif @@ -206,16 +205,41 @@ drv_init_drm(ScrnInfoPtr pScrn) ms->PciInfo->dev, ms->PciInfo->func ); - ms->fd = drmOpen(NULL, BusID); - if (ms->fd < 0) - return FALSE; + ms->api = drm_api_create(); + ms->fd = drmOpen(ms->api ? 
ms->api->driver_name : NULL, BusID); + xfree(BusID); + + if (ms->fd >= 0) + return TRUE; + + if (ms->api && ms->api->destroy) + ms->api->destroy(ms->api); + + ms->api = NULL; + + return FALSE; } return TRUE; } static Bool +drv_close_drm(ScrnInfoPtr pScrn) +{ + modesettingPtr ms = modesettingPTR(pScrn); + + if (ms->api && ms->api->destroy) + ms->api->destroy(ms->api); + ms->api = NULL; + + drmClose(ms->fd); + ms->fd = -1; + + return TRUE; +} + +static Bool drv_init_resource_management(ScrnInfoPtr pScrn) { modesettingPtr ms = modesettingPTR(pScrn); @@ -229,7 +253,6 @@ drv_init_resource_management(ScrnInfoPtr pScrn) if (ms->screen || ms->kms) return TRUE; - ms->api = drm_api_create(); if (ms->api) { ms->screen = ms->api->create_screen(ms->api, ms->fd, NULL); @@ -269,10 +292,6 @@ drv_close_resource_management(ScrnInfoPtr pScrn) } ms->screen = NULL; - if (ms->api && ms->api->destroy) - ms->api->destroy(ms->api); - ms->api = NULL; - #ifdef HAVE_LIBKMS if (ms->kms) kms_destroy(&ms->kms); @@ -823,8 +842,7 @@ drv_close_screen(int scrnIndex, ScreenPtr pScreen) drv_close_resource_management(pScrn); - drmClose(ms->fd); - ms->fd = -1; + drv_close_drm(pScrn); pScrn->vtSema = FALSE; pScreen->CloseScreen = ms->CloseScreen; diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index d9432ba..c91dee7 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -41,7 +41,6 @@ #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" #include "util/u_format.h" #include "util/u_rect.h" diff --git a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c index bed17ca..3e5e6bd 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c +++ b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c @@ -6,11 +6,9 @@ #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_state.h" -#include 
"pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" #include "util/u_memory.h" -#include "util/u_simple_shaders.h" #include "tgsi/tgsi_ureg.h" diff --git a/src/gallium/state_trackers/xorg/xorg_output.c b/src/gallium/state_trackers/xorg/xorg_output.c index 251f331..13c3fb9 100644 --- a/src/gallium/state_trackers/xorg/xorg_output.c +++ b/src/gallium/state_trackers/xorg/xorg_output.c @@ -49,8 +49,6 @@ #include <X11/extensions/dpms.h> #endif -#include "X11/Xatom.h" - #include "xorg_tracker.h" static char *output_enum_list[] = { diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c index 5bf0e94..7bcf77e 100644 --- a/src/gallium/state_trackers/xorg/xorg_xv.c +++ b/src/gallium/state_trackers/xorg/xorg_xv.c @@ -11,7 +11,6 @@ #include "cso_cache/cso_context.h" #include "pipe/p_screen.h" -#include "pipe/p_inlines.h" #include "util/u_format.h" diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_api.c b/src/gallium/winsys/drm/intel/gem/intel_drm_api.c index 450ae09..8c8176e 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_api.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_api.c @@ -196,6 +196,7 @@ destroy(struct drm_api *api) struct drm_api intel_drm_api = { .name = "i915", + .driver_name = "i915", .create_context = intel_drm_create_context, .create_screen = intel_drm_create_screen, .texture_from_shared_handle = intel_drm_texture_from_shared_handle, diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c index 4b2c6a1..c9f39d8 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c @@ -255,6 +255,7 @@ nouveau_drm_handle_from_pt(struct drm_api *api, struct pipe_screen *pscreen, struct drm_api drm_api_hooks = { .name = "nouveau", + .driver_name = "nouveau", .create_screen = nouveau_drm_create_screen, .create_context = nouveau_drm_create_context, .texture_from_shared_handle = 
nouveau_drm_pt_from_name, diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c index 9552f0a..bff6fdc 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c @@ -270,6 +270,7 @@ static boolean radeon_local_handle_from_texture(struct drm_api *api, struct drm_api drm_api_hooks = { .name = "radeon", + .driver_name = "radeon", .create_screen = radeon_create_screen, .create_context = radeon_create_context, .texture_from_shared_handle = radeon_texture_from_shared_handle, diff --git a/src/gallium/winsys/drm/vmware/core/vmw_context.c b/src/gallium/winsys/drm/vmware/core/vmw_context.c index b699758..b5fd4f5 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_context.c +++ b/src/gallium/winsys/drm/vmware/core/vmw_context.c @@ -41,9 +41,18 @@ #define VMW_COMMAND_SIZE (64*1024) #define VMW_SURFACE_RELOCS (1024) +#define VMW_REGION_RELOCS (512) #define VMW_MUST_FLUSH_STACK 8 +struct vmw_region_relocation +{ + struct SVGAGuestPtr *where; + struct pb_buffer *buffer; + /* TODO: put offset info inside where */ + uint32 offset; +}; + struct vmw_svga_winsys_context { struct svga_winsys_context base; @@ -69,10 +78,31 @@ struct vmw_svga_winsys_context uint32_t staged; uint32_t reserved; } surface; + + struct { + struct vmw_region_relocation relocs[VMW_REGION_RELOCS]; + uint32_t size; + uint32_t used; + uint32_t staged; + uint32_t reserved; + } region; struct pb_validate *validate; uint32_t last_fence; + + /** + * The amount of GMR that is referred by the commands currently batched + * in the context. + */ + uint32_t seen_regions; + + /** + * Whether this context should fail to reserve more commands, not because it + * ran out of command space, but because a substantial ammount of GMR was + * referred. 
+ */ + boolean preemptive_flush; }; @@ -96,6 +126,19 @@ vmw_swc_flush(struct svga_winsys_context *swc, ret = pb_validate_validate(vswc->validate); assert(ret == PIPE_OK); if(ret == PIPE_OK) { + + /* Apply relocations */ + for(i = 0; i < vswc->region.used; ++i) { + struct vmw_region_relocation *reloc = &vswc->region.relocs[i]; + struct SVGAGuestPtr ptr; + + if(!vmw_gmr_bufmgr_region_ptr(reloc->buffer, &ptr)) + assert(0); + + ptr.offset += reloc->offset; + + *reloc->where = ptr; + } if (vswc->command.used) vmw_ioctl_command(vswc->vws, @@ -121,9 +164,18 @@ vmw_swc_flush(struct svga_winsys_context *swc, vswc->surface.used = 0; vswc->surface.reserved = 0; + for(i = 0; i < vswc->region.used + vswc->region.staged; ++i) { + pb_reference(&vswc->region.relocs[i].buffer, NULL); + } + + vswc->region.used = 0; + vswc->region.reserved = 0; + #ifdef DEBUG vswc->must_flush = FALSE; #endif + vswc->preemptive_flush = FALSE; + vswc->seen_regions = 0; if(pfence) *pfence = fence; @@ -151,8 +203,10 @@ vmw_swc_reserve(struct svga_winsys_context *swc, if(nr_bytes > vswc->command.size) return NULL; - if(vswc->command.used + nr_bytes > vswc->command.size || - vswc->surface.used + nr_relocs > vswc->surface.size) { + if(vswc->preemptive_flush || + vswc->command.used + nr_bytes > vswc->command.size || + vswc->surface.used + nr_relocs > vswc->surface.size || + vswc->region.used + nr_relocs > vswc->region.size) { #ifdef DEBUG vswc->must_flush = TRUE; debug_backtrace_capture(vswc->must_flush_stack, 1, @@ -163,11 +217,14 @@ vmw_swc_reserve(struct svga_winsys_context *swc, assert(vswc->command.used + nr_bytes <= vswc->command.size); assert(vswc->surface.used + nr_relocs <= vswc->surface.size); - + assert(vswc->region.used + nr_relocs <= vswc->region.size); + vswc->command.reserved = nr_bytes; vswc->surface.reserved = nr_relocs; vswc->surface.staged = 0; - + vswc->region.reserved = nr_relocs; + vswc->region.staged = 0; + return vswc->command.buffer + vswc->command.used; } @@ -206,20 +263,41 @@ 
vmw_swc_region_relocation(struct svga_winsys_context *swc, unsigned flags) { struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); - struct SVGAGuestPtr ptr; - struct pb_buffer *buf = vmw_pb_buffer(buffer); + struct vmw_region_relocation *reloc; enum pipe_error ret; + + assert(vswc->region.staged < vswc->region.reserved); - if(!vmw_gmr_bufmgr_region_ptr(buf, &ptr)) - assert(0); - - ptr.offset += offset; + reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged]; + reloc->where = where; + pb_reference(&reloc->buffer, vmw_pb_buffer(buffer)); + reloc->offset = offset; - *where = ptr; + ++vswc->region.staged; - ret = pb_validate_add_buffer(vswc->validate, buf, flags); + ret = pb_validate_add_buffer(vswc->validate, reloc->buffer, flags); /* TODO: Update pipebuffer to reserve buffers and not fail here */ assert(ret == PIPE_OK); + + /* + * Flush preemptively the FIFO commands to keep the GMR working set within + * the GMR pool size. + * + * This is necessary for applications like SPECviewperf that generate huge + * amounts of immediate vertex data, so that we don't pile up too much of + * that vertex data neither in the guest nor in the host. + * + * Note that in the current implementation if a region is referred twice in + * a command stream, it will be accounted twice. We could detect repeated + * regions and count only once, but there is no incentive to do that, since + * regions are typically short-lived; always referred in a single command; + * and at the worst we just flush the commands a bit sooner, which for the + * SVGA virtual device it's not a performance issue since flushing commands + * to the FIFO won't cause flushing in the host. 
+ */ + vswc->seen_regions += reloc->buffer->base.size; + if(vswc->seen_regions >= VMW_GMR_POOL_SIZE/2) + vswc->preemptive_flush = TRUE; } @@ -238,6 +316,12 @@ vmw_swc_commit(struct svga_winsys_context *swc) vswc->surface.used += vswc->surface.staged; vswc->surface.staged = 0; vswc->surface.reserved = 0; + + assert(vswc->region.staged <= vswc->region.reserved); + assert(vswc->region.used + vswc->region.staged <= vswc->region.size); + vswc->region.used += vswc->region.staged; + vswc->region.staged = 0; + vswc->region.reserved = 0; } @@ -246,6 +330,11 @@ vmw_swc_destroy(struct svga_winsys_context *swc) { struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); unsigned i; + + for(i = 0; i < vswc->region.used; ++i) { + pb_reference(&vswc->region.relocs[i].buffer, NULL); + } + for(i = 0; i < vswc->surface.used; ++i) { p_atomic_dec(&vswc->surface.handles[i]->validated); vmw_svga_winsys_surface_reference(&vswc->surface.handles[i], NULL); @@ -279,6 +368,7 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws) vswc->command.size = VMW_COMMAND_SIZE; vswc->surface.size = VMW_SURFACE_RELOCS; + vswc->region.size = VMW_REGION_RELOCS; vswc->validate = pb_validate_create(); if(!vswc->validate) { diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen.h b/src/gallium/winsys/drm/vmware/core/vmw_screen.h index a875107..f1d6986 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_screen.h +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen.h @@ -40,6 +40,10 @@ #include "svga_winsys.h" + +#define VMW_GMR_POOL_SIZE (16*1024*1024) + + struct pb_manager; struct vmw_region; diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c index 4f5ccea..8be9d74 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c @@ -85,6 +85,23 @@ vmw_drm_create_screen(struct drm_api *drm_api, struct pipe_screen *screen; struct dri1_create_screen_arg *dri1; + 
if (!arg || arg->mode == DRM_CREATE_NORMAL) { + struct dri1_api_version drm_ver; + drmVersionPtr ver; + + ver = drmGetVersion(fd); + if (ver == NULL) + return NULL; + + drm_ver.major = ver->version_major; + drm_ver.minor = ver->version_minor; + + drmFreeVersion(ver); + if (!vmw_dri1_check_version(&drm_ver, &drm_required, + &drm_compat, "vmwgfx drm driver")) + return NULL; + } + if (arg != NULL) { switch (arg->mode) { case DRM_CREATE_NORMAL: @@ -220,22 +237,19 @@ vmw_dri1_present_locked(struct pipe_context *locked_pipe, vmw_svga_winsys_surface_reference(&vsrf, NULL); } -/** - * FIXME: We'd probably want to cache these buffers in the - * screen, based on handle. - */ - -static struct pipe_buffer * -vmw_drm_buffer_from_handle(struct drm_api *drm_api, - struct pipe_screen *screen, - const char *name, - unsigned handle) +static struct pipe_texture * +vmw_drm_texture_from_handle(struct drm_api *drm_api, + struct pipe_screen *screen, + struct pipe_texture *templat, + const char *name, + unsigned stride, + unsigned handle) { struct vmw_svga_winsys_surface *vsrf; struct svga_winsys_surface *ssrf; struct vmw_winsys_screen *vws = vmw_winsys_screen(svga_winsys_screen(screen)); - struct pipe_buffer *buf; + struct pipe_texture *tex; union drm_vmw_surface_reference_arg arg; struct drm_vmw_surface_arg *req = &arg.req; struct drm_vmw_surface_create_req *rep = &arg.rep; @@ -282,43 +296,28 @@ vmw_drm_buffer_from_handle(struct drm_api *drm_api, pipe_reference_init(&vsrf->refcnt, 1); p_atomic_set(&vsrf->validated, 0); + vsrf->screen = vws; vsrf->sid = handle; ssrf = svga_winsys_surface(vsrf); - buf = svga_screen_buffer_wrap_surface(screen, rep->format, ssrf); - if (!buf) + tex = svga_screen_texture_wrap_surface(screen, templat, rep->format, ssrf); + if (!tex) vmw_svga_winsys_surface_reference(&vsrf, NULL); - return buf; + return tex; out_mip: vmw_ioctl_surface_destroy(vws, handle); return NULL; } -static struct pipe_texture * -vmw_drm_texture_from_handle(struct drm_api *drm_api, - 
struct pipe_screen *screen, - struct pipe_texture *templat, - const char *name, - unsigned stride, - unsigned handle) -{ - struct pipe_buffer *buffer; - buffer = vmw_drm_buffer_from_handle(drm_api, screen, name, handle); - - if (!buffer) - return NULL; - - return screen->texture_blanket(screen, templat, &stride, buffer); -} - static boolean -vmw_drm_handle_from_buffer(struct drm_api *drm_api, +vmw_drm_handle_from_texture(struct drm_api *drm_api, struct pipe_screen *screen, - struct pipe_buffer *buffer, + struct pipe_texture *texture, + unsigned *stride, unsigned *handle) { struct svga_winsys_surface *surface = - svga_screen_buffer_get_winsys_surface(buffer); + svga_screen_texture_get_winsys_surface(texture); struct vmw_svga_winsys_surface *vsrf; if (!surface) @@ -326,25 +325,13 @@ vmw_drm_handle_from_buffer(struct drm_api *drm_api, vsrf = vmw_svga_winsys_surface(surface); *handle = vsrf->sid; + *stride = pf_get_nblocksx(&texture->block, texture->width[0]) * + texture->block.size; + vmw_svga_winsys_surface_reference(&vsrf, NULL); return TRUE; } -static boolean -vmw_drm_handle_from_texture(struct drm_api *drm_api, - struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned *stride, - unsigned *handle) -{ - struct pipe_buffer *buffer; - - if (!svga_screen_buffer_from_texture(texture, &buffer, stride)) - return FALSE; - - return vmw_drm_handle_from_buffer(drm_api, screen, buffer, handle); -} - static struct pipe_context* vmw_drm_create_context(struct drm_api *drm_api, struct pipe_screen *screen) @@ -359,6 +346,7 @@ static struct dri1_api dri1_api_hooks = { static struct drm_api vmw_drm_api_hooks = { .name = "vmwgfx", + .driver_name = "vmwgfx", .create_screen = vmw_drm_create_screen, .create_context = vmw_drm_create_context, .texture_from_shared_handle = vmw_drm_texture_from_handle, diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c index b1c24b0..b9823d7 100644 --- 
a/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c @@ -53,14 +53,32 @@ vmw_pools_init(struct vmw_winsys_screen *vws) goto error; vws->pools.gmr_mm = mm_bufmgr_create(vws->pools.gmr, - 16*1024*1024, + VMW_GMR_POOL_SIZE, 12 /* 4096 alignment */); if(!vws->pools.gmr_mm) goto error; + /* + * GMR buffers are typically short-lived, but it's possible that at a given + * instant a buffer is mapped. So to avoid stalling we tell pipebuffer to + * forbid creation of buffers beyond half the GMR pool size. + * + * XXX: It is unclear whether we want to limit the total amount of temporary + * malloc memory used to back up unvalidated GMR buffers. On one hand it is + * preferable to fail an allocation than to exhaust the guest memory with + * temporary data, but on the other hand it is possible that a stupid + * application creates large vertex buffers and does not use them for a long + * time -- since the svga pipe driver only emits the DMA uploads when a + * buffer is used for drawing this would effectively disable swapping GMR + * buffers to memory. So far, the preemptive flush already seems to keep + * total allocated memory within relatively small numbers, so we don't + * limit. + */ vws->pools.gmr_fenced = fenced_bufmgr_create( vws->pools.gmr_mm, - vmw_fence_ops_create(vws)); + vmw_fence_ops_create(vws), + VMW_GMR_POOL_SIZE/2, + ~0); #ifdef DEBUG vws->pools.gmr_fenced = pb_debug_manager_create(vws->pools.gmr_fenced, diff --git a/src/gallium/winsys/xlib/xlib_softpipe.c b/src/gallium/winsys/xlib/xlib_softpipe.c index f7c0099..49dbf44 100644 --- a/src/gallium/winsys/xlib/xlib_softpipe.c +++ b/src/gallium/winsys/xlib/xlib_softpipe.c @@ -204,6 +204,14 @@ xm_buffer_destroy(struct pipe_buffer *buf) { struct xm_buffer *oldBuf = xm_buffer(buf); + /* + * Note oldBuf->data may point to one of three things: + * 1. XShm shared memory image data + * 2. User-provided (wrapped) memory, see xm_user_buffer_create() + * 3. 
Regular, malloc'd memory + * We need to be careful with freeing that data now. + */ + if (oldBuf->data) { #ifdef USE_XSHM if (oldBuf->shminfo.shmid >= 0) { @@ -213,12 +221,19 @@ xm_buffer_destroy(struct pipe_buffer *buf) oldBuf->shminfo.shmid = -1; oldBuf->shminfo.shmaddr = (char *) -1; } - else -#endif - { - if (!oldBuf->userBuffer) { - align_free(oldBuf->data); + + if (oldBuf->tempImage) { + if (oldBuf->data == oldBuf->tempImage->data) { + /* oldBuf->data points at the xshm memory which we'll now free */ + oldBuf->data = NULL; } + XDestroyImage(oldBuf->tempImage); + } +#endif + + if (oldBuf->data && !oldBuf->userBuffer) { + /* this was regular malloc'd memory */ + align_free(oldBuf->data); } oldBuf->data = NULL; |