diff options
Diffstat (limited to 'src/gallium/drivers/nouveau/nv50')
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_compute.c | 145 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_context.c | 34 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_context.h | 77 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_program.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_shader_state.c | 37 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_state.c | 58 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_state_validate.c | 137 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_surface.c | 40 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_tex.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 14 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_winsys.h | 4 |
12 files changed, 283 insertions, 276 deletions
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c index 04488d6..d781f6f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c @@ -67,122 +67,94 @@ nv50_screen_compute_setup(struct nv50_screen *screen, if (ret) return ret; - BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); + BEGIN_NV04(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->compute->handle); - BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1); + BEGIN_NV04(push, NV50_CP(UNK02A0), 1); PUSH_DATA (push, 1); - BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1); + BEGIN_NV04(push, NV50_CP(DMA_STACK), 1); PUSH_DATA (push, fifo->vram); - BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2); + BEGIN_NV04(push, NV50_CP(STACK_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->stack_bo->offset); PUSH_DATA (push, screen->stack_bo->offset); - BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1); + BEGIN_NV04(push, NV50_CP(STACK_SIZE_LOG), 1); PUSH_DATA (push, 4); - BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1); + BEGIN_NV04(push, NV50_CP(UNK0290), 1); PUSH_DATA (push, 1); - BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1); + BEGIN_NV04(push, NV50_CP(LANES32_ENABLE), 1); PUSH_DATA (push, 1); - BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1); + BEGIN_NV04(push, NV50_CP(REG_MODE), 1); PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED); - BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1); + BEGIN_NV04(push, NV50_CP(UNK0384), 1); PUSH_DATA (push, 0x100); - BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1); + BEGIN_NV04(push, NV50_CP(DMA_GLOBAL), 1); PUSH_DATA (push, fifo->vram); for (i = 0; i < 15; i++) { - BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2); + BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(i)), 2); PUSH_DATA (push, 0); PUSH_DATA (push, 0); - BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1); + BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(i)), 1); PUSH_DATA (push, 0); - BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1); + BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(i)), 1); PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); } - BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2); + BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(15)), 2); PUSH_DATA (push, 0); PUSH_DATA (push, 0); - BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1); + BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(15)), 1); PUSH_DATA (push, ~0); - BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1); + BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(15)), 1); PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); - BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1); + BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_LOG_ALLOC), 1); PUSH_DATA (push, 7); - BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1); + BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_NO_CLAMP), 1); PUSH_DATA (push, 1); - BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1); + BEGIN_NV04(push, NV50_CP(STACK_WARPS_LOG_ALLOC), 1); PUSH_DATA (push, 7); - BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1); + BEGIN_NV04(push, NV50_CP(STACK_WARPS_NO_CLAMP), 1); PUSH_DATA (push, 1); - BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1); + BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1); PUSH_DATA (push, 0); - BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1); + BEGIN_NV04(push, NV50_CP(DMA_TEXTURE), 1); PUSH_DATA (push, fifo->vram); - BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1); + BEGIN_NV04(push, NV50_CP(TEX_LIMITS), 1); PUSH_DATA (push, 0x54); - BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1); + BEGIN_NV04(push, NV50_CP(LINKED_TSC), 1); PUSH_DATA (push, 0); - BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1); + BEGIN_NV04(push, NV50_CP(DMA_TIC), 1); PUSH_DATA (push, fifo->vram); - BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3); + BEGIN_NV04(push, NV50_CP(TIC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset); PUSH_DATA (push, screen->txc->offset); PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1); - BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1); + BEGIN_NV04(push, NV50_CP(DMA_TSC), 1); PUSH_DATA (push, fifo->vram); - BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3); + BEGIN_NV04(push, NV50_CP(TSC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset + 65536); PUSH_DATA (push, screen->txc->offset + 65536); PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1); - BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1); + BEGIN_NV04(push, NV50_CP(DMA_CODE_CB), 1); PUSH_DATA (push, fifo->vram); - BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1); + BEGIN_NV04(push, NV50_CP(DMA_LOCAL), 1); PUSH_DATA (push, fifo->vram); - BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2); + BEGIN_NV04(push, NV50_CP(LOCAL_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->tls_bo->offset + 65536); PUSH_DATA (push, screen->tls_bo->offset + 65536); - BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1); + BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1); PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2)); return 0; } -static bool -nv50_compute_validate_program(struct nv50_context *nv50) -{ - struct nv50_program *prog = nv50->compprog; - - if (prog->mem) - return true; - - if (!prog->translated) { - prog->translated = nv50_program_translate( - prog, nv50->screen->base.device->chipset, &nv50->base.debug); - if (!prog->translated) - return false; - } - if (unlikely(!prog->code_size)) - return false; - - if (likely(prog->code_size)) { - if (nv50_program_upload_code(nv50, prog)) { - struct nouveau_pushbuf *push = nv50->base.pushbuf; - BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1); - PUSH_DATA (push, 0); - return true; - } - } - return false; -} - static void nv50_compute_validate_globals(struct nv50_context *nv50) { @@ -198,26 +170,25 @@ nv50_compute_validate_globals(struct nv50_context *nv50) } } +static struct nv50_state_validate +validate_list_cp[] = { + { nv50_compprog_validate, NV50_NEW_CP_PROGRAM }, + { nv50_compute_validate_globals, NV50_NEW_CP_GLOBALS }, +}; + static bool -nv50_compute_state_validate(struct nv50_context *nv50) +nv50_state_validate_cp(struct nv50_context *nv50, uint32_t mask) { - if (!nv50_compute_validate_program(nv50)) - return false; - - if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS) - nv50_compute_validate_globals(nv50); + bool ret; /* TODO: validate textures, samplers, surfaces */ + ret = nv50_state_validate(nv50, mask, validate_list_cp, + ARRAY_SIZE(validate_list_cp), &nv50->dirty_cp, + nv50->bufctx_cp); - nv50_bufctx_fence(nv50->bufctx_cp, false); - - nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_cp); - if (unlikely(nouveau_pushbuf_validate(nv50->base.pushbuf))) - return false; if (unlikely(nv50->state.flushed)) nv50_bufctx_fence(nv50->bufctx_cp, true); - - return true; + return ret; } static void @@ -227,7 +198,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input) struct nouveau_pushbuf *push = screen->base.pushbuf; unsigned size = align(nv50->compprog->parm_size, 0x4); - BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1); + BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1); PUSH_DATA (push, (size / 4) << 8); if (size) { @@ -245,7 +216,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input) nouveau_pushbuf_bufctx(push, nv50->bufctx); nouveau_pushbuf_validate(push); - BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4); + BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), size / 4); nouveau_pushbuf_data(push, bo, offset, size); nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm); @@ -278,7 +249,7 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) struct nv50_program *cp = nv50->compprog; bool ret; - ret = !nv50_compute_state_validate(nv50); + ret = !nv50_state_validate_cp(nv50, ~0); if (ret) { NOUVEAU_ERR("Failed to launch grid !\n"); return; @@ -286,33 +257,33 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) nv50_compute_upload_input(nv50, info->input); - BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1); + BEGIN_NV04(push, NV50_CP(CP_START_ID), 1); PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc)); - BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1); + BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1); PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40)); - BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1); + BEGIN_NV04(push, NV50_CP(CP_REG_ALLOC_TEMP), 1); PUSH_DATA (push, cp->max_gpr); /* grid/block setup */ - BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2); + BEGIN_NV04(push, NV50_CP(BLOCKDIM_XY), 2); PUSH_DATA (push, info->block[1] << 16 | info->block[0]); PUSH_DATA (push, info->block[2]); - BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1); + BEGIN_NV04(push, NV50_CP(BLOCK_ALLOC), 1); PUSH_DATA (push, 1 << 16 | block_size); - BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1); + BEGIN_NV04(push, NV50_CP(BLOCKDIM_LATCH), 1); PUSH_DATA (push, 1); - BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1); + BEGIN_NV04(push, NV50_CP(GRIDDIM), 1); PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]); - BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1); + BEGIN_NV04(push, NV50_CP(GRIDID), 1); PUSH_DATA (push, 1); /* kernel launching */ - BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1); + BEGIN_NV04(push, NV50_CP(LAUNCH), 1); PUSH_DATA (push, 0); - BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1); PUSH_DATA (push, 0); /* bind a compute shader clobbers fragment shader state */ - nv50->dirty |= NV50_NEW_FRAGPROG; + nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c index 4874b77..61a52c4 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -176,8 +176,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) { if (nv50->framebuffer.cbufs[i] && nv50->framebuffer.cbufs[i]->texture == res) { - nv50->dirty |= NV50_NEW_FRAMEBUFFER; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB); if (!--ref) return ref; } @@ -186,8 +186,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, if (bind & PIPE_BIND_DEPTH_STENCIL) { if (nv50->framebuffer.zsbuf && nv50->framebuffer.zsbuf->texture == res) { - nv50->dirty |= NV50_NEW_FRAMEBUFFER; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB); if (!--ref) return ref; } @@ -202,8 +202,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS); for (i = 0; i < nv50->num_vtxbufs; ++i) { if (nv50->vtxbuf[i].buffer == res) { - nv50->dirty |= NV50_NEW_ARRAYS; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX); + nv50->dirty_3d |= NV50_NEW_3D_ARRAYS; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX); if (!--ref) return ref; } @@ -211,8 +211,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, if (nv50->idxbuf.buffer == res) { /* Just rebind to the bufctx as there is no separate dirty bit */ - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX); - BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(res), RD); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX); + BCTX_REFN(nv50->bufctx_3d, 3D_INDEX, nv04_resource(res), RD); if (!--ref) return ref; } @@ -222,8 +222,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, for (i = 0; i < nv50->num_textures[s]; ++i) { if (nv50->textures[s][i] && nv50->textures[s][i]->texture == res) { - nv50->dirty |= NV50_NEW_TEXTURES; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + nv50->dirty_3d |= NV50_NEW_3D_TEXTURES; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES); if (!--ref) return ref; } @@ -236,9 +236,9 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, continue; if (!nv50->constbuf[s][i].user && nv50->constbuf[s][i].u.buf == res) { - nv50->dirty |= NV50_NEW_CONSTBUF; + nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF; nv50->constbuf_dirty[s] |= 1 << i; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i)); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i)); if (!--ref) return ref; } @@ -345,10 +345,10 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; - BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->code); - BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms); - BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc); - BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo); + BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->code); + BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->uniforms); + BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->txc); + BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->stack_bo); if (screen->compute) { BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code); BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc); @@ -357,7 +357,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; - BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo); + BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->fence.bo); BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo); if (screen->compute) BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index 2620d03..2317fa2 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -26,43 +26,43 @@ #include "nv50/nv50_3d.xml.h" #include "nv50/nv50_2d.xml.h" -#define NV50_NEW_BLEND (1 << 0) -#define NV50_NEW_RASTERIZER (1 << 1) -#define NV50_NEW_ZSA (1 << 2) -#define NV50_NEW_VERTPROG (1 << 3) -#define NV50_NEW_GMTYPROG (1 << 6) -#define NV50_NEW_FRAGPROG (1 << 7) -#define NV50_NEW_BLEND_COLOUR (1 << 8) -#define NV50_NEW_STENCIL_REF (1 << 9) -#define NV50_NEW_CLIP (1 << 10) -#define NV50_NEW_SAMPLE_MASK (1 << 11) -#define NV50_NEW_FRAMEBUFFER (1 << 12) -#define NV50_NEW_STIPPLE (1 << 13) -#define NV50_NEW_SCISSOR (1 << 14) -#define NV50_NEW_VIEWPORT (1 << 15) -#define NV50_NEW_ARRAYS (1 << 16) -#define NV50_NEW_VERTEX (1 << 17) -#define NV50_NEW_CONSTBUF (1 << 18) -#define NV50_NEW_TEXTURES (1 << 19) -#define NV50_NEW_SAMPLERS (1 << 20) -#define NV50_NEW_STRMOUT (1 << 21) -#define NV50_NEW_MIN_SAMPLES (1 << 22) -#define NV50_NEW_CONTEXT (1 << 31) +#define NV50_NEW_3D_BLEND (1 << 0) +#define NV50_NEW_3D_RASTERIZER (1 << 1) +#define NV50_NEW_3D_ZSA (1 << 2) +#define NV50_NEW_3D_VERTPROG (1 << 3) +#define NV50_NEW_3D_GMTYPROG (1 << 6) +#define NV50_NEW_3D_FRAGPROG (1 << 7) +#define NV50_NEW_3D_BLEND_COLOUR (1 << 8) +#define NV50_NEW_3D_STENCIL_REF (1 << 9) +#define NV50_NEW_3D_CLIP (1 << 10) +#define NV50_NEW_3D_SAMPLE_MASK (1 << 11) +#define NV50_NEW_3D_FRAMEBUFFER (1 << 12) +#define NV50_NEW_3D_STIPPLE (1 << 13) +#define NV50_NEW_3D_SCISSOR (1 << 14) +#define NV50_NEW_3D_VIEWPORT (1 << 15) +#define NV50_NEW_3D_ARRAYS (1 << 16) +#define NV50_NEW_3D_VERTEX (1 << 17) +#define NV50_NEW_3D_CONSTBUF (1 << 18) +#define NV50_NEW_3D_TEXTURES (1 << 19) +#define NV50_NEW_3D_SAMPLERS (1 << 20) +#define NV50_NEW_3D_STRMOUT (1 << 21) +#define NV50_NEW_3D_MIN_SAMPLES (1 << 22) +#define NV50_NEW_3D_CONTEXT (1 << 31) #define NV50_NEW_CP_PROGRAM (1 << 0) #define NV50_NEW_CP_GLOBALS (1 << 1) /* 3d bufctx (during draw_vbo, blit_3d) */ -#define NV50_BIND_FB 0 -#define NV50_BIND_VERTEX 1 -#define NV50_BIND_VERTEX_TMP 2 -#define NV50_BIND_INDEX 3 -#define NV50_BIND_TEXTURES 4 -#define NV50_BIND_CB(s, i) (5 + 16 * (s) + (i)) -#define NV50_BIND_SO 53 -#define NV50_BIND_SCREEN 54 -#define NV50_BIND_TLS 55 -#define NV50_BIND_3D_COUNT 56 +#define NV50_BIND_3D_FB 0 +#define NV50_BIND_3D_VERTEX 1 +#define NV50_BIND_3D_VERTEX_TMP 2 +#define NV50_BIND_3D_INDEX 3 +#define NV50_BIND_3D_TEXTURES 4 +#define NV50_BIND_3D_CB(s, i) (5 + 16 * (s) + (i)) +#define NV50_BIND_3D_SO 53 +#define NV50_BIND_3D_SCREEN 54 +#define NV50_BIND_3D_TLS 55 +#define NV50_BIND_3D_COUNT 56 /* compute bufctx (during launch_grid) */ #define NV50_BIND_CP_GLOBAL 0 @@ -115,7 +115,7 @@ struct nv50_context { struct nouveau_bufctx *bufctx; struct nouveau_bufctx *bufctx_cp; - uint32_t dirty; + uint32_t dirty_3d; /* dirty flags for 3d state */ uint32_t dirty_cp; /* dirty flags for compute state */ bool cb_dirty; @@ -221,6 +221,7 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *); void nv50_vertprog_validate(struct nv50_context *); void nv50_gmtyprog_validate(struct nv50_context *); void nv50_fragprog_validate(struct nv50_context *); +void nv50_compprog_validate(struct nv50_context *); void nv50_fp_linkage_validate(struct nv50_context *); void nv50_gp_linkage_validate(struct nv50_context *); void nv50_constbufs_validate(struct nv50_context *); @@ -231,7 +232,15 @@ void nv50_stream_output_validate(struct nv50_context *); extern void nv50_init_state_functions(struct nv50_context *); /* nv50_state_validate.c */ -bool nv50_state_validate(struct nv50_context *, uint32_t state_mask); +struct nv50_state_validate { + void (*func)(struct nv50_context *); + uint32_t states; +}; + +bool nv50_state_validate(struct nv50_context *, uint32_t, + struct nv50_state_validate *, int, uint32_t *, + struct nouveau_bufctx *); +bool nv50_state_validate_3d(struct nv50_context *, uint32_t); /* nv50_surface.c */ extern void nv50_clear(struct pipe_context *, unsigned buffers, diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index a67ef28..3444b31 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -335,7 +335,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET; info->io.genUserClip = prog->vp.clpd_nr; - info->io.resInfoCBSlot = 15; info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET; info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET; info->io.msInfoCBSlot = 15; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c index be19c0f..0a73090 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c @@ -202,10 +202,10 @@ nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq) func = nv50_hw_sm_get_func(c); /* configure and reset the counter(s) */ - BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1); + BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(c)), 1); PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8) | cfg->ctr[i].unit | cfg->ctr[i].mode); - BEGIN_NV04(push, NV50_COMPUTE(MP_PM_SET(c)), 1); + BEGIN_NV04(push, NV50_CP(MP_PM_SET(c)), 1); PUSH_DATA (push, 0); } return true; @@ -240,7 +240,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) PUSH_SPACE(push, 8); for (c = 0; c < 4; c++) { if (screen->pm.mp_counter[c]) { - BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1); + BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(c)), 1); PUSH_DATA (push, 0); } } @@ -257,7 +257,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) hq->bo); PUSH_SPACE(push, 2); - BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1); PUSH_DATA (push, 0); pipe->bind_compute_state(pipe, screen->pm.prog); @@ -295,7 +295,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) mask |= 1 << hsq->ctr[i]; func = nv50_hw_sm_get_func(hsq->ctr[i]); - BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[i])), 1); + BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(hsq->ctr[i])), 1); PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8) | cfg->ctr[i].unit | cfg->ctr[i].mode); } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index 8e4b2b4..3d2ebfb 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -29,6 +29,8 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_query_hw.h" +#include "nv50/nv50_compute.xml.h" + void nv50_constbufs_validate(struct nv50_context *nv50) { @@ -94,7 +96,7 @@ nv50_constbufs_validate(struct nv50_context *nv50) BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); PUSH_DATA (push, (b << 12) | (i << 8) | p | 1); - BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD); + BCTX_REFN(nv50->bufctx_3d, 3D_CB(s, i), res, RD); nv50->cb_dirty = 1; /* Force cache flush for UBO. */ } else { @@ -131,14 +133,14 @@ nv50_program_update_context_state(struct nv50_context *nv50, if (prog && prog->tls_space) { if (nv50->state.new_tls_space) - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TLS); if (!nv50->state.tls_required || nv50->state.new_tls_space) - BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo); + BCTX_REFN_bo(nv50->bufctx_3d, 3D_TLS, flags, nv50->screen->tls_bo); nv50->state.new_tls_space = false; nv50->state.tls_required |= 1 << stage; } else { if (nv50->state.tls_required == (1 << stage)) - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TLS); nv50->state.tls_required &= ~(1 << stage); } } @@ -181,7 +183,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) fp->fp.force_persample_interp = rast->force_persample_interp; } - if (fp->mem && !(nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_MIN_SAMPLES))) + if (fp->mem && !(nv50->dirty_3d & (NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_MIN_SAMPLES))) return; if (!nv50_program_validate(nv50, fp)) @@ -238,6 +240,19 @@ nv50_gmtyprog_validate(struct nv50_context *nv50) /* GP_ENABLE is updated in linkage validation */ } +void +nv50_compprog_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_program *cp = nv50->compprog; + + if (cp && !nv50_program_validate(nv50, cp)) + return; + + BEGIN_NV04(push, NV50_CP(CODE_CB_FLUSH), 1); + PUSH_DATA (push, 0); +} + static void nv50_sprite_coords_validate(struct nv50_context *nv50) { @@ -309,7 +324,7 @@ nv50_validate_derived_rs(struct nv50_context *nv50) PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard); } - if (nv50->dirty & NV50_NEW_FRAGPROG) + if (nv50->dirty_3d & NV50_NEW_3D_FRAGPROG) return; psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK; color = nv50->state.semantic_color & ~NV50_3D_SEMANTIC_COLOR_CLMP_EN; @@ -378,9 +393,9 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) uint8_t map[64]; uint8_t so_map[64]; - if (!(nv50->dirty & (NV50_NEW_VERTPROG | - NV50_NEW_FRAGPROG | - NV50_NEW_GMTYPROG))) { + if (!(nv50->dirty_3d & (NV50_NEW_3D_VERTPROG | + NV50_NEW_3D_FRAGPROG | + NV50_NEW_3D_GMTYPROG))) { uint8_t bfc, ffc; ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK); bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK) @@ -633,8 +648,6 @@ nv50_stream_output_validate(struct nv50_context *nv50) BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1); PUSH_DATA (push, ctrl); - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO); - for (i = 0; i < nv50->num_so_targets; ++i) { struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]); struct nv04_resource *buf = nv04_resource(targ->pipe.buffer); @@ -664,7 +677,7 @@ nv50_stream_output_validate(struct nv50_context *nv50) prims = MIN2(prims, limit); } targ->stride = so->stride[i]; - BCTX_REFN(nv50->bufctx_3d, SO, buf, WR); + BCTX_REFN(nv50->bufctx_3d, 3D_SO, buf, WR); } if (prims != ~0) { BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index 8504ba4..86e74d6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -200,7 +200,7 @@ nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso) struct nv50_context *nv50 = nv50_context(pipe); nv50->blend = hwcso; - nv50->dirty |= NV50_NEW_BLEND; + nv50->dirty_3d |= NV50_NEW_3D_BLEND; } static void @@ -337,7 +337,7 @@ nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) struct nv50_context *nv50 = nv50_context(pipe); nv50->rast = hwcso; - nv50->dirty |= NV50_NEW_RASTERIZER; + nv50->dirty_3d |= NV50_NEW_3D_RASTERIZER; } static void @@ -426,7 +426,7 @@ nv50_zsa_state_bind(struct pipe_context *pipe, void *hwcso) struct nv50_context *nv50 = nv50_context(pipe); nv50->zsa = hwcso; - nv50->dirty |= NV50_NEW_ZSA; + nv50->dirty_3d |= NV50_NEW_3D_ZSA; } static void @@ -605,7 +605,7 @@ nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s, nv50->num_samplers[s] = nr; - nv50->dirty |= NV50_NEW_SAMPLERS; + nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS; } static void @@ -698,9 +698,9 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s, nv50->num_textures[s] = nr; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES); - nv50->dirty |= NV50_NEW_TEXTURES; + nv50->dirty_3d |= NV50_NEW_3D_TEXTURES; } static void @@ -776,7 +776,7 @@ nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso) struct nv50_context *nv50 = nv50_context(pipe); nv50->vertprog = hwcso; - nv50->dirty |= NV50_NEW_VERTPROG; + nv50->dirty_3d |= NV50_NEW_3D_VERTPROG; } static void * @@ -792,7 +792,7 @@ nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso) struct nv50_context *nv50 = nv50_context(pipe); nv50->fragprog = hwcso; - nv50->dirty |= NV50_NEW_FRAGPROG; + nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG; } static void * @@ -808,7 +808,7 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) struct nv50_context *nv50 = nv50_context(pipe); nv50->gmtyprog = hwcso; - nv50->dirty |= NV50_NEW_GMTYPROG; + nv50->dirty_3d |= NV50_NEW_3D_GMTYPROG; } static void * @@ -857,7 +857,7 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, nv50->constbuf[s][i].u.buf = NULL; else if (nv50->constbuf[s][i].u.buf) - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i)); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i)); pipe_resource_reference(&nv50->constbuf[s][i].u.buf, res); @@ -882,7 +882,7 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, } nv50->constbuf_dirty[s] |= 1 << i; - nv50->dirty |= NV50_NEW_CONSTBUF; + nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF; } /* ============================================================================= @@ -895,7 +895,7 @@ nv50_set_blend_color(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); nv50->blend_colour = *bcol; - nv50->dirty |= NV50_NEW_BLEND_COLOUR; + nv50->dirty_3d |= NV50_NEW_3D_BLEND_COLOUR; } static void @@ -905,7 +905,7 @@ nv50_set_stencil_ref(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); nv50->stencil_ref = *sr; - nv50->dirty |= NV50_NEW_STENCIL_REF; + nv50->dirty_3d |= NV50_NEW_3D_STENCIL_REF; } static void @@ -916,7 +916,7 @@ nv50_set_clip_state(struct pipe_context *pipe, memcpy(nv50->clip.ucp, clip->ucp, sizeof(clip->ucp)); - nv50->dirty |= NV50_NEW_CLIP; + nv50->dirty_3d |= NV50_NEW_3D_CLIP; } static void @@ -925,7 +925,7 @@ nv50_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) struct nv50_context *nv50 = nv50_context(pipe); nv50->sample_mask = sample_mask; - nv50->dirty |= NV50_NEW_SAMPLE_MASK; + nv50->dirty_3d |= NV50_NEW_3D_SAMPLE_MASK; } static void @@ -935,7 +935,7 @@ nv50_set_min_samples(struct pipe_context *pipe, unsigned min_samples) if (nv50->min_samples != min_samples) { nv50->min_samples = min_samples; - nv50->dirty |= NV50_NEW_MIN_SAMPLES; + nv50->dirty_3d |= NV50_NEW_3D_MIN_SAMPLES; } } @@ -945,11 +945,11 @@ nv50_set_framebuffer_state(struct pipe_context *pipe, { struct nv50_context *nv50 = nv50_context(pipe); - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB); util_copy_framebuffer_state(&nv50->framebuffer, fb); - nv50->dirty |= NV50_NEW_FRAMEBUFFER; + nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER; } static void @@ -959,7 +959,7 @@ nv50_set_polygon_stipple(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); nv50->stipple = *stipple; - nv50->dirty |= NV50_NEW_STIPPLE; + nv50->dirty_3d |= NV50_NEW_3D_STIPPLE; } static void @@ -977,7 +977,7 @@ nv50_set_scissor_states(struct pipe_context *pipe, continue; nv50->scissors[start_slot + i] = scissor[i]; nv50->scissors_dirty |= 1 << (start_slot + i); - nv50->dirty |= NV50_NEW_SCISSOR; + nv50->dirty_3d |= NV50_NEW_3D_SCISSOR; } } @@ -996,7 +996,7 @@ nv50_set_viewport_states(struct pipe_context *pipe, continue; nv50->viewports[start_slot + i] = vpt[i]; nv50->viewports_dirty |= 1 << (start_slot + i); - nv50->dirty |= NV50_NEW_VIEWPORT; + nv50->dirty_3d |= NV50_NEW_3D_VIEWPORT; } } @@ -1008,8 +1008,8 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); unsigned i; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX); - nv50->dirty |= NV50_NEW_ARRAYS; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX); + nv50->dirty_3d |= NV50_NEW_3D_ARRAYS; util_set_vertex_buffers_count(nv50->vtxbuf, &nv50->num_vtxbufs, vb, start_slot, count); @@ -1051,14 +1051,14 @@ nv50_set_index_buffer(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); if (nv50->idxbuf.buffer) - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX); if (ib) { pipe_resource_reference(&nv50->idxbuf.buffer, ib->buffer); nv50->idxbuf.index_size = ib->index_size; if (ib->buffer) { nv50->idxbuf.offset = ib->offset; - BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(ib->buffer), RD); + BCTX_REFN(nv50->bufctx_3d, 3D_INDEX, nv04_resource(ib->buffer), RD); } else { nv50->idxbuf.user_buffer = ib->user_buffer; } @@ -1073,7 +1073,7 @@ nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso) struct nv50_context *nv50 = nv50_context(pipe); nv50->vertex = hwcso; - nv50->dirty |= NV50_NEW_VERTEX; + nv50->dirty_3d |= NV50_NEW_3D_VERTEX; } static struct pipe_stream_output_target * @@ -1180,8 +1180,10 @@ nv50_set_stream_output_targets(struct pipe_context *pipe, } nv50->num_so_targets = num_targets; - if (nv50->so_targets_dirty) - nv50->dirty |= NV50_NEW_STRMOUT; + if (nv50->so_targets_dirty) { + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_SO); + nv50->dirty_3d |= NV50_NEW_3D_STRMOUT; + } } static void diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index 5536978..5120493 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -25,7 +25,7 @@ nv50_validate_fb(struct nv50_context *nv50) unsigned ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1; uint32_t array_size = 0xffff, array_mode = 0; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB); BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs); @@ -90,7 +90,7 @@ nv50_validate_fb(struct nv50_context *nv50) mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; /* only register for writing, otherwise we'd always serialize here */ - BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR); + BCTX_REFN(nv50->bufctx_3d, 3D_FB, &mt->base, WR); } if (fb->zsbuf) { @@ -118,7 +118,7 @@ nv50_validate_fb(struct nv50_context *nv50) mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; - BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR); + BCTX_REFN(nv50->bufctx_3d, 3D_FB, &mt->base, WR); } else { BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); PUSH_DATA (push, 0); @@ -187,8 +187,8 @@ nv50_validate_scissor(struct nv50_context *nv50) #ifdef NV50_SCISSORS_CLIPPING int minx, maxx, miny, maxy, i; - if (!(nv50->dirty & - (NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | NV50_NEW_FRAMEBUFFER)) && + if (!(nv50->dirty_3d & + (NV50_NEW_3D_SCISSOR | NV50_NEW_3D_VIEWPORT | NV50_NEW_3D_FRAMEBUFFER)) && nv50->state.scissor == nv50->rast->pipe.scissor) return; @@ -197,7 +197,7 @@ nv50_validate_scissor(struct nv50_context *nv50) nv50->state.scissor = nv50->rast->pipe.scissor; - if ((nv50->dirty & NV50_NEW_FRAMEBUFFER) && !nv50->state.scissor) + if ((nv50->dirty_3d & NV50_NEW_3D_FRAMEBUFFER) && !nv50->state.scissor) nv50->scissors_dirty = (1 << NV50_MAX_VIEWPORTS) - 1; for (i = 0; i < NV50_MAX_VIEWPORTS; i++) { @@ -290,10 +290,10 @@ nv50_check_program_ucps(struct nv50_context *nv50, vp->vp.clpd_nr = n; if (likely(vp == nv50->vertprog)) { - nv50->dirty |= NV50_NEW_VERTPROG; + nv50->dirty_3d |= NV50_NEW_3D_VERTPROG; nv50_vertprog_validate(nv50); } else { - nv50->dirty |= NV50_NEW_GMTYPROG; + nv50->dirty_3d |= NV50_NEW_3D_GMTYPROG; nv50_gmtyprog_validate(nv50); } nv50_fp_linkage_validate(nv50); @@ -342,7 +342,7 @@ nv50_validate_clip(struct nv50_context *nv50) struct nv50_program *vp; uint8_t clip_enable; - if (nv50->dirty & NV50_NEW_CLIP) { + if (nv50->dirty_3d & NV50_NEW_3D_CLIP) { BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); PUSH_DATA (push, (NV50_CB_AUX_UCP_OFFSET << 8) | NV50_CB_AUX); BEGIN_NI04(push, NV50_3D(CB_DATA(0)), PIPE_MAX_CLIP_PLANES * 4); @@ -436,7 +436,8 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to) else ctx_to->state = ctx_to->screen->save_state; - ctx_to->dirty = ~0; + ctx_to->dirty_3d = ~0; + ctx_to->dirty_cp = ~0; ctx_to->viewports_dirty = ~0; ctx_to->scissors_dirty = ~0; @@ -445,71 +446,71 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to) ctx_to->constbuf_dirty[2] = (1 << NV50_MAX_PIPE_CONSTBUFS) - 1; if (!ctx_to->vertex) - ctx_to->dirty &= ~(NV50_NEW_VERTEX | NV50_NEW_ARRAYS); + ctx_to->dirty_3d &= ~(NV50_NEW_3D_VERTEX | NV50_NEW_3D_ARRAYS); if (!ctx_to->vertprog) - ctx_to->dirty &= ~NV50_NEW_VERTPROG; + ctx_to->dirty_3d &= ~NV50_NEW_3D_VERTPROG; if (!ctx_to->fragprog) - ctx_to->dirty &= ~NV50_NEW_FRAGPROG; + ctx_to->dirty_3d &= ~NV50_NEW_3D_FRAGPROG; if (!ctx_to->blend) - ctx_to->dirty &= ~NV50_NEW_BLEND; + ctx_to->dirty_3d &= ~NV50_NEW_3D_BLEND; if (!ctx_to->rast) #ifdef NV50_SCISSORS_CLIPPING - ctx_to->dirty &= ~(NV50_NEW_RASTERIZER | NV50_NEW_SCISSOR); + ctx_to->dirty_3d &= ~(NV50_NEW_3D_RASTERIZER | NV50_NEW_3D_SCISSOR); #else - ctx_to->dirty &= ~NV50_NEW_RASTERIZER; + ctx_to->dirty_3d &= ~NV50_NEW_3D_RASTERIZER; #endif if (!ctx_to->zsa) - ctx_to->dirty &= ~NV50_NEW_ZSA; + ctx_to->dirty_3d &= ~NV50_NEW_3D_ZSA; ctx_to->screen->cur_ctx = ctx_to; } -static struct state_validate { - void (*func)(struct nv50_context *); - uint32_t states; -} validate_list[] = { - { nv50_validate_fb, NV50_NEW_FRAMEBUFFER }, - { nv50_validate_blend, NV50_NEW_BLEND }, - { nv50_validate_zsa, NV50_NEW_ZSA }, - { nv50_validate_sample_mask, NV50_NEW_SAMPLE_MASK }, - { nv50_validate_rasterizer, NV50_NEW_RASTERIZER }, - { nv50_validate_blend_colour, NV50_NEW_BLEND_COLOUR }, - { nv50_validate_stencil_ref, NV50_NEW_STENCIL_REF }, - { nv50_validate_stipple, NV50_NEW_STIPPLE }, +static struct nv50_state_validate +validate_list_3d[] = { + { nv50_validate_fb, NV50_NEW_3D_FRAMEBUFFER }, + { nv50_validate_blend, NV50_NEW_3D_BLEND }, + { nv50_validate_zsa, NV50_NEW_3D_ZSA }, + { nv50_validate_sample_mask, NV50_NEW_3D_SAMPLE_MASK }, + { nv50_validate_rasterizer, NV50_NEW_3D_RASTERIZER }, + { nv50_validate_blend_colour, NV50_NEW_3D_BLEND_COLOUR }, + { nv50_validate_stencil_ref, NV50_NEW_3D_STENCIL_REF }, + { nv50_validate_stipple, NV50_NEW_3D_STIPPLE }, #ifdef NV50_SCISSORS_CLIPPING - { nv50_validate_scissor, NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | - NV50_NEW_RASTERIZER | - NV50_NEW_FRAMEBUFFER }, + { nv50_validate_scissor, NV50_NEW_3D_SCISSOR | NV50_NEW_3D_VIEWPORT | + NV50_NEW_3D_RASTERIZER | + NV50_NEW_3D_FRAMEBUFFER }, #else - { nv50_validate_scissor, NV50_NEW_SCISSOR }, + { nv50_validate_scissor, NV50_NEW_3D_SCISSOR }, #endif - { nv50_validate_viewport, NV50_NEW_VIEWPORT }, - { nv50_vertprog_validate, NV50_NEW_VERTPROG }, - { nv50_gmtyprog_validate, NV50_NEW_GMTYPROG }, - { nv50_fragprog_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | - NV50_NEW_MIN_SAMPLES }, - { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | - NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER }, - { nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG }, - { nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | - NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, - { nv50_validate_derived_2, NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER }, - { nv50_validate_derived_3, NV50_NEW_BLEND | NV50_NEW_FRAMEBUFFER }, - { nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER | - NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, - { nv50_constbufs_validate, NV50_NEW_CONSTBUF }, - { nv50_validate_textures, NV50_NEW_TEXTURES }, - { nv50_validate_samplers, NV50_NEW_SAMPLERS }, - { nv50_stream_output_validate, NV50_NEW_STRMOUT | - NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, - { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }, - { nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES }, + { nv50_validate_viewport, NV50_NEW_3D_VIEWPORT }, + { nv50_vertprog_validate, NV50_NEW_3D_VERTPROG }, + { nv50_gmtyprog_validate, NV50_NEW_3D_GMTYPROG }, + { nv50_fragprog_validate, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_RASTERIZER | + NV50_NEW_3D_MIN_SAMPLES }, + { nv50_fp_linkage_validate, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_VERTPROG | + NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_RASTERIZER }, + { nv50_gp_linkage_validate, NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_VERTPROG }, + { nv50_validate_derived_rs, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_RASTERIZER | + NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG }, + { nv50_validate_derived_2, NV50_NEW_3D_ZSA | NV50_NEW_3D_FRAMEBUFFER }, + { nv50_validate_derived_3, NV50_NEW_3D_BLEND | NV50_NEW_3D_FRAMEBUFFER }, + { nv50_validate_clip, NV50_NEW_3D_CLIP | NV50_NEW_3D_RASTERIZER | + NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG }, + { nv50_constbufs_validate, NV50_NEW_3D_CONSTBUF }, + { nv50_validate_textures, NV50_NEW_3D_TEXTURES }, + { nv50_validate_samplers, NV50_NEW_3D_SAMPLERS }, + { nv50_stream_output_validate, NV50_NEW_3D_STRMOUT | + NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG }, + { nv50_vertex_arrays_validate, NV50_NEW_3D_VERTEX | NV50_NEW_3D_ARRAYS }, + { nv50_validate_min_samples, NV50_NEW_3D_MIN_SAMPLES }, }; bool -nv50_state_validate(struct nv50_context *nv50, uint32_t mask) +nv50_state_validate(struct nv50_context *nv50, uint32_t mask, + struct nv50_state_validate *validate_list, int size, + uint32_t *dirty, struct nouveau_bufctx *bufctx) { uint32_t state_mask; int ret; @@ -518,16 +519,16 @@ nv50_state_validate(struct nv50_context *nv50, uint32_t mask) if (nv50->screen->cur_ctx != nv50) nv50_switch_pipe_context(nv50); - state_mask = nv50->dirty & mask; + state_mask = *dirty & mask; if (state_mask) { - for (i = 0; i < ARRAY_SIZE(validate_list); ++i) { - struct state_validate *validate = &validate_list[i]; + for (i = 0; i < size; i++) { + struct nv50_state_validate *validate = &validate_list[i]; if (state_mask & validate->states) validate->func(nv50); } - nv50->dirty &= ~state_mask; + *dirty &= ~state_mask; if (nv50->state.rt_serialize) { nv50->state.rt_serialize = false; @@ -535,14 +536,26 @@ nv50_state_validate(struct nv50_context *nv50, uint32_t mask) PUSH_DATA (nv50->base.pushbuf, 0); } - nv50_bufctx_fence(nv50->bufctx_3d, false); + nv50_bufctx_fence(bufctx, false); } - nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_3d); + nouveau_pushbuf_bufctx(nv50->base.pushbuf, bufctx); ret = nouveau_pushbuf_validate(nv50->base.pushbuf); + return !ret; +} + +bool +nv50_state_validate_3d(struct nv50_context *nv50, uint32_t mask) +{ + bool ret; + + ret = nv50_state_validate(nv50, mask, validate_list_3d, + ARRAY_SIZE(validate_list_3d), &nv50->dirty_3d, + nv50->bufctx_3d); + if (unlikely(nv50->state.flushed)) { nv50->state.flushed = false; nv50_bufctx_fence(nv50->bufctx_3d, true); } - return !ret; + return ret; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index 84646f6..68b0e18 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -353,7 +353,7 @@ nv50_clear_render_target(struct pipe_context *pipe, BEGIN_NV04(push, NV50_3D(COND_MODE), 1); PUSH_DATA (push, nv50->cond_condmode); - nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; + nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR; } static void @@ -436,7 +436,7 @@ nv50_clear_depth_stencil(struct pipe_context *pipe, BEGIN_NV04(push, NV50_3D(COND_MODE), 1); PUSH_DATA (push, nv50->cond_condmode); - nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; + nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR; } void @@ -525,7 +525,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers, uint32_t mode = 0; /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ - if (!nv50_state_validate(nv50, NV50_NEW_FRAMEBUFFER)) + if (!nv50_state_validate_3d(nv50, NV50_NEW_3D_FRAMEBUFFER)) return; /* We have to clear ALL of the layers, not up to the min number of layers @@ -798,7 +798,7 @@ nv50_clear_buffer(struct pipe_context *pipe, data, data_size); } - nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; + nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR; } /* =============================== BLIT CODE =================================== @@ -834,7 +834,7 @@ struct nv50_blitctx struct pipe_sampler_view *texture[2]; struct nv50_tsc_entry *sampler[2]; unsigned min_samples; - uint32_t dirty; + uint32_t dirty_3d; } saved; struct nv50_rasterizer_stateobj rast; }; @@ -1253,15 +1253,15 @@ nv50_blitctx_pre_blit(struct nv50_blitctx *ctx) nv50->min_samples = 1; - ctx->saved.dirty = nv50->dirty; + ctx->saved.dirty_3d = nv50->dirty_3d; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES); - nv50->dirty = - NV50_NEW_FRAMEBUFFER | NV50_NEW_MIN_SAMPLES | - NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | NV50_NEW_GMTYPROG | - NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS; + nv50->dirty_3d = + NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_MIN_SAMPLES | + NV50_NEW_3D_VERTPROG | NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_GMTYPROG | + NV50_NEW_3D_TEXTURES | NV50_NEW_3D_SAMPLERS; } static void @@ -1302,14 +1302,14 @@ nv50_blitctx_post_blit(struct nv50_blitctx *blit) nv50->base.pipe.render_condition(&nv50->base.pipe, nv50->cond_query, nv50->cond_cond, nv50->cond_mode); - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES); - nv50->dirty = blit->saved.dirty | - (NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR | NV50_NEW_SAMPLE_MASK | - NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_BLEND | - NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS | - NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_FRAGPROG); + nv50->dirty_3d = blit->saved.dirty_3d | + (NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR | NV50_NEW_3D_SAMPLE_MASK | + NV50_NEW_3D_RASTERIZER | NV50_NEW_3D_ZSA | NV50_NEW_3D_BLEND | + NV50_NEW_3D_TEXTURES | NV50_NEW_3D_SAMPLERS | + NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_FRAGPROG); nv50->scissors_dirty |= 1; nv50->base.pipe.set_min_samples(&nv50->base.pipe, blit->saved.min_samples); @@ -1344,7 +1344,7 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info) nv50_blitctx_prepare_state(blit); - nv50_state_validate(nv50, ~0); + nv50_state_validate_3d(nv50, ~0); x_range = (float)info->src.box.width / (float)info->dst.box.width; y_range = (float)info->src.box.height / (float)info->dst.box.height; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c index 4b69c3b..414d326 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c @@ -299,7 +299,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s) res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; - BCTX_REFN(nv50->bufctx_3d, TEXTURES, res, RD); + BCTX_REFN(nv50->bufctx_3d, 3D_TEXTURES, res, RD); BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); PUSH_DATA (push, (tic->id << 9) | (i << 1) | 1); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 6f60445..a11cdf8 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -230,7 +230,7 @@ nv50_upload_user_buffers(struct nv50_context *nv50, addrs[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer, base, size, &bo); if (addrs[b]) - BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, NOUVEAU_BO_GART | + BCTX_REFN_bo(nv50->bufctx_3d, 3D_VERTEX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, bo); } nv50->base.vbo_dirty = true; @@ -269,7 +269,7 @@ nv50_update_user_vbufs(struct nv50_context *nv50) address[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer, base, size, &bo); if (address[b]) - BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, bo_flags, bo); + BCTX_REFN_bo(nv50->bufctx_3d, 3D_VERTEX_TMP, bo_flags, bo); } BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); @@ -286,7 +286,7 @@ static inline void nv50_release_user_vbufs(struct nv50_context *nv50) { if (nv50->vbo_user) { - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX_TMP); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX_TMP); nouveau_scratch_done(&nv50->base); } } @@ -394,7 +394,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) struct nv04_resource *buf = nv04_resource(vb->buffer); if (!(refd & (1 << b))) { refd |= 1 << b; - BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD); + BCTX_REFN(nv50->bufctx_3d, 3D_VERTEX, buf, RD); } address = buf->address + vb->buffer_offset + ve->pipe.src_offset; limit = buf->address + buf->base.width0 - 1; @@ -779,9 +779,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nv50->vbo_push_hint = /* the 64 is heuristic */ !(info->indexed && ((nv50->vb_elt_limit + 64) < info->count)); - if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_ARRAYS | NV50_NEW_VERTEX))) { + if (nv50->vbo_user && !(nv50->dirty_3d & (NV50_NEW_3D_ARRAYS | NV50_NEW_3D_VERTEX))) { if (!!nv50->vbo_fifo != nv50->vbo_push_hint) - nv50->dirty |= NV50_NEW_ARRAYS; + nv50->dirty_3d |= NV50_NEW_3D_ARRAYS; else if (!nv50->vbo_fifo) nv50_update_user_vbufs(nv50); @@ -790,7 +790,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (unlikely(nv50->num_so_targets && !nv50->gmtyprog)) nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode]; - nv50_state_validate(nv50, ~0); + nv50_state_validate_3d(nv50, ~0); push->kick_notify = nv50_draw_vbo_kick_notify; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h index 6800230..7056258 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h @@ -58,8 +58,8 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) #define SUBC_M2MF(m) 5, (m) #define NV50_M2MF(n) SUBC_M2MF(NV50_M2MF_##n) -#define SUBC_COMPUTE(m) 6, (m) -#define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n) +#define SUBC_CP(m) 6, (m) +#define NV50_CP(n) SUBC_CP(NV50_COMPUTE_##n) static inline uint32_t |