diff options
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 14 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_hw_context.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 107 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreend.h | 9 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_blit.c | 15 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_hw_context.c | 44 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 91 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 63 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600d.h | 8 |
13 files changed, 225 insertions, 155 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 6f2b7ba..5743e3f 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -346,7 +346,7 @@ static void evergreen_emit_direct_dispatch( const uint *block_layout, const uint *grid_layout) { int i; - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_pipe_compute *shader = rctx->cs_shader_state.shader; unsigned num_waves; unsigned num_pipes = rctx->screen->b.info.r600_max_pipes; @@ -417,12 +417,12 @@ static void evergreen_emit_direct_dispatch( static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, const uint *grid_layout) { - struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = ctx->b.gfx.cs; unsigned i; /* make sure that the gfx ring is only one active */ - if (ctx->b.rings.dma.cs && ctx->b.rings.dma.cs->cdw) { - ctx->b.rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + if (ctx->b.dma.cs && ctx->b.dma.cs->cdw) { + ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); } /* Initialize all the compute-related registers. @@ -439,7 +439,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, /* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */ for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) { struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i]; - unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.gfx, + unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx, (struct r600_resource*)cb->base.texture, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER); @@ -538,7 +538,7 @@ void evergreen_emit_cs_shader( struct r600_cs_shader_state *state = (struct r600_cs_shader_state*)atom; struct r600_pipe_compute *shader = state->shader; - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; uint64_t va; struct r600_resource *code_bo; unsigned ngpr, nstack; @@ -564,7 +564,7 @@ void evergreen_emit_cs_shader( radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */ radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0)); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, code_bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER)); } diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index 89abe92..a0f4680 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -35,7 +35,7 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx, uint64_t src_offset, uint64_t size) { - struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs; + struct radeon_winsys_cs *cs = rctx->b.dma.cs; unsigned i, ncopy, csize, sub_cmd, shift; struct r600_resource *rdst = (struct r600_resource*)dst; struct r600_resource *rsrc = (struct r600_resource*)src; @@ -64,9 +64,9 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx, for (i = 0; i < ncopy; i++) { csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE; /* emit reloc before writing cs so that cs is always in consistent state */ - radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ, + radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rsrc, RADEON_USAGE_READ, RADEON_PRIO_SDMA_BUFFER); - radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE, + radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rdst, RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_BUFFER); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize); cs->buf[cs->cdw++] = dst_offset & 0xffffffff; @@ -86,7 +86,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx, struct pipe_resource *dst, uint64_t offset, unsigned size, uint32_t clear_value) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; assert(size); assert(rctx->screen->b.has_cp_dma); @@ -129,7 +129,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx, } /* This must be done after r600_need_cs_space. */ - reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, + reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)dst, RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index c6702a9..684eee7 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -666,6 +666,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, enum pipe_format pipe_format = state->format; struct radeon_surf_level *surflevel; unsigned base_level, first_level, last_level; + unsigned dim, last_layer; uint64_t va; if (view == NULL) @@ -679,7 +680,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, view->base.reference.count = 1; view->base.context = ctx; - if (texture->target == PIPE_BUFFER) + if (state->target == PIPE_BUFFER) return texture_buffer_sampler_view(rctx, view, width0, height0); swizzle[0] = state->swizzle_r; @@ -773,12 +774,12 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, } nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks); - if (texture->target == PIPE_TEXTURE_1D_ARRAY) { + if (state->target == PIPE_TEXTURE_1D_ARRAY) { height = 1; depth = texture->array_size; - } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) { + } else if (state->target == PIPE_TEXTURE_2D_ARRAY) { depth = texture->array_size; - } else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY) + } else if (state->target == PIPE_TEXTURE_CUBE_ARRAY) depth = texture->array_size / 6; va = tmp->resource.gpu_address; @@ -790,7 +791,13 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, view->is_stencil_sampler = true; view->tex_resource = &tmp->resource; - view->tex_resource_words[0] = (S_030000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) | + + /* array type views and views into array types need to use layer offset */ + dim = state->target; + if (state->target != PIPE_TEXTURE_CUBE) + dim = MAX2(state->target, texture->target); + + view->tex_resource_words[0] = (S_030000_DIM(r600_tex_dim(dim, texture->nr_samples)) | S_030000_PITCH((pitch / 8) - 1) | S_030000_TEX_WIDTH(width - 1)); if (rscreen->b.chip_class == CAYMAN) @@ -818,10 +825,14 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, view->tex_resource_words[3] = (surflevel[base_level].offset + va) >> 8; } + last_layer = state->u.tex.last_layer; + if (state->target != texture->target && depth == 1) { + last_layer = state->u.tex.first_layer; + } view->tex_resource_words[4] = (word4 | S_030010_ENDIAN_SWAP(endian)); view->tex_resource_words[5] = S_030014_BASE_ARRAY(state->u.tex.first_layer) | - S_030014_LAST_ARRAY(state->u.tex.last_layer); + S_030014_LAST_ARRAY(last_layer); view->tex_resource_words[6] = S_030018_TILE_SPLIT(tile_split); if (texture->nr_samples > 1) { @@ -860,7 +871,7 @@ evergreen_create_sampler_view(struct pipe_context *ctx, static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct pipe_clip_state *state = &rctx->clip_state.state; radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4); @@ -910,7 +921,7 @@ static void evergreen_set_scissor_states(struct pipe_context *ctx, static void evergreen_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_scissor_state *rstate = &rctx->scissor; struct pipe_scissor_state *state; uint32_t dirty_mask; @@ -1514,7 +1525,7 @@ static void evergreen_get_sample_position(struct pipe_context *ctx, static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, int ps_iter_samples) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; unsigned max_dist = 0; switch (nr_samples) { @@ -1555,7 +1566,7 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct pipe_framebuffer_state *state = &rctx->framebuffer.state; unsigned nr_cbufs = state->nr_cbufs; unsigned i, tl, br; @@ -1580,7 +1591,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r tex = (struct r600_texture *)cb->base.texture; reloc = radeon_add_to_buffer_list(&rctx->b, - &rctx->b.rings.gfx, + &rctx->b.gfx, (struct r600_resource*)cb->base.texture, RADEON_USAGE_READWRITE, tex->surface.nsamples > 1 ? @@ -1588,7 +1599,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r RADEON_PRIO_COLOR_BUFFER); if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { - cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, + cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, tex->cmask_buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_CMASK); } else { @@ -1634,7 +1645,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r if (!rctx->keep_tiling_flags) { unsigned reloc = radeon_add_to_buffer_list(&rctx->b, - &rctx->b.rings.gfx, + &rctx->b.gfx, (struct r600_resource*)state->cbufs[0]->texture, RADEON_USAGE_READWRITE, RADEON_PRIO_COLOR_BUFFER); @@ -1657,7 +1668,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r if (state->zsbuf) { struct r600_surface *zb = (struct r600_surface*)state->zsbuf; unsigned reloc = radeon_add_to_buffer_list(&rctx->b, - &rctx->b.rings.gfx, + &rctx->b.gfx, (struct r600_resource*)state->zsbuf->texture, RADEON_USAGE_READWRITE, zb->base.texture->nr_samples > 1 ? @@ -1719,7 +1730,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a; float offset_units = state->offset_units; float offset_scale = state->offset_scale; @@ -1746,7 +1757,7 @@ static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600 static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom; unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1; unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1; @@ -1761,7 +1772,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_db_state *a = (struct r600_db_state*)atom; if (a->rsurf && a->rsurf->db_htile_surface) { @@ -1772,7 +1783,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface); radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control); radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base); - reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer, + reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rtex->htile_buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE); cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = reloc_idx; @@ -1784,7 +1795,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; unsigned db_render_control = 0; unsigned db_count_control = 0; @@ -1851,7 +1862,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, unsigned resource_offset, unsigned pkt_flags) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -1886,7 +1897,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD7 */ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER)); } state->dirty_mask = 0; @@ -1910,7 +1921,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, unsigned reg_alu_const_cache, unsigned pkt_flags) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -1934,7 +1945,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, } radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); @@ -1959,7 +1970,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER)); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); dirty_mask &= ~(1 << buffer_index); @@ -2007,7 +2018,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx, struct r600_samplerview_state *state, unsigned resource_id_base, unsigned pkt_flags) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -2022,7 +2033,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx, radeon_emit(cs, (resource_id_base + resource_index) * 8); radeon_emit_array(cs, rview->tex_resource_words, 8); - reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource, + reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rview->tex_resource, RADEON_USAGE_READ, r600_get_sampler_view_priority(rview->tex_resource)); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); @@ -2066,7 +2077,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx, unsigned border_index_reg, unsigned pkt_flags) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; uint32_t dirty_mask = texinfo->states.dirty_mask; while (dirty_mask) { @@ -2119,14 +2130,14 @@ static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_at struct r600_sample_mask *s = (struct r600_sample_mask*)a; uint8_t mask = s->sample_mask; - radeon_set_context_reg(rctx->b.rings.gfx.cs, R_028C3C_PA_SC_AA_MASK, + radeon_set_context_reg(rctx->b.gfx.cs, R_028C3C_PA_SC_AA_MASK, mask | (mask << 8) | (mask << 16) | (mask << 24)); } static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a) { struct r600_sample_mask *s = (struct r600_sample_mask*)a; - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; uint16_t mask = s->sample_mask; radeon_set_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); @@ -2136,21 +2147,21 @@ static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_cso_state *state = (struct r600_cso_state*)a; struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso; radeon_set_context_reg(cs, R_0288A4_SQ_PGM_START_FS, (shader->buffer->gpu_address + shader->offset) >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer, + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->buffer, RADEON_USAGE_READ, RADEON_PRIO_INTERNAL_SHADER)); } static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a; uint32_t v = 0, v2 = 0, primid = 0; @@ -2189,7 +2200,7 @@ static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a; struct r600_resource *rbuffer; @@ -2202,7 +2213,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, rbuffer->gpu_address >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT)); radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, @@ -2212,7 +2223,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, rbuffer->gpu_address >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT)); radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, @@ -2362,6 +2373,8 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx) r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); + r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_2_GS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); + r600_store_context_reg(cb, R_028894_SQ_PGM_RESOURCES_2_ES, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0); /* to avoid GPU doing any preloading of constant from random address */ @@ -2801,6 +2814,8 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx) r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); + r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_2_GS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); + r600_store_context_reg(cb, R_028894_SQ_PGM_RESOURCES_2_ES, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0); /* to avoid GPU doing any preloading of constant from random address */ @@ -2940,6 +2955,19 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(stencil_export); db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export); + switch (rshader->ps_conservative_z) { + default: /* fall through */ + case TGSI_FS_DEPTH_LAYOUT_ANY: + db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_ANY_Z); + break; + case TGSI_FS_DEPTH_LAYOUT_GREATER: + db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z); + break; + case TGSI_FS_DEPTH_LAYOUT_LESS: + db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z); + break; + } + exports_ps = 0; for (i = 0; i < rshader->noutput; i++) { if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || @@ -3246,7 +3274,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx, unsigned pitch, unsigned bpp) { - struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs; + struct radeon_winsys_cs *cs = rctx->b.dma.cs; struct r600_texture *rsrc = (struct r600_texture*)src; struct r600_texture *rdst = (struct r600_texture*)dst; unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size; @@ -3334,9 +3362,9 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx, } size = (cheight * pitch) / 4; /* emit reloc before writing cs so that cs is always in consistent state */ - radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, + radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rsrc->resource, RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE); - radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, + radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rdst->resource, RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size); cs->buf[cs->cdw++] = base >> 8; @@ -3371,7 +3399,7 @@ static void evergreen_dma_copy(struct pipe_context *ctx, unsigned src_x, src_y; unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz; - if (rctx->b.rings.dma.cs == NULL) { + if (rctx->b.dma.cs == NULL) { goto fallback; } @@ -3515,6 +3543,7 @@ void evergreen_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 0); r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4); r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5); + r600_add_atom(rctx, &rctx->b.render_cond_atom, id++); r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++); r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++); r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23); diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 937ffcb..25237c6 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -815,6 +815,13 @@ #define V_02880C_EXPORT_DB_FOUR16 0x01 #define V_02880C_EXPORT_DB_TWO 0x02 #define S_02880C_ALPHA_TO_MASK_DISABLE(x) (((x) & 0x1) << 12) +#define S_02880C_CONSERVATIVE_Z_EXPORT(x) (((x) & 0x03) << 16) +#define G_02880C_CONSERVATIVE_Z_EXPORT(x) (((x) >> 16) & 0x03) +#define C_02880C_CONSERVATIVE_Z_EXPORT 0xFFFCFFFF +#define V_02880C_EXPORT_ANY_Z 0 +#define V_02880C_EXPORT_LESS_THAN_Z 1 +#define V_02880C_EXPORT_GREATER_THAN_Z 2 +#define V_02880C_EXPORT_RESERVED 3 #define R_028A00_PA_SU_POINT_SIZE 0x028A00 #define S_028A00_HEIGHT(x) (((x) & 0xFFFF) << 0) @@ -1497,6 +1504,7 @@ #define S_028878_UNCACHED_FIRST_INST(x) (((x) & 0x1) << 28) #define G_028878_UNCACHED_FIRST_INST(x) (((x) >> 28) & 0x1) #define C_028878_UNCACHED_FIRST_INST 0xEFFFFFFF +#define R_02887C_SQ_PGM_RESOURCES_2_GS 0x02887C #define R_028890_SQ_PGM_RESOURCES_ES 0x028890 #define S_028890_NUM_GPRS(x) (((x) & 0xFF) << 0) @@ -1511,6 +1519,7 @@ #define S_028890_UNCACHED_FIRST_INST(x) (((x) & 0x1) << 28) #define G_028890_UNCACHED_FIRST_INST(x) (((x) >> 28) & 0x1) #define C_028890_UNCACHED_FIRST_INST 0xEFFFFFFF +#define R_028894_SQ_PGM_RESOURCES_2_ES 0x028894 #define R_028864_SQ_PGM_RESOURCES_2_VS 0x028864 #define S_028864_SINGLE_ROUND(x) (((x) & 0x3) << 0) diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index aede840..8a90489 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -87,18 +87,16 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op (struct pipe_sampler_view**)rctx->samplers[PIPE_SHADER_FRAGMENT].views.views); } - if ((op & R600_DISABLE_RENDER_COND) && rctx->b.current_render_cond) { - util_blitter_save_render_condition(rctx->blitter, - rctx->b.current_render_cond, - rctx->b.current_render_cond_cond, - rctx->b.current_render_cond_mode); - } + if (op & R600_DISABLE_RENDER_COND) + rctx->b.render_cond_force_off = true; } static void r600_blitter_end(struct pipe_context *ctx) { struct r600_context *rctx = (struct r600_context *)ctx; - r600_resume_nontimer_queries(&rctx->b); + + rctx->b.render_cond_force_off = false; + r600_resume_nontimer_queries(&rctx->b); } static unsigned u_max_sample(struct pipe_resource *r) @@ -527,7 +525,7 @@ static void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst * Can we somehow flush the index buffer cache? Starting a new IB seems * to do the trick. */ if (rctx->b.chip_class <= R700) - rctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + rctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); } /** @@ -604,6 +602,7 @@ static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *ds } else { uint32_t *map = r600_buffer_map_sync_with_rings(&rctx->b, r600_resource(dst), PIPE_TRANSFER_WRITE); + map += offset / 4; size /= 4; for (unsigned i = 0; i < size; i++) *map++ = value; diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 6f11366..6409f0b 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -33,11 +33,16 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in) { + struct radeon_winsys_cs *dma = ctx->b.dma.cs; - if (!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs, ctx->b.vram, ctx->b.gtt)) { + /* Flush the DMA IB if it's not empty. */ + if (dma && dma->cdw) + ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + + if (!ctx->b.ws->cs_memory_below_limit(ctx->b.gfx.cs, ctx->b.vram, ctx->b.gtt)) { ctx->b.gtt = 0; ctx->b.vram = 0; - ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); return; } /* all will be accounted once relocation are emited */ @@ -45,7 +50,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, ctx->b.vram = 0; /* The number of dwords we already used in the CS so far. */ - num_dw += ctx->b.rings.gfx.cs->cdw; + num_dw += ctx->b.gfx.cs->cdw; if (count_draw_in) { uint64_t mask; @@ -75,11 +80,6 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, num_dw += ctx->b.streamout.num_dw_for_end; } - /* Count in render_condition(NULL) at the end of CS. */ - if (ctx->b.predicate_drawing) { - num_dw += 3; - } - /* SX_MISC */ if (ctx->b.chip_class == R600) { num_dw += 3; @@ -92,14 +92,14 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, num_dw += 10; /* Flush if there's not enough space. */ - if (num_dw > ctx->b.rings.gfx.cs->max_dw) { - ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + if (num_dw > ctx->b.gfx.cs->max_dw) { + ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); } } void r600_flush_emit(struct r600_context *rctx) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; unsigned cp_coher_cntl = 0; unsigned wait_until = 0; @@ -246,13 +246,11 @@ void r600_context_gfx_flush(void *context, unsigned flags, struct pipe_fence_handle **fence) { struct r600_context *ctx = context; - struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = ctx->b.gfx.cs; if (cs->cdw == ctx->b.initial_gfx_cs_size && !fence) return; - ctx->b.rings.gfx.flushing = true; - r600_preflush_suspend_features(&ctx->b); /* flush the framebuffer cache */ @@ -278,7 +276,6 @@ void r600_context_gfx_flush(void *context, unsigned flags, /* Flush the CS. */ ctx->b.ws->cs_flush(cs, flags, fence, ctx->screen->b.cs_count++); - ctx->b.rings.gfx.flushing = false; r600_begin_new_cs(ctx); } @@ -292,7 +289,7 @@ void r600_begin_new_cs(struct r600_context *ctx) ctx->b.vram = 0; /* Begin a new CS. */ - r600_emit_command_buffer(ctx->b.rings.gfx.cs, &ctx->start_cs_cmd); + r600_emit_command_buffer(ctx->b.gfx.cs, &ctx->start_cs_cmd); /* Re-emit states. */ r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom); @@ -326,6 +323,7 @@ void r600_begin_new_cs(struct r600_context *ctx) } r600_mark_atom_dirty(ctx, &ctx->vertex_shader.atom); r600_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom); + r600_mark_atom_dirty(ctx, &ctx->b.render_cond_atom); if (ctx->blend_state.cso) r600_mark_atom_dirty(ctx, &ctx->blend_state.atom); @@ -361,7 +359,7 @@ void r600_begin_new_cs(struct r600_context *ctx) ctx->last_primitive_type = -1; ctx->last_start_instance = -1; - ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw; + ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->cdw; } /* The max number of bytes to copy per packet. */ @@ -372,7 +370,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, struct pipe_resource *src, uint64_t src_offset, unsigned size) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; assert(size); assert(rctx->screen->b.has_cp_dma); @@ -418,9 +416,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, } /* This must be done after r600_need_cs_space. */ - src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src, + src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)src, RADEON_USAGE_READ, RADEON_PRIO_CP_DMA); - dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst, + dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)dst, RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); @@ -453,7 +451,7 @@ void r600_dma_copy_buffer(struct r600_context *rctx, uint64_t src_offset, uint64_t size) { - struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs; + struct radeon_winsys_cs *cs = rctx->b.dma.cs; unsigned i, ncopy, csize; struct r600_resource *rdst = (struct r600_resource*)dst; struct r600_resource *rsrc = (struct r600_resource*)src; @@ -471,9 +469,9 @@ void r600_dma_copy_buffer(struct r600_context *rctx, for (i = 0; i < ncopy; i++) { csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW; /* emit reloc before writing cs so that cs is always in consistent state */ - radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ, + radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rsrc, RADEON_USAGE_READ, RADEON_PRIO_SDMA_BUFFER); - radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE, + radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rdst, RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_BUFFER); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize); cs->buf[cs->cdw++] = dst_offset & 0xfffffffc; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 9f4cda2..bd00dcb 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -178,11 +178,11 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, goto fail; } - rctx->b.rings.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX, - r600_context_gfx_flush, rctx, - rscreen->b.trace_bo ? - rscreen->b.trace_bo->cs_buf : NULL); - rctx->b.rings.gfx.flush = r600_context_gfx_flush; + rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX, + r600_context_gfx_flush, rctx, + rscreen->b.trace_bo ? + rscreen->b.trace_bo->cs_buf : NULL); + rctx->b.gfx.flush = r600_context_gfx_flush; rctx->allocator_fetch_shader = u_suballocator_create(&rctx->b.b, 64 * 1024, 256, 0, PIPE_USAGE_DEFAULT, FALSE); @@ -323,6 +323,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: + case PIPE_CAP_SAMPLER_VIEW_TARGET: return family >= CHIP_CEDAR ? 1 : 0; case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: return family >= CHIP_CEDAR ? 4 : 0; @@ -338,13 +339,13 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_VERTEX_COLOR_CLAMPED: case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: - case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + case PIPE_CAP_CLEAR_TEXTURE: return 0; /* Stream output. */ diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 520b03f..bbb55ad 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -38,7 +38,7 @@ #include "tgsi/tgsi_scan.h" -#define R600_NUM_ATOMS 42 +#define R600_NUM_ATOMS 43 #define R600_MAX_VIEWPORTS 16 @@ -116,6 +116,7 @@ struct r600_db_misc_state { unsigned log_samples; unsigned db_shader_control; bool htile_clear; + uint8_t ps_conservative_z; }; struct r600_cb_misc_state { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index fc6335a..560197c 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -2044,6 +2044,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]; shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; + shader->ps_conservative_z = (uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]; if (shader->vs_as_gs_a) vs_add_primid_output(&ctx, key.vs.prim_id_out); diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index c240e71..2040f73 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -76,6 +76,8 @@ struct r600_shader { boolean uses_tex_buffers; boolean gs_prim_id_input; + uint8_t ps_conservative_z; + /* Size in bytes of a data item in the ring(s) (single vertex data). Stages with only one ring items 123 will be set to 0. */ unsigned ring_item_sizes[4]; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 1be3e1b..c2d4abc 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -244,7 +244,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen, static void r600_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a; float offset_units = state->offset_units; float offset_scale = state->offset_scale; @@ -760,7 +760,7 @@ r600_create_sampler_view(struct pipe_context *ctx, static void r600_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct pipe_clip_state *state = &rctx->clip_state.state; radeon_set_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4); @@ -774,7 +774,7 @@ static void r600_set_polygon_stipple(struct pipe_context *ctx, static void r600_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_scissor_state *rstate = &rctx->scissor; struct pipe_scissor_state *state; bool do_disable_workaround = false; @@ -1334,7 +1334,7 @@ static void r600_get_sample_position(struct pipe_context *ctx, static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; unsigned max_dist = 0; if (rctx->b.family == CHIP_R600) { @@ -1401,7 +1401,7 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples) static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct pipe_framebuffer_state *state = &rctx->framebuffer.state; unsigned nr_cbufs = state->nr_cbufs; struct r600_surface **cb = (struct r600_surface**)&state->cbufs[0]; @@ -1432,7 +1432,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a radeon_set_context_reg(cs, R_028040_CB_COLOR0_BASE + i*4, cb[i]->cb_color_base); reloc = radeon_add_to_buffer_list(&rctx->b, - &rctx->b.rings.gfx, + &rctx->b.gfx, (struct r600_resource*)cb[i]->base.texture, RADEON_USAGE_READWRITE, cb[i]->base.texture->nr_samples > 1 ? @@ -1445,7 +1445,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a radeon_set_context_reg(cs, R_0280E0_CB_COLOR0_FRAG + i*4, cb[i]->cb_color_fmask); reloc = radeon_add_to_buffer_list(&rctx->b, - &rctx->b.rings.gfx, + &rctx->b.gfx, cb[i]->cb_buffer_fmask, RADEON_USAGE_READWRITE, cb[i]->base.texture->nr_samples > 1 ? @@ -1458,7 +1458,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a radeon_set_context_reg(cs, R_0280C0_CB_COLOR0_TILE + i*4, cb[i]->cb_color_cmask); reloc = radeon_add_to_buffer_list(&rctx->b, - &rctx->b.rings.gfx, + &rctx->b.gfx, cb[i]->cb_buffer_cmask, RADEON_USAGE_READWRITE, cb[i]->base.texture->nr_samples > 1 ? @@ -1497,7 +1497,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a if (state->zsbuf) { struct r600_surface *surf = (struct r600_surface*)state->zsbuf; unsigned reloc = radeon_add_to_buffer_list(&rctx->b, - &rctx->b.rings.gfx, + &rctx->b.gfx, (struct r600_resource*)state->zsbuf->texture, RADEON_USAGE_READWRITE, surf->base.texture->nr_samples > 1 ? @@ -1570,7 +1570,7 @@ static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples) static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom; if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) { @@ -1600,7 +1600,7 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_db_state *a = (struct r600_db_state*)atom; if (a->rsurf && a->rsurf->db_htile_surface) { @@ -1610,7 +1610,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value)); radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface); radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base); - reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer, + reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rtex->htile_buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE); cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = reloc_idx; @@ -1621,13 +1621,28 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; unsigned db_render_control = 0; unsigned db_render_override = S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); + if (rctx->b.chip_class >= R700) { + switch (a->ps_conservative_z) { + default: /* fall through */ + case TGSI_FS_DEPTH_LAYOUT_ANY: + db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_ANY_Z); + break; + case TGSI_FS_DEPTH_LAYOUT_GREATER: + db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_GREATER_THAN_Z); + break; + case TGSI_FS_DEPTH_LAYOUT_LESS: + db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_LESS_THAN_Z); + break; + } + } + if (a->occlusion_query_enabled) { if (rctx->b.chip_class >= R700) { db_render_control |= S_028D0C_R700_PERFECT_ZPASS_COUNTS(1); @@ -1687,7 +1702,7 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_config_state *a = (struct r600_config_state*)atom; radeon_set_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1); @@ -1696,7 +1711,7 @@ static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom * static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask; while (dirty_mask) { @@ -1725,7 +1740,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER)); } } @@ -1736,7 +1751,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, unsigned reg_alu_constbuf_size, unsigned reg_alu_const_cache) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -1758,7 +1773,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, } radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0)); @@ -1774,7 +1789,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); dirty_mask &= ~(1 << buffer_index); @@ -1810,7 +1825,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx, struct r600_samplerview_state *state, unsigned resource_id_base) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -1825,7 +1840,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx, radeon_emit(cs, (resource_id_base + resource_index) * 7); radeon_emit_array(cs, rview->tex_resource_words, 7); - reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource, + reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rview->tex_resource, RADEON_USAGE_READ, r600_get_sampler_view_priority(rview->tex_resource)); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); @@ -1857,7 +1872,7 @@ static void r600_emit_sampler_states(struct r600_context *rctx, unsigned resource_id_base, unsigned border_color_reg) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; uint32_t dirty_mask = texinfo->states.dirty_mask; while (dirty_mask) { @@ -1918,7 +1933,7 @@ static void r600_emit_ps_sampler_states(struct r600_context *rctx, struct r600_a static void r600_emit_seamless_cube_map(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; unsigned tmp; tmp = S_009508_DISABLE_CUBE_ANISO(1) | @@ -1936,26 +1951,26 @@ static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a struct r600_sample_mask *s = (struct r600_sample_mask*)a; uint8_t mask = s->sample_mask; - radeon_set_context_reg(rctx->b.rings.gfx.cs, R_028C48_PA_SC_AA_MASK, + radeon_set_context_reg(rctx->b.gfx.cs, R_028C48_PA_SC_AA_MASK, mask | (mask << 8) | (mask << 16) | (mask << 24)); } static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_cso_state *state = (struct r600_cso_state*)a; struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso; radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer, + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->buffer, RADEON_USAGE_READ, RADEON_PRIO_INTERNAL_SHADER)); } static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a; uint32_t v2 = 0, primid = 0; @@ -1990,7 +2005,7 @@ static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a; struct r600_resource *rbuffer; @@ -2002,7 +2017,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) rbuffer =(struct r600_resource*)state->esgs_ring.buffer; radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, 0); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT)); radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, @@ -2011,7 +2026,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) rbuffer =(struct r600_resource*)state->gsvs_ring.buffer; radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, 0); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT)); radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, @@ -2787,6 +2802,7 @@ void r600_update_db_shader_control(struct r600_context * rctx) { bool dual_export; unsigned db_shader_control; + uint8_t ps_conservative_z; if (!rctx->ps_shader) { return; @@ -2798,6 +2814,8 @@ void r600_update_db_shader_control(struct r600_context * rctx) db_shader_control = rctx->ps_shader->current->db_shader_control | S_02880C_DUAL_EXPORT_ENABLE(dual_export); + ps_conservative_z = rctx->ps_shader->current->shader.ps_conservative_z; + /* When alpha test is enabled we can't trust the hw to make the proper * decision on the order in which ztest should be run related to fragment * shader execution. @@ -2811,8 +2829,10 @@ void r600_update_db_shader_control(struct r600_context * rctx) db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); } - if (db_shader_control != rctx->db_misc_state.db_shader_control) { + if (db_shader_control != rctx->db_misc_state.db_shader_control || + ps_conservative_z != rctx->db_misc_state.ps_conservative_z) { rctx->db_misc_state.db_shader_control = db_shader_control; + rctx->db_misc_state.ps_conservative_z = ps_conservative_z; r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } } @@ -2845,7 +2865,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, unsigned pitch, unsigned bpp) { - struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs; + struct radeon_winsys_cs *cs = rctx->b.dma.cs; struct r600_texture *rsrc = (struct r600_texture*)src; struct r600_texture *rdst = (struct r600_texture*)dst; unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size; @@ -2918,9 +2938,9 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, cheight = cheight > copy_height ? copy_height : cheight; size = (cheight * pitch) / 4; /* emit reloc before writing cs so that cs is always in consistent state */ - radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ, + radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rsrc->resource, RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE); - radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE, + radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rdst->resource, RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, size); cs->buf[cs->cdw++] = base >> 8; @@ -2954,7 +2974,7 @@ static void r600_dma_copy(struct pipe_context *ctx, unsigned src_x, src_y; unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz; - if (rctx->b.rings.dma.cs == NULL) { + if (rctx->b.dma.cs == NULL) { goto fallback; } @@ -3086,6 +3106,7 @@ void r600_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3); r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4); r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5); + r600_add_atom(rctx, &rctx->b.render_cond_atom, id++); r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++); r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++); r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 178005a..d629194 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -71,12 +71,12 @@ void r600_init_atom(struct r600_context *rctx, void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom) { - r600_emit_command_buffer(rctx->b.rings.gfx.cs, ((struct r600_cso_state*)atom)->cb); + r600_emit_command_buffer(rctx->b.gfx.cs, ((struct r600_cso_state*)atom)->cb); } void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_alphatest_state *a = (struct r600_alphatest_state*)atom; unsigned alpha_ref = a->sx_alpha_ref; @@ -211,7 +211,7 @@ static void r600_set_blend_color(struct pipe_context *ctx, void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct pipe_blend_color *state = &rctx->blend_color.state; radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); @@ -223,7 +223,7 @@ void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom) void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_vgt_state *a = (struct r600_vgt_state *)atom; radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en); @@ -257,7 +257,7 @@ static void r600_set_stencil_ref(struct pipe_context *ctx, void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom; radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2); @@ -709,7 +709,7 @@ static void r600_set_viewport_states(struct pipe_context *ctx, void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_viewport_state *rstate = &rctx->viewport; struct pipe_viewport_state *state; uint32_t dirty_mask; @@ -1460,7 +1460,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_clip_misc_state *state = &rctx->clip_misc_state; radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, @@ -1477,7 +1477,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info struct r600_context *rctx = (struct r600_context *)ctx; struct pipe_draw_info info = *dinfo; struct pipe_index_buffer ib = {}; - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; + bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off; uint64_t mask; if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) { @@ -1490,8 +1491,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info } /* make sure that the gfx ring is only one active */ - if (rctx->b.rings.dma.cs && rctx->b.rings.dma.cs->cdw) { - rctx->b.rings.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL); + if (rctx->b.dma.cs && rctx->b.dma.cs->cdw) { + rctx->b.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL); } if (!r600_update_derived_state(rctx)) { @@ -1663,7 +1664,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info /* Draw packets. */ if (!info.indirect) { - cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, 0); cs->buf[cs->cdw++] = info.instance_count; } @@ -1675,20 +1676,20 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info rctx->vgt_state.last_draw_was_indirect = true; rctx->last_start_instance = -1; - cs->buf[cs->cdw++] = PKT3(EG_PKT3_SET_BASE, 2, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = PKT3(EG_PKT3_SET_BASE, 2, 0); cs->buf[cs->cdw++] = EG_DRAW_INDEX_INDIRECT_PATCH_TABLE_BASE; cs->buf[cs->cdw++] = va; cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing); - cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); + cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)info.indirect, RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); } if (info.indexed) { - cs->buf[cs->cdw++] = PKT3(PKT3_INDEX_TYPE, 0, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = PKT3(PKT3_INDEX_TYPE, 0, 0); cs->buf[cs->cdw++] = ib.index_size == 4 ? (VGT_INDEX_32 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_32_BIT : 0)) : (VGT_INDEX_16 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_16_BIT : 0)); @@ -1696,7 +1697,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info if (ib.user_buffer) { unsigned size_bytes = info.count*ib.index_size; unsigned size_dw = align(size_bytes, 4) / 4; - cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_IMMD, 1 + size_dw, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_IMMD, 1 + size_dw, render_cond_bit); cs->buf[cs->cdw++] = info.count; cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_IMMEDIATE; memcpy(cs->buf+cs->cdw, ib.user_buffer, size_bytes); @@ -1705,13 +1706,13 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info uint64_t va = r600_resource(ib.buffer)->gpu_address + ib.offset; if (likely(!info.indirect)) { - cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, render_cond_bit); cs->buf[cs->cdw++] = va; cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; cs->buf[cs->cdw++] = info.count; cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA; - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing); - cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); + cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)ib.buffer, RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER); @@ -1719,20 +1720,20 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info else { uint32_t max_size = (ib.buffer->width0 - ib.offset) / ib.index_size; - cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BASE, 1, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BASE, 1, 0); cs->buf[cs->cdw++] = va; cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing); - cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); + cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)ib.buffer, RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER); - cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, 0); cs->buf[cs->cdw++] = max_size; - cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, render_cond_bit); cs->buf[cs->cdw++] = info.indirect_offset; cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA; } @@ -1752,17 +1753,17 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] = 0; /* unused */ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, + cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, t->buf_filled_size, RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE); } if (likely(!info.indirect)) { - cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit); cs->buf[cs->cdw++] = info.count; } else { - cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDIRECT, 1, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDIRECT, 1, render_cond_bit); cs->buf[cs->cdw++] = info.indirect_offset; } cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX | @@ -1938,7 +1939,7 @@ bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader; if (!shader) @@ -1946,7 +1947,7 @@ void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a) r600_emit_command_buffer(cs, &shader->command_buffer); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->bo, + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER)); } @@ -2669,12 +2670,12 @@ void r600_init_common_state_functions(struct r600_context *rctx) void r600_trace_emit(struct r600_context *rctx) { struct r600_screen *rscreen = rctx->screen; - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; uint64_t va; uint32_t reloc; va = rscreen->b.trace_bo->gpu_address; - reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo, + reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rscreen->b.trace_bo, RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE); radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0)); radeon_emit(cs, va & 0xFFFFFFFFUL); diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 6bba88c..53f5ad6 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -781,6 +781,14 @@ #define S_028D0C_COPY_CENTROID(x) (((x) & 0x1) << 7) #define S_028D0C_COPY_SAMPLE(x) (((x) & 0x1) << 8) #define S_028D0C_R700_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 15) +#define S_028D0C_CONSERVATIVE_Z_EXPORT(x) (((x) & 0x03) << 13) +#define G_028D0C_CONSERVATIVE_Z_EXPORT(x) (((x) >> 13) & 0x03) +#define C_028D0C_CONSERVATIVE_Z_EXPORT 0xFFFF9FFF +#define V_028D0C_EXPORT_ANY_Z 0 +#define V_028D0C_EXPORT_LESS_THAN_Z 1 +#define V_028D0C_EXPORT_GREATER_THAN_Z 2 +#define V_028D0C_EXPORT_RESERVED 3 + #define R_028D10_DB_RENDER_OVERRIDE 0x028D10 #define V_028D10_FORCE_OFF 0 #define V_028D10_FORCE_ENABLE 1 |