diff options
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_state.c')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 87 |
1 files changed, 77 insertions, 10 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index f823af1..1245f56 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2797,7 +2797,7 @@ static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) * Build the sampler view descriptor for a buffer texture. * @param state 256-bit descriptor; only the high 128 bits are filled in */ -static void +void si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, enum pipe_format format, unsigned first_element, unsigned last_element, @@ -2838,9 +2838,10 @@ si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, /** * Build the sampler view descriptor for a texture. */ -static void +void si_make_texture_descriptor(struct si_screen *screen, struct r600_texture *tex, + bool sampler, enum pipe_texture_target target, enum pipe_format pipe_format, const unsigned char state_swizzle[4], @@ -2855,7 +2856,7 @@ si_make_texture_descriptor(struct si_screen *screen, const struct util_format_description *desc; unsigned char swizzle[4]; int first_non_void; - unsigned num_format, data_format; + unsigned num_format, data_format, type; uint32_t pitch; uint64_t va; @@ -2973,12 +2974,30 @@ si_make_texture_descriptor(struct si_screen *screen, data_format = 0; } - if (res->target == PIPE_TEXTURE_1D_ARRAY) { + if (!sampler && + (res->target == PIPE_TEXTURE_CUBE || + res->target == PIPE_TEXTURE_CUBE_ARRAY || + res->target == PIPE_TEXTURE_3D)) { + /* For the purpose of shader images, treat cube maps and 3D + * textures as 2D arrays. For 3D textures, the address + * calculations for mipmaps are different, so we rely on the + * caller to effectively disable mipmaps. + */ + type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; + + assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0)); + } else { + type = si_tex_dim(res->target, target, res->nr_samples); + } + + if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { height = 1; depth = res->array_size; - } else if (res->target == PIPE_TEXTURE_2D_ARRAY) { - depth = res->array_size; - } else if (res->target == PIPE_TEXTURE_CUBE_ARRAY) + } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || + type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { + if (sampler || res->target != PIPE_TEXTURE_3D) + depth = res->array_size; + } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) depth = res->array_size / 6; pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(pipe_format); @@ -3001,7 +3020,7 @@ si_make_texture_descriptor(struct si_screen *screen, last_level) | S_008F1C_TILING_INDEX(si_tile_mode_index(tex, base_level, false)) | S_008F1C_POW2_PAD(res->last_level > 0) | - S_008F1C_TYPE(si_tex_dim(res->target, target, res->nr_samples))); + S_008F1C_TYPE(type)); state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1)); state[5] = (S_008F24_BASE_ARRAY(first_layer) | S_008F24_LAST_ARRAY(last_layer)); @@ -3155,7 +3174,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx, state->target == PIPE_TEXTURE_CUBE) last_layer = state->u.tex.first_layer; - si_make_texture_descriptor(sctx->screen, tmp, state->target, + si_make_texture_descriptor(sctx->screen, tmp, true, state->target, state->format, state_swizzle, base_level, first_level, last_level, state->u.tex.first_layer, last_layer, @@ -3503,6 +3522,52 @@ static void si_texture_barrier(struct pipe_context *ctx) SI_CONTEXT_FLUSH_AND_INV_CB; } +static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) +{ + struct si_context *sctx = (struct si_context *)ctx; + + /* Subsequent commands must wait for all shader invocations to + * complete. */ + sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH; + + if (flags & PIPE_BARRIER_CONSTANT_BUFFER) + sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 | + SI_CONTEXT_INV_VMEM_L1; + + if (flags & (PIPE_BARRIER_VERTEX_BUFFER | + PIPE_BARRIER_SHADER_BUFFER | + PIPE_BARRIER_TEXTURE | + PIPE_BARRIER_IMAGE | + PIPE_BARRIER_STREAMOUT_BUFFER)) { + /* As far as I can tell, L1 contents are written back to L2 + * automatically at end of shader, but the contents of other + * L1 caches might still be stale. */ + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1; + } + + if (flags & PIPE_BARRIER_INDEX_BUFFER) { + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1; + + /* Indices are read through TC L2 since VI. */ + if (sctx->screen->b.chip_class <= CIK) + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; + } + + if (flags & PIPE_BARRIER_FRAMEBUFFER) + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; + + if (flags & (PIPE_BARRIER_MAPPED_BUFFER | + PIPE_BARRIER_FRAMEBUFFER | + PIPE_BARRIER_INDIRECT_BUFFER)) { + /* Not sure if INV_GLOBAL_L2 is the best thing here. + * + * We need to make sure that TC L1 & L2 are written back to + * memory, because neither CPU accesses nor CB fetches consider + * TC, but there's no need to invalidate any TC cache lines. */ + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; + } +} + static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) { struct pipe_blend_state blend; @@ -3583,6 +3648,7 @@ void si_init_state_functions(struct si_context *sctx) sctx->b.b.set_index_buffer = si_set_index_buffer; sctx->b.b.texture_barrier = si_texture_barrier; + sctx->b.b.memory_barrier = si_memory_barrier; sctx->b.b.set_polygon_stipple = si_set_polygon_stipple; sctx->b.b.set_min_samples = si_set_min_samples; sctx->b.b.set_tess_state = si_set_tess_state; @@ -3637,7 +3703,8 @@ static void si_query_opaque_metadata(struct r600_common_screen *rscreen, /* TILE_MODE_INDEX is ambiguous without a PCI ID. */ md->metadata[1] = (ATI_VENDOR_ID << 16) | rscreen->info.pci_id; - si_make_texture_descriptor(sscreen, rtex, res->target, res->format, + si_make_texture_descriptor(sscreen, rtex, true, + res->target, res->format, swizzle, 0, 0, res->last_level, 0, is_array ? res->array_size - 1 : 0, res->width0, res->height0, res->depth0, |