diff options
author | Christian König <deathsimple@vodafone.de> | 2011-04-02 12:05:22 +0200 |
---|---|---|
committer | Christian König <deathsimple@vodafone.de> | 2011-04-02 12:05:22 +0200 |
commit | 794cde3f5ef59cf603be284fbc8de33d2cda7d2c (patch) | |
tree | 0c7bf5a5a1dbf1244b42ca10a16ca0def657a756 /src/gallium/auxiliary/vl | |
parent | 4de5d81638a79fbd74eca63723f6f09727bf4b60 (diff) | |
download | external_mesa3d-794cde3f5ef59cf603be284fbc8de33d2cda7d2c.zip external_mesa3d-794cde3f5ef59cf603be284fbc8de33d2cda7d2c.tar.gz external_mesa3d-794cde3f5ef59cf603be284fbc8de33d2cda7d2c.tar.bz2 |
[g3dvl] split vertex element state into y, cb, cr
Diffstat (limited to 'src/gallium/auxiliary/vl')
-rw-r--r-- | src/gallium/auxiliary/vl/vl_idct.c | 30 | ||||
-rw-r--r-- | src/gallium/auxiliary/vl/vl_mpeg12_context.c | 118 | ||||
-rw-r--r-- | src/gallium/auxiliary/vl/vl_mpeg12_context.h | 6 | ||||
-rw-r--r-- | src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 151 | ||||
-rw-r--r-- | src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h | 5 | ||||
-rw-r--r-- | src/gallium/auxiliary/vl/vl_vertex_buffers.c | 79 | ||||
-rw-r--r-- | src/gallium/auxiliary/vl/vl_vertex_buffers.h | 16 |
7 files changed, 173 insertions, 232 deletions
diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c index 8cfb56e..c73b476 100644 --- a/src/gallium/auxiliary/vl/vl_idct.c +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -93,8 +93,8 @@ static void * create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle) { struct ureg_program *shader; - struct ureg_src vrect, vpos, vblock, eb[4]; - struct ureg_src scale, blocks_xy, t_eb; + struct ureg_src vrect, vpos, vblock, eb; + struct ureg_src scale, blocks_xy; struct ureg_dst t_tex, t_start; struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2]; unsigned label; @@ -112,10 +112,7 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle) o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); - eb[0] = ureg_DECL_vs_input(shader, VS_I_EB_0_0); - eb[1] = ureg_DECL_vs_input(shader, VS_I_EB_1_0); - eb[2] = ureg_DECL_vs_input(shader, VS_I_EB_0_1); - eb[3] = ureg_DECL_vs_input(shader, VS_I_EB_1_1); + eb = ureg_DECL_vs_input(shader, VS_I_EB); o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); @@ -127,8 +124,7 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle) * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height) * blocks_xy = (blocks_x, blocks_y) * - * ar = vblock.y * blocks.x + vblock.x - * if eb[ar].(color_swizzle) + * if eb.(vblock.y, vblock.x) * o_vpos.xy = -1 * else * t_tex = vpos * blocks_xy + vblock @@ -150,18 +146,20 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle) blocks_xy = ureg_imm2f(shader, idct->blocks_x, idct->blocks_y); if (idct->blocks_x > 1 || idct->blocks_y > 1) { - struct ureg_dst ar = ureg_DECL_address(shader); + ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), + ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_Y)), + ureg_swizzle(eb, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W), + 
ureg_swizzle(eb, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y)); - ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X), - ureg_scalar(vblock, TGSI_SWIZZLE_Y), blocks_xy, vblock); + ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X), + ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_X)), + ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_Y), + ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_X)); - ureg_ARL(shader, ureg_writemask(ar, TGSI_WRITEMASK_X), ureg_src(t_tex)); - t_eb = ureg_src_indirect(eb[0], ureg_src(ar)); - } else { - t_eb = eb[0]; + eb = ureg_src(t_tex); } - ureg_IF(shader, ureg_scalar(t_eb, color_swizzle), &label); + ureg_IF(shader, ureg_scalar(eb, TGSI_SWIZZLE_X), &label); ureg_MOV(shader, o_vpos, ureg_imm1f(shader, -1.0f)); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c index 94ac70e..9f3da73 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c @@ -94,8 +94,8 @@ vl_mpeg12_buffer_destroy(struct pipe_video_buffer *buffer) vl_ycbcr_buffer_cleanup(&buf->render_result); vl_vb_cleanup(&buf->vertex_stream); vl_idct_cleanup_buffer(&ctx->idct_y, &buf->idct_y); - vl_idct_cleanup_buffer(&ctx->idct_cb, &buf->idct_cb); - vl_idct_cleanup_buffer(&ctx->idct_cr, &buf->idct_cr); + vl_idct_cleanup_buffer(&ctx->idct_c, &buf->idct_cb); + vl_idct_cleanup_buffer(&ctx->idct_c, &buf->idct_cr); vl_mpeg12_mc_cleanup_buffer(&buf->mc_y); vl_mpeg12_mc_cleanup_buffer(&buf->mc_cb); vl_mpeg12_mc_cleanup_buffer(&buf->mc_cr); @@ -115,8 +115,8 @@ vl_mpeg12_buffer_map(struct pipe_video_buffer *buffer) vl_vb_map(&buf->vertex_stream, ctx->pipe); vl_idct_map_buffers(&ctx->idct_y, &buf->idct_y); - vl_idct_map_buffers(&ctx->idct_cr, &buf->idct_cr); - vl_idct_map_buffers(&ctx->idct_cb, &buf->idct_cb); + vl_idct_map_buffers(&ctx->idct_c, &buf->idct_cb); + vl_idct_map_buffers(&ctx->idct_c, &buf->idct_cr); } static void @@ -156,8 +156,8 @@ vl_mpeg12_buffer_unmap(struct 
pipe_video_buffer *buffer) vl_vb_unmap(&buf->vertex_stream, ctx->pipe); vl_idct_unmap_buffers(&ctx->idct_y, &buf->idct_y); - vl_idct_unmap_buffers(&ctx->idct_cr, &buf->idct_cr); - vl_idct_unmap_buffers(&ctx->idct_cb, &buf->idct_cb); + vl_idct_unmap_buffers(&ctx->idct_c, &buf->idct_cb); + vl_idct_unmap_buffers(&ctx->idct_c, &buf->idct_cr); } static void @@ -182,36 +182,42 @@ vl_mpeg12_buffer_flush(struct pipe_video_buffer *buffer, ctx = (struct vl_mpeg12_context *)buf->base.context; assert(ctx); + surfaces = vl_ycbcr_get_surfaces(&buf->render_result); + + sv_past = past ? vl_ycbcr_get_sampler_views(&past->render_result) : NULL; + sv_future = future ? vl_ycbcr_get_sampler_views(&future->render_result) : NULL; + vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num); ctx->pipe->set_vertex_buffers(ctx->pipe, 2, buf->vertex_bufs.all); - ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state); ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend); - vl_idct_flush(&ctx->idct_y, &buf->idct_y, ne_num); - vl_idct_flush(&ctx->idct_cr, &buf->idct_cr, ne_num); - vl_idct_flush(&ctx->idct_cb, &buf->idct_cb, ne_num); - surfaces = vl_ycbcr_get_surfaces(&buf->render_result); - sv_past = past ? vl_ycbcr_get_sampler_views(&past->render_result) : NULL; - sv_future = future ? vl_ycbcr_get_sampler_views(&future->render_result) : NULL; + ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->ves_y); + vl_idct_flush(&ctx->idct_y, &buf->idct_y, ne_num); sv_refs[0] = sv_past ? sv_past->y : NULL; sv_refs[1] = sv_future ? sv_future->y : NULL; - vl_mpeg12_mc_renderer_flush(&ctx->mc_y, &buf->mc_y, surfaces->y, + vl_mpeg12_mc_renderer_flush(&ctx->mc, &buf->mc_y, surfaces->y, sv_refs, ne_start, ne_num, e_start, e_num, fence); + ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->ves_cb); + vl_idct_flush(&ctx->idct_c, &buf->idct_cb, ne_num); + sv_refs[0] = sv_past ? sv_past->cb : NULL; sv_refs[1] = sv_future ? 
sv_future->cb : NULL; - vl_mpeg12_mc_renderer_flush(&ctx->mc_cb, &buf->mc_cb, surfaces->cb, + vl_mpeg12_mc_renderer_flush(&ctx->mc, &buf->mc_cb, surfaces->cb, sv_refs, ne_start, ne_num, e_start, e_num, fence); + ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->ves_cr); + vl_idct_flush(&ctx->idct_c, &buf->idct_cr, ne_num); + sv_refs[0] = sv_past ? sv_past->cr : NULL; sv_refs[1] = sv_future ? sv_future->cr : NULL; - vl_mpeg12_mc_renderer_flush(&ctx->mc_cr, &buf->mc_cr, surfaces->cr, + vl_mpeg12_mc_renderer_flush(&ctx->mc, &buf->mc_cr, surfaces->cr, sv_refs, ne_start, ne_num, e_start, e_num, fence); } @@ -231,13 +237,12 @@ vl_mpeg12_destroy(struct pipe_video_context *vpipe) ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa); vl_compositor_cleanup(&ctx->compositor); - vl_mpeg12_mc_renderer_cleanup(&ctx->mc_y); - vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cb); - vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cr); + vl_mpeg12_mc_renderer_cleanup(&ctx->mc); vl_idct_cleanup(&ctx->idct_y); - vl_idct_cleanup(&ctx->idct_cr); - vl_idct_cleanup(&ctx->idct_cb); - ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state); + vl_idct_cleanup(&ctx->idct_c); + ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves_y); + ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves_cb); + ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves_cr); pipe_resource_reference(&ctx->quads.buffer, NULL); ctx->pipe->destroy(ctx->pipe); @@ -353,13 +358,13 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe) return NULL; } - if (!vl_idct_init_buffer(&ctx->idct_cb, &buffer->idct_cb, + if (!vl_idct_init_buffer(&ctx->idct_c, &buffer->idct_cb, idct_views->cb, idct_surfaces->cb)) { FREE(buffer); return NULL; } - if (!vl_idct_init_buffer(&ctx->idct_cr, &buffer->idct_cr, + if (!vl_idct_init_buffer(&ctx->idct_c, &buffer->idct_cr, idct_views->cr, idct_surfaces->cr)) { FREE(buffer); return NULL; @@ -367,17 +372,17 @@ vl_mpeg12_create_buffer(struct 
pipe_video_context *vpipe) mc_views = vl_ycbcr_get_sampler_views(&buffer->idct_2_mc); - if(!vl_mpeg12_mc_init_buffer(&ctx->mc_y, &buffer->mc_y, mc_views->y)) { + if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_y, mc_views->y)) { FREE(buffer); return NULL; } - if(!vl_mpeg12_mc_init_buffer(&ctx->mc_cb, &buffer->mc_cb, mc_views->cb)) { + if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_cb, mc_views->cb)) { FREE(buffer); return NULL; } - if(!vl_mpeg12_mc_init_buffer(&ctx->mc_cr, &buffer->mc_cr, mc_views->cr)) { + if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_cr, mc_views->cr)) { FREE(buffer); return NULL; } @@ -637,14 +642,10 @@ init_idct(struct vl_mpeg12_context *ctx, unsigned buffer_width, unsigned buffer_ chroma_blocks_y = 2; } - if(!vl_idct_init(&ctx->idct_cb, ctx->pipe, chroma_width, chroma_height, + if(!vl_idct_init(&ctx->idct_c, ctx->pipe, chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix)) return false; - if(!vl_idct_init(&ctx->idct_cr, ctx->pipe, chroma_width, chroma_height, - chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix)) - return false; - return true; } @@ -690,13 +691,9 @@ vl_create_mpeg12_context(struct pipe_context *pipe, ctx->quads = vl_vb_upload_quads(ctx->pipe, 2, 2); ctx->vertex_buffer_size = width / MACROBLOCK_WIDTH * height / MACROBLOCK_HEIGHT; - ctx->vertex_elems_state = vl_vb_get_elems_state(ctx->pipe, true); - - if (ctx->vertex_elems_state == NULL) { - ctx->pipe->destroy(ctx->pipe); - FREE(ctx); - return NULL; - } + ctx->ves_y = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_X); + ctx->ves_cb = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_Y); + ctx->ves_cr = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_Z); ctx->buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH); ctx->buffer_height = pot_buffers ? 
util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT); @@ -707,34 +704,9 @@ vl_create_mpeg12_context(struct pipe_context *pipe, return NULL; } - if (!vl_mpeg12_mc_renderer_init(&ctx->mc_y, ctx->pipe, - ctx->buffer_width, ctx->buffer_height, - chroma_format, TGSI_SWIZZLE_X)) { - vl_idct_cleanup(&ctx->idct_y); - vl_idct_cleanup(&ctx->idct_cr); - vl_idct_cleanup(&ctx->idct_cb); - ctx->pipe->destroy(ctx->pipe); - FREE(ctx); - return NULL; - } - - if (!vl_mpeg12_mc_renderer_init(&ctx->mc_cb, ctx->pipe, - ctx->buffer_width, ctx->buffer_height, - chroma_format, TGSI_SWIZZLE_Y)) { - vl_idct_cleanup(&ctx->idct_y); - vl_idct_cleanup(&ctx->idct_cr); - vl_idct_cleanup(&ctx->idct_cb); - ctx->pipe->destroy(ctx->pipe); - FREE(ctx); - return NULL; - } - - if (!vl_mpeg12_mc_renderer_init(&ctx->mc_cr, ctx->pipe, - ctx->buffer_width, ctx->buffer_height, - chroma_format, TGSI_SWIZZLE_Z)) { + if (!vl_mpeg12_mc_renderer_init(&ctx->mc, ctx->pipe, ctx->buffer_width, ctx->buffer_height)) { vl_idct_cleanup(&ctx->idct_y); - vl_idct_cleanup(&ctx->idct_cr); - vl_idct_cleanup(&ctx->idct_cb); + vl_idct_cleanup(&ctx->idct_c); ctx->pipe->destroy(ctx->pipe); FREE(ctx); return NULL; @@ -742,11 +714,8 @@ vl_create_mpeg12_context(struct pipe_context *pipe, if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) { vl_idct_cleanup(&ctx->idct_y); - vl_idct_cleanup(&ctx->idct_cr); - vl_idct_cleanup(&ctx->idct_cb); - vl_mpeg12_mc_renderer_cleanup(&ctx->mc_y); - vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cb); - vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cr); + vl_idct_cleanup(&ctx->idct_c); + vl_mpeg12_mc_renderer_cleanup(&ctx->mc); ctx->pipe->destroy(ctx->pipe); FREE(ctx); return NULL; @@ -754,11 +723,8 @@ vl_create_mpeg12_context(struct pipe_context *pipe, if (!init_pipe_state(ctx)) { vl_idct_cleanup(&ctx->idct_y); - vl_idct_cleanup(&ctx->idct_cr); - vl_idct_cleanup(&ctx->idct_cb); - vl_mpeg12_mc_renderer_cleanup(&ctx->mc_y); - vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cb); - 
vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cr); + vl_idct_cleanup(&ctx->idct_c); + vl_mpeg12_mc_renderer_cleanup(&ctx->mc); vl_compositor_cleanup(&ctx->compositor); ctx->pipe->destroy(ctx->pipe); FREE(ctx); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h index f84e235..e0c6ca9 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_context.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.h @@ -49,10 +49,10 @@ struct vl_mpeg12_context struct pipe_vertex_buffer quads; unsigned vertex_buffer_size; - void *vertex_elems_state; + void *ves_y, *ves_cb, *ves_cr; - struct vl_idct idct_y, idct_cb, idct_cr; - struct vl_mpeg12_mc_renderer mc_y, mc_cb, mc_cr; + struct vl_idct idct_y, idct_c; + struct vl_mpeg12_mc_renderer mc; struct vl_compositor compositor; void *rast; diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index 7c3fea5..912dea3 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -44,16 +44,12 @@ enum VS_OUTPUT { VS_O_VPOS, VS_O_LINE, - VS_O_TEX0, - VS_O_TEX1, - VS_O_TEX2, - VS_O_EB_0, - VS_O_EB_1, - VS_O_INFO, - VS_O_MV0, - VS_O_MV1, - VS_O_MV2, - VS_O_MV3 + VS_O_TEX_TOP, + VS_O_TEX_BOTTOM, + VS_O_MV0_TOP, + VS_O_MV0_BOTTOM, + VS_O_MV1_TOP, + VS_O_MV1_BOTTOM }; static void * @@ -61,10 +57,10 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r) { struct ureg_program *shader; struct ureg_src block_scale, mv_scale; - struct ureg_src vrect, vpos, eb[2][2], vmv[4]; + struct ureg_src vrect, vpos, eb, flags, vmv[2][2]; struct ureg_dst t_vpos, t_vtex, t_vmv; - struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2], o_vmv[4], o_info; - unsigned i, label; + struct ureg_dst o_vpos, o_line, o_vtex[2], o_vmv[2][2]; + unsigned i, j, label; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) @@ -76,24 +72,21 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r) vrect = ureg_DECL_vs_input(shader, 
VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); - eb[0][0] = ureg_DECL_vs_input(shader, VS_I_EB_0_0); - eb[1][0] = ureg_DECL_vs_input(shader, VS_I_EB_1_0); - eb[0][1] = ureg_DECL_vs_input(shader, VS_I_EB_0_1); - eb[1][1] = ureg_DECL_vs_input(shader, VS_I_EB_1_1); + eb = ureg_DECL_vs_input(shader, VS_I_EB); + flags = ureg_DECL_vs_input(shader, VS_I_FLAGS); + vmv[0][0] = ureg_DECL_vs_input(shader, VS_I_MV0_TOP); + vmv[0][1] = ureg_DECL_vs_input(shader, VS_I_MV0_BOTTOM); + vmv[1][0] = ureg_DECL_vs_input(shader, VS_I_MV1_TOP); + vmv[1][1] = ureg_DECL_vs_input(shader, VS_I_MV1_BOTTOM); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE); - o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0); - o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1); - o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2); - o_eb[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0); - o_eb[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1); - o_info = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO); - - for (i = 0; i < 4; ++i) { - vmv[i] = ureg_DECL_vs_input(shader, VS_I_MV0 + i); - o_vmv[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i); - } + o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_TOP); + o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_BOTTOM); + o_vmv[0][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_TOP); + o_vmv[0][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_BOTTOM); + o_vmv[1][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_TOP); + o_vmv[1][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_BOTTOM); /* * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height) @@ -143,31 +136,35 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r) ureg_MOV(shader, 
ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos)); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos); - ureg_CMP(shader, ureg_writemask(o_eb[0], TGSI_WRITEMASK_XYZ), - ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)), - eb[0][1], eb[0][0]); - ureg_CMP(shader, ureg_writemask(o_eb[1], TGSI_WRITEMASK_XYZ), - ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)), - eb[1][1], eb[1][0]); - - ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_X), - ureg_scalar(eb[1][0], TGSI_SWIZZLE_W)); - ureg_MUL(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Y), - ureg_scalar(eb[1][1], TGSI_SWIZZLE_W), + ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_Z), + ureg_scalar(flags, TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.5f)); - for (i = 0; i < 4; ++i) - ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos)); + for (i = 0; i < 2; ++i) + for (j = 0; j < 2; ++j) { + ureg_MAD(shader, ureg_writemask(o_vmv[i][j], TGSI_WRITEMASK_XY), mv_scale, vmv[i][j], ureg_src(t_vpos)); + ureg_MOV(shader, ureg_writemask(o_vmv[i][j], TGSI_WRITEMASK_Z), ureg_src(t_vpos)); + } ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos)); + ureg_CMP(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_Z), + ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)), + ureg_scalar(eb, TGSI_SWIZZLE_Y), + ureg_scalar(eb, TGSI_SWIZZLE_X)); + ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos)); - ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos)); + ureg_CMP(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_Z), + ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)), + ureg_scalar(eb, TGSI_SWIZZLE_W), + ureg_scalar(eb, TGSI_SWIZZLE_Z)); ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_scalar(vrect, TGSI_SWIZZLE_Y)); ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2)); + ureg_MOV(shader, ureg_writemask(o_line, 
TGSI_WRITEMASK_Z), + ureg_scalar(flags, TGSI_SWIZZLE_Z)); - ureg_IF(shader, ureg_scalar(eb[0][0], TGSI_SWIZZLE_W), &label); + ureg_IF(shader, ureg_scalar(flags, TGSI_SWIZZLE_X), &label); ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect); ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f)); @@ -206,37 +203,31 @@ calc_field(struct ureg_program *shader) * line.x going from 0 to 1 if not interlaced * line.x going from 0 to 8 in steps of 0.5 if interlaced * line.y going from 0 to 8 in steps of 0.5 + * line.z is flag for intra frames * * tmp.xy = fraction(line) * tmp.xy = tmp.xy >= 0.5 ? 1 : 0 */ ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), line); ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(shader, 0.5f)); + ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), line); return tmp; } static struct ureg_dst -fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field, unsigned color_swizzle) +fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field) { - struct ureg_src tc[3], sampler, eb[2]; - struct ureg_dst texel, t_tc, t_eb_info; + struct ureg_src tc[2], sampler; + struct ureg_dst texel, t_tc; unsigned label; texel = ureg_DECL_temporary(shader); t_tc = ureg_DECL_temporary(shader); - t_eb_info = ureg_DECL_temporary(shader); - tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR); - tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR); - tc[2] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2, TGSI_INTERPOLATE_LINEAR); + tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_TOP, TGSI_INTERPOLATE_LINEAR); + tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_BOTTOM, TGSI_INTERPOLATE_LINEAR); - eb[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0, 
TGSI_INTERPOLATE_CONSTANT); - eb[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1, TGSI_INTERPOLATE_CONSTANT); - - //for (i = 0; i < 3; ++i) { - // sampler[i] = ureg_DECL_sampler(shader, i); - //} sampler = ureg_DECL_sampler(shader, 0); /* @@ -245,31 +236,21 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct * texel.cr = tex(tc[2], sampler[2]) */ - ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), + ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XYZ), ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)), tc[1], tc[0]); - ureg_CMP(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ), - ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)), - eb[1], eb[0]); - - /* r600g is ignoring TGSI_INTERPOLATE_CONSTANT, just workaround this */ - ureg_SLT(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ), ureg_src(t_eb_info), ureg_imm1f(shader, 0.5f)); + ureg_SLT(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_src(t_tc), ureg_imm1f(shader, 0.5f)); ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.0f)); - ureg_IF(shader, ureg_scalar(ureg_src(t_eb_info), color_swizzle), &label); + ureg_IF(shader, ureg_scalar(ureg_src(t_tc), TGSI_SWIZZLE_Z), &label); - if(color_swizzle==TGSI_SWIZZLE_X || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) { - ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler); - } else { - ureg_TEX(shader, texel, TGSI_TEXTURE_3D, tc[2], sampler); - } + ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler); ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); ureg_ENDIF(shader); ureg_release_temporary(shader, t_tc); - ureg_release_temporary(shader, t_eb_info); return texel; } @@ -277,15 +258,14 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct static struct ureg_dst fetch_ref(struct ureg_program *shader, struct ureg_dst field) { - struct ureg_src info; - struct ureg_src tc[4], 
sampler[2]; + struct ureg_src tc[2][2], sampler[2]; struct ureg_dst ref[2], result; unsigned i, intra_label; - info = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO, TGSI_INTERPOLATE_CONSTANT); - - for (i = 0; i < 4; ++i) - tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR); + tc[0][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_TOP, TGSI_INTERPOLATE_LINEAR); + tc[0][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_BOTTOM, TGSI_INTERPOLATE_LINEAR); + tc[1][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_TOP, TGSI_INTERPOLATE_LINEAR); + tc[1][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_BOTTOM, TGSI_INTERPOLATE_LINEAR); for (i = 0; i < 2; ++i) { sampler[i] = ureg_DECL_sampler(shader, i + 1); @@ -296,7 +276,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field) ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f)); - ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_X), &intra_label); + ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z), &intra_label); /* * if (field.z) * ref[0..1] = tex(tc[0..1], sampler[0..1]) @@ -306,16 +286,16 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field) */ ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY), ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), - tc[1], tc[0]); + tc[0][1], tc[0][0]); ureg_CMP(shader, ureg_writemask(ref[1], TGSI_WRITEMASK_XY), ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), - tc[3], tc[2]); + tc[1][1], tc[1][0]); ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]); ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]); ureg_LRP(shader, result, - ureg_scalar(info, TGSI_SWIZZLE_Y), + ureg_scalar(tc[0][0], TGSI_SWIZZLE_Z), ureg_src(ref[1]), ureg_src(ref[0])); ureg_fixup_label(shader, intra_label, ureg_get_instruction_number(shader)); @@ -328,7 +308,7 @@ fetch_ref(struct ureg_program *shader, 
struct ureg_dst field) } static void * -create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned color_swizzle) +create_frag_shader(struct vl_mpeg12_mc_renderer *r) { struct ureg_program *shader; struct ureg_dst result; @@ -342,7 +322,7 @@ create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned color_swizzle) fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); field = calc_field(shader); - texel = fetch_ycbcr(r, shader, field, color_swizzle); + texel = fetch_ycbcr(r, shader, field); result = fetch_ref(shader, field); @@ -444,9 +424,7 @@ bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, struct pipe_context *pipe, unsigned buffer_width, - unsigned buffer_height, - enum pipe_video_chroma_format chroma_format, - unsigned color_swizzle) + unsigned buffer_height) { struct pipe_resource tex_templ, *tex_dummy; struct pipe_sampler_view sampler_view; @@ -459,7 +437,6 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, renderer->pipe = pipe; renderer->buffer_width = buffer_width; renderer->buffer_height = buffer_height; - renderer->chroma_format = chroma_format; if (!init_pipe_state(renderer)) goto error_pipe_state; @@ -468,7 +445,7 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, if (!renderer->vs) goto error_vs_shaders; - renderer->fs = create_frag_shader(renderer, color_swizzle); + renderer->fs = create_frag_shader(renderer); if (!renderer->fs) goto error_fs_shaders; diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h index 0a204d6..052d7d6 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h @@ -42,7 +42,6 @@ struct vl_mpeg12_mc_renderer struct pipe_context *pipe; unsigned buffer_width; unsigned buffer_height; - enum pipe_video_chroma_format chroma_format; struct pipe_viewport_state viewport; struct pipe_framebuffer_state fb_state; @@ -72,9 +71,7 @@ struct 
vl_mpeg12_mc_buffer bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, struct pipe_context *pipe, unsigned picture_width, - unsigned picture_height, - enum pipe_video_chroma_format chroma_format, - unsigned color_swizzle); + unsigned picture_height); void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer); diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c index bbac890..59aa1e9 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c @@ -33,12 +33,11 @@ struct vl_vertex_stream { struct vertex2s pos; - struct { - int8_t y; - int8_t cr; - int8_t cb; - int8_t flag; - } eb[2][2]; + int8_t eb[3][2][2]; + int8_t dct_type_field; + int8_t mo_type_frame; + int8_t mb_type_intra; + int8_t mv_wheights; struct vertex2s mv[4]; }; @@ -121,7 +120,10 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements assert(elements && num_elements); for ( i = 0; i < num_elements; ++i ) { - elements[i].src_offset = offset; + if (elements[i].src_offset) + offset = elements[i].src_offset; + else + elements[i].src_offset = offset; elements[i].instance_divisor = 1; elements[i].vertex_buffer_index = vertex_buffer_index; offset += util_format_get_blocksize(elements[i].src_format); @@ -129,37 +131,39 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements } void * -vl_vb_get_elems_state(struct pipe_context *pipe, bool include_mvs) +vl_vb_get_elems_state(struct pipe_context *pipe, int component) { struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS]; - unsigned i; - memset(&vertex_elems, 0, sizeof(vertex_elems)); vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element(); /* Position element */ vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED; - /* y, cr, cb empty block element top left block */ - vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED; + /* empty block 
element of selected component */ + vertex_elems[VS_I_EB].src_offset = 4 + component * 4; + vertex_elems[VS_I_EB].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED; + + /* flags */ + vertex_elems[VS_I_FLAGS].src_offset = 16; + vertex_elems[VS_I_FLAGS].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED; - /* y, cr, cb empty block element top right block */ - vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED; + /* motion vector 0 TOP element */ + vertex_elems[VS_I_MV0_TOP].src_format = PIPE_FORMAT_R16G16_SSCALED; - /* y, cr, cb empty block element bottom left block */ - vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED; + /* motion vector 0 BOTTOM element */ + vertex_elems[VS_I_MV0_BOTTOM].src_format = PIPE_FORMAT_R16G16_SSCALED; - /* y, cr, cb empty block element bottom right block */ - vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED; + /* motion vector 1 TOP element */ + vertex_elems[VS_I_MV1_TOP].src_format = PIPE_FORMAT_R16G16_SSCALED; - for (i = 0; i < 4; ++i) - /* motion vector 0..4 element */ - vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R16G16_SSCALED; + /* motion vector 1 BOTTOM element */ + vertex_elems[VS_I_MV1_BOTTOM].src_format = PIPE_FORMAT_R16G16_SSCALED; - vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - (include_mvs ? 1 : 5), 1); + vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - 1, 1); - return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS - (include_mvs ? 
0 : 4), vertex_elems); + return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS, vertex_elems); } struct pipe_vertex_buffer @@ -256,7 +260,7 @@ vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock * const unsigned (*empty_block_mask)[3][2][2]) { struct vl_vertex_stream *stream; - unsigned i, j; + unsigned i, j, k; assert(buffer); assert(mb); @@ -269,28 +273,29 @@ vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock * stream->pos.x = mb->mbx; stream->pos.y = mb->mby; - for ( i = 0; i < 2; ++i) { - for ( j = 0; j < 2; ++j) { - stream->eb[i][j].y = !(mb->cbp & (*empty_block_mask)[0][i][j]); - stream->eb[i][j].cr = !(mb->cbp & (*empty_block_mask)[1][i][j]); - stream->eb[i][j].cb = !(mb->cbp & (*empty_block_mask)[2][i][j]); - } - } - stream->eb[0][0].flag = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD; - stream->eb[0][1].flag = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME; - stream->eb[1][0].flag = mb->mb_type != PIPE_MPEG12_MACROBLOCK_TYPE_INTRA; + for ( i = 0; i < 3; ++i) + for ( j = 0; j < 2; ++j) + for ( k = 0; k < 2; ++k) + stream->eb[i][j][k] = !(mb->cbp & (*empty_block_mask)[i][j][k]); + + stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD; + stream->mo_type_frame = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME; + stream->mb_type_intra = mb->mb_type != PIPE_MPEG12_MACROBLOCK_TYPE_INTRA; switch (mb->mb_type) { case PIPE_MPEG12_MACROBLOCK_TYPE_FWD: - stream->eb[1][1].flag = 0; + stream->mv_wheights = 0; break; case PIPE_MPEG12_MACROBLOCK_TYPE_BI: - stream->eb[1][1].flag = 1; + stream->mv_wheights = 1; break; case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD: - stream->eb[1][1].flag = 2; + stream->mv_wheights = 2; break; + + default: + stream->mv_wheights = 0; } get_motion_vectors(mb, stream->mv); diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h index 88e0270..837d8bd 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h +++ 
b/src/gallium/auxiliary/vl/vl_vertex_buffers.h @@ -41,14 +41,12 @@ enum VS_INPUT { VS_I_RECT, VS_I_VPOS, - VS_I_EB_0_0, - VS_I_EB_0_1, - VS_I_EB_1_0, - VS_I_EB_1_1, - VS_I_MV0, - VS_I_MV1, - VS_I_MV2, - VS_I_MV3, + VS_I_EB, + VS_I_FLAGS, + VS_I_MV0_TOP, + VS_I_MV0_BOTTOM, + VS_I_MV1_TOP, + VS_I_MV1_BOTTOM, NUM_VS_INPUTS }; @@ -67,7 +65,7 @@ struct vl_vertex_buffer struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned blocks_x, unsigned blocks_y); -void *vl_vb_get_elems_state(struct pipe_context *pipe, bool include_mvs); +void *vl_vb_get_elems_state(struct pipe_context *pipe, int component); struct pipe_vertex_buffer vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, |