radeon/uvd: implement HEVC support

add context buffer to fix H265 uvd decode issue. fix H265 corruption issue caused by incorrect assigned ref_pic_list. v2: disable interlace for HEVC add CZ sps flag workaround fix coding style Signed-off-by: Christian KÃ¶nig <christian.koenig@amd.com> Signed-off-by: Boyuan Zhang <boyuan.zhang@amd.com> Reviewed-by: Leo Liu <leo.liu@amd.com>
author: Boyuan Zhang <boyuan.zhang@amd.com> 2015-07-08 16:54:48 -0400
committer: Marek Olšák <marek.olsak@amd.com> 2015-08-14 15:02:31 +0200
commit: 839bf82606ae9c7b1c7d8d5055ab5e3cadae9bf9 (patch)
tree: 9307491fc3189e31e067f31013d316ee91f15941 /src/gallium/drivers/radeon/radeon_uvd.c
parent: 0654a9ca17c17fe140f70d126c878a0ce4736b76 (diff)
download: external_mesa3d-839bf82606ae9c7b1c7d8d5055ab5e3cadae9bf9.zip
external_mesa3d-839bf82606ae9c7b1c7d8d5055ab5e3cadae9bf9.tar.gz
external_mesa3d-839bf82606ae9c7b1c7d8d5055ab5e3cadae9bf9.tar.bz2
1 files changed, 225 insertions, 17 deletions
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index 375b5c0..16ee541 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -57,7 +57,7 @@
 
 #define FB_BUFFER_OFFSET 0x1000
 #define FB_BUFFER_SIZE 2048
-#define IT_SCALING_TABLE_SIZE 224
+#define IT_SCALING_TABLE_SIZE 992
 
 /* UVD decoder representation */
 struct ruvd_decoder {
@@ -86,6 +86,7 @@ struct ruvd_decoder {
 
 	struct rvid_buffer		dpb;
 	bool				use_legacy;
+	struct rvid_buffer		ctx;
 };
 
 /* flush IB to the hardware */
@@ -124,6 +125,13 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
 	set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1);
 }
 
+/* do the codec needs an IT buffer ?*/
+static bool have_it(struct ruvd_decoder *dec)
+{
+	return dec->stream_type == RUVD_CODEC_H264_PERF ||
+		dec->stream_type == RUVD_CODEC_H265;
+}
+
 /* map the next available message/feedback/itscaling buffer */
 static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
 {
@@ -139,7 +147,7 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
 	/* calc buffer offsets */
 	dec->msg = (struct ruvd_msg *)ptr;
 	dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
-	if (dec->stream_type == RUVD_CODEC_H264_PERF)
+	if (have_it(dec))
 		dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
 }
 
@@ -159,8 +167,7 @@ static void send_msg_buf(struct ruvd_decoder *dec)
 	dec->ws->buffer_unmap(buf->res->cs_buf);
 	dec->msg = NULL;
 	dec->fb = NULL;
-	if (dec->stream_type == RUVD_CODEC_H264_PERF)
-		dec->it = NULL;
+	dec->it = NULL;
 
 	/* and send it to the hardware */
 	send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->cs_buf, 0,
@@ -191,12 +198,35 @@ static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family)
 	case PIPE_VIDEO_FORMAT_MPEG4:
 		return RUVD_CODEC_MPEG4;
 
+	case PIPE_VIDEO_FORMAT_HEVC:
+		return RUVD_CODEC_H265;
+
 	default:
 		assert(0);
 		return 0;
 	}
 }
 
+static unsigned calc_ctx_size(struct ruvd_decoder *dec)
+{
+	unsigned width_in_mb, height_in_mb, ctx_size;
+
+	unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
+	unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
+
+	unsigned max_references = dec->base.max_references + 1;
+
+	if (dec->base.width * dec->base.height >= 4096*2000)
+		max_references = MAX2(max_references, 8);
+	else
+		max_references = MAX2(max_references, 17);
+
+	width = align (width, 16);
+	height = align (height, 16);
+	ctx_size = ((width + 255) / 16)*((height + 255) / 16) * 16 * max_references + 52 * 1024;
+	return ctx_size;
+}
+
 /* calculate size of reference picture buffer */
 static unsigned calc_dpb_size(struct ruvd_decoder *dec)
 {
@@ -270,6 +300,17 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec)
 		break;
 	}
 
+	case PIPE_VIDEO_FORMAT_HEVC:
+		if (dec->base.width * dec->base.height >= 4096*2000)
+			max_references = MAX2(max_references, 8);
+		else
+			max_references = MAX2(max_references, 17);
+
+		width = align (width, 16);
+		height = align (height, 16);
+		dpb_size = align((width * height * 3) / 2, 256) * max_references;
+		break;
+
 	case PIPE_VIDEO_FORMAT_VC1:
 		// the firmware seems to allways assume a minimum of ref frames
 		max_references = MAX2(NUM_VC1_REFS, max_references);
@@ -319,6 +360,12 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec)
 	return dpb_size;
 }
 
+/* free associated data in the video buffer callback */
+static void ruvd_destroy_associated_data(void *data)
+{
+	/* NOOP, since we only use an intptr */
+}
+
 /* get h264 specific message bits */
 static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)
 {
@@ -392,6 +439,11 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_
 	memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16);
 	memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64);
 
+	if (dec->stream_type == RUVD_CODEC_H264_PERF) {
+		memcpy(dec->it, result.scaling_list_4x4, 6*16);
+		memcpy((dec->it + 96), result.scaling_list_8x8, 2*64);
+	}
+
 	result.num_ref_frames = pic->num_ref_frames;
 
 	result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
@@ -408,6 +460,151 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_
 	return result;
 }
 
+/* get h265 specific message bits */
+static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target,
+				     struct pipe_h265_picture_desc *pic)
+{
+	struct ruvd_h265 result;
+	unsigned i;
+
+	memset(&result, 0, sizeof(result));
+
+	result.sps_info_flags = 0;
+	result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
+	result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
+	result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
+	result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
+	result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
+	result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
+	result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
+	result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
+	result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
+	if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO)
+		result.sps_info_flags |= 1 << 9;
+
+	result.chroma_format = pic->pps->sps->chroma_format_idc;
+	result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
+	result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
+	result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
+	result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
+	result.log2_min_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_luma_coding_block_size_minus3;
+	result.log2_diff_max_min_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
+	result.log2_min_transform_block_size_minus2 = pic->pps->sps->log2_min_transform_block_size_minus2;
+	result.log2_diff_max_min_transform_block_size = pic->pps->sps->log2_diff_max_min_transform_block_size;
+	result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter;
+	result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra;
+	result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
+	result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
+	result.log2_min_pcm_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
+	result.log2_diff_max_min_pcm_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
+	result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
+
+	result.pps_info_flags = 0;
+	result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
+	result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
+	result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
+	result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
+	result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
+	result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
+	result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
+	result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
+	result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
+	result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
+	result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
+	result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
+	result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
+	result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
+	result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
+	result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
+	result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
+	result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
+	result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
+	result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
+	//result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ???
+
+	result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
+	result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;
+	result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
+	result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
+	result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
+	result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
+	result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
+	result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
+	result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
+	result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
+	result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
+	result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
+	result.init_qp_minus26 = pic->pps->init_qp_minus26;
+
+	for (i = 0; i < 19; ++i)
+		result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
+
+	for (i = 0; i < 21; ++i)
+		result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
+
+	result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
+	result.curr_idx = pic->CurrPicOrderCntVal;
+	result.curr_poc = pic->CurrPicOrderCntVal;
+
+	vl_video_buffer_set_associated_data(target, &dec->base,
+					    (void *)(uintptr_t)pic->CurrPicOrderCntVal,
+					    &ruvd_destroy_associated_data);
+
+	for (i = 0; i < 16; ++i) {
+		struct pipe_video_buffer *ref = pic->ref[i];
+		uintptr_t ref_pic = 0;
+
+		result.poc_list[i] = pic->PicOrderCntVal[i];
+
+		if (ref)
+			ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
+		else
+			ref_pic = 0x7F;
+		result.ref_pic_list[i] = ref_pic;
+	}
+
+	for (i = 0; i < 8; ++i) {
+		result.ref_pic_set_st_curr_before[i] = 0xFF;
+		result.ref_pic_set_st_curr_after[i] = 0xFF;
+		result.ref_pic_set_lt_curr[i] = 0xFF;
+	}
+
+	for (i = 0; i < pic->NumPocStCurrBefore; ++i)
+		result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
+
+	for (i = 0; i < pic->NumPocStCurrAfter; ++i)
+		result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
+
+	for (i = 0; i < pic->NumPocLtCurr; ++i)
+		result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
+
+	for (i = 0; i < 6; ++i)
+		result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
+
+	for (i = 0; i < 2; ++i)
+		result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
+
+	memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16);
+	memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64);
+	memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
+	memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
+
+	/* TODO
+	result.highestTid;
+	result.isNonRef;
+
+	IDRPicFlag;
+	RAPPicFlag;
+	NumPocTotalCurr;
+	NumShortTermPictureSliceHeaderBits;
+	NumLongTermPictureSliceHeaderBits;
+
+	IsLongTerm[16];
+	*/
+
+	return result;
+}
+
 /* get vc1 specific message bits */
 static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)
 {
@@ -627,16 +824,12 @@ static void ruvd_destroy(struct pipe_video_codec *decoder)
 	}
 
 	rvid_destroy_buffer(&dec->dpb);
+	if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC)
+		rvid_destroy_buffer(&dec->ctx);
 
 	FREE(dec);
 }
 
-/* free associated data in the video buffer callback */
-static void ruvd_destroy_associated_data(void *data)
-{
-	/* NOOP, since we only use an intptr */
-}
-
 /**
  * start decoding of a new frame
  */
@@ -759,10 +952,10 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
 	switch (u_reduce_video_profile(picture->profile)) {
 	case PIPE_VIDEO_FORMAT_MPEG4_AVC:
 		dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
-		if (dec->stream_type == RUVD_CODEC_H264_PERF) {
-			memcpy(dec->it, dec->msg->body.decode.codec.h264.scaling_list_4x4, 6*16);
-			memcpy((dec->it + 96), dec->msg->body.decode.codec.h264.scaling_list_8x8, 2*64);
-		}
+		break;
+
+	case PIPE_VIDEO_FORMAT_HEVC:
+		dec->msg->body.decode.codec.h265 = get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture);
 		break;
 
 	case PIPE_VIDEO_FORMAT_VC1:
@@ -792,13 +985,17 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
 
 	send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->cs_buf, 0,
 		 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+	if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
+		send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->cs_buf, 0,
+			RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+	}
 	send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->cs_buf,
 		 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
 	send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,
 		 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
 	send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->cs_buf,
 		 FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
-	if (dec->stream_type == RUVD_CODEC_H264_PERF)
+	if (have_it(dec))
 		send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->cs_buf,
 			 FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
 	set_reg(dec, RUVD_ENGINE_CNTL, 1);
@@ -884,7 +1081,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
 	for (i = 0; i < NUM_BUFFERS; ++i) {
 		unsigned msg_fb_it_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
 		STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
-		if (dec->stream_type == RUVD_CODEC_H264_PERF)
+		if (have_it(dec))
 			msg_fb_it_size += IT_SCALING_TABLE_SIZE;
 		if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
 					msg_fb_it_size, PIPE_USAGE_STAGING)) {
@@ -911,6 +1108,15 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
 
 	rvid_clear_buffer(context, &dec->dpb);
 
+	if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC) {
+		unsigned ctx_size = calc_ctx_size(dec);
+		if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
+			RVID_ERR("Can't allocated context buffer.\n");
+			goto error;
+		}
+		rvid_clear_buffer(context, &dec->ctx);
+	}
+
 	map_msg_fb_it_buf(dec);
 	dec->msg->size = sizeof(*dec->msg);
 	dec->msg->msg_type = RUVD_MSG_CREATE;
@@ -918,7 +1124,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
 	dec->msg->body.create.stream_type = dec->stream_type;
 	dec->msg->body.create.width_in_samples = dec->base.width;
 	dec->msg->body.create.height_in_samples = dec->base.height;
-	dec->msg->body.create.dpb_size = dec->dpb.res->buf->size;
+	dec->msg->body.create.dpb_size = dpb_size;
 	send_msg_buf(dec);
 	flush(dec);
 	next_buffer(dec);
@@ -934,6 +1140,8 @@ error:
 	}
 
 	rvid_destroy_buffer(&dec->dpb);
+	if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC)
+		rvid_destroy_buffer(&dec->ctx);
 
 	FREE(dec);
author	Boyuan Zhang <boyuan.zhang@amd.com>	2015-07-08 16:54:48 -0400
committer	Marek Olšák <marek.olsak@amd.com>	2015-08-14 15:02:31 +0200
commit	839bf82606ae9c7b1c7d8d5055ab5e3cadae9bf9 (patch)
tree	9307491fc3189e31e067f31013d316ee91f15941 /src/gallium/drivers/radeon/radeon_uvd.c
parent	0654a9ca17c17fe140f70d126c878a0ce4736b76 (diff)
download	external_mesa3d-839bf82606ae9c7b1c7d8d5055ab5e3cadae9bf9.zip external_mesa3d-839bf82606ae9c7b1c7d8d5055ab5e3cadae9bf9.tar.gz external_mesa3d-839bf82606ae9c7b1c7d8d5055ab5e3cadae9bf9.tar.bz2