From 29a84457aed4c45bc900998b5e11c03023264208 Mon Sep 17 00:00:00 2001 From: James Dong Date: Fri, 2 Jul 2010 17:44:44 -0700 Subject: Initial checkin for software AVC encoder - Since the software encoder assumes the input is YUV420 planar, color conversion needs to be added when the input color format does not meet the requirement. With this patch, I only added a single color conversion from YUV420 semi planar to YUV420 planar. We can add more as we go. Change-Id: If8640c9e5a4f73d385ae9bb2022e57f7f62b91b9 --- .../codecs/avc/enc/src/avcenc_api.cpp | 744 +++++++ .../libstagefright/codecs/avc/enc/src/avcenc_api.h | 320 +++ .../libstagefright/codecs/avc/enc/src/avcenc_int.h | 471 +++++ .../libstagefright/codecs/avc/enc/src/avcenc_lib.h | 1020 +++++++++ .../codecs/avc/enc/src/bitstream_io.cpp | 336 +++ media/libstagefright/codecs/avc/enc/src/block.cpp | 1283 ++++++++++++ .../codecs/avc/enc/src/findhalfpel.cpp | 622 ++++++ media/libstagefright/codecs/avc/enc/src/header.cpp | 917 ++++++++ media/libstagefright/codecs/avc/enc/src/init.cpp | 899 ++++++++ .../codecs/avc/enc/src/intra_est.cpp | 2199 ++++++++++++++++++++ .../codecs/avc/enc/src/motion_comp.cpp | 2156 +++++++++++++++++++ .../codecs/avc/enc/src/motion_est.cpp | 1774 ++++++++++++++++ .../codecs/avc/enc/src/rate_control.cpp | 981 +++++++++ .../libstagefright/codecs/avc/enc/src/residual.cpp | 389 ++++ media/libstagefright/codecs/avc/enc/src/sad.cpp | 290 +++ .../codecs/avc/enc/src/sad_halfpel.cpp | 629 ++++++ .../codecs/avc/enc/src/sad_halfpel_inline.h | 96 + .../libstagefright/codecs/avc/enc/src/sad_inline.h | 488 +++++ .../codecs/avc/enc/src/sad_mb_offset.h | 311 +++ media/libstagefright/codecs/avc/enc/src/slice.cpp | 1025 +++++++++ .../codecs/avc/enc/src/vlc_encode.cpp | 336 +++ 21 files changed, 17286 insertions(+) create mode 100644 media/libstagefright/codecs/avc/enc/src/avcenc_api.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/avcenc_api.h create mode 100644 media/libstagefright/codecs/avc/enc/src/avcenc_int.h create mode 100644 media/libstagefright/codecs/avc/enc/src/avcenc_lib.h create mode 100644 media/libstagefright/codecs/avc/enc/src/bitstream_io.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/block.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/findhalfpel.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/header.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/init.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/intra_est.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/motion_comp.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/motion_est.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/rate_control.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/residual.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/sad.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/sad_halfpel.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/sad_halfpel_inline.h create mode 100644 media/libstagefright/codecs/avc/enc/src/sad_inline.h create mode 100644 media/libstagefright/codecs/avc/enc/src/sad_mb_offset.h create mode 100644 media/libstagefright/codecs/avc/enc/src/slice.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/vlc_encode.cpp (limited to 'media/libstagefright/codecs/avc/enc/src') diff --git a/media/libstagefright/codecs/avc/enc/src/avcenc_api.cpp b/media/libstagefright/codecs/avc/enc/src/avcenc_api.cpp new file mode 100644 index 0000000..d39885d 
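For reference, a minimal sketch of the kind of YUV420 semi-planar to YUV420 planar conversion described above might look like the code below. It is illustrative only: the function name is hypothetical, the actual converter added by this change is not among the files listed here, and the sketch assumes the library's uint8 type, memcpy from <string.h>, even frame dimensions, and an interleaved chroma plane ordered Cb,Cr (swap the two chroma writes for a Cr,Cb source).

/* Illustrative sketch: de-interleave a YUV420 semi-planar frame into the
   YUV420 planar layout that the software encoder expects. */
static void ConvertYUV420SemiPlanarToPlanar(const uint8 *src, uint8 *dst,
                                            int width, int height)
{
    int i;
    int ySize = width * height;         /* size of the luma plane */
    int cSize = ySize >> 2;             /* size of each chroma plane */
    const uint8 *srcUV = src + ySize;   /* interleaved CbCr plane */
    uint8 *dstU = dst + ySize;          /* destination Cb plane */
    uint8 *dstV = dstU + cSize;         /* destination Cr plane */

    memcpy(dst, src, ySize);            /* luma samples are unchanged */

    for (i = 0; i < cSize; i++)
    {
        dstU[i] = srcUV[2 * i];         /* Cb */
        dstV[i] = srcUV[2 * i + 1];     /* Cr */
    }
}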
--- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/avcenc_api.cpp @@ -0,0 +1,744 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_api.h" +#include "avcenc_lib.h" + +/* ======================================================================== */ +/* Function : PVAVCGetNALType() */ +/* Date : 11/4/2003 */ +/* Purpose : Sniff NAL type from the bitstream */ +/* In/out : */ +/* Return : AVCENC_SUCCESS if succeed, AVCENC_FAIL if fail. */ +/* Modified : */ +/* ======================================================================== */ +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncGetNALType(unsigned char *bitstream, int size, + int *nal_type, int *nal_ref_idc) +{ + int forbidden_zero_bit; + if (size > 0) + { + forbidden_zero_bit = bitstream[0] >> 7; + if (forbidden_zero_bit != 0) + return AVCENC_FAIL; + *nal_ref_idc = (bitstream[0] & 0x60) >> 5; + *nal_type = bitstream[0] & 0x1F; + return AVCENC_SUCCESS; + } + + return AVCENC_FAIL; +} + + +/* ======================================================================== */ +/* Function : PVAVCEncInitialize() */ +/* Date : 3/18/2004 */ +/* Purpose : Initialize the encoder library, allocate memory and verify */ +/* the profile/level support/settings. */ +/* In/out : Encoding parameters. */ +/* Return : AVCENC_SUCCESS for success. 
*/ +/* Modified : */ +/* ======================================================================== */ +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncInitialize(AVCHandle *avcHandle, AVCEncParams *encParam, + void* extSPS, void* extPPS) +{ + AVCEnc_Status status; + AVCEncObject *encvid; + AVCCommonObj *video; + uint32 *userData = (uint32*) avcHandle->userData; + int framesize; + + if (avcHandle->AVCObject != NULL) + { + return AVCENC_ALREADY_INITIALIZED; /* It's already initialized, need to cleanup first */ + } + + /* not initialized */ + + /* allocate videoObject */ + avcHandle->AVCObject = (void*)avcHandle->CBAVC_Malloc(userData, sizeof(AVCEncObject), DEFAULT_ATTR); + if (avcHandle->AVCObject == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + encvid = (AVCEncObject*) avcHandle->AVCObject; + memset(encvid, 0, sizeof(AVCEncObject)); /* reset everything */ + + encvid->enc_state = AVCEnc_Initializing; + + encvid->avcHandle = avcHandle; + + encvid->common = (AVCCommonObj*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCCommonObj), DEFAULT_ATTR); + if (encvid->common == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + video = encvid->common; + memset(video, 0, sizeof(AVCCommonObj)); + + /* allocate bitstream structure */ + encvid->bitstream = (AVCEncBitstream*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCEncBitstream), DEFAULT_ATTR); + if (encvid->bitstream == NULL) + { + return AVCENC_MEMORY_FAIL; + } + encvid->bitstream->encvid = encvid; /* to point back for reallocation */ + + /* allocate sequence parameter set structure */ + video->currSeqParams = (AVCSeqParamSet*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCSeqParamSet), DEFAULT_ATTR); + if (video->currSeqParams == NULL) + { + return AVCENC_MEMORY_FAIL; + } + memset(video->currSeqParams, 0, sizeof(AVCSeqParamSet)); + + /* allocate picture parameter set structure */ + video->currPicParams = (AVCPicParamSet*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCPicParamSet), DEFAULT_ATTR); + if (video->currPicParams == NULL) + { + return AVCENC_MEMORY_FAIL; + } + memset(video->currPicParams, 0, sizeof(AVCPicParamSet)); + + /* allocate slice header structure */ + video->sliceHdr = (AVCSliceHeader*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCSliceHeader), DEFAULT_ATTR); + if (video->sliceHdr == NULL) + { + return AVCENC_MEMORY_FAIL; + } + memset(video->sliceHdr, 0, sizeof(AVCSliceHeader)); + + /* allocate encoded picture buffer structure*/ + video->decPicBuf = (AVCDecPicBuffer*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCDecPicBuffer), DEFAULT_ATTR); + if (video->decPicBuf == NULL) + { + return AVCENC_MEMORY_FAIL; + } + memset(video->decPicBuf, 0, sizeof(AVCDecPicBuffer)); + + /* allocate rate control structure */ + encvid->rateCtrl = (AVCRateControl*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCRateControl), DEFAULT_ATTR); + if (encvid->rateCtrl == NULL) + { + return AVCENC_MEMORY_FAIL; + } + memset(encvid->rateCtrl, 0, sizeof(AVCRateControl)); + + /* reset frame list, not really needed */ + video->currPic = NULL; + video->currFS = NULL; + encvid->currInput = NULL; + video->prevRefPic = NULL; + + /* now read encParams, and allocate dimension-dependent variables */ + /* such as mblock */ + status = SetEncodeParam(avcHandle, encParam, extSPS, extPPS); /* initialized variables to be used in SPS*/ + if (status != AVCENC_SUCCESS) + { + return status; + } + + if (encParam->use_overrun_buffer == AVC_ON) + { + /* allocate overrun buffer */ + encvid->oBSize = encvid->rateCtrl->cpbSize; + if (encvid->oBSize > DEFAULT_OVERRUN_BUFFER_SIZE) + { + encvid->oBSize = 
DEFAULT_OVERRUN_BUFFER_SIZE; + } + encvid->overrunBuffer = (uint8*) avcHandle->CBAVC_Malloc(userData, encvid->oBSize, DEFAULT_ATTR); + if (encvid->overrunBuffer == NULL) + { + return AVCENC_MEMORY_FAIL; + } + } + else + { + encvid->oBSize = 0; + encvid->overrunBuffer = NULL; + } + + /* allocate frame size dependent structures */ + framesize = video->FrameHeightInMbs * video->PicWidthInMbs; + + video->mblock = (AVCMacroblock*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCMacroblock) * framesize, DEFAULT_ATTR); + if (video->mblock == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + video->MbToSliceGroupMap = (int*) avcHandle->CBAVC_Malloc(userData, sizeof(uint) * video->PicSizeInMapUnits * 2, DEFAULT_ATTR); + if (video->MbToSliceGroupMap == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + encvid->mot16x16 = (AVCMV*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCMV) * framesize, DEFAULT_ATTR); + if (encvid->mot16x16 == NULL) + { + return AVCENC_MEMORY_FAIL; + } + memset(encvid->mot16x16, 0, sizeof(AVCMV)*framesize); + + encvid->intraSearch = (uint8*) avcHandle->CBAVC_Malloc(userData, sizeof(uint8) * framesize, DEFAULT_ATTR); + if (encvid->intraSearch == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + encvid->min_cost = (int*) avcHandle->CBAVC_Malloc(userData, sizeof(int) * framesize, DEFAULT_ATTR); + if (encvid->min_cost == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + /* initialize motion search related memory */ + if (AVCENC_SUCCESS != InitMotionSearchModule(avcHandle)) + { + return AVCENC_MEMORY_FAIL; + } + + if (AVCENC_SUCCESS != InitRateControlModule(avcHandle)) + { + return AVCENC_MEMORY_FAIL; + } + + /* intialize function pointers */ + encvid->functionPointer = (AVCEncFuncPtr*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCEncFuncPtr), DEFAULT_ATTR); + if (encvid->functionPointer == NULL) + { + return AVCENC_MEMORY_FAIL; + } + encvid->functionPointer->SAD_Macroblock = &AVCSAD_Macroblock_C; + encvid->functionPointer->SAD_MB_HalfPel[0] = NULL; + encvid->functionPointer->SAD_MB_HalfPel[1] = &AVCSAD_MB_HalfPel_Cxh; + encvid->functionPointer->SAD_MB_HalfPel[2] = &AVCSAD_MB_HalfPel_Cyh; + encvid->functionPointer->SAD_MB_HalfPel[3] = &AVCSAD_MB_HalfPel_Cxhyh; + + /* initialize timing control */ + encvid->modTimeRef = 0; /* ALWAYS ASSUME THAT TIMESTAMP START FROM 0 !!!*/ + video->prevFrameNum = 0; + encvid->prevCodedFrameNum = 0; + encvid->dispOrdPOCRef = 0; + + if (encvid->outOfBandParamSet == TRUE) + { + encvid->enc_state = AVCEnc_Encoding_SPS; + } + else + { + encvid->enc_state = AVCEnc_Analyzing_Frame; + } + + return AVCENC_SUCCESS; +} + +/* ======================================================================== */ +/* Function : PVAVCEncGetMaxOutputSize() */ +/* Date : 11/29/2008 */ +/* Purpose : Return max output buffer size that apps should allocate for */ +/* output buffer. */ +/* In/out : */ +/* Return : AVCENC_SUCCESS for success. */ +/* Modified : size */ +/* ======================================================================== */ + +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncGetMaxOutputBufferSize(AVCHandle *avcHandle, int* size) +{ + AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject; + + if (encvid == NULL) + { + return AVCENC_UNINITIALIZED; + } + + *size = encvid->rateCtrl->cpbSize; + + return AVCENC_SUCCESS; +} + +/* ======================================================================== */ +/* Function : PVAVCEncSetInput() */ +/* Date : 4/18/2004 */ +/* Purpose : To feed an unencoded original frame to the encoder library. 
*/ +/* In/out : */ +/* Return : AVCENC_SUCCESS for success. */ +/* Modified : */ +/* ======================================================================== */ +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncSetInput(AVCHandle *avcHandle, AVCFrameIO *input) +{ + AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject; + AVCCommonObj *video = encvid->common; + AVCRateControl *rateCtrl = encvid->rateCtrl; + + AVCEnc_Status status; + uint frameNum; + + if (encvid == NULL) + { + return AVCENC_UNINITIALIZED; + } + + if (encvid->enc_state == AVCEnc_WaitingForBuffer) + { + goto RECALL_INITFRAME; + } + else if (encvid->enc_state != AVCEnc_Analyzing_Frame) + { + return AVCENC_FAIL; + } + + if (input->pitch > 0xFFFF) + { + return AVCENC_NOT_SUPPORTED; // we use 2-bytes for pitch + } + + /***********************************/ + + /* Let's rate control decide whether to encode this frame or not */ + /* Also set video->nal_unit_type, sliceHdr->slice_type, video->slice_type */ + if (AVCENC_SUCCESS != RCDetermineFrameNum(encvid, rateCtrl, input->coding_timestamp, &frameNum)) + { + return AVCENC_SKIPPED_PICTURE; /* not time to encode, thus skipping */ + } + + /* we may not need this line */ + //nextFrmModTime = (uint32)((((frameNum+1)*1000)/rateCtrl->frame_rate) + modTimeRef); /* rec. time */ + //encvid->nextModTime = nextFrmModTime - (encvid->frameInterval>>1) - 1; /* between current and next frame */ + + encvid->currInput = input; + encvid->currInput->coding_order = frameNum; + +RECALL_INITFRAME: + /* initialize and analyze the frame */ + status = InitFrame(encvid); + + if (status == AVCENC_SUCCESS) + { + encvid->enc_state = AVCEnc_Encoding_Frame; + } + else if (status == AVCENC_NEW_IDR) + { + if (encvid->outOfBandParamSet == TRUE) + { + encvid->enc_state = AVCEnc_Encoding_Frame; + } + else // assuming that in-band paramset keeps sending new SPS and PPS. + { + encvid->enc_state = AVCEnc_Encoding_SPS; + //video->currSeqParams->seq_parameter_set_id++; + //if(video->currSeqParams->seq_parameter_set_id > 31) // range check + { + video->currSeqParams->seq_parameter_set_id = 0; // reset + } + } + + video->sliceHdr->idr_pic_id++; + if (video->sliceHdr->idr_pic_id > 65535) // range check + { + video->sliceHdr->idr_pic_id = 0; // reset + } + } + /* the following logics need to be revisited */ + else if (status == AVCENC_PICTURE_READY) // no buffers returned back to the encoder + { + encvid->enc_state = AVCEnc_WaitingForBuffer; // Input accepted but can't continue + // need to free up some memory before proceeding with Encode + } + + return status; // return status, including the AVCENC_FAIL case and all 3 above. +} + +/* ======================================================================== */ +/* Function : PVAVCEncodeNAL() */ +/* Date : 4/29/2004 */ +/* Purpose : To encode one NAL/slice. */ +/* In/out : */ +/* Return : AVCENC_SUCCESS for success. 
*/ +/* Modified : */ +/* ======================================================================== */ +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncodeNAL(AVCHandle *avcHandle, unsigned char *buffer, unsigned int *buf_nal_size, int *nal_type) +{ + AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject; + AVCCommonObj *video = encvid->common; + AVCEncBitstream *bitstream = encvid->bitstream; + AVCEnc_Status status; + + if (encvid == NULL) + { + return AVCENC_UNINITIALIZED; + } + + switch (encvid->enc_state) + { + case AVCEnc_Initializing: + return AVCENC_UNINITIALIZED; + case AVCEnc_Encoding_SPS: + /* initialized the structure */ + BitstreamEncInit(bitstream, buffer, *buf_nal_size, NULL, 0); + BitstreamWriteBits(bitstream, 8, (1 << 5) | AVC_NALTYPE_SPS); + + /* encode SPS */ + status = EncodeSPS(encvid, bitstream); + if (status != AVCENC_SUCCESS) + { + return status; + } + + /* closing the NAL with trailing bits */ + status = BitstreamTrailingBits(bitstream, buf_nal_size); + if (status == AVCENC_SUCCESS) + { + encvid->enc_state = AVCEnc_Encoding_PPS; + video->currPicParams->seq_parameter_set_id = video->currSeqParams->seq_parameter_set_id; + video->currPicParams->pic_parameter_set_id++; + *nal_type = AVC_NALTYPE_SPS; + *buf_nal_size = bitstream->write_pos; + } + break; + case AVCEnc_Encoding_PPS: + /* initialized the structure */ + BitstreamEncInit(bitstream, buffer, *buf_nal_size, NULL, 0); + BitstreamWriteBits(bitstream, 8, (1 << 5) | AVC_NALTYPE_PPS); + + /* encode PPS */ + status = EncodePPS(encvid, bitstream); + if (status != AVCENC_SUCCESS) + { + return status; + } + + /* closing the NAL with trailing bits */ + status = BitstreamTrailingBits(bitstream, buf_nal_size); + if (status == AVCENC_SUCCESS) + { + if (encvid->outOfBandParamSet == TRUE) // already extract PPS, SPS + { + encvid->enc_state = AVCEnc_Analyzing_Frame; + } + else // SetInput has been called before SPS and PPS. + { + encvid->enc_state = AVCEnc_Encoding_Frame; + } + + *nal_type = AVC_NALTYPE_PPS; + *buf_nal_size = bitstream->write_pos; + } + break; + + case AVCEnc_Encoding_Frame: + /* initialized the structure */ + BitstreamEncInit(bitstream, buffer, *buf_nal_size, encvid->overrunBuffer, encvid->oBSize); + BitstreamWriteBits(bitstream, 8, (video->nal_ref_idc << 5) | (video->nal_unit_type)); + + /* Re-order the reference list according to the ref_pic_list_reordering() */ + /* We don't have to reorder the list for the encoder here. This can only be done + after we encode this slice. We can run thru a second-pass to see if new ordering + would save more bits. Too much delay !! 
*/ + /* status = ReOrderList(video);*/ + status = InitSlice(encvid); + if (status != AVCENC_SUCCESS) + { + return status; + } + + /* when we have everything, we encode the slice header */ + status = EncodeSliceHeader(encvid, bitstream); + if (status != AVCENC_SUCCESS) + { + return status; + } + + status = AVCEncodeSlice(encvid); + + video->slice_id++; + + /* closing the NAL with trailing bits */ + BitstreamTrailingBits(bitstream, buf_nal_size); + + *buf_nal_size = bitstream->write_pos; + + encvid->rateCtrl->numFrameBits += ((*buf_nal_size) << 3); + + *nal_type = video->nal_unit_type; + + if (status == AVCENC_PICTURE_READY) + { + status = RCUpdateFrame(encvid); + if (status == AVCENC_SKIPPED_PICTURE) /* skip current frame */ + { + DPBReleaseCurrentFrame(avcHandle, video); + encvid->enc_state = AVCEnc_Analyzing_Frame; + + return status; + } + + /* perform loop-filtering on the entire frame */ + DeblockPicture(video); + + /* update the original frame array */ + encvid->prevCodedFrameNum = encvid->currInput->coding_order; + + /* store the encoded picture in the DPB buffer */ + StorePictureInDPB(avcHandle, video); + + if (video->currPic->isReference) + { + video->PrevRefFrameNum = video->sliceHdr->frame_num; + } + + /* update POC related variables */ + PostPOC(video); + + encvid->enc_state = AVCEnc_Analyzing_Frame; + status = AVCENC_PICTURE_READY; + + } + break; + default: + status = AVCENC_WRONG_STATE; + } + + return status; +} + +/* ======================================================================== */ +/* Function : PVAVCEncGetOverrunBuffer() */ +/* Purpose : To retrieve the overrun buffer. Check whether overrun buffer */ +/* is used or not before returning */ +/* In/out : */ +/* Return : Pointer to the internal overrun buffer. */ +/* Modified : */ +/* ======================================================================== */ +OSCL_EXPORT_REF uint8* PVAVCEncGetOverrunBuffer(AVCHandle* avcHandle) +{ + AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject; + AVCEncBitstream *bitstream = encvid->bitstream; + + if (bitstream->overrunBuffer == bitstream->bitstreamBuffer) /* OB is used */ + { + return encvid->overrunBuffer; + } + else + { + return NULL; + } +} + + +/* ======================================================================== */ +/* Function : PVAVCEncGetRecon() */ +/* Date : 4/29/2004 */ +/* Purpose : To retrieve the most recently encoded frame. */ +/* assume that user will make a copy if they want to hold on */ +/* to it. Otherwise, it is not guaranteed to be reserved. */ +/* Most applications prefer to see original frame rather than */ +/* reconstructed frame. So, we are staying aware from complex */ +/* buffering mechanism. If needed, can be added later. */ +/* In/out : */ +/* Return : AVCENC_SUCCESS for success. 
*/ +/* Modified : */ +/* ======================================================================== */ +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncGetRecon(AVCHandle *avcHandle, AVCFrameIO *recon) +{ + AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject; + AVCCommonObj *video = encvid->common; + AVCFrameStore *currFS = video->currFS; + + if (encvid == NULL) + { + return AVCENC_UNINITIALIZED; + } + + recon->YCbCr[0] = currFS->frame.Sl; + recon->YCbCr[1] = currFS->frame.Scb; + recon->YCbCr[2] = currFS->frame.Scr; + recon->height = currFS->frame.height; + recon->pitch = currFS->frame.pitch; + recon->disp_order = currFS->PicOrderCnt; + recon->coding_order = currFS->FrameNum; + recon->id = (uint32) currFS->base_dpb; /* use the pointer as the id */ + + currFS->IsOutputted |= 1; + + return AVCENC_SUCCESS; +} + +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncReleaseRecon(AVCHandle *avcHandle, AVCFrameIO *recon) +{ + OSCL_UNUSED_ARG(avcHandle); + OSCL_UNUSED_ARG(recon); + + return AVCENC_SUCCESS; //for now +} + +/* ======================================================================== */ +/* Function : PVAVCCleanUpEncoder() */ +/* Date : 4/18/2004 */ +/* Purpose : To clean up memories allocated by PVAVCEncInitialize() */ +/* In/out : */ +/* Return : AVCENC_SUCCESS for success. */ +/* Modified : */ +/* ======================================================================== */ +OSCL_EXPORT_REF void PVAVCCleanUpEncoder(AVCHandle *avcHandle) +{ + AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; + AVCCommonObj *video; + uint32 *userData = (uint32*) avcHandle->userData; + + if (encvid != NULL) + { + CleanMotionSearchModule(avcHandle); + + CleanupRateControlModule(avcHandle); + + if (encvid->functionPointer != NULL) + { + avcHandle->CBAVC_Free(userData, (int)encvid->functionPointer); + } + + if (encvid->min_cost) + { + avcHandle->CBAVC_Free(userData, (int)encvid->min_cost); + } + + if (encvid->intraSearch) + { + avcHandle->CBAVC_Free(userData, (int)encvid->intraSearch); + } + + if (encvid->mot16x16) + { + avcHandle->CBAVC_Free(userData, (int)encvid->mot16x16); + } + + if (encvid->rateCtrl) + { + avcHandle->CBAVC_Free(userData, (int)encvid->rateCtrl); + } + + if (encvid->overrunBuffer) + { + avcHandle->CBAVC_Free(userData, (int)encvid->overrunBuffer); + } + + video = encvid->common; + if (video != NULL) + { + if (video->MbToSliceGroupMap) + { + avcHandle->CBAVC_Free(userData, (int)video->MbToSliceGroupMap); + } + if (video->mblock != NULL) + { + avcHandle->CBAVC_Free(userData, (int)video->mblock); + } + if (video->decPicBuf != NULL) + { + CleanUpDPB(avcHandle, video); + avcHandle->CBAVC_Free(userData, (int)video->decPicBuf); + } + if (video->sliceHdr != NULL) + { + avcHandle->CBAVC_Free(userData, (int)video->sliceHdr); + } + if (video->currPicParams != NULL) + { + if (video->currPicParams->slice_group_id) + { + avcHandle->CBAVC_Free(userData, (int)video->currPicParams->slice_group_id); + } + + avcHandle->CBAVC_Free(userData, (int)video->currPicParams); + } + if (video->currSeqParams != NULL) + { + avcHandle->CBAVC_Free(userData, (int)video->currSeqParams); + } + if (encvid->bitstream != NULL) + { + avcHandle->CBAVC_Free(userData, (int)encvid->bitstream); + } + if (video != NULL) + { + avcHandle->CBAVC_Free(userData, (int)video); + } + } + + avcHandle->CBAVC_Free(userData, (int)encvid); + + avcHandle->AVCObject = NULL; + } + + return ; +} + +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncUpdateBitRate(AVCHandle *avcHandle, uint32 bitrate) +{ + OSCL_UNUSED_ARG(avcHandle); + OSCL_UNUSED_ARG(bitrate); + + return 
AVCENC_FAIL; +} + +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncUpdateFrameRate(AVCHandle *avcHandle, uint32 num, uint32 denom) +{ + OSCL_UNUSED_ARG(avcHandle); + OSCL_UNUSED_ARG(num); + OSCL_UNUSED_ARG(denom); + + return AVCENC_FAIL; +} + +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncUpdateIDRInterval(AVCHandle *avcHandle, int IDRInterval) +{ + OSCL_UNUSED_ARG(avcHandle); + OSCL_UNUSED_ARG(IDRInterval); + + return AVCENC_FAIL; +} + +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncIDRRequest(AVCHandle *avcHandle) +{ + OSCL_UNUSED_ARG(avcHandle); + + return AVCENC_FAIL; +} + +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncUpdateIMBRefresh(AVCHandle *avcHandle, int numMB) +{ + OSCL_UNUSED_ARG(avcHandle); + OSCL_UNUSED_ARG(numMB); + + return AVCENC_FAIL; +} + +void PVAVCEncGetFrameStats(AVCHandle *avcHandle, AVCEncFrameStats *avcStats) +{ + AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; + AVCRateControl *rateCtrl = encvid->rateCtrl; + + avcStats->avgFrameQP = GetAvgFrameQP(rateCtrl); + avcStats->numIntraMBs = encvid->numIntraMB; + + return ; +} + + + diff --git a/media/libstagefright/codecs/avc/enc/src/avcenc_api.h b/media/libstagefright/codecs/avc/enc/src/avcenc_api.h new file mode 100644 index 0000000..628dec6 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/avcenc_api.h @@ -0,0 +1,320 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +/** +This file contains application function interfaces to the AVC encoder library +and necessary type defitionitions and enumerations. +@publishedAll +*/ + +#ifndef AVCENC_API_H_INCLUDED +#define AVCENC_API_H_INCLUDED + +#ifndef AVCAPI_COMMON_H_INCLUDED +#include "avcapi_common.h" +#endif + +/** + This enumeration is used for the status returned from the library interface. 
+*/ +typedef enum +{ + /** + Fail information, need to add more error code for more specific info + */ + AVCENC_TRAILINGONES_FAIL = -35, + AVCENC_SLICE_EMPTY = -34, + AVCENC_POC_FAIL = -33, + AVCENC_CONSECUTIVE_NONREF = -32, + AVCENC_CABAC_FAIL = -31, + AVCENC_PRED_WEIGHT_TAB_FAIL = -30, + AVCENC_DEC_REF_PIC_MARK_FAIL = -29, + AVCENC_SPS_FAIL = -28, + AVCENC_BITSTREAM_BUFFER_FULL = -27, + AVCENC_BITSTREAM_INIT_FAIL = -26, + AVCENC_CHROMA_QP_FAIL = -25, + AVCENC_INIT_QS_FAIL = -24, + AVCENC_INIT_QP_FAIL = -23, + AVCENC_WEIGHTED_BIPRED_FAIL = -22, + AVCENC_INVALID_INTRA_PERIOD = -21, + AVCENC_INVALID_CHANGE_RATE = -20, + AVCENC_INVALID_BETA_OFFSET = -19, + AVCENC_INVALID_ALPHA_OFFSET = -18, + AVCENC_INVALID_DEBLOCK_IDC = -17, + AVCENC_INVALID_REDUNDANT_PIC = -16, + AVCENC_INVALID_FRAMERATE = -15, + AVCENC_INVALID_NUM_SLICEGROUP = -14, + AVCENC_INVALID_POC_LSB = -13, + AVCENC_INVALID_NUM_REF = -12, + AVCENC_INVALID_FMO_TYPE = -11, + AVCENC_ENCPARAM_MEM_FAIL = -10, + AVCENC_LEVEL_NOT_SUPPORTED = -9, + AVCENC_LEVEL_FAIL = -8, + AVCENC_PROFILE_NOT_SUPPORTED = -7, + AVCENC_TOOLS_NOT_SUPPORTED = -6, + AVCENC_WRONG_STATE = -5, + AVCENC_UNINITIALIZED = -4, + AVCENC_ALREADY_INITIALIZED = -3, + AVCENC_NOT_SUPPORTED = -2, + AVCENC_MEMORY_FAIL = AVC_MEMORY_FAIL, + AVCENC_FAIL = AVC_FAIL, + /** + Generic success value + */ + AVCENC_SUCCESS = AVC_SUCCESS, + AVCENC_PICTURE_READY = 2, + AVCENC_NEW_IDR = 3, /* upon getting this, users have to call PVAVCEncodeSPS and PVAVCEncodePPS to get a new SPS and PPS*/ + AVCENC_SKIPPED_PICTURE = 4 /* continuable error message */ + +} AVCEnc_Status; + +#define MAX_NUM_SLICE_GROUP 8 /* maximum for all the profiles */ + +/** +This structure contains the encoding parameters. +*/ +typedef struct tagAVCEncParam +{ + /* if profile/level is set to zero, encoder will choose the closest one for you */ + AVCProfile profile; /* profile of the bitstream to be compliant with*/ + AVCLevel level; /* level of the bitstream to be compliant with*/ + + int width; /* width of an input frame in pixel */ + int height; /* height of an input frame in pixel */ + + int poc_type; /* picture order count mode, 0,1 or 2 */ + /* for poc_type == 0 */ + uint log2_max_poc_lsb_minus_4; /* specify maximum value of POC Lsb, range 0..12*/ + /* for poc_type == 1 */ + uint delta_poc_zero_flag; /* delta POC always zero */ + int offset_poc_non_ref; /* offset for non-reference pic */ + int offset_top_bottom; /* offset between top and bottom field */ + uint num_ref_in_cycle; /* number of reference frame in one cycle */ + int *offset_poc_ref; /* array of offset for ref pic, dimension [num_ref_in_cycle] */ + + int num_ref_frame; /* number of reference frame used */ + int num_slice_group; /* number of slice group */ + int fmo_type; /* 0: interleave, 1: dispersed, 2: foreground with left-over + 3: box-out, 4:raster scan, 5:wipe, 6:explicit */ + /* for fmo_type == 0 */ + uint run_length_minus1[MAX_NUM_SLICE_GROUP]; /* array of size num_slice_group, in round robin fasion */ + /* fmo_type == 2*/ + uint top_left[MAX_NUM_SLICE_GROUP-1]; /* array of co-ordinates of each slice_group */ + uint bottom_right[MAX_NUM_SLICE_GROUP-1]; /* except the last one which is the background. 
*/ + /* fmo_type == 3,4,5 */ + AVCFlag change_dir_flag; /* slice group change direction flag */ + uint change_rate_minus1; + /* fmo_type == 6 */ + uint *slice_group; /* array of size MBWidth*MBHeight */ + + AVCFlag db_filter; /* enable deblocking loop filter */ + int disable_db_idc; /* 0: filter everywhere, 1: no filter, 2: no filter across slice boundary */ + int alpha_offset; /* alpha offset range -6,...,6 */ + int beta_offset; /* beta offset range -6,...,6 */ + + AVCFlag constrained_intra_pred; /* constrained intra prediction flag */ + + AVCFlag auto_scd; /* scene change detection on or off */ + int idr_period; /* idr frame refresh rate in number of target encoded frame (no concept of actual time).*/ + int intramb_refresh; /* minimum number of intra MB per frame */ + AVCFlag data_par; /* enable data partitioning */ + + AVCFlag fullsearch; /* enable full-pel full-search mode */ + int search_range; /* search range for motion vector in (-search_range,+search_range) pixels */ + AVCFlag sub_pel; /* enable sub pel prediction */ + AVCFlag submb_pred; /* enable sub MB partition mode */ + AVCFlag rdopt_mode; /* RD optimal mode selection */ + AVCFlag bidir_pred; /* enable bi-directional for B-slice, this flag forces the encoder to encode + any frame with POC less than the previously encoded frame as a B-frame. + If it's off, then such frames will remain P-frame. */ + + AVCFlag rate_control; /* rate control enable, on: RC on, off: constant QP */ + int initQP; /* initial QP */ + uint32 bitrate; /* target encoding bit rate in bits/second */ + uint32 CPB_size; /* coded picture buffer in number of bits */ + uint32 init_CBP_removal_delay; /* initial CBP removal delay in msec */ + + uint32 frame_rate; /* frame rate in the unit of frames per 1000 second */ + /* note, frame rate is only needed by the rate control, AVC is timestamp agnostic. */ + + AVCFlag out_of_band_param_set; /* flag to set whether param sets are to be retrieved up front or not */ + + AVCFlag use_overrun_buffer; /* do not throw away the frame if output buffer is not big enough. + copy excess bits to the overrun buffer */ +} AVCEncParams; + + +/** +This structure contains current frame encoding statistics for debugging purpose. +*/ +typedef struct tagAVCEncFrameStats +{ + int avgFrameQP; /* average frame QP */ + int numIntraMBs; /* number of intra MBs */ + int numFalseAlarm; + int numMisDetected; + int numDetected; + +} AVCEncFrameStats; + +#ifdef __cplusplus +extern "C" +{ +#endif + /** THE FOLLOWINGS ARE APIS */ + /** + This function initializes the encoder library. It verifies the validity of the + encoding parameters against the specified profile/level and the list of supported + tools by this library. It allocates necessary memories required to perform encoding. + For re-encoding application, if users want to setup encoder in a more precise way, + users can give the external SPS and PPS to the encoder to follow. + \param "avcHandle" "Handle to the AVC encoder library object." + \param "encParam" "Pointer to the encoding parameter structure." + \param "extSPS" "External SPS used for re-encoding purpose. NULL if not present" + \param "extPPS" "External PPS used for re-encoding purpose. NULL if not present" + \return "AVCENC_SUCCESS for success, + AVCENC_NOT_SUPPORTED for the use of unsupported tools, + AVCENC_MEMORY_FAIL for memory allocation failure, + AVCENC_FAIL for generic failure." 
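    As an illustrative (non-normative) example, a typical calling sequence,
    assuming the application has already filled in the AVCHandle callbacks and
    userData and the AVCEncParams fields, and where handle, param, inputFrame,
    buf and bufSize are application-provided (hypothetical) names:

        AVCEnc_Status s = PVAVCEncInitialize(&handle, &param, NULL, NULL);
        // for each captured frame (AVCENC_SKIPPED_PICTURE means it is not coded):
        s = PVAVCEncSetInput(&handle, &inputFrame);
        while (s == AVCENC_SUCCESS || s == AVCENC_NEW_IDR)
        {
            uint size = bufSize;                            // physical size of buf on input
            int type;
            s = PVAVCEncodeNAL(&handle, buf, &size, &type); // emits SPS/PPS/slice NALs
            if (s == AVCENC_PICTURE_READY) break;           // current frame is complete
        }
        PVAVCCleanUpEncoder(&handle);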
+ */ + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncInitialize(AVCHandle *avcHandle, AVCEncParams *encParam, void* extSPS, void* extPPS); + + + /** + Since the output buffer size is not known prior to encoding a frame, users need to + allocate big enough buffer otherwise, that frame will be dropped. This function returns + the size of the output buffer to be allocated by the users that guarantees to hold one frame. + It follows the CPB spec for a particular level. However, when the users set use_overrun_buffer + flag, this API is useless as excess output bits are saved in the overrun buffer waiting to be + copied out in small chunks, i.e. users can allocate any size of output buffer. + \param "avcHandle" "Handle to the AVC encoder library object." + \param "size" "Pointer to the size to be modified." + \return "AVCENC_SUCCESS for success, AVCENC_UNINITIALIZED when level is not known. + */ + + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncGetMaxOutputBufferSize(AVCHandle *avcHandle, int* size); + + /** + Users call this function to provide an input structure to the encoder library which will keep + a list of input structures it receives in case the users call this function many time before + calling PVAVCEncodeSlice. The encoder library will encode them according to the frame_num order. + Users should not modify the content of a particular frame until this frame is encoded and + returned thru CBAVCEnc_ReturnInput() callback function. + \param "avcHandle" "Handle to the AVC encoder library object." + \param "input" "Pointer to the input structure." + \return "AVCENC_SUCCESS for success, + AVCENC_FAIL if the encoder is not in the right state to take a new input frame. + AVCENC_NEW_IDR for the detection or determination of a new IDR, with this status, + the returned NAL is an SPS NAL, + AVCENC_NO_PICTURE if the input frame coding timestamp is too early, users must + get next frame or adjust the coding timestamp." + */ + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncSetInput(AVCHandle *avcHandle, AVCFrameIO *input); + + /** + This function is called to encode a NAL unit which can be an SPS NAL, a PPS NAL or + a VCL (video coding layer) NAL which contains one slice of data. It could be a + fixed number of macroblocks, as specified in the encoder parameters set, or the + maximum number of macroblocks fitted into the given input argument "buffer". The + input frame is taken from the oldest unencoded input frame retrieved by users by + PVAVCEncGetInput API. + \param "avcHandle" "Handle to the AVC encoder library object." + \param "buffer" "Pointer to the output AVC bitstream buffer, the format will be EBSP, + not RBSP." + \param "buf_nal_size" "As input, the size of the buffer in bytes. + This is the physical limitation of the buffer. As output, the size of the EBSP." + \param "nal_type" "Pointer to the NAL type of the returned buffer." + \return "AVCENC_SUCCESS for success of encoding one slice, + AVCENC_PICTURE_READY for the completion of a frame encoding, + AVCENC_FAIL for failure (this should not occur, though)." + */ + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncodeNAL(AVCHandle *avcHandle, uint8 *buffer, uint *buf_nal_size, int *nal_type); + + /** + This function sniffs the nal_unit_type such that users can call corresponding APIs. + This function is identical to PVAVCDecGetNALType() in the decoder. + \param "bitstream" "Pointer to the beginning of a NAL unit (start with forbidden_zero_bit, etc.)." + \param "size" "size of the bitstream (NumBytesInNALunit + 1)." 
+ \param "nal_unit_type" "Pointer to the return value of nal unit type." + \return "AVCENC_SUCCESS if success, AVCENC_FAIL otherwise." + */ + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncGetNALType(uint8 *bitstream, int size, int *nal_type, int *nal_ref_idc); + + /** + This function returns the pointer to internal overrun buffer. Users can call this to query + whether the overrun buffer has been used to encode the current NAL. + \param "avcHandle" "Pointer to the handle." + \return "Pointer to overrun buffer if it is used, otherwise, NULL." + */ + OSCL_IMPORT_REF uint8* PVAVCEncGetOverrunBuffer(AVCHandle* avcHandle); + + /** + This function returns the reconstructed frame of the most recently encoded frame. + Note that this frame is not returned to the users yet. Users should only read the + content of this frame. + \param "avcHandle" "Handle to the AVC encoder library object." + \param "output" "Pointer to the input structure." + \return "AVCENC_SUCCESS for success, AVCENC_NO_PICTURE if no picture to be outputted." + */ + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncGetRecon(AVCHandle *avcHandle, AVCFrameIO *recon); + + /** + This function is used to return the recontructed frame back to the AVC encoder library + in order to be re-used for encoding operation. If users want the content of it to remain + unchanged for a long time, they should make a copy of it and release the memory back to + the encoder. The encoder relies on the id element in the AVCFrameIO structure, + thus users should not change the id value. + \param "avcHandle" "Handle to the AVC decoder library object." + \param "output" "Pointer to the AVCFrameIO structure." + \return "AVCENC_SUCCESS for success, AVCENC_FAIL for fail for id not found." + */ + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncReleaseRecon(AVCHandle *avcHandle, AVCFrameIO *recon); + + /** + This function performs clean up operation including memory deallocation. + The encoder will also clear the list of input structures it has not released. + This implies that users must keep track of the number of input structure they have allocated + and free them accordingly. + \param "avcHandle" "Handle to the AVC encoder library object." + */ + OSCL_IMPORT_REF void PVAVCCleanUpEncoder(AVCHandle *avcHandle); + + /** + This function extracts statistics of the current frame. If the encoder has not finished + with the current frame, the result is not accurate. + \param "avcHandle" "Handle to the AVC encoder library object." + \param "avcStats" "Pointer to AVCEncFrameStats structure." + \return "void." + */ + void PVAVCEncGetFrameStats(AVCHandle *avcHandle, AVCEncFrameStats *avcStats); + + /** + These functions are used for the modification of encoding parameters. + To be polished. 
+ */ + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncUpdateBitRate(AVCHandle *avcHandle, uint32 bitrate); + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncUpdateFrameRate(AVCHandle *avcHandle, uint32 num, uint32 denom); + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncUpdateIDRInterval(AVCHandle *avcHandle, int IDRInterval); + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncIDRRequest(AVCHandle *avcHandle); + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncUpdateIMBRefresh(AVCHandle *avcHandle, int numMB); + + +#ifdef __cplusplus +} +#endif +#endif /* _AVCENC_API_H_ */ + diff --git a/media/libstagefright/codecs/avc/enc/src/avcenc_int.h b/media/libstagefright/codecs/avc/enc/src/avcenc_int.h new file mode 100644 index 0000000..3fe08a1 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/avcenc_int.h @@ -0,0 +1,471 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +/** +This file contains application function interfaces to the AVC encoder library +and necessary type defitionitions and enumerations. +@publishedAll +*/ + +#ifndef AVCENC_INT_H_INCLUDED +#define AVCENC_INT_H_INCLUDED + +#ifndef AVCINT_COMMON_H_INCLUDED +#include "avcint_common.h" +#endif +#ifndef AVCENC_API_H_INCLUDED +#include "avcenc_api.h" +#endif + +typedef float OsclFloat; + +/* Definition for the structures below */ +#define DEFAULT_ATTR 0 /* default memory attribute */ +#define MAX_INPUT_FRAME 30 /* some arbitrary number, it can be much higher than this. */ +#define MAX_REF_FRAME 16 /* max size of the RefPicList0 and RefPicList1 */ +#define MAX_REF_PIC_LIST 33 + +#define MIN_QP 0 +#define MAX_QP 51 +#define SHIFT_QP 12 +#define LAMBDA_ACCURACY_BITS 16 +#define LAMBDA_FACTOR(lambda) ((int)((double)(1<<LAMBDA_ACCURACY_BITS)*lambda+0.5)) +#define WEIGHTED_COST(factor,bits) (((factor)*(bits))>>LAMBDA_ACCURACY_BITS) +#define MV_COST(f,s,cx,cy,px,py) (WEIGHTED_COST(f,mvbits[((cx)<<(s))-px]+mvbits[((cy)<<(s))-py])) +#define MV_COST_S(f,cx,cy,px,py) (WEIGHTED_COST(f,mvbits[cx-px]+mvbits[cy-py])) + +/* for sub-pel search and interpolation */ +#define SUBPEL_PRED_BLK_SIZE 576 // 24x24 +#define REF_CENTER 75 +#define V2Q_H0Q 1 +#define V0Q_H2Q 2 +#define V2Q_H2Q 3 + +/* +#define V3Q_H0Q 1 +#define V3Q_H1Q 2 +#define V0Q_H1Q 3 +#define V1Q_H1Q 4 +#define V1Q_H0Q 5 +#define V1Q_H3Q 6 +#define V0Q_H3Q 7 +#define V3Q_H3Q 8 +#define V2Q_H3Q 9 +#define V2Q_H0Q 10 +#define V2Q_H1Q 11 +#define V2Q_H2Q 12 +#define V3Q_H2Q 13 +#define V0Q_H2Q 14 +#define V1Q_H2Q 15 +*/ + + +#define DEFAULT_OVERRUN_BUFFER_SIZE 1000 + +// associated with the above cost model +const uint8 COEFF_COST[2][16] = +{ + {3, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9} +}; + + + +//!
convert from H.263 QP to H.264 quant given by: quant=pow(2,QP/6) +const int QP2QUANT[40] = +{ + 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 4, 4, 4, 5, 6, + 6, 7, 8, 9, 10, 11, 13, 14, + 16, 18, 20, 23, 25, 29, 32, 36, + 40, 45, 51, 57, 64, 72, 81, 91 +}; + + +/** +This enumeration keeps track of the internal status of the encoder whether it is doing +something. The encoding flow follows the order in which these states are. +@publishedAll +*/ +typedef enum +{ + AVCEnc_Initializing = 0, + AVCEnc_Encoding_SPS, + AVCEnc_Encoding_PPS, + AVCEnc_Analyzing_Frame, + AVCEnc_WaitingForBuffer, // pending state + AVCEnc_Encoding_Frame, +} AVCEnc_State ; + +/** +Bitstream structure contains bitstream related parameters such as the pointer +to the buffer, the current byte position and bit position. The content of the +bitstreamBuffer will be in EBSP format as the emulation prevention codes are +automatically inserted as the RBSP is recorded. +@publishedAll +*/ +typedef struct tagEncBitstream +{ + uint8 *bitstreamBuffer; /* pointer to buffer memory */ + int buf_size; /* size of the buffer memory */ + int write_pos; /* next position to write to bitstreamBuffer */ + int count_zeros; /* count number of consecutive zero */ + uint current_word; /* byte-swapped (MSB left) current word to write to buffer */ + int bit_left; /* number of bit left in current_word */ + uint8 *overrunBuffer; /* extra output buffer to prevent current skip due to output buffer overrun*/ + int oBSize; /* size of allocated overrun buffer */ + void *encvid; /* pointer to the main object */ + +} AVCEncBitstream; + +/** +This structure is used for rate control purpose and other performance related control +variables such as, RD cost, statistics, motion search stuffs, etc. +should be in this structure. +@publishedAll +*/ + + +typedef struct tagRDInfo +{ + int QP; + int actual_bits; + OsclFloat mad; + OsclFloat R_D; +} RDInfo; + +typedef struct tagMultiPass +{ + /* multipass rate control data */ + int target_bits; /* target bits for current frame, = rc->T */ + int actual_bits; /* actual bits for current frame obtained after encoding, = rc->Rc*/ + int QP; /* quantization level for current frame, = rc->Qc*/ + int prev_QP; /* quantization level for previous frame */ + int prev_prev_QP; /* quantization level for previous frame before last*/ + OsclFloat mad; /* mad for current frame, = video->avgMAD*/ + int bitrate; /* bitrate for current frame */ + OsclFloat framerate; /* framerate for current frame*/ + + int nRe_Quantized; /* control variable for multipass encoding, */ + /* 0 : first pass */ + /* 1 : intermediate pass(quantization and VLC loop only) */ + /* 2 : final pass(de-quantization, idct, etc) */ + /* 3 : macroblock level rate control */ + + int encoded_frames; /* counter for all encoded frames */ + int re_encoded_frames; /* counter for all multipass encoded frames*/ + int re_encoded_times; /* counter for all times of multipass frame encoding */ + + /* Multiple frame prediction*/ + RDInfo **pRDSamples; /* pRDSamples[30][32], 30->30fps, 32 -> 5 bit quantizer, 32 candidates*/ + int framePos; /* specific position in previous multiple frames*/ + int frameRange; /* number of overall previous multiple frames */ + int samplesPerFrame[30]; /* number of samples per frame, 30->30fps */ + + /* Bit allocation for scene change frames and high motion frames */ + OsclFloat sum_mad; + int counter_BTsrc; /* BT = Bit Transfer, bit transfer from low motion frames or less complicatedly compressed frames */ + int counter_BTdst; /* BT = Bit Transfer, bit transfer to 
scene change frames or high motion frames or more complicatedly compressed frames */ + OsclFloat sum_QP; + int diff_counter; /* diff_counter = -diff_counter_BTdst, or diff_counter_BTsrc */ + + /* For target bitrate or framerate update */ + OsclFloat target_bits_per_frame; /* = C = bitrate/framerate */ + OsclFloat target_bits_per_frame_prev; /* previous C */ + OsclFloat aver_mad; /* so-far average mad could replace sum_mad */ + OsclFloat aver_mad_prev; /* previous average mad */ + int overlapped_win_size; /* transition period of time */ + int encoded_frames_prev; /* previous encoded_frames */ +} MultiPass; + + +typedef struct tagdataPointArray +{ + int Qp; + int Rp; + OsclFloat Mp; /* for MB-based RC */ + struct tagdataPointArray *next; + struct tagdataPointArray *prev; +} dataPointArray; + +typedef struct tagAVCRateControl +{ + + /* these parameters are initialized by the users AVCEncParams */ + /* bitrate-robustness tradeoff */ + uint scdEnable; /* enable scene change detection */ + int idrPeriod; /* IDR period in number of frames */ + int intraMBRate; /* intra MB refresh rate per frame */ + uint dpEnable; /* enable data partitioning */ + + /* quality-complexity tradeoff */ + uint subPelEnable; /* enable quarter pel search */ + int mvRange; /* motion vector search range in +/- pixel */ + uint subMBEnable; /* enable sub MB prediction mode (4x4, 4x8, 8x4) */ + uint rdOptEnable; /* enable RD-opt mode selection */ + uint twoPass; /* flag for 2 pass encoding ( for future )*/ + uint bidirPred; /* bi-directional prediction for B-frame. */ + + uint rcEnable; /* enable rate control, '1' on, '0' const QP */ + int initQP; /* initial QP */ + + /* note the following 3 params are for HRD, these triplets can be a series + of triplets as the generalized HRD allows. SEI message must be generated in this case. */ + /* We no longer have to differentiate between CBR and VBR. The users to the + AVC encoder lib will do the mapping from CBR/VBR to these parameters. */ + int32 bitRate; /* target bit rate for the overall clip in bits/second*/ + int32 cpbSize; /* coded picture buffer size in bytes */ + int32 initDelayOffset; /* initial CBP removal delay in bits */ + + OsclFloat frame_rate; /* frame rate */ + int srcInterval; /* source frame rate in msec */ + int basicUnit; /* number of macroblocks per BU */ + + /* Then internal parameters for the operation */ + uint first_frame; /* a flag for the first frame */ + int lambda_mf; /* for example */ + int totalSAD; /* SAD of current frame */ + + /*******************************************/ + /* this part comes from MPEG4 rate control */ + int alpha; /* weight for I frame */ + int Rs; /*bit rate for the sequence (or segment) e.g., 24000 bits/sec */ + int Rc; /*bits used for the current frame. It is the bit count obtained after encoding. */ + int Rp; /*bits to be removed from the buffer per picture. */ + /*? is this the average one, or just the bits coded for the previous frame */ + int Rps; /*bit to be removed from buffer per src frame */ + OsclFloat Ts; /*number of seconds for the sequence (or segment). e.g., 10 sec */ + OsclFloat Ep; + OsclFloat Ec; /*mean absolute difference for the current frame after motion compensation.*/ + /*If the macroblock is intra coded, the original spatial pixel values are summed.*/ + int Qc; /*quantization level used for the current frame. 
*/ + int Nr; /*number of P frames remaining for encoding.*/ + int Rr; /*number of bits remaining for encoding this sequence (or segment).*/ + int Rr_Old; + int T; /*target bit to be used for the current frame.*/ + int S; /*number of bits used for encoding the previous frame.*/ + int Hc; /*header and motion vector bits used in the current frame. It includes all the information except to the residual information.*/ + int Hp; /*header and motion vector bits used in the previous frame. It includes all the information except to the residual information.*/ + int Ql; /*quantization level used in the previous frame */ + int Bs; /*buffer size e.g., R/2 */ + int B; /*current buffer level e.g., R/4 - start from the middle of the buffer */ + OsclFloat X1; + OsclFloat X2; + OsclFloat X11; + OsclFloat M; /*safe margin for the buffer */ + OsclFloat smTick; /*ratio of src versus enc frame rate */ + double remnant; /*remainder frame of src/enc frame for fine frame skipping */ + int timeIncRes; /* vol->timeIncrementResolution */ + + dataPointArray *end; /*quantization levels for the past (20) frames */ + + int frameNumber; /* ranging from 0 to 20 nodes*/ + int w; + int Nr_Original; + int Nr_Old, Nr_Old2; + int skip_next_frame; + int Qdep; /* smooth Q adjustment */ + int VBR_Enabled; + + int totalFrameNumber; /* total coded frames, for debugging!!*/ + + char oFirstTime; + + int numFrameBits; /* keep track of number of bits of the current frame */ + int NumberofHeaderBits; + int NumberofTextureBits; + int numMBHeaderBits; + int numMBTextureBits; + double *MADofMB; + int32 bitsPerFrame; + + /* BX rate control, something like TMN8 rate control*/ + + MultiPass *pMP; + + int TMN_W; + int TMN_TH; + int VBV_fullness; + int max_BitVariance_num; /* the number of the maximum bit variance within the given buffer with the unit of 10% of bitrate/framerate*/ + int encoded_frames; /* counter for all encoded frames */ + int low_bound; /* bound for underflow detection, usually low_bound=-Bs/2, but could be changed in H.263 mode */ + int VBV_fullness_offset; /* offset of VBV_fullness, usually is zero, but can be changed in H.263 mode*/ + /* End BX */ + +} AVCRateControl; + + +/** +This structure is for the motion vector information. */ +typedef struct tagMV +{ + int x; + int y; + uint sad; +} AVCMV; + +/** +This structure contains function pointers for different platform dependent implementation of +functions. */ +typedef struct tagAVCEncFuncPtr +{ + + int (*SAD_MB_HalfPel[4])(uint8*, uint8*, int, void *); + int (*SAD_Macroblock)(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + +} AVCEncFuncPtr; + +/** +This structure contains information necessary for correct padding. +*/ +typedef struct tagPadInfo +{ + int i; + int width; + int j; + int height; +} AVCPadInfo; + + +#ifdef HTFM +typedef struct tagHTFM_Stat +{ + int abs_dif_mad_avg; + uint countbreak; + int offsetArray[16]; + int offsetRef[16]; +} HTFM_Stat; +#endif + + +/** +This structure is the main object for AVC encoder library providing access to all +global variables. It is allocated at PVAVCInitEncoder and freed at PVAVCCleanUpEncoder. 
+@publishedAll +*/ +typedef struct tagEncObject +{ + + AVCCommonObj *common; + + AVCEncBitstream *bitstream; /* for current NAL */ + uint8 *overrunBuffer; /* extra output buffer to prevent current skip due to output buffer overrun*/ + int oBSize; /* size of allocated overrun buffer */ + + /* rate control */ + AVCRateControl *rateCtrl; /* pointer to the rate control structure */ + + /* encoding operation */ + AVCEnc_State enc_state; /* encoding state */ + + AVCFrameIO *currInput; /* pointer to the current input frame */ + + int currSliceGroup; /* currently encoded slice group id */ + + int level[24][16], run[24][16]; /* scratch memory */ + int leveldc[16], rundc[16]; /* for DC component */ + int levelcdc[16], runcdc[16]; /* for chroma DC component */ + int numcoefcdc[2]; /* number of coefficient for chroma DC */ + int numcoefdc; /* number of coefficients for DC component */ + + int qp_const; + int qp_const_c; + /********* intra prediction scratch memory **********************/ + uint8 pred_i16[AVCNumI16PredMode][256]; /* save prediction for MB */ + uint8 pred_i4[AVCNumI4PredMode][16]; /* save prediction for blk */ + uint8 pred_ic[AVCNumIChromaMode][128]; /* for 2 chroma */ + + int mostProbableI4Mode[16]; /* in raster scan order */ + /********* motion compensation related variables ****************/ + AVCMV *mot16x16; /* Saved motion vectors for 16x16 block*/ + AVCMV(*mot16x8)[2]; /* Saved motion vectors for 16x8 block*/ + AVCMV(*mot8x16)[2]; /* Saved motion vectors for 8x16 block*/ + AVCMV(*mot8x8)[4]; /* Saved motion vectors for 8x8 block*/ + + /********* subpel position **************************************/ + uint32 subpel_pred[SUBPEL_PRED_BLK_SIZE/*<<2*/]; /* all 16 sub-pel positions */ + uint8 *hpel_cand[9]; /* pointer to half-pel position */ + int best_hpel_pos; /* best position */ + uint8 qpel_cand[8][24*16]; /* pointer to quarter-pel position */ + int best_qpel_pos; + uint8 *bilin_base[9][4]; /* pointer to 4 position at top left of bilinear quarter-pel */ + + /* need for intra refresh rate */ + uint8 *intraSearch; /* Intra Array for MBs to be intra searched */ + uint firstIntraRefreshMBIndx; /* keep track for intra refresh */ + + int i4_sad; /* temporary for i4 mode SAD */ + int *min_cost; /* Minimum cost for the all MBs */ + int lambda_mode; /* Lagrange parameter for mode selection */ + int lambda_motion; /* Lagrange parameter for MV selection */ + + uint8 *mvbits_array; /* Table for bits spent in the cost funciton */ + uint8 *mvbits; /* An offset to the above array. 
*/ + + /* to speedup the SAD calculation */ + void *sad_extra_info; + uint8 currYMB[256]; /* interleaved current macroblock in HTFM order */ + +#ifdef HTFM + int nrmlz_th[48]; /* Threshold for fast SAD calculation using HTFM */ + HTFM_Stat htfm_stat; /* For statistics collection */ +#endif + + /* statistics */ + int numIntraMB; /* keep track of number of intra MB */ + + /* encoding complexity control */ + uint fullsearch_enable; /* flag to enable full-pel full-search */ + + /* misc.*/ + bool outOfBandParamSet; /* flag to enable out-of-band param set */ + + AVCSeqParamSet extSPS; /* for external SPS */ + AVCPicParamSet extPPS; /* for external PPS */ + + /* time control */ + uint32 prevFrameNum; /* previous frame number starting from modTimeRef */ + uint32 modTimeRef; /* Reference modTime update every I-Vop*/ + uint32 wrapModTime; /* Offset to modTime Ref, rarely used */ + + uint prevProcFrameNum; /* previously processed frame number, could be skipped */ + uint prevCodedFrameNum; /* previously encoded frame number */ + /* POC related variables */ + uint32 dispOrdPOCRef; /* reference POC is displayer order unit. */ + + /* Function pointers */ + AVCEncFuncPtr *functionPointer; /* store pointers to platform specific functions */ + + /* Application control data */ + AVCHandle *avcHandle; + + +} AVCEncObject; + + +#endif /*AVCENC_INT_H_INCLUDED*/ + diff --git a/media/libstagefright/codecs/avc/enc/src/avcenc_lib.h b/media/libstagefright/codecs/avc/enc/src/avcenc_lib.h new file mode 100644 index 0000000..17e28ef --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/avcenc_lib.h @@ -0,0 +1,1020 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +/** +This file contains declarations of internal functions for AVC decoder library. +@publishedAll +*/ +#ifndef AVCENC_LIB_H_INCLUDED +#define AVCENC_LIB_H_INCLUDED + +#ifndef AVCLIB_COMMON_H_INCLUDED +#include "avclib_common.h" +#endif +#ifndef AVCENC_INT_H_INCLUDED +#include "avcenc_int.h" +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif + /*------------- block.c -------------------------*/ + + /** + This function perform residue calculation, transform, quantize, inverse quantize, + inverse transform and residue compensation on a 4x4 block. + \param "encvid" "Pointer to AVCEncObject." + \param "blkidx" "raster scan block index of the current 4x4 block." + \param "cur" "Pointer to the reconstructed block." + \param "org" "Pointer to the original block." + \param "coef_cost" "Pointer to the coefficient cost to be filled in and returned." + \return "Number of non-zero coefficients." + */ + int dct_luma(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org, int *coef_cost); + + /** + This function performs IDCT on an INTER macroblock. + \param "video" "Pointer to AVCCommonObj." 
+ \param "curL" "Pointer to the origin of the macroblock on the current frame." + \param "currMB" "Pointer to the AVCMacroblock structure." + \param "picPitch" "Pitch of the current frame." + \return "void". + */ + void MBInterIdct(AVCCommonObj *video, uint8 *curL, AVCMacroblock *currMB, int picPitch); + + /** + This function perform residue calculation, transform, quantize, inverse quantize, + inverse transform and residue compensation on a macroblock. + \param "encvid" "Pointer to AVCEncObject." + \param "curL" "Pointer to the reconstructed MB." + \param "orgL" "Pointer to the original MB." + \return "void" + */ + void dct_luma_16x16(AVCEncObject *encvid, uint8 *curL, uint8 *orgL); + + /** + This function perform residue calculation, transform, quantize, inverse quantize, + inverse transform and residue compensation for chroma components of an MB. + \param "encvid" "Pointer to AVCEncObject." + \param "curC" "Pointer to the reconstructed MB." + \param "orgC" "Pointer to the original MB." + \param "cr" "Flag whether it is Cr or not." + \return "void" + */ + void dct_chroma(AVCEncObject *encvid, uint8 *curC, uint8 *orgC, int cr); + + /*----------- init.c ------------------*/ + /** + This function interprets the encoding parameters provided by users in encParam. + The results are kept in AVCEncObject, AVCSeqParamSet, AVCPicParamSet and AVCSliceHeader. + \param "encvid" "Pointer to AVCEncObject." + \param "encParam" "Pointer to AVCEncParam." + \param "extSPS" "External SPS template to be followed. NULL if not present." + \param "extPPS" "External PPS template to be followed. NULL if not present." + \return "see AVCEnc_Status." + */ + AVCEnc_Status SetEncodeParam(AVCHandle *avcHandle, AVCEncParams *encParam, + void *extSPS, void *extPPS); + + /** + This function verifies the encoding parameters whether they meet the set of supported + tool by a specific profile. If the profile is not set, it will just find the closest + profile instead of verifying it. + \param "video" "Pointer to AVCEncObject." + \param "seqParam" "Pointer to AVCSeqParamSet." + \param "picParam" "Pointer to AVCPicParamSet." + \return "AVCENC_SUCCESS if success, + AVCENC_PROFILE_NOT_SUPPORTED if the specified profile + is not supported by this version of the library, + AVCENC_TOOLS_NOT_SUPPORTED if any of the specified encoding tools are + not supported by the user-selected profile." + */ + AVCEnc_Status VerifyProfile(AVCEncObject *video, AVCSeqParamSet *seqParam, AVCPicParamSet *picParam); + + /** + This function verifies the encoding parameters whether they meet the requirement + for a specific level. If the level is not set, it will just find the closest + level instead of verifying it. + \param "video" "Pointer to AVCEncObject." + \param "seqParam" "Pointer to AVCSeqParamSet." + \param "picParam" "Pointer to AVCPicParamSet." + \return "AVCENC_SUCCESS if success, + AVCENC_LEVEL_NOT_SUPPORTED if the specified level + is not supported by this version of the library, + AVCENC_LEVEL_FAIL if any of the encoding parameters exceed + the range of the user-selected level." + */ + AVCEnc_Status VerifyLevel(AVCEncObject *video, AVCSeqParamSet *seqParam, AVCPicParamSet *picParam); + + /** + This funciton initializes the frame encoding by setting poc/frame_num related parameters. it + also performs motion estimation. + \param "encvid" "Pointer to the AVCEncObject." 
+ \return "AVCENC_SUCCESS if success, AVCENC_NO_PICTURE if there is no input picture + in the queue to encode, AVCENC_POC_FAIL or AVCENC_CONSECUTIVE_NONREF for POC + related errors, AVCENC_NEW_IDR if new IDR is detected." + */ + AVCEnc_Status InitFrame(AVCEncObject *encvid); + + /** + This function initializes slice header related variables and other variables necessary + for decoding one slice. + \param "encvid" "Pointer to the AVCEncObject." + \return "AVCENC_SUCCESS if success." + */ + AVCEnc_Status InitSlice(AVCEncObject *encvid); + + /*----------- header.c ----------------*/ + /** + This function performs bitstream encoding of the sequence parameter set NAL. + \param "encvid" "Pointer to the AVCEncObject." + \param "stream" "Pointer to AVCEncBitstream." + \return "AVCENC_SUCCESS if success or AVCENC_SPS_FAIL or others for unexpected failure which + should not occur. The SPS parameters should all be verified before this function is called." + */ + AVCEnc_Status EncodeSPS(AVCEncObject *encvid, AVCEncBitstream *stream); + + /** + This function encodes the VUI parameters into the sequence parameter set bitstream. + \param "stream" "Pointer to AVCEncBitstream." + \param "vui" "Pointer to AVCVUIParams." + \return "nothing." + */ + void EncodeVUI(AVCEncBitstream* stream, AVCVUIParams* vui); + + /** + This function encodes HRD parameters into the sequence parameter set bitstream + \param "stream" "Pointer to AVCEncBitstream." + \param "hrd" "Pointer to AVCHRDParams." + \return "nothing." + */ + void EncodeHRD(AVCEncBitstream* stream, AVCHRDParams* hrd); + + + /** + This function performs bitstream encoding of the picture parameter set NAL. + \param "encvid" "Pointer to the AVCEncObject." + \param "stream" "Pointer to AVCEncBitstream." + \return "AVCENC_SUCCESS if success or AVCENC_PPS_FAIL or others for unexpected failure which + should not occur. The SPS parameters should all be verified before this function is called." + */ + AVCEnc_Status EncodePPS(AVCEncObject *encvid, AVCEncBitstream *stream); + + /** + This function encodes slice header information which has been initialized or fabricated + prior to entering this funciton. + \param "encvid" "Pointer to the AVCEncObject." + \param "stream" "Pointer to AVCEncBitstream." + \return "AVCENC_SUCCESS if success or bitstream fail statuses." + */ + AVCEnc_Status EncodeSliceHeader(AVCEncObject *encvid, AVCEncBitstream *stream); + + /** + This function encodes reference picture list reordering relted syntax. + \param "video" "Pointer to AVCCommonObj." + \param "stream" "Pointer to AVCEncBitstream." + \param "sliceHdr" "Pointer to AVCSliceHdr." + \param "slice_type" "Value of slice_type - 5 if greater than 5." + \return "AVCENC_SUCCESS for success and AVCENC_FAIL otherwise." + */ + AVCEnc_Status ref_pic_list_reordering(AVCCommonObj *video, AVCEncBitstream *stream, AVCSliceHeader *sliceHdr, int slice_type); + + /** + This function encodes dec_ref_pic_marking related syntax. + \param "video" "Pointer to AVCCommonObj." + \param "stream" "Pointer to AVCEncBitstream." + \param "sliceHdr" "Pointer to AVCSliceHdr." + \return "AVCENC_SUCCESS for success and AVCENC_FAIL otherwise." + */ + AVCEnc_Status dec_ref_pic_marking(AVCCommonObj *video, AVCEncBitstream *stream, AVCSliceHeader *sliceHdr); + + /** + This function initializes the POC related variables and the POC syntax to be encoded + to the slice header derived from the disp_order and is_reference flag of the original + input frame to be encoded. + \param "video" "Pointer to the AVCEncObject." 
+ \return "AVCENC_SUCCESS if success, + AVCENC_POC_FAIL if the poc type is undefined or + AVCENC_CONSECUTIVE_NONREF if there are consecutive non-reference frame for POC type 2." + */ + AVCEnc_Status InitPOC(AVCEncObject *video); + + /** + This function performs POC related operation after a picture is decoded. + \param "video" "Pointer to AVCCommonObj." + \return "AVCENC_SUCCESS" + */ + AVCEnc_Status PostPOC(AVCCommonObj *video); + + /*----------- bitstream_io.c ----------------*/ + /** + This function initializes the bitstream structure with the information given by + the users. + \param "bitstream" "Pointer to the AVCEncBitstream structure." + \param "buffer" "Pointer to the unsigned char buffer for output." + \param "buf_size" "The size of the buffer in bytes." + \param "overrunBuffer" "Pointer to extra overrun buffer." + \param "oBSize" "Size of overrun buffer in bytes." + \return "AVCENC_SUCCESS if success, AVCENC_BITSTREAM_INIT_FAIL if fail" + */ + AVCEnc_Status BitstreamEncInit(AVCEncBitstream *bitstream, uint8 *buffer, int buf_size, + uint8 *overrunBuffer, int oBSize); + + /** + This function writes the data from the cache into the bitstream buffer. It also adds the + emulation prevention code if necessary. + \param "stream" "Pointer to the AVCEncBitstream structure." + \return "AVCENC_SUCCESS if success or AVCENC_BITSTREAM_BUFFER_FULL if fail." + */ + AVCEnc_Status AVCBitstreamSaveWord(AVCEncBitstream *stream); + + /** + This function writes the codeword into the cache which will eventually be written to + the bitstream buffer. + \param "stream" "Pointer to the AVCEncBitstream structure." + \param "nBits" "Number of bits in the codeword." + \param "code" "The codeword." + \return "AVCENC_SUCCESS if success or AVCENC_BITSTREAM_BUFFER_FULL if fail." + */ + AVCEnc_Status BitstreamWriteBits(AVCEncBitstream *stream, int nBits, uint code); + + /** + This function writes one bit of data into the cache which will eventually be written + to the bitstream buffer. + \param "stream" "Pointer to the AVCEncBitstream structure." + \param "code" "The codeword." + \return "AVCENC_SUCCESS if success or AVCENC_BITSTREAM_BUFFER_FULL if fail." + */ + AVCEnc_Status BitstreamWrite1Bit(AVCEncBitstream *stream, uint code); + + /** + This function adds trailing bits to the bitstream and reports back the final EBSP size. + \param "stream" "Pointer to the AVCEncBitstream structure." + \param "nal_size" "Output the final NAL size." + \return "AVCENC_SUCCESS if success or AVCENC_BITSTREAM_BUFFER_FULL if fail." + */ + AVCEnc_Status BitstreamTrailingBits(AVCEncBitstream *bitstream, uint *nal_size); + + /** + This function checks whether the current bit position is byte-aligned or not. + \param "stream" "Pointer to the bitstream structure." + \return "true if byte-aligned, false otherwise." + */ + bool byte_aligned(AVCEncBitstream *stream); + + + /** + This function checks the availability of overrun buffer and switches to use it when + normal bufffer is not big enough. + \param "stream" "Pointer to the bitstream structure." + \param "numExtraBytes" "Number of extra byte needed." + \return "AVCENC_SUCCESS or AVCENC_FAIL." + */ + AVCEnc_Status AVCBitstreamUseOverrunBuffer(AVCEncBitstream* stream, int numExtraBytes); + + + /*-------------- intra_est.c ---------------*/ + + /** This function performs intra/inter decision based on ABE. + \param "encvid" "Pointer to AVCEncObject." + \param "min_cost" "Best inter cost." + \param "curL" "Pointer to the current MB origin in reconstructed frame." 
+ \param "picPitch" "Pitch of the reconstructed frame." + \return "Boolean for intra mode." + */ + +//bool IntraDecisionABE(AVCEncObject *encvid, int min_cost, uint8 *curL, int picPitch); + bool IntraDecision(int *min_cost, uint8 *cur, int pitch, bool ave); + + /** + This function performs intra prediction mode search. + \param "encvid" "Pointer to AVCEncObject." + \param "mbnum" "Current MB number." + \param "curL" "Pointer to the current MB origin in reconstructed frame." + \param "picPitch" "Pitch of the reconstructed frame." + \return "void." + */ + void MBIntraSearch(AVCEncObject *encvid, int mbnum, uint8 *curL, int picPitch); + + /** + This function generates all the I16 prediction modes for an MB and keep it in + encvid->pred_i16. + \param "encvid" "Pointer to AVCEncObject." + \return "void" + */ + void intrapred_luma_16x16(AVCEncObject *encvid); + + /** + This function calculate the cost of all I16 modes and compare them to get the minimum. + \param "encvid" "Pointer to AVCEncObject." + \param "orgY" "Pointer to the original luma MB." + \param "min_cost" "Pointer to the minimal cost so-far." + \return "void" + */ + void find_cost_16x16(AVCEncObject *encvid, uint8 *orgY, int *min_cost); + + /** + This function calculates the cost of each I16 mode. + \param "org" "Pointer to the original luma MB." + \param "org_pitch" "Stride size of the original frame." + \param "pred" "Pointer to the prediction values." + \param "min_cost" "Minimal cost so-far." + \return "Cost" + */ + + int cost_i16(uint8 *org, int org_pitch, uint8 *pred, int min_cost); + + /** + This function generates all the I4 prediction modes and select the best one + for all the blocks inside a macroblock.It also calls dct_luma to generate the reconstructed + MB, and transform coefficients to be encoded. + \param "encvid" "Pointer to AVCEncObject." + \param "min_cost" "Pointer to the minimal cost so-far." + \return "void" + */ + void mb_intra4x4_search(AVCEncObject *encvid, int *min_cost); + + /** + This function calculates the most probable I4 mode of a given 4x4 block + from neighboring informationaccording to AVC/H.264 standard. + \param "video" "Pointer to AVCCommonObj." + \param "blkidx" "The current block index." + \return "Most probable mode." + */ + int FindMostProbableI4Mode(AVCCommonObj *video, int blkidx); + + /** + This function is where a lot of actions take place in the 4x4 block level inside + mb_intra4x4_search. + \param "encvid" "Pointer to AVCEncObject." + \param "blkidx" "The current 4x4 block index." + \param "cur" "Pointer to the reconstructed block." + \param "org" "Pointer to the original block." + \return "Minimal cost, also set currMB->i4Mode" + */ + int blk_intra4x4_search(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org); + + /** + This function calculates the cost of a given I4 prediction mode. + \param "org" "Pointer to the original block." + \param "org_pitch" "Stride size of the original frame." + \param "pred" "Pointer to the prediction block. (encvid->pred_i4)" + \param "cost" "Pointer to the minimal cost (to be updated)." + \return "void" + */ + void cost_i4(uint8 *org, int org_pitch, uint8 *pred, uint16 *cost); + + /** + This function performs chroma intra search. Each mode is saved in encvid->pred_ic. + \param "encvid" "Pointer to AVCEncObject." + \return "void" + */ + void chroma_intra_search(AVCEncObject *encvid); + + /** + This function calculates the cost of a chroma prediction mode. + \param "orgCb" "Pointer to the original Cb block." 
+ \param "orgCr" "Pointer to the original Cr block." + \param "org_pitch" "Stride size of the original frame." + \param "pred" "Pointer to the prediction block (encvid->pred_ic)" + \param "mincost" "Minimal cost so far." + \return "Cost." + */ + + int SATDChroma(uint8 *orgCb, uint8 *orgCr, int org_pitch, uint8 *pred, int mincost); + + /*-------------- motion_comp.c ---------------*/ + + /** + This is a main function to peform inter prediction. + \param "encvid" "Pointer to AVCEncObject." + \param "video" "Pointer to AVCCommonObj." + \return "void". + */ + void AVCMBMotionComp(AVCEncObject *encvid, AVCCommonObj *video); + + + /** + This function is called for luma motion compensation. + \param "ref" "Pointer to the origin of a reference luma." + \param "picwidth" "Width of the picture." + \param "picheight" "Height of the picture." + \param "x_pos" "X-coordinate of the predicted block in quarter pel resolution." + \param "y_pos" "Y-coordinate of the predicted block in quarter pel resolution." + \param "pred" "Pointer to the output predicted block." + \param "pred_pitch" "Width of pred." + \param "blkwidth" "Width of the current partition." + \param "blkheight" "Height of the current partition." + \return "void" + */ + void eLumaMotionComp(uint8 *ref, int picwidth, int picheight, + int x_pos, int y_pos, + uint8 *pred, int pred_pitch, + int blkwidth, int blkheight); + + void eFullPelMC(uint8 *in, int inwidth, uint8 *out, int outpitch, + int blkwidth, int blkheight); + + void eHorzInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dx); + + void eHorzInterp2MC(int *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dx); + + void eHorzInterp3MC(uint8 *in, int inpitch, int *out, int outpitch, + int blkwidth, int blkheight); + + void eVertInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dy); + + void eVertInterp2MC(uint8 *in, int inpitch, int *out, int outpitch, + int blkwidth, int blkheight); + + void eVertInterp3MC(int *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dy); + + void eDiagonalInterpMC(uint8 *in1, uint8 *in2, int inpitch, + uint8 *out, int outpitch, + int blkwidth, int blkheight); + + void eChromaMotionComp(uint8 *ref, int picwidth, int picheight, + int x_pos, int y_pos, uint8 *pred, int pred_pitch, + int blkwidth, int blkheight); + + void eChromaDiagonalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight); + + void eChromaHorizontalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight); + + void eChromaVerticalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight); + + void eChromaFullMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight); + + void eChromaVerticalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight); + + void eChromaHorizontalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight); + + void eChromaDiagonalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight); + + + /*-------------- motion_est.c ---------------*/ + + /** + Allocate and initialize arrays necessary for motion search algorithm. + \param "envid" "Pointer to AVCEncObject." 
+ \return "AVC_SUCCESS or AVC_MEMORY_FAIL." + */ + AVCEnc_Status InitMotionSearchModule(AVCHandle *avcHandle); + + /** + Clean up memory allocated in InitMotionSearchModule. + \param "envid" "Pointer to AVCEncObject." + \return "void." + */ + void CleanMotionSearchModule(AVCHandle *avcHandle); + + + /** + This function performs motion estimation of all macroblocks in a frame during the InitFrame. + The goal is to find the best MB partition for inter and find out if intra search is needed for + any MBs. This intra MB tendency can be used for scene change detection. + \param "encvid" "Pointer to AVCEncObject." + \return "void" + */ + void AVCMotionEstimation(AVCEncObject *encvid); + + /** + This function performs repetitive edge padding to the reference picture by adding 16 pixels + around the luma and 8 pixels around the chromas. + \param "refPic" "Pointer to the reference picture." + \return "void" + */ + void AVCPaddingEdge(AVCPictureData *refPic); + + /** + This function keeps track of intra refresh macroblock locations. + \param "encvid" "Pointer to the global array structure AVCEncObject." + \param "mblock" "Pointer to the array of AVCMacroblock structures." + \param "totalMB" "Total number of MBs in a frame." + \param "numRefresh" "Number of MB to be intra refresh in a single frame." + \return "void" + */ + void AVCRasterIntraUpdate(AVCEncObject *encvid, AVCMacroblock *mblock, int totalMB, int numRefresh); + +#ifdef HTFM + void InitHTFM(VideoEncData *encvid, HTFM_Stat *htfm_stat, double *newvar, int *collect); + void UpdateHTFM(AVCEncObject *encvid, double *newvar, double *exp_lamda, HTFM_Stat *htfm_stat); + void CalcThreshold(double pf, double exp_lamda[], int nrmlz_th[]); + void HTFMPrepareCurMB_AVC(AVCEncObject *encvid, HTFM_Stat *htfm_stat, uint8 *cur, int pitch); +#endif + + /** + This function reads the input MB into a smaller faster memory space to minimize the cache miss. + \param "encvid" "Pointer to the global AVCEncObject." + \param "cur" "Pointer to the original input macroblock." + \param "pitch" "Stride size of the input frame (luma)." + \return "void" + */ + void AVCPrepareCurMB(AVCEncObject *encvid, uint8 *cur, int pitch); + + /** + Performs motion vector search for a macroblock. + \param "encvid" "Pointer to AVCEncObject structure." + \param "cur" "Pointer to the current macroblock in the input frame." + \param "best_cand" "Array of best candidates (to be filled in and returned)." + \param "i0" "X-coordinate of the macroblock." + \param "j0" "Y-coordinate of the macroblock." + \param "type_pred" "Indicates the type of operations." + \param "FS_en" "Flag for fullsearch enable." + \param "hp_guess" "Guess for half-pel search." + \return "void" + */ + void AVCMBMotionSearch(AVCEncObject *encvid, uint8 *cur, uint8 *best_cand[], + int i0, int j0, int type_pred, int FS_en, int *hp_guess); + +//AVCEnc_Status AVCMBMotionSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum, +// int num_pass); + + /** + Perform full-pel exhaustive search around the predicted MV. + \param "encvid" "Pointer to AVCEncObject structure." + \param "prev" "Pointer to the reference frame." + \param "cur" "Pointer to the input macroblock." + \param "imin" "Pointer to minimal mv (x)." + \param "jmin" "Pointer to minimal mv (y)." + \param "ilow, ihigh, jlow, jhigh" "Lower bound on search range." + \param "cmvx, cmvy" "Predicted MV value." + + \return "The cost function of the best candidate." 
+ */ + int AVCFullSearch(AVCEncObject *encvid, uint8 *prev, uint8 *cur, + int *imin, int *jmin, int ilow, int ihigh, int jlow, int jhigh, + int cmvx, int cmvy); + + /** + Select candidates from neighboring blocks according to the type of the + prediction selection. + \param "mvx" "Pointer to the candidate, x-coordinate." + \param "mvy" "Pointer to the candidate, y-coordinate." + \param "num_can" "Pointer to the number of candidates returned." + \param "imb" "The MB index x-coordinate." + \param "jmb" "The MB index y-coordinate." + \param "type_pred" "Type of the prediction." + \param "cmvx, cmvy" "Pointer to predicted MV (modified version)." + \return "void." + */ + void AVCCandidateSelection(int *mvx, int *mvy, int *num_can, int imb, int jmb, + AVCEncObject *encvid, int type_pred, int *cmvx, int *cmvy); + + /** + Utility function to move the values in the array dn according to the new + location to avoid redundant calculation. + \param "dn" "Array of integer of size 9." + \param "new_loc" "New location index." + \return "void." + */ + void AVCMoveNeighborSAD(int dn[], int new_loc); + + /** + Find minimum index of dn. + \param "dn" "Array of integer of size 9." + \return "The index of dn with the smallest dn[] value." + */ + int AVCFindMin(int dn[]); + + + /*------------- findhalfpel.c -------------------*/ + + /** + Search for the best half-pel resolution MV around the full-pel MV. + \param "encvid" "Pointer to the global AVCEncObject structure." + \param "cur" "Pointer to the current macroblock." + \param "mot" "Pointer to the AVCMV array of the frame." + \param "ncand" "Pointer to the origin of the fullsearch result." + \param "xpos" "The current MB position in x." + \param "ypos" "The current MB position in y." + \param "hp_guess" "Input to help speedup the search." + \param "cmvx, cmvy" "Predicted motion vector use for mvcost." + \return "Minimal cost (SATD) without MV cost. (for rate control purpose)" + */ + int AVCFindHalfPelMB(AVCEncObject *encvid, uint8 *cur, AVCMV *mot, uint8 *ncand, + int xpos, int ypos, int hp_guess, int cmvx, int cmvy); + + /** + This function generates sub-pel pixels required to do subpel MV search. + \param "subpel_pred" "Pointer to 2-D array, each array for each position." + \param "ncand" "Pointer to the full-pel center position in ref frame." + \param "lx" "Pitch of the ref frame." + \return "void" + */ + void GenerateHalfPelPred(uint8 *subpel_pred, uint8 *ncand, int lx); + + /** + This function calculate vertical interpolation at half-point of size 4x17. + \param "dst" "Pointer to destination." + \param "ref" "Pointer to the starting reference pixel." + \return "void." + */ + void VertInterpWClip(uint8 *dst, uint8 *ref); + + /** + This function generates quarter-pel pixels around the best half-pel result + during the sub-pel MV search. + \param "bilin_base" "Array of pointers to be used as basis for q-pel interp." + \param "qpel_pred" "Array of pointers pointing to quarter-pel candidates." + \param "hpel_pos" "Best half-pel position at the center." + \return "void" + */ + void GenerateQuartPelPred(uint8 **bilin_base, uint8 *qpel_pred, int hpel_pos); + + /** + This function calculates the SATD of a subpel candidate. + \param "cand" "Pointer to a candidate." + \param "cur" "Pointer to the current block." + \param "dmin" "Min-so-far SATD." + \return "Sum of Absolute Transformed Difference." + */ + int SATD_MB(uint8 *cand, uint8 *cur, int dmin); + + /*------------- rate_control.c -------------------*/ + + /** This function is a utility function. 
It returns average QP of the previously encoded frame. + \param "rateCtrl" "Pointer to AVCRateControl structure." + \return "Average QP." + */ + int GetAvgFrameQP(AVCRateControl *rateCtrl); + + /** + This function takes the timestamp of the input and determine whether it should be encoded + or skipped. + \param "encvid" "Pointer to the AVCEncObject structure." + \param "rateCtrl" "Pointer to the AVCRateControl structure." + \param "modTime" "The 32 bit timestamp of the input frame." + \param "frameNum" "Pointer to the frame number if to be encoded." + \return "AVC_SUCCESS or else." + */ + AVCEnc_Status RCDetermineFrameNum(AVCEncObject *encvid, AVCRateControl *rateCtrl, uint32 modTime, uint *frameNum); + + /** + This function updates the buffer fullness when frames are dropped either by the + rate control algorithm or by the users to make sure that target bit rate is still met. + \param "video" "Pointer to the common object structure." + \param "rateCtrl" "Pointer to rate control structure." + \param "frameInc" "Difference of the current frame number and previous frame number." + \return "void." + */ + void RCUpdateBuffer(AVCCommonObj *video, AVCRateControl *rateCtrl, int frameInc); + + /** + This function initializes rate control module and allocates necessary bufferes to do the job. + \param "avcHandle" "Pointer to the encoder handle." + \return "AVCENC_SUCCESS or AVCENC_MEMORY_FAIL." + */ + AVCEnc_Status InitRateControlModule(AVCHandle *avcHandle); + + /** + This function frees buffers allocated in InitRateControlModule. + \param "avcHandle" "Pointer to the encoder handle." + \return "void." + */ + void CleanupRateControlModule(AVCHandle *avcHandle); + + /** + This function is called at the beginning of each GOP or the first IDR frame. It calculates + target bits for a GOP. + \param "encvid" "Pointer to the encoder object." + \return "void." + */ + void RCInitGOP(AVCEncObject *encvid); + + /** + This function calculates target bits for a particular frame. + \param "video" "Pointer to the AVCEncObject structure." + \return "void" + */ + void RCInitFrameQP(AVCEncObject *video); + + /** + This function calculates QP for the upcoming frame or basic unit. + \param "encvid" "Pointer to the encoder object." + \param "rateCtrl" "Pointer to the rate control object." + \return "QP value ranging from 0-51." + */ + int RCCalculateQP(AVCEncObject *encvid, AVCRateControl *rateCtrl); + + /** + This function translates the luma QP to chroma QP and calculates lambda based on QP. + \param "video" "Pointer to the AVCEncObject structure." + \return "void" + */ + void RCInitChromaQP(AVCEncObject *encvid); + + /** + This function is called before encoding each macroblock. + \param "encvid" "Pointer to the encoder object." + \return "void." + */ + void RCInitMBQP(AVCEncObject *encvid); + + /** + This function updates bits usage stats after encoding an macroblock. + \param "video" "Pointer to AVCCommonObj." + \param "rateCtrl" "Pointer to AVCRateControl." + \param "num_header_bits" "Number of bits used for MB header." + \param "num_texture_bits" "Number of bits used for MB texture." + \return "void" + */ + void RCPostMB(AVCCommonObj *video, AVCRateControl *rateCtrl, int num_header_bits, int num_texture_bits); + + /** + This function calculates the difference between prediction and original MB. + \param "encvid" "Pointer to the encoder object." + \param "currMB" "Pointer to the current macroblock structure." + \param "orgL" "Pointer to the original MB." 
+ \param "orgPitch" "Pointer to the original picture pitch." + \return "void." + */ + void RCCalculateMAD(AVCEncObject *encvid, AVCMacroblock *currMB, uint8 *orgL, int orgPitch); + + /** + Restore QP related parameters of previous MB when current MB is skipped. + \param "currMB" "Pointer to the current macroblock." + \param "video" "Pointer to the common video structure." + \param "encvid" "Pointer to the global encoding structure." + \return "void" + */ + void RCRestoreQP(AVCMacroblock *currMB, AVCCommonObj *video, AVCEncObject *encvid); + + /** + This function is called after done with a frame. + \param "encvid" "Pointer to the encoder object." + \return "AVCENC_SUCCESS or AVCENC_SKIPPED_PICTURE when bufer overflow (need to discard current frame)." + */ + AVCEnc_Status RCUpdateFrame(AVCEncObject *encvid); + + /*--------- residual.c -------------------*/ + + /** + This function encodes the intra pcm data and fill it in the corresponding location + on the current picture. + \param "video" "Pointer to AVCEncObject." + \return "AVCENC_SUCCESS if success, or else for bitstream errors." + */ + AVCEnc_Status EncodeIntraPCM(AVCEncObject *video); + + /** + This function performs CAVLC syntax encoding on the run and level information of the coefficients. + The level and run arrays are elements in AVCEncObject structure, populated by TransQuantZZ, + TransQuantIntraDC and TransQuantChromaDC functions. + \param "video" "Pointer to AVCEncObject." + \param "type" "One of AVCResidualType for a particular 4x4 block." + \param "bindx" "Block index or number of nonzero coefficients for AVC_Intra16DC and AVC_ChromaDC mode." + \param "currMB" "Pointer to the current macroblock structure." + \return "AVCENC_SUCCESS for success." + \Note "This function has 32-bit machine specific instruction!!!!" + */ + AVCEnc_Status enc_residual_block(AVCEncObject *encvid, AVCResidualType type, int bindx, AVCMacroblock *currMB); + + + /*------------- sad.c ---------------------------*/ + + + int AVCSAD_MB_HalfPel_Cxhyh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + int AVCSAD_MB_HalfPel_Cyh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + int AVCSAD_MB_HalfPel_Cxh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + int AVCSAD_Macroblock_C(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + +#ifdef HTFM /* 3/2/1, Hypothesis Testing Fast Matching */ + int AVCSAD_MB_HP_HTFM_Collectxhyh(uint8 *ref, uint8 *blk, int dmin_x, void *extra_info); + int AVCSAD_MB_HP_HTFM_Collectyh(uint8 *ref, uint8 *blk, int dmin_x, void *extra_info); + int AVCSAD_MB_HP_HTFM_Collectxh(uint8 *ref, uint8 *blk, int dmin_x, void *extra_info); + int AVCSAD_MB_HP_HTFMxhyh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + int AVCSAD_MB_HP_HTFMyh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + int AVCSAD_MB_HP_HTFMxh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + int AVCSAD_MB_HTFM_Collect(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + int AVCSAD_MB_HTFM(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); +#endif + + + /*------------- slice.c -------------------------*/ + + /** + This function performs the main encoding loop for a slice. + \param "encvid" "Pointer to AVCEncObject." + \return "AVCENC_SUCCESS for success, AVCENC_PICTURE_READY for end-of-picture and + AVCENC_FAIL or AVCENC_SLICE_EMPTY otherwise." + */ + AVCEnc_Status AVCEncodeSlice(AVCEncObject *encvid); + + /** + This function performs the main encoding operation for one macroblock. 
+ \param "video" "pointer to AVCEncObject." + \return "AVCENC_SUCCESS for success, or other bitstream related failure status." + */ + AVCEnc_Status EncodeMB(AVCEncObject *video); + + /** + This function calls prediction INTRA/INTER functions, transform, + quantization and zigzag scanning to get the run-level symbols. + \param "encvid" "pointer to AVCEncObject." + \param "curL" "pointer to Luma component of the current frame. + \param "curCb" "pointer to Cb component of the current frame. + \param "curCr" "pointer to Cr component of the current frame. + \return "void for now." + */ + void MBPredTransQuantZZ(AVCEncObject *encvid, uint8 *curL, uint8 *curCb, uint8 *curCr); + + /** + This function copies the content of the prediction MB into the reconstructed YUV + frame directly. + \param "curL" "Pointer to the destination Y component." + \param "curCb" "Pointer to the destination Cb component." + \param "curCr" "Pointer to the destination Cr component." + \param "predBlock" "Pointer to the prediction MB." + \param "picWidth" "The width of the frame." + \return "None." + */ + void Copy_MB(uint8 *curL, uint8 *curCb, uint8 *curCr, uint8 *predBlock, int picWidth); + + /** + This function encodes the mb_type, CBP, prediction mode, ref idx and MV. + \param "currMB" "Pointer to the current macroblock structure." + \param "video" "Pointer to the AVCEncObject structure." + \return "AVCENC_SUCCESS for success or else for fail." + */ + AVCEnc_Status EncodeMBHeader(AVCMacroblock *currMB, AVCEncObject *video); + + /** + This function finds the right mb_type for a macroblock given the mbMode, CBP, + NumPart, PredPartMode. + \param "currMB" "Pointer to the current macroblock structure." + \param "slice_type" "Value of the slice_type." + \return "mb_type." + */ + uint InterpretMBType(AVCMacroblock *currMB, int slice_type); + + /** + This function encodes the mb_pred part of the macroblock data. + \param "video" "Pointer to the AVCCommonObj structure." + \param "currMB" "Pointer to the current macroblock structure." + \param "stream" "Pointer to the AVCEncBitstream structure." + \return "AVCENC_SUCCESS for success or bitstream fail status." + */ + AVCEnc_Status mb_pred(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream); + + /** + This function encodes the sub_mb_pred part of the macroblock data. + \param "video" "Pointer to the AVCCommonObj structure." + \param "currMB" "Pointer to the current macroblock structure." + \param "stream" "Pointer to the AVCEncBitstream structure." + \return "AVCENC_SUCCESS for success or bitstream fail status." + */ + AVCEnc_Status sub_mb_pred(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream); + + /** + This function interprets the sub_mb_type and sets necessary information + when the slice type is AVC_P_SLICE. + in the macroblock structure. + \param "mblock" "Pointer to current AVCMacroblock." + \param "sub_mb_type" "From the syntax bitstream." + \return "void" + */ + void InterpretSubMBTypeP(AVCMacroblock *mblock, uint *sub_mb_type); + + /** + This function interprets the sub_mb_type and sets necessary information + when the slice type is AVC_B_SLICE. + in the macroblock structure. + \param "mblock" "Pointer to current AVCMacroblock." + \param "sub_mb_type" "From the syntax bitstream." + \return "void" + */ + void InterpretSubMBTypeB(AVCMacroblock *mblock, uint *sub_mb_type); + + /** + This function encodes intra 4x4 mode. It calculates the predicted I4x4 mode and the + remnant to be encoded. 
+ \param "video" "Pointer to AVCEncObject structure." + \param "currMB" "Pointer to the AVCMacroblock structure." + \param "stream" "Pointer to AVCEncBitstream sructure." + \return "AVCENC_SUCCESS for success." + */ + AVCEnc_Status EncodeIntra4x4Mode(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream); + + /*------------- vlc_encode.c -----------------------*/ + /** + This function encodes and writes a value into an Exp-Golomb codeword. + \param "bitstream" "Pointer to AVCEncBitstream." + \param "codeNum" "Pointer to the value of the codeNum." + \return "AVCENC_SUCCESS for success or bitstream error messages for fail." + */ + AVCEnc_Status ue_v(AVCEncBitstream *bitstream, uint codeNum); + + /** + This function maps and encodes signed Exp-Golomb codes. + \param "bitstream" "Pointer to AVCEncBitstream." + \param "value" "Pointer to syntax element value." + \return "AVCENC_SUCCESS or AVCENC_FAIL." + */ + AVCEnc_Status se_v(AVCEncBitstream *bitstream, int value); + + /** + This function maps and encodes truncated Exp-Golomb codes. + \param "bitstream" "Pointer to AVCEncBitstream." + \param "value" "Pointer to syntax element value." + \param "range" "Range of the value as input to determine the algorithm." + \return "AVCENC_SUCCESS or AVCENC_FAIL." + */ + AVCEnc_Status te_v(AVCEncBitstream *bitstream, uint value, uint range); + + /** + This function creates Exp-Golomb codeword from codeNum. + \param "bitstream" "Pointer to AVCEncBitstream." + \param "codeNum" "Pointer to the codeNum value." + \return "AVCENC_SUCCESS for success or bitstream error messages for fail." + */ + AVCEnc_Status SetEGBitstring(AVCEncBitstream *bitstream, uint codeNum); + + /** + This function performs CAVLC encoding of the CBP (coded block pattern) of a macroblock + by calling ue_v() and then mapping the CBP to the corresponding VLC codeNum. + \param "currMB" "Pointer to the current AVCMacroblock structure." + \param "stream" "Pointer to the AVCEncBitstream." + \return "void" + */ + AVCEnc_Status EncodeCBP(AVCMacroblock *currMB, AVCEncBitstream *stream); + + /** + This function encodes trailing ones and total coefficient. + \param "stream" "Pointer to the AVCEncBitstream." + \param "TrailingOnes" "The trailing one variable output." + \param "TotalCoeff" "The total coefficient variable output." + \param "nC" "Context for number of nonzero coefficient (prediction context)." + \return "AVCENC_SUCCESS for success or else for bitstream failure." + */ + AVCEnc_Status ce_TotalCoeffTrailingOnes(AVCEncBitstream *stream, int TrailingOnes, int TotalCoeff, int nC); + + /** + This function encodes trailing ones and total coefficient for chroma DC block. + \param "stream" "Pointer to the AVCEncBitstream." + \param "TrailingOnes" "The trailing one variable output." + \param "TotalCoeff" "The total coefficient variable output." + \return "AVCENC_SUCCESS for success or else for bitstream failure." + */ + AVCEnc_Status ce_TotalCoeffTrailingOnesChromaDC(AVCEncBitstream *stream, int TrailingOnes, int TotalCoeff); + + /** + This function encodes total_zeros value as in Table 9-7 and 9-8. + \param "stream" "Pointer to the AVCEncBitstream." + \param "TotalZeros" "The total_zeros value." + \param "TotalCoeff" "The total coefficient variable output." + \return "AVCENC_SUCCESS for success or else for bitstream failure." + */ + AVCEnc_Status ce_TotalZeros(AVCEncBitstream *stream, int total_zeros, int TotalCoeff); + + /** + This function encodes total_zeros VLC syntax for chroma DC as in Table 9-9. 
+ \param "stream" "Pointer to the AVCEncBitstream." + \param "TotalZeros" "The total_zeros value." + \param "TotalCoeff" "The total coefficient variable output." + \return "AVCENC_SUCCESS for success or else for bitstream failure." + */ + AVCEnc_Status ce_TotalZerosChromaDC(AVCEncBitstream *stream, int total_zeros, int TotalCoeff); + + /** + This function encodes run_before VLC syntax as in Table 9-10. + \param "stream" "Pointer to the AVCEncBitstream." + \param "run_before" "The run_before value." + \param "zerosLeft" "The context for number of zeros left." + \return "AVCENC_SUCCESS for success or else for bitstream failure." + */ + AVCEnc_Status ce_RunBefore(AVCEncBitstream *stream, int run_before, int zerosLeft); + +#ifdef __cplusplus +} +#endif + + +#endif /* _AVCENC_LIB_H_ */ + diff --git a/media/libstagefright/codecs/avc/enc/src/bitstream_io.cpp b/media/libstagefright/codecs/avc/enc/src/bitstream_io.cpp new file mode 100644 index 0000000..75ab514 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/bitstream_io.cpp @@ -0,0 +1,336 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + +#define WORD_SIZE 32 + +/* array for trailing bit pattern as function of number of bits */ +/* the first one is unused. */ +const static uint8 trailing_bits[9] = {0, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; + +/* ======================================================================== */ +/* Function : BitstreamInit() */ +/* Date : 11/4/2003 */ +/* Purpose : Populate bitstream structure with bitstream buffer and size */ +/* it also initializes internal data */ +/* In/out : */ +/* Return : AVCENC_SUCCESS if successed, AVCENC_FAIL if failed. 
*/ +/* Modified : */ +/* ======================================================================== */ +/* |--------|--------|----~~~~~-----|---------|---------|---------| + ^ ^write_pos ^buf_size + bitstreamBuffer <---------> + current_word + + |-----xxxxxxxxxxxxx| = current_word 32 or 16 bits + <----> + bit_left + ======================================================================== */ + +AVCEnc_Status BitstreamEncInit(AVCEncBitstream *stream, uint8 *buffer, int buf_size, + uint8 *overrunBuffer, int oBSize) +{ + if (stream == NULL || buffer == NULL || buf_size <= 0) + { + return AVCENC_BITSTREAM_INIT_FAIL; + } + + stream->bitstreamBuffer = buffer; + + stream->buf_size = buf_size; + + stream->write_pos = 0; + + stream->count_zeros = 0; + + stream->current_word = 0; + + stream->bit_left = WORD_SIZE; + + stream->overrunBuffer = overrunBuffer; + + stream->oBSize = oBSize; + + return AVCENC_SUCCESS; +} + +/* ======================================================================== */ +/* Function : AVCBitstreamSaveWord() */ +/* Date : 3/29/2004 */ +/* Purpose : Save the current_word into the buffer, byte-swap, and */ +/* add emulation prevention insertion. */ +/* In/out : */ +/* Return : AVCENC_SUCCESS if successed, AVCENC_WRITE_FAIL if buffer is */ +/* full. */ +/* Modified : */ +/* ======================================================================== */ +AVCEnc_Status AVCBitstreamSaveWord(AVCEncBitstream *stream) +{ + int num_bits; + uint8 *write_pnt, byte; + uint current_word; + + /* check number of bytes in current_word, must always be byte-aligned!!!! */ + num_bits = WORD_SIZE - stream->bit_left; /* must be multiple of 8 !!*/ + + if (stream->buf_size - stream->write_pos <= (num_bits >> 3) + 2) /* 2 more bytes for possible EPBS */ + { + if (AVCENC_SUCCESS != AVCBitstreamUseOverrunBuffer(stream, (num_bits >> 3) + 2)) + { + return AVCENC_BITSTREAM_BUFFER_FULL; + } + } + + /* write word, byte-by-byte */ + write_pnt = stream->bitstreamBuffer + stream->write_pos; + current_word = stream->current_word; + while (num_bits) /* no need to check stream->buf_size and stream->write_pos, taken care already */ + { + num_bits -= 8; + byte = (current_word >> num_bits) & 0xFF; + if (byte != 0) + { + *write_pnt++ = byte; + stream->write_pos++; + stream->count_zeros = 0; + } + else + { + stream->count_zeros++; + *write_pnt++ = byte; + stream->write_pos++; + if (stream->count_zeros == 2) + { /* for num_bits = 32, this can add 2 more bytes extra for EPBS */ + *write_pnt++ = 0x3; + stream->write_pos++; + stream->count_zeros = 0; + } + } + } + + /* reset current_word and bit_left */ + stream->current_word = 0; + stream->bit_left = WORD_SIZE; + + return AVCENC_SUCCESS; +} + +/* ======================================================================== */ +/* Function : BitstreamWriteBits() */ +/* Date : 3/29/2004 */ +/* Purpose : Write up to machine word. */ +/* In/out : Unused bits in 'code' must be all zeros. */ +/* Return : AVCENC_SUCCESS if successed, AVCENC_WRITE_FAIL if buffer is */ +/* full. 
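+             (When nBits exceeds the bits left in the cache word, the word is flushed first and the remaining bits start a new cache word.)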
*/ +/* Modified : */ +/* ======================================================================== */ +AVCEnc_Status BitstreamWriteBits(AVCEncBitstream *stream, int nBits, uint code) +{ + AVCEnc_Status status = AVCENC_SUCCESS; + int bit_left = stream->bit_left; + uint current_word = stream->current_word; + + //DEBUG_LOG(userData,AVC_LOGTYPE_INFO,"BitstreamWriteBits",nBits,-1); + + if (nBits > WORD_SIZE) /* has to be taken care of specially */ + { + return AVCENC_FAIL; /* for now */ + /* otherwise, break it down to 2 write of less than 16 bits at a time. */ + } + + if (nBits <= bit_left) /* more bits left in current_word */ + { + stream->current_word = (current_word << nBits) | code; + stream->bit_left -= nBits; + if (stream->bit_left == 0) /* prepare for the next word */ + { + status = AVCBitstreamSaveWord(stream); + return status; + } + } + else + { + stream->current_word = (current_word << bit_left) | (code >> (nBits - bit_left)); + + nBits -= bit_left; + + stream->bit_left = 0; + + status = AVCBitstreamSaveWord(stream); /* save current word */ + + stream->bit_left = WORD_SIZE - nBits; + + stream->current_word = code; /* no extra masking for code, must be handled before saving */ + } + + return status; +} + + +/* ======================================================================== */ +/* Function : BitstreamWrite1Bit() */ +/* Date : 3/30/2004 */ +/* Purpose : Write 1 bit */ +/* In/out : Unused bits in 'code' must be all zeros. */ +/* Return : AVCENC_SUCCESS if successed, AVCENC_WRITE_FAIL if buffer is */ +/* full. */ +/* Modified : */ +/* ======================================================================== */ +AVCEnc_Status BitstreamWrite1Bit(AVCEncBitstream *stream, uint code) +{ + AVCEnc_Status status; + uint current_word = stream->current_word; + + //DEBUG_LOG(userData,AVC_LOGTYPE_INFO,"BitstreamWrite1Bit",code,-1); + + //if(1 <= bit_left) /* more bits left in current_word */ + /* we can assume that there always be positive bit_left in the current word */ + stream->current_word = (current_word << 1) | code; + stream->bit_left--; + if (stream->bit_left == 0) /* prepare for the next word */ + { + status = AVCBitstreamSaveWord(stream); + return status; + } + + return AVCENC_SUCCESS; +} + + +/* ======================================================================== */ +/* Function : BitstreamTrailingBits() */ +/* Date : 3/31/2004 */ +/* Purpose : Add trailing bits and report the final EBSP size. */ +/* In/out : */ +/* Return : AVCENC_SUCCESS if successed, AVCENC_WRITE_FAIL if buffer is */ +/* full. */ +/* Modified : */ +/* ======================================================================== */ +AVCEnc_Status BitstreamTrailingBits(AVCEncBitstream *bitstream, uint *nal_size) +{ + (void)(nal_size); + + AVCEnc_Status status; + int bit_left = bitstream->bit_left; + + bit_left &= 0x7; /* modulo by 8 */ + if (bit_left == 0) bit_left = 8; + /* bitstream->bit_left == 0 cannot happen here since it would have been Saved already */ + + status = BitstreamWriteBits(bitstream, bit_left, trailing_bits[bit_left]); + + if (status != AVCENC_SUCCESS) + { + return status; + } + + /* if it's not saved, save it. 
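+       If the preceding write already flushed a full word, the cache is empty and AVCBitstreamSaveWord() below writes nothing, so calling it unconditionally is harmless.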
*/ + //if(bitstream->bit_left<(WORD_SIZE<<3)) /* in fact, no need to check */ + { + status = AVCBitstreamSaveWord(bitstream); + } + + return status; +} + +/* check whether it's byte-aligned */ +bool byte_aligned(AVCEncBitstream *stream) +{ + if (stream->bit_left % 8) + return false; + else + return true; +} + + +/* determine whether overrun buffer can be used or not */ +AVCEnc_Status AVCBitstreamUseOverrunBuffer(AVCEncBitstream* stream, int numExtraBytes) +{ + AVCEncObject *encvid = (AVCEncObject*)stream->encvid; + + if (stream->overrunBuffer != NULL) // overrunBuffer is set + { + if (stream->bitstreamBuffer != stream->overrunBuffer) // not already used + { + if (stream->write_pos + numExtraBytes >= stream->oBSize) + { + stream->oBSize = stream->write_pos + numExtraBytes + 100; + stream->oBSize &= (~0x3); // make it multiple of 4 + + // allocate new overrun Buffer + if (encvid->overrunBuffer) + { + encvid->avcHandle->CBAVC_Free((uint32*)encvid->avcHandle->userData, + (int)encvid->overrunBuffer); + } + + encvid->oBSize = stream->oBSize; + encvid->overrunBuffer = (uint8*) encvid->avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, + stream->oBSize, DEFAULT_ATTR); + + stream->overrunBuffer = encvid->overrunBuffer; + if (stream->overrunBuffer == NULL) + { + return AVCENC_FAIL; + } + } + + // copy everything to overrun buffer and start using it. + memcpy(stream->overrunBuffer, stream->bitstreamBuffer, stream->write_pos); + stream->bitstreamBuffer = stream->overrunBuffer; + stream->buf_size = stream->oBSize; + } + else // overrun buffer is already used + { + stream->oBSize = stream->write_pos + numExtraBytes + 100; + stream->oBSize &= (~0x3); // make it multiple of 4 + + // allocate new overrun buffer + encvid->oBSize = stream->oBSize; + encvid->overrunBuffer = (uint8*) encvid->avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, + stream->oBSize, DEFAULT_ATTR); + + if (encvid->overrunBuffer == NULL) + { + return AVCENC_FAIL; + } + + + // copy from the old buffer to new buffer + memcpy(encvid->overrunBuffer, stream->overrunBuffer, stream->write_pos); + // free old buffer + encvid->avcHandle->CBAVC_Free((uint32*)encvid->avcHandle->userData, + (int)stream->overrunBuffer); + + // assign pointer to new buffer + stream->overrunBuffer = encvid->overrunBuffer; + stream->bitstreamBuffer = stream->overrunBuffer; + stream->buf_size = stream->oBSize; + } + + return AVCENC_SUCCESS; + } + else // overrunBuffer is not enable. + { + return AVCENC_FAIL; + } + +} + + + diff --git a/media/libstagefright/codecs/avc/enc/src/block.cpp b/media/libstagefright/codecs/avc/enc/src/block.cpp new file mode 100644 index 0000000..01e26a6 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/block.cpp @@ -0,0 +1,1283 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + +/* subtract with the prediction and do transformation */ +void trans(uint8 *cur, int pitch, uint8 *predBlock, int16 *dataBlock) +{ + int16 *ptr = dataBlock; + int r0, r1, r2, r3, j; + int curpitch = (uint)pitch >> 16; + int predpitch = (pitch & 0xFFFF); + + /* horizontal */ + j = 4; + while (j > 0) + { + /* calculate the residue first */ + r0 = cur[0] - predBlock[0]; + r1 = cur[1] - predBlock[1]; + r2 = cur[2] - predBlock[2]; + r3 = cur[3] - predBlock[3]; + + r0 += r3; //ptr[0] + ptr[3]; + r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; + r1 += r2; //ptr[1] + ptr[2]; + r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; + + ptr[0] = r0 + r1; + ptr[2] = r0 - r1; + ptr[1] = (r3 << 1) + r2; + ptr[3] = r3 - (r2 << 1); + + ptr += 16; + predBlock += predpitch; + cur += curpitch; + j--; + } + /* vertical */ + ptr = dataBlock; + j = 4; + while (j > 0) + { + r0 = ptr[0] + ptr[48]; + r3 = ptr[0] - ptr[48]; + r1 = ptr[16] + ptr[32]; + r2 = ptr[16] - ptr[32]; + + ptr[0] = r0 + r1; + ptr[32] = r0 - r1; + ptr[16] = (r3 << 1) + r2; + ptr[48] = r3 - (r2 << 1); + + ptr++; + j--; + } + + return ; +} + + +/* do residue transform quant invquant, invtrans and write output out */ +int dct_luma(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org, int *coef_cost) +{ + AVCCommonObj *video = encvid->common; + int org_pitch = encvid->currInput->pitch; + int pitch = video->currPic->pitch; + int16 *coef = video->block; + uint8 *pred = video->pred_block; // size 16 for a 4x4 block + int pred_pitch = video->pred_pitch; + int r0, r1, r2, r3, j, k, idx; + int *level, *run; + int Qq, Rq, q_bits, qp_const, quant; + int data, lev, zero_run; + int numcoeff; + + coef += ((blkidx & 0x3) << 2) + ((blkidx >> 2) << 6); /* point to the 4x4 block */ + + /* first take a 4x4 transform */ + /* horizontal */ + j = 4; + while (j > 0) + { + /* calculate the residue first */ + r0 = org[0] - pred[0]; /* OPTIMIZEABLE */ + r1 = org[1] - pred[1]; + r2 = org[2] - pred[2]; + r3 = org[3] - pred[3]; + + r0 += r3; //ptr[0] + ptr[3]; + r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; + r1 += r2; //ptr[1] + ptr[2]; + r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; + + coef[0] = r0 + r1; + coef[2] = r0 - r1; + coef[1] = (r3 << 1) + r2; + coef[3] = r3 - (r2 << 1); + + coef += 16; + org += org_pitch; + pred += pred_pitch; + j--; + } + /* vertical */ + coef -= 64; + pred -= (pred_pitch << 2); + j = 4; + while (j > 0) /* OPTIMIZABLE */ + { + r0 = coef[0] + coef[48]; + r3 = coef[0] - coef[48]; + r1 = coef[16] + coef[32]; + r2 = coef[16] - coef[32]; + + coef[0] = r0 + r1; + coef[32] = r0 - r1; + coef[16] = (r3 << 1) + r2; + coef[48] = r3 - (r2 << 1); + + coef++; + j--; + } + + coef -= 4; + + /* quant */ + level = encvid->level[ras2dec[blkidx]]; + run = encvid->run[ras2dec[blkidx]]; + + Rq = video->QPy_mod_6; + Qq = video->QPy_div_6; + qp_const = encvid->qp_const; + q_bits = 15 + Qq; + + zero_run = 0; + numcoeff = 0; + for (k = 0; k < 16; k++) + { + idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */ + data = coef[idx]; + quant = quant_coef[Rq][k]; + if (data > 0) + { + lev = data * quant + qp_const; + } + else + { + lev = -data * quant + qp_const; + } + lev >>= q_bits; + if (lev) + { + *coef_cost += ((lev > 1) ? 
MAX_VALUE : COEFF_COST[DISABLE_THRESHOLDING][zero_run]); + + /* dequant */ + quant = dequant_coefres[Rq][k]; + if (data > 0) + { + level[numcoeff] = lev; + coef[idx] = (lev * quant) << Qq; + } + else + { + level[numcoeff] = -lev; + coef[idx] = (-lev * quant) << Qq; + } + run[numcoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + if (video->currMB->mb_intra) // only do inverse transform with intra block + { + if (numcoeff) /* then do inverse transform */ + { + for (j = 4; j > 0; j--) /* horizontal */ + { + r0 = coef[0] + coef[2]; + r1 = coef[0] - coef[2]; + r2 = (coef[1] >> 1) - coef[3]; + r3 = coef[1] + (coef[3] >> 1); + + coef[0] = r0 + r3; + coef[1] = r1 + r2; + coef[2] = r1 - r2; + coef[3] = r0 - r3; + + coef += 16; + } + + coef -= 64; + for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */ + { + r0 = coef[0] + coef[32]; + r1 = coef[0] - coef[32]; + r2 = (coef[16] >> 1) - coef[48]; + r3 = coef[16] + (coef[48] >> 1); + r0 += r3; + r3 = (r0 - (r3 << 1)); /* r0-r3 */ + r1 += r2; + r2 = (r1 - (r2 << 1)); /* r1-r2 */ + r0 += 32; + r1 += 32; + r2 += 32; + r3 += 32; + + r0 = pred[0] + (r0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = *(pred += pred_pitch) + (r1 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = *(pred += pred_pitch) + (r2 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[pred_pitch] + (r3 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + + *cur = r0; + *(cur += pitch) = r1; + *(cur += pitch) = r2; + cur[pitch] = r3; + cur -= (pitch << 1); + cur++; + pred -= (pred_pitch << 1); + pred++; + coef++; + } + } + else // copy from pred to cur + { + *((uint32*)cur) = *((uint32*)pred); + *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch)); + *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch)); + *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch)); + } + } + + return numcoeff; +} + + +void MBInterIdct(AVCCommonObj *video, uint8 *curL, AVCMacroblock *currMB, int picPitch) +{ + int16 *coef, *coef8 = video->block; + uint8 *cur; // the same as curL + int b8, b4; + int r0, r1, r2, r3, j, blkidx; + + for (b8 = 0; b8 < 4; b8++) + { + cur = curL; + coef = coef8; + + if (currMB->CBP&(1 << b8)) + { + for (b4 = 0; b4 < 4; b4++) + { + blkidx = blkIdx2blkXY[b8][b4]; + /* do IDCT */ + if (currMB->nz_coeff[blkidx]) + { + for (j = 4; j > 0; j--) /* horizontal */ + { + r0 = coef[0] + coef[2]; + r1 = coef[0] - coef[2]; + r2 = (coef[1] >> 1) - coef[3]; + r3 = coef[1] + (coef[3] >> 1); + + coef[0] = r0 + r3; + coef[1] = r1 + r2; + coef[2] = r1 - r2; + coef[3] = r0 - r3; + + coef += 16; + } + + coef -= 64; + for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */ + { + r0 = coef[0] + coef[32]; + r1 = coef[0] - coef[32]; + r2 = (coef[16] >> 1) - coef[48]; + r3 = coef[16] + (coef[48] >> 1); + r0 += r3; + r3 = (r0 - (r3 << 1)); /* r0-r3 */ + r1 += r2; + r2 = (r1 - (r2 << 1)); /* r1-r2 */ + r0 += 32; + r1 += 32; + r2 += 32; + r3 += 32; + + r0 = cur[0] + (r0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + *cur = r0; + r1 = *(cur += picPitch) + (r1 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + *cur = r1; + r2 = *(cur += picPitch) + (r2 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + *cur = r2; + r3 = cur[picPitch] + (r3 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + cur[picPitch] = r3; + + cur -= (picPitch << 1); + 
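+                        /* back to the top row of this column; the increments below move to the next of the four columns */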
cur++; + coef++; + } + cur -= 4; + coef -= 4; + } + if (b4&1) + { + cur += ((picPitch << 2) - 4); + coef += 60; + } + else + { + cur += 4; + coef += 4; + } + } + } + + if (b8&1) + { + curL += ((picPitch << 3) - 8); + coef8 += 120; + } + else + { + curL += 8; + coef8 += 8; + } + } + + return ; +} + +/* performa dct, quant, iquant, idct for the entire MB */ +void dct_luma_16x16(AVCEncObject *encvid, uint8 *curL, uint8 *orgL) +{ + AVCCommonObj *video = encvid->common; + int pitch = video->currPic->pitch; + int org_pitch = encvid->currInput->pitch; + AVCMacroblock *currMB = video->currMB; + int16 *coef = video->block; + uint8 *pred = encvid->pred_i16[currMB->i16Mode]; + int blk_x, blk_y, j, k, idx, b8, b4; + int r0, r1, r2, r3, m0, m1, m2 , m3; + int data, lev; + int *level, *run, zero_run, ncoeff; + int Rq, Qq, quant, q_bits, qp_const; + int offset_cur[4], offset_pred[4], offset; + + /* horizontal */ + for (j = 16; j > 0; j--) + { + for (blk_x = 4; blk_x > 0; blk_x--) + { + /* calculate the residue first */ + r0 = *orgL++ - *pred++; + r1 = *orgL++ - *pred++; + r2 = *orgL++ - *pred++; + r3 = *orgL++ - *pred++; + + r0 += r3; //ptr[0] + ptr[3]; + r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; + r1 += r2; //ptr[1] + ptr[2]; + r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; + + *coef++ = r0 + r1; + *coef++ = (r3 << 1) + r2; + *coef++ = r0 - r1; + *coef++ = r3 - (r2 << 1); + } + orgL += (org_pitch - 16); + } + pred -= 256; + coef -= 256; + /* vertical */ + for (blk_y = 4; blk_y > 0; blk_y--) + { + for (j = 16; j > 0; j--) + { + r0 = coef[0] + coef[48]; + r3 = coef[0] - coef[48]; + r1 = coef[16] + coef[32]; + r2 = coef[16] - coef[32]; + + coef[0] = r0 + r1; + coef[32] = r0 - r1; + coef[16] = (r3 << 1) + r2; + coef[48] = r3 - (r2 << 1); + + coef++; + } + coef += 48; + } + + /* then perform DC transform */ + coef -= 256; + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[12]; + r3 = coef[0] - coef[12]; + r1 = coef[4] + coef[8]; + r2 = coef[4] - coef[8]; + + coef[0] = r0 + r1; + coef[8] = r0 - r1; + coef[4] = r3 + r2; + coef[12] = r3 - r2; + coef += 64; + } + coef -= 256; + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[192]; + r3 = coef[0] - coef[192]; + r1 = coef[64] + coef[128]; + r2 = coef[64] - coef[128]; + + coef[0] = (r0 + r1) >> 1; + coef[128] = (r0 - r1) >> 1; + coef[64] = (r3 + r2) >> 1; + coef[192] = (r3 - r2) >> 1; + coef += 4; + } + + coef -= 16; + // then quantize DC + level = encvid->leveldc; + run = encvid->rundc; + + Rq = video->QPy_mod_6; + Qq = video->QPy_div_6; + quant = quant_coef[Rq][0]; + q_bits = 15 + Qq; + qp_const = encvid->qp_const; + + zero_run = 0; + ncoeff = 0; + for (k = 0; k < 16; k++) /* in zigzag scan order */ + { + idx = ZIGZAG2RASTERDC[k]; + data = coef[idx]; + if (data > 0) // quant + { + lev = data * quant + (qp_const << 1); + } + else + { + lev = -data * quant + (qp_const << 1); + } + lev >>= (q_bits + 1); + if (lev) // dequant + { + if (data > 0) + { + level[ncoeff] = lev; + coef[idx] = lev; + } + else + { + level[ncoeff] = -lev; + coef[idx] = -lev; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + /* inverse transform DC */ + encvid->numcoefdc = ncoeff; + if (ncoeff) + { + quant = dequant_coefres[Rq][0]; + + for (j = 0; j < 4; j++) + { + m0 = coef[0] + coef[4]; + m1 = coef[0] - coef[4]; + m2 = coef[8] + coef[12]; + m3 = coef[8] - coef[12]; + + + coef[0] = m0 + m2; + coef[4] = m0 - m2; + coef[8] = m1 - m3; + coef[12] = m1 + m3; + coef += 64; + } + + coef -= 256; + + if (Qq >= 2) /* this way should be faster than JM */ 
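+    /* both branches below are algebraically equal to the reference formula
+       (((m*scale) << (QPy/6)) + 2) >> 2: a pure left shift by (QPy/6 - 2) when
+       QPy/6 >= 2, and a rounded right shift by (2 - QPy/6) otherwise */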
+ { /* they use (((m4*scale)<<(QPy/6))+2)>>2 for both cases. */ + Qq -= 2; + for (j = 0; j < 4; j++) + { + m0 = coef[0] + coef[64]; + m1 = coef[0] - coef[64]; + m2 = coef[128] + coef[192]; + m3 = coef[128] - coef[192]; + + coef[0] = ((m0 + m2) * quant) << Qq; + coef[64] = ((m0 - m2) * quant) << Qq; + coef[128] = ((m1 - m3) * quant) << Qq; + coef[192] = ((m1 + m3) * quant) << Qq; + coef += 4; + } + Qq += 2; /* restore the value */ + } + else + { + Qq = 2 - Qq; + offset = 1 << (Qq - 1); + + for (j = 0; j < 4; j++) + { + m0 = coef[0] + coef[64]; + m1 = coef[0] - coef[64]; + m2 = coef[128] + coef[192]; + m3 = coef[128] - coef[192]; + + coef[0] = (((m0 + m2) * quant + offset) >> Qq); + coef[64] = (((m0 - m2) * quant + offset) >> Qq); + coef[128] = (((m1 - m3) * quant + offset) >> Qq); + coef[192] = (((m1 + m3) * quant + offset) >> Qq); + coef += 4; + } + Qq = 2 - Qq; /* restore the value */ + } + coef -= 16; /* back to the origin */ + } + + /* now zigzag scan ac coefs, quant, iquant and itrans */ + run = encvid->run[0]; + level = encvid->level[0]; + + /* offset btw 4x4 block */ + offset_cur[0] = 0; + offset_cur[1] = (pitch << 2) - 8; + + /* offset btw 8x8 block */ + offset_cur[2] = 8 - (pitch << 3); + offset_cur[3] = -8; + + /* similarly for pred */ + offset_pred[0] = 0; + offset_pred[1] = 56; + offset_pred[2] = -120; + offset_pred[3] = -8; + + currMB->CBP = 0; + + for (b8 = 0; b8 < 4; b8++) + { + for (b4 = 0; b4 < 4; b4++) + { + + zero_run = 0; + ncoeff = 0; + + for (k = 1; k < 16; k++) + { + idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */ + data = coef[idx]; + quant = quant_coef[Rq][k]; + if (data > 0) + { + lev = data * quant + qp_const; + } + else + { + lev = -data * quant + qp_const; + } + lev >>= q_bits; + if (lev) + { /* dequant */ + quant = dequant_coefres[Rq][k]; + if (data > 0) + { + level[ncoeff] = lev; + coef[idx] = (lev * quant) << Qq; + } + else + { + level[ncoeff] = -lev; + coef[idx] = (-lev * quant) << Qq; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + currMB->nz_coeff[blkIdx2blkXY[b8][b4]] = ncoeff; /* in raster scan !!! 
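+               (nz_coeff[] is indexed by 4x4-block position in raster order via
+               blkIdx2blkXY, while the level/run arrays advance 16 entries per
+               block in coding order)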
*/ + if (ncoeff) + { + currMB->CBP |= (1 << b8); + + // do inverse transform here + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[2]; + r1 = coef[0] - coef[2]; + r2 = (coef[1] >> 1) - coef[3]; + r3 = coef[1] + (coef[3] >> 1); + + coef[0] = r0 + r3; + coef[1] = r1 + r2; + coef[2] = r1 - r2; + coef[3] = r0 - r3; + + coef += 16; + } + coef -= 64; + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[32]; + r1 = coef[0] - coef[32]; + r2 = (coef[16] >> 1) - coef[48]; + r3 = coef[16] + (coef[48] >> 1); + + r0 += r3; + r3 = (r0 - (r3 << 1)); /* r0-r3 */ + r1 += r2; + r2 = (r1 - (r2 << 1)); /* r1-r2 */ + r0 += 32; + r1 += 32; + r2 += 32; + r3 += 32; + r0 = pred[0] + (r0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = pred[16] + (r1 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[32] + (r2 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[48] + (r3 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curL = r0; + *(curL += pitch) = r1; + *(curL += pitch) = r2; + curL[pitch] = r3; + curL -= (pitch << 1); + curL++; + pred++; + coef++; + } + } + else // do DC-only inverse + { + m0 = coef[0] + 32; + + for (j = 4; j > 0; j--) + { + r0 = pred[0] + (m0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = pred[16] + (m0 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[32] + (m0 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[48] + (m0 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curL = r0; + *(curL += pitch) = r1; + *(curL += pitch) = r2; + curL[pitch] = r3; + curL -= (pitch << 1); + curL++; + pred++; + } + coef += 4; + } + + run += 16; // follow coding order + level += 16; + curL += offset_cur[b4&1]; + pred += offset_pred[b4&1]; + coef += offset_pred[b4&1]; + } + + curL += offset_cur[2 + (b8&1)]; + pred += offset_pred[2 + (b8&1)]; + coef += offset_pred[2 + (b8&1)]; + } + + return ; +} + + +void dct_chroma(AVCEncObject *encvid, uint8 *curC, uint8 *orgC, int cr) +{ + AVCCommonObj *video = encvid->common; + AVCMacroblock *currMB = video->currMB; + int org_pitch = (encvid->currInput->pitch) >> 1; + int pitch = (video->currPic->pitch) >> 1; + int pred_pitch = 16; + int16 *coef = video->block + 256; + uint8 *pred = video->pred_block; + int j, blk_x, blk_y, k, idx, b4; + int r0, r1, r2, r3, m0; + int Qq, Rq, qp_const, q_bits, quant; + int *level, *run, zero_run, ncoeff; + int data, lev; + int offset_cur[2], offset_pred[2], offset_coef[2]; + uint8 nz_temp[4]; + int coeff_cost; + + if (cr) + { + coef += 8; + pred += 8; + } + + if (currMB->mb_intra == 0) // inter mode + { + pred = curC; + pred_pitch = pitch; + } + + /* do 4x4 transform */ + /* horizontal */ + for (j = 8; j > 0; j--) + { + for (blk_x = 2; blk_x > 0; blk_x--) + { + /* calculate the residue first */ + r0 = *orgC++ - *pred++; + r1 = *orgC++ - *pred++; + r2 = *orgC++ - *pred++; + r3 = *orgC++ - *pred++; + + r0 += r3; //ptr[0] + ptr[3]; + r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; + r1 += r2; //ptr[1] + ptr[2]; + r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; + + *coef++ = r0 + r1; + *coef++ = (r3 << 1) + r2; + *coef++ = r0 - r1; + *coef++ = r3 - (r2 << 1); + + } + coef += 8; // coef pitch is 16 + pred += (pred_pitch - 8); // pred_pitch is 16 + orgC += (org_pitch - 8); + } + pred -= (pred_pitch << 3); + coef -= 128; + /* vertical */ + for (blk_y = 2; blk_y > 0; blk_y--) + { + for (j = 8; j > 0; j--) + { + r0 = coef[0] + coef[48]; + r3 = coef[0] - 
coef[48]; + r1 = coef[16] + coef[32]; + r2 = coef[16] - coef[32]; + + coef[0] = r0 + r1; + coef[32] = r0 - r1; + coef[16] = (r3 << 1) + r2; + coef[48] = r3 - (r2 << 1); + + coef++; + } + coef += 56; + } + /* then perform DC transform */ + coef -= 128; + + /* 2x2 transform of DC components*/ + r0 = coef[0]; + r1 = coef[4]; + r2 = coef[64]; + r3 = coef[68]; + + coef[0] = r0 + r1 + r2 + r3; + coef[4] = r0 - r1 + r2 - r3; + coef[64] = r0 + r1 - r2 - r3; + coef[68] = r0 - r1 - r2 + r3; + + Qq = video->QPc_div_6; + Rq = video->QPc_mod_6; + quant = quant_coef[Rq][0]; + q_bits = 15 + Qq; + qp_const = encvid->qp_const_c; + + zero_run = 0; + ncoeff = 0; + run = encvid->runcdc + (cr << 2); + level = encvid->levelcdc + (cr << 2); + + /* in zigzag scan order */ + for (k = 0; k < 4; k++) + { + idx = ((k >> 1) << 6) + ((k & 1) << 2); + data = coef[idx]; + if (data > 0) + { + lev = data * quant + (qp_const << 1); + } + else + { + lev = -data * quant + (qp_const << 1); + } + lev >>= (q_bits + 1); + if (lev) + { + if (data > 0) + { + level[ncoeff] = lev; + coef[idx] = lev; + } + else + { + level[ncoeff] = -lev; + coef[idx] = -lev; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + encvid->numcoefcdc[cr] = ncoeff; + + if (ncoeff) + { + currMB->CBP |= (1 << 4); // DC present + // do inverse transform + quant = dequant_coefres[Rq][0]; + + r0 = coef[0] + coef[4]; + r1 = coef[0] - coef[4]; + r2 = coef[64] + coef[68]; + r3 = coef[64] - coef[68]; + + r0 += r2; + r2 = r0 - (r2 << 1); + r1 += r3; + r3 = r1 - (r3 << 1); + + if (Qq >= 1) + { + Qq -= 1; + coef[0] = (r0 * quant) << Qq; + coef[4] = (r1 * quant) << Qq; + coef[64] = (r2 * quant) << Qq; + coef[68] = (r3 * quant) << Qq; + Qq++; + } + else + { + coef[0] = (r0 * quant) >> 1; + coef[4] = (r1 * quant) >> 1; + coef[64] = (r2 * quant) >> 1; + coef[68] = (r3 * quant) >> 1; + } + } + + /* now do AC zigzag scan, quant, iquant and itrans */ + if (cr) + { + run = encvid->run[20]; + level = encvid->level[20]; + } + else + { + run = encvid->run[16]; + level = encvid->level[16]; + } + + /* offset btw 4x4 block */ + offset_cur[0] = 0; + offset_cur[1] = (pitch << 2) - 8; + offset_pred[0] = 0; + offset_pred[1] = (pred_pitch << 2) - 8; + offset_coef[0] = 0; + offset_coef[1] = 56; + + coeff_cost = 0; + + for (b4 = 0; b4 < 4; b4++) + { + zero_run = 0; + ncoeff = 0; + for (k = 1; k < 16; k++) /* in zigzag scan order */ + { + idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */ + data = coef[idx]; + quant = quant_coef[Rq][k]; + if (data > 0) + { + lev = data * quant + qp_const; + } + else + { + lev = -data * quant + qp_const; + } + lev >>= q_bits; + if (lev) + { + /* for RD performance*/ + if (lev > 1) + coeff_cost += MAX_VALUE; // set high cost, shall not be discarded + else + coeff_cost += COEFF_COST[DISABLE_THRESHOLDING][zero_run]; + + /* dequant */ + quant = dequant_coefres[Rq][k]; + if (data > 0) + { + level[ncoeff] = lev; + coef[idx] = (lev * quant) << Qq; + } + else + { + level[ncoeff] = -lev; + coef[idx] = (-lev * quant) << Qq; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + nz_temp[b4] = ncoeff; // raster scan + + // just advance the pointers for now, do IDCT later + coef += 4; + run += 16; + level += 16; + coef += offset_coef[b4&1]; + } + + /* rewind the pointers */ + coef -= 128; + + if (coeff_cost < _CHROMA_COEFF_COST_) + { + /* if it's not efficient to encode any blocks. 
+ Just do DC only */ + /* We can reset level and run also, but setting nz to zero should be enough. */ + currMB->nz_coeff[16+(cr<<1)] = 0; + currMB->nz_coeff[17+(cr<<1)] = 0; + currMB->nz_coeff[20+(cr<<1)] = 0; + currMB->nz_coeff[21+(cr<<1)] = 0; + + for (b4 = 0; b4 < 4; b4++) + { + // do DC-only inverse + m0 = coef[0] + 32; + + for (j = 4; j > 0; j--) + { + r0 = pred[0] + (m0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = *(pred += pred_pitch) + (m0 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[pred_pitch] + (m0 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[pred_pitch<<1] + (m0 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curC = r0; + *(curC += pitch) = r1; + *(curC += pitch) = r2; + curC[pitch] = r3; + curC -= (pitch << 1); + curC++; + pred += (1 - pred_pitch); + } + coef += 4; + curC += offset_cur[b4&1]; + pred += offset_pred[b4&1]; + coef += offset_coef[b4&1]; + } + } + else // not dropping anything, continue with the IDCT + { + for (b4 = 0; b4 < 4; b4++) + { + ncoeff = nz_temp[b4] ; // in raster scan + currMB->nz_coeff[16+(b4&1)+(cr<<1)+((b4>>1)<<2)] = ncoeff; // in raster scan + + if (ncoeff) // do a check on the nonzero-coeff + { + currMB->CBP |= (2 << 4); + + // do inverse transform here + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[2]; + r1 = coef[0] - coef[2]; + r2 = (coef[1] >> 1) - coef[3]; + r3 = coef[1] + (coef[3] >> 1); + + coef[0] = r0 + r3; + coef[1] = r1 + r2; + coef[2] = r1 - r2; + coef[3] = r0 - r3; + + coef += 16; + } + coef -= 64; + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[32]; + r1 = coef[0] - coef[32]; + r2 = (coef[16] >> 1) - coef[48]; + r3 = coef[16] + (coef[48] >> 1); + + r0 += r3; + r3 = (r0 - (r3 << 1)); /* r0-r3 */ + r1 += r2; + r2 = (r1 - (r2 << 1)); /* r1-r2 */ + r0 += 32; + r1 += 32; + r2 += 32; + r3 += 32; + r0 = pred[0] + (r0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = *(pred += pred_pitch) + (r1 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[pred_pitch] + (r2 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[pred_pitch<<1] + (r3 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curC = r0; + *(curC += pitch) = r1; + *(curC += pitch) = r2; + curC[pitch] = r3; + curC -= (pitch << 1); + curC++; + pred += (1 - pred_pitch); + coef++; + } + } + else + { + // do DC-only inverse + m0 = coef[0] + 32; + + for (j = 4; j > 0; j--) + { + r0 = pred[0] + (m0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = *(pred += pred_pitch) + (m0 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[pred_pitch] + (m0 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[pred_pitch<<1] + (m0 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curC = r0; + *(curC += pitch) = r1; + *(curC += pitch) = r2; + curC[pitch] = r3; + curC -= (pitch << 1); + curC++; + pred += (1 - pred_pitch); + } + coef += 4; + } + curC += offset_cur[b4&1]; + pred += offset_pred[b4&1]; + coef += offset_coef[b4&1]; + } + } + + return ; +} + + +/* only DC transform */ +int TransQuantIntra16DC(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + int16 *block = video->block; + int *level = encvid->leveldc; + int *run = encvid->rundc; + int16 *ptr = block; + int r0, r1, r2, r3, j; + int Qq = video->QPy_div_6; + int Rq = video->QPy_mod_6; + int q_bits, 
qp_const, quant; + int data, lev, zero_run; + int k, ncoeff, idx; + + /* DC transform */ + /* horizontal */ + j = 4; + while (j) + { + r0 = ptr[0] + ptr[12]; + r3 = ptr[0] - ptr[12]; + r1 = ptr[4] + ptr[8]; + r2 = ptr[4] - ptr[8]; + + ptr[0] = r0 + r1; + ptr[8] = r0 - r1; + ptr[4] = r3 + r2; + ptr[12] = r3 - r2; + ptr += 64; + j--; + } + /* vertical */ + ptr = block; + j = 4; + while (j) + { + r0 = ptr[0] + ptr[192]; + r3 = ptr[0] - ptr[192]; + r1 = ptr[64] + ptr[128]; + r2 = ptr[64] - ptr[128]; + + ptr[0] = (r0 + r1) >> 1; + ptr[128] = (r0 - r1) >> 1; + ptr[64] = (r3 + r2) >> 1; + ptr[192] = (r3 - r2) >> 1; + ptr += 4; + j--; + } + + quant = quant_coef[Rq][0]; + q_bits = 15 + Qq; + qp_const = (1 << q_bits) / 3; // intra + + zero_run = 0; + ncoeff = 0; + + for (k = 0; k < 16; k++) /* in zigzag scan order */ + { + idx = ZIGZAG2RASTERDC[k]; + data = block[idx]; + if (data > 0) + { + lev = data * quant + (qp_const << 1); + } + else + { + lev = -data * quant + (qp_const << 1); + } + lev >>= (q_bits + 1); + if (lev) + { + if (data > 0) + { + level[ncoeff] = lev; + block[idx] = lev; + } + else + { + level[ncoeff] = -lev; + block[idx] = -lev; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + block[idx] = 0; + } + } + return ncoeff; +} + +int TransQuantChromaDC(AVCEncObject *encvid, int16 *block, int slice_type, int cr) +{ + AVCCommonObj *video = encvid->common; + int *level, *run; + int r0, r1, r2, r3; + int Qq, Rq, q_bits, qp_const, quant; + int data, lev, zero_run; + int k, ncoeff, idx; + + level = encvid->levelcdc + (cr << 2); /* cb or cr */ + run = encvid->runcdc + (cr << 2); + + /* 2x2 transform of DC components*/ + r0 = block[0]; + r1 = block[4]; + r2 = block[64]; + r3 = block[68]; + + block[0] = r0 + r1 + r2 + r3; + block[4] = r0 - r1 + r2 - r3; + block[64] = r0 + r1 - r2 - r3; + block[68] = r0 - r1 - r2 + r3; + + Qq = video->QPc_div_6; + Rq = video->QPc_mod_6; + quant = quant_coef[Rq][0]; + q_bits = 15 + Qq; + if (slice_type == AVC_I_SLICE) + { + qp_const = (1 << q_bits) / 3; + } + else + { + qp_const = (1 << q_bits) / 6; + } + + zero_run = 0; + ncoeff = 0; + + for (k = 0; k < 4; k++) /* in zigzag scan order */ + { + idx = ((k >> 1) << 6) + ((k & 1) << 2); + data = block[idx]; + if (data > 0) + { + lev = data * quant + (qp_const << 1); + } + else + { + lev = -data * quant + (qp_const << 1); + } + lev >>= (q_bits + 1); + if (lev) + { + if (data > 0) + { + level[ncoeff] = lev; + block[idx] = lev; + } + else + { + level[ncoeff] = -lev; + block[idx] = -lev; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + block[idx] = 0; + } + } + return ncoeff; +} + + diff --git a/media/libstagefright/codecs/avc/enc/src/findhalfpel.cpp b/media/libstagefright/codecs/avc/enc/src/findhalfpel.cpp new file mode 100644 index 0000000..38a2a15 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/findhalfpel.cpp @@ -0,0 +1,622 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" +/* 3/29/01 fast half-pel search based on neighboring guess */ +/* value ranging from 0 to 4, high complexity (more accurate) to + low complexity (less accurate) */ +#define HP_DISTANCE_TH 5 // 2 /* half-pel distance threshold */ + +#define PREF_16_VEC 129 /* 1MV bias versus 4MVs*/ + +const static int distance_tab[9][9] = /* [hp_guess][k] */ +{ + {0, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 0, 1, 2, 3, 4, 3, 2, 1}, + {1, 0, 0, 0, 1, 2, 3, 2, 1}, + {1, 2, 1, 0, 1, 2, 3, 4, 3}, + {1, 2, 1, 0, 0, 0, 1, 2, 3}, + {1, 4, 3, 2, 1, 0, 1, 2, 3}, + {1, 2, 3, 2, 1, 0, 0, 0, 1}, + {1, 2, 3, 4, 3, 2, 1, 0, 1}, + {1, 0, 1, 2, 3, 2, 1, 0, 0} +}; + +#define CLIP_RESULT(x) if((uint)x > 0xFF){ \ + x = 0xFF & (~(x>>31));} + +#define CLIP_UPPER16(x) if((uint)x >= 0x20000000){ \ + x = 0xFF0000 & (~(x>>31));} \ + else { \ + x = (x>>5)&0xFF0000; \ + } + +/*===================================================================== + Function: AVCFindHalfPelMB + Date: 10/31/2007 + Purpose: Find half pel resolution MV surrounding the full-pel MV +=====================================================================*/ + +int AVCFindHalfPelMB(AVCEncObject *encvid, uint8 *cur, AVCMV *mot, uint8 *ncand, + int xpos, int ypos, int hp_guess, int cmvx, int cmvy) +{ + AVCPictureData *currPic = encvid->common->currPic; + int lx = currPic->pitch; + int d, dmin, satd_min; + uint8* cand; + int lambda_motion = encvid->lambda_motion; + uint8 *mvbits = encvid->mvbits; + int mvcost; + /* list of candidate to go through for half-pel search*/ + uint8 *subpel_pred = (uint8*) encvid->subpel_pred; // all 16 sub-pel positions + uint8 **hpel_cand = (uint8**) encvid->hpel_cand; /* half-pel position */ + + int xh[9] = {0, 0, 2, 2, 2, 0, -2, -2, -2}; + int yh[9] = {0, -2, -2, 0, 2, 2, 2, 0, -2}; + int xq[8] = {0, 1, 1, 1, 0, -1, -1, -1}; + int yq[8] = { -1, -1, 0, 1, 1, 1, 0, -1}; + int h, hmin, q, qmin; + + OSCL_UNUSED_ARG(xpos); + OSCL_UNUSED_ARG(ypos); + OSCL_UNUSED_ARG(hp_guess); + + GenerateHalfPelPred(subpel_pred, ncand, lx); + + cur = encvid->currYMB; // pre-load current original MB + + cand = hpel_cand[0]; + + // find cost for the current full-pel position + dmin = SATD_MB(cand, cur, 65535); // get Hadamaard transform SAD + mvcost = MV_COST_S(lambda_motion, mot->x, mot->y, cmvx, cmvy); + satd_min = dmin; + dmin += mvcost; + hmin = 0; + + /* find half-pel */ + for (h = 1; h < 9; h++) + { + d = SATD_MB(hpel_cand[h], cur, dmin); + mvcost = MV_COST_S(lambda_motion, mot->x + xh[h], mot->y + yh[h], cmvx, cmvy); + d += mvcost; + + if (d < dmin) + { + dmin = d; + hmin = h; + satd_min = d - mvcost; + } + } + + mot->sad = dmin; + mot->x += xh[hmin]; + mot->y += yh[hmin]; + encvid->best_hpel_pos = hmin; + + /*** search for quarter-pel ****/ + GenerateQuartPelPred(encvid->bilin_base[hmin], &(encvid->qpel_cand[0][0]), hmin); + + encvid->best_qpel_pos = qmin = -1; + + for (q = 0; q < 8; q++) + { + d = SATD_MB(encvid->qpel_cand[q], cur, dmin); + mvcost = MV_COST_S(lambda_motion, mot->x + xq[q], mot->y + yq[q], cmvx, cmvy); + d += mvcost; + if (d < dmin) + { + dmin = d; + qmin = q; + satd_min = d - mvcost; + } + } + + if (qmin != -1) + { + mot->sad = dmin; + mot->x += xq[qmin]; + mot->y += yq[qmin]; + encvid->best_qpel_pos = qmin; + } + + return satd_min; +} + + + +/** This function generates sub-pel prediction around the full-pel candidate. 
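+Half-pel samples are produced with the 6-tap filter (1, -5, 20, 20, -5, 1); single-pass
+positions are rounded with (x + 16) >> 5, while the centre (half, half) position filters
+the 16-bit horizontal intermediates vertically and rounds with (x + 512) >> 10, matching
+subclause 8.4.2.2.1.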
+Each sub-pel position array is 20 pixel wide (for word-alignment) and 17 pixel tall. */ +/** The sub-pel position is labeled in spiral manner from the center. */ + +void GenerateHalfPelPred(uint8* subpel_pred, uint8 *ncand, int lx) +{ + /* let's do straightforward way first */ + uint8 *ref; + uint8 *dst; + uint8 tmp8; + int32 tmp32; + int16 tmp_horz[18*22], *dst_16, *src_16; + register int a = 0, b = 0, c = 0, d = 0, e = 0, f = 0; // temp register + int msk; + int i, j; + + /* first copy full-pel to the first array */ + /* to be optimized later based on byte-offset load */ + ref = ncand - 3 - lx - (lx << 1); /* move back (-3,-3) */ + dst = subpel_pred; + + dst -= 4; /* offset */ + for (j = 0; j < 22; j++) /* 24x22 */ + { + i = 6; + while (i > 0) + { + tmp32 = *ref++; + tmp8 = *ref++; + tmp32 |= (tmp8 << 8); + tmp8 = *ref++; + tmp32 |= (tmp8 << 16); + tmp8 = *ref++; + tmp32 |= (tmp8 << 24); + *((uint32*)(dst += 4)) = tmp32; + i--; + } + ref += (lx - 24); + } + + /* from the first array, we do horizontal interp */ + ref = subpel_pred + 2; + dst_16 = tmp_horz; /* 17 x 22 */ + + for (j = 4; j > 0; j--) + { + for (i = 16; i > 0; i -= 4) + { + a = ref[-2]; + b = ref[-1]; + c = ref[0]; + d = ref[1]; + e = ref[2]; + f = ref[3]; + *dst_16++ = a + f - 5 * (b + e) + 20 * (c + d); + a = ref[4]; + *dst_16++ = b + a - 5 * (c + f) + 20 * (d + e); + b = ref[5]; + *dst_16++ = c + b - 5 * (d + a) + 20 * (e + f); + c = ref[6]; + *dst_16++ = d + c - 5 * (e + b) + 20 * (f + a); + + ref += 4; + } + /* do the 17th column here */ + d = ref[3]; + *dst_16 = e + d - 5 * (f + c) + 20 * (a + b); + dst_16 += 2; /* stride for tmp_horz is 18 */ + ref += 8; /* stride for ref is 24 */ + if (j == 3) // move 18 lines down + { + dst_16 += 324;//18*18; + ref += 432;//18*24; + } + } + + ref -= 480;//20*24; + dst_16 -= 360;//20*18; + dst = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* go to the 14th array 17x18*/ + + for (j = 18; j > 0; j--) + { + for (i = 16; i > 0; i -= 4) + { + a = ref[-2]; + b = ref[-1]; + c = ref[0]; + d = ref[1]; + e = ref[2]; + f = ref[3]; + tmp32 = a + f - 5 * (b + e) + 20 * (c + d); + *dst_16++ = tmp32; + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *dst++ = tmp32; + + a = ref[4]; + tmp32 = b + a - 5 * (c + f) + 20 * (d + e); + *dst_16++ = tmp32; + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *dst++ = tmp32; + + b = ref[5]; + tmp32 = c + b - 5 * (d + a) + 20 * (e + f); + *dst_16++ = tmp32; + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *dst++ = tmp32; + + c = ref[6]; + tmp32 = d + c - 5 * (e + b) + 20 * (f + a); + *dst_16++ = tmp32; + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *dst++ = tmp32; + + ref += 4; + } + /* do the 17th column here */ + d = ref[3]; + tmp32 = e + d - 5 * (f + c) + 20 * (a + b); + *dst_16 = tmp32; + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *dst = tmp32; + + dst += 8; /* stride for dst is 24 */ + dst_16 += 2; /* stride for tmp_horz is 18 */ + ref += 8; /* stride for ref is 24 */ + } + + + /* Do middle point filtering*/ + src_16 = tmp_horz; /* 17 x 22 */ + dst = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* 12th array 17x17*/ + dst -= 24; // offset + for (i = 0; i < 17; i++) + { + for (j = 16; j > 0; j -= 4) + { + a = *src_16; + b = *(src_16 += 18); + c = *(src_16 += 18); + d = *(src_16 += 18); + e = *(src_16 += 18); + f = *(src_16 += 18); + + tmp32 = a + f - 5 * (b + e) + 20 * (c + d); + tmp32 = (tmp32 + 512) >> 10; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; + + a = *(src_16 += 18); + tmp32 = b + a - 5 * (c + f) + 20 * (d + e); + tmp32 = 
(tmp32 + 512) >> 10; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; + + b = *(src_16 += 18); + tmp32 = c + b - 5 * (d + a) + 20 * (e + f); + tmp32 = (tmp32 + 512) >> 10; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; + + c = *(src_16 += 18); + tmp32 = d + c - 5 * (e + b) + 20 * (f + a); + tmp32 = (tmp32 + 512) >> 10; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; + + src_16 -= (18 << 2); + } + + d = src_16[90]; // 18*5 + tmp32 = e + d - 5 * (f + c) + 20 * (a + b); + tmp32 = (tmp32 + 512) >> 10; + CLIP_RESULT(tmp32) + dst[24] = tmp32; + + src_16 -= ((18 << 4) - 1); + dst -= ((24 << 4) - 1); + } + + /* do vertical interpolation */ + ref = subpel_pred + 2; + dst = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE; /* 10th array 18x17 */ + dst -= 24; // offset + + for (i = 2; i > 0; i--) + { + for (j = 16; j > 0; j -= 4) + { + a = *ref; + b = *(ref += 24); + c = *(ref += 24); + d = *(ref += 24); + e = *(ref += 24); + f = *(ref += 24); + + tmp32 = a + f - 5 * (b + e) + 20 * (c + d); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + a = *(ref += 24); + tmp32 = b + a - 5 * (c + f) + 20 * (d + e); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + b = *(ref += 24); + tmp32 = c + b - 5 * (d + a) + 20 * (e + f); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + c = *(ref += 24); + tmp32 = d + c - 5 * (e + b) + 20 * (f + a); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + ref -= (24 << 2); + } + + d = ref[120]; // 24*5 + tmp32 = e + d - 5 * (f + c) + 20 * (a + b); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + dst[24] = tmp32; // 10th + + dst -= ((24 << 4) - 1); + ref -= ((24 << 4) - 1); + } + + // note that using SIMD here doesn't help much, the cycle almost stays the same + // one can just use the above code and change the for(i=2 to for(i=18 + for (i = 16; i > 0; i -= 4) + { + msk = 0; + for (j = 17; j > 0; j--) + { + a = *((uint32*)ref); /* load 4 bytes */ + b = (a >> 8) & 0xFF00FF; /* second and fourth byte */ + a &= 0xFF00FF; + + c = *((uint32*)(ref + 120)); + d = (c >> 8) & 0xFF00FF; + c &= 0xFF00FF; + + a += c; + b += d; + + e = *((uint32*)(ref + 72)); /* e, f */ + f = (e >> 8) & 0xFF00FF; + e &= 0xFF00FF; + + c = *((uint32*)(ref + 48)); /* c, d */ + d = (c >> 8) & 0xFF00FF; + c &= 0xFF00FF; + + c += e; + d += f; + + a += 20 * c; + b += 20 * d; + a += 0x100010; + b += 0x100010; + + e = *((uint32*)(ref += 24)); /* e, f */ + f = (e >> 8) & 0xFF00FF; + e &= 0xFF00FF; + + c = *((uint32*)(ref + 72)); /* c, d */ + d = (c >> 8) & 0xFF00FF; + c &= 0xFF00FF; + + c += e; + d += f; + + a -= 5 * c; + b -= 5 * d; + + c = a << 16; + d = b << 16; + CLIP_UPPER16(a) + CLIP_UPPER16(c) + CLIP_UPPER16(b) + CLIP_UPPER16(d) + + a |= (c >> 16); + b |= (d >> 16); + // a>>=5; + // b>>=5; + /* clip */ + // msk |= b; msk|=a; + // a &= 0xFF00FF; + // b &= 0xFF00FF; + a |= (b << 8); /* pack it back */ + + *((uint16*)(dst += 24)) = a & 0xFFFF; //dst is not word-aligned. 
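+                /* 'a' now holds four packed 8-bit results; the write is split into
+                   two half-word stores because dst may not be 32-bit aligned */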
+ *((uint16*)(dst + 2)) = a >> 16; + + } + dst -= 404; // 24*17-4 + ref -= 404; + /* if(msk & 0xFF00FF00) // need clipping + { + VertInterpWClip(dst,ref); // re-do 4 column with clip + }*/ + } + + return ; +} + +void VertInterpWClip(uint8 *dst, uint8 *ref) +{ + int i, j; + int a, b, c, d, e, f; + int32 tmp32; + + dst -= 4; + ref -= 4; + + for (i = 4; i > 0; i--) + { + for (j = 16; j > 0; j -= 4) + { + a = *ref; + b = *(ref += 24); + c = *(ref += 24); + d = *(ref += 24); + e = *(ref += 24); + f = *(ref += 24); + + tmp32 = a + f - 5 * (b + e) + 20 * (c + d); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + a = *(ref += 24); + tmp32 = b + a - 5 * (c + f) + 20 * (d + e); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + b = *(ref += 24); + tmp32 = c + b - 5 * (d + a) + 20 * (e + f); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + c = *(ref += 24); + tmp32 = d + c - 5 * (e + b) + 20 * (f + a); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + ref -= (24 << 2); + } + + d = ref[120]; // 24*5 + tmp32 = e + d - 5 * (f + c) + 20 * (a + b); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + dst[24] = tmp32; // 10th + + dst -= ((24 << 4) - 1); + ref -= ((24 << 4) - 1); + } + + return ; +} + + +void GenerateQuartPelPred(uint8 **bilin_base, uint8 *qpel_cand, int hpel_pos) +{ + // for even value of hpel_pos, start with pattern 1, otherwise, start with pattern 2 + int i, j; + + uint8 *c1 = qpel_cand; + uint8 *tl = bilin_base[0]; + uint8 *tr = bilin_base[1]; + uint8 *bl = bilin_base[2]; + uint8 *br = bilin_base[3]; + int a, b, c, d; + int offset = 1 - (384 * 7); + + if (!(hpel_pos&1)) // diamond pattern + { + j = 16; + while (j--) + { + i = 16; + while (i--) + { + d = tr[24]; + a = *tr++; + b = bl[1]; + c = *br++; + + *c1 = (c + a + 1) >> 1; + *(c1 += 384) = (b + a + 1) >> 1; /* c2 */ + *(c1 += 384) = (b + c + 1) >> 1; /* c3 */ + *(c1 += 384) = (b + d + 1) >> 1; /* c4 */ + + b = *bl++; + + *(c1 += 384) = (c + d + 1) >> 1; /* c5 */ + *(c1 += 384) = (b + d + 1) >> 1; /* c6 */ + *(c1 += 384) = (b + c + 1) >> 1; /* c7 */ + *(c1 += 384) = (b + a + 1) >> 1; /* c8 */ + + c1 += offset; + } + // advance to the next line, pitch is 24 + tl += 8; + tr += 8; + bl += 8; + br += 8; + c1 += 8; + } + } + else // star pattern + { + j = 16; + while (j--) + { + i = 16; + while (i--) + { + a = *br++; + b = *tr++; + c = tl[1]; + *c1 = (a + b + 1) >> 1; + b = bl[1]; + *(c1 += 384) = (a + c + 1) >> 1; /* c2 */ + c = tl[25]; + *(c1 += 384) = (a + b + 1) >> 1; /* c3 */ + b = tr[23]; + *(c1 += 384) = (a + c + 1) >> 1; /* c4 */ + c = tl[24]; + *(c1 += 384) = (a + b + 1) >> 1; /* c5 */ + b = *bl++; + *(c1 += 384) = (a + c + 1) >> 1; /* c6 */ + c = *tl++; + *(c1 += 384) = (a + b + 1) >> 1; /* c7 */ + *(c1 += 384) = (a + c + 1) >> 1; /* c8 */ + + c1 += offset; + } + // advance to the next line, pitch is 24 + tl += 8; + tr += 8; + bl += 8; + br += 8; + c1 += 8; + } + } + + return ; +} + + +/* assuming cand always has a pitch of 24 */ +int SATD_MB(uint8 *cand, uint8 *cur, int dmin) +{ + int cost; + + + dmin = (dmin << 16) | 24; + cost = AVCSAD_Macroblock_C(cand, cur, dmin, NULL); + + return cost; +} + + + + + diff --git a/media/libstagefright/codecs/avc/enc/src/header.cpp b/media/libstagefright/codecs/avc/enc/src/header.cpp new file mode 100644 index 0000000..9acff9e --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/header.cpp @@ -0,0 +1,917 @@ +/* 
------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" +#include "avcenc_api.h" + +/** see subclause 7.4.2.1 */ +/* no need for checking the valid range , already done in SetEncodeParam(), +if we have to send another SPS, the ranges should be verified first before +users call PVAVCEncodeSPS() */ +AVCEnc_Status EncodeSPS(AVCEncObject *encvid, AVCEncBitstream *stream) +{ + AVCCommonObj *video = encvid->common; + AVCSeqParamSet *seqParam = video->currSeqParams; + AVCVUIParams *vui = &(seqParam->vui_parameters); + int i; + AVCEnc_Status status = AVCENC_SUCCESS; + + //DEBUG_LOG(userData,AVC_LOGTYPE_INFO,"EncodeSPS",-1,-1); + + status = BitstreamWriteBits(stream, 8, seqParam->profile_idc); + status = BitstreamWrite1Bit(stream, seqParam->constrained_set0_flag); + status = BitstreamWrite1Bit(stream, seqParam->constrained_set1_flag); + status = BitstreamWrite1Bit(stream, seqParam->constrained_set2_flag); + status = BitstreamWrite1Bit(stream, seqParam->constrained_set3_flag); + status = BitstreamWriteBits(stream, 4, 0); /* forbidden zero bits */ + if (status != AVCENC_SUCCESS) /* we can check after each write also */ + { + return status; + } + + status = BitstreamWriteBits(stream, 8, seqParam->level_idc); + status = ue_v(stream, seqParam->seq_parameter_set_id); + status = ue_v(stream, seqParam->log2_max_frame_num_minus4); + status = ue_v(stream, seqParam->pic_order_cnt_type); + if (status != AVCENC_SUCCESS) + { + return status; + } + + if (seqParam->pic_order_cnt_type == 0) + { + status = ue_v(stream, seqParam->log2_max_pic_order_cnt_lsb_minus4); + } + else if (seqParam->pic_order_cnt_type == 1) + { + status = BitstreamWrite1Bit(stream, seqParam->delta_pic_order_always_zero_flag); + status = se_v(stream, seqParam->offset_for_non_ref_pic); /* upto 32 bits */ + status = se_v(stream, seqParam->offset_for_top_to_bottom_field); /* upto 32 bits */ + status = ue_v(stream, seqParam->num_ref_frames_in_pic_order_cnt_cycle); + + for (i = 0; i < (int)(seqParam->num_ref_frames_in_pic_order_cnt_cycle); i++) + { + status = se_v(stream, seqParam->offset_for_ref_frame[i]); /* upto 32 bits */ + } + } + if (status != AVCENC_SUCCESS) + { + return status; + } + + status = ue_v(stream, seqParam->num_ref_frames); + status = BitstreamWrite1Bit(stream, seqParam->gaps_in_frame_num_value_allowed_flag); + status = ue_v(stream, seqParam->pic_width_in_mbs_minus1); + status = ue_v(stream, seqParam->pic_height_in_map_units_minus1); + status = BitstreamWrite1Bit(stream, seqParam->frame_mbs_only_flag); + if (status != AVCENC_SUCCESS) + { + return status; + } + /* if frame_mbs_only_flag is 0, then write, mb_adaptive_frame_field_frame here */ + + status = BitstreamWrite1Bit(stream, seqParam->direct_8x8_inference_flag); + status = BitstreamWrite1Bit(stream, seqParam->frame_cropping_flag); + if (seqParam->frame_cropping_flag) 
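+    /* frame_crop_*_offset values are coded in units of two luma samples for 4:2:0
+       frame coding; cropping is how dimensions that are not multiples of 16
+       (e.g. 1080 inside a 1088-high coded frame) are signalled */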
+ { + status = ue_v(stream, seqParam->frame_crop_left_offset); + status = ue_v(stream, seqParam->frame_crop_right_offset); + status = ue_v(stream, seqParam->frame_crop_top_offset); + status = ue_v(stream, seqParam->frame_crop_bottom_offset); + } + if (status != AVCENC_SUCCESS) + { + return status; + } + + status = BitstreamWrite1Bit(stream, seqParam->vui_parameters_present_flag); + if (seqParam->vui_parameters_present_flag) + { + /* not supported */ + //return AVCENC_SPS_FAIL; + EncodeVUI(stream, vui); + } + + return status; +} + + +void EncodeVUI(AVCEncBitstream* stream, AVCVUIParams* vui) +{ + int temp; + + temp = vui->aspect_ratio_info_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + BitstreamWriteBits(stream, 8, vui->aspect_ratio_idc); + if (vui->aspect_ratio_idc == 255) + { + BitstreamWriteBits(stream, 16, vui->sar_width); + BitstreamWriteBits(stream, 16, vui->sar_height); + } + } + temp = vui->overscan_info_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + BitstreamWrite1Bit(stream, vui->overscan_appropriate_flag); + } + temp = vui->video_signal_type_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + BitstreamWriteBits(stream, 3, vui->video_format); + BitstreamWrite1Bit(stream, vui->video_full_range_flag); + temp = vui->colour_description_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + BitstreamWriteBits(stream, 8, vui->colour_primaries); + BitstreamWriteBits(stream, 8, vui->transfer_characteristics); + BitstreamWriteBits(stream, 8, vui->matrix_coefficients); + } + } + temp = vui->chroma_location_info_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + ue_v(stream, vui->chroma_sample_loc_type_top_field); + ue_v(stream, vui->chroma_sample_loc_type_bottom_field); + } + + temp = vui->timing_info_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + BitstreamWriteBits(stream, 32, vui->num_units_in_tick); + BitstreamWriteBits(stream, 32, vui->time_scale); + BitstreamWrite1Bit(stream, vui->fixed_frame_rate_flag); + } + + temp = vui->nal_hrd_parameters_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + EncodeHRD(stream, &(vui->nal_hrd_parameters)); + } + temp = vui->vcl_hrd_parameters_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + EncodeHRD(stream, &(vui->vcl_hrd_parameters)); + } + if (vui->nal_hrd_parameters_present_flag || vui->vcl_hrd_parameters_present_flag) + { + BitstreamWrite1Bit(stream, vui->low_delay_hrd_flag); + } + BitstreamWrite1Bit(stream, vui->pic_struct_present_flag); + temp = vui->bitstream_restriction_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + BitstreamWrite1Bit(stream, vui->motion_vectors_over_pic_boundaries_flag); + ue_v(stream, vui->max_bytes_per_pic_denom); + ue_v(stream, vui->max_bits_per_mb_denom); + ue_v(stream, vui->log2_max_mv_length_horizontal); + ue_v(stream, vui->log2_max_mv_length_vertical); + ue_v(stream, vui->max_dec_frame_reordering); + ue_v(stream, vui->max_dec_frame_buffering); + } + + return ; +} + + +void EncodeHRD(AVCEncBitstream* stream, AVCHRDParams* hrd) +{ + int i; + + ue_v(stream, hrd->cpb_cnt_minus1); + BitstreamWriteBits(stream, 4, hrd->bit_rate_scale); + BitstreamWriteBits(stream, 4, hrd->cpb_size_scale); + for (i = 0; i <= (int)hrd->cpb_cnt_minus1; i++) + { + ue_v(stream, hrd->bit_rate_value_minus1[i]); + ue_v(stream, hrd->cpb_size_value_minus1[i]); + ue_v(stream, hrd->cbr_flag[i]); + } + BitstreamWriteBits(stream, 5, hrd->initial_cpb_removal_delay_length_minus1); + 
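+    /* these *_length fields are fixed 5-bit codes, u(5), per hrd_parameters() in Annex E */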
BitstreamWriteBits(stream, 5, hrd->cpb_removal_delay_length_minus1); + BitstreamWriteBits(stream, 5, hrd->dpb_output_delay_length_minus1); + BitstreamWriteBits(stream, 5, hrd->time_offset_length); + + return ; +} + + + +/** see subclause 7.4.2.2 */ +/* no need for checking the valid range , already done in SetEncodeParam(). +If we have to send another SPS, the ranges should be verified first before +users call PVAVCEncodeSPS()*/ +AVCEnc_Status EncodePPS(AVCEncObject *encvid, AVCEncBitstream *stream) +{ + AVCCommonObj *video = encvid->common; + AVCEnc_Status status = AVCENC_SUCCESS; + AVCPicParamSet *picParam = video->currPicParams; + int i, iGroup, numBits; + uint temp; + + status = ue_v(stream, picParam->pic_parameter_set_id); + status = ue_v(stream, picParam->seq_parameter_set_id); + status = BitstreamWrite1Bit(stream, picParam->entropy_coding_mode_flag); + status = BitstreamWrite1Bit(stream, picParam->pic_order_present_flag); + if (status != AVCENC_SUCCESS) + { + return status; + } + + status = ue_v(stream, picParam->num_slice_groups_minus1); + if (picParam->num_slice_groups_minus1 > 0) + { + status = ue_v(stream, picParam->slice_group_map_type); + if (picParam->slice_group_map_type == 0) + { + for (iGroup = 0; iGroup <= (int)picParam->num_slice_groups_minus1; iGroup++) + { + status = ue_v(stream, picParam->run_length_minus1[iGroup]); + } + } + else if (picParam->slice_group_map_type == 2) + { + for (iGroup = 0; iGroup < (int)picParam->num_slice_groups_minus1; iGroup++) + { + status = ue_v(stream, picParam->top_left[iGroup]); + status = ue_v(stream, picParam->bottom_right[iGroup]); + } + } + else if (picParam->slice_group_map_type == 3 || + picParam->slice_group_map_type == 4 || + picParam->slice_group_map_type == 5) + { + status = BitstreamWrite1Bit(stream, picParam->slice_group_change_direction_flag); + status = ue_v(stream, picParam->slice_group_change_rate_minus1); + } + else /*if(picParam->slice_group_map_type == 6)*/ + { + status = ue_v(stream, picParam->pic_size_in_map_units_minus1); + + numBits = 0;/* ceil(log2(num_slice_groups_minus1+1)) bits */ + i = picParam->num_slice_groups_minus1; + while (i > 0) + { + numBits++; + i >>= 1; + } + + for (i = 0; i <= (int)picParam->pic_size_in_map_units_minus1; i++) + { + status = BitstreamWriteBits(stream, numBits, picParam->slice_group_id[i]); + } + } + } + if (status != AVCENC_SUCCESS) + { + return status; + } + + status = ue_v(stream, picParam->num_ref_idx_l0_active_minus1); + status = ue_v(stream, picParam->num_ref_idx_l1_active_minus1); + status = BitstreamWrite1Bit(stream, picParam->weighted_pred_flag); + status = BitstreamWriteBits(stream, 2, picParam->weighted_bipred_idc); + if (status != AVCENC_SUCCESS) + { + return status; + } + + status = se_v(stream, picParam->pic_init_qp_minus26); + status = se_v(stream, picParam->pic_init_qs_minus26); + status = se_v(stream, picParam->chroma_qp_index_offset); + + temp = picParam->deblocking_filter_control_present_flag << 2; + temp |= (picParam->constrained_intra_pred_flag << 1); + temp |= picParam->redundant_pic_cnt_present_flag; + + status = BitstreamWriteBits(stream, 3, temp); + + return status; +} + +/** see subclause 7.4.3 */ +AVCEnc_Status EncodeSliceHeader(AVCEncObject *encvid, AVCEncBitstream *stream) +{ + AVCCommonObj *video = encvid->common; + AVCSliceHeader *sliceHdr = video->sliceHdr; + AVCPicParamSet *currPPS = video->currPicParams; + AVCSeqParamSet *currSPS = video->currSeqParams; + AVCEnc_Status status = AVCENC_SUCCESS; + int slice_type, temp, i; + int num_bits; + + num_bits = 
(stream->write_pos << 3) - stream->bit_left; + + status = ue_v(stream, sliceHdr->first_mb_in_slice); + + slice_type = video->slice_type; + + if (video->mbNum == 0) /* first mb in frame */ + { + status = ue_v(stream, sliceHdr->slice_type); + } + else + { + status = ue_v(stream, slice_type); + } + + status = ue_v(stream, sliceHdr->pic_parameter_set_id); + + status = BitstreamWriteBits(stream, currSPS->log2_max_frame_num_minus4 + 4, sliceHdr->frame_num); + + if (status != AVCENC_SUCCESS) + { + return status; + } + /* if frame_mbs_only_flag is 0, encode field_pic_flag, bottom_field_flag here */ + + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + status = ue_v(stream, sliceHdr->idr_pic_id); + } + + if (currSPS->pic_order_cnt_type == 0) + { + status = BitstreamWriteBits(stream, currSPS->log2_max_pic_order_cnt_lsb_minus4 + 4, + sliceHdr->pic_order_cnt_lsb); + + if (currPPS->pic_order_present_flag && !sliceHdr->field_pic_flag) + { + status = se_v(stream, sliceHdr->delta_pic_order_cnt_bottom); /* 32 bits */ + } + } + if (currSPS->pic_order_cnt_type == 1 && !currSPS->delta_pic_order_always_zero_flag) + { + status = se_v(stream, sliceHdr->delta_pic_order_cnt[0]); /* 32 bits */ + if (currPPS->pic_order_present_flag && !sliceHdr->field_pic_flag) + { + status = se_v(stream, sliceHdr->delta_pic_order_cnt[1]); /* 32 bits */ + } + } + + if (currPPS->redundant_pic_cnt_present_flag) + { + status = ue_v(stream, sliceHdr->redundant_pic_cnt); + } + + if (slice_type == AVC_B_SLICE) + { + status = BitstreamWrite1Bit(stream, sliceHdr->direct_spatial_mv_pred_flag); + } + + if (status != AVCENC_SUCCESS) + { + return status; + } + + if (slice_type == AVC_P_SLICE || slice_type == AVC_SP_SLICE || slice_type == AVC_B_SLICE) + { + status = BitstreamWrite1Bit(stream, sliceHdr->num_ref_idx_active_override_flag); + if (sliceHdr->num_ref_idx_active_override_flag) + { + /* we shouldn't enter this part at all */ + status = ue_v(stream, sliceHdr->num_ref_idx_l0_active_minus1); + if (slice_type == AVC_B_SLICE) + { + status = ue_v(stream, sliceHdr->num_ref_idx_l1_active_minus1); + } + } + } + if (status != AVCENC_SUCCESS) + { + return status; + } + + /* ref_pic_list_reordering() */ + status = ref_pic_list_reordering(video, stream, sliceHdr, slice_type); + if (status != AVCENC_SUCCESS) + { + return status; + } + + if ((currPPS->weighted_pred_flag && (slice_type == AVC_P_SLICE || slice_type == AVC_SP_SLICE)) || + (currPPS->weighted_bipred_idc == 1 && slice_type == AVC_B_SLICE)) + { + // pred_weight_table(); // not supported !! + return AVCENC_PRED_WEIGHT_TAB_FAIL; + } + + if (video->nal_ref_idc != 0) + { + status = dec_ref_pic_marking(video, stream, sliceHdr); + if (status != AVCENC_SUCCESS) + { + return status; + } + } + + if (currPPS->entropy_coding_mode_flag && slice_type != AVC_I_SLICE && slice_type != AVC_SI_SLICE) + { + return AVCENC_CABAC_FAIL; + /* ue_v(stream,&(sliceHdr->cabac_init_idc)); + if(sliceHdr->cabac_init_idc > 2){ + // not supported !!!! 
+ }*/ + } + + status = se_v(stream, sliceHdr->slice_qp_delta); + if (status != AVCENC_SUCCESS) + { + return status; + } + + if (slice_type == AVC_SP_SLICE || slice_type == AVC_SI_SLICE) + { + if (slice_type == AVC_SP_SLICE) + { + status = BitstreamWrite1Bit(stream, sliceHdr->sp_for_switch_flag); + /* if sp_for_switch_flag is 0, P macroblocks in SP slice is decoded using + SP decoding process for non-switching pictures in 8.6.1 */ + /* else, P macroblocks in SP slice is decoded using SP and SI decoding + process for switching picture in 8.6.2 */ + } + status = se_v(stream, sliceHdr->slice_qs_delta); + if (status != AVCENC_SUCCESS) + { + return status; + } + } + + if (currPPS->deblocking_filter_control_present_flag) + { + + status = ue_v(stream, sliceHdr->disable_deblocking_filter_idc); + + if (sliceHdr->disable_deblocking_filter_idc != 1) + { + status = se_v(stream, sliceHdr->slice_alpha_c0_offset_div2); + + status = se_v(stream, sliceHdr->slice_beta_offset_div_2); + } + if (status != AVCENC_SUCCESS) + { + return status; + } + } + + if (currPPS->num_slice_groups_minus1 > 0 && currPPS->slice_group_map_type >= 3 + && currPPS->slice_group_map_type <= 5) + { + /* Ceil(Log2(PicSizeInMapUnits/(float)SliceGroupChangeRate + 1)) */ + temp = video->PicSizeInMapUnits / video->SliceGroupChangeRate; + if (video->PicSizeInMapUnits % video->SliceGroupChangeRate) + { + temp++; + } + i = 0; + while (temp > 1) + { + temp >>= 1; + i++; + } + + BitstreamWriteBits(stream, i, sliceHdr->slice_group_change_cycle); + } + + + encvid->rateCtrl->NumberofHeaderBits += (stream->write_pos << 3) - stream->bit_left - num_bits; + + return AVCENC_SUCCESS; +} + +/** see subclause 7.4.3.1 */ +AVCEnc_Status ref_pic_list_reordering(AVCCommonObj *video, AVCEncBitstream *stream, AVCSliceHeader *sliceHdr, int slice_type) +{ + (void)(video); + int i; + AVCEnc_Status status = AVCENC_SUCCESS; + + if (slice_type != AVC_I_SLICE && slice_type != AVC_SI_SLICE) + { + status = BitstreamWrite1Bit(stream, sliceHdr->ref_pic_list_reordering_flag_l0); + if (sliceHdr->ref_pic_list_reordering_flag_l0) + { + i = 0; + do + { + status = ue_v(stream, sliceHdr->reordering_of_pic_nums_idc_l0[i]); + if (sliceHdr->reordering_of_pic_nums_idc_l0[i] == 0 || + sliceHdr->reordering_of_pic_nums_idc_l0[i] == 1) + { + status = ue_v(stream, sliceHdr->abs_diff_pic_num_minus1_l0[i]); + /* this check should be in InitSlice(), if we ever use it */ + /*if(sliceHdr->reordering_of_pic_nums_idc_l0[i] == 0 && + sliceHdr->abs_diff_pic_num_minus1_l0[i] > video->MaxPicNum/2 -1) + { + return AVCENC_REF_PIC_REORDER_FAIL; // out of range + } + if(sliceHdr->reordering_of_pic_nums_idc_l0[i] == 1 && + sliceHdr->abs_diff_pic_num_minus1_l0[i] > video->MaxPicNum/2 -2) + { + return AVCENC_REF_PIC_REORDER_FAIL; // out of range + }*/ + } + else if (sliceHdr->reordering_of_pic_nums_idc_l0[i] == 2) + { + status = ue_v(stream, sliceHdr->long_term_pic_num_l0[i]); + } + i++; + } + while (sliceHdr->reordering_of_pic_nums_idc_l0[i] != 3 + && i <= (int)sliceHdr->num_ref_idx_l0_active_minus1 + 1) ; + } + } + if (slice_type == AVC_B_SLICE) + { + status = BitstreamWrite1Bit(stream, sliceHdr->ref_pic_list_reordering_flag_l1); + if (sliceHdr->ref_pic_list_reordering_flag_l1) + { + i = 0; + do + { + status = ue_v(stream, sliceHdr->reordering_of_pic_nums_idc_l1[i]); + if (sliceHdr->reordering_of_pic_nums_idc_l1[i] == 0 || + sliceHdr->reordering_of_pic_nums_idc_l1[i] == 1) + { + status = ue_v(stream, sliceHdr->abs_diff_pic_num_minus1_l1[i]); + /* This check should be in InitSlice() if we ever use it + 
if(sliceHdr->reordering_of_pic_nums_idc_l1[i] == 0 && + sliceHdr->abs_diff_pic_num_minus1_l1[i] > video->MaxPicNum/2 -1) + { + return AVCENC_REF_PIC_REORDER_FAIL; // out of range + } + if(sliceHdr->reordering_of_pic_nums_idc_l1[i] == 1 && + sliceHdr->abs_diff_pic_num_minus1_l1[i] > video->MaxPicNum/2 -2) + { + return AVCENC_REF_PIC_REORDER_FAIL; // out of range + }*/ + } + else if (sliceHdr->reordering_of_pic_nums_idc_l1[i] == 2) + { + status = ue_v(stream, sliceHdr->long_term_pic_num_l1[i]); + } + i++; + } + while (sliceHdr->reordering_of_pic_nums_idc_l1[i] != 3 + && i <= (int)sliceHdr->num_ref_idx_l1_active_minus1 + 1) ; + } + } + + return status; +} + +/** see subclause 7.4.3.3 */ +AVCEnc_Status dec_ref_pic_marking(AVCCommonObj *video, AVCEncBitstream *stream, AVCSliceHeader *sliceHdr) +{ + int i; + AVCEnc_Status status = AVCENC_SUCCESS; + + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + status = BitstreamWrite1Bit(stream, sliceHdr->no_output_of_prior_pics_flag); + status = BitstreamWrite1Bit(stream, sliceHdr->long_term_reference_flag); + if (sliceHdr->long_term_reference_flag == 0) /* used for short-term */ + { + video->MaxLongTermFrameIdx = -1; /* no long-term frame indx */ + } + else /* used for long-term */ + { + video->MaxLongTermFrameIdx = 0; + video->LongTermFrameIdx = 0; + } + } + else + { + status = BitstreamWrite1Bit(stream, sliceHdr->adaptive_ref_pic_marking_mode_flag); /* default to zero */ + if (sliceHdr->adaptive_ref_pic_marking_mode_flag) + { + i = 0; + do + { + status = ue_v(stream, sliceHdr->memory_management_control_operation[i]); + if (sliceHdr->memory_management_control_operation[i] == 1 || + sliceHdr->memory_management_control_operation[i] == 3) + { + status = ue_v(stream, sliceHdr->difference_of_pic_nums_minus1[i]); + } + if (sliceHdr->memory_management_control_operation[i] == 2) + { + status = ue_v(stream, sliceHdr->long_term_pic_num[i]); + } + if (sliceHdr->memory_management_control_operation[i] == 3 || + sliceHdr->memory_management_control_operation[i] == 6) + { + status = ue_v(stream, sliceHdr->long_term_frame_idx[i]); + } + if (sliceHdr->memory_management_control_operation[i] == 4) + { + status = ue_v(stream, sliceHdr->max_long_term_frame_idx_plus1[i]); + } + i++; + } + while (sliceHdr->memory_management_control_operation[i] != 0 && i < MAX_DEC_REF_PIC_MARKING); + if (i >= MAX_DEC_REF_PIC_MARKING && sliceHdr->memory_management_control_operation[i] != 0) + { + return AVCENC_DEC_REF_PIC_MARK_FAIL; /* we're screwed!!, not enough memory */ + } + } + } + + return status; +} + +/* see subclause 8.2.1 Decoding process for picture order count. +See also PostPOC() for initialization of some variables. 
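+For pic_order_cnt_type 0 the encoder derives pic_order_cnt_lsb from the display order
+relative to the last IDR frame, then reconstructs the MSB part with the wrap-around rule
+of subclause 8.2.1.1 so that encoder and decoder agree on PicOrderCnt.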
*/ +AVCEnc_Status InitPOC(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCSeqParamSet *currSPS = video->currSeqParams; + AVCSliceHeader *sliceHdr = video->sliceHdr; + AVCFrameIO *currInput = encvid->currInput; + int i; + + switch (currSPS->pic_order_cnt_type) + { + case 0: /* POC MODE 0 , subclause 8.2.1.1 */ + /* encoding part */ + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + encvid->dispOrdPOCRef = currInput->disp_order; + } + while (currInput->disp_order < encvid->dispOrdPOCRef) + { + encvid->dispOrdPOCRef -= video->MaxPicOrderCntLsb; + } + sliceHdr->pic_order_cnt_lsb = currInput->disp_order - encvid->dispOrdPOCRef; + while (sliceHdr->pic_order_cnt_lsb >= video->MaxPicOrderCntLsb) + { + sliceHdr->pic_order_cnt_lsb -= video->MaxPicOrderCntLsb; + } + /* decoding part */ + /* Calculate the MSBs of current picture */ + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + video->prevPicOrderCntMsb = 0; + video->prevPicOrderCntLsb = 0; + } + if (sliceHdr->pic_order_cnt_lsb < video->prevPicOrderCntLsb && + (video->prevPicOrderCntLsb - sliceHdr->pic_order_cnt_lsb) >= (video->MaxPicOrderCntLsb / 2)) + video->PicOrderCntMsb = video->prevPicOrderCntMsb + video->MaxPicOrderCntLsb; + else if (sliceHdr->pic_order_cnt_lsb > video->prevPicOrderCntLsb && + (sliceHdr->pic_order_cnt_lsb - video->prevPicOrderCntLsb) > (video->MaxPicOrderCntLsb / 2)) + video->PicOrderCntMsb = video->prevPicOrderCntMsb - video->MaxPicOrderCntLsb; + else + video->PicOrderCntMsb = video->prevPicOrderCntMsb; + + /* JVT-I010 page 81 is different from JM7.3 */ + if (!sliceHdr->field_pic_flag || !sliceHdr->bottom_field_flag) + { + video->PicOrderCnt = video->TopFieldOrderCnt = video->PicOrderCntMsb + sliceHdr->pic_order_cnt_lsb; + } + + if (!sliceHdr->field_pic_flag) + { + video->BottomFieldOrderCnt = video->TopFieldOrderCnt + sliceHdr->delta_pic_order_cnt_bottom; + } + else if (sliceHdr->bottom_field_flag) + { + video->PicOrderCnt = video->BottomFieldOrderCnt = video->PicOrderCntMsb + sliceHdr->pic_order_cnt_lsb; + } + + if (!sliceHdr->field_pic_flag) + { + video->PicOrderCnt = AVC_MIN(video->TopFieldOrderCnt, video->BottomFieldOrderCnt); + } + + if (video->currPicParams->pic_order_present_flag && !sliceHdr->field_pic_flag) + { + sliceHdr->delta_pic_order_cnt_bottom = 0; /* defaulted to zero */ + } + + break; + case 1: /* POC MODE 1, subclause 8.2.1.2 */ + /* calculate FrameNumOffset */ + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + encvid->dispOrdPOCRef = currInput->disp_order; /* reset the reference point */ + video->prevFrameNumOffset = 0; + video->FrameNumOffset = 0; + } + else if (video->prevFrameNum > sliceHdr->frame_num) + { + video->FrameNumOffset = video->prevFrameNumOffset + video->MaxFrameNum; + } + else + { + video->FrameNumOffset = video->prevFrameNumOffset; + } + /* calculate absFrameNum */ + if (currSPS->num_ref_frames_in_pic_order_cnt_cycle) + { + video->absFrameNum = video->FrameNumOffset + sliceHdr->frame_num; + } + else + { + video->absFrameNum = 0; + } + + if (video->absFrameNum > 0 && video->nal_ref_idc == 0) + { + video->absFrameNum--; + } + + /* derive picOrderCntCycleCnt and frameNumInPicOrderCntCycle */ + if (video->absFrameNum > 0) + { + video->picOrderCntCycleCnt = (video->absFrameNum - 1) / currSPS->num_ref_frames_in_pic_order_cnt_cycle; + video->frameNumInPicOrderCntCycle = (video->absFrameNum - 1) % currSPS->num_ref_frames_in_pic_order_cnt_cycle; + } + /* derive expectedDeltaPerPicOrderCntCycle, this value can be computed up front. 
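+               ExpectedDeltaPerPicOrderCntCycle is the sum of offset_for_ref_frame[i]
+               over the whole cycle (subclause 8.2.1.2), so it could be cached once at
+               SPS time instead of being recomputed per picture.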
*/ + video->expectedDeltaPerPicOrderCntCycle = 0; + for (i = 0; i < (int)currSPS->num_ref_frames_in_pic_order_cnt_cycle; i++) + { + video->expectedDeltaPerPicOrderCntCycle += currSPS->offset_for_ref_frame[i]; + } + /* derive expectedPicOrderCnt */ + if (video->absFrameNum) + { + video->expectedPicOrderCnt = video->picOrderCntCycleCnt * video->expectedDeltaPerPicOrderCntCycle; + for (i = 0; i <= video->frameNumInPicOrderCntCycle; i++) + { + video->expectedPicOrderCnt += currSPS->offset_for_ref_frame[i]; + } + } + else + { + video->expectedPicOrderCnt = 0; + } + + if (video->nal_ref_idc == 0) + { + video->expectedPicOrderCnt += currSPS->offset_for_non_ref_pic; + } + /* derive TopFieldOrderCnt and BottomFieldOrderCnt */ + /* encoding part */ + if (!currSPS->delta_pic_order_always_zero_flag) + { + sliceHdr->delta_pic_order_cnt[0] = currInput->disp_order - encvid->dispOrdPOCRef - video->expectedPicOrderCnt; + + if (video->currPicParams->pic_order_present_flag && !sliceHdr->field_pic_flag) + { + sliceHdr->delta_pic_order_cnt[1] = sliceHdr->delta_pic_order_cnt[0]; /* should be calculated from currInput->bottom_field->disp_order */ + } + else + { + sliceHdr->delta_pic_order_cnt[1] = 0; + } + } + else + { + sliceHdr->delta_pic_order_cnt[0] = sliceHdr->delta_pic_order_cnt[1] = 0; + } + + if (sliceHdr->field_pic_flag == 0) + { + video->TopFieldOrderCnt = video->expectedPicOrderCnt + sliceHdr->delta_pic_order_cnt[0]; + video->BottomFieldOrderCnt = video->TopFieldOrderCnt + currSPS->offset_for_top_to_bottom_field + sliceHdr->delta_pic_order_cnt[1]; + + video->PicOrderCnt = AVC_MIN(video->TopFieldOrderCnt, video->BottomFieldOrderCnt); + } + else if (sliceHdr->bottom_field_flag == 0) + { + video->TopFieldOrderCnt = video->expectedPicOrderCnt + sliceHdr->delta_pic_order_cnt[0]; + video->PicOrderCnt = video->TopFieldOrderCnt; + } + else + { + video->BottomFieldOrderCnt = video->expectedPicOrderCnt + currSPS->offset_for_top_to_bottom_field + sliceHdr->delta_pic_order_cnt[0]; + video->PicOrderCnt = video->BottomFieldOrderCnt; + } + break; + + + case 2: /* POC MODE 2, subclause 8.2.1.3 */ + /* decoding order must be the same as display order */ + /* we don't check for that. The decoder will just output in decoding order. 
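+               With pic_order_cnt_type 2 the POC is tied directly to frame_num:
+               2*(FrameNumOffset + frame_num) for reference pictures and one less for
+               non-reference pictures, so two consecutive non-reference frames would end
+               up with the same POC; that is why the check below rejects them.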
*/ + /* Check for 2 consecutive non-reference frame */ + if (video->nal_ref_idc == 0) + { + if (encvid->dispOrdPOCRef == 1) + { + return AVCENC_CONSECUTIVE_NONREF; + } + encvid->dispOrdPOCRef = 1; /* act as a flag for non ref */ + } + else + { + encvid->dispOrdPOCRef = 0; + } + + + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + video->FrameNumOffset = 0; + } + else if (video->prevFrameNum > sliceHdr->frame_num) + { + video->FrameNumOffset = video->prevFrameNumOffset + video->MaxFrameNum; + } + else + { + video->FrameNumOffset = video->prevFrameNumOffset; + } + /* derive tempPicOrderCnt, we just use PicOrderCnt */ + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + video->PicOrderCnt = 0; + } + else if (video->nal_ref_idc == 0) + { + video->PicOrderCnt = 2 * (video->FrameNumOffset + sliceHdr->frame_num) - 1; + } + else + { + video->PicOrderCnt = 2 * (video->FrameNumOffset + sliceHdr->frame_num); + } + /* derive TopFieldOrderCnt and BottomFieldOrderCnt */ + if (sliceHdr->field_pic_flag == 0) + { + video->TopFieldOrderCnt = video->BottomFieldOrderCnt = video->PicOrderCnt; + } + else if (sliceHdr->bottom_field_flag) + { + video->BottomFieldOrderCnt = video->PicOrderCnt; + } + else + { + video->TopFieldOrderCnt = video->PicOrderCnt; + } + break; + default: + return AVCENC_POC_FAIL; + } + + return AVCENC_SUCCESS; +} + +/** see subclause 8.2.1 */ +AVCEnc_Status PostPOC(AVCCommonObj *video) +{ + AVCSliceHeader *sliceHdr = video->sliceHdr; + AVCSeqParamSet *currSPS = video->currSeqParams; + + video->prevFrameNum = sliceHdr->frame_num; + + switch (currSPS->pic_order_cnt_type) + { + case 0: /* subclause 8.2.1.1 */ + if (video->mem_mgr_ctrl_eq_5) + { + video->prevPicOrderCntMsb = 0; + video->prevPicOrderCntLsb = video->TopFieldOrderCnt; + } + else + { + video->prevPicOrderCntMsb = video->PicOrderCntMsb; + video->prevPicOrderCntLsb = sliceHdr->pic_order_cnt_lsb; + } + break; + case 1: /* subclause 8.2.1.2 and 8.2.1.3 */ + case 2: + if (video->mem_mgr_ctrl_eq_5) + { + video->prevFrameNumOffset = 0; + } + else + { + video->prevFrameNumOffset = video->FrameNumOffset; + } + break; + } + + return AVCENC_SUCCESS; +} + diff --git a/media/libstagefright/codecs/avc/enc/src/init.cpp b/media/libstagefright/codecs/avc/enc/src/init.cpp new file mode 100644 index 0000000..c258b57 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/init.cpp @@ -0,0 +1,899 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" +#include "avcenc_api.h" + +#define LOG2_MAX_FRAME_NUM_MINUS4 12 /* 12 default */ +#define SLICE_GROUP_CHANGE_CYCLE 1 /* default */ + +/* initialized variables to be used in SPS*/ +AVCEnc_Status SetEncodeParam(AVCHandle* avcHandle, AVCEncParams* encParam, + void* extSPS, void* extPPS) +{ + AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; + AVCCommonObj *video = encvid->common; + AVCSeqParamSet *seqParam = video->currSeqParams; + AVCPicParamSet *picParam = video->currPicParams; + AVCSliceHeader *sliceHdr = video->sliceHdr; + AVCRateControl *rateCtrl = encvid->rateCtrl; + AVCEnc_Status status; + void *userData = avcHandle->userData; + int ii, maxFrameNum; + + AVCSeqParamSet* extS = NULL; + AVCPicParamSet* extP = NULL; + + if (extSPS) extS = (AVCSeqParamSet*) extSPS; + if (extPPS) extP = (AVCPicParamSet*) extPPS; + + /* This part sets the default values of the encoding options this + library supports in seqParam, picParam and sliceHdr structures and + also copy the values from the encParam into the above 3 structures. + + Some parameters will be assigned later when we encode SPS or PPS such as + the seq_parameter_id or pic_parameter_id. Also some of the slice parameters + have to be re-assigned per slice basis such as frame_num, slice_type, + first_mb_in_slice, pic_order_cnt_lsb, slice_qp_delta, slice_group_change_cycle */ + + /* profile_idc, constrained_setx_flag and level_idc is set by VerifyProfile(), + and VerifyLevel() functions later. */ + + encvid->fullsearch_enable = encParam->fullsearch; + + encvid->outOfBandParamSet = ((encParam->out_of_band_param_set == AVC_ON) ? TRUE : FALSE); + + /* parameters derived from the the encParam that are used in SPS */ + if (extS) + { + video->MaxPicOrderCntLsb = 1 << (extS->log2_max_pic_order_cnt_lsb_minus4 + 4); + video->PicWidthInMbs = extS->pic_width_in_mbs_minus1 + 1; + video->PicHeightInMapUnits = extS->pic_height_in_map_units_minus1 + 1 ; + video->FrameHeightInMbs = (2 - extS->frame_mbs_only_flag) * video->PicHeightInMapUnits ; + } + else + { + video->MaxPicOrderCntLsb = 1 << (encParam->log2_max_poc_lsb_minus_4 + 4); + video->PicWidthInMbs = (encParam->width + 15) >> 4; /* round it to multiple of 16 */ + video->FrameHeightInMbs = (encParam->height + 15) >> 4; /* round it to multiple of 16 */ + video->PicHeightInMapUnits = video->FrameHeightInMbs; + } + + video->PicWidthInSamplesL = video->PicWidthInMbs * 16 ; + if (video->PicWidthInSamplesL + 32 > 0xFFFF) + { + return AVCENC_NOT_SUPPORTED; // we use 2-bytes for pitch + } + + video->PicWidthInSamplesC = video->PicWidthInMbs * 8 ; + video->PicHeightInMbs = video->FrameHeightInMbs; + video->PicSizeInMapUnits = video->PicWidthInMbs * video->PicHeightInMapUnits ; + video->PicHeightInSamplesL = video->PicHeightInMbs * 16; + video->PicHeightInSamplesC = video->PicHeightInMbs * 8; + video->PicSizeInMbs = video->PicWidthInMbs * video->PicHeightInMbs; + + if (!extS && !extP) + { + maxFrameNum = (encParam->idr_period == -1) ? 
(1 << 16) : encParam->idr_period; + ii = 0; + while (maxFrameNum > 0) + { + ii++; + maxFrameNum >>= 1; + } + if (ii < 4) ii = 4; + else if (ii > 16) ii = 16; + + seqParam->log2_max_frame_num_minus4 = ii - 4;//LOG2_MAX_FRAME_NUM_MINUS4; /* default */ + + video->MaxFrameNum = 1 << ii; //(LOG2_MAX_FRAME_NUM_MINUS4 + 4); /* default */ + video->MaxPicNum = video->MaxFrameNum; + + /************* set the SPS *******************/ + seqParam->seq_parameter_set_id = 0; /* start with zero */ + /* POC */ + seqParam->pic_order_cnt_type = encParam->poc_type; /* POC type */ + if (encParam->poc_type == 0) + { + if (/*encParam->log2_max_poc_lsb_minus_4<0 || (no need, it's unsigned)*/ + encParam->log2_max_poc_lsb_minus_4 > 12) + { + return AVCENC_INVALID_POC_LSB; + } + seqParam->log2_max_pic_order_cnt_lsb_minus4 = encParam->log2_max_poc_lsb_minus_4; + } + else if (encParam->poc_type == 1) + { + seqParam->delta_pic_order_always_zero_flag = encParam->delta_poc_zero_flag; + seqParam->offset_for_non_ref_pic = encParam->offset_poc_non_ref; + seqParam->offset_for_top_to_bottom_field = encParam->offset_top_bottom; + seqParam->num_ref_frames_in_pic_order_cnt_cycle = encParam->num_ref_in_cycle; + if (encParam->offset_poc_ref == NULL) + { + return AVCENC_ENCPARAM_MEM_FAIL; + } + for (ii = 0; ii < encParam->num_ref_frame; ii++) + { + seqParam->offset_for_ref_frame[ii] = encParam->offset_poc_ref[ii]; + } + } + /* number of reference frame */ + if (encParam->num_ref_frame > 16 || encParam->num_ref_frame < 0) + { + return AVCENC_INVALID_NUM_REF; + } + seqParam->num_ref_frames = encParam->num_ref_frame; /* num reference frame range 0...16*/ + seqParam->gaps_in_frame_num_value_allowed_flag = FALSE; + seqParam->pic_width_in_mbs_minus1 = video->PicWidthInMbs - 1; + seqParam->pic_height_in_map_units_minus1 = video->PicHeightInMapUnits - 1; + seqParam->frame_mbs_only_flag = TRUE; + seqParam->mb_adaptive_frame_field_flag = FALSE; + seqParam->direct_8x8_inference_flag = FALSE; /* default */ + seqParam->frame_cropping_flag = FALSE; + seqParam->frame_crop_bottom_offset = 0; + seqParam->frame_crop_left_offset = 0; + seqParam->frame_crop_right_offset = 0; + seqParam->frame_crop_top_offset = 0; + seqParam->vui_parameters_present_flag = FALSE; /* default */ + } + else if (extS) // use external SPS and PPS + { + seqParam->seq_parameter_set_id = extS->seq_parameter_set_id; + seqParam->log2_max_frame_num_minus4 = extS->log2_max_frame_num_minus4; + video->MaxFrameNum = 1 << (extS->log2_max_frame_num_minus4 + 4); + video->MaxPicNum = video->MaxFrameNum; + if (encParam->idr_period > (int)(video->MaxFrameNum) || (encParam->idr_period == -1)) + { + encParam->idr_period = (int)video->MaxFrameNum; + } + + seqParam->pic_order_cnt_type = extS->pic_order_cnt_type; + if (seqParam->pic_order_cnt_type == 0) + { + if (/*extS->log2_max_pic_order_cnt_lsb_minus4<0 || (no need it's unsigned)*/ + extS->log2_max_pic_order_cnt_lsb_minus4 > 12) + { + return AVCENC_INVALID_POC_LSB; + } + seqParam->log2_max_pic_order_cnt_lsb_minus4 = extS->log2_max_pic_order_cnt_lsb_minus4; + } + else if (seqParam->pic_order_cnt_type == 1) + { + seqParam->delta_pic_order_always_zero_flag = extS->delta_pic_order_always_zero_flag; + seqParam->offset_for_non_ref_pic = extS->offset_for_non_ref_pic; + seqParam->offset_for_top_to_bottom_field = extS->offset_for_top_to_bottom_field; + seqParam->num_ref_frames_in_pic_order_cnt_cycle = extS->num_ref_frames_in_pic_order_cnt_cycle; + if (extS->offset_for_ref_frame == NULL) + { + return AVCENC_ENCPARAM_MEM_FAIL; + } + for (ii = 0; ii < 
(int) extS->num_ref_frames; ii++) + { + seqParam->offset_for_ref_frame[ii] = extS->offset_for_ref_frame[ii]; + } + } + /* number of reference frame */ + if (extS->num_ref_frames > 16 /*|| extS->num_ref_frames<0 (no need, it's unsigned)*/) + { + return AVCENC_INVALID_NUM_REF; + } + seqParam->num_ref_frames = extS->num_ref_frames; /* num reference frame range 0...16*/ + seqParam->gaps_in_frame_num_value_allowed_flag = extS->gaps_in_frame_num_value_allowed_flag; + seqParam->pic_width_in_mbs_minus1 = extS->pic_width_in_mbs_minus1; + seqParam->pic_height_in_map_units_minus1 = extS->pic_height_in_map_units_minus1; + seqParam->frame_mbs_only_flag = extS->frame_mbs_only_flag; + if (extS->frame_mbs_only_flag != TRUE) + { + return AVCENC_NOT_SUPPORTED; + } + seqParam->mb_adaptive_frame_field_flag = extS->mb_adaptive_frame_field_flag; + if (extS->mb_adaptive_frame_field_flag != FALSE) + { + return AVCENC_NOT_SUPPORTED; + } + + seqParam->direct_8x8_inference_flag = extS->direct_8x8_inference_flag; + seqParam->frame_cropping_flag = extS->frame_cropping_flag ; + if (extS->frame_cropping_flag != FALSE) + { + return AVCENC_NOT_SUPPORTED; + } + + seqParam->frame_crop_bottom_offset = 0; + seqParam->frame_crop_left_offset = 0; + seqParam->frame_crop_right_offset = 0; + seqParam->frame_crop_top_offset = 0; + seqParam->vui_parameters_present_flag = extS->vui_parameters_present_flag; + if (extS->vui_parameters_present_flag) + { + memcpy(&(seqParam->vui_parameters), &(extS->vui_parameters), sizeof(AVCVUIParams)); + } + } + else + { + return AVCENC_NOT_SUPPORTED; + } + + /***************** now PPS ******************************/ + if (!extP && !extS) + { + picParam->pic_parameter_set_id = (uint)(-1); /* start with zero */ + picParam->seq_parameter_set_id = (uint)(-1); /* start with zero */ + picParam->entropy_coding_mode_flag = 0; /* default to CAVLC */ + picParam->pic_order_present_flag = 0; /* default for now, will need it for B-slice */ + /* FMO */ + if (encParam->num_slice_group < 1 || encParam->num_slice_group > MAX_NUM_SLICE_GROUP) + { + return AVCENC_INVALID_NUM_SLICEGROUP; + } + picParam->num_slice_groups_minus1 = encParam->num_slice_group - 1; + + if (picParam->num_slice_groups_minus1 > 0) + { + picParam->slice_group_map_type = encParam->fmo_type; + switch (encParam->fmo_type) + { + case 0: + for (ii = 0; ii <= (int)picParam->num_slice_groups_minus1; ii++) + { + picParam->run_length_minus1[ii] = encParam->run_length_minus1[ii]; + } + break; + case 2: + for (ii = 0; ii < (int)picParam->num_slice_groups_minus1; ii++) + { + picParam->top_left[ii] = encParam->top_left[ii]; + picParam->bottom_right[ii] = encParam->bottom_right[ii]; + } + break; + case 3: + case 4: + case 5: + if (encParam->change_dir_flag == AVC_ON) + { + picParam->slice_group_change_direction_flag = TRUE; + } + else + { + picParam->slice_group_change_direction_flag = FALSE; + } + if (/*encParam->change_rate_minus1 < 0 || (no need it's unsigned) */ + encParam->change_rate_minus1 > video->PicSizeInMapUnits - 1) + { + return AVCENC_INVALID_CHANGE_RATE; + } + picParam->slice_group_change_rate_minus1 = encParam->change_rate_minus1; + video->SliceGroupChangeRate = picParam->slice_group_change_rate_minus1 + 1; + break; + case 6: + picParam->pic_size_in_map_units_minus1 = video->PicSizeInMapUnits - 1; + + /* allocate picParam->slice_group_id */ + picParam->slice_group_id = (uint*)avcHandle->CBAVC_Malloc(userData, sizeof(uint) * video->PicSizeInMapUnits, DEFAULT_ATTR); + if (picParam->slice_group_id == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + 
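+                    /* slice_group_map_type 6 is the fully explicit FMO map: the caller
+                       supplies one slice group id per map unit (one per macroblock here,
+                       since only frame_mbs_only coding is supported), and the ids are
+                       simply copied into the PPS below. */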
if (encParam->slice_group == NULL) + { + return AVCENC_ENCPARAM_MEM_FAIL; + } + for (ii = 0; ii < (int)video->PicSizeInMapUnits; ii++) + { + picParam->slice_group_id[ii] = encParam->slice_group[ii]; + } + break; + default: + return AVCENC_INVALID_FMO_TYPE; + } + } + picParam->num_ref_idx_l0_active_minus1 = encParam->num_ref_frame - 1; /* assume frame only */ + picParam->num_ref_idx_l1_active_minus1 = 0; /* default value */ + picParam->weighted_pred_flag = 0; /* no weighted prediction supported */ + picParam->weighted_bipred_idc = 0; /* range 0,1,2 */ + if (/*picParam->weighted_bipred_idc < 0 || (no need, it's unsigned) */ + picParam->weighted_bipred_idc > 2) + { + return AVCENC_WEIGHTED_BIPRED_FAIL; + } + picParam->pic_init_qp_minus26 = 0; /* default, will be changed at slice level anyway */ + if (picParam->pic_init_qp_minus26 < -26 || picParam->pic_init_qp_minus26 > 25) + { + return AVCENC_INIT_QP_FAIL; /* out of range */ + } + picParam->pic_init_qs_minus26 = 0; + if (picParam->pic_init_qs_minus26 < -26 || picParam->pic_init_qs_minus26 > 25) + { + return AVCENC_INIT_QS_FAIL; /* out of range */ + } + + picParam->chroma_qp_index_offset = 0; /* default to zero for now */ + if (picParam->chroma_qp_index_offset < -12 || picParam->chroma_qp_index_offset > 12) + { + return AVCENC_CHROMA_QP_FAIL; /* out of range */ + } + /* deblocking */ + picParam->deblocking_filter_control_present_flag = (encParam->db_filter == AVC_ON) ? TRUE : FALSE ; + /* constrained intra prediction */ + picParam->constrained_intra_pred_flag = (encParam->constrained_intra_pred == AVC_ON) ? TRUE : FALSE; + picParam->redundant_pic_cnt_present_flag = 0; /* default */ + } + else if (extP)// external PPS + { + picParam->pic_parameter_set_id = extP->pic_parameter_set_id - 1; /* to be increased by one */ + picParam->seq_parameter_set_id = extP->seq_parameter_set_id; + picParam->entropy_coding_mode_flag = extP->entropy_coding_mode_flag; + if (extP->entropy_coding_mode_flag != 0) /* default to CAVLC */ + { + return AVCENC_NOT_SUPPORTED; + } + picParam->pic_order_present_flag = extP->pic_order_present_flag; /* default for now, will need it for B-slice */ + if (extP->pic_order_present_flag != 0) + { + return AVCENC_NOT_SUPPORTED; + } + /* FMO */ + if (/*(extP->num_slice_groups_minus1<0) || (no need it's unsigned) */ + (extP->num_slice_groups_minus1 > MAX_NUM_SLICE_GROUP - 1)) + { + return AVCENC_INVALID_NUM_SLICEGROUP; + } + picParam->num_slice_groups_minus1 = extP->num_slice_groups_minus1; + + if (picParam->num_slice_groups_minus1 > 0) + { + picParam->slice_group_map_type = extP->slice_group_map_type; + switch (extP->slice_group_map_type) + { + case 0: + for (ii = 0; ii <= (int)extP->num_slice_groups_minus1; ii++) + { + picParam->run_length_minus1[ii] = extP->run_length_minus1[ii]; + } + break; + case 2: + for (ii = 0; ii < (int)picParam->num_slice_groups_minus1; ii++) + { + picParam->top_left[ii] = extP->top_left[ii]; + picParam->bottom_right[ii] = extP->bottom_right[ii]; + } + break; + case 3: + case 4: + case 5: + picParam->slice_group_change_direction_flag = extP->slice_group_change_direction_flag; + if (/*extP->slice_group_change_rate_minus1 < 0 || (no need, it's unsigned) */ + extP->slice_group_change_rate_minus1 > video->PicSizeInMapUnits - 1) + { + return AVCENC_INVALID_CHANGE_RATE; + } + picParam->slice_group_change_rate_minus1 = extP->slice_group_change_rate_minus1; + video->SliceGroupChangeRate = picParam->slice_group_change_rate_minus1 + 1; + break; + case 6: + if (extP->pic_size_in_map_units_minus1 != 
video->PicSizeInMapUnits - 1) + { + return AVCENC_NOT_SUPPORTED; + } + + picParam->pic_size_in_map_units_minus1 = extP->pic_size_in_map_units_minus1; + + /* allocate picParam->slice_group_id */ + picParam->slice_group_id = (uint*)avcHandle->CBAVC_Malloc(userData, sizeof(uint) * video->PicSizeInMapUnits, DEFAULT_ATTR); + if (picParam->slice_group_id == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + if (extP->slice_group_id == NULL) + { + return AVCENC_ENCPARAM_MEM_FAIL; + } + for (ii = 0; ii < (int)video->PicSizeInMapUnits; ii++) + { + picParam->slice_group_id[ii] = extP->slice_group_id[ii]; + } + break; + default: + return AVCENC_INVALID_FMO_TYPE; + } + } + picParam->num_ref_idx_l0_active_minus1 = extP->num_ref_idx_l0_active_minus1; + picParam->num_ref_idx_l1_active_minus1 = extP->num_ref_idx_l1_active_minus1; /* default value */ + if (picParam->num_ref_idx_l1_active_minus1 != 0) + { + return AVCENC_NOT_SUPPORTED; + } + + if (extP->weighted_pred_flag) + { + return AVCENC_NOT_SUPPORTED; + } + + picParam->weighted_pred_flag = 0; /* no weighted prediction supported */ + picParam->weighted_bipred_idc = extP->weighted_bipred_idc; /* range 0,1,2 */ + if (/*picParam->weighted_bipred_idc < 0 || (no need, it's unsigned) */ + picParam->weighted_bipred_idc > 2) + { + return AVCENC_WEIGHTED_BIPRED_FAIL; + } + picParam->pic_init_qp_minus26 = extP->pic_init_qp_minus26; /* default, will be changed at slice level anyway */ + if (picParam->pic_init_qp_minus26 < -26 || picParam->pic_init_qp_minus26 > 25) + { + return AVCENC_INIT_QP_FAIL; /* out of range */ + } + picParam->pic_init_qs_minus26 = extP->pic_init_qs_minus26; + if (picParam->pic_init_qs_minus26 < -26 || picParam->pic_init_qs_minus26 > 25) + { + return AVCENC_INIT_QS_FAIL; /* out of range */ + } + + picParam->chroma_qp_index_offset = extP->chroma_qp_index_offset; /* default to zero for now */ + if (picParam->chroma_qp_index_offset < -12 || picParam->chroma_qp_index_offset > 12) + { + return AVCENC_CHROMA_QP_FAIL; /* out of range */ + } + /* deblocking */ + picParam->deblocking_filter_control_present_flag = extP->deblocking_filter_control_present_flag; + /* constrained intra prediction */ + picParam->constrained_intra_pred_flag = extP->constrained_intra_pred_flag; + if (extP->redundant_pic_cnt_present_flag != 0) + { + return AVCENC_NOT_SUPPORTED; + } + picParam->redundant_pic_cnt_present_flag = extP->redundant_pic_cnt_present_flag; /* default */ + } + else + { + return AVCENC_NOT_SUPPORTED; + } + + /****************** now set up some SliceHeader parameters ***********/ + if (picParam->deblocking_filter_control_present_flag == TRUE) + { + /* these values only present when db_filter is ON */ + if (encParam->disable_db_idc > 2) + { + return AVCENC_INVALID_DEBLOCK_IDC; /* out of range */ + } + sliceHdr->disable_deblocking_filter_idc = encParam->disable_db_idc; + + if (encParam->alpha_offset < -6 || encParam->alpha_offset > 6) + { + return AVCENC_INVALID_ALPHA_OFFSET; + } + sliceHdr->slice_alpha_c0_offset_div2 = encParam->alpha_offset; + + if (encParam->beta_offset < -6 || encParam->beta_offset > 6) + { + return AVCENC_INVALID_BETA_OFFSET; + } + sliceHdr->slice_beta_offset_div_2 = encParam->beta_offset; + } + if (encvid->outOfBandParamSet == TRUE) + { + sliceHdr->idr_pic_id = 0; + } + else + { + sliceHdr->idr_pic_id = (uint)(-1); /* start with zero */ + } + sliceHdr->field_pic_flag = FALSE; + sliceHdr->bottom_field_flag = FALSE; /* won't be used anyway */ + video->MbaffFrameFlag = (seqParam->mb_adaptive_frame_field_flag && !sliceHdr->field_pic_flag); + 
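+    /* Note: alpha_offset and beta_offset above are div2 values, so the accepted
+       -6..6 range corresponds to actual deblocking filter offsets of -12..12
+       (FilterOffsetA/B are derived from them, shifted left by one, in InitSlice()). */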
+ /* the rest will be set in InitSlice() */ + + /* now the rate control and performance related parameters */ + rateCtrl->scdEnable = (encParam->auto_scd == AVC_ON) ? TRUE : FALSE; + rateCtrl->idrPeriod = encParam->idr_period + 1; + rateCtrl->intraMBRate = encParam->intramb_refresh; + rateCtrl->dpEnable = (encParam->data_par == AVC_ON) ? TRUE : FALSE; + + rateCtrl->subPelEnable = (encParam->sub_pel == AVC_ON) ? TRUE : FALSE; + rateCtrl->mvRange = encParam->search_range; + + rateCtrl->subMBEnable = (encParam->submb_pred == AVC_ON) ? TRUE : FALSE; + rateCtrl->rdOptEnable = (encParam->rdopt_mode == AVC_ON) ? TRUE : FALSE; + rateCtrl->bidirPred = (encParam->bidir_pred == AVC_ON) ? TRUE : FALSE; + + rateCtrl->rcEnable = (encParam->rate_control == AVC_ON) ? TRUE : FALSE; + rateCtrl->initQP = encParam->initQP; + rateCtrl->initQP = AVC_CLIP3(0, 51, rateCtrl->initQP); + + rateCtrl->bitRate = encParam->bitrate; + rateCtrl->cpbSize = encParam->CPB_size; + rateCtrl->initDelayOffset = (rateCtrl->bitRate * encParam->init_CBP_removal_delay / 1000); + + if (encParam->frame_rate == 0) + { + return AVCENC_INVALID_FRAMERATE; + } + + rateCtrl->frame_rate = (OsclFloat)(encParam->frame_rate * 1.0 / 1000); +// rateCtrl->srcInterval = encParam->src_interval; + rateCtrl->first_frame = 1; /* set this flag for the first time */ + + /* contrained_setx_flag will be set inside the VerifyProfile called below.*/ + if (!extS && !extP) + { + seqParam->profile_idc = encParam->profile; + seqParam->constrained_set0_flag = FALSE; + seqParam->constrained_set1_flag = FALSE; + seqParam->constrained_set2_flag = FALSE; + seqParam->constrained_set3_flag = FALSE; + seqParam->level_idc = encParam->level; + } + else + { + seqParam->profile_idc = extS->profile_idc; + seqParam->constrained_set0_flag = extS->constrained_set0_flag; + seqParam->constrained_set1_flag = extS->constrained_set1_flag; + seqParam->constrained_set2_flag = extS->constrained_set2_flag; + seqParam->constrained_set3_flag = extS->constrained_set3_flag; + seqParam->level_idc = extS->level_idc; + } + + + status = VerifyProfile(encvid, seqParam, picParam); + if (status != AVCENC_SUCCESS) + { + return status; + } + + status = VerifyLevel(encvid, seqParam, picParam); + if (status != AVCENC_SUCCESS) + { + return status; + } + + return AVCENC_SUCCESS; +} + +/* verify the profile setting */ +AVCEnc_Status VerifyProfile(AVCEncObject *encvid, AVCSeqParamSet *seqParam, AVCPicParamSet *picParam) +{ + AVCRateControl *rateCtrl = encvid->rateCtrl; + AVCEnc_Status status = AVCENC_SUCCESS; + + if (seqParam->profile_idc == 0) /* find profile for this setting */ + { + /* find the right profile for it */ + if (seqParam->direct_8x8_inference_flag == TRUE && + picParam->entropy_coding_mode_flag == FALSE && + picParam->num_slice_groups_minus1 <= 7 /*&& + picParam->num_slice_groups_minus1>=0 (no need, it's unsigned) */) + { + seqParam->profile_idc = AVC_EXTENDED; + seqParam->constrained_set2_flag = TRUE; + } + + if (rateCtrl->dpEnable == FALSE && + picParam->num_slice_groups_minus1 == 0 && + picParam->redundant_pic_cnt_present_flag == FALSE) + { + seqParam->profile_idc = AVC_MAIN; + seqParam->constrained_set1_flag = TRUE; + } + + if (rateCtrl->bidirPred == FALSE && + rateCtrl->dpEnable == FALSE && + seqParam->frame_mbs_only_flag == TRUE && + picParam->weighted_pred_flag == FALSE && + picParam->weighted_bipred_idc == 0 && + picParam->entropy_coding_mode_flag == FALSE && + picParam->num_slice_groups_minus1 <= 7 /*&& + picParam->num_slice_groups_minus1>=0 (no need, it's unsigned)*/) + { + 
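+        /* every tool in use fits Baseline: no B prediction, no data partitioning,
+           frame MBs only, no weighted prediction, CAVLC, and at most 8 slice
+           groups -- Baseline is also the only profile this library actually
+           encodes (see the switch below). */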
seqParam->profile_idc = AVC_BASELINE; + seqParam->constrained_set0_flag = TRUE; + } + + if (seqParam->profile_idc == 0) /* still zero */ + { + return AVCENC_PROFILE_NOT_SUPPORTED; + } + } + + /* check the list of supported profile by this library */ + switch (seqParam->profile_idc) + { + case AVC_BASELINE: + if (rateCtrl->bidirPred == TRUE || + rateCtrl->dpEnable == TRUE || + seqParam->frame_mbs_only_flag != TRUE || + picParam->weighted_pred_flag == TRUE || + picParam->weighted_bipred_idc != 0 || + picParam->entropy_coding_mode_flag == TRUE || + picParam->num_slice_groups_minus1 > 7 /*|| + picParam->num_slice_groups_minus1<0 (no need, it's unsigned) */) + { + status = AVCENC_TOOLS_NOT_SUPPORTED; + } + break; + + case AVC_MAIN: + case AVC_EXTENDED: + status = AVCENC_PROFILE_NOT_SUPPORTED; + } + + return status; +} + +/* verify the level setting */ +AVCEnc_Status VerifyLevel(AVCEncObject *encvid, AVCSeqParamSet *seqParam, AVCPicParamSet *picParam) +{ + (void)(picParam); + + AVCRateControl *rateCtrl = encvid->rateCtrl; + AVCCommonObj *video = encvid->common; + int mb_per_sec, ii; + int lev_idx; + int dpb_size; + + mb_per_sec = (int)(video->PicSizeInMbs * rateCtrl->frame_rate + 0.5); + dpb_size = (seqParam->num_ref_frames * video->PicSizeInMbs * 3) >> 6; + + if (seqParam->level_idc == 0) /* find level for this setting */ + { + for (ii = 0; ii < MAX_LEVEL_IDX; ii++) + { + if (mb_per_sec <= MaxMBPS[ii] && + video->PicSizeInMbs <= (uint)MaxFS[ii] && + rateCtrl->bitRate <= (int32)MaxBR[ii]*1000 && + rateCtrl->cpbSize <= (int32)MaxCPB[ii]*1000 && + rateCtrl->mvRange <= MaxVmvR[ii] && + dpb_size <= MaxDPBX2[ii]*512) + { + seqParam->level_idc = mapIdx2Lev[ii]; + break; + } + } + if (seqParam->level_idc == 0) + { + return AVCENC_LEVEL_NOT_SUPPORTED; + } + } + + /* check if this level is supported by this library */ + lev_idx = mapLev2Idx[seqParam->level_idc]; + if (seqParam->level_idc == AVC_LEVEL1_B) + { + seqParam->constrained_set3_flag = 1; + } + + + if (lev_idx == 255) /* not defined */ + { + return AVCENC_LEVEL_NOT_SUPPORTED; + } + + /* check if the encoding setting complies with the level */ + if (mb_per_sec > MaxMBPS[lev_idx] || + video->PicSizeInMbs > (uint)MaxFS[lev_idx] || + rateCtrl->bitRate > (int32)MaxBR[lev_idx]*1000 || + rateCtrl->cpbSize > (int32)MaxCPB[lev_idx]*1000 || + rateCtrl->mvRange > MaxVmvR[lev_idx]) + { + return AVCENC_LEVEL_FAIL; + } + + return AVCENC_SUCCESS; +} + +/* initialize variables at the beginning of each frame */ +/* determine the picture type */ +/* encode POC */ +/* maybe we should do more stuff here. MotionEstimation+SCD and generate a new SPS and PPS */ +AVCEnc_Status InitFrame(AVCEncObject *encvid) +{ + AVCStatus ret; + AVCEnc_Status status; + AVCCommonObj *video = encvid->common; + AVCSliceHeader *sliceHdr = video->sliceHdr; + + /* look for the next frame in coding_order and look for available picture + in the DPB. Note, video->currFS->PicOrderCnt, currFS->FrameNum and currPic->PicNum + are set to wrong number in this function (right for decoder). */ + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + // call init DPB in here. + ret = AVCConfigureSequence(encvid->avcHandle, video, TRUE); + if (ret != AVC_SUCCESS) + { + return AVCENC_FAIL; + } + } + + /* flexible macroblock ordering (every frame)*/ + /* populate video->mapUnitToSliceGroupMap and video->MbToSliceGroupMap */ + /* It changes once per each PPS. 
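+       For example, with slice_group_map_type 0, two slice groups and
+       run_length_minus1 = {49, 48} on a QCIF picture (11x9 = 99 map units),
+       FMOInit() puts map units 0..49 in slice group 0 and 50..98 in group 1.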
*/ + FMOInit(video); + + ret = DPBInitBuffer(encvid->avcHandle, video); // get new buffer + + if (ret != AVC_SUCCESS) + { + return (AVCEnc_Status)ret; // AVCENC_PICTURE_READY, FAIL + } + + DPBInitPic(video, 0); /* 0 is dummy */ + + /************* determine picture type IDR or non-IDR ***********/ + video->currPicType = AVC_FRAME; + video->slice_data_partitioning = FALSE; + encvid->currInput->is_reference = 1; /* default to all frames */ + video->nal_ref_idc = 1; /* need to set this for InitPOC */ + video->currPic->isReference = TRUE; + + /************* set frame_num ********************/ + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + video->prevFrameNum = video->MaxFrameNum; + video->PrevRefFrameNum = 0; + sliceHdr->frame_num = 0; + } + /* otherwise, it's set to previous reference frame access unit's frame_num in decoding order, + see the end of PVAVCDecodeSlice()*/ + /* There's also restriction on the frame_num, see page 59 of JVT-I1010.doc. */ + /* Basically, frame_num can't be repeated unless it's opposite fields or non reference fields */ + else + { + sliceHdr->frame_num = (video->PrevRefFrameNum + 1) % video->MaxFrameNum; + } + video->CurrPicNum = sliceHdr->frame_num; /* for field_pic_flag = 0 */ + //video->CurrPicNum = 2*sliceHdr->frame_num + 1; /* for field_pic_flag = 1 */ + + /* assign pic_order_cnt, video->PicOrderCnt */ + status = InitPOC(encvid); + if (status != AVCENC_SUCCESS) /* incorrigable fail */ + { + return status; + } + + /* Initialize refListIdx for this picture */ + RefListInit(video); + + /************* motion estimation and scene analysis ************/ + // , to move this to MB-based MV search for comparison + // use sub-optimal QP for mv search + AVCMotionEstimation(encvid); /* AVCENC_SUCCESS or AVCENC_NEW_IDR */ + + /* after this point, the picture type will be fixed to either IDR or non-IDR */ + video->currFS->PicOrderCnt = video->PicOrderCnt; + video->currFS->FrameNum = video->sliceHdr->frame_num; + video->currPic->PicNum = video->CurrPicNum; + video->mbNum = 0; /* start from zero MB */ + encvid->currSliceGroup = 0; /* start from slice group #0 */ + encvid->numIntraMB = 0; /* reset this counter */ + + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + RCInitGOP(encvid); + + /* calculate picture QP */ + RCInitFrameQP(encvid); + + return AVCENC_NEW_IDR; + } + + /* calculate picture QP */ + RCInitFrameQP(encvid); /* get QP after MV search */ + + return AVCENC_SUCCESS; +} + +/* initialize variables for this slice */ +AVCEnc_Status InitSlice(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCSliceHeader *sliceHdr = video->sliceHdr; + AVCPicParamSet *currPPS = video->currPicParams; + AVCSeqParamSet *currSPS = video->currSeqParams; + int slice_type = video->slice_type; + + sliceHdr->first_mb_in_slice = video->mbNum; + if (video->mbNum) // not first slice of a frame + { + video->sliceHdr->slice_type = (AVCSliceType)slice_type; + } + + /* sliceHdr->slice_type already set in InitFrame */ + + sliceHdr->pic_parameter_set_id = video->currPicParams->pic_parameter_set_id; + + /* sliceHdr->frame_num already set in InitFrame */ + + if (!currSPS->frame_mbs_only_flag) /* we shouldn't need this check */ + { + sliceHdr->field_pic_flag = sliceHdr->bottom_field_flag = FALSE; + return AVCENC_TOOLS_NOT_SUPPORTED; + } + + /* sliceHdr->idr_pic_id already set in PVAVCEncodeNAL + + sliceHdr->pic_order_cnt_lsb already set in InitFrame..InitPOC + sliceHdr->delta_pic_order_cnt_bottom already set in InitPOC + + sliceHdr->delta_pic_order_cnt[0] already set in InitPOC + 
sliceHdr->delta_pic_order_cnt[1] already set in InitPOC + */ + + sliceHdr->redundant_pic_cnt = 0; /* default if(currPPS->redundant_pic_cnt_present_flag), range 0..127 */ + sliceHdr->direct_spatial_mv_pred_flag = 0; // default if(slice_type == AVC_B_SLICE) + + sliceHdr->num_ref_idx_active_override_flag = FALSE; /* default, if(slice_type== P,SP or B)*/ + sliceHdr->num_ref_idx_l0_active_minus1 = 0; /* default, if (num_ref_idx_active_override_flag) */ + sliceHdr->num_ref_idx_l1_active_minus1 = 0; /* default, if above and B_slice */ + /* the above 2 values range from 0..15 for frame picture and 0..31 for field picture */ + + /* ref_pic_list_reordering(), currently we don't do anything */ + sliceHdr->ref_pic_list_reordering_flag_l0 = FALSE; /* default */ + sliceHdr->ref_pic_list_reordering_flag_l1 = FALSE; /* default */ + /* if the above are TRUE, some other params must be set */ + + if ((currPPS->weighted_pred_flag && (slice_type == AVC_P_SLICE || slice_type == AVC_SP_SLICE)) || + (currPPS->weighted_bipred_idc == 1 && slice_type == AVC_B_SLICE)) + { + // pred_weight_table(); // not supported !! + return AVCENC_TOOLS_NOT_SUPPORTED; + } + + /* dec_ref_pic_marking(), this will be done later*/ + sliceHdr->no_output_of_prior_pics_flag = FALSE; /* default */ + sliceHdr->long_term_reference_flag = FALSE; /* for IDR frame, do not make it long term */ + sliceHdr->adaptive_ref_pic_marking_mode_flag = FALSE; /* default */ + /* other params are not set here because they are not used */ + + sliceHdr->cabac_init_idc = 0; /* default, if entropy_coding_mode_flag && slice_type==I or SI, range 0..2 */ + sliceHdr->slice_qp_delta = 0; /* default for now */ + sliceHdr->sp_for_switch_flag = FALSE; /* default, if slice_type == SP */ + sliceHdr->slice_qs_delta = 0; /* default, if slice_type == SP or SI */ + + /* derived variables from encParam */ + /* deblocking filter */ + video->FilterOffsetA = video->FilterOffsetB = 0; + if (currPPS->deblocking_filter_control_present_flag == TRUE) + { + video->FilterOffsetA = sliceHdr->slice_alpha_c0_offset_div2 << 1; + video->FilterOffsetB = sliceHdr->slice_beta_offset_div_2 << 1; + } + + /* flexible macroblock ordering */ + /* populate video->mapUnitToSliceGroupMap and video->MbToSliceGroupMap */ + /* We already call it at the end of PVAVCEncInitialize(). It changes once per each PPS. */ + if (video->currPicParams->num_slice_groups_minus1 > 0 && video->currPicParams->slice_group_map_type >= 3 + && video->currPicParams->slice_group_map_type <= 5) + { + sliceHdr->slice_group_change_cycle = SLICE_GROUP_CHANGE_CYCLE; /* default, don't understand how to set it!!!*/ + + video->MapUnitsInSliceGroup0 = + AVC_MIN(sliceHdr->slice_group_change_cycle * video->SliceGroupChangeRate, video->PicSizeInMapUnits); + + FMOInit(video); + } + + /* calculate SliceQPy first */ + /* calculate QSy first */ + + sliceHdr->slice_qp_delta = video->QPy - 26 - currPPS->pic_init_qp_minus26; + //sliceHdr->slice_qs_delta = video->QSy - 26 - currPPS->pic_init_qs_minus26; + + return AVCENC_SUCCESS; +} + diff --git a/media/libstagefright/codecs/avc/enc/src/intra_est.cpp b/media/libstagefright/codecs/avc/enc/src/intra_est.cpp new file mode 100644 index 0000000..17e5985 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/intra_est.cpp @@ -0,0 +1,2199 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + +#define TH_I4 0 /* threshold biasing toward I16 mode instead of I4 mode */ +#define TH_Intra 0 /* threshold biasing toward INTER mode instead of intra mode */ + +#define FIXED_INTRAPRED_MODE AVC_I16 +#define FIXED_I16_MODE AVC_I16_DC +#define FIXED_I4_MODE AVC_I4_Diagonal_Down_Left +#define FIXED_INTRA_CHROMA_MODE AVC_IC_DC + +#define CLIP_RESULT(x) if((uint)x > 0xFF){ \ + x = 0xFF & (~(x>>31));} + + +bool IntraDecisionABE(AVCEncObject *encvid, int min_cost, uint8 *curL, int picPitch) +{ + AVCCommonObj *video = encvid->common; + AVCFrameIO *currInput = encvid->currInput; + int orgPitch = currInput->pitch; + int x_pos = (video->mb_x) << 4; + int y_pos = (video->mb_y) << 4; + uint8 *orgY = currInput->YCbCr[0] + y_pos * orgPitch + x_pos; + int j; + uint8 *topL, *leftL, *orgY_2, *orgY_3; + int temp, SBE, offset; + OsclFloat ABE; + bool intra = true; + + if (((x_pos >> 4) != (int)video->PicWidthInMbs - 1) && + ((y_pos >> 4) != (int)video->PicHeightInMbs - 1) && + video->intraAvailA && + video->intraAvailB) + { + SBE = 0; + /* top neighbor */ + topL = curL - picPitch; + /* left neighbor */ + leftL = curL - 1; + orgY_2 = orgY - orgPitch; + + for (j = 0; j < 16; j++) + { + temp = *topL++ - orgY[j]; + SBE += ((temp >= 0) ? temp : -temp); + temp = *(leftL += picPitch) - *(orgY_2 += orgPitch); + SBE += ((temp >= 0) ? temp : -temp); + } + + /* calculate chroma */ + offset = (y_pos >> 2) * picPitch + (x_pos >> 1); + topL = video->currPic->Scb + offset; + orgY_2 = currInput->YCbCr[1] + offset + (y_pos >> 2) * (orgPitch - picPitch); + + leftL = topL - 1; + topL -= (picPitch >> 1); + orgY_3 = orgY_2 - (orgPitch >> 1); + for (j = 0; j < 8; j++) + { + temp = *topL++ - orgY_2[j]; + SBE += ((temp >= 0) ? temp : -temp); + temp = *(leftL += (picPitch >> 1)) - *(orgY_3 += (orgPitch >> 1)); + SBE += ((temp >= 0) ? temp : -temp); + } + + topL = video->currPic->Scr + offset; + orgY_2 = currInput->YCbCr[2] + offset + (y_pos >> 2) * (orgPitch - picPitch); + + leftL = topL - 1; + topL -= (picPitch >> 1); + orgY_3 = orgY_2 - (orgPitch >> 1); + for (j = 0; j < 8; j++) + { + temp = *topL++ - orgY_2[j]; + SBE += ((temp >= 0) ? temp : -temp); + temp = *(leftL += (picPitch >> 1)) - *(orgY_3 += (orgPitch >> 1)); + SBE += ((temp >= 0) ? 
temp : -temp); + } + + /* compare mincost/384 and SBE/64 */ + ABE = SBE / 64.0; + if (ABE*0.8 >= min_cost / 384.0) + { + intra = false; + } + } + + return intra; +} + +/* perform searching for MB mode */ +/* assuming that this is done inside the encoding loop, +no need to call InitNeighborAvailability */ + +void MBIntraSearch(AVCEncObject *encvid, int mbnum, uint8 *curL, int picPitch) +{ + AVCCommonObj *video = encvid->common; + AVCFrameIO *currInput = encvid->currInput; + AVCMacroblock *currMB = video->currMB; + int min_cost; + uint8 *orgY; + int x_pos = (video->mb_x) << 4; + int y_pos = (video->mb_y) << 4; + uint32 *saved_inter; + int j; + int orgPitch = currInput->pitch; + bool intra = true; + + currMB->CBP = 0; + + /* first do motion vector and variable block size search */ + min_cost = encvid->min_cost[mbnum]; + + /* now perform intra prediction search */ + /* need to add the check for encvid->intraSearch[video->mbNum] to skip intra + if it's not worth checking. */ + if (video->slice_type == AVC_P_SLICE) + { + /* Decide whether intra search is necessary or not */ + /* This one, we do it in the encoding loop so the neighboring pixel are the + actual reconstructed pixels. */ + intra = IntraDecisionABE(encvid, min_cost, curL, picPitch); + } + + if (intra == true || video->slice_type == AVC_I_SLICE) + { + orgY = currInput->YCbCr[0] + y_pos * orgPitch + x_pos; + + /* i16 mode search */ + /* generate all the predictions */ + intrapred_luma_16x16(encvid); + + /* evaluate them one by one */ + find_cost_16x16(encvid, orgY, &min_cost); + + if (video->slice_type == AVC_P_SLICE) + { + /* save current inter prediction */ + saved_inter = encvid->subpel_pred; /* reuse existing buffer */ + j = 16; + curL -= 4; + picPitch -= 16; + while (j--) + { + *saved_inter++ = *((uint32*)(curL += 4)); + *saved_inter++ = *((uint32*)(curL += 4)); + *saved_inter++ = *((uint32*)(curL += 4)); + *saved_inter++ = *((uint32*)(curL += 4)); + curL += picPitch; + } + + } + + /* i4 mode search */ + mb_intra4x4_search(encvid, &min_cost); + + encvid->min_cost[mbnum] = min_cost; /* update min_cost */ + } + + + if (currMB->mb_intra) + { + chroma_intra_search(encvid); + + /* need to set this in order for the MBInterPrediction to work!! 
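+           (zeroing mvL0 and setting every ref_idx_L0 entry to -1 overwrites whatever
+           the inter search left behind, so neighbouring macroblocks' motion vector
+           prediction sees this MB as intra instead of picking up stale motion data)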
*/ + memset(currMB->mvL0, 0, sizeof(int32)*16); + currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] = + currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = -1; + } + else if (video->slice_type == AVC_P_SLICE && intra == true) + { + /* restore current inter prediction */ + saved_inter = encvid->subpel_pred; /* reuse existing buffer */ + j = 16; + curL -= ((picPitch + 16) << 4); + while (j--) + { + *((uint32*)(curL += 4)) = *saved_inter++; + *((uint32*)(curL += 4)) = *saved_inter++; + *((uint32*)(curL += 4)) = *saved_inter++; + *((uint32*)(curL += 4)) = *saved_inter++; + curL += picPitch; + } + } + + return ; +} + +/* generate all the prediction values */ +void intrapred_luma_16x16(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCPictureData *currPic = video->currPic; + + int x_pos = (video->mb_x) << 4; + int y_pos = (video->mb_y) << 4; + int pitch = currPic->pitch; + + int offset = y_pos * pitch + x_pos; + + uint8 *pred, *top, *left; + uint8 *curL = currPic->Sl + offset; /* point to reconstructed frame */ + uint32 word1, word2, word3, word4; + uint32 sum = 0; + + int a_16, b, c, factor_c; + uint8 *comp_ref_x0, *comp_ref_x1, *comp_ref_y0, *comp_ref_y1; + int H = 0, V = 0, tmp, value; + int i; + + if (video->intraAvailB) + { + //get vertical prediction mode + top = curL - pitch; + + pred = encvid->pred_i16[AVC_I16_Vertical] - 16; + + word1 = *((uint32*)(top)); /* read 4 bytes from top */ + word2 = *((uint32*)(top + 4)); /* read 4 bytes from top */ + word3 = *((uint32*)(top + 8)); /* read 4 bytes from top */ + word4 = *((uint32*)(top + 12)); /* read 4 bytes from top */ + + for (i = 0; i < 16; i++) + { + *((uint32*)(pred += 16)) = word1; + *((uint32*)(pred + 4)) = word2; + *((uint32*)(pred + 8)) = word3; + *((uint32*)(pred + 12)) = word4; + + } + + sum = word1 & 0xFF00FF; + word1 = (word1 >> 8) & 0xFF00FF; + sum += word1; + word1 = (word2 & 0xFF00FF); + sum += word1; + word2 = (word2 >> 8) & 0xFF00FF; + sum += word2; + word1 = (word3 & 0xFF00FF); + sum += word1; + word3 = (word3 >> 8) & 0xFF00FF; + sum += word3; + word1 = (word4 & 0xFF00FF); + sum += word1; + word4 = (word4 >> 8) & 0xFF00FF; + sum += word4; + + sum += (sum >> 16); + sum &= 0xFFFF; + + if (!video->intraAvailA) + { + sum = (sum + 8) >> 4; + } + } + + if (video->intraAvailA) + { + // get horizontal mode + left = curL - 1 - pitch; + + pred = encvid->pred_i16[AVC_I16_Horizontal] - 16; + + for (i = 0; i < 16; i++) + { + word1 = *(left += pitch); + sum += word1; + + word1 = (word1 << 8) | word1; + word1 = (word1 << 16) | word1; /* make it 4 */ + + *(uint32*)(pred += 16) = word1; + *(uint32*)(pred + 4) = word1; + *(uint32*)(pred + 8) = word1; + *(uint32*)(pred + 12) = word1; + } + + if (!video->intraAvailB) + { + sum = (sum + 8) >> 4; + } + else + { + sum = (sum + 16) >> 5; + } + } + + // get DC mode + if (!video->intraAvailA && !video->intraAvailB) + { + sum = 0x80808080; + } + else + { + sum = (sum << 8) | sum; + sum = (sum << 16) | sum; + } + + pred = encvid->pred_i16[AVC_I16_DC] - 16; + for (i = 0; i < 16; i++) + { + *((uint32*)(pred += 16)) = sum; + *((uint32*)(pred + 4)) = sum; + *((uint32*)(pred + 8)) = sum; + *((uint32*)(pred + 12)) = sum; + } + + // get plane mode + if (video->intraAvailA && video->intraAvailB && video->intraAvailD) + { + pred = encvid->pred_i16[AVC_I16_Plane] - 16; + + comp_ref_x0 = curL - pitch + 8; + comp_ref_x1 = curL - pitch + 6; + comp_ref_y0 = curL - 1 + (pitch << 3); + comp_ref_y1 = curL - 1 + 6 * pitch; + + for (i = 1; i < 8; i++) + { + H += i * (*comp_ref_x0++ - *comp_ref_x1--); + V += i 
* (*comp_ref_y0 - *comp_ref_y1); + comp_ref_y0 += pitch; + comp_ref_y1 -= pitch; + } + + H += i * (*comp_ref_x0++ - curL[-pitch-1]); + V += i * (*comp_ref_y0 - *comp_ref_y1); + + + a_16 = ((*(curL - pitch + 15) + *(curL - 1 + 15 * pitch)) << 4) + 16;; + b = (5 * H + 32) >> 6; + c = (5 * V + 32) >> 6; + + tmp = 0; + for (i = 0; i < 16; i++) + { + factor_c = a_16 + c * (tmp++ - 7); + factor_c -= 7 * b; + + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = value; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 8); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 16); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 24); + *((uint32*)(pred += 16)) = word1; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = value; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 8); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 16); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 24); + *((uint32*)(pred + 4)) = word1; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = value; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 8); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 16); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 24); + *((uint32*)(pred + 8)) = word1; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = value; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 8); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 16); + value = factor_c >> 5; + CLIP_RESULT(value) + word1 = (word1) | (value << 24); + *((uint32*)(pred + 12)) = word1; + } + } + + return ; +} + + +/* evaluate each prediction mode of I16 */ +void find_cost_16x16(AVCEncObject *encvid, uint8 *orgY, int *min_cost) +{ + AVCCommonObj *video = encvid->common; + AVCMacroblock *currMB = video->currMB; + int cost; + int org_pitch = encvid->currInput->pitch; + + /* evaluate vertical mode */ + if (video->intraAvailB) + { + cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Vertical], *min_cost); + if (cost < *min_cost) + { + *min_cost = cost; + currMB->mbMode = AVC_I16; + currMB->mb_intra = 1; + currMB->i16Mode = AVC_I16_Vertical; + } + } + + + /* evaluate horizontal mode */ + if (video->intraAvailA) + { + cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Horizontal], *min_cost); + if (cost < *min_cost) + { + *min_cost = cost; + currMB->mbMode = AVC_I16; + currMB->mb_intra = 1; + currMB->i16Mode = AVC_I16_Horizontal; + } + } + + /* evaluate DC mode */ + cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_DC], *min_cost); + if (cost < *min_cost) + { + *min_cost = cost; + currMB->mbMode = AVC_I16; + currMB->mb_intra = 1; + currMB->i16Mode = AVC_I16_DC; + } + + /* evaluate plane mode */ + if (video->intraAvailA && video->intraAvailB && video->intraAvailD) + { + cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Plane], *min_cost); + if (cost < *min_cost) + { + *min_cost = cost; + currMB->mbMode = AVC_I16; + currMB->mb_intra = 1; + currMB->i16Mode = AVC_I16_Plane; + } + } + + return ; +} + + +int cost_i16(uint8 *org, int org_pitch, uint8 *pred, int min_cost) +{ 
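+    /* SATD cost of one I16 prediction candidate: the 16x16 residual is run
+       through 4x4 Hadamard transforms (a horizontal pass over the rows, then a
+       vertical pass), the per-4x4-block DC terms are kept out of that sum and
+       fed through an extra 4x4 Hadamard (mirroring the I16 luma DC transform),
+       and the total absolute sum is halved.  The min_cost argument allows an
+       early exit once the running cost can no longer beat the best mode so far. */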
+ + int cost; + int j, k; + int16 res[256], *pres; // residue + int m0, m1, m2, m3; + + // calculate SATD + org_pitch -= 16; + pres = res; + // horizontal transform + for (j = 0; j < 16; j++) + { + k = 4; + while (k > 0) + { + m0 = org[0] - pred[0]; + m3 = org[3] - pred[3]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = org[1] - pred[1]; + m2 = org[2] - pred[2]; + m1 += m2; + m2 = m1 - (m2 << 1); + pres[0] = m0 + m1; + pres[2] = m0 - m1; + pres[1] = m2 + m3; + pres[3] = m3 - m2; + + org += 4; + pres += 4; + pred += 4; + k--; + } + org += org_pitch; + } + /* vertical transform */ + cost = 0; + for (j = 0; j < 4; j++) + { + pres = res + (j << 6); + k = 16; + while (k > 0) + { + m0 = pres[0]; + m3 = pres[3<<4]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = pres[1<<4]; + m2 = pres[2<<4]; + m1 += m2; + m2 = m1 - (m2 << 1); + pres[0] = m0 = m0 + m1; + + if (k&0x3) // only sum up non DC values. + { + cost += ((m0 > 0) ? m0 : -m0); + } + + m1 = m0 - (m1 << 1); + cost += ((m1 > 0) ? m1 : -m1); + m3 = m2 + m3; + cost += ((m3 > 0) ? m3 : -m3); + m2 = m3 - (m2 << 1); + cost += ((m2 > 0) ? m2 : -m2); + + pres++; + k--; + } + if ((cost >> 1) > min_cost) /* early drop out */ + { + return (cost >> 1); + } + } + + /* Hadamard of the DC coefficient */ + pres = res; + k = 4; + while (k > 0) + { + m0 = pres[0]; + m3 = pres[3<<2]; + m0 >>= 2; + m0 += (m3 >> 2); + m3 = m0 - (m3 >> 1); + m1 = pres[1<<2]; + m2 = pres[2<<2]; + m1 >>= 2; + m1 += (m2 >> 2); + m2 = m1 - (m2 >> 1); + pres[0] = (m0 + m1); + pres[2<<2] = (m0 - m1); + pres[1<<2] = (m2 + m3); + pres[3<<2] = (m3 - m2); + pres += (4 << 4); + k--; + } + + pres = res; + k = 4; + while (k > 0) + { + m0 = pres[0]; + m3 = pres[3<<6]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = pres[1<<6]; + m2 = pres[2<<6]; + m1 += m2; + m2 = m1 - (m2 << 1); + m0 = m0 + m1; + cost += ((m0 >= 0) ? m0 : -m0); + m1 = m0 - (m1 << 1); + cost += ((m1 >= 0) ? m1 : -m1); + m3 = m2 + m3; + cost += ((m3 >= 0) ? m3 : -m3); + m2 = m3 - (m2 << 1); + cost += ((m2 >= 0) ? 
m2 : -m2); + pres += 4; + + if ((cost >> 1) > min_cost) /* early drop out */ + { + return (cost >> 1); + } + + k--; + } + + return (cost >> 1); +} + + +void mb_intra4x4_search(AVCEncObject *encvid, int *min_cost) +{ + AVCCommonObj *video = encvid->common; + AVCMacroblock *currMB = video->currMB; + AVCPictureData *currPic = video->currPic; + AVCFrameIO *currInput = encvid->currInput; + int pitch = currPic->pitch; + int org_pitch = currInput->pitch; + int offset; + uint8 *curL, *comp, *org4, *org8; + int y = video->mb_y << 4; + int x = video->mb_x << 4; + + int b8, b4, cost4x4, blkidx; + int cost = 0; + int numcoef; + int dummy = 0; + int mb_intra = currMB->mb_intra; // save the original value + + offset = y * pitch + x; + + curL = currPic->Sl + offset; + org8 = currInput->YCbCr[0] + y * org_pitch + x; + video->pred_pitch = 4; + + cost = (int)(6.0 * encvid->lambda_mode + 0.4999); + cost <<= 2; + + currMB->mb_intra = 1; // temporary set this to one to enable the IDCT + // operation inside dct_luma + + for (b8 = 0; b8 < 4; b8++) + { + comp = curL; + org4 = org8; + + for (b4 = 0; b4 < 4; b4++) + { + blkidx = blkIdx2blkXY[b8][b4]; + cost4x4 = blk_intra4x4_search(encvid, blkidx, comp, org4); + cost += cost4x4; + if (cost > *min_cost) + { + currMB->mb_intra = mb_intra; // restore the value + return ; + } + + /* do residue, Xfrm, Q, invQ, invXfrm, recon and save the DCT coefs.*/ + video->pred_block = encvid->pred_i4[currMB->i4Mode[blkidx]]; + numcoef = dct_luma(encvid, blkidx, comp, org4, &dummy); + currMB->nz_coeff[blkidx] = numcoef; + if (numcoef) + { + video->cbp4x4 |= (1 << blkidx); + currMB->CBP |= (1 << b8); + } + + if (b4&1) + { + comp += ((pitch << 2) - 4); + org4 += ((org_pitch << 2) - 4); + } + else + { + comp += 4; + org4 += 4; + } + } + + if (b8&1) + { + curL += ((pitch << 3) - 8); + org8 += ((org_pitch << 3) - 8); + } + else + { + curL += 8; + org8 += 8; + } + } + + currMB->mb_intra = mb_intra; // restore the value + + if (cost < *min_cost) + { + *min_cost = cost; + currMB->mbMode = AVC_I4; + currMB->mb_intra = 1; + } + + return ; +} + + +/* search for i4 mode for a 4x4 block */ +int blk_intra4x4_search(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org) +{ + AVCCommonObj *video = encvid->common; + AVCNeighborAvailability availability; + AVCMacroblock *currMB = video->currMB; + bool top_left = FALSE; + int pitch = video->currPic->pitch; + uint8 mode_avail[AVCNumI4PredMode]; + uint32 temp, DC; + uint8 *pred; + int org_pitch = encvid->currInput->pitch; + uint16 min_cost, cost; + + int P_x, Q_x, R_x, P_y, Q_y, R_y, D, D0, D1; + int P0, Q0, R0, S0, P1, Q1, R1, P2, Q2; + uint8 P_A, P_B, P_C, P_D, P_E, P_F, P_G, P_H, P_I, P_J, P_K, P_L, P_X; + int r0, r1, r2, r3, r4, r5, r6, r7; + int x0, x1, x2, x3, x4, x5; + uint32 temp1, temp2; + + int ipmode, mostProbableMode; + int fixedcost = 4 * encvid->lambda_mode; + int min_sad = 0x7FFF; + + availability.left = TRUE; + availability.top = TRUE; + if (blkidx <= 3) /* top row block (!block_y) */ + { /* check availability up */ + availability.top = video->intraAvailB ; + } + if (!(blkidx&0x3)) /* left column block (!block_x)*/ + { /* check availability left */ + availability.left = video->intraAvailA ; + } + availability.top_right = BlkTopRight[blkidx]; + + if (availability.top_right == 2) + { + availability.top_right = video->intraAvailB; + } + else if (availability.top_right == 3) + { + availability.top_right = video->intraAvailC; + } + + if (availability.top == TRUE) + { + temp = *(uint32*)(cur - pitch); + P_A = temp & 0xFF; + P_B = (temp >> 8) 
& 0xFF; + P_C = (temp >> 16) & 0xFF; + P_D = (temp >> 24) & 0xFF; + } + else + { + P_A = P_B = P_C = P_D = 128; + } + + if (availability.top_right == TRUE) + { + temp = *(uint32*)(cur - pitch + 4); + P_E = temp & 0xFF; + P_F = (temp >> 8) & 0xFF; + P_G = (temp >> 16) & 0xFF; + P_H = (temp >> 24) & 0xFF; + } + else + { + P_E = P_F = P_G = P_H = 128; + } + + if (availability.left == TRUE) + { + cur--; + P_I = *cur; + P_J = *(cur += pitch); + P_K = *(cur += pitch); + P_L = *(cur + pitch); + cur -= (pitch << 1); + cur++; + } + else + { + P_I = P_J = P_K = P_L = 128; + } + + /* check if top-left pixel is available */ + if (((blkidx > 3) && (blkidx&0x3)) || ((blkidx > 3) && video->intraAvailA) + || ((blkidx&0x3) && video->intraAvailB) + || (video->intraAvailA && video->intraAvailD && video->intraAvailB)) + { + top_left = TRUE; + P_X = *(cur - pitch - 1); + } + else + { + P_X = 128; + } + + //===== INTRA PREDICTION FOR 4x4 BLOCK ===== + /* vertical */ + mode_avail[AVC_I4_Vertical] = 0; + if (availability.top) + { + mode_avail[AVC_I4_Vertical] = 1; + pred = encvid->pred_i4[AVC_I4_Vertical]; + + temp = (P_D << 24) | (P_C << 16) | (P_B << 8) | P_A ; + *((uint32*)pred) = temp; /* write 4 at a time */ + *((uint32*)(pred += 4)) = temp; + *((uint32*)(pred += 4)) = temp; + *((uint32*)(pred += 4)) = temp; + } + /* horizontal */ + mode_avail[AVC_I4_Horizontal] = 0; + mode_avail[AVC_I4_Horizontal_Up] = 0; + if (availability.left) + { + mode_avail[AVC_I4_Horizontal] = 1; + pred = encvid->pred_i4[AVC_I4_Horizontal]; + + temp = P_I | (P_I << 8); + temp = temp | (temp << 16); + *((uint32*)pred) = temp; + temp = P_J | (P_J << 8); + temp = temp | (temp << 16); + *((uint32*)(pred += 4)) = temp; + temp = P_K | (P_K << 8); + temp = temp | (temp << 16); + *((uint32*)(pred += 4)) = temp; + temp = P_L | (P_L << 8); + temp = temp | (temp << 16); + *((uint32*)(pred += 4)) = temp; + + mode_avail[AVC_I4_Horizontal_Up] = 1; + pred = encvid->pred_i4[AVC_I4_Horizontal_Up]; + + Q0 = (P_J + P_K + 1) >> 1; + Q1 = (P_J + (P_K << 1) + P_L + 2) >> 2; + P0 = ((P_I + P_J + 1) >> 1); + P1 = ((P_I + (P_J << 1) + P_K + 2) >> 2); + + temp = P0 | (P1 << 8); // [P0 P1 Q0 Q1] + temp |= (Q0 << 16); // [Q0 Q1 R0 DO] + temp |= (Q1 << 24); // [R0 D0 D1 D1] + *((uint32*)pred) = temp; // [D1 D1 D1 D1] + + D0 = (P_K + 3 * P_L + 2) >> 2; + R0 = (P_K + P_L + 1) >> 1; + + temp = Q0 | (Q1 << 8); + temp |= (R0 << 16); + temp |= (D0 << 24); + *((uint32*)(pred += 4)) = temp; + + D1 = P_L; + + temp = R0 | (D0 << 8); + temp |= (D1 << 16); + temp |= (D1 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = D1 | (D1 << 8); + temp |= (temp << 16); + *((uint32*)(pred += 4)) = temp; + } + /* DC */ + mode_avail[AVC_I4_DC] = 1; + pred = encvid->pred_i4[AVC_I4_DC]; + if (availability.left) + { + DC = P_I + P_J + P_K + P_L; + + if (availability.top) + { + DC = (P_A + P_B + P_C + P_D + DC + 4) >> 3; + } + else + { + DC = (DC + 2) >> 2; + + } + } + else if (availability.top) + { + DC = (P_A + P_B + P_C + P_D + 2) >> 2; + + } + else + { + DC = 128; + } + + temp = DC | (DC << 8); + temp = temp | (temp << 16); + *((uint32*)pred) = temp; + *((uint32*)(pred += 4)) = temp; + *((uint32*)(pred += 4)) = temp; + *((uint32*)(pred += 4)) = temp; + + /* Down-left */ + mode_avail[AVC_I4_Diagonal_Down_Left] = 0; + + if (availability.top) + { + mode_avail[AVC_I4_Diagonal_Down_Left] = 1; + + pred = encvid->pred_i4[AVC_I4_Diagonal_Down_Left]; + + r0 = P_A; + r1 = P_B; + r2 = P_C; + r3 = P_D; + + r0 += (r1 << 1); + r0 += r2; + r0 += 2; + r0 >>= 2; + r1 += (r2 << 1); + r1 += r3; + r1 += 
2; + r1 >>= 2; + + if (availability.top_right) + { + r4 = P_E; + r5 = P_F; + r6 = P_G; + r7 = P_H; + + r2 += (r3 << 1); + r2 += r4; + r2 += 2; + r2 >>= 2; + r3 += (r4 << 1); + r3 += r5; + r3 += 2; + r3 >>= 2; + r4 += (r5 << 1); + r4 += r6; + r4 += 2; + r4 >>= 2; + r5 += (r6 << 1); + r5 += r7; + r5 += 2; + r5 >>= 2; + r6 += (3 * r7); + r6 += 2; + r6 >>= 2; + temp = r0 | (r1 << 8); + temp |= (r2 << 16); + temp |= (r3 << 24); + *((uint32*)pred) = temp; + + temp = (temp >> 8) | (r4 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = (temp >> 8) | (r5 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = (temp >> 8) | (r6 << 24); + *((uint32*)(pred += 4)) = temp; + } + else + { + r2 += (r3 * 3); + r2 += 2; + r2 >>= 2; + r3 = ((r3 << 2) + 2); + r3 >>= 2; + + temp = r0 | (r1 << 8); + temp |= (r2 << 16); + temp |= (r3 << 24); + *((uint32*)pred) = temp; + + temp = (temp >> 8) | (r3 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = (temp >> 8) | (r3 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = (temp >> 8) | (r3 << 24); + *((uint32*)(pred += 4)) = temp; + + } + } + + /* Down Right */ + mode_avail[AVC_I4_Diagonal_Down_Right] = 0; + /* Diagonal Vertical Right */ + mode_avail[AVC_I4_Vertical_Right] = 0; + /* Horizontal Down */ + mode_avail[AVC_I4_Horizontal_Down] = 0; + + if (top_left == TRUE) + { + /* Down Right */ + mode_avail[AVC_I4_Diagonal_Down_Right] = 1; + pred = encvid->pred_i4[AVC_I4_Diagonal_Down_Right]; + + Q_x = (P_A + 2 * P_B + P_C + 2) >> 2; + R_x = (P_B + 2 * P_C + P_D + 2) >> 2; + P_x = (P_X + 2 * P_A + P_B + 2) >> 2; + D = (P_A + 2 * P_X + P_I + 2) >> 2; + P_y = (P_X + 2 * P_I + P_J + 2) >> 2; + Q_y = (P_I + 2 * P_J + P_K + 2) >> 2; + R_y = (P_J + 2 * P_K + P_L + 2) >> 2; + + /* we can pack these */ + temp = D | (P_x << 8); //[D P_x Q_x R_x] + //[P_y D P_x Q_x] + temp |= (Q_x << 16); //[Q_y P_y D P_x] + temp |= (R_x << 24); //[R_y Q_y P_y D ] + *((uint32*)pred) = temp; + + temp = P_y | (D << 8); + temp |= (P_x << 16); + temp |= (Q_x << 24); + *((uint32*)(pred += 4)) = temp; + + temp = Q_y | (P_y << 8); + temp |= (D << 16); + temp |= (P_x << 24); + *((uint32*)(pred += 4)) = temp; + + temp = R_y | (Q_y << 8); + temp |= (P_y << 16); + temp |= (D << 24); + *((uint32*)(pred += 4)) = temp; + + + /* Diagonal Vertical Right */ + mode_avail[AVC_I4_Vertical_Right] = 1; + pred = encvid->pred_i4[AVC_I4_Vertical_Right]; + + Q0 = P_A + P_B + 1; + R0 = P_B + P_C + 1; + S0 = P_C + P_D + 1; + P0 = P_X + P_A + 1; + D = (P_I + 2 * P_X + P_A + 2) >> 2; + + P1 = (P0 + Q0) >> 2; + Q1 = (Q0 + R0) >> 2; + R1 = (R0 + S0) >> 2; + + P0 >>= 1; + Q0 >>= 1; + R0 >>= 1; + S0 >>= 1; + + P2 = (P_X + 2 * P_I + P_J + 2) >> 2; + Q2 = (P_I + 2 * P_J + P_K + 2) >> 2; + + temp = P0 | (Q0 << 8); //[P0 Q0 R0 S0] + //[D P1 Q1 R1] + temp |= (R0 << 16); //[P2 P0 Q0 R0] + temp |= (S0 << 24); //[Q2 D P1 Q1] + *((uint32*)pred) = temp; + + temp = D | (P1 << 8); + temp |= (Q1 << 16); + temp |= (R1 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = P2 | (P0 << 8); + temp |= (Q0 << 16); + temp |= (R0 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = Q2 | (D << 8); + temp |= (P1 << 16); + temp |= (Q1 << 24); + *((uint32*)(pred += 4)) = temp; + + + /* Horizontal Down */ + mode_avail[AVC_I4_Horizontal_Down] = 1; + pred = encvid->pred_i4[AVC_I4_Horizontal_Down]; + + + Q2 = (P_A + 2 * P_B + P_C + 2) >> 2; + P2 = (P_X + 2 * P_A + P_B + 2) >> 2; + D = (P_I + 2 * P_X + P_A + 2) >> 2; + P0 = P_X + P_I + 1; + Q0 = P_I + P_J + 1; + R0 = P_J + P_K + 1; + S0 = P_K + P_L + 1; + + P1 = (P0 + Q0) >> 2; + Q1 = (Q0 + R0) >> 2; + R1 = (R0 
+ S0) >> 2; + + P0 >>= 1; + Q0 >>= 1; + R0 >>= 1; + S0 >>= 1; + + + /* we can pack these */ + temp = P0 | (D << 8); //[P0 D P2 Q2] + //[Q0 P1 P0 D ] + temp |= (P2 << 16); //[R0 Q1 Q0 P1] + temp |= (Q2 << 24); //[S0 R1 R0 Q1] + *((uint32*)pred) = temp; + + temp = Q0 | (P1 << 8); + temp |= (P0 << 16); + temp |= (D << 24); + *((uint32*)(pred += 4)) = temp; + + temp = R0 | (Q1 << 8); + temp |= (Q0 << 16); + temp |= (P1 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = S0 | (R1 << 8); + temp |= (R0 << 16); + temp |= (Q1 << 24); + *((uint32*)(pred += 4)) = temp; + + } + + /* vertical left */ + mode_avail[AVC_I4_Vertical_Left] = 0; + if (availability.top) + { + mode_avail[AVC_I4_Vertical_Left] = 1; + pred = encvid->pred_i4[AVC_I4_Vertical_Left]; + + x0 = P_A + P_B + 1; + x1 = P_B + P_C + 1; + x2 = P_C + P_D + 1; + if (availability.top_right) + { + x3 = P_D + P_E + 1; + x4 = P_E + P_F + 1; + x5 = P_F + P_G + 1; + } + else + { + x3 = x4 = x5 = (P_D << 1) + 1; + } + + temp1 = (x0 >> 1); + temp1 |= ((x1 >> 1) << 8); + temp1 |= ((x2 >> 1) << 16); + temp1 |= ((x3 >> 1) << 24); + + *((uint32*)pred) = temp1; + + temp2 = ((x0 + x1) >> 2); + temp2 |= (((x1 + x2) >> 2) << 8); + temp2 |= (((x2 + x3) >> 2) << 16); + temp2 |= (((x3 + x4) >> 2) << 24); + + *((uint32*)(pred += 4)) = temp2; + + temp1 = (temp1 >> 8) | ((x4 >> 1) << 24); /* rotate out old value */ + *((uint32*)(pred += 4)) = temp1; + + temp2 = (temp2 >> 8) | (((x4 + x5) >> 2) << 24); /* rotate out old value */ + *((uint32*)(pred += 4)) = temp2; + } + + //===== LOOP OVER ALL 4x4 INTRA PREDICTION MODES ===== + // can re-order the search here instead of going in order + + // find most probable mode + encvid->mostProbableI4Mode[blkidx] = mostProbableMode = FindMostProbableI4Mode(video, blkidx); + + min_cost = 0xFFFF; + + for (ipmode = 0; ipmode < AVCNumI4PredMode; ipmode++) + { + if (mode_avail[ipmode] == TRUE) + { + cost = (ipmode == mostProbableMode) ? 0 : fixedcost; + pred = encvid->pred_i4[ipmode]; + + cost_i4(org, org_pitch, pred, &cost); + + if (cost < min_cost) + { + currMB->i4Mode[blkidx] = (AVCIntra4x4PredMode)ipmode; + min_cost = cost; + min_sad = cost - ((ipmode == mostProbableMode) ? 
0 : fixedcost); + } + } + } + + if (blkidx == 0) + { + encvid->i4_sad = min_sad; + } + else + { + encvid->i4_sad += min_sad; + } + + return min_cost; +} + +int FindMostProbableI4Mode(AVCCommonObj *video, int blkidx) +{ + int dcOnlyPredictionFlag; + AVCMacroblock *currMB = video->currMB; + int intra4x4PredModeA, intra4x4PredModeB, predIntra4x4PredMode; + + + dcOnlyPredictionFlag = 0; + if (blkidx&0x3) + { + intra4x4PredModeA = currMB->i4Mode[blkidx-1]; // block to the left + } + else /* for blk 0, 4, 8, 12 */ + { + if (video->intraAvailA) + { + if (video->mblock[video->mbAddrA].mbMode == AVC_I4) + { + intra4x4PredModeA = video->mblock[video->mbAddrA].i4Mode[blkidx + 3]; + } + else + { + intra4x4PredModeA = AVC_I4_DC; + } + } + else + { + dcOnlyPredictionFlag = 1; + goto PRED_RESULT_READY; // skip below + } + } + + if (blkidx >> 2) + { + intra4x4PredModeB = currMB->i4Mode[blkidx-4]; // block above + } + else /* block 0, 1, 2, 3 */ + { + if (video->intraAvailB) + { + if (video->mblock[video->mbAddrB].mbMode == AVC_I4) + { + intra4x4PredModeB = video->mblock[video->mbAddrB].i4Mode[blkidx+12]; + } + else + { + intra4x4PredModeB = AVC_I4_DC; + } + } + else + { + dcOnlyPredictionFlag = 1; + } + } + +PRED_RESULT_READY: + if (dcOnlyPredictionFlag) + { + intra4x4PredModeA = intra4x4PredModeB = AVC_I4_DC; + } + + predIntra4x4PredMode = AVC_MIN(intra4x4PredModeA, intra4x4PredModeB); + + return predIntra4x4PredMode; +} + +void cost_i4(uint8 *org, int org_pitch, uint8 *pred, uint16 *cost) +{ + int k; + int16 res[16], *pres; + int m0, m1, m2, m3, tmp1; + int satd = 0; + + pres = res; + // horizontal transform + k = 4; + while (k > 0) + { + m0 = org[0] - pred[0]; + m3 = org[3] - pred[3]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = org[1] - pred[1]; + m2 = org[2] - pred[2]; + m1 += m2; + m2 = m1 - (m2 << 1); + pres[0] = m0 + m1; + pres[2] = m0 - m1; + pres[1] = m2 + m3; + pres[3] = m3 - m2; + + org += org_pitch; + pres += 4; + pred += 4; + k--; + } + /* vertical transform */ + pres = res; + k = 4; + while (k > 0) + { + m0 = pres[0]; + m3 = pres[12]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = pres[4]; + m2 = pres[8]; + m1 += m2; + m2 = m1 - (m2 << 1); + pres[0] = m0 + m1; + pres[8] = m0 - m1; + pres[4] = m2 + m3; + pres[12] = m3 - m2; + + pres++; + k--; + + } + + pres = res; + k = 4; + while (k > 0) + { + tmp1 = *pres++; + satd += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + satd += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + satd += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + satd += ((tmp1 >= 0) ? 
tmp1 : -tmp1); + k--; + } + + satd = (satd + 1) >> 1; + *cost += satd; + + return ; +} + +void chroma_intra_search(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCPictureData *currPic = video->currPic; + + int x_pos = video->mb_x << 3; + int y_pos = video->mb_y << 3; + int pitch = currPic->pitch >> 1; + int offset = y_pos * pitch + x_pos; + + uint8 *comp_ref_x, *comp_ref_y, *pred; + int sum_x0, sum_x1, sum_y0, sum_y1; + int pred_0[2], pred_1[2], pred_2[2], pred_3[2]; + uint32 pred_a, pred_b, pred_c, pred_d; + int i, j, component; + int a_16, b, c, factor_c, topleft; + int H, V, value; + uint8 *comp_ref_x0, *comp_ref_x1, *comp_ref_y0, *comp_ref_y1; + + uint8 *curCb = currPic->Scb + offset; + uint8 *curCr = currPic->Scr + offset; + + uint8 *orgCb, *orgCr; + AVCFrameIO *currInput = encvid->currInput; + AVCMacroblock *currMB = video->currMB; + int org_pitch; + int cost, mincost; + + /* evaluate DC mode */ + if (video->intraAvailB & video->intraAvailA) + { + comp_ref_x = curCb - pitch; + comp_ref_y = curCb - 1; + + for (i = 0; i < 2; i++) + { + pred_a = *((uint32*)comp_ref_x); + comp_ref_x += 4; + pred_b = (pred_a >> 8) & 0xFF00FF; + pred_a &= 0xFF00FF; + pred_a += pred_b; + pred_a += (pred_a >> 16); + sum_x0 = pred_a & 0xFFFF; + + pred_a = *((uint32*)comp_ref_x); + pred_b = (pred_a >> 8) & 0xFF00FF; + pred_a &= 0xFF00FF; + pred_a += pred_b; + pred_a += (pred_a >> 16); + sum_x1 = pred_a & 0xFFFF; + + pred_1[i] = (sum_x1 + 2) >> 2; + + sum_y0 = *comp_ref_y; + sum_y0 += *(comp_ref_y += pitch); + sum_y0 += *(comp_ref_y += pitch); + sum_y0 += *(comp_ref_y += pitch); + + sum_y1 = *(comp_ref_y += pitch); + sum_y1 += *(comp_ref_y += pitch); + sum_y1 += *(comp_ref_y += pitch); + sum_y1 += *(comp_ref_y += pitch); + + pred_2[i] = (sum_y1 + 2) >> 2; + + pred_0[i] = (sum_y0 + sum_x0 + 4) >> 3; + pred_3[i] = (sum_y1 + sum_x1 + 4) >> 3; + + comp_ref_x = curCr - pitch; + comp_ref_y = curCr - 1; + } + } + + else if (video->intraAvailA) + { + comp_ref_y = curCb - 1; + for (i = 0; i < 2; i++) + { + sum_y0 = *comp_ref_y; + sum_y0 += *(comp_ref_y += pitch); + sum_y0 += *(comp_ref_y += pitch); + sum_y0 += *(comp_ref_y += pitch); + + sum_y1 = *(comp_ref_y += pitch); + sum_y1 += *(comp_ref_y += pitch); + sum_y1 += *(comp_ref_y += pitch); + sum_y1 += *(comp_ref_y += pitch); + + pred_0[i] = pred_1[i] = (sum_y0 + 2) >> 2; + pred_2[i] = pred_3[i] = (sum_y1 + 2) >> 2; + + comp_ref_y = curCr - 1; + } + } + else if (video->intraAvailB) + { + comp_ref_x = curCb - pitch; + for (i = 0; i < 2; i++) + { + pred_a = *((uint32*)comp_ref_x); + comp_ref_x += 4; + pred_b = (pred_a >> 8) & 0xFF00FF; + pred_a &= 0xFF00FF; + pred_a += pred_b; + pred_a += (pred_a >> 16); + sum_x0 = pred_a & 0xFFFF; + + pred_a = *((uint32*)comp_ref_x); + pred_b = (pred_a >> 8) & 0xFF00FF; + pred_a &= 0xFF00FF; + pred_a += pred_b; + pred_a += (pred_a >> 16); + sum_x1 = pred_a & 0xFFFF; + + pred_0[i] = pred_2[i] = (sum_x0 + 2) >> 2; + pred_1[i] = pred_3[i] = (sum_x1 + 2) >> 2; + + comp_ref_x = curCr - pitch; + } + } + else + { + pred_0[0] = pred_0[1] = pred_1[0] = pred_1[1] = + pred_2[0] = pred_2[1] = pred_3[0] = pred_3[1] = 128; + } + + pred = encvid->pred_ic[AVC_IC_DC]; + + pred_a = pred_0[0]; + pred_b = pred_1[0]; + pred_a |= (pred_a << 8); + pred_a |= (pred_a << 16); + pred_b |= (pred_b << 8); + pred_b |= (pred_b << 16); + + pred_c = pred_0[1]; + pred_d = pred_1[1]; + pred_c |= (pred_c << 8); + pred_c |= (pred_c << 16); + pred_d |= (pred_d << 8); + pred_d |= (pred_d << 16); + + + for (j = 0; j < 4; j++) /* 4 lines */ + { + 
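+            /* each 16-byte row of the chroma prediction buffer holds 8 Cb samples followed by 8 Cr samples; fill the top four rows with the per-4x4 DC values computed above */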
*((uint32*)pred) = pred_a; + *((uint32*)(pred + 4)) = pred_b; + *((uint32*)(pred + 8)) = pred_c; + *((uint32*)(pred + 12)) = pred_d; + pred += 16; /* move to the next line */ + } + + pred_a = pred_2[0]; + pred_b = pred_3[0]; + pred_a |= (pred_a << 8); + pred_a |= (pred_a << 16); + pred_b |= (pred_b << 8); + pred_b |= (pred_b << 16); + + pred_c = pred_2[1]; + pred_d = pred_3[1]; + pred_c |= (pred_c << 8); + pred_c |= (pred_c << 16); + pred_d |= (pred_d << 8); + pred_d |= (pred_d << 16); + + for (j = 0; j < 4; j++) /* 4 lines */ + { + *((uint32*)pred) = pred_a; + *((uint32*)(pred + 4)) = pred_b; + *((uint32*)(pred + 8)) = pred_c; + *((uint32*)(pred + 12)) = pred_d; + pred += 16; /* move to the next line */ + } + + /* predict horizontal mode */ + if (video->intraAvailA) + { + comp_ref_y = curCb - 1; + comp_ref_x = curCr - 1; + pred = encvid->pred_ic[AVC_IC_Horizontal]; + + for (i = 4; i < 6; i++) + { + for (j = 0; j < 4; j++) + { + pred_a = *comp_ref_y; + comp_ref_y += pitch; + pred_a |= (pred_a << 8); + pred_a |= (pred_a << 16); + *((uint32*)pred) = pred_a; + *((uint32*)(pred + 4)) = pred_a; + + pred_a = *comp_ref_x; + comp_ref_x += pitch; + pred_a |= (pred_a << 8); + pred_a |= (pred_a << 16); + *((uint32*)(pred + 8)) = pred_a; + *((uint32*)(pred + 12)) = pred_a; + + pred += 16; + } + } + } + + /* vertical mode */ + if (video->intraAvailB) + { + comp_ref_x = curCb - pitch; + comp_ref_y = curCr - pitch; + pred = encvid->pred_ic[AVC_IC_Vertical]; + + pred_a = *((uint32*)comp_ref_x); + pred_b = *((uint32*)(comp_ref_x + 4)); + pred_c = *((uint32*)comp_ref_y); + pred_d = *((uint32*)(comp_ref_y + 4)); + + for (j = 0; j < 8; j++) + { + *((uint32*)pred) = pred_a; + *((uint32*)(pred + 4)) = pred_b; + *((uint32*)(pred + 8)) = pred_c; + *((uint32*)(pred + 12)) = pred_d; + pred += 16; + } + } + + /* Intra_Chroma_Plane */ + if (video->intraAvailA && video->intraAvailB && video->intraAvailD) + { + comp_ref_x = curCb - pitch; + comp_ref_y = curCb - 1; + topleft = curCb[-pitch-1]; + + pred = encvid->pred_ic[AVC_IC_Plane]; + for (component = 0; component < 2; component++) + { + H = V = 0; + comp_ref_x0 = comp_ref_x + 4; + comp_ref_x1 = comp_ref_x + 2; + comp_ref_y0 = comp_ref_y + (pitch << 2); + comp_ref_y1 = comp_ref_y + (pitch << 1); + for (i = 1; i < 4; i++) + { + H += i * (*comp_ref_x0++ - *comp_ref_x1--); + V += i * (*comp_ref_y0 - *comp_ref_y1); + comp_ref_y0 += pitch; + comp_ref_y1 -= pitch; + } + H += i * (*comp_ref_x0++ - topleft); + V += i * (*comp_ref_y0 - *comp_ref_y1); + + a_16 = ((*(comp_ref_x + 7) + *(comp_ref_y + 7 * pitch)) << 4) + 16; + b = (17 * H + 16) >> 5; + c = (17 * V + 16) >> 5; + + pred_a = 0; + for (i = 4; i < 6; i++) + { + for (j = 0; j < 4; j++) + { + factor_c = a_16 + c * (pred_a++ - 3); + + factor_c -= 3 * b; + + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b = value; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b |= (value << 8); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b |= (value << 16); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b |= (value << 24); + *((uint32*)pred) = pred_b; + + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b = value; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b |= (value << 8); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b |= (value << 16); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b |= (value << 24); + *((uint32*)(pred + 4)) = pred_b; + pred += 16; + 
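+                /* each pass writes one 8-sample row for the current chroma component: ((a + b*(x-3) + c*(y-3) + 16) >> 5) clipped to [0,255], with b added per sample and c per row */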
} + } + + pred -= 120; /* point to cr */ + comp_ref_x = curCr - pitch; + comp_ref_y = curCr - 1; + topleft = curCr[-pitch-1]; + } + } + + /* now evaluate it */ + + org_pitch = (currInput->pitch) >> 1; + offset = x_pos + y_pos * org_pitch; + + orgCb = currInput->YCbCr[1] + offset; + orgCr = currInput->YCbCr[2] + offset; + + mincost = 0x7fffffff; + cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_DC], mincost); + if (cost < mincost) + { + mincost = cost; + currMB->intra_chroma_pred_mode = AVC_IC_DC; + } + + if (video->intraAvailA) + { + cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Horizontal], mincost); + if (cost < mincost) + { + mincost = cost; + currMB->intra_chroma_pred_mode = AVC_IC_Horizontal; + } + } + + if (video->intraAvailB) + { + cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Vertical], mincost); + if (cost < mincost) + { + mincost = cost; + currMB->intra_chroma_pred_mode = AVC_IC_Vertical; + } + } + + if (video->intraAvailA && video->intraAvailB && video->intraAvailD) + { + cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Plane], mincost); + if (cost < mincost) + { + mincost = cost; + currMB->intra_chroma_pred_mode = AVC_IC_Plane; + } + } + + + return ; +} + + +int SATDChroma(uint8 *orgCb, uint8 *orgCr, int org_pitch, uint8 *pred, int min_cost) +{ + int cost; + /* first take difference between orgCb, orgCr and pred */ + int16 res[128], *pres; // residue + int m0, m1, m2, m3, tmp1; + int j, k; + + pres = res; + org_pitch -= 8; + // horizontal transform + for (j = 0; j < 8; j++) + { + k = 2; + while (k > 0) + { + m0 = orgCb[0] - pred[0]; + m3 = orgCb[3] - pred[3]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = orgCb[1] - pred[1]; + m2 = orgCb[2] - pred[2]; + m1 += m2; + m2 = m1 - (m2 << 1); + pres[0] = m0 + m1; + pres[2] = m0 - m1; + pres[1] = m2 + m3; + pres[3] = m3 - m2; + + orgCb += 4; + pres += 4; + pred += 4; + k--; + } + orgCb += org_pitch; + k = 2; + while (k > 0) + { + m0 = orgCr[0] - pred[0]; + m3 = orgCr[3] - pred[3]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = orgCr[1] - pred[1]; + m2 = orgCr[2] - pred[2]; + m1 += m2; + m2 = m1 - (m2 << 1); + pres[0] = m0 + m1; + pres[2] = m0 - m1; + pres[1] = m2 + m3; + pres[3] = m3 - m2; + + orgCr += 4; + pres += 4; + pred += 4; + k--; + } + orgCr += org_pitch; + } + + /* vertical transform */ + for (j = 0; j < 2; j++) + { + pres = res + (j << 6); + k = 16; + while (k > 0) + { + m0 = pres[0]; + m3 = pres[3<<4]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = pres[1<<4]; + m2 = pres[2<<4]; + m1 += m2; + m2 = m1 - (m2 << 1); + pres[0] = m0 + m1; + pres[2<<4] = m0 - m1; + pres[1<<4] = m2 + m3; + pres[3<<4] = m3 - m2; + + pres++; + k--; + } + } + + /* now sum of absolute value */ + pres = res; + cost = 0; + k = 128; + while (k > 0) + { + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? 
tmp1 : -tmp1); + k -= 8; + if (cost > min_cost) /* early drop out */ + { + return cost; + } + } + + return cost; +} + + + +///////////////////////////////// old code, unused +/* find the best intra mode based on original (unencoded) frame */ +/* output is + currMB->mb_intra, currMB->mbMode, + currMB->i16Mode (if currMB->mbMode == AVC_I16) + currMB->i4Mode[..] (if currMB->mbMode == AVC_I4) */ + +#ifdef FIXED_INTRAPRED_MODE +void MBIntraSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum) +{ + (void)(mbNum); + + AVCCommonObj *video = encvid->common; + int indx, block_x, block_y; + + video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0; + + if (!video->currPicParams->constrained_intra_pred_flag) + { + video->intraAvailA = video->mbAvailA; + video->intraAvailB = video->mbAvailB; + video->intraAvailC = video->mbAvailC; + video->intraAvailD = video->mbAvailD; + } + else + { + if (video->mbAvailA) + { + video->intraAvailA = video->mblock[video->mbAddrA].mb_intra; + } + if (video->mbAvailB) + { + video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ; + } + if (video->mbAvailC) + { + video->intraAvailC = video->mblock[video->mbAddrC].mb_intra; + } + if (video->mbAvailD) + { + video->intraAvailD = video->mblock[video->mbAddrD].mb_intra; + } + } + + currMB->mb_intra = TRUE; + currMB->mbMode = FIXED_INTRAPRED_MODE; + + if (currMB->mbMode == AVC_I16) + { + currMB->i16Mode = FIXED_I16_MODE; + + if (FIXED_I16_MODE == AVC_I16_Vertical && !video->intraAvailB) + { + currMB->i16Mode = AVC_I16_DC; + } + + if (FIXED_I16_MODE == AVC_I16_Horizontal && !video->intraAvailA) + { + currMB->i16Mode = AVC_I16_DC; + } + + if (FIXED_I16_MODE == AVC_I16_Plane && !(video->intraAvailA && video->intraAvailB && video->intraAvailD)) + { + currMB->i16Mode = AVC_I16_DC; + } + } + else //if(currMB->mbMode == AVC_I4) + { + for (indx = 0; indx < 16; indx++) + { + block_x = blkIdx2blkX[indx]; + block_y = blkIdx2blkY[indx]; + + currMB->i4Mode[(block_y<<2)+block_x] = FIXED_I4_MODE; + + if (FIXED_I4_MODE == AVC_I4_Vertical && !(block_y > 0 || video->intraAvailB)) + { + currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + + if (FIXED_I4_MODE == AVC_I4_Horizontal && !(block_x || video->intraAvailA)) + { + currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + + if (FIXED_I4_MODE == AVC_I4_Diagonal_Down_Left && + (block_y == 0 && !video->intraAvailB)) + { + currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + + if (FIXED_I4_MODE == AVC_I4_Diagonal_Down_Right && + !((block_y && block_x) + || (block_y && video->intraAvailA) + || (block_x && video->intraAvailB) + || (video->intraAvailA && video->intraAvailD && video->intraAvailB))) + { + currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + + if (FIXED_I4_MODE == AVC_I4_Vertical_Right && + !((block_y && block_x) + || (block_y && video->intraAvailA) + || (block_x && video->intraAvailB) + || (video->intraAvailA && video->intraAvailD && video->intraAvailB))) + { + currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + + if (FIXED_I4_MODE == AVC_I4_Horizontal_Down && + !((block_y && block_x) + || (block_y && video->intraAvailA) + || (block_x && video->intraAvailB) + || (video->intraAvailA && video->intraAvailD && video->intraAvailB))) + { + currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + + if (FIXED_I4_MODE == AVC_I4_Vertical_Left && + (block_y == 0 && !video->intraAvailB)) + { + currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + + if (FIXED_I4_MODE == AVC_I4_Horizontal_Up && !(block_x || video->intraAvailA)) + { + 
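+                /* Horizontal-Up needs reconstructed samples to the left; without them fall back to DC, the only 4x4 mode that is always valid */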
currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + } + } + + currMB->intra_chroma_pred_mode = FIXED_INTRA_CHROMA_MODE; + + if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Horizontal && !(video->intraAvailA)) + { + currMB->intra_chroma_pred_mode = AVC_IC_DC; + } + + if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Vertical && !(video->intraAvailB)) + { + currMB->intra_chroma_pred_mode = AVC_IC_DC; + } + + if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Plane && !(video->intraAvailA && video->intraAvailB && video->intraAvailD)) + { + currMB->intra_chroma_pred_mode = AVC_IC_DC; + } + + /* also reset the motion vectors */ + /* set MV and Ref_Idx codes of Intra blocks in P-slices */ + memset(currMB->mvL0, 0, sizeof(int32)*16); + currMB->ref_idx_L0[0] = -1; + currMB->ref_idx_L0[1] = -1; + currMB->ref_idx_L0[2] = -1; + currMB->ref_idx_L0[3] = -1; + + // output from this function, currMB->mbMode should be set to either + // AVC_I4, AVC_I16, or else in AVCMBMode enum, mbType, mb_intra, intra_chroma_pred_mode */ + return ; +} +#else // faster combined prediction+SAD calculation +void MBIntraSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum) +{ + AVCCommonObj *video = encvid->common; + AVCFrameIO *currInput = encvid->currInput; + uint8 *curL, *curCb, *curCr; + uint8 *comp, *pred_block; + int block_x, block_y, offset; + uint sad, sad4, sadI4, sadI16; + int component, SubBlock_indx, temp; + int pitch = video->currPic->pitch; + + /* calculate the cost of each intra prediction mode and compare to the + inter mode */ + /* full search for all intra prediction */ + offset = (video->mb_y << 4) * pitch + (video->mb_x << 4); + curL = currInput->YCbCr[0] + offset; + pred_block = video->pred_block + 84; + + /* Assuming that InitNeighborAvailability has been called prior to this function */ + video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0; + + if (!video->currPicParams->constrained_intra_pred_flag) + { + video->intraAvailA = video->mbAvailA; + video->intraAvailB = video->mbAvailB; + video->intraAvailC = video->mbAvailC; + video->intraAvailD = video->mbAvailD; + } + else + { + if (video->mbAvailA) + { + video->intraAvailA = video->mblock[video->mbAddrA].mb_intra; + } + if (video->mbAvailB) + { + video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ; + } + if (video->mbAvailC) + { + video->intraAvailC = video->mblock[video->mbAddrC].mb_intra; + } + if (video->mbAvailD) + { + video->intraAvailD = video->mblock[video->mbAddrD].mb_intra; + } + } + + /* currently we're doing exhaustive search. 
Smart search will be used later */ + + /* I16 modes */ + curL = currInput->YCbCr[0] + offset; + video->pintra_pred_top = curL - pitch; + video->pintra_pred_left = curL - 1; + if (video->mb_y) + { + video->intra_pred_topleft = *(curL - pitch - 1); + } + + /* Intra_16x16_Vertical */ + sadI16 = 65536; + /* check availability of top */ + if (video->intraAvailB) + { + sad = SAD_I16_Vert(video, curL, sadI16); + + if (sad < sadI16) + { + sadI16 = sad; + currMB->i16Mode = AVC_I16_Vertical; + } + } + /* Intra_16x16_Horizontal */ + /* check availability of left */ + if (video->intraAvailA) + { + sad = SAD_I16_HorzDC(video, curL, AVC_I16_Horizontal, sadI16); + + if (sad < sadI16) + { + sadI16 = sad; + currMB->i16Mode = AVC_I16_Horizontal; + } + } + + /* Intra_16x16_DC, default mode */ + sad = SAD_I16_HorzDC(video, curL, AVC_I16_DC, sadI16); + if (sad < sadI16) + { + sadI16 = sad; + currMB->i16Mode = AVC_I16_DC; + } + + /* Intra_16x16_Plane */ + if (video->intraAvailA && video->intraAvailB && video->intraAvailD) + { + sad = SAD_I16_Plane(video, curL, sadI16); + + if (sad < sadI16) + { + sadI16 = sad; + currMB->i16Mode = AVC_I16_Plane; + } + } + + sadI16 >>= 1; /* before comparison */ + + /* selection between intra4, intra16 or inter mode */ + if (sadI16 < encvid->min_cost) + { + currMB->mb_intra = TRUE; + currMB->mbMode = AVC_I16; + encvid->min_cost = sadI16; + } + + if (currMB->mb_intra) /* only do the chrominance search when intra is decided */ + { + /* Note that we might be able to guess the type of prediction from + the luma prediction type */ + + /* now search for the best chroma intra prediction */ + offset = (offset >> 2) + (video->mb_x << 2); + curCb = currInput->YCbCr[1] + offset; + curCr = currInput->YCbCr[2] + offset; + + pitch >>= 1; + video->pintra_pred_top_cb = curCb - pitch; + video->pintra_pred_left_cb = curCb - 1; + video->pintra_pred_top_cr = curCr - pitch; + video->pintra_pred_left_cr = curCr - 1; + + if (video->mb_y) + { + video->intra_pred_topleft_cb = *(curCb - pitch - 1); + video->intra_pred_topleft_cr = *(curCr - pitch - 1); + } + + /* Intra_Chroma_DC */ + sad4 = SAD_Chroma_DC(video, curCb, curCr, 65536); + currMB->intra_chroma_pred_mode = AVC_IC_DC; + + /* Intra_Chroma_Horizontal */ + if (video->intraAvailA) + { + /* check availability of left */ + sad = SAD_Chroma_Horz(video, curCb, curCr, sad4); + if (sad < sad4) + { + sad4 = sad; + currMB->intra_chroma_pred_mode = AVC_IC_Horizontal; + } + } + + /* Intra_Chroma_Vertical */ + if (video->intraAvailB) + { + /* check availability of top */ + sad = SAD_Chroma_Vert(video, curCb, curCr, sad4); + + if (sad < sad4) + { + sad4 = sad; + currMB->intra_chroma_pred_mode = AVC_IC_Vertical; + } + } + + /* Intra_Chroma_Plane */ + if (video->intraAvailA && video->intraAvailB && video->intraAvailD) + { + /* check availability of top and left */ + Intra_Chroma_Plane(video, pitch); + + sad = SADChroma(pred_block + 452, curCb, curCr, pitch); + + if (sad < sad4) + { + sad4 = sad; + currMB->intra_chroma_pred_mode = AVC_IC_Plane; + } + } + + /* also reset the motion vectors */ + /* set MV and Ref_Idx codes of Intra blocks in P-slices */ + memset(currMB->mvL0, 0, sizeof(int32)*16); + memset(currMB->ref_idx_L0, -1, sizeof(int16)*4); + + } + + // output from this function, currMB->mbMode should be set to either + // AVC_I4, AVC_I16, or else in AVCMBMode enum, mbType, mb_intra, intra_chroma_pred_mode */ + + return ; +} +#endif + + diff --git a/media/libstagefright/codecs/avc/enc/src/motion_comp.cpp 
b/media/libstagefright/codecs/avc/enc/src/motion_comp.cpp new file mode 100644 index 0000000..ac62d78 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/motion_comp.cpp @@ -0,0 +1,2156 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" +#include "avcenc_int.h" + + +#define CLIP_RESULT(x) if((uint)x > 0xFF){ \ + x = 0xFF & (~(x>>31));} + +/* (blkwidth << 2) + (dy << 1) + dx */ +static void (*const eChromaMC_SIMD[8])(uint8 *, int , int , int , uint8 *, int, int , int) = +{ + &eChromaFullMC_SIMD, + &eChromaHorizontalMC_SIMD, + &eChromaVerticalMC_SIMD, + &eChromaDiagonalMC_SIMD, + &eChromaFullMC_SIMD, + &eChromaHorizontalMC2_SIMD, + &eChromaVerticalMC2_SIMD, + &eChromaDiagonalMC2_SIMD +}; +/* Perform motion prediction and compensation with residue if exist. */ +void AVCMBMotionComp(AVCEncObject *encvid, AVCCommonObj *video) +{ + (void)(encvid); + + AVCMacroblock *currMB = video->currMB; + AVCPictureData *currPic = video->currPic; + int mbPartIdx, subMbPartIdx; + int ref_idx; + int offset_MbPart_indx = 0; + int16 *mv; + uint32 x_pos, y_pos; + uint8 *curL, *curCb, *curCr; + uint8 *ref_l, *ref_Cb, *ref_Cr; + uint8 *predBlock, *predCb, *predCr; + int block_x, block_y, offset_x, offset_y, offsetP, offset; + int x_position = (video->mb_x << 4); + int y_position = (video->mb_y << 4); + int MbHeight, MbWidth, mbPartIdx_X, mbPartIdx_Y, offset_indx; + int picWidth = currPic->width; + int picPitch = currPic->pitch; + int picHeight = currPic->height; + uint32 tmp_word; + + tmp_word = y_position * picPitch; + curL = currPic->Sl + tmp_word + x_position; + offset = (tmp_word >> 2) + (x_position >> 1); + curCb = currPic->Scb + offset; + curCr = currPic->Scr + offset; + + predBlock = curL; + predCb = curCb; + predCr = curCr; + + GetMotionVectorPredictor(video, 1); + + for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++) + { + MbHeight = currMB->SubMbPartHeight[mbPartIdx]; + MbWidth = currMB->SubMbPartWidth[mbPartIdx]; + mbPartIdx_X = ((mbPartIdx + offset_MbPart_indx) & 1); + mbPartIdx_Y = (mbPartIdx + offset_MbPart_indx) >> 1; + ref_idx = currMB->ref_idx_L0[(mbPartIdx_Y << 1) + mbPartIdx_X]; + offset_indx = 0; + + ref_l = video->RefPicList0[ref_idx]->Sl; + ref_Cb = video->RefPicList0[ref_idx]->Scb; + ref_Cr = video->RefPicList0[ref_idx]->Scr; + + for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++) + { + block_x = (mbPartIdx_X << 1) + ((subMbPartIdx + offset_indx) & 1); + block_y = (mbPartIdx_Y << 1) + (((subMbPartIdx + offset_indx) >> 1) & 1); + mv = (int16*)(currMB->mvL0 + block_x + (block_y << 2)); + offset_x = x_position + (block_x << 2); + offset_y = y_position + (block_y << 2); + x_pos = (offset_x << 2) + *mv++; /*quarter pel */ + y_pos = (offset_y << 2) + *mv; /*quarter pel */ + + //offset = offset_y * currPic->width; + //offsetC 
= (offset >> 2) + (offset_x >> 1); + offsetP = (block_y << 2) * picPitch + (block_x << 2); + eLumaMotionComp(ref_l, picPitch, picHeight, x_pos, y_pos, + /*comp_Sl + offset + offset_x,*/ + predBlock + offsetP, picPitch, MbWidth, MbHeight); + + offsetP = (block_y * picWidth) + (block_x << 1); + eChromaMotionComp(ref_Cb, picWidth >> 1, picHeight >> 1, x_pos, y_pos, + /*comp_Scb + offsetC,*/ + predCb + offsetP, picPitch >> 1, MbWidth >> 1, MbHeight >> 1); + eChromaMotionComp(ref_Cr, picWidth >> 1, picHeight >> 1, x_pos, y_pos, + /*comp_Scr + offsetC,*/ + predCr + offsetP, picPitch >> 1, MbWidth >> 1, MbHeight >> 1); + + offset_indx = currMB->SubMbPartWidth[mbPartIdx] >> 3; + } + offset_MbPart_indx = currMB->MbPartWidth >> 4; + } + + return ; +} + + +/* preform the actual motion comp here */ +void eLumaMotionComp(uint8 *ref, int picpitch, int picheight, + int x_pos, int y_pos, + uint8 *pred, int pred_pitch, + int blkwidth, int blkheight) +{ + (void)(picheight); + + int dx, dy; + int temp2[21][21]; /* for intermediate results */ + uint8 *ref2; + + dx = x_pos & 3; + dy = y_pos & 3; + x_pos = x_pos >> 2; /* round it to full-pel resolution */ + y_pos = y_pos >> 2; + + /* perform actual motion compensation */ + if (dx == 0 && dy == 0) + { /* fullpel position *//* G */ + + ref += y_pos * picpitch + x_pos; + + eFullPelMC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight); + + } /* other positions */ + else if (dy == 0) + { /* no vertical interpolation *//* a,b,c*/ + + ref += y_pos * picpitch + x_pos; + + eHorzInterp1MC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight, dx); + } + else if (dx == 0) + { /*no horizontal interpolation *//* d,h,n */ + + ref += y_pos * picpitch + x_pos; + + eVertInterp1MC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight, dy); + } + else if (dy == 2) + { /* horizontal cross *//* i, j, k */ + + ref += y_pos * picpitch + x_pos - 2; /* move to the left 2 pixels */ + + eVertInterp2MC(ref, picpitch, &temp2[0][0], 21, blkwidth + 5, blkheight); + + eHorzInterp2MC(&temp2[0][2], 21, pred, pred_pitch, blkwidth, blkheight, dx); + } + else if (dx == 2) + { /* vertical cross */ /* f,q */ + + ref += (y_pos - 2) * picpitch + x_pos; /* move to up 2 lines */ + + eHorzInterp3MC(ref, picpitch, &temp2[0][0], 21, blkwidth, blkheight + 5); + eVertInterp3MC(&temp2[2][0], 21, pred, pred_pitch, blkwidth, blkheight, dy); + } + else + { /* diagonal *//* e,g,p,r */ + + ref2 = ref + (y_pos + (dy / 2)) * picpitch + x_pos; + + ref += (y_pos * picpitch) + x_pos + (dx / 2); + + eDiagonalInterpMC(ref2, ref, picpitch, pred, pred_pitch, blkwidth, blkheight); + } + + return ; +} + +void eCreateAlign(uint8 *ref, int picpitch, int y_pos, + uint8 *out, int blkwidth, int blkheight) +{ + int i, j; + int offset, out_offset; + uint32 prev_pix, result, pix1, pix2, pix4; + + ref += y_pos * picpitch;// + x_pos; + out_offset = 24 - blkwidth; + + //switch(x_pos&0x3){ + switch (((uint32)ref)&0x3) + { + case 1: + offset = picpitch - blkwidth - 3; + for (j = 0; j < blkheight; j++) + { + pix1 = *ref++; + pix2 = *((uint16*)ref); + ref += 2; + result = (pix2 << 8) | pix1; + + for (i = 3; i < blkwidth; i += 4) + { + pix4 = *((uint32*)ref); + ref += 4; + prev_pix = (pix4 << 24) & 0xFF000000; /* mask out byte belong to previous word */ + result |= prev_pix; + *((uint32*)out) = result; /* write 4 bytes */ + out += 4; + result = pix4 >> 8; /* for the next loop */ + } + ref += offset; + out += out_offset; + } + break; + case 2: + offset = picpitch - blkwidth - 2; + for (j = 0; j < blkheight; j++) + { + result = 
*((uint16*)ref); + ref += 2; + for (i = 2; i < blkwidth; i += 4) + { + pix4 = *((uint32*)ref); + ref += 4; + prev_pix = (pix4 << 16) & 0xFFFF0000; /* mask out byte belong to previous word */ + result |= prev_pix; + *((uint32*)out) = result; /* write 4 bytes */ + out += 4; + result = pix4 >> 16; /* for the next loop */ + } + ref += offset; + out += out_offset; + } + break; + case 3: + offset = picpitch - blkwidth - 1; + for (j = 0; j < blkheight; j++) + { + result = *ref++; + for (i = 1; i < blkwidth; i += 4) + { + pix4 = *((uint32*)ref); + ref += 4; + prev_pix = (pix4 << 8) & 0xFFFFFF00; /* mask out byte belong to previous word */ + result |= prev_pix; + *((uint32*)out) = result; /* write 4 bytes */ + out += 4; + result = pix4 >> 24; /* for the next loop */ + } + ref += offset; + out += out_offset; + } + break; + } +} + +void eHorzInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dx) +{ + uint8 *p_ref; + uint32 *p_cur; + uint32 tmp, pkres; + int result, curr_offset, ref_offset; + int j; + int32 r0, r1, r2, r3, r4, r5; + int32 r13, r6; + + p_cur = (uint32*)out; /* assume it's word aligned */ + curr_offset = (outpitch - blkwidth) >> 2; + p_ref = in; + ref_offset = inpitch - blkwidth; + + if (dx&1) + { + dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */ + p_ref -= 2; + r13 = 0; + for (j = blkheight; j > 0; j--) + { + tmp = (uint32)(p_ref + blkwidth); + r0 = p_ref[0]; + r1 = p_ref[2]; + r0 |= (r1 << 16); /* 0,c,0,a */ + r1 = p_ref[1]; + r2 = p_ref[3]; + r1 |= (r2 << 16); /* 0,d,0,b */ + while ((uint32)p_ref < tmp) + { + r2 = *(p_ref += 4); /* move pointer to e */ + r3 = p_ref[2]; + r2 |= (r3 << 16); /* 0,g,0,e */ + r3 = p_ref[1]; + r4 = p_ref[3]; + r3 |= (r4 << 16); /* 0,h,0,f */ + + r4 = r0 + r3; /* c+h, a+f */ + r5 = r0 + r1; /* c+d, a+b */ + r6 = r2 + r3; /* g+h, e+f */ + r5 >>= 16; + r5 |= (r6 << 16); /* e+f, c+d */ + r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */ + r4 += 0x100010; /* +16, +16 */ + r5 = r1 + r2; /* d+g, b+e */ + r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */ + r4 >>= 5; + r13 |= r4; /* check clipping */ + + r5 = p_ref[dx+2]; + r6 = p_ref[dx+4]; + r5 |= (r6 << 16); + r4 += r5; + r4 += 0x10001; + r4 = (r4 >> 1) & 0xFF00FF; + + r5 = p_ref[4]; /* i */ + r6 = (r5 << 16); + r5 = r6 | (r2 >> 16);/* 0,i,0,g */ + r5 += r1; /* d+i, b+g */ /* r5 not free */ + r1 >>= 16; + r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */ + r1 += r2; /* f+g, d+e */ + r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */ + r0 >>= 16; + r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */ + r0 += r3; /* e+h, c+f */ + r5 += 0x100010; /* 16,16 */ + r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */ + r5 >>= 5; + r13 |= r5; /* check clipping */ + + r0 = p_ref[dx+3]; + r1 = p_ref[dx+5]; + r0 |= (r1 << 16); + r5 += r0; + r5 += 0x10001; + r5 = (r5 >> 1) & 0xFF00FF; + + r4 |= (r5 << 8); /* pack them together */ + *p_cur++ = r4; + r1 = r3; + r0 = r2; + } + p_cur += curr_offset; /* move to the next line */ + p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ + + if (r13&0xFF000700) /* need clipping */ + { + /* move back to the beginning of the line */ + p_ref -= (ref_offset + blkwidth); /* input */ + p_cur -= (outpitch >> 2); + + tmp = (uint32)(p_ref + blkwidth); + for (; (uint32)p_ref < tmp;) + { + + r0 = *p_ref++; + r1 = *p_ref++; + r2 = *p_ref++; + r3 = *p_ref++; + r4 = *p_ref++; + /* first pixel */ + r5 = *p_ref++; + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result 
+= (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dx] + 1); + pkres = (result >> 1) ; + /* second pixel */ + r0 = *p_ref++; + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dx] + 1); + result = (result >> 1); + pkres |= (result << 8); + /* third pixel */ + r1 = *p_ref++; + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dx] + 1); + result = (result >> 1); + pkres |= (result << 16); + /* fourth pixel */ + r2 = *p_ref++; + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dx] + 1); + result = (result >> 1); + pkres |= (result << 24); + *p_cur++ = pkres; /* write 4 pixels */ + p_ref -= 5; /* offset back to the middle of filter */ + } + p_cur += curr_offset; /* move to the next line */ + p_ref += ref_offset; /* move to the next line */ + } + } + } + else + { + p_ref -= 2; + r13 = 0; + for (j = blkheight; j > 0; j--) + { + tmp = (uint32)(p_ref + blkwidth); + r0 = p_ref[0]; + r1 = p_ref[2]; + r0 |= (r1 << 16); /* 0,c,0,a */ + r1 = p_ref[1]; + r2 = p_ref[3]; + r1 |= (r2 << 16); /* 0,d,0,b */ + while ((uint32)p_ref < tmp) + { + r2 = *(p_ref += 4); /* move pointer to e */ + r3 = p_ref[2]; + r2 |= (r3 << 16); /* 0,g,0,e */ + r3 = p_ref[1]; + r4 = p_ref[3]; + r3 |= (r4 << 16); /* 0,h,0,f */ + + r4 = r0 + r3; /* c+h, a+f */ + r5 = r0 + r1; /* c+d, a+b */ + r6 = r2 + r3; /* g+h, e+f */ + r5 >>= 16; + r5 |= (r6 << 16); /* e+f, c+d */ + r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */ + r4 += 0x100010; /* +16, +16 */ + r5 = r1 + r2; /* d+g, b+e */ + r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */ + r4 >>= 5; + r13 |= r4; /* check clipping */ + r4 &= 0xFF00FF; /* mask */ + + r5 = p_ref[4]; /* i */ + r6 = (r5 << 16); + r5 = r6 | (r2 >> 16);/* 0,i,0,g */ + r5 += r1; /* d+i, b+g */ /* r5 not free */ + r1 >>= 16; + r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */ + r1 += r2; /* f+g, d+e */ + r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */ + r0 >>= 16; + r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */ + r0 += r3; /* e+h, c+f */ + r5 += 0x100010; /* 16,16 */ + r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */ + r5 >>= 5; + r13 |= r5; /* check clipping */ + r5 &= 0xFF00FF; /* mask */ + + r4 |= (r5 << 8); /* pack them together */ + *p_cur++ = r4; + r1 = r3; + r0 = r2; + } + p_cur += curr_offset; /* move to the next line */ + p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ + + if (r13&0xFF000700) /* need clipping */ + { + /* move back to the beginning of the line */ + p_ref -= (ref_offset + blkwidth); /* input */ + p_cur -= (outpitch >> 2); + + tmp = (uint32)(p_ref + blkwidth); + for (; (uint32)p_ref < tmp;) + { + + r0 = *p_ref++; + r1 = *p_ref++; + r2 = *p_ref++; + r3 = *p_ref++; + r4 = *p_ref++; + /* first pixel */ + r5 = *p_ref++; + result = (r0 + r5); + 
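+                    /* scalar fallback: apply the 6-tap filter (1, -5, 20, 20, -5, 1) directly, then round with (sum + 16) >> 5 and clip to [0,255] */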
r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres = result; + /* second pixel */ + r0 = *p_ref++; + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres |= (result << 8); + /* third pixel */ + r1 = *p_ref++; + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres |= (result << 16); + /* fourth pixel */ + r2 = *p_ref++; + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres |= (result << 24); + *p_cur++ = pkres; /* write 4 pixels */ + p_ref -= 5; + } + p_cur += curr_offset; /* move to the next line */ + p_ref += ref_offset; + } + } + } + + return ; +} + +void eHorzInterp2MC(int *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dx) +{ + int *p_ref; + uint32 *p_cur; + uint32 tmp, pkres; + int result, result2, curr_offset, ref_offset; + int j, r0, r1, r2, r3, r4, r5; + + p_cur = (uint32*)out; /* assume it's word aligned */ + curr_offset = (outpitch - blkwidth) >> 2; + p_ref = in; + ref_offset = inpitch - blkwidth; + + if (dx&1) + { + dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */ + + for (j = blkheight; j > 0 ; j--) + { + tmp = (uint32)(p_ref + blkwidth); + for (; (uint32)p_ref < tmp;) + { + + r0 = p_ref[-2]; + r1 = p_ref[-1]; + r2 = *p_ref++; + r3 = *p_ref++; + r4 = *p_ref++; + /* first pixel */ + r5 = *p_ref++; + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dx] + 16) >> 5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + pkres = (result >> 1); + /* second pixel */ + r0 = *p_ref++; + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dx] + 16) >> 5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + result = (result >> 1); + pkres |= (result << 8); + /* third pixel */ + r1 = *p_ref++; + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dx] + 16) >> 5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + result = (result >> 1); + pkres |= (result << 16); + /* fourth pixel */ + r2 = *p_ref++; + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dx] + 16) >> 
5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + result = (result >> 1); + pkres |= (result << 24); + *p_cur++ = pkres; /* write 4 pixels */ + p_ref -= 3; /* offset back to the middle of filter */ + } + p_cur += curr_offset; /* move to the next line */ + p_ref += ref_offset; /* move to the next line */ + } + } + else + { + for (j = blkheight; j > 0 ; j--) + { + tmp = (uint32)(p_ref + blkwidth); + for (; (uint32)p_ref < tmp;) + { + + r0 = p_ref[-2]; + r1 = p_ref[-1]; + r2 = *p_ref++; + r3 = *p_ref++; + r4 = *p_ref++; + /* first pixel */ + r5 = *p_ref++; + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + pkres = result; + /* second pixel */ + r0 = *p_ref++; + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + pkres |= (result << 8); + /* third pixel */ + r1 = *p_ref++; + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + pkres |= (result << 16); + /* fourth pixel */ + r2 = *p_ref++; + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + pkres |= (result << 24); + *p_cur++ = pkres; /* write 4 pixels */ + p_ref -= 3; /* offset back to the middle of filter */ + } + p_cur += curr_offset; /* move to the next line */ + p_ref += ref_offset; /* move to the next line */ + } + } + + return ; +} + +void eHorzInterp3MC(uint8 *in, int inpitch, int *out, int outpitch, + int blkwidth, int blkheight) +{ + uint8 *p_ref; + int *p_cur; + uint32 tmp; + int result, curr_offset, ref_offset; + int j, r0, r1, r2, r3, r4, r5; + + p_cur = out; + curr_offset = (outpitch - blkwidth); + p_ref = in; + ref_offset = inpitch - blkwidth; + + for (j = blkheight; j > 0 ; j--) + { + tmp = (uint32)(p_ref + blkwidth); + for (; (uint32)p_ref < tmp;) + { + + r0 = p_ref[-2]; + r1 = p_ref[-1]; + r2 = *p_ref++; + r3 = *p_ref++; + r4 = *p_ref++; + /* first pixel */ + r5 = *p_ref++; + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + *p_cur++ = result; + /* second pixel */ + r0 = *p_ref++; + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + *p_cur++ = result; + /* third pixel */ + r1 = *p_ref++; + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + *p_cur++ = result; + /* fourth pixel */ + r2 = *p_ref++; + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + *p_cur++ = result; + p_ref -= 3; /* move back to the middle of the filter */ + } + p_cur += curr_offset; /* move to the next line */ + 
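+        /* the horizontal sums are stored unclipped and unrounded; eVertInterp3MC filters them vertically and normalizes with (sum + 512) >> 10 */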
p_ref += ref_offset; + } + + return ; +} +void eVertInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dy) +{ + uint8 *p_cur, *p_ref; + uint32 tmp; + int result, curr_offset, ref_offset; + int j, i; + int32 r0, r1, r2, r3, r4, r5, r6, r7, r8, r13; + uint8 tmp_in[24][24]; + + /* not word-aligned */ + if (((uint32)in)&0x3) + { + eCreateAlign(in, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5); + in = &tmp_in[2][0]; + inpitch = 24; + } + p_cur = out; + curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */ + ref_offset = blkheight * inpitch; /* for limit */ + + curr_offset += 3; + + if (dy&1) + { + dy = (dy >> 1) ? 0 : -inpitch; + + for (j = 0; j < blkwidth; j += 4, in += 4) + { + r13 = 0; + p_ref = in; + p_cur -= outpitch; /* compensate for the first offset */ + tmp = (uint32)(p_ref + ref_offset); /* limit */ + while ((uint32)p_ref < tmp) /* the loop un-rolled */ + { + r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */ + p_ref += inpitch; + r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */ + r0 &= 0xFF00FF; + + r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + + r0 += r1; + r6 += r7; + + r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */ + r8 = (r2 >> 8) & 0xFF00FF; + r2 &= 0xFF00FF; + + r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + r1 += r2; + + r7 += r8; + + r0 += 20 * r1; + r6 += 20 * r7; + r0 += 0x100010; + r6 += 0x100010; + + r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */ + r8 = (r2 >> 8) & 0xFF00FF; + r2 &= 0xFF00FF; + + r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + r1 += r2; + + r7 += r8; + + r0 -= 5 * r1; + r6 -= 5 * r7; + + r0 >>= 5; + r6 >>= 5; + /* clip */ + r13 |= r6; + r13 |= r0; + //CLIPPACK(r6,result) + + r1 = *((uint32*)(p_ref + dy)); + r2 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + r0 += r1; + r6 += r2; + r0 += 0x10001; + r6 += 0x10001; + r0 = (r0 >> 1) & 0xFF00FF; + r6 = (r6 >> 1) & 0xFF00FF; + + r0 |= (r6 << 8); /* pack it back */ + *((uint32*)(p_cur += outpitch)) = r0; + } + p_cur += curr_offset; /* offset to the next pixel */ + if (r13 & 0xFF000700) /* this column need clipping */ + { + p_cur -= 4; + for (i = 0; i < 4; i++) + { + p_ref = in + i; + p_cur -= outpitch; /* compensate for the first offset */ + + tmp = (uint32)(p_ref + ref_offset); /* limit */ + while ((uint32)p_ref < tmp) + { /* loop un-rolled */ + r0 = *(p_ref - (inpitch << 1)); + r1 = *(p_ref - inpitch); + r2 = *p_ref; + r3 = *(p_ref += inpitch); /* modify pointer before loading */ + r4 = *(p_ref += inpitch); + /* first pixel */ + r5 = *(p_ref += inpitch); + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dy-(inpitch<<1)] + 1); + result = (result >> 1); + *(p_cur += outpitch) = result; + /* second pixel */ + r0 = *(p_ref += inpitch); + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dy-(inpitch<<1)] + 1); + result = (result >> 1); + 
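+                        /* quarter-pel output: the clipped half-pel value averaged with the adjacent full-pel sample, rounding up */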
*(p_cur += outpitch) = result; + /* third pixel */ + r1 = *(p_ref += inpitch); + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dy-(inpitch<<1)] + 1); + result = (result >> 1); + *(p_cur += outpitch) = result; + /* fourth pixel */ + r2 = *(p_ref += inpitch); + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dy-(inpitch<<1)] + 1); + result = (result >> 1); + *(p_cur += outpitch) = result; + p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ + } + p_cur += (curr_offset - 3); + } + } + } + } + else + { + for (j = 0; j < blkwidth; j += 4, in += 4) + { + r13 = 0; + p_ref = in; + p_cur -= outpitch; /* compensate for the first offset */ + tmp = (uint32)(p_ref + ref_offset); /* limit */ + while ((uint32)p_ref < tmp) /* the loop un-rolled */ + { + r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */ + p_ref += inpitch; + r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */ + r0 &= 0xFF00FF; + + r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + + r0 += r1; + r6 += r7; + + r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */ + r8 = (r2 >> 8) & 0xFF00FF; + r2 &= 0xFF00FF; + + r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + r1 += r2; + + r7 += r8; + + r0 += 20 * r1; + r6 += 20 * r7; + r0 += 0x100010; + r6 += 0x100010; + + r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */ + r8 = (r2 >> 8) & 0xFF00FF; + r2 &= 0xFF00FF; + + r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + r1 += r2; + + r7 += r8; + + r0 -= 5 * r1; + r6 -= 5 * r7; + + r0 >>= 5; + r6 >>= 5; + /* clip */ + r13 |= r6; + r13 |= r0; + //CLIPPACK(r6,result) + r0 &= 0xFF00FF; + r6 &= 0xFF00FF; + r0 |= (r6 << 8); /* pack it back */ + *((uint32*)(p_cur += outpitch)) = r0; + } + p_cur += curr_offset; /* offset to the next pixel */ + if (r13 & 0xFF000700) /* this column need clipping */ + { + p_cur -= 4; + for (i = 0; i < 4; i++) + { + p_ref = in + i; + p_cur -= outpitch; /* compensate for the first offset */ + tmp = (uint32)(p_ref + ref_offset); /* limit */ + while ((uint32)p_ref < tmp) + { /* loop un-rolled */ + r0 = *(p_ref - (inpitch << 1)); + r1 = *(p_ref - inpitch); + r2 = *p_ref; + r3 = *(p_ref += inpitch); /* modify pointer before loading */ + r4 = *(p_ref += inpitch); + /* first pixel */ + r5 = *(p_ref += inpitch); + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + /* second pixel */ + r0 = *(p_ref += inpitch); + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + /* third pixel */ + r1 = *(p_ref += inpitch); + result = (r2 + r1); + r2 
= (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + /* fourth pixel */ + r2 = *(p_ref += inpitch); + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ + } + p_cur += (curr_offset - 3); + } + } + } + } + + return ; +} + +void eVertInterp2MC(uint8 *in, int inpitch, int *out, int outpitch, + int blkwidth, int blkheight) +{ + int *p_cur; + uint8 *p_ref; + uint32 tmp; + int result, curr_offset, ref_offset; + int j, r0, r1, r2, r3, r4, r5; + + p_cur = out; + curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */ + ref_offset = blkheight * inpitch; /* for limit */ + + for (j = 0; j < blkwidth; j++) + { + p_cur -= outpitch; /* compensate for the first offset */ + p_ref = in++; + + tmp = (uint32)(p_ref + ref_offset); /* limit */ + while ((uint32)p_ref < tmp) + { /* loop un-rolled */ + r0 = *(p_ref - (inpitch << 1)); + r1 = *(p_ref - inpitch); + r2 = *p_ref; + r3 = *(p_ref += inpitch); /* modify pointer before loading */ + r4 = *(p_ref += inpitch); + /* first pixel */ + r5 = *(p_ref += inpitch); + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + *(p_cur += outpitch) = result; + /* second pixel */ + r0 = *(p_ref += inpitch); + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + *(p_cur += outpitch) = result; + /* third pixel */ + r1 = *(p_ref += inpitch); + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + *(p_cur += outpitch) = result; + /* fourth pixel */ + r2 = *(p_ref += inpitch); + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + *(p_cur += outpitch) = result; + p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ + } + p_cur += curr_offset; + } + + return ; +} + +void eVertInterp3MC(int *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dy) +{ + uint8 *p_cur; + int *p_ref; + uint32 tmp; + int result, result2, curr_offset, ref_offset; + int j, r0, r1, r2, r3, r4, r5; + + p_cur = out; + curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */ + ref_offset = blkheight * inpitch; /* for limit */ + + if (dy&1) + { + dy = (dy >> 1) ? 
-(inpitch << 1) : -(inpitch << 1) - inpitch; + + for (j = 0; j < blkwidth; j++) + { + p_cur -= outpitch; /* compensate for the first offset */ + p_ref = in++; + + tmp = (uint32)(p_ref + ref_offset); /* limit */ + while ((uint32)p_ref < tmp) + { /* loop un-rolled */ + r0 = *(p_ref - (inpitch << 1)); + r1 = *(p_ref - inpitch); + r2 = *p_ref; + r3 = *(p_ref += inpitch); /* modify pointer before loading */ + r4 = *(p_ref += inpitch); + /* first pixel */ + r5 = *(p_ref += inpitch); + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dy] + 16) >> 5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + result = (result >> 1); + *(p_cur += outpitch) = result; + /* second pixel */ + r0 = *(p_ref += inpitch); + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dy] + 16) >> 5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + result = (result >> 1); + *(p_cur += outpitch) = result; + /* third pixel */ + r1 = *(p_ref += inpitch); + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dy] + 16) >> 5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + result = (result >> 1); + *(p_cur += outpitch) = result; + /* fourth pixel */ + r2 = *(p_ref += inpitch); + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dy] + 16) >> 5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + result = (result >> 1); + *(p_cur += outpitch) = result; + p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ + } + p_cur += curr_offset; + } + } + else + { + for (j = 0; j < blkwidth; j++) + { + p_cur -= outpitch; /* compensate for the first offset */ + p_ref = in++; + + tmp = (uint32)(p_ref + ref_offset); /* limit */ + while ((uint32)p_ref < tmp) + { /* loop un-rolled */ + r0 = *(p_ref - (inpitch << 1)); + r1 = *(p_ref - inpitch); + r2 = *p_ref; + r3 = *(p_ref += inpitch); /* modify pointer before loading */ + r4 = *(p_ref += inpitch); + /* first pixel */ + r5 = *(p_ref += inpitch); + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + /* second pixel */ + r0 = *(p_ref += inpitch); + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + /* third pixel */ + r1 = *(p_ref += inpitch); + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 
5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + /* fourth pixel */ + r2 = *(p_ref += inpitch); + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ + } + p_cur += curr_offset; + } + } + + return ; +} + +void eDiagonalInterpMC(uint8 *in1, uint8 *in2, int inpitch, + uint8 *out, int outpitch, + int blkwidth, int blkheight) +{ + int j, i; + int result; + uint8 *p_cur, *p_ref, *p_tmp8; + int curr_offset, ref_offset; + uint8 tmp_res[24][24], tmp_in[24][24]; + uint32 *p_tmp; + uint32 tmp, pkres, tmp_result; + int32 r0, r1, r2, r3, r4, r5; + int32 r6, r7, r8, r9, r10, r13; + + ref_offset = inpitch - blkwidth; + p_ref = in1 - 2; + /* perform horizontal interpolation */ + /* not word-aligned */ + /* It is faster to read 1 byte at time to avoid calling CreateAlign */ + /* if(((uint32)p_ref)&0x3) + { + CreateAlign(p_ref,inpitch,0,&tmp_in[0][0],blkwidth+8,blkheight); + p_ref = &tmp_in[0][0]; + ref_offset = 24-blkwidth; + }*/ + + p_tmp = (uint32*) & (tmp_res[0][0]); + for (j = blkheight; j > 0; j--) + { + r13 = 0; + tmp = (uint32)(p_ref + blkwidth); + + //r0 = *((uint32*)p_ref); /* d,c,b,a */ + //r1 = (r0>>8)&0xFF00FF; /* 0,d,0,b */ + //r0 &= 0xFF00FF; /* 0,c,0,a */ + /* It is faster to read 1 byte at a time */ + r0 = p_ref[0]; + r1 = p_ref[2]; + r0 |= (r1 << 16); /* 0,c,0,a */ + r1 = p_ref[1]; + r2 = p_ref[3]; + r1 |= (r2 << 16); /* 0,d,0,b */ + + while ((uint32)p_ref < tmp) + { + //r2 = *((uint32*)(p_ref+=4));/* h,g,f,e */ + //r3 = (r2>>8)&0xFF00FF; /* 0,h,0,f */ + //r2 &= 0xFF00FF; /* 0,g,0,e */ + /* It is faster to read 1 byte at a time */ + r2 = *(p_ref += 4); + r3 = p_ref[2]; + r2 |= (r3 << 16); /* 0,g,0,e */ + r3 = p_ref[1]; + r4 = p_ref[3]; + r3 |= (r4 << 16); /* 0,h,0,f */ + + r4 = r0 + r3; /* c+h, a+f */ + r5 = r0 + r1; /* c+d, a+b */ + r6 = r2 + r3; /* g+h, e+f */ + r5 >>= 16; + r5 |= (r6 << 16); /* e+f, c+d */ + r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */ + r4 += 0x100010; /* +16, +16 */ + r5 = r1 + r2; /* d+g, b+e */ + r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */ + r4 >>= 5; + r13 |= r4; /* check clipping */ + r4 &= 0xFF00FF; /* mask */ + + r5 = p_ref[4]; /* i */ + r6 = (r5 << 16); + r5 = r6 | (r2 >> 16);/* 0,i,0,g */ + r5 += r1; /* d+i, b+g */ /* r5 not free */ + r1 >>= 16; + r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */ + r1 += r2; /* f+g, d+e */ + r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */ + r0 >>= 16; + r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */ + r0 += r3; /* e+h, c+f */ + r5 += 0x100010; /* 16,16 */ + r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */ + r5 >>= 5; + r13 |= r5; /* check clipping */ + r5 &= 0xFF00FF; /* mask */ + + r4 |= (r5 << 8); /* pack them together */ + *p_tmp++ = r4; + r1 = r3; + r0 = r2; + } + p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */ + p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ + + if (r13&0xFF000700) /* need clipping */ + { + /* move back to the beginning of the line */ + p_ref -= (ref_offset + blkwidth); /* input */ + p_tmp -= 6; /* intermediate output */ + tmp = (uint32)(p_ref + blkwidth); + while ((uint32)p_ref < tmp) + { + r0 = 
*p_ref++; + r1 = *p_ref++; + r2 = *p_ref++; + r3 = *p_ref++; + r4 = *p_ref++; + /* first pixel */ + r5 = *p_ref++; + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres = result; + /* second pixel */ + r0 = *p_ref++; + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres |= (result << 8); + /* third pixel */ + r1 = *p_ref++; + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres |= (result << 16); + /* fourth pixel */ + r2 = *p_ref++; + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres |= (result << 24); + + *p_tmp++ = pkres; /* write 4 pixel */ + p_ref -= 5; + } + p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */ + p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ + } + } + + /* perform vertical interpolation */ + /* not word-aligned */ + if (((uint32)in2)&0x3) + { + eCreateAlign(in2, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5); + in2 = &tmp_in[2][0]; + inpitch = 24; + } + + p_cur = out; + curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically up and one pixel right */ + pkres = blkheight * inpitch; /* reuse it for limit */ + + curr_offset += 3; + + for (j = 0; j < blkwidth; j += 4, in2 += 4) + { + r13 = 0; + p_ref = in2; + p_tmp8 = &(tmp_res[0][j]); /* intermediate result */ + p_tmp8 -= 24; /* compensate for the first offset */ + p_cur -= outpitch; /* compensate for the first offset */ + tmp = (uint32)(p_ref + pkres); /* limit */ + while ((uint32)p_ref < tmp) /* the loop un-rolled */ + { + /* Read 1 byte at a time is too slow, too many read and pack ops, need to call CreateAlign */ + /*p_ref8 = p_ref-(inpitch<<1); r0 = p_ref8[0]; r1 = p_ref8[2]; + r0 |= (r1<<16); r6 = p_ref8[1]; r1 = p_ref8[3]; + r6 |= (r1<<16); p_ref+=inpitch; */ + r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */ + p_ref += inpitch; + r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */ + r0 &= 0xFF00FF; + + /*p_ref8 = p_ref+(inpitch<<1); + r1 = p_ref8[0]; r7 = p_ref8[2]; r1 |= (r7<<16); + r7 = p_ref8[1]; r2 = p_ref8[3]; r7 |= (r2<<16);*/ + r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + + r0 += r1; + r6 += r7; + + /*r2 = p_ref[0]; r8 = p_ref[2]; r2 |= (r8<<16); + r8 = p_ref[1]; r1 = p_ref[3]; r8 |= (r1<<16);*/ + r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */ + r8 = (r2 >> 8) & 0xFF00FF; + r2 &= 0xFF00FF; + + /*p_ref8 = p_ref-inpitch; r1 = p_ref8[0]; r7 = p_ref8[2]; + r1 |= (r7<<16); r1 += r2; r7 = p_ref8[1]; + r2 = p_ref8[3]; r7 |= (r2<<16);*/ + r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + r1 += r2; + + r7 += r8; + + r0 += 20 * r1; + r6 += 20 * r7; + r0 += 0x100010; + r6 += 0x100010; + + /*p_ref8 = p_ref-(inpitch<<1); r2 = p_ref8[0]; r8 = p_ref8[2]; + r2 |= (r8<<16); r8 = p_ref8[1]; r1 = p_ref8[3]; r8 |= (r1<<16);*/ + r2 = 
*((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */ + r8 = (r2 >> 8) & 0xFF00FF; + r2 &= 0xFF00FF; + + /*p_ref8 = p_ref+inpitch; r1 = p_ref8[0]; r7 = p_ref8[2]; + r1 |= (r7<<16); r1 += r2; r7 = p_ref8[1]; + r2 = p_ref8[3]; r7 |= (r2<<16);*/ + r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + r1 += r2; + + r7 += r8; + + r0 -= 5 * r1; + r6 -= 5 * r7; + + r0 >>= 5; + r6 >>= 5; + /* clip */ + r13 |= r6; + r13 |= r0; + //CLIPPACK(r6,result) + /* add with horizontal results */ + r10 = *((uint32*)(p_tmp8 += 24)); + r9 = (r10 >> 8) & 0xFF00FF; + r10 &= 0xFF00FF; + + r0 += r10; + r0 += 0x10001; + r0 = (r0 >> 1) & 0xFF00FF; /* mask to 8 bytes */ + + r6 += r9; + r6 += 0x10001; + r6 = (r6 >> 1) & 0xFF00FF; /* mask to 8 bytes */ + + r0 |= (r6 << 8); /* pack it back */ + *((uint32*)(p_cur += outpitch)) = r0; + } + p_cur += curr_offset; /* offset to the next pixel */ + if (r13 & 0xFF000700) /* this column need clipping */ + { + p_cur -= 4; + for (i = 0; i < 4; i++) + { + p_ref = in2 + i; + p_tmp8 = &(tmp_res[0][j+i]); /* intermediate result */ + p_tmp8 -= 24; /* compensate for the first offset */ + p_cur -= outpitch; /* compensate for the first offset */ + tmp = (uint32)(p_ref + pkres); /* limit */ + while ((uint32)p_ref < tmp) /* the loop un-rolled */ + { + r0 = *(p_ref - (inpitch << 1)); + r1 = *(p_ref - inpitch); + r2 = *p_ref; + r3 = *(p_ref += inpitch); /* modify pointer before loading */ + r4 = *(p_ref += inpitch); + /* first pixel */ + r5 = *(p_ref += inpitch); + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + tmp_result = *(p_tmp8 += 24); /* modify pointer before loading */ + result = (result + tmp_result + 1); /* no clip */ + result = (result >> 1); + *(p_cur += outpitch) = result; + /* second pixel */ + r0 = *(p_ref += inpitch); + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + tmp_result = *(p_tmp8 += 24); /* intermediate result */ + result = (result + tmp_result + 1); /* no clip */ + result = (result >> 1); + *(p_cur += outpitch) = result; + /* third pixel */ + r1 = *(p_ref += inpitch); + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + tmp_result = *(p_tmp8 += 24); /* intermediate result */ + result = (result + tmp_result + 1); /* no clip */ + result = (result >> 1); + *(p_cur += outpitch) = result; + /* fourth pixel */ + r2 = *(p_ref += inpitch); + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + tmp_result = *(p_tmp8 += 24); /* intermediate result */ + result = (result + tmp_result + 1); /* no clip */ + result = (result >> 1); + *(p_cur += outpitch) = result; + p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ + } + p_cur += (curr_offset - 3); + } + } + } + + return ; +} + +/* position G */ +void eFullPelMC(uint8 *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight) 
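/* integer-pel position: a plain block copy from reference to prediction. The first branch assembles each 32-bit store from four byte loads because 'in' may not be word-aligned; the aligned branch copies one word at a time. */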
+{ + int i, j; + int offset_in = inpitch - blkwidth; + int offset_out = outpitch - blkwidth; + uint32 temp; + uint8 byte; + + if (((uint32)in)&3) + { + for (j = blkheight; j > 0; j--) + { + for (i = blkwidth; i > 0; i -= 4) + { + temp = *in++; + byte = *in++; + temp |= (byte << 8); + byte = *in++; + temp |= (byte << 16); + byte = *in++; + temp |= (byte << 24); + + *((uint32*)out) = temp; /* write 4 bytes */ + out += 4; + } + out += offset_out; + in += offset_in; + } + } + else + { + for (j = blkheight; j > 0; j--) + { + for (i = blkwidth; i > 0; i -= 4) + { + temp = *((uint32*)in); + *((uint32*)out) = temp; + in += 4; + out += 4; + } + out += offset_out; + in += offset_in; + } + } + return ; +} + +void ePadChroma(uint8 *ref, int picwidth, int picheight, int picpitch, int x_pos, int y_pos) +{ + int pad_height; + int pad_width; + uint8 *start; + uint32 word1, word2, word3; + int offset, j; + + + pad_height = 8 + ((y_pos & 7) ? 1 : 0); + pad_width = 8 + ((x_pos & 7) ? 1 : 0); + + y_pos >>= 3; + x_pos >>= 3; + // pad vertical first + if (y_pos < 0) // need to pad up + { + if (x_pos < -8) start = ref - 8; + else if (x_pos + pad_width > picwidth + 7) start = ref + picwidth + 7 - pad_width; + else start = ref + x_pos; + + /* word-align start */ + offset = (uint32)start & 0x3; + if (offset) start -= offset; + + word1 = *((uint32*)start); + word2 = *((uint32*)(start + 4)); + word3 = *((uint32*)(start + 8)); + + /* pad up N rows */ + j = -y_pos; + if (j > 8) j = 8; + while (j--) + { + *((uint32*)(start -= picpitch)) = word1; + *((uint32*)(start + 4)) = word2; + *((uint32*)(start + 8)) = word3; + } + + } + else if (y_pos + pad_height >= picheight) /* pad down */ + { + if (x_pos < -8) start = ref + picpitch * (picheight - 1) - 8; + else if (x_pos + pad_width > picwidth + 7) start = ref + picpitch * (picheight - 1) + + picwidth + 7 - pad_width; + else start = ref + picpitch * (picheight - 1) + x_pos; + + /* word-align start */ + offset = (uint32)start & 0x3; + if (offset) start -= offset; + + word1 = *((uint32*)start); + word2 = *((uint32*)(start + 4)); + word3 = *((uint32*)(start + 8)); + + /* pad down N rows */ + j = y_pos + pad_height - picheight; + if (j > 8) j = 8; + while (j--) + { + *((uint32*)(start += picpitch)) = word1; + *((uint32*)(start + 4)) = word2; + *((uint32*)(start + 8)) = word3; + } + } + + /* now pad horizontal */ + if (x_pos < 0) // pad left + { + if (y_pos < -8) start = ref - (picpitch << 3); + else if (y_pos + pad_height > picheight + 7) start = ref + (picheight + 7 - pad_height) * picpitch; + else start = ref + y_pos * picpitch; + + // now pad left 8 pixels for pad_height rows */ + j = pad_height; + start -= picpitch; + while (j--) + { + word1 = *(start += picpitch); + word1 |= (word1 << 8); + word1 |= (word1 << 16); + *((uint32*)(start - 8)) = word1; + *((uint32*)(start - 4)) = word1; + } + } + else if (x_pos + pad_width >= picwidth) /* pad right */ + { + if (y_pos < -8) start = ref - (picpitch << 3) + picwidth - 1; + else if (y_pos + pad_height > picheight + 7) start = ref + (picheight + 7 - pad_height) * picpitch + picwidth - 1; + else start = ref + y_pos * picpitch + picwidth - 1; + + // now pad right 8 pixels for pad_height rows */ + j = pad_height; + start -= picpitch; + while (j--) + { + word1 = *(start += picpitch); + word1 |= (word1 << 8); + word1 |= (word1 << 16); + *((uint32*)(start + 1)) = word1; + *((uint32*)(start + 5)) = word1; + } + } + + return ; +} + + +void eChromaMotionComp(uint8 *ref, int picwidth, int picheight, + int x_pos, int y_pos, + uint8 *pred, int 
picpitch, + int blkwidth, int blkheight) +{ + int dx, dy; + int offset_dx, offset_dy; + int index; + + ePadChroma(ref, picwidth, picheight, picpitch, x_pos, y_pos); + + dx = x_pos & 7; + dy = y_pos & 7; + offset_dx = (dx + 7) >> 3; + offset_dy = (dy + 7) >> 3; + x_pos = x_pos >> 3; /* round it to full-pel resolution */ + y_pos = y_pos >> 3; + + ref += y_pos * picpitch + x_pos; + + index = offset_dx + (offset_dy << 1) + ((blkwidth << 1) & 0x7); + + (*(eChromaMC_SIMD[index]))(ref, picpitch , dx, dy, pred, picpitch, blkwidth, blkheight); + return ; +} + + +/* SIMD routines, unroll the loops in vertical direction, decreasing loops (things to be done) */ +void eChromaDiagonalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight) +{ + int32 r0, r1, r2, r3, result0, result1; + uint8 temp[288]; + uint8 *ref, *out; + int i, j; + int dx_8 = 8 - dx; + int dy_8 = 8 - dy; + + /* horizontal first */ + out = temp; + for (i = 0; i < blkheight + 1; i++) + { + ref = pRef; + r0 = ref[0]; + for (j = 0; j < blkwidth; j += 4) + { + r0 |= (ref[2] << 16); + result0 = dx_8 * r0; + + r1 = ref[1] | (ref[3] << 16); + result0 += dx * r1; + *(int32 *)out = result0; + + result0 = dx_8 * r1; + + r2 = ref[4]; + r0 = r0 >> 16; + r1 = r0 | (r2 << 16); + result0 += dx * r1; + *(int32 *)(out + 16) = result0; + + ref += 4; + out += 4; + r0 = r2; + } + pRef += srcPitch; + out += (32 - blkwidth); + } + +// pRef -= srcPitch*(blkheight+1); + ref = temp; + + for (j = 0; j < blkwidth; j += 4) + { + r0 = *(int32 *)ref; + r1 = *(int32 *)(ref + 16); + ref += 32; + out = pOut; + for (i = 0; i < (blkheight >> 1); i++) + { + result0 = dy_8 * r0 + 0x00200020; + r2 = *(int32 *)ref; + result0 += dy * r2; + result0 >>= 6; + result0 &= 0x00FF00FF; + r0 = r2; + + result1 = dy_8 * r1 + 0x00200020; + r3 = *(int32 *)(ref + 16); + result1 += dy * r3; + result1 >>= 6; + result1 &= 0x00FF00FF; + r1 = r3; + *(int32 *)out = result0 | (result1 << 8); + out += predPitch; + ref += 32; + + result0 = dy_8 * r0 + 0x00200020; + r2 = *(int32 *)ref; + result0 += dy * r2; + result0 >>= 6; + result0 &= 0x00FF00FF; + r0 = r2; + + result1 = dy_8 * r1 + 0x00200020; + r3 = *(int32 *)(ref + 16); + result1 += dy * r3; + result1 >>= 6; + result1 &= 0x00FF00FF; + r1 = r3; + *(int32 *)out = result0 | (result1 << 8); + out += predPitch; + ref += 32; + } + pOut += 4; + ref = temp + 4; /* since it can only iterate twice max */ + } + return; +} + +void eChromaHorizontalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight) +{ + (void)(dy); + + int32 r0, r1, r2, result0, result1; + uint8 *ref, *out; + int i, j; + int dx_8 = 8 - dx; + + /* horizontal first */ + for (i = 0; i < blkheight; i++) + { + ref = pRef; + out = pOut; + + r0 = ref[0]; + for (j = 0; j < blkwidth; j += 4) + { + r0 |= (ref[2] << 16); + result0 = dx_8 * r0 + 0x00040004; + + r1 = ref[1] | (ref[3] << 16); + result0 += dx * r1; + result0 >>= 3; + result0 &= 0x00FF00FF; + + result1 = dx_8 * r1 + 0x00040004; + + r2 = ref[4]; + r0 = r0 >> 16; + r1 = r0 | (r2 << 16); + result1 += dx * r1; + result1 >>= 3; + result1 &= 0x00FF00FF; + + *(int32 *)out = result0 | (result1 << 8); + + ref += 4; + out += 4; + r0 = r2; + } + + pRef += srcPitch; + pOut += predPitch; + } + return; +} + +void eChromaVerticalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight) +{ + (void)(dx); + + int32 r0, r1, r2, r3, result0, result1; + int i, j; + uint8 *ref, *out; + int dy_8 = 
8 - dy; + /* vertical first */ + for (i = 0; i < blkwidth; i += 4) + { + ref = pRef; + out = pOut; + + r0 = ref[0] | (ref[2] << 16); + r1 = ref[1] | (ref[3] << 16); + ref += srcPitch; + for (j = 0; j < blkheight; j++) + { + result0 = dy_8 * r0 + 0x00040004; + r2 = ref[0] | (ref[2] << 16); + result0 += dy * r2; + result0 >>= 3; + result0 &= 0x00FF00FF; + r0 = r2; + + result1 = dy_8 * r1 + 0x00040004; + r3 = ref[1] | (ref[3] << 16); + result1 += dy * r3; + result1 >>= 3; + result1 &= 0x00FF00FF; + r1 = r3; + *(int32 *)out = result0 | (result1 << 8); + ref += srcPitch; + out += predPitch; + } + pOut += 4; + pRef += 4; + } + return; +} + +void eChromaDiagonalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight) +{ + (void)(blkwidth); + + int32 r0, r1, temp0, temp1, result; + int32 temp[9]; + int32 *out; + int i, r_temp; + int dy_8 = 8 - dy; + + /* horizontal first */ + out = temp; + for (i = 0; i < blkheight + 1; i++) + { + r_temp = pRef[1]; + temp0 = (pRef[0] << 3) + dx * (r_temp - pRef[0]); + temp1 = (r_temp << 3) + dx * (pRef[2] - r_temp); + r0 = temp0 | (temp1 << 16); + *out++ = r0; + pRef += srcPitch; + } + + pRef -= srcPitch * (blkheight + 1); + + out = temp; + + r0 = *out++; + + for (i = 0; i < blkheight; i++) + { + result = dy_8 * r0 + 0x00200020; + r1 = *out++; + result += dy * r1; + result >>= 6; + result &= 0x00FF00FF; + *(int16 *)pOut = (result >> 8) | (result & 0xFF); + r0 = r1; + pOut += predPitch; + } + return; +} + +void eChromaHorizontalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight) +{ + (void)(dy); + (void)(blkwidth); + + int i, temp, temp0, temp1; + + /* horizontal first */ + for (i = 0; i < blkheight; i++) + { + temp = pRef[1]; + temp0 = ((pRef[0] << 3) + dx * (temp - pRef[0]) + 4) >> 3; + temp1 = ((temp << 3) + dx * (pRef[2] - temp) + 4) >> 3; + + *(int16 *)pOut = temp0 | (temp1 << 8); + pRef += srcPitch; + pOut += predPitch; + + } + return; +} +void eChromaVerticalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight) +{ + (void)(dx); + (void)(blkwidth); + + int32 r0, r1, result; + int i; + int dy_8 = 8 - dy; + r0 = pRef[0] | (pRef[1] << 16); + pRef += srcPitch; + for (i = 0; i < blkheight; i++) + { + result = dy_8 * r0 + 0x00040004; + r1 = pRef[0] | (pRef[1] << 16); + result += dy * r1; + result >>= 3; + result &= 0x00FF00FF; + *(int16 *)pOut = (result >> 8) | (result & 0xFF); + r0 = r1; + pRef += srcPitch; + pOut += predPitch; + } + return; +} + +void eChromaFullMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight) +{ + (void)(dx); + (void)(dy); + + int i, j; + int offset_in = srcPitch - blkwidth; + int offset_out = predPitch - blkwidth; + uint16 temp; + uint8 byte; + + if (((uint32)pRef)&1) + { + for (j = blkheight; j > 0; j--) + { + for (i = blkwidth; i > 0; i -= 2) + { + temp = *pRef++; + byte = *pRef++; + temp |= (byte << 8); + *((uint16*)pOut) = temp; /* write 2 bytes */ + pOut += 2; + } + pOut += offset_out; + pRef += offset_in; + } + } + else + { + for (j = blkheight; j > 0; j--) + { + for (i = blkwidth; i > 0; i -= 2) + { + temp = *((uint16*)pRef); + *((uint16*)pOut) = temp; + pRef += 2; + pOut += 2; + } + pOut += offset_out; + pRef += offset_in; + } + } + return ; +} diff --git a/media/libstagefright/codecs/avc/enc/src/motion_est.cpp b/media/libstagefright/codecs/avc/enc/src/motion_est.cpp new file mode 100644 index 0000000..f650ef9 
--- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/motion_est.cpp @@ -0,0 +1,1774 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + +#define MIN_GOP 1 /* minimum size of GOP, 1/23/01, need to be tested */ + +#define DEFAULT_REF_IDX 0 /* always from the first frame in the reflist */ + +#define ALL_CAND_EQUAL 10 /* any number greater than 5 will work */ + + +/* from TMN 3.2 */ +#define PREF_NULL_VEC 129 /* zero vector bias */ +#define PREF_16_VEC 129 /* 1MV bias versus 4MVs*/ +#define PREF_INTRA 3024//512 /* bias for INTRA coding */ + +const static int tab_exclude[9][9] = // [last_loc][curr_loc] +{ + {0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 1, 1, 0, 0}, + {0, 0, 0, 0, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 1, 1, 1}, + {0, 1, 1, 0, 0, 0, 1, 1, 1}, + {0, 1, 1, 0, 0, 0, 0, 0, 1}, + {0, 1, 1, 1, 1, 0, 0, 0, 1}, + {0, 0, 1, 1, 1, 0, 0, 0, 0}, + {0, 0, 1, 1, 1, 1, 1, 0, 0} +}; //to decide whether to continue or compute + +const static int refine_next[8][2] = /* [curr_k][increment] */ +{ + {0, 0}, {2, 0}, {1, 1}, {0, 2}, { -1, 1}, { -2, 0}, { -1, -1}, {0, -2} +}; + +#ifdef _SAD_STAT +uint32 num_MB = 0; +uint32 num_cand = 0; +#endif + +/************************************************************************/ +#define TH_INTER_2 100 /* temporary for now */ + +//#define FIXED_INTERPRED_MODE AVC_P16 +#define FIXED_REF_IDX 0 +#define FIXED_MVX 0 +#define FIXED_MVY 0 + +// only use when AVC_P8 or AVC_P8ref0 +#define FIXED_SUBMB_MODE AVC_4x4 +/*************************************************************************/ + +/* Initialize arrays necessary for motion search */ +AVCEnc_Status InitMotionSearchModule(AVCHandle *avcHandle) +{ + AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; + AVCRateControl *rateCtrl = encvid->rateCtrl; + int search_range = rateCtrl->mvRange; + int number_of_subpel_positions = 4 * (2 * search_range + 3); + int max_mv_bits, max_mvd; + int temp_bits = 0; + uint8 *mvbits; + int bits, imax, imin, i; + uint8* subpel_pred = (uint8*) encvid->subpel_pred; // all 16 sub-pel positions + + + while (number_of_subpel_positions > 0) + { + temp_bits++; + number_of_subpel_positions >>= 1; + } + + max_mv_bits = 3 + 2 * temp_bits; + max_mvd = (1 << (max_mv_bits >> 1)) - 1; + + encvid->mvbits_array = (uint8*) avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, + sizeof(uint8) * (2 * max_mvd + 1), DEFAULT_ATTR); + + if (encvid->mvbits_array == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + mvbits = encvid->mvbits = encvid->mvbits_array + max_mvd; + + mvbits[0] = 1; + for (bits = 3; bits <= max_mv_bits; bits += 2) + { + imax = 1 << (bits >> 1); + imin = imax >> 1; + + for (i = imin; i < imax; i++) mvbits[-i] = mvbits[i] = bits; + } + + /* initialize half-pel search */ + encvid->hpel_cand[0] = subpel_pred + REF_CENTER; + encvid->hpel_cand[1] = 
subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1 ; + encvid->hpel_cand[2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1; + encvid->hpel_cand[3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; + encvid->hpel_cand[4] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; + encvid->hpel_cand[5] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25; + encvid->hpel_cand[6] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + encvid->hpel_cand[7] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + encvid->hpel_cand[8] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; + + /* For quarter-pel interpolation around best half-pel result */ + + encvid->bilin_base[0][0] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; + encvid->bilin_base[0][1] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1; + encvid->bilin_base[0][2] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + encvid->bilin_base[0][3] = subpel_pred + REF_CENTER; + + + encvid->bilin_base[1][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE; + encvid->bilin_base[1][1] = subpel_pred + REF_CENTER - 24; + encvid->bilin_base[1][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; + encvid->bilin_base[1][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1; + + encvid->bilin_base[2][0] = subpel_pred + REF_CENTER - 24; + encvid->bilin_base[2][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1; + encvid->bilin_base[2][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1; + encvid->bilin_base[2][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1; + + encvid->bilin_base[3][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1; + encvid->bilin_base[3][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1; + encvid->bilin_base[3][2] = subpel_pred + REF_CENTER; + encvid->bilin_base[3][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; + + encvid->bilin_base[4][0] = subpel_pred + REF_CENTER; + encvid->bilin_base[4][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; + encvid->bilin_base[4][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25; + encvid->bilin_base[4][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; + + encvid->bilin_base[5][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + encvid->bilin_base[5][1] = subpel_pred + REF_CENTER; + encvid->bilin_base[5][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + encvid->bilin_base[5][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25; + + encvid->bilin_base[6][0] = subpel_pred + REF_CENTER - 1; + encvid->bilin_base[6][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + encvid->bilin_base[6][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 24; + encvid->bilin_base[6][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + + encvid->bilin_base[7][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE; + encvid->bilin_base[7][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; + encvid->bilin_base[7][2] = subpel_pred + REF_CENTER - 1; + encvid->bilin_base[7][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + + encvid->bilin_base[8][0] = subpel_pred + REF_CENTER - 25; + encvid->bilin_base[8][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE; + encvid->bilin_base[8][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE; + encvid->bilin_base[8][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; + + + return AVCENC_SUCCESS; +} + +/* Clean-up memory */ +void CleanMotionSearchModule(AVCHandle *avcHandle) +{ + AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; + + if (encvid->mvbits_array) + { + avcHandle->CBAVC_Free(avcHandle->userData, 
(int)(encvid->mvbits_array)); + encvid->mvbits = NULL; + } + + return ; +} + + +bool IntraDecisionABE(int *min_cost, uint8 *cur, int pitch, bool ave) +{ + int j; + uint8 *out; + int temp, SBE; + OsclFloat ABE; + bool intra = true; + + SBE = 0; + /* top neighbor */ + out = cur - pitch; + for (j = 0; j < 16; j++) + { + temp = out[j] - cur[j]; + SBE += ((temp >= 0) ? temp : -temp); + } + + /* left neighbor */ + out = cur - 1; + out -= pitch; + cur -= pitch; + for (j = 0; j < 16; j++) + { + temp = *(out += pitch) - *(cur += pitch); + SBE += ((temp >= 0) ? temp : -temp); + } + + /* compare mincost/384 and SBE/64 */ + ABE = SBE / 32.0; //ABE = SBE/64.0; // + if (ABE >= *min_cost / 256.0) //if( ABE*0.8 >= min_cost/384.0) // + { + intra = false; // no possibility of intra, just use inter + } + else + { + if (ave == true) + { + *min_cost = (*min_cost + (int)(SBE * 8)) >> 1; // possibility of intra, averaging the cost + } + else + { + *min_cost = (int)(SBE * 8); + } + } + + return intra; +} + +/******* main function for macroblock prediction for the entire frame ***/ +/* if turns out to be IDR frame, set video->nal_unit_type to AVC_NALTYPE_IDR */ +void AVCMotionEstimation(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + int slice_type = video->slice_type; + AVCFrameIO *currInput = encvid->currInput; + AVCPictureData *refPic = video->RefPicList0[0]; + int i, j, k; + int mbwidth = video->PicWidthInMbs; + int mbheight = video->PicHeightInMbs; + int totalMB = video->PicSizeInMbs; + int pitch = currInput->pitch; + AVCMacroblock *currMB, *mblock = video->mblock; + AVCMV *mot_mb_16x16, *mot16x16 = encvid->mot16x16; + // AVCMV *mot_mb_16x8, *mot_mb_8x16, *mot_mb_8x8, etc; + AVCRateControl *rateCtrl = encvid->rateCtrl; + uint8 *intraSearch = encvid->intraSearch; + uint FS_en = encvid->fullsearch_enable; + + int NumIntraSearch, start_i, numLoop, incr_i; + int mbnum, offset; + uint8 *cur, *best_cand[5]; + int totalSAD = 0; /* average SAD for rate control */ + int type_pred; + int abe_cost; + +#ifdef HTFM + /***** HYPOTHESIS TESTING ********/ /* 2/28/01 */ + int collect = 0; + HTFM_Stat htfm_stat; + double newvar[16]; + double exp_lamda[15]; + /*********************************/ +#endif + int hp_guess = 0; + uint32 mv_uint32; + + offset = 0; + + if (slice_type == AVC_I_SLICE) + { + /* cannot do I16 prediction here because it needs full decoding. 
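 Intra 16x16 prediction needs the reconstructed left/top neighbor pixels, which are only available inside the encoding loop, so every MB is flagged for intra search here and its cost is left at the maximum.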
*/ + for (i = 0; i < totalMB; i++) + { + encvid->min_cost[i] = 0x7FFFFFFF; /* max value for int */ + } + + memset(intraSearch, 1, sizeof(uint8)*totalMB); + + encvid->firstIntraRefreshMBIndx = 0; /* reset this */ + + return ; + } + else // P_SLICE + { + for (i = 0; i < totalMB; i++) + { + mblock[i].mb_intra = 0; + } + memset(intraSearch, 1, sizeof(uint8)*totalMB); + } + + if (refPic->padded == 0) + { + AVCPaddingEdge(refPic); + refPic->padded = 1; + } + /* Random INTRA update */ + if (rateCtrl->intraMBRate) + { + AVCRasterIntraUpdate(encvid, mblock, totalMB, rateCtrl->intraMBRate); + } + + encvid->sad_extra_info = NULL; +#ifdef HTFM + /***** HYPOTHESIS TESTING ********/ + InitHTFM(video, &htfm_stat, newvar, &collect); + /*********************************/ +#endif + + if ((rateCtrl->scdEnable == 1) + && ((rateCtrl->frame_rate < 5.0) || (video->sliceHdr->frame_num > MIN_GOP))) + /* do not try to detect a new scene if low frame rate and too close to previous I-frame */ + { + incr_i = 2; + numLoop = 2; + start_i = 1; + type_pred = 0; /* for initial candidate selection */ + } + else + { + incr_i = 1; + numLoop = 1; + start_i = 0; + type_pred = 2; + } + + /* First pass, loop thru half the macroblock */ + /* determine scene change */ + /* Second pass, for the rest of macroblocks */ + NumIntraSearch = 0; // to be intra searched in the encoding loop. + while (numLoop--) + { + for (j = 0; j < mbheight; j++) + { + if (incr_i > 1) + start_i = (start_i == 0 ? 1 : 0) ; /* toggle 0 and 1 */ + + offset = pitch * (j << 4) + (start_i << 4); + + mbnum = j * mbwidth + start_i; + + for (i = start_i; i < mbwidth; i += incr_i) + { + video->mbNum = mbnum; + video->currMB = currMB = mblock + mbnum; + mot_mb_16x16 = mot16x16 + mbnum; + + cur = currInput->YCbCr[0] + offset; + + if (currMB->mb_intra == 0) /* for INTER mode */ + { +#if defined(HTFM) + HTFMPrepareCurMB_AVC(encvid, &htfm_stat, cur, pitch); +#else + AVCPrepareCurMB(encvid, cur, pitch); +#endif + /************************************************************/ + /******** full-pel 1MV search **********************/ + + AVCMBMotionSearch(encvid, cur, best_cand, i << 4, j << 4, type_pred, + FS_en, &hp_guess); + + abe_cost = encvid->min_cost[mbnum] = mot_mb_16x16->sad; + + /* set mbMode and MVs */ + currMB->mbMode = AVC_P16; + currMB->MBPartPredMode[0][0] = AVC_Pred_L0; + mv_uint32 = ((mot_mb_16x16->y) << 16) | ((mot_mb_16x16->x) & 0xffff); + for (k = 0; k < 32; k += 2) + { + currMB->mvL0[k>>1] = mv_uint32; + } + + /* make a decision whether it should be tested for intra or not */ + if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0) + { + if (false == IntraDecisionABE(&abe_cost, cur, pitch, true)) + { + intraSearch[mbnum] = 0; + } + else + { + NumIntraSearch++; + rateCtrl->MADofMB[mbnum] = abe_cost; + } + } + else // boundary MBs, always do intra search + { + NumIntraSearch++; + } + + totalSAD += (int) rateCtrl->MADofMB[mbnum];//mot_mb_16x16->sad; + } + else /* INTRA update, use for prediction */ + { + mot_mb_16x16[0].x = mot_mb_16x16[0].y = 0; + + /* reset all other MVs to zero */ + /* mot_mb_16x8, mot_mb_8x16, mot_mb_8x8, etc. */ + abe_cost = encvid->min_cost[mbnum] = 0x7FFFFFFF; /* max value for int */ + + if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0) + { + IntraDecisionABE(&abe_cost, cur, pitch, false); + + rateCtrl->MADofMB[mbnum] = abe_cost; + totalSAD += abe_cost; + } + + NumIntraSearch++ ; + /* cannot do I16 prediction here because it needs full decoding. 
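 This MB was force-flagged intra by the refresh logic, so its MV is reset to zero and only the boundary-error estimate is kept for rate control.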
*/ + // intraSearch[mbnum] = 1; + + } + + mbnum += incr_i; + offset += (incr_i << 4); + + } /* for i */ + } /* for j */ + + /* since we cannot do intra/inter decision here, the SCD has to be + based on other criteria such as motion vectors coherency or the SAD */ + if (incr_i > 1 && numLoop) /* scene change on and first loop */ + { + //if(NumIntraSearch > ((totalMB>>3)<<1) + (totalMB>>3)) /* 75% of 50%MBs */ + if (NumIntraSearch*99 > (48*totalMB)) /* 20% of 50%MBs */ + /* need to do more investigation about this threshold since the NumIntraSearch + only show potential intra MBs, not the actual one */ + { + /* we can choose to just encode I_SLICE without IDR */ + //video->nal_unit_type = AVC_NALTYPE_IDR; + video->nal_unit_type = AVC_NALTYPE_SLICE; + video->sliceHdr->slice_type = AVC_I_ALL_SLICE; + video->slice_type = AVC_I_SLICE; + memset(intraSearch, 1, sizeof(uint8)*totalMB); + i = totalMB; + while (i--) + { + mblock[i].mb_intra = 1; + encvid->min_cost[i] = 0x7FFFFFFF; /* max value for int */ + } + + rateCtrl->totalSAD = totalSAD * 2; /* SAD */ + + return ; + } + } + /******** no scene change, continue motion search **********************/ + start_i = 0; + type_pred++; /* second pass */ + } + + rateCtrl->totalSAD = totalSAD; /* SAD */ + +#ifdef HTFM + /***** HYPOTHESIS TESTING ********/ + if (collect) + { + collect = 0; + UpdateHTFM(encvid, newvar, exp_lamda, &htfm_stat); + } + /*********************************/ +#endif + + return ; +} + +/*===================================================================== + Function: PaddingEdge + Date: 09/16/2000 + Purpose: Pad edge of a Vop +=====================================================================*/ + +void AVCPaddingEdge(AVCPictureData *refPic) +{ + uint8 *src, *dst; + int i; + int pitch, width, height; + uint32 temp1, temp2; + + width = refPic->width; + height = refPic->height; + pitch = refPic->pitch; + + /* pad top */ + src = refPic->Sl; + + temp1 = *src; /* top-left corner */ + temp2 = src[width-1]; /* top-right corner */ + temp1 |= (temp1 << 8); + temp1 |= (temp1 << 16); + temp2 |= (temp2 << 8); + temp2 |= (temp2 << 16); + + dst = src - (pitch << 4); + + *((uint32*)(dst - 16)) = temp1; + *((uint32*)(dst - 12)) = temp1; + *((uint32*)(dst - 8)) = temp1; + *((uint32*)(dst - 4)) = temp1; + + memcpy(dst, src, width); + + *((uint32*)(dst += width)) = temp2; + *((uint32*)(dst + 4)) = temp2; + *((uint32*)(dst + 8)) = temp2; + *((uint32*)(dst + 12)) = temp2; + + dst = dst - width - 16; + + i = 15; + while (i--) + { + memcpy(dst + pitch, dst, pitch); + dst += pitch; + } + + /* pad sides */ + dst += (pitch + 16); + src = dst; + i = height; + while (i--) + { + temp1 = *src; + temp2 = src[width-1]; + temp1 |= (temp1 << 8); + temp1 |= (temp1 << 16); + temp2 |= (temp2 << 8); + temp2 |= (temp2 << 16); + + *((uint32*)(dst - 16)) = temp1; + *((uint32*)(dst - 12)) = temp1; + *((uint32*)(dst - 8)) = temp1; + *((uint32*)(dst - 4)) = temp1; + + *((uint32*)(dst += width)) = temp2; + *((uint32*)(dst + 4)) = temp2; + *((uint32*)(dst + 8)) = temp2; + *((uint32*)(dst + 12)) = temp2; + + src += pitch; + dst = src; + } + + /* pad bottom */ + dst -= 16; + i = 16; + while (i--) + { + memcpy(dst, dst - pitch, pitch); + dst += pitch; + } + + + return ; +} + +/*=========================================================================== + Function: AVCRasterIntraUpdate + Date: 2/26/01 + Purpose: To raster-scan assign INTRA-update . + N macroblocks are updated (also was programmable). 
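 Starting at encvid->firstIntraRefreshMBIndx, numRefresh MBs are marked mb_intra and queued for intra search; the index wraps back to MB 0 when the end of the frame is reached.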
+===========================================================================*/ +void AVCRasterIntraUpdate(AVCEncObject *encvid, AVCMacroblock *mblock, int totalMB, int numRefresh) +{ + int indx, i; + + indx = encvid->firstIntraRefreshMBIndx; + for (i = 0; i < numRefresh && indx < totalMB; i++) + { + (mblock + indx)->mb_intra = 1; + encvid->intraSearch[indx++] = 1; + } + + /* if read the end of frame, reset and loop around */ + if (indx >= totalMB - 1) + { + indx = 0; + while (i < numRefresh && indx < totalMB) + { + (mblock + indx)->mb_intra = 1; + encvid->intraSearch[indx++] = 1; + i++; + } + } + + encvid->firstIntraRefreshMBIndx = indx; /* update with a new value */ + + return ; +} + + +#ifdef HTFM +void InitHTFM(VideoEncData *encvid, HTFM_Stat *htfm_stat, double *newvar, int *collect) +{ + AVCCommonObj *video = encvid->common; + int i; + int lx = video->currPic->width; // padding + int lx2 = lx << 1; + int lx3 = lx2 + lx; + int rx = video->currPic->pitch; + int rx2 = rx << 1; + int rx3 = rx2 + rx; + + int *offset, *offset2; + + /* 4/11/01, collect data every 30 frames, doesn't have to be base layer */ + if (((int)video->sliceHdr->frame_num) % 30 == 1) + { + + *collect = 1; + + htfm_stat->countbreak = 0; + htfm_stat->abs_dif_mad_avg = 0; + + for (i = 0; i < 16; i++) + { + newvar[i] = 0.0; + } +// encvid->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM_Collect; + encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM_Collect; + encvid->functionPointer->SAD_MB_HalfPel[0] = NULL; + encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFM_Collectxh; + encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFM_Collectyh; + encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFM_Collectxhyh; + encvid->sad_extra_info = (void*)(htfm_stat); + offset = htfm_stat->offsetArray; + offset2 = htfm_stat->offsetRef; + } + else + { +// encvid->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM; + encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM; + encvid->functionPointer->SAD_MB_HalfPel[0] = NULL; + encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFMxh; + encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFMyh; + encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFMxhyh; + encvid->sad_extra_info = (void*)(encvid->nrmlz_th); + offset = encvid->nrmlz_th + 16; + offset2 = encvid->nrmlz_th + 32; + } + + offset[0] = 0; + offset[1] = lx2 + 2; + offset[2] = 2; + offset[3] = lx2; + offset[4] = lx + 1; + offset[5] = lx3 + 3; + offset[6] = lx + 3; + offset[7] = lx3 + 1; + offset[8] = lx; + offset[9] = lx3 + 2; + offset[10] = lx3 ; + offset[11] = lx + 2 ; + offset[12] = 1; + offset[13] = lx2 + 3; + offset[14] = lx2 + 1; + offset[15] = 3; + + offset2[0] = 0; + offset2[1] = rx2 + 2; + offset2[2] = 2; + offset2[3] = rx2; + offset2[4] = rx + 1; + offset2[5] = rx3 + 3; + offset2[6] = rx + 3; + offset2[7] = rx3 + 1; + offset2[8] = rx; + offset2[9] = rx3 + 2; + offset2[10] = rx3 ; + offset2[11] = rx + 2 ; + offset2[12] = 1; + offset2[13] = rx2 + 3; + offset2[14] = rx2 + 1; + offset2[15] = 3; + + return ; +} + +void UpdateHTFM(AVCEncObject *encvid, double *newvar, double *exp_lamda, HTFM_Stat *htfm_stat) +{ + if (htfm_stat->countbreak == 0) + htfm_stat->countbreak = 1; + + newvar[0] = (double)(htfm_stat->abs_dif_mad_avg) / (htfm_stat->countbreak * 16.); + + if (newvar[0] < 0.001) + { + newvar[0] = 0.001; /* to prevent floating overflow */ + } + exp_lamda[0] = 1 / (newvar[0] * 1.4142136); + exp_lamda[1] = exp_lamda[0] * 1.5825; + exp_lamda[2] = exp_lamda[0] * 2.1750; + 
exp_lamda[3] = exp_lamda[0] * 3.5065; + exp_lamda[4] = exp_lamda[0] * 3.1436; + exp_lamda[5] = exp_lamda[0] * 3.5315; + exp_lamda[6] = exp_lamda[0] * 3.7449; + exp_lamda[7] = exp_lamda[0] * 4.5854; + exp_lamda[8] = exp_lamda[0] * 4.6191; + exp_lamda[9] = exp_lamda[0] * 5.4041; + exp_lamda[10] = exp_lamda[0] * 6.5974; + exp_lamda[11] = exp_lamda[0] * 10.5341; + exp_lamda[12] = exp_lamda[0] * 10.0719; + exp_lamda[13] = exp_lamda[0] * 12.0516; + exp_lamda[14] = exp_lamda[0] * 15.4552; + + CalcThreshold(HTFM_Pf, exp_lamda, encvid->nrmlz_th); + return ; +} + + +void CalcThreshold(double pf, double exp_lamda[], int nrmlz_th[]) +{ + int i; + double temp[15]; + // printf("\nLamda: "); + + /* parametric PREMODELling */ + for (i = 0; i < 15; i++) + { + // printf("%g ",exp_lamda[i]); + if (pf < 0.5) + temp[i] = 1 / exp_lamda[i] * M4VENC_LOG(2 * pf); + else + temp[i] = -1 / exp_lamda[i] * M4VENC_LOG(2 * (1 - pf)); + } + + nrmlz_th[15] = 0; + for (i = 0; i < 15; i++) /* scale upto no.pixels */ + nrmlz_th[i] = (int)(temp[i] * ((i + 1) << 4) + 0.5); + + return ; +} + +void HTFMPrepareCurMB_AVC(AVCEncObject *encvid, HTFM_Stat *htfm_stat, uint8 *cur, int pitch) +{ + AVCCommonObj *video = encvid->common; + uint32 *htfmMB = (uint32*)(encvid->currYMB); + uint8 *ptr, byte; + int *offset; + int i; + uint32 word; + + if (((int)video->sliceHdr->frame_num) % 30 == 1) + { + offset = htfm_stat->offsetArray; + } + else + { + offset = encvid->nrmlz_th + 16; + } + + for (i = 0; i < 16; i++) + { + ptr = cur + offset[i]; + word = ptr[0]; + byte = ptr[4]; + word |= (byte << 8); + byte = ptr[8]; + word |= (byte << 16); + byte = ptr[12]; + word |= (byte << 24); + *htfmMB++ = word; + + word = *(ptr += (pitch << 2)); + byte = ptr[4]; + word |= (byte << 8); + byte = ptr[8]; + word |= (byte << 16); + byte = ptr[12]; + word |= (byte << 24); + *htfmMB++ = word; + + word = *(ptr += (pitch << 2)); + byte = ptr[4]; + word |= (byte << 8); + byte = ptr[8]; + word |= (byte << 16); + byte = ptr[12]; + word |= (byte << 24); + *htfmMB++ = word; + + word = *(ptr += (pitch << 2)); + byte = ptr[4]; + word |= (byte << 8); + byte = ptr[8]; + word |= (byte << 16); + byte = ptr[12]; + word |= (byte << 24); + *htfmMB++ = word; + } + + return ; +} + + +#endif // HTFM + +void AVCPrepareCurMB(AVCEncObject *encvid, uint8 *cur, int pitch) +{ + void* tmp = (void*)(encvid->currYMB); + uint32 *currYMB = (uint32*) tmp; + int i; + + cur -= pitch; + + for (i = 0; i < 16; i++) + { + *currYMB++ = *((uint32*)(cur += pitch)); + *currYMB++ = *((uint32*)(cur + 4)); + *currYMB++ = *((uint32*)(cur + 8)); + *currYMB++ = *((uint32*)(cur + 12)); + } + + return ; +} + +#ifdef FIXED_INTERPRED_MODE + +/* due to the complexity of the predicted motion vector, we may not decide to skip +a macroblock here just yet. */ +/* We will find the best motion vector and the best intra prediction mode for each block. 
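 When FIXED_INTERPRED_MODE is defined, however, no search is actually performed: the routine below only fills in the partition layout for the fixed mode and assigns the constant (FIXED_MVX, FIXED_MVY) vector with reference index FIXED_REF_IDX to every partition.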
*/ +/* output are + currMB->NumMbPart, currMB->MbPartWidth, currMB->MbPartHeight, + currMB->NumSubMbPart[], currMB->SubMbPartWidth[], currMB->SubMbPartHeight, + currMB->MBPartPredMode[][] (L0 or L1 or BiPred) + currMB->RefIdx[], currMB->ref_idx_L0[], + currMB->mvL0[], currMB->mvL1[] + */ + +AVCEnc_Status AVCMBMotionSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum, + int num_pass) +{ + AVCCommonObj *video = encvid->common; + int mbPartIdx, subMbPartIdx; + int16 *mv; + int i; + int SubMbPartHeight, SubMbPartWidth, NumSubMbPart; + + /* assign value to currMB->MBPartPredMode[][x],subMbMode[],NumSubMbPart[],SubMbPartWidth[],SubMbPartHeight[] */ + + currMB->mbMode = FIXED_INTERPRED_MODE; + currMB->mb_intra = 0; + + if (currMB->mbMode == AVC_P16) + { + currMB->NumMbPart = 1; + currMB->MbPartWidth = 16; + currMB->MbPartHeight = 16; + currMB->SubMbPartHeight[0] = 16; + currMB->SubMbPartWidth[0] = 16; + currMB->NumSubMbPart[0] = 1; + } + else if (currMB->mbMode == AVC_P16x8) + { + currMB->NumMbPart = 2; + currMB->MbPartWidth = 16; + currMB->MbPartHeight = 8; + for (i = 0; i < 2; i++) + { + currMB->SubMbPartWidth[i] = 16; + currMB->SubMbPartHeight[i] = 8; + currMB->NumSubMbPart[i] = 1; + } + } + else if (currMB->mbMode == AVC_P8x16) + { + currMB->NumMbPart = 2; + currMB->MbPartWidth = 8; + currMB->MbPartHeight = 16; + for (i = 0; i < 2; i++) + { + currMB->SubMbPartWidth[i] = 8; + currMB->SubMbPartHeight[i] = 16; + currMB->NumSubMbPart[i] = 1; + } + } + else if (currMB->mbMode == AVC_P8 || currMB->mbMode == AVC_P8ref0) + { + currMB->NumMbPart = 4; + currMB->MbPartWidth = 8; + currMB->MbPartHeight = 8; + if (FIXED_SUBMB_MODE == AVC_8x8) + { + SubMbPartHeight = 8; + SubMbPartWidth = 8; + NumSubMbPart = 1; + } + else if (FIXED_SUBMB_MODE == AVC_8x4) + { + SubMbPartHeight = 4; + SubMbPartWidth = 8; + NumSubMbPart = 2; + } + else if (FIXED_SUBMB_MODE == AVC_4x8) + { + SubMbPartHeight = 8; + SubMbPartWidth = 4; + NumSubMbPart = 2; + } + else if (FIXED_SUBMB_MODE == AVC_4x4) + { + SubMbPartHeight = 4; + SubMbPartWidth = 4; + NumSubMbPart = 4; + } + + for (i = 0; i < 4; i++) + { + currMB->subMbMode[i] = FIXED_SUBMB_MODE; + currMB->SubMbPartHeight[i] = SubMbPartHeight; + currMB->SubMbPartWidth[i] = SubMbPartWidth; + currMB->NumSubMbPart[i] = NumSubMbPart; + } + } + else /* it's probably intra mode */ + { + return AVCENC_SUCCESS; + } + + for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++) + { + currMB->MBPartPredMode[mbPartIdx][0] = AVC_Pred_L0; + currMB->ref_idx_L0[mbPartIdx] = FIXED_REF_IDX; + currMB->RefIdx[mbPartIdx] = video->RefPicList0[FIXED_REF_IDX]->RefIdx; + + for (subMbPartIdx = 0; subMbPartIdx < 4; subMbPartIdx++) + { + mv = (int16*)(currMB->mvL0 + (mbPartIdx << 2) + subMbPartIdx); + + *mv++ = FIXED_MVX; + *mv = FIXED_MVY; + } + } + + encvid->min_cost = 0; + + return AVCENC_SUCCESS; +} + +#else /* perform the search */ + +/* This option #1 search is very similar to PV's MPEG4 motion search algorithm. + The search is done in hierarchical manner from 16x16 MB down to smaller and smaller + partition. At each level, a decision can be made to stop the search if the expected + prediction gain is not worth the computation. The decision can also be made at the finest + level for more fullsearch-like behavior with the price of heavier computation. 
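 Concretely, the 16x16 path below runs AVCCandidateSelection() to gather spatio-temporal candidates and the predicted MV (cmvx, cmvy), seeds the best full-pel vector either from AVCFullSearch() (when fullsearch is enabled, or for the first few MBs of the top row right after an IDR reference) or from the candidate list, then spirals through the eight surrounding positions until the center stops moving, and finally calls AVCFindHalfPelMB() for half/quarter-pel refinement when subPelEnable is set. Each SAD is biased by MV_COST(), an estimate of the cost of coding the MV difference against (cmvx, cmvy).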
*/ +void AVCMBMotionSearch(AVCEncObject *encvid, uint8 *cur, uint8 *best_cand[], + int i0, int j0, int type_pred, int FS_en, int *hp_guess) +{ + AVCCommonObj *video = encvid->common; + AVCPictureData *currPic = video->currPic; + AVCSeqParamSet *currSPS = video->currSeqParams; + AVCRateControl *rateCtrl = encvid->rateCtrl; + AVCMacroblock *currMB = video->currMB; + uint8 *ref, *cand, *ncand; + void *extra_info = encvid->sad_extra_info; + int mbnum = video->mbNum; + int width = currPic->width; /* 6/12/01, must be multiple of 16 */ + int height = currPic->height; + AVCMV *mot16x16 = encvid->mot16x16; + int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock; + + int range = rateCtrl->mvRange; + + int lx = currPic->pitch; /* padding */ + int i, j, imin, jmin, ilow, ihigh, jlow, jhigh; + int d, dmin, dn[9]; + int k; + int mvx[5], mvy[5]; + int num_can, center_again; + int last_loc, new_loc = 0; + int step, max_step = range >> 1; + int next; + + int cmvx, cmvy; /* estimated predicted MV */ + int lev_idx; + int lambda_motion = encvid->lambda_motion; + uint8 *mvbits = encvid->mvbits; + int mvshift = 2; + int mvcost; + + int min_sad = 65535; + + ref = video->RefPicList0[DEFAULT_REF_IDX]->Sl; /* origin of actual frame */ + + /* have to initialize these params, necessary for interprediction part */ + currMB->NumMbPart = 1; + currMB->SubMbPartHeight[0] = 16; + currMB->SubMbPartWidth[0] = 16; + currMB->NumSubMbPart[0] = 1; + currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] = + currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = DEFAULT_REF_IDX; + currMB->ref_idx_L1[0] = currMB->ref_idx_L1[1] = + currMB->ref_idx_L1[2] = currMB->ref_idx_L1[3] = DEFAULT_REF_IDX; + currMB->RefIdx[0] = currMB->RefIdx[1] = + currMB->RefIdx[2] = currMB->RefIdx[3] = video->RefPicList0[DEFAULT_REF_IDX]->RefIdx; + + cur = encvid->currYMB; /* use smaller memory space for current MB */ + + /* find limit of the search (adjusting search range)*/ + lev_idx = mapLev2Idx[currSPS->level_idc]; + + /* we can make this part dynamic based on previous statistics */ + ilow = i0 - range; + if (i0 - ilow > 2047) /* clip to conform with the standard */ + { + ilow = i0 - 2047; + } + if (ilow < -13) // change it from -15 to -13 because of 6-tap filter needs extra 2 lines. 
+ { + ilow = -13; + } + + ihigh = i0 + range - 1; + if (ihigh - i0 > 2047) /* clip to conform with the standard */ + { + ihigh = i0 + 2047; + } + if (ihigh > width - 3) + { + ihigh = width - 3; // change from width-1 to width-3 for the same reason as above + } + + jlow = j0 - range; + if (j0 - jlow > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */ + { + jlow = j0 - MaxVmvR[lev_idx] + 1; + } + if (jlow < -13) // same reason as above + { + jlow = -13; + } + + jhigh = j0 + range - 1; + if (jhigh - j0 > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */ + { + jhigh = j0 + MaxVmvR[lev_idx] - 1; + } + if (jhigh > height - 3) // same reason as above + { + jhigh = height - 3; + } + + /* find initial motion vector & predicted MV*/ + AVCCandidateSelection(mvx, mvy, &num_can, i0 >> 4, j0 >> 4, encvid, type_pred, &cmvx, &cmvy); + + imin = i0; + jmin = j0; /* needed for fullsearch */ + ncand = ref + i0 + j0 * lx; + + /* for first row of MB, fullsearch can be used */ + if (FS_en) + { + *hp_guess = 0; /* no guess for fast half-pel */ + + dmin = AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy); + + ncand = ref + imin + jmin * lx; + } + else + { /* fullsearch the top row to only upto (0,3) MB */ + /* upto 30% complexity saving with the same complexity */ + if (video->PrevRefFrameNum == 0 && j0 == 0 && i0 <= 64 && type_pred != 1) + { + *hp_guess = 0; /* no guess for fast half-pel */ + dmin = AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy); + ncand = ref + imin + jmin * lx; + } + else + { + /************** initialize candidate **************************/ + + dmin = 65535; + + /* check if all are equal */ + if (num_can == ALL_CAND_EQUAL) + { + i = i0 + mvx[0]; + j = j0 + mvy[0]; + + if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh) + { + cand = ref + i + j * lx; + + d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info); + mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy); + d += mvcost; + + if (d < dmin) + { + dmin = d; + imin = i; + jmin = j; + ncand = cand; + min_sad = d - mvcost; // for rate control + } + } + } + else + { + /************** evaluate unique candidates **********************/ + for (k = 0; k < num_can; k++) + { + i = i0 + mvx[k]; + j = j0 + mvy[k]; + + if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh) + { + cand = ref + i + j * lx; + d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info); + mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy); + d += mvcost; + + if (d < dmin) + { + dmin = d; + imin = i; + jmin = j; + ncand = cand; + min_sad = d - mvcost; // for rate control + } + } + } + } + + /******************* local refinement ***************************/ + center_again = 0; + last_loc = new_loc = 0; + // ncand = ref + jmin*lx + imin; /* center of the search */ + step = 0; + dn[0] = dmin; + while (!center_again && step <= max_step) + { + + AVCMoveNeighborSAD(dn, last_loc); + + center_again = 1; + i = imin; + j = jmin - 1; + cand = ref + i + j * lx; + + /* starting from [0,-1] */ + /* spiral check one step at a time*/ + for (k = 2; k <= 8; k += 2) + { + if (!tab_exclude[last_loc][k]) /* exclude last step computation */ + { /* not already computed */ + if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh) + { + d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info); + mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy); + d += mvcost; + + dn[k] = d; /* keep it for half pel use */ + + if (d < dmin) + { + 
ncand = cand; + dmin = d; + imin = i; + jmin = j; + center_again = 0; + new_loc = k; + min_sad = d - mvcost; // for rate control + } + } + } + if (k == 8) /* end side search*/ + { + if (!center_again) + { + k = -1; /* start diagonal search */ + cand -= lx; + j--; + } + } + else + { + next = refine_next[k][0]; + i += next; + cand += next; + next = refine_next[k][1]; + j += next; + cand += lx * next; + } + } + last_loc = new_loc; + step ++; + } + if (!center_again) + AVCMoveNeighborSAD(dn, last_loc); + + *hp_guess = AVCFindMin(dn); + + encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0; + } + } + + mot16x16[mbnum].sad = dmin; + mot16x16[mbnum].x = (imin - i0) << 2; + mot16x16[mbnum].y = (jmin - j0) << 2; + best_cand[0] = ncand; + + if (rateCtrl->subPelEnable) // always enable half-pel search + { + /* find half-pel resolution motion vector */ + min_sad = AVCFindHalfPelMB(encvid, cur, mot16x16 + mbnum, best_cand[0], i0, j0, *hp_guess, cmvx, cmvy); + + encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0; + + + if (encvid->best_qpel_pos == -1) + { + ncand = encvid->hpel_cand[encvid->best_hpel_pos]; + } + else + { + ncand = encvid->qpel_cand[encvid->best_qpel_pos]; + } + } + else + { + encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0; + } + + /** do motion comp here for now */ + ref = currPic->Sl + i0 + j0 * lx; + /* copy from the best result to current Picture */ + for (j = 0; j < 16; j++) + { + for (i = 0; i < 16; i++) + { + *ref++ = *ncand++; + } + ref += (lx - 16); + ncand += 8; + } + + return ; +} + +#endif + +/*=============================================================================== + Function: AVCFullSearch + Date: 09/16/2000 + Purpose: Perform full-search motion estimation over the range of search + region in a spiral-outward manner. + Input/Output: VideoEncData, current Vol, previou Vop, pointer to the left corner of + current VOP, current coord (also output), boundaries. 
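 Each candidate SAD call packs the running best, (dmin << 16) | lx, so that the SAD routine can stop early once the partial sum exceeds dmin, and MV_COST() biases every position toward the predicted vector (cmvx, cmvy).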
+===============================================================================*/ +int AVCFullSearch(AVCEncObject *encvid, uint8 *prev, uint8 *cur, + int *imin, int *jmin, int ilow, int ihigh, int jlow, int jhigh, + int cmvx, int cmvy) +{ + int range = encvid->rateCtrl->mvRange; + AVCPictureData *currPic = encvid->common->currPic; + uint8 *cand; + int i, j, k, l; + int d, dmin; + int i0 = *imin; /* current position */ + int j0 = *jmin; + int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock; + void *extra_info = encvid->sad_extra_info; + int lx = currPic->pitch; /* with padding */ + + int offset = i0 + j0 * lx; + + int lambda_motion = encvid->lambda_motion; + uint8 *mvbits = encvid->mvbits; + int mvshift = 2; + int mvcost; + int min_sad; + + cand = prev + offset; + + dmin = (*SAD_Macroblock)(cand, cur, (65535 << 16) | lx, (void*)extra_info); + mvcost = MV_COST(lambda_motion, mvshift, 0, 0, cmvx, cmvy); + min_sad = dmin; + dmin += mvcost; + + /* perform spiral search */ + for (k = 1; k <= range; k++) + { + + i = i0 - k; + j = j0 - k; + + cand = prev + i + j * lx; + + for (l = 0; l < 8*k; l++) + { + /* no need for boundary checking again */ + if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh) + { + d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, (void*)extra_info); + mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy); + d += mvcost; + + if (d < dmin) + { + dmin = d; + *imin = i; + *jmin = j; + min_sad = d - mvcost; + } + } + + if (l < (k << 1)) + { + i++; + cand++; + } + else if (l < (k << 2)) + { + j++; + cand += lx; + } + else if (l < ((k << 2) + (k << 1))) + { + i--; + cand--; + } + else + { + j--; + cand -= lx; + } + } + } + + encvid->rateCtrl->MADofMB[encvid->common->mbNum] = (min_sad / 256.0); // for rate control + + return dmin; +} + +/*=============================================================================== + Function: AVCCandidateSelection + Date: 09/16/2000 + Purpose: Fill up the list of candidate using spatio-temporal correlation + among neighboring blocks. + Input/Output: type_pred = 0: first pass, 1: second pass, or no SCD + Modified: , 09/23/01, get rid of redundant candidates before passing back. + , 09/11/07, added return for modified predicted MV, this will be + needed for both fast search and fullsearch. 
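 The candidates are the co-located 16x16 MV from the previous frame plus a mix of neighboring MVs (left/right, top/bottom, corners) depending on the pass, shifted from quarter-pel back to full-pel; the predicted MV (cmvx, cmvy) is derived from the A (left), B (top) and C (top-right, or top-left at the right edge) neighbors.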
+===============================================================================*/ + +void AVCCandidateSelection(int *mvx, int *mvy, int *num_can, int imb, int jmb, + AVCEncObject *encvid, int type_pred, int *cmvx, int *cmvy) +{ + AVCCommonObj *video = encvid->common; + AVCMV *mot16x16 = encvid->mot16x16; + AVCMV *pmot; + int mbnum = video->mbNum; + int mbwidth = video->PicWidthInMbs; + int mbheight = video->PicHeightInMbs; + int i, j, same, num1; + + /* this part is for predicted MV */ + int pmvA_x = 0, pmvA_y = 0, pmvB_x = 0, pmvB_y = 0, pmvC_x = 0, pmvC_y = 0; + int availA = 0, availB = 0, availC = 0; + + *num_can = 0; + + if (video->PrevRefFrameNum != 0) // previous frame is an IDR frame + { + /* Spatio-Temporal Candidate (five candidates) */ + if (type_pred == 0) /* first pass */ + { + pmot = &mot16x16[mbnum]; /* same coordinate previous frame */ + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + if (imb >= (mbwidth >> 1) && imb > 0) /*left neighbor previous frame */ + { + pmot = &mot16x16[mbnum-1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + else if (imb + 1 < mbwidth) /*right neighbor previous frame */ + { + pmot = &mot16x16[mbnum+1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + + if (jmb < mbheight - 1) /*bottom neighbor previous frame */ + { + pmot = &mot16x16[mbnum+mbwidth]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + else if (jmb > 0) /*upper neighbor previous frame */ + { + pmot = &mot16x16[mbnum-mbwidth]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + + if (imb > 0 && jmb > 0) /* upper-left neighbor current frame*/ + { + pmot = &mot16x16[mbnum-mbwidth-1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (jmb > 0 && imb < mbheight - 1) /* upper right neighbor current frame*/ + { + pmot = &mot16x16[mbnum-mbwidth+1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + } + else /* second pass */ + /* original ST1 algorithm */ + { + pmot = &mot16x16[mbnum]; /* same coordinate previous frame */ + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + + if (imb > 0) /*left neighbor current frame */ + { + pmot = &mot16x16[mbnum-1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (jmb > 0) /*upper neighbor current frame */ + { + pmot = &mot16x16[mbnum-mbwidth]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (imb < mbwidth - 1) /*right neighbor previous frame */ + { + pmot = &mot16x16[mbnum+1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (jmb < mbheight - 1) /*bottom neighbor previous frame */ + { + pmot = &mot16x16[mbnum+mbwidth]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + } + + /* get predicted MV */ + if (imb > 0) /* get MV from left (A) neighbor either on current or previous frame */ + { + availA = 1; + pmot = &mot16x16[mbnum-1]; + pmvA_x = pmot->x; + pmvA_y = pmot->y; + } + + if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */ + { + availB = 1; + pmot = &mot16x16[mbnum-mbwidth]; + pmvB_x = pmot->x; + pmvB_y = pmot->y; + + availC = 1; + + if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */ + { + pmot = &mot16x16[mbnum-mbwidth+1]; + } + else /* get MV from top-left (D) neighbor of current frame */ + { + pmot = &mot16x16[mbnum-mbwidth-1]; 
+ } + pmvC_x = pmot->x; + pmvC_y = pmot->y; + } + + } + else /* only Spatial Candidate (four candidates)*/ + { + if (type_pred == 0) /*first pass*/ + { + if (imb > 1) /* neighbor two blocks away to the left */ + { + pmot = &mot16x16[mbnum-2]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (imb > 0 && jmb > 0) /* upper-left neighbor */ + { + pmot = &mot16x16[mbnum-mbwidth-1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (jmb > 0 && imb < mbheight - 1) /* upper right neighbor */ + { + pmot = &mot16x16[mbnum-mbwidth+1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + + /* get predicted MV */ + if (imb > 1) /* get MV from 2nd left (A) neighbor either of current frame */ + { + availA = 1; + pmot = &mot16x16[mbnum-2]; + pmvA_x = pmot->x; + pmvA_y = pmot->y; + } + + if (jmb > 0 && imb > 0) /* get MV from top-left (B) neighbor of current frame */ + { + availB = 1; + pmot = &mot16x16[mbnum-mbwidth-1]; + pmvB_x = pmot->x; + pmvB_y = pmot->y; + } + + if (jmb > 0 && imb < mbwidth - 1) + { + availC = 1; + pmot = &mot16x16[mbnum-mbwidth+1]; + pmvC_x = pmot->x; + pmvC_y = pmot->y; + } + } +//#ifdef SCENE_CHANGE_DETECTION + /* second pass (ST2 algorithm)*/ + else + { + if (type_pred == 1) /* 4/7/01 */ + { + if (imb > 0) /*left neighbor current frame */ + { + pmot = &mot16x16[mbnum-1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (jmb > 0) /*upper neighbor current frame */ + { + pmot = &mot16x16[mbnum-mbwidth]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (imb < mbwidth - 1) /*right neighbor current frame */ + { + pmot = &mot16x16[mbnum+1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (jmb < mbheight - 1) /*bottom neighbor current frame */ + { + pmot = &mot16x16[mbnum+mbwidth]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + } + //#else + else /* original ST1 algorithm */ + { + if (imb > 0) /*left neighbor current frame */ + { + pmot = &mot16x16[mbnum-1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + + if (jmb > 0) /*upper-left neighbor current frame */ + { + pmot = &mot16x16[mbnum-mbwidth-1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + + } + if (jmb > 0) /*upper neighbor current frame */ + { + pmot = &mot16x16[mbnum-mbwidth]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + + if (imb < mbheight - 1) /*upper-right neighbor current frame */ + { + pmot = &mot16x16[mbnum-mbwidth+1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + } + } + + /* get predicted MV */ + if (imb > 0) /* get MV from left (A) neighbor either on current or previous frame */ + { + availA = 1; + pmot = &mot16x16[mbnum-1]; + pmvA_x = pmot->x; + pmvA_y = pmot->y; + } + + if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */ + { + availB = 1; + pmot = &mot16x16[mbnum-mbwidth]; + pmvB_x = pmot->x; + pmvB_y = pmot->y; + + availC = 1; + + if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */ + { + pmot = &mot16x16[mbnum-mbwidth+1]; + } + else /* get MV from top-left (D) neighbor of current frame */ + { + pmot = &mot16x16[mbnum-mbwidth-1]; + } + pmvC_x = pmot->x; + pmvC_y = pmot->y; + } + } +//#endif + } + + /* 3/23/01, remove redundant candidate (possible k-mean) */ + num1 = *num_can; + *num_can = 1; + for (i = 1; i < 
num1; i++) + { + same = 0; + j = 0; + while (!same && j < *num_can) + { +#if (CANDIDATE_DISTANCE==0) + if (mvx[i] == mvx[j] && mvy[i] == mvy[j]) +#else + // modified k-mean, 3/24/01, shouldn't be greater than 3 + if (AVC_ABS(mvx[i] - mvx[j]) + AVC_ABS(mvy[i] - mvy[j]) < CANDIDATE_DISTANCE) +#endif + same = 1; + j++; + } + if (!same) + { + mvx[*num_can] = mvx[i]; + mvy[*num_can] = mvy[i]; + (*num_can)++; + } + } + + if (num1 == 5 && *num_can == 1) + *num_can = ALL_CAND_EQUAL; /* all are equal */ + + /* calculate predicted MV */ + + if (availA && !(availB || availC)) + { + *cmvx = pmvA_x; + *cmvy = pmvA_y; + } + else + { + *cmvx = AVC_MEDIAN(pmvA_x, pmvB_x, pmvC_x); + *cmvy = AVC_MEDIAN(pmvA_y, pmvB_y, pmvC_y); + } + + return ; +} + + +/************************************************************* + Function: AVCMoveNeighborSAD + Date: 3/27/01 + Purpose: Move neighboring SAD around when center has shifted +*************************************************************/ + +void AVCMoveNeighborSAD(int dn[], int new_loc) +{ + int tmp[9]; + tmp[0] = dn[0]; + tmp[1] = dn[1]; + tmp[2] = dn[2]; + tmp[3] = dn[3]; + tmp[4] = dn[4]; + tmp[5] = dn[5]; + tmp[6] = dn[6]; + tmp[7] = dn[7]; + tmp[8] = dn[8]; + dn[0] = dn[1] = dn[2] = dn[3] = dn[4] = dn[5] = dn[6] = dn[7] = dn[8] = 65536; + + switch (new_loc) + { + case 0: + break; + case 1: + dn[4] = tmp[2]; + dn[5] = tmp[0]; + dn[6] = tmp[8]; + break; + case 2: + dn[4] = tmp[3]; + dn[5] = tmp[4]; + dn[6] = tmp[0]; + dn[7] = tmp[8]; + dn[8] = tmp[1]; + break; + case 3: + dn[6] = tmp[4]; + dn[7] = tmp[0]; + dn[8] = tmp[2]; + break; + case 4: + dn[1] = tmp[2]; + dn[2] = tmp[3]; + dn[6] = tmp[5]; + dn[7] = tmp[6]; + dn[8] = tmp[0]; + break; + case 5: + dn[1] = tmp[0]; + dn[2] = tmp[4]; + dn[8] = tmp[6]; + break; + case 6: + dn[1] = tmp[8]; + dn[2] = tmp[0]; + dn[3] = tmp[4]; + dn[4] = tmp[5]; + dn[8] = tmp[7]; + break; + case 7: + dn[2] = tmp[8]; + dn[3] = tmp[0]; + dn[4] = tmp[6]; + break; + case 8: + dn[2] = tmp[1]; + dn[3] = tmp[2]; + dn[4] = tmp[0]; + dn[5] = tmp[6]; + dn[6] = tmp[7]; + break; + } + dn[0] = tmp[new_loc]; + + return ; +} + +/* 3/28/01, find minimal of dn[9] */ + +int AVCFindMin(int dn[]) +{ + int min, i; + int dmin; + + dmin = dn[1]; + min = 1; + for (i = 2; i < 9; i++) + { + if (dn[i] < dmin) + { + dmin = dn[i]; + min = i; + } + } + + return min; +} + + + diff --git a/media/libstagefright/codecs/avc/enc/src/rate_control.cpp b/media/libstagefright/codecs/avc/enc/src/rate_control.cpp new file mode 100644 index 0000000..15b55fb --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/rate_control.cpp @@ -0,0 +1,981 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" +#include + +/* rate control variables */ +#define RC_MAX_QUANT 51 +#define RC_MIN_QUANT 0 //cap to 10 to prevent rate fluctuation + +#define MAD_MIN 1 /* handle the case of devision by zero in RC */ + + +/* local functions */ +double QP2Qstep(int QP); +int Qstep2QP(double Qstep); + +double ComputeFrameMAD(AVCCommonObj *video, AVCRateControl *rateCtrl); + +void targetBitCalculation(AVCEncObject *encvid, AVCCommonObj *video, AVCRateControl *rateCtrl, MultiPass *pMP); + +void calculateQuantizer_Multipass(AVCEncObject *encvid, AVCCommonObj *video, + AVCRateControl *rateCtrl, MultiPass *pMP); + +void updateRC_PostProc(AVCRateControl *rateCtrl, MultiPass *pMP); + +void AVCSaveRDSamples(MultiPass *pMP, int counter_samples); + +void updateRateControl(AVCRateControl *rateControl, int nal_type); + +int GetAvgFrameQP(AVCRateControl *rateCtrl) +{ + return rateCtrl->Qc; +} + +AVCEnc_Status RCDetermineFrameNum(AVCEncObject *encvid, AVCRateControl *rateCtrl, uint32 modTime, uint *frameNum) +{ + AVCCommonObj *video = encvid->common; + AVCSliceHeader *sliceHdr = video->sliceHdr; + uint32 modTimeRef = encvid->modTimeRef; + int32 currFrameNum ; + int frameInc; + + + /* check with the buffer fullness to make sure that we have enough bits to encode this frame */ + /* we can use a threshold to guarantee minimum picture quality */ + /**********************************/ + + /* for now, the default is to encode every frame, To Be Changed */ + if (rateCtrl->first_frame) + { + encvid->modTimeRef = modTime; + encvid->wrapModTime = 0; + encvid->prevFrameNum = 0; + encvid->prevProcFrameNum = 0; + + *frameNum = 0; + + /* set frame type to IDR-frame */ + video->nal_unit_type = AVC_NALTYPE_IDR; + sliceHdr->slice_type = AVC_I_ALL_SLICE; + video->slice_type = AVC_I_SLICE; + + return AVCENC_SUCCESS; + } + else + { + if (modTime < modTimeRef) /* modTime wrapped around */ + { + encvid->wrapModTime += ((uint32)0xFFFFFFFF - modTimeRef) + 1; + encvid->modTimeRef = modTimeRef = 0; + } + modTime += encvid->wrapModTime; /* wrapModTime is non zero after wrap-around */ + + currFrameNum = (int32)(((modTime - modTimeRef) * rateCtrl->frame_rate + 200) / 1000); /* add small roundings */ + + if (currFrameNum <= (int32)encvid->prevProcFrameNum) + { + return AVCENC_FAIL; /* this is a late frame do not encode it */ + } + + frameInc = currFrameNum - encvid->prevProcFrameNum; + + if (frameInc < rateCtrl->skip_next_frame + 1) + { + return AVCENC_FAIL; /* frame skip required to maintain the target bit rate. 
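skip_next_frame is set by updateRateControl() when the VBV occupancy climbs too close to the top of the buffer, so over-budget frames are dropped here before any macroblock is coded.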
*/ + } + + RCUpdateBuffer(video, rateCtrl, frameInc - rateCtrl->skip_next_frame); /* in case more frames dropped */ + + *frameNum = currFrameNum; + + /* This part would be similar to DetermineVopType of m4venc */ + if ((*frameNum >= (uint)rateCtrl->idrPeriod && rateCtrl->idrPeriod > 0) || (*frameNum > video->MaxFrameNum)) /* first frame or IDR*/ + { + /* set frame type to IDR-frame */ + if (rateCtrl->idrPeriod) + { + encvid->modTimeRef += (uint32)(rateCtrl->idrPeriod * 1000 / rateCtrl->frame_rate); + *frameNum -= rateCtrl->idrPeriod; + } + else + { + encvid->modTimeRef += (uint32)(video->MaxFrameNum * 1000 / rateCtrl->frame_rate); + *frameNum -= video->MaxFrameNum; + } + + video->nal_unit_type = AVC_NALTYPE_IDR; + sliceHdr->slice_type = AVC_I_ALL_SLICE; + video->slice_type = AVC_I_SLICE; + encvid->prevProcFrameNum = *frameNum; + } + else + { + video->nal_unit_type = AVC_NALTYPE_SLICE; + sliceHdr->slice_type = AVC_P_ALL_SLICE; + video->slice_type = AVC_P_SLICE; + encvid->prevProcFrameNum = currFrameNum; + } + + } + + return AVCENC_SUCCESS; +} + +void RCUpdateBuffer(AVCCommonObj *video, AVCRateControl *rateCtrl, int frameInc) +{ + int tmp; + MultiPass *pMP = rateCtrl->pMP; + + OSCL_UNUSED_ARG(video); + + if (rateCtrl->rcEnable == TRUE) + { + if (frameInc > 1) + { + tmp = rateCtrl->bitsPerFrame * (frameInc - 1); + rateCtrl->VBV_fullness -= tmp; + pMP->counter_BTsrc += 10 * (frameInc - 1); + + /* Check buffer underflow */ + if (rateCtrl->VBV_fullness < rateCtrl->low_bound) + { + rateCtrl->VBV_fullness = rateCtrl->low_bound; // -rateCtrl->Bs/2; + rateCtrl->TMN_W = rateCtrl->VBV_fullness - rateCtrl->low_bound; + pMP->counter_BTsrc = pMP->counter_BTdst + (int)((OsclFloat)(rateCtrl->Bs / 2 - rateCtrl->low_bound) / 2.0 / (pMP->target_bits_per_frame / 10)); + } + } + } +} + + +AVCEnc_Status InitRateControlModule(AVCHandle *avcHandle) +{ + AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; + AVCCommonObj *video = encvid->common; + AVCRateControl *rateCtrl = encvid->rateCtrl; + double L1, L2, L3, bpp; + int qp; + int i, j; + + rateCtrl->basicUnit = video->PicSizeInMbs; + + rateCtrl->MADofMB = (double*) avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, + video->PicSizeInMbs * sizeof(double), DEFAULT_ATTR); + + if (!rateCtrl->MADofMB) + { + goto CLEANUP_RC; + } + + if (rateCtrl->rcEnable == TRUE) + { + rateCtrl->pMP = (MultiPass*) avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, sizeof(MultiPass), DEFAULT_ATTR); + if (!rateCtrl->pMP) + { + goto CLEANUP_RC; + } + memset(rateCtrl->pMP, 0, sizeof(MultiPass)); + rateCtrl->pMP->encoded_frames = -1; /* forget about the very first I frame */ + + /* RDInfo **pRDSamples */ + rateCtrl->pMP->pRDSamples = (RDInfo **)avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, (30 * sizeof(RDInfo *)), DEFAULT_ATTR); + if (!rateCtrl->pMP->pRDSamples) + { + goto CLEANUP_RC; + } + + for (i = 0; i < 30; i++) + { + rateCtrl->pMP->pRDSamples[i] = (RDInfo *)avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, (32 * sizeof(RDInfo)), DEFAULT_ATTR); + if (!rateCtrl->pMP->pRDSamples[i]) + { + goto CLEANUP_RC; + } + for (j = 0; j < 32; j++) memset(&(rateCtrl->pMP->pRDSamples[i][j]), 0, sizeof(RDInfo)); + } + rateCtrl->pMP->frameRange = (int)(rateCtrl->frame_rate * 1.0); /* 1.0s time frame*/ + rateCtrl->pMP->frameRange = AVC_MAX(rateCtrl->pMP->frameRange, 5); + rateCtrl->pMP->frameRange = AVC_MIN(rateCtrl->pMP->frameRange, 30); + + rateCtrl->pMP->framePos = -1; + + + rateCtrl->bitsPerFrame = (int32)(rateCtrl->bitRate / rateCtrl->frame_rate); + + /* BX rate control */ + 
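/* Buffer model used below:
+       Bs           = coded picture buffer size (cpbSize), tracked over [-Bs/2, Bs/2],
+       VBV_fullness = current occupancy, initialized to 1/3 of the buffer (Bs/3 - Bs/2),
+       low_bound    = -Bs/2, the underflow clamp applied in RCUpdateBuffer(),
+       counter_BTsrc / counter_BTdst = bits shifted between frames, counted in
+       tenths of the per-frame bit budget (target_bits_per_frame / 10). */ +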
rateCtrl->skip_next_frame = 0; /* must be initialized */ + + rateCtrl->Bs = rateCtrl->cpbSize; + rateCtrl->TMN_W = 0; + rateCtrl->VBV_fullness = (int)(rateCtrl->Bs * 0.5); /* rateCtrl->Bs */ + rateCtrl->encoded_frames = 0; + + rateCtrl->TMN_TH = rateCtrl->bitsPerFrame; + + rateCtrl->max_BitVariance_num = (int)((OsclFloat)(rateCtrl->Bs - rateCtrl->VBV_fullness) / (rateCtrl->bitsPerFrame / 10.0)) - 5; + if (rateCtrl->max_BitVariance_num < 0) rateCtrl->max_BitVariance_num += 5; + + // Set the initial buffer fullness + /* According to the spec, the initial buffer fullness needs to be set to 1/3 */ + rateCtrl->VBV_fullness = (int)(rateCtrl->Bs / 3.0 - rateCtrl->Bs / 2.0); /* the buffer range is [-Bs/2, Bs/2] */ + rateCtrl->pMP->counter_BTsrc = (int)((rateCtrl->Bs / 2.0 - rateCtrl->Bs / 3.0) / (rateCtrl->bitsPerFrame / 10.0)); + rateCtrl->TMN_W = (int)(rateCtrl->VBV_fullness + rateCtrl->pMP->counter_BTsrc * (rateCtrl->bitsPerFrame / 10.0)); + + rateCtrl->low_bound = -rateCtrl->Bs / 2; + rateCtrl->VBV_fullness_offset = 0; + + /* Setting the bitrate and framerate */ + rateCtrl->pMP->bitrate = rateCtrl->bitRate; + rateCtrl->pMP->framerate = rateCtrl->frame_rate; + rateCtrl->pMP->target_bits_per_frame = rateCtrl->pMP->bitrate / rateCtrl->pMP->framerate; + + /*compute the initial QP*/ + bpp = 1.0 * rateCtrl->bitRate / (rateCtrl->frame_rate * (video->PicSizeInMbs << 8)); + if (video->PicWidthInSamplesL == 176) + { + L1 = 0.1; + L2 = 0.3; + L3 = 0.6; + } + else if (video->PicWidthInSamplesL == 352) + { + L1 = 0.2; + L2 = 0.6; + L3 = 1.2; + } + else + { + L1 = 0.6; + L2 = 1.4; + L3 = 2.4; + } + + if (rateCtrl->initQP == 0) + { + if (bpp <= L1) + qp = 35; + else if (bpp <= L2) + qp = 25; + else if (bpp <= L3) + qp = 20; + else + qp = 15; + rateCtrl->initQP = qp; + } + + rateCtrl->Qc = rateCtrl->initQP; + } + + return AVCENC_SUCCESS; + +CLEANUP_RC: + + CleanupRateControlModule(avcHandle); + return AVCENC_MEMORY_FAIL; + +} + + +void CleanupRateControlModule(AVCHandle *avcHandle) +{ + AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; + AVCRateControl *rateCtrl = encvid->rateCtrl; + int i; + + if (rateCtrl->MADofMB) + { + avcHandle->CBAVC_Free(avcHandle->userData, (int)(rateCtrl->MADofMB)); + } + + if (rateCtrl->pMP) + { + if (rateCtrl->pMP->pRDSamples) + { + for (i = 0; i < 30; i++) + { + if (rateCtrl->pMP->pRDSamples[i]) + { + avcHandle->CBAVC_Free(avcHandle->userData, (int)rateCtrl->pMP->pRDSamples[i]); + } + } + avcHandle->CBAVC_Free(avcHandle->userData, (int)rateCtrl->pMP->pRDSamples); + } + avcHandle->CBAVC_Free(avcHandle->userData, (int)(rateCtrl->pMP)); + } + + return ; +} + +void RCInitGOP(AVCEncObject *encvid) +{ + /* in BX RC, there's no GOP-level RC */ + + OSCL_UNUSED_ARG(encvid); + + return ; +} + + +void RCInitFrameQP(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCRateControl *rateCtrl = encvid->rateCtrl; + AVCPicParamSet *picParam = video->currPicParams; + MultiPass *pMP = rateCtrl->pMP; + + if (rateCtrl->rcEnable == TRUE) + { + /* frame layer rate control */ + if (rateCtrl->encoded_frames == 0) + { + video->QPy = rateCtrl->Qc = rateCtrl->initQP; + } + else + { + calculateQuantizer_Multipass(encvid, video, rateCtrl, pMP); + video->QPy = rateCtrl->Qc; + } + + rateCtrl->NumberofHeaderBits = 0; + rateCtrl->NumberofTextureBits = 0; + rateCtrl->numFrameBits = 0; // reset + + /* update pMP->framePos */ + if (++pMP->framePos == pMP->frameRange) pMP->framePos = 0; + + if (rateCtrl->T == 0) + { + pMP->counter_BTdst = (int)(rateCtrl->frame_rate * 7.5 + 0.5); /* 0.75s 
time frame */ + pMP->counter_BTdst = AVC_MIN(pMP->counter_BTdst, (int)(rateCtrl->max_BitVariance_num / 2 * 0.40)); /* 0.75s time frame may go beyond VBV buffer if we set the buffer size smaller than 0.75s */ + pMP->counter_BTdst = AVC_MAX(pMP->counter_BTdst, (int)((rateCtrl->Bs / 2 - rateCtrl->VBV_fullness) * 0.30 / (rateCtrl->TMN_TH / 10.0) + 0.5)); /* At least 30% of VBV buffer size/2 */ + pMP->counter_BTdst = AVC_MIN(pMP->counter_BTdst, 20); /* Limit the target to be smaller than 3C */ + + pMP->target_bits = rateCtrl->T = rateCtrl->TMN_TH = (int)(rateCtrl->TMN_TH * (1.0 + pMP->counter_BTdst * 0.1)); + pMP->diff_counter = pMP->counter_BTdst; + } + + /* collect the necessary data: target bits, actual bits, mad and QP */ + pMP->target_bits = rateCtrl->T; + pMP->QP = video->QPy; + + pMP->mad = (OsclFloat)rateCtrl->totalSAD / video->PicSizeInMbs; //ComputeFrameMAD(video, rateCtrl); + if (pMP->mad < MAD_MIN) pMP->mad = MAD_MIN; /* MAD_MIN is defined as 1 in mp4def.h */ + + pMP->bitrate = rateCtrl->bitRate; /* calculated in RCVopQPSetting */ + pMP->framerate = rateCtrl->frame_rate; + + /* first pass encoding */ + pMP->nRe_Quantized = 0; + + } // rcEnable + else + { + video->QPy = rateCtrl->initQP; + } + +// printf(" %d ",video->QPy); + + if (video->CurrPicNum == 0 && encvid->outOfBandParamSet == FALSE) + { + picParam->pic_init_qs_minus26 = 0; + picParam->pic_init_qp_minus26 = video->QPy - 26; + } + + // need this for motion estimation + encvid->lambda_mode = QP2QUANT[AVC_MAX(0, video->QPy-SHIFT_QP)]; + encvid->lambda_motion = LAMBDA_FACTOR(encvid->lambda_mode); + return ; +} + +/* Mad based variable bit allocation + QP calculation with a new quadratic method */ +void calculateQuantizer_Multipass(AVCEncObject *encvid, AVCCommonObj *video, + AVCRateControl *rateCtrl, MultiPass *pMP) +{ + int prev_actual_bits = 0, curr_target, /*pos=0,*/i, j; + OsclFloat Qstep, prev_QP = 0.625; + + OsclFloat curr_mad, prev_mad, curr_RD, prev_RD, average_mad, aver_QP; + + /* Mad based variable bit allocation */ + targetBitCalculation(encvid, video, rateCtrl, pMP); + + if (rateCtrl->T <= 0 || rateCtrl->totalSAD == 0) + { + if (rateCtrl->T < 0) rateCtrl->Qc = RC_MAX_QUANT; + return; + } + + /* ---------------------------------------------------------------------------------------------------*/ + /* current frame QP estimation */ + curr_target = rateCtrl->T; + curr_mad = (OsclFloat)rateCtrl->totalSAD / video->PicSizeInMbs; + if (curr_mad < MAD_MIN) curr_mad = MAD_MIN; /* MAD_MIN is defined as 1 in mp4def.h */ + curr_RD = (OsclFloat)curr_target / curr_mad; + + if (rateCtrl->skip_next_frame == -1) // previous was skipped + { + i = pMP->framePos; + prev_mad = pMP->pRDSamples[i][0].mad; + prev_QP = pMP->pRDSamples[i][0].QP; + prev_actual_bits = pMP->pRDSamples[i][0].actual_bits; + } + else + { + /* Another version of search the optimal point */ + prev_mad = 0.0; + i = 0; + while (i < pMP->frameRange && prev_mad < 0.001) /* find first one with nonzero prev_mad */ + { + prev_mad = pMP->pRDSamples[i][0].mad; + i++; + } + + if (i < pMP->frameRange) + { + prev_actual_bits = pMP->pRDSamples[i-1][0].actual_bits; + + for (j = 0; i < pMP->frameRange; i++) + { + if (pMP->pRDSamples[i][0].mad != 0 && + AVC_ABS(prev_mad - curr_mad) > AVC_ABS(pMP->pRDSamples[i][0].mad - curr_mad)) + { + prev_mad = pMP->pRDSamples[i][0].mad; + prev_actual_bits = pMP->pRDSamples[i][0].actual_bits; + j = i; + } + } + prev_QP = QP2Qstep(pMP->pRDSamples[j][0].QP); + + for (i = 1; i < pMP->samplesPerFrame[j]; i++) + { + if (AVC_ABS(prev_actual_bits - 
curr_target) > AVC_ABS(pMP->pRDSamples[j][i].actual_bits - curr_target)) + { + prev_actual_bits = pMP->pRDSamples[j][i].actual_bits; + prev_QP = QP2Qstep(pMP->pRDSamples[j][i].QP); + } + } + } + } + + // quadratic approximation + if (prev_mad > 0.001) // only when prev_mad is greater than 0, otherwise keep using the same QP + { + prev_RD = (OsclFloat)prev_actual_bits / prev_mad; + //rateCtrl->Qc = (Int)(prev_QP * sqrt(prev_actual_bits/curr_target) + 0.4); + if (prev_QP == 0.625) // added this to allow getting out of QP = 0 easily + { + Qstep = (int)(prev_RD / curr_RD + 0.5); + } + else + { + // rateCtrl->Qc =(Int)(prev_QP * M4VENC_SQRT(prev_RD/curr_RD) + 0.9); + + if (prev_RD / curr_RD > 0.5 && prev_RD / curr_RD < 2.0) + Qstep = (int)(prev_QP * (sqrt(prev_RD / curr_RD) + prev_RD / curr_RD) / 2.0 + 0.9); /* Quadratic and linear approximation */ + else + Qstep = (int)(prev_QP * (sqrt(prev_RD / curr_RD) + pow(prev_RD / curr_RD, 1.0 / 3.0)) / 2.0 + 0.9); + } + // lower bound on Qc should be a function of curr_mad + // When mad is already low, lower bound on Qc doesn't have to be small. + // Note, this doesn't work well for low complexity clip encoded at high bit rate + // it doesn't hit the target bit rate due to this QP lower bound. + /// if((curr_mad < 8) && (rateCtrl->Qc < 12)) rateCtrl->Qc = 12; + // else if((curr_mad < 128) && (rateCtrl->Qc < 3)) rateCtrl->Qc = 3; + + rateCtrl->Qc = Qstep2QP(Qstep); + + if (rateCtrl->Qc < RC_MIN_QUANT) rateCtrl->Qc = RC_MIN_QUANT; + if (rateCtrl->Qc > RC_MAX_QUANT) rateCtrl->Qc = RC_MAX_QUANT; + } + + /* active bit resource protection */ + aver_QP = (pMP->encoded_frames == 0 ? 0 : pMP->sum_QP / (OsclFloat)pMP->encoded_frames); + average_mad = (pMP->encoded_frames == 0 ? 0 : pMP->sum_mad / (OsclFloat)pMP->encoded_frames); /* this function is called from the scond encoded frame*/ + if (pMP->diff_counter == 0 && + ((OsclFloat)rateCtrl->Qc <= aver_QP*1.1 || curr_mad <= average_mad*1.1) && + pMP->counter_BTsrc <= (pMP->counter_BTdst + (int)(pMP->framerate*1.0 + 0.5))) + { + rateCtrl->TMN_TH -= (int)(pMP->target_bits_per_frame / 10.0); + rateCtrl->T = rateCtrl->TMN_TH - rateCtrl->TMN_W; + pMP->counter_BTsrc++; + pMP->diff_counter--; + } + +} + +void targetBitCalculation(AVCEncObject *encvid, AVCCommonObj *video, AVCRateControl *rateCtrl, MultiPass *pMP) +{ + OSCL_UNUSED_ARG(encvid); + OsclFloat curr_mad;//, average_mad; + int diff_counter_BTsrc, diff_counter_BTdst, prev_counter_diff, curr_counter_diff, bound; + /* BT = Bit Transfer, for pMP->counter_BTsrc, pMP->counter_BTdst */ + + /* some stuff about frame dropping remained here to be done because pMP cannot be inserted into updateRateControl()*/ + updateRC_PostProc(rateCtrl, pMP); + + /* update pMP->counter_BTsrc and pMP->counter_BTdst to avoid interger overflow */ + if (pMP->counter_BTsrc > 1000 && pMP->counter_BTdst > 1000) + { + pMP->counter_BTsrc -= 1000; + pMP->counter_BTdst -= 1000; + } + + /* ---------------------------------------------------------------------------------------------------*/ + /* target calculation */ + curr_mad = (OsclFloat)rateCtrl->totalSAD / video->PicSizeInMbs; + if (curr_mad < MAD_MIN) curr_mad = MAD_MIN; /* MAD_MIN is defined as 1 in mp4def.h */ + diff_counter_BTsrc = diff_counter_BTdst = 0; + pMP->diff_counter = 0; + + + /*1.calculate average mad */ + pMP->sum_mad += curr_mad; + //average_mad = (pMP->encoded_frames < 1 ? 
curr_mad : pMP->sum_mad/(OsclFloat)(pMP->encoded_frames+1)); /* this function is called from the scond encoded frame*/ + //pMP->aver_mad = average_mad; + if (pMP->encoded_frames >= 0) /* pMP->encoded_frames is set to -1 initially, so forget about the very first I frame */ + pMP->aver_mad = (pMP->aver_mad * pMP->encoded_frames + curr_mad) / (pMP->encoded_frames + 1); + + if (pMP->overlapped_win_size > 0 && pMP->encoded_frames_prev >= 0) + pMP->aver_mad_prev = (pMP->aver_mad_prev * pMP->encoded_frames_prev + curr_mad) / (pMP->encoded_frames_prev + 1); + + /*2.average_mad, mad ==> diff_counter_BTsrc, diff_counter_BTdst */ + if (pMP->overlapped_win_size == 0) + { + /* original verison */ + if (curr_mad > pMP->aver_mad*1.1) + { + if (curr_mad / (pMP->aver_mad + 0.0001) > 2) + diff_counter_BTdst = (int)(sqrt(curr_mad / (pMP->aver_mad + 0.0001)) * 10 + 0.4) - 10; + //diff_counter_BTdst = (int)((sqrt(curr_mad/pMP->aver_mad)*2+curr_mad/pMP->aver_mad)/(3*0.1) + 0.4) - 10; + else + diff_counter_BTdst = (int)(curr_mad / (pMP->aver_mad + 0.0001) * 10 + 0.4) - 10; + } + else /* curr_mad <= average_mad*1.1 */ + //diff_counter_BTsrc = 10 - (int)((sqrt(curr_mad/pMP->aver_mad) + pow(curr_mad/pMP->aver_mad, 1.0/3.0))/(2.0*0.1) + 0.4); + diff_counter_BTsrc = 10 - (int)(sqrt(curr_mad / (pMP->aver_mad + 0.0001)) * 10 + 0.5); + + /* actively fill in the possible gap */ + if (diff_counter_BTsrc == 0 && diff_counter_BTdst == 0 && + curr_mad <= pMP->aver_mad*1.1 && pMP->counter_BTsrc < pMP->counter_BTdst) + diff_counter_BTsrc = 1; + + } + else if (pMP->overlapped_win_size > 0) + { + /* transition time: use previous average mad "pMP->aver_mad_prev" instead of the current average mad "pMP->aver_mad" */ + if (curr_mad > pMP->aver_mad_prev*1.1) + { + if (curr_mad / pMP->aver_mad_prev > 2) + diff_counter_BTdst = (int)(sqrt(curr_mad / (pMP->aver_mad_prev + 0.0001)) * 10 + 0.4) - 10; + //diff_counter_BTdst = (int)((M4VENC_SQRT(curr_mad/pMP->aver_mad_prev)*2+curr_mad/pMP->aver_mad_prev)/(3*0.1) + 0.4) - 10; + else + diff_counter_BTdst = (int)(curr_mad / (pMP->aver_mad_prev + 0.0001) * 10 + 0.4) - 10; + } + else /* curr_mad <= average_mad*1.1 */ + //diff_counter_BTsrc = 10 - (Int)((sqrt(curr_mad/pMP->aver_mad_prev) + pow(curr_mad/pMP->aver_mad_prev, 1.0/3.0))/(2.0*0.1) + 0.4); + diff_counter_BTsrc = 10 - (int)(sqrt(curr_mad / (pMP->aver_mad_prev + 0.0001)) * 10 + 0.5); + + /* actively fill in the possible gap */ + if (diff_counter_BTsrc == 0 && diff_counter_BTdst == 0 && + curr_mad <= pMP->aver_mad_prev*1.1 && pMP->counter_BTsrc < pMP->counter_BTdst) + diff_counter_BTsrc = 1; + + if (--pMP->overlapped_win_size <= 0) pMP->overlapped_win_size = 0; + } + + + /* if difference is too much, do clipping */ + /* First, set the upper bound for current bit allocation variance: 80% of available buffer */ + bound = (int)((rateCtrl->Bs / 2 - rateCtrl->VBV_fullness) * 0.6 / (pMP->target_bits_per_frame / 10)); /* rateCtrl->Bs */ + diff_counter_BTsrc = AVC_MIN(diff_counter_BTsrc, bound); + diff_counter_BTdst = AVC_MIN(diff_counter_BTdst, bound); + + /* Second, set another upper bound for current bit allocation: 4-5*bitrate/framerate */ + bound = 50; +// if(video->encParams->RC_Type == CBR_LOWDELAY) +// not necessary bound = 10; -- For Low delay */ + + diff_counter_BTsrc = AVC_MIN(diff_counter_BTsrc, bound); + diff_counter_BTdst = AVC_MIN(diff_counter_BTdst, bound); + + + /* Third, check the buffer */ + prev_counter_diff = pMP->counter_BTdst - pMP->counter_BTsrc; + curr_counter_diff = prev_counter_diff + (diff_counter_BTdst - 
diff_counter_BTsrc); + + if (AVC_ABS(prev_counter_diff) >= rateCtrl->max_BitVariance_num || AVC_ABS(curr_counter_diff) >= rateCtrl->max_BitVariance_num) + { //diff_counter_BTsrc = diff_counter_BTdst = 0; + + if (curr_counter_diff > rateCtrl->max_BitVariance_num && diff_counter_BTdst) + { + diff_counter_BTdst = (rateCtrl->max_BitVariance_num - prev_counter_diff) + diff_counter_BTsrc; + if (diff_counter_BTdst < 0) diff_counter_BTdst = 0; + } + + else if (curr_counter_diff < -rateCtrl->max_BitVariance_num && diff_counter_BTsrc) + { + diff_counter_BTsrc = diff_counter_BTdst - (-rateCtrl->max_BitVariance_num - prev_counter_diff); + if (diff_counter_BTsrc < 0) diff_counter_BTsrc = 0; + } + } + + + /*3.diff_counter_BTsrc, diff_counter_BTdst ==> TMN_TH */ + rateCtrl->TMN_TH = (int)(pMP->target_bits_per_frame); + pMP->diff_counter = 0; + + if (diff_counter_BTsrc) + { + rateCtrl->TMN_TH -= (int)(pMP->target_bits_per_frame * diff_counter_BTsrc * 0.1); + pMP->diff_counter = -diff_counter_BTsrc; + } + else if (diff_counter_BTdst) + { + rateCtrl->TMN_TH += (int)(pMP->target_bits_per_frame * diff_counter_BTdst * 0.1); + pMP->diff_counter = diff_counter_BTdst; + } + + + /*4.update pMP->counter_BTsrc, pMP->counter_BTdst */ + pMP->counter_BTsrc += diff_counter_BTsrc; + pMP->counter_BTdst += diff_counter_BTdst; + + + /*5.target bit calculation */ + rateCtrl->T = rateCtrl->TMN_TH - rateCtrl->TMN_W; + + return ; +} + +void updateRC_PostProc(AVCRateControl *rateCtrl, MultiPass *pMP) +{ + if (rateCtrl->skip_next_frame > 0) /* skip next frame */ + { + pMP->counter_BTsrc += 10 * rateCtrl->skip_next_frame; + + } + else if (rateCtrl->skip_next_frame == -1) /* skip current frame */ + { + pMP->counter_BTdst -= pMP->diff_counter; + pMP->counter_BTsrc += 10; + + pMP->sum_mad -= pMP->mad; + pMP->aver_mad = (pMP->aver_mad * pMP->encoded_frames - pMP->mad) / (pMP->encoded_frames - 1 + 0.0001); + pMP->sum_QP -= pMP->QP; + pMP->encoded_frames --; + } + /* some stuff in update VBV_fullness remains here */ + //if(rateCtrl->VBV_fullness < -rateCtrl->Bs/2) /* rateCtrl->Bs */ + if (rateCtrl->VBV_fullness < rateCtrl->low_bound) + { + rateCtrl->VBV_fullness = rateCtrl->low_bound; // -rateCtrl->Bs/2; + rateCtrl->TMN_W = rateCtrl->VBV_fullness - rateCtrl->low_bound; + pMP->counter_BTsrc = pMP->counter_BTdst + (int)((OsclFloat)(rateCtrl->Bs / 2 - rateCtrl->low_bound) / 2.0 / (pMP->target_bits_per_frame / 10)); + } +} + + +void RCInitChromaQP(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCMacroblock *currMB = video->currMB; + int q_bits; + + /* we have to do the same thing for AVC_CLIP3(0,51,video->QSy) */ + + video->QPy_div_6 = (currMB->QPy * 43) >> 8; + video->QPy_mod_6 = currMB->QPy - 6 * video->QPy_div_6; + currMB->QPc = video->QPc = mapQPi2QPc[AVC_CLIP3(0, 51, currMB->QPy + video->currPicParams->chroma_qp_index_offset)]; + video->QPc_div_6 = (video->QPc * 43) >> 8; + video->QPc_mod_6 = video->QPc - 6 * video->QPc_div_6; + + /* pre-calculate this to save computation */ + q_bits = 4 + video->QPy_div_6; + if (video->slice_type == AVC_I_SLICE) + { + encvid->qp_const = 682 << q_bits; // intra + } + else + { + encvid->qp_const = 342 << q_bits; // inter + } + + q_bits = 4 + video->QPc_div_6; + if (video->slice_type == AVC_I_SLICE) + { + encvid->qp_const_c = 682 << q_bits; // intra + } + else + { + encvid->qp_const_c = 342 << q_bits; // inter + } + + encvid->lambda_mode = QP2QUANT[AVC_MAX(0, currMB->QPy-SHIFT_QP)]; + encvid->lambda_motion = LAMBDA_FACTOR(encvid->lambda_mode); + + return ; +} + + +void 
RCInitMBQP(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCMacroblock *currMB = video->currMB; + + currMB->QPy = video->QPy; /* set to previous value or picture level */ + + RCInitChromaQP(encvid); + +} + +void RCPostMB(AVCCommonObj *video, AVCRateControl *rateCtrl, int num_header_bits, int num_texture_bits) +{ + OSCL_UNUSED_ARG(video); + rateCtrl->numMBHeaderBits = num_header_bits; + rateCtrl->numMBTextureBits = num_texture_bits; + rateCtrl->NumberofHeaderBits += rateCtrl->numMBHeaderBits; + rateCtrl->NumberofTextureBits += rateCtrl->numMBTextureBits; +} + +void RCRestoreQP(AVCMacroblock *currMB, AVCCommonObj *video, AVCEncObject *encvid) +{ + currMB->QPy = video->QPy; /* use previous QP */ + RCInitChromaQP(encvid); + + return ; +} + + +void RCCalculateMAD(AVCEncObject *encvid, AVCMacroblock *currMB, uint8 *orgL, int orgPitch) +{ + AVCCommonObj *video = encvid->common; + AVCRateControl *rateCtrl = encvid->rateCtrl; + uint32 dmin_lx; + + if (rateCtrl->rcEnable == TRUE) + { + if (currMB->mb_intra) + { + if (currMB->mbMode == AVC_I16) + { + dmin_lx = (0xFFFF << 16) | orgPitch; + rateCtrl->MADofMB[video->mbNum] = AVCSAD_Macroblock_C(orgL, + encvid->pred_i16[currMB->i16Mode], dmin_lx, NULL); + } + else /* i4 */ + { + rateCtrl->MADofMB[video->mbNum] = encvid->i4_sad / 256.; + } + } + /* for INTER, we have already saved it with the MV search */ + } + + return ; +} + + + +AVCEnc_Status RCUpdateFrame(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCRateControl *rateCtrl = encvid->rateCtrl; + AVCEnc_Status status = AVCENC_SUCCESS; + MultiPass *pMP = rateCtrl->pMP; + int diff_BTCounter; + int nal_type = video->nal_unit_type; + + /* update the complexity weight of I, P, B frame */ + + if (rateCtrl->rcEnable == TRUE) + { + pMP->actual_bits = rateCtrl->numFrameBits; + pMP->mad = (OsclFloat)rateCtrl->totalSAD / video->PicSizeInMbs; //ComputeFrameMAD(video, rateCtrl); + + AVCSaveRDSamples(pMP, 0); + + pMP->encoded_frames++; + + /* for pMP->samplesPerFrame */ + pMP->samplesPerFrame[pMP->framePos] = 0; + + pMP->sum_QP += pMP->QP; + + /* update pMP->counter_BTsrc, pMP->counter_BTdst */ + /* re-allocate the target bit again and then stop encoding */ + diff_BTCounter = (int)((OsclFloat)(rateCtrl->TMN_TH - rateCtrl->TMN_W - pMP->actual_bits) / + (pMP->bitrate / (pMP->framerate + 0.0001) + 0.0001) / 0.1); + if (diff_BTCounter >= 0) + pMP->counter_BTsrc += diff_BTCounter; /* pMP->actual_bits is smaller */ + else + pMP->counter_BTdst -= diff_BTCounter; /* pMP->actual_bits is bigger */ + + rateCtrl->TMN_TH -= (int)((OsclFloat)pMP->bitrate / (pMP->framerate + 0.0001) * (diff_BTCounter * 0.1)); + rateCtrl->T = pMP->target_bits = rateCtrl->TMN_TH - rateCtrl->TMN_W; + pMP->diff_counter -= diff_BTCounter; + + rateCtrl->Rc = rateCtrl->numFrameBits; /* Total Bits for current frame */ + rateCtrl->Hc = rateCtrl->NumberofHeaderBits; /* Total Bits in Header and Motion Vector */ + + /* BX_RC */ + updateRateControl(rateCtrl, nal_type); + if (rateCtrl->skip_next_frame == -1) // skip current frame + { + status = AVCENC_SKIPPED_PICTURE; + } + } + + rateCtrl->first_frame = 0; // reset here after we encode the first frame. 
+ + return status; +} + +void AVCSaveRDSamples(MultiPass *pMP, int counter_samples) +{ + /* for pMP->pRDSamples */ + pMP->pRDSamples[pMP->framePos][counter_samples].QP = pMP->QP; + pMP->pRDSamples[pMP->framePos][counter_samples].actual_bits = pMP->actual_bits; + pMP->pRDSamples[pMP->framePos][counter_samples].mad = pMP->mad; + pMP->pRDSamples[pMP->framePos][counter_samples].R_D = (OsclFloat)pMP->actual_bits / (pMP->mad + 0.0001); + + return ; +} + +void updateRateControl(AVCRateControl *rateCtrl, int nal_type) +{ + int frame_bits; + MultiPass *pMP = rateCtrl->pMP; + + /* BX rate contro\l */ + frame_bits = (int)(rateCtrl->bitRate / rateCtrl->frame_rate); + rateCtrl->TMN_W += (rateCtrl->Rc - rateCtrl->TMN_TH); + rateCtrl->VBV_fullness += (rateCtrl->Rc - frame_bits); //rateCtrl->Rp); + //if(rateCtrl->VBV_fullness < 0) rateCtrl->VBV_fullness = -1; + + rateCtrl->encoded_frames++; + + /* frame dropping */ + rateCtrl->skip_next_frame = 0; + + if ((rateCtrl->VBV_fullness > rateCtrl->Bs / 2) && nal_type != AVC_NALTYPE_IDR) /* skip the current frame */ /* rateCtrl->Bs */ + { + rateCtrl->TMN_W -= (rateCtrl->Rc - rateCtrl->TMN_TH); + rateCtrl->VBV_fullness -= rateCtrl->Rc; + rateCtrl->skip_next_frame = -1; + } + else if ((OsclFloat)(rateCtrl->VBV_fullness - rateCtrl->VBV_fullness_offset) > (rateCtrl->Bs / 2 - rateCtrl->VBV_fullness_offset)*0.95) /* skip next frame */ + { + rateCtrl->VBV_fullness -= frame_bits; //rateCtrl->Rp; + rateCtrl->skip_next_frame = 1; + pMP->counter_BTsrc -= (int)((OsclFloat)(rateCtrl->Bs / 2 - rateCtrl->low_bound) / 2.0 / (pMP->target_bits_per_frame / 10)); + /* BX_1, skip more than 1 frames */ + //while(rateCtrl->VBV_fullness > rateCtrl->Bs*0.475) + while ((rateCtrl->VBV_fullness - rateCtrl->VBV_fullness_offset) > (rateCtrl->Bs / 2 - rateCtrl->VBV_fullness_offset)*0.95) + { + rateCtrl->VBV_fullness -= frame_bits; //rateCtrl->Rp; + rateCtrl->skip_next_frame++; + pMP->counter_BTsrc -= (int)((OsclFloat)(rateCtrl->Bs / 2 - rateCtrl->low_bound) / 2.0 / (pMP->target_bits_per_frame / 10)); + } + + /* END BX_1 */ + } +} + + +double ComputeFrameMAD(AVCCommonObj *video, AVCRateControl *rateCtrl) +{ + double TotalMAD; + int i; + TotalMAD = 0.0; + for (i = 0; i < (int)video->PicSizeInMbs; i++) + TotalMAD += rateCtrl->MADofMB[i]; + TotalMAD /= video->PicSizeInMbs; + return TotalMAD; +} + + + + + +/* convert from QP to Qstep */ +double QP2Qstep(int QP) +{ + int i; + double Qstep; + static const double QP2QSTEP[6] = { 0.625, 0.6875, 0.8125, 0.875, 1.0, 1.125 }; + + Qstep = QP2QSTEP[QP % 6]; + for (i = 0; i < (QP / 6); i++) + Qstep *= 2; + + return Qstep; +} + +/* convert from step size to QP */ +int Qstep2QP(double Qstep) +{ + int q_per = 0, q_rem = 0; + + // assert( Qstep >= QP2Qstep(0) && Qstep <= QP2Qstep(51) ); + if (Qstep < QP2Qstep(0)) + return 0; + else if (Qstep > QP2Qstep(51)) + return 51; + + while (Qstep > QP2Qstep(5)) + { + Qstep /= 2; + q_per += 1; + } + + if (Qstep <= (0.625 + 0.6875) / 2) + { + Qstep = 0.625; + q_rem = 0; + } + else if (Qstep <= (0.6875 + 0.8125) / 2) + { + Qstep = 0.6875; + q_rem = 1; + } + else if (Qstep <= (0.8125 + 0.875) / 2) + { + Qstep = 0.8125; + q_rem = 2; + } + else if (Qstep <= (0.875 + 1.0) / 2) + { + Qstep = 0.875; + q_rem = 3; + } + else if (Qstep <= (1.0 + 1.125) / 2) + { + Qstep = 1.0; + q_rem = 4; + } + else + { + Qstep = 1.125; + q_rem = 5; + } + + return (q_per * 6 + q_rem); +} + + + diff --git a/media/libstagefright/codecs/avc/enc/src/residual.cpp b/media/libstagefright/codecs/avc/enc/src/residual.cpp new file mode 100644 index 
0000000..42eb910 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/residual.cpp @@ -0,0 +1,389 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + +AVCEnc_Status EncodeIntraPCM(AVCEncObject *encvid) +{ + AVCEnc_Status status = AVCENC_SUCCESS; + AVCCommonObj *video = encvid->common; + AVCFrameIO *currInput = encvid->currInput; + AVCEncBitstream *stream = encvid->bitstream; + int x_position = (video->mb_x << 4); + int y_position = (video->mb_y << 4); + int orgPitch = currInput->pitch; + int offset1 = y_position * orgPitch + x_position; + int i, j; + int offset; + uint8 *pDst, *pSrc; + uint code; + + ue_v(stream, 25); + + i = stream->bit_left & 0x7; + if (i) /* not byte-aligned */ + { + BitstreamWriteBits(stream, 0, i); + } + + pSrc = currInput->YCbCr[0] + offset1; + pDst = video->currPic->Sl + offset1; + offset = video->PicWidthInSamplesL - 16; + + /* at this point bitstream is byte-aligned */ + j = 16; + while (j > 0) + { +#if (WORD_SIZE==32) + for (i = 0; i < 4; i++) + { + code = *((uint*)pSrc); + pSrc += 4; + *((uint*)pDst) = code; + pDst += 4; + status = BitstreamWriteBits(stream, 32, code); + } +#else + for (i = 0; i < 8; i++) + { + code = *((uint*)pSrc); + pSrc += 2; + *((uint*)pDst) = code; + pDst += 2; + status = BitstreamWriteBits(stream, 16, code); + } +#endif + pDst += offset; + pSrc += offset; + j--; + } + if (status != AVCENC_SUCCESS) /* check only once per line */ + return status; + + pDst = video->currPic->Scb + ((offset1 + x_position) >> 2); + pSrc = currInput->YCbCr[1] + ((offset1 + x_position) >> 2); + offset >>= 1; + + j = 8; + while (j > 0) + { +#if (WORD_SIZE==32) + for (i = 0; i < 2; i++) + { + code = *((uint*)pSrc); + pSrc += 4; + *((uint*)pDst) = code; + pDst += 4; + status = BitstreamWriteBits(stream, 32, code); + } +#else + for (i = 0; i < 4; i++) + { + code = *((uint*)pSrc); + pSrc += 2; + *((uint*)pDst) = code; + pDst += 2; + status = BitstreamWriteBits(stream, 16, code); + } +#endif + pDst += offset; + pSrc += offset; + j--; + } + + if (status != AVCENC_SUCCESS) /* check only once per line */ + return status; + + pDst = video->currPic->Scr + ((offset1 + x_position) >> 2); + pSrc = currInput->YCbCr[2] + ((offset1 + x_position) >> 2); + + j = 8; + while (j > 0) + { +#if (WORD_SIZE==32) + for (i = 0; i < 2; i++) + { + code = *((uint*)pSrc); + pSrc += 4; + *((uint*)pDst) = code; + pDst += 4; + status = BitstreamWriteBits(stream, 32, code); + } +#else + for (i = 0; i < 4; i++) + { + code = *((uint*)pSrc); + pSrc += 2; + *((uint*)pDst) = code; + pDst += 2; + status = BitstreamWriteBits(stream, 16, code); + } +#endif + pDst += offset; + pSrc += offset; + j--; + } + + return status; +} + + +AVCEnc_Status enc_residual_block(AVCEncObject *encvid, AVCResidualType type, int cindx, AVCMacroblock *currMB) +{ + AVCEnc_Status status = 
AVCENC_SUCCESS; + AVCCommonObj *video = encvid->common; + int i, maxNumCoeff, nC; + int cdc = 0, cac = 0; + int TrailingOnes; + AVCEncBitstream *stream = encvid->bitstream; + uint trailing_ones_sign_flag; + int zerosLeft; + int *level, *run; + int TotalCoeff; + const static int incVlc[] = {0, 3, 6, 12, 24, 48, 32768}; // maximum vlc = 6 + int escape, numPrefix, sufmask, suffix, shift, sign, value, absvalue, vlcnum, level_two_or_higher; + int bindx = blkIdx2blkXY[cindx>>2][cindx&3] ; // raster scan index + + switch (type) + { + case AVC_Luma: + maxNumCoeff = 16; + level = encvid->level[cindx]; + run = encvid->run[cindx]; + TotalCoeff = currMB->nz_coeff[bindx]; + break; + case AVC_Intra16DC: + maxNumCoeff = 16; + level = encvid->leveldc; + run = encvid->rundc; + TotalCoeff = cindx; /* special case */ + bindx = 0; + cindx = 0; + break; + case AVC_Intra16AC: + maxNumCoeff = 15; + level = encvid->level[cindx]; + run = encvid->run[cindx]; + TotalCoeff = currMB->nz_coeff[bindx]; + break; + case AVC_ChromaDC: /* how to differentiate Cb from Cr */ + maxNumCoeff = 4; + cdc = 1; + if (cindx >= 8) + { + level = encvid->levelcdc + 4; + run = encvid->runcdc + 4; + TotalCoeff = cindx - 8; /* special case */ + } + else + { + level = encvid->levelcdc; + run = encvid->runcdc; + TotalCoeff = cindx; /* special case */ + } + break; + case AVC_ChromaAC: + maxNumCoeff = 15; + cac = 1; + level = encvid->level[cindx]; + run = encvid->run[cindx]; + cindx -= 16; + bindx = 16 + blkIdx2blkXY[cindx>>2][cindx&3]; + cindx += 16; + TotalCoeff = currMB->nz_coeff[bindx]; + break; + default: + return AVCENC_FAIL; + } + + + /* find TrailingOnes */ + TrailingOnes = 0; + zerosLeft = 0; + i = TotalCoeff - 1; + nC = 1; + while (i >= 0) + { + zerosLeft += run[i]; + if (nC && (level[i] == 1 || level[i] == -1)) + { + TrailingOnes++; + } + else + { + nC = 0; + } + i--; + } + if (TrailingOnes > 3) + { + TrailingOnes = 3; /* clip it */ + } + + if (!cdc) + { + if (!cac) /* not chroma */ + { + nC = predict_nnz(video, bindx & 3, bindx >> 2); + } + else /* chroma ac but not chroma dc */ + { + nC = predict_nnz_chroma(video, bindx & 3, bindx >> 2); + } + + status = ce_TotalCoeffTrailingOnes(stream, TrailingOnes, TotalCoeff, nC); + } + else + { + nC = -1; /* Chroma DC level */ + status = ce_TotalCoeffTrailingOnesChromaDC(stream, TrailingOnes, TotalCoeff); + } + + /* This part is done quite differently in ReadCoef4x4_CAVLC() */ + if (TotalCoeff > 0) + { + + i = TotalCoeff - 1; + + if (TrailingOnes) /* keep reading the sign of those trailing ones */ + { + nC = TrailingOnes; + trailing_ones_sign_flag = 0; + while (nC) + { + trailing_ones_sign_flag <<= 1; + trailing_ones_sign_flag |= ((uint32)level[i--] >> 31); /* 0 or positive, 1 for negative */ + nC--; + } + + /* instead of writing one bit at a time, read the whole thing at once */ + status = BitstreamWriteBits(stream, TrailingOnes, trailing_ones_sign_flag); + } + + level_two_or_higher = 1; + if (TotalCoeff > 3 && TrailingOnes == 3) + { + level_two_or_higher = 0; + } + + if (TotalCoeff > 10 && TrailingOnes < 3) + { + vlcnum = 1; + } + else + { + vlcnum = 0; + } + + /* then do this TotalCoeff-TrailingOnes times */ + for (i = TotalCoeff - TrailingOnes - 1; i >= 0; i--) + { + value = level[i]; + absvalue = (value >= 0) ? 
value : -value; + + if (level_two_or_higher) + { + if (value > 0) value--; + else value++; + level_two_or_higher = 0; + } + + if (value >= 0) + { + sign = 0; + } + else + { + sign = 1; + value = -value; + } + + if (vlcnum == 0) // VLC1 + { + if (value < 8) + { + status = BitstreamWriteBits(stream, value * 2 + sign - 1, 1); + } + else if (value < 8 + 8) + { + status = BitstreamWriteBits(stream, 14 + 1 + 4, (1 << 4) | ((value - 8) << 1) | sign); + } + else + { + status = BitstreamWriteBits(stream, 14 + 2 + 12, (1 << 12) | ((value - 16) << 1) | sign) ; + } + } + else // VLCN + { + shift = vlcnum - 1; + escape = (15 << shift) + 1; + numPrefix = (value - 1) >> shift; + sufmask = ~((0xffffffff) << shift); + suffix = (value - 1) & sufmask; + if (value < escape) + { + status = BitstreamWriteBits(stream, numPrefix + vlcnum + 1, (1 << (shift + 1)) | (suffix << 1) | sign); + } + else + { + status = BitstreamWriteBits(stream, 28, (1 << 12) | ((value - escape) << 1) | sign); + } + + } + + if (absvalue > incVlc[vlcnum]) + vlcnum++; + + if (i == TotalCoeff - TrailingOnes - 1 && absvalue > 3) + vlcnum = 2; + } + + if (status != AVCENC_SUCCESS) /* occasionally check the bitstream */ + { + return status; + } + if (TotalCoeff < maxNumCoeff) + { + if (!cdc) + { + ce_TotalZeros(stream, zerosLeft, TotalCoeff); + } + else + { + ce_TotalZerosChromaDC(stream, zerosLeft, TotalCoeff); + } + } + else + { + zerosLeft = 0; + } + + i = TotalCoeff - 1; + while (i > 0) /* don't do the last one */ + { + if (zerosLeft > 0) + { + ce_RunBefore(stream, run[i], zerosLeft); + } + + zerosLeft = zerosLeft - run[i]; + i--; + } + } + + return status; +} diff --git a/media/libstagefright/codecs/avc/enc/src/sad.cpp b/media/libstagefright/codecs/avc/enc/src/sad.cpp new file mode 100644 index 0000000..ae7acd2 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/sad.cpp @@ -0,0 +1,290 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" +#include "sad_inline.h" + +#define Cached_lx 176 + +#ifdef _SAD_STAT +uint32 num_sad_MB = 0; +uint32 num_sad_Blk = 0; +uint32 num_sad_MB_call = 0; +uint32 num_sad_Blk_call = 0; + +#define NUM_SAD_MB_CALL() num_sad_MB_call++ +#define NUM_SAD_MB() num_sad_MB++ +#define NUM_SAD_BLK_CALL() num_sad_Blk_call++ +#define NUM_SAD_BLK() num_sad_Blk++ + +#else + +#define NUM_SAD_MB_CALL() +#define NUM_SAD_MB() +#define NUM_SAD_BLK_CALL() +#define NUM_SAD_BLK() + +#endif + + +/* consist of +int AVCSAD_Macroblock_C(uint8 *ref,uint8 *blk,int dmin,int lx,void *extra_info) +int AVCSAD_MB_HTFM_Collect(uint8 *ref,uint8 *blk,int dmin,int lx,void *extra_info) +int AVCSAD_MB_HTFM(uint8 *ref,uint8 *blk,int dmin,int lx,void *extra_info) +*/ + + +/*================================================================== + Function: SAD_Macroblock + Date: 09/07/2000 + Purpose: Compute SAD 16x16 between blk and ref. + To do: Uniform subsampling will be inserted later! + Hypothesis Testing Fast Matching to be used later! + Changes: + 11/7/00: implemented MMX + 1/24/01: implemented SSE +==================================================================*/ +/********** C ************/ +int AVCSAD_Macroblock_C(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info) +{ + (void)(extra_info); + + int32 x10; + int dmin = (uint32)dmin_lx >> 16; + int lx = dmin_lx & 0xFFFF; + + NUM_SAD_MB_CALL(); + + x10 = simd_sad_mb(ref, blk, dmin, lx); + + return x10; +} + +#ifdef HTFM /* HTFM with uniform subsampling implementation 2/28/01 */ +/*=============================================================== + Function: AVCAVCSAD_MB_HTFM_Collect and AVCSAD_MB_HTFM + Date: 3/2/1 + Purpose: Compute the SAD on a 16x16 block using + uniform subsampling and hypothesis testing fast matching + for early dropout. SAD_MB_HP_HTFM_Collect is to collect + the statistics to compute the thresholds to be used in + SAD_MB_HP_HTFM. 
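+            The SAD is accumulated over 16 interleaved passes; each pass covers
+            a 4x4 grid of reference pixels taken every 4 pels horizontally and
+            vertically, with the phase supplied by offsetRef[].  After every
+            pass the partial SAD is tested against the current minimum (and, in
+            the HTFM version, against the accumulated threshold sadstar minus
+            nrmlz_th), so hopeless candidates are rejected early.  The Collect
+            version additionally gathers the abs_dif_mad_avg / countbreak
+            statistics from which the early-termination thresholds are derived.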
+ Input/Output: + Changes: + ===============================================================*/ + +int AVCAVCSAD_MB_HTFM_Collect(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info) +{ + int i; + int sad = 0; + uint8 *p1; + int lx4 = (dmin_lx << 2) & 0x3FFFC; + uint32 cur_word; + int saddata[16], tmp, tmp2; /* used when collecting flag (global) is on */ + int difmad; + int madstar; + HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info; + int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg); + uint *countbreak = &(htfm_stat->countbreak); + int *offsetRef = htfm_stat->offsetRef; + + madstar = (uint32)dmin_lx >> 20; + + NUM_SAD_MB_CALL(); + + blk -= 4; + for (i = 0; i < 16; i++) + { + p1 = ref + offsetRef[i]; + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + NUM_SAD_MB(); + + saddata[i] = sad; + + if (i > 0) + { + if ((uint32)sad > ((uint32)dmin_lx >> 16)) + { + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); + (*countbreak)++; + return sad; + } + } + } + + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? 
difmad : -difmad); + (*countbreak)++; + return sad; +} + +int AVCSAD_MB_HTFM(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info) +{ + int sad = 0; + uint8 *p1; + + int i; + int tmp, tmp2; + int lx4 = (dmin_lx << 2) & 0x3FFFC; + int sadstar = 0, madstar; + int *nrmlz_th = (int*) extra_info; + int *offsetRef = (int*) extra_info + 32; + uint32 cur_word; + + madstar = (uint32)dmin_lx >> 20; + + NUM_SAD_MB_CALL(); + + blk -= 4; + for (i = 0; i < 16; i++) + { + p1 = ref + offsetRef[i]; + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + NUM_SAD_MB(); + + sadstar += madstar; + if (((uint32)sad <= ((uint32)dmin_lx >> 16)) && (sad <= (sadstar - *nrmlz_th++))) + ; + else + return 65536; + } + + return sad; +} +#endif /* HTFM */ + + + diff --git a/media/libstagefright/codecs/avc/enc/src/sad_halfpel.cpp b/media/libstagefright/codecs/avc/enc/src/sad_halfpel.cpp new file mode 100644 index 0000000..faf2198 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/sad_halfpel.cpp @@ -0,0 +1,629 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * ------------------------------------------------------------------- + */ +/* contains +int AVCHalfPel1_SAD_MB(uint8 *ref,uint8 *blk,int dmin,int width,int ih,int jh) +int AVCHalfPel2_SAD_MB(uint8 *ref,uint8 *blk,int dmin,int width) +int AVCHalfPel1_SAD_Blk(uint8 *ref,uint8 *blk,int dmin,int width,int ih,int jh) +int AVCHalfPel2_SAD_Blk(uint8 *ref,uint8 *blk,int dmin,int width) + +int AVCSAD_MB_HalfPel_C(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info) +int AVCSAD_MB_HP_HTFM_Collect(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info) +int AVCSAD_MB_HP_HTFM(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info) +int AVCSAD_Blk_HalfPel_C(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info) +*/ + +#include "avcenc_lib.h" +#include "sad_halfpel_inline.h" + +#ifdef _SAD_STAT +uint32 num_sad_HP_MB = 0; +uint32 num_sad_HP_Blk = 0; +uint32 num_sad_HP_MB_call = 0; +uint32 num_sad_HP_Blk_call = 0; +#define NUM_SAD_HP_MB_CALL() num_sad_HP_MB_call++ +#define NUM_SAD_HP_MB() num_sad_HP_MB++ +#define NUM_SAD_HP_BLK_CALL() num_sad_HP_Blk_call++ +#define NUM_SAD_HP_BLK() num_sad_HP_Blk++ +#else +#define NUM_SAD_HP_MB_CALL() +#define NUM_SAD_HP_MB() +#define NUM_SAD_HP_BLK_CALL() +#define NUM_SAD_HP_BLK() +#endif + + + +/*=============================================================== + Function: SAD_MB_HalfPel + Date: 09/17/2000 + Purpose: Compute the SAD on the half-pel resolution + Input/Output: hmem is assumed to be a pointer to the starting + point of the search in the 33x33 matrix search region + Changes: + 11/7/00: implemented MMX + ===============================================================*/ +/*================================================================== + Function: AVCSAD_MB_HalfPel_C + Date: 04/30/2001 + Purpose: Compute SAD 16x16 between blk and ref in halfpel + resolution, + Changes: + ==================================================================*/ +/* One component is half-pel */ +int AVCSAD_MB_HalfPel_Cxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + (void)(extra_info); + + int i, j; + int sad = 0; + uint8 *kk, *p1, *p2, *p3, *p4; +// int sumref=0; + int temp; + int rx = dmin_rx & 0xFFFF; + + NUM_SAD_HP_MB_CALL(); + + p1 = ref; + p2 = ref + 1; + p3 = ref + rx; + p4 = ref + rx + 1; + kk = blk; + + for (i = 0; i < 16; i++) + { + for (j = 0; j < 16; j++) + { + + temp = ((p1[j] + p2[j] + p3[j] + p4[j] + 2) >> 2) - *kk++; + sad += AVC_ABS(temp); + } + + NUM_SAD_HP_MB(); + + if (sad > (int)((uint32)dmin_rx >> 16)) + return sad; + + p1 += rx; + p3 += rx; + p2 += rx; + p4 += rx; + } + return sad; +} + +int AVCSAD_MB_HalfPel_Cyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + (void)(extra_info); + + int i, j; + int sad = 0; + uint8 *kk, *p1, *p2; +// int sumref=0; + int temp; + int rx = dmin_rx & 0xFFFF; + + NUM_SAD_HP_MB_CALL(); + + p1 = ref; + p2 = ref + rx; /* either left/right or top/bottom pixel */ + kk = blk; + + for (i = 0; i < 16; i++) + { + for (j = 0; j < 16; j++) + { + + temp = ((p1[j] + p2[j] + 1) >> 1) - *kk++; + sad += AVC_ABS(temp); + } + + NUM_SAD_HP_MB(); + + if (sad > (int)((uint32)dmin_rx >> 16)) + return sad; + p1 += rx; + p2 += rx; + } + return sad; +} + +int AVCSAD_MB_HalfPel_Cxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + (void)(extra_info); + + int i, j; + int sad = 0; + uint8 *kk, *p1; + int temp; + int rx = dmin_rx & 0xFFFF; + + NUM_SAD_HP_MB_CALL(); + + p1 = ref; + kk = blk; + + for (i = 0; i < 16; i++) 
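+    /* Note (editorial comment, inferred from the code itself): dmin_rx is a
+       packed argument -- the current best SAD appears to sit in its upper 16
+       bits and the reference pitch (rx above) in its lower 16 bits, which is
+       what enables the per-row early exit inside this loop. */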
+ { + for (j = 0; j < 16; j++) + { + + temp = ((p1[j] + p1[j+1] + 1) >> 1) - *kk++; + sad += AVC_ABS(temp); + } + + NUM_SAD_HP_MB(); + + if (sad > (int)((uint32)dmin_rx >> 16)) + return sad; + p1 += rx; + } + return sad; +} + +#ifdef HTFM /* HTFM with uniform subsampling implementation, 2/28/01 */ + +//Checheck here +int AVCAVCSAD_MB_HP_HTFM_Collectxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + int i, j; + int sad = 0; + uint8 *p1, *p2; + int rx = dmin_rx & 0xFFFF; + int refwx4 = rx << 2; + int saddata[16]; /* used when collecting flag (global) is on */ + int difmad, tmp, tmp2; + int madstar; + HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info; + int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg); + UInt *countbreak = &(htfm_stat->countbreak); + int *offsetRef = htfm_stat->offsetRef; + uint32 cur_word; + + madstar = (uint32)dmin_rx >> 20; + + NUM_SAD_HP_MB_CALL(); + + blk -= 4; + + for (i = 0; i < 16; i++) /* 16 stages */ + { + p1 = ref + offsetRef[i]; + p2 = p1 + rx; + + j = 4;/* 4 lines */ + do + { + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12] + p2[12]; + tmp2 = p1[13] + p2[13]; + tmp += tmp2; + tmp2 = (cur_word >> 24) & 0xFF; + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[8] + p2[8]; + tmp2 = p1[9] + p2[9]; + tmp += tmp2; + tmp2 = (cur_word >> 16) & 0xFF; + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[4] + p2[4]; + tmp2 = p1[5] + p2[5]; + tmp += tmp2; + tmp2 = (cur_word >> 8) & 0xFF; + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + tmp2 = p1[1] + p2[1]; + tmp = p1[0] + p2[0]; + p1 += refwx4; + p2 += refwx4; + tmp += tmp2; + tmp2 = (cur_word & 0xFF); + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + } + while (--j); + + NUM_SAD_HP_MB(); + + saddata[i] = sad; + + if (i > 0) + { + if (sad > ((uint32)dmin_rx >> 16)) + { + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); + (*countbreak)++; + return sad; + } + } + } + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? 
difmad : -difmad); + (*countbreak)++; + + return sad; +} + +int AVCAVCSAD_MB_HP_HTFM_Collectyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + int i, j; + int sad = 0; + uint8 *p1, *p2; + int rx = dmin_rx & 0xFFFF; + int refwx4 = rx << 2; + int saddata[16]; /* used when collecting flag (global) is on */ + int difmad, tmp, tmp2; + int madstar; + HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info; + int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg); + UInt *countbreak = &(htfm_stat->countbreak); + int *offsetRef = htfm_stat->offsetRef; + uint32 cur_word; + + madstar = (uint32)dmin_rx >> 20; + + NUM_SAD_HP_MB_CALL(); + + blk -= 4; + + for (i = 0; i < 16; i++) /* 16 stages */ + { + p1 = ref + offsetRef[i]; + p2 = p1 + rx; + j = 4; + do + { + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = p2[12]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 24) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[8]; + tmp2 = p2[8]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 16) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[4]; + tmp2 = p2[4]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 8) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[0]; + p1 += refwx4; + tmp2 = p2[0]; + p2 += refwx4; + tmp++; + tmp2 += tmp; + tmp = (cur_word & 0xFF); + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + } + while (--j); + + NUM_SAD_HP_MB(); + + saddata[i] = sad; + + if (i > 0) + { + if (sad > ((uint32)dmin_rx >> 16)) + { + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); + (*countbreak)++; + return sad; + } + } + } + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); + (*countbreak)++; + + return sad; +} + +int AVCAVCSAD_MB_HP_HTFM_Collectxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + int i, j; + int sad = 0; + uint8 *p1; + int rx = dmin_rx & 0xFFFF; + int refwx4 = rx << 2; + int saddata[16]; /* used when collecting flag (global) is on */ + int difmad, tmp, tmp2; + int madstar; + HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info; + int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg); + UInt *countbreak = &(htfm_stat->countbreak); + int *offsetRef = htfm_stat->offsetRef; + uint32 cur_word; + + madstar = (uint32)dmin_rx >> 20; + + NUM_SAD_HP_MB_CALL(); + + blk -= 4; + + for (i = 0; i < 16; i++) /* 16 stages */ + { + p1 = ref + offsetRef[i]; + + j = 4; /* 4 lines */ + do + { + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = p1[13]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 24) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[8]; + tmp2 = p1[9]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 16) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[4]; + tmp2 = p1[5]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 8) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[0]; + tmp2 = p1[1]; + p1 += refwx4; + tmp++; + tmp2 += tmp; + tmp = (cur_word & 0xFF); + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + } + while (--j); + + NUM_SAD_HP_MB(); + + saddata[i] = sad; + + if (i > 0) + { + if (sad > ((uint32)dmin_rx >> 16)) + { + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); + (*countbreak)++; + return sad; + } + } + } + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? 
difmad : -difmad); + (*countbreak)++; + + return sad; +} + +int AVCSAD_MB_HP_HTFMxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + int i, j; + int sad = 0, tmp, tmp2; + uint8 *p1, *p2; + int rx = dmin_rx & 0xFFFF; + int refwx4 = rx << 2; + int sadstar = 0, madstar; + int *nrmlz_th = (int*) extra_info; + int *offsetRef = nrmlz_th + 32; + uint32 cur_word; + + madstar = (uint32)dmin_rx >> 20; + + NUM_SAD_HP_MB_CALL(); + + blk -= 4; + + for (i = 0; i < 16; i++) /* 16 stages */ + { + p1 = ref + offsetRef[i]; + p2 = p1 + rx; + + j = 4; /* 4 lines */ + do + { + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12] + p2[12]; + tmp2 = p1[13] + p2[13]; + tmp += tmp2; + tmp2 = (cur_word >> 24) & 0xFF; + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[8] + p2[8]; + tmp2 = p1[9] + p2[9]; + tmp += tmp2; + tmp2 = (cur_word >> 16) & 0xFF; + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[4] + p2[4]; + tmp2 = p1[5] + p2[5]; + tmp += tmp2; + tmp2 = (cur_word >> 8) & 0xFF; + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + tmp2 = p1[1] + p2[1]; + tmp = p1[0] + p2[0]; + p1 += refwx4; + p2 += refwx4; + tmp += tmp2; + tmp2 = (cur_word & 0xFF); + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + } + while (--j); + + NUM_SAD_HP_MB(); + + sadstar += madstar; + if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16)) + { + return 65536; + } + } + + return sad; +} + +int AVCSAD_MB_HP_HTFMyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + int i, j; + int sad = 0, tmp, tmp2; + uint8 *p1, *p2; + int rx = dmin_rx & 0xFFFF; + int refwx4 = rx << 2; + int sadstar = 0, madstar; + int *nrmlz_th = (int*) extra_info; + int *offsetRef = nrmlz_th + 32; + uint32 cur_word; + + madstar = (uint32)dmin_rx >> 20; + + NUM_SAD_HP_MB_CALL(); + + blk -= 4; + + for (i = 0; i < 16; i++) /* 16 stages */ + { + p1 = ref + offsetRef[i]; + p2 = p1 + rx; + j = 4; + do + { + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = p2[12]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 24) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[8]; + tmp2 = p2[8]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 16) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[4]; + tmp2 = p2[4]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 8) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[0]; + p1 += refwx4; + tmp2 = p2[0]; + p2 += refwx4; + tmp++; + tmp2 += tmp; + tmp = (cur_word & 0xFF); + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + } + while (--j); + + NUM_SAD_HP_MB(); + sadstar += madstar; + if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16)) + { + return 65536; + } + } + + return sad; +} + +int AVCSAD_MB_HP_HTFMxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + int i, j; + int sad = 0, tmp, tmp2; + uint8 *p1; + int rx = dmin_rx & 0xFFFF; + int refwx4 = rx << 2; + int sadstar = 0, madstar; + int *nrmlz_th = (int*) extra_info; + int *offsetRef = nrmlz_th + 32; + uint32 cur_word; + + madstar = (uint32)dmin_rx >> 20; + + NUM_SAD_HP_MB_CALL(); + + blk -= 4; + + for (i = 0; i < 16; i++) /* 16 stages */ + { + p1 = ref + offsetRef[i]; + + j = 4;/* 4 lines */ + do + { + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = p1[13]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 24) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[8]; + tmp2 = p1[9]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 16) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[4]; + tmp2 = p1[5]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 8) & 
0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[0]; + tmp2 = p1[1]; + p1 += refwx4; + tmp++; + tmp2 += tmp; + tmp = (cur_word & 0xFF); + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + } + while (--j); + + NUM_SAD_HP_MB(); + + sadstar += madstar; + if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16)) + { + return 65536; + } + } + + return sad; +} + +#endif /* HTFM */ + + + + + diff --git a/media/libstagefright/codecs/avc/enc/src/sad_halfpel_inline.h b/media/libstagefright/codecs/avc/enc/src/sad_halfpel_inline.h new file mode 100644 index 0000000..3a21647 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/sad_halfpel_inline.h @@ -0,0 +1,96 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ + +#ifndef _SAD_HALFPEL_INLINE_H_ +#define _SAD_HALFPEL_INLINE_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#if defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ + + __inline int32 INTERP1_SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { + tmp = (tmp2 >> 1) - tmp; + if (tmp > 0) sad += tmp; + else sad -= tmp; + + return sad; + } + + __inline int32 INTERP2_SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { + tmp = (tmp >> 2) - tmp2; + if (tmp > 0) sad += tmp; + else sad -= tmp; + + return sad; + } + +#elif defined(__CC_ARM) /* only work with arm v5 */ + + __inline int32 INTERP1_SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { + __asm + { + rsbs tmp, tmp, tmp2, asr #1 ; + rsbmi tmp, tmp, #0 ; + add sad, sad, tmp ; + } + + return sad; + } + + __inline int32 INTERP2_SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { + __asm + { + rsbs tmp, tmp2, tmp, asr #2 ; + rsbmi tmp, tmp, #0 ; + add sad, sad, tmp ; + } + + return sad; + } + +#elif defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ + + __inline int32 INTERP1_SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { +__asm__ volatile("rsbs %1, %1, %2, asr #1\n\trsbmi %1, %1, #0\n\tadd %0, %0, %1": "=r"(sad), "=r"(tmp): "r"(tmp2)); + + return sad; + } + + __inline int32 INTERP2_SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { +__asm__ volatile("rsbs %1, %2, %1, asr #2\n\trsbmi %1, %1, #0\n\tadd %0, %0, %1": "=r"(sad), "=r"(tmp): "r"(tmp2)); + + return sad; + } + +#endif + +#ifdef __cplusplus +} +#endif + +#endif //_SAD_HALFPEL_INLINE_H_ + diff --git a/media/libstagefright/codecs/avc/enc/src/sad_inline.h b/media/libstagefright/codecs/avc/enc/src/sad_inline.h new file mode 100644 index 0000000..f39794f --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/sad_inline.h @@ -0,0 +1,488 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#ifndef _SAD_INLINE_H_ +#define _SAD_INLINE_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#if defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ + + __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { + tmp = tmp - tmp2; + if (tmp > 0) sad += tmp; + else sad -= tmp; + + return sad; + } + + __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask) + { + int32 x7; + + x7 = src2 ^ src1; /* check odd/even combination */ + if ((uint32)src2 >= (uint32)src1) + { + src1 = src2 - src1; /* subs */ + } + else + { + src1 = src1 - src2; + } + x7 = x7 ^ src1; /* only odd bytes need to add carry */ + x7 = mask & ((uint32)x7 >> 1); + x7 = (x7 << 8) - x7; + src1 = src1 + (x7 >> 7); /* add 0xFF to the negative byte, add back carry */ + src1 = src1 ^(x7 >> 7); /* take absolute value of negative byte */ + + return src1; + } + +#define NUMBER 3 +#define SHIFT 24 + +#include "sad_mb_offset.h" + +#undef NUMBER +#define NUMBER 2 +#undef SHIFT +#define SHIFT 16 +#include "sad_mb_offset.h" + +#undef NUMBER +#define NUMBER 1 +#undef SHIFT +#define SHIFT 8 +#include "sad_mb_offset.h" + + + __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx) + { + int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; + + x9 = 0x80808080; /* const. */ + + x8 = (uint32)ref & 0x3; + if (x8 == 3) + goto SadMBOffset3; + if (x8 == 2) + goto SadMBOffset2; + if (x8 == 1) + goto SadMBOffset1; + +// x5 = (x4<<8)-x4; /* x5 = x4*255; */ + x4 = x5 = 0; + + x6 = 0xFFFF00FF; + + ref -= lx; + blk -= 16; + + x8 = 16; + +LOOP_SAD0: + /****** process 8 pixels ******/ + x10 = *((uint32*)(ref += lx)); + x11 = *((uint32*)(ref + 4)); + x12 = *((uint32*)(blk += 16)); + x14 = *((uint32*)(blk + 4)); + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + /****** process 8 pixels ******/ + x10 = *((uint32*)(ref + 8)); + x11 = *((uint32*)(ref + 12)); + x12 = *((uint32*)(blk + 8)); + x14 = *((uint32*)(blk + 12)); + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + /****************/ + x10 = x5 - (x4 << 8); /* extract low bytes */ + x10 = x10 + x4; /* add with high bytes */ + x10 = x10 + (x10 << 16); /* add with lower half word */ + + if ((int)((uint32)x10 >> 16) <= dmin) /* compare with dmin */ + { + if (--x8) + { + goto LOOP_SAD0; + } + + } + + return ((uint32)x10 >> 16); + 
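+    /* The labels below are reached only when ref is not 32-bit aligned; the
+       sad_mb_offset1/2/3 variants (instantiated from sad_mb_offset.h with
+       NUMBER/SHIFT = 1/8, 2/16, 3/24) realign the loads by shifting and
+       merging adjacent words before running the same packed byte-wise SAD
+       accumulation used in the aligned path above. */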
+SadMBOffset3: + + return sad_mb_offset3(ref, blk, lx, dmin); + +SadMBOffset2: + + return sad_mb_offset2(ref, blk, lx, dmin); + +SadMBOffset1: + + return sad_mb_offset1(ref, blk, lx, dmin); + + } + +#elif defined(__CC_ARM) /* only work with arm v5 */ + + __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { + __asm + { + rsbs tmp, tmp, tmp2 ; + rsbmi tmp, tmp, #0 ; + add sad, sad, tmp ; + } + + return sad; + } + + __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask) + { + int32 x7; + + __asm + { + EOR x7, src2, src1; /* check odd/even combination */ + SUBS src1, src2, src1; + EOR x7, x7, src1; + AND x7, mask, x7, lsr #1; + ORRCC x7, x7, #0x80000000; + RSB x7, x7, x7, lsl #8; + ADD src1, src1, x7, asr #7; /* add 0xFF to the negative byte, add back carry */ + EOR src1, src1, x7, asr #7; /* take absolute value of negative byte */ + } + + return src1; + } + + __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask) + { + int32 x7; + + __asm + { + EOR x7, src2, src1; /* check odd/even combination */ + ADDS src1, src2, src1; + EOR x7, x7, src1; /* only odd bytes need to add carry */ + ANDS x7, mask, x7, rrx; + RSB x7, x7, x7, lsl #8; + SUB src1, src1, x7, asr #7; /* add 0xFF to the negative byte, add back carry */ + EOR src1, src1, x7, asr #7; /* take absolute value of negative byte */ + } + + return src1; + } + +#define sum_accumulate __asm{ SBC x5, x5, x10; /* accumulate low bytes */ \ + BIC x10, x6, x10; /* x10 & 0xFF00FF00 */ \ + ADD x4, x4, x10,lsr #8; /* accumulate high bytes */ \ + SBC x5, x5, x11; /* accumulate low bytes */ \ + BIC x11, x6, x11; /* x11 & 0xFF00FF00 */ \ + ADD x4, x4, x11,lsr #8; } /* accumulate high bytes */ + + +#define NUMBER 3 +#define SHIFT 24 +#define INC_X8 0x08000001 + +#include "sad_mb_offset.h" + +#undef NUMBER +#define NUMBER 2 +#undef SHIFT +#define SHIFT 16 +#undef INC_X8 +#define INC_X8 0x10000001 +#include "sad_mb_offset.h" + +#undef NUMBER +#define NUMBER 1 +#undef SHIFT +#define SHIFT 8 +#undef INC_X8 +#define INC_X8 0x08000001 +#include "sad_mb_offset.h" + + + __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx) + { + int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; + + x9 = 0x80808080; /* const. 
*/ + x4 = x5 = 0; + + __asm + { + MOVS x8, ref, lsl #31 ; + BHI SadMBOffset3; + BCS SadMBOffset2; + BMI SadMBOffset1; + + MVN x6, #0xFF00; + } +LOOP_SAD0: + /****** process 8 pixels ******/ + x11 = *((int32*)(ref + 12)); + x10 = *((int32*)(ref + 8)); + x14 = *((int32*)(blk + 12)); + x12 = *((int32*)(blk + 8)); + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + __asm + { + /****** process 8 pixels ******/ + LDR x11, [ref, #4]; + LDR x10, [ref], lx ; + LDR x14, [blk, #4]; + LDR x12, [blk], #16 ; + } + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + /****************/ + x10 = x5 - (x4 << 8); /* extract low bytes */ + x10 = x10 + x4; /* add with high bytes */ + x10 = x10 + (x10 << 16); /* add with lower half word */ + + __asm + { + /****************/ + RSBS x11, dmin, x10, lsr #16; + ADDLSS x8, x8, #0x10000001; + BLS LOOP_SAD0; + } + + return ((uint32)x10 >> 16); + +SadMBOffset3: + + return sad_mb_offset3(ref, blk, lx, dmin, x8); + +SadMBOffset2: + + return sad_mb_offset2(ref, blk, lx, dmin, x8); + +SadMBOffset1: + + return sad_mb_offset1(ref, blk, lx, dmin, x8); + } + + +#elif defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ + + __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { +__asm__ volatile("rsbs %1, %1, %2\n\trsbmi %1, %1, #0\n\tadd %0, %0, %1": "=r"(sad): "r"(tmp), "r"(tmp2)); + return sad; + } + + __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask) + { + int32 x7; + +__asm__ volatile("EOR %1, %2, %0\n\tSUBS %0, %2, %0\n\tEOR %1, %1, %0\n\tAND %1, %3, %1, lsr #1\n\tORRCC %1, %1, #0x80000000\n\tRSB %1, %1, %1, lsl #8\n\tADD %0, %0, %1, asr #7\n\tEOR %0, %0, %1, asr #7": "=r"(src1), "=&r"(x7): "r"(src2), "r"(mask)); + + return src1; + } + + __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask) + { + int32 x7; + +__asm__ volatile("EOR %1, %2, %0\n\tADDS %0, %2, %0\n\tEOR %1, %1, %0\n\tANDS %1, %3, %1, rrx\n\tRSB %1, %1, %1, lsl #8\n\tSUB %0, %0, %1, asr #7\n\tEOR %0, %0, %1, asr #7": "=r"(src1), "=&r"(x7): "r"(src2), "r"(mask)); + + return src1; + } + +#define sum_accumulate __asm__ volatile("SBC %0, %0, %1\n\tBIC %1, %4, %1\n\tADD %2, %2, %1, lsr #8\n\tSBC %0, %0, %3\n\tBIC %3, %4, %3\n\tADD %2, %2, %3, lsr #8": "=&r" (x5), "=&r" (x10), "=&r" (x4), "=&r" (x11): "r" (x6)); + +#define NUMBER 3 +#define SHIFT 24 +#define INC_X8 0x08000001 + +#include "sad_mb_offset.h" + +#undef NUMBER +#define NUMBER 2 +#undef SHIFT +#define SHIFT 16 +#undef INC_X8 +#define INC_X8 0x10000001 +#include "sad_mb_offset.h" + +#undef NUMBER +#define NUMBER 1 +#undef SHIFT +#define SHIFT 8 +#undef INC_X8 +#define INC_X8 0x08000001 +#include "sad_mb_offset.h" + + + __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx) + { + int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; + + x9 = 0x80808080; /* const. 
*/ + x4 = x5 = 0; + + x8 = (uint32)ref & 0x3; + if (x8 == 3) + goto SadMBOffset3; + if (x8 == 2) + goto SadMBOffset2; + if (x8 == 1) + goto SadMBOffset1; + + x8 = 16; +/// +__asm__ volatile("MVN %0, #0xFF00": "=r"(x6)); + +LOOP_SAD0: + /****** process 8 pixels ******/ + x11 = *((int32*)(ref + 12)); + x10 = *((int32*)(ref + 8)); + x14 = *((int32*)(blk + 12)); + x12 = *((int32*)(blk + 8)); + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + /****** process 8 pixels ******/ + x11 = *((int32*)(ref + 4)); +__asm__ volatile("LDR %0, [%1], %2": "=&r"(x10), "=r"(ref): "r"(lx)); + //x10 = *((int32*)ref); ref+=lx; + x14 = *((int32*)(blk + 4)); +__asm__ volatile("LDR %0, [%1], #16": "=&r"(x12), "=r"(blk)); + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + /****************/ + x10 = x5 - (x4 << 8); /* extract low bytes */ + x10 = x10 + x4; /* add with high bytes */ + x10 = x10 + (x10 << 16); /* add with lower half word */ + + /****************/ + + if (((uint32)x10 >> 16) <= dmin) /* compare with dmin */ + { + if (--x8) + { + goto LOOP_SAD0; + } + + } + + return ((uint32)x10 >> 16); + +SadMBOffset3: + + return sad_mb_offset3(ref, blk, lx, dmin); + +SadMBOffset2: + + return sad_mb_offset2(ref, blk, lx, dmin); + +SadMBOffset1: + + return sad_mb_offset1(ref, blk, lx, dmin); + } + + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // _SAD_INLINE_H_ + diff --git a/media/libstagefright/codecs/avc/enc/src/sad_mb_offset.h b/media/libstagefright/codecs/avc/enc/src/sad_mb_offset.h new file mode 100644 index 0000000..d5d4a42 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/sad_mb_offset.h @@ -0,0 +1,311 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * ------------------------------------------------------------------- + */ + +#if defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ + +#if (NUMBER==3) +__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin) +#elif (NUMBER==2) +__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin) +#elif (NUMBER==1) +__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin) +#endif +{ + int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; + + // x5 = (x4<<8) - x4; + x4 = x5 = 0; + x6 = 0xFFFF00FF; + x9 = 0x80808080; /* const. */ + ref -= NUMBER; /* bic ref, ref, #3 */ + ref -= lx; + blk -= 16; + x8 = 16; + +#if (NUMBER==3) +LOOP_SAD3: +#elif (NUMBER==2) +LOOP_SAD2: +#elif (NUMBER==1) +LOOP_SAD1: +#endif + /****** process 8 pixels ******/ + x10 = *((uint32*)(ref += lx)); /* D C B A */ + x11 = *((uint32*)(ref + 4)); /* H G F E */ + x12 = *((uint32*)(ref + 8)); /* L K J I */ + + x10 = ((uint32)x10 >> SHIFT); /* 0 0 0 D */ + x10 = x10 | (x11 << (32 - SHIFT)); /* G F E D */ + x11 = ((uint32)x11 >> SHIFT); /* 0 0 0 H */ + x11 = x11 | (x12 << (32 - SHIFT)); /* K J I H */ + + x12 = *((uint32*)(blk += 16)); + x14 = *((uint32*)(blk + 4)); + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + /****** process 8 pixels ******/ + x10 = *((uint32*)(ref + 8)); /* D C B A */ + x11 = *((uint32*)(ref + 12)); /* H G F E */ + x12 = *((uint32*)(ref + 16)); /* L K J I */ + + x10 = ((uint32)x10 >> SHIFT); /* mvn x10, x10, lsr #24 = 0xFF 0xFF 0xFF ~D */ + x10 = x10 | (x11 << (32 - SHIFT)); /* bic x10, x10, x11, lsl #8 = ~G ~F ~E ~D */ + x11 = ((uint32)x11 >> SHIFT); /* 0xFF 0xFF 0xFF ~H */ + x11 = x11 | (x12 << (32 - SHIFT)); /* ~K ~J ~I ~H */ + + x12 = *((uint32*)(blk + 8)); + x14 = *((uint32*)(blk + 12)); + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + /****************/ + x10 = x5 - (x4 << 8); /* extract low bytes */ + x10 = x10 + x4; /* add with high bytes */ + x10 = x10 + (x10 << 16); /* add with lower half word */ + + if ((int)((uint32)x10 >> 16) <= dmin) /* compare with dmin */ + { + if (--x8) + { +#if (NUMBER==3) + goto LOOP_SAD3; +#elif (NUMBER==2) + goto LOOP_SAD2; +#elif (NUMBER==1) + goto LOOP_SAD1; +#endif + } + + } + + return ((uint32)x10 >> 16); +} + +#elif defined(__CC_ARM) /* only work with arm v5 */ + +#if (NUMBER==3) +__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8) +#elif (NUMBER==2) +__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8) +#elif (NUMBER==1) +__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8) +#endif +{ + int32 x4, x5, x6, x9, x10, x11, x12, x14; + + x9 = 0x80808080; /* const. 
*/ + x4 = x5 = 0; + + __asm{ + MVN x6, #0xff0000; +#if (NUMBER==3) +LOOP_SAD3: +#elif (NUMBER==2) +LOOP_SAD2: +#elif (NUMBER==1) +LOOP_SAD1: +#endif + BIC ref, ref, #3; + } + /****** process 8 pixels ******/ + x11 = *((int32*)(ref + 12)); + x12 = *((int32*)(ref + 16)); + x10 = *((int32*)(ref + 8)); + x14 = *((int32*)(blk + 12)); + + __asm{ + MVN x10, x10, lsr #SHIFT; + BIC x10, x10, x11, lsl #(32-SHIFT); + MVN x11, x11, lsr #SHIFT; + BIC x11, x11, x12, lsl #(32-SHIFT); + + LDR x12, [blk, #8]; + } + + /* process x11 & x14 */ + x11 = sad_4pixelN(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixelN(x10, x12, x9); + + sum_accumulate; + + __asm{ + /****** process 8 pixels ******/ + LDR x11, [ref, #4]; + LDR x12, [ref, #8]; + LDR x10, [ref], lx ; + LDR x14, [blk, #4]; + + MVN x10, x10, lsr #SHIFT; + BIC x10, x10, x11, lsl #(32-SHIFT); + MVN x11, x11, lsr #SHIFT; + BIC x11, x11, x12, lsl #(32-SHIFT); + + LDR x12, [blk], #16; + } + + /* process x11 & x14 */ + x11 = sad_4pixelN(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixelN(x10, x12, x9); + + sum_accumulate; + + /****************/ + x10 = x5 - (x4 << 8); /* extract low bytes */ + x10 = x10 + x4; /* add with high bytes */ + x10 = x10 + (x10 << 16); /* add with lower half word */ + + __asm{ + RSBS x11, dmin, x10, lsr #16 + ADDLSS x8, x8, #INC_X8 +#if (NUMBER==3) + BLS LOOP_SAD3; +#elif (NUMBER==2) +BLS LOOP_SAD2; +#elif (NUMBER==1) +BLS LOOP_SAD1; +#endif + } + + return ((uint32)x10 >> 16); +} + +#elif defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ + +#if (NUMBER==3) +__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin) +#elif (NUMBER==2) +__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin) +#elif (NUMBER==1) +__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin) +#endif +{ + int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; + + x9 = 0x80808080; /* const. 
*/ + x4 = x5 = 0; + x8 = 16; //<<===========******* + +__asm__ volatile("MVN %0, #0xFF0000": "=r"(x6)); + +#if (NUMBER==3) +LOOP_SAD3: +#elif (NUMBER==2) +LOOP_SAD2: +#elif (NUMBER==1) +LOOP_SAD1: +#endif +__asm__ volatile("BIC %0, %0, #3": "=r"(ref)); + /****** process 8 pixels ******/ + x11 = *((int32*)(ref + 12)); + x12 = *((int32*)(ref + 16)); + x10 = *((int32*)(ref + 8)); + x14 = *((int32*)(blk + 12)); + +#if (SHIFT==8) +__asm__ volatile("MVN %0, %0, lsr #8\n\tBIC %0, %0, %1,lsl #24\n\tMVN %1, %1,lsr #8\n\tBIC %1, %1, %2,lsl #24": "=&r"(x10), "=&r"(x11): "r"(x12)); +#elif (SHIFT==16) +__asm__ volatile("MVN %0, %0, lsr #16\n\tBIC %0, %0, %1,lsl #16\n\tMVN %1, %1,lsr #16\n\tBIC %1, %1, %2,lsl #16": "=&r"(x10), "=&r"(x11): "r"(x12)); +#elif (SHIFT==24) +__asm__ volatile("MVN %0, %0, lsr #24\n\tBIC %0, %0, %1,lsl #8\n\tMVN %1, %1,lsr #24\n\tBIC %1, %1, %2,lsl #8": "=&r"(x10), "=&r"(x11): "r"(x12)); +#endif + + x12 = *((int32*)(blk + 8)); + + /* process x11 & x14 */ + x11 = sad_4pixelN(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixelN(x10, x12, x9); + + sum_accumulate; + + /****** process 8 pixels ******/ + x11 = *((int32*)(ref + 4)); + x12 = *((int32*)(ref + 8)); + x10 = *((int32*)ref); ref += lx; + x14 = *((int32*)(blk + 4)); + +#if (SHIFT==8) +__asm__ volatile("MVN %0, %0, lsr #8\n\tBIC %0, %0, %1,lsl #24\n\tMVN %1, %1,lsr #8\n\tBIC %1, %1, %2,lsl #24": "=&r"(x10), "=&r"(x11): "r"(x12)); +#elif (SHIFT==16) +__asm__ volatile("MVN %0, %0, lsr #16\n\tBIC %0, %0, %1,lsl #16\n\tMVN %1, %1,lsr #16\n\tBIC %1, %1, %2,lsl #16": "=&r"(x10), "=&r"(x11): "r"(x12)); +#elif (SHIFT==24) +__asm__ volatile("MVN %0, %0, lsr #24\n\tBIC %0, %0, %1,lsl #8\n\tMVN %1, %1,lsr #24\n\tBIC %1, %1, %2,lsl #8": "=&r"(x10), "=&r"(x11): "r"(x12)); +#endif +__asm__ volatile("LDR %0, [%1], #16": "=&r"(x12), "=r"(blk)); + + /* process x11 & x14 */ + x11 = sad_4pixelN(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixelN(x10, x12, x9); + + sum_accumulate; + + /****************/ + x10 = x5 - (x4 << 8); /* extract low bytes */ + x10 = x10 + x4; /* add with high bytes */ + x10 = x10 + (x10 << 16); /* add with lower half word */ + + if (((uint32)x10 >> 16) <= (uint32)dmin) /* compare with dmin */ + { + if (--x8) + { +#if (NUMBER==3) + goto LOOP_SAD3; +#elif (NUMBER==2) +goto LOOP_SAD2; +#elif (NUMBER==1) +goto LOOP_SAD1; +#endif + } + + } + + return ((uint32)x10 >> 16); +} + +#endif + diff --git a/media/libstagefright/codecs/avc/enc/src/slice.cpp b/media/libstagefright/codecs/avc/enc/src/slice.cpp new file mode 100644 index 0000000..f6d066e --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/slice.cpp @@ -0,0 +1,1025 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + + +AVCEnc_Status AVCEncodeSlice(AVCEncObject *encvid) +{ + AVCEnc_Status status = AVCENC_SUCCESS; + AVCCommonObj *video = encvid->common; + AVCPicParamSet *pps = video->currPicParams; + AVCSliceHeader *sliceHdr = video->sliceHdr; + AVCMacroblock *currMB ; + AVCEncBitstream *stream = encvid->bitstream; + uint slice_group_id; + int CurrMbAddr, slice_type; + + slice_type = video->slice_type; + + /* set the first mb in slice */ + video->mbNum = CurrMbAddr = sliceHdr->first_mb_in_slice;// * (1+video->MbaffFrameFlag); + slice_group_id = video->MbToSliceGroupMap[CurrMbAddr]; + + video->mb_skip_run = 0; + + /* while loop , see subclause 7.3.4 */ + while (1) + { + video->mbNum = CurrMbAddr; + currMB = video->currMB = &(video->mblock[CurrMbAddr]); + currMB->slice_id = video->slice_id; // for deblocking + + video->mb_x = CurrMbAddr % video->PicWidthInMbs; + video->mb_y = CurrMbAddr / video->PicWidthInMbs; + + /* initialize QP for this MB here*/ + /* calculate currMB->QPy */ + RCInitMBQP(encvid); + + /* check the availability of neighboring macroblocks */ + InitNeighborAvailability(video, CurrMbAddr); + + /* Assuming that InitNeighborAvailability has been called prior to this function */ + video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0; + /* this is necessary for all subsequent intra search */ + + if (!video->currPicParams->constrained_intra_pred_flag) + { + video->intraAvailA = video->mbAvailA; + video->intraAvailB = video->mbAvailB; + video->intraAvailC = video->mbAvailC; + video->intraAvailD = video->mbAvailD; + } + else + { + if (video->mbAvailA) + { + video->intraAvailA = video->mblock[video->mbAddrA].mb_intra; + } + if (video->mbAvailB) + { + video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ; + } + if (video->mbAvailC) + { + video->intraAvailC = video->mblock[video->mbAddrC].mb_intra; + } + if (video->mbAvailD) + { + video->intraAvailD = video->mblock[video->mbAddrD].mb_intra; + } + } + + /* encode_one_macroblock() */ + status = EncodeMB(encvid); + if (status != AVCENC_SUCCESS) + { + break; + } + + /* go to next MB */ + CurrMbAddr++; + + while ((uint)video->MbToSliceGroupMap[CurrMbAddr] != slice_group_id && + (uint)CurrMbAddr < video->PicSizeInMbs) + { + CurrMbAddr++; + } + + if ((uint)CurrMbAddr >= video->PicSizeInMbs) + { + /* end of slice, return, but before that check to see if there are other slices + to be encoded. 
*/ + encvid->currSliceGroup++; + if (encvid->currSliceGroup > (int)pps->num_slice_groups_minus1) /* no more slice group */ + { + status = AVCENC_PICTURE_READY; + break; + } + else + { + /* find first_mb_num for the next slice */ + CurrMbAddr = 0; + while (video->MbToSliceGroupMap[CurrMbAddr] != encvid->currSliceGroup && + (uint)CurrMbAddr < video->PicSizeInMbs) + { + CurrMbAddr++; + } + if ((uint)CurrMbAddr >= video->PicSizeInMbs) + { + status = AVCENC_SLICE_EMPTY; /* error, one slice group has no MBs in it */ + } + + video->mbNum = CurrMbAddr; + status = AVCENC_SUCCESS; + break; + } + } + } + + if (video->mb_skip_run > 0) + { + /* write skip_run */ + if (slice_type != AVC_I_SLICE && slice_type != AVC_SI_SLICE) + { + ue_v(stream, video->mb_skip_run); + video->mb_skip_run = 0; + } + else /* shouldn't happen */ + { + status = AVCENC_FAIL; + } + } + + return status; +} + + +AVCEnc_Status EncodeMB(AVCEncObject *encvid) +{ + AVCEnc_Status status = AVCENC_SUCCESS; + AVCCommonObj *video = encvid->common; + AVCPictureData *currPic = video->currPic; + AVCFrameIO *currInput = encvid->currInput; + AVCMacroblock *currMB = video->currMB; + AVCMacroblock *MB_A, *MB_B; + AVCEncBitstream *stream = encvid->bitstream; + AVCRateControl *rateCtrl = encvid->rateCtrl; + uint8 *cur, *curL, *curCb, *curCr; + uint8 *orgL, *orgCb, *orgCr, *org4; + int CurrMbAddr = video->mbNum; + int picPitch = currPic->pitch; + int orgPitch = currInput->pitch; + int x_position = (video->mb_x << 4); + int y_position = (video->mb_y << 4); + int offset; + int b8, b4, blkidx; + AVCResidualType resType; + int slice_type; + int numcoeff; /* output from residual_block_cavlc */ + int cost16, cost8; + + int num_bits, start_mb_bits, start_text_bits; + + slice_type = video->slice_type; + + /* now, point to the reconstructed frame */ + offset = y_position * picPitch + x_position; + curL = currPic->Sl + offset; + orgL = currInput->YCbCr[0] + offset; + offset = (offset + x_position) >> 2; + curCb = currPic->Scb + offset; + curCr = currPic->Scr + offset; + orgCb = currInput->YCbCr[1] + offset; + orgCr = currInput->YCbCr[2] + offset; + + if (orgPitch != picPitch) + { + offset = y_position * (orgPitch - picPitch); + orgL += offset; + offset >>= 2; + orgCb += offset; + orgCr += offset; + } + + /******* determine MB prediction mode *******/ + if (encvid->intraSearch[CurrMbAddr]) + { + MBIntraSearch(encvid, CurrMbAddr, curL, picPitch); + } + /******* This part should be determined somehow ***************/ + if (currMB->mbMode == AVC_I_PCM) + { + /* write down mb_type and PCM data */ + /* and copy from currInput to currPic */ + status = EncodeIntraPCM(encvid); + + + return status; + } + + /****** for intra prediction, pred is already done *******/ + /****** for I4, the recon is ready and Xfrm coefs are ready to be encoded *****/ + + //RCCalculateMAD(encvid,currMB,orgL,orgPitch); // no need to re-calculate MAD for Intra + // not used since totalSAD is used instead + + /* compute the prediction */ + /* output is video->pred_block */ + if (!currMB->mb_intra) + { + AVCMBMotionComp(encvid, video); /* perform prediction and residue calculation */ + /* we can do the loop here and call dct_luma */ + video->pred_pitch = picPitch; + currMB->CBP = 0; + cost16 = 0; + cur = curL; + org4 = orgL; + + for (b8 = 0; b8 < 4; b8++) + { + cost8 = 0; + + for (b4 = 0; b4 < 4; b4++) + { + blkidx = blkIdx2blkXY[b8][b4]; + video->pred_block = cur; + numcoeff = dct_luma(encvid, blkidx, cur, org4, &cost8); + currMB->nz_coeff[blkidx] = numcoeff; + if (numcoeff) + { + 
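+                        /* a nonzero 4x4 block marks its bit in the per-block
+                           cbp4x4 map and sets the luma CBP bit of the 8x8
+                           block (bit b8) that contains it */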
video->cbp4x4 |= (1 << blkidx); + currMB->CBP |= (1 << b8); + } + + if (b4&1) + { + cur += ((picPitch << 2) - 4); + org4 += ((orgPitch << 2) - 4); + } + else + { + cur += 4; + org4 += 4; + } + } + + /* move the IDCT part out of dct_luma to accommodate the check + for coeff_cost. */ + + if ((currMB->CBP&(1 << b8)) && (cost8 <= _LUMA_COEFF_COST_)) + { + cost8 = 0; // reset it + + currMB->CBP ^= (1 << b8); + blkidx = blkIdx2blkXY[b8][0]; + + currMB->nz_coeff[blkidx] = 0; + currMB->nz_coeff[blkidx+1] = 0; + currMB->nz_coeff[blkidx+4] = 0; + currMB->nz_coeff[blkidx+5] = 0; + } + + cost16 += cost8; + + if (b8&1) + { + cur -= 8; + org4 -= 8; + } + else + { + cur += (8 - (picPitch << 3)); + org4 += (8 - (orgPitch << 3)); + } + } + + /* after the whole MB, we do another check for coeff_cost */ + if ((currMB->CBP&0xF) && (cost16 <= _LUMA_MB_COEFF_COST_)) + { + currMB->CBP = 0; // reset it to zero + memset(currMB->nz_coeff, 0, sizeof(uint8)*16); + } + + // now we do IDCT + MBInterIdct(video, curL, currMB, picPitch); + +// video->pred_block = video->pred + 256; + } + else /* Intra prediction */ + { + encvid->numIntraMB++; + + if (currMB->mbMode == AVC_I16) /* do prediction for the whole macroblock */ + { + currMB->CBP = 0; + /* get the prediction from encvid->pred_i16 */ + dct_luma_16x16(encvid, curL, orgL); + } + video->pred_block = encvid->pred_ic[currMB->intra_chroma_pred_mode]; + } + + /* chrominance */ + /* not need to do anything, the result is in encvid->pred_ic + chroma dct must be aware that prediction block can come from either intra or inter. */ + + dct_chroma(encvid, curCb, orgCb, 0); + + dct_chroma(encvid, curCr, orgCr, 1); + + + /* 4.1 if there's nothing in there, video->mb_skip_run++ */ + /* 4.2 if coded, check if there is a run of skipped MB, encodes it, + set video->QPyprev = currMB->QPy; */ + + /* 5. vlc encode */ + + /* check for skipped macroblock, INTER only */ + if (!currMB->mb_intra) + { + /* decide whether this MB (for inter MB) should be skipped if there's nothing left. 
*/ + if (!currMB->CBP && currMB->NumMbPart == 1 && currMB->QPy == video->QPy) + { + if (currMB->MBPartPredMode[0][0] == AVC_Pred_L0 && currMB->ref_idx_L0[0] == 0) + { + MB_A = &video->mblock[video->mbAddrA]; + MB_B = &video->mblock[video->mbAddrB]; + + if (!video->mbAvailA || !video->mbAvailB) + { + if (currMB->mvL0[0] == 0) /* both mv components are zeros.*/ + { + currMB->mbMode = AVC_SKIP; + video->mvd_l0[0][0][0] = 0; + video->mvd_l0[0][0][1] = 0; + } + } + else + { + if ((MB_A->ref_idx_L0[1] == 0 && MB_A->mvL0[3] == 0) || + (MB_B->ref_idx_L0[2] == 0 && MB_B->mvL0[12] == 0)) + { + if (currMB->mvL0[0] == 0) /* both mv components are zeros.*/ + { + currMB->mbMode = AVC_SKIP; + video->mvd_l0[0][0][0] = 0; + video->mvd_l0[0][0][1] = 0; + } + } + else if (video->mvd_l0[0][0][0] == 0 && video->mvd_l0[0][0][1] == 0) + { + currMB->mbMode = AVC_SKIP; + } + } + } + + if (currMB->mbMode == AVC_SKIP) + { + video->mb_skip_run++; + + /* set parameters */ + /* not sure whether we need the followings */ + if (slice_type == AVC_P_SLICE) + { + currMB->mbMode = AVC_SKIP; + currMB->MbPartWidth = currMB->MbPartHeight = 16; + currMB->MBPartPredMode[0][0] = AVC_Pred_L0; + currMB->NumMbPart = 1; + currMB->NumSubMbPart[0] = currMB->NumSubMbPart[1] = + currMB->NumSubMbPart[2] = currMB->NumSubMbPart[3] = 1; + currMB->SubMbPartWidth[0] = currMB->SubMbPartWidth[1] = + currMB->SubMbPartWidth[2] = currMB->SubMbPartWidth[3] = currMB->MbPartWidth; + currMB->SubMbPartHeight[0] = currMB->SubMbPartHeight[1] = + currMB->SubMbPartHeight[2] = currMB->SubMbPartHeight[3] = currMB->MbPartHeight; + + } + else if (slice_type == AVC_B_SLICE) + { + currMB->mbMode = AVC_SKIP; + currMB->MbPartWidth = currMB->MbPartHeight = 8; + currMB->MBPartPredMode[0][0] = AVC_Direct; + currMB->NumMbPart = -1; + } + + /* for skipped MB, always look at the first entry in RefPicList */ + currMB->RefIdx[0] = currMB->RefIdx[1] = + currMB->RefIdx[2] = currMB->RefIdx[3] = video->RefPicList0[0]->RefIdx; + + /* do not return yet, need to do some copies */ + } + } + } + /* non-skipped MB */ + + + /************* START ENTROPY CODING *************************/ + + start_mb_bits = 32 + (encvid->bitstream->write_pos << 3) - encvid->bitstream->bit_left; + + /* encode mb_type, mb_pred, sub_mb_pred, CBP */ + if (slice_type != AVC_I_SLICE && slice_type != AVC_SI_SLICE && currMB->mbMode != AVC_SKIP) + { + //if(!pps->entropy_coding_mode_flag) ALWAYS true + { + ue_v(stream, video->mb_skip_run); + video->mb_skip_run = 0; + } + } + + if (currMB->mbMode != AVC_SKIP) + { + status = EncodeMBHeader(currMB, encvid); + if (status != AVCENC_SUCCESS) + { + return status; + } + } + + start_text_bits = 32 + (encvid->bitstream->write_pos << 3) - encvid->bitstream->bit_left; + + /**** now decoding part *******/ + resType = AVC_Luma; + + /* DC transform for luma I16 mode */ + if (currMB->mbMode == AVC_I16) + { + /* vlc encode level/run */ + status = enc_residual_block(encvid, AVC_Intra16DC, encvid->numcoefdc, currMB); + if (status != AVCENC_SUCCESS) + { + return status; + } + resType = AVC_Intra16AC; + } + + /* VLC encoding for luma */ + for (b8 = 0; b8 < 4; b8++) + { + if (currMB->CBP&(1 << b8)) + { + for (b4 = 0; b4 < 4; b4++) + { + /* vlc encode level/run */ + status = enc_residual_block(encvid, resType, (b8 << 2) + b4, currMB); + if (status != AVCENC_SUCCESS) + { + return status; + } + } + } + } + + /* chroma */ + if (currMB->CBP & (3 << 4)) /* chroma DC residual present */ + { + for (b8 = 0; b8 < 2; b8++) /* for iCbCr */ + { + /* vlc encode level/run */ + status = 
enc_residual_block(encvid, AVC_ChromaDC, encvid->numcoefcdc[b8] + (b8 << 3), currMB); + if (status != AVCENC_SUCCESS) + { + return status; + } + } + } + + if (currMB->CBP & (2 << 4)) + { + /* AC part */ + for (b8 = 0; b8 < 2; b8++) /* for iCbCr */ + { + for (b4 = 0; b4 < 4; b4++) /* for each block inside Cb or Cr */ + { + /* vlc encode level/run */ + status = enc_residual_block(encvid, AVC_ChromaAC, 16 + (b8 << 2) + b4, currMB); + if (status != AVCENC_SUCCESS) + { + return status; + } + } + } + } + + + num_bits = 32 + (encvid->bitstream->write_pos << 3) - encvid->bitstream->bit_left; + + RCPostMB(video, rateCtrl, start_text_bits - start_mb_bits, + num_bits - start_text_bits); + +// num_bits -= start_mb_bits; +// fprintf(fdebug,"MB #%d: %d bits\n",CurrMbAddr,num_bits); +// fclose(fdebug); + return status; +} + +/* copy the content from predBlock back to the reconstructed YUV frame */ +void Copy_MB(uint8 *curL, uint8 *curCb, uint8 *curCr, uint8 *predBlock, int picPitch) +{ + int j, offset; + uint32 *dst, *dst2, *src; + + dst = (uint32*)curL; + src = (uint32*)predBlock; + + offset = (picPitch - 16) >> 2; + + for (j = 0; j < 16; j++) + { + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + + dst += offset; + } + + dst = (uint32*)curCb; + dst2 = (uint32*)curCr; + offset >>= 1; + + for (j = 0; j < 8; j++) + { + *dst++ = *src++; + *dst++ = *src++; + *dst2++ = *src++; + *dst2++ = *src++; + + dst += offset; + dst2 += offset; + } + return ; +} + +/* encode mb_type, mb_pred, sub_mb_pred, CBP */ +/* decide whether this MB (for inter MB) should be skipped */ +AVCEnc_Status EncodeMBHeader(AVCMacroblock *currMB, AVCEncObject *encvid) +{ + AVCEnc_Status status = AVCENC_SUCCESS; + uint mb_type; + AVCCommonObj *video = encvid->common; + AVCEncBitstream *stream = encvid->bitstream; + + if (currMB->CBP > 47) /* chroma CBP is 11 */ + { + currMB->CBP -= 16; /* remove the 5th bit from the right */ + } + + mb_type = InterpretMBType(currMB, video->slice_type); + + status = ue_v(stream, mb_type); + + if (currMB->mbMode == AVC_P8 || currMB->mbMode == AVC_P8ref0) + { + status = sub_mb_pred(video, currMB, stream); + } + else + { + status = mb_pred(video, currMB, stream) ; + } + + if (currMB->mbMode != AVC_I16) + { + /* decode coded_block_pattern */ + status = EncodeCBP(currMB, stream); + } + + /* calculate currMB->mb_qp_delta = currMB->QPy - video->QPyprev */ + if (currMB->CBP > 0 || currMB->mbMode == AVC_I16) + { + status = se_v(stream, currMB->QPy - video->QPy); + video->QPy = currMB->QPy; /* = (video->QPyprev + currMB->mb_qp_delta + 52)%52; */ + // no need video->QPc = currMB->QPc; + } + else + { + if (currMB->QPy != video->QPy) // current QP is not the same as previous QP + { + /* restore these values */ + RCRestoreQP(currMB, video, encvid); + } + } + + return status; +} + + +/* inputs are mbMode, mb_intra, i16Mode, CBP, NumMbPart, MbPartWidth, MbPartHeight */ +uint InterpretMBType(AVCMacroblock *currMB, int slice_type) +{ + int CBP_chrom; + int mb_type;// part1, part2, part3; +// const static int MapParts2Type[2][3][3]={{{4,8,12},{10,6,14},{16,18,20}}, +// {{5,9,13},{11,7,15},{17,19,21}}}; + + if (currMB->mb_intra) + { + if (currMB->mbMode == AVC_I4) + { + mb_type = 0; + } + else if (currMB->mbMode == AVC_I16) + { + CBP_chrom = (currMB->CBP & 0x30); + if (currMB->CBP&0xF) + { + currMB->CBP |= 0xF; /* either 0x0 or 0xF */ + mb_type = 13; + } + else + { + mb_type = 1; + } + mb_type += (CBP_chrom >> 2) + currMB->i16Mode; + } + else /* if(currMB->mbMode == AVC_I_PCM) */ + { + mb_type = 25; + 
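+            /* mb_type 25 signals I_PCM in an I slice; the additional offset
+               of 5 for intra macroblocks in P slices is applied below. */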
} + } + else + { /* P-MB *//* note that the order of the enum AVCMBMode cannot be changed + since we use it here. */ + mb_type = currMB->mbMode - AVC_P16; + } + + if (slice_type == AVC_P_SLICE) + { + if (currMB->mb_intra) + { + mb_type += 5; + } + } + // following codes have not been tested yet, not needed. + /* else if(slice_type == AVC_B_SLICE) + { + if(currMB->mbMode == AVC_BDirect16) + { + mb_type = 0; + } + else if(currMB->mbMode == AVC_P16) + { + mb_type = currMB->MBPartPredMode[0][0] + 1; // 1 or 2 + } + else if(currMB->mbMode == AVC_P8) + { + mb_type = 26; + } + else if(currMB->mbMode == AVC_P8ref0) + { + mb_type = 27; + } + else + { + part1 = currMB->mbMode - AVC_P16x8; + part2 = currMB->MBPartPredMode[0][0]; + part3 = currMB->MBPartPredMode[1][0]; + mb_type = MapParts2Type[part1][part2][part3]; + } + } + + if(slice_type == AVC_SI_SLICE) + { + mb_type++; + } + */ + return (uint)mb_type; +} + +//const static int mbPart2raster[3][4] = {{0,0,0,0},{1,1,0,0},{1,0,1,0}}; + +/* see subclause 7.3.5.1 */ +AVCEnc_Status mb_pred(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream) +{ + AVCEnc_Status status = AVCENC_SUCCESS; + int mbPartIdx; + AVCSliceHeader *sliceHdr = video->sliceHdr; + int max_ref_idx; + uint code; + + if (currMB->mbMode == AVC_I4 || currMB->mbMode == AVC_I16) + { + if (currMB->mbMode == AVC_I4) + { + /* perform prediction to get the actual intra 4x4 pred mode */ + EncodeIntra4x4Mode(video, currMB, stream); + /* output will be in currMB->i4Mode[4][4] */ + } + + /* assume already set from MBPrediction() */ + status = ue_v(stream, currMB->intra_chroma_pred_mode); + } + else if (currMB->MBPartPredMode[0][0] != AVC_Direct) + { + + memset(currMB->ref_idx_L0, 0, sizeof(int16)*4); + + /* see subclause 7.4.5.1 for the range of ref_idx_lX */ + max_ref_idx = sliceHdr->num_ref_idx_l0_active_minus1; + /* if(video->MbaffFrameFlag && currMB->mb_field_decoding_flag) + max_ref_idx = 2*sliceHdr->num_ref_idx_l0_active_minus1 + 1; + */ + /* decode ref index for L0 */ + if (sliceHdr->num_ref_idx_l0_active_minus1 > 0) + { + for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++) + { + if (/*(sliceHdr->num_ref_idx_l0_active_minus1>0 || currMB->mb_field_decoding_flag) &&*/ + currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L1) + { + code = currMB->ref_idx_L0[mbPartIdx]; + status = te_v(stream, code, max_ref_idx); + } + } + } + + /* see subclause 7.4.5.1 for the range of ref_idx_lX */ + max_ref_idx = sliceHdr->num_ref_idx_l1_active_minus1; + /* if(video->MbaffFrameFlag && currMB->mb_field_decoding_flag) + max_ref_idx = 2*sliceHdr->num_ref_idx_l1_active_minus1 + 1; + */ + /* decode ref index for L1 */ + if (sliceHdr->num_ref_idx_l1_active_minus1 > 0) + { + for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++) + { + if (/*(sliceHdr->num_ref_idx_l1_active_minus1>0 || currMB->mb_field_decoding_flag) &&*/ + currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L0) + { + status = te_v(stream, currMB->ref_idx_L1[mbPartIdx], max_ref_idx); + } + } + } + + /* encode mvd_l0 */ + for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++) + { + if (currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L1) + { + status = se_v(stream, video->mvd_l0[mbPartIdx][0][0]); + status = se_v(stream, video->mvd_l0[mbPartIdx][0][1]); + } + } + /* encode mvd_l1 */ + for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++) + { + if (currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L0) + { + status = se_v(stream, video->mvd_l1[mbPartIdx][0][0]); + status = se_v(stream, 
video->mvd_l1[mbPartIdx][0][1]); + } + } + } + + return status; +} + +/* see subclause 7.3.5.2 */ +AVCEnc_Status sub_mb_pred(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream) +{ + AVCEnc_Status status = AVCENC_SUCCESS; + int mbPartIdx, subMbPartIdx; + AVCSliceHeader *sliceHdr = video->sliceHdr; + uint max_ref_idx; + uint slice_type = video->slice_type; + uint sub_mb_type[4]; + + /* this should move somewhere else where we don't have to make this check */ + if (currMB->mbMode == AVC_P8ref0) + { + memset(currMB->ref_idx_L0, 0, sizeof(int16)*4); + } + + /* we have to check the values to make sure they are valid */ + /* assign values to currMB->sub_mb_type[] */ + if (slice_type == AVC_P_SLICE) + { + InterpretSubMBTypeP(currMB, sub_mb_type); + } + /* no need to check for B-slice + else if(slice_type == AVC_B_SLICE) + { + InterpretSubMBTypeB(currMB,sub_mb_type); + }*/ + + for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++) + { + status = ue_v(stream, sub_mb_type[mbPartIdx]); + } + + /* see subclause 7.4.5.1 for the range of ref_idx_lX */ + max_ref_idx = sliceHdr->num_ref_idx_l0_active_minus1; + /* if(video->MbaffFrameFlag && currMB->mb_field_decoding_flag) + max_ref_idx = 2*sliceHdr->num_ref_idx_l0_active_minus1 + 1; */ + + for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++) + { + if ((sliceHdr->num_ref_idx_l0_active_minus1 > 0 /*|| currMB->mb_field_decoding_flag*/) && + currMB->mbMode != AVC_P8ref0 && /*currMB->subMbMode[mbPartIdx]!=AVC_BDirect8 &&*/ + currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L1) + { + status = te_v(stream, currMB->ref_idx_L0[mbPartIdx], max_ref_idx); + } + /* used in deblocking */ + currMB->RefIdx[mbPartIdx] = video->RefPicList0[currMB->ref_idx_L0[mbPartIdx]]->RefIdx; + } + /* see subclause 7.4.5.1 for the range of ref_idx_lX */ + max_ref_idx = sliceHdr->num_ref_idx_l1_active_minus1; + /* if(video->MbaffFrameFlag && currMB->mb_field_decoding_flag) + max_ref_idx = 2*sliceHdr->num_ref_idx_l1_active_minus1 + 1;*/ + + if (sliceHdr->num_ref_idx_l1_active_minus1 > 0) + { + for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++) + { + if (/*(sliceHdr->num_ref_idx_l1_active_minus1>0 || currMB->mb_field_decoding_flag) &&*/ + /*currMB->subMbMode[mbPartIdx]!=AVC_BDirect8 &&*/ + currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L0) + { + status = te_v(stream, currMB->ref_idx_L1[mbPartIdx], max_ref_idx); + } + } + } + + for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++) + { + if (/*currMB->subMbMode[mbPartIdx]!=AVC_BDirect8 &&*/ + currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L1) + { + for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++) + { + status = se_v(stream, video->mvd_l0[mbPartIdx][subMbPartIdx][0]); + status = se_v(stream, video->mvd_l0[mbPartIdx][subMbPartIdx][1]); + } + } + } + + for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++) + { + if (/*currMB->subMbMode[mbPartIdx]!=AVC_BDirect8 &&*/ + currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L0) + { + for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++) + { + status = se_v(stream, video->mvd_l1[mbPartIdx][subMbPartIdx][0]); + status = se_v(stream, video->mvd_l1[mbPartIdx][subMbPartIdx][1]); + } + } + } + + return status; +} + +/* input is mblock->sub_mb_type[] */ +void InterpretSubMBTypeP(AVCMacroblock *mblock, uint *sub_mb_type) +{ + int i; + /* see enum AVCMBType declaration */ + /*const static AVCSubMBMode map2subMbMode[4] = {AVC_8x8,AVC_8x4,AVC_4x8,AVC_4x4}; + const static int map2subPartWidth[4] = {8,8,4,4}; + const static int map2subPartHeight[4] = 
{8,4,8,4}; + const static int map2numSubPart[4] = {1,2,2,4};*/ + + for (i = 0; i < 4 ; i++) + { + sub_mb_type[i] = mblock->subMbMode[i] - AVC_8x8; + } + + return ; +} + +void InterpretSubMBTypeB(AVCMacroblock *mblock, uint *sub_mb_type) +{ + int i; + /* see enum AVCMBType declaration */ + /* const static AVCSubMBMode map2subMbMode[13] = {AVC_BDirect8,AVC_8x8,AVC_8x8, + AVC_8x8,AVC_8x4,AVC_4x8,AVC_8x4,AVC_4x8,AVC_8x4,AVC_4x8,AVC_4x4,AVC_4x4,AVC_4x4}; + const static int map2subPartWidth[13] = {4,8,8,8,8,4,8,4,8,4,4,4,4}; + const static int map2subPartHeight[13] = {4,8,8,8,4,8,4,8,4,8,4,4,4}; + const static int map2numSubPart[13] = {4,1,1,1,2,2,2,2,2,2,4,4,4}; + const static int map2predMode[13] = {3,0,1,2,0,0,1,1,2,2,0,1,2};*/ + + for (i = 0; i < 4 ; i++) + { + if (mblock->subMbMode[i] == AVC_BDirect8) + { + sub_mb_type[i] = 0; + } + else if (mblock->subMbMode[i] == AVC_8x8) + { + sub_mb_type[i] = 1 + mblock->MBPartPredMode[i][0]; + } + else if (mblock->subMbMode[i] == AVC_4x4) + { + sub_mb_type[i] = 10 + mblock->MBPartPredMode[i][0]; + } + else + { + sub_mb_type[i] = 4 + (mblock->MBPartPredMode[i][0] << 1) + (mblock->subMbMode[i] - AVC_8x4); + } + } + + return ; +} + +/* see subclause 8.3.1 */ +AVCEnc_Status EncodeIntra4x4Mode(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream) +{ + int intra4x4PredModeA = 0; + int intra4x4PredModeB, predIntra4x4PredMode; + int component, SubBlock_indx, block_x, block_y; + int dcOnlyPredictionFlag; + uint flag; + int rem = 0; + int mode; + int bindx = 0; + + for (component = 0; component < 4; component++) /* partition index */ + { + block_x = ((component & 1) << 1); + block_y = ((component >> 1) << 1); + + for (SubBlock_indx = 0; SubBlock_indx < 4; SubBlock_indx++) /* sub-partition index */ + { + dcOnlyPredictionFlag = 0; + if (block_x > 0) + { + intra4x4PredModeA = currMB->i4Mode[(block_y << 2) + block_x - 1 ]; + } + else + { + if (video->intraAvailA) + { + if (video->mblock[video->mbAddrA].mbMode == AVC_I4) + { + intra4x4PredModeA = video->mblock[video->mbAddrA].i4Mode[(block_y << 2) + 3]; + } + else + { + intra4x4PredModeA = AVC_I4_DC; + } + } + else + { + dcOnlyPredictionFlag = 1; + } + } + + if (block_y > 0) + { + intra4x4PredModeB = currMB->i4Mode[((block_y-1) << 2) + block_x]; + } + else + { + if (video->intraAvailB) + { + if (video->mblock[video->mbAddrB].mbMode == AVC_I4) + { + intra4x4PredModeB = video->mblock[video->mbAddrB].i4Mode[(3 << 2) + block_x]; + } + else + { + intra4x4PredModeB = AVC_I4_DC; + } + } + else + { + dcOnlyPredictionFlag = 1; + } + } + + if (dcOnlyPredictionFlag) + { + intra4x4PredModeA = intra4x4PredModeB = AVC_I4_DC; + } + + predIntra4x4PredMode = AVC_MIN(intra4x4PredModeA, intra4x4PredModeB); + + flag = 0; + mode = currMB->i4Mode[(block_y<<2)+block_x]; + + if (mode == (AVCIntra4x4PredMode)predIntra4x4PredMode) + { + flag = 1; + } + else if (mode < predIntra4x4PredMode) + { + rem = mode; + } + else + { + rem = mode - 1; + } + + BitstreamWrite1Bit(stream, flag); + + if (!flag) + { + BitstreamWriteBits(stream, 3, rem); + } + + bindx++; + block_y += (SubBlock_indx & 1) ; + block_x += (1 - 2 * (SubBlock_indx & 1)) ; + } + } + + return AVCENC_SUCCESS; +} + + + diff --git a/media/libstagefright/codecs/avc/enc/src/vlc_encode.cpp b/media/libstagefright/codecs/avc/enc/src/vlc_encode.cpp new file mode 100644 index 0000000..222e709 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/vlc_encode.cpp @@ -0,0 +1,336 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 
1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + +/** +See algorithm in subclause 9.1, Table 9-1, Table 9-2. */ +AVCEnc_Status ue_v(AVCEncBitstream *bitstream, uint codeNum) +{ + if (AVCENC_SUCCESS != SetEGBitstring(bitstream, codeNum)) + return AVCENC_FAIL; + + return AVCENC_SUCCESS; +} + +/** +See subclause 9.1.1, Table 9-3 */ +AVCEnc_Status se_v(AVCEncBitstream *bitstream, int value) +{ + uint codeNum; + AVCEnc_Status status; + + if (value <= 0) + { + codeNum = -value * 2; + } + else + { + codeNum = value * 2 - 1; + } + + status = ue_v(bitstream, codeNum); + + return status; +} + +AVCEnc_Status te_v(AVCEncBitstream *bitstream, uint value, uint range) +{ + AVCEnc_Status status; + + if (range > 1) + { + return ue_v(bitstream, value); + } + else + { + status = BitstreamWrite1Bit(bitstream, 1 - value); + return status; + } +} + +/** +See subclause 9.1, Table 9-1, 9-2. */ +// compute leadingZeros and inforbits +//codeNum = (1<mbMode == AVC_I4) + { + codeNum = MapCBP2code[currMB->CBP][0]; + } + else + { + codeNum = MapCBP2code[currMB->CBP][1]; + } + + status = ue_v(stream, codeNum); + + return status; +} + +AVCEnc_Status ce_TotalCoeffTrailingOnes(AVCEncBitstream *stream, int TrailingOnes, int TotalCoeff, int nC) +{ + const static uint8 totCoeffTrailOne[3][4][17][2] = + { + { // 0702 + {{1, 1}, {6, 5}, {8, 7}, {9, 7}, {10, 7}, {11, 7}, {13, 15}, {13, 11}, {13, 8}, {14, 15}, {14, 11}, {15, 15}, {15, 11}, {16, 15}, {16, 11}, {16, 7}, {16, 4}}, + {{0, 0}, {2, 1}, {6, 4}, {8, 6}, {9, 6}, {10, 6}, {11, 6}, {13, 14}, {13, 10}, {14, 14}, {14, 10}, {15, 14}, {15, 10}, {15, 1}, {16, 14}, {16, 10}, {16, 6}}, + {{0, 0}, {0, 0}, {3, 1}, {7, 5}, {8, 5}, {9, 5}, {10, 5}, {11, 5}, {13, 13}, {13, 9}, {14, 13}, {14, 9}, {15, 13}, {15, 9}, {16, 13}, {16, 9}, {16, 5}}, + {{0, 0}, {0, 0}, {0, 0}, {5, 3}, {6, 3}, {7, 4}, {8, 4}, {9, 4}, {10, 4}, {11, 4}, {13, 12}, {14, 12}, {14, 8}, {15, 12}, {15, 8}, {16, 12}, {16, 8}}, + }, + { + {{2, 3}, {6, 11}, {6, 7}, {7, 7}, {8, 7}, {8, 4}, {9, 7}, {11, 15}, {11, 11}, {12, 15}, {12, 11}, {12, 8}, {13, 15}, {13, 11}, {13, 7}, {14, 9}, {14, 7}}, + {{0, 0}, {2, 2}, {5, 7}, {6, 10}, {6, 6}, {7, 6}, {8, 6}, {9, 6}, {11, 14}, {11, 10}, {12, 14}, {12, 10}, {13, 14}, {13, 10}, {14, 11}, {14, 8}, {14, 6}}, + {{0, 0}, {0, 0}, {3, 3}, {6, 9}, {6, 5}, {7, 5}, {8, 5}, {9, 5}, {11, 13}, {11, 9}, {12, 13}, {12, 9}, {13, 13}, {13, 9}, {13, 6}, {14, 10}, {14, 5}}, + {{0, 0}, {0, 0}, {0, 0}, {4, 5}, {4, 4}, {5, 6}, {6, 8}, {6, 4}, {7, 4}, {9, 4}, {11, 12}, {11, 8}, {12, 12}, {13, 12}, {13, 8}, {13, 1}, {14, 4}}, + }, + { + {{4, 15}, {6, 15}, {6, 11}, {6, 8}, {7, 15}, {7, 11}, {7, 9}, {7, 8}, {8, 15}, {8, 11}, {9, 15}, {9, 11}, {9, 8}, {10, 13}, {10, 9}, {10, 5}, {10, 1}}, + {{0, 0}, {4, 14}, {5, 15}, {5, 12}, {5, 10}, {5, 8}, {6, 14}, {6, 10}, {7, 14}, {8, 14}, {8, 10}, {9, 14}, {9, 10}, {9, 7}, {10, 12}, {10, 8}, {10, 4}}, + {{0, 0}, {0, 0}, {4, 13}, {5, 
14}, {5, 11}, {5, 9}, {6, 13}, {6, 9}, {7, 13}, {7, 10}, {8, 13}, {8, 9}, {9, 13}, {9, 9}, {10, 11}, {10, 7}, {10, 3}}, + {{0, 0}, {0, 0}, {0, 0}, {4, 12}, {4, 11}, {4, 10}, {4, 9}, {4, 8}, {5, 13}, {6, 12}, {7, 12}, {8, 12}, {8, 8}, {9, 12}, {10, 10}, {10, 6}, {10, 2}} + } + }; + + + AVCEnc_Status status = AVCENC_SUCCESS; + uint code, len; + int vlcnum; + + if (TrailingOnes > 3) + { + return AVCENC_TRAILINGONES_FAIL; + } + + if (nC >= 8) + { + if (TotalCoeff) + { + code = ((TotalCoeff - 1) << 2) | (TrailingOnes); + } + else + { + code = 3; + } + status = BitstreamWriteBits(stream, 6, code); + } + else + { + if (nC < 2) + { + vlcnum = 0; + } + else if (nC < 4) + { + vlcnum = 1; + } + else + { + vlcnum = 2; + } + + len = totCoeffTrailOne[vlcnum][TrailingOnes][TotalCoeff][0]; + code = totCoeffTrailOne[vlcnum][TrailingOnes][TotalCoeff][1]; + status = BitstreamWriteBits(stream, len, code); + } + + return status; +} + +AVCEnc_Status ce_TotalCoeffTrailingOnesChromaDC(AVCEncBitstream *stream, int TrailingOnes, int TotalCoeff) +{ + const static uint8 totCoeffTrailOneChrom[4][5][2] = + { + { {2, 1}, {6, 7}, {6, 4}, {6, 3}, {6, 2}}, + { {0, 0}, {1, 1}, {6, 6}, {7, 3}, {8, 3}}, + { {0, 0}, {0, 0}, {3, 1}, {7, 2}, {8, 2}}, + { {0, 0}, {0, 0}, {0, 0}, {6, 5}, {7, 0}}, + }; + + AVCEnc_Status status = AVCENC_SUCCESS; + uint code, len; + + len = totCoeffTrailOneChrom[TrailingOnes][TotalCoeff][0]; + code = totCoeffTrailOneChrom[TrailingOnes][TotalCoeff][1]; + status = BitstreamWriteBits(stream, len, code); + + return status; +} + +/* see Table 9-7 and 9-8 */ +AVCEnc_Status ce_TotalZeros(AVCEncBitstream *stream, int total_zeros, int TotalCoeff) +{ + const static uint8 lenTotalZeros[15][16] = + { + { 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9}, + { 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6}, + { 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6}, + { 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5}, + { 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5}, + { 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6}, + { 6, 5, 3, 3, 3, 2, 3, 4, 3, 6}, + { 6, 4, 5, 3, 2, 2, 3, 3, 6}, + { 6, 6, 4, 2, 2, 3, 2, 5}, + { 5, 5, 3, 2, 2, 2, 4}, + { 4, 4, 3, 3, 1, 3}, + { 4, 4, 2, 1, 3}, + { 3, 3, 1, 2}, + { 2, 2, 1}, + { 1, 1}, + }; + + const static uint8 codTotalZeros[15][16] = + { + {1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1}, + {7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0}, + {5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0}, + {3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0}, + {5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0}, + {1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0}, + {1, 1, 5, 4, 3, 3, 2, 1, 1, 0}, + {1, 1, 1, 3, 3, 2, 2, 1, 0}, + {1, 0, 1, 3, 2, 1, 1, 1, }, + {1, 0, 1, 3, 2, 1, 1, }, + {0, 1, 1, 2, 1, 3}, + {0, 1, 1, 1, 1}, + {0, 1, 1, 1}, + {0, 1, 1}, + {0, 1}, + }; + int len, code; + AVCEnc_Status status; + + len = lenTotalZeros[TotalCoeff-1][total_zeros]; + code = codTotalZeros[TotalCoeff-1][total_zeros]; + + status = BitstreamWriteBits(stream, len, code); + + return status; +} + +/* see Table 9-9 */ +AVCEnc_Status ce_TotalZerosChromaDC(AVCEncBitstream *stream, int total_zeros, int TotalCoeff) +{ + const static uint8 lenTotalZerosChromaDC[3][4] = + { + { 1, 2, 3, 3, }, + { 1, 2, 2, 0, }, + { 1, 1, 0, 0, }, + }; + + const static uint8 codTotalZerosChromaDC[3][4] = + { + { 1, 1, 1, 0, }, + { 1, 1, 0, 0, }, + { 1, 0, 0, 0, }, + }; + + int len, code; + AVCEnc_Status status; + + len = lenTotalZerosChromaDC[TotalCoeff-1][total_zeros]; + code = codTotalZerosChromaDC[TotalCoeff-1][total_zeros]; + + status = BitstreamWriteBits(stream, len, code); + + return status; +} + +/* see Table 9-10 */ 
+AVCEnc_Status ce_RunBefore(AVCEncBitstream *stream, int run_before, int zerosLeft) +{ + const static uint8 lenRunBefore[7][16] = + { + {1, 1}, + {1, 2, 2}, + {2, 2, 2, 2}, + {2, 2, 2, 3, 3}, + {2, 2, 3, 3, 3, 3}, + {2, 3, 3, 3, 3, 3, 3}, + {3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + }; + + const static uint8 codRunBefore[7][16] = + { + {1, 0}, + {1, 1, 0}, + {3, 2, 1, 0}, + {3, 2, 1, 1, 0}, + {3, 2, 3, 2, 1, 0}, + {3, 0, 1, 3, 2, 5, 4}, + {7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + }; + + int len, code; + AVCEnc_Status status; + + if (zerosLeft <= 6) + { + len = lenRunBefore[zerosLeft-1][run_before]; + code = codRunBefore[zerosLeft-1][run_before]; + } + else + { + len = lenRunBefore[6][run_before]; + code = codRunBefore[6][run_before]; + } + + status = BitstreamWriteBits(stream, len, code); + + + return status; +} -- cgit v1.1
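
For readers following EncodeIntra4x4Mode() in slice.cpp above: the sketch below illustrates, outside the patch, the most-probable-mode signalling of subclause 8.3.1 that the function implements. The predicted mode is the minimum of the left (A) and top (B) neighbours' 4x4 modes, with DC substituted when a neighbour is unavailable or not intra-4x4; a one-bit flag says whether the actual mode equals the prediction, and otherwise a 3-bit remainder that skips the predicted mode is written. The helper names (encode_i4_mode, decode_i4_mode) and the small struct are hypothetical illustrations, not part of the encoder's API.

/* Standalone sketch: most-probable-mode signalling for intra 4x4 modes.
 * Not part of the patch; compile with any C99/C++ compiler. */
#include <assert.h>

typedef struct {
    int use_predicted;  /* prev_intra4x4_pred_mode_flag */
    int rem;            /* rem_intra4x4_pred_mode (3 bits), valid only if flag == 0 */
} I4ModeBits;

static I4ModeBits encode_i4_mode(int mode, int modeA, int modeB)
{
    int pred = (modeA < modeB) ? modeA : modeB;   /* AVC_MIN(A, B) */
    I4ModeBits out;
    if (mode == pred) {
        out.use_predicted = 1;
        out.rem = 0;                              /* not transmitted */
    } else {
        out.use_predicted = 0;
        out.rem = (mode < pred) ? mode : mode - 1; /* skip the predicted mode */
    }
    return out;
}

static int decode_i4_mode(I4ModeBits in, int modeA, int modeB)
{
    int pred = (modeA < modeB) ? modeA : modeB;
    if (in.use_predicted)
        return pred;
    return (in.rem < pred) ? in.rem : in.rem + 1;
}

int main(void)
{
    /* round-trip all 9 intra 4x4 modes against every neighbour combination */
    for (int modeA = 0; modeA < 9; modeA++)
        for (int modeB = 0; modeB < 9; modeB++)
            for (int mode = 0; mode < 9; mode++)
            {
                I4ModeBits b = encode_i4_mode(mode, modeA, modeB);
                assert(decode_i4_mode(b, modeA, modeB) == mode);
            }
    return 0;
}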
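
Similarly, the ue_v()/se_v()/te_v() writers in vlc_encode.cpp map a syntax element to a codeNum and then to an Exp-Golomb codeword (subclauses 9.1 and 9.1.1). The standalone sketch below reproduces those mappings against stdout instead of an AVCEncBitstream, so the bit patterns can be inspected directly; print_ue() and se_to_codeNum() are hypothetical helpers, not functions from this patch.

/* Standalone sketch: Exp-Golomb codeNum mappings used by ue_v()/se_v().
 * Not part of the patch; compile with any C99/C++ compiler. */
#include <stdio.h>

/* signed value -> codeNum, as in se_v(): 0,1,-1,2,-2,... -> 0,1,2,3,4,... */
static unsigned se_to_codeNum(int value)
{
    return (value <= 0) ? (unsigned)(-value) * 2 : (unsigned)value * 2 - 1;
}

/* Print the Exp-Golomb codeword for codeNum (subclause 9.1):
 * leadingZeros zeros, then the (leadingZeros+1)-bit value of codeNum+1. */
static void print_ue(unsigned codeNum)
{
    unsigned temp = codeNum + 1;
    int leadingZeros = -1;
    while (temp)                       /* floor(log2(codeNum + 1)) */
    {
        temp >>= 1;
        leadingZeros++;
    }
    for (int i = 0; i < leadingZeros; i++)
        putchar('0');
    for (int i = leadingZeros; i >= 0; i--)
        putchar(((codeNum + 1) >> i) & 1 ? '1' : '0');
    printf("   (codeNum %u, %d bits)\n", codeNum, 2 * leadingZeros + 1);
}

int main(void)
{
    /* ue(v) examples: 0 -> "1", 1 -> "010", 2 -> "011", 3 -> "00100" */
    for (unsigned c = 0; c < 4; c++)
        print_ue(c);

    /* se(v) examples: mvd values -1 and +3 as they would be written by se_v() */
    print_ue(se_to_codeNum(-1));   /* codeNum 2 -> "011"   */
    print_ue(se_to_codeNum(3));    /* codeNum 5 -> "00110" */
    return 0;
}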