From 29a84457aed4c45bc900998b5e11c03023264208 Mon Sep 17 00:00:00 2001 From: James Dong Date: Fri, 2 Jul 2010 17:44:44 -0700 Subject: Initial checkin for software AVC encoder - Since the software encoder assumes the input is YUV420 planar, color conversion needs to be added when the input color format does not meet the requirement. With this patch, I only added a single color conversion from YUV420 semi planar to YUV420 planar. We can add more as we go. Change-Id: If8640c9e5a4f73d385ae9bb2022e57f7f62b91b9 --- .../codecs/avc/enc/src/avcenc_api.cpp | 744 +++++++ .../libstagefright/codecs/avc/enc/src/avcenc_api.h | 320 +++ .../libstagefright/codecs/avc/enc/src/avcenc_int.h | 471 +++++ .../libstagefright/codecs/avc/enc/src/avcenc_lib.h | 1020 +++++++++ .../codecs/avc/enc/src/bitstream_io.cpp | 336 +++ media/libstagefright/codecs/avc/enc/src/block.cpp | 1283 ++++++++++++ .../codecs/avc/enc/src/findhalfpel.cpp | 622 ++++++ media/libstagefright/codecs/avc/enc/src/header.cpp | 917 ++++++++ media/libstagefright/codecs/avc/enc/src/init.cpp | 899 ++++++++ .../codecs/avc/enc/src/intra_est.cpp | 2199 ++++++++++++++++++++ .../codecs/avc/enc/src/motion_comp.cpp | 2156 +++++++++++++++++++ .../codecs/avc/enc/src/motion_est.cpp | 1774 ++++++++++++++++ .../codecs/avc/enc/src/rate_control.cpp | 981 +++++++++ .../libstagefright/codecs/avc/enc/src/residual.cpp | 389 ++++ media/libstagefright/codecs/avc/enc/src/sad.cpp | 290 +++ .../codecs/avc/enc/src/sad_halfpel.cpp | 629 ++++++ .../codecs/avc/enc/src/sad_halfpel_inline.h | 96 + .../libstagefright/codecs/avc/enc/src/sad_inline.h | 488 +++++ .../codecs/avc/enc/src/sad_mb_offset.h | 311 +++ media/libstagefright/codecs/avc/enc/src/slice.cpp | 1025 +++++++++ .../codecs/avc/enc/src/vlc_encode.cpp | 336 +++ 21 files changed, 17286 insertions(+) create mode 100644 media/libstagefright/codecs/avc/enc/src/avcenc_api.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/avcenc_api.h create mode 100644 media/libstagefright/codecs/avc/enc/src/avcenc_int.h create mode 100644 media/libstagefright/codecs/avc/enc/src/avcenc_lib.h create mode 100644 media/libstagefright/codecs/avc/enc/src/bitstream_io.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/block.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/findhalfpel.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/header.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/init.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/intra_est.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/motion_comp.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/motion_est.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/rate_control.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/residual.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/sad.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/sad_halfpel.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/sad_halfpel_inline.h create mode 100644 media/libstagefright/codecs/avc/enc/src/sad_inline.h create mode 100644 media/libstagefright/codecs/avc/enc/src/sad_mb_offset.h create mode 100644 media/libstagefright/codecs/avc/enc/src/slice.cpp create mode 100644 media/libstagefright/codecs/avc/enc/src/vlc_encode.cpp (limited to 'media/libstagefright/codecs/avc/enc/src') diff --git a/media/libstagefright/codecs/avc/enc/src/avcenc_api.cpp b/media/libstagefright/codecs/avc/enc/src/avcenc_api.cpp new file mode 100644 index 0000000..d39885d 
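For reference, a minimal sketch of the kind of YUV420 semi-planar to YUV420 planar conversion described above might look like the code below. It is illustrative only: the function name is hypothetical, the actual converter added by this change is not among the files listed here, and the sketch assumes the library's uint8 type, memcpy from <string.h>, even frame dimensions, and an interleaved chroma plane ordered Cb,Cr (swap the two chroma writes for a Cr,Cb source).

/* Illustrative sketch: de-interleave a YUV420 semi-planar frame into the
   YUV420 planar layout that the software encoder expects. */
static void ConvertYUV420SemiPlanarToPlanar(const uint8 *src, uint8 *dst,
                                            int width, int height)
{
    int i;
    int ySize = width * height;         /* size of the luma plane */
    int cSize = ySize >> 2;             /* size of each chroma plane */
    const uint8 *srcUV = src + ySize;   /* interleaved CbCr plane */
    uint8 *dstU = dst + ySize;          /* destination Cb plane */
    uint8 *dstV = dstU + cSize;         /* destination Cr plane */

    memcpy(dst, src, ySize);            /* luma samples are unchanged */

    for (i = 0; i < cSize; i++)
    {
        dstU[i] = srcUV[2 * i];         /* Cb */
        dstV[i] = srcUV[2 * i + 1];     /* Cr */
    }
}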
--- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/avcenc_api.cpp @@ -0,0 +1,744 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_api.h" +#include "avcenc_lib.h" + +/* ======================================================================== */ +/* Function : PVAVCGetNALType() */ +/* Date : 11/4/2003 */ +/* Purpose : Sniff NAL type from the bitstream */ +/* In/out : */ +/* Return : AVCENC_SUCCESS if succeed, AVCENC_FAIL if fail. */ +/* Modified : */ +/* ======================================================================== */ +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncGetNALType(unsigned char *bitstream, int size, + int *nal_type, int *nal_ref_idc) +{ + int forbidden_zero_bit; + if (size > 0) + { + forbidden_zero_bit = bitstream[0] >> 7; + if (forbidden_zero_bit != 0) + return AVCENC_FAIL; + *nal_ref_idc = (bitstream[0] & 0x60) >> 5; + *nal_type = bitstream[0] & 0x1F; + return AVCENC_SUCCESS; + } + + return AVCENC_FAIL; +} + + +/* ======================================================================== */ +/* Function : PVAVCEncInitialize() */ +/* Date : 3/18/2004 */ +/* Purpose : Initialize the encoder library, allocate memory and verify */ +/* the profile/level support/settings. */ +/* In/out : Encoding parameters. */ +/* Return : AVCENC_SUCCESS for success. 
*/ +/* Modified : */ +/* ======================================================================== */ +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncInitialize(AVCHandle *avcHandle, AVCEncParams *encParam, + void* extSPS, void* extPPS) +{ + AVCEnc_Status status; + AVCEncObject *encvid; + AVCCommonObj *video; + uint32 *userData = (uint32*) avcHandle->userData; + int framesize; + + if (avcHandle->AVCObject != NULL) + { + return AVCENC_ALREADY_INITIALIZED; /* It's already initialized, need to cleanup first */ + } + + /* not initialized */ + + /* allocate videoObject */ + avcHandle->AVCObject = (void*)avcHandle->CBAVC_Malloc(userData, sizeof(AVCEncObject), DEFAULT_ATTR); + if (avcHandle->AVCObject == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + encvid = (AVCEncObject*) avcHandle->AVCObject; + memset(encvid, 0, sizeof(AVCEncObject)); /* reset everything */ + + encvid->enc_state = AVCEnc_Initializing; + + encvid->avcHandle = avcHandle; + + encvid->common = (AVCCommonObj*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCCommonObj), DEFAULT_ATTR); + if (encvid->common == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + video = encvid->common; + memset(video, 0, sizeof(AVCCommonObj)); + + /* allocate bitstream structure */ + encvid->bitstream = (AVCEncBitstream*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCEncBitstream), DEFAULT_ATTR); + if (encvid->bitstream == NULL) + { + return AVCENC_MEMORY_FAIL; + } + encvid->bitstream->encvid = encvid; /* to point back for reallocation */ + + /* allocate sequence parameter set structure */ + video->currSeqParams = (AVCSeqParamSet*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCSeqParamSet), DEFAULT_ATTR); + if (video->currSeqParams == NULL) + { + return AVCENC_MEMORY_FAIL; + } + memset(video->currSeqParams, 0, sizeof(AVCSeqParamSet)); + + /* allocate picture parameter set structure */ + video->currPicParams = (AVCPicParamSet*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCPicParamSet), DEFAULT_ATTR); + if (video->currPicParams == NULL) + { + return AVCENC_MEMORY_FAIL; + } + memset(video->currPicParams, 0, sizeof(AVCPicParamSet)); + + /* allocate slice header structure */ + video->sliceHdr = (AVCSliceHeader*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCSliceHeader), DEFAULT_ATTR); + if (video->sliceHdr == NULL) + { + return AVCENC_MEMORY_FAIL; + } + memset(video->sliceHdr, 0, sizeof(AVCSliceHeader)); + + /* allocate encoded picture buffer structure*/ + video->decPicBuf = (AVCDecPicBuffer*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCDecPicBuffer), DEFAULT_ATTR); + if (video->decPicBuf == NULL) + { + return AVCENC_MEMORY_FAIL; + } + memset(video->decPicBuf, 0, sizeof(AVCDecPicBuffer)); + + /* allocate rate control structure */ + encvid->rateCtrl = (AVCRateControl*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCRateControl), DEFAULT_ATTR); + if (encvid->rateCtrl == NULL) + { + return AVCENC_MEMORY_FAIL; + } + memset(encvid->rateCtrl, 0, sizeof(AVCRateControl)); + + /* reset frame list, not really needed */ + video->currPic = NULL; + video->currFS = NULL; + encvid->currInput = NULL; + video->prevRefPic = NULL; + + /* now read encParams, and allocate dimension-dependent variables */ + /* such as mblock */ + status = SetEncodeParam(avcHandle, encParam, extSPS, extPPS); /* initialized variables to be used in SPS*/ + if (status != AVCENC_SUCCESS) + { + return status; + } + + if (encParam->use_overrun_buffer == AVC_ON) + { + /* allocate overrun buffer */ + encvid->oBSize = encvid->rateCtrl->cpbSize; + if (encvid->oBSize > DEFAULT_OVERRUN_BUFFER_SIZE) + { + encvid->oBSize = 
DEFAULT_OVERRUN_BUFFER_SIZE; + } + encvid->overrunBuffer = (uint8*) avcHandle->CBAVC_Malloc(userData, encvid->oBSize, DEFAULT_ATTR); + if (encvid->overrunBuffer == NULL) + { + return AVCENC_MEMORY_FAIL; + } + } + else + { + encvid->oBSize = 0; + encvid->overrunBuffer = NULL; + } + + /* allocate frame size dependent structures */ + framesize = video->FrameHeightInMbs * video->PicWidthInMbs; + + video->mblock = (AVCMacroblock*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCMacroblock) * framesize, DEFAULT_ATTR); + if (video->mblock == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + video->MbToSliceGroupMap = (int*) avcHandle->CBAVC_Malloc(userData, sizeof(uint) * video->PicSizeInMapUnits * 2, DEFAULT_ATTR); + if (video->MbToSliceGroupMap == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + encvid->mot16x16 = (AVCMV*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCMV) * framesize, DEFAULT_ATTR); + if (encvid->mot16x16 == NULL) + { + return AVCENC_MEMORY_FAIL; + } + memset(encvid->mot16x16, 0, sizeof(AVCMV)*framesize); + + encvid->intraSearch = (uint8*) avcHandle->CBAVC_Malloc(userData, sizeof(uint8) * framesize, DEFAULT_ATTR); + if (encvid->intraSearch == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + encvid->min_cost = (int*) avcHandle->CBAVC_Malloc(userData, sizeof(int) * framesize, DEFAULT_ATTR); + if (encvid->min_cost == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + /* initialize motion search related memory */ + if (AVCENC_SUCCESS != InitMotionSearchModule(avcHandle)) + { + return AVCENC_MEMORY_FAIL; + } + + if (AVCENC_SUCCESS != InitRateControlModule(avcHandle)) + { + return AVCENC_MEMORY_FAIL; + } + + /* intialize function pointers */ + encvid->functionPointer = (AVCEncFuncPtr*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCEncFuncPtr), DEFAULT_ATTR); + if (encvid->functionPointer == NULL) + { + return AVCENC_MEMORY_FAIL; + } + encvid->functionPointer->SAD_Macroblock = &AVCSAD_Macroblock_C; + encvid->functionPointer->SAD_MB_HalfPel[0] = NULL; + encvid->functionPointer->SAD_MB_HalfPel[1] = &AVCSAD_MB_HalfPel_Cxh; + encvid->functionPointer->SAD_MB_HalfPel[2] = &AVCSAD_MB_HalfPel_Cyh; + encvid->functionPointer->SAD_MB_HalfPel[3] = &AVCSAD_MB_HalfPel_Cxhyh; + + /* initialize timing control */ + encvid->modTimeRef = 0; /* ALWAYS ASSUME THAT TIMESTAMP START FROM 0 !!!*/ + video->prevFrameNum = 0; + encvid->prevCodedFrameNum = 0; + encvid->dispOrdPOCRef = 0; + + if (encvid->outOfBandParamSet == TRUE) + { + encvid->enc_state = AVCEnc_Encoding_SPS; + } + else + { + encvid->enc_state = AVCEnc_Analyzing_Frame; + } + + return AVCENC_SUCCESS; +} + +/* ======================================================================== */ +/* Function : PVAVCEncGetMaxOutputSize() */ +/* Date : 11/29/2008 */ +/* Purpose : Return max output buffer size that apps should allocate for */ +/* output buffer. */ +/* In/out : */ +/* Return : AVCENC_SUCCESS for success. */ +/* Modified : size */ +/* ======================================================================== */ + +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncGetMaxOutputBufferSize(AVCHandle *avcHandle, int* size) +{ + AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject; + + if (encvid == NULL) + { + return AVCENC_UNINITIALIZED; + } + + *size = encvid->rateCtrl->cpbSize; + + return AVCENC_SUCCESS; +} + +/* ======================================================================== */ +/* Function : PVAVCEncSetInput() */ +/* Date : 4/18/2004 */ +/* Purpose : To feed an unencoded original frame to the encoder library. 
*/ +/* In/out : */ +/* Return : AVCENC_SUCCESS for success. */ +/* Modified : */ +/* ======================================================================== */ +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncSetInput(AVCHandle *avcHandle, AVCFrameIO *input) +{ + AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject; + AVCCommonObj *video = encvid->common; + AVCRateControl *rateCtrl = encvid->rateCtrl; + + AVCEnc_Status status; + uint frameNum; + + if (encvid == NULL) + { + return AVCENC_UNINITIALIZED; + } + + if (encvid->enc_state == AVCEnc_WaitingForBuffer) + { + goto RECALL_INITFRAME; + } + else if (encvid->enc_state != AVCEnc_Analyzing_Frame) + { + return AVCENC_FAIL; + } + + if (input->pitch > 0xFFFF) + { + return AVCENC_NOT_SUPPORTED; // we use 2-bytes for pitch + } + + /***********************************/ + + /* Let's rate control decide whether to encode this frame or not */ + /* Also set video->nal_unit_type, sliceHdr->slice_type, video->slice_type */ + if (AVCENC_SUCCESS != RCDetermineFrameNum(encvid, rateCtrl, input->coding_timestamp, &frameNum)) + { + return AVCENC_SKIPPED_PICTURE; /* not time to encode, thus skipping */ + } + + /* we may not need this line */ + //nextFrmModTime = (uint32)((((frameNum+1)*1000)/rateCtrl->frame_rate) + modTimeRef); /* rec. time */ + //encvid->nextModTime = nextFrmModTime - (encvid->frameInterval>>1) - 1; /* between current and next frame */ + + encvid->currInput = input; + encvid->currInput->coding_order = frameNum; + +RECALL_INITFRAME: + /* initialize and analyze the frame */ + status = InitFrame(encvid); + + if (status == AVCENC_SUCCESS) + { + encvid->enc_state = AVCEnc_Encoding_Frame; + } + else if (status == AVCENC_NEW_IDR) + { + if (encvid->outOfBandParamSet == TRUE) + { + encvid->enc_state = AVCEnc_Encoding_Frame; + } + else // assuming that in-band paramset keeps sending new SPS and PPS. + { + encvid->enc_state = AVCEnc_Encoding_SPS; + //video->currSeqParams->seq_parameter_set_id++; + //if(video->currSeqParams->seq_parameter_set_id > 31) // range check + { + video->currSeqParams->seq_parameter_set_id = 0; // reset + } + } + + video->sliceHdr->idr_pic_id++; + if (video->sliceHdr->idr_pic_id > 65535) // range check + { + video->sliceHdr->idr_pic_id = 0; // reset + } + } + /* the following logics need to be revisited */ + else if (status == AVCENC_PICTURE_READY) // no buffers returned back to the encoder + { + encvid->enc_state = AVCEnc_WaitingForBuffer; // Input accepted but can't continue + // need to free up some memory before proceeding with Encode + } + + return status; // return status, including the AVCENC_FAIL case and all 3 above. +} + +/* ======================================================================== */ +/* Function : PVAVCEncodeNAL() */ +/* Date : 4/29/2004 */ +/* Purpose : To encode one NAL/slice. */ +/* In/out : */ +/* Return : AVCENC_SUCCESS for success. 
*/ +/* Modified : */ +/* ======================================================================== */ +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncodeNAL(AVCHandle *avcHandle, unsigned char *buffer, unsigned int *buf_nal_size, int *nal_type) +{ + AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject; + AVCCommonObj *video = encvid->common; + AVCEncBitstream *bitstream = encvid->bitstream; + AVCEnc_Status status; + + if (encvid == NULL) + { + return AVCENC_UNINITIALIZED; + } + + switch (encvid->enc_state) + { + case AVCEnc_Initializing: + return AVCENC_UNINITIALIZED; + case AVCEnc_Encoding_SPS: + /* initialized the structure */ + BitstreamEncInit(bitstream, buffer, *buf_nal_size, NULL, 0); + BitstreamWriteBits(bitstream, 8, (1 << 5) | AVC_NALTYPE_SPS); + + /* encode SPS */ + status = EncodeSPS(encvid, bitstream); + if (status != AVCENC_SUCCESS) + { + return status; + } + + /* closing the NAL with trailing bits */ + status = BitstreamTrailingBits(bitstream, buf_nal_size); + if (status == AVCENC_SUCCESS) + { + encvid->enc_state = AVCEnc_Encoding_PPS; + video->currPicParams->seq_parameter_set_id = video->currSeqParams->seq_parameter_set_id; + video->currPicParams->pic_parameter_set_id++; + *nal_type = AVC_NALTYPE_SPS; + *buf_nal_size = bitstream->write_pos; + } + break; + case AVCEnc_Encoding_PPS: + /* initialized the structure */ + BitstreamEncInit(bitstream, buffer, *buf_nal_size, NULL, 0); + BitstreamWriteBits(bitstream, 8, (1 << 5) | AVC_NALTYPE_PPS); + + /* encode PPS */ + status = EncodePPS(encvid, bitstream); + if (status != AVCENC_SUCCESS) + { + return status; + } + + /* closing the NAL with trailing bits */ + status = BitstreamTrailingBits(bitstream, buf_nal_size); + if (status == AVCENC_SUCCESS) + { + if (encvid->outOfBandParamSet == TRUE) // already extract PPS, SPS + { + encvid->enc_state = AVCEnc_Analyzing_Frame; + } + else // SetInput has been called before SPS and PPS. + { + encvid->enc_state = AVCEnc_Encoding_Frame; + } + + *nal_type = AVC_NALTYPE_PPS; + *buf_nal_size = bitstream->write_pos; + } + break; + + case AVCEnc_Encoding_Frame: + /* initialized the structure */ + BitstreamEncInit(bitstream, buffer, *buf_nal_size, encvid->overrunBuffer, encvid->oBSize); + BitstreamWriteBits(bitstream, 8, (video->nal_ref_idc << 5) | (video->nal_unit_type)); + + /* Re-order the reference list according to the ref_pic_list_reordering() */ + /* We don't have to reorder the list for the encoder here. This can only be done + after we encode this slice. We can run thru a second-pass to see if new ordering + would save more bits. Too much delay !! 
*/ + /* status = ReOrderList(video);*/ + status = InitSlice(encvid); + if (status != AVCENC_SUCCESS) + { + return status; + } + + /* when we have everything, we encode the slice header */ + status = EncodeSliceHeader(encvid, bitstream); + if (status != AVCENC_SUCCESS) + { + return status; + } + + status = AVCEncodeSlice(encvid); + + video->slice_id++; + + /* closing the NAL with trailing bits */ + BitstreamTrailingBits(bitstream, buf_nal_size); + + *buf_nal_size = bitstream->write_pos; + + encvid->rateCtrl->numFrameBits += ((*buf_nal_size) << 3); + + *nal_type = video->nal_unit_type; + + if (status == AVCENC_PICTURE_READY) + { + status = RCUpdateFrame(encvid); + if (status == AVCENC_SKIPPED_PICTURE) /* skip current frame */ + { + DPBReleaseCurrentFrame(avcHandle, video); + encvid->enc_state = AVCEnc_Analyzing_Frame; + + return status; + } + + /* perform loop-filtering on the entire frame */ + DeblockPicture(video); + + /* update the original frame array */ + encvid->prevCodedFrameNum = encvid->currInput->coding_order; + + /* store the encoded picture in the DPB buffer */ + StorePictureInDPB(avcHandle, video); + + if (video->currPic->isReference) + { + video->PrevRefFrameNum = video->sliceHdr->frame_num; + } + + /* update POC related variables */ + PostPOC(video); + + encvid->enc_state = AVCEnc_Analyzing_Frame; + status = AVCENC_PICTURE_READY; + + } + break; + default: + status = AVCENC_WRONG_STATE; + } + + return status; +} + +/* ======================================================================== */ +/* Function : PVAVCEncGetOverrunBuffer() */ +/* Purpose : To retrieve the overrun buffer. Check whether overrun buffer */ +/* is used or not before returning */ +/* In/out : */ +/* Return : Pointer to the internal overrun buffer. */ +/* Modified : */ +/* ======================================================================== */ +OSCL_EXPORT_REF uint8* PVAVCEncGetOverrunBuffer(AVCHandle* avcHandle) +{ + AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject; + AVCEncBitstream *bitstream = encvid->bitstream; + + if (bitstream->overrunBuffer == bitstream->bitstreamBuffer) /* OB is used */ + { + return encvid->overrunBuffer; + } + else + { + return NULL; + } +} + + +/* ======================================================================== */ +/* Function : PVAVCEncGetRecon() */ +/* Date : 4/29/2004 */ +/* Purpose : To retrieve the most recently encoded frame. */ +/* assume that user will make a copy if they want to hold on */ +/* to it. Otherwise, it is not guaranteed to be reserved. */ +/* Most applications prefer to see original frame rather than */ +/* reconstructed frame. So, we are staying aware from complex */ +/* buffering mechanism. If needed, can be added later. */ +/* In/out : */ +/* Return : AVCENC_SUCCESS for success. 
*/ +/* Modified : */ +/* ======================================================================== */ +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncGetRecon(AVCHandle *avcHandle, AVCFrameIO *recon) +{ + AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject; + AVCCommonObj *video = encvid->common; + AVCFrameStore *currFS = video->currFS; + + if (encvid == NULL) + { + return AVCENC_UNINITIALIZED; + } + + recon->YCbCr[0] = currFS->frame.Sl; + recon->YCbCr[1] = currFS->frame.Scb; + recon->YCbCr[2] = currFS->frame.Scr; + recon->height = currFS->frame.height; + recon->pitch = currFS->frame.pitch; + recon->disp_order = currFS->PicOrderCnt; + recon->coding_order = currFS->FrameNum; + recon->id = (uint32) currFS->base_dpb; /* use the pointer as the id */ + + currFS->IsOutputted |= 1; + + return AVCENC_SUCCESS; +} + +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncReleaseRecon(AVCHandle *avcHandle, AVCFrameIO *recon) +{ + OSCL_UNUSED_ARG(avcHandle); + OSCL_UNUSED_ARG(recon); + + return AVCENC_SUCCESS; //for now +} + +/* ======================================================================== */ +/* Function : PVAVCCleanUpEncoder() */ +/* Date : 4/18/2004 */ +/* Purpose : To clean up memories allocated by PVAVCEncInitialize() */ +/* In/out : */ +/* Return : AVCENC_SUCCESS for success. */ +/* Modified : */ +/* ======================================================================== */ +OSCL_EXPORT_REF void PVAVCCleanUpEncoder(AVCHandle *avcHandle) +{ + AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; + AVCCommonObj *video; + uint32 *userData = (uint32*) avcHandle->userData; + + if (encvid != NULL) + { + CleanMotionSearchModule(avcHandle); + + CleanupRateControlModule(avcHandle); + + if (encvid->functionPointer != NULL) + { + avcHandle->CBAVC_Free(userData, (int)encvid->functionPointer); + } + + if (encvid->min_cost) + { + avcHandle->CBAVC_Free(userData, (int)encvid->min_cost); + } + + if (encvid->intraSearch) + { + avcHandle->CBAVC_Free(userData, (int)encvid->intraSearch); + } + + if (encvid->mot16x16) + { + avcHandle->CBAVC_Free(userData, (int)encvid->mot16x16); + } + + if (encvid->rateCtrl) + { + avcHandle->CBAVC_Free(userData, (int)encvid->rateCtrl); + } + + if (encvid->overrunBuffer) + { + avcHandle->CBAVC_Free(userData, (int)encvid->overrunBuffer); + } + + video = encvid->common; + if (video != NULL) + { + if (video->MbToSliceGroupMap) + { + avcHandle->CBAVC_Free(userData, (int)video->MbToSliceGroupMap); + } + if (video->mblock != NULL) + { + avcHandle->CBAVC_Free(userData, (int)video->mblock); + } + if (video->decPicBuf != NULL) + { + CleanUpDPB(avcHandle, video); + avcHandle->CBAVC_Free(userData, (int)video->decPicBuf); + } + if (video->sliceHdr != NULL) + { + avcHandle->CBAVC_Free(userData, (int)video->sliceHdr); + } + if (video->currPicParams != NULL) + { + if (video->currPicParams->slice_group_id) + { + avcHandle->CBAVC_Free(userData, (int)video->currPicParams->slice_group_id); + } + + avcHandle->CBAVC_Free(userData, (int)video->currPicParams); + } + if (video->currSeqParams != NULL) + { + avcHandle->CBAVC_Free(userData, (int)video->currSeqParams); + } + if (encvid->bitstream != NULL) + { + avcHandle->CBAVC_Free(userData, (int)encvid->bitstream); + } + if (video != NULL) + { + avcHandle->CBAVC_Free(userData, (int)video); + } + } + + avcHandle->CBAVC_Free(userData, (int)encvid); + + avcHandle->AVCObject = NULL; + } + + return ; +} + +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncUpdateBitRate(AVCHandle *avcHandle, uint32 bitrate) +{ + OSCL_UNUSED_ARG(avcHandle); + OSCL_UNUSED_ARG(bitrate); + + return 
AVCENC_FAIL; +} + +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncUpdateFrameRate(AVCHandle *avcHandle, uint32 num, uint32 denom) +{ + OSCL_UNUSED_ARG(avcHandle); + OSCL_UNUSED_ARG(num); + OSCL_UNUSED_ARG(denom); + + return AVCENC_FAIL; +} + +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncUpdateIDRInterval(AVCHandle *avcHandle, int IDRInterval) +{ + OSCL_UNUSED_ARG(avcHandle); + OSCL_UNUSED_ARG(IDRInterval); + + return AVCENC_FAIL; +} + +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncIDRRequest(AVCHandle *avcHandle) +{ + OSCL_UNUSED_ARG(avcHandle); + + return AVCENC_FAIL; +} + +OSCL_EXPORT_REF AVCEnc_Status PVAVCEncUpdateIMBRefresh(AVCHandle *avcHandle, int numMB) +{ + OSCL_UNUSED_ARG(avcHandle); + OSCL_UNUSED_ARG(numMB); + + return AVCENC_FAIL; +} + +void PVAVCEncGetFrameStats(AVCHandle *avcHandle, AVCEncFrameStats *avcStats) +{ + AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; + AVCRateControl *rateCtrl = encvid->rateCtrl; + + avcStats->avgFrameQP = GetAvgFrameQP(rateCtrl); + avcStats->numIntraMBs = encvid->numIntraMB; + + return ; +} + + + diff --git a/media/libstagefright/codecs/avc/enc/src/avcenc_api.h b/media/libstagefright/codecs/avc/enc/src/avcenc_api.h new file mode 100644 index 0000000..628dec6 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/avcenc_api.h @@ -0,0 +1,320 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +/** +This file contains application function interfaces to the AVC encoder library +and necessary type defitionitions and enumerations. +@publishedAll +*/ + +#ifndef AVCENC_API_H_INCLUDED +#define AVCENC_API_H_INCLUDED + +#ifndef AVCAPI_COMMON_H_INCLUDED +#include "avcapi_common.h" +#endif + +/** + This enumeration is used for the status returned from the library interface. 
+*/ +typedef enum +{ + /** + Fail information, need to add more error code for more specific info + */ + AVCENC_TRAILINGONES_FAIL = -35, + AVCENC_SLICE_EMPTY = -34, + AVCENC_POC_FAIL = -33, + AVCENC_CONSECUTIVE_NONREF = -32, + AVCENC_CABAC_FAIL = -31, + AVCENC_PRED_WEIGHT_TAB_FAIL = -30, + AVCENC_DEC_REF_PIC_MARK_FAIL = -29, + AVCENC_SPS_FAIL = -28, + AVCENC_BITSTREAM_BUFFER_FULL = -27, + AVCENC_BITSTREAM_INIT_FAIL = -26, + AVCENC_CHROMA_QP_FAIL = -25, + AVCENC_INIT_QS_FAIL = -24, + AVCENC_INIT_QP_FAIL = -23, + AVCENC_WEIGHTED_BIPRED_FAIL = -22, + AVCENC_INVALID_INTRA_PERIOD = -21, + AVCENC_INVALID_CHANGE_RATE = -20, + AVCENC_INVALID_BETA_OFFSET = -19, + AVCENC_INVALID_ALPHA_OFFSET = -18, + AVCENC_INVALID_DEBLOCK_IDC = -17, + AVCENC_INVALID_REDUNDANT_PIC = -16, + AVCENC_INVALID_FRAMERATE = -15, + AVCENC_INVALID_NUM_SLICEGROUP = -14, + AVCENC_INVALID_POC_LSB = -13, + AVCENC_INVALID_NUM_REF = -12, + AVCENC_INVALID_FMO_TYPE = -11, + AVCENC_ENCPARAM_MEM_FAIL = -10, + AVCENC_LEVEL_NOT_SUPPORTED = -9, + AVCENC_LEVEL_FAIL = -8, + AVCENC_PROFILE_NOT_SUPPORTED = -7, + AVCENC_TOOLS_NOT_SUPPORTED = -6, + AVCENC_WRONG_STATE = -5, + AVCENC_UNINITIALIZED = -4, + AVCENC_ALREADY_INITIALIZED = -3, + AVCENC_NOT_SUPPORTED = -2, + AVCENC_MEMORY_FAIL = AVC_MEMORY_FAIL, + AVCENC_FAIL = AVC_FAIL, + /** + Generic success value + */ + AVCENC_SUCCESS = AVC_SUCCESS, + AVCENC_PICTURE_READY = 2, + AVCENC_NEW_IDR = 3, /* upon getting this, users have to call PVAVCEncodeSPS and PVAVCEncodePPS to get a new SPS and PPS*/ + AVCENC_SKIPPED_PICTURE = 4 /* continuable error message */ + +} AVCEnc_Status; + +#define MAX_NUM_SLICE_GROUP 8 /* maximum for all the profiles */ + +/** +This structure contains the encoding parameters. +*/ +typedef struct tagAVCEncParam +{ + /* if profile/level is set to zero, encoder will choose the closest one for you */ + AVCProfile profile; /* profile of the bitstream to be compliant with*/ + AVCLevel level; /* level of the bitstream to be compliant with*/ + + int width; /* width of an input frame in pixel */ + int height; /* height of an input frame in pixel */ + + int poc_type; /* picture order count mode, 0,1 or 2 */ + /* for poc_type == 0 */ + uint log2_max_poc_lsb_minus_4; /* specify maximum value of POC Lsb, range 0..12*/ + /* for poc_type == 1 */ + uint delta_poc_zero_flag; /* delta POC always zero */ + int offset_poc_non_ref; /* offset for non-reference pic */ + int offset_top_bottom; /* offset between top and bottom field */ + uint num_ref_in_cycle; /* number of reference frame in one cycle */ + int *offset_poc_ref; /* array of offset for ref pic, dimension [num_ref_in_cycle] */ + + int num_ref_frame; /* number of reference frame used */ + int num_slice_group; /* number of slice group */ + int fmo_type; /* 0: interleave, 1: dispersed, 2: foreground with left-over + 3: box-out, 4:raster scan, 5:wipe, 6:explicit */ + /* for fmo_type == 0 */ + uint run_length_minus1[MAX_NUM_SLICE_GROUP]; /* array of size num_slice_group, in round robin fasion */ + /* fmo_type == 2*/ + uint top_left[MAX_NUM_SLICE_GROUP-1]; /* array of co-ordinates of each slice_group */ + uint bottom_right[MAX_NUM_SLICE_GROUP-1]; /* except the last one which is the background. 
*/ + /* fmo_type == 3,4,5 */ + AVCFlag change_dir_flag; /* slice group change direction flag */ + uint change_rate_minus1; + /* fmo_type == 6 */ + uint *slice_group; /* array of size MBWidth*MBHeight */ + + AVCFlag db_filter; /* enable deblocking loop filter */ + int disable_db_idc; /* 0: filter everywhere, 1: no filter, 2: no filter across slice boundary */ + int alpha_offset; /* alpha offset range -6,...,6 */ + int beta_offset; /* beta offset range -6,...,6 */ + + AVCFlag constrained_intra_pred; /* constrained intra prediction flag */ + + AVCFlag auto_scd; /* scene change detection on or off */ + int idr_period; /* idr frame refresh rate in number of target encoded frame (no concept of actual time).*/ + int intramb_refresh; /* minimum number of intra MB per frame */ + AVCFlag data_par; /* enable data partitioning */ + + AVCFlag fullsearch; /* enable full-pel full-search mode */ + int search_range; /* search range for motion vector in (-search_range,+search_range) pixels */ + AVCFlag sub_pel; /* enable sub pel prediction */ + AVCFlag submb_pred; /* enable sub MB partition mode */ + AVCFlag rdopt_mode; /* RD optimal mode selection */ + AVCFlag bidir_pred; /* enable bi-directional for B-slice, this flag forces the encoder to encode + any frame with POC less than the previously encoded frame as a B-frame. + If it's off, then such frames will remain P-frame. */ + + AVCFlag rate_control; /* rate control enable, on: RC on, off: constant QP */ + int initQP; /* initial QP */ + uint32 bitrate; /* target encoding bit rate in bits/second */ + uint32 CPB_size; /* coded picture buffer in number of bits */ + uint32 init_CBP_removal_delay; /* initial CBP removal delay in msec */ + + uint32 frame_rate; /* frame rate in the unit of frames per 1000 second */ + /* note, frame rate is only needed by the rate control, AVC is timestamp agnostic. */ + + AVCFlag out_of_band_param_set; /* flag to set whether param sets are to be retrieved up front or not */ + + AVCFlag use_overrun_buffer; /* do not throw away the frame if output buffer is not big enough. + copy excess bits to the overrun buffer */ +} AVCEncParams; + + +/** +This structure contains current frame encoding statistics for debugging purpose. +*/ +typedef struct tagAVCEncFrameStats +{ + int avgFrameQP; /* average frame QP */ + int numIntraMBs; /* number of intra MBs */ + int numFalseAlarm; + int numMisDetected; + int numDetected; + +} AVCEncFrameStats; + +#ifdef __cplusplus +extern "C" +{ +#endif + /** THE FOLLOWINGS ARE APIS */ + /** + This function initializes the encoder library. It verifies the validity of the + encoding parameters against the specified profile/level and the list of supported + tools by this library. It allocates necessary memories required to perform encoding. + For re-encoding application, if users want to setup encoder in a more precise way, + users can give the external SPS and PPS to the encoder to follow. + \param "avcHandle" "Handle to the AVC encoder library object." + \param "encParam" "Pointer to the encoding parameter structure." + \param "extSPS" "External SPS used for re-encoding purpose. NULL if not present" + \param "extPPS" "External PPS used for re-encoding purpose. NULL if not present" + \return "AVCENC_SUCCESS for success, + AVCENC_NOT_SUPPORTED for the use of unsupported tools, + AVCENC_MEMORY_FAIL for memory allocation failure, + AVCENC_FAIL for generic failure." 
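    As an illustrative (non-normative) example, a typical calling sequence,
    assuming the application has already filled in the AVCHandle callbacks and
    userData and the AVCEncParams fields, and where handle, param, inputFrame,
    buf and bufSize are application-provided (hypothetical) names:

        AVCEnc_Status s = PVAVCEncInitialize(&handle, &param, NULL, NULL);
        // for each captured frame (AVCENC_SKIPPED_PICTURE means it is not coded):
        s = PVAVCEncSetInput(&handle, &inputFrame);
        while (s == AVCENC_SUCCESS || s == AVCENC_NEW_IDR)
        {
            uint size = bufSize;                            // physical size of buf on input
            int type;
            s = PVAVCEncodeNAL(&handle, buf, &size, &type); // emits SPS/PPS/slice NALs
            if (s == AVCENC_PICTURE_READY) break;           // current frame is complete
        }
        PVAVCCleanUpEncoder(&handle);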
+ */ + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncInitialize(AVCHandle *avcHandle, AVCEncParams *encParam, void* extSPS, void* extPPS); + + + /** + Since the output buffer size is not known prior to encoding a frame, users need to + allocate big enough buffer otherwise, that frame will be dropped. This function returns + the size of the output buffer to be allocated by the users that guarantees to hold one frame. + It follows the CPB spec for a particular level. However, when the users set use_overrun_buffer + flag, this API is useless as excess output bits are saved in the overrun buffer waiting to be + copied out in small chunks, i.e. users can allocate any size of output buffer. + \param "avcHandle" "Handle to the AVC encoder library object." + \param "size" "Pointer to the size to be modified." + \return "AVCENC_SUCCESS for success, AVCENC_UNINITIALIZED when level is not known. + */ + + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncGetMaxOutputBufferSize(AVCHandle *avcHandle, int* size); + + /** + Users call this function to provide an input structure to the encoder library which will keep + a list of input structures it receives in case the users call this function many time before + calling PVAVCEncodeSlice. The encoder library will encode them according to the frame_num order. + Users should not modify the content of a particular frame until this frame is encoded and + returned thru CBAVCEnc_ReturnInput() callback function. + \param "avcHandle" "Handle to the AVC encoder library object." + \param "input" "Pointer to the input structure." + \return "AVCENC_SUCCESS for success, + AVCENC_FAIL if the encoder is not in the right state to take a new input frame. + AVCENC_NEW_IDR for the detection or determination of a new IDR, with this status, + the returned NAL is an SPS NAL, + AVCENC_NO_PICTURE if the input frame coding timestamp is too early, users must + get next frame or adjust the coding timestamp." + */ + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncSetInput(AVCHandle *avcHandle, AVCFrameIO *input); + + /** + This function is called to encode a NAL unit which can be an SPS NAL, a PPS NAL or + a VCL (video coding layer) NAL which contains one slice of data. It could be a + fixed number of macroblocks, as specified in the encoder parameters set, or the + maximum number of macroblocks fitted into the given input argument "buffer". The + input frame is taken from the oldest unencoded input frame retrieved by users by + PVAVCEncGetInput API. + \param "avcHandle" "Handle to the AVC encoder library object." + \param "buffer" "Pointer to the output AVC bitstream buffer, the format will be EBSP, + not RBSP." + \param "buf_nal_size" "As input, the size of the buffer in bytes. + This is the physical limitation of the buffer. As output, the size of the EBSP." + \param "nal_type" "Pointer to the NAL type of the returned buffer." + \return "AVCENC_SUCCESS for success of encoding one slice, + AVCENC_PICTURE_READY for the completion of a frame encoding, + AVCENC_FAIL for failure (this should not occur, though)." + */ + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncodeNAL(AVCHandle *avcHandle, uint8 *buffer, uint *buf_nal_size, int *nal_type); + + /** + This function sniffs the nal_unit_type such that users can call corresponding APIs. + This function is identical to PVAVCDecGetNALType() in the decoder. + \param "bitstream" "Pointer to the beginning of a NAL unit (start with forbidden_zero_bit, etc.)." + \param "size" "size of the bitstream (NumBytesInNALunit + 1)." 
+ \param "nal_unit_type" "Pointer to the return value of nal unit type." + \return "AVCENC_SUCCESS if success, AVCENC_FAIL otherwise." + */ + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncGetNALType(uint8 *bitstream, int size, int *nal_type, int *nal_ref_idc); + + /** + This function returns the pointer to internal overrun buffer. Users can call this to query + whether the overrun buffer has been used to encode the current NAL. + \param "avcHandle" "Pointer to the handle." + \return "Pointer to overrun buffer if it is used, otherwise, NULL." + */ + OSCL_IMPORT_REF uint8* PVAVCEncGetOverrunBuffer(AVCHandle* avcHandle); + + /** + This function returns the reconstructed frame of the most recently encoded frame. + Note that this frame is not returned to the users yet. Users should only read the + content of this frame. + \param "avcHandle" "Handle to the AVC encoder library object." + \param "output" "Pointer to the input structure." + \return "AVCENC_SUCCESS for success, AVCENC_NO_PICTURE if no picture to be outputted." + */ + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncGetRecon(AVCHandle *avcHandle, AVCFrameIO *recon); + + /** + This function is used to return the recontructed frame back to the AVC encoder library + in order to be re-used for encoding operation. If users want the content of it to remain + unchanged for a long time, they should make a copy of it and release the memory back to + the encoder. The encoder relies on the id element in the AVCFrameIO structure, + thus users should not change the id value. + \param "avcHandle" "Handle to the AVC decoder library object." + \param "output" "Pointer to the AVCFrameIO structure." + \return "AVCENC_SUCCESS for success, AVCENC_FAIL for fail for id not found." + */ + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncReleaseRecon(AVCHandle *avcHandle, AVCFrameIO *recon); + + /** + This function performs clean up operation including memory deallocation. + The encoder will also clear the list of input structures it has not released. + This implies that users must keep track of the number of input structure they have allocated + and free them accordingly. + \param "avcHandle" "Handle to the AVC encoder library object." + */ + OSCL_IMPORT_REF void PVAVCCleanUpEncoder(AVCHandle *avcHandle); + + /** + This function extracts statistics of the current frame. If the encoder has not finished + with the current frame, the result is not accurate. + \param "avcHandle" "Handle to the AVC encoder library object." + \param "avcStats" "Pointer to AVCEncFrameStats structure." + \return "void." + */ + void PVAVCEncGetFrameStats(AVCHandle *avcHandle, AVCEncFrameStats *avcStats); + + /** + These functions are used for the modification of encoding parameters. + To be polished. 
+ */ + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncUpdateBitRate(AVCHandle *avcHandle, uint32 bitrate); + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncUpdateFrameRate(AVCHandle *avcHandle, uint32 num, uint32 denom); + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncUpdateIDRInterval(AVCHandle *avcHandle, int IDRInterval); + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncIDRRequest(AVCHandle *avcHandle); + OSCL_IMPORT_REF AVCEnc_Status PVAVCEncUpdateIMBRefresh(AVCHandle *avcHandle, int numMB); + + +#ifdef __cplusplus +} +#endif +#endif /* _AVCENC_API_H_ */ + diff --git a/media/libstagefright/codecs/avc/enc/src/avcenc_int.h b/media/libstagefright/codecs/avc/enc/src/avcenc_int.h new file mode 100644 index 0000000..3fe08a1 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/avcenc_int.h @@ -0,0 +1,471 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +/** +This file contains application function interfaces to the AVC encoder library +and necessary type defitionitions and enumerations. +@publishedAll +*/ + +#ifndef AVCENC_INT_H_INCLUDED +#define AVCENC_INT_H_INCLUDED + +#ifndef AVCINT_COMMON_H_INCLUDED +#include "avcint_common.h" +#endif +#ifndef AVCENC_API_H_INCLUDED +#include "avcenc_api.h" +#endif + +typedef float OsclFloat; + +/* Definition for the structures below */ +#define DEFAULT_ATTR 0 /* default memory attribute */ +#define MAX_INPUT_FRAME 30 /* some arbitrary number, it can be much higher than this. */ +#define MAX_REF_FRAME 16 /* max size of the RefPicList0 and RefPicList1 */ +#define MAX_REF_PIC_LIST 33 + +#define MIN_QP 0 +#define MAX_QP 51 +#define SHIFT_QP 12 +#define LAMBDA_ACCURACY_BITS 16 +#define LAMBDA_FACTOR(lambda) ((int)((double)(1<<LAMBDA_ACCURACY_BITS)*lambda+0.5)) +#define WEIGHTED_COST(factor,bits) (((factor)*(bits))>>LAMBDA_ACCURACY_BITS) +#define MV_COST(f,s,cx,cy,px,py) (WEIGHTED_COST(f,mvbits[((cx)<<(s))-px]+mvbits[((cy)<<(s))-py])) +#define MV_COST_S(f,cx,cy,px,py) (WEIGHTED_COST(f,mvbits[cx-px]+mvbits[cy-py])) + +/* for sub-pel search and interpolation */ +#define SUBPEL_PRED_BLK_SIZE 576 // 24x24 +#define REF_CENTER 75 +#define V2Q_H0Q 1 +#define V0Q_H2Q 2 +#define V2Q_H2Q 3 + +/* +#define V3Q_H0Q 1 +#define V3Q_H1Q 2 +#define V0Q_H1Q 3 +#define V1Q_H1Q 4 +#define V1Q_H0Q 5 +#define V1Q_H3Q 6 +#define V0Q_H3Q 7 +#define V3Q_H3Q 8 +#define V2Q_H3Q 9 +#define V2Q_H0Q 10 +#define V2Q_H1Q 11 +#define V2Q_H2Q 12 +#define V3Q_H2Q 13 +#define V0Q_H2Q 14 +#define V1Q_H2Q 15 +*/ + + +#define DEFAULT_OVERRUN_BUFFER_SIZE 1000 + +// associated with the above cost model +const uint8 COEFF_COST[2][16] = +{ + {3, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9} +}; + + + +//!
convert from H.263 QP to H.264 quant given by: quant=pow(2,QP/6) +const int QP2QUANT[40] = +{ + 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 4, 4, 4, 5, 6, + 6, 7, 8, 9, 10, 11, 13, 14, + 16, 18, 20, 23, 25, 29, 32, 36, + 40, 45, 51, 57, 64, 72, 81, 91 +}; + + +/** +This enumeration keeps track of the internal status of the encoder whether it is doing +something. The encoding flow follows the order in which these states are. +@publishedAll +*/ +typedef enum +{ + AVCEnc_Initializing = 0, + AVCEnc_Encoding_SPS, + AVCEnc_Encoding_PPS, + AVCEnc_Analyzing_Frame, + AVCEnc_WaitingForBuffer, // pending state + AVCEnc_Encoding_Frame, +} AVCEnc_State ; + +/** +Bitstream structure contains bitstream related parameters such as the pointer +to the buffer, the current byte position and bit position. The content of the +bitstreamBuffer will be in EBSP format as the emulation prevention codes are +automatically inserted as the RBSP is recorded. +@publishedAll +*/ +typedef struct tagEncBitstream +{ + uint8 *bitstreamBuffer; /* pointer to buffer memory */ + int buf_size; /* size of the buffer memory */ + int write_pos; /* next position to write to bitstreamBuffer */ + int count_zeros; /* count number of consecutive zero */ + uint current_word; /* byte-swapped (MSB left) current word to write to buffer */ + int bit_left; /* number of bit left in current_word */ + uint8 *overrunBuffer; /* extra output buffer to prevent current skip due to output buffer overrun*/ + int oBSize; /* size of allocated overrun buffer */ + void *encvid; /* pointer to the main object */ + +} AVCEncBitstream; + +/** +This structure is used for rate control purpose and other performance related control +variables such as, RD cost, statistics, motion search stuffs, etc. +should be in this structure. +@publishedAll +*/ + + +typedef struct tagRDInfo +{ + int QP; + int actual_bits; + OsclFloat mad; + OsclFloat R_D; +} RDInfo; + +typedef struct tagMultiPass +{ + /* multipass rate control data */ + int target_bits; /* target bits for current frame, = rc->T */ + int actual_bits; /* actual bits for current frame obtained after encoding, = rc->Rc*/ + int QP; /* quantization level for current frame, = rc->Qc*/ + int prev_QP; /* quantization level for previous frame */ + int prev_prev_QP; /* quantization level for previous frame before last*/ + OsclFloat mad; /* mad for current frame, = video->avgMAD*/ + int bitrate; /* bitrate for current frame */ + OsclFloat framerate; /* framerate for current frame*/ + + int nRe_Quantized; /* control variable for multipass encoding, */ + /* 0 : first pass */ + /* 1 : intermediate pass(quantization and VLC loop only) */ + /* 2 : final pass(de-quantization, idct, etc) */ + /* 3 : macroblock level rate control */ + + int encoded_frames; /* counter for all encoded frames */ + int re_encoded_frames; /* counter for all multipass encoded frames*/ + int re_encoded_times; /* counter for all times of multipass frame encoding */ + + /* Multiple frame prediction*/ + RDInfo **pRDSamples; /* pRDSamples[30][32], 30->30fps, 32 -> 5 bit quantizer, 32 candidates*/ + int framePos; /* specific position in previous multiple frames*/ + int frameRange; /* number of overall previous multiple frames */ + int samplesPerFrame[30]; /* number of samples per frame, 30->30fps */ + + /* Bit allocation for scene change frames and high motion frames */ + OsclFloat sum_mad; + int counter_BTsrc; /* BT = Bit Transfer, bit transfer from low motion frames or less complicatedly compressed frames */ + int counter_BTdst; /* BT = Bit Transfer, bit transfer to 
scene change frames or high motion frames or more complicatedly compressed frames */ + OsclFloat sum_QP; + int diff_counter; /* diff_counter = -diff_counter_BTdst, or diff_counter_BTsrc */ + + /* For target bitrate or framerate update */ + OsclFloat target_bits_per_frame; /* = C = bitrate/framerate */ + OsclFloat target_bits_per_frame_prev; /* previous C */ + OsclFloat aver_mad; /* so-far average mad could replace sum_mad */ + OsclFloat aver_mad_prev; /* previous average mad */ + int overlapped_win_size; /* transition period of time */ + int encoded_frames_prev; /* previous encoded_frames */ +} MultiPass; + + +typedef struct tagdataPointArray +{ + int Qp; + int Rp; + OsclFloat Mp; /* for MB-based RC */ + struct tagdataPointArray *next; + struct tagdataPointArray *prev; +} dataPointArray; + +typedef struct tagAVCRateControl +{ + + /* these parameters are initialized by the users AVCEncParams */ + /* bitrate-robustness tradeoff */ + uint scdEnable; /* enable scene change detection */ + int idrPeriod; /* IDR period in number of frames */ + int intraMBRate; /* intra MB refresh rate per frame */ + uint dpEnable; /* enable data partitioning */ + + /* quality-complexity tradeoff */ + uint subPelEnable; /* enable quarter pel search */ + int mvRange; /* motion vector search range in +/- pixel */ + uint subMBEnable; /* enable sub MB prediction mode (4x4, 4x8, 8x4) */ + uint rdOptEnable; /* enable RD-opt mode selection */ + uint twoPass; /* flag for 2 pass encoding ( for future )*/ + uint bidirPred; /* bi-directional prediction for B-frame. */ + + uint rcEnable; /* enable rate control, '1' on, '0' const QP */ + int initQP; /* initial QP */ + + /* note the following 3 params are for HRD, these triplets can be a series + of triplets as the generalized HRD allows. SEI message must be generated in this case. */ + /* We no longer have to differentiate between CBR and VBR. The users to the + AVC encoder lib will do the mapping from CBR/VBR to these parameters. */ + int32 bitRate; /* target bit rate for the overall clip in bits/second*/ + int32 cpbSize; /* coded picture buffer size in bytes */ + int32 initDelayOffset; /* initial CBP removal delay in bits */ + + OsclFloat frame_rate; /* frame rate */ + int srcInterval; /* source frame rate in msec */ + int basicUnit; /* number of macroblocks per BU */ + + /* Then internal parameters for the operation */ + uint first_frame; /* a flag for the first frame */ + int lambda_mf; /* for example */ + int totalSAD; /* SAD of current frame */ + + /*******************************************/ + /* this part comes from MPEG4 rate control */ + int alpha; /* weight for I frame */ + int Rs; /*bit rate for the sequence (or segment) e.g., 24000 bits/sec */ + int Rc; /*bits used for the current frame. It is the bit count obtained after encoding. */ + int Rp; /*bits to be removed from the buffer per picture. */ + /*? is this the average one, or just the bits coded for the previous frame */ + int Rps; /*bit to be removed from buffer per src frame */ + OsclFloat Ts; /*number of seconds for the sequence (or segment). e.g., 10 sec */ + OsclFloat Ep; + OsclFloat Ec; /*mean absolute difference for the current frame after motion compensation.*/ + /*If the macroblock is intra coded, the original spatial pixel values are summed.*/ + int Qc; /*quantization level used for the current frame. 
*/ + int Nr; /*number of P frames remaining for encoding.*/ + int Rr; /*number of bits remaining for encoding this sequence (or segment).*/ + int Rr_Old; + int T; /*target bit to be used for the current frame.*/ + int S; /*number of bits used for encoding the previous frame.*/ + int Hc; /*header and motion vector bits used in the current frame. It includes all the information except to the residual information.*/ + int Hp; /*header and motion vector bits used in the previous frame. It includes all the information except to the residual information.*/ + int Ql; /*quantization level used in the previous frame */ + int Bs; /*buffer size e.g., R/2 */ + int B; /*current buffer level e.g., R/4 - start from the middle of the buffer */ + OsclFloat X1; + OsclFloat X2; + OsclFloat X11; + OsclFloat M; /*safe margin for the buffer */ + OsclFloat smTick; /*ratio of src versus enc frame rate */ + double remnant; /*remainder frame of src/enc frame for fine frame skipping */ + int timeIncRes; /* vol->timeIncrementResolution */ + + dataPointArray *end; /*quantization levels for the past (20) frames */ + + int frameNumber; /* ranging from 0 to 20 nodes*/ + int w; + int Nr_Original; + int Nr_Old, Nr_Old2; + int skip_next_frame; + int Qdep; /* smooth Q adjustment */ + int VBR_Enabled; + + int totalFrameNumber; /* total coded frames, for debugging!!*/ + + char oFirstTime; + + int numFrameBits; /* keep track of number of bits of the current frame */ + int NumberofHeaderBits; + int NumberofTextureBits; + int numMBHeaderBits; + int numMBTextureBits; + double *MADofMB; + int32 bitsPerFrame; + + /* BX rate control, something like TMN8 rate control*/ + + MultiPass *pMP; + + int TMN_W; + int TMN_TH; + int VBV_fullness; + int max_BitVariance_num; /* the number of the maximum bit variance within the given buffer with the unit of 10% of bitrate/framerate*/ + int encoded_frames; /* counter for all encoded frames */ + int low_bound; /* bound for underflow detection, usually low_bound=-Bs/2, but could be changed in H.263 mode */ + int VBV_fullness_offset; /* offset of VBV_fullness, usually is zero, but can be changed in H.263 mode*/ + /* End BX */ + +} AVCRateControl; + + +/** +This structure is for the motion vector information. */ +typedef struct tagMV +{ + int x; + int y; + uint sad; +} AVCMV; + +/** +This structure contains function pointers for different platform dependent implementation of +functions. */ +typedef struct tagAVCEncFuncPtr +{ + + int (*SAD_MB_HalfPel[4])(uint8*, uint8*, int, void *); + int (*SAD_Macroblock)(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + +} AVCEncFuncPtr; + +/** +This structure contains information necessary for correct padding. +*/ +typedef struct tagPadInfo +{ + int i; + int width; + int j; + int height; +} AVCPadInfo; + + +#ifdef HTFM +typedef struct tagHTFM_Stat +{ + int abs_dif_mad_avg; + uint countbreak; + int offsetArray[16]; + int offsetRef[16]; +} HTFM_Stat; +#endif + + +/** +This structure is the main object for AVC encoder library providing access to all +global variables. It is allocated at PVAVCInitEncoder and freed at PVAVCCleanUpEncoder. 
+@publishedAll +*/ +typedef struct tagEncObject +{ + + AVCCommonObj *common; + + AVCEncBitstream *bitstream; /* for current NAL */ + uint8 *overrunBuffer; /* extra output buffer to prevent current skip due to output buffer overrun*/ + int oBSize; /* size of allocated overrun buffer */ + + /* rate control */ + AVCRateControl *rateCtrl; /* pointer to the rate control structure */ + + /* encoding operation */ + AVCEnc_State enc_state; /* encoding state */ + + AVCFrameIO *currInput; /* pointer to the current input frame */ + + int currSliceGroup; /* currently encoded slice group id */ + + int level[24][16], run[24][16]; /* scratch memory */ + int leveldc[16], rundc[16]; /* for DC component */ + int levelcdc[16], runcdc[16]; /* for chroma DC component */ + int numcoefcdc[2]; /* number of coefficient for chroma DC */ + int numcoefdc; /* number of coefficients for DC component */ + + int qp_const; + int qp_const_c; + /********* intra prediction scratch memory **********************/ + uint8 pred_i16[AVCNumI16PredMode][256]; /* save prediction for MB */ + uint8 pred_i4[AVCNumI4PredMode][16]; /* save prediction for blk */ + uint8 pred_ic[AVCNumIChromaMode][128]; /* for 2 chroma */ + + int mostProbableI4Mode[16]; /* in raster scan order */ + /********* motion compensation related variables ****************/ + AVCMV *mot16x16; /* Saved motion vectors for 16x16 block*/ + AVCMV(*mot16x8)[2]; /* Saved motion vectors for 16x8 block*/ + AVCMV(*mot8x16)[2]; /* Saved motion vectors for 8x16 block*/ + AVCMV(*mot8x8)[4]; /* Saved motion vectors for 8x8 block*/ + + /********* subpel position **************************************/ + uint32 subpel_pred[SUBPEL_PRED_BLK_SIZE/*<<2*/]; /* all 16 sub-pel positions */ + uint8 *hpel_cand[9]; /* pointer to half-pel position */ + int best_hpel_pos; /* best position */ + uint8 qpel_cand[8][24*16]; /* pointer to quarter-pel position */ + int best_qpel_pos; + uint8 *bilin_base[9][4]; /* pointer to 4 position at top left of bilinear quarter-pel */ + + /* need for intra refresh rate */ + uint8 *intraSearch; /* Intra Array for MBs to be intra searched */ + uint firstIntraRefreshMBIndx; /* keep track for intra refresh */ + + int i4_sad; /* temporary for i4 mode SAD */ + int *min_cost; /* Minimum cost for the all MBs */ + int lambda_mode; /* Lagrange parameter for mode selection */ + int lambda_motion; /* Lagrange parameter for MV selection */ + + uint8 *mvbits_array; /* Table for bits spent in the cost funciton */ + uint8 *mvbits; /* An offset to the above array. 
*/ + + /* to speedup the SAD calculation */ + void *sad_extra_info; + uint8 currYMB[256]; /* interleaved current macroblock in HTFM order */ + +#ifdef HTFM + int nrmlz_th[48]; /* Threshold for fast SAD calculation using HTFM */ + HTFM_Stat htfm_stat; /* For statistics collection */ +#endif + + /* statistics */ + int numIntraMB; /* keep track of number of intra MB */ + + /* encoding complexity control */ + uint fullsearch_enable; /* flag to enable full-pel full-search */ + + /* misc.*/ + bool outOfBandParamSet; /* flag to enable out-of-band param set */ + + AVCSeqParamSet extSPS; /* for external SPS */ + AVCPicParamSet extPPS; /* for external PPS */ + + /* time control */ + uint32 prevFrameNum; /* previous frame number starting from modTimeRef */ + uint32 modTimeRef; /* Reference modTime update every I-Vop*/ + uint32 wrapModTime; /* Offset to modTime Ref, rarely used */ + + uint prevProcFrameNum; /* previously processed frame number, could be skipped */ + uint prevCodedFrameNum; /* previously encoded frame number */ + /* POC related variables */ + uint32 dispOrdPOCRef; /* reference POC is displayer order unit. */ + + /* Function pointers */ + AVCEncFuncPtr *functionPointer; /* store pointers to platform specific functions */ + + /* Application control data */ + AVCHandle *avcHandle; + + +} AVCEncObject; + + +#endif /*AVCENC_INT_H_INCLUDED*/ + diff --git a/media/libstagefright/codecs/avc/enc/src/avcenc_lib.h b/media/libstagefright/codecs/avc/enc/src/avcenc_lib.h new file mode 100644 index 0000000..17e28ef --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/avcenc_lib.h @@ -0,0 +1,1020 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +/** +This file contains declarations of internal functions for AVC decoder library. +@publishedAll +*/ +#ifndef AVCENC_LIB_H_INCLUDED +#define AVCENC_LIB_H_INCLUDED + +#ifndef AVCLIB_COMMON_H_INCLUDED +#include "avclib_common.h" +#endif +#ifndef AVCENC_INT_H_INCLUDED +#include "avcenc_int.h" +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif + /*------------- block.c -------------------------*/ + + /** + This function perform residue calculation, transform, quantize, inverse quantize, + inverse transform and residue compensation on a 4x4 block. + \param "encvid" "Pointer to AVCEncObject." + \param "blkidx" "raster scan block index of the current 4x4 block." + \param "cur" "Pointer to the reconstructed block." + \param "org" "Pointer to the original block." + \param "coef_cost" "Pointer to the coefficient cost to be filled in and returned." + \return "Number of non-zero coefficients." + */ + int dct_luma(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org, int *coef_cost); + + /** + This function performs IDCT on an INTER macroblock. + \param "video" "Pointer to AVCCommonObj." 
+ \param "curL" "Pointer to the origin of the macroblock on the current frame." + \param "currMB" "Pointer to the AVCMacroblock structure." + \param "picPitch" "Pitch of the current frame." + \return "void". + */ + void MBInterIdct(AVCCommonObj *video, uint8 *curL, AVCMacroblock *currMB, int picPitch); + + /** + This function perform residue calculation, transform, quantize, inverse quantize, + inverse transform and residue compensation on a macroblock. + \param "encvid" "Pointer to AVCEncObject." + \param "curL" "Pointer to the reconstructed MB." + \param "orgL" "Pointer to the original MB." + \return "void" + */ + void dct_luma_16x16(AVCEncObject *encvid, uint8 *curL, uint8 *orgL); + + /** + This function perform residue calculation, transform, quantize, inverse quantize, + inverse transform and residue compensation for chroma components of an MB. + \param "encvid" "Pointer to AVCEncObject." + \param "curC" "Pointer to the reconstructed MB." + \param "orgC" "Pointer to the original MB." + \param "cr" "Flag whether it is Cr or not." + \return "void" + */ + void dct_chroma(AVCEncObject *encvid, uint8 *curC, uint8 *orgC, int cr); + + /*----------- init.c ------------------*/ + /** + This function interprets the encoding parameters provided by users in encParam. + The results are kept in AVCEncObject, AVCSeqParamSet, AVCPicParamSet and AVCSliceHeader. + \param "encvid" "Pointer to AVCEncObject." + \param "encParam" "Pointer to AVCEncParam." + \param "extSPS" "External SPS template to be followed. NULL if not present." + \param "extPPS" "External PPS template to be followed. NULL if not present." + \return "see AVCEnc_Status." + */ + AVCEnc_Status SetEncodeParam(AVCHandle *avcHandle, AVCEncParams *encParam, + void *extSPS, void *extPPS); + + /** + This function verifies the encoding parameters whether they meet the set of supported + tool by a specific profile. If the profile is not set, it will just find the closest + profile instead of verifying it. + \param "video" "Pointer to AVCEncObject." + \param "seqParam" "Pointer to AVCSeqParamSet." + \param "picParam" "Pointer to AVCPicParamSet." + \return "AVCENC_SUCCESS if success, + AVCENC_PROFILE_NOT_SUPPORTED if the specified profile + is not supported by this version of the library, + AVCENC_TOOLS_NOT_SUPPORTED if any of the specified encoding tools are + not supported by the user-selected profile." + */ + AVCEnc_Status VerifyProfile(AVCEncObject *video, AVCSeqParamSet *seqParam, AVCPicParamSet *picParam); + + /** + This function verifies the encoding parameters whether they meet the requirement + for a specific level. If the level is not set, it will just find the closest + level instead of verifying it. + \param "video" "Pointer to AVCEncObject." + \param "seqParam" "Pointer to AVCSeqParamSet." + \param "picParam" "Pointer to AVCPicParamSet." + \return "AVCENC_SUCCESS if success, + AVCENC_LEVEL_NOT_SUPPORTED if the specified level + is not supported by this version of the library, + AVCENC_LEVEL_FAIL if any of the encoding parameters exceed + the range of the user-selected level." + */ + AVCEnc_Status VerifyLevel(AVCEncObject *video, AVCSeqParamSet *seqParam, AVCPicParamSet *picParam); + + /** + This funciton initializes the frame encoding by setting poc/frame_num related parameters. it + also performs motion estimation. + \param "encvid" "Pointer to the AVCEncObject." 
+ \return "AVCENC_SUCCESS if success, AVCENC_NO_PICTURE if there is no input picture + in the queue to encode, AVCENC_POC_FAIL or AVCENC_CONSECUTIVE_NONREF for POC + related errors, AVCENC_NEW_IDR if new IDR is detected." + */ + AVCEnc_Status InitFrame(AVCEncObject *encvid); + + /** + This function initializes slice header related variables and other variables necessary + for decoding one slice. + \param "encvid" "Pointer to the AVCEncObject." + \return "AVCENC_SUCCESS if success." + */ + AVCEnc_Status InitSlice(AVCEncObject *encvid); + + /*----------- header.c ----------------*/ + /** + This function performs bitstream encoding of the sequence parameter set NAL. + \param "encvid" "Pointer to the AVCEncObject." + \param "stream" "Pointer to AVCEncBitstream." + \return "AVCENC_SUCCESS if success or AVCENC_SPS_FAIL or others for unexpected failure which + should not occur. The SPS parameters should all be verified before this function is called." + */ + AVCEnc_Status EncodeSPS(AVCEncObject *encvid, AVCEncBitstream *stream); + + /** + This function encodes the VUI parameters into the sequence parameter set bitstream. + \param "stream" "Pointer to AVCEncBitstream." + \param "vui" "Pointer to AVCVUIParams." + \return "nothing." + */ + void EncodeVUI(AVCEncBitstream* stream, AVCVUIParams* vui); + + /** + This function encodes HRD parameters into the sequence parameter set bitstream + \param "stream" "Pointer to AVCEncBitstream." + \param "hrd" "Pointer to AVCHRDParams." + \return "nothing." + */ + void EncodeHRD(AVCEncBitstream* stream, AVCHRDParams* hrd); + + + /** + This function performs bitstream encoding of the picture parameter set NAL. + \param "encvid" "Pointer to the AVCEncObject." + \param "stream" "Pointer to AVCEncBitstream." + \return "AVCENC_SUCCESS if success or AVCENC_PPS_FAIL or others for unexpected failure which + should not occur. The SPS parameters should all be verified before this function is called." + */ + AVCEnc_Status EncodePPS(AVCEncObject *encvid, AVCEncBitstream *stream); + + /** + This function encodes slice header information which has been initialized or fabricated + prior to entering this funciton. + \param "encvid" "Pointer to the AVCEncObject." + \param "stream" "Pointer to AVCEncBitstream." + \return "AVCENC_SUCCESS if success or bitstream fail statuses." + */ + AVCEnc_Status EncodeSliceHeader(AVCEncObject *encvid, AVCEncBitstream *stream); + + /** + This function encodes reference picture list reordering relted syntax. + \param "video" "Pointer to AVCCommonObj." + \param "stream" "Pointer to AVCEncBitstream." + \param "sliceHdr" "Pointer to AVCSliceHdr." + \param "slice_type" "Value of slice_type - 5 if greater than 5." + \return "AVCENC_SUCCESS for success and AVCENC_FAIL otherwise." + */ + AVCEnc_Status ref_pic_list_reordering(AVCCommonObj *video, AVCEncBitstream *stream, AVCSliceHeader *sliceHdr, int slice_type); + + /** + This function encodes dec_ref_pic_marking related syntax. + \param "video" "Pointer to AVCCommonObj." + \param "stream" "Pointer to AVCEncBitstream." + \param "sliceHdr" "Pointer to AVCSliceHdr." + \return "AVCENC_SUCCESS for success and AVCENC_FAIL otherwise." + */ + AVCEnc_Status dec_ref_pic_marking(AVCCommonObj *video, AVCEncBitstream *stream, AVCSliceHeader *sliceHdr); + + /** + This function initializes the POC related variables and the POC syntax to be encoded + to the slice header derived from the disp_order and is_reference flag of the original + input frame to be encoded. + \param "video" "Pointer to the AVCEncObject." 
+ \return "AVCENC_SUCCESS if success, + AVCENC_POC_FAIL if the poc type is undefined or + AVCENC_CONSECUTIVE_NONREF if there are consecutive non-reference frame for POC type 2." + */ + AVCEnc_Status InitPOC(AVCEncObject *video); + + /** + This function performs POC related operation after a picture is decoded. + \param "video" "Pointer to AVCCommonObj." + \return "AVCENC_SUCCESS" + */ + AVCEnc_Status PostPOC(AVCCommonObj *video); + + /*----------- bitstream_io.c ----------------*/ + /** + This function initializes the bitstream structure with the information given by + the users. + \param "bitstream" "Pointer to the AVCEncBitstream structure." + \param "buffer" "Pointer to the unsigned char buffer for output." + \param "buf_size" "The size of the buffer in bytes." + \param "overrunBuffer" "Pointer to extra overrun buffer." + \param "oBSize" "Size of overrun buffer in bytes." + \return "AVCENC_SUCCESS if success, AVCENC_BITSTREAM_INIT_FAIL if fail" + */ + AVCEnc_Status BitstreamEncInit(AVCEncBitstream *bitstream, uint8 *buffer, int buf_size, + uint8 *overrunBuffer, int oBSize); + + /** + This function writes the data from the cache into the bitstream buffer. It also adds the + emulation prevention code if necessary. + \param "stream" "Pointer to the AVCEncBitstream structure." + \return "AVCENC_SUCCESS if success or AVCENC_BITSTREAM_BUFFER_FULL if fail." + */ + AVCEnc_Status AVCBitstreamSaveWord(AVCEncBitstream *stream); + + /** + This function writes the codeword into the cache which will eventually be written to + the bitstream buffer. + \param "stream" "Pointer to the AVCEncBitstream structure." + \param "nBits" "Number of bits in the codeword." + \param "code" "The codeword." + \return "AVCENC_SUCCESS if success or AVCENC_BITSTREAM_BUFFER_FULL if fail." + */ + AVCEnc_Status BitstreamWriteBits(AVCEncBitstream *stream, int nBits, uint code); + + /** + This function writes one bit of data into the cache which will eventually be written + to the bitstream buffer. + \param "stream" "Pointer to the AVCEncBitstream structure." + \param "code" "The codeword." + \return "AVCENC_SUCCESS if success or AVCENC_BITSTREAM_BUFFER_FULL if fail." + */ + AVCEnc_Status BitstreamWrite1Bit(AVCEncBitstream *stream, uint code); + + /** + This function adds trailing bits to the bitstream and reports back the final EBSP size. + \param "stream" "Pointer to the AVCEncBitstream structure." + \param "nal_size" "Output the final NAL size." + \return "AVCENC_SUCCESS if success or AVCENC_BITSTREAM_BUFFER_FULL if fail." + */ + AVCEnc_Status BitstreamTrailingBits(AVCEncBitstream *bitstream, uint *nal_size); + + /** + This function checks whether the current bit position is byte-aligned or not. + \param "stream" "Pointer to the bitstream structure." + \return "true if byte-aligned, false otherwise." + */ + bool byte_aligned(AVCEncBitstream *stream); + + + /** + This function checks the availability of overrun buffer and switches to use it when + normal bufffer is not big enough. + \param "stream" "Pointer to the bitstream structure." + \param "numExtraBytes" "Number of extra byte needed." + \return "AVCENC_SUCCESS or AVCENC_FAIL." + */ + AVCEnc_Status AVCBitstreamUseOverrunBuffer(AVCEncBitstream* stream, int numExtraBytes); + + + /*-------------- intra_est.c ---------------*/ + + /** This function performs intra/inter decision based on ABE. + \param "encvid" "Pointer to AVCEncObject." + \param "min_cost" "Best inter cost." + \param "curL" "Pointer to the current MB origin in reconstructed frame." 
+ \param "picPitch" "Pitch of the reconstructed frame." + \return "Boolean for intra mode." + */ + +//bool IntraDecisionABE(AVCEncObject *encvid, int min_cost, uint8 *curL, int picPitch); + bool IntraDecision(int *min_cost, uint8 *cur, int pitch, bool ave); + + /** + This function performs intra prediction mode search. + \param "encvid" "Pointer to AVCEncObject." + \param "mbnum" "Current MB number." + \param "curL" "Pointer to the current MB origin in reconstructed frame." + \param "picPitch" "Pitch of the reconstructed frame." + \return "void." + */ + void MBIntraSearch(AVCEncObject *encvid, int mbnum, uint8 *curL, int picPitch); + + /** + This function generates all the I16 prediction modes for an MB and keep it in + encvid->pred_i16. + \param "encvid" "Pointer to AVCEncObject." + \return "void" + */ + void intrapred_luma_16x16(AVCEncObject *encvid); + + /** + This function calculate the cost of all I16 modes and compare them to get the minimum. + \param "encvid" "Pointer to AVCEncObject." + \param "orgY" "Pointer to the original luma MB." + \param "min_cost" "Pointer to the minimal cost so-far." + \return "void" + */ + void find_cost_16x16(AVCEncObject *encvid, uint8 *orgY, int *min_cost); + + /** + This function calculates the cost of each I16 mode. + \param "org" "Pointer to the original luma MB." + \param "org_pitch" "Stride size of the original frame." + \param "pred" "Pointer to the prediction values." + \param "min_cost" "Minimal cost so-far." + \return "Cost" + */ + + int cost_i16(uint8 *org, int org_pitch, uint8 *pred, int min_cost); + + /** + This function generates all the I4 prediction modes and select the best one + for all the blocks inside a macroblock.It also calls dct_luma to generate the reconstructed + MB, and transform coefficients to be encoded. + \param "encvid" "Pointer to AVCEncObject." + \param "min_cost" "Pointer to the minimal cost so-far." + \return "void" + */ + void mb_intra4x4_search(AVCEncObject *encvid, int *min_cost); + + /** + This function calculates the most probable I4 mode of a given 4x4 block + from neighboring informationaccording to AVC/H.264 standard. + \param "video" "Pointer to AVCCommonObj." + \param "blkidx" "The current block index." + \return "Most probable mode." + */ + int FindMostProbableI4Mode(AVCCommonObj *video, int blkidx); + + /** + This function is where a lot of actions take place in the 4x4 block level inside + mb_intra4x4_search. + \param "encvid" "Pointer to AVCEncObject." + \param "blkidx" "The current 4x4 block index." + \param "cur" "Pointer to the reconstructed block." + \param "org" "Pointer to the original block." + \return "Minimal cost, also set currMB->i4Mode" + */ + int blk_intra4x4_search(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org); + + /** + This function calculates the cost of a given I4 prediction mode. + \param "org" "Pointer to the original block." + \param "org_pitch" "Stride size of the original frame." + \param "pred" "Pointer to the prediction block. (encvid->pred_i4)" + \param "cost" "Pointer to the minimal cost (to be updated)." + \return "void" + */ + void cost_i4(uint8 *org, int org_pitch, uint8 *pred, uint16 *cost); + + /** + This function performs chroma intra search. Each mode is saved in encvid->pred_ic. + \param "encvid" "Pointer to AVCEncObject." + \return "void" + */ + void chroma_intra_search(AVCEncObject *encvid); + + /** + This function calculates the cost of a chroma prediction mode. + \param "orgCb" "Pointer to the original Cb block." 
+ \param "orgCr" "Pointer to the original Cr block." + \param "org_pitch" "Stride size of the original frame." + \param "pred" "Pointer to the prediction block (encvid->pred_ic)" + \param "mincost" "Minimal cost so far." + \return "Cost." + */ + + int SATDChroma(uint8 *orgCb, uint8 *orgCr, int org_pitch, uint8 *pred, int mincost); + + /*-------------- motion_comp.c ---------------*/ + + /** + This is a main function to peform inter prediction. + \param "encvid" "Pointer to AVCEncObject." + \param "video" "Pointer to AVCCommonObj." + \return "void". + */ + void AVCMBMotionComp(AVCEncObject *encvid, AVCCommonObj *video); + + + /** + This function is called for luma motion compensation. + \param "ref" "Pointer to the origin of a reference luma." + \param "picwidth" "Width of the picture." + \param "picheight" "Height of the picture." + \param "x_pos" "X-coordinate of the predicted block in quarter pel resolution." + \param "y_pos" "Y-coordinate of the predicted block in quarter pel resolution." + \param "pred" "Pointer to the output predicted block." + \param "pred_pitch" "Width of pred." + \param "blkwidth" "Width of the current partition." + \param "blkheight" "Height of the current partition." + \return "void" + */ + void eLumaMotionComp(uint8 *ref, int picwidth, int picheight, + int x_pos, int y_pos, + uint8 *pred, int pred_pitch, + int blkwidth, int blkheight); + + void eFullPelMC(uint8 *in, int inwidth, uint8 *out, int outpitch, + int blkwidth, int blkheight); + + void eHorzInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dx); + + void eHorzInterp2MC(int *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dx); + + void eHorzInterp3MC(uint8 *in, int inpitch, int *out, int outpitch, + int blkwidth, int blkheight); + + void eVertInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dy); + + void eVertInterp2MC(uint8 *in, int inpitch, int *out, int outpitch, + int blkwidth, int blkheight); + + void eVertInterp3MC(int *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dy); + + void eDiagonalInterpMC(uint8 *in1, uint8 *in2, int inpitch, + uint8 *out, int outpitch, + int blkwidth, int blkheight); + + void eChromaMotionComp(uint8 *ref, int picwidth, int picheight, + int x_pos, int y_pos, uint8 *pred, int pred_pitch, + int blkwidth, int blkheight); + + void eChromaDiagonalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight); + + void eChromaHorizontalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight); + + void eChromaVerticalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight); + + void eChromaFullMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight); + + void eChromaVerticalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight); + + void eChromaHorizontalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight); + + void eChromaDiagonalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight); + + + /*-------------- motion_est.c ---------------*/ + + /** + Allocate and initialize arrays necessary for motion search algorithm. + \param "envid" "Pointer to AVCEncObject." 
+ \return "AVC_SUCCESS or AVC_MEMORY_FAIL." + */ + AVCEnc_Status InitMotionSearchModule(AVCHandle *avcHandle); + + /** + Clean up memory allocated in InitMotionSearchModule. + \param "envid" "Pointer to AVCEncObject." + \return "void." + */ + void CleanMotionSearchModule(AVCHandle *avcHandle); + + + /** + This function performs motion estimation of all macroblocks in a frame during the InitFrame. + The goal is to find the best MB partition for inter and find out if intra search is needed for + any MBs. This intra MB tendency can be used for scene change detection. + \param "encvid" "Pointer to AVCEncObject." + \return "void" + */ + void AVCMotionEstimation(AVCEncObject *encvid); + + /** + This function performs repetitive edge padding to the reference picture by adding 16 pixels + around the luma and 8 pixels around the chromas. + \param "refPic" "Pointer to the reference picture." + \return "void" + */ + void AVCPaddingEdge(AVCPictureData *refPic); + + /** + This function keeps track of intra refresh macroblock locations. + \param "encvid" "Pointer to the global array structure AVCEncObject." + \param "mblock" "Pointer to the array of AVCMacroblock structures." + \param "totalMB" "Total number of MBs in a frame." + \param "numRefresh" "Number of MB to be intra refresh in a single frame." + \return "void" + */ + void AVCRasterIntraUpdate(AVCEncObject *encvid, AVCMacroblock *mblock, int totalMB, int numRefresh); + +#ifdef HTFM + void InitHTFM(VideoEncData *encvid, HTFM_Stat *htfm_stat, double *newvar, int *collect); + void UpdateHTFM(AVCEncObject *encvid, double *newvar, double *exp_lamda, HTFM_Stat *htfm_stat); + void CalcThreshold(double pf, double exp_lamda[], int nrmlz_th[]); + void HTFMPrepareCurMB_AVC(AVCEncObject *encvid, HTFM_Stat *htfm_stat, uint8 *cur, int pitch); +#endif + + /** + This function reads the input MB into a smaller faster memory space to minimize the cache miss. + \param "encvid" "Pointer to the global AVCEncObject." + \param "cur" "Pointer to the original input macroblock." + \param "pitch" "Stride size of the input frame (luma)." + \return "void" + */ + void AVCPrepareCurMB(AVCEncObject *encvid, uint8 *cur, int pitch); + + /** + Performs motion vector search for a macroblock. + \param "encvid" "Pointer to AVCEncObject structure." + \param "cur" "Pointer to the current macroblock in the input frame." + \param "best_cand" "Array of best candidates (to be filled in and returned)." + \param "i0" "X-coordinate of the macroblock." + \param "j0" "Y-coordinate of the macroblock." + \param "type_pred" "Indicates the type of operations." + \param "FS_en" "Flag for fullsearch enable." + \param "hp_guess" "Guess for half-pel search." + \return "void" + */ + void AVCMBMotionSearch(AVCEncObject *encvid, uint8 *cur, uint8 *best_cand[], + int i0, int j0, int type_pred, int FS_en, int *hp_guess); + +//AVCEnc_Status AVCMBMotionSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum, +// int num_pass); + + /** + Perform full-pel exhaustive search around the predicted MV. + \param "encvid" "Pointer to AVCEncObject structure." + \param "prev" "Pointer to the reference frame." + \param "cur" "Pointer to the input macroblock." + \param "imin" "Pointer to minimal mv (x)." + \param "jmin" "Pointer to minimal mv (y)." + \param "ilow, ihigh, jlow, jhigh" "Lower bound on search range." + \param "cmvx, cmvy" "Predicted MV value." + + \return "The cost function of the best candidate." 
+ */ + int AVCFullSearch(AVCEncObject *encvid, uint8 *prev, uint8 *cur, + int *imin, int *jmin, int ilow, int ihigh, int jlow, int jhigh, + int cmvx, int cmvy); + + /** + Select candidates from neighboring blocks according to the type of the + prediction selection. + \param "mvx" "Pointer to the candidate, x-coordinate." + \param "mvy" "Pointer to the candidate, y-coordinate." + \param "num_can" "Pointer to the number of candidates returned." + \param "imb" "The MB index x-coordinate." + \param "jmb" "The MB index y-coordinate." + \param "type_pred" "Type of the prediction." + \param "cmvx, cmvy" "Pointer to predicted MV (modified version)." + \return "void." + */ + void AVCCandidateSelection(int *mvx, int *mvy, int *num_can, int imb, int jmb, + AVCEncObject *encvid, int type_pred, int *cmvx, int *cmvy); + + /** + Utility function to move the values in the array dn according to the new + location to avoid redundant calculation. + \param "dn" "Array of integer of size 9." + \param "new_loc" "New location index." + \return "void." + */ + void AVCMoveNeighborSAD(int dn[], int new_loc); + + /** + Find minimum index of dn. + \param "dn" "Array of integer of size 9." + \return "The index of dn with the smallest dn[] value." + */ + int AVCFindMin(int dn[]); + + + /*------------- findhalfpel.c -------------------*/ + + /** + Search for the best half-pel resolution MV around the full-pel MV. + \param "encvid" "Pointer to the global AVCEncObject structure." + \param "cur" "Pointer to the current macroblock." + \param "mot" "Pointer to the AVCMV array of the frame." + \param "ncand" "Pointer to the origin of the fullsearch result." + \param "xpos" "The current MB position in x." + \param "ypos" "The current MB position in y." + \param "hp_guess" "Input to help speedup the search." + \param "cmvx, cmvy" "Predicted motion vector use for mvcost." + \return "Minimal cost (SATD) without MV cost. (for rate control purpose)" + */ + int AVCFindHalfPelMB(AVCEncObject *encvid, uint8 *cur, AVCMV *mot, uint8 *ncand, + int xpos, int ypos, int hp_guess, int cmvx, int cmvy); + + /** + This function generates sub-pel pixels required to do subpel MV search. + \param "subpel_pred" "Pointer to 2-D array, each array for each position." + \param "ncand" "Pointer to the full-pel center position in ref frame." + \param "lx" "Pitch of the ref frame." + \return "void" + */ + void GenerateHalfPelPred(uint8 *subpel_pred, uint8 *ncand, int lx); + + /** + This function calculate vertical interpolation at half-point of size 4x17. + \param "dst" "Pointer to destination." + \param "ref" "Pointer to the starting reference pixel." + \return "void." + */ + void VertInterpWClip(uint8 *dst, uint8 *ref); + + /** + This function generates quarter-pel pixels around the best half-pel result + during the sub-pel MV search. + \param "bilin_base" "Array of pointers to be used as basis for q-pel interp." + \param "qpel_pred" "Array of pointers pointing to quarter-pel candidates." + \param "hpel_pos" "Best half-pel position at the center." + \return "void" + */ + void GenerateQuartPelPred(uint8 **bilin_base, uint8 *qpel_pred, int hpel_pos); + + /** + This function calculates the SATD of a subpel candidate. + \param "cand" "Pointer to a candidate." + \param "cur" "Pointer to the current block." + \param "dmin" "Min-so-far SATD." + \return "Sum of Absolute Transformed Difference." + */ + int SATD_MB(uint8 *cand, uint8 *cur, int dmin); + + /*------------- rate_control.c -------------------*/ + + /** This function is a utility function. 
It returns average QP of the previously encoded frame. + \param "rateCtrl" "Pointer to AVCRateControl structure." + \return "Average QP." + */ + int GetAvgFrameQP(AVCRateControl *rateCtrl); + + /** + This function takes the timestamp of the input and determine whether it should be encoded + or skipped. + \param "encvid" "Pointer to the AVCEncObject structure." + \param "rateCtrl" "Pointer to the AVCRateControl structure." + \param "modTime" "The 32 bit timestamp of the input frame." + \param "frameNum" "Pointer to the frame number if to be encoded." + \return "AVC_SUCCESS or else." + */ + AVCEnc_Status RCDetermineFrameNum(AVCEncObject *encvid, AVCRateControl *rateCtrl, uint32 modTime, uint *frameNum); + + /** + This function updates the buffer fullness when frames are dropped either by the + rate control algorithm or by the users to make sure that target bit rate is still met. + \param "video" "Pointer to the common object structure." + \param "rateCtrl" "Pointer to rate control structure." + \param "frameInc" "Difference of the current frame number and previous frame number." + \return "void." + */ + void RCUpdateBuffer(AVCCommonObj *video, AVCRateControl *rateCtrl, int frameInc); + + /** + This function initializes rate control module and allocates necessary bufferes to do the job. + \param "avcHandle" "Pointer to the encoder handle." + \return "AVCENC_SUCCESS or AVCENC_MEMORY_FAIL." + */ + AVCEnc_Status InitRateControlModule(AVCHandle *avcHandle); + + /** + This function frees buffers allocated in InitRateControlModule. + \param "avcHandle" "Pointer to the encoder handle." + \return "void." + */ + void CleanupRateControlModule(AVCHandle *avcHandle); + + /** + This function is called at the beginning of each GOP or the first IDR frame. It calculates + target bits for a GOP. + \param "encvid" "Pointer to the encoder object." + \return "void." + */ + void RCInitGOP(AVCEncObject *encvid); + + /** + This function calculates target bits for a particular frame. + \param "video" "Pointer to the AVCEncObject structure." + \return "void" + */ + void RCInitFrameQP(AVCEncObject *video); + + /** + This function calculates QP for the upcoming frame or basic unit. + \param "encvid" "Pointer to the encoder object." + \param "rateCtrl" "Pointer to the rate control object." + \return "QP value ranging from 0-51." + */ + int RCCalculateQP(AVCEncObject *encvid, AVCRateControl *rateCtrl); + + /** + This function translates the luma QP to chroma QP and calculates lambda based on QP. + \param "video" "Pointer to the AVCEncObject structure." + \return "void" + */ + void RCInitChromaQP(AVCEncObject *encvid); + + /** + This function is called before encoding each macroblock. + \param "encvid" "Pointer to the encoder object." + \return "void." + */ + void RCInitMBQP(AVCEncObject *encvid); + + /** + This function updates bits usage stats after encoding an macroblock. + \param "video" "Pointer to AVCCommonObj." + \param "rateCtrl" "Pointer to AVCRateControl." + \param "num_header_bits" "Number of bits used for MB header." + \param "num_texture_bits" "Number of bits used for MB texture." + \return "void" + */ + void RCPostMB(AVCCommonObj *video, AVCRateControl *rateCtrl, int num_header_bits, int num_texture_bits); + + /** + This function calculates the difference between prediction and original MB. + \param "encvid" "Pointer to the encoder object." + \param "currMB" "Pointer to the current macroblock structure." + \param "orgL" "Pointer to the original MB." 
+ \param "orgPitch" "Pointer to the original picture pitch." + \return "void." + */ + void RCCalculateMAD(AVCEncObject *encvid, AVCMacroblock *currMB, uint8 *orgL, int orgPitch); + + /** + Restore QP related parameters of previous MB when current MB is skipped. + \param "currMB" "Pointer to the current macroblock." + \param "video" "Pointer to the common video structure." + \param "encvid" "Pointer to the global encoding structure." + \return "void" + */ + void RCRestoreQP(AVCMacroblock *currMB, AVCCommonObj *video, AVCEncObject *encvid); + + /** + This function is called after done with a frame. + \param "encvid" "Pointer to the encoder object." + \return "AVCENC_SUCCESS or AVCENC_SKIPPED_PICTURE when bufer overflow (need to discard current frame)." + */ + AVCEnc_Status RCUpdateFrame(AVCEncObject *encvid); + + /*--------- residual.c -------------------*/ + + /** + This function encodes the intra pcm data and fill it in the corresponding location + on the current picture. + \param "video" "Pointer to AVCEncObject." + \return "AVCENC_SUCCESS if success, or else for bitstream errors." + */ + AVCEnc_Status EncodeIntraPCM(AVCEncObject *video); + + /** + This function performs CAVLC syntax encoding on the run and level information of the coefficients. + The level and run arrays are elements in AVCEncObject structure, populated by TransQuantZZ, + TransQuantIntraDC and TransQuantChromaDC functions. + \param "video" "Pointer to AVCEncObject." + \param "type" "One of AVCResidualType for a particular 4x4 block." + \param "bindx" "Block index or number of nonzero coefficients for AVC_Intra16DC and AVC_ChromaDC mode." + \param "currMB" "Pointer to the current macroblock structure." + \return "AVCENC_SUCCESS for success." + \Note "This function has 32-bit machine specific instruction!!!!" + */ + AVCEnc_Status enc_residual_block(AVCEncObject *encvid, AVCResidualType type, int bindx, AVCMacroblock *currMB); + + + /*------------- sad.c ---------------------------*/ + + + int AVCSAD_MB_HalfPel_Cxhyh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + int AVCSAD_MB_HalfPel_Cyh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + int AVCSAD_MB_HalfPel_Cxh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + int AVCSAD_Macroblock_C(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + +#ifdef HTFM /* 3/2/1, Hypothesis Testing Fast Matching */ + int AVCSAD_MB_HP_HTFM_Collectxhyh(uint8 *ref, uint8 *blk, int dmin_x, void *extra_info); + int AVCSAD_MB_HP_HTFM_Collectyh(uint8 *ref, uint8 *blk, int dmin_x, void *extra_info); + int AVCSAD_MB_HP_HTFM_Collectxh(uint8 *ref, uint8 *blk, int dmin_x, void *extra_info); + int AVCSAD_MB_HP_HTFMxhyh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + int AVCSAD_MB_HP_HTFMyh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + int AVCSAD_MB_HP_HTFMxh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + int AVCSAD_MB_HTFM_Collect(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); + int AVCSAD_MB_HTFM(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info); +#endif + + + /*------------- slice.c -------------------------*/ + + /** + This function performs the main encoding loop for a slice. + \param "encvid" "Pointer to AVCEncObject." + \return "AVCENC_SUCCESS for success, AVCENC_PICTURE_READY for end-of-picture and + AVCENC_FAIL or AVCENC_SLICE_EMPTY otherwise." + */ + AVCEnc_Status AVCEncodeSlice(AVCEncObject *encvid); + + /** + This function performs the main encoding operation for one macroblock. 
+ \param "video" "pointer to AVCEncObject." + \return "AVCENC_SUCCESS for success, or other bitstream related failure status." + */ + AVCEnc_Status EncodeMB(AVCEncObject *video); + + /** + This function calls prediction INTRA/INTER functions, transform, + quantization and zigzag scanning to get the run-level symbols. + \param "encvid" "pointer to AVCEncObject." + \param "curL" "pointer to Luma component of the current frame. + \param "curCb" "pointer to Cb component of the current frame. + \param "curCr" "pointer to Cr component of the current frame. + \return "void for now." + */ + void MBPredTransQuantZZ(AVCEncObject *encvid, uint8 *curL, uint8 *curCb, uint8 *curCr); + + /** + This function copies the content of the prediction MB into the reconstructed YUV + frame directly. + \param "curL" "Pointer to the destination Y component." + \param "curCb" "Pointer to the destination Cb component." + \param "curCr" "Pointer to the destination Cr component." + \param "predBlock" "Pointer to the prediction MB." + \param "picWidth" "The width of the frame." + \return "None." + */ + void Copy_MB(uint8 *curL, uint8 *curCb, uint8 *curCr, uint8 *predBlock, int picWidth); + + /** + This function encodes the mb_type, CBP, prediction mode, ref idx and MV. + \param "currMB" "Pointer to the current macroblock structure." + \param "video" "Pointer to the AVCEncObject structure." + \return "AVCENC_SUCCESS for success or else for fail." + */ + AVCEnc_Status EncodeMBHeader(AVCMacroblock *currMB, AVCEncObject *video); + + /** + This function finds the right mb_type for a macroblock given the mbMode, CBP, + NumPart, PredPartMode. + \param "currMB" "Pointer to the current macroblock structure." + \param "slice_type" "Value of the slice_type." + \return "mb_type." + */ + uint InterpretMBType(AVCMacroblock *currMB, int slice_type); + + /** + This function encodes the mb_pred part of the macroblock data. + \param "video" "Pointer to the AVCCommonObj structure." + \param "currMB" "Pointer to the current macroblock structure." + \param "stream" "Pointer to the AVCEncBitstream structure." + \return "AVCENC_SUCCESS for success or bitstream fail status." + */ + AVCEnc_Status mb_pred(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream); + + /** + This function encodes the sub_mb_pred part of the macroblock data. + \param "video" "Pointer to the AVCCommonObj structure." + \param "currMB" "Pointer to the current macroblock structure." + \param "stream" "Pointer to the AVCEncBitstream structure." + \return "AVCENC_SUCCESS for success or bitstream fail status." + */ + AVCEnc_Status sub_mb_pred(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream); + + /** + This function interprets the sub_mb_type and sets necessary information + when the slice type is AVC_P_SLICE. + in the macroblock structure. + \param "mblock" "Pointer to current AVCMacroblock." + \param "sub_mb_type" "From the syntax bitstream." + \return "void" + */ + void InterpretSubMBTypeP(AVCMacroblock *mblock, uint *sub_mb_type); + + /** + This function interprets the sub_mb_type and sets necessary information + when the slice type is AVC_B_SLICE. + in the macroblock structure. + \param "mblock" "Pointer to current AVCMacroblock." + \param "sub_mb_type" "From the syntax bitstream." + \return "void" + */ + void InterpretSubMBTypeB(AVCMacroblock *mblock, uint *sub_mb_type); + + /** + This function encodes intra 4x4 mode. It calculates the predicted I4x4 mode and the + remnant to be encoded. 
+ \param "video" "Pointer to AVCEncObject structure." + \param "currMB" "Pointer to the AVCMacroblock structure." + \param "stream" "Pointer to AVCEncBitstream sructure." + \return "AVCENC_SUCCESS for success." + */ + AVCEnc_Status EncodeIntra4x4Mode(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream); + + /*------------- vlc_encode.c -----------------------*/ + /** + This function encodes and writes a value into an Exp-Golomb codeword. + \param "bitstream" "Pointer to AVCEncBitstream." + \param "codeNum" "Pointer to the value of the codeNum." + \return "AVCENC_SUCCESS for success or bitstream error messages for fail." + */ + AVCEnc_Status ue_v(AVCEncBitstream *bitstream, uint codeNum); + + /** + This function maps and encodes signed Exp-Golomb codes. + \param "bitstream" "Pointer to AVCEncBitstream." + \param "value" "Pointer to syntax element value." + \return "AVCENC_SUCCESS or AVCENC_FAIL." + */ + AVCEnc_Status se_v(AVCEncBitstream *bitstream, int value); + + /** + This function maps and encodes truncated Exp-Golomb codes. + \param "bitstream" "Pointer to AVCEncBitstream." + \param "value" "Pointer to syntax element value." + \param "range" "Range of the value as input to determine the algorithm." + \return "AVCENC_SUCCESS or AVCENC_FAIL." + */ + AVCEnc_Status te_v(AVCEncBitstream *bitstream, uint value, uint range); + + /** + This function creates Exp-Golomb codeword from codeNum. + \param "bitstream" "Pointer to AVCEncBitstream." + \param "codeNum" "Pointer to the codeNum value." + \return "AVCENC_SUCCESS for success or bitstream error messages for fail." + */ + AVCEnc_Status SetEGBitstring(AVCEncBitstream *bitstream, uint codeNum); + + /** + This function performs CAVLC encoding of the CBP (coded block pattern) of a macroblock + by calling ue_v() and then mapping the CBP to the corresponding VLC codeNum. + \param "currMB" "Pointer to the current AVCMacroblock structure." + \param "stream" "Pointer to the AVCEncBitstream." + \return "void" + */ + AVCEnc_Status EncodeCBP(AVCMacroblock *currMB, AVCEncBitstream *stream); + + /** + This function encodes trailing ones and total coefficient. + \param "stream" "Pointer to the AVCEncBitstream." + \param "TrailingOnes" "The trailing one variable output." + \param "TotalCoeff" "The total coefficient variable output." + \param "nC" "Context for number of nonzero coefficient (prediction context)." + \return "AVCENC_SUCCESS for success or else for bitstream failure." + */ + AVCEnc_Status ce_TotalCoeffTrailingOnes(AVCEncBitstream *stream, int TrailingOnes, int TotalCoeff, int nC); + + /** + This function encodes trailing ones and total coefficient for chroma DC block. + \param "stream" "Pointer to the AVCEncBitstream." + \param "TrailingOnes" "The trailing one variable output." + \param "TotalCoeff" "The total coefficient variable output." + \return "AVCENC_SUCCESS for success or else for bitstream failure." + */ + AVCEnc_Status ce_TotalCoeffTrailingOnesChromaDC(AVCEncBitstream *stream, int TrailingOnes, int TotalCoeff); + + /** + This function encodes total_zeros value as in Table 9-7 and 9-8. + \param "stream" "Pointer to the AVCEncBitstream." + \param "TotalZeros" "The total_zeros value." + \param "TotalCoeff" "The total coefficient variable output." + \return "AVCENC_SUCCESS for success or else for bitstream failure." + */ + AVCEnc_Status ce_TotalZeros(AVCEncBitstream *stream, int total_zeros, int TotalCoeff); + + /** + This function encodes total_zeros VLC syntax for chroma DC as in Table 9-9. 
+ \param "stream" "Pointer to the AVCEncBitstream." + \param "TotalZeros" "The total_zeros value." + \param "TotalCoeff" "The total coefficient variable output." + \return "AVCENC_SUCCESS for success or else for bitstream failure." + */ + AVCEnc_Status ce_TotalZerosChromaDC(AVCEncBitstream *stream, int total_zeros, int TotalCoeff); + + /** + This function encodes run_before VLC syntax as in Table 9-10. + \param "stream" "Pointer to the AVCEncBitstream." + \param "run_before" "The run_before value." + \param "zerosLeft" "The context for number of zeros left." + \return "AVCENC_SUCCESS for success or else for bitstream failure." + */ + AVCEnc_Status ce_RunBefore(AVCEncBitstream *stream, int run_before, int zerosLeft); + +#ifdef __cplusplus +} +#endif + + +#endif /* _AVCENC_LIB_H_ */ + diff --git a/media/libstagefright/codecs/avc/enc/src/bitstream_io.cpp b/media/libstagefright/codecs/avc/enc/src/bitstream_io.cpp new file mode 100644 index 0000000..75ab514 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/bitstream_io.cpp @@ -0,0 +1,336 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + +#define WORD_SIZE 32 + +/* array for trailing bit pattern as function of number of bits */ +/* the first one is unused. */ +const static uint8 trailing_bits[9] = {0, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; + +/* ======================================================================== */ +/* Function : BitstreamInit() */ +/* Date : 11/4/2003 */ +/* Purpose : Populate bitstream structure with bitstream buffer and size */ +/* it also initializes internal data */ +/* In/out : */ +/* Return : AVCENC_SUCCESS if successed, AVCENC_FAIL if failed. 
*/ +/* Modified : */ +/* ======================================================================== */ +/* |--------|--------|----~~~~~-----|---------|---------|---------| + ^ ^write_pos ^buf_size + bitstreamBuffer <---------> + current_word + + |-----xxxxxxxxxxxxx| = current_word 32 or 16 bits + <----> + bit_left + ======================================================================== */ + +AVCEnc_Status BitstreamEncInit(AVCEncBitstream *stream, uint8 *buffer, int buf_size, + uint8 *overrunBuffer, int oBSize) +{ + if (stream == NULL || buffer == NULL || buf_size <= 0) + { + return AVCENC_BITSTREAM_INIT_FAIL; + } + + stream->bitstreamBuffer = buffer; + + stream->buf_size = buf_size; + + stream->write_pos = 0; + + stream->count_zeros = 0; + + stream->current_word = 0; + + stream->bit_left = WORD_SIZE; + + stream->overrunBuffer = overrunBuffer; + + stream->oBSize = oBSize; + + return AVCENC_SUCCESS; +} + +/* ======================================================================== */ +/* Function : AVCBitstreamSaveWord() */ +/* Date : 3/29/2004 */ +/* Purpose : Save the current_word into the buffer, byte-swap, and */ +/* add emulation prevention insertion. */ +/* In/out : */ +/* Return : AVCENC_SUCCESS if successed, AVCENC_WRITE_FAIL if buffer is */ +/* full. */ +/* Modified : */ +/* ======================================================================== */ +AVCEnc_Status AVCBitstreamSaveWord(AVCEncBitstream *stream) +{ + int num_bits; + uint8 *write_pnt, byte; + uint current_word; + + /* check number of bytes in current_word, must always be byte-aligned!!!! */ + num_bits = WORD_SIZE - stream->bit_left; /* must be multiple of 8 !!*/ + + if (stream->buf_size - stream->write_pos <= (num_bits >> 3) + 2) /* 2 more bytes for possible EPBS */ + { + if (AVCENC_SUCCESS != AVCBitstreamUseOverrunBuffer(stream, (num_bits >> 3) + 2)) + { + return AVCENC_BITSTREAM_BUFFER_FULL; + } + } + + /* write word, byte-by-byte */ + write_pnt = stream->bitstreamBuffer + stream->write_pos; + current_word = stream->current_word; + while (num_bits) /* no need to check stream->buf_size and stream->write_pos, taken care already */ + { + num_bits -= 8; + byte = (current_word >> num_bits) & 0xFF; + if (byte != 0) + { + *write_pnt++ = byte; + stream->write_pos++; + stream->count_zeros = 0; + } + else + { + stream->count_zeros++; + *write_pnt++ = byte; + stream->write_pos++; + if (stream->count_zeros == 2) + { /* for num_bits = 32, this can add 2 more bytes extra for EPBS */ + *write_pnt++ = 0x3; + stream->write_pos++; + stream->count_zeros = 0; + } + } + } + + /* reset current_word and bit_left */ + stream->current_word = 0; + stream->bit_left = WORD_SIZE; + + return AVCENC_SUCCESS; +} + +/* ======================================================================== */ +/* Function : BitstreamWriteBits() */ +/* Date : 3/29/2004 */ +/* Purpose : Write up to machine word. */ +/* In/out : Unused bits in 'code' must be all zeros. */ +/* Return : AVCENC_SUCCESS if successed, AVCENC_WRITE_FAIL if buffer is */ +/* full. 
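+             (When nBits exceeds the bits left in the cache word, the word is flushed first and the remaining bits start a new cache word.)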
*/ +/* Modified : */ +/* ======================================================================== */ +AVCEnc_Status BitstreamWriteBits(AVCEncBitstream *stream, int nBits, uint code) +{ + AVCEnc_Status status = AVCENC_SUCCESS; + int bit_left = stream->bit_left; + uint current_word = stream->current_word; + + //DEBUG_LOG(userData,AVC_LOGTYPE_INFO,"BitstreamWriteBits",nBits,-1); + + if (nBits > WORD_SIZE) /* has to be taken care of specially */ + { + return AVCENC_FAIL; /* for now */ + /* otherwise, break it down to 2 write of less than 16 bits at a time. */ + } + + if (nBits <= bit_left) /* more bits left in current_word */ + { + stream->current_word = (current_word << nBits) | code; + stream->bit_left -= nBits; + if (stream->bit_left == 0) /* prepare for the next word */ + { + status = AVCBitstreamSaveWord(stream); + return status; + } + } + else + { + stream->current_word = (current_word << bit_left) | (code >> (nBits - bit_left)); + + nBits -= bit_left; + + stream->bit_left = 0; + + status = AVCBitstreamSaveWord(stream); /* save current word */ + + stream->bit_left = WORD_SIZE - nBits; + + stream->current_word = code; /* no extra masking for code, must be handled before saving */ + } + + return status; +} + + +/* ======================================================================== */ +/* Function : BitstreamWrite1Bit() */ +/* Date : 3/30/2004 */ +/* Purpose : Write 1 bit */ +/* In/out : Unused bits in 'code' must be all zeros. */ +/* Return : AVCENC_SUCCESS if successed, AVCENC_WRITE_FAIL if buffer is */ +/* full. */ +/* Modified : */ +/* ======================================================================== */ +AVCEnc_Status BitstreamWrite1Bit(AVCEncBitstream *stream, uint code) +{ + AVCEnc_Status status; + uint current_word = stream->current_word; + + //DEBUG_LOG(userData,AVC_LOGTYPE_INFO,"BitstreamWrite1Bit",code,-1); + + //if(1 <= bit_left) /* more bits left in current_word */ + /* we can assume that there always be positive bit_left in the current word */ + stream->current_word = (current_word << 1) | code; + stream->bit_left--; + if (stream->bit_left == 0) /* prepare for the next word */ + { + status = AVCBitstreamSaveWord(stream); + return status; + } + + return AVCENC_SUCCESS; +} + + +/* ======================================================================== */ +/* Function : BitstreamTrailingBits() */ +/* Date : 3/31/2004 */ +/* Purpose : Add trailing bits and report the final EBSP size. */ +/* In/out : */ +/* Return : AVCENC_SUCCESS if successed, AVCENC_WRITE_FAIL if buffer is */ +/* full. */ +/* Modified : */ +/* ======================================================================== */ +AVCEnc_Status BitstreamTrailingBits(AVCEncBitstream *bitstream, uint *nal_size) +{ + (void)(nal_size); + + AVCEnc_Status status; + int bit_left = bitstream->bit_left; + + bit_left &= 0x7; /* modulo by 8 */ + if (bit_left == 0) bit_left = 8; + /* bitstream->bit_left == 0 cannot happen here since it would have been Saved already */ + + status = BitstreamWriteBits(bitstream, bit_left, trailing_bits[bit_left]); + + if (status != AVCENC_SUCCESS) + { + return status; + } + + /* if it's not saved, save it. 
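+       If the preceding write already flushed a full word, the cache is empty and AVCBitstreamSaveWord() below writes nothing, so calling it unconditionally is harmless.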
*/ + //if(bitstream->bit_left<(WORD_SIZE<<3)) /* in fact, no need to check */ + { + status = AVCBitstreamSaveWord(bitstream); + } + + return status; +} + +/* check whether it's byte-aligned */ +bool byte_aligned(AVCEncBitstream *stream) +{ + if (stream->bit_left % 8) + return false; + else + return true; +} + + +/* determine whether overrun buffer can be used or not */ +AVCEnc_Status AVCBitstreamUseOverrunBuffer(AVCEncBitstream* stream, int numExtraBytes) +{ + AVCEncObject *encvid = (AVCEncObject*)stream->encvid; + + if (stream->overrunBuffer != NULL) // overrunBuffer is set + { + if (stream->bitstreamBuffer != stream->overrunBuffer) // not already used + { + if (stream->write_pos + numExtraBytes >= stream->oBSize) + { + stream->oBSize = stream->write_pos + numExtraBytes + 100; + stream->oBSize &= (~0x3); // make it multiple of 4 + + // allocate new overrun Buffer + if (encvid->overrunBuffer) + { + encvid->avcHandle->CBAVC_Free((uint32*)encvid->avcHandle->userData, + (int)encvid->overrunBuffer); + } + + encvid->oBSize = stream->oBSize; + encvid->overrunBuffer = (uint8*) encvid->avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, + stream->oBSize, DEFAULT_ATTR); + + stream->overrunBuffer = encvid->overrunBuffer; + if (stream->overrunBuffer == NULL) + { + return AVCENC_FAIL; + } + } + + // copy everything to overrun buffer and start using it. + memcpy(stream->overrunBuffer, stream->bitstreamBuffer, stream->write_pos); + stream->bitstreamBuffer = stream->overrunBuffer; + stream->buf_size = stream->oBSize; + } + else // overrun buffer is already used + { + stream->oBSize = stream->write_pos + numExtraBytes + 100; + stream->oBSize &= (~0x3); // make it multiple of 4 + + // allocate new overrun buffer + encvid->oBSize = stream->oBSize; + encvid->overrunBuffer = (uint8*) encvid->avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, + stream->oBSize, DEFAULT_ATTR); + + if (encvid->overrunBuffer == NULL) + { + return AVCENC_FAIL; + } + + + // copy from the old buffer to new buffer + memcpy(encvid->overrunBuffer, stream->overrunBuffer, stream->write_pos); + // free old buffer + encvid->avcHandle->CBAVC_Free((uint32*)encvid->avcHandle->userData, + (int)stream->overrunBuffer); + + // assign pointer to new buffer + stream->overrunBuffer = encvid->overrunBuffer; + stream->bitstreamBuffer = stream->overrunBuffer; + stream->buf_size = stream->oBSize; + } + + return AVCENC_SUCCESS; + } + else // overrunBuffer is not enable. + { + return AVCENC_FAIL; + } + +} + + + diff --git a/media/libstagefright/codecs/avc/enc/src/block.cpp b/media/libstagefright/codecs/avc/enc/src/block.cpp new file mode 100644 index 0000000..01e26a6 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/block.cpp @@ -0,0 +1,1283 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + +/* subtract with the prediction and do transformation */ +void trans(uint8 *cur, int pitch, uint8 *predBlock, int16 *dataBlock) +{ + int16 *ptr = dataBlock; + int r0, r1, r2, r3, j; + int curpitch = (uint)pitch >> 16; + int predpitch = (pitch & 0xFFFF); + + /* horizontal */ + j = 4; + while (j > 0) + { + /* calculate the residue first */ + r0 = cur[0] - predBlock[0]; + r1 = cur[1] - predBlock[1]; + r2 = cur[2] - predBlock[2]; + r3 = cur[3] - predBlock[3]; + + r0 += r3; //ptr[0] + ptr[3]; + r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; + r1 += r2; //ptr[1] + ptr[2]; + r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; + + ptr[0] = r0 + r1; + ptr[2] = r0 - r1; + ptr[1] = (r3 << 1) + r2; + ptr[3] = r3 - (r2 << 1); + + ptr += 16; + predBlock += predpitch; + cur += curpitch; + j--; + } + /* vertical */ + ptr = dataBlock; + j = 4; + while (j > 0) + { + r0 = ptr[0] + ptr[48]; + r3 = ptr[0] - ptr[48]; + r1 = ptr[16] + ptr[32]; + r2 = ptr[16] - ptr[32]; + + ptr[0] = r0 + r1; + ptr[32] = r0 - r1; + ptr[16] = (r3 << 1) + r2; + ptr[48] = r3 - (r2 << 1); + + ptr++; + j--; + } + + return ; +} + + +/* do residue transform quant invquant, invtrans and write output out */ +int dct_luma(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org, int *coef_cost) +{ + AVCCommonObj *video = encvid->common; + int org_pitch = encvid->currInput->pitch; + int pitch = video->currPic->pitch; + int16 *coef = video->block; + uint8 *pred = video->pred_block; // size 16 for a 4x4 block + int pred_pitch = video->pred_pitch; + int r0, r1, r2, r3, j, k, idx; + int *level, *run; + int Qq, Rq, q_bits, qp_const, quant; + int data, lev, zero_run; + int numcoeff; + + coef += ((blkidx & 0x3) << 2) + ((blkidx >> 2) << 6); /* point to the 4x4 block */ + + /* first take a 4x4 transform */ + /* horizontal */ + j = 4; + while (j > 0) + { + /* calculate the residue first */ + r0 = org[0] - pred[0]; /* OPTIMIZEABLE */ + r1 = org[1] - pred[1]; + r2 = org[2] - pred[2]; + r3 = org[3] - pred[3]; + + r0 += r3; //ptr[0] + ptr[3]; + r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; + r1 += r2; //ptr[1] + ptr[2]; + r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; + + coef[0] = r0 + r1; + coef[2] = r0 - r1; + coef[1] = (r3 << 1) + r2; + coef[3] = r3 - (r2 << 1); + + coef += 16; + org += org_pitch; + pred += pred_pitch; + j--; + } + /* vertical */ + coef -= 64; + pred -= (pred_pitch << 2); + j = 4; + while (j > 0) /* OPTIMIZABLE */ + { + r0 = coef[0] + coef[48]; + r3 = coef[0] - coef[48]; + r1 = coef[16] + coef[32]; + r2 = coef[16] - coef[32]; + + coef[0] = r0 + r1; + coef[32] = r0 - r1; + coef[16] = (r3 << 1) + r2; + coef[48] = r3 - (r2 << 1); + + coef++; + j--; + } + + coef -= 4; + + /* quant */ + level = encvid->level[ras2dec[blkidx]]; + run = encvid->run[ras2dec[blkidx]]; + + Rq = video->QPy_mod_6; + Qq = video->QPy_div_6; + qp_const = encvid->qp_const; + q_bits = 15 + Qq; + + zero_run = 0; + numcoeff = 0; + for (k = 0; k < 16; k++) + { + idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */ + data = coef[idx]; + quant = quant_coef[Rq][k]; + if (data > 0) + { + lev = data * quant + qp_const; + } + else + { + lev = -data * quant + qp_const; + } + lev >>= q_bits; + if (lev) + { + *coef_cost += ((lev > 1) ? 
MAX_VALUE : COEFF_COST[DISABLE_THRESHOLDING][zero_run]); + + /* dequant */ + quant = dequant_coefres[Rq][k]; + if (data > 0) + { + level[numcoeff] = lev; + coef[idx] = (lev * quant) << Qq; + } + else + { + level[numcoeff] = -lev; + coef[idx] = (-lev * quant) << Qq; + } + run[numcoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + if (video->currMB->mb_intra) // only do inverse transform with intra block + { + if (numcoeff) /* then do inverse transform */ + { + for (j = 4; j > 0; j--) /* horizontal */ + { + r0 = coef[0] + coef[2]; + r1 = coef[0] - coef[2]; + r2 = (coef[1] >> 1) - coef[3]; + r3 = coef[1] + (coef[3] >> 1); + + coef[0] = r0 + r3; + coef[1] = r1 + r2; + coef[2] = r1 - r2; + coef[3] = r0 - r3; + + coef += 16; + } + + coef -= 64; + for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */ + { + r0 = coef[0] + coef[32]; + r1 = coef[0] - coef[32]; + r2 = (coef[16] >> 1) - coef[48]; + r3 = coef[16] + (coef[48] >> 1); + r0 += r3; + r3 = (r0 - (r3 << 1)); /* r0-r3 */ + r1 += r2; + r2 = (r1 - (r2 << 1)); /* r1-r2 */ + r0 += 32; + r1 += 32; + r2 += 32; + r3 += 32; + + r0 = pred[0] + (r0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = *(pred += pred_pitch) + (r1 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = *(pred += pred_pitch) + (r2 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[pred_pitch] + (r3 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + + *cur = r0; + *(cur += pitch) = r1; + *(cur += pitch) = r2; + cur[pitch] = r3; + cur -= (pitch << 1); + cur++; + pred -= (pred_pitch << 1); + pred++; + coef++; + } + } + else // copy from pred to cur + { + *((uint32*)cur) = *((uint32*)pred); + *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch)); + *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch)); + *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch)); + } + } + + return numcoeff; +} + + +void MBInterIdct(AVCCommonObj *video, uint8 *curL, AVCMacroblock *currMB, int picPitch) +{ + int16 *coef, *coef8 = video->block; + uint8 *cur; // the same as curL + int b8, b4; + int r0, r1, r2, r3, j, blkidx; + + for (b8 = 0; b8 < 4; b8++) + { + cur = curL; + coef = coef8; + + if (currMB->CBP&(1 << b8)) + { + for (b4 = 0; b4 < 4; b4++) + { + blkidx = blkIdx2blkXY[b8][b4]; + /* do IDCT */ + if (currMB->nz_coeff[blkidx]) + { + for (j = 4; j > 0; j--) /* horizontal */ + { + r0 = coef[0] + coef[2]; + r1 = coef[0] - coef[2]; + r2 = (coef[1] >> 1) - coef[3]; + r3 = coef[1] + (coef[3] >> 1); + + coef[0] = r0 + r3; + coef[1] = r1 + r2; + coef[2] = r1 - r2; + coef[3] = r0 - r3; + + coef += 16; + } + + coef -= 64; + for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */ + { + r0 = coef[0] + coef[32]; + r1 = coef[0] - coef[32]; + r2 = (coef[16] >> 1) - coef[48]; + r3 = coef[16] + (coef[48] >> 1); + r0 += r3; + r3 = (r0 - (r3 << 1)); /* r0-r3 */ + r1 += r2; + r2 = (r1 - (r2 << 1)); /* r1-r2 */ + r0 += 32; + r1 += 32; + r2 += 32; + r3 += 32; + + r0 = cur[0] + (r0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + *cur = r0; + r1 = *(cur += picPitch) + (r1 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + *cur = r1; + r2 = *(cur += picPitch) + (r2 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + *cur = r2; + r3 = cur[picPitch] + (r3 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + cur[picPitch] = r3; + + cur -= (picPitch << 1); + 
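+                        /* back to the top row of this column; the increments below move to the next of the four columns */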
cur++; + coef++; + } + cur -= 4; + coef -= 4; + } + if (b4&1) + { + cur += ((picPitch << 2) - 4); + coef += 60; + } + else + { + cur += 4; + coef += 4; + } + } + } + + if (b8&1) + { + curL += ((picPitch << 3) - 8); + coef8 += 120; + } + else + { + curL += 8; + coef8 += 8; + } + } + + return ; +} + +/* performa dct, quant, iquant, idct for the entire MB */ +void dct_luma_16x16(AVCEncObject *encvid, uint8 *curL, uint8 *orgL) +{ + AVCCommonObj *video = encvid->common; + int pitch = video->currPic->pitch; + int org_pitch = encvid->currInput->pitch; + AVCMacroblock *currMB = video->currMB; + int16 *coef = video->block; + uint8 *pred = encvid->pred_i16[currMB->i16Mode]; + int blk_x, blk_y, j, k, idx, b8, b4; + int r0, r1, r2, r3, m0, m1, m2 , m3; + int data, lev; + int *level, *run, zero_run, ncoeff; + int Rq, Qq, quant, q_bits, qp_const; + int offset_cur[4], offset_pred[4], offset; + + /* horizontal */ + for (j = 16; j > 0; j--) + { + for (blk_x = 4; blk_x > 0; blk_x--) + { + /* calculate the residue first */ + r0 = *orgL++ - *pred++; + r1 = *orgL++ - *pred++; + r2 = *orgL++ - *pred++; + r3 = *orgL++ - *pred++; + + r0 += r3; //ptr[0] + ptr[3]; + r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; + r1 += r2; //ptr[1] + ptr[2]; + r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; + + *coef++ = r0 + r1; + *coef++ = (r3 << 1) + r2; + *coef++ = r0 - r1; + *coef++ = r3 - (r2 << 1); + } + orgL += (org_pitch - 16); + } + pred -= 256; + coef -= 256; + /* vertical */ + for (blk_y = 4; blk_y > 0; blk_y--) + { + for (j = 16; j > 0; j--) + { + r0 = coef[0] + coef[48]; + r3 = coef[0] - coef[48]; + r1 = coef[16] + coef[32]; + r2 = coef[16] - coef[32]; + + coef[0] = r0 + r1; + coef[32] = r0 - r1; + coef[16] = (r3 << 1) + r2; + coef[48] = r3 - (r2 << 1); + + coef++; + } + coef += 48; + } + + /* then perform DC transform */ + coef -= 256; + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[12]; + r3 = coef[0] - coef[12]; + r1 = coef[4] + coef[8]; + r2 = coef[4] - coef[8]; + + coef[0] = r0 + r1; + coef[8] = r0 - r1; + coef[4] = r3 + r2; + coef[12] = r3 - r2; + coef += 64; + } + coef -= 256; + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[192]; + r3 = coef[0] - coef[192]; + r1 = coef[64] + coef[128]; + r2 = coef[64] - coef[128]; + + coef[0] = (r0 + r1) >> 1; + coef[128] = (r0 - r1) >> 1; + coef[64] = (r3 + r2) >> 1; + coef[192] = (r3 - r2) >> 1; + coef += 4; + } + + coef -= 16; + // then quantize DC + level = encvid->leveldc; + run = encvid->rundc; + + Rq = video->QPy_mod_6; + Qq = video->QPy_div_6; + quant = quant_coef[Rq][0]; + q_bits = 15 + Qq; + qp_const = encvid->qp_const; + + zero_run = 0; + ncoeff = 0; + for (k = 0; k < 16; k++) /* in zigzag scan order */ + { + idx = ZIGZAG2RASTERDC[k]; + data = coef[idx]; + if (data > 0) // quant + { + lev = data * quant + (qp_const << 1); + } + else + { + lev = -data * quant + (qp_const << 1); + } + lev >>= (q_bits + 1); + if (lev) // dequant + { + if (data > 0) + { + level[ncoeff] = lev; + coef[idx] = lev; + } + else + { + level[ncoeff] = -lev; + coef[idx] = -lev; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + /* inverse transform DC */ + encvid->numcoefdc = ncoeff; + if (ncoeff) + { + quant = dequant_coefres[Rq][0]; + + for (j = 0; j < 4; j++) + { + m0 = coef[0] + coef[4]; + m1 = coef[0] - coef[4]; + m2 = coef[8] + coef[12]; + m3 = coef[8] - coef[12]; + + + coef[0] = m0 + m2; + coef[4] = m0 - m2; + coef[8] = m1 - m3; + coef[12] = m1 + m3; + coef += 64; + } + + coef -= 256; + + if (Qq >= 2) /* this way should be faster than JM */ 
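+    /* both branches below are algebraically equal to the reference formula
+       (((m*scale) << (QPy/6)) + 2) >> 2: a pure left shift by (QPy/6 - 2) when
+       QPy/6 >= 2, and a rounded right shift by (2 - QPy/6) otherwise */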
+ { /* they use (((m4*scale)<<(QPy/6))+2)>>2 for both cases. */ + Qq -= 2; + for (j = 0; j < 4; j++) + { + m0 = coef[0] + coef[64]; + m1 = coef[0] - coef[64]; + m2 = coef[128] + coef[192]; + m3 = coef[128] - coef[192]; + + coef[0] = ((m0 + m2) * quant) << Qq; + coef[64] = ((m0 - m2) * quant) << Qq; + coef[128] = ((m1 - m3) * quant) << Qq; + coef[192] = ((m1 + m3) * quant) << Qq; + coef += 4; + } + Qq += 2; /* restore the value */ + } + else + { + Qq = 2 - Qq; + offset = 1 << (Qq - 1); + + for (j = 0; j < 4; j++) + { + m0 = coef[0] + coef[64]; + m1 = coef[0] - coef[64]; + m2 = coef[128] + coef[192]; + m3 = coef[128] - coef[192]; + + coef[0] = (((m0 + m2) * quant + offset) >> Qq); + coef[64] = (((m0 - m2) * quant + offset) >> Qq); + coef[128] = (((m1 - m3) * quant + offset) >> Qq); + coef[192] = (((m1 + m3) * quant + offset) >> Qq); + coef += 4; + } + Qq = 2 - Qq; /* restore the value */ + } + coef -= 16; /* back to the origin */ + } + + /* now zigzag scan ac coefs, quant, iquant and itrans */ + run = encvid->run[0]; + level = encvid->level[0]; + + /* offset btw 4x4 block */ + offset_cur[0] = 0; + offset_cur[1] = (pitch << 2) - 8; + + /* offset btw 8x8 block */ + offset_cur[2] = 8 - (pitch << 3); + offset_cur[3] = -8; + + /* similarly for pred */ + offset_pred[0] = 0; + offset_pred[1] = 56; + offset_pred[2] = -120; + offset_pred[3] = -8; + + currMB->CBP = 0; + + for (b8 = 0; b8 < 4; b8++) + { + for (b4 = 0; b4 < 4; b4++) + { + + zero_run = 0; + ncoeff = 0; + + for (k = 1; k < 16; k++) + { + idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */ + data = coef[idx]; + quant = quant_coef[Rq][k]; + if (data > 0) + { + lev = data * quant + qp_const; + } + else + { + lev = -data * quant + qp_const; + } + lev >>= q_bits; + if (lev) + { /* dequant */ + quant = dequant_coefres[Rq][k]; + if (data > 0) + { + level[ncoeff] = lev; + coef[idx] = (lev * quant) << Qq; + } + else + { + level[ncoeff] = -lev; + coef[idx] = (-lev * quant) << Qq; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + currMB->nz_coeff[blkIdx2blkXY[b8][b4]] = ncoeff; /* in raster scan !!! 
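+               (nz_coeff[] is indexed by 4x4-block position in raster order via
+               blkIdx2blkXY, while the level/run arrays advance 16 entries per
+               block in coding order)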
*/ + if (ncoeff) + { + currMB->CBP |= (1 << b8); + + // do inverse transform here + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[2]; + r1 = coef[0] - coef[2]; + r2 = (coef[1] >> 1) - coef[3]; + r3 = coef[1] + (coef[3] >> 1); + + coef[0] = r0 + r3; + coef[1] = r1 + r2; + coef[2] = r1 - r2; + coef[3] = r0 - r3; + + coef += 16; + } + coef -= 64; + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[32]; + r1 = coef[0] - coef[32]; + r2 = (coef[16] >> 1) - coef[48]; + r3 = coef[16] + (coef[48] >> 1); + + r0 += r3; + r3 = (r0 - (r3 << 1)); /* r0-r3 */ + r1 += r2; + r2 = (r1 - (r2 << 1)); /* r1-r2 */ + r0 += 32; + r1 += 32; + r2 += 32; + r3 += 32; + r0 = pred[0] + (r0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = pred[16] + (r1 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[32] + (r2 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[48] + (r3 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curL = r0; + *(curL += pitch) = r1; + *(curL += pitch) = r2; + curL[pitch] = r3; + curL -= (pitch << 1); + curL++; + pred++; + coef++; + } + } + else // do DC-only inverse + { + m0 = coef[0] + 32; + + for (j = 4; j > 0; j--) + { + r0 = pred[0] + (m0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = pred[16] + (m0 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[32] + (m0 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[48] + (m0 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curL = r0; + *(curL += pitch) = r1; + *(curL += pitch) = r2; + curL[pitch] = r3; + curL -= (pitch << 1); + curL++; + pred++; + } + coef += 4; + } + + run += 16; // follow coding order + level += 16; + curL += offset_cur[b4&1]; + pred += offset_pred[b4&1]; + coef += offset_pred[b4&1]; + } + + curL += offset_cur[2 + (b8&1)]; + pred += offset_pred[2 + (b8&1)]; + coef += offset_pred[2 + (b8&1)]; + } + + return ; +} + + +void dct_chroma(AVCEncObject *encvid, uint8 *curC, uint8 *orgC, int cr) +{ + AVCCommonObj *video = encvid->common; + AVCMacroblock *currMB = video->currMB; + int org_pitch = (encvid->currInput->pitch) >> 1; + int pitch = (video->currPic->pitch) >> 1; + int pred_pitch = 16; + int16 *coef = video->block + 256; + uint8 *pred = video->pred_block; + int j, blk_x, blk_y, k, idx, b4; + int r0, r1, r2, r3, m0; + int Qq, Rq, qp_const, q_bits, quant; + int *level, *run, zero_run, ncoeff; + int data, lev; + int offset_cur[2], offset_pred[2], offset_coef[2]; + uint8 nz_temp[4]; + int coeff_cost; + + if (cr) + { + coef += 8; + pred += 8; + } + + if (currMB->mb_intra == 0) // inter mode + { + pred = curC; + pred_pitch = pitch; + } + + /* do 4x4 transform */ + /* horizontal */ + for (j = 8; j > 0; j--) + { + for (blk_x = 2; blk_x > 0; blk_x--) + { + /* calculate the residue first */ + r0 = *orgC++ - *pred++; + r1 = *orgC++ - *pred++; + r2 = *orgC++ - *pred++; + r3 = *orgC++ - *pred++; + + r0 += r3; //ptr[0] + ptr[3]; + r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; + r1 += r2; //ptr[1] + ptr[2]; + r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; + + *coef++ = r0 + r1; + *coef++ = (r3 << 1) + r2; + *coef++ = r0 - r1; + *coef++ = r3 - (r2 << 1); + + } + coef += 8; // coef pitch is 16 + pred += (pred_pitch - 8); // pred_pitch is 16 + orgC += (org_pitch - 8); + } + pred -= (pred_pitch << 3); + coef -= 128; + /* vertical */ + for (blk_y = 2; blk_y > 0; blk_y--) + { + for (j = 8; j > 0; j--) + { + r0 = coef[0] + coef[48]; + r3 = coef[0] - 
coef[48]; + r1 = coef[16] + coef[32]; + r2 = coef[16] - coef[32]; + + coef[0] = r0 + r1; + coef[32] = r0 - r1; + coef[16] = (r3 << 1) + r2; + coef[48] = r3 - (r2 << 1); + + coef++; + } + coef += 56; + } + /* then perform DC transform */ + coef -= 128; + + /* 2x2 transform of DC components*/ + r0 = coef[0]; + r1 = coef[4]; + r2 = coef[64]; + r3 = coef[68]; + + coef[0] = r0 + r1 + r2 + r3; + coef[4] = r0 - r1 + r2 - r3; + coef[64] = r0 + r1 - r2 - r3; + coef[68] = r0 - r1 - r2 + r3; + + Qq = video->QPc_div_6; + Rq = video->QPc_mod_6; + quant = quant_coef[Rq][0]; + q_bits = 15 + Qq; + qp_const = encvid->qp_const_c; + + zero_run = 0; + ncoeff = 0; + run = encvid->runcdc + (cr << 2); + level = encvid->levelcdc + (cr << 2); + + /* in zigzag scan order */ + for (k = 0; k < 4; k++) + { + idx = ((k >> 1) << 6) + ((k & 1) << 2); + data = coef[idx]; + if (data > 0) + { + lev = data * quant + (qp_const << 1); + } + else + { + lev = -data * quant + (qp_const << 1); + } + lev >>= (q_bits + 1); + if (lev) + { + if (data > 0) + { + level[ncoeff] = lev; + coef[idx] = lev; + } + else + { + level[ncoeff] = -lev; + coef[idx] = -lev; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + encvid->numcoefcdc[cr] = ncoeff; + + if (ncoeff) + { + currMB->CBP |= (1 << 4); // DC present + // do inverse transform + quant = dequant_coefres[Rq][0]; + + r0 = coef[0] + coef[4]; + r1 = coef[0] - coef[4]; + r2 = coef[64] + coef[68]; + r3 = coef[64] - coef[68]; + + r0 += r2; + r2 = r0 - (r2 << 1); + r1 += r3; + r3 = r1 - (r3 << 1); + + if (Qq >= 1) + { + Qq -= 1; + coef[0] = (r0 * quant) << Qq; + coef[4] = (r1 * quant) << Qq; + coef[64] = (r2 * quant) << Qq; + coef[68] = (r3 * quant) << Qq; + Qq++; + } + else + { + coef[0] = (r0 * quant) >> 1; + coef[4] = (r1 * quant) >> 1; + coef[64] = (r2 * quant) >> 1; + coef[68] = (r3 * quant) >> 1; + } + } + + /* now do AC zigzag scan, quant, iquant and itrans */ + if (cr) + { + run = encvid->run[20]; + level = encvid->level[20]; + } + else + { + run = encvid->run[16]; + level = encvid->level[16]; + } + + /* offset btw 4x4 block */ + offset_cur[0] = 0; + offset_cur[1] = (pitch << 2) - 8; + offset_pred[0] = 0; + offset_pred[1] = (pred_pitch << 2) - 8; + offset_coef[0] = 0; + offset_coef[1] = 56; + + coeff_cost = 0; + + for (b4 = 0; b4 < 4; b4++) + { + zero_run = 0; + ncoeff = 0; + for (k = 1; k < 16; k++) /* in zigzag scan order */ + { + idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */ + data = coef[idx]; + quant = quant_coef[Rq][k]; + if (data > 0) + { + lev = data * quant + qp_const; + } + else + { + lev = -data * quant + qp_const; + } + lev >>= q_bits; + if (lev) + { + /* for RD performance*/ + if (lev > 1) + coeff_cost += MAX_VALUE; // set high cost, shall not be discarded + else + coeff_cost += COEFF_COST[DISABLE_THRESHOLDING][zero_run]; + + /* dequant */ + quant = dequant_coefres[Rq][k]; + if (data > 0) + { + level[ncoeff] = lev; + coef[idx] = (lev * quant) << Qq; + } + else + { + level[ncoeff] = -lev; + coef[idx] = (-lev * quant) << Qq; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + nz_temp[b4] = ncoeff; // raster scan + + // just advance the pointers for now, do IDCT later + coef += 4; + run += 16; + level += 16; + coef += offset_coef[b4&1]; + } + + /* rewind the pointers */ + coef -= 128; + + if (coeff_cost < _CHROMA_COEFF_COST_) + { + /* if it's not efficient to encode any blocks. 
+ Just do DC only */ + /* We can reset level and run also, but setting nz to zero should be enough. */ + currMB->nz_coeff[16+(cr<<1)] = 0; + currMB->nz_coeff[17+(cr<<1)] = 0; + currMB->nz_coeff[20+(cr<<1)] = 0; + currMB->nz_coeff[21+(cr<<1)] = 0; + + for (b4 = 0; b4 < 4; b4++) + { + // do DC-only inverse + m0 = coef[0] + 32; + + for (j = 4; j > 0; j--) + { + r0 = pred[0] + (m0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = *(pred += pred_pitch) + (m0 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[pred_pitch] + (m0 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[pred_pitch<<1] + (m0 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curC = r0; + *(curC += pitch) = r1; + *(curC += pitch) = r2; + curC[pitch] = r3; + curC -= (pitch << 1); + curC++; + pred += (1 - pred_pitch); + } + coef += 4; + curC += offset_cur[b4&1]; + pred += offset_pred[b4&1]; + coef += offset_coef[b4&1]; + } + } + else // not dropping anything, continue with the IDCT + { + for (b4 = 0; b4 < 4; b4++) + { + ncoeff = nz_temp[b4] ; // in raster scan + currMB->nz_coeff[16+(b4&1)+(cr<<1)+((b4>>1)<<2)] = ncoeff; // in raster scan + + if (ncoeff) // do a check on the nonzero-coeff + { + currMB->CBP |= (2 << 4); + + // do inverse transform here + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[2]; + r1 = coef[0] - coef[2]; + r2 = (coef[1] >> 1) - coef[3]; + r3 = coef[1] + (coef[3] >> 1); + + coef[0] = r0 + r3; + coef[1] = r1 + r2; + coef[2] = r1 - r2; + coef[3] = r0 - r3; + + coef += 16; + } + coef -= 64; + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[32]; + r1 = coef[0] - coef[32]; + r2 = (coef[16] >> 1) - coef[48]; + r3 = coef[16] + (coef[48] >> 1); + + r0 += r3; + r3 = (r0 - (r3 << 1)); /* r0-r3 */ + r1 += r2; + r2 = (r1 - (r2 << 1)); /* r1-r2 */ + r0 += 32; + r1 += 32; + r2 += 32; + r3 += 32; + r0 = pred[0] + (r0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = *(pred += pred_pitch) + (r1 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[pred_pitch] + (r2 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[pred_pitch<<1] + (r3 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curC = r0; + *(curC += pitch) = r1; + *(curC += pitch) = r2; + curC[pitch] = r3; + curC -= (pitch << 1); + curC++; + pred += (1 - pred_pitch); + coef++; + } + } + else + { + // do DC-only inverse + m0 = coef[0] + 32; + + for (j = 4; j > 0; j--) + { + r0 = pred[0] + (m0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = *(pred += pred_pitch) + (m0 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[pred_pitch] + (m0 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[pred_pitch<<1] + (m0 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curC = r0; + *(curC += pitch) = r1; + *(curC += pitch) = r2; + curC[pitch] = r3; + curC -= (pitch << 1); + curC++; + pred += (1 - pred_pitch); + } + coef += 4; + } + curC += offset_cur[b4&1]; + pred += offset_pred[b4&1]; + coef += offset_coef[b4&1]; + } + } + + return ; +} + + +/* only DC transform */ +int TransQuantIntra16DC(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + int16 *block = video->block; + int *level = encvid->leveldc; + int *run = encvid->rundc; + int16 *ptr = block; + int r0, r1, r2, r3, j; + int Qq = video->QPy_div_6; + int Rq = video->QPy_mod_6; + int q_bits, 
qp_const, quant; + int data, lev, zero_run; + int k, ncoeff, idx; + + /* DC transform */ + /* horizontal */ + j = 4; + while (j) + { + r0 = ptr[0] + ptr[12]; + r3 = ptr[0] - ptr[12]; + r1 = ptr[4] + ptr[8]; + r2 = ptr[4] - ptr[8]; + + ptr[0] = r0 + r1; + ptr[8] = r0 - r1; + ptr[4] = r3 + r2; + ptr[12] = r3 - r2; + ptr += 64; + j--; + } + /* vertical */ + ptr = block; + j = 4; + while (j) + { + r0 = ptr[0] + ptr[192]; + r3 = ptr[0] - ptr[192]; + r1 = ptr[64] + ptr[128]; + r2 = ptr[64] - ptr[128]; + + ptr[0] = (r0 + r1) >> 1; + ptr[128] = (r0 - r1) >> 1; + ptr[64] = (r3 + r2) >> 1; + ptr[192] = (r3 - r2) >> 1; + ptr += 4; + j--; + } + + quant = quant_coef[Rq][0]; + q_bits = 15 + Qq; + qp_const = (1 << q_bits) / 3; // intra + + zero_run = 0; + ncoeff = 0; + + for (k = 0; k < 16; k++) /* in zigzag scan order */ + { + idx = ZIGZAG2RASTERDC[k]; + data = block[idx]; + if (data > 0) + { + lev = data * quant + (qp_const << 1); + } + else + { + lev = -data * quant + (qp_const << 1); + } + lev >>= (q_bits + 1); + if (lev) + { + if (data > 0) + { + level[ncoeff] = lev; + block[idx] = lev; + } + else + { + level[ncoeff] = -lev; + block[idx] = -lev; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + block[idx] = 0; + } + } + return ncoeff; +} + +int TransQuantChromaDC(AVCEncObject *encvid, int16 *block, int slice_type, int cr) +{ + AVCCommonObj *video = encvid->common; + int *level, *run; + int r0, r1, r2, r3; + int Qq, Rq, q_bits, qp_const, quant; + int data, lev, zero_run; + int k, ncoeff, idx; + + level = encvid->levelcdc + (cr << 2); /* cb or cr */ + run = encvid->runcdc + (cr << 2); + + /* 2x2 transform of DC components*/ + r0 = block[0]; + r1 = block[4]; + r2 = block[64]; + r3 = block[68]; + + block[0] = r0 + r1 + r2 + r3; + block[4] = r0 - r1 + r2 - r3; + block[64] = r0 + r1 - r2 - r3; + block[68] = r0 - r1 - r2 + r3; + + Qq = video->QPc_div_6; + Rq = video->QPc_mod_6; + quant = quant_coef[Rq][0]; + q_bits = 15 + Qq; + if (slice_type == AVC_I_SLICE) + { + qp_const = (1 << q_bits) / 3; + } + else + { + qp_const = (1 << q_bits) / 6; + } + + zero_run = 0; + ncoeff = 0; + + for (k = 0; k < 4; k++) /* in zigzag scan order */ + { + idx = ((k >> 1) << 6) + ((k & 1) << 2); + data = block[idx]; + if (data > 0) + { + lev = data * quant + (qp_const << 1); + } + else + { + lev = -data * quant + (qp_const << 1); + } + lev >>= (q_bits + 1); + if (lev) + { + if (data > 0) + { + level[ncoeff] = lev; + block[idx] = lev; + } + else + { + level[ncoeff] = -lev; + block[idx] = -lev; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + block[idx] = 0; + } + } + return ncoeff; +} + + diff --git a/media/libstagefright/codecs/avc/enc/src/findhalfpel.cpp b/media/libstagefright/codecs/avc/enc/src/findhalfpel.cpp new file mode 100644 index 0000000..38a2a15 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/findhalfpel.cpp @@ -0,0 +1,622 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" +/* 3/29/01 fast half-pel search based on neighboring guess */ +/* value ranging from 0 to 4, high complexity (more accurate) to + low complexity (less accurate) */ +#define HP_DISTANCE_TH 5 // 2 /* half-pel distance threshold */ + +#define PREF_16_VEC 129 /* 1MV bias versus 4MVs*/ + +const static int distance_tab[9][9] = /* [hp_guess][k] */ +{ + {0, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 0, 1, 2, 3, 4, 3, 2, 1}, + {1, 0, 0, 0, 1, 2, 3, 2, 1}, + {1, 2, 1, 0, 1, 2, 3, 4, 3}, + {1, 2, 1, 0, 0, 0, 1, 2, 3}, + {1, 4, 3, 2, 1, 0, 1, 2, 3}, + {1, 2, 3, 2, 1, 0, 0, 0, 1}, + {1, 2, 3, 4, 3, 2, 1, 0, 1}, + {1, 0, 1, 2, 3, 2, 1, 0, 0} +}; + +#define CLIP_RESULT(x) if((uint)x > 0xFF){ \ + x = 0xFF & (~(x>>31));} + +#define CLIP_UPPER16(x) if((uint)x >= 0x20000000){ \ + x = 0xFF0000 & (~(x>>31));} \ + else { \ + x = (x>>5)&0xFF0000; \ + } + +/*===================================================================== + Function: AVCFindHalfPelMB + Date: 10/31/2007 + Purpose: Find half pel resolution MV surrounding the full-pel MV +=====================================================================*/ + +int AVCFindHalfPelMB(AVCEncObject *encvid, uint8 *cur, AVCMV *mot, uint8 *ncand, + int xpos, int ypos, int hp_guess, int cmvx, int cmvy) +{ + AVCPictureData *currPic = encvid->common->currPic; + int lx = currPic->pitch; + int d, dmin, satd_min; + uint8* cand; + int lambda_motion = encvid->lambda_motion; + uint8 *mvbits = encvid->mvbits; + int mvcost; + /* list of candidate to go through for half-pel search*/ + uint8 *subpel_pred = (uint8*) encvid->subpel_pred; // all 16 sub-pel positions + uint8 **hpel_cand = (uint8**) encvid->hpel_cand; /* half-pel position */ + + int xh[9] = {0, 0, 2, 2, 2, 0, -2, -2, -2}; + int yh[9] = {0, -2, -2, 0, 2, 2, 2, 0, -2}; + int xq[8] = {0, 1, 1, 1, 0, -1, -1, -1}; + int yq[8] = { -1, -1, 0, 1, 1, 1, 0, -1}; + int h, hmin, q, qmin; + + OSCL_UNUSED_ARG(xpos); + OSCL_UNUSED_ARG(ypos); + OSCL_UNUSED_ARG(hp_guess); + + GenerateHalfPelPred(subpel_pred, ncand, lx); + + cur = encvid->currYMB; // pre-load current original MB + + cand = hpel_cand[0]; + + // find cost for the current full-pel position + dmin = SATD_MB(cand, cur, 65535); // get Hadamaard transform SAD + mvcost = MV_COST_S(lambda_motion, mot->x, mot->y, cmvx, cmvy); + satd_min = dmin; + dmin += mvcost; + hmin = 0; + + /* find half-pel */ + for (h = 1; h < 9; h++) + { + d = SATD_MB(hpel_cand[h], cur, dmin); + mvcost = MV_COST_S(lambda_motion, mot->x + xh[h], mot->y + yh[h], cmvx, cmvy); + d += mvcost; + + if (d < dmin) + { + dmin = d; + hmin = h; + satd_min = d - mvcost; + } + } + + mot->sad = dmin; + mot->x += xh[hmin]; + mot->y += yh[hmin]; + encvid->best_hpel_pos = hmin; + + /*** search for quarter-pel ****/ + GenerateQuartPelPred(encvid->bilin_base[hmin], &(encvid->qpel_cand[0][0]), hmin); + + encvid->best_qpel_pos = qmin = -1; + + for (q = 0; q < 8; q++) + { + d = SATD_MB(encvid->qpel_cand[q], cur, dmin); + mvcost = MV_COST_S(lambda_motion, mot->x + xq[q], mot->y + yq[q], cmvx, cmvy); + d += mvcost; + if (d < dmin) + { + dmin = d; + qmin = q; + satd_min = d - mvcost; + } + } + + if (qmin != -1) + { + mot->sad = dmin; + mot->x += xq[qmin]; + mot->y += yq[qmin]; + encvid->best_qpel_pos = qmin; + } + + return satd_min; +} + + + +/** This function generates sub-pel prediction around the full-pel candidate. 
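+Half-pel samples are produced with the 6-tap filter (1, -5, 20, 20, -5, 1); single-pass
+positions are rounded with (x + 16) >> 5, while the centre (half, half) position filters
+the 16-bit horizontal intermediates vertically and rounds with (x + 512) >> 10, matching
+subclause 8.4.2.2.1.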
+Each sub-pel position array is 20 pixel wide (for word-alignment) and 17 pixel tall. */ +/** The sub-pel position is labeled in spiral manner from the center. */ + +void GenerateHalfPelPred(uint8* subpel_pred, uint8 *ncand, int lx) +{ + /* let's do straightforward way first */ + uint8 *ref; + uint8 *dst; + uint8 tmp8; + int32 tmp32; + int16 tmp_horz[18*22], *dst_16, *src_16; + register int a = 0, b = 0, c = 0, d = 0, e = 0, f = 0; // temp register + int msk; + int i, j; + + /* first copy full-pel to the first array */ + /* to be optimized later based on byte-offset load */ + ref = ncand - 3 - lx - (lx << 1); /* move back (-3,-3) */ + dst = subpel_pred; + + dst -= 4; /* offset */ + for (j = 0; j < 22; j++) /* 24x22 */ + { + i = 6; + while (i > 0) + { + tmp32 = *ref++; + tmp8 = *ref++; + tmp32 |= (tmp8 << 8); + tmp8 = *ref++; + tmp32 |= (tmp8 << 16); + tmp8 = *ref++; + tmp32 |= (tmp8 << 24); + *((uint32*)(dst += 4)) = tmp32; + i--; + } + ref += (lx - 24); + } + + /* from the first array, we do horizontal interp */ + ref = subpel_pred + 2; + dst_16 = tmp_horz; /* 17 x 22 */ + + for (j = 4; j > 0; j--) + { + for (i = 16; i > 0; i -= 4) + { + a = ref[-2]; + b = ref[-1]; + c = ref[0]; + d = ref[1]; + e = ref[2]; + f = ref[3]; + *dst_16++ = a + f - 5 * (b + e) + 20 * (c + d); + a = ref[4]; + *dst_16++ = b + a - 5 * (c + f) + 20 * (d + e); + b = ref[5]; + *dst_16++ = c + b - 5 * (d + a) + 20 * (e + f); + c = ref[6]; + *dst_16++ = d + c - 5 * (e + b) + 20 * (f + a); + + ref += 4; + } + /* do the 17th column here */ + d = ref[3]; + *dst_16 = e + d - 5 * (f + c) + 20 * (a + b); + dst_16 += 2; /* stride for tmp_horz is 18 */ + ref += 8; /* stride for ref is 24 */ + if (j == 3) // move 18 lines down + { + dst_16 += 324;//18*18; + ref += 432;//18*24; + } + } + + ref -= 480;//20*24; + dst_16 -= 360;//20*18; + dst = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* go to the 14th array 17x18*/ + + for (j = 18; j > 0; j--) + { + for (i = 16; i > 0; i -= 4) + { + a = ref[-2]; + b = ref[-1]; + c = ref[0]; + d = ref[1]; + e = ref[2]; + f = ref[3]; + tmp32 = a + f - 5 * (b + e) + 20 * (c + d); + *dst_16++ = tmp32; + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *dst++ = tmp32; + + a = ref[4]; + tmp32 = b + a - 5 * (c + f) + 20 * (d + e); + *dst_16++ = tmp32; + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *dst++ = tmp32; + + b = ref[5]; + tmp32 = c + b - 5 * (d + a) + 20 * (e + f); + *dst_16++ = tmp32; + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *dst++ = tmp32; + + c = ref[6]; + tmp32 = d + c - 5 * (e + b) + 20 * (f + a); + *dst_16++ = tmp32; + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *dst++ = tmp32; + + ref += 4; + } + /* do the 17th column here */ + d = ref[3]; + tmp32 = e + d - 5 * (f + c) + 20 * (a + b); + *dst_16 = tmp32; + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *dst = tmp32; + + dst += 8; /* stride for dst is 24 */ + dst_16 += 2; /* stride for tmp_horz is 18 */ + ref += 8; /* stride for ref is 24 */ + } + + + /* Do middle point filtering*/ + src_16 = tmp_horz; /* 17 x 22 */ + dst = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* 12th array 17x17*/ + dst -= 24; // offset + for (i = 0; i < 17; i++) + { + for (j = 16; j > 0; j -= 4) + { + a = *src_16; + b = *(src_16 += 18); + c = *(src_16 += 18); + d = *(src_16 += 18); + e = *(src_16 += 18); + f = *(src_16 += 18); + + tmp32 = a + f - 5 * (b + e) + 20 * (c + d); + tmp32 = (tmp32 + 512) >> 10; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; + + a = *(src_16 += 18); + tmp32 = b + a - 5 * (c + f) + 20 * (d + e); + tmp32 = 
(tmp32 + 512) >> 10; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; + + b = *(src_16 += 18); + tmp32 = c + b - 5 * (d + a) + 20 * (e + f); + tmp32 = (tmp32 + 512) >> 10; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; + + c = *(src_16 += 18); + tmp32 = d + c - 5 * (e + b) + 20 * (f + a); + tmp32 = (tmp32 + 512) >> 10; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; + + src_16 -= (18 << 2); + } + + d = src_16[90]; // 18*5 + tmp32 = e + d - 5 * (f + c) + 20 * (a + b); + tmp32 = (tmp32 + 512) >> 10; + CLIP_RESULT(tmp32) + dst[24] = tmp32; + + src_16 -= ((18 << 4) - 1); + dst -= ((24 << 4) - 1); + } + + /* do vertical interpolation */ + ref = subpel_pred + 2; + dst = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE; /* 10th array 18x17 */ + dst -= 24; // offset + + for (i = 2; i > 0; i--) + { + for (j = 16; j > 0; j -= 4) + { + a = *ref; + b = *(ref += 24); + c = *(ref += 24); + d = *(ref += 24); + e = *(ref += 24); + f = *(ref += 24); + + tmp32 = a + f - 5 * (b + e) + 20 * (c + d); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + a = *(ref += 24); + tmp32 = b + a - 5 * (c + f) + 20 * (d + e); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + b = *(ref += 24); + tmp32 = c + b - 5 * (d + a) + 20 * (e + f); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + c = *(ref += 24); + tmp32 = d + c - 5 * (e + b) + 20 * (f + a); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + ref -= (24 << 2); + } + + d = ref[120]; // 24*5 + tmp32 = e + d - 5 * (f + c) + 20 * (a + b); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + dst[24] = tmp32; // 10th + + dst -= ((24 << 4) - 1); + ref -= ((24 << 4) - 1); + } + + // note that using SIMD here doesn't help much, the cycle almost stays the same + // one can just use the above code and change the for(i=2 to for(i=18 + for (i = 16; i > 0; i -= 4) + { + msk = 0; + for (j = 17; j > 0; j--) + { + a = *((uint32*)ref); /* load 4 bytes */ + b = (a >> 8) & 0xFF00FF; /* second and fourth byte */ + a &= 0xFF00FF; + + c = *((uint32*)(ref + 120)); + d = (c >> 8) & 0xFF00FF; + c &= 0xFF00FF; + + a += c; + b += d; + + e = *((uint32*)(ref + 72)); /* e, f */ + f = (e >> 8) & 0xFF00FF; + e &= 0xFF00FF; + + c = *((uint32*)(ref + 48)); /* c, d */ + d = (c >> 8) & 0xFF00FF; + c &= 0xFF00FF; + + c += e; + d += f; + + a += 20 * c; + b += 20 * d; + a += 0x100010; + b += 0x100010; + + e = *((uint32*)(ref += 24)); /* e, f */ + f = (e >> 8) & 0xFF00FF; + e &= 0xFF00FF; + + c = *((uint32*)(ref + 72)); /* c, d */ + d = (c >> 8) & 0xFF00FF; + c &= 0xFF00FF; + + c += e; + d += f; + + a -= 5 * c; + b -= 5 * d; + + c = a << 16; + d = b << 16; + CLIP_UPPER16(a) + CLIP_UPPER16(c) + CLIP_UPPER16(b) + CLIP_UPPER16(d) + + a |= (c >> 16); + b |= (d >> 16); + // a>>=5; + // b>>=5; + /* clip */ + // msk |= b; msk|=a; + // a &= 0xFF00FF; + // b &= 0xFF00FF; + a |= (b << 8); /* pack it back */ + + *((uint16*)(dst += 24)) = a & 0xFFFF; //dst is not word-aligned. 
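+                /* 'a' now holds four packed 8-bit results; the write is split into
+                   two half-word stores because dst may not be 32-bit aligned */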
+ *((uint16*)(dst + 2)) = a >> 16; + + } + dst -= 404; // 24*17-4 + ref -= 404; + /* if(msk & 0xFF00FF00) // need clipping + { + VertInterpWClip(dst,ref); // re-do 4 column with clip + }*/ + } + + return ; +} + +void VertInterpWClip(uint8 *dst, uint8 *ref) +{ + int i, j; + int a, b, c, d, e, f; + int32 tmp32; + + dst -= 4; + ref -= 4; + + for (i = 4; i > 0; i--) + { + for (j = 16; j > 0; j -= 4) + { + a = *ref; + b = *(ref += 24); + c = *(ref += 24); + d = *(ref += 24); + e = *(ref += 24); + f = *(ref += 24); + + tmp32 = a + f - 5 * (b + e) + 20 * (c + d); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + a = *(ref += 24); + tmp32 = b + a - 5 * (c + f) + 20 * (d + e); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + b = *(ref += 24); + tmp32 = c + b - 5 * (d + a) + 20 * (e + f); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + c = *(ref += 24); + tmp32 = d + c - 5 * (e + b) + 20 * (f + a); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + *(dst += 24) = tmp32; // 10th + + ref -= (24 << 2); + } + + d = ref[120]; // 24*5 + tmp32 = e + d - 5 * (f + c) + 20 * (a + b); + tmp32 = (tmp32 + 16) >> 5; + CLIP_RESULT(tmp32) + dst[24] = tmp32; // 10th + + dst -= ((24 << 4) - 1); + ref -= ((24 << 4) - 1); + } + + return ; +} + + +void GenerateQuartPelPred(uint8 **bilin_base, uint8 *qpel_cand, int hpel_pos) +{ + // for even value of hpel_pos, start with pattern 1, otherwise, start with pattern 2 + int i, j; + + uint8 *c1 = qpel_cand; + uint8 *tl = bilin_base[0]; + uint8 *tr = bilin_base[1]; + uint8 *bl = bilin_base[2]; + uint8 *br = bilin_base[3]; + int a, b, c, d; + int offset = 1 - (384 * 7); + + if (!(hpel_pos&1)) // diamond pattern + { + j = 16; + while (j--) + { + i = 16; + while (i--) + { + d = tr[24]; + a = *tr++; + b = bl[1]; + c = *br++; + + *c1 = (c + a + 1) >> 1; + *(c1 += 384) = (b + a + 1) >> 1; /* c2 */ + *(c1 += 384) = (b + c + 1) >> 1; /* c3 */ + *(c1 += 384) = (b + d + 1) >> 1; /* c4 */ + + b = *bl++; + + *(c1 += 384) = (c + d + 1) >> 1; /* c5 */ + *(c1 += 384) = (b + d + 1) >> 1; /* c6 */ + *(c1 += 384) = (b + c + 1) >> 1; /* c7 */ + *(c1 += 384) = (b + a + 1) >> 1; /* c8 */ + + c1 += offset; + } + // advance to the next line, pitch is 24 + tl += 8; + tr += 8; + bl += 8; + br += 8; + c1 += 8; + } + } + else // star pattern + { + j = 16; + while (j--) + { + i = 16; + while (i--) + { + a = *br++; + b = *tr++; + c = tl[1]; + *c1 = (a + b + 1) >> 1; + b = bl[1]; + *(c1 += 384) = (a + c + 1) >> 1; /* c2 */ + c = tl[25]; + *(c1 += 384) = (a + b + 1) >> 1; /* c3 */ + b = tr[23]; + *(c1 += 384) = (a + c + 1) >> 1; /* c4 */ + c = tl[24]; + *(c1 += 384) = (a + b + 1) >> 1; /* c5 */ + b = *bl++; + *(c1 += 384) = (a + c + 1) >> 1; /* c6 */ + c = *tl++; + *(c1 += 384) = (a + b + 1) >> 1; /* c7 */ + *(c1 += 384) = (a + c + 1) >> 1; /* c8 */ + + c1 += offset; + } + // advance to the next line, pitch is 24 + tl += 8; + tr += 8; + bl += 8; + br += 8; + c1 += 8; + } + } + + return ; +} + + +/* assuming cand always has a pitch of 24 */ +int SATD_MB(uint8 *cand, uint8 *cur, int dmin) +{ + int cost; + + + dmin = (dmin << 16) | 24; + cost = AVCSAD_Macroblock_C(cand, cur, dmin, NULL); + + return cost; +} + + + + + diff --git a/media/libstagefright/codecs/avc/enc/src/header.cpp b/media/libstagefright/codecs/avc/enc/src/header.cpp new file mode 100644 index 0000000..9acff9e --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/header.cpp @@ -0,0 +1,917 @@ +/* 
------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" +#include "avcenc_api.h" + +/** see subclause 7.4.2.1 */ +/* no need for checking the valid range , already done in SetEncodeParam(), +if we have to send another SPS, the ranges should be verified first before +users call PVAVCEncodeSPS() */ +AVCEnc_Status EncodeSPS(AVCEncObject *encvid, AVCEncBitstream *stream) +{ + AVCCommonObj *video = encvid->common; + AVCSeqParamSet *seqParam = video->currSeqParams; + AVCVUIParams *vui = &(seqParam->vui_parameters); + int i; + AVCEnc_Status status = AVCENC_SUCCESS; + + //DEBUG_LOG(userData,AVC_LOGTYPE_INFO,"EncodeSPS",-1,-1); + + status = BitstreamWriteBits(stream, 8, seqParam->profile_idc); + status = BitstreamWrite1Bit(stream, seqParam->constrained_set0_flag); + status = BitstreamWrite1Bit(stream, seqParam->constrained_set1_flag); + status = BitstreamWrite1Bit(stream, seqParam->constrained_set2_flag); + status = BitstreamWrite1Bit(stream, seqParam->constrained_set3_flag); + status = BitstreamWriteBits(stream, 4, 0); /* forbidden zero bits */ + if (status != AVCENC_SUCCESS) /* we can check after each write also */ + { + return status; + } + + status = BitstreamWriteBits(stream, 8, seqParam->level_idc); + status = ue_v(stream, seqParam->seq_parameter_set_id); + status = ue_v(stream, seqParam->log2_max_frame_num_minus4); + status = ue_v(stream, seqParam->pic_order_cnt_type); + if (status != AVCENC_SUCCESS) + { + return status; + } + + if (seqParam->pic_order_cnt_type == 0) + { + status = ue_v(stream, seqParam->log2_max_pic_order_cnt_lsb_minus4); + } + else if (seqParam->pic_order_cnt_type == 1) + { + status = BitstreamWrite1Bit(stream, seqParam->delta_pic_order_always_zero_flag); + status = se_v(stream, seqParam->offset_for_non_ref_pic); /* upto 32 bits */ + status = se_v(stream, seqParam->offset_for_top_to_bottom_field); /* upto 32 bits */ + status = ue_v(stream, seqParam->num_ref_frames_in_pic_order_cnt_cycle); + + for (i = 0; i < (int)(seqParam->num_ref_frames_in_pic_order_cnt_cycle); i++) + { + status = se_v(stream, seqParam->offset_for_ref_frame[i]); /* upto 32 bits */ + } + } + if (status != AVCENC_SUCCESS) + { + return status; + } + + status = ue_v(stream, seqParam->num_ref_frames); + status = BitstreamWrite1Bit(stream, seqParam->gaps_in_frame_num_value_allowed_flag); + status = ue_v(stream, seqParam->pic_width_in_mbs_minus1); + status = ue_v(stream, seqParam->pic_height_in_map_units_minus1); + status = BitstreamWrite1Bit(stream, seqParam->frame_mbs_only_flag); + if (status != AVCENC_SUCCESS) + { + return status; + } + /* if frame_mbs_only_flag is 0, then write, mb_adaptive_frame_field_frame here */ + + status = BitstreamWrite1Bit(stream, seqParam->direct_8x8_inference_flag); + status = BitstreamWrite1Bit(stream, seqParam->frame_cropping_flag); + if (seqParam->frame_cropping_flag) 
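+    /* frame_crop_*_offset values are coded in units of two luma samples for 4:2:0
+       frame coding; cropping is how dimensions that are not multiples of 16
+       (e.g. 1080 inside a 1088-high coded frame) are signalled */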
+ { + status = ue_v(stream, seqParam->frame_crop_left_offset); + status = ue_v(stream, seqParam->frame_crop_right_offset); + status = ue_v(stream, seqParam->frame_crop_top_offset); + status = ue_v(stream, seqParam->frame_crop_bottom_offset); + } + if (status != AVCENC_SUCCESS) + { + return status; + } + + status = BitstreamWrite1Bit(stream, seqParam->vui_parameters_present_flag); + if (seqParam->vui_parameters_present_flag) + { + /* not supported */ + //return AVCENC_SPS_FAIL; + EncodeVUI(stream, vui); + } + + return status; +} + + +void EncodeVUI(AVCEncBitstream* stream, AVCVUIParams* vui) +{ + int temp; + + temp = vui->aspect_ratio_info_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + BitstreamWriteBits(stream, 8, vui->aspect_ratio_idc); + if (vui->aspect_ratio_idc == 255) + { + BitstreamWriteBits(stream, 16, vui->sar_width); + BitstreamWriteBits(stream, 16, vui->sar_height); + } + } + temp = vui->overscan_info_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + BitstreamWrite1Bit(stream, vui->overscan_appropriate_flag); + } + temp = vui->video_signal_type_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + BitstreamWriteBits(stream, 3, vui->video_format); + BitstreamWrite1Bit(stream, vui->video_full_range_flag); + temp = vui->colour_description_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + BitstreamWriteBits(stream, 8, vui->colour_primaries); + BitstreamWriteBits(stream, 8, vui->transfer_characteristics); + BitstreamWriteBits(stream, 8, vui->matrix_coefficients); + } + } + temp = vui->chroma_location_info_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + ue_v(stream, vui->chroma_sample_loc_type_top_field); + ue_v(stream, vui->chroma_sample_loc_type_bottom_field); + } + + temp = vui->timing_info_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + BitstreamWriteBits(stream, 32, vui->num_units_in_tick); + BitstreamWriteBits(stream, 32, vui->time_scale); + BitstreamWrite1Bit(stream, vui->fixed_frame_rate_flag); + } + + temp = vui->nal_hrd_parameters_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + EncodeHRD(stream, &(vui->nal_hrd_parameters)); + } + temp = vui->vcl_hrd_parameters_present_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + EncodeHRD(stream, &(vui->vcl_hrd_parameters)); + } + if (vui->nal_hrd_parameters_present_flag || vui->vcl_hrd_parameters_present_flag) + { + BitstreamWrite1Bit(stream, vui->low_delay_hrd_flag); + } + BitstreamWrite1Bit(stream, vui->pic_struct_present_flag); + temp = vui->bitstream_restriction_flag; + BitstreamWrite1Bit(stream, temp); + if (temp) + { + BitstreamWrite1Bit(stream, vui->motion_vectors_over_pic_boundaries_flag); + ue_v(stream, vui->max_bytes_per_pic_denom); + ue_v(stream, vui->max_bits_per_mb_denom); + ue_v(stream, vui->log2_max_mv_length_horizontal); + ue_v(stream, vui->log2_max_mv_length_vertical); + ue_v(stream, vui->max_dec_frame_reordering); + ue_v(stream, vui->max_dec_frame_buffering); + } + + return ; +} + + +void EncodeHRD(AVCEncBitstream* stream, AVCHRDParams* hrd) +{ + int i; + + ue_v(stream, hrd->cpb_cnt_minus1); + BitstreamWriteBits(stream, 4, hrd->bit_rate_scale); + BitstreamWriteBits(stream, 4, hrd->cpb_size_scale); + for (i = 0; i <= (int)hrd->cpb_cnt_minus1; i++) + { + ue_v(stream, hrd->bit_rate_value_minus1[i]); + ue_v(stream, hrd->cpb_size_value_minus1[i]); + ue_v(stream, hrd->cbr_flag[i]); + } + BitstreamWriteBits(stream, 5, hrd->initial_cpb_removal_delay_length_minus1); + 
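+    /* these *_length fields are fixed 5-bit codes, u(5), per hrd_parameters() in Annex E */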
BitstreamWriteBits(stream, 5, hrd->cpb_removal_delay_length_minus1); + BitstreamWriteBits(stream, 5, hrd->dpb_output_delay_length_minus1); + BitstreamWriteBits(stream, 5, hrd->time_offset_length); + + return ; +} + + + +/** see subclause 7.4.2.2 */ +/* no need for checking the valid range , already done in SetEncodeParam(). +If we have to send another SPS, the ranges should be verified first before +users call PVAVCEncodeSPS()*/ +AVCEnc_Status EncodePPS(AVCEncObject *encvid, AVCEncBitstream *stream) +{ + AVCCommonObj *video = encvid->common; + AVCEnc_Status status = AVCENC_SUCCESS; + AVCPicParamSet *picParam = video->currPicParams; + int i, iGroup, numBits; + uint temp; + + status = ue_v(stream, picParam->pic_parameter_set_id); + status = ue_v(stream, picParam->seq_parameter_set_id); + status = BitstreamWrite1Bit(stream, picParam->entropy_coding_mode_flag); + status = BitstreamWrite1Bit(stream, picParam->pic_order_present_flag); + if (status != AVCENC_SUCCESS) + { + return status; + } + + status = ue_v(stream, picParam->num_slice_groups_minus1); + if (picParam->num_slice_groups_minus1 > 0) + { + status = ue_v(stream, picParam->slice_group_map_type); + if (picParam->slice_group_map_type == 0) + { + for (iGroup = 0; iGroup <= (int)picParam->num_slice_groups_minus1; iGroup++) + { + status = ue_v(stream, picParam->run_length_minus1[iGroup]); + } + } + else if (picParam->slice_group_map_type == 2) + { + for (iGroup = 0; iGroup < (int)picParam->num_slice_groups_minus1; iGroup++) + { + status = ue_v(stream, picParam->top_left[iGroup]); + status = ue_v(stream, picParam->bottom_right[iGroup]); + } + } + else if (picParam->slice_group_map_type == 3 || + picParam->slice_group_map_type == 4 || + picParam->slice_group_map_type == 5) + { + status = BitstreamWrite1Bit(stream, picParam->slice_group_change_direction_flag); + status = ue_v(stream, picParam->slice_group_change_rate_minus1); + } + else /*if(picParam->slice_group_map_type == 6)*/ + { + status = ue_v(stream, picParam->pic_size_in_map_units_minus1); + + numBits = 0;/* ceil(log2(num_slice_groups_minus1+1)) bits */ + i = picParam->num_slice_groups_minus1; + while (i > 0) + { + numBits++; + i >>= 1; + } + + for (i = 0; i <= (int)picParam->pic_size_in_map_units_minus1; i++) + { + status = BitstreamWriteBits(stream, numBits, picParam->slice_group_id[i]); + } + } + } + if (status != AVCENC_SUCCESS) + { + return status; + } + + status = ue_v(stream, picParam->num_ref_idx_l0_active_minus1); + status = ue_v(stream, picParam->num_ref_idx_l1_active_minus1); + status = BitstreamWrite1Bit(stream, picParam->weighted_pred_flag); + status = BitstreamWriteBits(stream, 2, picParam->weighted_bipred_idc); + if (status != AVCENC_SUCCESS) + { + return status; + } + + status = se_v(stream, picParam->pic_init_qp_minus26); + status = se_v(stream, picParam->pic_init_qs_minus26); + status = se_v(stream, picParam->chroma_qp_index_offset); + + temp = picParam->deblocking_filter_control_present_flag << 2; + temp |= (picParam->constrained_intra_pred_flag << 1); + temp |= picParam->redundant_pic_cnt_present_flag; + + status = BitstreamWriteBits(stream, 3, temp); + + return status; +} + +/** see subclause 7.4.3 */ +AVCEnc_Status EncodeSliceHeader(AVCEncObject *encvid, AVCEncBitstream *stream) +{ + AVCCommonObj *video = encvid->common; + AVCSliceHeader *sliceHdr = video->sliceHdr; + AVCPicParamSet *currPPS = video->currPicParams; + AVCSeqParamSet *currSPS = video->currSeqParams; + AVCEnc_Status status = AVCENC_SUCCESS; + int slice_type, temp, i; + int num_bits; + + num_bits = 
(stream->write_pos << 3) - stream->bit_left; + + status = ue_v(stream, sliceHdr->first_mb_in_slice); + + slice_type = video->slice_type; + + if (video->mbNum == 0) /* first mb in frame */ + { + status = ue_v(stream, sliceHdr->slice_type); + } + else + { + status = ue_v(stream, slice_type); + } + + status = ue_v(stream, sliceHdr->pic_parameter_set_id); + + status = BitstreamWriteBits(stream, currSPS->log2_max_frame_num_minus4 + 4, sliceHdr->frame_num); + + if (status != AVCENC_SUCCESS) + { + return status; + } + /* if frame_mbs_only_flag is 0, encode field_pic_flag, bottom_field_flag here */ + + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + status = ue_v(stream, sliceHdr->idr_pic_id); + } + + if (currSPS->pic_order_cnt_type == 0) + { + status = BitstreamWriteBits(stream, currSPS->log2_max_pic_order_cnt_lsb_minus4 + 4, + sliceHdr->pic_order_cnt_lsb); + + if (currPPS->pic_order_present_flag && !sliceHdr->field_pic_flag) + { + status = se_v(stream, sliceHdr->delta_pic_order_cnt_bottom); /* 32 bits */ + } + } + if (currSPS->pic_order_cnt_type == 1 && !currSPS->delta_pic_order_always_zero_flag) + { + status = se_v(stream, sliceHdr->delta_pic_order_cnt[0]); /* 32 bits */ + if (currPPS->pic_order_present_flag && !sliceHdr->field_pic_flag) + { + status = se_v(stream, sliceHdr->delta_pic_order_cnt[1]); /* 32 bits */ + } + } + + if (currPPS->redundant_pic_cnt_present_flag) + { + status = ue_v(stream, sliceHdr->redundant_pic_cnt); + } + + if (slice_type == AVC_B_SLICE) + { + status = BitstreamWrite1Bit(stream, sliceHdr->direct_spatial_mv_pred_flag); + } + + if (status != AVCENC_SUCCESS) + { + return status; + } + + if (slice_type == AVC_P_SLICE || slice_type == AVC_SP_SLICE || slice_type == AVC_B_SLICE) + { + status = BitstreamWrite1Bit(stream, sliceHdr->num_ref_idx_active_override_flag); + if (sliceHdr->num_ref_idx_active_override_flag) + { + /* we shouldn't enter this part at all */ + status = ue_v(stream, sliceHdr->num_ref_idx_l0_active_minus1); + if (slice_type == AVC_B_SLICE) + { + status = ue_v(stream, sliceHdr->num_ref_idx_l1_active_minus1); + } + } + } + if (status != AVCENC_SUCCESS) + { + return status; + } + + /* ref_pic_list_reordering() */ + status = ref_pic_list_reordering(video, stream, sliceHdr, slice_type); + if (status != AVCENC_SUCCESS) + { + return status; + } + + if ((currPPS->weighted_pred_flag && (slice_type == AVC_P_SLICE || slice_type == AVC_SP_SLICE)) || + (currPPS->weighted_bipred_idc == 1 && slice_type == AVC_B_SLICE)) + { + // pred_weight_table(); // not supported !! + return AVCENC_PRED_WEIGHT_TAB_FAIL; + } + + if (video->nal_ref_idc != 0) + { + status = dec_ref_pic_marking(video, stream, sliceHdr); + if (status != AVCENC_SUCCESS) + { + return status; + } + } + + if (currPPS->entropy_coding_mode_flag && slice_type != AVC_I_SLICE && slice_type != AVC_SI_SLICE) + { + return AVCENC_CABAC_FAIL; + /* ue_v(stream,&(sliceHdr->cabac_init_idc)); + if(sliceHdr->cabac_init_idc > 2){ + // not supported !!!! 
+ }*/ + } + + status = se_v(stream, sliceHdr->slice_qp_delta); + if (status != AVCENC_SUCCESS) + { + return status; + } + + if (slice_type == AVC_SP_SLICE || slice_type == AVC_SI_SLICE) + { + if (slice_type == AVC_SP_SLICE) + { + status = BitstreamWrite1Bit(stream, sliceHdr->sp_for_switch_flag); + /* if sp_for_switch_flag is 0, P macroblocks in SP slice is decoded using + SP decoding process for non-switching pictures in 8.6.1 */ + /* else, P macroblocks in SP slice is decoded using SP and SI decoding + process for switching picture in 8.6.2 */ + } + status = se_v(stream, sliceHdr->slice_qs_delta); + if (status != AVCENC_SUCCESS) + { + return status; + } + } + + if (currPPS->deblocking_filter_control_present_flag) + { + + status = ue_v(stream, sliceHdr->disable_deblocking_filter_idc); + + if (sliceHdr->disable_deblocking_filter_idc != 1) + { + status = se_v(stream, sliceHdr->slice_alpha_c0_offset_div2); + + status = se_v(stream, sliceHdr->slice_beta_offset_div_2); + } + if (status != AVCENC_SUCCESS) + { + return status; + } + } + + if (currPPS->num_slice_groups_minus1 > 0 && currPPS->slice_group_map_type >= 3 + && currPPS->slice_group_map_type <= 5) + { + /* Ceil(Log2(PicSizeInMapUnits/(float)SliceGroupChangeRate + 1)) */ + temp = video->PicSizeInMapUnits / video->SliceGroupChangeRate; + if (video->PicSizeInMapUnits % video->SliceGroupChangeRate) + { + temp++; + } + i = 0; + while (temp > 1) + { + temp >>= 1; + i++; + } + + BitstreamWriteBits(stream, i, sliceHdr->slice_group_change_cycle); + } + + + encvid->rateCtrl->NumberofHeaderBits += (stream->write_pos << 3) - stream->bit_left - num_bits; + + return AVCENC_SUCCESS; +} + +/** see subclause 7.4.3.1 */ +AVCEnc_Status ref_pic_list_reordering(AVCCommonObj *video, AVCEncBitstream *stream, AVCSliceHeader *sliceHdr, int slice_type) +{ + (void)(video); + int i; + AVCEnc_Status status = AVCENC_SUCCESS; + + if (slice_type != AVC_I_SLICE && slice_type != AVC_SI_SLICE) + { + status = BitstreamWrite1Bit(stream, sliceHdr->ref_pic_list_reordering_flag_l0); + if (sliceHdr->ref_pic_list_reordering_flag_l0) + { + i = 0; + do + { + status = ue_v(stream, sliceHdr->reordering_of_pic_nums_idc_l0[i]); + if (sliceHdr->reordering_of_pic_nums_idc_l0[i] == 0 || + sliceHdr->reordering_of_pic_nums_idc_l0[i] == 1) + { + status = ue_v(stream, sliceHdr->abs_diff_pic_num_minus1_l0[i]); + /* this check should be in InitSlice(), if we ever use it */ + /*if(sliceHdr->reordering_of_pic_nums_idc_l0[i] == 0 && + sliceHdr->abs_diff_pic_num_minus1_l0[i] > video->MaxPicNum/2 -1) + { + return AVCENC_REF_PIC_REORDER_FAIL; // out of range + } + if(sliceHdr->reordering_of_pic_nums_idc_l0[i] == 1 && + sliceHdr->abs_diff_pic_num_minus1_l0[i] > video->MaxPicNum/2 -2) + { + return AVCENC_REF_PIC_REORDER_FAIL; // out of range + }*/ + } + else if (sliceHdr->reordering_of_pic_nums_idc_l0[i] == 2) + { + status = ue_v(stream, sliceHdr->long_term_pic_num_l0[i]); + } + i++; + } + while (sliceHdr->reordering_of_pic_nums_idc_l0[i] != 3 + && i <= (int)sliceHdr->num_ref_idx_l0_active_minus1 + 1) ; + } + } + if (slice_type == AVC_B_SLICE) + { + status = BitstreamWrite1Bit(stream, sliceHdr->ref_pic_list_reordering_flag_l1); + if (sliceHdr->ref_pic_list_reordering_flag_l1) + { + i = 0; + do + { + status = ue_v(stream, sliceHdr->reordering_of_pic_nums_idc_l1[i]); + if (sliceHdr->reordering_of_pic_nums_idc_l1[i] == 0 || + sliceHdr->reordering_of_pic_nums_idc_l1[i] == 1) + { + status = ue_v(stream, sliceHdr->abs_diff_pic_num_minus1_l1[i]); + /* This check should be in InitSlice() if we ever use it + 
if(sliceHdr->reordering_of_pic_nums_idc_l1[i] == 0 && + sliceHdr->abs_diff_pic_num_minus1_l1[i] > video->MaxPicNum/2 -1) + { + return AVCENC_REF_PIC_REORDER_FAIL; // out of range + } + if(sliceHdr->reordering_of_pic_nums_idc_l1[i] == 1 && + sliceHdr->abs_diff_pic_num_minus1_l1[i] > video->MaxPicNum/2 -2) + { + return AVCENC_REF_PIC_REORDER_FAIL; // out of range + }*/ + } + else if (sliceHdr->reordering_of_pic_nums_idc_l1[i] == 2) + { + status = ue_v(stream, sliceHdr->long_term_pic_num_l1[i]); + } + i++; + } + while (sliceHdr->reordering_of_pic_nums_idc_l1[i] != 3 + && i <= (int)sliceHdr->num_ref_idx_l1_active_minus1 + 1) ; + } + } + + return status; +} + +/** see subclause 7.4.3.3 */ +AVCEnc_Status dec_ref_pic_marking(AVCCommonObj *video, AVCEncBitstream *stream, AVCSliceHeader *sliceHdr) +{ + int i; + AVCEnc_Status status = AVCENC_SUCCESS; + + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + status = BitstreamWrite1Bit(stream, sliceHdr->no_output_of_prior_pics_flag); + status = BitstreamWrite1Bit(stream, sliceHdr->long_term_reference_flag); + if (sliceHdr->long_term_reference_flag == 0) /* used for short-term */ + { + video->MaxLongTermFrameIdx = -1; /* no long-term frame indx */ + } + else /* used for long-term */ + { + video->MaxLongTermFrameIdx = 0; + video->LongTermFrameIdx = 0; + } + } + else + { + status = BitstreamWrite1Bit(stream, sliceHdr->adaptive_ref_pic_marking_mode_flag); /* default to zero */ + if (sliceHdr->adaptive_ref_pic_marking_mode_flag) + { + i = 0; + do + { + status = ue_v(stream, sliceHdr->memory_management_control_operation[i]); + if (sliceHdr->memory_management_control_operation[i] == 1 || + sliceHdr->memory_management_control_operation[i] == 3) + { + status = ue_v(stream, sliceHdr->difference_of_pic_nums_minus1[i]); + } + if (sliceHdr->memory_management_control_operation[i] == 2) + { + status = ue_v(stream, sliceHdr->long_term_pic_num[i]); + } + if (sliceHdr->memory_management_control_operation[i] == 3 || + sliceHdr->memory_management_control_operation[i] == 6) + { + status = ue_v(stream, sliceHdr->long_term_frame_idx[i]); + } + if (sliceHdr->memory_management_control_operation[i] == 4) + { + status = ue_v(stream, sliceHdr->max_long_term_frame_idx_plus1[i]); + } + i++; + } + while (sliceHdr->memory_management_control_operation[i] != 0 && i < MAX_DEC_REF_PIC_MARKING); + if (i >= MAX_DEC_REF_PIC_MARKING && sliceHdr->memory_management_control_operation[i] != 0) + { + return AVCENC_DEC_REF_PIC_MARK_FAIL; /* we're screwed!!, not enough memory */ + } + } + } + + return status; +} + +/* see subclause 8.2.1 Decoding process for picture order count. +See also PostPOC() for initialization of some variables. 
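+For pic_order_cnt_type 0 the encoder derives pic_order_cnt_lsb from the display order
+relative to the last IDR frame, then reconstructs the MSB part with the wrap-around rule
+of subclause 8.2.1.1 so that encoder and decoder agree on PicOrderCnt.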
*/ +AVCEnc_Status InitPOC(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCSeqParamSet *currSPS = video->currSeqParams; + AVCSliceHeader *sliceHdr = video->sliceHdr; + AVCFrameIO *currInput = encvid->currInput; + int i; + + switch (currSPS->pic_order_cnt_type) + { + case 0: /* POC MODE 0 , subclause 8.2.1.1 */ + /* encoding part */ + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + encvid->dispOrdPOCRef = currInput->disp_order; + } + while (currInput->disp_order < encvid->dispOrdPOCRef) + { + encvid->dispOrdPOCRef -= video->MaxPicOrderCntLsb; + } + sliceHdr->pic_order_cnt_lsb = currInput->disp_order - encvid->dispOrdPOCRef; + while (sliceHdr->pic_order_cnt_lsb >= video->MaxPicOrderCntLsb) + { + sliceHdr->pic_order_cnt_lsb -= video->MaxPicOrderCntLsb; + } + /* decoding part */ + /* Calculate the MSBs of current picture */ + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + video->prevPicOrderCntMsb = 0; + video->prevPicOrderCntLsb = 0; + } + if (sliceHdr->pic_order_cnt_lsb < video->prevPicOrderCntLsb && + (video->prevPicOrderCntLsb - sliceHdr->pic_order_cnt_lsb) >= (video->MaxPicOrderCntLsb / 2)) + video->PicOrderCntMsb = video->prevPicOrderCntMsb + video->MaxPicOrderCntLsb; + else if (sliceHdr->pic_order_cnt_lsb > video->prevPicOrderCntLsb && + (sliceHdr->pic_order_cnt_lsb - video->prevPicOrderCntLsb) > (video->MaxPicOrderCntLsb / 2)) + video->PicOrderCntMsb = video->prevPicOrderCntMsb - video->MaxPicOrderCntLsb; + else + video->PicOrderCntMsb = video->prevPicOrderCntMsb; + + /* JVT-I010 page 81 is different from JM7.3 */ + if (!sliceHdr->field_pic_flag || !sliceHdr->bottom_field_flag) + { + video->PicOrderCnt = video->TopFieldOrderCnt = video->PicOrderCntMsb + sliceHdr->pic_order_cnt_lsb; + } + + if (!sliceHdr->field_pic_flag) + { + video->BottomFieldOrderCnt = video->TopFieldOrderCnt + sliceHdr->delta_pic_order_cnt_bottom; + } + else if (sliceHdr->bottom_field_flag) + { + video->PicOrderCnt = video->BottomFieldOrderCnt = video->PicOrderCntMsb + sliceHdr->pic_order_cnt_lsb; + } + + if (!sliceHdr->field_pic_flag) + { + video->PicOrderCnt = AVC_MIN(video->TopFieldOrderCnt, video->BottomFieldOrderCnt); + } + + if (video->currPicParams->pic_order_present_flag && !sliceHdr->field_pic_flag) + { + sliceHdr->delta_pic_order_cnt_bottom = 0; /* defaulted to zero */ + } + + break; + case 1: /* POC MODE 1, subclause 8.2.1.2 */ + /* calculate FrameNumOffset */ + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + encvid->dispOrdPOCRef = currInput->disp_order; /* reset the reference point */ + video->prevFrameNumOffset = 0; + video->FrameNumOffset = 0; + } + else if (video->prevFrameNum > sliceHdr->frame_num) + { + video->FrameNumOffset = video->prevFrameNumOffset + video->MaxFrameNum; + } + else + { + video->FrameNumOffset = video->prevFrameNumOffset; + } + /* calculate absFrameNum */ + if (currSPS->num_ref_frames_in_pic_order_cnt_cycle) + { + video->absFrameNum = video->FrameNumOffset + sliceHdr->frame_num; + } + else + { + video->absFrameNum = 0; + } + + if (video->absFrameNum > 0 && video->nal_ref_idc == 0) + { + video->absFrameNum--; + } + + /* derive picOrderCntCycleCnt and frameNumInPicOrderCntCycle */ + if (video->absFrameNum > 0) + { + video->picOrderCntCycleCnt = (video->absFrameNum - 1) / currSPS->num_ref_frames_in_pic_order_cnt_cycle; + video->frameNumInPicOrderCntCycle = (video->absFrameNum - 1) % currSPS->num_ref_frames_in_pic_order_cnt_cycle; + } + /* derive expectedDeltaPerPicOrderCntCycle, this value can be computed up front. 
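+               ExpectedDeltaPerPicOrderCntCycle is the sum of offset_for_ref_frame[i]
+               over the whole cycle (subclause 8.2.1.2), so it could be cached once at
+               SPS time instead of being recomputed per picture.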
*/ + video->expectedDeltaPerPicOrderCntCycle = 0; + for (i = 0; i < (int)currSPS->num_ref_frames_in_pic_order_cnt_cycle; i++) + { + video->expectedDeltaPerPicOrderCntCycle += currSPS->offset_for_ref_frame[i]; + } + /* derive expectedPicOrderCnt */ + if (video->absFrameNum) + { + video->expectedPicOrderCnt = video->picOrderCntCycleCnt * video->expectedDeltaPerPicOrderCntCycle; + for (i = 0; i <= video->frameNumInPicOrderCntCycle; i++) + { + video->expectedPicOrderCnt += currSPS->offset_for_ref_frame[i]; + } + } + else + { + video->expectedPicOrderCnt = 0; + } + + if (video->nal_ref_idc == 0) + { + video->expectedPicOrderCnt += currSPS->offset_for_non_ref_pic; + } + /* derive TopFieldOrderCnt and BottomFieldOrderCnt */ + /* encoding part */ + if (!currSPS->delta_pic_order_always_zero_flag) + { + sliceHdr->delta_pic_order_cnt[0] = currInput->disp_order - encvid->dispOrdPOCRef - video->expectedPicOrderCnt; + + if (video->currPicParams->pic_order_present_flag && !sliceHdr->field_pic_flag) + { + sliceHdr->delta_pic_order_cnt[1] = sliceHdr->delta_pic_order_cnt[0]; /* should be calculated from currInput->bottom_field->disp_order */ + } + else + { + sliceHdr->delta_pic_order_cnt[1] = 0; + } + } + else + { + sliceHdr->delta_pic_order_cnt[0] = sliceHdr->delta_pic_order_cnt[1] = 0; + } + + if (sliceHdr->field_pic_flag == 0) + { + video->TopFieldOrderCnt = video->expectedPicOrderCnt + sliceHdr->delta_pic_order_cnt[0]; + video->BottomFieldOrderCnt = video->TopFieldOrderCnt + currSPS->offset_for_top_to_bottom_field + sliceHdr->delta_pic_order_cnt[1]; + + video->PicOrderCnt = AVC_MIN(video->TopFieldOrderCnt, video->BottomFieldOrderCnt); + } + else if (sliceHdr->bottom_field_flag == 0) + { + video->TopFieldOrderCnt = video->expectedPicOrderCnt + sliceHdr->delta_pic_order_cnt[0]; + video->PicOrderCnt = video->TopFieldOrderCnt; + } + else + { + video->BottomFieldOrderCnt = video->expectedPicOrderCnt + currSPS->offset_for_top_to_bottom_field + sliceHdr->delta_pic_order_cnt[0]; + video->PicOrderCnt = video->BottomFieldOrderCnt; + } + break; + + + case 2: /* POC MODE 2, subclause 8.2.1.3 */ + /* decoding order must be the same as display order */ + /* we don't check for that. The decoder will just output in decoding order. 
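+               With pic_order_cnt_type 2 the POC is tied directly to frame_num:
+               2*(FrameNumOffset + frame_num) for reference pictures and one less for
+               non-reference pictures, so two consecutive non-reference frames would end
+               up with the same POC; that is why the check below rejects them.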
*/ + /* Check for 2 consecutive non-reference frame */ + if (video->nal_ref_idc == 0) + { + if (encvid->dispOrdPOCRef == 1) + { + return AVCENC_CONSECUTIVE_NONREF; + } + encvid->dispOrdPOCRef = 1; /* act as a flag for non ref */ + } + else + { + encvid->dispOrdPOCRef = 0; + } + + + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + video->FrameNumOffset = 0; + } + else if (video->prevFrameNum > sliceHdr->frame_num) + { + video->FrameNumOffset = video->prevFrameNumOffset + video->MaxFrameNum; + } + else + { + video->FrameNumOffset = video->prevFrameNumOffset; + } + /* derive tempPicOrderCnt, we just use PicOrderCnt */ + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + video->PicOrderCnt = 0; + } + else if (video->nal_ref_idc == 0) + { + video->PicOrderCnt = 2 * (video->FrameNumOffset + sliceHdr->frame_num) - 1; + } + else + { + video->PicOrderCnt = 2 * (video->FrameNumOffset + sliceHdr->frame_num); + } + /* derive TopFieldOrderCnt and BottomFieldOrderCnt */ + if (sliceHdr->field_pic_flag == 0) + { + video->TopFieldOrderCnt = video->BottomFieldOrderCnt = video->PicOrderCnt; + } + else if (sliceHdr->bottom_field_flag) + { + video->BottomFieldOrderCnt = video->PicOrderCnt; + } + else + { + video->TopFieldOrderCnt = video->PicOrderCnt; + } + break; + default: + return AVCENC_POC_FAIL; + } + + return AVCENC_SUCCESS; +} + +/** see subclause 8.2.1 */ +AVCEnc_Status PostPOC(AVCCommonObj *video) +{ + AVCSliceHeader *sliceHdr = video->sliceHdr; + AVCSeqParamSet *currSPS = video->currSeqParams; + + video->prevFrameNum = sliceHdr->frame_num; + + switch (currSPS->pic_order_cnt_type) + { + case 0: /* subclause 8.2.1.1 */ + if (video->mem_mgr_ctrl_eq_5) + { + video->prevPicOrderCntMsb = 0; + video->prevPicOrderCntLsb = video->TopFieldOrderCnt; + } + else + { + video->prevPicOrderCntMsb = video->PicOrderCntMsb; + video->prevPicOrderCntLsb = sliceHdr->pic_order_cnt_lsb; + } + break; + case 1: /* subclause 8.2.1.2 and 8.2.1.3 */ + case 2: + if (video->mem_mgr_ctrl_eq_5) + { + video->prevFrameNumOffset = 0; + } + else + { + video->prevFrameNumOffset = video->FrameNumOffset; + } + break; + } + + return AVCENC_SUCCESS; +} + diff --git a/media/libstagefright/codecs/avc/enc/src/init.cpp b/media/libstagefright/codecs/avc/enc/src/init.cpp new file mode 100644 index 0000000..c258b57 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/init.cpp @@ -0,0 +1,899 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" +#include "avcenc_api.h" + +#define LOG2_MAX_FRAME_NUM_MINUS4 12 /* 12 default */ +#define SLICE_GROUP_CHANGE_CYCLE 1 /* default */ + +/* initialized variables to be used in SPS*/ +AVCEnc_Status SetEncodeParam(AVCHandle* avcHandle, AVCEncParams* encParam, + void* extSPS, void* extPPS) +{ + AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; + AVCCommonObj *video = encvid->common; + AVCSeqParamSet *seqParam = video->currSeqParams; + AVCPicParamSet *picParam = video->currPicParams; + AVCSliceHeader *sliceHdr = video->sliceHdr; + AVCRateControl *rateCtrl = encvid->rateCtrl; + AVCEnc_Status status; + void *userData = avcHandle->userData; + int ii, maxFrameNum; + + AVCSeqParamSet* extS = NULL; + AVCPicParamSet* extP = NULL; + + if (extSPS) extS = (AVCSeqParamSet*) extSPS; + if (extPPS) extP = (AVCPicParamSet*) extPPS; + + /* This part sets the default values of the encoding options this + library supports in seqParam, picParam and sliceHdr structures and + also copy the values from the encParam into the above 3 structures. + + Some parameters will be assigned later when we encode SPS or PPS such as + the seq_parameter_id or pic_parameter_id. Also some of the slice parameters + have to be re-assigned per slice basis such as frame_num, slice_type, + first_mb_in_slice, pic_order_cnt_lsb, slice_qp_delta, slice_group_change_cycle */ + + /* profile_idc, constrained_setx_flag and level_idc is set by VerifyProfile(), + and VerifyLevel() functions later. */ + + encvid->fullsearch_enable = encParam->fullsearch; + + encvid->outOfBandParamSet = ((encParam->out_of_band_param_set == AVC_ON) ? TRUE : FALSE); + + /* parameters derived from the the encParam that are used in SPS */ + if (extS) + { + video->MaxPicOrderCntLsb = 1 << (extS->log2_max_pic_order_cnt_lsb_minus4 + 4); + video->PicWidthInMbs = extS->pic_width_in_mbs_minus1 + 1; + video->PicHeightInMapUnits = extS->pic_height_in_map_units_minus1 + 1 ; + video->FrameHeightInMbs = (2 - extS->frame_mbs_only_flag) * video->PicHeightInMapUnits ; + } + else + { + video->MaxPicOrderCntLsb = 1 << (encParam->log2_max_poc_lsb_minus_4 + 4); + video->PicWidthInMbs = (encParam->width + 15) >> 4; /* round it to multiple of 16 */ + video->FrameHeightInMbs = (encParam->height + 15) >> 4; /* round it to multiple of 16 */ + video->PicHeightInMapUnits = video->FrameHeightInMbs; + } + + video->PicWidthInSamplesL = video->PicWidthInMbs * 16 ; + if (video->PicWidthInSamplesL + 32 > 0xFFFF) + { + return AVCENC_NOT_SUPPORTED; // we use 2-bytes for pitch + } + + video->PicWidthInSamplesC = video->PicWidthInMbs * 8 ; + video->PicHeightInMbs = video->FrameHeightInMbs; + video->PicSizeInMapUnits = video->PicWidthInMbs * video->PicHeightInMapUnits ; + video->PicHeightInSamplesL = video->PicHeightInMbs * 16; + video->PicHeightInSamplesC = video->PicHeightInMbs * 8; + video->PicSizeInMbs = video->PicWidthInMbs * video->PicHeightInMbs; + + if (!extS && !extP) + { + maxFrameNum = (encParam->idr_period == -1) ? 
(1 << 16) : encParam->idr_period; + ii = 0; + while (maxFrameNum > 0) + { + ii++; + maxFrameNum >>= 1; + } + if (ii < 4) ii = 4; + else if (ii > 16) ii = 16; + + seqParam->log2_max_frame_num_minus4 = ii - 4;//LOG2_MAX_FRAME_NUM_MINUS4; /* default */ + + video->MaxFrameNum = 1 << ii; //(LOG2_MAX_FRAME_NUM_MINUS4 + 4); /* default */ + video->MaxPicNum = video->MaxFrameNum; + + /************* set the SPS *******************/ + seqParam->seq_parameter_set_id = 0; /* start with zero */ + /* POC */ + seqParam->pic_order_cnt_type = encParam->poc_type; /* POC type */ + if (encParam->poc_type == 0) + { + if (/*encParam->log2_max_poc_lsb_minus_4<0 || (no need, it's unsigned)*/ + encParam->log2_max_poc_lsb_minus_4 > 12) + { + return AVCENC_INVALID_POC_LSB; + } + seqParam->log2_max_pic_order_cnt_lsb_minus4 = encParam->log2_max_poc_lsb_minus_4; + } + else if (encParam->poc_type == 1) + { + seqParam->delta_pic_order_always_zero_flag = encParam->delta_poc_zero_flag; + seqParam->offset_for_non_ref_pic = encParam->offset_poc_non_ref; + seqParam->offset_for_top_to_bottom_field = encParam->offset_top_bottom; + seqParam->num_ref_frames_in_pic_order_cnt_cycle = encParam->num_ref_in_cycle; + if (encParam->offset_poc_ref == NULL) + { + return AVCENC_ENCPARAM_MEM_FAIL; + } + for (ii = 0; ii < encParam->num_ref_frame; ii++) + { + seqParam->offset_for_ref_frame[ii] = encParam->offset_poc_ref[ii]; + } + } + /* number of reference frame */ + if (encParam->num_ref_frame > 16 || encParam->num_ref_frame < 0) + { + return AVCENC_INVALID_NUM_REF; + } + seqParam->num_ref_frames = encParam->num_ref_frame; /* num reference frame range 0...16*/ + seqParam->gaps_in_frame_num_value_allowed_flag = FALSE; + seqParam->pic_width_in_mbs_minus1 = video->PicWidthInMbs - 1; + seqParam->pic_height_in_map_units_minus1 = video->PicHeightInMapUnits - 1; + seqParam->frame_mbs_only_flag = TRUE; + seqParam->mb_adaptive_frame_field_flag = FALSE; + seqParam->direct_8x8_inference_flag = FALSE; /* default */ + seqParam->frame_cropping_flag = FALSE; + seqParam->frame_crop_bottom_offset = 0; + seqParam->frame_crop_left_offset = 0; + seqParam->frame_crop_right_offset = 0; + seqParam->frame_crop_top_offset = 0; + seqParam->vui_parameters_present_flag = FALSE; /* default */ + } + else if (extS) // use external SPS and PPS + { + seqParam->seq_parameter_set_id = extS->seq_parameter_set_id; + seqParam->log2_max_frame_num_minus4 = extS->log2_max_frame_num_minus4; + video->MaxFrameNum = 1 << (extS->log2_max_frame_num_minus4 + 4); + video->MaxPicNum = video->MaxFrameNum; + if (encParam->idr_period > (int)(video->MaxFrameNum) || (encParam->idr_period == -1)) + { + encParam->idr_period = (int)video->MaxFrameNum; + } + + seqParam->pic_order_cnt_type = extS->pic_order_cnt_type; + if (seqParam->pic_order_cnt_type == 0) + { + if (/*extS->log2_max_pic_order_cnt_lsb_minus4<0 || (no need it's unsigned)*/ + extS->log2_max_pic_order_cnt_lsb_minus4 > 12) + { + return AVCENC_INVALID_POC_LSB; + } + seqParam->log2_max_pic_order_cnt_lsb_minus4 = extS->log2_max_pic_order_cnt_lsb_minus4; + } + else if (seqParam->pic_order_cnt_type == 1) + { + seqParam->delta_pic_order_always_zero_flag = extS->delta_pic_order_always_zero_flag; + seqParam->offset_for_non_ref_pic = extS->offset_for_non_ref_pic; + seqParam->offset_for_top_to_bottom_field = extS->offset_for_top_to_bottom_field; + seqParam->num_ref_frames_in_pic_order_cnt_cycle = extS->num_ref_frames_in_pic_order_cnt_cycle; + if (extS->offset_for_ref_frame == NULL) + { + return AVCENC_ENCPARAM_MEM_FAIL; + } + for (ii = 0; ii < 
(int) extS->num_ref_frames; ii++) + { + seqParam->offset_for_ref_frame[ii] = extS->offset_for_ref_frame[ii]; + } + } + /* number of reference frame */ + if (extS->num_ref_frames > 16 /*|| extS->num_ref_frames<0 (no need, it's unsigned)*/) + { + return AVCENC_INVALID_NUM_REF; + } + seqParam->num_ref_frames = extS->num_ref_frames; /* num reference frame range 0...16*/ + seqParam->gaps_in_frame_num_value_allowed_flag = extS->gaps_in_frame_num_value_allowed_flag; + seqParam->pic_width_in_mbs_minus1 = extS->pic_width_in_mbs_minus1; + seqParam->pic_height_in_map_units_minus1 = extS->pic_height_in_map_units_minus1; + seqParam->frame_mbs_only_flag = extS->frame_mbs_only_flag; + if (extS->frame_mbs_only_flag != TRUE) + { + return AVCENC_NOT_SUPPORTED; + } + seqParam->mb_adaptive_frame_field_flag = extS->mb_adaptive_frame_field_flag; + if (extS->mb_adaptive_frame_field_flag != FALSE) + { + return AVCENC_NOT_SUPPORTED; + } + + seqParam->direct_8x8_inference_flag = extS->direct_8x8_inference_flag; + seqParam->frame_cropping_flag = extS->frame_cropping_flag ; + if (extS->frame_cropping_flag != FALSE) + { + return AVCENC_NOT_SUPPORTED; + } + + seqParam->frame_crop_bottom_offset = 0; + seqParam->frame_crop_left_offset = 0; + seqParam->frame_crop_right_offset = 0; + seqParam->frame_crop_top_offset = 0; + seqParam->vui_parameters_present_flag = extS->vui_parameters_present_flag; + if (extS->vui_parameters_present_flag) + { + memcpy(&(seqParam->vui_parameters), &(extS->vui_parameters), sizeof(AVCVUIParams)); + } + } + else + { + return AVCENC_NOT_SUPPORTED; + } + + /***************** now PPS ******************************/ + if (!extP && !extS) + { + picParam->pic_parameter_set_id = (uint)(-1); /* start with zero */ + picParam->seq_parameter_set_id = (uint)(-1); /* start with zero */ + picParam->entropy_coding_mode_flag = 0; /* default to CAVLC */ + picParam->pic_order_present_flag = 0; /* default for now, will need it for B-slice */ + /* FMO */ + if (encParam->num_slice_group < 1 || encParam->num_slice_group > MAX_NUM_SLICE_GROUP) + { + return AVCENC_INVALID_NUM_SLICEGROUP; + } + picParam->num_slice_groups_minus1 = encParam->num_slice_group - 1; + + if (picParam->num_slice_groups_minus1 > 0) + { + picParam->slice_group_map_type = encParam->fmo_type; + switch (encParam->fmo_type) + { + case 0: + for (ii = 0; ii <= (int)picParam->num_slice_groups_minus1; ii++) + { + picParam->run_length_minus1[ii] = encParam->run_length_minus1[ii]; + } + break; + case 2: + for (ii = 0; ii < (int)picParam->num_slice_groups_minus1; ii++) + { + picParam->top_left[ii] = encParam->top_left[ii]; + picParam->bottom_right[ii] = encParam->bottom_right[ii]; + } + break; + case 3: + case 4: + case 5: + if (encParam->change_dir_flag == AVC_ON) + { + picParam->slice_group_change_direction_flag = TRUE; + } + else + { + picParam->slice_group_change_direction_flag = FALSE; + } + if (/*encParam->change_rate_minus1 < 0 || (no need it's unsigned) */ + encParam->change_rate_minus1 > video->PicSizeInMapUnits - 1) + { + return AVCENC_INVALID_CHANGE_RATE; + } + picParam->slice_group_change_rate_minus1 = encParam->change_rate_minus1; + video->SliceGroupChangeRate = picParam->slice_group_change_rate_minus1 + 1; + break; + case 6: + picParam->pic_size_in_map_units_minus1 = video->PicSizeInMapUnits - 1; + + /* allocate picParam->slice_group_id */ + picParam->slice_group_id = (uint*)avcHandle->CBAVC_Malloc(userData, sizeof(uint) * video->PicSizeInMapUnits, DEFAULT_ATTR); + if (picParam->slice_group_id == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + 
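+                    /* slice_group_map_type 6 is the fully explicit FMO map: the caller
+                       supplies one slice group id per map unit (one per macroblock here,
+                       since only frame_mbs_only coding is supported), and the ids are
+                       simply copied into the PPS below. */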
if (encParam->slice_group == NULL) + { + return AVCENC_ENCPARAM_MEM_FAIL; + } + for (ii = 0; ii < (int)video->PicSizeInMapUnits; ii++) + { + picParam->slice_group_id[ii] = encParam->slice_group[ii]; + } + break; + default: + return AVCENC_INVALID_FMO_TYPE; + } + } + picParam->num_ref_idx_l0_active_minus1 = encParam->num_ref_frame - 1; /* assume frame only */ + picParam->num_ref_idx_l1_active_minus1 = 0; /* default value */ + picParam->weighted_pred_flag = 0; /* no weighted prediction supported */ + picParam->weighted_bipred_idc = 0; /* range 0,1,2 */ + if (/*picParam->weighted_bipred_idc < 0 || (no need, it's unsigned) */ + picParam->weighted_bipred_idc > 2) + { + return AVCENC_WEIGHTED_BIPRED_FAIL; + } + picParam->pic_init_qp_minus26 = 0; /* default, will be changed at slice level anyway */ + if (picParam->pic_init_qp_minus26 < -26 || picParam->pic_init_qp_minus26 > 25) + { + return AVCENC_INIT_QP_FAIL; /* out of range */ + } + picParam->pic_init_qs_minus26 = 0; + if (picParam->pic_init_qs_minus26 < -26 || picParam->pic_init_qs_minus26 > 25) + { + return AVCENC_INIT_QS_FAIL; /* out of range */ + } + + picParam->chroma_qp_index_offset = 0; /* default to zero for now */ + if (picParam->chroma_qp_index_offset < -12 || picParam->chroma_qp_index_offset > 12) + { + return AVCENC_CHROMA_QP_FAIL; /* out of range */ + } + /* deblocking */ + picParam->deblocking_filter_control_present_flag = (encParam->db_filter == AVC_ON) ? TRUE : FALSE ; + /* constrained intra prediction */ + picParam->constrained_intra_pred_flag = (encParam->constrained_intra_pred == AVC_ON) ? TRUE : FALSE; + picParam->redundant_pic_cnt_present_flag = 0; /* default */ + } + else if (extP)// external PPS + { + picParam->pic_parameter_set_id = extP->pic_parameter_set_id - 1; /* to be increased by one */ + picParam->seq_parameter_set_id = extP->seq_parameter_set_id; + picParam->entropy_coding_mode_flag = extP->entropy_coding_mode_flag; + if (extP->entropy_coding_mode_flag != 0) /* default to CAVLC */ + { + return AVCENC_NOT_SUPPORTED; + } + picParam->pic_order_present_flag = extP->pic_order_present_flag; /* default for now, will need it for B-slice */ + if (extP->pic_order_present_flag != 0) + { + return AVCENC_NOT_SUPPORTED; + } + /* FMO */ + if (/*(extP->num_slice_groups_minus1<0) || (no need it's unsigned) */ + (extP->num_slice_groups_minus1 > MAX_NUM_SLICE_GROUP - 1)) + { + return AVCENC_INVALID_NUM_SLICEGROUP; + } + picParam->num_slice_groups_minus1 = extP->num_slice_groups_minus1; + + if (picParam->num_slice_groups_minus1 > 0) + { + picParam->slice_group_map_type = extP->slice_group_map_type; + switch (extP->slice_group_map_type) + { + case 0: + for (ii = 0; ii <= (int)extP->num_slice_groups_minus1; ii++) + { + picParam->run_length_minus1[ii] = extP->run_length_minus1[ii]; + } + break; + case 2: + for (ii = 0; ii < (int)picParam->num_slice_groups_minus1; ii++) + { + picParam->top_left[ii] = extP->top_left[ii]; + picParam->bottom_right[ii] = extP->bottom_right[ii]; + } + break; + case 3: + case 4: + case 5: + picParam->slice_group_change_direction_flag = extP->slice_group_change_direction_flag; + if (/*extP->slice_group_change_rate_minus1 < 0 || (no need, it's unsigned) */ + extP->slice_group_change_rate_minus1 > video->PicSizeInMapUnits - 1) + { + return AVCENC_INVALID_CHANGE_RATE; + } + picParam->slice_group_change_rate_minus1 = extP->slice_group_change_rate_minus1; + video->SliceGroupChangeRate = picParam->slice_group_change_rate_minus1 + 1; + break; + case 6: + if (extP->pic_size_in_map_units_minus1 != 
video->PicSizeInMapUnits - 1) + { + return AVCENC_NOT_SUPPORTED; + } + + picParam->pic_size_in_map_units_minus1 = extP->pic_size_in_map_units_minus1; + + /* allocate picParam->slice_group_id */ + picParam->slice_group_id = (uint*)avcHandle->CBAVC_Malloc(userData, sizeof(uint) * video->PicSizeInMapUnits, DEFAULT_ATTR); + if (picParam->slice_group_id == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + if (extP->slice_group_id == NULL) + { + return AVCENC_ENCPARAM_MEM_FAIL; + } + for (ii = 0; ii < (int)video->PicSizeInMapUnits; ii++) + { + picParam->slice_group_id[ii] = extP->slice_group_id[ii]; + } + break; + default: + return AVCENC_INVALID_FMO_TYPE; + } + } + picParam->num_ref_idx_l0_active_minus1 = extP->num_ref_idx_l0_active_minus1; + picParam->num_ref_idx_l1_active_minus1 = extP->num_ref_idx_l1_active_minus1; /* default value */ + if (picParam->num_ref_idx_l1_active_minus1 != 0) + { + return AVCENC_NOT_SUPPORTED; + } + + if (extP->weighted_pred_flag) + { + return AVCENC_NOT_SUPPORTED; + } + + picParam->weighted_pred_flag = 0; /* no weighted prediction supported */ + picParam->weighted_bipred_idc = extP->weighted_bipred_idc; /* range 0,1,2 */ + if (/*picParam->weighted_bipred_idc < 0 || (no need, it's unsigned) */ + picParam->weighted_bipred_idc > 2) + { + return AVCENC_WEIGHTED_BIPRED_FAIL; + } + picParam->pic_init_qp_minus26 = extP->pic_init_qp_minus26; /* default, will be changed at slice level anyway */ + if (picParam->pic_init_qp_minus26 < -26 || picParam->pic_init_qp_minus26 > 25) + { + return AVCENC_INIT_QP_FAIL; /* out of range */ + } + picParam->pic_init_qs_minus26 = extP->pic_init_qs_minus26; + if (picParam->pic_init_qs_minus26 < -26 || picParam->pic_init_qs_minus26 > 25) + { + return AVCENC_INIT_QS_FAIL; /* out of range */ + } + + picParam->chroma_qp_index_offset = extP->chroma_qp_index_offset; /* default to zero for now */ + if (picParam->chroma_qp_index_offset < -12 || picParam->chroma_qp_index_offset > 12) + { + return AVCENC_CHROMA_QP_FAIL; /* out of range */ + } + /* deblocking */ + picParam->deblocking_filter_control_present_flag = extP->deblocking_filter_control_present_flag; + /* constrained intra prediction */ + picParam->constrained_intra_pred_flag = extP->constrained_intra_pred_flag; + if (extP->redundant_pic_cnt_present_flag != 0) + { + return AVCENC_NOT_SUPPORTED; + } + picParam->redundant_pic_cnt_present_flag = extP->redundant_pic_cnt_present_flag; /* default */ + } + else + { + return AVCENC_NOT_SUPPORTED; + } + + /****************** now set up some SliceHeader parameters ***********/ + if (picParam->deblocking_filter_control_present_flag == TRUE) + { + /* these values only present when db_filter is ON */ + if (encParam->disable_db_idc > 2) + { + return AVCENC_INVALID_DEBLOCK_IDC; /* out of range */ + } + sliceHdr->disable_deblocking_filter_idc = encParam->disable_db_idc; + + if (encParam->alpha_offset < -6 || encParam->alpha_offset > 6) + { + return AVCENC_INVALID_ALPHA_OFFSET; + } + sliceHdr->slice_alpha_c0_offset_div2 = encParam->alpha_offset; + + if (encParam->beta_offset < -6 || encParam->beta_offset > 6) + { + return AVCENC_INVALID_BETA_OFFSET; + } + sliceHdr->slice_beta_offset_div_2 = encParam->beta_offset; + } + if (encvid->outOfBandParamSet == TRUE) + { + sliceHdr->idr_pic_id = 0; + } + else + { + sliceHdr->idr_pic_id = (uint)(-1); /* start with zero */ + } + sliceHdr->field_pic_flag = FALSE; + sliceHdr->bottom_field_flag = FALSE; /* won't be used anyway */ + video->MbaffFrameFlag = (seqParam->mb_adaptive_frame_field_flag && !sliceHdr->field_pic_flag); + 
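+    /* Note: alpha_offset and beta_offset above are div2 values, so the accepted
+       -6..6 range corresponds to actual deblocking filter offsets of -12..12
+       (FilterOffsetA/B are derived from them, shifted left by one, in InitSlice()). */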
+ /* the rest will be set in InitSlice() */ + + /* now the rate control and performance related parameters */ + rateCtrl->scdEnable = (encParam->auto_scd == AVC_ON) ? TRUE : FALSE; + rateCtrl->idrPeriod = encParam->idr_period + 1; + rateCtrl->intraMBRate = encParam->intramb_refresh; + rateCtrl->dpEnable = (encParam->data_par == AVC_ON) ? TRUE : FALSE; + + rateCtrl->subPelEnable = (encParam->sub_pel == AVC_ON) ? TRUE : FALSE; + rateCtrl->mvRange = encParam->search_range; + + rateCtrl->subMBEnable = (encParam->submb_pred == AVC_ON) ? TRUE : FALSE; + rateCtrl->rdOptEnable = (encParam->rdopt_mode == AVC_ON) ? TRUE : FALSE; + rateCtrl->bidirPred = (encParam->bidir_pred == AVC_ON) ? TRUE : FALSE; + + rateCtrl->rcEnable = (encParam->rate_control == AVC_ON) ? TRUE : FALSE; + rateCtrl->initQP = encParam->initQP; + rateCtrl->initQP = AVC_CLIP3(0, 51, rateCtrl->initQP); + + rateCtrl->bitRate = encParam->bitrate; + rateCtrl->cpbSize = encParam->CPB_size; + rateCtrl->initDelayOffset = (rateCtrl->bitRate * encParam->init_CBP_removal_delay / 1000); + + if (encParam->frame_rate == 0) + { + return AVCENC_INVALID_FRAMERATE; + } + + rateCtrl->frame_rate = (OsclFloat)(encParam->frame_rate * 1.0 / 1000); +// rateCtrl->srcInterval = encParam->src_interval; + rateCtrl->first_frame = 1; /* set this flag for the first time */ + + /* contrained_setx_flag will be set inside the VerifyProfile called below.*/ + if (!extS && !extP) + { + seqParam->profile_idc = encParam->profile; + seqParam->constrained_set0_flag = FALSE; + seqParam->constrained_set1_flag = FALSE; + seqParam->constrained_set2_flag = FALSE; + seqParam->constrained_set3_flag = FALSE; + seqParam->level_idc = encParam->level; + } + else + { + seqParam->profile_idc = extS->profile_idc; + seqParam->constrained_set0_flag = extS->constrained_set0_flag; + seqParam->constrained_set1_flag = extS->constrained_set1_flag; + seqParam->constrained_set2_flag = extS->constrained_set2_flag; + seqParam->constrained_set3_flag = extS->constrained_set3_flag; + seqParam->level_idc = extS->level_idc; + } + + + status = VerifyProfile(encvid, seqParam, picParam); + if (status != AVCENC_SUCCESS) + { + return status; + } + + status = VerifyLevel(encvid, seqParam, picParam); + if (status != AVCENC_SUCCESS) + { + return status; + } + + return AVCENC_SUCCESS; +} + +/* verify the profile setting */ +AVCEnc_Status VerifyProfile(AVCEncObject *encvid, AVCSeqParamSet *seqParam, AVCPicParamSet *picParam) +{ + AVCRateControl *rateCtrl = encvid->rateCtrl; + AVCEnc_Status status = AVCENC_SUCCESS; + + if (seqParam->profile_idc == 0) /* find profile for this setting */ + { + /* find the right profile for it */ + if (seqParam->direct_8x8_inference_flag == TRUE && + picParam->entropy_coding_mode_flag == FALSE && + picParam->num_slice_groups_minus1 <= 7 /*&& + picParam->num_slice_groups_minus1>=0 (no need, it's unsigned) */) + { + seqParam->profile_idc = AVC_EXTENDED; + seqParam->constrained_set2_flag = TRUE; + } + + if (rateCtrl->dpEnable == FALSE && + picParam->num_slice_groups_minus1 == 0 && + picParam->redundant_pic_cnt_present_flag == FALSE) + { + seqParam->profile_idc = AVC_MAIN; + seqParam->constrained_set1_flag = TRUE; + } + + if (rateCtrl->bidirPred == FALSE && + rateCtrl->dpEnable == FALSE && + seqParam->frame_mbs_only_flag == TRUE && + picParam->weighted_pred_flag == FALSE && + picParam->weighted_bipred_idc == 0 && + picParam->entropy_coding_mode_flag == FALSE && + picParam->num_slice_groups_minus1 <= 7 /*&& + picParam->num_slice_groups_minus1>=0 (no need, it's unsigned)*/) + { + 
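+        /* every tool in use fits Baseline: no B prediction, no data partitioning,
+           frame MBs only, no weighted prediction, CAVLC, and at most 8 slice
+           groups -- Baseline is also the only profile this library actually
+           encodes (see the switch below). */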
seqParam->profile_idc = AVC_BASELINE; + seqParam->constrained_set0_flag = TRUE; + } + + if (seqParam->profile_idc == 0) /* still zero */ + { + return AVCENC_PROFILE_NOT_SUPPORTED; + } + } + + /* check the list of supported profile by this library */ + switch (seqParam->profile_idc) + { + case AVC_BASELINE: + if (rateCtrl->bidirPred == TRUE || + rateCtrl->dpEnable == TRUE || + seqParam->frame_mbs_only_flag != TRUE || + picParam->weighted_pred_flag == TRUE || + picParam->weighted_bipred_idc != 0 || + picParam->entropy_coding_mode_flag == TRUE || + picParam->num_slice_groups_minus1 > 7 /*|| + picParam->num_slice_groups_minus1<0 (no need, it's unsigned) */) + { + status = AVCENC_TOOLS_NOT_SUPPORTED; + } + break; + + case AVC_MAIN: + case AVC_EXTENDED: + status = AVCENC_PROFILE_NOT_SUPPORTED; + } + + return status; +} + +/* verify the level setting */ +AVCEnc_Status VerifyLevel(AVCEncObject *encvid, AVCSeqParamSet *seqParam, AVCPicParamSet *picParam) +{ + (void)(picParam); + + AVCRateControl *rateCtrl = encvid->rateCtrl; + AVCCommonObj *video = encvid->common; + int mb_per_sec, ii; + int lev_idx; + int dpb_size; + + mb_per_sec = (int)(video->PicSizeInMbs * rateCtrl->frame_rate + 0.5); + dpb_size = (seqParam->num_ref_frames * video->PicSizeInMbs * 3) >> 6; + + if (seqParam->level_idc == 0) /* find level for this setting */ + { + for (ii = 0; ii < MAX_LEVEL_IDX; ii++) + { + if (mb_per_sec <= MaxMBPS[ii] && + video->PicSizeInMbs <= (uint)MaxFS[ii] && + rateCtrl->bitRate <= (int32)MaxBR[ii]*1000 && + rateCtrl->cpbSize <= (int32)MaxCPB[ii]*1000 && + rateCtrl->mvRange <= MaxVmvR[ii] && + dpb_size <= MaxDPBX2[ii]*512) + { + seqParam->level_idc = mapIdx2Lev[ii]; + break; + } + } + if (seqParam->level_idc == 0) + { + return AVCENC_LEVEL_NOT_SUPPORTED; + } + } + + /* check if this level is supported by this library */ + lev_idx = mapLev2Idx[seqParam->level_idc]; + if (seqParam->level_idc == AVC_LEVEL1_B) + { + seqParam->constrained_set3_flag = 1; + } + + + if (lev_idx == 255) /* not defined */ + { + return AVCENC_LEVEL_NOT_SUPPORTED; + } + + /* check if the encoding setting complies with the level */ + if (mb_per_sec > MaxMBPS[lev_idx] || + video->PicSizeInMbs > (uint)MaxFS[lev_idx] || + rateCtrl->bitRate > (int32)MaxBR[lev_idx]*1000 || + rateCtrl->cpbSize > (int32)MaxCPB[lev_idx]*1000 || + rateCtrl->mvRange > MaxVmvR[lev_idx]) + { + return AVCENC_LEVEL_FAIL; + } + + return AVCENC_SUCCESS; +} + +/* initialize variables at the beginning of each frame */ +/* determine the picture type */ +/* encode POC */ +/* maybe we should do more stuff here. MotionEstimation+SCD and generate a new SPS and PPS */ +AVCEnc_Status InitFrame(AVCEncObject *encvid) +{ + AVCStatus ret; + AVCEnc_Status status; + AVCCommonObj *video = encvid->common; + AVCSliceHeader *sliceHdr = video->sliceHdr; + + /* look for the next frame in coding_order and look for available picture + in the DPB. Note, video->currFS->PicOrderCnt, currFS->FrameNum and currPic->PicNum + are set to wrong number in this function (right for decoder). */ + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + // call init DPB in here. + ret = AVCConfigureSequence(encvid->avcHandle, video, TRUE); + if (ret != AVC_SUCCESS) + { + return AVCENC_FAIL; + } + } + + /* flexible macroblock ordering (every frame)*/ + /* populate video->mapUnitToSliceGroupMap and video->MbToSliceGroupMap */ + /* It changes once per each PPS. 
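+       For example, with slice_group_map_type 0, two slice groups and
+       run_length_minus1 = {49, 48} on a QCIF picture (11x9 = 99 map units),
+       FMOInit() puts map units 0..49 in slice group 0 and 50..98 in group 1.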
*/ + FMOInit(video); + + ret = DPBInitBuffer(encvid->avcHandle, video); // get new buffer + + if (ret != AVC_SUCCESS) + { + return (AVCEnc_Status)ret; // AVCENC_PICTURE_READY, FAIL + } + + DPBInitPic(video, 0); /* 0 is dummy */ + + /************* determine picture type IDR or non-IDR ***********/ + video->currPicType = AVC_FRAME; + video->slice_data_partitioning = FALSE; + encvid->currInput->is_reference = 1; /* default to all frames */ + video->nal_ref_idc = 1; /* need to set this for InitPOC */ + video->currPic->isReference = TRUE; + + /************* set frame_num ********************/ + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + video->prevFrameNum = video->MaxFrameNum; + video->PrevRefFrameNum = 0; + sliceHdr->frame_num = 0; + } + /* otherwise, it's set to previous reference frame access unit's frame_num in decoding order, + see the end of PVAVCDecodeSlice()*/ + /* There's also restriction on the frame_num, see page 59 of JVT-I1010.doc. */ + /* Basically, frame_num can't be repeated unless it's opposite fields or non reference fields */ + else + { + sliceHdr->frame_num = (video->PrevRefFrameNum + 1) % video->MaxFrameNum; + } + video->CurrPicNum = sliceHdr->frame_num; /* for field_pic_flag = 0 */ + //video->CurrPicNum = 2*sliceHdr->frame_num + 1; /* for field_pic_flag = 1 */ + + /* assign pic_order_cnt, video->PicOrderCnt */ + status = InitPOC(encvid); + if (status != AVCENC_SUCCESS) /* incorrigable fail */ + { + return status; + } + + /* Initialize refListIdx for this picture */ + RefListInit(video); + + /************* motion estimation and scene analysis ************/ + // , to move this to MB-based MV search for comparison + // use sub-optimal QP for mv search + AVCMotionEstimation(encvid); /* AVCENC_SUCCESS or AVCENC_NEW_IDR */ + + /* after this point, the picture type will be fixed to either IDR or non-IDR */ + video->currFS->PicOrderCnt = video->PicOrderCnt; + video->currFS->FrameNum = video->sliceHdr->frame_num; + video->currPic->PicNum = video->CurrPicNum; + video->mbNum = 0; /* start from zero MB */ + encvid->currSliceGroup = 0; /* start from slice group #0 */ + encvid->numIntraMB = 0; /* reset this counter */ + + if (video->nal_unit_type == AVC_NALTYPE_IDR) + { + RCInitGOP(encvid); + + /* calculate picture QP */ + RCInitFrameQP(encvid); + + return AVCENC_NEW_IDR; + } + + /* calculate picture QP */ + RCInitFrameQP(encvid); /* get QP after MV search */ + + return AVCENC_SUCCESS; +} + +/* initialize variables for this slice */ +AVCEnc_Status InitSlice(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCSliceHeader *sliceHdr = video->sliceHdr; + AVCPicParamSet *currPPS = video->currPicParams; + AVCSeqParamSet *currSPS = video->currSeqParams; + int slice_type = video->slice_type; + + sliceHdr->first_mb_in_slice = video->mbNum; + if (video->mbNum) // not first slice of a frame + { + video->sliceHdr->slice_type = (AVCSliceType)slice_type; + } + + /* sliceHdr->slice_type already set in InitFrame */ + + sliceHdr->pic_parameter_set_id = video->currPicParams->pic_parameter_set_id; + + /* sliceHdr->frame_num already set in InitFrame */ + + if (!currSPS->frame_mbs_only_flag) /* we shouldn't need this check */ + { + sliceHdr->field_pic_flag = sliceHdr->bottom_field_flag = FALSE; + return AVCENC_TOOLS_NOT_SUPPORTED; + } + + /* sliceHdr->idr_pic_id already set in PVAVCEncodeNAL + + sliceHdr->pic_order_cnt_lsb already set in InitFrame..InitPOC + sliceHdr->delta_pic_order_cnt_bottom already set in InitPOC + + sliceHdr->delta_pic_order_cnt[0] already set in InitPOC + 
sliceHdr->delta_pic_order_cnt[1] already set in InitPOC + */ + + sliceHdr->redundant_pic_cnt = 0; /* default if(currPPS->redundant_pic_cnt_present_flag), range 0..127 */ + sliceHdr->direct_spatial_mv_pred_flag = 0; // default if(slice_type == AVC_B_SLICE) + + sliceHdr->num_ref_idx_active_override_flag = FALSE; /* default, if(slice_type== P,SP or B)*/ + sliceHdr->num_ref_idx_l0_active_minus1 = 0; /* default, if (num_ref_idx_active_override_flag) */ + sliceHdr->num_ref_idx_l1_active_minus1 = 0; /* default, if above and B_slice */ + /* the above 2 values range from 0..15 for frame picture and 0..31 for field picture */ + + /* ref_pic_list_reordering(), currently we don't do anything */ + sliceHdr->ref_pic_list_reordering_flag_l0 = FALSE; /* default */ + sliceHdr->ref_pic_list_reordering_flag_l1 = FALSE; /* default */ + /* if the above are TRUE, some other params must be set */ + + if ((currPPS->weighted_pred_flag && (slice_type == AVC_P_SLICE || slice_type == AVC_SP_SLICE)) || + (currPPS->weighted_bipred_idc == 1 && slice_type == AVC_B_SLICE)) + { + // pred_weight_table(); // not supported !! + return AVCENC_TOOLS_NOT_SUPPORTED; + } + + /* dec_ref_pic_marking(), this will be done later*/ + sliceHdr->no_output_of_prior_pics_flag = FALSE; /* default */ + sliceHdr->long_term_reference_flag = FALSE; /* for IDR frame, do not make it long term */ + sliceHdr->adaptive_ref_pic_marking_mode_flag = FALSE; /* default */ + /* other params are not set here because they are not used */ + + sliceHdr->cabac_init_idc = 0; /* default, if entropy_coding_mode_flag && slice_type==I or SI, range 0..2 */ + sliceHdr->slice_qp_delta = 0; /* default for now */ + sliceHdr->sp_for_switch_flag = FALSE; /* default, if slice_type == SP */ + sliceHdr->slice_qs_delta = 0; /* default, if slice_type == SP or SI */ + + /* derived variables from encParam */ + /* deblocking filter */ + video->FilterOffsetA = video->FilterOffsetB = 0; + if (currPPS->deblocking_filter_control_present_flag == TRUE) + { + video->FilterOffsetA = sliceHdr->slice_alpha_c0_offset_div2 << 1; + video->FilterOffsetB = sliceHdr->slice_beta_offset_div_2 << 1; + } + + /* flexible macroblock ordering */ + /* populate video->mapUnitToSliceGroupMap and video->MbToSliceGroupMap */ + /* We already call it at the end of PVAVCEncInitialize(). It changes once per each PPS. */ + if (video->currPicParams->num_slice_groups_minus1 > 0 && video->currPicParams->slice_group_map_type >= 3 + && video->currPicParams->slice_group_map_type <= 5) + { + sliceHdr->slice_group_change_cycle = SLICE_GROUP_CHANGE_CYCLE; /* default, don't understand how to set it!!!*/ + + video->MapUnitsInSliceGroup0 = + AVC_MIN(sliceHdr->slice_group_change_cycle * video->SliceGroupChangeRate, video->PicSizeInMapUnits); + + FMOInit(video); + } + + /* calculate SliceQPy first */ + /* calculate QSy first */ + + sliceHdr->slice_qp_delta = video->QPy - 26 - currPPS->pic_init_qp_minus26; + //sliceHdr->slice_qs_delta = video->QSy - 26 - currPPS->pic_init_qs_minus26; + + return AVCENC_SUCCESS; +} + diff --git a/media/libstagefright/codecs/avc/enc/src/intra_est.cpp b/media/libstagefright/codecs/avc/enc/src/intra_est.cpp new file mode 100644 index 0000000..17e5985 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/intra_est.cpp @@ -0,0 +1,2199 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + +#define TH_I4 0 /* threshold biasing toward I16 mode instead of I4 mode */ +#define TH_Intra 0 /* threshold biasing toward INTER mode instead of intra mode */ + +#define FIXED_INTRAPRED_MODE AVC_I16 +#define FIXED_I16_MODE AVC_I16_DC +#define FIXED_I4_MODE AVC_I4_Diagonal_Down_Left +#define FIXED_INTRA_CHROMA_MODE AVC_IC_DC + +#define CLIP_RESULT(x) if((uint)x > 0xFF){ \ + x = 0xFF & (~(x>>31));} + + +bool IntraDecisionABE(AVCEncObject *encvid, int min_cost, uint8 *curL, int picPitch) +{ + AVCCommonObj *video = encvid->common; + AVCFrameIO *currInput = encvid->currInput; + int orgPitch = currInput->pitch; + int x_pos = (video->mb_x) << 4; + int y_pos = (video->mb_y) << 4; + uint8 *orgY = currInput->YCbCr[0] + y_pos * orgPitch + x_pos; + int j; + uint8 *topL, *leftL, *orgY_2, *orgY_3; + int temp, SBE, offset; + OsclFloat ABE; + bool intra = true; + + if (((x_pos >> 4) != (int)video->PicWidthInMbs - 1) && + ((y_pos >> 4) != (int)video->PicHeightInMbs - 1) && + video->intraAvailA && + video->intraAvailB) + { + SBE = 0; + /* top neighbor */ + topL = curL - picPitch; + /* left neighbor */ + leftL = curL - 1; + orgY_2 = orgY - orgPitch; + + for (j = 0; j < 16; j++) + { + temp = *topL++ - orgY[j]; + SBE += ((temp >= 0) ? temp : -temp); + temp = *(leftL += picPitch) - *(orgY_2 += orgPitch); + SBE += ((temp >= 0) ? temp : -temp); + } + + /* calculate chroma */ + offset = (y_pos >> 2) * picPitch + (x_pos >> 1); + topL = video->currPic->Scb + offset; + orgY_2 = currInput->YCbCr[1] + offset + (y_pos >> 2) * (orgPitch - picPitch); + + leftL = topL - 1; + topL -= (picPitch >> 1); + orgY_3 = orgY_2 - (orgPitch >> 1); + for (j = 0; j < 8; j++) + { + temp = *topL++ - orgY_2[j]; + SBE += ((temp >= 0) ? temp : -temp); + temp = *(leftL += (picPitch >> 1)) - *(orgY_3 += (orgPitch >> 1)); + SBE += ((temp >= 0) ? temp : -temp); + } + + topL = video->currPic->Scr + offset; + orgY_2 = currInput->YCbCr[2] + offset + (y_pos >> 2) * (orgPitch - picPitch); + + leftL = topL - 1; + topL -= (picPitch >> 1); + orgY_3 = orgY_2 - (orgPitch >> 1); + for (j = 0; j < 8; j++) + { + temp = *topL++ - orgY_2[j]; + SBE += ((temp >= 0) ? temp : -temp); + temp = *(leftL += (picPitch >> 1)) - *(orgY_3 += (orgPitch >> 1)); + SBE += ((temp >= 0) ? 
temp : -temp); + } + + /* compare mincost/384 and SBE/64 */ + ABE = SBE / 64.0; + if (ABE*0.8 >= min_cost / 384.0) + { + intra = false; + } + } + + return intra; +} + +/* perform searching for MB mode */ +/* assuming that this is done inside the encoding loop, +no need to call InitNeighborAvailability */ + +void MBIntraSearch(AVCEncObject *encvid, int mbnum, uint8 *curL, int picPitch) +{ + AVCCommonObj *video = encvid->common; + AVCFrameIO *currInput = encvid->currInput; + AVCMacroblock *currMB = video->currMB; + int min_cost; + uint8 *orgY; + int x_pos = (video->mb_x) << 4; + int y_pos = (video->mb_y) << 4; + uint32 *saved_inter; + int j; + int orgPitch = currInput->pitch; + bool intra = true; + + currMB->CBP = 0; + + /* first do motion vector and variable block size search */ + min_cost = encvid->min_cost[mbnum]; + + /* now perform intra prediction search */ + /* need to add the check for encvid->intraSearch[video->mbNum] to skip intra + if it's not worth checking. */ + if (video->slice_type == AVC_P_SLICE) + { + /* Decide whether intra search is necessary or not */ + /* This one, we do it in the encoding loop so the neighboring pixel are the + actual reconstructed pixels. */ + intra = IntraDecisionABE(encvid, min_cost, curL, picPitch); + } + + if (intra == true || video->slice_type == AVC_I_SLICE) + { + orgY = currInput->YCbCr[0] + y_pos * orgPitch + x_pos; + + /* i16 mode search */ + /* generate all the predictions */ + intrapred_luma_16x16(encvid); + + /* evaluate them one by one */ + find_cost_16x16(encvid, orgY, &min_cost); + + if (video->slice_type == AVC_P_SLICE) + { + /* save current inter prediction */ + saved_inter = encvid->subpel_pred; /* reuse existing buffer */ + j = 16; + curL -= 4; + picPitch -= 16; + while (j--) + { + *saved_inter++ = *((uint32*)(curL += 4)); + *saved_inter++ = *((uint32*)(curL += 4)); + *saved_inter++ = *((uint32*)(curL += 4)); + *saved_inter++ = *((uint32*)(curL += 4)); + curL += picPitch; + } + + } + + /* i4 mode search */ + mb_intra4x4_search(encvid, &min_cost); + + encvid->min_cost[mbnum] = min_cost; /* update min_cost */ + } + + + if (currMB->mb_intra) + { + chroma_intra_search(encvid); + + /* need to set this in order for the MBInterPrediction to work!! 
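+           (zeroing mvL0 and setting every ref_idx_L0 entry to -1 overwrites whatever
+           the inter search left behind, so neighbouring macroblocks' motion vector
+           prediction sees this MB as intra instead of picking up stale motion data)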
*/ + memset(currMB->mvL0, 0, sizeof(int32)*16); + currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] = + currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = -1; + } + else if (video->slice_type == AVC_P_SLICE && intra == true) + { + /* restore current inter prediction */ + saved_inter = encvid->subpel_pred; /* reuse existing buffer */ + j = 16; + curL -= ((picPitch + 16) << 4); + while (j--) + { + *((uint32*)(curL += 4)) = *saved_inter++; + *((uint32*)(curL += 4)) = *saved_inter++; + *((uint32*)(curL += 4)) = *saved_inter++; + *((uint32*)(curL += 4)) = *saved_inter++; + curL += picPitch; + } + } + + return ; +} + +/* generate all the prediction values */ +void intrapred_luma_16x16(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCPictureData *currPic = video->currPic; + + int x_pos = (video->mb_x) << 4; + int y_pos = (video->mb_y) << 4; + int pitch = currPic->pitch; + + int offset = y_pos * pitch + x_pos; + + uint8 *pred, *top, *left; + uint8 *curL = currPic->Sl + offset; /* point to reconstructed frame */ + uint32 word1, word2, word3, word4; + uint32 sum = 0; + + int a_16, b, c, factor_c; + uint8 *comp_ref_x0, *comp_ref_x1, *comp_ref_y0, *comp_ref_y1; + int H = 0, V = 0, tmp, value; + int i; + + if (video->intraAvailB) + { + //get vertical prediction mode + top = curL - pitch; + + pred = encvid->pred_i16[AVC_I16_Vertical] - 16; + + word1 = *((uint32*)(top)); /* read 4 bytes from top */ + word2 = *((uint32*)(top + 4)); /* read 4 bytes from top */ + word3 = *((uint32*)(top + 8)); /* read 4 bytes from top */ + word4 = *((uint32*)(top + 12)); /* read 4 bytes from top */ + + for (i = 0; i < 16; i++) + { + *((uint32*)(pred += 16)) = word1; + *((uint32*)(pred + 4)) = word2; + *((uint32*)(pred + 8)) = word3; + *((uint32*)(pred + 12)) = word4; + + } + + sum = word1 & 0xFF00FF; + word1 = (word1 >> 8) & 0xFF00FF; + sum += word1; + word1 = (word2 & 0xFF00FF); + sum += word1; + word2 = (word2 >> 8) & 0xFF00FF; + sum += word2; + word1 = (word3 & 0xFF00FF); + sum += word1; + word3 = (word3 >> 8) & 0xFF00FF; + sum += word3; + word1 = (word4 & 0xFF00FF); + sum += word1; + word4 = (word4 >> 8) & 0xFF00FF; + sum += word4; + + sum += (sum >> 16); + sum &= 0xFFFF; + + if (!video->intraAvailA) + { + sum = (sum + 8) >> 4; + } + } + + if (video->intraAvailA) + { + // get horizontal mode + left = curL - 1 - pitch; + + pred = encvid->pred_i16[AVC_I16_Horizontal] - 16; + + for (i = 0; i < 16; i++) + { + word1 = *(left += pitch); + sum += word1; + + word1 = (word1 << 8) | word1; + word1 = (word1 << 16) | word1; /* make it 4 */ + + *(uint32*)(pred += 16) = word1; + *(uint32*)(pred + 4) = word1; + *(uint32*)(pred + 8) = word1; + *(uint32*)(pred + 12) = word1; + } + + if (!video->intraAvailB) + { + sum = (sum + 8) >> 4; + } + else + { + sum = (sum + 16) >> 5; + } + } + + // get DC mode + if (!video->intraAvailA && !video->intraAvailB) + { + sum = 0x80808080; + } + else + { + sum = (sum << 8) | sum; + sum = (sum << 16) | sum; + } + + pred = encvid->pred_i16[AVC_I16_DC] - 16; + for (i = 0; i < 16; i++) + { + *((uint32*)(pred += 16)) = sum; + *((uint32*)(pred + 4)) = sum; + *((uint32*)(pred + 8)) = sum; + *((uint32*)(pred + 12)) = sum; + } + + // get plane mode + if (video->intraAvailA && video->intraAvailB && video->intraAvailD) + { + pred = encvid->pred_i16[AVC_I16_Plane] - 16; + + comp_ref_x0 = curL - pitch + 8; + comp_ref_x1 = curL - pitch + 6; + comp_ref_y0 = curL - 1 + (pitch << 3); + comp_ref_y1 = curL - 1 + 6 * pitch; + + for (i = 1; i < 8; i++) + { + H += i * (*comp_ref_x0++ - *comp_ref_x1--); + V += i 
* (*comp_ref_y0 - *comp_ref_y1); + comp_ref_y0 += pitch; + comp_ref_y1 -= pitch; + } + + H += i * (*comp_ref_x0++ - curL[-pitch-1]); + V += i * (*comp_ref_y0 - *comp_ref_y1); + + + a_16 = ((*(curL - pitch + 15) + *(curL - 1 + 15 * pitch)) << 4) + 16;; + b = (5 * H + 32) >> 6; + c = (5 * V + 32) >> 6; + + tmp = 0; + for (i = 0; i < 16; i++) + { + factor_c = a_16 + c * (tmp++ - 7); + factor_c -= 7 * b; + + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = value; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 8); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 16); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 24); + *((uint32*)(pred += 16)) = word1; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = value; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 8); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 16); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 24); + *((uint32*)(pred + 4)) = word1; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = value; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 8); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 16); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 24); + *((uint32*)(pred + 8)) = word1; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = value; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 8); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + word1 = (word1) | (value << 16); + value = factor_c >> 5; + CLIP_RESULT(value) + word1 = (word1) | (value << 24); + *((uint32*)(pred + 12)) = word1; + } + } + + return ; +} + + +/* evaluate each prediction mode of I16 */ +void find_cost_16x16(AVCEncObject *encvid, uint8 *orgY, int *min_cost) +{ + AVCCommonObj *video = encvid->common; + AVCMacroblock *currMB = video->currMB; + int cost; + int org_pitch = encvid->currInput->pitch; + + /* evaluate vertical mode */ + if (video->intraAvailB) + { + cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Vertical], *min_cost); + if (cost < *min_cost) + { + *min_cost = cost; + currMB->mbMode = AVC_I16; + currMB->mb_intra = 1; + currMB->i16Mode = AVC_I16_Vertical; + } + } + + + /* evaluate horizontal mode */ + if (video->intraAvailA) + { + cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Horizontal], *min_cost); + if (cost < *min_cost) + { + *min_cost = cost; + currMB->mbMode = AVC_I16; + currMB->mb_intra = 1; + currMB->i16Mode = AVC_I16_Horizontal; + } + } + + /* evaluate DC mode */ + cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_DC], *min_cost); + if (cost < *min_cost) + { + *min_cost = cost; + currMB->mbMode = AVC_I16; + currMB->mb_intra = 1; + currMB->i16Mode = AVC_I16_DC; + } + + /* evaluate plane mode */ + if (video->intraAvailA && video->intraAvailB && video->intraAvailD) + { + cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Plane], *min_cost); + if (cost < *min_cost) + { + *min_cost = cost; + currMB->mbMode = AVC_I16; + currMB->mb_intra = 1; + currMB->i16Mode = AVC_I16_Plane; + } + } + + return ; +} + + +int cost_i16(uint8 *org, int org_pitch, uint8 *pred, int min_cost) +{ 
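+    /* SATD cost of one I16 prediction candidate: the 16x16 residual is run
+       through 4x4 Hadamard transforms (a horizontal pass over the rows, then a
+       vertical pass), the per-4x4-block DC terms are kept out of that sum and
+       fed through an extra 4x4 Hadamard (mirroring the I16 luma DC transform),
+       and the total absolute sum is halved.  The min_cost argument allows an
+       early exit once the running cost can no longer beat the best mode so far. */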
+ + int cost; + int j, k; + int16 res[256], *pres; // residue + int m0, m1, m2, m3; + + // calculate SATD + org_pitch -= 16; + pres = res; + // horizontal transform + for (j = 0; j < 16; j++) + { + k = 4; + while (k > 0) + { + m0 = org[0] - pred[0]; + m3 = org[3] - pred[3]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = org[1] - pred[1]; + m2 = org[2] - pred[2]; + m1 += m2; + m2 = m1 - (m2 << 1); + pres[0] = m0 + m1; + pres[2] = m0 - m1; + pres[1] = m2 + m3; + pres[3] = m3 - m2; + + org += 4; + pres += 4; + pred += 4; + k--; + } + org += org_pitch; + } + /* vertical transform */ + cost = 0; + for (j = 0; j < 4; j++) + { + pres = res + (j << 6); + k = 16; + while (k > 0) + { + m0 = pres[0]; + m3 = pres[3<<4]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = pres[1<<4]; + m2 = pres[2<<4]; + m1 += m2; + m2 = m1 - (m2 << 1); + pres[0] = m0 = m0 + m1; + + if (k&0x3) // only sum up non DC values. + { + cost += ((m0 > 0) ? m0 : -m0); + } + + m1 = m0 - (m1 << 1); + cost += ((m1 > 0) ? m1 : -m1); + m3 = m2 + m3; + cost += ((m3 > 0) ? m3 : -m3); + m2 = m3 - (m2 << 1); + cost += ((m2 > 0) ? m2 : -m2); + + pres++; + k--; + } + if ((cost >> 1) > min_cost) /* early drop out */ + { + return (cost >> 1); + } + } + + /* Hadamard of the DC coefficient */ + pres = res; + k = 4; + while (k > 0) + { + m0 = pres[0]; + m3 = pres[3<<2]; + m0 >>= 2; + m0 += (m3 >> 2); + m3 = m0 - (m3 >> 1); + m1 = pres[1<<2]; + m2 = pres[2<<2]; + m1 >>= 2; + m1 += (m2 >> 2); + m2 = m1 - (m2 >> 1); + pres[0] = (m0 + m1); + pres[2<<2] = (m0 - m1); + pres[1<<2] = (m2 + m3); + pres[3<<2] = (m3 - m2); + pres += (4 << 4); + k--; + } + + pres = res; + k = 4; + while (k > 0) + { + m0 = pres[0]; + m3 = pres[3<<6]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = pres[1<<6]; + m2 = pres[2<<6]; + m1 += m2; + m2 = m1 - (m2 << 1); + m0 = m0 + m1; + cost += ((m0 >= 0) ? m0 : -m0); + m1 = m0 - (m1 << 1); + cost += ((m1 >= 0) ? m1 : -m1); + m3 = m2 + m3; + cost += ((m3 >= 0) ? m3 : -m3); + m2 = m3 - (m2 << 1); + cost += ((m2 >= 0) ? 
m2 : -m2); + pres += 4; + + if ((cost >> 1) > min_cost) /* early drop out */ + { + return (cost >> 1); + } + + k--; + } + + return (cost >> 1); +} + + +void mb_intra4x4_search(AVCEncObject *encvid, int *min_cost) +{ + AVCCommonObj *video = encvid->common; + AVCMacroblock *currMB = video->currMB; + AVCPictureData *currPic = video->currPic; + AVCFrameIO *currInput = encvid->currInput; + int pitch = currPic->pitch; + int org_pitch = currInput->pitch; + int offset; + uint8 *curL, *comp, *org4, *org8; + int y = video->mb_y << 4; + int x = video->mb_x << 4; + + int b8, b4, cost4x4, blkidx; + int cost = 0; + int numcoef; + int dummy = 0; + int mb_intra = currMB->mb_intra; // save the original value + + offset = y * pitch + x; + + curL = currPic->Sl + offset; + org8 = currInput->YCbCr[0] + y * org_pitch + x; + video->pred_pitch = 4; + + cost = (int)(6.0 * encvid->lambda_mode + 0.4999); + cost <<= 2; + + currMB->mb_intra = 1; // temporary set this to one to enable the IDCT + // operation inside dct_luma + + for (b8 = 0; b8 < 4; b8++) + { + comp = curL; + org4 = org8; + + for (b4 = 0; b4 < 4; b4++) + { + blkidx = blkIdx2blkXY[b8][b4]; + cost4x4 = blk_intra4x4_search(encvid, blkidx, comp, org4); + cost += cost4x4; + if (cost > *min_cost) + { + currMB->mb_intra = mb_intra; // restore the value + return ; + } + + /* do residue, Xfrm, Q, invQ, invXfrm, recon and save the DCT coefs.*/ + video->pred_block = encvid->pred_i4[currMB->i4Mode[blkidx]]; + numcoef = dct_luma(encvid, blkidx, comp, org4, &dummy); + currMB->nz_coeff[blkidx] = numcoef; + if (numcoef) + { + video->cbp4x4 |= (1 << blkidx); + currMB->CBP |= (1 << b8); + } + + if (b4&1) + { + comp += ((pitch << 2) - 4); + org4 += ((org_pitch << 2) - 4); + } + else + { + comp += 4; + org4 += 4; + } + } + + if (b8&1) + { + curL += ((pitch << 3) - 8); + org8 += ((org_pitch << 3) - 8); + } + else + { + curL += 8; + org8 += 8; + } + } + + currMB->mb_intra = mb_intra; // restore the value + + if (cost < *min_cost) + { + *min_cost = cost; + currMB->mbMode = AVC_I4; + currMB->mb_intra = 1; + } + + return ; +} + + +/* search for i4 mode for a 4x4 block */ +int blk_intra4x4_search(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org) +{ + AVCCommonObj *video = encvid->common; + AVCNeighborAvailability availability; + AVCMacroblock *currMB = video->currMB; + bool top_left = FALSE; + int pitch = video->currPic->pitch; + uint8 mode_avail[AVCNumI4PredMode]; + uint32 temp, DC; + uint8 *pred; + int org_pitch = encvid->currInput->pitch; + uint16 min_cost, cost; + + int P_x, Q_x, R_x, P_y, Q_y, R_y, D, D0, D1; + int P0, Q0, R0, S0, P1, Q1, R1, P2, Q2; + uint8 P_A, P_B, P_C, P_D, P_E, P_F, P_G, P_H, P_I, P_J, P_K, P_L, P_X; + int r0, r1, r2, r3, r4, r5, r6, r7; + int x0, x1, x2, x3, x4, x5; + uint32 temp1, temp2; + + int ipmode, mostProbableMode; + int fixedcost = 4 * encvid->lambda_mode; + int min_sad = 0x7FFF; + + availability.left = TRUE; + availability.top = TRUE; + if (blkidx <= 3) /* top row block (!block_y) */ + { /* check availability up */ + availability.top = video->intraAvailB ; + } + if (!(blkidx&0x3)) /* left column block (!block_x)*/ + { /* check availability left */ + availability.left = video->intraAvailA ; + } + availability.top_right = BlkTopRight[blkidx]; + + if (availability.top_right == 2) + { + availability.top_right = video->intraAvailB; + } + else if (availability.top_right == 3) + { + availability.top_right = video->intraAvailC; + } + + if (availability.top == TRUE) + { + temp = *(uint32*)(cur - pitch); + P_A = temp & 0xFF; + P_B = (temp >> 8) 
& 0xFF; + P_C = (temp >> 16) & 0xFF; + P_D = (temp >> 24) & 0xFF; + } + else + { + P_A = P_B = P_C = P_D = 128; + } + + if (availability.top_right == TRUE) + { + temp = *(uint32*)(cur - pitch + 4); + P_E = temp & 0xFF; + P_F = (temp >> 8) & 0xFF; + P_G = (temp >> 16) & 0xFF; + P_H = (temp >> 24) & 0xFF; + } + else + { + P_E = P_F = P_G = P_H = 128; + } + + if (availability.left == TRUE) + { + cur--; + P_I = *cur; + P_J = *(cur += pitch); + P_K = *(cur += pitch); + P_L = *(cur + pitch); + cur -= (pitch << 1); + cur++; + } + else + { + P_I = P_J = P_K = P_L = 128; + } + + /* check if top-left pixel is available */ + if (((blkidx > 3) && (blkidx&0x3)) || ((blkidx > 3) && video->intraAvailA) + || ((blkidx&0x3) && video->intraAvailB) + || (video->intraAvailA && video->intraAvailD && video->intraAvailB)) + { + top_left = TRUE; + P_X = *(cur - pitch - 1); + } + else + { + P_X = 128; + } + + //===== INTRA PREDICTION FOR 4x4 BLOCK ===== + /* vertical */ + mode_avail[AVC_I4_Vertical] = 0; + if (availability.top) + { + mode_avail[AVC_I4_Vertical] = 1; + pred = encvid->pred_i4[AVC_I4_Vertical]; + + temp = (P_D << 24) | (P_C << 16) | (P_B << 8) | P_A ; + *((uint32*)pred) = temp; /* write 4 at a time */ + *((uint32*)(pred += 4)) = temp; + *((uint32*)(pred += 4)) = temp; + *((uint32*)(pred += 4)) = temp; + } + /* horizontal */ + mode_avail[AVC_I4_Horizontal] = 0; + mode_avail[AVC_I4_Horizontal_Up] = 0; + if (availability.left) + { + mode_avail[AVC_I4_Horizontal] = 1; + pred = encvid->pred_i4[AVC_I4_Horizontal]; + + temp = P_I | (P_I << 8); + temp = temp | (temp << 16); + *((uint32*)pred) = temp; + temp = P_J | (P_J << 8); + temp = temp | (temp << 16); + *((uint32*)(pred += 4)) = temp; + temp = P_K | (P_K << 8); + temp = temp | (temp << 16); + *((uint32*)(pred += 4)) = temp; + temp = P_L | (P_L << 8); + temp = temp | (temp << 16); + *((uint32*)(pred += 4)) = temp; + + mode_avail[AVC_I4_Horizontal_Up] = 1; + pred = encvid->pred_i4[AVC_I4_Horizontal_Up]; + + Q0 = (P_J + P_K + 1) >> 1; + Q1 = (P_J + (P_K << 1) + P_L + 2) >> 2; + P0 = ((P_I + P_J + 1) >> 1); + P1 = ((P_I + (P_J << 1) + P_K + 2) >> 2); + + temp = P0 | (P1 << 8); // [P0 P1 Q0 Q1] + temp |= (Q0 << 16); // [Q0 Q1 R0 DO] + temp |= (Q1 << 24); // [R0 D0 D1 D1] + *((uint32*)pred) = temp; // [D1 D1 D1 D1] + + D0 = (P_K + 3 * P_L + 2) >> 2; + R0 = (P_K + P_L + 1) >> 1; + + temp = Q0 | (Q1 << 8); + temp |= (R0 << 16); + temp |= (D0 << 24); + *((uint32*)(pred += 4)) = temp; + + D1 = P_L; + + temp = R0 | (D0 << 8); + temp |= (D1 << 16); + temp |= (D1 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = D1 | (D1 << 8); + temp |= (temp << 16); + *((uint32*)(pred += 4)) = temp; + } + /* DC */ + mode_avail[AVC_I4_DC] = 1; + pred = encvid->pred_i4[AVC_I4_DC]; + if (availability.left) + { + DC = P_I + P_J + P_K + P_L; + + if (availability.top) + { + DC = (P_A + P_B + P_C + P_D + DC + 4) >> 3; + } + else + { + DC = (DC + 2) >> 2; + + } + } + else if (availability.top) + { + DC = (P_A + P_B + P_C + P_D + 2) >> 2; + + } + else + { + DC = 128; + } + + temp = DC | (DC << 8); + temp = temp | (temp << 16); + *((uint32*)pred) = temp; + *((uint32*)(pred += 4)) = temp; + *((uint32*)(pred += 4)) = temp; + *((uint32*)(pred += 4)) = temp; + + /* Down-left */ + mode_avail[AVC_I4_Diagonal_Down_Left] = 0; + + if (availability.top) + { + mode_avail[AVC_I4_Diagonal_Down_Left] = 1; + + pred = encvid->pred_i4[AVC_I4_Diagonal_Down_Left]; + + r0 = P_A; + r1 = P_B; + r2 = P_C; + r3 = P_D; + + r0 += (r1 << 1); + r0 += r2; + r0 += 2; + r0 >>= 2; + r1 += (r2 << 1); + r1 += r3; + r1 += 
2; + r1 >>= 2; + + if (availability.top_right) + { + r4 = P_E; + r5 = P_F; + r6 = P_G; + r7 = P_H; + + r2 += (r3 << 1); + r2 += r4; + r2 += 2; + r2 >>= 2; + r3 += (r4 << 1); + r3 += r5; + r3 += 2; + r3 >>= 2; + r4 += (r5 << 1); + r4 += r6; + r4 += 2; + r4 >>= 2; + r5 += (r6 << 1); + r5 += r7; + r5 += 2; + r5 >>= 2; + r6 += (3 * r7); + r6 += 2; + r6 >>= 2; + temp = r0 | (r1 << 8); + temp |= (r2 << 16); + temp |= (r3 << 24); + *((uint32*)pred) = temp; + + temp = (temp >> 8) | (r4 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = (temp >> 8) | (r5 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = (temp >> 8) | (r6 << 24); + *((uint32*)(pred += 4)) = temp; + } + else + { + r2 += (r3 * 3); + r2 += 2; + r2 >>= 2; + r3 = ((r3 << 2) + 2); + r3 >>= 2; + + temp = r0 | (r1 << 8); + temp |= (r2 << 16); + temp |= (r3 << 24); + *((uint32*)pred) = temp; + + temp = (temp >> 8) | (r3 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = (temp >> 8) | (r3 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = (temp >> 8) | (r3 << 24); + *((uint32*)(pred += 4)) = temp; + + } + } + + /* Down Right */ + mode_avail[AVC_I4_Diagonal_Down_Right] = 0; + /* Diagonal Vertical Right */ + mode_avail[AVC_I4_Vertical_Right] = 0; + /* Horizontal Down */ + mode_avail[AVC_I4_Horizontal_Down] = 0; + + if (top_left == TRUE) + { + /* Down Right */ + mode_avail[AVC_I4_Diagonal_Down_Right] = 1; + pred = encvid->pred_i4[AVC_I4_Diagonal_Down_Right]; + + Q_x = (P_A + 2 * P_B + P_C + 2) >> 2; + R_x = (P_B + 2 * P_C + P_D + 2) >> 2; + P_x = (P_X + 2 * P_A + P_B + 2) >> 2; + D = (P_A + 2 * P_X + P_I + 2) >> 2; + P_y = (P_X + 2 * P_I + P_J + 2) >> 2; + Q_y = (P_I + 2 * P_J + P_K + 2) >> 2; + R_y = (P_J + 2 * P_K + P_L + 2) >> 2; + + /* we can pack these */ + temp = D | (P_x << 8); //[D P_x Q_x R_x] + //[P_y D P_x Q_x] + temp |= (Q_x << 16); //[Q_y P_y D P_x] + temp |= (R_x << 24); //[R_y Q_y P_y D ] + *((uint32*)pred) = temp; + + temp = P_y | (D << 8); + temp |= (P_x << 16); + temp |= (Q_x << 24); + *((uint32*)(pred += 4)) = temp; + + temp = Q_y | (P_y << 8); + temp |= (D << 16); + temp |= (P_x << 24); + *((uint32*)(pred += 4)) = temp; + + temp = R_y | (Q_y << 8); + temp |= (P_y << 16); + temp |= (D << 24); + *((uint32*)(pred += 4)) = temp; + + + /* Diagonal Vertical Right */ + mode_avail[AVC_I4_Vertical_Right] = 1; + pred = encvid->pred_i4[AVC_I4_Vertical_Right]; + + Q0 = P_A + P_B + 1; + R0 = P_B + P_C + 1; + S0 = P_C + P_D + 1; + P0 = P_X + P_A + 1; + D = (P_I + 2 * P_X + P_A + 2) >> 2; + + P1 = (P0 + Q0) >> 2; + Q1 = (Q0 + R0) >> 2; + R1 = (R0 + S0) >> 2; + + P0 >>= 1; + Q0 >>= 1; + R0 >>= 1; + S0 >>= 1; + + P2 = (P_X + 2 * P_I + P_J + 2) >> 2; + Q2 = (P_I + 2 * P_J + P_K + 2) >> 2; + + temp = P0 | (Q0 << 8); //[P0 Q0 R0 S0] + //[D P1 Q1 R1] + temp |= (R0 << 16); //[P2 P0 Q0 R0] + temp |= (S0 << 24); //[Q2 D P1 Q1] + *((uint32*)pred) = temp; + + temp = D | (P1 << 8); + temp |= (Q1 << 16); + temp |= (R1 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = P2 | (P0 << 8); + temp |= (Q0 << 16); + temp |= (R0 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = Q2 | (D << 8); + temp |= (P1 << 16); + temp |= (Q1 << 24); + *((uint32*)(pred += 4)) = temp; + + + /* Horizontal Down */ + mode_avail[AVC_I4_Horizontal_Down] = 1; + pred = encvid->pred_i4[AVC_I4_Horizontal_Down]; + + + Q2 = (P_A + 2 * P_B + P_C + 2) >> 2; + P2 = (P_X + 2 * P_A + P_B + 2) >> 2; + D = (P_I + 2 * P_X + P_A + 2) >> 2; + P0 = P_X + P_I + 1; + Q0 = P_I + P_J + 1; + R0 = P_J + P_K + 1; + S0 = P_K + P_L + 1; + + P1 = (P0 + Q0) >> 2; + Q1 = (Q0 + R0) >> 2; + R1 = (R0 
+ S0) >> 2; + + P0 >>= 1; + Q0 >>= 1; + R0 >>= 1; + S0 >>= 1; + + + /* we can pack these */ + temp = P0 | (D << 8); //[P0 D P2 Q2] + //[Q0 P1 P0 D ] + temp |= (P2 << 16); //[R0 Q1 Q0 P1] + temp |= (Q2 << 24); //[S0 R1 R0 Q1] + *((uint32*)pred) = temp; + + temp = Q0 | (P1 << 8); + temp |= (P0 << 16); + temp |= (D << 24); + *((uint32*)(pred += 4)) = temp; + + temp = R0 | (Q1 << 8); + temp |= (Q0 << 16); + temp |= (P1 << 24); + *((uint32*)(pred += 4)) = temp; + + temp = S0 | (R1 << 8); + temp |= (R0 << 16); + temp |= (Q1 << 24); + *((uint32*)(pred += 4)) = temp; + + } + + /* vertical left */ + mode_avail[AVC_I4_Vertical_Left] = 0; + if (availability.top) + { + mode_avail[AVC_I4_Vertical_Left] = 1; + pred = encvid->pred_i4[AVC_I4_Vertical_Left]; + + x0 = P_A + P_B + 1; + x1 = P_B + P_C + 1; + x2 = P_C + P_D + 1; + if (availability.top_right) + { + x3 = P_D + P_E + 1; + x4 = P_E + P_F + 1; + x5 = P_F + P_G + 1; + } + else + { + x3 = x4 = x5 = (P_D << 1) + 1; + } + + temp1 = (x0 >> 1); + temp1 |= ((x1 >> 1) << 8); + temp1 |= ((x2 >> 1) << 16); + temp1 |= ((x3 >> 1) << 24); + + *((uint32*)pred) = temp1; + + temp2 = ((x0 + x1) >> 2); + temp2 |= (((x1 + x2) >> 2) << 8); + temp2 |= (((x2 + x3) >> 2) << 16); + temp2 |= (((x3 + x4) >> 2) << 24); + + *((uint32*)(pred += 4)) = temp2; + + temp1 = (temp1 >> 8) | ((x4 >> 1) << 24); /* rotate out old value */ + *((uint32*)(pred += 4)) = temp1; + + temp2 = (temp2 >> 8) | (((x4 + x5) >> 2) << 24); /* rotate out old value */ + *((uint32*)(pred += 4)) = temp2; + } + + //===== LOOP OVER ALL 4x4 INTRA PREDICTION MODES ===== + // can re-order the search here instead of going in order + + // find most probable mode + encvid->mostProbableI4Mode[blkidx] = mostProbableMode = FindMostProbableI4Mode(video, blkidx); + + min_cost = 0xFFFF; + + for (ipmode = 0; ipmode < AVCNumI4PredMode; ipmode++) + { + if (mode_avail[ipmode] == TRUE) + { + cost = (ipmode == mostProbableMode) ? 0 : fixedcost; + pred = encvid->pred_i4[ipmode]; + + cost_i4(org, org_pitch, pred, &cost); + + if (cost < min_cost) + { + currMB->i4Mode[blkidx] = (AVCIntra4x4PredMode)ipmode; + min_cost = cost; + min_sad = cost - ((ipmode == mostProbableMode) ? 
0 : fixedcost); + } + } + } + + if (blkidx == 0) + { + encvid->i4_sad = min_sad; + } + else + { + encvid->i4_sad += min_sad; + } + + return min_cost; +} + +int FindMostProbableI4Mode(AVCCommonObj *video, int blkidx) +{ + int dcOnlyPredictionFlag; + AVCMacroblock *currMB = video->currMB; + int intra4x4PredModeA, intra4x4PredModeB, predIntra4x4PredMode; + + + dcOnlyPredictionFlag = 0; + if (blkidx&0x3) + { + intra4x4PredModeA = currMB->i4Mode[blkidx-1]; // block to the left + } + else /* for blk 0, 4, 8, 12 */ + { + if (video->intraAvailA) + { + if (video->mblock[video->mbAddrA].mbMode == AVC_I4) + { + intra4x4PredModeA = video->mblock[video->mbAddrA].i4Mode[blkidx + 3]; + } + else + { + intra4x4PredModeA = AVC_I4_DC; + } + } + else + { + dcOnlyPredictionFlag = 1; + goto PRED_RESULT_READY; // skip below + } + } + + if (blkidx >> 2) + { + intra4x4PredModeB = currMB->i4Mode[blkidx-4]; // block above + } + else /* block 0, 1, 2, 3 */ + { + if (video->intraAvailB) + { + if (video->mblock[video->mbAddrB].mbMode == AVC_I4) + { + intra4x4PredModeB = video->mblock[video->mbAddrB].i4Mode[blkidx+12]; + } + else + { + intra4x4PredModeB = AVC_I4_DC; + } + } + else + { + dcOnlyPredictionFlag = 1; + } + } + +PRED_RESULT_READY: + if (dcOnlyPredictionFlag) + { + intra4x4PredModeA = intra4x4PredModeB = AVC_I4_DC; + } + + predIntra4x4PredMode = AVC_MIN(intra4x4PredModeA, intra4x4PredModeB); + + return predIntra4x4PredMode; +} + +void cost_i4(uint8 *org, int org_pitch, uint8 *pred, uint16 *cost) +{ + int k; + int16 res[16], *pres; + int m0, m1, m2, m3, tmp1; + int satd = 0; + + pres = res; + // horizontal transform + k = 4; + while (k > 0) + { + m0 = org[0] - pred[0]; + m3 = org[3] - pred[3]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = org[1] - pred[1]; + m2 = org[2] - pred[2]; + m1 += m2; + m2 = m1 - (m2 << 1); + pres[0] = m0 + m1; + pres[2] = m0 - m1; + pres[1] = m2 + m3; + pres[3] = m3 - m2; + + org += org_pitch; + pres += 4; + pred += 4; + k--; + } + /* vertical transform */ + pres = res; + k = 4; + while (k > 0) + { + m0 = pres[0]; + m3 = pres[12]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = pres[4]; + m2 = pres[8]; + m1 += m2; + m2 = m1 - (m2 << 1); + pres[0] = m0 + m1; + pres[8] = m0 - m1; + pres[4] = m2 + m3; + pres[12] = m3 - m2; + + pres++; + k--; + + } + + pres = res; + k = 4; + while (k > 0) + { + tmp1 = *pres++; + satd += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + satd += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + satd += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + satd += ((tmp1 >= 0) ? 
tmp1 : -tmp1); + k--; + } + + satd = (satd + 1) >> 1; + *cost += satd; + + return ; +} + +void chroma_intra_search(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCPictureData *currPic = video->currPic; + + int x_pos = video->mb_x << 3; + int y_pos = video->mb_y << 3; + int pitch = currPic->pitch >> 1; + int offset = y_pos * pitch + x_pos; + + uint8 *comp_ref_x, *comp_ref_y, *pred; + int sum_x0, sum_x1, sum_y0, sum_y1; + int pred_0[2], pred_1[2], pred_2[2], pred_3[2]; + uint32 pred_a, pred_b, pred_c, pred_d; + int i, j, component; + int a_16, b, c, factor_c, topleft; + int H, V, value; + uint8 *comp_ref_x0, *comp_ref_x1, *comp_ref_y0, *comp_ref_y1; + + uint8 *curCb = currPic->Scb + offset; + uint8 *curCr = currPic->Scr + offset; + + uint8 *orgCb, *orgCr; + AVCFrameIO *currInput = encvid->currInput; + AVCMacroblock *currMB = video->currMB; + int org_pitch; + int cost, mincost; + + /* evaluate DC mode */ + if (video->intraAvailB & video->intraAvailA) + { + comp_ref_x = curCb - pitch; + comp_ref_y = curCb - 1; + + for (i = 0; i < 2; i++) + { + pred_a = *((uint32*)comp_ref_x); + comp_ref_x += 4; + pred_b = (pred_a >> 8) & 0xFF00FF; + pred_a &= 0xFF00FF; + pred_a += pred_b; + pred_a += (pred_a >> 16); + sum_x0 = pred_a & 0xFFFF; + + pred_a = *((uint32*)comp_ref_x); + pred_b = (pred_a >> 8) & 0xFF00FF; + pred_a &= 0xFF00FF; + pred_a += pred_b; + pred_a += (pred_a >> 16); + sum_x1 = pred_a & 0xFFFF; + + pred_1[i] = (sum_x1 + 2) >> 2; + + sum_y0 = *comp_ref_y; + sum_y0 += *(comp_ref_y += pitch); + sum_y0 += *(comp_ref_y += pitch); + sum_y0 += *(comp_ref_y += pitch); + + sum_y1 = *(comp_ref_y += pitch); + sum_y1 += *(comp_ref_y += pitch); + sum_y1 += *(comp_ref_y += pitch); + sum_y1 += *(comp_ref_y += pitch); + + pred_2[i] = (sum_y1 + 2) >> 2; + + pred_0[i] = (sum_y0 + sum_x0 + 4) >> 3; + pred_3[i] = (sum_y1 + sum_x1 + 4) >> 3; + + comp_ref_x = curCr - pitch; + comp_ref_y = curCr - 1; + } + } + + else if (video->intraAvailA) + { + comp_ref_y = curCb - 1; + for (i = 0; i < 2; i++) + { + sum_y0 = *comp_ref_y; + sum_y0 += *(comp_ref_y += pitch); + sum_y0 += *(comp_ref_y += pitch); + sum_y0 += *(comp_ref_y += pitch); + + sum_y1 = *(comp_ref_y += pitch); + sum_y1 += *(comp_ref_y += pitch); + sum_y1 += *(comp_ref_y += pitch); + sum_y1 += *(comp_ref_y += pitch); + + pred_0[i] = pred_1[i] = (sum_y0 + 2) >> 2; + pred_2[i] = pred_3[i] = (sum_y1 + 2) >> 2; + + comp_ref_y = curCr - 1; + } + } + else if (video->intraAvailB) + { + comp_ref_x = curCb - pitch; + for (i = 0; i < 2; i++) + { + pred_a = *((uint32*)comp_ref_x); + comp_ref_x += 4; + pred_b = (pred_a >> 8) & 0xFF00FF; + pred_a &= 0xFF00FF; + pred_a += pred_b; + pred_a += (pred_a >> 16); + sum_x0 = pred_a & 0xFFFF; + + pred_a = *((uint32*)comp_ref_x); + pred_b = (pred_a >> 8) & 0xFF00FF; + pred_a &= 0xFF00FF; + pred_a += pred_b; + pred_a += (pred_a >> 16); + sum_x1 = pred_a & 0xFFFF; + + pred_0[i] = pred_2[i] = (sum_x0 + 2) >> 2; + pred_1[i] = pred_3[i] = (sum_x1 + 2) >> 2; + + comp_ref_x = curCr - pitch; + } + } + else + { + pred_0[0] = pred_0[1] = pred_1[0] = pred_1[1] = + pred_2[0] = pred_2[1] = pred_3[0] = pred_3[1] = 128; + } + + pred = encvid->pred_ic[AVC_IC_DC]; + + pred_a = pred_0[0]; + pred_b = pred_1[0]; + pred_a |= (pred_a << 8); + pred_a |= (pred_a << 16); + pred_b |= (pred_b << 8); + pred_b |= (pred_b << 16); + + pred_c = pred_0[1]; + pred_d = pred_1[1]; + pred_c |= (pred_c << 8); + pred_c |= (pred_c << 16); + pred_d |= (pred_d << 8); + pred_d |= (pred_d << 16); + + + for (j = 0; j < 4; j++) /* 4 lines */ + { + 
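+            /* each 16-byte row of the chroma prediction buffer holds 8 Cb samples followed by 8 Cr samples; fill the top four rows with the per-4x4 DC values computed above */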
*((uint32*)pred) = pred_a; + *((uint32*)(pred + 4)) = pred_b; + *((uint32*)(pred + 8)) = pred_c; + *((uint32*)(pred + 12)) = pred_d; + pred += 16; /* move to the next line */ + } + + pred_a = pred_2[0]; + pred_b = pred_3[0]; + pred_a |= (pred_a << 8); + pred_a |= (pred_a << 16); + pred_b |= (pred_b << 8); + pred_b |= (pred_b << 16); + + pred_c = pred_2[1]; + pred_d = pred_3[1]; + pred_c |= (pred_c << 8); + pred_c |= (pred_c << 16); + pred_d |= (pred_d << 8); + pred_d |= (pred_d << 16); + + for (j = 0; j < 4; j++) /* 4 lines */ + { + *((uint32*)pred) = pred_a; + *((uint32*)(pred + 4)) = pred_b; + *((uint32*)(pred + 8)) = pred_c; + *((uint32*)(pred + 12)) = pred_d; + pred += 16; /* move to the next line */ + } + + /* predict horizontal mode */ + if (video->intraAvailA) + { + comp_ref_y = curCb - 1; + comp_ref_x = curCr - 1; + pred = encvid->pred_ic[AVC_IC_Horizontal]; + + for (i = 4; i < 6; i++) + { + for (j = 0; j < 4; j++) + { + pred_a = *comp_ref_y; + comp_ref_y += pitch; + pred_a |= (pred_a << 8); + pred_a |= (pred_a << 16); + *((uint32*)pred) = pred_a; + *((uint32*)(pred + 4)) = pred_a; + + pred_a = *comp_ref_x; + comp_ref_x += pitch; + pred_a |= (pred_a << 8); + pred_a |= (pred_a << 16); + *((uint32*)(pred + 8)) = pred_a; + *((uint32*)(pred + 12)) = pred_a; + + pred += 16; + } + } + } + + /* vertical mode */ + if (video->intraAvailB) + { + comp_ref_x = curCb - pitch; + comp_ref_y = curCr - pitch; + pred = encvid->pred_ic[AVC_IC_Vertical]; + + pred_a = *((uint32*)comp_ref_x); + pred_b = *((uint32*)(comp_ref_x + 4)); + pred_c = *((uint32*)comp_ref_y); + pred_d = *((uint32*)(comp_ref_y + 4)); + + for (j = 0; j < 8; j++) + { + *((uint32*)pred) = pred_a; + *((uint32*)(pred + 4)) = pred_b; + *((uint32*)(pred + 8)) = pred_c; + *((uint32*)(pred + 12)) = pred_d; + pred += 16; + } + } + + /* Intra_Chroma_Plane */ + if (video->intraAvailA && video->intraAvailB && video->intraAvailD) + { + comp_ref_x = curCb - pitch; + comp_ref_y = curCb - 1; + topleft = curCb[-pitch-1]; + + pred = encvid->pred_ic[AVC_IC_Plane]; + for (component = 0; component < 2; component++) + { + H = V = 0; + comp_ref_x0 = comp_ref_x + 4; + comp_ref_x1 = comp_ref_x + 2; + comp_ref_y0 = comp_ref_y + (pitch << 2); + comp_ref_y1 = comp_ref_y + (pitch << 1); + for (i = 1; i < 4; i++) + { + H += i * (*comp_ref_x0++ - *comp_ref_x1--); + V += i * (*comp_ref_y0 - *comp_ref_y1); + comp_ref_y0 += pitch; + comp_ref_y1 -= pitch; + } + H += i * (*comp_ref_x0++ - topleft); + V += i * (*comp_ref_y0 - *comp_ref_y1); + + a_16 = ((*(comp_ref_x + 7) + *(comp_ref_y + 7 * pitch)) << 4) + 16; + b = (17 * H + 16) >> 5; + c = (17 * V + 16) >> 5; + + pred_a = 0; + for (i = 4; i < 6; i++) + { + for (j = 0; j < 4; j++) + { + factor_c = a_16 + c * (pred_a++ - 3); + + factor_c -= 3 * b; + + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b = value; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b |= (value << 8); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b |= (value << 16); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b |= (value << 24); + *((uint32*)pred) = pred_b; + + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b = value; + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b |= (value << 8); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b |= (value << 16); + value = factor_c >> 5; + factor_c += b; + CLIP_RESULT(value) + pred_b |= (value << 24); + *((uint32*)(pred + 4)) = pred_b; + pred += 16; + 
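+                /* each pass writes one 8-sample row for the current chroma component: ((a + b*(x-3) + c*(y-3) + 16) >> 5) clipped to [0,255], with b added per sample and c per row */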
} + } + + pred -= 120; /* point to cr */ + comp_ref_x = curCr - pitch; + comp_ref_y = curCr - 1; + topleft = curCr[-pitch-1]; + } + } + + /* now evaluate it */ + + org_pitch = (currInput->pitch) >> 1; + offset = x_pos + y_pos * org_pitch; + + orgCb = currInput->YCbCr[1] + offset; + orgCr = currInput->YCbCr[2] + offset; + + mincost = 0x7fffffff; + cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_DC], mincost); + if (cost < mincost) + { + mincost = cost; + currMB->intra_chroma_pred_mode = AVC_IC_DC; + } + + if (video->intraAvailA) + { + cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Horizontal], mincost); + if (cost < mincost) + { + mincost = cost; + currMB->intra_chroma_pred_mode = AVC_IC_Horizontal; + } + } + + if (video->intraAvailB) + { + cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Vertical], mincost); + if (cost < mincost) + { + mincost = cost; + currMB->intra_chroma_pred_mode = AVC_IC_Vertical; + } + } + + if (video->intraAvailA && video->intraAvailB && video->intraAvailD) + { + cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Plane], mincost); + if (cost < mincost) + { + mincost = cost; + currMB->intra_chroma_pred_mode = AVC_IC_Plane; + } + } + + + return ; +} + + +int SATDChroma(uint8 *orgCb, uint8 *orgCr, int org_pitch, uint8 *pred, int min_cost) +{ + int cost; + /* first take difference between orgCb, orgCr and pred */ + int16 res[128], *pres; // residue + int m0, m1, m2, m3, tmp1; + int j, k; + + pres = res; + org_pitch -= 8; + // horizontal transform + for (j = 0; j < 8; j++) + { + k = 2; + while (k > 0) + { + m0 = orgCb[0] - pred[0]; + m3 = orgCb[3] - pred[3]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = orgCb[1] - pred[1]; + m2 = orgCb[2] - pred[2]; + m1 += m2; + m2 = m1 - (m2 << 1); + pres[0] = m0 + m1; + pres[2] = m0 - m1; + pres[1] = m2 + m3; + pres[3] = m3 - m2; + + orgCb += 4; + pres += 4; + pred += 4; + k--; + } + orgCb += org_pitch; + k = 2; + while (k > 0) + { + m0 = orgCr[0] - pred[0]; + m3 = orgCr[3] - pred[3]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = orgCr[1] - pred[1]; + m2 = orgCr[2] - pred[2]; + m1 += m2; + m2 = m1 - (m2 << 1); + pres[0] = m0 + m1; + pres[2] = m0 - m1; + pres[1] = m2 + m3; + pres[3] = m3 - m2; + + orgCr += 4; + pres += 4; + pred += 4; + k--; + } + orgCr += org_pitch; + } + + /* vertical transform */ + for (j = 0; j < 2; j++) + { + pres = res + (j << 6); + k = 16; + while (k > 0) + { + m0 = pres[0]; + m3 = pres[3<<4]; + m0 += m3; + m3 = m0 - (m3 << 1); + m1 = pres[1<<4]; + m2 = pres[2<<4]; + m1 += m2; + m2 = m1 - (m2 << 1); + pres[0] = m0 + m1; + pres[2<<4] = m0 - m1; + pres[1<<4] = m2 + m3; + pres[3<<4] = m3 - m2; + + pres++; + k--; + } + } + + /* now sum of absolute value */ + pres = res; + cost = 0; + k = 128; + while (k > 0) + { + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? tmp1 : -tmp1); + tmp1 = *pres++; + cost += ((tmp1 >= 0) ? 
tmp1 : -tmp1); + k -= 8; + if (cost > min_cost) /* early drop out */ + { + return cost; + } + } + + return cost; +} + + + +///////////////////////////////// old code, unused +/* find the best intra mode based on original (unencoded) frame */ +/* output is + currMB->mb_intra, currMB->mbMode, + currMB->i16Mode (if currMB->mbMode == AVC_I16) + currMB->i4Mode[..] (if currMB->mbMode == AVC_I4) */ + +#ifdef FIXED_INTRAPRED_MODE +void MBIntraSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum) +{ + (void)(mbNum); + + AVCCommonObj *video = encvid->common; + int indx, block_x, block_y; + + video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0; + + if (!video->currPicParams->constrained_intra_pred_flag) + { + video->intraAvailA = video->mbAvailA; + video->intraAvailB = video->mbAvailB; + video->intraAvailC = video->mbAvailC; + video->intraAvailD = video->mbAvailD; + } + else + { + if (video->mbAvailA) + { + video->intraAvailA = video->mblock[video->mbAddrA].mb_intra; + } + if (video->mbAvailB) + { + video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ; + } + if (video->mbAvailC) + { + video->intraAvailC = video->mblock[video->mbAddrC].mb_intra; + } + if (video->mbAvailD) + { + video->intraAvailD = video->mblock[video->mbAddrD].mb_intra; + } + } + + currMB->mb_intra = TRUE; + currMB->mbMode = FIXED_INTRAPRED_MODE; + + if (currMB->mbMode == AVC_I16) + { + currMB->i16Mode = FIXED_I16_MODE; + + if (FIXED_I16_MODE == AVC_I16_Vertical && !video->intraAvailB) + { + currMB->i16Mode = AVC_I16_DC; + } + + if (FIXED_I16_MODE == AVC_I16_Horizontal && !video->intraAvailA) + { + currMB->i16Mode = AVC_I16_DC; + } + + if (FIXED_I16_MODE == AVC_I16_Plane && !(video->intraAvailA && video->intraAvailB && video->intraAvailD)) + { + currMB->i16Mode = AVC_I16_DC; + } + } + else //if(currMB->mbMode == AVC_I4) + { + for (indx = 0; indx < 16; indx++) + { + block_x = blkIdx2blkX[indx]; + block_y = blkIdx2blkY[indx]; + + currMB->i4Mode[(block_y<<2)+block_x] = FIXED_I4_MODE; + + if (FIXED_I4_MODE == AVC_I4_Vertical && !(block_y > 0 || video->intraAvailB)) + { + currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + + if (FIXED_I4_MODE == AVC_I4_Horizontal && !(block_x || video->intraAvailA)) + { + currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + + if (FIXED_I4_MODE == AVC_I4_Diagonal_Down_Left && + (block_y == 0 && !video->intraAvailB)) + { + currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + + if (FIXED_I4_MODE == AVC_I4_Diagonal_Down_Right && + !((block_y && block_x) + || (block_y && video->intraAvailA) + || (block_x && video->intraAvailB) + || (video->intraAvailA && video->intraAvailD && video->intraAvailB))) + { + currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + + if (FIXED_I4_MODE == AVC_I4_Vertical_Right && + !((block_y && block_x) + || (block_y && video->intraAvailA) + || (block_x && video->intraAvailB) + || (video->intraAvailA && video->intraAvailD && video->intraAvailB))) + { + currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + + if (FIXED_I4_MODE == AVC_I4_Horizontal_Down && + !((block_y && block_x) + || (block_y && video->intraAvailA) + || (block_x && video->intraAvailB) + || (video->intraAvailA && video->intraAvailD && video->intraAvailB))) + { + currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + + if (FIXED_I4_MODE == AVC_I4_Vertical_Left && + (block_y == 0 && !video->intraAvailB)) + { + currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + + if (FIXED_I4_MODE == AVC_I4_Horizontal_Up && !(block_x || video->intraAvailA)) + { + 
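+                /* Horizontal-Up needs reconstructed samples to the left; without them fall back to DC, the only 4x4 mode that is always valid */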
currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC; + } + } + } + + currMB->intra_chroma_pred_mode = FIXED_INTRA_CHROMA_MODE; + + if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Horizontal && !(video->intraAvailA)) + { + currMB->intra_chroma_pred_mode = AVC_IC_DC; + } + + if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Vertical && !(video->intraAvailB)) + { + currMB->intra_chroma_pred_mode = AVC_IC_DC; + } + + if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Plane && !(video->intraAvailA && video->intraAvailB && video->intraAvailD)) + { + currMB->intra_chroma_pred_mode = AVC_IC_DC; + } + + /* also reset the motion vectors */ + /* set MV and Ref_Idx codes of Intra blocks in P-slices */ + memset(currMB->mvL0, 0, sizeof(int32)*16); + currMB->ref_idx_L0[0] = -1; + currMB->ref_idx_L0[1] = -1; + currMB->ref_idx_L0[2] = -1; + currMB->ref_idx_L0[3] = -1; + + // output from this function, currMB->mbMode should be set to either + // AVC_I4, AVC_I16, or else in AVCMBMode enum, mbType, mb_intra, intra_chroma_pred_mode */ + return ; +} +#else // faster combined prediction+SAD calculation +void MBIntraSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum) +{ + AVCCommonObj *video = encvid->common; + AVCFrameIO *currInput = encvid->currInput; + uint8 *curL, *curCb, *curCr; + uint8 *comp, *pred_block; + int block_x, block_y, offset; + uint sad, sad4, sadI4, sadI16; + int component, SubBlock_indx, temp; + int pitch = video->currPic->pitch; + + /* calculate the cost of each intra prediction mode and compare to the + inter mode */ + /* full search for all intra prediction */ + offset = (video->mb_y << 4) * pitch + (video->mb_x << 4); + curL = currInput->YCbCr[0] + offset; + pred_block = video->pred_block + 84; + + /* Assuming that InitNeighborAvailability has been called prior to this function */ + video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0; + + if (!video->currPicParams->constrained_intra_pred_flag) + { + video->intraAvailA = video->mbAvailA; + video->intraAvailB = video->mbAvailB; + video->intraAvailC = video->mbAvailC; + video->intraAvailD = video->mbAvailD; + } + else + { + if (video->mbAvailA) + { + video->intraAvailA = video->mblock[video->mbAddrA].mb_intra; + } + if (video->mbAvailB) + { + video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ; + } + if (video->mbAvailC) + { + video->intraAvailC = video->mblock[video->mbAddrC].mb_intra; + } + if (video->mbAvailD) + { + video->intraAvailD = video->mblock[video->mbAddrD].mb_intra; + } + } + + /* currently we're doing exhaustive search. 
Smart search will be used later */ + + /* I16 modes */ + curL = currInput->YCbCr[0] + offset; + video->pintra_pred_top = curL - pitch; + video->pintra_pred_left = curL - 1; + if (video->mb_y) + { + video->intra_pred_topleft = *(curL - pitch - 1); + } + + /* Intra_16x16_Vertical */ + sadI16 = 65536; + /* check availability of top */ + if (video->intraAvailB) + { + sad = SAD_I16_Vert(video, curL, sadI16); + + if (sad < sadI16) + { + sadI16 = sad; + currMB->i16Mode = AVC_I16_Vertical; + } + } + /* Intra_16x16_Horizontal */ + /* check availability of left */ + if (video->intraAvailA) + { + sad = SAD_I16_HorzDC(video, curL, AVC_I16_Horizontal, sadI16); + + if (sad < sadI16) + { + sadI16 = sad; + currMB->i16Mode = AVC_I16_Horizontal; + } + } + + /* Intra_16x16_DC, default mode */ + sad = SAD_I16_HorzDC(video, curL, AVC_I16_DC, sadI16); + if (sad < sadI16) + { + sadI16 = sad; + currMB->i16Mode = AVC_I16_DC; + } + + /* Intra_16x16_Plane */ + if (video->intraAvailA && video->intraAvailB && video->intraAvailD) + { + sad = SAD_I16_Plane(video, curL, sadI16); + + if (sad < sadI16) + { + sadI16 = sad; + currMB->i16Mode = AVC_I16_Plane; + } + } + + sadI16 >>= 1; /* before comparison */ + + /* selection between intra4, intra16 or inter mode */ + if (sadI16 < encvid->min_cost) + { + currMB->mb_intra = TRUE; + currMB->mbMode = AVC_I16; + encvid->min_cost = sadI16; + } + + if (currMB->mb_intra) /* only do the chrominance search when intra is decided */ + { + /* Note that we might be able to guess the type of prediction from + the luma prediction type */ + + /* now search for the best chroma intra prediction */ + offset = (offset >> 2) + (video->mb_x << 2); + curCb = currInput->YCbCr[1] + offset; + curCr = currInput->YCbCr[2] + offset; + + pitch >>= 1; + video->pintra_pred_top_cb = curCb - pitch; + video->pintra_pred_left_cb = curCb - 1; + video->pintra_pred_top_cr = curCr - pitch; + video->pintra_pred_left_cr = curCr - 1; + + if (video->mb_y) + { + video->intra_pred_topleft_cb = *(curCb - pitch - 1); + video->intra_pred_topleft_cr = *(curCr - pitch - 1); + } + + /* Intra_Chroma_DC */ + sad4 = SAD_Chroma_DC(video, curCb, curCr, 65536); + currMB->intra_chroma_pred_mode = AVC_IC_DC; + + /* Intra_Chroma_Horizontal */ + if (video->intraAvailA) + { + /* check availability of left */ + sad = SAD_Chroma_Horz(video, curCb, curCr, sad4); + if (sad < sad4) + { + sad4 = sad; + currMB->intra_chroma_pred_mode = AVC_IC_Horizontal; + } + } + + /* Intra_Chroma_Vertical */ + if (video->intraAvailB) + { + /* check availability of top */ + sad = SAD_Chroma_Vert(video, curCb, curCr, sad4); + + if (sad < sad4) + { + sad4 = sad; + currMB->intra_chroma_pred_mode = AVC_IC_Vertical; + } + } + + /* Intra_Chroma_Plane */ + if (video->intraAvailA && video->intraAvailB && video->intraAvailD) + { + /* check availability of top and left */ + Intra_Chroma_Plane(video, pitch); + + sad = SADChroma(pred_block + 452, curCb, curCr, pitch); + + if (sad < sad4) + { + sad4 = sad; + currMB->intra_chroma_pred_mode = AVC_IC_Plane; + } + } + + /* also reset the motion vectors */ + /* set MV and Ref_Idx codes of Intra blocks in P-slices */ + memset(currMB->mvL0, 0, sizeof(int32)*16); + memset(currMB->ref_idx_L0, -1, sizeof(int16)*4); + + } + + // output from this function, currMB->mbMode should be set to either + // AVC_I4, AVC_I16, or else in AVCMBMode enum, mbType, mb_intra, intra_chroma_pred_mode */ + + return ; +} +#endif + + diff --git a/media/libstagefright/codecs/avc/enc/src/motion_comp.cpp 
b/media/libstagefright/codecs/avc/enc/src/motion_comp.cpp new file mode 100644 index 0000000..ac62d78 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/motion_comp.cpp @@ -0,0 +1,2156 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" +#include "avcenc_int.h" + + +#define CLIP_RESULT(x) if((uint)x > 0xFF){ \ + x = 0xFF & (~(x>>31));} + +/* (blkwidth << 2) + (dy << 1) + dx */ +static void (*const eChromaMC_SIMD[8])(uint8 *, int , int , int , uint8 *, int, int , int) = +{ + &eChromaFullMC_SIMD, + &eChromaHorizontalMC_SIMD, + &eChromaVerticalMC_SIMD, + &eChromaDiagonalMC_SIMD, + &eChromaFullMC_SIMD, + &eChromaHorizontalMC2_SIMD, + &eChromaVerticalMC2_SIMD, + &eChromaDiagonalMC2_SIMD +}; +/* Perform motion prediction and compensation with residue if exist. */ +void AVCMBMotionComp(AVCEncObject *encvid, AVCCommonObj *video) +{ + (void)(encvid); + + AVCMacroblock *currMB = video->currMB; + AVCPictureData *currPic = video->currPic; + int mbPartIdx, subMbPartIdx; + int ref_idx; + int offset_MbPart_indx = 0; + int16 *mv; + uint32 x_pos, y_pos; + uint8 *curL, *curCb, *curCr; + uint8 *ref_l, *ref_Cb, *ref_Cr; + uint8 *predBlock, *predCb, *predCr; + int block_x, block_y, offset_x, offset_y, offsetP, offset; + int x_position = (video->mb_x << 4); + int y_position = (video->mb_y << 4); + int MbHeight, MbWidth, mbPartIdx_X, mbPartIdx_Y, offset_indx; + int picWidth = currPic->width; + int picPitch = currPic->pitch; + int picHeight = currPic->height; + uint32 tmp_word; + + tmp_word = y_position * picPitch; + curL = currPic->Sl + tmp_word + x_position; + offset = (tmp_word >> 2) + (x_position >> 1); + curCb = currPic->Scb + offset; + curCr = currPic->Scr + offset; + + predBlock = curL; + predCb = curCb; + predCr = curCr; + + GetMotionVectorPredictor(video, 1); + + for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++) + { + MbHeight = currMB->SubMbPartHeight[mbPartIdx]; + MbWidth = currMB->SubMbPartWidth[mbPartIdx]; + mbPartIdx_X = ((mbPartIdx + offset_MbPart_indx) & 1); + mbPartIdx_Y = (mbPartIdx + offset_MbPart_indx) >> 1; + ref_idx = currMB->ref_idx_L0[(mbPartIdx_Y << 1) + mbPartIdx_X]; + offset_indx = 0; + + ref_l = video->RefPicList0[ref_idx]->Sl; + ref_Cb = video->RefPicList0[ref_idx]->Scb; + ref_Cr = video->RefPicList0[ref_idx]->Scr; + + for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++) + { + block_x = (mbPartIdx_X << 1) + ((subMbPartIdx + offset_indx) & 1); + block_y = (mbPartIdx_Y << 1) + (((subMbPartIdx + offset_indx) >> 1) & 1); + mv = (int16*)(currMB->mvL0 + block_x + (block_y << 2)); + offset_x = x_position + (block_x << 2); + offset_y = y_position + (block_y << 2); + x_pos = (offset_x << 2) + *mv++; /*quarter pel */ + y_pos = (offset_y << 2) + *mv; /*quarter pel */ + + //offset = offset_y * currPic->width; + //offsetC 
= (offset >> 2) + (offset_x >> 1); + offsetP = (block_y << 2) * picPitch + (block_x << 2); + eLumaMotionComp(ref_l, picPitch, picHeight, x_pos, y_pos, + /*comp_Sl + offset + offset_x,*/ + predBlock + offsetP, picPitch, MbWidth, MbHeight); + + offsetP = (block_y * picWidth) + (block_x << 1); + eChromaMotionComp(ref_Cb, picWidth >> 1, picHeight >> 1, x_pos, y_pos, + /*comp_Scb + offsetC,*/ + predCb + offsetP, picPitch >> 1, MbWidth >> 1, MbHeight >> 1); + eChromaMotionComp(ref_Cr, picWidth >> 1, picHeight >> 1, x_pos, y_pos, + /*comp_Scr + offsetC,*/ + predCr + offsetP, picPitch >> 1, MbWidth >> 1, MbHeight >> 1); + + offset_indx = currMB->SubMbPartWidth[mbPartIdx] >> 3; + } + offset_MbPart_indx = currMB->MbPartWidth >> 4; + } + + return ; +} + + +/* preform the actual motion comp here */ +void eLumaMotionComp(uint8 *ref, int picpitch, int picheight, + int x_pos, int y_pos, + uint8 *pred, int pred_pitch, + int blkwidth, int blkheight) +{ + (void)(picheight); + + int dx, dy; + int temp2[21][21]; /* for intermediate results */ + uint8 *ref2; + + dx = x_pos & 3; + dy = y_pos & 3; + x_pos = x_pos >> 2; /* round it to full-pel resolution */ + y_pos = y_pos >> 2; + + /* perform actual motion compensation */ + if (dx == 0 && dy == 0) + { /* fullpel position *//* G */ + + ref += y_pos * picpitch + x_pos; + + eFullPelMC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight); + + } /* other positions */ + else if (dy == 0) + { /* no vertical interpolation *//* a,b,c*/ + + ref += y_pos * picpitch + x_pos; + + eHorzInterp1MC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight, dx); + } + else if (dx == 0) + { /*no horizontal interpolation *//* d,h,n */ + + ref += y_pos * picpitch + x_pos; + + eVertInterp1MC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight, dy); + } + else if (dy == 2) + { /* horizontal cross *//* i, j, k */ + + ref += y_pos * picpitch + x_pos - 2; /* move to the left 2 pixels */ + + eVertInterp2MC(ref, picpitch, &temp2[0][0], 21, blkwidth + 5, blkheight); + + eHorzInterp2MC(&temp2[0][2], 21, pred, pred_pitch, blkwidth, blkheight, dx); + } + else if (dx == 2) + { /* vertical cross */ /* f,q */ + + ref += (y_pos - 2) * picpitch + x_pos; /* move to up 2 lines */ + + eHorzInterp3MC(ref, picpitch, &temp2[0][0], 21, blkwidth, blkheight + 5); + eVertInterp3MC(&temp2[2][0], 21, pred, pred_pitch, blkwidth, blkheight, dy); + } + else + { /* diagonal *//* e,g,p,r */ + + ref2 = ref + (y_pos + (dy / 2)) * picpitch + x_pos; + + ref += (y_pos * picpitch) + x_pos + (dx / 2); + + eDiagonalInterpMC(ref2, ref, picpitch, pred, pred_pitch, blkwidth, blkheight); + } + + return ; +} + +void eCreateAlign(uint8 *ref, int picpitch, int y_pos, + uint8 *out, int blkwidth, int blkheight) +{ + int i, j; + int offset, out_offset; + uint32 prev_pix, result, pix1, pix2, pix4; + + ref += y_pos * picpitch;// + x_pos; + out_offset = 24 - blkwidth; + + //switch(x_pos&0x3){ + switch (((uint32)ref)&0x3) + { + case 1: + offset = picpitch - blkwidth - 3; + for (j = 0; j < blkheight; j++) + { + pix1 = *ref++; + pix2 = *((uint16*)ref); + ref += 2; + result = (pix2 << 8) | pix1; + + for (i = 3; i < blkwidth; i += 4) + { + pix4 = *((uint32*)ref); + ref += 4; + prev_pix = (pix4 << 24) & 0xFF000000; /* mask out byte belong to previous word */ + result |= prev_pix; + *((uint32*)out) = result; /* write 4 bytes */ + out += 4; + result = pix4 >> 8; /* for the next loop */ + } + ref += offset; + out += out_offset; + } + break; + case 2: + offset = picpitch - blkwidth - 2; + for (j = 0; j < blkheight; j++) + { + result = 
*((uint16*)ref); + ref += 2; + for (i = 2; i < blkwidth; i += 4) + { + pix4 = *((uint32*)ref); + ref += 4; + prev_pix = (pix4 << 16) & 0xFFFF0000; /* mask out byte belong to previous word */ + result |= prev_pix; + *((uint32*)out) = result; /* write 4 bytes */ + out += 4; + result = pix4 >> 16; /* for the next loop */ + } + ref += offset; + out += out_offset; + } + break; + case 3: + offset = picpitch - blkwidth - 1; + for (j = 0; j < blkheight; j++) + { + result = *ref++; + for (i = 1; i < blkwidth; i += 4) + { + pix4 = *((uint32*)ref); + ref += 4; + prev_pix = (pix4 << 8) & 0xFFFFFF00; /* mask out byte belong to previous word */ + result |= prev_pix; + *((uint32*)out) = result; /* write 4 bytes */ + out += 4; + result = pix4 >> 24; /* for the next loop */ + } + ref += offset; + out += out_offset; + } + break; + } +} + +void eHorzInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dx) +{ + uint8 *p_ref; + uint32 *p_cur; + uint32 tmp, pkres; + int result, curr_offset, ref_offset; + int j; + int32 r0, r1, r2, r3, r4, r5; + int32 r13, r6; + + p_cur = (uint32*)out; /* assume it's word aligned */ + curr_offset = (outpitch - blkwidth) >> 2; + p_ref = in; + ref_offset = inpitch - blkwidth; + + if (dx&1) + { + dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */ + p_ref -= 2; + r13 = 0; + for (j = blkheight; j > 0; j--) + { + tmp = (uint32)(p_ref + blkwidth); + r0 = p_ref[0]; + r1 = p_ref[2]; + r0 |= (r1 << 16); /* 0,c,0,a */ + r1 = p_ref[1]; + r2 = p_ref[3]; + r1 |= (r2 << 16); /* 0,d,0,b */ + while ((uint32)p_ref < tmp) + { + r2 = *(p_ref += 4); /* move pointer to e */ + r3 = p_ref[2]; + r2 |= (r3 << 16); /* 0,g,0,e */ + r3 = p_ref[1]; + r4 = p_ref[3]; + r3 |= (r4 << 16); /* 0,h,0,f */ + + r4 = r0 + r3; /* c+h, a+f */ + r5 = r0 + r1; /* c+d, a+b */ + r6 = r2 + r3; /* g+h, e+f */ + r5 >>= 16; + r5 |= (r6 << 16); /* e+f, c+d */ + r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */ + r4 += 0x100010; /* +16, +16 */ + r5 = r1 + r2; /* d+g, b+e */ + r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */ + r4 >>= 5; + r13 |= r4; /* check clipping */ + + r5 = p_ref[dx+2]; + r6 = p_ref[dx+4]; + r5 |= (r6 << 16); + r4 += r5; + r4 += 0x10001; + r4 = (r4 >> 1) & 0xFF00FF; + + r5 = p_ref[4]; /* i */ + r6 = (r5 << 16); + r5 = r6 | (r2 >> 16);/* 0,i,0,g */ + r5 += r1; /* d+i, b+g */ /* r5 not free */ + r1 >>= 16; + r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */ + r1 += r2; /* f+g, d+e */ + r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */ + r0 >>= 16; + r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */ + r0 += r3; /* e+h, c+f */ + r5 += 0x100010; /* 16,16 */ + r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */ + r5 >>= 5; + r13 |= r5; /* check clipping */ + + r0 = p_ref[dx+3]; + r1 = p_ref[dx+5]; + r0 |= (r1 << 16); + r5 += r0; + r5 += 0x10001; + r5 = (r5 >> 1) & 0xFF00FF; + + r4 |= (r5 << 8); /* pack them together */ + *p_cur++ = r4; + r1 = r3; + r0 = r2; + } + p_cur += curr_offset; /* move to the next line */ + p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ + + if (r13&0xFF000700) /* need clipping */ + { + /* move back to the beginning of the line */ + p_ref -= (ref_offset + blkwidth); /* input */ + p_cur -= (outpitch >> 2); + + tmp = (uint32)(p_ref + blkwidth); + for (; (uint32)p_ref < tmp;) + { + + r0 = *p_ref++; + r1 = *p_ref++; + r2 = *p_ref++; + r3 = *p_ref++; + r4 = *p_ref++; + /* first pixel */ + r5 = *p_ref++; + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result 
+= (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dx] + 1); + pkres = (result >> 1) ; + /* second pixel */ + r0 = *p_ref++; + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dx] + 1); + result = (result >> 1); + pkres |= (result << 8); + /* third pixel */ + r1 = *p_ref++; + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dx] + 1); + result = (result >> 1); + pkres |= (result << 16); + /* fourth pixel */ + r2 = *p_ref++; + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dx] + 1); + result = (result >> 1); + pkres |= (result << 24); + *p_cur++ = pkres; /* write 4 pixels */ + p_ref -= 5; /* offset back to the middle of filter */ + } + p_cur += curr_offset; /* move to the next line */ + p_ref += ref_offset; /* move to the next line */ + } + } + } + else + { + p_ref -= 2; + r13 = 0; + for (j = blkheight; j > 0; j--) + { + tmp = (uint32)(p_ref + blkwidth); + r0 = p_ref[0]; + r1 = p_ref[2]; + r0 |= (r1 << 16); /* 0,c,0,a */ + r1 = p_ref[1]; + r2 = p_ref[3]; + r1 |= (r2 << 16); /* 0,d,0,b */ + while ((uint32)p_ref < tmp) + { + r2 = *(p_ref += 4); /* move pointer to e */ + r3 = p_ref[2]; + r2 |= (r3 << 16); /* 0,g,0,e */ + r3 = p_ref[1]; + r4 = p_ref[3]; + r3 |= (r4 << 16); /* 0,h,0,f */ + + r4 = r0 + r3; /* c+h, a+f */ + r5 = r0 + r1; /* c+d, a+b */ + r6 = r2 + r3; /* g+h, e+f */ + r5 >>= 16; + r5 |= (r6 << 16); /* e+f, c+d */ + r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */ + r4 += 0x100010; /* +16, +16 */ + r5 = r1 + r2; /* d+g, b+e */ + r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */ + r4 >>= 5; + r13 |= r4; /* check clipping */ + r4 &= 0xFF00FF; /* mask */ + + r5 = p_ref[4]; /* i */ + r6 = (r5 << 16); + r5 = r6 | (r2 >> 16);/* 0,i,0,g */ + r5 += r1; /* d+i, b+g */ /* r5 not free */ + r1 >>= 16; + r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */ + r1 += r2; /* f+g, d+e */ + r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */ + r0 >>= 16; + r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */ + r0 += r3; /* e+h, c+f */ + r5 += 0x100010; /* 16,16 */ + r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */ + r5 >>= 5; + r13 |= r5; /* check clipping */ + r5 &= 0xFF00FF; /* mask */ + + r4 |= (r5 << 8); /* pack them together */ + *p_cur++ = r4; + r1 = r3; + r0 = r2; + } + p_cur += curr_offset; /* move to the next line */ + p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ + + if (r13&0xFF000700) /* need clipping */ + { + /* move back to the beginning of the line */ + p_ref -= (ref_offset + blkwidth); /* input */ + p_cur -= (outpitch >> 2); + + tmp = (uint32)(p_ref + blkwidth); + for (; (uint32)p_ref < tmp;) + { + + r0 = *p_ref++; + r1 = *p_ref++; + r2 = *p_ref++; + r3 = *p_ref++; + r4 = *p_ref++; + /* first pixel */ + r5 = *p_ref++; + result = (r0 + r5); + 
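+                    /* scalar fallback: apply the 6-tap filter (1, -5, 20, 20, -5, 1) directly, then round with (sum + 16) >> 5 and clip to [0,255] */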
r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres = result; + /* second pixel */ + r0 = *p_ref++; + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres |= (result << 8); + /* third pixel */ + r1 = *p_ref++; + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres |= (result << 16); + /* fourth pixel */ + r2 = *p_ref++; + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres |= (result << 24); + *p_cur++ = pkres; /* write 4 pixels */ + p_ref -= 5; + } + p_cur += curr_offset; /* move to the next line */ + p_ref += ref_offset; + } + } + } + + return ; +} + +void eHorzInterp2MC(int *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dx) +{ + int *p_ref; + uint32 *p_cur; + uint32 tmp, pkres; + int result, result2, curr_offset, ref_offset; + int j, r0, r1, r2, r3, r4, r5; + + p_cur = (uint32*)out; /* assume it's word aligned */ + curr_offset = (outpitch - blkwidth) >> 2; + p_ref = in; + ref_offset = inpitch - blkwidth; + + if (dx&1) + { + dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */ + + for (j = blkheight; j > 0 ; j--) + { + tmp = (uint32)(p_ref + blkwidth); + for (; (uint32)p_ref < tmp;) + { + + r0 = p_ref[-2]; + r1 = p_ref[-1]; + r2 = *p_ref++; + r3 = *p_ref++; + r4 = *p_ref++; + /* first pixel */ + r5 = *p_ref++; + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dx] + 16) >> 5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + pkres = (result >> 1); + /* second pixel */ + r0 = *p_ref++; + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dx] + 16) >> 5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + result = (result >> 1); + pkres |= (result << 8); + /* third pixel */ + r1 = *p_ref++; + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dx] + 16) >> 5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + result = (result >> 1); + pkres |= (result << 16); + /* fourth pixel */ + r2 = *p_ref++; + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dx] + 16) >> 
5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + result = (result >> 1); + pkres |= (result << 24); + *p_cur++ = pkres; /* write 4 pixels */ + p_ref -= 3; /* offset back to the middle of filter */ + } + p_cur += curr_offset; /* move to the next line */ + p_ref += ref_offset; /* move to the next line */ + } + } + else + { + for (j = blkheight; j > 0 ; j--) + { + tmp = (uint32)(p_ref + blkwidth); + for (; (uint32)p_ref < tmp;) + { + + r0 = p_ref[-2]; + r1 = p_ref[-1]; + r2 = *p_ref++; + r3 = *p_ref++; + r4 = *p_ref++; + /* first pixel */ + r5 = *p_ref++; + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + pkres = result; + /* second pixel */ + r0 = *p_ref++; + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + pkres |= (result << 8); + /* third pixel */ + r1 = *p_ref++; + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + pkres |= (result << 16); + /* fourth pixel */ + r2 = *p_ref++; + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + pkres |= (result << 24); + *p_cur++ = pkres; /* write 4 pixels */ + p_ref -= 3; /* offset back to the middle of filter */ + } + p_cur += curr_offset; /* move to the next line */ + p_ref += ref_offset; /* move to the next line */ + } + } + + return ; +} + +void eHorzInterp3MC(uint8 *in, int inpitch, int *out, int outpitch, + int blkwidth, int blkheight) +{ + uint8 *p_ref; + int *p_cur; + uint32 tmp; + int result, curr_offset, ref_offset; + int j, r0, r1, r2, r3, r4, r5; + + p_cur = out; + curr_offset = (outpitch - blkwidth); + p_ref = in; + ref_offset = inpitch - blkwidth; + + for (j = blkheight; j > 0 ; j--) + { + tmp = (uint32)(p_ref + blkwidth); + for (; (uint32)p_ref < tmp;) + { + + r0 = p_ref[-2]; + r1 = p_ref[-1]; + r2 = *p_ref++; + r3 = *p_ref++; + r4 = *p_ref++; + /* first pixel */ + r5 = *p_ref++; + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + *p_cur++ = result; + /* second pixel */ + r0 = *p_ref++; + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + *p_cur++ = result; + /* third pixel */ + r1 = *p_ref++; + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + *p_cur++ = result; + /* fourth pixel */ + r2 = *p_ref++; + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + *p_cur++ = result; + p_ref -= 3; /* move back to the middle of the filter */ + } + p_cur += curr_offset; /* move to the next line */ + 
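+        /* the horizontal sums are stored unclipped and unrounded; eVertInterp3MC filters them vertically and normalizes with (sum + 512) >> 10 */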
p_ref += ref_offset; + } + + return ; +} +void eVertInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dy) +{ + uint8 *p_cur, *p_ref; + uint32 tmp; + int result, curr_offset, ref_offset; + int j, i; + int32 r0, r1, r2, r3, r4, r5, r6, r7, r8, r13; + uint8 tmp_in[24][24]; + + /* not word-aligned */ + if (((uint32)in)&0x3) + { + eCreateAlign(in, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5); + in = &tmp_in[2][0]; + inpitch = 24; + } + p_cur = out; + curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */ + ref_offset = blkheight * inpitch; /* for limit */ + + curr_offset += 3; + + if (dy&1) + { + dy = (dy >> 1) ? 0 : -inpitch; + + for (j = 0; j < blkwidth; j += 4, in += 4) + { + r13 = 0; + p_ref = in; + p_cur -= outpitch; /* compensate for the first offset */ + tmp = (uint32)(p_ref + ref_offset); /* limit */ + while ((uint32)p_ref < tmp) /* the loop un-rolled */ + { + r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */ + p_ref += inpitch; + r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */ + r0 &= 0xFF00FF; + + r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + + r0 += r1; + r6 += r7; + + r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */ + r8 = (r2 >> 8) & 0xFF00FF; + r2 &= 0xFF00FF; + + r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + r1 += r2; + + r7 += r8; + + r0 += 20 * r1; + r6 += 20 * r7; + r0 += 0x100010; + r6 += 0x100010; + + r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */ + r8 = (r2 >> 8) & 0xFF00FF; + r2 &= 0xFF00FF; + + r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + r1 += r2; + + r7 += r8; + + r0 -= 5 * r1; + r6 -= 5 * r7; + + r0 >>= 5; + r6 >>= 5; + /* clip */ + r13 |= r6; + r13 |= r0; + //CLIPPACK(r6,result) + + r1 = *((uint32*)(p_ref + dy)); + r2 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + r0 += r1; + r6 += r2; + r0 += 0x10001; + r6 += 0x10001; + r0 = (r0 >> 1) & 0xFF00FF; + r6 = (r6 >> 1) & 0xFF00FF; + + r0 |= (r6 << 8); /* pack it back */ + *((uint32*)(p_cur += outpitch)) = r0; + } + p_cur += curr_offset; /* offset to the next pixel */ + if (r13 & 0xFF000700) /* this column need clipping */ + { + p_cur -= 4; + for (i = 0; i < 4; i++) + { + p_ref = in + i; + p_cur -= outpitch; /* compensate for the first offset */ + + tmp = (uint32)(p_ref + ref_offset); /* limit */ + while ((uint32)p_ref < tmp) + { /* loop un-rolled */ + r0 = *(p_ref - (inpitch << 1)); + r1 = *(p_ref - inpitch); + r2 = *p_ref; + r3 = *(p_ref += inpitch); /* modify pointer before loading */ + r4 = *(p_ref += inpitch); + /* first pixel */ + r5 = *(p_ref += inpitch); + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dy-(inpitch<<1)] + 1); + result = (result >> 1); + *(p_cur += outpitch) = result; + /* second pixel */ + r0 = *(p_ref += inpitch); + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dy-(inpitch<<1)] + 1); + result = (result >> 1); + 
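+                        /* quarter-pel output: the clipped half-pel value averaged with the adjacent full-pel sample, rounding up */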
*(p_cur += outpitch) = result; + /* third pixel */ + r1 = *(p_ref += inpitch); + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dy-(inpitch<<1)] + 1); + result = (result >> 1); + *(p_cur += outpitch) = result; + /* fourth pixel */ + r2 = *(p_ref += inpitch); + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + /* 3/4 pel, no need to clip */ + result = (result + p_ref[dy-(inpitch<<1)] + 1); + result = (result >> 1); + *(p_cur += outpitch) = result; + p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ + } + p_cur += (curr_offset - 3); + } + } + } + } + else + { + for (j = 0; j < blkwidth; j += 4, in += 4) + { + r13 = 0; + p_ref = in; + p_cur -= outpitch; /* compensate for the first offset */ + tmp = (uint32)(p_ref + ref_offset); /* limit */ + while ((uint32)p_ref < tmp) /* the loop un-rolled */ + { + r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */ + p_ref += inpitch; + r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */ + r0 &= 0xFF00FF; + + r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + + r0 += r1; + r6 += r7; + + r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */ + r8 = (r2 >> 8) & 0xFF00FF; + r2 &= 0xFF00FF; + + r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + r1 += r2; + + r7 += r8; + + r0 += 20 * r1; + r6 += 20 * r7; + r0 += 0x100010; + r6 += 0x100010; + + r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */ + r8 = (r2 >> 8) & 0xFF00FF; + r2 &= 0xFF00FF; + + r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + r1 += r2; + + r7 += r8; + + r0 -= 5 * r1; + r6 -= 5 * r7; + + r0 >>= 5; + r6 >>= 5; + /* clip */ + r13 |= r6; + r13 |= r0; + //CLIPPACK(r6,result) + r0 &= 0xFF00FF; + r6 &= 0xFF00FF; + r0 |= (r6 << 8); /* pack it back */ + *((uint32*)(p_cur += outpitch)) = r0; + } + p_cur += curr_offset; /* offset to the next pixel */ + if (r13 & 0xFF000700) /* this column need clipping */ + { + p_cur -= 4; + for (i = 0; i < 4; i++) + { + p_ref = in + i; + p_cur -= outpitch; /* compensate for the first offset */ + tmp = (uint32)(p_ref + ref_offset); /* limit */ + while ((uint32)p_ref < tmp) + { /* loop un-rolled */ + r0 = *(p_ref - (inpitch << 1)); + r1 = *(p_ref - inpitch); + r2 = *p_ref; + r3 = *(p_ref += inpitch); /* modify pointer before loading */ + r4 = *(p_ref += inpitch); + /* first pixel */ + r5 = *(p_ref += inpitch); + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + /* second pixel */ + r0 = *(p_ref += inpitch); + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + /* third pixel */ + r1 = *(p_ref += inpitch); + result = (r2 + r1); + r2 
= (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + /* fourth pixel */ + r2 = *(p_ref += inpitch); + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ + } + p_cur += (curr_offset - 3); + } + } + } + } + + return ; +} + +void eVertInterp2MC(uint8 *in, int inpitch, int *out, int outpitch, + int blkwidth, int blkheight) +{ + int *p_cur; + uint8 *p_ref; + uint32 tmp; + int result, curr_offset, ref_offset; + int j, r0, r1, r2, r3, r4, r5; + + p_cur = out; + curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */ + ref_offset = blkheight * inpitch; /* for limit */ + + for (j = 0; j < blkwidth; j++) + { + p_cur -= outpitch; /* compensate for the first offset */ + p_ref = in++; + + tmp = (uint32)(p_ref + ref_offset); /* limit */ + while ((uint32)p_ref < tmp) + { /* loop un-rolled */ + r0 = *(p_ref - (inpitch << 1)); + r1 = *(p_ref - inpitch); + r2 = *p_ref; + r3 = *(p_ref += inpitch); /* modify pointer before loading */ + r4 = *(p_ref += inpitch); + /* first pixel */ + r5 = *(p_ref += inpitch); + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + *(p_cur += outpitch) = result; + /* second pixel */ + r0 = *(p_ref += inpitch); + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + *(p_cur += outpitch) = result; + /* third pixel */ + r1 = *(p_ref += inpitch); + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + *(p_cur += outpitch) = result; + /* fourth pixel */ + r2 = *(p_ref += inpitch); + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + *(p_cur += outpitch) = result; + p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ + } + p_cur += curr_offset; + } + + return ; +} + +void eVertInterp3MC(int *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight, int dy) +{ + uint8 *p_cur; + int *p_ref; + uint32 tmp; + int result, result2, curr_offset, ref_offset; + int j, r0, r1, r2, r3, r4, r5; + + p_cur = out; + curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */ + ref_offset = blkheight * inpitch; /* for limit */ + + if (dy&1) + { + dy = (dy >> 1) ? 
-(inpitch << 1) : -(inpitch << 1) - inpitch; + + for (j = 0; j < blkwidth; j++) + { + p_cur -= outpitch; /* compensate for the first offset */ + p_ref = in++; + + tmp = (uint32)(p_ref + ref_offset); /* limit */ + while ((uint32)p_ref < tmp) + { /* loop un-rolled */ + r0 = *(p_ref - (inpitch << 1)); + r1 = *(p_ref - inpitch); + r2 = *p_ref; + r3 = *(p_ref += inpitch); /* modify pointer before loading */ + r4 = *(p_ref += inpitch); + /* first pixel */ + r5 = *(p_ref += inpitch); + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dy] + 16) >> 5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + result = (result >> 1); + *(p_cur += outpitch) = result; + /* second pixel */ + r0 = *(p_ref += inpitch); + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dy] + 16) >> 5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + result = (result >> 1); + *(p_cur += outpitch) = result; + /* third pixel */ + r1 = *(p_ref += inpitch); + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dy] + 16) >> 5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + result = (result >> 1); + *(p_cur += outpitch) = result; + /* fourth pixel */ + r2 = *(p_ref += inpitch); + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + result2 = ((p_ref[dy] + 16) >> 5); + CLIP_RESULT(result2) + /* 3/4 pel, no need to clip */ + result = (result + result2 + 1); + result = (result >> 1); + *(p_cur += outpitch) = result; + p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ + } + p_cur += curr_offset; + } + } + else + { + for (j = 0; j < blkwidth; j++) + { + p_cur -= outpitch; /* compensate for the first offset */ + p_ref = in++; + + tmp = (uint32)(p_ref + ref_offset); /* limit */ + while ((uint32)p_ref < tmp) + { /* loop un-rolled */ + r0 = *(p_ref - (inpitch << 1)); + r1 = *(p_ref - inpitch); + r2 = *p_ref; + r3 = *(p_ref += inpitch); /* modify pointer before loading */ + r4 = *(p_ref += inpitch); + /* first pixel */ + r5 = *(p_ref += inpitch); + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + /* second pixel */ + r0 = *(p_ref += inpitch); + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + /* third pixel */ + r1 = *(p_ref += inpitch); + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 
5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + /* fourth pixel */ + r2 = *(p_ref += inpitch); + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 512) >> 10; + CLIP_RESULT(result) + *(p_cur += outpitch) = result; + p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ + } + p_cur += curr_offset; + } + } + + return ; +} + +void eDiagonalInterpMC(uint8 *in1, uint8 *in2, int inpitch, + uint8 *out, int outpitch, + int blkwidth, int blkheight) +{ + int j, i; + int result; + uint8 *p_cur, *p_ref, *p_tmp8; + int curr_offset, ref_offset; + uint8 tmp_res[24][24], tmp_in[24][24]; + uint32 *p_tmp; + uint32 tmp, pkres, tmp_result; + int32 r0, r1, r2, r3, r4, r5; + int32 r6, r7, r8, r9, r10, r13; + + ref_offset = inpitch - blkwidth; + p_ref = in1 - 2; + /* perform horizontal interpolation */ + /* not word-aligned */ + /* It is faster to read 1 byte at time to avoid calling CreateAlign */ + /* if(((uint32)p_ref)&0x3) + { + CreateAlign(p_ref,inpitch,0,&tmp_in[0][0],blkwidth+8,blkheight); + p_ref = &tmp_in[0][0]; + ref_offset = 24-blkwidth; + }*/ + + p_tmp = (uint32*) & (tmp_res[0][0]); + for (j = blkheight; j > 0; j--) + { + r13 = 0; + tmp = (uint32)(p_ref + blkwidth); + + //r0 = *((uint32*)p_ref); /* d,c,b,a */ + //r1 = (r0>>8)&0xFF00FF; /* 0,d,0,b */ + //r0 &= 0xFF00FF; /* 0,c,0,a */ + /* It is faster to read 1 byte at a time */ + r0 = p_ref[0]; + r1 = p_ref[2]; + r0 |= (r1 << 16); /* 0,c,0,a */ + r1 = p_ref[1]; + r2 = p_ref[3]; + r1 |= (r2 << 16); /* 0,d,0,b */ + + while ((uint32)p_ref < tmp) + { + //r2 = *((uint32*)(p_ref+=4));/* h,g,f,e */ + //r3 = (r2>>8)&0xFF00FF; /* 0,h,0,f */ + //r2 &= 0xFF00FF; /* 0,g,0,e */ + /* It is faster to read 1 byte at a time */ + r2 = *(p_ref += 4); + r3 = p_ref[2]; + r2 |= (r3 << 16); /* 0,g,0,e */ + r3 = p_ref[1]; + r4 = p_ref[3]; + r3 |= (r4 << 16); /* 0,h,0,f */ + + r4 = r0 + r3; /* c+h, a+f */ + r5 = r0 + r1; /* c+d, a+b */ + r6 = r2 + r3; /* g+h, e+f */ + r5 >>= 16; + r5 |= (r6 << 16); /* e+f, c+d */ + r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */ + r4 += 0x100010; /* +16, +16 */ + r5 = r1 + r2; /* d+g, b+e */ + r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */ + r4 >>= 5; + r13 |= r4; /* check clipping */ + r4 &= 0xFF00FF; /* mask */ + + r5 = p_ref[4]; /* i */ + r6 = (r5 << 16); + r5 = r6 | (r2 >> 16);/* 0,i,0,g */ + r5 += r1; /* d+i, b+g */ /* r5 not free */ + r1 >>= 16; + r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */ + r1 += r2; /* f+g, d+e */ + r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */ + r0 >>= 16; + r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */ + r0 += r3; /* e+h, c+f */ + r5 += 0x100010; /* 16,16 */ + r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */ + r5 >>= 5; + r13 |= r5; /* check clipping */ + r5 &= 0xFF00FF; /* mask */ + + r4 |= (r5 << 8); /* pack them together */ + *p_tmp++ = r4; + r1 = r3; + r0 = r2; + } + p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */ + p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ + + if (r13&0xFF000700) /* need clipping */ + { + /* move back to the beginning of the line */ + p_ref -= (ref_offset + blkwidth); /* input */ + p_tmp -= 6; /* intermediate output */ + tmp = (uint32)(p_ref + blkwidth); + while ((uint32)p_ref < tmp) + { + r0 = 
*p_ref++; + r1 = *p_ref++; + r2 = *p_ref++; + r3 = *p_ref++; + r4 = *p_ref++; + /* first pixel */ + r5 = *p_ref++; + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres = result; + /* second pixel */ + r0 = *p_ref++; + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres |= (result << 8); + /* third pixel */ + r1 = *p_ref++; + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres |= (result << 16); + /* fourth pixel */ + r2 = *p_ref++; + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + pkres |= (result << 24); + + *p_tmp++ = pkres; /* write 4 pixel */ + p_ref -= 5; + } + p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */ + p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ + } + } + + /* perform vertical interpolation */ + /* not word-aligned */ + if (((uint32)in2)&0x3) + { + eCreateAlign(in2, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5); + in2 = &tmp_in[2][0]; + inpitch = 24; + } + + p_cur = out; + curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically up and one pixel right */ + pkres = blkheight * inpitch; /* reuse it for limit */ + + curr_offset += 3; + + for (j = 0; j < blkwidth; j += 4, in2 += 4) + { + r13 = 0; + p_ref = in2; + p_tmp8 = &(tmp_res[0][j]); /* intermediate result */ + p_tmp8 -= 24; /* compensate for the first offset */ + p_cur -= outpitch; /* compensate for the first offset */ + tmp = (uint32)(p_ref + pkres); /* limit */ + while ((uint32)p_ref < tmp) /* the loop un-rolled */ + { + /* Read 1 byte at a time is too slow, too many read and pack ops, need to call CreateAlign */ + /*p_ref8 = p_ref-(inpitch<<1); r0 = p_ref8[0]; r1 = p_ref8[2]; + r0 |= (r1<<16); r6 = p_ref8[1]; r1 = p_ref8[3]; + r6 |= (r1<<16); p_ref+=inpitch; */ + r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */ + p_ref += inpitch; + r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */ + r0 &= 0xFF00FF; + + /*p_ref8 = p_ref+(inpitch<<1); + r1 = p_ref8[0]; r7 = p_ref8[2]; r1 |= (r7<<16); + r7 = p_ref8[1]; r2 = p_ref8[3]; r7 |= (r2<<16);*/ + r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + + r0 += r1; + r6 += r7; + + /*r2 = p_ref[0]; r8 = p_ref[2]; r2 |= (r8<<16); + r8 = p_ref[1]; r1 = p_ref[3]; r8 |= (r1<<16);*/ + r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */ + r8 = (r2 >> 8) & 0xFF00FF; + r2 &= 0xFF00FF; + + /*p_ref8 = p_ref-inpitch; r1 = p_ref8[0]; r7 = p_ref8[2]; + r1 |= (r7<<16); r1 += r2; r7 = p_ref8[1]; + r2 = p_ref8[3]; r7 |= (r2<<16);*/ + r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + r1 += r2; + + r7 += r8; + + r0 += 20 * r1; + r6 += 20 * r7; + r0 += 0x100010; + r6 += 0x100010; + + /*p_ref8 = p_ref-(inpitch<<1); r2 = p_ref8[0]; r8 = p_ref8[2]; + r2 |= (r8<<16); r8 = p_ref8[1]; r1 = p_ref8[3]; r8 |= (r1<<16);*/ + r2 = 
*((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */ + r8 = (r2 >> 8) & 0xFF00FF; + r2 &= 0xFF00FF; + + /*p_ref8 = p_ref+inpitch; r1 = p_ref8[0]; r7 = p_ref8[2]; + r1 |= (r7<<16); r1 += r2; r7 = p_ref8[1]; + r2 = p_ref8[3]; r7 |= (r2<<16);*/ + r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */ + r7 = (r1 >> 8) & 0xFF00FF; + r1 &= 0xFF00FF; + r1 += r2; + + r7 += r8; + + r0 -= 5 * r1; + r6 -= 5 * r7; + + r0 >>= 5; + r6 >>= 5; + /* clip */ + r13 |= r6; + r13 |= r0; + //CLIPPACK(r6,result) + /* add with horizontal results */ + r10 = *((uint32*)(p_tmp8 += 24)); + r9 = (r10 >> 8) & 0xFF00FF; + r10 &= 0xFF00FF; + + r0 += r10; + r0 += 0x10001; + r0 = (r0 >> 1) & 0xFF00FF; /* mask to 8 bytes */ + + r6 += r9; + r6 += 0x10001; + r6 = (r6 >> 1) & 0xFF00FF; /* mask to 8 bytes */ + + r0 |= (r6 << 8); /* pack it back */ + *((uint32*)(p_cur += outpitch)) = r0; + } + p_cur += curr_offset; /* offset to the next pixel */ + if (r13 & 0xFF000700) /* this column need clipping */ + { + p_cur -= 4; + for (i = 0; i < 4; i++) + { + p_ref = in2 + i; + p_tmp8 = &(tmp_res[0][j+i]); /* intermediate result */ + p_tmp8 -= 24; /* compensate for the first offset */ + p_cur -= outpitch; /* compensate for the first offset */ + tmp = (uint32)(p_ref + pkres); /* limit */ + while ((uint32)p_ref < tmp) /* the loop un-rolled */ + { + r0 = *(p_ref - (inpitch << 1)); + r1 = *(p_ref - inpitch); + r2 = *p_ref; + r3 = *(p_ref += inpitch); /* modify pointer before loading */ + r4 = *(p_ref += inpitch); + /* first pixel */ + r5 = *(p_ref += inpitch); + result = (r0 + r5); + r0 = (r1 + r4); + result -= (r0 * 5);//result -= r0; result -= (r0<<2); + r0 = (r2 + r3); + result += (r0 * 20);//result += (r0<<4); result += (r0<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + tmp_result = *(p_tmp8 += 24); /* modify pointer before loading */ + result = (result + tmp_result + 1); /* no clip */ + result = (result >> 1); + *(p_cur += outpitch) = result; + /* second pixel */ + r0 = *(p_ref += inpitch); + result = (r1 + r0); + r1 = (r2 + r5); + result -= (r1 * 5);//result -= r1; result -= (r1<<2); + r1 = (r3 + r4); + result += (r1 * 20);//result += (r1<<4); result += (r1<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + tmp_result = *(p_tmp8 += 24); /* intermediate result */ + result = (result + tmp_result + 1); /* no clip */ + result = (result >> 1); + *(p_cur += outpitch) = result; + /* third pixel */ + r1 = *(p_ref += inpitch); + result = (r2 + r1); + r2 = (r3 + r0); + result -= (r2 * 5);//result -= r2; result -= (r2<<2); + r2 = (r4 + r5); + result += (r2 * 20);//result += (r2<<4); result += (r2<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + tmp_result = *(p_tmp8 += 24); /* intermediate result */ + result = (result + tmp_result + 1); /* no clip */ + result = (result >> 1); + *(p_cur += outpitch) = result; + /* fourth pixel */ + r2 = *(p_ref += inpitch); + result = (r3 + r2); + r3 = (r4 + r1); + result -= (r3 * 5);//result -= r3; result -= (r3<<2); + r3 = (r5 + r0); + result += (r3 * 20);//result += (r3<<4); result += (r3<<2); + result = (result + 16) >> 5; + CLIP_RESULT(result) + tmp_result = *(p_tmp8 += 24); /* intermediate result */ + result = (result + tmp_result + 1); /* no clip */ + result = (result >> 1); + *(p_cur += outpitch) = result; + p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ + } + p_cur += (curr_offset - 3); + } + } + } + + return ; +} + +/* position G */ +void eFullPelMC(uint8 *in, int inpitch, uint8 *out, int outpitch, + int blkwidth, int blkheight) 
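/* integer-pel position: a plain block copy from reference to prediction. The first branch assembles each 32-bit store from four byte loads because 'in' may not be word-aligned; the aligned branch copies one word at a time. */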
+{ + int i, j; + int offset_in = inpitch - blkwidth; + int offset_out = outpitch - blkwidth; + uint32 temp; + uint8 byte; + + if (((uint32)in)&3) + { + for (j = blkheight; j > 0; j--) + { + for (i = blkwidth; i > 0; i -= 4) + { + temp = *in++; + byte = *in++; + temp |= (byte << 8); + byte = *in++; + temp |= (byte << 16); + byte = *in++; + temp |= (byte << 24); + + *((uint32*)out) = temp; /* write 4 bytes */ + out += 4; + } + out += offset_out; + in += offset_in; + } + } + else + { + for (j = blkheight; j > 0; j--) + { + for (i = blkwidth; i > 0; i -= 4) + { + temp = *((uint32*)in); + *((uint32*)out) = temp; + in += 4; + out += 4; + } + out += offset_out; + in += offset_in; + } + } + return ; +} + +void ePadChroma(uint8 *ref, int picwidth, int picheight, int picpitch, int x_pos, int y_pos) +{ + int pad_height; + int pad_width; + uint8 *start; + uint32 word1, word2, word3; + int offset, j; + + + pad_height = 8 + ((y_pos & 7) ? 1 : 0); + pad_width = 8 + ((x_pos & 7) ? 1 : 0); + + y_pos >>= 3; + x_pos >>= 3; + // pad vertical first + if (y_pos < 0) // need to pad up + { + if (x_pos < -8) start = ref - 8; + else if (x_pos + pad_width > picwidth + 7) start = ref + picwidth + 7 - pad_width; + else start = ref + x_pos; + + /* word-align start */ + offset = (uint32)start & 0x3; + if (offset) start -= offset; + + word1 = *((uint32*)start); + word2 = *((uint32*)(start + 4)); + word3 = *((uint32*)(start + 8)); + + /* pad up N rows */ + j = -y_pos; + if (j > 8) j = 8; + while (j--) + { + *((uint32*)(start -= picpitch)) = word1; + *((uint32*)(start + 4)) = word2; + *((uint32*)(start + 8)) = word3; + } + + } + else if (y_pos + pad_height >= picheight) /* pad down */ + { + if (x_pos < -8) start = ref + picpitch * (picheight - 1) - 8; + else if (x_pos + pad_width > picwidth + 7) start = ref + picpitch * (picheight - 1) + + picwidth + 7 - pad_width; + else start = ref + picpitch * (picheight - 1) + x_pos; + + /* word-align start */ + offset = (uint32)start & 0x3; + if (offset) start -= offset; + + word1 = *((uint32*)start); + word2 = *((uint32*)(start + 4)); + word3 = *((uint32*)(start + 8)); + + /* pad down N rows */ + j = y_pos + pad_height - picheight; + if (j > 8) j = 8; + while (j--) + { + *((uint32*)(start += picpitch)) = word1; + *((uint32*)(start + 4)) = word2; + *((uint32*)(start + 8)) = word3; + } + } + + /* now pad horizontal */ + if (x_pos < 0) // pad left + { + if (y_pos < -8) start = ref - (picpitch << 3); + else if (y_pos + pad_height > picheight + 7) start = ref + (picheight + 7 - pad_height) * picpitch; + else start = ref + y_pos * picpitch; + + // now pad left 8 pixels for pad_height rows */ + j = pad_height; + start -= picpitch; + while (j--) + { + word1 = *(start += picpitch); + word1 |= (word1 << 8); + word1 |= (word1 << 16); + *((uint32*)(start - 8)) = word1; + *((uint32*)(start - 4)) = word1; + } + } + else if (x_pos + pad_width >= picwidth) /* pad right */ + { + if (y_pos < -8) start = ref - (picpitch << 3) + picwidth - 1; + else if (y_pos + pad_height > picheight + 7) start = ref + (picheight + 7 - pad_height) * picpitch + picwidth - 1; + else start = ref + y_pos * picpitch + picwidth - 1; + + // now pad right 8 pixels for pad_height rows */ + j = pad_height; + start -= picpitch; + while (j--) + { + word1 = *(start += picpitch); + word1 |= (word1 << 8); + word1 |= (word1 << 16); + *((uint32*)(start + 1)) = word1; + *((uint32*)(start + 5)) = word1; + } + } + + return ; +} + + +void eChromaMotionComp(uint8 *ref, int picwidth, int picheight, + int x_pos, int y_pos, + uint8 *pred, int 
picpitch, + int blkwidth, int blkheight) +{ + int dx, dy; + int offset_dx, offset_dy; + int index; + + ePadChroma(ref, picwidth, picheight, picpitch, x_pos, y_pos); + + dx = x_pos & 7; + dy = y_pos & 7; + offset_dx = (dx + 7) >> 3; + offset_dy = (dy + 7) >> 3; + x_pos = x_pos >> 3; /* round it to full-pel resolution */ + y_pos = y_pos >> 3; + + ref += y_pos * picpitch + x_pos; + + index = offset_dx + (offset_dy << 1) + ((blkwidth << 1) & 0x7); + + (*(eChromaMC_SIMD[index]))(ref, picpitch , dx, dy, pred, picpitch, blkwidth, blkheight); + return ; +} + + +/* SIMD routines, unroll the loops in vertical direction, decreasing loops (things to be done) */ +void eChromaDiagonalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight) +{ + int32 r0, r1, r2, r3, result0, result1; + uint8 temp[288]; + uint8 *ref, *out; + int i, j; + int dx_8 = 8 - dx; + int dy_8 = 8 - dy; + + /* horizontal first */ + out = temp; + for (i = 0; i < blkheight + 1; i++) + { + ref = pRef; + r0 = ref[0]; + for (j = 0; j < blkwidth; j += 4) + { + r0 |= (ref[2] << 16); + result0 = dx_8 * r0; + + r1 = ref[1] | (ref[3] << 16); + result0 += dx * r1; + *(int32 *)out = result0; + + result0 = dx_8 * r1; + + r2 = ref[4]; + r0 = r0 >> 16; + r1 = r0 | (r2 << 16); + result0 += dx * r1; + *(int32 *)(out + 16) = result0; + + ref += 4; + out += 4; + r0 = r2; + } + pRef += srcPitch; + out += (32 - blkwidth); + } + +// pRef -= srcPitch*(blkheight+1); + ref = temp; + + for (j = 0; j < blkwidth; j += 4) + { + r0 = *(int32 *)ref; + r1 = *(int32 *)(ref + 16); + ref += 32; + out = pOut; + for (i = 0; i < (blkheight >> 1); i++) + { + result0 = dy_8 * r0 + 0x00200020; + r2 = *(int32 *)ref; + result0 += dy * r2; + result0 >>= 6; + result0 &= 0x00FF00FF; + r0 = r2; + + result1 = dy_8 * r1 + 0x00200020; + r3 = *(int32 *)(ref + 16); + result1 += dy * r3; + result1 >>= 6; + result1 &= 0x00FF00FF; + r1 = r3; + *(int32 *)out = result0 | (result1 << 8); + out += predPitch; + ref += 32; + + result0 = dy_8 * r0 + 0x00200020; + r2 = *(int32 *)ref; + result0 += dy * r2; + result0 >>= 6; + result0 &= 0x00FF00FF; + r0 = r2; + + result1 = dy_8 * r1 + 0x00200020; + r3 = *(int32 *)(ref + 16); + result1 += dy * r3; + result1 >>= 6; + result1 &= 0x00FF00FF; + r1 = r3; + *(int32 *)out = result0 | (result1 << 8); + out += predPitch; + ref += 32; + } + pOut += 4; + ref = temp + 4; /* since it can only iterate twice max */ + } + return; +} + +void eChromaHorizontalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight) +{ + (void)(dy); + + int32 r0, r1, r2, result0, result1; + uint8 *ref, *out; + int i, j; + int dx_8 = 8 - dx; + + /* horizontal first */ + for (i = 0; i < blkheight; i++) + { + ref = pRef; + out = pOut; + + r0 = ref[0]; + for (j = 0; j < blkwidth; j += 4) + { + r0 |= (ref[2] << 16); + result0 = dx_8 * r0 + 0x00040004; + + r1 = ref[1] | (ref[3] << 16); + result0 += dx * r1; + result0 >>= 3; + result0 &= 0x00FF00FF; + + result1 = dx_8 * r1 + 0x00040004; + + r2 = ref[4]; + r0 = r0 >> 16; + r1 = r0 | (r2 << 16); + result1 += dx * r1; + result1 >>= 3; + result1 &= 0x00FF00FF; + + *(int32 *)out = result0 | (result1 << 8); + + ref += 4; + out += 4; + r0 = r2; + } + + pRef += srcPitch; + pOut += predPitch; + } + return; +} + +void eChromaVerticalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight) +{ + (void)(dx); + + int32 r0, r1, r2, r3, result0, result1; + int i, j; + uint8 *ref, *out; + int dy_8 = 
8 - dy; + /* vertical first */ + for (i = 0; i < blkwidth; i += 4) + { + ref = pRef; + out = pOut; + + r0 = ref[0] | (ref[2] << 16); + r1 = ref[1] | (ref[3] << 16); + ref += srcPitch; + for (j = 0; j < blkheight; j++) + { + result0 = dy_8 * r0 + 0x00040004; + r2 = ref[0] | (ref[2] << 16); + result0 += dy * r2; + result0 >>= 3; + result0 &= 0x00FF00FF; + r0 = r2; + + result1 = dy_8 * r1 + 0x00040004; + r3 = ref[1] | (ref[3] << 16); + result1 += dy * r3; + result1 >>= 3; + result1 &= 0x00FF00FF; + r1 = r3; + *(int32 *)out = result0 | (result1 << 8); + ref += srcPitch; + out += predPitch; + } + pOut += 4; + pRef += 4; + } + return; +} + +void eChromaDiagonalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight) +{ + (void)(blkwidth); + + int32 r0, r1, temp0, temp1, result; + int32 temp[9]; + int32 *out; + int i, r_temp; + int dy_8 = 8 - dy; + + /* horizontal first */ + out = temp; + for (i = 0; i < blkheight + 1; i++) + { + r_temp = pRef[1]; + temp0 = (pRef[0] << 3) + dx * (r_temp - pRef[0]); + temp1 = (r_temp << 3) + dx * (pRef[2] - r_temp); + r0 = temp0 | (temp1 << 16); + *out++ = r0; + pRef += srcPitch; + } + + pRef -= srcPitch * (blkheight + 1); + + out = temp; + + r0 = *out++; + + for (i = 0; i < blkheight; i++) + { + result = dy_8 * r0 + 0x00200020; + r1 = *out++; + result += dy * r1; + result >>= 6; + result &= 0x00FF00FF; + *(int16 *)pOut = (result >> 8) | (result & 0xFF); + r0 = r1; + pOut += predPitch; + } + return; +} + +void eChromaHorizontalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight) +{ + (void)(dy); + (void)(blkwidth); + + int i, temp, temp0, temp1; + + /* horizontal first */ + for (i = 0; i < blkheight; i++) + { + temp = pRef[1]; + temp0 = ((pRef[0] << 3) + dx * (temp - pRef[0]) + 4) >> 3; + temp1 = ((temp << 3) + dx * (pRef[2] - temp) + 4) >> 3; + + *(int16 *)pOut = temp0 | (temp1 << 8); + pRef += srcPitch; + pOut += predPitch; + + } + return; +} +void eChromaVerticalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight) +{ + (void)(dx); + (void)(blkwidth); + + int32 r0, r1, result; + int i; + int dy_8 = 8 - dy; + r0 = pRef[0] | (pRef[1] << 16); + pRef += srcPitch; + for (i = 0; i < blkheight; i++) + { + result = dy_8 * r0 + 0x00040004; + r1 = pRef[0] | (pRef[1] << 16); + result += dy * r1; + result >>= 3; + result &= 0x00FF00FF; + *(int16 *)pOut = (result >> 8) | (result & 0xFF); + r0 = r1; + pRef += srcPitch; + pOut += predPitch; + } + return; +} + +void eChromaFullMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, + uint8 *pOut, int predPitch, int blkwidth, int blkheight) +{ + (void)(dx); + (void)(dy); + + int i, j; + int offset_in = srcPitch - blkwidth; + int offset_out = predPitch - blkwidth; + uint16 temp; + uint8 byte; + + if (((uint32)pRef)&1) + { + for (j = blkheight; j > 0; j--) + { + for (i = blkwidth; i > 0; i -= 2) + { + temp = *pRef++; + byte = *pRef++; + temp |= (byte << 8); + *((uint16*)pOut) = temp; /* write 2 bytes */ + pOut += 2; + } + pOut += offset_out; + pRef += offset_in; + } + } + else + { + for (j = blkheight; j > 0; j--) + { + for (i = blkwidth; i > 0; i -= 2) + { + temp = *((uint16*)pRef); + *((uint16*)pOut) = temp; + pRef += 2; + pOut += 2; + } + pOut += offset_out; + pRef += offset_in; + } + } + return ; +} diff --git a/media/libstagefright/codecs/avc/enc/src/motion_est.cpp b/media/libstagefright/codecs/avc/enc/src/motion_est.cpp new file mode 100644 index 0000000..f650ef9 
--- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/motion_est.cpp @@ -0,0 +1,1774 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + +#define MIN_GOP 1 /* minimum size of GOP, 1/23/01, need to be tested */ + +#define DEFAULT_REF_IDX 0 /* always from the first frame in the reflist */ + +#define ALL_CAND_EQUAL 10 /* any number greater than 5 will work */ + + +/* from TMN 3.2 */ +#define PREF_NULL_VEC 129 /* zero vector bias */ +#define PREF_16_VEC 129 /* 1MV bias versus 4MVs*/ +#define PREF_INTRA 3024//512 /* bias for INTRA coding */ + +const static int tab_exclude[9][9] = // [last_loc][curr_loc] +{ + {0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 1, 1, 0, 0}, + {0, 0, 0, 0, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 1, 1, 1}, + {0, 1, 1, 0, 0, 0, 1, 1, 1}, + {0, 1, 1, 0, 0, 0, 0, 0, 1}, + {0, 1, 1, 1, 1, 0, 0, 0, 1}, + {0, 0, 1, 1, 1, 0, 0, 0, 0}, + {0, 0, 1, 1, 1, 1, 1, 0, 0} +}; //to decide whether to continue or compute + +const static int refine_next[8][2] = /* [curr_k][increment] */ +{ + {0, 0}, {2, 0}, {1, 1}, {0, 2}, { -1, 1}, { -2, 0}, { -1, -1}, {0, -2} +}; + +#ifdef _SAD_STAT +uint32 num_MB = 0; +uint32 num_cand = 0; +#endif + +/************************************************************************/ +#define TH_INTER_2 100 /* temporary for now */ + +//#define FIXED_INTERPRED_MODE AVC_P16 +#define FIXED_REF_IDX 0 +#define FIXED_MVX 0 +#define FIXED_MVY 0 + +// only use when AVC_P8 or AVC_P8ref0 +#define FIXED_SUBMB_MODE AVC_4x4 +/*************************************************************************/ + +/* Initialize arrays necessary for motion search */ +AVCEnc_Status InitMotionSearchModule(AVCHandle *avcHandle) +{ + AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; + AVCRateControl *rateCtrl = encvid->rateCtrl; + int search_range = rateCtrl->mvRange; + int number_of_subpel_positions = 4 * (2 * search_range + 3); + int max_mv_bits, max_mvd; + int temp_bits = 0; + uint8 *mvbits; + int bits, imax, imin, i; + uint8* subpel_pred = (uint8*) encvid->subpel_pred; // all 16 sub-pel positions + + + while (number_of_subpel_positions > 0) + { + temp_bits++; + number_of_subpel_positions >>= 1; + } + + max_mv_bits = 3 + 2 * temp_bits; + max_mvd = (1 << (max_mv_bits >> 1)) - 1; + + encvid->mvbits_array = (uint8*) avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, + sizeof(uint8) * (2 * max_mvd + 1), DEFAULT_ATTR); + + if (encvid->mvbits_array == NULL) + { + return AVCENC_MEMORY_FAIL; + } + + mvbits = encvid->mvbits = encvid->mvbits_array + max_mvd; + + mvbits[0] = 1; + for (bits = 3; bits <= max_mv_bits; bits += 2) + { + imax = 1 << (bits >> 1); + imin = imax >> 1; + + for (i = imin; i < imax; i++) mvbits[-i] = mvbits[i] = bits; + } + + /* initialize half-pel search */ + encvid->hpel_cand[0] = subpel_pred + REF_CENTER; + encvid->hpel_cand[1] = 
subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1 ; + encvid->hpel_cand[2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1; + encvid->hpel_cand[3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; + encvid->hpel_cand[4] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; + encvid->hpel_cand[5] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25; + encvid->hpel_cand[6] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + encvid->hpel_cand[7] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + encvid->hpel_cand[8] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; + + /* For quarter-pel interpolation around best half-pel result */ + + encvid->bilin_base[0][0] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; + encvid->bilin_base[0][1] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1; + encvid->bilin_base[0][2] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + encvid->bilin_base[0][3] = subpel_pred + REF_CENTER; + + + encvid->bilin_base[1][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE; + encvid->bilin_base[1][1] = subpel_pred + REF_CENTER - 24; + encvid->bilin_base[1][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; + encvid->bilin_base[1][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1; + + encvid->bilin_base[2][0] = subpel_pred + REF_CENTER - 24; + encvid->bilin_base[2][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1; + encvid->bilin_base[2][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1; + encvid->bilin_base[2][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1; + + encvid->bilin_base[3][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1; + encvid->bilin_base[3][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1; + encvid->bilin_base[3][2] = subpel_pred + REF_CENTER; + encvid->bilin_base[3][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; + + encvid->bilin_base[4][0] = subpel_pred + REF_CENTER; + encvid->bilin_base[4][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; + encvid->bilin_base[4][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25; + encvid->bilin_base[4][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; + + encvid->bilin_base[5][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + encvid->bilin_base[5][1] = subpel_pred + REF_CENTER; + encvid->bilin_base[5][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + encvid->bilin_base[5][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25; + + encvid->bilin_base[6][0] = subpel_pred + REF_CENTER - 1; + encvid->bilin_base[6][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + encvid->bilin_base[6][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 24; + encvid->bilin_base[6][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + + encvid->bilin_base[7][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE; + encvid->bilin_base[7][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; + encvid->bilin_base[7][2] = subpel_pred + REF_CENTER - 1; + encvid->bilin_base[7][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; + + encvid->bilin_base[8][0] = subpel_pred + REF_CENTER - 25; + encvid->bilin_base[8][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE; + encvid->bilin_base[8][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE; + encvid->bilin_base[8][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; + + + return AVCENC_SUCCESS; +} + +/* Clean-up memory */ +void CleanMotionSearchModule(AVCHandle *avcHandle) +{ + AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; + + if (encvid->mvbits_array) + { + avcHandle->CBAVC_Free(avcHandle->userData, 
(int)(encvid->mvbits_array)); + encvid->mvbits = NULL; + } + + return ; +} + + +bool IntraDecisionABE(int *min_cost, uint8 *cur, int pitch, bool ave) +{ + int j; + uint8 *out; + int temp, SBE; + OsclFloat ABE; + bool intra = true; + + SBE = 0; + /* top neighbor */ + out = cur - pitch; + for (j = 0; j < 16; j++) + { + temp = out[j] - cur[j]; + SBE += ((temp >= 0) ? temp : -temp); + } + + /* left neighbor */ + out = cur - 1; + out -= pitch; + cur -= pitch; + for (j = 0; j < 16; j++) + { + temp = *(out += pitch) - *(cur += pitch); + SBE += ((temp >= 0) ? temp : -temp); + } + + /* compare mincost/384 and SBE/64 */ + ABE = SBE / 32.0; //ABE = SBE/64.0; // + if (ABE >= *min_cost / 256.0) //if( ABE*0.8 >= min_cost/384.0) // + { + intra = false; // no possibility of intra, just use inter + } + else + { + if (ave == true) + { + *min_cost = (*min_cost + (int)(SBE * 8)) >> 1; // possibility of intra, averaging the cost + } + else + { + *min_cost = (int)(SBE * 8); + } + } + + return intra; +} + +/******* main function for macroblock prediction for the entire frame ***/ +/* if turns out to be IDR frame, set video->nal_unit_type to AVC_NALTYPE_IDR */ +void AVCMotionEstimation(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + int slice_type = video->slice_type; + AVCFrameIO *currInput = encvid->currInput; + AVCPictureData *refPic = video->RefPicList0[0]; + int i, j, k; + int mbwidth = video->PicWidthInMbs; + int mbheight = video->PicHeightInMbs; + int totalMB = video->PicSizeInMbs; + int pitch = currInput->pitch; + AVCMacroblock *currMB, *mblock = video->mblock; + AVCMV *mot_mb_16x16, *mot16x16 = encvid->mot16x16; + // AVCMV *mot_mb_16x8, *mot_mb_8x16, *mot_mb_8x8, etc; + AVCRateControl *rateCtrl = encvid->rateCtrl; + uint8 *intraSearch = encvid->intraSearch; + uint FS_en = encvid->fullsearch_enable; + + int NumIntraSearch, start_i, numLoop, incr_i; + int mbnum, offset; + uint8 *cur, *best_cand[5]; + int totalSAD = 0; /* average SAD for rate control */ + int type_pred; + int abe_cost; + +#ifdef HTFM + /***** HYPOTHESIS TESTING ********/ /* 2/28/01 */ + int collect = 0; + HTFM_Stat htfm_stat; + double newvar[16]; + double exp_lamda[15]; + /*********************************/ +#endif + int hp_guess = 0; + uint32 mv_uint32; + + offset = 0; + + if (slice_type == AVC_I_SLICE) + { + /* cannot do I16 prediction here because it needs full decoding. 
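 Intra 16x16 prediction needs the reconstructed left/top neighbor pixels, which are only available inside the encoding loop, so every MB is flagged for intra search here and its cost is left at the maximum.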
*/ + for (i = 0; i < totalMB; i++) + { + encvid->min_cost[i] = 0x7FFFFFFF; /* max value for int */ + } + + memset(intraSearch, 1, sizeof(uint8)*totalMB); + + encvid->firstIntraRefreshMBIndx = 0; /* reset this */ + + return ; + } + else // P_SLICE + { + for (i = 0; i < totalMB; i++) + { + mblock[i].mb_intra = 0; + } + memset(intraSearch, 1, sizeof(uint8)*totalMB); + } + + if (refPic->padded == 0) + { + AVCPaddingEdge(refPic); + refPic->padded = 1; + } + /* Random INTRA update */ + if (rateCtrl->intraMBRate) + { + AVCRasterIntraUpdate(encvid, mblock, totalMB, rateCtrl->intraMBRate); + } + + encvid->sad_extra_info = NULL; +#ifdef HTFM + /***** HYPOTHESIS TESTING ********/ + InitHTFM(video, &htfm_stat, newvar, &collect); + /*********************************/ +#endif + + if ((rateCtrl->scdEnable == 1) + && ((rateCtrl->frame_rate < 5.0) || (video->sliceHdr->frame_num > MIN_GOP))) + /* do not try to detect a new scene if low frame rate and too close to previous I-frame */ + { + incr_i = 2; + numLoop = 2; + start_i = 1; + type_pred = 0; /* for initial candidate selection */ + } + else + { + incr_i = 1; + numLoop = 1; + start_i = 0; + type_pred = 2; + } + + /* First pass, loop thru half the macroblock */ + /* determine scene change */ + /* Second pass, for the rest of macroblocks */ + NumIntraSearch = 0; // to be intra searched in the encoding loop. + while (numLoop--) + { + for (j = 0; j < mbheight; j++) + { + if (incr_i > 1) + start_i = (start_i == 0 ? 1 : 0) ; /* toggle 0 and 1 */ + + offset = pitch * (j << 4) + (start_i << 4); + + mbnum = j * mbwidth + start_i; + + for (i = start_i; i < mbwidth; i += incr_i) + { + video->mbNum = mbnum; + video->currMB = currMB = mblock + mbnum; + mot_mb_16x16 = mot16x16 + mbnum; + + cur = currInput->YCbCr[0] + offset; + + if (currMB->mb_intra == 0) /* for INTER mode */ + { +#if defined(HTFM) + HTFMPrepareCurMB_AVC(encvid, &htfm_stat, cur, pitch); +#else + AVCPrepareCurMB(encvid, cur, pitch); +#endif + /************************************************************/ + /******** full-pel 1MV search **********************/ + + AVCMBMotionSearch(encvid, cur, best_cand, i << 4, j << 4, type_pred, + FS_en, &hp_guess); + + abe_cost = encvid->min_cost[mbnum] = mot_mb_16x16->sad; + + /* set mbMode and MVs */ + currMB->mbMode = AVC_P16; + currMB->MBPartPredMode[0][0] = AVC_Pred_L0; + mv_uint32 = ((mot_mb_16x16->y) << 16) | ((mot_mb_16x16->x) & 0xffff); + for (k = 0; k < 32; k += 2) + { + currMB->mvL0[k>>1] = mv_uint32; + } + + /* make a decision whether it should be tested for intra or not */ + if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0) + { + if (false == IntraDecisionABE(&abe_cost, cur, pitch, true)) + { + intraSearch[mbnum] = 0; + } + else + { + NumIntraSearch++; + rateCtrl->MADofMB[mbnum] = abe_cost; + } + } + else // boundary MBs, always do intra search + { + NumIntraSearch++; + } + + totalSAD += (int) rateCtrl->MADofMB[mbnum];//mot_mb_16x16->sad; + } + else /* INTRA update, use for prediction */ + { + mot_mb_16x16[0].x = mot_mb_16x16[0].y = 0; + + /* reset all other MVs to zero */ + /* mot_mb_16x8, mot_mb_8x16, mot_mb_8x8, etc. */ + abe_cost = encvid->min_cost[mbnum] = 0x7FFFFFFF; /* max value for int */ + + if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0) + { + IntraDecisionABE(&abe_cost, cur, pitch, false); + + rateCtrl->MADofMB[mbnum] = abe_cost; + totalSAD += abe_cost; + } + + NumIntraSearch++ ; + /* cannot do I16 prediction here because it needs full decoding. 
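 This MB was force-flagged intra by the refresh logic, so its MV is reset to zero and only the boundary-error estimate is kept for rate control.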
*/ + // intraSearch[mbnum] = 1; + + } + + mbnum += incr_i; + offset += (incr_i << 4); + + } /* for i */ + } /* for j */ + + /* since we cannot do intra/inter decision here, the SCD has to be + based on other criteria such as motion vectors coherency or the SAD */ + if (incr_i > 1 && numLoop) /* scene change on and first loop */ + { + //if(NumIntraSearch > ((totalMB>>3)<<1) + (totalMB>>3)) /* 75% of 50%MBs */ + if (NumIntraSearch*99 > (48*totalMB)) /* 20% of 50%MBs */ + /* need to do more investigation about this threshold since the NumIntraSearch + only show potential intra MBs, not the actual one */ + { + /* we can choose to just encode I_SLICE without IDR */ + //video->nal_unit_type = AVC_NALTYPE_IDR; + video->nal_unit_type = AVC_NALTYPE_SLICE; + video->sliceHdr->slice_type = AVC_I_ALL_SLICE; + video->slice_type = AVC_I_SLICE; + memset(intraSearch, 1, sizeof(uint8)*totalMB); + i = totalMB; + while (i--) + { + mblock[i].mb_intra = 1; + encvid->min_cost[i] = 0x7FFFFFFF; /* max value for int */ + } + + rateCtrl->totalSAD = totalSAD * 2; /* SAD */ + + return ; + } + } + /******** no scene change, continue motion search **********************/ + start_i = 0; + type_pred++; /* second pass */ + } + + rateCtrl->totalSAD = totalSAD; /* SAD */ + +#ifdef HTFM + /***** HYPOTHESIS TESTING ********/ + if (collect) + { + collect = 0; + UpdateHTFM(encvid, newvar, exp_lamda, &htfm_stat); + } + /*********************************/ +#endif + + return ; +} + +/*===================================================================== + Function: PaddingEdge + Date: 09/16/2000 + Purpose: Pad edge of a Vop +=====================================================================*/ + +void AVCPaddingEdge(AVCPictureData *refPic) +{ + uint8 *src, *dst; + int i; + int pitch, width, height; + uint32 temp1, temp2; + + width = refPic->width; + height = refPic->height; + pitch = refPic->pitch; + + /* pad top */ + src = refPic->Sl; + + temp1 = *src; /* top-left corner */ + temp2 = src[width-1]; /* top-right corner */ + temp1 |= (temp1 << 8); + temp1 |= (temp1 << 16); + temp2 |= (temp2 << 8); + temp2 |= (temp2 << 16); + + dst = src - (pitch << 4); + + *((uint32*)(dst - 16)) = temp1; + *((uint32*)(dst - 12)) = temp1; + *((uint32*)(dst - 8)) = temp1; + *((uint32*)(dst - 4)) = temp1; + + memcpy(dst, src, width); + + *((uint32*)(dst += width)) = temp2; + *((uint32*)(dst + 4)) = temp2; + *((uint32*)(dst + 8)) = temp2; + *((uint32*)(dst + 12)) = temp2; + + dst = dst - width - 16; + + i = 15; + while (i--) + { + memcpy(dst + pitch, dst, pitch); + dst += pitch; + } + + /* pad sides */ + dst += (pitch + 16); + src = dst; + i = height; + while (i--) + { + temp1 = *src; + temp2 = src[width-1]; + temp1 |= (temp1 << 8); + temp1 |= (temp1 << 16); + temp2 |= (temp2 << 8); + temp2 |= (temp2 << 16); + + *((uint32*)(dst - 16)) = temp1; + *((uint32*)(dst - 12)) = temp1; + *((uint32*)(dst - 8)) = temp1; + *((uint32*)(dst - 4)) = temp1; + + *((uint32*)(dst += width)) = temp2; + *((uint32*)(dst + 4)) = temp2; + *((uint32*)(dst + 8)) = temp2; + *((uint32*)(dst + 12)) = temp2; + + src += pitch; + dst = src; + } + + /* pad bottom */ + dst -= 16; + i = 16; + while (i--) + { + memcpy(dst, dst - pitch, pitch); + dst += pitch; + } + + + return ; +} + +/*=========================================================================== + Function: AVCRasterIntraUpdate + Date: 2/26/01 + Purpose: To raster-scan assign INTRA-update . + N macroblocks are updated (also was programmable). 
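 Starting at encvid->firstIntraRefreshMBIndx, numRefresh MBs are marked mb_intra and queued for intra search; the index wraps back to MB 0 when the end of the frame is reached.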
+===========================================================================*/ +void AVCRasterIntraUpdate(AVCEncObject *encvid, AVCMacroblock *mblock, int totalMB, int numRefresh) +{ + int indx, i; + + indx = encvid->firstIntraRefreshMBIndx; + for (i = 0; i < numRefresh && indx < totalMB; i++) + { + (mblock + indx)->mb_intra = 1; + encvid->intraSearch[indx++] = 1; + } + + /* if read the end of frame, reset and loop around */ + if (indx >= totalMB - 1) + { + indx = 0; + while (i < numRefresh && indx < totalMB) + { + (mblock + indx)->mb_intra = 1; + encvid->intraSearch[indx++] = 1; + i++; + } + } + + encvid->firstIntraRefreshMBIndx = indx; /* update with a new value */ + + return ; +} + + +#ifdef HTFM +void InitHTFM(VideoEncData *encvid, HTFM_Stat *htfm_stat, double *newvar, int *collect) +{ + AVCCommonObj *video = encvid->common; + int i; + int lx = video->currPic->width; // padding + int lx2 = lx << 1; + int lx3 = lx2 + lx; + int rx = video->currPic->pitch; + int rx2 = rx << 1; + int rx3 = rx2 + rx; + + int *offset, *offset2; + + /* 4/11/01, collect data every 30 frames, doesn't have to be base layer */ + if (((int)video->sliceHdr->frame_num) % 30 == 1) + { + + *collect = 1; + + htfm_stat->countbreak = 0; + htfm_stat->abs_dif_mad_avg = 0; + + for (i = 0; i < 16; i++) + { + newvar[i] = 0.0; + } +// encvid->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM_Collect; + encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM_Collect; + encvid->functionPointer->SAD_MB_HalfPel[0] = NULL; + encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFM_Collectxh; + encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFM_Collectyh; + encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFM_Collectxhyh; + encvid->sad_extra_info = (void*)(htfm_stat); + offset = htfm_stat->offsetArray; + offset2 = htfm_stat->offsetRef; + } + else + { +// encvid->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM; + encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM; + encvid->functionPointer->SAD_MB_HalfPel[0] = NULL; + encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFMxh; + encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFMyh; + encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFMxhyh; + encvid->sad_extra_info = (void*)(encvid->nrmlz_th); + offset = encvid->nrmlz_th + 16; + offset2 = encvid->nrmlz_th + 32; + } + + offset[0] = 0; + offset[1] = lx2 + 2; + offset[2] = 2; + offset[3] = lx2; + offset[4] = lx + 1; + offset[5] = lx3 + 3; + offset[6] = lx + 3; + offset[7] = lx3 + 1; + offset[8] = lx; + offset[9] = lx3 + 2; + offset[10] = lx3 ; + offset[11] = lx + 2 ; + offset[12] = 1; + offset[13] = lx2 + 3; + offset[14] = lx2 + 1; + offset[15] = 3; + + offset2[0] = 0; + offset2[1] = rx2 + 2; + offset2[2] = 2; + offset2[3] = rx2; + offset2[4] = rx + 1; + offset2[5] = rx3 + 3; + offset2[6] = rx + 3; + offset2[7] = rx3 + 1; + offset2[8] = rx; + offset2[9] = rx3 + 2; + offset2[10] = rx3 ; + offset2[11] = rx + 2 ; + offset2[12] = 1; + offset2[13] = rx2 + 3; + offset2[14] = rx2 + 1; + offset2[15] = 3; + + return ; +} + +void UpdateHTFM(AVCEncObject *encvid, double *newvar, double *exp_lamda, HTFM_Stat *htfm_stat) +{ + if (htfm_stat->countbreak == 0) + htfm_stat->countbreak = 1; + + newvar[0] = (double)(htfm_stat->abs_dif_mad_avg) / (htfm_stat->countbreak * 16.); + + if (newvar[0] < 0.001) + { + newvar[0] = 0.001; /* to prevent floating overflow */ + } + exp_lamda[0] = 1 / (newvar[0] * 1.4142136); + exp_lamda[1] = exp_lamda[0] * 1.5825; + exp_lamda[2] = exp_lamda[0] * 2.1750; + 
exp_lamda[3] = exp_lamda[0] * 3.5065; + exp_lamda[4] = exp_lamda[0] * 3.1436; + exp_lamda[5] = exp_lamda[0] * 3.5315; + exp_lamda[6] = exp_lamda[0] * 3.7449; + exp_lamda[7] = exp_lamda[0] * 4.5854; + exp_lamda[8] = exp_lamda[0] * 4.6191; + exp_lamda[9] = exp_lamda[0] * 5.4041; + exp_lamda[10] = exp_lamda[0] * 6.5974; + exp_lamda[11] = exp_lamda[0] * 10.5341; + exp_lamda[12] = exp_lamda[0] * 10.0719; + exp_lamda[13] = exp_lamda[0] * 12.0516; + exp_lamda[14] = exp_lamda[0] * 15.4552; + + CalcThreshold(HTFM_Pf, exp_lamda, encvid->nrmlz_th); + return ; +} + + +void CalcThreshold(double pf, double exp_lamda[], int nrmlz_th[]) +{ + int i; + double temp[15]; + // printf("\nLamda: "); + + /* parametric PREMODELling */ + for (i = 0; i < 15; i++) + { + // printf("%g ",exp_lamda[i]); + if (pf < 0.5) + temp[i] = 1 / exp_lamda[i] * M4VENC_LOG(2 * pf); + else + temp[i] = -1 / exp_lamda[i] * M4VENC_LOG(2 * (1 - pf)); + } + + nrmlz_th[15] = 0; + for (i = 0; i < 15; i++) /* scale upto no.pixels */ + nrmlz_th[i] = (int)(temp[i] * ((i + 1) << 4) + 0.5); + + return ; +} + +void HTFMPrepareCurMB_AVC(AVCEncObject *encvid, HTFM_Stat *htfm_stat, uint8 *cur, int pitch) +{ + AVCCommonObj *video = encvid->common; + uint32 *htfmMB = (uint32*)(encvid->currYMB); + uint8 *ptr, byte; + int *offset; + int i; + uint32 word; + + if (((int)video->sliceHdr->frame_num) % 30 == 1) + { + offset = htfm_stat->offsetArray; + } + else + { + offset = encvid->nrmlz_th + 16; + } + + for (i = 0; i < 16; i++) + { + ptr = cur + offset[i]; + word = ptr[0]; + byte = ptr[4]; + word |= (byte << 8); + byte = ptr[8]; + word |= (byte << 16); + byte = ptr[12]; + word |= (byte << 24); + *htfmMB++ = word; + + word = *(ptr += (pitch << 2)); + byte = ptr[4]; + word |= (byte << 8); + byte = ptr[8]; + word |= (byte << 16); + byte = ptr[12]; + word |= (byte << 24); + *htfmMB++ = word; + + word = *(ptr += (pitch << 2)); + byte = ptr[4]; + word |= (byte << 8); + byte = ptr[8]; + word |= (byte << 16); + byte = ptr[12]; + word |= (byte << 24); + *htfmMB++ = word; + + word = *(ptr += (pitch << 2)); + byte = ptr[4]; + word |= (byte << 8); + byte = ptr[8]; + word |= (byte << 16); + byte = ptr[12]; + word |= (byte << 24); + *htfmMB++ = word; + } + + return ; +} + + +#endif // HTFM + +void AVCPrepareCurMB(AVCEncObject *encvid, uint8 *cur, int pitch) +{ + void* tmp = (void*)(encvid->currYMB); + uint32 *currYMB = (uint32*) tmp; + int i; + + cur -= pitch; + + for (i = 0; i < 16; i++) + { + *currYMB++ = *((uint32*)(cur += pitch)); + *currYMB++ = *((uint32*)(cur + 4)); + *currYMB++ = *((uint32*)(cur + 8)); + *currYMB++ = *((uint32*)(cur + 12)); + } + + return ; +} + +#ifdef FIXED_INTERPRED_MODE + +/* due to the complexity of the predicted motion vector, we may not decide to skip +a macroblock here just yet. */ +/* We will find the best motion vector and the best intra prediction mode for each block. 
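 When FIXED_INTERPRED_MODE is defined, however, no search is actually performed: the routine below only fills in the partition layout for the fixed mode and assigns the constant (FIXED_MVX, FIXED_MVY) vector with reference index FIXED_REF_IDX to every partition.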
*/ +/* output are + currMB->NumMbPart, currMB->MbPartWidth, currMB->MbPartHeight, + currMB->NumSubMbPart[], currMB->SubMbPartWidth[], currMB->SubMbPartHeight, + currMB->MBPartPredMode[][] (L0 or L1 or BiPred) + currMB->RefIdx[], currMB->ref_idx_L0[], + currMB->mvL0[], currMB->mvL1[] + */ + +AVCEnc_Status AVCMBMotionSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum, + int num_pass) +{ + AVCCommonObj *video = encvid->common; + int mbPartIdx, subMbPartIdx; + int16 *mv; + int i; + int SubMbPartHeight, SubMbPartWidth, NumSubMbPart; + + /* assign value to currMB->MBPartPredMode[][x],subMbMode[],NumSubMbPart[],SubMbPartWidth[],SubMbPartHeight[] */ + + currMB->mbMode = FIXED_INTERPRED_MODE; + currMB->mb_intra = 0; + + if (currMB->mbMode == AVC_P16) + { + currMB->NumMbPart = 1; + currMB->MbPartWidth = 16; + currMB->MbPartHeight = 16; + currMB->SubMbPartHeight[0] = 16; + currMB->SubMbPartWidth[0] = 16; + currMB->NumSubMbPart[0] = 1; + } + else if (currMB->mbMode == AVC_P16x8) + { + currMB->NumMbPart = 2; + currMB->MbPartWidth = 16; + currMB->MbPartHeight = 8; + for (i = 0; i < 2; i++) + { + currMB->SubMbPartWidth[i] = 16; + currMB->SubMbPartHeight[i] = 8; + currMB->NumSubMbPart[i] = 1; + } + } + else if (currMB->mbMode == AVC_P8x16) + { + currMB->NumMbPart = 2; + currMB->MbPartWidth = 8; + currMB->MbPartHeight = 16; + for (i = 0; i < 2; i++) + { + currMB->SubMbPartWidth[i] = 8; + currMB->SubMbPartHeight[i] = 16; + currMB->NumSubMbPart[i] = 1; + } + } + else if (currMB->mbMode == AVC_P8 || currMB->mbMode == AVC_P8ref0) + { + currMB->NumMbPart = 4; + currMB->MbPartWidth = 8; + currMB->MbPartHeight = 8; + if (FIXED_SUBMB_MODE == AVC_8x8) + { + SubMbPartHeight = 8; + SubMbPartWidth = 8; + NumSubMbPart = 1; + } + else if (FIXED_SUBMB_MODE == AVC_8x4) + { + SubMbPartHeight = 4; + SubMbPartWidth = 8; + NumSubMbPart = 2; + } + else if (FIXED_SUBMB_MODE == AVC_4x8) + { + SubMbPartHeight = 8; + SubMbPartWidth = 4; + NumSubMbPart = 2; + } + else if (FIXED_SUBMB_MODE == AVC_4x4) + { + SubMbPartHeight = 4; + SubMbPartWidth = 4; + NumSubMbPart = 4; + } + + for (i = 0; i < 4; i++) + { + currMB->subMbMode[i] = FIXED_SUBMB_MODE; + currMB->SubMbPartHeight[i] = SubMbPartHeight; + currMB->SubMbPartWidth[i] = SubMbPartWidth; + currMB->NumSubMbPart[i] = NumSubMbPart; + } + } + else /* it's probably intra mode */ + { + return AVCENC_SUCCESS; + } + + for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++) + { + currMB->MBPartPredMode[mbPartIdx][0] = AVC_Pred_L0; + currMB->ref_idx_L0[mbPartIdx] = FIXED_REF_IDX; + currMB->RefIdx[mbPartIdx] = video->RefPicList0[FIXED_REF_IDX]->RefIdx; + + for (subMbPartIdx = 0; subMbPartIdx < 4; subMbPartIdx++) + { + mv = (int16*)(currMB->mvL0 + (mbPartIdx << 2) + subMbPartIdx); + + *mv++ = FIXED_MVX; + *mv = FIXED_MVY; + } + } + + encvid->min_cost = 0; + + return AVCENC_SUCCESS; +} + +#else /* perform the search */ + +/* This option #1 search is very similar to PV's MPEG4 motion search algorithm. + The search is done in hierarchical manner from 16x16 MB down to smaller and smaller + partition. At each level, a decision can be made to stop the search if the expected + prediction gain is not worth the computation. The decision can also be made at the finest + level for more fullsearch-like behavior with the price of heavier computation. 
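 Concretely, the 16x16 path below runs AVCCandidateSelection() to gather spatio-temporal candidates and the predicted MV (cmvx, cmvy), seeds the best full-pel vector either from AVCFullSearch() (when fullsearch is enabled, or for the first few MBs of the top row right after an IDR reference) or from the candidate list, then spirals through the eight surrounding positions until the center stops moving, and finally calls AVCFindHalfPelMB() for half/quarter-pel refinement when subPelEnable is set. Each SAD is biased by MV_COST(), an estimate of the cost of coding the MV difference against (cmvx, cmvy).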
*/ +void AVCMBMotionSearch(AVCEncObject *encvid, uint8 *cur, uint8 *best_cand[], + int i0, int j0, int type_pred, int FS_en, int *hp_guess) +{ + AVCCommonObj *video = encvid->common; + AVCPictureData *currPic = video->currPic; + AVCSeqParamSet *currSPS = video->currSeqParams; + AVCRateControl *rateCtrl = encvid->rateCtrl; + AVCMacroblock *currMB = video->currMB; + uint8 *ref, *cand, *ncand; + void *extra_info = encvid->sad_extra_info; + int mbnum = video->mbNum; + int width = currPic->width; /* 6/12/01, must be multiple of 16 */ + int height = currPic->height; + AVCMV *mot16x16 = encvid->mot16x16; + int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock; + + int range = rateCtrl->mvRange; + + int lx = currPic->pitch; /* padding */ + int i, j, imin, jmin, ilow, ihigh, jlow, jhigh; + int d, dmin, dn[9]; + int k; + int mvx[5], mvy[5]; + int num_can, center_again; + int last_loc, new_loc = 0; + int step, max_step = range >> 1; + int next; + + int cmvx, cmvy; /* estimated predicted MV */ + int lev_idx; + int lambda_motion = encvid->lambda_motion; + uint8 *mvbits = encvid->mvbits; + int mvshift = 2; + int mvcost; + + int min_sad = 65535; + + ref = video->RefPicList0[DEFAULT_REF_IDX]->Sl; /* origin of actual frame */ + + /* have to initialize these params, necessary for interprediction part */ + currMB->NumMbPart = 1; + currMB->SubMbPartHeight[0] = 16; + currMB->SubMbPartWidth[0] = 16; + currMB->NumSubMbPart[0] = 1; + currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] = + currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = DEFAULT_REF_IDX; + currMB->ref_idx_L1[0] = currMB->ref_idx_L1[1] = + currMB->ref_idx_L1[2] = currMB->ref_idx_L1[3] = DEFAULT_REF_IDX; + currMB->RefIdx[0] = currMB->RefIdx[1] = + currMB->RefIdx[2] = currMB->RefIdx[3] = video->RefPicList0[DEFAULT_REF_IDX]->RefIdx; + + cur = encvid->currYMB; /* use smaller memory space for current MB */ + + /* find limit of the search (adjusting search range)*/ + lev_idx = mapLev2Idx[currSPS->level_idc]; + + /* we can make this part dynamic based on previous statistics */ + ilow = i0 - range; + if (i0 - ilow > 2047) /* clip to conform with the standard */ + { + ilow = i0 - 2047; + } + if (ilow < -13) // change it from -15 to -13 because of 6-tap filter needs extra 2 lines. 
+ { + ilow = -13; + } + + ihigh = i0 + range - 1; + if (ihigh - i0 > 2047) /* clip to conform with the standard */ + { + ihigh = i0 + 2047; + } + if (ihigh > width - 3) + { + ihigh = width - 3; // change from width-1 to width-3 for the same reason as above + } + + jlow = j0 - range; + if (j0 - jlow > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */ + { + jlow = j0 - MaxVmvR[lev_idx] + 1; + } + if (jlow < -13) // same reason as above + { + jlow = -13; + } + + jhigh = j0 + range - 1; + if (jhigh - j0 > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */ + { + jhigh = j0 + MaxVmvR[lev_idx] - 1; + } + if (jhigh > height - 3) // same reason as above + { + jhigh = height - 3; + } + + /* find initial motion vector & predicted MV*/ + AVCCandidateSelection(mvx, mvy, &num_can, i0 >> 4, j0 >> 4, encvid, type_pred, &cmvx, &cmvy); + + imin = i0; + jmin = j0; /* needed for fullsearch */ + ncand = ref + i0 + j0 * lx; + + /* for first row of MB, fullsearch can be used */ + if (FS_en) + { + *hp_guess = 0; /* no guess for fast half-pel */ + + dmin = AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy); + + ncand = ref + imin + jmin * lx; + } + else + { /* fullsearch the top row to only upto (0,3) MB */ + /* upto 30% complexity saving with the same complexity */ + if (video->PrevRefFrameNum == 0 && j0 == 0 && i0 <= 64 && type_pred != 1) + { + *hp_guess = 0; /* no guess for fast half-pel */ + dmin = AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy); + ncand = ref + imin + jmin * lx; + } + else + { + /************** initialize candidate **************************/ + + dmin = 65535; + + /* check if all are equal */ + if (num_can == ALL_CAND_EQUAL) + { + i = i0 + mvx[0]; + j = j0 + mvy[0]; + + if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh) + { + cand = ref + i + j * lx; + + d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info); + mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy); + d += mvcost; + + if (d < dmin) + { + dmin = d; + imin = i; + jmin = j; + ncand = cand; + min_sad = d - mvcost; // for rate control + } + } + } + else + { + /************** evaluate unique candidates **********************/ + for (k = 0; k < num_can; k++) + { + i = i0 + mvx[k]; + j = j0 + mvy[k]; + + if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh) + { + cand = ref + i + j * lx; + d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info); + mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy); + d += mvcost; + + if (d < dmin) + { + dmin = d; + imin = i; + jmin = j; + ncand = cand; + min_sad = d - mvcost; // for rate control + } + } + } + } + + /******************* local refinement ***************************/ + center_again = 0; + last_loc = new_loc = 0; + // ncand = ref + jmin*lx + imin; /* center of the search */ + step = 0; + dn[0] = dmin; + while (!center_again && step <= max_step) + { + + AVCMoveNeighborSAD(dn, last_loc); + + center_again = 1; + i = imin; + j = jmin - 1; + cand = ref + i + j * lx; + + /* starting from [0,-1] */ + /* spiral check one step at a time*/ + for (k = 2; k <= 8; k += 2) + { + if (!tab_exclude[last_loc][k]) /* exclude last step computation */ + { /* not already computed */ + if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh) + { + d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info); + mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy); + d += mvcost; + + dn[k] = d; /* keep it for half pel use */ + + if (d < dmin) + { + 
ncand = cand; + dmin = d; + imin = i; + jmin = j; + center_again = 0; + new_loc = k; + min_sad = d - mvcost; // for rate control + } + } + } + if (k == 8) /* end side search*/ + { + if (!center_again) + { + k = -1; /* start diagonal search */ + cand -= lx; + j--; + } + } + else + { + next = refine_next[k][0]; + i += next; + cand += next; + next = refine_next[k][1]; + j += next; + cand += lx * next; + } + } + last_loc = new_loc; + step ++; + } + if (!center_again) + AVCMoveNeighborSAD(dn, last_loc); + + *hp_guess = AVCFindMin(dn); + + encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0; + } + } + + mot16x16[mbnum].sad = dmin; + mot16x16[mbnum].x = (imin - i0) << 2; + mot16x16[mbnum].y = (jmin - j0) << 2; + best_cand[0] = ncand; + + if (rateCtrl->subPelEnable) // always enable half-pel search + { + /* find half-pel resolution motion vector */ + min_sad = AVCFindHalfPelMB(encvid, cur, mot16x16 + mbnum, best_cand[0], i0, j0, *hp_guess, cmvx, cmvy); + + encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0; + + + if (encvid->best_qpel_pos == -1) + { + ncand = encvid->hpel_cand[encvid->best_hpel_pos]; + } + else + { + ncand = encvid->qpel_cand[encvid->best_qpel_pos]; + } + } + else + { + encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0; + } + + /** do motion comp here for now */ + ref = currPic->Sl + i0 + j0 * lx; + /* copy from the best result to current Picture */ + for (j = 0; j < 16; j++) + { + for (i = 0; i < 16; i++) + { + *ref++ = *ncand++; + } + ref += (lx - 16); + ncand += 8; + } + + return ; +} + +#endif + +/*=============================================================================== + Function: AVCFullSearch + Date: 09/16/2000 + Purpose: Perform full-search motion estimation over the range of search + region in a spiral-outward manner. + Input/Output: VideoEncData, current Vol, previou Vop, pointer to the left corner of + current VOP, current coord (also output), boundaries. 
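 Each candidate SAD call packs the running best, (dmin << 16) | lx, so that the SAD routine can stop early once the partial sum exceeds dmin, and MV_COST() biases every position toward the predicted vector (cmvx, cmvy).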
+===============================================================================*/ +int AVCFullSearch(AVCEncObject *encvid, uint8 *prev, uint8 *cur, + int *imin, int *jmin, int ilow, int ihigh, int jlow, int jhigh, + int cmvx, int cmvy) +{ + int range = encvid->rateCtrl->mvRange; + AVCPictureData *currPic = encvid->common->currPic; + uint8 *cand; + int i, j, k, l; + int d, dmin; + int i0 = *imin; /* current position */ + int j0 = *jmin; + int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock; + void *extra_info = encvid->sad_extra_info; + int lx = currPic->pitch; /* with padding */ + + int offset = i0 + j0 * lx; + + int lambda_motion = encvid->lambda_motion; + uint8 *mvbits = encvid->mvbits; + int mvshift = 2; + int mvcost; + int min_sad; + + cand = prev + offset; + + dmin = (*SAD_Macroblock)(cand, cur, (65535 << 16) | lx, (void*)extra_info); + mvcost = MV_COST(lambda_motion, mvshift, 0, 0, cmvx, cmvy); + min_sad = dmin; + dmin += mvcost; + + /* perform spiral search */ + for (k = 1; k <= range; k++) + { + + i = i0 - k; + j = j0 - k; + + cand = prev + i + j * lx; + + for (l = 0; l < 8*k; l++) + { + /* no need for boundary checking again */ + if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh) + { + d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, (void*)extra_info); + mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy); + d += mvcost; + + if (d < dmin) + { + dmin = d; + *imin = i; + *jmin = j; + min_sad = d - mvcost; + } + } + + if (l < (k << 1)) + { + i++; + cand++; + } + else if (l < (k << 2)) + { + j++; + cand += lx; + } + else if (l < ((k << 2) + (k << 1))) + { + i--; + cand--; + } + else + { + j--; + cand -= lx; + } + } + } + + encvid->rateCtrl->MADofMB[encvid->common->mbNum] = (min_sad / 256.0); // for rate control + + return dmin; +} + +/*=============================================================================== + Function: AVCCandidateSelection + Date: 09/16/2000 + Purpose: Fill up the list of candidate using spatio-temporal correlation + among neighboring blocks. + Input/Output: type_pred = 0: first pass, 1: second pass, or no SCD + Modified: , 09/23/01, get rid of redundant candidates before passing back. + , 09/11/07, added return for modified predicted MV, this will be + needed for both fast search and fullsearch. 
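 The candidates are the co-located 16x16 MV from the previous frame plus a mix of neighboring MVs (left/right, top/bottom, corners) depending on the pass, shifted from quarter-pel back to full-pel; the predicted MV (cmvx, cmvy) is derived from the A (left), B (top) and C (top-right, or top-left at the right edge) neighbors.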
+===============================================================================*/ + +void AVCCandidateSelection(int *mvx, int *mvy, int *num_can, int imb, int jmb, + AVCEncObject *encvid, int type_pred, int *cmvx, int *cmvy) +{ + AVCCommonObj *video = encvid->common; + AVCMV *mot16x16 = encvid->mot16x16; + AVCMV *pmot; + int mbnum = video->mbNum; + int mbwidth = video->PicWidthInMbs; + int mbheight = video->PicHeightInMbs; + int i, j, same, num1; + + /* this part is for predicted MV */ + int pmvA_x = 0, pmvA_y = 0, pmvB_x = 0, pmvB_y = 0, pmvC_x = 0, pmvC_y = 0; + int availA = 0, availB = 0, availC = 0; + + *num_can = 0; + + if (video->PrevRefFrameNum != 0) // previous frame is an IDR frame + { + /* Spatio-Temporal Candidate (five candidates) */ + if (type_pred == 0) /* first pass */ + { + pmot = &mot16x16[mbnum]; /* same coordinate previous frame */ + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + if (imb >= (mbwidth >> 1) && imb > 0) /*left neighbor previous frame */ + { + pmot = &mot16x16[mbnum-1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + else if (imb + 1 < mbwidth) /*right neighbor previous frame */ + { + pmot = &mot16x16[mbnum+1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + + if (jmb < mbheight - 1) /*bottom neighbor previous frame */ + { + pmot = &mot16x16[mbnum+mbwidth]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + else if (jmb > 0) /*upper neighbor previous frame */ + { + pmot = &mot16x16[mbnum-mbwidth]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + + if (imb > 0 && jmb > 0) /* upper-left neighbor current frame*/ + { + pmot = &mot16x16[mbnum-mbwidth-1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (jmb > 0 && imb < mbheight - 1) /* upper right neighbor current frame*/ + { + pmot = &mot16x16[mbnum-mbwidth+1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + } + else /* second pass */ + /* original ST1 algorithm */ + { + pmot = &mot16x16[mbnum]; /* same coordinate previous frame */ + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + + if (imb > 0) /*left neighbor current frame */ + { + pmot = &mot16x16[mbnum-1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (jmb > 0) /*upper neighbor current frame */ + { + pmot = &mot16x16[mbnum-mbwidth]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (imb < mbwidth - 1) /*right neighbor previous frame */ + { + pmot = &mot16x16[mbnum+1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (jmb < mbheight - 1) /*bottom neighbor previous frame */ + { + pmot = &mot16x16[mbnum+mbwidth]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + } + + /* get predicted MV */ + if (imb > 0) /* get MV from left (A) neighbor either on current or previous frame */ + { + availA = 1; + pmot = &mot16x16[mbnum-1]; + pmvA_x = pmot->x; + pmvA_y = pmot->y; + } + + if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */ + { + availB = 1; + pmot = &mot16x16[mbnum-mbwidth]; + pmvB_x = pmot->x; + pmvB_y = pmot->y; + + availC = 1; + + if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */ + { + pmot = &mot16x16[mbnum-mbwidth+1]; + } + else /* get MV from top-left (D) neighbor of current frame */ + { + pmot = &mot16x16[mbnum-mbwidth-1]; 
+ } + pmvC_x = pmot->x; + pmvC_y = pmot->y; + } + + } + else /* only Spatial Candidate (four candidates)*/ + { + if (type_pred == 0) /*first pass*/ + { + if (imb > 1) /* neighbor two blocks away to the left */ + { + pmot = &mot16x16[mbnum-2]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (imb > 0 && jmb > 0) /* upper-left neighbor */ + { + pmot = &mot16x16[mbnum-mbwidth-1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (jmb > 0 && imb < mbheight - 1) /* upper right neighbor */ + { + pmot = &mot16x16[mbnum-mbwidth+1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + + /* get predicted MV */ + if (imb > 1) /* get MV from 2nd left (A) neighbor either of current frame */ + { + availA = 1; + pmot = &mot16x16[mbnum-2]; + pmvA_x = pmot->x; + pmvA_y = pmot->y; + } + + if (jmb > 0 && imb > 0) /* get MV from top-left (B) neighbor of current frame */ + { + availB = 1; + pmot = &mot16x16[mbnum-mbwidth-1]; + pmvB_x = pmot->x; + pmvB_y = pmot->y; + } + + if (jmb > 0 && imb < mbwidth - 1) + { + availC = 1; + pmot = &mot16x16[mbnum-mbwidth+1]; + pmvC_x = pmot->x; + pmvC_y = pmot->y; + } + } +//#ifdef SCENE_CHANGE_DETECTION + /* second pass (ST2 algorithm)*/ + else + { + if (type_pred == 1) /* 4/7/01 */ + { + if (imb > 0) /*left neighbor current frame */ + { + pmot = &mot16x16[mbnum-1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (jmb > 0) /*upper neighbor current frame */ + { + pmot = &mot16x16[mbnum-mbwidth]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (imb < mbwidth - 1) /*right neighbor current frame */ + { + pmot = &mot16x16[mbnum+1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + if (jmb < mbheight - 1) /*bottom neighbor current frame */ + { + pmot = &mot16x16[mbnum+mbwidth]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + } + //#else + else /* original ST1 algorithm */ + { + if (imb > 0) /*left neighbor current frame */ + { + pmot = &mot16x16[mbnum-1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + + if (jmb > 0) /*upper-left neighbor current frame */ + { + pmot = &mot16x16[mbnum-mbwidth-1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + + } + if (jmb > 0) /*upper neighbor current frame */ + { + pmot = &mot16x16[mbnum-mbwidth]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + + if (imb < mbheight - 1) /*upper-right neighbor current frame */ + { + pmot = &mot16x16[mbnum-mbwidth+1]; + mvx[(*num_can)] = (pmot->x) >> 2; + mvy[(*num_can)++] = (pmot->y) >> 2; + } + } + } + + /* get predicted MV */ + if (imb > 0) /* get MV from left (A) neighbor either on current or previous frame */ + { + availA = 1; + pmot = &mot16x16[mbnum-1]; + pmvA_x = pmot->x; + pmvA_y = pmot->y; + } + + if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */ + { + availB = 1; + pmot = &mot16x16[mbnum-mbwidth]; + pmvB_x = pmot->x; + pmvB_y = pmot->y; + + availC = 1; + + if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */ + { + pmot = &mot16x16[mbnum-mbwidth+1]; + } + else /* get MV from top-left (D) neighbor of current frame */ + { + pmot = &mot16x16[mbnum-mbwidth-1]; + } + pmvC_x = pmot->x; + pmvC_y = pmot->y; + } + } +//#endif + } + + /* 3/23/01, remove redundant candidate (possible k-mean) */ + num1 = *num_can; + *num_can = 1; + for (i = 1; i < 
num1; i++) + { + same = 0; + j = 0; + while (!same && j < *num_can) + { +#if (CANDIDATE_DISTANCE==0) + if (mvx[i] == mvx[j] && mvy[i] == mvy[j]) +#else + // modified k-mean, 3/24/01, shouldn't be greater than 3 + if (AVC_ABS(mvx[i] - mvx[j]) + AVC_ABS(mvy[i] - mvy[j]) < CANDIDATE_DISTANCE) +#endif + same = 1; + j++; + } + if (!same) + { + mvx[*num_can] = mvx[i]; + mvy[*num_can] = mvy[i]; + (*num_can)++; + } + } + + if (num1 == 5 && *num_can == 1) + *num_can = ALL_CAND_EQUAL; /* all are equal */ + + /* calculate predicted MV */ + + if (availA && !(availB || availC)) + { + *cmvx = pmvA_x; + *cmvy = pmvA_y; + } + else + { + *cmvx = AVC_MEDIAN(pmvA_x, pmvB_x, pmvC_x); + *cmvy = AVC_MEDIAN(pmvA_y, pmvB_y, pmvC_y); + } + + return ; +} + + +/************************************************************* + Function: AVCMoveNeighborSAD + Date: 3/27/01 + Purpose: Move neighboring SAD around when center has shifted +*************************************************************/ + +void AVCMoveNeighborSAD(int dn[], int new_loc) +{ + int tmp[9]; + tmp[0] = dn[0]; + tmp[1] = dn[1]; + tmp[2] = dn[2]; + tmp[3] = dn[3]; + tmp[4] = dn[4]; + tmp[5] = dn[5]; + tmp[6] = dn[6]; + tmp[7] = dn[7]; + tmp[8] = dn[8]; + dn[0] = dn[1] = dn[2] = dn[3] = dn[4] = dn[5] = dn[6] = dn[7] = dn[8] = 65536; + + switch (new_loc) + { + case 0: + break; + case 1: + dn[4] = tmp[2]; + dn[5] = tmp[0]; + dn[6] = tmp[8]; + break; + case 2: + dn[4] = tmp[3]; + dn[5] = tmp[4]; + dn[6] = tmp[0]; + dn[7] = tmp[8]; + dn[8] = tmp[1]; + break; + case 3: + dn[6] = tmp[4]; + dn[7] = tmp[0]; + dn[8] = tmp[2]; + break; + case 4: + dn[1] = tmp[2]; + dn[2] = tmp[3]; + dn[6] = tmp[5]; + dn[7] = tmp[6]; + dn[8] = tmp[0]; + break; + case 5: + dn[1] = tmp[0]; + dn[2] = tmp[4]; + dn[8] = tmp[6]; + break; + case 6: + dn[1] = tmp[8]; + dn[2] = tmp[0]; + dn[3] = tmp[4]; + dn[4] = tmp[5]; + dn[8] = tmp[7]; + break; + case 7: + dn[2] = tmp[8]; + dn[3] = tmp[0]; + dn[4] = tmp[6]; + break; + case 8: + dn[2] = tmp[1]; + dn[3] = tmp[2]; + dn[4] = tmp[0]; + dn[5] = tmp[6]; + dn[6] = tmp[7]; + break; + } + dn[0] = tmp[new_loc]; + + return ; +} + +/* 3/28/01, find minimal of dn[9] */ + +int AVCFindMin(int dn[]) +{ + int min, i; + int dmin; + + dmin = dn[1]; + min = 1; + for (i = 2; i < 9; i++) + { + if (dn[i] < dmin) + { + dmin = dn[i]; + min = i; + } + } + + return min; +} + + + diff --git a/media/libstagefright/codecs/avc/enc/src/rate_control.cpp b/media/libstagefright/codecs/avc/enc/src/rate_control.cpp new file mode 100644 index 0000000..15b55fb --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/rate_control.cpp @@ -0,0 +1,981 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" +#include + +/* rate control variables */ +#define RC_MAX_QUANT 51 +#define RC_MIN_QUANT 0 //cap to 10 to prevent rate fluctuation + +#define MAD_MIN 1 /* handle the case of devision by zero in RC */ + + +/* local functions */ +double QP2Qstep(int QP); +int Qstep2QP(double Qstep); + +double ComputeFrameMAD(AVCCommonObj *video, AVCRateControl *rateCtrl); + +void targetBitCalculation(AVCEncObject *encvid, AVCCommonObj *video, AVCRateControl *rateCtrl, MultiPass *pMP); + +void calculateQuantizer_Multipass(AVCEncObject *encvid, AVCCommonObj *video, + AVCRateControl *rateCtrl, MultiPass *pMP); + +void updateRC_PostProc(AVCRateControl *rateCtrl, MultiPass *pMP); + +void AVCSaveRDSamples(MultiPass *pMP, int counter_samples); + +void updateRateControl(AVCRateControl *rateControl, int nal_type); + +int GetAvgFrameQP(AVCRateControl *rateCtrl) +{ + return rateCtrl->Qc; +} + +AVCEnc_Status RCDetermineFrameNum(AVCEncObject *encvid, AVCRateControl *rateCtrl, uint32 modTime, uint *frameNum) +{ + AVCCommonObj *video = encvid->common; + AVCSliceHeader *sliceHdr = video->sliceHdr; + uint32 modTimeRef = encvid->modTimeRef; + int32 currFrameNum ; + int frameInc; + + + /* check with the buffer fullness to make sure that we have enough bits to encode this frame */ + /* we can use a threshold to guarantee minimum picture quality */ + /**********************************/ + + /* for now, the default is to encode every frame, To Be Changed */ + if (rateCtrl->first_frame) + { + encvid->modTimeRef = modTime; + encvid->wrapModTime = 0; + encvid->prevFrameNum = 0; + encvid->prevProcFrameNum = 0; + + *frameNum = 0; + + /* set frame type to IDR-frame */ + video->nal_unit_type = AVC_NALTYPE_IDR; + sliceHdr->slice_type = AVC_I_ALL_SLICE; + video->slice_type = AVC_I_SLICE; + + return AVCENC_SUCCESS; + } + else + { + if (modTime < modTimeRef) /* modTime wrapped around */ + { + encvid->wrapModTime += ((uint32)0xFFFFFFFF - modTimeRef) + 1; + encvid->modTimeRef = modTimeRef = 0; + } + modTime += encvid->wrapModTime; /* wrapModTime is non zero after wrap-around */ + + currFrameNum = (int32)(((modTime - modTimeRef) * rateCtrl->frame_rate + 200) / 1000); /* add small roundings */ + + if (currFrameNum <= (int32)encvid->prevProcFrameNum) + { + return AVCENC_FAIL; /* this is a late frame do not encode it */ + } + + frameInc = currFrameNum - encvid->prevProcFrameNum; + + if (frameInc < rateCtrl->skip_next_frame + 1) + { + return AVCENC_FAIL; /* frame skip required to maintain the target bit rate. 
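skip_next_frame is set by updateRateControl() when the VBV occupancy climbs too close to the top of the buffer, so over-budget frames are dropped here before any macroblock is coded.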
*/ + } + + RCUpdateBuffer(video, rateCtrl, frameInc - rateCtrl->skip_next_frame); /* in case more frames dropped */ + + *frameNum = currFrameNum; + + /* This part would be similar to DetermineVopType of m4venc */ + if ((*frameNum >= (uint)rateCtrl->idrPeriod && rateCtrl->idrPeriod > 0) || (*frameNum > video->MaxFrameNum)) /* first frame or IDR*/ + { + /* set frame type to IDR-frame */ + if (rateCtrl->idrPeriod) + { + encvid->modTimeRef += (uint32)(rateCtrl->idrPeriod * 1000 / rateCtrl->frame_rate); + *frameNum -= rateCtrl->idrPeriod; + } + else + { + encvid->modTimeRef += (uint32)(video->MaxFrameNum * 1000 / rateCtrl->frame_rate); + *frameNum -= video->MaxFrameNum; + } + + video->nal_unit_type = AVC_NALTYPE_IDR; + sliceHdr->slice_type = AVC_I_ALL_SLICE; + video->slice_type = AVC_I_SLICE; + encvid->prevProcFrameNum = *frameNum; + } + else + { + video->nal_unit_type = AVC_NALTYPE_SLICE; + sliceHdr->slice_type = AVC_P_ALL_SLICE; + video->slice_type = AVC_P_SLICE; + encvid->prevProcFrameNum = currFrameNum; + } + + } + + return AVCENC_SUCCESS; +} + +void RCUpdateBuffer(AVCCommonObj *video, AVCRateControl *rateCtrl, int frameInc) +{ + int tmp; + MultiPass *pMP = rateCtrl->pMP; + + OSCL_UNUSED_ARG(video); + + if (rateCtrl->rcEnable == TRUE) + { + if (frameInc > 1) + { + tmp = rateCtrl->bitsPerFrame * (frameInc - 1); + rateCtrl->VBV_fullness -= tmp; + pMP->counter_BTsrc += 10 * (frameInc - 1); + + /* Check buffer underflow */ + if (rateCtrl->VBV_fullness < rateCtrl->low_bound) + { + rateCtrl->VBV_fullness = rateCtrl->low_bound; // -rateCtrl->Bs/2; + rateCtrl->TMN_W = rateCtrl->VBV_fullness - rateCtrl->low_bound; + pMP->counter_BTsrc = pMP->counter_BTdst + (int)((OsclFloat)(rateCtrl->Bs / 2 - rateCtrl->low_bound) / 2.0 / (pMP->target_bits_per_frame / 10)); + } + } + } +} + + +AVCEnc_Status InitRateControlModule(AVCHandle *avcHandle) +{ + AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; + AVCCommonObj *video = encvid->common; + AVCRateControl *rateCtrl = encvid->rateCtrl; + double L1, L2, L3, bpp; + int qp; + int i, j; + + rateCtrl->basicUnit = video->PicSizeInMbs; + + rateCtrl->MADofMB = (double*) avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, + video->PicSizeInMbs * sizeof(double), DEFAULT_ATTR); + + if (!rateCtrl->MADofMB) + { + goto CLEANUP_RC; + } + + if (rateCtrl->rcEnable == TRUE) + { + rateCtrl->pMP = (MultiPass*) avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, sizeof(MultiPass), DEFAULT_ATTR); + if (!rateCtrl->pMP) + { + goto CLEANUP_RC; + } + memset(rateCtrl->pMP, 0, sizeof(MultiPass)); + rateCtrl->pMP->encoded_frames = -1; /* forget about the very first I frame */ + + /* RDInfo **pRDSamples */ + rateCtrl->pMP->pRDSamples = (RDInfo **)avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, (30 * sizeof(RDInfo *)), DEFAULT_ATTR); + if (!rateCtrl->pMP->pRDSamples) + { + goto CLEANUP_RC; + } + + for (i = 0; i < 30; i++) + { + rateCtrl->pMP->pRDSamples[i] = (RDInfo *)avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, (32 * sizeof(RDInfo)), DEFAULT_ATTR); + if (!rateCtrl->pMP->pRDSamples[i]) + { + goto CLEANUP_RC; + } + for (j = 0; j < 32; j++) memset(&(rateCtrl->pMP->pRDSamples[i][j]), 0, sizeof(RDInfo)); + } + rateCtrl->pMP->frameRange = (int)(rateCtrl->frame_rate * 1.0); /* 1.0s time frame*/ + rateCtrl->pMP->frameRange = AVC_MAX(rateCtrl->pMP->frameRange, 5); + rateCtrl->pMP->frameRange = AVC_MIN(rateCtrl->pMP->frameRange, 30); + + rateCtrl->pMP->framePos = -1; + + + rateCtrl->bitsPerFrame = (int32)(rateCtrl->bitRate / rateCtrl->frame_rate); + + /* BX rate control */ + 
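/* Buffer model used below:
+       Bs           = coded picture buffer size (cpbSize), tracked over [-Bs/2, Bs/2],
+       VBV_fullness = current occupancy, initialized to 1/3 of the buffer (Bs/3 - Bs/2),
+       low_bound    = -Bs/2, the underflow clamp applied in RCUpdateBuffer(),
+       counter_BTsrc / counter_BTdst = bits shifted between frames, counted in
+       tenths of the per-frame bit budget (target_bits_per_frame / 10). */ +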
rateCtrl->skip_next_frame = 0; /* must be initialized */ + + rateCtrl->Bs = rateCtrl->cpbSize; + rateCtrl->TMN_W = 0; + rateCtrl->VBV_fullness = (int)(rateCtrl->Bs * 0.5); /* rateCtrl->Bs */ + rateCtrl->encoded_frames = 0; + + rateCtrl->TMN_TH = rateCtrl->bitsPerFrame; + + rateCtrl->max_BitVariance_num = (int)((OsclFloat)(rateCtrl->Bs - rateCtrl->VBV_fullness) / (rateCtrl->bitsPerFrame / 10.0)) - 5; + if (rateCtrl->max_BitVariance_num < 0) rateCtrl->max_BitVariance_num += 5; + + // Set the initial buffer fullness + /* According to the spec, the initial buffer fullness needs to be set to 1/3 */ + rateCtrl->VBV_fullness = (int)(rateCtrl->Bs / 3.0 - rateCtrl->Bs / 2.0); /* the buffer range is [-Bs/2, Bs/2] */ + rateCtrl->pMP->counter_BTsrc = (int)((rateCtrl->Bs / 2.0 - rateCtrl->Bs / 3.0) / (rateCtrl->bitsPerFrame / 10.0)); + rateCtrl->TMN_W = (int)(rateCtrl->VBV_fullness + rateCtrl->pMP->counter_BTsrc * (rateCtrl->bitsPerFrame / 10.0)); + + rateCtrl->low_bound = -rateCtrl->Bs / 2; + rateCtrl->VBV_fullness_offset = 0; + + /* Setting the bitrate and framerate */ + rateCtrl->pMP->bitrate = rateCtrl->bitRate; + rateCtrl->pMP->framerate = rateCtrl->frame_rate; + rateCtrl->pMP->target_bits_per_frame = rateCtrl->pMP->bitrate / rateCtrl->pMP->framerate; + + /*compute the initial QP*/ + bpp = 1.0 * rateCtrl->bitRate / (rateCtrl->frame_rate * (video->PicSizeInMbs << 8)); + if (video->PicWidthInSamplesL == 176) + { + L1 = 0.1; + L2 = 0.3; + L3 = 0.6; + } + else if (video->PicWidthInSamplesL == 352) + { + L1 = 0.2; + L2 = 0.6; + L3 = 1.2; + } + else + { + L1 = 0.6; + L2 = 1.4; + L3 = 2.4; + } + + if (rateCtrl->initQP == 0) + { + if (bpp <= L1) + qp = 35; + else if (bpp <= L2) + qp = 25; + else if (bpp <= L3) + qp = 20; + else + qp = 15; + rateCtrl->initQP = qp; + } + + rateCtrl->Qc = rateCtrl->initQP; + } + + return AVCENC_SUCCESS; + +CLEANUP_RC: + + CleanupRateControlModule(avcHandle); + return AVCENC_MEMORY_FAIL; + +} + + +void CleanupRateControlModule(AVCHandle *avcHandle) +{ + AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; + AVCRateControl *rateCtrl = encvid->rateCtrl; + int i; + + if (rateCtrl->MADofMB) + { + avcHandle->CBAVC_Free(avcHandle->userData, (int)(rateCtrl->MADofMB)); + } + + if (rateCtrl->pMP) + { + if (rateCtrl->pMP->pRDSamples) + { + for (i = 0; i < 30; i++) + { + if (rateCtrl->pMP->pRDSamples[i]) + { + avcHandle->CBAVC_Free(avcHandle->userData, (int)rateCtrl->pMP->pRDSamples[i]); + } + } + avcHandle->CBAVC_Free(avcHandle->userData, (int)rateCtrl->pMP->pRDSamples); + } + avcHandle->CBAVC_Free(avcHandle->userData, (int)(rateCtrl->pMP)); + } + + return ; +} + +void RCInitGOP(AVCEncObject *encvid) +{ + /* in BX RC, there's no GOP-level RC */ + + OSCL_UNUSED_ARG(encvid); + + return ; +} + + +void RCInitFrameQP(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCRateControl *rateCtrl = encvid->rateCtrl; + AVCPicParamSet *picParam = video->currPicParams; + MultiPass *pMP = rateCtrl->pMP; + + if (rateCtrl->rcEnable == TRUE) + { + /* frame layer rate control */ + if (rateCtrl->encoded_frames == 0) + { + video->QPy = rateCtrl->Qc = rateCtrl->initQP; + } + else + { + calculateQuantizer_Multipass(encvid, video, rateCtrl, pMP); + video->QPy = rateCtrl->Qc; + } + + rateCtrl->NumberofHeaderBits = 0; + rateCtrl->NumberofTextureBits = 0; + rateCtrl->numFrameBits = 0; // reset + + /* update pMP->framePos */ + if (++pMP->framePos == pMP->frameRange) pMP->framePos = 0; + + if (rateCtrl->T == 0) + { + pMP->counter_BTdst = (int)(rateCtrl->frame_rate * 7.5 + 0.5); /* 0.75s 
time frame */ + pMP->counter_BTdst = AVC_MIN(pMP->counter_BTdst, (int)(rateCtrl->max_BitVariance_num / 2 * 0.40)); /* 0.75s time frame may go beyond VBV buffer if we set the buffer size smaller than 0.75s */ + pMP->counter_BTdst = AVC_MAX(pMP->counter_BTdst, (int)((rateCtrl->Bs / 2 - rateCtrl->VBV_fullness) * 0.30 / (rateCtrl->TMN_TH / 10.0) + 0.5)); /* At least 30% of VBV buffer size/2 */ + pMP->counter_BTdst = AVC_MIN(pMP->counter_BTdst, 20); /* Limit the target to be smaller than 3C */ + + pMP->target_bits = rateCtrl->T = rateCtrl->TMN_TH = (int)(rateCtrl->TMN_TH * (1.0 + pMP->counter_BTdst * 0.1)); + pMP->diff_counter = pMP->counter_BTdst; + } + + /* collect the necessary data: target bits, actual bits, mad and QP */ + pMP->target_bits = rateCtrl->T; + pMP->QP = video->QPy; + + pMP->mad = (OsclFloat)rateCtrl->totalSAD / video->PicSizeInMbs; //ComputeFrameMAD(video, rateCtrl); + if (pMP->mad < MAD_MIN) pMP->mad = MAD_MIN; /* MAD_MIN is defined as 1 in mp4def.h */ + + pMP->bitrate = rateCtrl->bitRate; /* calculated in RCVopQPSetting */ + pMP->framerate = rateCtrl->frame_rate; + + /* first pass encoding */ + pMP->nRe_Quantized = 0; + + } // rcEnable + else + { + video->QPy = rateCtrl->initQP; + } + +// printf(" %d ",video->QPy); + + if (video->CurrPicNum == 0 && encvid->outOfBandParamSet == FALSE) + { + picParam->pic_init_qs_minus26 = 0; + picParam->pic_init_qp_minus26 = video->QPy - 26; + } + + // need this for motion estimation + encvid->lambda_mode = QP2QUANT[AVC_MAX(0, video->QPy-SHIFT_QP)]; + encvid->lambda_motion = LAMBDA_FACTOR(encvid->lambda_mode); + return ; +} + +/* Mad based variable bit allocation + QP calculation with a new quadratic method */ +void calculateQuantizer_Multipass(AVCEncObject *encvid, AVCCommonObj *video, + AVCRateControl *rateCtrl, MultiPass *pMP) +{ + int prev_actual_bits = 0, curr_target, /*pos=0,*/i, j; + OsclFloat Qstep, prev_QP = 0.625; + + OsclFloat curr_mad, prev_mad, curr_RD, prev_RD, average_mad, aver_QP; + + /* Mad based variable bit allocation */ + targetBitCalculation(encvid, video, rateCtrl, pMP); + + if (rateCtrl->T <= 0 || rateCtrl->totalSAD == 0) + { + if (rateCtrl->T < 0) rateCtrl->Qc = RC_MAX_QUANT; + return; + } + + /* ---------------------------------------------------------------------------------------------------*/ + /* current frame QP estimation */ + curr_target = rateCtrl->T; + curr_mad = (OsclFloat)rateCtrl->totalSAD / video->PicSizeInMbs; + if (curr_mad < MAD_MIN) curr_mad = MAD_MIN; /* MAD_MIN is defined as 1 in mp4def.h */ + curr_RD = (OsclFloat)curr_target / curr_mad; + + if (rateCtrl->skip_next_frame == -1) // previous was skipped + { + i = pMP->framePos; + prev_mad = pMP->pRDSamples[i][0].mad; + prev_QP = pMP->pRDSamples[i][0].QP; + prev_actual_bits = pMP->pRDSamples[i][0].actual_bits; + } + else + { + /* Another version of search the optimal point */ + prev_mad = 0.0; + i = 0; + while (i < pMP->frameRange && prev_mad < 0.001) /* find first one with nonzero prev_mad */ + { + prev_mad = pMP->pRDSamples[i][0].mad; + i++; + } + + if (i < pMP->frameRange) + { + prev_actual_bits = pMP->pRDSamples[i-1][0].actual_bits; + + for (j = 0; i < pMP->frameRange; i++) + { + if (pMP->pRDSamples[i][0].mad != 0 && + AVC_ABS(prev_mad - curr_mad) > AVC_ABS(pMP->pRDSamples[i][0].mad - curr_mad)) + { + prev_mad = pMP->pRDSamples[i][0].mad; + prev_actual_bits = pMP->pRDSamples[i][0].actual_bits; + j = i; + } + } + prev_QP = QP2Qstep(pMP->pRDSamples[j][0].QP); + + for (i = 1; i < pMP->samplesPerFrame[j]; i++) + { + if (AVC_ABS(prev_actual_bits - 
curr_target) > AVC_ABS(pMP->pRDSamples[j][i].actual_bits - curr_target)) + { + prev_actual_bits = pMP->pRDSamples[j][i].actual_bits; + prev_QP = QP2Qstep(pMP->pRDSamples[j][i].QP); + } + } + } + } + + // quadratic approximation + if (prev_mad > 0.001) // only when prev_mad is greater than 0, otherwise keep using the same QP + { + prev_RD = (OsclFloat)prev_actual_bits / prev_mad; + //rateCtrl->Qc = (Int)(prev_QP * sqrt(prev_actual_bits/curr_target) + 0.4); + if (prev_QP == 0.625) // added this to allow getting out of QP = 0 easily + { + Qstep = (int)(prev_RD / curr_RD + 0.5); + } + else + { + // rateCtrl->Qc =(Int)(prev_QP * M4VENC_SQRT(prev_RD/curr_RD) + 0.9); + + if (prev_RD / curr_RD > 0.5 && prev_RD / curr_RD < 2.0) + Qstep = (int)(prev_QP * (sqrt(prev_RD / curr_RD) + prev_RD / curr_RD) / 2.0 + 0.9); /* Quadratic and linear approximation */ + else + Qstep = (int)(prev_QP * (sqrt(prev_RD / curr_RD) + pow(prev_RD / curr_RD, 1.0 / 3.0)) / 2.0 + 0.9); + } + // lower bound on Qc should be a function of curr_mad + // When mad is already low, lower bound on Qc doesn't have to be small. + // Note, this doesn't work well for low complexity clip encoded at high bit rate + // it doesn't hit the target bit rate due to this QP lower bound. + /// if((curr_mad < 8) && (rateCtrl->Qc < 12)) rateCtrl->Qc = 12; + // else if((curr_mad < 128) && (rateCtrl->Qc < 3)) rateCtrl->Qc = 3; + + rateCtrl->Qc = Qstep2QP(Qstep); + + if (rateCtrl->Qc < RC_MIN_QUANT) rateCtrl->Qc = RC_MIN_QUANT; + if (rateCtrl->Qc > RC_MAX_QUANT) rateCtrl->Qc = RC_MAX_QUANT; + } + + /* active bit resource protection */ + aver_QP = (pMP->encoded_frames == 0 ? 0 : pMP->sum_QP / (OsclFloat)pMP->encoded_frames); + average_mad = (pMP->encoded_frames == 0 ? 0 : pMP->sum_mad / (OsclFloat)pMP->encoded_frames); /* this function is called from the scond encoded frame*/ + if (pMP->diff_counter == 0 && + ((OsclFloat)rateCtrl->Qc <= aver_QP*1.1 || curr_mad <= average_mad*1.1) && + pMP->counter_BTsrc <= (pMP->counter_BTdst + (int)(pMP->framerate*1.0 + 0.5))) + { + rateCtrl->TMN_TH -= (int)(pMP->target_bits_per_frame / 10.0); + rateCtrl->T = rateCtrl->TMN_TH - rateCtrl->TMN_W; + pMP->counter_BTsrc++; + pMP->diff_counter--; + } + +} + +void targetBitCalculation(AVCEncObject *encvid, AVCCommonObj *video, AVCRateControl *rateCtrl, MultiPass *pMP) +{ + OSCL_UNUSED_ARG(encvid); + OsclFloat curr_mad;//, average_mad; + int diff_counter_BTsrc, diff_counter_BTdst, prev_counter_diff, curr_counter_diff, bound; + /* BT = Bit Transfer, for pMP->counter_BTsrc, pMP->counter_BTdst */ + + /* some stuff about frame dropping remained here to be done because pMP cannot be inserted into updateRateControl()*/ + updateRC_PostProc(rateCtrl, pMP); + + /* update pMP->counter_BTsrc and pMP->counter_BTdst to avoid interger overflow */ + if (pMP->counter_BTsrc > 1000 && pMP->counter_BTdst > 1000) + { + pMP->counter_BTsrc -= 1000; + pMP->counter_BTdst -= 1000; + } + + /* ---------------------------------------------------------------------------------------------------*/ + /* target calculation */ + curr_mad = (OsclFloat)rateCtrl->totalSAD / video->PicSizeInMbs; + if (curr_mad < MAD_MIN) curr_mad = MAD_MIN; /* MAD_MIN is defined as 1 in mp4def.h */ + diff_counter_BTsrc = diff_counter_BTdst = 0; + pMP->diff_counter = 0; + + + /*1.calculate average mad */ + pMP->sum_mad += curr_mad; + //average_mad = (pMP->encoded_frames < 1 ? 
curr_mad : pMP->sum_mad/(OsclFloat)(pMP->encoded_frames+1)); /* this function is called from the scond encoded frame*/ + //pMP->aver_mad = average_mad; + if (pMP->encoded_frames >= 0) /* pMP->encoded_frames is set to -1 initially, so forget about the very first I frame */ + pMP->aver_mad = (pMP->aver_mad * pMP->encoded_frames + curr_mad) / (pMP->encoded_frames + 1); + + if (pMP->overlapped_win_size > 0 && pMP->encoded_frames_prev >= 0) + pMP->aver_mad_prev = (pMP->aver_mad_prev * pMP->encoded_frames_prev + curr_mad) / (pMP->encoded_frames_prev + 1); + + /*2.average_mad, mad ==> diff_counter_BTsrc, diff_counter_BTdst */ + if (pMP->overlapped_win_size == 0) + { + /* original verison */ + if (curr_mad > pMP->aver_mad*1.1) + { + if (curr_mad / (pMP->aver_mad + 0.0001) > 2) + diff_counter_BTdst = (int)(sqrt(curr_mad / (pMP->aver_mad + 0.0001)) * 10 + 0.4) - 10; + //diff_counter_BTdst = (int)((sqrt(curr_mad/pMP->aver_mad)*2+curr_mad/pMP->aver_mad)/(3*0.1) + 0.4) - 10; + else + diff_counter_BTdst = (int)(curr_mad / (pMP->aver_mad + 0.0001) * 10 + 0.4) - 10; + } + else /* curr_mad <= average_mad*1.1 */ + //diff_counter_BTsrc = 10 - (int)((sqrt(curr_mad/pMP->aver_mad) + pow(curr_mad/pMP->aver_mad, 1.0/3.0))/(2.0*0.1) + 0.4); + diff_counter_BTsrc = 10 - (int)(sqrt(curr_mad / (pMP->aver_mad + 0.0001)) * 10 + 0.5); + + /* actively fill in the possible gap */ + if (diff_counter_BTsrc == 0 && diff_counter_BTdst == 0 && + curr_mad <= pMP->aver_mad*1.1 && pMP->counter_BTsrc < pMP->counter_BTdst) + diff_counter_BTsrc = 1; + + } + else if (pMP->overlapped_win_size > 0) + { + /* transition time: use previous average mad "pMP->aver_mad_prev" instead of the current average mad "pMP->aver_mad" */ + if (curr_mad > pMP->aver_mad_prev*1.1) + { + if (curr_mad / pMP->aver_mad_prev > 2) + diff_counter_BTdst = (int)(sqrt(curr_mad / (pMP->aver_mad_prev + 0.0001)) * 10 + 0.4) - 10; + //diff_counter_BTdst = (int)((M4VENC_SQRT(curr_mad/pMP->aver_mad_prev)*2+curr_mad/pMP->aver_mad_prev)/(3*0.1) + 0.4) - 10; + else + diff_counter_BTdst = (int)(curr_mad / (pMP->aver_mad_prev + 0.0001) * 10 + 0.4) - 10; + } + else /* curr_mad <= average_mad*1.1 */ + //diff_counter_BTsrc = 10 - (Int)((sqrt(curr_mad/pMP->aver_mad_prev) + pow(curr_mad/pMP->aver_mad_prev, 1.0/3.0))/(2.0*0.1) + 0.4); + diff_counter_BTsrc = 10 - (int)(sqrt(curr_mad / (pMP->aver_mad_prev + 0.0001)) * 10 + 0.5); + + /* actively fill in the possible gap */ + if (diff_counter_BTsrc == 0 && diff_counter_BTdst == 0 && + curr_mad <= pMP->aver_mad_prev*1.1 && pMP->counter_BTsrc < pMP->counter_BTdst) + diff_counter_BTsrc = 1; + + if (--pMP->overlapped_win_size <= 0) pMP->overlapped_win_size = 0; + } + + + /* if difference is too much, do clipping */ + /* First, set the upper bound for current bit allocation variance: 80% of available buffer */ + bound = (int)((rateCtrl->Bs / 2 - rateCtrl->VBV_fullness) * 0.6 / (pMP->target_bits_per_frame / 10)); /* rateCtrl->Bs */ + diff_counter_BTsrc = AVC_MIN(diff_counter_BTsrc, bound); + diff_counter_BTdst = AVC_MIN(diff_counter_BTdst, bound); + + /* Second, set another upper bound for current bit allocation: 4-5*bitrate/framerate */ + bound = 50; +// if(video->encParams->RC_Type == CBR_LOWDELAY) +// not necessary bound = 10; -- For Low delay */ + + diff_counter_BTsrc = AVC_MIN(diff_counter_BTsrc, bound); + diff_counter_BTdst = AVC_MIN(diff_counter_BTdst, bound); + + + /* Third, check the buffer */ + prev_counter_diff = pMP->counter_BTdst - pMP->counter_BTsrc; + curr_counter_diff = prev_counter_diff + (diff_counter_BTdst - 
diff_counter_BTsrc); + + if (AVC_ABS(prev_counter_diff) >= rateCtrl->max_BitVariance_num || AVC_ABS(curr_counter_diff) >= rateCtrl->max_BitVariance_num) + { //diff_counter_BTsrc = diff_counter_BTdst = 0; + + if (curr_counter_diff > rateCtrl->max_BitVariance_num && diff_counter_BTdst) + { + diff_counter_BTdst = (rateCtrl->max_BitVariance_num - prev_counter_diff) + diff_counter_BTsrc; + if (diff_counter_BTdst < 0) diff_counter_BTdst = 0; + } + + else if (curr_counter_diff < -rateCtrl->max_BitVariance_num && diff_counter_BTsrc) + { + diff_counter_BTsrc = diff_counter_BTdst - (-rateCtrl->max_BitVariance_num - prev_counter_diff); + if (diff_counter_BTsrc < 0) diff_counter_BTsrc = 0; + } + } + + + /*3.diff_counter_BTsrc, diff_counter_BTdst ==> TMN_TH */ + rateCtrl->TMN_TH = (int)(pMP->target_bits_per_frame); + pMP->diff_counter = 0; + + if (diff_counter_BTsrc) + { + rateCtrl->TMN_TH -= (int)(pMP->target_bits_per_frame * diff_counter_BTsrc * 0.1); + pMP->diff_counter = -diff_counter_BTsrc; + } + else if (diff_counter_BTdst) + { + rateCtrl->TMN_TH += (int)(pMP->target_bits_per_frame * diff_counter_BTdst * 0.1); + pMP->diff_counter = diff_counter_BTdst; + } + + + /*4.update pMP->counter_BTsrc, pMP->counter_BTdst */ + pMP->counter_BTsrc += diff_counter_BTsrc; + pMP->counter_BTdst += diff_counter_BTdst; + + + /*5.target bit calculation */ + rateCtrl->T = rateCtrl->TMN_TH - rateCtrl->TMN_W; + + return ; +} + +void updateRC_PostProc(AVCRateControl *rateCtrl, MultiPass *pMP) +{ + if (rateCtrl->skip_next_frame > 0) /* skip next frame */ + { + pMP->counter_BTsrc += 10 * rateCtrl->skip_next_frame; + + } + else if (rateCtrl->skip_next_frame == -1) /* skip current frame */ + { + pMP->counter_BTdst -= pMP->diff_counter; + pMP->counter_BTsrc += 10; + + pMP->sum_mad -= pMP->mad; + pMP->aver_mad = (pMP->aver_mad * pMP->encoded_frames - pMP->mad) / (pMP->encoded_frames - 1 + 0.0001); + pMP->sum_QP -= pMP->QP; + pMP->encoded_frames --; + } + /* some stuff in update VBV_fullness remains here */ + //if(rateCtrl->VBV_fullness < -rateCtrl->Bs/2) /* rateCtrl->Bs */ + if (rateCtrl->VBV_fullness < rateCtrl->low_bound) + { + rateCtrl->VBV_fullness = rateCtrl->low_bound; // -rateCtrl->Bs/2; + rateCtrl->TMN_W = rateCtrl->VBV_fullness - rateCtrl->low_bound; + pMP->counter_BTsrc = pMP->counter_BTdst + (int)((OsclFloat)(rateCtrl->Bs / 2 - rateCtrl->low_bound) / 2.0 / (pMP->target_bits_per_frame / 10)); + } +} + + +void RCInitChromaQP(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCMacroblock *currMB = video->currMB; + int q_bits; + + /* we have to do the same thing for AVC_CLIP3(0,51,video->QSy) */ + + video->QPy_div_6 = (currMB->QPy * 43) >> 8; + video->QPy_mod_6 = currMB->QPy - 6 * video->QPy_div_6; + currMB->QPc = video->QPc = mapQPi2QPc[AVC_CLIP3(0, 51, currMB->QPy + video->currPicParams->chroma_qp_index_offset)]; + video->QPc_div_6 = (video->QPc * 43) >> 8; + video->QPc_mod_6 = video->QPc - 6 * video->QPc_div_6; + + /* pre-calculate this to save computation */ + q_bits = 4 + video->QPy_div_6; + if (video->slice_type == AVC_I_SLICE) + { + encvid->qp_const = 682 << q_bits; // intra + } + else + { + encvid->qp_const = 342 << q_bits; // inter + } + + q_bits = 4 + video->QPc_div_6; + if (video->slice_type == AVC_I_SLICE) + { + encvid->qp_const_c = 682 << q_bits; // intra + } + else + { + encvid->qp_const_c = 342 << q_bits; // inter + } + + encvid->lambda_mode = QP2QUANT[AVC_MAX(0, currMB->QPy-SHIFT_QP)]; + encvid->lambda_motion = LAMBDA_FACTOR(encvid->lambda_mode); + + return ; +} + + +void 
RCInitMBQP(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCMacroblock *currMB = video->currMB; + + currMB->QPy = video->QPy; /* set to previous value or picture level */ + + RCInitChromaQP(encvid); + +} + +void RCPostMB(AVCCommonObj *video, AVCRateControl *rateCtrl, int num_header_bits, int num_texture_bits) +{ + OSCL_UNUSED_ARG(video); + rateCtrl->numMBHeaderBits = num_header_bits; + rateCtrl->numMBTextureBits = num_texture_bits; + rateCtrl->NumberofHeaderBits += rateCtrl->numMBHeaderBits; + rateCtrl->NumberofTextureBits += rateCtrl->numMBTextureBits; +} + +void RCRestoreQP(AVCMacroblock *currMB, AVCCommonObj *video, AVCEncObject *encvid) +{ + currMB->QPy = video->QPy; /* use previous QP */ + RCInitChromaQP(encvid); + + return ; +} + + +void RCCalculateMAD(AVCEncObject *encvid, AVCMacroblock *currMB, uint8 *orgL, int orgPitch) +{ + AVCCommonObj *video = encvid->common; + AVCRateControl *rateCtrl = encvid->rateCtrl; + uint32 dmin_lx; + + if (rateCtrl->rcEnable == TRUE) + { + if (currMB->mb_intra) + { + if (currMB->mbMode == AVC_I16) + { + dmin_lx = (0xFFFF << 16) | orgPitch; + rateCtrl->MADofMB[video->mbNum] = AVCSAD_Macroblock_C(orgL, + encvid->pred_i16[currMB->i16Mode], dmin_lx, NULL); + } + else /* i4 */ + { + rateCtrl->MADofMB[video->mbNum] = encvid->i4_sad / 256.; + } + } + /* for INTER, we have already saved it with the MV search */ + } + + return ; +} + + + +AVCEnc_Status RCUpdateFrame(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + AVCRateControl *rateCtrl = encvid->rateCtrl; + AVCEnc_Status status = AVCENC_SUCCESS; + MultiPass *pMP = rateCtrl->pMP; + int diff_BTCounter; + int nal_type = video->nal_unit_type; + + /* update the complexity weight of I, P, B frame */ + + if (rateCtrl->rcEnable == TRUE) + { + pMP->actual_bits = rateCtrl->numFrameBits; + pMP->mad = (OsclFloat)rateCtrl->totalSAD / video->PicSizeInMbs; //ComputeFrameMAD(video, rateCtrl); + + AVCSaveRDSamples(pMP, 0); + + pMP->encoded_frames++; + + /* for pMP->samplesPerFrame */ + pMP->samplesPerFrame[pMP->framePos] = 0; + + pMP->sum_QP += pMP->QP; + + /* update pMP->counter_BTsrc, pMP->counter_BTdst */ + /* re-allocate the target bit again and then stop encoding */ + diff_BTCounter = (int)((OsclFloat)(rateCtrl->TMN_TH - rateCtrl->TMN_W - pMP->actual_bits) / + (pMP->bitrate / (pMP->framerate + 0.0001) + 0.0001) / 0.1); + if (diff_BTCounter >= 0) + pMP->counter_BTsrc += diff_BTCounter; /* pMP->actual_bits is smaller */ + else + pMP->counter_BTdst -= diff_BTCounter; /* pMP->actual_bits is bigger */ + + rateCtrl->TMN_TH -= (int)((OsclFloat)pMP->bitrate / (pMP->framerate + 0.0001) * (diff_BTCounter * 0.1)); + rateCtrl->T = pMP->target_bits = rateCtrl->TMN_TH - rateCtrl->TMN_W; + pMP->diff_counter -= diff_BTCounter; + + rateCtrl->Rc = rateCtrl->numFrameBits; /* Total Bits for current frame */ + rateCtrl->Hc = rateCtrl->NumberofHeaderBits; /* Total Bits in Header and Motion Vector */ + + /* BX_RC */ + updateRateControl(rateCtrl, nal_type); + if (rateCtrl->skip_next_frame == -1) // skip current frame + { + status = AVCENC_SKIPPED_PICTURE; + } + } + + rateCtrl->first_frame = 0; // reset here after we encode the first frame. 
+ + return status; +} + +void AVCSaveRDSamples(MultiPass *pMP, int counter_samples) +{ + /* for pMP->pRDSamples */ + pMP->pRDSamples[pMP->framePos][counter_samples].QP = pMP->QP; + pMP->pRDSamples[pMP->framePos][counter_samples].actual_bits = pMP->actual_bits; + pMP->pRDSamples[pMP->framePos][counter_samples].mad = pMP->mad; + pMP->pRDSamples[pMP->framePos][counter_samples].R_D = (OsclFloat)pMP->actual_bits / (pMP->mad + 0.0001); + + return ; +} + +void updateRateControl(AVCRateControl *rateCtrl, int nal_type) +{ + int frame_bits; + MultiPass *pMP = rateCtrl->pMP; + + /* BX rate contro\l */ + frame_bits = (int)(rateCtrl->bitRate / rateCtrl->frame_rate); + rateCtrl->TMN_W += (rateCtrl->Rc - rateCtrl->TMN_TH); + rateCtrl->VBV_fullness += (rateCtrl->Rc - frame_bits); //rateCtrl->Rp); + //if(rateCtrl->VBV_fullness < 0) rateCtrl->VBV_fullness = -1; + + rateCtrl->encoded_frames++; + + /* frame dropping */ + rateCtrl->skip_next_frame = 0; + + if ((rateCtrl->VBV_fullness > rateCtrl->Bs / 2) && nal_type != AVC_NALTYPE_IDR) /* skip the current frame */ /* rateCtrl->Bs */ + { + rateCtrl->TMN_W -= (rateCtrl->Rc - rateCtrl->TMN_TH); + rateCtrl->VBV_fullness -= rateCtrl->Rc; + rateCtrl->skip_next_frame = -1; + } + else if ((OsclFloat)(rateCtrl->VBV_fullness - rateCtrl->VBV_fullness_offset) > (rateCtrl->Bs / 2 - rateCtrl->VBV_fullness_offset)*0.95) /* skip next frame */ + { + rateCtrl->VBV_fullness -= frame_bits; //rateCtrl->Rp; + rateCtrl->skip_next_frame = 1; + pMP->counter_BTsrc -= (int)((OsclFloat)(rateCtrl->Bs / 2 - rateCtrl->low_bound) / 2.0 / (pMP->target_bits_per_frame / 10)); + /* BX_1, skip more than 1 frames */ + //while(rateCtrl->VBV_fullness > rateCtrl->Bs*0.475) + while ((rateCtrl->VBV_fullness - rateCtrl->VBV_fullness_offset) > (rateCtrl->Bs / 2 - rateCtrl->VBV_fullness_offset)*0.95) + { + rateCtrl->VBV_fullness -= frame_bits; //rateCtrl->Rp; + rateCtrl->skip_next_frame++; + pMP->counter_BTsrc -= (int)((OsclFloat)(rateCtrl->Bs / 2 - rateCtrl->low_bound) / 2.0 / (pMP->target_bits_per_frame / 10)); + } + + /* END BX_1 */ + } +} + + +double ComputeFrameMAD(AVCCommonObj *video, AVCRateControl *rateCtrl) +{ + double TotalMAD; + int i; + TotalMAD = 0.0; + for (i = 0; i < (int)video->PicSizeInMbs; i++) + TotalMAD += rateCtrl->MADofMB[i]; + TotalMAD /= video->PicSizeInMbs; + return TotalMAD; +} + + + + + +/* convert from QP to Qstep */ +double QP2Qstep(int QP) +{ + int i; + double Qstep; + static const double QP2QSTEP[6] = { 0.625, 0.6875, 0.8125, 0.875, 1.0, 1.125 }; + + Qstep = QP2QSTEP[QP % 6]; + for (i = 0; i < (QP / 6); i++) + Qstep *= 2; + + return Qstep; +} + +/* convert from step size to QP */ +int Qstep2QP(double Qstep) +{ + int q_per = 0, q_rem = 0; + + // assert( Qstep >= QP2Qstep(0) && Qstep <= QP2Qstep(51) ); + if (Qstep < QP2Qstep(0)) + return 0; + else if (Qstep > QP2Qstep(51)) + return 51; + + while (Qstep > QP2Qstep(5)) + { + Qstep /= 2; + q_per += 1; + } + + if (Qstep <= (0.625 + 0.6875) / 2) + { + Qstep = 0.625; + q_rem = 0; + } + else if (Qstep <= (0.6875 + 0.8125) / 2) + { + Qstep = 0.6875; + q_rem = 1; + } + else if (Qstep <= (0.8125 + 0.875) / 2) + { + Qstep = 0.8125; + q_rem = 2; + } + else if (Qstep <= (0.875 + 1.0) / 2) + { + Qstep = 0.875; + q_rem = 3; + } + else if (Qstep <= (1.0 + 1.125) / 2) + { + Qstep = 1.0; + q_rem = 4; + } + else + { + Qstep = 1.125; + q_rem = 5; + } + + return (q_per * 6 + q_rem); +} + + + diff --git a/media/libstagefright/codecs/avc/enc/src/residual.cpp b/media/libstagefright/codecs/avc/enc/src/residual.cpp new file mode 100644 index 
0000000..42eb910 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/residual.cpp @@ -0,0 +1,389 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + +AVCEnc_Status EncodeIntraPCM(AVCEncObject *encvid) +{ + AVCEnc_Status status = AVCENC_SUCCESS; + AVCCommonObj *video = encvid->common; + AVCFrameIO *currInput = encvid->currInput; + AVCEncBitstream *stream = encvid->bitstream; + int x_position = (video->mb_x << 4); + int y_position = (video->mb_y << 4); + int orgPitch = currInput->pitch; + int offset1 = y_position * orgPitch + x_position; + int i, j; + int offset; + uint8 *pDst, *pSrc; + uint code; + + ue_v(stream, 25); + + i = stream->bit_left & 0x7; + if (i) /* not byte-aligned */ + { + BitstreamWriteBits(stream, 0, i); + } + + pSrc = currInput->YCbCr[0] + offset1; + pDst = video->currPic->Sl + offset1; + offset = video->PicWidthInSamplesL - 16; + + /* at this point bitstream is byte-aligned */ + j = 16; + while (j > 0) + { +#if (WORD_SIZE==32) + for (i = 0; i < 4; i++) + { + code = *((uint*)pSrc); + pSrc += 4; + *((uint*)pDst) = code; + pDst += 4; + status = BitstreamWriteBits(stream, 32, code); + } +#else + for (i = 0; i < 8; i++) + { + code = *((uint*)pSrc); + pSrc += 2; + *((uint*)pDst) = code; + pDst += 2; + status = BitstreamWriteBits(stream, 16, code); + } +#endif + pDst += offset; + pSrc += offset; + j--; + } + if (status != AVCENC_SUCCESS) /* check only once per line */ + return status; + + pDst = video->currPic->Scb + ((offset1 + x_position) >> 2); + pSrc = currInput->YCbCr[1] + ((offset1 + x_position) >> 2); + offset >>= 1; + + j = 8; + while (j > 0) + { +#if (WORD_SIZE==32) + for (i = 0; i < 2; i++) + { + code = *((uint*)pSrc); + pSrc += 4; + *((uint*)pDst) = code; + pDst += 4; + status = BitstreamWriteBits(stream, 32, code); + } +#else + for (i = 0; i < 4; i++) + { + code = *((uint*)pSrc); + pSrc += 2; + *((uint*)pDst) = code; + pDst += 2; + status = BitstreamWriteBits(stream, 16, code); + } +#endif + pDst += offset; + pSrc += offset; + j--; + } + + if (status != AVCENC_SUCCESS) /* check only once per line */ + return status; + + pDst = video->currPic->Scr + ((offset1 + x_position) >> 2); + pSrc = currInput->YCbCr[2] + ((offset1 + x_position) >> 2); + + j = 8; + while (j > 0) + { +#if (WORD_SIZE==32) + for (i = 0; i < 2; i++) + { + code = *((uint*)pSrc); + pSrc += 4; + *((uint*)pDst) = code; + pDst += 4; + status = BitstreamWriteBits(stream, 32, code); + } +#else + for (i = 0; i < 4; i++) + { + code = *((uint*)pSrc); + pSrc += 2; + *((uint*)pDst) = code; + pDst += 2; + status = BitstreamWriteBits(stream, 16, code); + } +#endif + pDst += offset; + pSrc += offset; + j--; + } + + return status; +} + + +AVCEnc_Status enc_residual_block(AVCEncObject *encvid, AVCResidualType type, int cindx, AVCMacroblock *currMB) +{ + AVCEnc_Status status = 
AVCENC_SUCCESS; + AVCCommonObj *video = encvid->common; + int i, maxNumCoeff, nC; + int cdc = 0, cac = 0; + int TrailingOnes; + AVCEncBitstream *stream = encvid->bitstream; + uint trailing_ones_sign_flag; + int zerosLeft; + int *level, *run; + int TotalCoeff; + const static int incVlc[] = {0, 3, 6, 12, 24, 48, 32768}; // maximum vlc = 6 + int escape, numPrefix, sufmask, suffix, shift, sign, value, absvalue, vlcnum, level_two_or_higher; + int bindx = blkIdx2blkXY[cindx>>2][cindx&3] ; // raster scan index + + switch (type) + { + case AVC_Luma: + maxNumCoeff = 16; + level = encvid->level[cindx]; + run = encvid->run[cindx]; + TotalCoeff = currMB->nz_coeff[bindx]; + break; + case AVC_Intra16DC: + maxNumCoeff = 16; + level = encvid->leveldc; + run = encvid->rundc; + TotalCoeff = cindx; /* special case */ + bindx = 0; + cindx = 0; + break; + case AVC_Intra16AC: + maxNumCoeff = 15; + level = encvid->level[cindx]; + run = encvid->run[cindx]; + TotalCoeff = currMB->nz_coeff[bindx]; + break; + case AVC_ChromaDC: /* how to differentiate Cb from Cr */ + maxNumCoeff = 4; + cdc = 1; + if (cindx >= 8) + { + level = encvid->levelcdc + 4; + run = encvid->runcdc + 4; + TotalCoeff = cindx - 8; /* special case */ + } + else + { + level = encvid->levelcdc; + run = encvid->runcdc; + TotalCoeff = cindx; /* special case */ + } + break; + case AVC_ChromaAC: + maxNumCoeff = 15; + cac = 1; + level = encvid->level[cindx]; + run = encvid->run[cindx]; + cindx -= 16; + bindx = 16 + blkIdx2blkXY[cindx>>2][cindx&3]; + cindx += 16; + TotalCoeff = currMB->nz_coeff[bindx]; + break; + default: + return AVCENC_FAIL; + } + + + /* find TrailingOnes */ + TrailingOnes = 0; + zerosLeft = 0; + i = TotalCoeff - 1; + nC = 1; + while (i >= 0) + { + zerosLeft += run[i]; + if (nC && (level[i] == 1 || level[i] == -1)) + { + TrailingOnes++; + } + else + { + nC = 0; + } + i--; + } + if (TrailingOnes > 3) + { + TrailingOnes = 3; /* clip it */ + } + + if (!cdc) + { + if (!cac) /* not chroma */ + { + nC = predict_nnz(video, bindx & 3, bindx >> 2); + } + else /* chroma ac but not chroma dc */ + { + nC = predict_nnz_chroma(video, bindx & 3, bindx >> 2); + } + + status = ce_TotalCoeffTrailingOnes(stream, TrailingOnes, TotalCoeff, nC); + } + else + { + nC = -1; /* Chroma DC level */ + status = ce_TotalCoeffTrailingOnesChromaDC(stream, TrailingOnes, TotalCoeff); + } + + /* This part is done quite differently in ReadCoef4x4_CAVLC() */ + if (TotalCoeff > 0) + { + + i = TotalCoeff - 1; + + if (TrailingOnes) /* keep reading the sign of those trailing ones */ + { + nC = TrailingOnes; + trailing_ones_sign_flag = 0; + while (nC) + { + trailing_ones_sign_flag <<= 1; + trailing_ones_sign_flag |= ((uint32)level[i--] >> 31); /* 0 or positive, 1 for negative */ + nC--; + } + + /* instead of writing one bit at a time, read the whole thing at once */ + status = BitstreamWriteBits(stream, TrailingOnes, trailing_ones_sign_flag); + } + + level_two_or_higher = 1; + if (TotalCoeff > 3 && TrailingOnes == 3) + { + level_two_or_higher = 0; + } + + if (TotalCoeff > 10 && TrailingOnes < 3) + { + vlcnum = 1; + } + else + { + vlcnum = 0; + } + + /* then do this TotalCoeff-TrailingOnes times */ + for (i = TotalCoeff - TrailingOnes - 1; i >= 0; i--) + { + value = level[i]; + absvalue = (value >= 0) ? 
value : -value; + + if (level_two_or_higher) + { + if (value > 0) value--; + else value++; + level_two_or_higher = 0; + } + + if (value >= 0) + { + sign = 0; + } + else + { + sign = 1; + value = -value; + } + + if (vlcnum == 0) // VLC1 + { + if (value < 8) + { + status = BitstreamWriteBits(stream, value * 2 + sign - 1, 1); + } + else if (value < 8 + 8) + { + status = BitstreamWriteBits(stream, 14 + 1 + 4, (1 << 4) | ((value - 8) << 1) | sign); + } + else + { + status = BitstreamWriteBits(stream, 14 + 2 + 12, (1 << 12) | ((value - 16) << 1) | sign) ; + } + } + else // VLCN + { + shift = vlcnum - 1; + escape = (15 << shift) + 1; + numPrefix = (value - 1) >> shift; + sufmask = ~((0xffffffff) << shift); + suffix = (value - 1) & sufmask; + if (value < escape) + { + status = BitstreamWriteBits(stream, numPrefix + vlcnum + 1, (1 << (shift + 1)) | (suffix << 1) | sign); + } + else + { + status = BitstreamWriteBits(stream, 28, (1 << 12) | ((value - escape) << 1) | sign); + } + + } + + if (absvalue > incVlc[vlcnum]) + vlcnum++; + + if (i == TotalCoeff - TrailingOnes - 1 && absvalue > 3) + vlcnum = 2; + } + + if (status != AVCENC_SUCCESS) /* occasionally check the bitstream */ + { + return status; + } + if (TotalCoeff < maxNumCoeff) + { + if (!cdc) + { + ce_TotalZeros(stream, zerosLeft, TotalCoeff); + } + else + { + ce_TotalZerosChromaDC(stream, zerosLeft, TotalCoeff); + } + } + else + { + zerosLeft = 0; + } + + i = TotalCoeff - 1; + while (i > 0) /* don't do the last one */ + { + if (zerosLeft > 0) + { + ce_RunBefore(stream, run[i], zerosLeft); + } + + zerosLeft = zerosLeft - run[i]; + i--; + } + } + + return status; +} diff --git a/media/libstagefright/codecs/avc/enc/src/sad.cpp b/media/libstagefright/codecs/avc/enc/src/sad.cpp new file mode 100644 index 0000000..ae7acd2 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/sad.cpp @@ -0,0 +1,290 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" +#include "sad_inline.h" + +#define Cached_lx 176 + +#ifdef _SAD_STAT +uint32 num_sad_MB = 0; +uint32 num_sad_Blk = 0; +uint32 num_sad_MB_call = 0; +uint32 num_sad_Blk_call = 0; + +#define NUM_SAD_MB_CALL() num_sad_MB_call++ +#define NUM_SAD_MB() num_sad_MB++ +#define NUM_SAD_BLK_CALL() num_sad_Blk_call++ +#define NUM_SAD_BLK() num_sad_Blk++ + +#else + +#define NUM_SAD_MB_CALL() +#define NUM_SAD_MB() +#define NUM_SAD_BLK_CALL() +#define NUM_SAD_BLK() + +#endif + + +/* consist of +int AVCSAD_Macroblock_C(uint8 *ref,uint8 *blk,int dmin,int lx,void *extra_info) +int AVCSAD_MB_HTFM_Collect(uint8 *ref,uint8 *blk,int dmin,int lx,void *extra_info) +int AVCSAD_MB_HTFM(uint8 *ref,uint8 *blk,int dmin,int lx,void *extra_info) +*/ + + +/*================================================================== + Function: SAD_Macroblock + Date: 09/07/2000 + Purpose: Compute SAD 16x16 between blk and ref. + To do: Uniform subsampling will be inserted later! + Hypothesis Testing Fast Matching to be used later! + Changes: + 11/7/00: implemented MMX + 1/24/01: implemented SSE +==================================================================*/ +/********** C ************/ +int AVCSAD_Macroblock_C(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info) +{ + (void)(extra_info); + + int32 x10; + int dmin = (uint32)dmin_lx >> 16; + int lx = dmin_lx & 0xFFFF; + + NUM_SAD_MB_CALL(); + + x10 = simd_sad_mb(ref, blk, dmin, lx); + + return x10; +} + +#ifdef HTFM /* HTFM with uniform subsampling implementation 2/28/01 */ +/*=============================================================== + Function: AVCAVCSAD_MB_HTFM_Collect and AVCSAD_MB_HTFM + Date: 3/2/1 + Purpose: Compute the SAD on a 16x16 block using + uniform subsampling and hypothesis testing fast matching + for early dropout. SAD_MB_HP_HTFM_Collect is to collect + the statistics to compute the thresholds to be used in + SAD_MB_HP_HTFM. 
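+            The SAD is accumulated over 16 interleaved passes; each pass covers
+            a 4x4 grid of reference pixels taken every 4 pels horizontally and
+            vertically, with the phase supplied by offsetRef[].  After every
+            pass the partial SAD is tested against the current minimum (and, in
+            the HTFM version, against the accumulated threshold sadstar minus
+            nrmlz_th), so hopeless candidates are rejected early.  The Collect
+            version additionally gathers the abs_dif_mad_avg / countbreak
+            statistics from which the early-termination thresholds are derived.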
+ Input/Output: + Changes: + ===============================================================*/ + +int AVCAVCSAD_MB_HTFM_Collect(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info) +{ + int i; + int sad = 0; + uint8 *p1; + int lx4 = (dmin_lx << 2) & 0x3FFFC; + uint32 cur_word; + int saddata[16], tmp, tmp2; /* used when collecting flag (global) is on */ + int difmad; + int madstar; + HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info; + int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg); + uint *countbreak = &(htfm_stat->countbreak); + int *offsetRef = htfm_stat->offsetRef; + + madstar = (uint32)dmin_lx >> 20; + + NUM_SAD_MB_CALL(); + + blk -= 4; + for (i = 0; i < 16; i++) + { + p1 = ref + offsetRef[i]; + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + NUM_SAD_MB(); + + saddata[i] = sad; + + if (i > 0) + { + if ((uint32)sad > ((uint32)dmin_lx >> 16)) + { + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); + (*countbreak)++; + return sad; + } + } + } + + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? 
difmad : -difmad); + (*countbreak)++; + return sad; +} + +int AVCSAD_MB_HTFM(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info) +{ + int sad = 0; + uint8 *p1; + + int i; + int tmp, tmp2; + int lx4 = (dmin_lx << 2) & 0x3FFFC; + int sadstar = 0, madstar; + int *nrmlz_th = (int*) extra_info; + int *offsetRef = (int*) extra_info + 32; + uint32 cur_word; + + madstar = (uint32)dmin_lx >> 20; + + NUM_SAD_MB_CALL(); + + blk -= 4; + for (i = 0; i < 16; i++) + { + p1 = ref + offsetRef[i]; + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = (cur_word >> 24) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[8]; + tmp2 = (cur_word >> 16) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[4]; + tmp2 = (cur_word >> 8) & 0xFF; + sad = SUB_SAD(sad, tmp, tmp2); + tmp = p1[0]; + p1 += lx4; + tmp2 = (cur_word & 0xFF); + sad = SUB_SAD(sad, tmp, tmp2); + + NUM_SAD_MB(); + + sadstar += madstar; + if (((uint32)sad <= ((uint32)dmin_lx >> 16)) && (sad <= (sadstar - *nrmlz_th++))) + ; + else + return 65536; + } + + return sad; +} +#endif /* HTFM */ + + + diff --git a/media/libstagefright/codecs/avc/enc/src/sad_halfpel.cpp b/media/libstagefright/codecs/avc/enc/src/sad_halfpel.cpp new file mode 100644 index 0000000..faf2198 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/sad_halfpel.cpp @@ -0,0 +1,629 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * ------------------------------------------------------------------- + */ +/* contains +int AVCHalfPel1_SAD_MB(uint8 *ref,uint8 *blk,int dmin,int width,int ih,int jh) +int AVCHalfPel2_SAD_MB(uint8 *ref,uint8 *blk,int dmin,int width) +int AVCHalfPel1_SAD_Blk(uint8 *ref,uint8 *blk,int dmin,int width,int ih,int jh) +int AVCHalfPel2_SAD_Blk(uint8 *ref,uint8 *blk,int dmin,int width) + +int AVCSAD_MB_HalfPel_C(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info) +int AVCSAD_MB_HP_HTFM_Collect(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info) +int AVCSAD_MB_HP_HTFM(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info) +int AVCSAD_Blk_HalfPel_C(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info) +*/ + +#include "avcenc_lib.h" +#include "sad_halfpel_inline.h" + +#ifdef _SAD_STAT +uint32 num_sad_HP_MB = 0; +uint32 num_sad_HP_Blk = 0; +uint32 num_sad_HP_MB_call = 0; +uint32 num_sad_HP_Blk_call = 0; +#define NUM_SAD_HP_MB_CALL() num_sad_HP_MB_call++ +#define NUM_SAD_HP_MB() num_sad_HP_MB++ +#define NUM_SAD_HP_BLK_CALL() num_sad_HP_Blk_call++ +#define NUM_SAD_HP_BLK() num_sad_HP_Blk++ +#else +#define NUM_SAD_HP_MB_CALL() +#define NUM_SAD_HP_MB() +#define NUM_SAD_HP_BLK_CALL() +#define NUM_SAD_HP_BLK() +#endif + + + +/*=============================================================== + Function: SAD_MB_HalfPel + Date: 09/17/2000 + Purpose: Compute the SAD on the half-pel resolution + Input/Output: hmem is assumed to be a pointer to the starting + point of the search in the 33x33 matrix search region + Changes: + 11/7/00: implemented MMX + ===============================================================*/ +/*================================================================== + Function: AVCSAD_MB_HalfPel_C + Date: 04/30/2001 + Purpose: Compute SAD 16x16 between blk and ref in halfpel + resolution, + Changes: + ==================================================================*/ +/* One component is half-pel */ +int AVCSAD_MB_HalfPel_Cxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + (void)(extra_info); + + int i, j; + int sad = 0; + uint8 *kk, *p1, *p2, *p3, *p4; +// int sumref=0; + int temp; + int rx = dmin_rx & 0xFFFF; + + NUM_SAD_HP_MB_CALL(); + + p1 = ref; + p2 = ref + 1; + p3 = ref + rx; + p4 = ref + rx + 1; + kk = blk; + + for (i = 0; i < 16; i++) + { + for (j = 0; j < 16; j++) + { + + temp = ((p1[j] + p2[j] + p3[j] + p4[j] + 2) >> 2) - *kk++; + sad += AVC_ABS(temp); + } + + NUM_SAD_HP_MB(); + + if (sad > (int)((uint32)dmin_rx >> 16)) + return sad; + + p1 += rx; + p3 += rx; + p2 += rx; + p4 += rx; + } + return sad; +} + +int AVCSAD_MB_HalfPel_Cyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + (void)(extra_info); + + int i, j; + int sad = 0; + uint8 *kk, *p1, *p2; +// int sumref=0; + int temp; + int rx = dmin_rx & 0xFFFF; + + NUM_SAD_HP_MB_CALL(); + + p1 = ref; + p2 = ref + rx; /* either left/right or top/bottom pixel */ + kk = blk; + + for (i = 0; i < 16; i++) + { + for (j = 0; j < 16; j++) + { + + temp = ((p1[j] + p2[j] + 1) >> 1) - *kk++; + sad += AVC_ABS(temp); + } + + NUM_SAD_HP_MB(); + + if (sad > (int)((uint32)dmin_rx >> 16)) + return sad; + p1 += rx; + p2 += rx; + } + return sad; +} + +int AVCSAD_MB_HalfPel_Cxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + (void)(extra_info); + + int i, j; + int sad = 0; + uint8 *kk, *p1; + int temp; + int rx = dmin_rx & 0xFFFF; + + NUM_SAD_HP_MB_CALL(); + + p1 = ref; + kk = blk; + + for (i = 0; i < 16; i++) 
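+    /* Note (editorial comment, inferred from the code itself): dmin_rx is a
+       packed argument -- the current best SAD appears to sit in its upper 16
+       bits and the reference pitch (rx above) in its lower 16 bits, which is
+       what enables the per-row early exit inside this loop. */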
+ { + for (j = 0; j < 16; j++) + { + + temp = ((p1[j] + p1[j+1] + 1) >> 1) - *kk++; + sad += AVC_ABS(temp); + } + + NUM_SAD_HP_MB(); + + if (sad > (int)((uint32)dmin_rx >> 16)) + return sad; + p1 += rx; + } + return sad; +} + +#ifdef HTFM /* HTFM with uniform subsampling implementation, 2/28/01 */ + +//Checheck here +int AVCAVCSAD_MB_HP_HTFM_Collectxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + int i, j; + int sad = 0; + uint8 *p1, *p2; + int rx = dmin_rx & 0xFFFF; + int refwx4 = rx << 2; + int saddata[16]; /* used when collecting flag (global) is on */ + int difmad, tmp, tmp2; + int madstar; + HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info; + int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg); + UInt *countbreak = &(htfm_stat->countbreak); + int *offsetRef = htfm_stat->offsetRef; + uint32 cur_word; + + madstar = (uint32)dmin_rx >> 20; + + NUM_SAD_HP_MB_CALL(); + + blk -= 4; + + for (i = 0; i < 16; i++) /* 16 stages */ + { + p1 = ref + offsetRef[i]; + p2 = p1 + rx; + + j = 4;/* 4 lines */ + do + { + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12] + p2[12]; + tmp2 = p1[13] + p2[13]; + tmp += tmp2; + tmp2 = (cur_word >> 24) & 0xFF; + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[8] + p2[8]; + tmp2 = p1[9] + p2[9]; + tmp += tmp2; + tmp2 = (cur_word >> 16) & 0xFF; + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[4] + p2[4]; + tmp2 = p1[5] + p2[5]; + tmp += tmp2; + tmp2 = (cur_word >> 8) & 0xFF; + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + tmp2 = p1[1] + p2[1]; + tmp = p1[0] + p2[0]; + p1 += refwx4; + p2 += refwx4; + tmp += tmp2; + tmp2 = (cur_word & 0xFF); + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + } + while (--j); + + NUM_SAD_HP_MB(); + + saddata[i] = sad; + + if (i > 0) + { + if (sad > ((uint32)dmin_rx >> 16)) + { + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); + (*countbreak)++; + return sad; + } + } + } + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? 
difmad : -difmad); + (*countbreak)++; + + return sad; +} + +int AVCAVCSAD_MB_HP_HTFM_Collectyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + int i, j; + int sad = 0; + uint8 *p1, *p2; + int rx = dmin_rx & 0xFFFF; + int refwx4 = rx << 2; + int saddata[16]; /* used when collecting flag (global) is on */ + int difmad, tmp, tmp2; + int madstar; + HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info; + int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg); + UInt *countbreak = &(htfm_stat->countbreak); + int *offsetRef = htfm_stat->offsetRef; + uint32 cur_word; + + madstar = (uint32)dmin_rx >> 20; + + NUM_SAD_HP_MB_CALL(); + + blk -= 4; + + for (i = 0; i < 16; i++) /* 16 stages */ + { + p1 = ref + offsetRef[i]; + p2 = p1 + rx; + j = 4; + do + { + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = p2[12]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 24) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[8]; + tmp2 = p2[8]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 16) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[4]; + tmp2 = p2[4]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 8) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[0]; + p1 += refwx4; + tmp2 = p2[0]; + p2 += refwx4; + tmp++; + tmp2 += tmp; + tmp = (cur_word & 0xFF); + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + } + while (--j); + + NUM_SAD_HP_MB(); + + saddata[i] = sad; + + if (i > 0) + { + if (sad > ((uint32)dmin_rx >> 16)) + { + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); + (*countbreak)++; + return sad; + } + } + } + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); + (*countbreak)++; + + return sad; +} + +int AVCAVCSAD_MB_HP_HTFM_Collectxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + int i, j; + int sad = 0; + uint8 *p1; + int rx = dmin_rx & 0xFFFF; + int refwx4 = rx << 2; + int saddata[16]; /* used when collecting flag (global) is on */ + int difmad, tmp, tmp2; + int madstar; + HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info; + int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg); + UInt *countbreak = &(htfm_stat->countbreak); + int *offsetRef = htfm_stat->offsetRef; + uint32 cur_word; + + madstar = (uint32)dmin_rx >> 20; + + NUM_SAD_HP_MB_CALL(); + + blk -= 4; + + for (i = 0; i < 16; i++) /* 16 stages */ + { + p1 = ref + offsetRef[i]; + + j = 4; /* 4 lines */ + do + { + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = p1[13]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 24) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[8]; + tmp2 = p1[9]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 16) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[4]; + tmp2 = p1[5]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 8) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[0]; + tmp2 = p1[1]; + p1 += refwx4; + tmp++; + tmp2 += tmp; + tmp = (cur_word & 0xFF); + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + } + while (--j); + + NUM_SAD_HP_MB(); + + saddata[i] = sad; + + if (i > 0) + { + if (sad > ((uint32)dmin_rx >> 16)) + { + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); + (*countbreak)++; + return sad; + } + } + } + difmad = saddata[0] - ((saddata[1] + 1) >> 1); + (*abs_dif_mad_avg) += ((difmad > 0) ? 
difmad : -difmad); + (*countbreak)++; + + return sad; +} + +int AVCSAD_MB_HP_HTFMxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + int i, j; + int sad = 0, tmp, tmp2; + uint8 *p1, *p2; + int rx = dmin_rx & 0xFFFF; + int refwx4 = rx << 2; + int sadstar = 0, madstar; + int *nrmlz_th = (int*) extra_info; + int *offsetRef = nrmlz_th + 32; + uint32 cur_word; + + madstar = (uint32)dmin_rx >> 20; + + NUM_SAD_HP_MB_CALL(); + + blk -= 4; + + for (i = 0; i < 16; i++) /* 16 stages */ + { + p1 = ref + offsetRef[i]; + p2 = p1 + rx; + + j = 4; /* 4 lines */ + do + { + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12] + p2[12]; + tmp2 = p1[13] + p2[13]; + tmp += tmp2; + tmp2 = (cur_word >> 24) & 0xFF; + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[8] + p2[8]; + tmp2 = p1[9] + p2[9]; + tmp += tmp2; + tmp2 = (cur_word >> 16) & 0xFF; + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[4] + p2[4]; + tmp2 = p1[5] + p2[5]; + tmp += tmp2; + tmp2 = (cur_word >> 8) & 0xFF; + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + tmp2 = p1[1] + p2[1]; + tmp = p1[0] + p2[0]; + p1 += refwx4; + p2 += refwx4; + tmp += tmp2; + tmp2 = (cur_word & 0xFF); + tmp += 2; + sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; + } + while (--j); + + NUM_SAD_HP_MB(); + + sadstar += madstar; + if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16)) + { + return 65536; + } + } + + return sad; +} + +int AVCSAD_MB_HP_HTFMyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + int i, j; + int sad = 0, tmp, tmp2; + uint8 *p1, *p2; + int rx = dmin_rx & 0xFFFF; + int refwx4 = rx << 2; + int sadstar = 0, madstar; + int *nrmlz_th = (int*) extra_info; + int *offsetRef = nrmlz_th + 32; + uint32 cur_word; + + madstar = (uint32)dmin_rx >> 20; + + NUM_SAD_HP_MB_CALL(); + + blk -= 4; + + for (i = 0; i < 16; i++) /* 16 stages */ + { + p1 = ref + offsetRef[i]; + p2 = p1 + rx; + j = 4; + do + { + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = p2[12]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 24) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[8]; + tmp2 = p2[8]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 16) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[4]; + tmp2 = p2[4]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 8) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[0]; + p1 += refwx4; + tmp2 = p2[0]; + p2 += refwx4; + tmp++; + tmp2 += tmp; + tmp = (cur_word & 0xFF); + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + } + while (--j); + + NUM_SAD_HP_MB(); + sadstar += madstar; + if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16)) + { + return 65536; + } + } + + return sad; +} + +int AVCSAD_MB_HP_HTFMxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) +{ + int i, j; + int sad = 0, tmp, tmp2; + uint8 *p1; + int rx = dmin_rx & 0xFFFF; + int refwx4 = rx << 2; + int sadstar = 0, madstar; + int *nrmlz_th = (int*) extra_info; + int *offsetRef = nrmlz_th + 32; + uint32 cur_word; + + madstar = (uint32)dmin_rx >> 20; + + NUM_SAD_HP_MB_CALL(); + + blk -= 4; + + for (i = 0; i < 16; i++) /* 16 stages */ + { + p1 = ref + offsetRef[i]; + + j = 4;/* 4 lines */ + do + { + cur_word = *((uint32*)(blk += 4)); + tmp = p1[12]; + tmp2 = p1[13]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 24) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[8]; + tmp2 = p1[9]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 16) & 0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[4]; + tmp2 = p1[5]; + tmp++; + tmp2 += tmp; + tmp = (cur_word >> 8) & 
0xFF; + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + tmp = p1[0]; + tmp2 = p1[1]; + p1 += refwx4; + tmp++; + tmp2 += tmp; + tmp = (cur_word & 0xFF); + sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; + } + while (--j); + + NUM_SAD_HP_MB(); + + sadstar += madstar; + if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16)) + { + return 65536; + } + } + + return sad; +} + +#endif /* HTFM */ + + + + + diff --git a/media/libstagefright/codecs/avc/enc/src/sad_halfpel_inline.h b/media/libstagefright/codecs/avc/enc/src/sad_halfpel_inline.h new file mode 100644 index 0000000..3a21647 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/sad_halfpel_inline.h @@ -0,0 +1,96 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ + +#ifndef _SAD_HALFPEL_INLINE_H_ +#define _SAD_HALFPEL_INLINE_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#if defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ + + __inline int32 INTERP1_SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { + tmp = (tmp2 >> 1) - tmp; + if (tmp > 0) sad += tmp; + else sad -= tmp; + + return sad; + } + + __inline int32 INTERP2_SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { + tmp = (tmp >> 2) - tmp2; + if (tmp > 0) sad += tmp; + else sad -= tmp; + + return sad; + } + +#elif defined(__CC_ARM) /* only work with arm v5 */ + + __inline int32 INTERP1_SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { + __asm + { + rsbs tmp, tmp, tmp2, asr #1 ; + rsbmi tmp, tmp, #0 ; + add sad, sad, tmp ; + } + + return sad; + } + + __inline int32 INTERP2_SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { + __asm + { + rsbs tmp, tmp2, tmp, asr #2 ; + rsbmi tmp, tmp, #0 ; + add sad, sad, tmp ; + } + + return sad; + } + +#elif defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ + + __inline int32 INTERP1_SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { +__asm__ volatile("rsbs %1, %1, %2, asr #1\n\trsbmi %1, %1, #0\n\tadd %0, %0, %1": "=r"(sad), "=r"(tmp): "r"(tmp2)); + + return sad; + } + + __inline int32 INTERP2_SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { +__asm__ volatile("rsbs %1, %2, %1, asr #2\n\trsbmi %1, %1, #0\n\tadd %0, %0, %1": "=r"(sad), "=r"(tmp): "r"(tmp2)); + + return sad; + } + +#endif + +#ifdef __cplusplus +} +#endif + +#endif //_SAD_HALFPEL_INLINE_H_ + diff --git a/media/libstagefright/codecs/avc/enc/src/sad_inline.h b/media/libstagefright/codecs/avc/enc/src/sad_inline.h new file mode 100644 index 0000000..f39794f --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/sad_inline.h @@ -0,0 +1,488 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#ifndef _SAD_INLINE_H_ +#define _SAD_INLINE_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#if defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ + + __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { + tmp = tmp - tmp2; + if (tmp > 0) sad += tmp; + else sad -= tmp; + + return sad; + } + + __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask) + { + int32 x7; + + x7 = src2 ^ src1; /* check odd/even combination */ + if ((uint32)src2 >= (uint32)src1) + { + src1 = src2 - src1; /* subs */ + } + else + { + src1 = src1 - src2; + } + x7 = x7 ^ src1; /* only odd bytes need to add carry */ + x7 = mask & ((uint32)x7 >> 1); + x7 = (x7 << 8) - x7; + src1 = src1 + (x7 >> 7); /* add 0xFF to the negative byte, add back carry */ + src1 = src1 ^(x7 >> 7); /* take absolute value of negative byte */ + + return src1; + } + +#define NUMBER 3 +#define SHIFT 24 + +#include "sad_mb_offset.h" + +#undef NUMBER +#define NUMBER 2 +#undef SHIFT +#define SHIFT 16 +#include "sad_mb_offset.h" + +#undef NUMBER +#define NUMBER 1 +#undef SHIFT +#define SHIFT 8 +#include "sad_mb_offset.h" + + + __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx) + { + int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; + + x9 = 0x80808080; /* const. */ + + x8 = (uint32)ref & 0x3; + if (x8 == 3) + goto SadMBOffset3; + if (x8 == 2) + goto SadMBOffset2; + if (x8 == 1) + goto SadMBOffset1; + +// x5 = (x4<<8)-x4; /* x5 = x4*255; */ + x4 = x5 = 0; + + x6 = 0xFFFF00FF; + + ref -= lx; + blk -= 16; + + x8 = 16; + +LOOP_SAD0: + /****** process 8 pixels ******/ + x10 = *((uint32*)(ref += lx)); + x11 = *((uint32*)(ref + 4)); + x12 = *((uint32*)(blk += 16)); + x14 = *((uint32*)(blk + 4)); + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + /****** process 8 pixels ******/ + x10 = *((uint32*)(ref + 8)); + x11 = *((uint32*)(ref + 12)); + x12 = *((uint32*)(blk + 8)); + x14 = *((uint32*)(blk + 12)); + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + /****************/ + x10 = x5 - (x4 << 8); /* extract low bytes */ + x10 = x10 + x4; /* add with high bytes */ + x10 = x10 + (x10 << 16); /* add with lower half word */ + + if ((int)((uint32)x10 >> 16) <= dmin) /* compare with dmin */ + { + if (--x8) + { + goto LOOP_SAD0; + } + + } + + return ((uint32)x10 >> 16); + 
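+    /* The labels below are reached only when ref is not 32-bit aligned; the
+       sad_mb_offset1/2/3 variants (instantiated from sad_mb_offset.h with
+       NUMBER/SHIFT = 1/8, 2/16, 3/24) realign the loads by shifting and
+       merging adjacent words before running the same packed byte-wise SAD
+       accumulation used in the aligned path above. */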
+SadMBOffset3: + + return sad_mb_offset3(ref, blk, lx, dmin); + +SadMBOffset2: + + return sad_mb_offset2(ref, blk, lx, dmin); + +SadMBOffset1: + + return sad_mb_offset1(ref, blk, lx, dmin); + + } + +#elif defined(__CC_ARM) /* only work with arm v5 */ + + __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { + __asm + { + rsbs tmp, tmp, tmp2 ; + rsbmi tmp, tmp, #0 ; + add sad, sad, tmp ; + } + + return sad; + } + + __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask) + { + int32 x7; + + __asm + { + EOR x7, src2, src1; /* check odd/even combination */ + SUBS src1, src2, src1; + EOR x7, x7, src1; + AND x7, mask, x7, lsr #1; + ORRCC x7, x7, #0x80000000; + RSB x7, x7, x7, lsl #8; + ADD src1, src1, x7, asr #7; /* add 0xFF to the negative byte, add back carry */ + EOR src1, src1, x7, asr #7; /* take absolute value of negative byte */ + } + + return src1; + } + + __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask) + { + int32 x7; + + __asm + { + EOR x7, src2, src1; /* check odd/even combination */ + ADDS src1, src2, src1; + EOR x7, x7, src1; /* only odd bytes need to add carry */ + ANDS x7, mask, x7, rrx; + RSB x7, x7, x7, lsl #8; + SUB src1, src1, x7, asr #7; /* add 0xFF to the negative byte, add back carry */ + EOR src1, src1, x7, asr #7; /* take absolute value of negative byte */ + } + + return src1; + } + +#define sum_accumulate __asm{ SBC x5, x5, x10; /* accumulate low bytes */ \ + BIC x10, x6, x10; /* x10 & 0xFF00FF00 */ \ + ADD x4, x4, x10,lsr #8; /* accumulate high bytes */ \ + SBC x5, x5, x11; /* accumulate low bytes */ \ + BIC x11, x6, x11; /* x11 & 0xFF00FF00 */ \ + ADD x4, x4, x11,lsr #8; } /* accumulate high bytes */ + + +#define NUMBER 3 +#define SHIFT 24 +#define INC_X8 0x08000001 + +#include "sad_mb_offset.h" + +#undef NUMBER +#define NUMBER 2 +#undef SHIFT +#define SHIFT 16 +#undef INC_X8 +#define INC_X8 0x10000001 +#include "sad_mb_offset.h" + +#undef NUMBER +#define NUMBER 1 +#undef SHIFT +#define SHIFT 8 +#undef INC_X8 +#define INC_X8 0x08000001 +#include "sad_mb_offset.h" + + + __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx) + { + int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; + + x9 = 0x80808080; /* const. 
*/ + x4 = x5 = 0; + + __asm + { + MOVS x8, ref, lsl #31 ; + BHI SadMBOffset3; + BCS SadMBOffset2; + BMI SadMBOffset1; + + MVN x6, #0xFF00; + } +LOOP_SAD0: + /****** process 8 pixels ******/ + x11 = *((int32*)(ref + 12)); + x10 = *((int32*)(ref + 8)); + x14 = *((int32*)(blk + 12)); + x12 = *((int32*)(blk + 8)); + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + __asm + { + /****** process 8 pixels ******/ + LDR x11, [ref, #4]; + LDR x10, [ref], lx ; + LDR x14, [blk, #4]; + LDR x12, [blk], #16 ; + } + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + /****************/ + x10 = x5 - (x4 << 8); /* extract low bytes */ + x10 = x10 + x4; /* add with high bytes */ + x10 = x10 + (x10 << 16); /* add with lower half word */ + + __asm + { + /****************/ + RSBS x11, dmin, x10, lsr #16; + ADDLSS x8, x8, #0x10000001; + BLS LOOP_SAD0; + } + + return ((uint32)x10 >> 16); + +SadMBOffset3: + + return sad_mb_offset3(ref, blk, lx, dmin, x8); + +SadMBOffset2: + + return sad_mb_offset2(ref, blk, lx, dmin, x8); + +SadMBOffset1: + + return sad_mb_offset1(ref, blk, lx, dmin, x8); + } + + +#elif defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ + + __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2) + { +__asm__ volatile("rsbs %1, %1, %2\n\trsbmi %1, %1, #0\n\tadd %0, %0, %1": "=r"(sad): "r"(tmp), "r"(tmp2)); + return sad; + } + + __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask) + { + int32 x7; + +__asm__ volatile("EOR %1, %2, %0\n\tSUBS %0, %2, %0\n\tEOR %1, %1, %0\n\tAND %1, %3, %1, lsr #1\n\tORRCC %1, %1, #0x80000000\n\tRSB %1, %1, %1, lsl #8\n\tADD %0, %0, %1, asr #7\n\tEOR %0, %0, %1, asr #7": "=r"(src1), "=&r"(x7): "r"(src2), "r"(mask)); + + return src1; + } + + __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask) + { + int32 x7; + +__asm__ volatile("EOR %1, %2, %0\n\tADDS %0, %2, %0\n\tEOR %1, %1, %0\n\tANDS %1, %3, %1, rrx\n\tRSB %1, %1, %1, lsl #8\n\tSUB %0, %0, %1, asr #7\n\tEOR %0, %0, %1, asr #7": "=r"(src1), "=&r"(x7): "r"(src2), "r"(mask)); + + return src1; + } + +#define sum_accumulate __asm__ volatile("SBC %0, %0, %1\n\tBIC %1, %4, %1\n\tADD %2, %2, %1, lsr #8\n\tSBC %0, %0, %3\n\tBIC %3, %4, %3\n\tADD %2, %2, %3, lsr #8": "=&r" (x5), "=&r" (x10), "=&r" (x4), "=&r" (x11): "r" (x6)); + +#define NUMBER 3 +#define SHIFT 24 +#define INC_X8 0x08000001 + +#include "sad_mb_offset.h" + +#undef NUMBER +#define NUMBER 2 +#undef SHIFT +#define SHIFT 16 +#undef INC_X8 +#define INC_X8 0x10000001 +#include "sad_mb_offset.h" + +#undef NUMBER +#define NUMBER 1 +#undef SHIFT +#define SHIFT 8 +#undef INC_X8 +#define INC_X8 0x08000001 +#include "sad_mb_offset.h" + + + __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx) + { + int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; + + x9 = 0x80808080; /* const. 
*/ + x4 = x5 = 0; + + x8 = (uint32)ref & 0x3; + if (x8 == 3) + goto SadMBOffset3; + if (x8 == 2) + goto SadMBOffset2; + if (x8 == 1) + goto SadMBOffset1; + + x8 = 16; +/// +__asm__ volatile("MVN %0, #0xFF00": "=r"(x6)); + +LOOP_SAD0: + /****** process 8 pixels ******/ + x11 = *((int32*)(ref + 12)); + x10 = *((int32*)(ref + 8)); + x14 = *((int32*)(blk + 12)); + x12 = *((int32*)(blk + 8)); + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + /****** process 8 pixels ******/ + x11 = *((int32*)(ref + 4)); +__asm__ volatile("LDR %0, [%1], %2": "=&r"(x10), "=r"(ref): "r"(lx)); + //x10 = *((int32*)ref); ref+=lx; + x14 = *((int32*)(blk + 4)); +__asm__ volatile("LDR %0, [%1], #16": "=&r"(x12), "=r"(blk)); + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + /****************/ + x10 = x5 - (x4 << 8); /* extract low bytes */ + x10 = x10 + x4; /* add with high bytes */ + x10 = x10 + (x10 << 16); /* add with lower half word */ + + /****************/ + + if (((uint32)x10 >> 16) <= dmin) /* compare with dmin */ + { + if (--x8) + { + goto LOOP_SAD0; + } + + } + + return ((uint32)x10 >> 16); + +SadMBOffset3: + + return sad_mb_offset3(ref, blk, lx, dmin); + +SadMBOffset2: + + return sad_mb_offset2(ref, blk, lx, dmin); + +SadMBOffset1: + + return sad_mb_offset1(ref, blk, lx, dmin); + } + + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // _SAD_INLINE_H_ + diff --git a/media/libstagefright/codecs/avc/enc/src/sad_mb_offset.h b/media/libstagefright/codecs/avc/enc/src/sad_mb_offset.h new file mode 100644 index 0000000..d5d4a42 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/sad_mb_offset.h @@ -0,0 +1,311 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * ------------------------------------------------------------------- + */ + +#if defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ + +#if (NUMBER==3) +__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin) +#elif (NUMBER==2) +__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin) +#elif (NUMBER==1) +__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin) +#endif +{ + int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; + + // x5 = (x4<<8) - x4; + x4 = x5 = 0; + x6 = 0xFFFF00FF; + x9 = 0x80808080; /* const. */ + ref -= NUMBER; /* bic ref, ref, #3 */ + ref -= lx; + blk -= 16; + x8 = 16; + +#if (NUMBER==3) +LOOP_SAD3: +#elif (NUMBER==2) +LOOP_SAD2: +#elif (NUMBER==1) +LOOP_SAD1: +#endif + /****** process 8 pixels ******/ + x10 = *((uint32*)(ref += lx)); /* D C B A */ + x11 = *((uint32*)(ref + 4)); /* H G F E */ + x12 = *((uint32*)(ref + 8)); /* L K J I */ + + x10 = ((uint32)x10 >> SHIFT); /* 0 0 0 D */ + x10 = x10 | (x11 << (32 - SHIFT)); /* G F E D */ + x11 = ((uint32)x11 >> SHIFT); /* 0 0 0 H */ + x11 = x11 | (x12 << (32 - SHIFT)); /* K J I H */ + + x12 = *((uint32*)(blk += 16)); + x14 = *((uint32*)(blk + 4)); + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + /****** process 8 pixels ******/ + x10 = *((uint32*)(ref + 8)); /* D C B A */ + x11 = *((uint32*)(ref + 12)); /* H G F E */ + x12 = *((uint32*)(ref + 16)); /* L K J I */ + + x10 = ((uint32)x10 >> SHIFT); /* mvn x10, x10, lsr #24 = 0xFF 0xFF 0xFF ~D */ + x10 = x10 | (x11 << (32 - SHIFT)); /* bic x10, x10, x11, lsl #8 = ~G ~F ~E ~D */ + x11 = ((uint32)x11 >> SHIFT); /* 0xFF 0xFF 0xFF ~H */ + x11 = x11 | (x12 << (32 - SHIFT)); /* ~K ~J ~I ~H */ + + x12 = *((uint32*)(blk + 8)); + x14 = *((uint32*)(blk + 12)); + + /* process x11 & x14 */ + x11 = sad_4pixel(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixel(x10, x12, x9); + + x5 = x5 + x10; /* accumulate low bytes */ + x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */ + x5 = x5 + x11; /* accumulate low bytes */ + x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */ + x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */ + + /****************/ + x10 = x5 - (x4 << 8); /* extract low bytes */ + x10 = x10 + x4; /* add with high bytes */ + x10 = x10 + (x10 << 16); /* add with lower half word */ + + if ((int)((uint32)x10 >> 16) <= dmin) /* compare with dmin */ + { + if (--x8) + { +#if (NUMBER==3) + goto LOOP_SAD3; +#elif (NUMBER==2) + goto LOOP_SAD2; +#elif (NUMBER==1) + goto LOOP_SAD1; +#endif + } + + } + + return ((uint32)x10 >> 16); +} + +#elif defined(__CC_ARM) /* only work with arm v5 */ + +#if (NUMBER==3) +__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8) +#elif (NUMBER==2) +__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8) +#elif (NUMBER==1) +__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8) +#endif +{ + int32 x4, x5, x6, x9, x10, x11, x12, x14; + + x9 = 0x80808080; /* const. 
*/ + x4 = x5 = 0; + + __asm{ + MVN x6, #0xff0000; +#if (NUMBER==3) +LOOP_SAD3: +#elif (NUMBER==2) +LOOP_SAD2: +#elif (NUMBER==1) +LOOP_SAD1: +#endif + BIC ref, ref, #3; + } + /****** process 8 pixels ******/ + x11 = *((int32*)(ref + 12)); + x12 = *((int32*)(ref + 16)); + x10 = *((int32*)(ref + 8)); + x14 = *((int32*)(blk + 12)); + + __asm{ + MVN x10, x10, lsr #SHIFT; + BIC x10, x10, x11, lsl #(32-SHIFT); + MVN x11, x11, lsr #SHIFT; + BIC x11, x11, x12, lsl #(32-SHIFT); + + LDR x12, [blk, #8]; + } + + /* process x11 & x14 */ + x11 = sad_4pixelN(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixelN(x10, x12, x9); + + sum_accumulate; + + __asm{ + /****** process 8 pixels ******/ + LDR x11, [ref, #4]; + LDR x12, [ref, #8]; + LDR x10, [ref], lx ; + LDR x14, [blk, #4]; + + MVN x10, x10, lsr #SHIFT; + BIC x10, x10, x11, lsl #(32-SHIFT); + MVN x11, x11, lsr #SHIFT; + BIC x11, x11, x12, lsl #(32-SHIFT); + + LDR x12, [blk], #16; + } + + /* process x11 & x14 */ + x11 = sad_4pixelN(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixelN(x10, x12, x9); + + sum_accumulate; + + /****************/ + x10 = x5 - (x4 << 8); /* extract low bytes */ + x10 = x10 + x4; /* add with high bytes */ + x10 = x10 + (x10 << 16); /* add with lower half word */ + + __asm{ + RSBS x11, dmin, x10, lsr #16 + ADDLSS x8, x8, #INC_X8 +#if (NUMBER==3) + BLS LOOP_SAD3; +#elif (NUMBER==2) +BLS LOOP_SAD2; +#elif (NUMBER==1) +BLS LOOP_SAD1; +#endif + } + + return ((uint32)x10 >> 16); +} + +#elif defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */ + +#if (NUMBER==3) +__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin) +#elif (NUMBER==2) +__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin) +#elif (NUMBER==1) +__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin) +#endif +{ + int32 x4, x5, x6, x8, x9, x10, x11, x12, x14; + + x9 = 0x80808080; /* const. 
*/ + x4 = x5 = 0; + x8 = 16; //<<===========******* + +__asm__ volatile("MVN %0, #0xFF0000": "=r"(x6)); + +#if (NUMBER==3) +LOOP_SAD3: +#elif (NUMBER==2) +LOOP_SAD2: +#elif (NUMBER==1) +LOOP_SAD1: +#endif +__asm__ volatile("BIC %0, %0, #3": "=r"(ref)); + /****** process 8 pixels ******/ + x11 = *((int32*)(ref + 12)); + x12 = *((int32*)(ref + 16)); + x10 = *((int32*)(ref + 8)); + x14 = *((int32*)(blk + 12)); + +#if (SHIFT==8) +__asm__ volatile("MVN %0, %0, lsr #8\n\tBIC %0, %0, %1,lsl #24\n\tMVN %1, %1,lsr #8\n\tBIC %1, %1, %2,lsl #24": "=&r"(x10), "=&r"(x11): "r"(x12)); +#elif (SHIFT==16) +__asm__ volatile("MVN %0, %0, lsr #16\n\tBIC %0, %0, %1,lsl #16\n\tMVN %1, %1,lsr #16\n\tBIC %1, %1, %2,lsl #16": "=&r"(x10), "=&r"(x11): "r"(x12)); +#elif (SHIFT==24) +__asm__ volatile("MVN %0, %0, lsr #24\n\tBIC %0, %0, %1,lsl #8\n\tMVN %1, %1,lsr #24\n\tBIC %1, %1, %2,lsl #8": "=&r"(x10), "=&r"(x11): "r"(x12)); +#endif + + x12 = *((int32*)(blk + 8)); + + /* process x11 & x14 */ + x11 = sad_4pixelN(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixelN(x10, x12, x9); + + sum_accumulate; + + /****** process 8 pixels ******/ + x11 = *((int32*)(ref + 4)); + x12 = *((int32*)(ref + 8)); + x10 = *((int32*)ref); ref += lx; + x14 = *((int32*)(blk + 4)); + +#if (SHIFT==8) +__asm__ volatile("MVN %0, %0, lsr #8\n\tBIC %0, %0, %1,lsl #24\n\tMVN %1, %1,lsr #8\n\tBIC %1, %1, %2,lsl #24": "=&r"(x10), "=&r"(x11): "r"(x12)); +#elif (SHIFT==16) +__asm__ volatile("MVN %0, %0, lsr #16\n\tBIC %0, %0, %1,lsl #16\n\tMVN %1, %1,lsr #16\n\tBIC %1, %1, %2,lsl #16": "=&r"(x10), "=&r"(x11): "r"(x12)); +#elif (SHIFT==24) +__asm__ volatile("MVN %0, %0, lsr #24\n\tBIC %0, %0, %1,lsl #8\n\tMVN %1, %1,lsr #24\n\tBIC %1, %1, %2,lsl #8": "=&r"(x10), "=&r"(x11): "r"(x12)); +#endif +__asm__ volatile("LDR %0, [%1], #16": "=&r"(x12), "=r"(blk)); + + /* process x11 & x14 */ + x11 = sad_4pixelN(x11, x14, x9); + + /* process x12 & x10 */ + x10 = sad_4pixelN(x10, x12, x9); + + sum_accumulate; + + /****************/ + x10 = x5 - (x4 << 8); /* extract low bytes */ + x10 = x10 + x4; /* add with high bytes */ + x10 = x10 + (x10 << 16); /* add with lower half word */ + + if (((uint32)x10 >> 16) <= (uint32)dmin) /* compare with dmin */ + { + if (--x8) + { +#if (NUMBER==3) + goto LOOP_SAD3; +#elif (NUMBER==2) +goto LOOP_SAD2; +#elif (NUMBER==1) +goto LOOP_SAD1; +#endif + } + + } + + return ((uint32)x10 >> 16); +} + +#endif + diff --git a/media/libstagefright/codecs/avc/enc/src/slice.cpp b/media/libstagefright/codecs/avc/enc/src/slice.cpp new file mode 100644 index 0000000..f6d066e --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/slice.cpp @@ -0,0 +1,1025 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + + +AVCEnc_Status AVCEncodeSlice(AVCEncObject *encvid) +{ + AVCEnc_Status status = AVCENC_SUCCESS; + AVCCommonObj *video = encvid->common; + AVCPicParamSet *pps = video->currPicParams; + AVCSliceHeader *sliceHdr = video->sliceHdr; + AVCMacroblock *currMB ; + AVCEncBitstream *stream = encvid->bitstream; + uint slice_group_id; + int CurrMbAddr, slice_type; + + slice_type = video->slice_type; + + /* set the first mb in slice */ + video->mbNum = CurrMbAddr = sliceHdr->first_mb_in_slice;// * (1+video->MbaffFrameFlag); + slice_group_id = video->MbToSliceGroupMap[CurrMbAddr]; + + video->mb_skip_run = 0; + + /* while loop , see subclause 7.3.4 */ + while (1) + { + video->mbNum = CurrMbAddr; + currMB = video->currMB = &(video->mblock[CurrMbAddr]); + currMB->slice_id = video->slice_id; // for deblocking + + video->mb_x = CurrMbAddr % video->PicWidthInMbs; + video->mb_y = CurrMbAddr / video->PicWidthInMbs; + + /* initialize QP for this MB here*/ + /* calculate currMB->QPy */ + RCInitMBQP(encvid); + + /* check the availability of neighboring macroblocks */ + InitNeighborAvailability(video, CurrMbAddr); + + /* Assuming that InitNeighborAvailability has been called prior to this function */ + video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0; + /* this is necessary for all subsequent intra search */ + + if (!video->currPicParams->constrained_intra_pred_flag) + { + video->intraAvailA = video->mbAvailA; + video->intraAvailB = video->mbAvailB; + video->intraAvailC = video->mbAvailC; + video->intraAvailD = video->mbAvailD; + } + else + { + if (video->mbAvailA) + { + video->intraAvailA = video->mblock[video->mbAddrA].mb_intra; + } + if (video->mbAvailB) + { + video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ; + } + if (video->mbAvailC) + { + video->intraAvailC = video->mblock[video->mbAddrC].mb_intra; + } + if (video->mbAvailD) + { + video->intraAvailD = video->mblock[video->mbAddrD].mb_intra; + } + } + + /* encode_one_macroblock() */ + status = EncodeMB(encvid); + if (status != AVCENC_SUCCESS) + { + break; + } + + /* go to next MB */ + CurrMbAddr++; + + while ((uint)video->MbToSliceGroupMap[CurrMbAddr] != slice_group_id && + (uint)CurrMbAddr < video->PicSizeInMbs) + { + CurrMbAddr++; + } + + if ((uint)CurrMbAddr >= video->PicSizeInMbs) + { + /* end of slice, return, but before that check to see if there are other slices + to be encoded. 
*/ + encvid->currSliceGroup++; + if (encvid->currSliceGroup > (int)pps->num_slice_groups_minus1) /* no more slice group */ + { + status = AVCENC_PICTURE_READY; + break; + } + else + { + /* find first_mb_num for the next slice */ + CurrMbAddr = 0; + while (video->MbToSliceGroupMap[CurrMbAddr] != encvid->currSliceGroup && + (uint)CurrMbAddr < video->PicSizeInMbs) + { + CurrMbAddr++; + } + if ((uint)CurrMbAddr >= video->PicSizeInMbs) + { + status = AVCENC_SLICE_EMPTY; /* error, one slice group has no MBs in it */ + } + + video->mbNum = CurrMbAddr; + status = AVCENC_SUCCESS; + break; + } + } + } + + if (video->mb_skip_run > 0) + { + /* write skip_run */ + if (slice_type != AVC_I_SLICE && slice_type != AVC_SI_SLICE) + { + ue_v(stream, video->mb_skip_run); + video->mb_skip_run = 0; + } + else /* shouldn't happen */ + { + status = AVCENC_FAIL; + } + } + + return status; +} + + +AVCEnc_Status EncodeMB(AVCEncObject *encvid) +{ + AVCEnc_Status status = AVCENC_SUCCESS; + AVCCommonObj *video = encvid->common; + AVCPictureData *currPic = video->currPic; + AVCFrameIO *currInput = encvid->currInput; + AVCMacroblock *currMB = video->currMB; + AVCMacroblock *MB_A, *MB_B; + AVCEncBitstream *stream = encvid->bitstream; + AVCRateControl *rateCtrl = encvid->rateCtrl; + uint8 *cur, *curL, *curCb, *curCr; + uint8 *orgL, *orgCb, *orgCr, *org4; + int CurrMbAddr = video->mbNum; + int picPitch = currPic->pitch; + int orgPitch = currInput->pitch; + int x_position = (video->mb_x << 4); + int y_position = (video->mb_y << 4); + int offset; + int b8, b4, blkidx; + AVCResidualType resType; + int slice_type; + int numcoeff; /* output from residual_block_cavlc */ + int cost16, cost8; + + int num_bits, start_mb_bits, start_text_bits; + + slice_type = video->slice_type; + + /* now, point to the reconstructed frame */ + offset = y_position * picPitch + x_position; + curL = currPic->Sl + offset; + orgL = currInput->YCbCr[0] + offset; + offset = (offset + x_position) >> 2; + curCb = currPic->Scb + offset; + curCr = currPic->Scr + offset; + orgCb = currInput->YCbCr[1] + offset; + orgCr = currInput->YCbCr[2] + offset; + + if (orgPitch != picPitch) + { + offset = y_position * (orgPitch - picPitch); + orgL += offset; + offset >>= 2; + orgCb += offset; + orgCr += offset; + } + + /******* determine MB prediction mode *******/ + if (encvid->intraSearch[CurrMbAddr]) + { + MBIntraSearch(encvid, CurrMbAddr, curL, picPitch); + } + /******* This part should be determined somehow ***************/ + if (currMB->mbMode == AVC_I_PCM) + { + /* write down mb_type and PCM data */ + /* and copy from currInput to currPic */ + status = EncodeIntraPCM(encvid); + + + return status; + } + + /****** for intra prediction, pred is already done *******/ + /****** for I4, the recon is ready and Xfrm coefs are ready to be encoded *****/ + + //RCCalculateMAD(encvid,currMB,orgL,orgPitch); // no need to re-calculate MAD for Intra + // not used since totalSAD is used instead + + /* compute the prediction */ + /* output is video->pred_block */ + if (!currMB->mb_intra) + { + AVCMBMotionComp(encvid, video); /* perform prediction and residue calculation */ + /* we can do the loop here and call dct_luma */ + video->pred_pitch = picPitch; + currMB->CBP = 0; + cost16 = 0; + cur = curL; + org4 = orgL; + + for (b8 = 0; b8 < 4; b8++) + { + cost8 = 0; + + for (b4 = 0; b4 < 4; b4++) + { + blkidx = blkIdx2blkXY[b8][b4]; + video->pred_block = cur; + numcoeff = dct_luma(encvid, blkidx, cur, org4, &cost8); + currMB->nz_coeff[blkidx] = numcoeff; + if (numcoeff) + { + 
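+                        /* a nonzero 4x4 block marks its bit in the per-block
+                           cbp4x4 map and sets the luma CBP bit of the 8x8
+                           block (bit b8) that contains it */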
video->cbp4x4 |= (1 << blkidx); + currMB->CBP |= (1 << b8); + } + + if (b4&1) + { + cur += ((picPitch << 2) - 4); + org4 += ((orgPitch << 2) - 4); + } + else + { + cur += 4; + org4 += 4; + } + } + + /* move the IDCT part out of dct_luma to accommodate the check + for coeff_cost. */ + + if ((currMB->CBP&(1 << b8)) && (cost8 <= _LUMA_COEFF_COST_)) + { + cost8 = 0; // reset it + + currMB->CBP ^= (1 << b8); + blkidx = blkIdx2blkXY[b8][0]; + + currMB->nz_coeff[blkidx] = 0; + currMB->nz_coeff[blkidx+1] = 0; + currMB->nz_coeff[blkidx+4] = 0; + currMB->nz_coeff[blkidx+5] = 0; + } + + cost16 += cost8; + + if (b8&1) + { + cur -= 8; + org4 -= 8; + } + else + { + cur += (8 - (picPitch << 3)); + org4 += (8 - (orgPitch << 3)); + } + } + + /* after the whole MB, we do another check for coeff_cost */ + if ((currMB->CBP&0xF) && (cost16 <= _LUMA_MB_COEFF_COST_)) + { + currMB->CBP = 0; // reset it to zero + memset(currMB->nz_coeff, 0, sizeof(uint8)*16); + } + + // now we do IDCT + MBInterIdct(video, curL, currMB, picPitch); + +// video->pred_block = video->pred + 256; + } + else /* Intra prediction */ + { + encvid->numIntraMB++; + + if (currMB->mbMode == AVC_I16) /* do prediction for the whole macroblock */ + { + currMB->CBP = 0; + /* get the prediction from encvid->pred_i16 */ + dct_luma_16x16(encvid, curL, orgL); + } + video->pred_block = encvid->pred_ic[currMB->intra_chroma_pred_mode]; + } + + /* chrominance */ + /* not need to do anything, the result is in encvid->pred_ic + chroma dct must be aware that prediction block can come from either intra or inter. */ + + dct_chroma(encvid, curCb, orgCb, 0); + + dct_chroma(encvid, curCr, orgCr, 1); + + + /* 4.1 if there's nothing in there, video->mb_skip_run++ */ + /* 4.2 if coded, check if there is a run of skipped MB, encodes it, + set video->QPyprev = currMB->QPy; */ + + /* 5. vlc encode */ + + /* check for skipped macroblock, INTER only */ + if (!currMB->mb_intra) + { + /* decide whether this MB (for inter MB) should be skipped if there's nothing left. 
*/ + if (!currMB->CBP && currMB->NumMbPart == 1 && currMB->QPy == video->QPy) + { + if (currMB->MBPartPredMode[0][0] == AVC_Pred_L0 && currMB->ref_idx_L0[0] == 0) + { + MB_A = &video->mblock[video->mbAddrA]; + MB_B = &video->mblock[video->mbAddrB]; + + if (!video->mbAvailA || !video->mbAvailB) + { + if (currMB->mvL0[0] == 0) /* both mv components are zeros.*/ + { + currMB->mbMode = AVC_SKIP; + video->mvd_l0[0][0][0] = 0; + video->mvd_l0[0][0][1] = 0; + } + } + else + { + if ((MB_A->ref_idx_L0[1] == 0 && MB_A->mvL0[3] == 0) || + (MB_B->ref_idx_L0[2] == 0 && MB_B->mvL0[12] == 0)) + { + if (currMB->mvL0[0] == 0) /* both mv components are zeros.*/ + { + currMB->mbMode = AVC_SKIP; + video->mvd_l0[0][0][0] = 0; + video->mvd_l0[0][0][1] = 0; + } + } + else if (video->mvd_l0[0][0][0] == 0 && video->mvd_l0[0][0][1] == 0) + { + currMB->mbMode = AVC_SKIP; + } + } + } + + if (currMB->mbMode == AVC_SKIP) + { + video->mb_skip_run++; + + /* set parameters */ + /* not sure whether we need the followings */ + if (slice_type == AVC_P_SLICE) + { + currMB->mbMode = AVC_SKIP; + currMB->MbPartWidth = currMB->MbPartHeight = 16; + currMB->MBPartPredMode[0][0] = AVC_Pred_L0; + currMB->NumMbPart = 1; + currMB->NumSubMbPart[0] = currMB->NumSubMbPart[1] = + currMB->NumSubMbPart[2] = currMB->NumSubMbPart[3] = 1; + currMB->SubMbPartWidth[0] = currMB->SubMbPartWidth[1] = + currMB->SubMbPartWidth[2] = currMB->SubMbPartWidth[3] = currMB->MbPartWidth; + currMB->SubMbPartHeight[0] = currMB->SubMbPartHeight[1] = + currMB->SubMbPartHeight[2] = currMB->SubMbPartHeight[3] = currMB->MbPartHeight; + + } + else if (slice_type == AVC_B_SLICE) + { + currMB->mbMode = AVC_SKIP; + currMB->MbPartWidth = currMB->MbPartHeight = 8; + currMB->MBPartPredMode[0][0] = AVC_Direct; + currMB->NumMbPart = -1; + } + + /* for skipped MB, always look at the first entry in RefPicList */ + currMB->RefIdx[0] = currMB->RefIdx[1] = + currMB->RefIdx[2] = currMB->RefIdx[3] = video->RefPicList0[0]->RefIdx; + + /* do not return yet, need to do some copies */ + } + } + } + /* non-skipped MB */ + + + /************* START ENTROPY CODING *************************/ + + start_mb_bits = 32 + (encvid->bitstream->write_pos << 3) - encvid->bitstream->bit_left; + + /* encode mb_type, mb_pred, sub_mb_pred, CBP */ + if (slice_type != AVC_I_SLICE && slice_type != AVC_SI_SLICE && currMB->mbMode != AVC_SKIP) + { + //if(!pps->entropy_coding_mode_flag) ALWAYS true + { + ue_v(stream, video->mb_skip_run); + video->mb_skip_run = 0; + } + } + + if (currMB->mbMode != AVC_SKIP) + { + status = EncodeMBHeader(currMB, encvid); + if (status != AVCENC_SUCCESS) + { + return status; + } + } + + start_text_bits = 32 + (encvid->bitstream->write_pos << 3) - encvid->bitstream->bit_left; + + /**** now decoding part *******/ + resType = AVC_Luma; + + /* DC transform for luma I16 mode */ + if (currMB->mbMode == AVC_I16) + { + /* vlc encode level/run */ + status = enc_residual_block(encvid, AVC_Intra16DC, encvid->numcoefdc, currMB); + if (status != AVCENC_SUCCESS) + { + return status; + } + resType = AVC_Intra16AC; + } + + /* VLC encoding for luma */ + for (b8 = 0; b8 < 4; b8++) + { + if (currMB->CBP&(1 << b8)) + { + for (b4 = 0; b4 < 4; b4++) + { + /* vlc encode level/run */ + status = enc_residual_block(encvid, resType, (b8 << 2) + b4, currMB); + if (status != AVCENC_SUCCESS) + { + return status; + } + } + } + } + + /* chroma */ + if (currMB->CBP & (3 << 4)) /* chroma DC residual present */ + { + for (b8 = 0; b8 < 2; b8++) /* for iCbCr */ + { + /* vlc encode level/run */ + status = 
enc_residual_block(encvid, AVC_ChromaDC, encvid->numcoefcdc[b8] + (b8 << 3), currMB); + if (status != AVCENC_SUCCESS) + { + return status; + } + } + } + + if (currMB->CBP & (2 << 4)) + { + /* AC part */ + for (b8 = 0; b8 < 2; b8++) /* for iCbCr */ + { + for (b4 = 0; b4 < 4; b4++) /* for each block inside Cb or Cr */ + { + /* vlc encode level/run */ + status = enc_residual_block(encvid, AVC_ChromaAC, 16 + (b8 << 2) + b4, currMB); + if (status != AVCENC_SUCCESS) + { + return status; + } + } + } + } + + + num_bits = 32 + (encvid->bitstream->write_pos << 3) - encvid->bitstream->bit_left; + + RCPostMB(video, rateCtrl, start_text_bits - start_mb_bits, + num_bits - start_text_bits); + +// num_bits -= start_mb_bits; +// fprintf(fdebug,"MB #%d: %d bits\n",CurrMbAddr,num_bits); +// fclose(fdebug); + return status; +} + +/* copy the content from predBlock back to the reconstructed YUV frame */ +void Copy_MB(uint8 *curL, uint8 *curCb, uint8 *curCr, uint8 *predBlock, int picPitch) +{ + int j, offset; + uint32 *dst, *dst2, *src; + + dst = (uint32*)curL; + src = (uint32*)predBlock; + + offset = (picPitch - 16) >> 2; + + for (j = 0; j < 16; j++) + { + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + + dst += offset; + } + + dst = (uint32*)curCb; + dst2 = (uint32*)curCr; + offset >>= 1; + + for (j = 0; j < 8; j++) + { + *dst++ = *src++; + *dst++ = *src++; + *dst2++ = *src++; + *dst2++ = *src++; + + dst += offset; + dst2 += offset; + } + return ; +} + +/* encode mb_type, mb_pred, sub_mb_pred, CBP */ +/* decide whether this MB (for inter MB) should be skipped */ +AVCEnc_Status EncodeMBHeader(AVCMacroblock *currMB, AVCEncObject *encvid) +{ + AVCEnc_Status status = AVCENC_SUCCESS; + uint mb_type; + AVCCommonObj *video = encvid->common; + AVCEncBitstream *stream = encvid->bitstream; + + if (currMB->CBP > 47) /* chroma CBP is 11 */ + { + currMB->CBP -= 16; /* remove the 5th bit from the right */ + } + + mb_type = InterpretMBType(currMB, video->slice_type); + + status = ue_v(stream, mb_type); + + if (currMB->mbMode == AVC_P8 || currMB->mbMode == AVC_P8ref0) + { + status = sub_mb_pred(video, currMB, stream); + } + else + { + status = mb_pred(video, currMB, stream) ; + } + + if (currMB->mbMode != AVC_I16) + { + /* decode coded_block_pattern */ + status = EncodeCBP(currMB, stream); + } + + /* calculate currMB->mb_qp_delta = currMB->QPy - video->QPyprev */ + if (currMB->CBP > 0 || currMB->mbMode == AVC_I16) + { + status = se_v(stream, currMB->QPy - video->QPy); + video->QPy = currMB->QPy; /* = (video->QPyprev + currMB->mb_qp_delta + 52)%52; */ + // no need video->QPc = currMB->QPc; + } + else + { + if (currMB->QPy != video->QPy) // current QP is not the same as previous QP + { + /* restore these values */ + RCRestoreQP(currMB, video, encvid); + } + } + + return status; +} + + +/* inputs are mbMode, mb_intra, i16Mode, CBP, NumMbPart, MbPartWidth, MbPartHeight */ +uint InterpretMBType(AVCMacroblock *currMB, int slice_type) +{ + int CBP_chrom; + int mb_type;// part1, part2, part3; +// const static int MapParts2Type[2][3][3]={{{4,8,12},{10,6,14},{16,18,20}}, +// {{5,9,13},{11,7,15},{17,19,21}}}; + + if (currMB->mb_intra) + { + if (currMB->mbMode == AVC_I4) + { + mb_type = 0; + } + else if (currMB->mbMode == AVC_I16) + { + CBP_chrom = (currMB->CBP & 0x30); + if (currMB->CBP&0xF) + { + currMB->CBP |= 0xF; /* either 0x0 or 0xF */ + mb_type = 13; + } + else + { + mb_type = 1; + } + mb_type += (CBP_chrom >> 2) + currMB->i16Mode; + } + else /* if(currMB->mbMode == AVC_I_PCM) */ + { + mb_type = 25; + 
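+            /* mb_type 25 signals I_PCM in an I slice; the additional offset
+               of 5 for intra macroblocks in P slices is applied below. */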
} + } + else + { /* P-MB *//* note that the order of the enum AVCMBMode cannot be changed + since we use it here. */ + mb_type = currMB->mbMode - AVC_P16; + } + + if (slice_type == AVC_P_SLICE) + { + if (currMB->mb_intra) + { + mb_type += 5; + } + } + // following codes have not been tested yet, not needed. + /* else if(slice_type == AVC_B_SLICE) + { + if(currMB->mbMode == AVC_BDirect16) + { + mb_type = 0; + } + else if(currMB->mbMode == AVC_P16) + { + mb_type = currMB->MBPartPredMode[0][0] + 1; // 1 or 2 + } + else if(currMB->mbMode == AVC_P8) + { + mb_type = 26; + } + else if(currMB->mbMode == AVC_P8ref0) + { + mb_type = 27; + } + else + { + part1 = currMB->mbMode - AVC_P16x8; + part2 = currMB->MBPartPredMode[0][0]; + part3 = currMB->MBPartPredMode[1][0]; + mb_type = MapParts2Type[part1][part2][part3]; + } + } + + if(slice_type == AVC_SI_SLICE) + { + mb_type++; + } + */ + return (uint)mb_type; +} + +//const static int mbPart2raster[3][4] = {{0,0,0,0},{1,1,0,0},{1,0,1,0}}; + +/* see subclause 7.3.5.1 */ +AVCEnc_Status mb_pred(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream) +{ + AVCEnc_Status status = AVCENC_SUCCESS; + int mbPartIdx; + AVCSliceHeader *sliceHdr = video->sliceHdr; + int max_ref_idx; + uint code; + + if (currMB->mbMode == AVC_I4 || currMB->mbMode == AVC_I16) + { + if (currMB->mbMode == AVC_I4) + { + /* perform prediction to get the actual intra 4x4 pred mode */ + EncodeIntra4x4Mode(video, currMB, stream); + /* output will be in currMB->i4Mode[4][4] */ + } + + /* assume already set from MBPrediction() */ + status = ue_v(stream, currMB->intra_chroma_pred_mode); + } + else if (currMB->MBPartPredMode[0][0] != AVC_Direct) + { + + memset(currMB->ref_idx_L0, 0, sizeof(int16)*4); + + /* see subclause 7.4.5.1 for the range of ref_idx_lX */ + max_ref_idx = sliceHdr->num_ref_idx_l0_active_minus1; + /* if(video->MbaffFrameFlag && currMB->mb_field_decoding_flag) + max_ref_idx = 2*sliceHdr->num_ref_idx_l0_active_minus1 + 1; + */ + /* decode ref index for L0 */ + if (sliceHdr->num_ref_idx_l0_active_minus1 > 0) + { + for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++) + { + if (/*(sliceHdr->num_ref_idx_l0_active_minus1>0 || currMB->mb_field_decoding_flag) &&*/ + currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L1) + { + code = currMB->ref_idx_L0[mbPartIdx]; + status = te_v(stream, code, max_ref_idx); + } + } + } + + /* see subclause 7.4.5.1 for the range of ref_idx_lX */ + max_ref_idx = sliceHdr->num_ref_idx_l1_active_minus1; + /* if(video->MbaffFrameFlag && currMB->mb_field_decoding_flag) + max_ref_idx = 2*sliceHdr->num_ref_idx_l1_active_minus1 + 1; + */ + /* decode ref index for L1 */ + if (sliceHdr->num_ref_idx_l1_active_minus1 > 0) + { + for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++) + { + if (/*(sliceHdr->num_ref_idx_l1_active_minus1>0 || currMB->mb_field_decoding_flag) &&*/ + currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L0) + { + status = te_v(stream, currMB->ref_idx_L1[mbPartIdx], max_ref_idx); + } + } + } + + /* encode mvd_l0 */ + for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++) + { + if (currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L1) + { + status = se_v(stream, video->mvd_l0[mbPartIdx][0][0]); + status = se_v(stream, video->mvd_l0[mbPartIdx][0][1]); + } + } + /* encode mvd_l1 */ + for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++) + { + if (currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L0) + { + status = se_v(stream, video->mvd_l1[mbPartIdx][0][0]); + status = se_v(stream, 
video->mvd_l1[mbPartIdx][0][1]); + } + } + } + + return status; +} + +/* see subclause 7.3.5.2 */ +AVCEnc_Status sub_mb_pred(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream) +{ + AVCEnc_Status status = AVCENC_SUCCESS; + int mbPartIdx, subMbPartIdx; + AVCSliceHeader *sliceHdr = video->sliceHdr; + uint max_ref_idx; + uint slice_type = video->slice_type; + uint sub_mb_type[4]; + + /* this should move somewhere else where we don't have to make this check */ + if (currMB->mbMode == AVC_P8ref0) + { + memset(currMB->ref_idx_L0, 0, sizeof(int16)*4); + } + + /* we have to check the values to make sure they are valid */ + /* assign values to currMB->sub_mb_type[] */ + if (slice_type == AVC_P_SLICE) + { + InterpretSubMBTypeP(currMB, sub_mb_type); + } + /* no need to check for B-slice + else if(slice_type == AVC_B_SLICE) + { + InterpretSubMBTypeB(currMB,sub_mb_type); + }*/ + + for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++) + { + status = ue_v(stream, sub_mb_type[mbPartIdx]); + } + + /* see subclause 7.4.5.1 for the range of ref_idx_lX */ + max_ref_idx = sliceHdr->num_ref_idx_l0_active_minus1; + /* if(video->MbaffFrameFlag && currMB->mb_field_decoding_flag) + max_ref_idx = 2*sliceHdr->num_ref_idx_l0_active_minus1 + 1; */ + + for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++) + { + if ((sliceHdr->num_ref_idx_l0_active_minus1 > 0 /*|| currMB->mb_field_decoding_flag*/) && + currMB->mbMode != AVC_P8ref0 && /*currMB->subMbMode[mbPartIdx]!=AVC_BDirect8 &&*/ + currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L1) + { + status = te_v(stream, currMB->ref_idx_L0[mbPartIdx], max_ref_idx); + } + /* used in deblocking */ + currMB->RefIdx[mbPartIdx] = video->RefPicList0[currMB->ref_idx_L0[mbPartIdx]]->RefIdx; + } + /* see subclause 7.4.5.1 for the range of ref_idx_lX */ + max_ref_idx = sliceHdr->num_ref_idx_l1_active_minus1; + /* if(video->MbaffFrameFlag && currMB->mb_field_decoding_flag) + max_ref_idx = 2*sliceHdr->num_ref_idx_l1_active_minus1 + 1;*/ + + if (sliceHdr->num_ref_idx_l1_active_minus1 > 0) + { + for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++) + { + if (/*(sliceHdr->num_ref_idx_l1_active_minus1>0 || currMB->mb_field_decoding_flag) &&*/ + /*currMB->subMbMode[mbPartIdx]!=AVC_BDirect8 &&*/ + currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L0) + { + status = te_v(stream, currMB->ref_idx_L1[mbPartIdx], max_ref_idx); + } + } + } + + for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++) + { + if (/*currMB->subMbMode[mbPartIdx]!=AVC_BDirect8 &&*/ + currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L1) + { + for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++) + { + status = se_v(stream, video->mvd_l0[mbPartIdx][subMbPartIdx][0]); + status = se_v(stream, video->mvd_l0[mbPartIdx][subMbPartIdx][1]); + } + } + } + + for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++) + { + if (/*currMB->subMbMode[mbPartIdx]!=AVC_BDirect8 &&*/ + currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L0) + { + for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++) + { + status = se_v(stream, video->mvd_l1[mbPartIdx][subMbPartIdx][0]); + status = se_v(stream, video->mvd_l1[mbPartIdx][subMbPartIdx][1]); + } + } + } + + return status; +} + +/* input is mblock->sub_mb_type[] */ +void InterpretSubMBTypeP(AVCMacroblock *mblock, uint *sub_mb_type) +{ + int i; + /* see enum AVCMBType declaration */ + /*const static AVCSubMBMode map2subMbMode[4] = {AVC_8x8,AVC_8x4,AVC_4x8,AVC_4x4}; + const static int map2subPartWidth[4] = {8,8,4,4}; + const static int map2subPartHeight[4] = 
{8,4,8,4}; + const static int map2numSubPart[4] = {1,2,2,4};*/ + + for (i = 0; i < 4 ; i++) + { + sub_mb_type[i] = mblock->subMbMode[i] - AVC_8x8; + } + + return ; +} + +void InterpretSubMBTypeB(AVCMacroblock *mblock, uint *sub_mb_type) +{ + int i; + /* see enum AVCMBType declaration */ + /* const static AVCSubMBMode map2subMbMode[13] = {AVC_BDirect8,AVC_8x8,AVC_8x8, + AVC_8x8,AVC_8x4,AVC_4x8,AVC_8x4,AVC_4x8,AVC_8x4,AVC_4x8,AVC_4x4,AVC_4x4,AVC_4x4}; + const static int map2subPartWidth[13] = {4,8,8,8,8,4,8,4,8,4,4,4,4}; + const static int map2subPartHeight[13] = {4,8,8,8,4,8,4,8,4,8,4,4,4}; + const static int map2numSubPart[13] = {4,1,1,1,2,2,2,2,2,2,4,4,4}; + const static int map2predMode[13] = {3,0,1,2,0,0,1,1,2,2,0,1,2};*/ + + for (i = 0; i < 4 ; i++) + { + if (mblock->subMbMode[i] == AVC_BDirect8) + { + sub_mb_type[i] = 0; + } + else if (mblock->subMbMode[i] == AVC_8x8) + { + sub_mb_type[i] = 1 + mblock->MBPartPredMode[i][0]; + } + else if (mblock->subMbMode[i] == AVC_4x4) + { + sub_mb_type[i] = 10 + mblock->MBPartPredMode[i][0]; + } + else + { + sub_mb_type[i] = 4 + (mblock->MBPartPredMode[i][0] << 1) + (mblock->subMbMode[i] - AVC_8x4); + } + } + + return ; +} + +/* see subclause 8.3.1 */ +AVCEnc_Status EncodeIntra4x4Mode(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream) +{ + int intra4x4PredModeA = 0; + int intra4x4PredModeB, predIntra4x4PredMode; + int component, SubBlock_indx, block_x, block_y; + int dcOnlyPredictionFlag; + uint flag; + int rem = 0; + int mode; + int bindx = 0; + + for (component = 0; component < 4; component++) /* partition index */ + { + block_x = ((component & 1) << 1); + block_y = ((component >> 1) << 1); + + for (SubBlock_indx = 0; SubBlock_indx < 4; SubBlock_indx++) /* sub-partition index */ + { + dcOnlyPredictionFlag = 0; + if (block_x > 0) + { + intra4x4PredModeA = currMB->i4Mode[(block_y << 2) + block_x - 1 ]; + } + else + { + if (video->intraAvailA) + { + if (video->mblock[video->mbAddrA].mbMode == AVC_I4) + { + intra4x4PredModeA = video->mblock[video->mbAddrA].i4Mode[(block_y << 2) + 3]; + } + else + { + intra4x4PredModeA = AVC_I4_DC; + } + } + else + { + dcOnlyPredictionFlag = 1; + } + } + + if (block_y > 0) + { + intra4x4PredModeB = currMB->i4Mode[((block_y-1) << 2) + block_x]; + } + else + { + if (video->intraAvailB) + { + if (video->mblock[video->mbAddrB].mbMode == AVC_I4) + { + intra4x4PredModeB = video->mblock[video->mbAddrB].i4Mode[(3 << 2) + block_x]; + } + else + { + intra4x4PredModeB = AVC_I4_DC; + } + } + else + { + dcOnlyPredictionFlag = 1; + } + } + + if (dcOnlyPredictionFlag) + { + intra4x4PredModeA = intra4x4PredModeB = AVC_I4_DC; + } + + predIntra4x4PredMode = AVC_MIN(intra4x4PredModeA, intra4x4PredModeB); + + flag = 0; + mode = currMB->i4Mode[(block_y<<2)+block_x]; + + if (mode == (AVCIntra4x4PredMode)predIntra4x4PredMode) + { + flag = 1; + } + else if (mode < predIntra4x4PredMode) + { + rem = mode; + } + else + { + rem = mode - 1; + } + + BitstreamWrite1Bit(stream, flag); + + if (!flag) + { + BitstreamWriteBits(stream, 3, rem); + } + + bindx++; + block_y += (SubBlock_indx & 1) ; + block_x += (1 - 2 * (SubBlock_indx & 1)) ; + } + } + + return AVCENC_SUCCESS; +} + + + diff --git a/media/libstagefright/codecs/avc/enc/src/vlc_encode.cpp b/media/libstagefright/codecs/avc/enc/src/vlc_encode.cpp new file mode 100644 index 0000000..222e709 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/vlc_encode.cpp @@ -0,0 +1,336 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 
1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + +/** +See algorithm in subclause 9.1, Table 9-1, Table 9-2. */ +AVCEnc_Status ue_v(AVCEncBitstream *bitstream, uint codeNum) +{ + if (AVCENC_SUCCESS != SetEGBitstring(bitstream, codeNum)) + return AVCENC_FAIL; + + return AVCENC_SUCCESS; +} + +/** +See subclause 9.1.1, Table 9-3 */ +AVCEnc_Status se_v(AVCEncBitstream *bitstream, int value) +{ + uint codeNum; + AVCEnc_Status status; + + if (value <= 0) + { + codeNum = -value * 2; + } + else + { + codeNum = value * 2 - 1; + } + + status = ue_v(bitstream, codeNum); + + return status; +} + +AVCEnc_Status te_v(AVCEncBitstream *bitstream, uint value, uint range) +{ + AVCEnc_Status status; + + if (range > 1) + { + return ue_v(bitstream, value); + } + else + { + status = BitstreamWrite1Bit(bitstream, 1 - value); + return status; + } +} + +/** +See subclause 9.1, Table 9-1, 9-2. */ +// compute leadingZeros and inforbits +//codeNum = (1<mbMode == AVC_I4) + { + codeNum = MapCBP2code[currMB->CBP][0]; + } + else + { + codeNum = MapCBP2code[currMB->CBP][1]; + } + + status = ue_v(stream, codeNum); + + return status; +} + +AVCEnc_Status ce_TotalCoeffTrailingOnes(AVCEncBitstream *stream, int TrailingOnes, int TotalCoeff, int nC) +{ + const static uint8 totCoeffTrailOne[3][4][17][2] = + { + { // 0702 + {{1, 1}, {6, 5}, {8, 7}, {9, 7}, {10, 7}, {11, 7}, {13, 15}, {13, 11}, {13, 8}, {14, 15}, {14, 11}, {15, 15}, {15, 11}, {16, 15}, {16, 11}, {16, 7}, {16, 4}}, + {{0, 0}, {2, 1}, {6, 4}, {8, 6}, {9, 6}, {10, 6}, {11, 6}, {13, 14}, {13, 10}, {14, 14}, {14, 10}, {15, 14}, {15, 10}, {15, 1}, {16, 14}, {16, 10}, {16, 6}}, + {{0, 0}, {0, 0}, {3, 1}, {7, 5}, {8, 5}, {9, 5}, {10, 5}, {11, 5}, {13, 13}, {13, 9}, {14, 13}, {14, 9}, {15, 13}, {15, 9}, {16, 13}, {16, 9}, {16, 5}}, + {{0, 0}, {0, 0}, {0, 0}, {5, 3}, {6, 3}, {7, 4}, {8, 4}, {9, 4}, {10, 4}, {11, 4}, {13, 12}, {14, 12}, {14, 8}, {15, 12}, {15, 8}, {16, 12}, {16, 8}}, + }, + { + {{2, 3}, {6, 11}, {6, 7}, {7, 7}, {8, 7}, {8, 4}, {9, 7}, {11, 15}, {11, 11}, {12, 15}, {12, 11}, {12, 8}, {13, 15}, {13, 11}, {13, 7}, {14, 9}, {14, 7}}, + {{0, 0}, {2, 2}, {5, 7}, {6, 10}, {6, 6}, {7, 6}, {8, 6}, {9, 6}, {11, 14}, {11, 10}, {12, 14}, {12, 10}, {13, 14}, {13, 10}, {14, 11}, {14, 8}, {14, 6}}, + {{0, 0}, {0, 0}, {3, 3}, {6, 9}, {6, 5}, {7, 5}, {8, 5}, {9, 5}, {11, 13}, {11, 9}, {12, 13}, {12, 9}, {13, 13}, {13, 9}, {13, 6}, {14, 10}, {14, 5}}, + {{0, 0}, {0, 0}, {0, 0}, {4, 5}, {4, 4}, {5, 6}, {6, 8}, {6, 4}, {7, 4}, {9, 4}, {11, 12}, {11, 8}, {12, 12}, {13, 12}, {13, 8}, {13, 1}, {14, 4}}, + }, + { + {{4, 15}, {6, 15}, {6, 11}, {6, 8}, {7, 15}, {7, 11}, {7, 9}, {7, 8}, {8, 15}, {8, 11}, {9, 15}, {9, 11}, {9, 8}, {10, 13}, {10, 9}, {10, 5}, {10, 1}}, + {{0, 0}, {4, 14}, {5, 15}, {5, 12}, {5, 10}, {5, 8}, {6, 14}, {6, 10}, {7, 14}, {8, 14}, {8, 10}, {9, 14}, {9, 10}, {9, 7}, {10, 12}, {10, 8}, {10, 4}}, + {{0, 0}, {0, 0}, {4, 13}, {5, 
14}, {5, 11}, {5, 9}, {6, 13}, {6, 9}, {7, 13}, {7, 10}, {8, 13}, {8, 9}, {9, 13}, {9, 9}, {10, 11}, {10, 7}, {10, 3}}, + {{0, 0}, {0, 0}, {0, 0}, {4, 12}, {4, 11}, {4, 10}, {4, 9}, {4, 8}, {5, 13}, {6, 12}, {7, 12}, {8, 12}, {8, 8}, {9, 12}, {10, 10}, {10, 6}, {10, 2}} + } + }; + + + AVCEnc_Status status = AVCENC_SUCCESS; + uint code, len; + int vlcnum; + + if (TrailingOnes > 3) + { + return AVCENC_TRAILINGONES_FAIL; + } + + if (nC >= 8) + { + if (TotalCoeff) + { + code = ((TotalCoeff - 1) << 2) | (TrailingOnes); + } + else + { + code = 3; + } + status = BitstreamWriteBits(stream, 6, code); + } + else + { + if (nC < 2) + { + vlcnum = 0; + } + else if (nC < 4) + { + vlcnum = 1; + } + else + { + vlcnum = 2; + } + + len = totCoeffTrailOne[vlcnum][TrailingOnes][TotalCoeff][0]; + code = totCoeffTrailOne[vlcnum][TrailingOnes][TotalCoeff][1]; + status = BitstreamWriteBits(stream, len, code); + } + + return status; +} + +AVCEnc_Status ce_TotalCoeffTrailingOnesChromaDC(AVCEncBitstream *stream, int TrailingOnes, int TotalCoeff) +{ + const static uint8 totCoeffTrailOneChrom[4][5][2] = + { + { {2, 1}, {6, 7}, {6, 4}, {6, 3}, {6, 2}}, + { {0, 0}, {1, 1}, {6, 6}, {7, 3}, {8, 3}}, + { {0, 0}, {0, 0}, {3, 1}, {7, 2}, {8, 2}}, + { {0, 0}, {0, 0}, {0, 0}, {6, 5}, {7, 0}}, + }; + + AVCEnc_Status status = AVCENC_SUCCESS; + uint code, len; + + len = totCoeffTrailOneChrom[TrailingOnes][TotalCoeff][0]; + code = totCoeffTrailOneChrom[TrailingOnes][TotalCoeff][1]; + status = BitstreamWriteBits(stream, len, code); + + return status; +} + +/* see Table 9-7 and 9-8 */ +AVCEnc_Status ce_TotalZeros(AVCEncBitstream *stream, int total_zeros, int TotalCoeff) +{ + const static uint8 lenTotalZeros[15][16] = + { + { 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9}, + { 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6}, + { 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6}, + { 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5}, + { 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5}, + { 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6}, + { 6, 5, 3, 3, 3, 2, 3, 4, 3, 6}, + { 6, 4, 5, 3, 2, 2, 3, 3, 6}, + { 6, 6, 4, 2, 2, 3, 2, 5}, + { 5, 5, 3, 2, 2, 2, 4}, + { 4, 4, 3, 3, 1, 3}, + { 4, 4, 2, 1, 3}, + { 3, 3, 1, 2}, + { 2, 2, 1}, + { 1, 1}, + }; + + const static uint8 codTotalZeros[15][16] = + { + {1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1}, + {7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0}, + {5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0}, + {3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0}, + {5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0}, + {1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0}, + {1, 1, 5, 4, 3, 3, 2, 1, 1, 0}, + {1, 1, 1, 3, 3, 2, 2, 1, 0}, + {1, 0, 1, 3, 2, 1, 1, 1, }, + {1, 0, 1, 3, 2, 1, 1, }, + {0, 1, 1, 2, 1, 3}, + {0, 1, 1, 1, 1}, + {0, 1, 1, 1}, + {0, 1, 1}, + {0, 1}, + }; + int len, code; + AVCEnc_Status status; + + len = lenTotalZeros[TotalCoeff-1][total_zeros]; + code = codTotalZeros[TotalCoeff-1][total_zeros]; + + status = BitstreamWriteBits(stream, len, code); + + return status; +} + +/* see Table 9-9 */ +AVCEnc_Status ce_TotalZerosChromaDC(AVCEncBitstream *stream, int total_zeros, int TotalCoeff) +{ + const static uint8 lenTotalZerosChromaDC[3][4] = + { + { 1, 2, 3, 3, }, + { 1, 2, 2, 0, }, + { 1, 1, 0, 0, }, + }; + + const static uint8 codTotalZerosChromaDC[3][4] = + { + { 1, 1, 1, 0, }, + { 1, 1, 0, 0, }, + { 1, 0, 0, 0, }, + }; + + int len, code; + AVCEnc_Status status; + + len = lenTotalZerosChromaDC[TotalCoeff-1][total_zeros]; + code = codTotalZerosChromaDC[TotalCoeff-1][total_zeros]; + + status = BitstreamWriteBits(stream, len, code); + + return status; +} + +/* see Table 9-10 */ 
+AVCEnc_Status ce_RunBefore(AVCEncBitstream *stream, int run_before, int zerosLeft) +{ + const static uint8 lenRunBefore[7][16] = + { + {1, 1}, + {1, 2, 2}, + {2, 2, 2, 2}, + {2, 2, 2, 3, 3}, + {2, 2, 3, 3, 3, 3}, + {2, 3, 3, 3, 3, 3, 3}, + {3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + }; + + const static uint8 codRunBefore[7][16] = + { + {1, 0}, + {1, 1, 0}, + {3, 2, 1, 0}, + {3, 2, 1, 1, 0}, + {3, 2, 3, 2, 1, 0}, + {3, 0, 1, 3, 2, 5, 4}, + {7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + }; + + int len, code; + AVCEnc_Status status; + + if (zerosLeft <= 6) + { + len = lenRunBefore[zerosLeft-1][run_before]; + code = codRunBefore[zerosLeft-1][run_before]; + } + else + { + len = lenRunBefore[6][run_before]; + code = codRunBefore[6][run_before]; + } + + status = BitstreamWriteBits(stream, len, code); + + + return status; +} -- cgit v1.1
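
For readers following EncodeIntra4x4Mode() in slice.cpp above: the sketch below illustrates, outside the patch, the most-probable-mode signalling of subclause 8.3.1 that the function implements. The predicted mode is the minimum of the left (A) and top (B) neighbours' 4x4 modes, with DC substituted when a neighbour is unavailable or not intra-4x4; a one-bit flag says whether the actual mode equals the prediction, and otherwise a 3-bit remainder that skips the predicted mode is written. The helper names (encode_i4_mode, decode_i4_mode) and the small struct are hypothetical illustrations, not part of the encoder's API.

/* Standalone sketch: most-probable-mode signalling for intra 4x4 modes.
 * Not part of the patch; compile with any C99/C++ compiler. */
#include <assert.h>

typedef struct {
    int use_predicted;  /* prev_intra4x4_pred_mode_flag */
    int rem;            /* rem_intra4x4_pred_mode (3 bits), valid only if flag == 0 */
} I4ModeBits;

static I4ModeBits encode_i4_mode(int mode, int modeA, int modeB)
{
    int pred = (modeA < modeB) ? modeA : modeB;   /* AVC_MIN(A, B) */
    I4ModeBits out;
    if (mode == pred) {
        out.use_predicted = 1;
        out.rem = 0;                              /* not transmitted */
    } else {
        out.use_predicted = 0;
        out.rem = (mode < pred) ? mode : mode - 1; /* skip the predicted mode */
    }
    return out;
}

static int decode_i4_mode(I4ModeBits in, int modeA, int modeB)
{
    int pred = (modeA < modeB) ? modeA : modeB;
    if (in.use_predicted)
        return pred;
    return (in.rem < pred) ? in.rem : in.rem + 1;
}

int main(void)
{
    /* round-trip all 9 intra 4x4 modes against every neighbour combination */
    for (int modeA = 0; modeA < 9; modeA++)
        for (int modeB = 0; modeB < 9; modeB++)
            for (int mode = 0; mode < 9; mode++)
            {
                I4ModeBits b = encode_i4_mode(mode, modeA, modeB);
                assert(decode_i4_mode(b, modeA, modeB) == mode);
            }
    return 0;
}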
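
Similarly, the ue_v()/se_v()/te_v() writers in vlc_encode.cpp map a syntax element to a codeNum and then to an Exp-Golomb codeword (subclauses 9.1 and 9.1.1). The standalone sketch below reproduces those mappings against stdout instead of an AVCEncBitstream, so the bit patterns can be inspected directly; print_ue() and se_to_codeNum() are hypothetical helpers, not functions from this patch.

/* Standalone sketch: Exp-Golomb codeNum mappings used by ue_v()/se_v().
 * Not part of the patch; compile with any C99/C++ compiler. */
#include <stdio.h>

/* signed value -> codeNum, as in se_v(): 0,1,-1,2,-2,... -> 0,1,2,3,4,... */
static unsigned se_to_codeNum(int value)
{
    return (value <= 0) ? (unsigned)(-value) * 2 : (unsigned)value * 2 - 1;
}

/* Print the Exp-Golomb codeword for codeNum (subclause 9.1):
 * leadingZeros zeros, then the (leadingZeros+1)-bit value of codeNum+1. */
static void print_ue(unsigned codeNum)
{
    unsigned temp = codeNum + 1;
    int leadingZeros = -1;
    while (temp)                       /* floor(log2(codeNum + 1)) */
    {
        temp >>= 1;
        leadingZeros++;
    }
    for (int i = 0; i < leadingZeros; i++)
        putchar('0');
    for (int i = leadingZeros; i >= 0; i--)
        putchar(((codeNum + 1) >> i) & 1 ? '1' : '0');
    printf("   (codeNum %u, %d bits)\n", codeNum, 2 * leadingZeros + 1);
}

int main(void)
{
    /* ue(v) examples: 0 -> "1", 1 -> "010", 2 -> "011", 3 -> "00100" */
    for (unsigned c = 0; c < 4; c++)
        print_ue(c);

    /* se(v) examples: mvd values -1 and +3 as they would be written by se_v() */
    print_ue(se_to_codeNum(-1));   /* codeNum 2 -> "011"   */
    print_ue(se_to_codeNum(3));    /* codeNum 5 -> "00110" */
    return 0;
}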