summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJames Dong <jdong@google.com>2010-07-02 17:44:44 -0700
committerJames Dong <jdong@google.com>2010-07-13 10:58:20 -0700
commit29a84457aed4c45bc900998b5e11c03023264208 (patch)
treede114a11a9ec702c45a4a1683fa8c0024f49df8d
parenta480a6f7f204fa1712e1484a6ba9e6c8772fa110 (diff)
downloadframeworks_av-29a84457aed4c45bc900998b5e11c03023264208.zip
frameworks_av-29a84457aed4c45bc900998b5e11c03023264208.tar.gz
frameworks_av-29a84457aed4c45bc900998b5e11c03023264208.tar.bz2
Initial checkin for software AVC encoder
- Since the software encoder assumes the input is YUV420 planar, color conversion needs to be added when the input color format does not meet the requirement. With this patch, I only added a single color conversion from YUV420 semi planar to YUV420 planar. We can add more as we go. Change-Id: If8640c9e5a4f73d385ae9bb2022e57f7f62b91b9
-rw-r--r--media/libmediaplayerservice/StagefrightRecorder.cpp4
-rw-r--r--media/libstagefright/Android.mk1
-rw-r--r--media/libstagefright/OMXCodec.cpp4
-rw-r--r--media/libstagefright/codecs/avc/enc/AVCEncoder.cpp492
-rw-r--r--media/libstagefright/codecs/avc/enc/Android.mk34
-rw-r--r--media/libstagefright/codecs/avc/enc/src/avcenc_api.cpp744
-rw-r--r--media/libstagefright/codecs/avc/enc/src/avcenc_api.h320
-rw-r--r--media/libstagefright/codecs/avc/enc/src/avcenc_int.h471
-rw-r--r--media/libstagefright/codecs/avc/enc/src/avcenc_lib.h1020
-rw-r--r--media/libstagefright/codecs/avc/enc/src/bitstream_io.cpp336
-rw-r--r--media/libstagefright/codecs/avc/enc/src/block.cpp1283
-rw-r--r--media/libstagefright/codecs/avc/enc/src/findhalfpel.cpp622
-rw-r--r--media/libstagefright/codecs/avc/enc/src/header.cpp917
-rw-r--r--media/libstagefright/codecs/avc/enc/src/init.cpp899
-rw-r--r--media/libstagefright/codecs/avc/enc/src/intra_est.cpp2199
-rw-r--r--media/libstagefright/codecs/avc/enc/src/motion_comp.cpp2156
-rw-r--r--media/libstagefright/codecs/avc/enc/src/motion_est.cpp1774
-rw-r--r--media/libstagefright/codecs/avc/enc/src/rate_control.cpp981
-rw-r--r--media/libstagefright/codecs/avc/enc/src/residual.cpp389
-rw-r--r--media/libstagefright/codecs/avc/enc/src/sad.cpp290
-rw-r--r--media/libstagefright/codecs/avc/enc/src/sad_halfpel.cpp629
-rw-r--r--media/libstagefright/codecs/avc/enc/src/sad_halfpel_inline.h96
-rw-r--r--media/libstagefright/codecs/avc/enc/src/sad_inline.h488
-rw-r--r--media/libstagefright/codecs/avc/enc/src/sad_mb_offset.h311
-rw-r--r--media/libstagefright/codecs/avc/enc/src/slice.cpp1025
-rw-r--r--media/libstagefright/codecs/avc/enc/src/vlc_encode.cpp336
-rw-r--r--media/libstagefright/include/AVCEncoder.h90
27 files changed, 17910 insertions, 1 deletions
diff --git a/media/libmediaplayerservice/StagefrightRecorder.cpp b/media/libmediaplayerservice/StagefrightRecorder.cpp
index 72061ad..cba5084 100644
--- a/media/libmediaplayerservice/StagefrightRecorder.cpp
+++ b/media/libmediaplayerservice/StagefrightRecorder.cpp
@@ -868,17 +868,19 @@ status_t StagefrightRecorder::setupVideoEncoder(const sp<MediaWriter>& writer) {
sp<MetaData> meta = cameraSource->getFormat();
- int32_t width, height, stride, sliceHeight;
+ int32_t width, height, stride, sliceHeight, colorFormat;
CHECK(meta->findInt32(kKeyWidth, &width));
CHECK(meta->findInt32(kKeyHeight, &height));
CHECK(meta->findInt32(kKeyStride, &stride));
CHECK(meta->findInt32(kKeySliceHeight, &sliceHeight));
+ CHECK(meta->findInt32(kKeyColorFormat, &colorFormat));
enc_meta->setInt32(kKeyWidth, width);
enc_meta->setInt32(kKeyHeight, height);
enc_meta->setInt32(kKeyIFramesInterval, mIFramesInterval);
enc_meta->setInt32(kKeyStride, stride);
enc_meta->setInt32(kKeySliceHeight, sliceHeight);
+ enc_meta->setInt32(kKeyColorFormat, colorFormat);
if (mVideoEncoderProfile != -1) {
enc_meta->setInt32(kKeyVideoProfile, mVideoEncoderProfile);
}
diff --git a/media/libstagefright/Android.mk b/media/libstagefright/Android.mk
index 60d0233..49cf647 100644
--- a/media/libstagefright/Android.mk
+++ b/media/libstagefright/Android.mk
@@ -66,6 +66,7 @@ LOCAL_STATIC_LIBRARIES := \
libstagefright_amrwbdec \
libstagefright_amrwbenc \
libstagefright_avcdec \
+ libstagefright_avcenc \
libstagefright_m4vh263dec \
libstagefright_mp3dec \
libstagefright_vorbisdec \
diff --git a/media/libstagefright/OMXCodec.cpp b/media/libstagefright/OMXCodec.cpp
index efaab5b..077e123 100644
--- a/media/libstagefright/OMXCodec.cpp
+++ b/media/libstagefright/OMXCodec.cpp
@@ -25,6 +25,7 @@
#include "include/AMRWBDecoder.h"
#include "include/AMRWBEncoder.h"
#include "include/AVCDecoder.h"
+#include "include/AVCEncoder.h"
#include "include/M4vH263Decoder.h"
#include "include/MP3Decoder.h"
#include "include/VorbisDecoder.h"
@@ -81,6 +82,7 @@ FACTORY_CREATE(VPXDecoder)
FACTORY_CREATE_ENCODER(AMRNBEncoder)
FACTORY_CREATE_ENCODER(AMRWBEncoder)
FACTORY_CREATE_ENCODER(AACEncoder)
+FACTORY_CREATE_ENCODER(AVCEncoder)
static sp<MediaSource> InstantiateSoftwareEncoder(
const char *name, const sp<MediaSource> &source,
@@ -94,6 +96,7 @@ static sp<MediaSource> InstantiateSoftwareEncoder(
FACTORY_REF(AMRNBEncoder)
FACTORY_REF(AMRWBEncoder)
FACTORY_REF(AACEncoder)
+ FACTORY_REF(AVCEncoder)
};
for (size_t i = 0;
i < sizeof(kFactoryInfo) / sizeof(kFactoryInfo[0]); ++i) {
@@ -186,6 +189,7 @@ static const CodecInfo kEncoderInfo[] = {
{ MEDIA_MIMETYPE_VIDEO_AVC, "OMX.qcom.7x30.video.encoder.avc" },
{ MEDIA_MIMETYPE_VIDEO_AVC, "OMX.qcom.video.encoder.avc" },
{ MEDIA_MIMETYPE_VIDEO_AVC, "OMX.TI.Video.encoder" },
+ { MEDIA_MIMETYPE_VIDEO_AVC, "AVCEncoder" },
// { MEDIA_MIMETYPE_VIDEO_AVC, "OMX.PV.avcenc" },
};
diff --git a/media/libstagefright/codecs/avc/enc/AVCEncoder.cpp b/media/libstagefright/codecs/avc/enc/AVCEncoder.cpp
new file mode 100644
index 0000000..d5eb156
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/AVCEncoder.cpp
@@ -0,0 +1,492 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//#define LOG_NDEBUG 0
+#define LOG_TAG "AVCEncoder"
+#include <utils/Log.h>
+
+#include "AVCEncoder.h"
+
+#include "avcenc_api.h"
+#include "avcenc_int.h"
+#include "OMX_Video.h"
+
+#include <media/stagefright/MediaBufferGroup.h>
+#include <media/stagefright/MediaDebug.h>
+#include <media/stagefright/MediaDefs.h>
+#include <media/stagefright/MediaErrors.h>
+#include <media/stagefright/MetaData.h>
+#include <media/stagefright/Utils.h>
+
+namespace android {
+
+inline static void ConvertYUV420SemiPlanarToYUV420Planar(
+ uint8_t *inyuv, uint8_t* outyuv,
+ int32_t width, int32_t height) {
+
+ int32_t outYsize = width * height;
+ uint32_t *outy = (uint32_t *) outyuv;
+ uint16_t *outcb = (uint16_t *) (outyuv + outYsize);
+ uint16_t *outcr = (uint16_t *) (outyuv + outYsize + (outYsize >> 2));
+
+ /* Y copying */
+ memcpy(outy, inyuv, outYsize);
+
+ /* U & V copying */
+ uint32_t *inyuv_4 = (uint32_t *) (inyuv + outYsize);
+ for (int32_t i = height >> 1; i > 0; --i) {
+ for (int32_t j = width >> 2; j > 0; --j) {
+ uint32_t temp = *inyuv_4++;
+ uint32_t tempU = temp & 0xFF;
+ tempU = tempU | ((temp >> 8) & 0xFF00);
+
+ uint32_t tempV = (temp >> 8) & 0xFF;
+ tempV = tempV | ((temp >> 16) & 0xFF00);
+
+ // Flip U and V
+ *outcb++ = tempV;
+ *outcr++ = tempU;
+ }
+ }
+}
+
+static int32_t MallocWrapper(
+ void *userData, int32_t size, int32_t attrs) {
+ return reinterpret_cast<int32_t>(malloc(size));
+}
+
+static void FreeWrapper(void *userData, int32_t ptr) {
+ free(reinterpret_cast<void *>(ptr));
+}
+
+static int32_t DpbAllocWrapper(void *userData,
+ unsigned int sizeInMbs, unsigned int numBuffers) {
+ AVCEncoder *encoder = static_cast<AVCEncoder *>(userData);
+ CHECK(encoder != NULL);
+ return encoder->allocOutputBuffers(sizeInMbs, numBuffers);
+}
+
+static int32_t BindFrameWrapper(
+ void *userData, int32_t index, uint8_t **yuv) {
+ AVCEncoder *encoder = static_cast<AVCEncoder *>(userData);
+ CHECK(encoder != NULL);
+ return encoder->bindOutputBuffer(index, yuv);
+}
+
+static void UnbindFrameWrapper(void *userData, int32_t index) {
+ AVCEncoder *encoder = static_cast<AVCEncoder *>(userData);
+ CHECK(encoder != NULL);
+ return encoder->unbindOutputBuffer(index);
+}
+
+AVCEncoder::AVCEncoder(
+ const sp<MediaSource>& source,
+ const sp<MetaData>& meta)
+ : mSource(source),
+ mMeta(meta),
+ mNumInputFrames(-1),
+ mStarted(false),
+ mInputBuffer(NULL),
+ mInputFrameData(NULL),
+ mGroup(NULL) {
+
+ LOGV("Construct software AVCEncoder");
+
+ mHandle = new tagAVCHandle;
+ memset(mHandle, 0, sizeof(tagAVCHandle));
+ mHandle->AVCObject = NULL;
+ mHandle->userData = this;
+ mHandle->CBAVC_DPBAlloc = DpbAllocWrapper;
+ mHandle->CBAVC_FrameBind = BindFrameWrapper;
+ mHandle->CBAVC_FrameUnbind = UnbindFrameWrapper;
+ mHandle->CBAVC_Malloc = MallocWrapper;
+ mHandle->CBAVC_Free = FreeWrapper;
+
+ mInitCheck = initCheck(meta);
+}
+
+AVCEncoder::~AVCEncoder() {
+ LOGV("Destruct software AVCEncoder");
+ if (mStarted) {
+ stop();
+ }
+
+ delete mEncParams;
+ delete mHandle;
+}
+
+status_t AVCEncoder::initCheck(const sp<MetaData>& meta) {
+ LOGV("initCheck");
+ CHECK(meta->findInt32(kKeyWidth, &mVideoWidth));
+ CHECK(meta->findInt32(kKeyHeight, &mVideoHeight));
+ CHECK(meta->findInt32(kKeySampleRate, &mVideoFrameRate));
+ CHECK(meta->findInt32(kKeyBitRate, &mVideoBitRate));
+
+ // XXX: Add more color format support
+ CHECK(meta->findInt32(kKeyColorFormat, &mVideoColorFormat));
+ if (mVideoColorFormat != OMX_COLOR_FormatYUV420Planar) {
+ if (mVideoColorFormat != OMX_COLOR_FormatYUV420SemiPlanar) {
+ LOGE("Color format %d is not supported", mVideoColorFormat);
+ return BAD_VALUE;
+ }
+ // Allocate spare buffer only when color conversion is needed.
+ // Assume the color format is OMX_COLOR_FormatYUV420SemiPlanar.
+ mInputFrameData =
+ (uint8_t *) malloc((mVideoWidth * mVideoHeight * 3 ) >> 1);
+ CHECK(mInputFrameData);
+ }
+
+ // XXX: Remove this restriction
+ if (mVideoWidth % 16 != 0 || mVideoHeight % 16 != 0) {
+ LOGE("Video frame size %dx%d must be a multiple of 16",
+ mVideoWidth, mVideoHeight);
+ return BAD_VALUE;
+ }
+
+ mEncParams = new tagAVCEncParam;
+ memset(mEncParams, 0, sizeof(mEncParams));
+ mEncParams->width = mVideoWidth;
+ mEncParams->height = mVideoHeight;
+ mEncParams->frame_rate = 1000 * mVideoFrameRate; // In frames/ms!
+ mEncParams->rate_control = AVC_ON;
+ mEncParams->bitrate = mVideoBitRate;
+ mEncParams->initQP = 0;
+ mEncParams->init_CBP_removal_delay = 1600;
+ mEncParams->CPB_size = (uint32_t) (mVideoBitRate >> 1);
+
+ mEncParams->intramb_refresh = 0;
+ mEncParams->auto_scd = AVC_ON;
+ mEncParams->out_of_band_param_set = AVC_ON;
+ mEncParams->poc_type = 2;
+ mEncParams->log2_max_poc_lsb_minus_4 = 12;
+ mEncParams->delta_poc_zero_flag = 0;
+ mEncParams->offset_poc_non_ref = 0;
+ mEncParams->offset_top_bottom = 0;
+ mEncParams->num_ref_in_cycle = 0;
+ mEncParams->offset_poc_ref = NULL;
+
+ mEncParams->num_ref_frame = 1;
+ mEncParams->num_slice_group = 1;
+ mEncParams->fmo_type = 0;
+
+ mEncParams->db_filter = AVC_ON;
+ mEncParams->disable_db_idc = 0;
+
+ mEncParams->alpha_offset = 0;
+ mEncParams->beta_offset = 0;
+ mEncParams->constrained_intra_pred = AVC_OFF;
+
+ mEncParams->data_par = AVC_OFF;
+ mEncParams->fullsearch = AVC_OFF;
+ mEncParams->search_range = 16;
+ mEncParams->sub_pel = AVC_OFF;
+ mEncParams->submb_pred = AVC_OFF;
+ mEncParams->rdopt_mode = AVC_OFF;
+ mEncParams->bidir_pred = AVC_OFF;
+ int32_t nMacroBlocks = ((((mVideoWidth + 15) >> 4) << 4) *
+ (((mVideoHeight + 15) >> 4) << 4)) >> 8;
+ uint32_t *sliceGroup = (uint32_t *) malloc(sizeof(uint32_t) * nMacroBlocks);
+ for (int ii = 0, idx = 0; ii < nMacroBlocks; ++ii) {
+ sliceGroup[ii] = idx++;
+ if (idx >= mEncParams->num_slice_group) {
+ idx = 0;
+ }
+ }
+ mEncParams->slice_group = sliceGroup;
+
+ mEncParams->use_overrun_buffer = AVC_OFF;
+
+ // Set IDR frame refresh interval
+ int32_t iFramesIntervalSec;
+ CHECK(meta->findInt32(kKeyIFramesInterval, &iFramesIntervalSec));
+ if (iFramesIntervalSec < 0) {
+ mEncParams->idr_period = -1;
+ } else if (iFramesIntervalSec == 0) {
+ mEncParams->idr_period = 1; // All I frames
+ } else {
+ mEncParams->idr_period =
+ (iFramesIntervalSec * mVideoFrameRate);
+ }
+ LOGV("idr_period: %d, I-frames interval: %d seconds, and frame rate: %d",
+ mEncParams->idr_period, iFramesIntervalSec, mVideoFrameRate);
+
+ // Set profile and level
+ // If profile and level setting is not correct, failure
+ // is reported when the encoder is initialized.
+ mEncParams->profile = AVC_BASELINE;
+ mEncParams->level = AVC_LEVEL3_2;
+ int32_t profile, level;
+ if (meta->findInt32(kKeyVideoProfile, &profile)) {
+ mEncParams->profile = (AVCProfile) profile;
+ }
+ if (meta->findInt32(kKeyVideoLevel, &level)) {
+ mEncParams->level = (AVCLevel) level;
+ }
+
+
+ mFormat = new MetaData;
+ mFormat->setInt32(kKeyWidth, mVideoWidth);
+ mFormat->setInt32(kKeyHeight, mVideoHeight);
+ mFormat->setInt32(kKeyBitRate, mVideoBitRate);
+ mFormat->setInt32(kKeySampleRate, mVideoFrameRate);
+ mFormat->setInt32(kKeyColorFormat, mVideoColorFormat);
+ mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC);
+ mFormat->setCString(kKeyDecoderComponent, "AVCEncoder");
+ return OK;
+}
+
+status_t AVCEncoder::start(MetaData *params) {
+ LOGV("start");
+ if (mInitCheck != OK) {
+ return mInitCheck;
+ }
+
+ if (mStarted) {
+ LOGW("Call start() when encoder already started");
+ return OK;
+ }
+
+ AVCEnc_Status err;
+ err = PVAVCEncInitialize(mHandle, mEncParams, NULL, NULL);
+ if (err != AVCENC_SUCCESS) {
+ LOGE("Failed to initialize the encoder: %d", err);
+ return UNKNOWN_ERROR;
+ }
+
+ mGroup = new MediaBufferGroup();
+ int32_t maxSize;
+ if (AVCENC_SUCCESS !=
+ PVAVCEncGetMaxOutputBufferSize(mHandle, &maxSize)) {
+ maxSize = 31584; // Magic #
+ }
+ mGroup->add_buffer(new MediaBuffer(maxSize));
+
+ mSource->start(params);
+ mNumInputFrames = -2; // 1st two buffers contain SPS and PPS
+ mStarted = true;
+ mSpsPpsHeaderReceived = false;
+ mReadyForNextFrame = true;
+ mIsIDRFrame = 0;
+
+ return OK;
+}
+
+status_t AVCEncoder::stop() {
+ LOGV("stop");
+ if (!mStarted) {
+ LOGW("Call stop() when encoder has not started");
+ return OK;
+ }
+
+ if (mInputBuffer) {
+ mInputBuffer->release();
+ mInputBuffer = NULL;
+ }
+
+ if (mGroup) {
+ delete mGroup;
+ mGroup = NULL;
+ }
+
+ if (mInputFrameData) {
+ delete mInputFrameData;
+ mInputFrameData = NULL;
+ }
+
+ PVAVCCleanUpEncoder(mHandle);
+ mSource->stop();
+ releaseOutputBuffers();
+ mStarted = false;
+
+ return OK;
+}
+
+void AVCEncoder::releaseOutputBuffers() {
+ LOGV("releaseOutputBuffers");
+ for (size_t i = 0; i < mOutputBuffers.size(); ++i) {
+ MediaBuffer *buffer = mOutputBuffers.editItemAt(i);
+ buffer->setObserver(NULL);
+ buffer->release();
+ }
+ mOutputBuffers.clear();
+}
+
+sp<MetaData> AVCEncoder::getFormat() {
+ LOGV("getFormat");
+ return mFormat;
+}
+
+status_t AVCEncoder::read(
+ MediaBuffer **out, const ReadOptions *options) {
+
+ CHECK(!options);
+ *out = NULL;
+
+ MediaBuffer *outputBuffer;
+ CHECK_EQ(OK, mGroup->acquire_buffer(&outputBuffer));
+ uint8_t *outPtr = (uint8_t *) outputBuffer->data();
+ uint32_t dataLength = outputBuffer->size();
+
+ int32_t type;
+ AVCEnc_Status encoderStatus = AVCENC_SUCCESS;
+
+ // Return SPS and PPS for the first two buffers
+ if (!mSpsPpsHeaderReceived) {
+ encoderStatus = PVAVCEncodeNAL(mHandle, outPtr, &dataLength, &type);
+ if (encoderStatus == AVCENC_WRONG_STATE) {
+ mSpsPpsHeaderReceived = true;
+ CHECK_EQ(0, mNumInputFrames); // 1st video frame is 0
+ } else {
+ switch (type) {
+ case AVC_NALTYPE_SPS:
+ case AVC_NALTYPE_PPS:
+ LOGV("%s received",
+ (type == AVC_NALTYPE_SPS)? "SPS": "PPS");
+ ++mNumInputFrames;
+ outputBuffer->set_range(0, dataLength);
+ *out = outputBuffer;
+ return OK;
+ default:
+ LOGE("Nal type (%d) other than SPS/PPS is unexpected", type);
+ return UNKNOWN_ERROR;
+ }
+ }
+ }
+
+ // Get next input video frame
+ if (mReadyForNextFrame) {
+ if (mInputBuffer) {
+ mInputBuffer->release();
+ mInputBuffer = NULL;
+ }
+ status_t err = mSource->read(&mInputBuffer, options);
+ if (err != OK) {
+ LOGE("Failed to read input video frame: %d", err);
+ outputBuffer->release();
+ return err;
+ }
+ int64_t timeUs;
+ CHECK(mInputBuffer->meta_data()->findInt64(kKeyTime, &timeUs));
+ outputBuffer->meta_data()->setInt64(kKeyTime, timeUs);
+
+ AVCFrameIO videoInput;
+ memset(&videoInput, 0, sizeof(videoInput));
+ videoInput.height = ((mVideoHeight + 15) >> 4) << 4;
+ videoInput.pitch = ((mVideoWidth + 15) >> 4) << 4;
+ videoInput.coding_timestamp = (timeUs + 500) / 1000; // in ms
+ uint8_t *inputData = (uint8_t *) mInputBuffer->data();
+
+ if (mVideoColorFormat != OMX_COLOR_FormatYUV420Planar) {
+ CHECK(mInputFrameData);
+ CHECK(mVideoColorFormat == OMX_COLOR_FormatYUV420SemiPlanar);
+ ConvertYUV420SemiPlanarToYUV420Planar(
+ inputData, mInputFrameData, mVideoWidth, mVideoHeight);
+ inputData = mInputFrameData;
+ }
+ CHECK(inputData != NULL);
+ videoInput.YCbCr[0] = inputData;
+ videoInput.YCbCr[1] = videoInput.YCbCr[0] + videoInput.height * videoInput.pitch;
+ videoInput.YCbCr[2] = videoInput.YCbCr[1] +
+ ((videoInput.height * videoInput.pitch) >> 2);
+ videoInput.disp_order = mNumInputFrames;
+
+ encoderStatus = PVAVCEncSetInput(mHandle, &videoInput);
+ if (encoderStatus == AVCENC_SUCCESS ||
+ encoderStatus == AVCENC_NEW_IDR) {
+ mReadyForNextFrame = false;
+ ++mNumInputFrames;
+ if (encoderStatus == AVCENC_NEW_IDR) {
+ mIsIDRFrame = 1;
+ }
+ } else {
+ if (encoderStatus < AVCENC_SUCCESS) {
+ outputBuffer->release();
+ return UNKNOWN_ERROR;
+ } else {
+ outputBuffer->set_range(0, 0);
+ *out = outputBuffer;
+ return OK;
+ }
+ }
+ }
+
+ // Encode an input video frame
+ CHECK(encoderStatus == AVCENC_SUCCESS ||
+ encoderStatus == AVCENC_NEW_IDR);
+ dataLength = outputBuffer->size(); // Reset the output buffer length
+ encoderStatus = PVAVCEncodeNAL(mHandle, outPtr, &dataLength, &type);
+ if (encoderStatus == AVCENC_SUCCESS) {
+ outputBuffer->meta_data()->setInt32(kKeyIsSyncFrame, mIsIDRFrame);
+ CHECK_EQ(NULL, PVAVCEncGetOverrunBuffer(mHandle));
+ } else if (encoderStatus == AVCENC_PICTURE_READY) {
+ CHECK_EQ(NULL, PVAVCEncGetOverrunBuffer(mHandle));
+ if (mIsIDRFrame) {
+ outputBuffer->meta_data()->setInt32(kKeyIsSyncFrame, mIsIDRFrame);
+ mIsIDRFrame = 0;
+ LOGV("Output an IDR frame");
+ }
+ mReadyForNextFrame = true;
+ AVCFrameIO recon;
+ if (PVAVCEncGetRecon(mHandle, &recon) == AVCENC_SUCCESS) {
+ PVAVCEncReleaseRecon(mHandle, &recon);
+ }
+ } else {
+ dataLength = 0;
+ mReadyForNextFrame = true;
+ }
+ if (encoderStatus < AVCENC_SUCCESS) {
+ outputBuffer->release();
+ return UNKNOWN_ERROR;
+ }
+
+ outputBuffer->set_range(0, dataLength);
+ *out = outputBuffer;
+ return OK;
+}
+
+int32_t AVCEncoder::allocOutputBuffers(
+ unsigned int sizeInMbs, unsigned int numBuffers) {
+ CHECK(mOutputBuffers.isEmpty());
+ size_t frameSize = (sizeInMbs << 7) * 3;
+ for (unsigned int i = 0; i < numBuffers; ++i) {
+ MediaBuffer *buffer = new MediaBuffer(frameSize);
+ buffer->setObserver(this);
+ mOutputBuffers.push(buffer);
+ }
+
+ return 1;
+}
+
+void AVCEncoder::unbindOutputBuffer(int32_t index) {
+ CHECK(index >= 0);
+}
+
+int32_t AVCEncoder::bindOutputBuffer(int32_t index, uint8_t **yuv) {
+ CHECK(index >= 0);
+ CHECK(index < (int32_t) mOutputBuffers.size());
+ int64_t timeUs;
+ CHECK(mInputBuffer->meta_data()->findInt64(kKeyTime, &timeUs));
+ mOutputBuffers[index]->meta_data()->setInt64(kKeyTime, timeUs);
+
+ *yuv = (uint8_t *) mOutputBuffers[index]->data();
+
+ return 1;
+}
+
+void AVCEncoder::signalBufferReturned(MediaBuffer *buffer) {
+}
+
+} // namespace android
diff --git a/media/libstagefright/codecs/avc/enc/Android.mk b/media/libstagefright/codecs/avc/enc/Android.mk
new file mode 100644
index 0000000..735eb0c
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/Android.mk
@@ -0,0 +1,34 @@
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+ AVCEncoder.cpp \
+ src/avcenc_api.cpp \
+ src/bitstream_io.cpp \
+ src/block.cpp \
+ src/findhalfpel.cpp \
+ src/header.cpp \
+ src/init.cpp \
+ src/intra_est.cpp \
+ src/motion_comp.cpp \
+ src/motion_est.cpp \
+ src/rate_control.cpp \
+ src/residual.cpp \
+ src/sad.cpp \
+ src/sad_halfpel.cpp \
+ src/slice.cpp \
+ src/vlc_encode.cpp
+
+
+LOCAL_MODULE := libstagefright_avcenc
+
+LOCAL_C_INCLUDES := \
+ $(LOCAL_PATH)/src \
+ $(LOCAL_PATH)/../common/include \
+ $(TOP)/external/opencore/extern_libs_v2/khronos/openmax/include \
+ $(TOP)/frameworks/base/media/libstagefright/include
+
+LOCAL_CFLAGS := \
+ -DOSCL_IMPORT_REF= -DOSCL_UNUSED_ARG= -DOSCL_EXPORT_REF=
+
+include $(BUILD_STATIC_LIBRARY)
diff --git a/media/libstagefright/codecs/avc/enc/src/avcenc_api.cpp b/media/libstagefright/codecs/avc/enc/src/avcenc_api.cpp
new file mode 100644
index 0000000..d39885d
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/avcenc_api.cpp
@@ -0,0 +1,744 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+#include "avcenc_api.h"
+#include "avcenc_lib.h"
+
+/* ======================================================================== */
+/* Function : PVAVCEncGetNALType() */
+/* Date : 11/4/2003 */
+/* Purpose : Sniff NAL type from the bitstream */
+/* In/out : */
+/* Return : AVCENC_SUCCESS if succeed, AVCENC_FAIL if fail. */
+/* Modified : */
+/* ======================================================================== */
+OSCL_EXPORT_REF AVCEnc_Status PVAVCEncGetNALType(unsigned char *bitstream, int size,
+ int *nal_type, int *nal_ref_idc)
+{
+ int forbidden_zero_bit;
+ if (size > 0)
+ {
+ forbidden_zero_bit = bitstream[0] >> 7;
+ if (forbidden_zero_bit != 0)
+ return AVCENC_FAIL;
+ *nal_ref_idc = (bitstream[0] & 0x60) >> 5;
+ *nal_type = bitstream[0] & 0x1F;
+ return AVCENC_SUCCESS;
+ }
+
+ return AVCENC_FAIL;
+}
+
+
+/* ======================================================================== */
+/* Function : PVAVCEncInitialize() */
+/* Date : 3/18/2004 */
+/* Purpose : Initialize the encoder library, allocate memory and verify */
+/* the profile/level support/settings. */
+/* In/out : Encoding parameters. */
+/* Return : AVCENC_SUCCESS for success. */
+/* Modified : */
+/* ======================================================================== */
+OSCL_EXPORT_REF AVCEnc_Status PVAVCEncInitialize(AVCHandle *avcHandle, AVCEncParams *encParam,
+ void* extSPS, void* extPPS)
+{
+ AVCEnc_Status status;
+ AVCEncObject *encvid;
+ AVCCommonObj *video;
+ uint32 *userData = (uint32*) avcHandle->userData;
+ int framesize;
+
+ if (avcHandle->AVCObject != NULL)
+ {
+ return AVCENC_ALREADY_INITIALIZED; /* It's already initialized, need to cleanup first */
+ }
+
+ /* not initialized */
+
+ /* allocate videoObject */
+ avcHandle->AVCObject = (void*)avcHandle->CBAVC_Malloc(userData, sizeof(AVCEncObject), DEFAULT_ATTR);
+ if (avcHandle->AVCObject == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+
+ encvid = (AVCEncObject*) avcHandle->AVCObject;
+ memset(encvid, 0, sizeof(AVCEncObject)); /* reset everything */
+
+ encvid->enc_state = AVCEnc_Initializing;
+
+ encvid->avcHandle = avcHandle;
+
+ encvid->common = (AVCCommonObj*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCCommonObj), DEFAULT_ATTR);
+ if (encvid->common == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+
+ video = encvid->common;
+ memset(video, 0, sizeof(AVCCommonObj));
+
+ /* allocate bitstream structure */
+ encvid->bitstream = (AVCEncBitstream*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCEncBitstream), DEFAULT_ATTR);
+ if (encvid->bitstream == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+ encvid->bitstream->encvid = encvid; /* to point back for reallocation */
+
+ /* allocate sequence parameter set structure */
+ video->currSeqParams = (AVCSeqParamSet*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCSeqParamSet), DEFAULT_ATTR);
+ if (video->currSeqParams == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+ memset(video->currSeqParams, 0, sizeof(AVCSeqParamSet));
+
+ /* allocate picture parameter set structure */
+ video->currPicParams = (AVCPicParamSet*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCPicParamSet), DEFAULT_ATTR);
+ if (video->currPicParams == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+ memset(video->currPicParams, 0, sizeof(AVCPicParamSet));
+
+ /* allocate slice header structure */
+ video->sliceHdr = (AVCSliceHeader*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCSliceHeader), DEFAULT_ATTR);
+ if (video->sliceHdr == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+ memset(video->sliceHdr, 0, sizeof(AVCSliceHeader));
+
+ /* allocate encoded picture buffer structure*/
+ video->decPicBuf = (AVCDecPicBuffer*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCDecPicBuffer), DEFAULT_ATTR);
+ if (video->decPicBuf == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+ memset(video->decPicBuf, 0, sizeof(AVCDecPicBuffer));
+
+ /* allocate rate control structure */
+ encvid->rateCtrl = (AVCRateControl*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCRateControl), DEFAULT_ATTR);
+ if (encvid->rateCtrl == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+ memset(encvid->rateCtrl, 0, sizeof(AVCRateControl));
+
+ /* reset frame list, not really needed */
+ video->currPic = NULL;
+ video->currFS = NULL;
+ encvid->currInput = NULL;
+ video->prevRefPic = NULL;
+
+ /* now read encParams, and allocate dimension-dependent variables */
+ /* such as mblock */
+ status = SetEncodeParam(avcHandle, encParam, extSPS, extPPS); /* initialized variables to be used in SPS*/
+ if (status != AVCENC_SUCCESS)
+ {
+ return status;
+ }
+
+ if (encParam->use_overrun_buffer == AVC_ON)
+ {
+ /* allocate overrun buffer */
+ encvid->oBSize = encvid->rateCtrl->cpbSize;
+ if (encvid->oBSize > DEFAULT_OVERRUN_BUFFER_SIZE)
+ {
+ encvid->oBSize = DEFAULT_OVERRUN_BUFFER_SIZE;
+ }
+ encvid->overrunBuffer = (uint8*) avcHandle->CBAVC_Malloc(userData, encvid->oBSize, DEFAULT_ATTR);
+ if (encvid->overrunBuffer == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+ }
+ else
+ {
+ encvid->oBSize = 0;
+ encvid->overrunBuffer = NULL;
+ }
+
+ /* allocate frame size dependent structures */
+ framesize = video->FrameHeightInMbs * video->PicWidthInMbs;
+
+ video->mblock = (AVCMacroblock*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCMacroblock) * framesize, DEFAULT_ATTR);
+ if (video->mblock == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+
+ video->MbToSliceGroupMap = (int*) avcHandle->CBAVC_Malloc(userData, sizeof(uint) * video->PicSizeInMapUnits * 2, DEFAULT_ATTR);
+ if (video->MbToSliceGroupMap == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+
+ encvid->mot16x16 = (AVCMV*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCMV) * framesize, DEFAULT_ATTR);
+ if (encvid->mot16x16 == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+ memset(encvid->mot16x16, 0, sizeof(AVCMV)*framesize);
+
+ encvid->intraSearch = (uint8*) avcHandle->CBAVC_Malloc(userData, sizeof(uint8) * framesize, DEFAULT_ATTR);
+ if (encvid->intraSearch == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+
+ encvid->min_cost = (int*) avcHandle->CBAVC_Malloc(userData, sizeof(int) * framesize, DEFAULT_ATTR);
+ if (encvid->min_cost == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+
+ /* initialize motion search related memory */
+ if (AVCENC_SUCCESS != InitMotionSearchModule(avcHandle))
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+
+ if (AVCENC_SUCCESS != InitRateControlModule(avcHandle))
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+
+ /* initialize function pointers */
+ encvid->functionPointer = (AVCEncFuncPtr*) avcHandle->CBAVC_Malloc(userData, sizeof(AVCEncFuncPtr), DEFAULT_ATTR);
+ if (encvid->functionPointer == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+ encvid->functionPointer->SAD_Macroblock = &AVCSAD_Macroblock_C;
+ encvid->functionPointer->SAD_MB_HalfPel[0] = NULL;
+ encvid->functionPointer->SAD_MB_HalfPel[1] = &AVCSAD_MB_HalfPel_Cxh;
+ encvid->functionPointer->SAD_MB_HalfPel[2] = &AVCSAD_MB_HalfPel_Cyh;
+ encvid->functionPointer->SAD_MB_HalfPel[3] = &AVCSAD_MB_HalfPel_Cxhyh;
+
+ /* initialize timing control */
+ encvid->modTimeRef = 0; /* ALWAYS ASSUME THAT TIMESTAMP START FROM 0 !!!*/
+ video->prevFrameNum = 0;
+ encvid->prevCodedFrameNum = 0;
+ encvid->dispOrdPOCRef = 0;
+
+ if (encvid->outOfBandParamSet == TRUE)
+ {
+ encvid->enc_state = AVCEnc_Encoding_SPS;
+ }
+ else
+ {
+ encvid->enc_state = AVCEnc_Analyzing_Frame;
+ }
+
+ return AVCENC_SUCCESS;
+}
+
+/* ======================================================================== */
+/* Function : PVAVCEncGetMaxOutputBufferSize() */
+/* Date : 11/29/2008 */
+/* Purpose : Return max output buffer size that apps should allocate for */
+/* output buffer. */
+/* In/out : */
+/* Return : AVCENC_SUCCESS for success. */
+/* Modified : size */
+/* ======================================================================== */
+
+OSCL_EXPORT_REF AVCEnc_Status PVAVCEncGetMaxOutputBufferSize(AVCHandle *avcHandle, int* size)
+{
+ AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject;
+
+ if (encvid == NULL)
+ {
+ return AVCENC_UNINITIALIZED;
+ }
+
+ *size = encvid->rateCtrl->cpbSize;
+
+ return AVCENC_SUCCESS;
+}
+
+/* ======================================================================== */
+/* Function : PVAVCEncSetInput() */
+/* Date : 4/18/2004 */
+/* Purpose : To feed an unencoded original frame to the encoder library. */
+/* In/out : */
+/* Return : AVCENC_SUCCESS for success. */
+/* Modified : */
+/* ======================================================================== */
+OSCL_EXPORT_REF AVCEnc_Status PVAVCEncSetInput(AVCHandle *avcHandle, AVCFrameIO *input)
+{
+    AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject;
+    AVCCommonObj *video;
+    AVCRateControl *rateCtrl;
+
+    AVCEnc_Status status;
+    uint frameNum;
+
+    /* Validate the handle before touching any of its members. (The previous
+       code read encvid->common and encvid->rateCtrl before this NULL check,
+       crashing on an uninitialized handle instead of failing gracefully.) */
+    if (encvid == NULL)
+    {
+        return AVCENC_UNINITIALIZED;
+    }
+
+    video = encvid->common;
+    rateCtrl = encvid->rateCtrl;
+
+    if (encvid->enc_state == AVCEnc_WaitingForBuffer)
+    {
+        goto RECALL_INITFRAME;
+    }
+    else if (encvid->enc_state != AVCEnc_Analyzing_Frame)
+    {
+        return AVCENC_FAIL;
+    }
+
+    if (input->pitch > 0xFFFF)
+    {
+        return AVCENC_NOT_SUPPORTED; // we use 2-bytes for pitch
+    }
+
+    /***********************************/
+
+    /* Let rate control decide whether to encode this frame or not. */
+    /* Also sets video->nal_unit_type, sliceHdr->slice_type, video->slice_type. */
+    if (AVCENC_SUCCESS != RCDetermineFrameNum(encvid, rateCtrl, input->coding_timestamp, &frameNum))
+    {
+        return AVCENC_SKIPPED_PICTURE; /* not time to encode, thus skipping */
+    }
+
+    /* we may not need this line */
+    //nextFrmModTime = (uint32)((((frameNum+1)*1000)/rateCtrl->frame_rate) + modTimeRef); /* rec. time */
+    //encvid->nextModTime = nextFrmModTime - (encvid->frameInterval>>1) - 1;  /* between current and next frame */
+
+    encvid->currInput = input;
+    encvid->currInput->coding_order = frameNum;
+
+RECALL_INITFRAME:
+    /* initialize and analyze the frame */
+    status = InitFrame(encvid);
+
+    if (status == AVCENC_SUCCESS)
+    {
+        encvid->enc_state = AVCEnc_Encoding_Frame;
+    }
+    else if (status == AVCENC_NEW_IDR)
+    {
+        if (encvid->outOfBandParamSet == TRUE)
+        {
+            encvid->enc_state = AVCEnc_Encoding_Frame;
+        }
+        else    // assuming that in-band paramset keeps sending new SPS and PPS.
+        {
+            encvid->enc_state = AVCEnc_Encoding_SPS;
+            //video->currSeqParams->seq_parameter_set_id++;
+            //if(video->currSeqParams->seq_parameter_set_id > 31) // range check
+            {
+                /* the increment/range-check above is disabled, so the id is
+                   unconditionally reset for each new IDR */
+                video->currSeqParams->seq_parameter_set_id = 0; // reset
+            }
+        }
+
+        video->sliceHdr->idr_pic_id++;
+        if (video->sliceHdr->idr_pic_id > 65535) // range check
+        {
+            video->sliceHdr->idr_pic_id = 0; // reset
+        }
+    }
+    /* the following logics need to be revisited */
+    else if (status == AVCENC_PICTURE_READY) // no buffers returned back to the encoder
+    {
+        encvid->enc_state = AVCEnc_WaitingForBuffer; // Input accepted but can't continue
+        // need to free up some memory before proceeding with Encode
+    }
+
+    return status; // return status, including the AVCENC_FAIL case and all 3 above.
+}
+
+/* ======================================================================== */
+/* Function : PVAVCEncodeNAL() */
+/* Date : 4/29/2004 */
+/* Purpose : To encode one NAL/slice. */
+/* In/out : */
+/* Return : AVCENC_SUCCESS for success. */
+/* Modified : */
+/* ======================================================================== */
+OSCL_EXPORT_REF AVCEnc_Status PVAVCEncodeNAL(AVCHandle *avcHandle, unsigned char *buffer, unsigned int *buf_nal_size, int *nal_type)
+{
+    AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject;
+    AVCCommonObj *video;
+    AVCEncBitstream *bitstream;
+    AVCEnc_Status status;
+
+    /* Validate the handle before touching any of its members. (The previous
+       code read encvid->common and encvid->bitstream before this NULL check,
+       crashing on an uninitialized handle instead of failing gracefully.) */
+    if (encvid == NULL)
+    {
+        return AVCENC_UNINITIALIZED;
+    }
+
+    video = encvid->common;
+    bitstream = encvid->bitstream;
+
+    switch (encvid->enc_state)
+    {
+        case AVCEnc_Initializing:
+            return AVCENC_UNINITIALIZED;
+        case AVCEnc_Encoding_SPS:
+            /* initialize the bitstream structure; no overrun buffer for SPS */
+            BitstreamEncInit(bitstream, buffer, *buf_nal_size, NULL, 0);
+            /* NAL header byte: forbidden_zero_bit=0, nal_ref_idc=1, nal_unit_type=SPS */
+            BitstreamWriteBits(bitstream, 8, (1 << 5) | AVC_NALTYPE_SPS);
+
+            /* encode SPS */
+            status = EncodeSPS(encvid, bitstream);
+            if (status != AVCENC_SUCCESS)
+            {
+                return status;
+            }
+
+            /* closing the NAL with trailing bits */
+            status = BitstreamTrailingBits(bitstream, buf_nal_size);
+            if (status == AVCENC_SUCCESS)
+            {
+                encvid->enc_state = AVCEnc_Encoding_PPS;
+                video->currPicParams->seq_parameter_set_id = video->currSeqParams->seq_parameter_set_id;
+                video->currPicParams->pic_parameter_set_id++;
+                *nal_type = AVC_NALTYPE_SPS;
+                *buf_nal_size = bitstream->write_pos;
+            }
+            break;
+        case AVCEnc_Encoding_PPS:
+            /* initialize the bitstream structure; no overrun buffer for PPS */
+            BitstreamEncInit(bitstream, buffer, *buf_nal_size, NULL, 0);
+            /* NAL header byte: forbidden_zero_bit=0, nal_ref_idc=1, nal_unit_type=PPS */
+            BitstreamWriteBits(bitstream, 8, (1 << 5) | AVC_NALTYPE_PPS);
+
+            /* encode PPS */
+            status = EncodePPS(encvid, bitstream);
+            if (status != AVCENC_SUCCESS)
+            {
+                return status;
+            }
+
+            /* closing the NAL with trailing bits */
+            status = BitstreamTrailingBits(bitstream, buf_nal_size);
+            if (status == AVCENC_SUCCESS)
+            {
+                if (encvid->outOfBandParamSet == TRUE) // already extract PPS, SPS
+                {
+                    encvid->enc_state = AVCEnc_Analyzing_Frame;
+                }
+                else    // SetInput has been called before SPS and PPS.
+                {
+                    encvid->enc_state = AVCEnc_Encoding_Frame;
+                }
+
+                *nal_type = AVC_NALTYPE_PPS;
+                *buf_nal_size = bitstream->write_pos;
+            }
+            break;
+
+        case AVCEnc_Encoding_Frame:
+            /* initialize the bitstream; slices may spill into the overrun buffer */
+            BitstreamEncInit(bitstream, buffer, *buf_nal_size, encvid->overrunBuffer, encvid->oBSize);
+            BitstreamWriteBits(bitstream, 8, (video->nal_ref_idc << 5) | (video->nal_unit_type));
+
+            /* Re-order the reference list according to the ref_pic_list_reordering() */
+            /* We don't have to reorder the list for the encoder here. This can only be done
+               after we encode this slice. We can run thru a second-pass to see if new ordering
+               would save more bits. Too much delay !! */
+            /* status = ReOrderList(video);*/
+            status = InitSlice(encvid);
+            if (status != AVCENC_SUCCESS)
+            {
+                return status;
+            }
+
+            /* when we have everything, we encode the slice header */
+            status = EncodeSliceHeader(encvid, bitstream);
+            if (status != AVCENC_SUCCESS)
+            {
+                return status;
+            }
+
+            status = AVCEncodeSlice(encvid);
+
+            video->slice_id++;
+
+            /* closing the NAL with trailing bits */
+            BitstreamTrailingBits(bitstream, buf_nal_size);
+
+            *buf_nal_size = bitstream->write_pos;
+
+            /* rate control accounting is in bits, write_pos is in bytes */
+            encvid->rateCtrl->numFrameBits += ((*buf_nal_size) << 3);
+
+            *nal_type = video->nal_unit_type;
+
+            if (status == AVCENC_PICTURE_READY)
+            {
+                status = RCUpdateFrame(encvid);
+                if (status == AVCENC_SKIPPED_PICTURE) /* skip current frame */
+                {
+                    DPBReleaseCurrentFrame(avcHandle, video);
+                    encvid->enc_state = AVCEnc_Analyzing_Frame;
+
+                    return status;
+                }
+
+                /* perform loop-filtering on the entire frame */
+                DeblockPicture(video);
+
+                /* update the original frame array */
+                encvid->prevCodedFrameNum = encvid->currInput->coding_order;
+
+                /* store the encoded picture in the DPB buffer */
+                StorePictureInDPB(avcHandle, video);
+
+                if (video->currPic->isReference)
+                {
+                    video->PrevRefFrameNum = video->sliceHdr->frame_num;
+                }
+
+                /* update POC related variables */
+                PostPOC(video);
+
+                encvid->enc_state = AVCEnc_Analyzing_Frame;
+                status = AVCENC_PICTURE_READY;
+
+            }
+            break;
+        default:
+            status = AVCENC_WRONG_STATE;
+    }
+
+    return status;
+}
+
+/* ======================================================================== */
+/* Function : PVAVCEncGetOverrunBuffer() */
+/* Purpose : To retrieve the overrun buffer. Check whether overrun buffer */
+/* is used or not before returning */
+/* In/out : */
+/* Return : Pointer to the internal overrun buffer. */
+/* Modified : */
+/* ======================================================================== */
+OSCL_EXPORT_REF uint8* PVAVCEncGetOverrunBuffer(AVCHandle* avcHandle)
+{
+    AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject;
+    AVCEncBitstream *bitstream;
+
+    /* Guard against an uninitialized handle: the previous code dereferenced
+       encvid->bitstream unconditionally. NULL here means "no overrun buffer",
+       matching the function's documented contract. */
+    if (encvid == NULL || encvid->bitstream == NULL)
+    {
+        return NULL;
+    }
+
+    bitstream = encvid->bitstream;
+
+    /* When the overrun buffer is in use, bitstreamBuffer points at it. */
+    if (bitstream->overrunBuffer == bitstream->bitstreamBuffer) /* OB is used */
+    {
+        return encvid->overrunBuffer;
+    }
+    else
+    {
+        return NULL;
+    }
+}
+
+
+/* ======================================================================== */
+/* Function : PVAVCEncGetRecon() */
+/* Date : 4/29/2004 */
+/* Purpose : To retrieve the most recently encoded frame. */
+/* assume that user will make a copy if they want to hold on */
+/* to it. Otherwise, it is not guaranteed to be reserved. */
+/* Most applications prefer to see original frame rather than */
+/* reconstructed frame. So, we are staying away from complex */
+/* buffering mechanism. If needed, can be added later. */
+/* In/out : */
+/* Return : AVCENC_SUCCESS for success. */
+/* Modified : */
+/* ======================================================================== */
+OSCL_EXPORT_REF AVCEnc_Status PVAVCEncGetRecon(AVCHandle *avcHandle, AVCFrameIO *recon)
+{
+    AVCEncObject *encvid = (AVCEncObject*)avcHandle->AVCObject;
+    AVCCommonObj *video;
+    AVCFrameStore *currFS;
+
+    /* Validate the handle before touching any of its members. (The previous
+       code read encvid->common->currFS before this NULL check.) */
+    if (encvid == NULL)
+    {
+        return AVCENC_UNINITIALIZED;
+    }
+
+    video = encvid->common;
+    currFS = video->currFS;
+
+    /* expose the reconstructed planes of the most recently encoded frame;
+       the caller only borrows these pointers (see API doc) */
+    recon->YCbCr[0] = currFS->frame.Sl;
+    recon->YCbCr[1] = currFS->frame.Scb;
+    recon->YCbCr[2] = currFS->frame.Scr;
+    recon->height = currFS->frame.height;
+    recon->pitch = currFS->frame.pitch;
+    recon->disp_order = currFS->PicOrderCnt;
+    recon->coding_order = currFS->FrameNum;
+    recon->id = (uint32) currFS->base_dpb; /* use the pointer as the id */
+
+    currFS->IsOutputted |= 1;
+
+    return AVCENC_SUCCESS;
+}
+
+OSCL_EXPORT_REF AVCEnc_Status PVAVCEncReleaseRecon(AVCHandle *avcHandle, AVCFrameIO *recon)
+{
+    /* Releasing a reconstructed frame is currently a no-op that always
+       succeeds; both arguments are intentionally ignored for now. */
+    OSCL_UNUSED_ARG(recon);
+    OSCL_UNUSED_ARG(avcHandle);
+
+    return AVCENC_SUCCESS; //for now
+}
+
+/* ======================================================================== */
+/* Function : PVAVCCleanUpEncoder() */
+/* Date : 4/18/2004 */
+/* Purpose : To clean up memories allocated by PVAVCEncInitialize() */
+/* In/out : */
+/* Return : AVCENC_SUCCESS for success. */
+/* Modified : */
+/* ======================================================================== */
+OSCL_EXPORT_REF void PVAVCCleanUpEncoder(AVCHandle *avcHandle)
+{
+    AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
+    AVCCommonObj *video;
+    uint32 *userData = (uint32*) avcHandle->userData;
+
+    if (encvid != NULL)
+    {
+        CleanMotionSearchModule(avcHandle);
+
+        CleanupRateControlModule(avcHandle);
+
+        if (encvid->functionPointer != NULL)
+        {
+            avcHandle->CBAVC_Free(userData, (int)encvid->functionPointer);
+        }
+
+        if (encvid->min_cost)
+        {
+            avcHandle->CBAVC_Free(userData, (int)encvid->min_cost);
+        }
+
+        if (encvid->intraSearch)
+        {
+            avcHandle->CBAVC_Free(userData, (int)encvid->intraSearch);
+        }
+
+        if (encvid->mot16x16)
+        {
+            avcHandle->CBAVC_Free(userData, (int)encvid->mot16x16);
+        }
+
+        if (encvid->rateCtrl)
+        {
+            avcHandle->CBAVC_Free(userData, (int)encvid->rateCtrl);
+        }
+
+        if (encvid->overrunBuffer)
+        {
+            avcHandle->CBAVC_Free(userData, (int)encvid->overrunBuffer);
+        }
+
+        /* The bitstream object belongs to encvid, not to video: free it
+           unconditionally. (The previous code freed it only inside the
+           video != NULL branch, leaking it when video was never allocated.) */
+        if (encvid->bitstream != NULL)
+        {
+            avcHandle->CBAVC_Free(userData, (int)encvid->bitstream);
+        }
+
+        video = encvid->common;
+        if (video != NULL)
+        {
+            if (video->MbToSliceGroupMap)
+            {
+                avcHandle->CBAVC_Free(userData, (int)video->MbToSliceGroupMap);
+            }
+            if (video->mblock != NULL)
+            {
+                avcHandle->CBAVC_Free(userData, (int)video->mblock);
+            }
+            if (video->decPicBuf != NULL)
+            {
+                /* release the frames held by the DPB before the DPB itself */
+                CleanUpDPB(avcHandle, video);
+                avcHandle->CBAVC_Free(userData, (int)video->decPicBuf);
+            }
+            if (video->sliceHdr != NULL)
+            {
+                avcHandle->CBAVC_Free(userData, (int)video->sliceHdr);
+            }
+            if (video->currPicParams != NULL)
+            {
+                if (video->currPicParams->slice_group_id)
+                {
+                    avcHandle->CBAVC_Free(userData, (int)video->currPicParams->slice_group_id);
+                }
+
+                avcHandle->CBAVC_Free(userData, (int)video->currPicParams);
+            }
+            if (video->currSeqParams != NULL)
+            {
+                avcHandle->CBAVC_Free(userData, (int)video->currSeqParams);
+            }
+
+            /* (removed a redundant inner video != NULL re-check) */
+            avcHandle->CBAVC_Free(userData, (int)video);
+        }
+
+        avcHandle->CBAVC_Free(userData, (int)encvid);
+
+        avcHandle->AVCObject = NULL;
+    }
+
+    return ;
+}
+
+/* The five runtime-parameter-update APIs below are not implemented yet:
+   each one ignores its arguments and reports AVCENC_FAIL. */
+OSCL_EXPORT_REF AVCEnc_Status PVAVCEncUpdateBitRate(AVCHandle *avcHandle, uint32 bitrate)
+{
+    OSCL_UNUSED_ARG(avcHandle);
+    OSCL_UNUSED_ARG(bitrate);
+
+    return AVCENC_FAIL;
+}
+
+OSCL_EXPORT_REF AVCEnc_Status PVAVCEncUpdateFrameRate(AVCHandle *avcHandle, uint32 num, uint32 denom)
+{
+    OSCL_UNUSED_ARG(avcHandle);
+    OSCL_UNUSED_ARG(num);
+    OSCL_UNUSED_ARG(denom);
+
+    return AVCENC_FAIL;
+}
+
+OSCL_EXPORT_REF AVCEnc_Status PVAVCEncUpdateIDRInterval(AVCHandle *avcHandle, int IDRInterval)
+{
+    OSCL_UNUSED_ARG(avcHandle);
+    OSCL_UNUSED_ARG(IDRInterval);
+
+    return AVCENC_FAIL;
+}
+
+OSCL_EXPORT_REF AVCEnc_Status PVAVCEncIDRRequest(AVCHandle *avcHandle)
+{
+    OSCL_UNUSED_ARG(avcHandle);
+
+    return AVCENC_FAIL;
+}
+
+OSCL_EXPORT_REF AVCEnc_Status PVAVCEncUpdateIMBRefresh(AVCHandle *avcHandle, int numMB)
+{
+    OSCL_UNUSED_ARG(avcHandle);
+    OSCL_UNUSED_ARG(numMB);
+
+    return AVCENC_FAIL;
+}
+
+/* Extract per-frame statistics (average QP, intra-MB count) for debugging. */
+void PVAVCEncGetFrameStats(AVCHandle *avcHandle, AVCEncFrameStats *avcStats)
+{
+    AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
+    AVCRateControl *rateCtrl;
+
+    /* Guard against an uninitialized encoder or a missing output struct:
+       the previous code dereferenced both pointers unconditionally. */
+    if (encvid == NULL || avcStats == NULL)
+    {
+        return ;
+    }
+
+    rateCtrl = encvid->rateCtrl;
+
+    avcStats->avgFrameQP = GetAvgFrameQP(rateCtrl);
+    avcStats->numIntraMBs = encvid->numIntraMB;
+
+    return ;
+}
+
+
+
diff --git a/media/libstagefright/codecs/avc/enc/src/avcenc_api.h b/media/libstagefright/codecs/avc/enc/src/avcenc_api.h
new file mode 100644
index 0000000..628dec6
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/avcenc_api.h
@@ -0,0 +1,320 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+/**
+This file contains application function interfaces to the AVC encoder library
+and necessary type definitions and enumerations.
+@publishedAll
+*/
+
+#ifndef AVCENC_API_H_INCLUDED
+#define AVCENC_API_H_INCLUDED
+
+#ifndef AVCAPI_COMMON_H_INCLUDED
+#include "avcapi_common.h"
+#endif
+
+/**
+ This enumeration is used for the status returned from the library interface.
+*/
+typedef enum
+{
+    /**
+    Fail information, need to add more error code for more specific info
+    */
+    /* Negative values are failures; AVCENC_SUCCESS (0) and positive
+       values are successes or continuable/informational conditions. */
+    AVCENC_TRAILINGONES_FAIL = -35,
+    AVCENC_SLICE_EMPTY = -34,
+    AVCENC_POC_FAIL = -33,
+    AVCENC_CONSECUTIVE_NONREF = -32,
+    AVCENC_CABAC_FAIL = -31,
+    AVCENC_PRED_WEIGHT_TAB_FAIL = -30,
+    AVCENC_DEC_REF_PIC_MARK_FAIL = -29,
+    AVCENC_SPS_FAIL = -28,
+    AVCENC_BITSTREAM_BUFFER_FULL = -27,
+    AVCENC_BITSTREAM_INIT_FAIL = -26,
+    AVCENC_CHROMA_QP_FAIL = -25,
+    AVCENC_INIT_QS_FAIL = -24,
+    AVCENC_INIT_QP_FAIL = -23,
+    AVCENC_WEIGHTED_BIPRED_FAIL = -22,
+    AVCENC_INVALID_INTRA_PERIOD = -21,
+    AVCENC_INVALID_CHANGE_RATE = -20,
+    AVCENC_INVALID_BETA_OFFSET = -19,
+    AVCENC_INVALID_ALPHA_OFFSET = -18,
+    AVCENC_INVALID_DEBLOCK_IDC = -17,
+    AVCENC_INVALID_REDUNDANT_PIC = -16,
+    AVCENC_INVALID_FRAMERATE = -15,
+    AVCENC_INVALID_NUM_SLICEGROUP = -14,
+    AVCENC_INVALID_POC_LSB = -13,
+    AVCENC_INVALID_NUM_REF = -12,
+    AVCENC_INVALID_FMO_TYPE = -11,
+    AVCENC_ENCPARAM_MEM_FAIL = -10,
+    AVCENC_LEVEL_NOT_SUPPORTED = -9,
+    AVCENC_LEVEL_FAIL = -8,
+    AVCENC_PROFILE_NOT_SUPPORTED = -7,
+    AVCENC_TOOLS_NOT_SUPPORTED = -6,
+    AVCENC_WRONG_STATE = -5,
+    AVCENC_UNINITIALIZED = -4,
+    AVCENC_ALREADY_INITIALIZED = -3,
+    AVCENC_NOT_SUPPORTED = -2,
+    /* these two alias the shared AVC common-status codes */
+    AVCENC_MEMORY_FAIL = AVC_MEMORY_FAIL,
+    AVCENC_FAIL = AVC_FAIL,
+    /**
+    Generic success value
+    */
+    AVCENC_SUCCESS = AVC_SUCCESS,
+    AVCENC_PICTURE_READY = 2,
+    AVCENC_NEW_IDR = 3, /* upon getting this, users have to call PVAVCEncodeSPS and PVAVCEncodePPS to get a new SPS and PPS*/
+    AVCENC_SKIPPED_PICTURE = 4 /* continuable error message */
+
+} AVCEnc_Status;
+
+#define MAX_NUM_SLICE_GROUP 8 /* maximum for all the profiles */
+
+/**
+This structure contains the encoding parameters.
+*/
+typedef struct tagAVCEncParam
+{
+    /* if profile/level is set to zero, encoder will choose the closest one for you */
+    AVCProfile profile; /* profile of the bitstream to be compliant with*/
+    AVCLevel   level;   /* level of the bitstream to be compliant with*/
+
+    int width;      /* width of an input frame in pixel */
+    int height;     /* height of an input frame in pixel */
+
+    int poc_type; /* picture order count mode, 0,1 or 2 */
+    /* for poc_type == 0 */
+    uint log2_max_poc_lsb_minus_4; /* specify maximum value of POC Lsb, range 0..12*/
+    /* for poc_type == 1 */
+    uint delta_poc_zero_flag; /* delta POC always zero */
+    int offset_poc_non_ref; /* offset for non-reference pic */
+    int offset_top_bottom; /* offset between top and bottom field */
+    uint num_ref_in_cycle; /* number of reference frame in one cycle */
+    int *offset_poc_ref; /* array of offset for ref pic, dimension [num_ref_in_cycle] */
+
+    int num_ref_frame;  /* number of reference frame used */
+    int num_slice_group;  /* number of slice group */
+    int fmo_type; /* 0: interleave, 1: dispersed, 2: foreground with left-over
+                  3: box-out, 4:raster scan, 5:wipe, 6:explicit */
+    /* for fmo_type == 0 */
+    uint run_length_minus1[MAX_NUM_SLICE_GROUP];   /* array of size num_slice_group, in round robin fashion */
+    /* fmo_type == 2*/
+    uint top_left[MAX_NUM_SLICE_GROUP-1];      /* array of co-ordinates of each slice_group */
+    uint bottom_right[MAX_NUM_SLICE_GROUP-1];  /* except the last one which is the background. */
+    /* fmo_type == 3,4,5 */
+    AVCFlag change_dir_flag;  /* slice group change direction flag */
+    uint change_rate_minus1;
+    /* fmo_type == 6 */
+    uint *slice_group; /* array of size MBWidth*MBHeight */
+
+    AVCFlag db_filter;  /* enable deblocking loop filter */
+    int disable_db_idc;  /* 0: filter everywhere, 1: no filter, 2: no filter across slice boundary */
+    int alpha_offset;   /* alpha offset range -6,...,6 */
+    int beta_offset;    /* beta offset range -6,...,6 */
+
+    AVCFlag constrained_intra_pred; /* constrained intra prediction flag */
+
+    AVCFlag auto_scd;   /* scene change detection on or off */
+    int idr_period; /* idr frame refresh rate in number of target encoded frame (no concept of actual time).*/
+    int intramb_refresh;    /* minimum number of intra MB per frame */
+    AVCFlag data_par;   /* enable data partitioning */
+
+    AVCFlag fullsearch; /* enable full-pel full-search mode */
+    int search_range;   /* search range for motion vector in (-search_range,+search_range) pixels */
+    AVCFlag sub_pel;    /* enable sub pel prediction */
+    AVCFlag submb_pred; /* enable sub MB partition mode */
+    AVCFlag rdopt_mode; /* RD optimal mode selection */
+    AVCFlag bidir_pred; /* enable bi-directional for B-slice, this flag forces the encoder to encode
+                        any frame with POC less than the previously encoded frame as a B-frame.
+                        If it's off, then such frames will remain P-frame. */
+
+    AVCFlag rate_control; /* rate control enable, on: RC on, off: constant QP */
+    int initQP;     /* initial QP */
+    uint32 bitrate;    /* target encoding bit rate in bits/second */
+    uint32 CPB_size;  /* coded picture buffer in number of bits */
+    uint32 init_CBP_removal_delay; /* initial CPB removal delay in msec
+                                      (NOTE(review): field name spells "CBP" but this is
+                                      presumably the HRD coded-picture-buffer delay -- confirm) */
+
+    uint32 frame_rate;  /* frame rate in the unit of frames per 1000 second */
+    /* note, frame rate is only needed by the rate control, AVC is timestamp agnostic. */
+
+    AVCFlag out_of_band_param_set; /* flag to set whether param sets are to be retrieved up front or not */
+
+    AVCFlag use_overrun_buffer;  /* do not throw away the frame if output buffer is not big enough.
+                                    copy excess bits to the overrun buffer */
+} AVCEncParams;
+
+
+/**
+This structure contains current frame encoding statistics for debugging purpose.
+*/
+typedef struct tagAVCEncFrameStats
+{
+    int avgFrameQP;   /* average frame QP */
+    int numIntraMBs;  /* number of intra MBs */
+    /* NOTE(review): the three counters below are not filled in by
+       PVAVCEncGetFrameStats in this file; presumably they relate to the
+       auto scene-change detection (auto_scd) -- confirm before relying on them */
+    int numFalseAlarm;
+    int numMisDetected;
+    int numDetected;
+
+} AVCEncFrameStats;
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+ /** THE FOLLOWINGS ARE APIS */
+ /**
+ This function initializes the encoder library. It verifies the validity of the
+ encoding parameters against the specified profile/level and the list of supported
+ tools by this library. It allocates necessary memories required to perform encoding.
+ For re-encoding application, if users want to setup encoder in a more precise way,
+ users can give the external SPS and PPS to the encoder to follow.
+ \param "avcHandle" "Handle to the AVC encoder library object."
+ \param "encParam" "Pointer to the encoding parameter structure."
+ \param "extSPS" "External SPS used for re-encoding purpose. NULL if not present"
+ \param "extPPS" "External PPS used for re-encoding purpose. NULL if not present"
+ \return "AVCENC_SUCCESS for success,
+ AVCENC_NOT_SUPPORTED for the use of unsupported tools,
+ AVCENC_MEMORY_FAIL for memory allocation failure,
+ AVCENC_FAIL for generic failure."
+ */
+ OSCL_IMPORT_REF AVCEnc_Status PVAVCEncInitialize(AVCHandle *avcHandle, AVCEncParams *encParam, void* extSPS, void* extPPS);
+
+
+ /**
+ Since the output buffer size is not known prior to encoding a frame, users need to
+ allocate big enough buffer otherwise, that frame will be dropped. This function returns
+ the size of the output buffer to be allocated by the users that guarantees to hold one frame.
+ It follows the CPB spec for a particular level. However, when the users set use_overrun_buffer
+ flag, this API is useless as excess output bits are saved in the overrun buffer waiting to be
+ copied out in small chunks, i.e. users can allocate any size of output buffer.
+ \param "avcHandle" "Handle to the AVC encoder library object."
+ \param "size" "Pointer to the size to be modified."
+    \return "AVCENC_SUCCESS for success, AVCENC_UNINITIALIZED when level is not known."
+ */
+
+ OSCL_IMPORT_REF AVCEnc_Status PVAVCEncGetMaxOutputBufferSize(AVCHandle *avcHandle, int* size);
+
+ /**
+ Users call this function to provide an input structure to the encoder library which will keep
+ a list of input structures it receives in case the users call this function many time before
+ calling PVAVCEncodeSlice. The encoder library will encode them according to the frame_num order.
+ Users should not modify the content of a particular frame until this frame is encoded and
+ returned thru CBAVCEnc_ReturnInput() callback function.
+ \param "avcHandle" "Handle to the AVC encoder library object."
+ \param "input" "Pointer to the input structure."
+ \return "AVCENC_SUCCESS for success,
+ AVCENC_FAIL if the encoder is not in the right state to take a new input frame.
+ AVCENC_NEW_IDR for the detection or determination of a new IDR, with this status,
+ the returned NAL is an SPS NAL,
+ AVCENC_NO_PICTURE if the input frame coding timestamp is too early, users must
+ get next frame or adjust the coding timestamp."
+ */
+ OSCL_IMPORT_REF AVCEnc_Status PVAVCEncSetInput(AVCHandle *avcHandle, AVCFrameIO *input);
+
+ /**
+ This function is called to encode a NAL unit which can be an SPS NAL, a PPS NAL or
+ a VCL (video coding layer) NAL which contains one slice of data. It could be a
+ fixed number of macroblocks, as specified in the encoder parameters set, or the
+ maximum number of macroblocks fitted into the given input argument "buffer". The
+ input frame is taken from the oldest unencoded input frame retrieved by users by
+ PVAVCEncGetInput API.
+ \param "avcHandle" "Handle to the AVC encoder library object."
+ \param "buffer" "Pointer to the output AVC bitstream buffer, the format will be EBSP,
+ not RBSP."
+ \param "buf_nal_size" "As input, the size of the buffer in bytes.
+ This is the physical limitation of the buffer. As output, the size of the EBSP."
+ \param "nal_type" "Pointer to the NAL type of the returned buffer."
+ \return "AVCENC_SUCCESS for success of encoding one slice,
+ AVCENC_PICTURE_READY for the completion of a frame encoding,
+ AVCENC_FAIL for failure (this should not occur, though)."
+ */
+ OSCL_IMPORT_REF AVCEnc_Status PVAVCEncodeNAL(AVCHandle *avcHandle, uint8 *buffer, uint *buf_nal_size, int *nal_type);
+
+ /**
+ This function sniffs the nal_unit_type such that users can call corresponding APIs.
+ This function is identical to PVAVCDecGetNALType() in the decoder.
+ \param "bitstream" "Pointer to the beginning of a NAL unit (start with forbidden_zero_bit, etc.)."
+ \param "size" "size of the bitstream (NumBytesInNALunit + 1)."
+ \param "nal_unit_type" "Pointer to the return value of nal unit type."
+ \return "AVCENC_SUCCESS if success, AVCENC_FAIL otherwise."
+ */
+ OSCL_IMPORT_REF AVCEnc_Status PVAVCEncGetNALType(uint8 *bitstream, int size, int *nal_type, int *nal_ref_idc);
+
+ /**
+ This function returns the pointer to internal overrun buffer. Users can call this to query
+ whether the overrun buffer has been used to encode the current NAL.
+ \param "avcHandle" "Pointer to the handle."
+ \return "Pointer to overrun buffer if it is used, otherwise, NULL."
+ */
+ OSCL_IMPORT_REF uint8* PVAVCEncGetOverrunBuffer(AVCHandle* avcHandle);
+
+ /**
+ This function returns the reconstructed frame of the most recently encoded frame.
+ Note that this frame is not returned to the users yet. Users should only read the
+ content of this frame.
+ \param "avcHandle" "Handle to the AVC encoder library object."
+ \param "output" "Pointer to the input structure."
+ \return "AVCENC_SUCCESS for success, AVCENC_NO_PICTURE if no picture to be outputted."
+ */
+ OSCL_IMPORT_REF AVCEnc_Status PVAVCEncGetRecon(AVCHandle *avcHandle, AVCFrameIO *recon);
+
+ /**
+ This function is used to return the recontructed frame back to the AVC encoder library
+ in order to be re-used for encoding operation. If users want the content of it to remain
+ unchanged for a long time, they should make a copy of it and release the memory back to
+ the encoder. The encoder relies on the id element in the AVCFrameIO structure,
+ thus users should not change the id value.
+ \param "avcHandle" "Handle to the AVC decoder library object."
+ \param "output" "Pointer to the AVCFrameIO structure."
+ \return "AVCENC_SUCCESS for success, AVCENC_FAIL for fail for id not found."
+ */
+ OSCL_IMPORT_REF AVCEnc_Status PVAVCEncReleaseRecon(AVCHandle *avcHandle, AVCFrameIO *recon);
+
+ /**
+ This function performs clean up operation including memory deallocation.
+ The encoder will also clear the list of input structures it has not released.
+ This implies that users must keep track of the number of input structure they have allocated
+ and free them accordingly.
+ \param "avcHandle" "Handle to the AVC encoder library object."
+ */
+ OSCL_IMPORT_REF void PVAVCCleanUpEncoder(AVCHandle *avcHandle);
+
+ /**
+ This function extracts statistics of the current frame. If the encoder has not finished
+ with the current frame, the result is not accurate.
+ \param "avcHandle" "Handle to the AVC encoder library object."
+ \param "avcStats" "Pointer to AVCEncFrameStats structure."
+ \return "void."
+ */
+ void PVAVCEncGetFrameStats(AVCHandle *avcHandle, AVCEncFrameStats *avcStats);
+
+ /**
+ These functions are used for the modification of encoding parameters.
+ To be polished.
+ */
+ OSCL_IMPORT_REF AVCEnc_Status PVAVCEncUpdateBitRate(AVCHandle *avcHandle, uint32 bitrate);
+ OSCL_IMPORT_REF AVCEnc_Status PVAVCEncUpdateFrameRate(AVCHandle *avcHandle, uint32 num, uint32 denom);
+ OSCL_IMPORT_REF AVCEnc_Status PVAVCEncUpdateIDRInterval(AVCHandle *avcHandle, int IDRInterval);
+ OSCL_IMPORT_REF AVCEnc_Status PVAVCEncIDRRequest(AVCHandle *avcHandle);
+ OSCL_IMPORT_REF AVCEnc_Status PVAVCEncUpdateIMBRefresh(AVCHandle *avcHandle, int numMB);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* AVCENC_API_H_INCLUDED */
+
diff --git a/media/libstagefright/codecs/avc/enc/src/avcenc_int.h b/media/libstagefright/codecs/avc/enc/src/avcenc_int.h
new file mode 100644
index 0000000..3fe08a1
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/avcenc_int.h
@@ -0,0 +1,471 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+/**
+This file contains internal type definitions, constants and enumerations
+used by the AVC encoder library.
+@publishedAll
+*/
+
+#ifndef AVCENC_INT_H_INCLUDED
+#define AVCENC_INT_H_INCLUDED
+
+#ifndef AVCINT_COMMON_H_INCLUDED
+#include "avcint_common.h"
+#endif
+#ifndef AVCENC_API_H_INCLUDED
+#include "avcenc_api.h"
+#endif
+
+typedef float OsclFloat; /* floating point type used throughout the encoder */
+
+/* Definition for the structures below */
+#define DEFAULT_ATTR 0 /* default memory attribute */
+#define MAX_INPUT_FRAME 30 /* some arbitrary number, it can be much higher than this. */
+#define MAX_REF_FRAME 16 /* max size of the RefPicList0 and RefPicList1 */
+#define MAX_REF_PIC_LIST 33
+
+#define MIN_QP 0
+#define MAX_QP 51
+#define SHIFT_QP 12
+#define LAMBDA_ACCURACY_BITS 16
+#define LAMBDA_FACTOR(lambda) ((int)((double)(1<<LAMBDA_ACCURACY_BITS)*lambda+0.5))
+
+
+#define DISABLE_THRESHOLDING 0
+// for better R-D performance
+#define _LUMA_COEFF_COST_ 4 //!< threshold for luma coeffs
+#define _CHROMA_COEFF_COST_ 4 //!< threshold for chroma coeffs, used to be 7
+#define _LUMA_MB_COEFF_COST_ 5 //!< threshold for luma coeffs of inter Macroblocks
+#define _LUMA_8x8_COEFF_COST_ 5 //!< threshold for luma coeffs of 8x8 Inter Partition
+#define MAX_VALUE 999999 //!< used for start value for some variables
+
+#define WEIGHTED_COST(factor,bits) (((factor)*(bits))>>LAMBDA_ACCURACY_BITS)
+#define MV_COST(f,s,cx,cy,px,py) (WEIGHTED_COST(f,mvbits[((cx)<<(s))-px]+mvbits[((cy)<<(s))-py]))
+#define MV_COST_S(f,cx,cy,px,py) (WEIGHTED_COST(f,mvbits[cx-px]+mvbits[cy-py]))
+
+/* for sub-pel search and interpolation */
+#define SUBPEL_PRED_BLK_SIZE 576 // 24x24
+#define REF_CENTER 75
+#define V2Q_H0Q 1
+#define V0Q_H2Q 2
+#define V2Q_H2Q 3
+
+/*
+#define V3Q_H0Q 1
+#define V3Q_H1Q 2
+#define V0Q_H1Q 3
+#define V1Q_H1Q 4
+#define V1Q_H0Q 5
+#define V1Q_H3Q 6
+#define V0Q_H3Q 7
+#define V3Q_H3Q 8
+#define V2Q_H3Q 9
+#define V2Q_H0Q 10
+#define V2Q_H1Q 11
+#define V2Q_H2Q 12
+#define V3Q_H2Q 13
+#define V0Q_H2Q 14
+#define V1Q_H2Q 15
+*/
+
+
+#define DEFAULT_OVERRUN_BUFFER_SIZE 1000
+
+// coefficient cost model: cost of a nonzero coefficient given its zero-run;
+// the second row (uniform high cost) effectively disables thresholding
+const uint8 COEFF_COST[2][16] =
+{
+ {3, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}
+};
+
+
+
+//! convert from H.263 QP to H.264 quant given by: quant=pow(2,QP/6)
+const int QP2QUANT[40] =
+{
+ 1, 1, 1, 1, 2, 2, 2, 2,
+ 3, 3, 3, 4, 4, 4, 5, 6,
+ 6, 7, 8, 9, 10, 11, 13, 14,
+ 16, 18, 20, 23, 25, 29, 32, 36,
+ 40, 45, 51, 57, 64, 72, 81, 91
+};
+
+
+/**
+This enumeration keeps track of the internal status of the encoder whether it is doing
+something. The encoding flow follows the order in which these states are declared.
+@publishedAll
+*/
+typedef enum
+{
+    AVCEnc_Initializing = 0, /* initial state, before SPS/PPS are encoded */
+    AVCEnc_Encoding_SPS,
+    AVCEnc_Encoding_PPS,
+    AVCEnc_Analyzing_Frame,
+    AVCEnc_WaitingForBuffer, // pending state
+    AVCEnc_Encoding_Frame,
+} AVCEnc_State ;
+
+/**
+Bitstream structure contains bitstream related parameters such as the pointer
+to the buffer, the current byte position and bit position. The content of the
+bitstreamBuffer will be in EBSP format as the emulation prevention codes are
+automatically inserted as the RBSP is recorded.
+@publishedAll
+*/
+typedef struct tagEncBitstream
+{
+    uint8 *bitstreamBuffer; /* pointer to buffer memory */
+    int buf_size;       /* size of the buffer memory */
+    int write_pos;      /* next position to write to bitstreamBuffer */
+    int count_zeros;   /* count number of consecutive zero */
+    uint current_word;  /* byte-swapped (MSB left) current word to write to buffer */
+    int bit_left;      /* number of bit left in current_word */
+    uint8   *overrunBuffer;  /* extra output buffer to prevent current frame skip due to output buffer overrun*/
+    int     oBSize;     /* size of allocated overrun buffer */
+    void   *encvid; /* pointer to the main encoder object (AVCEncObject) */
+
+} AVCEncBitstream;
+
+/**
+This structure is used for rate control purpose and other performance related control
+variables such as, RD cost, statistics, motion search stuffs, etc.
+should be in this structure.
+@publishedAll
+*/
+
+
+typedef struct tagRDInfo
+{
+    int QP;             /* quantization level used for this sample */
+    int actual_bits;    /* number of bits actually produced */
+    OsclFloat mad;      /* mean absolute difference (complexity measure) */
+    OsclFloat R_D;      /* rate-distortion measure */
+} RDInfo;
+
+typedef struct tagMultiPass
+{
+    /* multipass rate control data */
+    int target_bits;    /* target bits for current frame, = rc->T */
+    int actual_bits;    /* actual bits for current frame obtained after encoding, = rc->Rc*/
+    int QP;             /* quantization level for current frame, = rc->Qc*/
+    int prev_QP;        /* quantization level for previous frame */
+    int prev_prev_QP;   /* quantization level for previous frame before last*/
+    OsclFloat mad;      /* mad for current frame, = video->avgMAD*/
+    int bitrate;        /* bitrate for current frame */
+    OsclFloat framerate;    /* framerate for current frame*/
+
+    int nRe_Quantized;  /* control variable for multipass encoding, */
+    /* 0 : first pass */
+    /* 1 : intermediate pass(quantization and VLC loop only) */
+    /* 2 : final pass(de-quantization, idct, etc) */
+    /* 3 : macroblock level rate control */
+
+    int encoded_frames;     /* counter for all encoded frames */
+    int re_encoded_frames;  /* counter for all multipass encoded frames*/
+    int re_encoded_times;   /* counter for all times of multipass frame encoding */
+
+    /* Multiple frame prediction*/
+    RDInfo **pRDSamples;        /* pRDSamples[30][32], 30->30fps, 32 -> 5 bit quantizer, 32 candidates*/
+    int framePos;               /* specific position in previous multiple frames*/
+    int frameRange;             /* number of overall previous multiple frames */
+    int samplesPerFrame[30];    /* number of samples per frame, 30->30fps */
+
+    /* Bit allocation for scene change frames and high motion frames */
+    OsclFloat sum_mad;          /* accumulated mad over frames (used for averaging) */
+    int counter_BTsrc;  /* BT = Bit Transfer, bit transfer from low motion frames or less complicatedly compressed frames */
+    int counter_BTdst;  /* BT = Bit Transfer, bit transfer to scene change frames or high motion frames or more complicatedly compressed frames */
+    OsclFloat sum_QP;           /* accumulated QP over frames (used for averaging) */
+    int diff_counter;   /* diff_counter = -diff_counter_BTdst, or diff_counter_BTsrc */
+
+    /* For target bitrate or framerate update */
+    OsclFloat target_bits_per_frame;        /* = C = bitrate/framerate */
+    OsclFloat target_bits_per_frame_prev;   /* previous C */
+    OsclFloat aver_mad;                     /* so-far average mad could replace sum_mad */
+    OsclFloat aver_mad_prev;                /* previous average mad */
+    int   overlapped_win_size;              /* transition period of time */
+    int   encoded_frames_prev;              /* previous encoded_frames */
+} MultiPass;
+
+
+typedef struct tagdataPointArray
+{
+    int Qp;         /* quantization level of a past frame */
+    int Rp;         /* bits used by that frame */
+    OsclFloat Mp;   /* for MB-based RC */
+    struct tagdataPointArray *next; /* doubly-linked list of past data points */
+    struct tagdataPointArray *prev;
+} dataPointArray;
+
+typedef struct tagAVCRateControl
+{
+
+    /* these parameters are initialized by the users AVCEncParams */
+    /* bitrate-robustness tradeoff */
+    uint scdEnable; /* enable scene change detection */
+    int idrPeriod;  /* IDR period in number of frames */
+    int intraMBRate;   /* intra MB refresh rate per frame */
+    uint dpEnable;  /* enable data partitioning */
+
+    /* quality-complexity tradeoff */
+    uint subPelEnable;  /* enable quarter pel search */
+    int mvRange;    /* motion vector search range in +/- pixel */
+    uint subMBEnable;   /* enable sub MB prediction mode (4x4, 4x8, 8x4) */
+    uint rdOptEnable;   /* enable RD-opt mode selection */
+    uint twoPass;   /* flag for 2 pass encoding ( for future )*/
+    uint bidirPred; /* bi-directional prediction for B-frame. */
+
+    uint rcEnable;  /* enable rate control, '1' on, '0' const QP */
+    int initQP; /* initial QP */
+
+    /* note the following 3 params are for HRD, these triplets can be a series
+    of triplets as the generalized HRD allows. SEI message must be generated in this case. */
+    /* We no longer have to differentiate between CBR and VBR. The users to the
+    AVC encoder lib will do the mapping from CBR/VBR to these parameters. */
+    int32 bitRate;  /* target bit rate for the overall clip in bits/second*/
+    int32 cpbSize;  /* coded picture buffer size in bytes */
+    int32 initDelayOffset; /* initial CPB removal delay in bits */
+
+    OsclFloat frame_rate; /* frame rate */
+    int srcInterval;    /* source frame interval in msec */
+    int basicUnit;  /* number of macroblocks per BU */
+
+    /* Then internal parameters for the operation */
+    uint first_frame;   /* a flag for the first frame */
+    int lambda_mf; /* for example */
+    int totalSAD;    /* SAD of current frame */
+
+    /*******************************************/
+    /* this part comes from MPEG4 rate control */
+    int alpha;  /* weight for I frame */
+    int Rs;     /*bit rate for the sequence (or segment) e.g., 24000 bits/sec */
+    int Rc;     /*bits used for the current frame. It is the bit count obtained after encoding. */
+    int Rp;     /*bits to be removed from the buffer per picture. */
+    /*? is this the average one, or just the bits coded for the previous frame */
+    int Rps;    /*bit to be removed from buffer per src frame */
+    OsclFloat Ts;   /*number of seconds for the sequence  (or segment). e.g., 10 sec */
+    OsclFloat Ep;
+    OsclFloat Ec;   /*mean absolute difference for the current frame after motion compensation.*/
+    /*If the macroblock is intra coded, the original spatial pixel values are summed.*/
+    int Qc;     /*quantization level used for the current frame. */
+    int Nr;     /*number of P frames remaining for encoding.*/
+    int Rr; /*number of bits remaining for encoding this sequence (or segment).*/
+    int Rr_Old;
+    int T;      /*target bit to be used for the current frame.*/
+    int S;      /*number of bits used for encoding the previous frame.*/
+    int Hc; /*header and motion vector bits used in the current frame. It includes all the information except to the residual information.*/
+    int Hp; /*header and motion vector bits used in the previous frame. It includes all the information except to the residual information.*/
+    int Ql; /*quantization level used in the previous frame */
+    int Bs; /*buffer size e.g., R/2 */
+    int B;      /*current buffer level e.g., R/4 - start from the middle of the buffer */
+    OsclFloat X1;
+    OsclFloat X2;
+    OsclFloat X11;
+    OsclFloat M;            /*safe margin for the buffer */
+    OsclFloat smTick;    /*ratio of src versus enc frame rate */
+    double remnant;  /*remainder frame of src/enc frame for fine frame skipping */
+    int timeIncRes; /* vol->timeIncrementResolution */
+
+    dataPointArray   *end; /*quantization levels for the past (20) frames */
+
+    int     frameNumber; /* ranging from 0 to 20 nodes*/
+    int     w;
+    int     Nr_Original;
+    int     Nr_Old, Nr_Old2;
+    int     skip_next_frame;
+    int     Qdep;       /* smooth Q adjustment */
+    int     VBR_Enabled;
+
+    int totalFrameNumber; /* total coded frames, for debugging!!*/
+
+    char    oFirstTime; /* first-time flag (NOTE(review): confirm exact use against rate_control.cpp) */
+
+    int numFrameBits; /* keep track of number of bits of the current frame */
+    int NumberofHeaderBits;
+    int NumberofTextureBits;
+    int numMBHeaderBits;
+    int numMBTextureBits;
+    double *MADofMB;
+    int32 bitsPerFrame;
+
+    /* BX rate control, something like TMN8 rate control*/
+
+    MultiPass *pMP;
+
+    int     TMN_W;
+    int     TMN_TH;
+    int     VBV_fullness;
+    int     max_BitVariance_num; /* the number of the maximum bit variance within the given buffer with the unit of 10% of bitrate/framerate*/
+    int     encoded_frames; /* counter for all encoded frames */
+    int     low_bound;              /* bound for underflow detection, usually low_bound=-Bs/2, but could be changed in H.263 mode */
+    int     VBV_fullness_offset;    /* offset of VBV_fullness, usually is zero, but can be changed in H.263 mode*/
+    /* End BX */
+
+} AVCRateControl;
+
+
+/**
+This structure is for the motion vector information. */
+typedef struct tagMV
+{
+    int x;      /* horizontal MV component */
+    int y;      /* vertical MV component */
+    uint sad;   /* SAD associated with this motion vector */
+} AVCMV;
+
+/**
+This structure contains function pointers for different platform dependent implementation of
+functions. */
+typedef struct tagAVCEncFuncPtr
+{
+
+    int (*SAD_MB_HalfPel[4])(uint8*, uint8*, int, void *); /* SAD at half-pel positions, presumably indexed by V2Q_H0Q etc. -- confirm */
+    int (*SAD_Macroblock)(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info);
+
+} AVCEncFuncPtr;
+
+/**
+This structure contains information necessary for correct padding.
+*/
+typedef struct tagPadInfo
+{
+    int i;      /* horizontal position */
+    int width;  /* region width */
+    int j;      /* vertical position */
+    int height; /* region height */
+} AVCPadInfo;
+
+
+#ifdef HTFM
+/* statistics collected for the HTFM fast-SAD calculation (see nrmlz_th in AVCEncObject) */
+typedef struct tagHTFM_Stat
+{
+    int abs_dif_mad_avg;
+    uint countbreak;
+    int offsetArray[16];
+    int offsetRef[16];
+} HTFM_Stat;
+#endif
+
+
+/**
+This structure is the main object for AVC encoder library providing access to all
+global variables. It is allocated at PVAVCInitEncoder and freed at PVAVCCleanUpEncoder.
+@publishedAll
+*/
+typedef struct tagEncObject
+{
+
+    AVCCommonObj *common;
+
+    AVCEncBitstream     *bitstream; /* for current NAL */
+    uint8   *overrunBuffer;  /* extra output buffer to prevent current frame skip due to output buffer overrun*/
+    int     oBSize;     /* size of allocated overrun buffer */
+
+    /* rate control */
+    AVCRateControl      *rateCtrl; /* pointer to the rate control structure */
+
+    /* encoding operation */
+    AVCEnc_State        enc_state; /* encoding state */
+
+    AVCFrameIO          *currInput; /* pointer to the current input frame */
+
+    int                 currSliceGroup; /* currently encoded slice group id */
+
+    int     level[24][16], run[24][16]; /* scratch memory */
+    int     leveldc[16], rundc[16]; /* for DC component */
+    int     levelcdc[16], runcdc[16]; /* for chroma DC component */
+    int     numcoefcdc[2]; /* number of coefficient for chroma DC */
+    int     numcoefdc;      /* number of coefficients for DC component */
+
+    int     qp_const;
+    int     qp_const_c;
+    /********* intra prediction scratch memory **********************/
+    uint8   pred_i16[AVCNumI16PredMode][256]; /* save prediction for MB */
+    uint8   pred_i4[AVCNumI4PredMode][16];  /* save prediction for blk */
+    uint8   pred_ic[AVCNumIChromaMode][128];  /* for 2 chroma */
+
+    int     mostProbableI4Mode[16]; /* in raster scan order */
+    /********* motion compensation related variables ****************/
+    AVCMV   *mot16x16;          /* Saved motion vectors for 16x16 block*/
+    AVCMV(*mot16x8)[2];     /* Saved motion vectors for 16x8 block*/
+    AVCMV(*mot8x16)[2];     /* Saved motion vectors for 8x16 block*/
+    AVCMV(*mot8x8)[4];      /* Saved motion vectors for 8x8 block*/
+
+    /********* subpel position **************************************/
+    uint32  subpel_pred[SUBPEL_PRED_BLK_SIZE/*<<2*/]; /* all 16 sub-pel positions  */
+    uint8   *hpel_cand[9];      /* pointer to half-pel position */
+    int     best_hpel_pos;          /* best position */
+    uint8   qpel_cand[8][24*16];        /* pointer to quarter-pel position */
+    int     best_qpel_pos;
+    uint8   *bilin_base[9][4];      /* pointer to 4 position at top left of bilinear quarter-pel */
+
+    /* need for intra refresh rate */
+    uint8   *intraSearch;       /* Intra Array for MBs to be intra searched */
+    uint    firstIntraRefreshMBIndx; /* keep track for intra refresh */
+
+    int     i4_sad;             /* temporary for i4 mode SAD */
+    int     *min_cost;          /* Minimum cost for the all MBs */
+    int     lambda_mode;        /* Lagrange parameter for mode selection */
+    int     lambda_motion;      /* Lagrange parameter for MV selection */
+
+    uint8   *mvbits_array;      /* Table for bits spent in the cost function */
+    uint8   *mvbits;            /* An offset to the above array. */
+
+    /* to speedup the SAD calculation */
+    void *sad_extra_info;
+    uint8   currYMB[256];       /* interleaved current macroblock in HTFM order */
+
+#ifdef HTFM
+    int nrmlz_th[48];       /* Threshold for fast SAD calculation using HTFM */
+    HTFM_Stat htfm_stat;    /* For statistics collection */
+#endif
+
+    /* statistics */
+    int numIntraMB;         /* keep track of number of intra MB */
+
+    /* encoding complexity control */
+    uint fullsearch_enable; /* flag to enable full-pel full-search */
+
+    /* misc.*/
+    bool outOfBandParamSet; /* flag to enable out-of-band param set */
+
+    AVCSeqParamSet extSPS; /* for external SPS */
+    AVCPicParamSet extPPS; /* for external PPS */
+
+    /* time control */
+    uint32  prevFrameNum;   /* previous frame number starting from modTimeRef */
+    uint32  modTimeRef;     /* Reference modTime update every I-Vop*/
+    uint32  wrapModTime;    /* Offset to modTime Ref, rarely used */
+
+    uint    prevProcFrameNum;  /* previously processed frame number, could be skipped */
+    uint    prevCodedFrameNum;  /* previously encoded frame number */
+    /* POC related variables */
+    uint32  dispOrdPOCRef;      /* reference POC in display order unit. */
+
+    /* Function pointers */
+    AVCEncFuncPtr *functionPointer; /* store pointers to platform specific functions */
+
+    /* Application control data */
+    AVCHandle *avcHandle;
+
+
+} AVCEncObject;
+
+
+#endif /*AVCENC_INT_H_INCLUDED*/
+
diff --git a/media/libstagefright/codecs/avc/enc/src/avcenc_lib.h b/media/libstagefright/codecs/avc/enc/src/avcenc_lib.h
new file mode 100644
index 0000000..17e28ef
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/avcenc_lib.h
@@ -0,0 +1,1020 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+/**
+This file contains declarations of internal functions for the AVC encoder library.
+@publishedAll
+*/
+#ifndef AVCENC_LIB_H_INCLUDED
+#define AVCENC_LIB_H_INCLUDED
+
+#ifndef AVCLIB_COMMON_H_INCLUDED
+#include "avclib_common.h"
+#endif
+#ifndef AVCENC_INT_H_INCLUDED
+#include "avcenc_int.h"
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+    /*------------- block.c -------------------------*/
+
+    /**
+    This function performs residue calculation, transform, quantize, inverse quantize,
+    inverse transform and residue compensation on a 4x4 block.
+    \param "encvid" "Pointer to AVCEncObject."
+    \param "blkidx"  "raster scan block index of the current 4x4 block."
+    \param "cur"    "Pointer to the reconstructed block."
+    \param "org"    "Pointer to the original block."
+    \param "coef_cost"  "Pointer to the coefficient cost to be filled in and returned."
+    \return "Number of non-zero coefficients."
+    */
+    int dct_luma(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org, int *coef_cost);
+
+    /**
+    This function performs IDCT on an INTER macroblock.
+    \param "video"  "Pointer to AVCCommonObj."
+    \param "curL"   "Pointer to the origin of the macroblock on the current frame."
+    \param "currMB" "Pointer to the AVCMacroblock structure."
+    \param "picPitch" "Pitch of the current frame."
+    \return "void".
+    */
+    void MBInterIdct(AVCCommonObj *video, uint8 *curL, AVCMacroblock *currMB, int picPitch);
+
+    /**
+    This function performs residue calculation, transform, quantize, inverse quantize,
+    inverse transform and residue compensation on a macroblock.
+    \param "encvid" "Pointer to AVCEncObject."
+    \param "curL"   "Pointer to the reconstructed MB."
+    \param "orgL"   "Pointer to the original MB."
+    \return "void"
+    */
+    void dct_luma_16x16(AVCEncObject *encvid, uint8 *curL, uint8 *orgL);
+
+    /**
+    This function performs residue calculation, transform, quantize, inverse quantize,
+    inverse transform and residue compensation for chroma components of an MB.
+    \param "encvid" "Pointer to AVCEncObject."
+    \param "curC"   "Pointer to the reconstructed MB."
+    \param "orgC"   "Pointer to the original MB."
+    \param "cr"     "Flag whether it is Cr or not."
+    \return "void"
+    */
+    void dct_chroma(AVCEncObject *encvid, uint8 *curC, uint8 *orgC, int cr);
+
+    /*----------- init.c ------------------*/
+    /**
+    This function interprets the encoding parameters provided by users in encParam.
+    The results are kept in AVCEncObject, AVCSeqParamSet, AVCPicParamSet and AVCSliceHeader.
+    \param "avcHandle"  "Handle to the AVC encoder library object."
+    \param "encParam"   "Pointer to AVCEncParams."
+    \param "extSPS"     "External SPS template to be followed. NULL if not present."
+    \param "extPPS"     "External PPS template to be followed. NULL if not present."
+    \return "see AVCEnc_Status."
+    */
+    AVCEnc_Status  SetEncodeParam(AVCHandle *avcHandle, AVCEncParams *encParam,
+                                  void *extSPS, void *extPPS);
+
+    /**
+    This function verifies the encoding parameters whether they meet the set of supported
+    tool by a specific profile. If the profile is not set, it will just find the closest
+    profile instead of verifying it.
+    \param "video" "Pointer to AVCEncObject."
+    \param "seqParam" "Pointer to AVCSeqParamSet."
+    \param "picParam" "Pointer to AVCPicParamSet."
+    \return "AVCENC_SUCCESS if success,
+            AVCENC_PROFILE_NOT_SUPPORTED if the specified profile
+            is not supported by this version of the library,
+            AVCENC_TOOLS_NOT_SUPPORTED if any of the specified encoding tools are
+            not supported by the user-selected profile."
+    */
+    AVCEnc_Status VerifyProfile(AVCEncObject *video, AVCSeqParamSet *seqParam, AVCPicParamSet *picParam);
+
+    /**
+    This function verifies the encoding parameters whether they meet the requirement
+    for a specific level. If the level is not set, it will just find the closest
+    level instead of verifying it.
+    \param "video" "Pointer to AVCEncObject."
+    \param "seqParam" "Pointer to AVCSeqParamSet."
+    \param "picParam" "Pointer to AVCPicParamSet."
+    \return "AVCENC_SUCCESS if success,
+            AVCENC_LEVEL_NOT_SUPPORTED if the specified level
+            is not supported by this version of the library,
+            AVCENC_LEVEL_FAIL if any of the encoding parameters exceed
+            the range of the user-selected level."
+    */
+    AVCEnc_Status VerifyLevel(AVCEncObject *video, AVCSeqParamSet *seqParam, AVCPicParamSet *picParam);
+
+    /**
+    This function initializes the frame encoding by setting poc/frame_num related parameters. it
+    also performs motion estimation.
+    \param "encvid" "Pointer to the AVCEncObject."
+    \return "AVCENC_SUCCESS if success, AVCENC_NO_PICTURE if there is no input picture
+            in the queue to encode, AVCENC_POC_FAIL or AVCENC_CONSECUTIVE_NONREF for POC
+            related errors, AVCENC_NEW_IDR if new IDR is detected."
+    */
+    AVCEnc_Status InitFrame(AVCEncObject *encvid);
+
+    /**
+    This function initializes slice header related variables and other variables necessary
+    for encoding one slice.
+    \param "encvid" "Pointer to the AVCEncObject."
+    \return "AVCENC_SUCCESS if success."
+    */
+    AVCEnc_Status InitSlice(AVCEncObject *encvid);
+
+    /*----------- header.c ----------------*/
+    /**
+    This function performs bitstream encoding of the sequence parameter set NAL.
+    \param "encvid" "Pointer to the AVCEncObject."
+    \param "stream" "Pointer to AVCEncBitstream."
+    \return "AVCENC_SUCCESS if success or AVCENC_SPS_FAIL or others for unexpected failure which
+    should not occur. The SPS parameters should all be verified before this function is called."
+    */
+    AVCEnc_Status EncodeSPS(AVCEncObject *encvid, AVCEncBitstream *stream);
+
+    /**
+    This function encodes the VUI parameters into the sequence parameter set bitstream.
+    \param "stream" "Pointer to AVCEncBitstream."
+    \param "vui"    "Pointer to AVCVUIParams."
+    \return "nothing."
+    */
+    void EncodeVUI(AVCEncBitstream* stream, AVCVUIParams* vui);
+
+    /**
+    This function encodes HRD parameters into the sequence parameter set bitstream
+    \param "stream" "Pointer to AVCEncBitstream."
+    \param "hrd"    "Pointer to AVCHRDParams."
+    \return "nothing."
+    */
+    void EncodeHRD(AVCEncBitstream* stream, AVCHRDParams* hrd);
+
+
+    /**
+    This function performs bitstream encoding of the picture parameter set NAL.
+    \param "encvid" "Pointer to the AVCEncObject."
+    \param "stream" "Pointer to AVCEncBitstream."
+    \return "AVCENC_SUCCESS if success or AVCENC_PPS_FAIL or others for unexpected failure which
+    should not occur. The PPS parameters should all be verified before this function is called."
+    */
+    AVCEnc_Status EncodePPS(AVCEncObject *encvid, AVCEncBitstream *stream);
+
+    /**
+    This function encodes slice header information which has been initialized or fabricated
+    prior to entering this function.
+    \param "encvid" "Pointer to the AVCEncObject."
+    \param "stream" "Pointer to AVCEncBitstream."
+    \return "AVCENC_SUCCESS if success or bitstream fail statuses."
+    */
+    AVCEnc_Status EncodeSliceHeader(AVCEncObject *encvid, AVCEncBitstream *stream);
+
+    /**
+    This function encodes reference picture list reordering related syntax.
+    \param "video" "Pointer to AVCCommonObj."
+    \param "stream" "Pointer to AVCEncBitstream."
+    \param "sliceHdr" "Pointer to AVCSliceHeader."
+    \param "slice_type" "Value of slice_type - 5 if greater than 5."
+    \return "AVCENC_SUCCESS for success and AVCENC_FAIL otherwise."
+    */
+    AVCEnc_Status ref_pic_list_reordering(AVCCommonObj *video, AVCEncBitstream *stream, AVCSliceHeader *sliceHdr, int slice_type);
+
+    /**
+    This function encodes dec_ref_pic_marking related syntax.
+    \param "video" "Pointer to AVCCommonObj."
+    \param "stream" "Pointer to AVCEncBitstream."
+    \param "sliceHdr" "Pointer to AVCSliceHeader."
+    \return "AVCENC_SUCCESS for success and AVCENC_FAIL otherwise."
+    */
+    AVCEnc_Status dec_ref_pic_marking(AVCCommonObj *video, AVCEncBitstream *stream, AVCSliceHeader *sliceHdr);
+
+    /**
+    This function initializes the POC related variables and the POC syntax to be encoded
+    to the slice header derived from the disp_order and is_reference flag of the original
+    input frame to be encoded.
+    \param "video"  "Pointer to the AVCEncObject."
+    \return "AVCENC_SUCCESS if success,
+            AVCENC_POC_FAIL if the poc type is undefined or
+            AVCENC_CONSECUTIVE_NONREF if there are consecutive non-reference frame for POC type 2."
+    */
+    AVCEnc_Status InitPOC(AVCEncObject *video);
+
+    /**
+    This function performs POC related operation after a picture is encoded.
+    \param "video" "Pointer to AVCCommonObj."
+    \return "AVCENC_SUCCESS"
+    */
+    AVCEnc_Status PostPOC(AVCCommonObj *video);
+
+    /*----------- bitstream_io.c ----------------*/
+    /**
+    This function initializes the bitstream structure with the information given by
+    the users.
+    \param "bitstream"  "Pointer to the AVCEncBitstream structure."
+    \param "buffer"     "Pointer to the unsigned char buffer for output."
+    \param "buf_size"   "The size of the buffer in bytes."
+    \param "overrunBuffer"  "Pointer to extra overrun buffer."
+    \param "oBSize"     "Size of overrun buffer in bytes."
+    \return "AVCENC_SUCCESS if success, AVCENC_BITSTREAM_INIT_FAIL if fail"
+    */
+    AVCEnc_Status BitstreamEncInit(AVCEncBitstream *bitstream, uint8 *buffer, int buf_size,
+                                   uint8 *overrunBuffer, int oBSize);
+
+    /**
+    This function writes the data from the cache into the bitstream buffer. It also adds the
+    emulation prevention code if necessary.
+    \param "stream"     "Pointer to the AVCEncBitstream structure."
+    \return "AVCENC_SUCCESS if success or AVCENC_BITSTREAM_BUFFER_FULL if fail."
+    */
+    AVCEnc_Status AVCBitstreamSaveWord(AVCEncBitstream *stream);
+
+    /**
+    This function writes the codeword into the cache which will eventually be written to
+    the bitstream buffer.
+    \param "stream"     "Pointer to the AVCEncBitstream structure."
+    \param "nBits"      "Number of bits in the codeword."
+    \param "code"       "The codeword."
+    \return "AVCENC_SUCCESS if success or AVCENC_BITSTREAM_BUFFER_FULL if fail."
+    */
+    AVCEnc_Status BitstreamWriteBits(AVCEncBitstream *stream, int nBits, uint code);
+
+    /**
+    This function writes one bit of data into the cache which will eventually be written
+    to the bitstream buffer.
+    \param "stream"     "Pointer to the AVCEncBitstream structure."
+    \param "code"       "The codeword."
+    \return "AVCENC_SUCCESS if success or AVCENC_BITSTREAM_BUFFER_FULL if fail."
+    */
+    AVCEnc_Status BitstreamWrite1Bit(AVCEncBitstream *stream, uint code);
+
+    /**
+    This function adds trailing bits to the bitstream and reports back the final EBSP size.
+    \param "bitstream"  "Pointer to the AVCEncBitstream structure."
+    \param "nal_size"   "Output the final NAL size."
+    \return "AVCENC_SUCCESS if success or AVCENC_BITSTREAM_BUFFER_FULL if fail."
+    */
+    AVCEnc_Status BitstreamTrailingBits(AVCEncBitstream *bitstream, uint *nal_size);
+
+    /**
+    This function checks whether the current bit position is byte-aligned or not.
+    \param "stream" "Pointer to the bitstream structure."
+    \return "true if byte-aligned, false otherwise."
+    */
+    bool byte_aligned(AVCEncBitstream *stream);
+
+
+    /**
+    This function checks the availability of overrun buffer and switches to use it when
+    normal buffer is not big enough.
+    \param "stream" "Pointer to the bitstream structure."
+    \param "numExtraBytes" "Number of extra byte needed."
+    \return "AVCENC_SUCCESS or AVCENC_FAIL."
+    */
+    AVCEnc_Status AVCBitstreamUseOverrunBuffer(AVCEncBitstream* stream, int numExtraBytes);
+
+
+    /*-------------- intra_est.c ---------------*/
+
+    /** This function performs intra/inter decision based on ABE.
+    \param "min_cost"   "Pointer to the best inter cost; may be updated by the decision."
+    \param "cur"        "Pointer to the current MB origin in reconstructed frame."
+    \param "pitch"      "Pitch of the reconstructed frame."
+    \param "ave"        "Averaging flag (NOTE(review): confirm exact semantics against intra_est.cpp)."
+    \return "Boolean for intra mode."
+    */
+
+//bool IntraDecisionABE(AVCEncObject *encvid, int min_cost, uint8 *curL, int picPitch);
+    bool IntraDecision(int *min_cost, uint8 *cur, int pitch, bool ave);
+
+    /**
+    This function performs intra prediction mode search.
+    \param "encvid" "Pointer to AVCEncObject."
+    \param "mbnum"  "Current MB number."
+    \param "curL"   "Pointer to the current MB origin in reconstructed frame."
+    \param "picPitch"   "Pitch of the reconstructed frame."
+    \return "void."
+    */
+    void MBIntraSearch(AVCEncObject *encvid, int mbnum, uint8 *curL, int picPitch);
+
+    /**
+    This function generates all the I16 prediction modes for an MB and keep it in
+    encvid->pred_i16.
+    \param "encvid" "Pointer to AVCEncObject."
+    \return "void"
+    */
+    void intrapred_luma_16x16(AVCEncObject *encvid);
+
+    /**
+    This function calculates the cost of all I16 modes and compare them to get the minimum.
+    \param "encvid" "Pointer to AVCEncObject."
+    \param "orgY"   "Pointer to the original luma MB."
+    \param "min_cost" "Pointer to the minimal cost so-far."
+    \return "void"
+    */
+    void find_cost_16x16(AVCEncObject *encvid, uint8 *orgY, int *min_cost);
+
+    /**
+    This function calculates the cost of each I16 mode.
+    \param "org"    "Pointer to the original luma MB."
+    \param "org_pitch" "Stride size of the original frame."
+    \param "pred"   "Pointer to the prediction values."
+    \param "min_cost" "Minimal cost so-far."
+    \return "Cost"
+    */
+
+    int cost_i16(uint8 *org, int org_pitch, uint8 *pred, int min_cost);
+
+    /**
+    This function generates all the I4 prediction modes and selects the best one
+    for all the blocks inside a macroblock. It also calls dct_luma to generate the reconstructed
+    MB, and transform coefficients to be encoded.
+    \param "encvid" "Pointer to AVCEncObject."
+    \param "min_cost" "Pointer to the minimal cost so-far."
+    \return "void"
+    */
+    void mb_intra4x4_search(AVCEncObject *encvid, int *min_cost);
+
+    /**
+    This function calculates the most probable I4 mode of a given 4x4 block
+    from neighboring information according to AVC/H.264 standard.
+    \param "video"  "Pointer to AVCCommonObj."
+    \param "blkidx" "The current block index."
+    \return "Most probable mode."
+    */
+    int FindMostProbableI4Mode(AVCCommonObj *video, int blkidx);
+
+    /**
+    This function is where a lot of actions take place in the 4x4 block level inside
+    mb_intra4x4_search.
+    \param "encvid" "Pointer to AVCEncObject."
+    \param "blkidx" "The current 4x4 block index."
+    \param "cur"    "Pointer to the reconstructed block."
+    \param "org"    "Pointer to the original block."
+    \return "Minimal cost, also set currMB->i4Mode"
+    */
+    int blk_intra4x4_search(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org);
+
+    /**
+    This function calculates the cost of a given I4 prediction mode.
+    \param "org"    "Pointer to the original block."
+    \param "org_pitch"  "Stride size of the original frame."
+    \param "pred"   "Pointer to the prediction block. (encvid->pred_i4)"
+    \param "cost"   "Pointer to the minimal cost (to be updated)."
+    \return "void"
+    */
+    void cost_i4(uint8 *org, int org_pitch, uint8 *pred, uint16 *cost);
+
+    /**
+    This function performs chroma intra search. Each mode is saved in encvid->pred_ic.
+    \param "encvid" "Pointer to AVCEncObject."
+    \return "void"
+    */
+    void chroma_intra_search(AVCEncObject *encvid);
+
+    /**
+    This function calculates the cost of a chroma prediction mode.
+    \param "orgCb"  "Pointer to the original Cb block."
+    \param "orgCr"  "Pointer to the original Cr block."
+    \param "org_pitch"  "Stride size of the original frame."
+    \param "pred"   "Pointer to the prediction block (encvid->pred_ic)"
+    \param "mincost"    "Minimal cost so far."
+    \return "Cost."
+    */
+
+    int SATDChroma(uint8 *orgCb, uint8 *orgCr, int org_pitch, uint8 *pred, int mincost);
+
+ /*-------------- motion_comp.c ---------------*/
+
+ /**
+ This is the main function to perform inter prediction.
+ \param "encvid" "Pointer to AVCEncObject."
+ \param "video" "Pointer to AVCCommonObj."
+ \return "void".
+ */
+ void AVCMBMotionComp(AVCEncObject *encvid, AVCCommonObj *video);
+
+
+ /**
+ This function is called for luma motion compensation.
+ \param "ref" "Pointer to the origin of a reference luma."
+ \param "picwidth" "Width of the picture."
+ \param "picheight" "Height of the picture."
+ \param "x_pos" "X-coordinate of the predicted block in quarter pel resolution."
+ \param "y_pos" "Y-coordinate of the predicted block in quarter pel resolution."
+ \param "pred" "Pointer to the output predicted block."
+ \param "pred_pitch" "Width of pred."
+ \param "blkwidth" "Width of the current partition."
+ \param "blkheight" "Height of the current partition."
+ \return "void"
+ */
+ void eLumaMotionComp(uint8 *ref, int picwidth, int picheight,
+ int x_pos, int y_pos,
+ uint8 *pred, int pred_pitch,
+ int blkwidth, int blkheight);
+
+ void eFullPelMC(uint8 *in, int inwidth, uint8 *out, int outpitch,
+ int blkwidth, int blkheight);
+
+ void eHorzInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch,
+ int blkwidth, int blkheight, int dx);
+
+ void eHorzInterp2MC(int *in, int inpitch, uint8 *out, int outpitch,
+ int blkwidth, int blkheight, int dx);
+
+ void eHorzInterp3MC(uint8 *in, int inpitch, int *out, int outpitch,
+ int blkwidth, int blkheight);
+
+ void eVertInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch,
+ int blkwidth, int blkheight, int dy);
+
+ void eVertInterp2MC(uint8 *in, int inpitch, int *out, int outpitch,
+ int blkwidth, int blkheight);
+
+ void eVertInterp3MC(int *in, int inpitch, uint8 *out, int outpitch,
+ int blkwidth, int blkheight, int dy);
+
+ void eDiagonalInterpMC(uint8 *in1, uint8 *in2, int inpitch,
+ uint8 *out, int outpitch,
+ int blkwidth, int blkheight);
+
+ void eChromaMotionComp(uint8 *ref, int picwidth, int picheight,
+ int x_pos, int y_pos, uint8 *pred, int pred_pitch,
+ int blkwidth, int blkheight);
+
+ void eChromaDiagonalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
+ uint8 *pOut, int predPitch, int blkwidth, int blkheight);
+
+ void eChromaHorizontalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
+ uint8 *pOut, int predPitch, int blkwidth, int blkheight);
+
+ void eChromaVerticalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
+ uint8 *pOut, int predPitch, int blkwidth, int blkheight);
+
+ void eChromaFullMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
+ uint8 *pOut, int predPitch, int blkwidth, int blkheight);
+
+ void eChromaVerticalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
+ uint8 *pOut, int predPitch, int blkwidth, int blkheight);
+
+ void eChromaHorizontalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
+ uint8 *pOut, int predPitch, int blkwidth, int blkheight);
+
+ void eChromaDiagonalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
+ uint8 *pOut, int predPitch, int blkwidth, int blkheight);
+
+
+ /*-------------- motion_est.c ---------------*/
+
+ /**
+ Allocate and initialize arrays necessary for motion search algorithm.
+ \param "envid" "Pointer to AVCEncObject."
+ \return "AVC_SUCCESS or AVC_MEMORY_FAIL."
+ */
+ AVCEnc_Status InitMotionSearchModule(AVCHandle *avcHandle);
+
+ /**
+ Clean up memory allocated in InitMotionSearchModule.
+ \param "envid" "Pointer to AVCEncObject."
+ \return "void."
+ */
+ void CleanMotionSearchModule(AVCHandle *avcHandle);
+
+
+ /**
+ This function performs motion estimation of all macroblocks in a frame during the InitFrame.
+ The goal is to find the best MB partition for inter and find out if intra search is needed for
+ any MBs. This intra MB tendency can be used for scene change detection.
+ \param "encvid" "Pointer to AVCEncObject."
+ \return "void"
+ */
+ void AVCMotionEstimation(AVCEncObject *encvid);
+
+ /**
+ This function performs repetitive edge padding to the reference picture by adding 16 pixels
+ around the luma and 8 pixels around the chromas.
+ \param "refPic" "Pointer to the reference picture."
+ \return "void"
+ */
+ void AVCPaddingEdge(AVCPictureData *refPic);
+
+ /**
+ This function keeps track of intra refresh macroblock locations.
+ \param "encvid" "Pointer to the global array structure AVCEncObject."
+ \param "mblock" "Pointer to the array of AVCMacroblock structures."
+ \param "totalMB" "Total number of MBs in a frame."
+ \param "numRefresh" "Number of MB to be intra refresh in a single frame."
+ \return "void"
+ */
+ void AVCRasterIntraUpdate(AVCEncObject *encvid, AVCMacroblock *mblock, int totalMB, int numRefresh);
+
+#ifdef HTFM
+ void InitHTFM(VideoEncData *encvid, HTFM_Stat *htfm_stat, double *newvar, int *collect);
+ void UpdateHTFM(AVCEncObject *encvid, double *newvar, double *exp_lamda, HTFM_Stat *htfm_stat);
+ void CalcThreshold(double pf, double exp_lamda[], int nrmlz_th[]);
+ void HTFMPrepareCurMB_AVC(AVCEncObject *encvid, HTFM_Stat *htfm_stat, uint8 *cur, int pitch);
+#endif
+
+ /**
+ This function reads the input MB into a smaller faster memory space to minimize the cache miss.
+ \param "encvid" "Pointer to the global AVCEncObject."
+ \param "cur" "Pointer to the original input macroblock."
+ \param "pitch" "Stride size of the input frame (luma)."
+ \return "void"
+ */
+ void AVCPrepareCurMB(AVCEncObject *encvid, uint8 *cur, int pitch);
+
+ /**
+ Performs motion vector search for a macroblock.
+ \param "encvid" "Pointer to AVCEncObject structure."
+ \param "cur" "Pointer to the current macroblock in the input frame."
+ \param "best_cand" "Array of best candidates (to be filled in and returned)."
+ \param "i0" "X-coordinate of the macroblock."
+ \param "j0" "Y-coordinate of the macroblock."
+ \param "type_pred" "Indicates the type of operations."
+ \param "FS_en" "Flag for fullsearch enable."
+ \param "hp_guess" "Guess for half-pel search."
+ \return "void"
+ */
+ void AVCMBMotionSearch(AVCEncObject *encvid, uint8 *cur, uint8 *best_cand[],
+ int i0, int j0, int type_pred, int FS_en, int *hp_guess);
+
+//AVCEnc_Status AVCMBMotionSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum,
+// int num_pass);
+
+ /**
+ Perform full-pel exhaustive search around the predicted MV.
+ \param "encvid" "Pointer to AVCEncObject structure."
+ \param "prev" "Pointer to the reference frame."
+ \param "cur" "Pointer to the input macroblock."
+ \param "imin" "Pointer to minimal mv (x)."
+ \param "jmin" "Pointer to minimal mv (y)."
+ \param "ilow, ihigh, jlow, jhigh" "Lower bound on search range."
+ \param "cmvx, cmvy" "Predicted MV value."
+
+ \return "The cost function of the best candidate."
+ */
+ int AVCFullSearch(AVCEncObject *encvid, uint8 *prev, uint8 *cur,
+ int *imin, int *jmin, int ilow, int ihigh, int jlow, int jhigh,
+ int cmvx, int cmvy);
+
+ /**
+ Select candidates from neighboring blocks according to the type of the
+ prediction selection.
+ \param "mvx" "Pointer to the candidate, x-coordinate."
+ \param "mvy" "Pointer to the candidate, y-coordinate."
+ \param "num_can" "Pointer to the number of candidates returned."
+ \param "imb" "The MB index x-coordinate."
+ \param "jmb" "The MB index y-coordinate."
+ \param "type_pred" "Type of the prediction."
+ \param "cmvx, cmvy" "Pointer to predicted MV (modified version)."
+ \return "void."
+ */
+ void AVCCandidateSelection(int *mvx, int *mvy, int *num_can, int imb, int jmb,
+ AVCEncObject *encvid, int type_pred, int *cmvx, int *cmvy);
+
+ /**
+ Utility function to move the values in the array dn according to the new
+ location to avoid redundant calculation.
+ \param "dn" "Array of integer of size 9."
+ \param "new_loc" "New location index."
+ \return "void."
+ */
+ void AVCMoveNeighborSAD(int dn[], int new_loc);
+
+ /**
+ Find minimum index of dn.
+ \param "dn" "Array of integer of size 9."
+ \return "The index of dn with the smallest dn[] value."
+ */
+ int AVCFindMin(int dn[]);
+
+
+ /*------------- findhalfpel.c -------------------*/
+
+ /**
+ Search for the best half-pel resolution MV around the full-pel MV.
+ \param "encvid" "Pointer to the global AVCEncObject structure."
+ \param "cur" "Pointer to the current macroblock."
+ \param "mot" "Pointer to the AVCMV array of the frame."
+ \param "ncand" "Pointer to the origin of the fullsearch result."
+ \param "xpos" "The current MB position in x."
+ \param "ypos" "The current MB position in y."
+ \param "hp_guess" "Input to help speedup the search."
+ \param "cmvx, cmvy" "Predicted motion vector use for mvcost."
+ \return "Minimal cost (SATD) without MV cost. (for rate control purpose)"
+ */
+ int AVCFindHalfPelMB(AVCEncObject *encvid, uint8 *cur, AVCMV *mot, uint8 *ncand,
+ int xpos, int ypos, int hp_guess, int cmvx, int cmvy);
+
+ /**
+ This function generates sub-pel pixels required to do subpel MV search.
+ \param "subpel_pred" "Pointer to 2-D array, each array for each position."
+ \param "ncand" "Pointer to the full-pel center position in ref frame."
+ \param "lx" "Pitch of the ref frame."
+ \return "void"
+ */
+ void GenerateHalfPelPred(uint8 *subpel_pred, uint8 *ncand, int lx);
+
+ /**
+ This function calculates vertical interpolation at half-point of size 4x17.
+ \param "dst" "Pointer to destination."
+ \param "ref" "Pointer to the starting reference pixel."
+ \return "void."
+ */
+ void VertInterpWClip(uint8 *dst, uint8 *ref);
+
+ /**
+ This function generates quarter-pel pixels around the best half-pel result
+ during the sub-pel MV search.
+ \param "bilin_base" "Array of pointers to be used as basis for q-pel interp."
+ \param "qpel_pred" "Array of pointers pointing to quarter-pel candidates."
+ \param "hpel_pos" "Best half-pel position at the center."
+ \return "void"
+ */
+ void GenerateQuartPelPred(uint8 **bilin_base, uint8 *qpel_pred, int hpel_pos);
+
+ /**
+ This function calculates the SATD of a subpel candidate.
+ \param "cand" "Pointer to a candidate."
+ \param "cur" "Pointer to the current block."
+ \param "dmin" "Min-so-far SATD."
+ \return "Sum of Absolute Transformed Difference."
+ */
+ int SATD_MB(uint8 *cand, uint8 *cur, int dmin);
+
+ /*------------- rate_control.c -------------------*/
+
+ /** This function is a utility function. It returns average QP of the previously encoded frame.
+ \param "rateCtrl" "Pointer to AVCRateControl structure."
+ \return "Average QP."
+ */
+ int GetAvgFrameQP(AVCRateControl *rateCtrl);
+
+ /**
+ This function takes the timestamp of the input and determine whether it should be encoded
+ or skipped.
+ \param "encvid" "Pointer to the AVCEncObject structure."
+ \param "rateCtrl" "Pointer to the AVCRateControl structure."
+ \param "modTime" "The 32 bit timestamp of the input frame."
+ \param "frameNum" "Pointer to the frame number if to be encoded."
+ \return "AVC_SUCCESS or else."
+ */
+ AVCEnc_Status RCDetermineFrameNum(AVCEncObject *encvid, AVCRateControl *rateCtrl, uint32 modTime, uint *frameNum);
+
+ /**
+ This function updates the buffer fullness when frames are dropped either by the
+ rate control algorithm or by the users to make sure that target bit rate is still met.
+ \param "video" "Pointer to the common object structure."
+ \param "rateCtrl" "Pointer to rate control structure."
+ \param "frameInc" "Difference of the current frame number and previous frame number."
+ \return "void."
+ */
+ void RCUpdateBuffer(AVCCommonObj *video, AVCRateControl *rateCtrl, int frameInc);
+
+ /**
+ This function initializes the rate control module and allocates necessary buffers to do the job.
+ \param "avcHandle" "Pointer to the encoder handle."
+ \return "AVCENC_SUCCESS or AVCENC_MEMORY_FAIL."
+ */
+ AVCEnc_Status InitRateControlModule(AVCHandle *avcHandle);
+
+ /**
+ This function frees buffers allocated in InitRateControlModule.
+ \param "avcHandle" "Pointer to the encoder handle."
+ \return "void."
+ */
+ void CleanupRateControlModule(AVCHandle *avcHandle);
+
+ /**
+ This function is called at the beginning of each GOP or the first IDR frame. It calculates
+ target bits for a GOP.
+ \param "encvid" "Pointer to the encoder object."
+ \return "void."
+ */
+ void RCInitGOP(AVCEncObject *encvid);
+
+ /**
+ This function calculates target bits for a particular frame.
+ \param "video" "Pointer to the AVCEncObject structure."
+ \return "void"
+ */
+ void RCInitFrameQP(AVCEncObject *video);
+
+ /**
+ This function calculates QP for the upcoming frame or basic unit.
+ \param "encvid" "Pointer to the encoder object."
+ \param "rateCtrl" "Pointer to the rate control object."
+ \return "QP value ranging from 0-51."
+ */
+ int RCCalculateQP(AVCEncObject *encvid, AVCRateControl *rateCtrl);
+
+ /**
+ This function translates the luma QP to chroma QP and calculates lambda based on QP.
+ \param "video" "Pointer to the AVCEncObject structure."
+ \return "void"
+ */
+ void RCInitChromaQP(AVCEncObject *encvid);
+
+ /**
+ This function is called before encoding each macroblock.
+ \param "encvid" "Pointer to the encoder object."
+ \return "void."
+ */
+ void RCInitMBQP(AVCEncObject *encvid);
+
+ /**
+ This function updates bits usage stats after encoding a macroblock.
+ \param "video" "Pointer to AVCCommonObj."
+ \param "rateCtrl" "Pointer to AVCRateControl."
+ \param "num_header_bits" "Number of bits used for MB header."
+ \param "num_texture_bits" "Number of bits used for MB texture."
+ \return "void"
+ */
+ void RCPostMB(AVCCommonObj *video, AVCRateControl *rateCtrl, int num_header_bits, int num_texture_bits);
+
+ /**
+ This function calculates the difference between prediction and original MB.
+ \param "encvid" "Pointer to the encoder object."
+ \param "currMB" "Pointer to the current macroblock structure."
+ \param "orgL" "Pointer to the original MB."
+ \param "orgPitch" "Pointer to the original picture pitch."
+ \return "void."
+ */
+ void RCCalculateMAD(AVCEncObject *encvid, AVCMacroblock *currMB, uint8 *orgL, int orgPitch);
+
+ /**
+ Restore QP related parameters of previous MB when current MB is skipped.
+ \param "currMB" "Pointer to the current macroblock."
+ \param "video" "Pointer to the common video structure."
+ \param "encvid" "Pointer to the global encoding structure."
+ \return "void"
+ */
+ void RCRestoreQP(AVCMacroblock *currMB, AVCCommonObj *video, AVCEncObject *encvid);
+
+ /**
+ This function is called after done with a frame.
+ \param "encvid" "Pointer to the encoder object."
+ \return "AVCENC_SUCCESS or AVCENC_SKIPPED_PICTURE when bufer overflow (need to discard current frame)."
+ */
+ AVCEnc_Status RCUpdateFrame(AVCEncObject *encvid);
+
+ /*--------- residual.c -------------------*/
+
+ /**
+ This function encodes the intra pcm data and fill it in the corresponding location
+ on the current picture.
+ \param "video" "Pointer to AVCEncObject."
+ \return "AVCENC_SUCCESS if success, or else for bitstream errors."
+ */
+ AVCEnc_Status EncodeIntraPCM(AVCEncObject *video);
+
+ /**
+ This function performs CAVLC syntax encoding on the run and level information of the coefficients.
+ The level and run arrays are elements in AVCEncObject structure, populated by TransQuantZZ,
+ TransQuantIntraDC and TransQuantChromaDC functions.
+ \param "video" "Pointer to AVCEncObject."
+ \param "type" "One of AVCResidualType for a particular 4x4 block."
+ \param "bindx" "Block index or number of nonzero coefficients for AVC_Intra16DC and AVC_ChromaDC mode."
+ \param "currMB" "Pointer to the current macroblock structure."
+ \return "AVCENC_SUCCESS for success."
+ \Note "This function has 32-bit machine specific instruction!!!!"
+ */
+ AVCEnc_Status enc_residual_block(AVCEncObject *encvid, AVCResidualType type, int bindx, AVCMacroblock *currMB);
+
+
+ /*------------- sad.c ---------------------------*/
+
+
+ int AVCSAD_MB_HalfPel_Cxhyh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info);
+ int AVCSAD_MB_HalfPel_Cyh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info);
+ int AVCSAD_MB_HalfPel_Cxh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info);
+ int AVCSAD_Macroblock_C(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info);
+
+#ifdef HTFM /* 3/2/1, Hypothesis Testing Fast Matching */
+ int AVCSAD_MB_HP_HTFM_Collectxhyh(uint8 *ref, uint8 *blk, int dmin_x, void *extra_info);
+ int AVCSAD_MB_HP_HTFM_Collectyh(uint8 *ref, uint8 *blk, int dmin_x, void *extra_info);
+ int AVCSAD_MB_HP_HTFM_Collectxh(uint8 *ref, uint8 *blk, int dmin_x, void *extra_info);
+ int AVCSAD_MB_HP_HTFMxhyh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info);
+ int AVCSAD_MB_HP_HTFMyh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info);
+ int AVCSAD_MB_HP_HTFMxh(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info);
+ int AVCSAD_MB_HTFM_Collect(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info);
+ int AVCSAD_MB_HTFM(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info);
+#endif
+
+
+ /*------------- slice.c -------------------------*/
+
+ /**
+ This function performs the main encoding loop for a slice.
+ \param "encvid" "Pointer to AVCEncObject."
+ \return "AVCENC_SUCCESS for success, AVCENC_PICTURE_READY for end-of-picture and
+ AVCENC_FAIL or AVCENC_SLICE_EMPTY otherwise."
+ */
+ AVCEnc_Status AVCEncodeSlice(AVCEncObject *encvid);
+
+ /**
+ This function performs the main encoding operation for one macroblock.
+ \param "video" "pointer to AVCEncObject."
+ \return "AVCENC_SUCCESS for success, or other bitstream related failure status."
+ */
+ AVCEnc_Status EncodeMB(AVCEncObject *video);
+
+ /**
+ This function calls prediction INTRA/INTER functions, transform,
+ quantization and zigzag scanning to get the run-level symbols.
+ \param "encvid" "pointer to AVCEncObject."
+ \param "curL" "pointer to Luma component of the current frame.
+ \param "curCb" "pointer to Cb component of the current frame.
+ \param "curCr" "pointer to Cr component of the current frame.
+ \return "void for now."
+ */
+ void MBPredTransQuantZZ(AVCEncObject *encvid, uint8 *curL, uint8 *curCb, uint8 *curCr);
+
+ /**
+ This function copies the content of the prediction MB into the reconstructed YUV
+ frame directly.
+ \param "curL" "Pointer to the destination Y component."
+ \param "curCb" "Pointer to the destination Cb component."
+ \param "curCr" "Pointer to the destination Cr component."
+ \param "predBlock" "Pointer to the prediction MB."
+ \param "picWidth" "The width of the frame."
+ \return "None."
+ */
+ void Copy_MB(uint8 *curL, uint8 *curCb, uint8 *curCr, uint8 *predBlock, int picWidth);
+
+ /**
+ This function encodes the mb_type, CBP, prediction mode, ref idx and MV.
+ \param "currMB" "Pointer to the current macroblock structure."
+ \param "video" "Pointer to the AVCEncObject structure."
+ \return "AVCENC_SUCCESS for success or else for fail."
+ */
+ AVCEnc_Status EncodeMBHeader(AVCMacroblock *currMB, AVCEncObject *video);
+
+ /**
+ This function finds the right mb_type for a macroblock given the mbMode, CBP,
+ NumPart, PredPartMode.
+ \param "currMB" "Pointer to the current macroblock structure."
+ \param "slice_type" "Value of the slice_type."
+ \return "mb_type."
+ */
+ uint InterpretMBType(AVCMacroblock *currMB, int slice_type);
+
+ /**
+ This function encodes the mb_pred part of the macroblock data.
+ \param "video" "Pointer to the AVCCommonObj structure."
+ \param "currMB" "Pointer to the current macroblock structure."
+ \param "stream" "Pointer to the AVCEncBitstream structure."
+ \return "AVCENC_SUCCESS for success or bitstream fail status."
+ */
+ AVCEnc_Status mb_pred(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream);
+
+ /**
+ This function encodes the sub_mb_pred part of the macroblock data.
+ \param "video" "Pointer to the AVCCommonObj structure."
+ \param "currMB" "Pointer to the current macroblock structure."
+ \param "stream" "Pointer to the AVCEncBitstream structure."
+ \return "AVCENC_SUCCESS for success or bitstream fail status."
+ */
+ AVCEnc_Status sub_mb_pred(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream);
+
+ /**
+ This function interprets the sub_mb_type and sets the necessary information
+ in the macroblock structure when the slice type is AVC_P_SLICE.
+ \param "mblock" "Pointer to current AVCMacroblock."
+ \param "sub_mb_type" "From the syntax bitstream."
+ \return "void"
+ */
+ void InterpretSubMBTypeP(AVCMacroblock *mblock, uint *sub_mb_type);
+
+ /**
+ This function interprets the sub_mb_type and sets the necessary information
+ in the macroblock structure when the slice type is AVC_B_SLICE.
+ \param "mblock" "Pointer to current AVCMacroblock."
+ \param "sub_mb_type" "From the syntax bitstream."
+ \return "void"
+ */
+ void InterpretSubMBTypeB(AVCMacroblock *mblock, uint *sub_mb_type);
+
+ /**
+ This function encodes intra 4x4 mode. It calculates the predicted I4x4 mode and the
+ remnant to be encoded.
+ \param "video" "Pointer to AVCEncObject structure."
+ \param "currMB" "Pointer to the AVCMacroblock structure."
+ \param "stream" "Pointer to AVCEncBitstream sructure."
+ \return "AVCENC_SUCCESS for success."
+ */
+ AVCEnc_Status EncodeIntra4x4Mode(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream);
+
+ /*------------- vlc_encode.c -----------------------*/
+ /**
+ This function encodes and writes a value into an Exp-Golomb codeword.
+ \param "bitstream" "Pointer to AVCEncBitstream."
+ \param "codeNum" "Pointer to the value of the codeNum."
+ \return "AVCENC_SUCCESS for success or bitstream error messages for fail."
+ */
+ AVCEnc_Status ue_v(AVCEncBitstream *bitstream, uint codeNum);
+
+ /**
+ This function maps and encodes signed Exp-Golomb codes.
+ \param "bitstream" "Pointer to AVCEncBitstream."
+ \param "value" "Pointer to syntax element value."
+ \return "AVCENC_SUCCESS or AVCENC_FAIL."
+ */
+ AVCEnc_Status se_v(AVCEncBitstream *bitstream, int value);
+
+ /**
+ This function maps and encodes truncated Exp-Golomb codes.
+ \param "bitstream" "Pointer to AVCEncBitstream."
+ \param "value" "Pointer to syntax element value."
+ \param "range" "Range of the value as input to determine the algorithm."
+ \return "AVCENC_SUCCESS or AVCENC_FAIL."
+ */
+ AVCEnc_Status te_v(AVCEncBitstream *bitstream, uint value, uint range);
+
+ /**
+ This function creates Exp-Golomb codeword from codeNum.
+ \param "bitstream" "Pointer to AVCEncBitstream."
+ \param "codeNum" "Pointer to the codeNum value."
+ \return "AVCENC_SUCCESS for success or bitstream error messages for fail."
+ */
+ AVCEnc_Status SetEGBitstring(AVCEncBitstream *bitstream, uint codeNum);
+
+ /**
+ This function performs CAVLC encoding of the CBP (coded block pattern) of a macroblock
+ by calling ue_v() and then mapping the CBP to the corresponding VLC codeNum.
+ \param "currMB" "Pointer to the current AVCMacroblock structure."
+ \param "stream" "Pointer to the AVCEncBitstream."
+ \return "void"
+ */
+ AVCEnc_Status EncodeCBP(AVCMacroblock *currMB, AVCEncBitstream *stream);
+
+ /**
+ This function encodes trailing ones and total coefficient.
+ \param "stream" "Pointer to the AVCEncBitstream."
+ \param "TrailingOnes" "The trailing one variable output."
+ \param "TotalCoeff" "The total coefficient variable output."
+ \param "nC" "Context for number of nonzero coefficient (prediction context)."
+ \return "AVCENC_SUCCESS for success or else for bitstream failure."
+ */
+ AVCEnc_Status ce_TotalCoeffTrailingOnes(AVCEncBitstream *stream, int TrailingOnes, int TotalCoeff, int nC);
+
+ /**
+ This function encodes trailing ones and total coefficient for chroma DC block.
+ \param "stream" "Pointer to the AVCEncBitstream."
+ \param "TrailingOnes" "The trailing one variable output."
+ \param "TotalCoeff" "The total coefficient variable output."
+ \return "AVCENC_SUCCESS for success or else for bitstream failure."
+ */
+ AVCEnc_Status ce_TotalCoeffTrailingOnesChromaDC(AVCEncBitstream *stream, int TrailingOnes, int TotalCoeff);
+
+ /**
+ This function encodes total_zeros value as in Table 9-7 and 9-8.
+ \param "stream" "Pointer to the AVCEncBitstream."
+ \param "TotalZeros" "The total_zeros value."
+ \param "TotalCoeff" "The total coefficient variable output."
+ \return "AVCENC_SUCCESS for success or else for bitstream failure."
+ */
+ AVCEnc_Status ce_TotalZeros(AVCEncBitstream *stream, int total_zeros, int TotalCoeff);
+
+ /**
+ This function encodes total_zeros VLC syntax for chroma DC as in Table 9-9.
+ \param "stream" "Pointer to the AVCEncBitstream."
+ \param "TotalZeros" "The total_zeros value."
+ \param "TotalCoeff" "The total coefficient variable output."
+ \return "AVCENC_SUCCESS for success or else for bitstream failure."
+ */
+ AVCEnc_Status ce_TotalZerosChromaDC(AVCEncBitstream *stream, int total_zeros, int TotalCoeff);
+
+ /**
+ This function encodes run_before VLC syntax as in Table 9-10.
+ \param "stream" "Pointer to the AVCEncBitstream."
+ \param "run_before" "The run_before value."
+ \param "zerosLeft" "The context for number of zeros left."
+ \return "AVCENC_SUCCESS for success or else for bitstream failure."
+ */
+ AVCEnc_Status ce_RunBefore(AVCEncBitstream *stream, int run_before, int zerosLeft);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _AVCENC_LIB_H_ */
+
diff --git a/media/libstagefright/codecs/avc/enc/src/bitstream_io.cpp b/media/libstagefright/codecs/avc/enc/src/bitstream_io.cpp
new file mode 100644
index 0000000..75ab514
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/bitstream_io.cpp
@@ -0,0 +1,336 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+#include "avcenc_lib.h"
+
+#define WORD_SIZE 32
+
+/* array for trailing bit pattern as function of number of bits */
+/* the first one is unused. */
+const static uint8 trailing_bits[9] = {0, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
+
+/* ======================================================================== */
+/* Function : BitstreamInit() */
+/* Date : 11/4/2003 */
+/* Purpose : Populate bitstream structure with bitstream buffer and size */
+/* it also initializes internal data */
+/* In/out : */
+/* Return : AVCENC_SUCCESS if successed, AVCENC_FAIL if failed. */
+/* Modified : */
+/* ======================================================================== */
+/* |--------|--------|----~~~~~-----|---------|---------|---------|
+ ^ ^write_pos ^buf_size
+ bitstreamBuffer <--------->
+ current_word
+
+ |-----xxxxxxxxxxxxx| = current_word 32 or 16 bits
+ <---->
+ bit_left
+ ======================================================================== */
+
+AVCEnc_Status BitstreamEncInit(AVCEncBitstream *stream, uint8 *buffer, int buf_size,
+                               uint8 *overrunBuffer, int oBSize)
+{
+    /* reject a NULL stream/buffer or a non-positive buffer size */
+    if (stream == NULL || buffer == NULL || buf_size <= 0)
+    {
+        return AVCENC_BITSTREAM_INIT_FAIL;
+    }
+
+    stream->bitstreamBuffer = buffer;
+
+    stream->buf_size = buf_size;
+
+    /* next byte position to write into bitstreamBuffer */
+    stream->write_pos = 0;
+
+    /* running count of consecutive zero bytes, used by AVCBitstreamSaveWord
+       to decide where emulation-prevention bytes go */
+    stream->count_zeros = 0;
+
+    stream->current_word = 0;
+
+    /* all WORD_SIZE (32) bits of current_word are still free */
+    stream->bit_left = WORD_SIZE;
+
+    /* optional fallback buffer used when the primary buffer overruns */
+    stream->overrunBuffer = overrunBuffer;
+
+    stream->oBSize = oBSize;
+
+    return AVCENC_SUCCESS;
+}
+
+/* ======================================================================== */
+/* Function : AVCBitstreamSaveWord() */
+/* Date : 3/29/2004 */
+/* Purpose : Save the current_word into the buffer, byte-swap, and */
+/* add emulation prevention insertion. */
+/* In/out : */
+/* Return : AVCENC_SUCCESS if successed, AVCENC_WRITE_FAIL if buffer is */
+/* full. */
+/* Modified : */
+/* ======================================================================== */
+AVCEnc_Status AVCBitstreamSaveWord(AVCEncBitstream *stream)
+{
+    int num_bits;
+    uint8 *write_pnt, byte;
+    uint current_word;
+
+    /* check number of bytes in current_word, must always be byte-aligned!!!! */
+    num_bits = WORD_SIZE - stream->bit_left; /* must be multiple of 8 !!*/
+
+    /* make sure there is room for the bytes plus possible EPBs; if not,
+       try to switch to (or grow) the overrun buffer */
+    if (stream->buf_size - stream->write_pos <= (num_bits >> 3) + 2) /* 2 more bytes for possible EPBS */
+    {
+        if (AVCENC_SUCCESS != AVCBitstreamUseOverrunBuffer(stream, (num_bits >> 3) + 2))
+        {
+            return AVCENC_BITSTREAM_BUFFER_FULL;
+        }
+    }
+
+    /* write word, byte-by-byte */
+    write_pnt = stream->bitstreamBuffer + stream->write_pos;
+    current_word = stream->current_word;
+    while (num_bits) /* no need to check stream->buf_size and stream->write_pos, taken care already */
+    {
+        num_bits -= 8;
+        byte = (current_word >> num_bits) & 0xFF; /* most-significant byte first */
+        if (byte != 0)
+        {
+            *write_pnt++ = byte;
+            stream->write_pos++;
+            stream->count_zeros = 0; /* nonzero byte resets the zero run */
+        }
+        else
+        {
+            stream->count_zeros++;
+            *write_pnt++ = byte;
+            stream->write_pos++;
+            if (stream->count_zeros == 2)
+            { /* for num_bits = 32, this can add 2 more bytes extra for EPBS */
+                /* insert emulation prevention byte 0x03 after two consecutive
+                   zero bytes so no start-code pattern appears in the stream */
+                *write_pnt++ = 0x3;
+                stream->write_pos++;
+                stream->count_zeros = 0;
+            }
+        }
+    }
+
+    /* reset current_word and bit_left */
+    stream->current_word = 0;
+    stream->bit_left = WORD_SIZE;
+
+    return AVCENC_SUCCESS;
+}
+
+/* ======================================================================== */
+/* Function : BitstreamWriteBits() */
+/* Date : 3/29/2004 */
+/* Purpose : Write up to machine word. */
+/* In/out : Unused bits in 'code' must be all zeros. */
+/* Return : AVCENC_SUCCESS if successed, AVCENC_WRITE_FAIL if buffer is */
+/* full. */
+/* Modified : */
+/* ======================================================================== */
+AVCEnc_Status BitstreamWriteBits(AVCEncBitstream *stream, int nBits, uint code)
+{
+    AVCEnc_Status status = AVCENC_SUCCESS;
+    int bit_left = stream->bit_left;
+    uint current_word = stream->current_word;
+
+    //DEBUG_LOG(userData,AVC_LOGTYPE_INFO,"BitstreamWriteBits",nBits,-1);
+
+    if (nBits > WORD_SIZE) /* has to be taken care of specially */
+    {
+        return AVCENC_FAIL; /* for now */
+        /* otherwise, break it down to 2 write of less than 16 bits at a time. */
+    }
+
+    if (nBits <= bit_left) /* more bits left in current_word */
+    {
+        /* append code to the low end of current_word */
+        stream->current_word = (current_word << nBits) | code;
+        stream->bit_left -= nBits;
+        if (stream->bit_left == 0) /* prepare for the next word */
+        {
+            status = AVCBitstreamSaveWord(stream);
+            return status;
+        }
+    }
+    else
+    {
+        /* code straddles a word boundary: put the high (nBits - bit_left)
+           bits into current_word, flush it, then start the next word with
+           the remaining low bits of code */
+        stream->current_word = (current_word << bit_left) | (code >> (nBits - bit_left));
+
+        nBits -= bit_left;
+
+        stream->bit_left = 0;
+
+        status = AVCBitstreamSaveWord(stream); /* save current word */
+
+        stream->bit_left = WORD_SIZE - nBits;
+
+        stream->current_word = code; /* no extra masking for code, must be handled before saving */
+    }
+
+    return status;
+}
+
+
+/* ======================================================================== */
+/* Function : BitstreamWrite1Bit() */
+/* Date : 3/30/2004 */
+/* Purpose : Write 1 bit */
+/* In/out : Unused bits in 'code' must be all zeros. */
+/* Return : AVCENC_SUCCESS if successed, AVCENC_WRITE_FAIL if buffer is */
+/* full. */
+/* Modified : */
+/* ======================================================================== */
+AVCEnc_Status BitstreamWrite1Bit(AVCEncBitstream *stream, uint code)
+{
+    AVCEnc_Status status;
+    uint current_word = stream->current_word;
+
+    //DEBUG_LOG(userData,AVC_LOGTYPE_INFO,"BitstreamWrite1Bit",code,-1);
+
+    //if(1 <= bit_left) /* more bits left in current_word */
+    /* we can assume that there always be positive bit_left in the current word */
+    /* shift the single bit into the low end of current_word */
+    stream->current_word = (current_word << 1) | code;
+    stream->bit_left--;
+    if (stream->bit_left == 0) /* prepare for the next word */
+    {
+        /* word is full: flush it to the buffer (with EPB insertion) */
+        status = AVCBitstreamSaveWord(stream);
+        return status;
+    }
+
+    return AVCENC_SUCCESS;
+}
+
+
+/* ======================================================================== */
+/* Function : BitstreamTrailingBits() */
+/* Date : 3/31/2004 */
+/* Purpose : Add trailing bits and report the final EBSP size. */
+/* In/out : */
+/* Return : AVCENC_SUCCESS if successed, AVCENC_WRITE_FAIL if buffer is */
+/* full. */
+/* Modified : */
+/* ======================================================================== */
+AVCEnc_Status BitstreamTrailingBits(AVCEncBitstream *bitstream, uint *nal_size)
+{
+    (void)(nal_size); /* unused; the caller reads the size from the stream state */
+
+    AVCEnc_Status status;
+    int bit_left = bitstream->bit_left;
+
+    bit_left &= 0x7; /* modulo by 8 */
+    if (bit_left == 0) bit_left = 8;
+    /* bitstream->bit_left == 0 cannot happen here since it would have been Saved already */
+
+    /* write the stop bit plus zero alignment bits; the pattern comes from
+       the trailing_bits[] table indexed by the number of bits needed */
+    status = BitstreamWriteBits(bitstream, bit_left, trailing_bits[bit_left]);
+
+    if (status != AVCENC_SUCCESS)
+    {
+        return status;
+    }
+
+    /* if it's not saved, save it. */
+    //if(bitstream->bit_left<(WORD_SIZE<<3)) /* in fact, no need to check */
+    {
+        status = AVCBitstreamSaveWord(bitstream);
+    }
+
+    return status;
+}
+
+/* check whether it's byte-aligned */
+bool byte_aligned(AVCEncBitstream *stream)
+{
+    /* bit_left counts the unused bits in current_word; the stream is
+       byte-aligned exactly when a whole number of bytes has been consumed */
+    if (stream->bit_left % 8)
+        return false;
+    else
+        return true;
+}
+
+
+/* determine whether overrun buffer can be used or not */
+AVCEnc_Status AVCBitstreamUseOverrunBuffer(AVCEncBitstream* stream, int numExtraBytes)
+{
+    AVCEncObject *encvid = (AVCEncObject*)stream->encvid;
+
+    if (stream->overrunBuffer != NULL) // overrunBuffer is set
+    {
+        if (stream->bitstreamBuffer != stream->overrunBuffer) // not already used
+        {
+            /* grow the overrun buffer first if it is too small for the
+               already-written data plus the requested extra bytes */
+            if (stream->write_pos + numExtraBytes >= stream->oBSize)
+            {
+                stream->oBSize = stream->write_pos + numExtraBytes + 100; /* 100 bytes of headroom */
+                stream->oBSize &= (~0x3); // make it multiple of 4
+
+                // allocate new overrun Buffer
+                if (encvid->overrunBuffer)
+                {
+                    /* NOTE(review): pointer is passed to CBAVC_Free through an
+                       (int) cast — truncates on 64-bit; confirm callback ABI */
+                    encvid->avcHandle->CBAVC_Free((uint32*)encvid->avcHandle->userData,
+                                                  (int)encvid->overrunBuffer);
+                }
+
+                encvid->oBSize = stream->oBSize;
+                encvid->overrunBuffer = (uint8*) encvid->avcHandle->CBAVC_Malloc(encvid->avcHandle->userData,
+                                        stream->oBSize, DEFAULT_ATTR);
+
+                stream->overrunBuffer = encvid->overrunBuffer;
+                if (stream->overrunBuffer == NULL)
+                {
+                    return AVCENC_FAIL;
+                }
+            }
+
+            // copy everything to overrun buffer and start using it.
+            memcpy(stream->overrunBuffer, stream->bitstreamBuffer, stream->write_pos);
+            stream->bitstreamBuffer = stream->overrunBuffer;
+            stream->buf_size = stream->oBSize;
+        }
+        else // overrun buffer is already used
+        {
+            stream->oBSize = stream->write_pos + numExtraBytes + 100; /* 100 bytes of headroom */
+            stream->oBSize &= (~0x3); // make it multiple of 4
+
+            // allocate new overrun buffer
+            encvid->oBSize = stream->oBSize;
+            encvid->overrunBuffer = (uint8*) encvid->avcHandle->CBAVC_Malloc(encvid->avcHandle->userData,
+                                    stream->oBSize, DEFAULT_ATTR);
+
+            if (encvid->overrunBuffer == NULL)
+            {
+                return AVCENC_FAIL;
+            }
+
+
+            // copy from the old buffer to new buffer
+            memcpy(encvid->overrunBuffer, stream->overrunBuffer, stream->write_pos);
+            // free old buffer
+            /* NOTE(review): same (int) pointer cast as above — 64-bit concern */
+            encvid->avcHandle->CBAVC_Free((uint32*)encvid->avcHandle->userData,
+                                          (int)stream->overrunBuffer);
+
+            // assign pointer to new buffer
+            stream->overrunBuffer = encvid->overrunBuffer;
+            stream->bitstreamBuffer = stream->overrunBuffer;
+            stream->buf_size = stream->oBSize;
+        }
+
+        return AVCENC_SUCCESS;
+    }
+    else // overrunBuffer is not enable.
+    {
+        return AVCENC_FAIL;
+    }
+
+}
+
+
+
diff --git a/media/libstagefright/codecs/avc/enc/src/block.cpp b/media/libstagefright/codecs/avc/enc/src/block.cpp
new file mode 100644
index 0000000..01e26a6
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/block.cpp
@@ -0,0 +1,1283 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+#include "avcenc_lib.h"
+
+/* subtract with the prediction and do transformation */
+void trans(uint8 *cur, int pitch, uint8 *predBlock, int16 *dataBlock)
+{
+    int16 *ptr = dataBlock;
+    int r0, r1, r2, r3, j;
+    /* 'pitch' packs two strides: the current-frame pitch in the upper 16
+       bits and the prediction-block pitch in the lower 16 bits */
+    int curpitch = (uint)pitch >> 16;
+    int predpitch = (pitch & 0xFFFF);
+
+    /* horizontal */
+    j = 4;
+    while (j > 0)
+    {
+        /* calculate the residue first */
+        r0 = cur[0] - predBlock[0];
+        r1 = cur[1] - predBlock[1];
+        r2 = cur[2] - predBlock[2];
+        r3 = cur[3] - predBlock[3];
+
+        /* 4-point butterfly of the forward 4x4 core transform */
+        r0 += r3; //ptr[0] + ptr[3];
+        r3 = r0 - (r3 << 1); //ptr[0] - ptr[3];
+        r1 += r2; //ptr[1] + ptr[2];
+        r2 = r1 - (r2 << 1); //ptr[1] - ptr[2];
+
+        ptr[0] = r0 + r1;
+        ptr[2] = r0 - r1;
+        ptr[1] = (r3 << 1) + r2;
+        ptr[3] = r3 - (r2 << 1);
+
+        ptr += 16; /* dataBlock rows are 16 coefficients apart */
+        predBlock += predpitch;
+        cur += curpitch;
+        j--;
+    }
+    /* vertical */
+    ptr = dataBlock;
+    j = 4;
+    while (j > 0)
+    {
+        /* same butterfly applied down each column (stride 16) */
+        r0 = ptr[0] + ptr[48];
+        r3 = ptr[0] - ptr[48];
+        r1 = ptr[16] + ptr[32];
+        r2 = ptr[16] - ptr[32];
+
+        ptr[0] = r0 + r1;
+        ptr[32] = r0 - r1;
+        ptr[16] = (r3 << 1) + r2;
+        ptr[48] = r3 - (r2 << 1);
+
+        ptr++;
+        j--;
+    }
+
+    return ;
+}
+
+
+/* do residue transform quant invquant, invtrans and write output out */
+/* Transform, quantize, dequantize and (for intra) inverse-transform one
+   4x4 luma block. Accumulates a coefficient cost into *coef_cost and
+   returns the number of nonzero quantized coefficients. */
+int dct_luma(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org, int *coef_cost)
+{
+    AVCCommonObj *video = encvid->common;
+    int org_pitch = encvid->currInput->pitch;
+    int pitch = video->currPic->pitch;
+    int16 *coef = video->block;
+    uint8 *pred = video->pred_block; // size 16 for a 4x4 block
+    int pred_pitch = video->pred_pitch;
+    int r0, r1, r2, r3, j, k, idx;
+    int *level, *run;
+    int Qq, Rq, q_bits, qp_const, quant;
+    int data, lev, zero_run;
+    int numcoeff;
+
+    coef += ((blkidx & 0x3) << 2) + ((blkidx >> 2) << 6); /* point to the 4x4 block */
+
+    /* first take a 4x4 transform */
+    /* horizontal */
+    j = 4;
+    while (j > 0)
+    {
+        /* calculate the residue first */
+        r0 = org[0] - pred[0]; /* OPTIMIZEABLE */
+        r1 = org[1] - pred[1];
+        r2 = org[2] - pred[2];
+        r3 = org[3] - pred[3];
+
+        r0 += r3; //ptr[0] + ptr[3];
+        r3 = r0 - (r3 << 1); //ptr[0] - ptr[3];
+        r1 += r2; //ptr[1] + ptr[2];
+        r2 = r1 - (r2 << 1); //ptr[1] - ptr[2];
+
+        coef[0] = r0 + r1;
+        coef[2] = r0 - r1;
+        coef[1] = (r3 << 1) + r2;
+        coef[3] = r3 - (r2 << 1);
+
+        coef += 16; /* next row of the 16-wide coefficient array */
+        org += org_pitch;
+        pred += pred_pitch;
+        j--;
+    }
+    /* vertical */
+    coef -= 64;
+    pred -= (pred_pitch << 2);
+    j = 4;
+    while (j > 0) /* OPTIMIZABLE */
+    {
+        r0 = coef[0] + coef[48];
+        r3 = coef[0] - coef[48];
+        r1 = coef[16] + coef[32];
+        r2 = coef[16] - coef[32];
+
+        coef[0] = r0 + r1;
+        coef[32] = r0 - r1;
+        coef[16] = (r3 << 1) + r2;
+        coef[48] = r3 - (r2 << 1);
+
+        coef++;
+        j--;
+    }
+
+    coef -= 4; /* back to the block origin */
+
+    /* quant */
+    level = encvid->level[ras2dec[blkidx]];
+    run = encvid->run[ras2dec[blkidx]];
+
+    Rq = video->QPy_mod_6;
+    Qq = video->QPy_div_6;
+    qp_const = encvid->qp_const;
+    q_bits = 15 + Qq;
+
+    zero_run = 0;
+    numcoeff = 0;
+    /* quantize in zigzag order, recording (level, run) pairs for CAVLC */
+    for (k = 0; k < 16; k++)
+    {
+        idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
+        data = coef[idx];
+        quant = quant_coef[Rq][k];
+        if (data > 0)
+        {
+            lev = data * quant + qp_const;
+        }
+        else
+        {
+            lev = -data * quant + qp_const;
+        }
+        lev >>= q_bits;
+        if (lev)
+        {
+            /* large levels get MAX_VALUE so the block cannot be discarded
+               by the coefficient-cost thresholding */
+            *coef_cost += ((lev > 1) ? MAX_VALUE : COEFF_COST[DISABLE_THRESHOLDING][zero_run]);
+
+            /* dequant */
+            quant = dequant_coefres[Rq][k];
+            if (data > 0)
+            {
+                level[numcoeff] = lev;
+                coef[idx] = (lev * quant) << Qq;
+            }
+            else
+            {
+                level[numcoeff] = -lev;
+                coef[idx] = (-lev * quant) << Qq;
+            }
+            run[numcoeff++] = zero_run;
+            zero_run = 0;
+        }
+        else
+        {
+            zero_run++;
+            coef[idx] = 0;
+        }
+    }
+
+    if (video->currMB->mb_intra) // only do inverse transform with intra block
+    {
+        if (numcoeff) /* then do inverse transform */
+        {
+            for (j = 4; j > 0; j--) /* horizontal */
+            {
+                r0 = coef[0] + coef[2];
+                r1 = coef[0] - coef[2];
+                r2 = (coef[1] >> 1) - coef[3];
+                r3 = coef[1] + (coef[3] >> 1);
+
+                coef[0] = r0 + r3;
+                coef[1] = r1 + r2;
+                coef[2] = r1 - r2;
+                coef[3] = r0 - r3;
+
+                coef += 16;
+            }
+
+            coef -= 64;
+            for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */
+            {
+                r0 = coef[0] + coef[32];
+                r1 = coef[0] - coef[32];
+                r2 = (coef[16] >> 1) - coef[48];
+                r3 = coef[16] + (coef[48] >> 1);
+                r0 += r3;
+                r3 = (r0 - (r3 << 1)); /* r0-r3 */
+                r1 += r2;
+                r2 = (r1 - (r2 << 1)); /* r1-r2 */
+                /* +32 is the rounding offset before the >>6 normalization */
+                r0 += 32;
+                r1 += 32;
+                r2 += 32;
+                r3 += 32;
+
+                /* add prediction back, clip to [0,255] branch-free */
+                r0 = pred[0] + (r0 >> 6);
+                if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */
+                r1 = *(pred += pred_pitch) + (r1 >> 6);
+                if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */
+                r2 = *(pred += pred_pitch) + (r2 >> 6);
+                if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */
+                r3 = pred[pred_pitch] + (r3 >> 6);
+                if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */
+
+                *cur = r0;
+                *(cur += pitch) = r1;
+                *(cur += pitch) = r2;
+                cur[pitch] = r3;
+                cur -= (pitch << 1);
+                cur++;
+                pred -= (pred_pitch << 1);
+                pred++;
+                coef++;
+            }
+        }
+        else // copy from pred to cur
+        {
+            /* no residual: reconstruct as the prediction, 4 bytes per row */
+            *((uint32*)cur) = *((uint32*)pred);
+            *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
+            *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
+            *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
+        }
+    }
+
+    return numcoeff;
+}
+
+
+/* Inverse-transform the residual of an inter macroblock and add it onto
+   the motion-compensated prediction already stored at curL (in place). */
+void MBInterIdct(AVCCommonObj *video, uint8 *curL, AVCMacroblock *currMB, int picPitch)
+{
+    int16 *coef, *coef8 = video->block;
+    uint8 *cur; // the same as curL
+    int b8, b4;
+    int r0, r1, r2, r3, j, blkidx;
+
+    /* iterate the four 8x8 partitions; CBP bit b8 tells whether any of its
+       four 4x4 blocks carries coefficients */
+    for (b8 = 0; b8 < 4; b8++)
+    {
+        cur = curL;
+        coef = coef8;
+
+        if (currMB->CBP&(1 << b8))
+        {
+            for (b4 = 0; b4 < 4; b4++)
+            {
+                blkidx = blkIdx2blkXY[b8][b4];
+                /* do IDCT */
+                if (currMB->nz_coeff[blkidx])
+                {
+                    for (j = 4; j > 0; j--) /* horizontal */
+                    {
+                        r0 = coef[0] + coef[2];
+                        r1 = coef[0] - coef[2];
+                        r2 = (coef[1] >> 1) - coef[3];
+                        r3 = coef[1] + (coef[3] >> 1);
+
+                        coef[0] = r0 + r3;
+                        coef[1] = r1 + r2;
+                        coef[2] = r1 - r2;
+                        coef[3] = r0 - r3;
+
+                        coef += 16;
+                    }
+
+                    coef -= 64;
+                    for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */
+                    {
+                        r0 = coef[0] + coef[32];
+                        r1 = coef[0] - coef[32];
+                        r2 = (coef[16] >> 1) - coef[48];
+                        r3 = coef[16] + (coef[48] >> 1);
+                        r0 += r3;
+                        r3 = (r0 - (r3 << 1)); /* r0-r3 */
+                        r1 += r2;
+                        r2 = (r1 - (r2 << 1)); /* r1-r2 */
+                        /* +32 rounding before >>6; then add to current
+                           (predicted) pixels and clip to [0,255] */
+                        r0 += 32;
+                        r1 += 32;
+                        r2 += 32;
+                        r3 += 32;
+
+                        r0 = cur[0] + (r0 >> 6);
+                        if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */
+                        *cur = r0;
+                        r1 = *(cur += picPitch) + (r1 >> 6);
+                        if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */
+                        *cur = r1;
+                        r2 = *(cur += picPitch) + (r2 >> 6);
+                        if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */
+                        *cur = r2;
+                        r3 = cur[picPitch] + (r3 >> 6);
+                        if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */
+                        cur[picPitch] = r3;
+
+                        cur -= (picPitch << 1);
+                        cur++;
+                        coef++;
+                    }
+                    cur -= 4;
+                    coef -= 4;
+                }
+                /* advance to the next 4x4 block: right neighbor first, then
+                   down-left for the second row of the 8x8 partition */
+                if (b4&1)
+                {
+                    cur += ((picPitch << 2) - 4);
+                    coef += 60;
+                }
+                else
+                {
+                    cur += 4;
+                    coef += 4;
+                }
+            }
+        }
+
+        /* advance to the next 8x8 partition in raster order */
+        if (b8&1)
+        {
+            curL += ((picPitch << 3) - 8);
+            coef8 += 120;
+        }
+        else
+        {
+            curL += 8;
+            coef8 += 8;
+        }
+    }
+
+    return ;
+}
+
+/* performa dct, quant, iquant, idct for the entire MB */
+/* Forward transform + quantization + reconstruction for an Intra16x16 luma
+   macroblock: 4x4 core transforms, a 4x4 Hadamard on the 16 DC terms,
+   quantization of DC and AC, then inverse transform back into curL. */
+void dct_luma_16x16(AVCEncObject *encvid, uint8 *curL, uint8 *orgL)
+{
+    AVCCommonObj *video = encvid->common;
+    int pitch = video->currPic->pitch;
+    int org_pitch = encvid->currInput->pitch;
+    AVCMacroblock *currMB = video->currMB;
+    int16 *coef = video->block;
+    uint8 *pred = encvid->pred_i16[currMB->i16Mode];
+    int blk_x, blk_y, j, k, idx, b8, b4;
+    int r0, r1, r2, r3, m0, m1, m2 , m3;
+    int data, lev;
+    int *level, *run, zero_run, ncoeff;
+    int Rq, Qq, quant, q_bits, qp_const;
+    int offset_cur[4], offset_pred[4], offset;
+
+    /* horizontal */
+    for (j = 16; j > 0; j--)
+    {
+        for (blk_x = 4; blk_x > 0; blk_x--)
+        {
+            /* calculate the residue first */
+            r0 = *orgL++ - *pred++;
+            r1 = *orgL++ - *pred++;
+            r2 = *orgL++ - *pred++;
+            r3 = *orgL++ - *pred++;
+
+            r0 += r3; //ptr[0] + ptr[3];
+            r3 = r0 - (r3 << 1); //ptr[0] - ptr[3];
+            r1 += r2; //ptr[1] + ptr[2];
+            r2 = r1 - (r2 << 1); //ptr[1] - ptr[2];
+
+            *coef++ = r0 + r1;
+            *coef++ = (r3 << 1) + r2;
+            *coef++ = r0 - r1;
+            *coef++ = r3 - (r2 << 1);
+        }
+        orgL += (org_pitch - 16);
+    }
+    pred -= 256;
+    coef -= 256;
+    /* vertical */
+    for (blk_y = 4; blk_y > 0; blk_y--)
+    {
+        for (j = 16; j > 0; j--)
+        {
+            r0 = coef[0] + coef[48];
+            r3 = coef[0] - coef[48];
+            r1 = coef[16] + coef[32];
+            r2 = coef[16] - coef[32];
+
+            coef[0] = r0 + r1;
+            coef[32] = r0 - r1;
+            coef[16] = (r3 << 1) + r2;
+            coef[48] = r3 - (r2 << 1);
+
+            coef++;
+        }
+        coef += 48;
+    }
+
+    /* then perform DC transform */
+    /* 4x4 Hadamard over the DC terms of the sixteen 4x4 blocks (stride 4
+       horizontally, 64 vertically inside the coefficient array) */
+    coef -= 256;
+    for (j = 4; j > 0; j--)
+    {
+        r0 = coef[0] + coef[12];
+        r3 = coef[0] - coef[12];
+        r1 = coef[4] + coef[8];
+        r2 = coef[4] - coef[8];
+
+        coef[0] = r0 + r1;
+        coef[8] = r0 - r1;
+        coef[4] = r3 + r2;
+        coef[12] = r3 - r2;
+        coef += 64;
+    }
+    coef -= 256;
+    for (j = 4; j > 0; j--)
+    {
+        r0 = coef[0] + coef[192];
+        r3 = coef[0] - coef[192];
+        r1 = coef[64] + coef[128];
+        r2 = coef[64] - coef[128];
+
+        /* >>1 folds the Hadamard scaling into the DC path */
+        coef[0] = (r0 + r1) >> 1;
+        coef[128] = (r0 - r1) >> 1;
+        coef[64] = (r3 + r2) >> 1;
+        coef[192] = (r3 - r2) >> 1;
+        coef += 4;
+    }
+
+    coef -= 16;
+    // then quantize DC
+    level = encvid->leveldc;
+    run = encvid->rundc;
+
+    Rq = video->QPy_mod_6;
+    Qq = video->QPy_div_6;
+    quant = quant_coef[Rq][0];
+    q_bits = 15 + Qq;
+    qp_const = encvid->qp_const;
+
+    zero_run = 0;
+    ncoeff = 0;
+    for (k = 0; k < 16; k++) /* in zigzag scan order */
+    {
+        idx = ZIGZAG2RASTERDC[k];
+        data = coef[idx];
+        if (data > 0) // quant
+        {
+            /* DC uses a doubled rounding constant and one extra shift */
+            lev = data * quant + (qp_const << 1);
+        }
+        else
+        {
+            lev = -data * quant + (qp_const << 1);
+        }
+        lev >>= (q_bits + 1);
+        if (lev) // dequant
+        {
+            if (data > 0)
+            {
+                level[ncoeff] = lev;
+                coef[idx] = lev;
+            }
+            else
+            {
+                level[ncoeff] = -lev;
+                coef[idx] = -lev;
+            }
+            run[ncoeff++] = zero_run;
+            zero_run = 0;
+        }
+        else
+        {
+            zero_run++;
+            coef[idx] = 0;
+        }
+    }
+
+    /* inverse transform DC */
+    encvid->numcoefdc = ncoeff;
+    if (ncoeff)
+    {
+        quant = dequant_coefres[Rq][0];
+
+        for (j = 0; j < 4; j++)
+        {
+            m0 = coef[0] + coef[4];
+            m1 = coef[0] - coef[4];
+            m2 = coef[8] + coef[12];
+            m3 = coef[8] - coef[12];
+
+
+            coef[0] = m0 + m2;
+            coef[4] = m0 - m2;
+            coef[8] = m1 - m3;
+            coef[12] = m1 + m3;
+            coef += 64;
+        }
+
+        coef -= 256;
+
+        if (Qq >= 2) /* this way should be faster than JM */
+        { /* they use (((m4*scale)<<(QPy/6))+2)>>2 for both cases. */
+            Qq -= 2;
+            for (j = 0; j < 4; j++)
+            {
+                m0 = coef[0] + coef[64];
+                m1 = coef[0] - coef[64];
+                m2 = coef[128] + coef[192];
+                m3 = coef[128] - coef[192];
+
+                coef[0] = ((m0 + m2) * quant) << Qq;
+                coef[64] = ((m0 - m2) * quant) << Qq;
+                coef[128] = ((m1 - m3) * quant) << Qq;
+                coef[192] = ((m1 + m3) * quant) << Qq;
+                coef += 4;
+            }
+            Qq += 2; /* restore the value */
+        }
+        else
+        {
+            /* low-QP path: right shift with rounding instead of left shift */
+            Qq = 2 - Qq;
+            offset = 1 << (Qq - 1);
+
+            for (j = 0; j < 4; j++)
+            {
+                m0 = coef[0] + coef[64];
+                m1 = coef[0] - coef[64];
+                m2 = coef[128] + coef[192];
+                m3 = coef[128] - coef[192];
+
+                coef[0] = (((m0 + m2) * quant + offset) >> Qq);
+                coef[64] = (((m0 - m2) * quant + offset) >> Qq);
+                coef[128] = (((m1 - m3) * quant + offset) >> Qq);
+                coef[192] = (((m1 + m3) * quant + offset) >> Qq);
+                coef += 4;
+            }
+            Qq = 2 - Qq; /* restore the value */
+        }
+        coef -= 16; /* back to the origin */
+    }
+
+    /* now zigzag scan ac coefs, quant, iquant and itrans */
+    run = encvid->run[0];
+    level = encvid->level[0];
+
+    /* offset btw 4x4 block */
+    offset_cur[0] = 0;
+    offset_cur[1] = (pitch << 2) - 8;
+
+    /* offset btw 8x8 block */
+    offset_cur[2] = 8 - (pitch << 3);
+    offset_cur[3] = -8;
+
+    /* similarly for pred */
+    offset_pred[0] = 0;
+    offset_pred[1] = 56;
+    offset_pred[2] = -120;
+    offset_pred[3] = -8;
+
+    currMB->CBP = 0;
+
+    for (b8 = 0; b8 < 4; b8++)
+    {
+        for (b4 = 0; b4 < 4; b4++)
+        {
+
+            zero_run = 0;
+            ncoeff = 0;
+
+            /* AC scan starts at k=1: the DC term was handled above */
+            for (k = 1; k < 16; k++)
+            {
+                idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
+                data = coef[idx];
+                quant = quant_coef[Rq][k];
+                if (data > 0)
+                {
+                    lev = data * quant + qp_const;
+                }
+                else
+                {
+                    lev = -data * quant + qp_const;
+                }
+                lev >>= q_bits;
+                if (lev)
+                { /* dequant */
+                    quant = dequant_coefres[Rq][k];
+                    if (data > 0)
+                    {
+                        level[ncoeff] = lev;
+                        coef[idx] = (lev * quant) << Qq;
+                    }
+                    else
+                    {
+                        level[ncoeff] = -lev;
+                        coef[idx] = (-lev * quant) << Qq;
+                    }
+                    run[ncoeff++] = zero_run;
+                    zero_run = 0;
+                }
+                else
+                {
+                    zero_run++;
+                    coef[idx] = 0;
+                }
+            }
+
+            currMB->nz_coeff[blkIdx2blkXY[b8][b4]] = ncoeff; /* in raster scan !!! */
+            if (ncoeff)
+            {
+                currMB->CBP |= (1 << b8);
+
+                // do inverse transform here
+                for (j = 4; j > 0; j--)
+                {
+                    r0 = coef[0] + coef[2];
+                    r1 = coef[0] - coef[2];
+                    r2 = (coef[1] >> 1) - coef[3];
+                    r3 = coef[1] + (coef[3] >> 1);
+
+                    coef[0] = r0 + r3;
+                    coef[1] = r1 + r2;
+                    coef[2] = r1 - r2;
+                    coef[3] = r0 - r3;
+
+                    coef += 16;
+                }
+                coef -= 64;
+                for (j = 4; j > 0; j--)
+                {
+                    r0 = coef[0] + coef[32];
+                    r1 = coef[0] - coef[32];
+                    r2 = (coef[16] >> 1) - coef[48];
+                    r3 = coef[16] + (coef[48] >> 1);
+
+                    r0 += r3;
+                    r3 = (r0 - (r3 << 1)); /* r0-r3 */
+                    r1 += r2;
+                    r2 = (r1 - (r2 << 1)); /* r1-r2 */
+                    /* +32 rounding before >>6, add prediction, clip */
+                    r0 += 32;
+                    r1 += 32;
+                    r2 += 32;
+                    r3 += 32;
+                    r0 = pred[0] + (r0 >> 6);
+                    if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */
+                    r1 = pred[16] + (r1 >> 6);
+                    if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */
+                    r2 = pred[32] + (r2 >> 6);
+                    if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */
+                    r3 = pred[48] + (r3 >> 6);
+                    if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */
+                    *curL = r0;
+                    *(curL += pitch) = r1;
+                    *(curL += pitch) = r2;
+                    curL[pitch] = r3;
+                    curL -= (pitch << 1);
+                    curL++;
+                    pred++;
+                    coef++;
+                }
+            }
+            else // do DC-only inverse
+            {
+                /* no AC survived quantization: reconstruct from the
+                   (already dequantized) DC value alone */
+                m0 = coef[0] + 32;
+
+                for (j = 4; j > 0; j--)
+                {
+                    r0 = pred[0] + (m0 >> 6);
+                    if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */
+                    r1 = pred[16] + (m0 >> 6);
+                    if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */
+                    r2 = pred[32] + (m0 >> 6);
+                    if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */
+                    r3 = pred[48] + (m0 >> 6);
+                    if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */
+                    *curL = r0;
+                    *(curL += pitch) = r1;
+                    *(curL += pitch) = r2;
+                    curL[pitch] = r3;
+                    curL -= (pitch << 1);
+                    curL++;
+                    pred++;
+                }
+                coef += 4;
+            }
+
+            run += 16; // follow coding order
+            level += 16;
+            curL += offset_cur[b4&1];
+            pred += offset_pred[b4&1];
+            coef += offset_pred[b4&1];
+        }
+
+        curL += offset_cur[2 + (b8&1)];
+        pred += offset_pred[2 + (b8&1)];
+        coef += offset_pred[2 + (b8&1)];
+    }
+
+    return ;
+}
+
+
+/* Forward transform + quantization + reconstruction for one 8x8 chroma
+   component (cr = 0 for Cb, 1 for Cr): 4x4 core transforms, a 2x2 Hadamard
+   on the four DC terms, AC thresholding, then inverse transform into curC. */
+void dct_chroma(AVCEncObject *encvid, uint8 *curC, uint8 *orgC, int cr)
+{
+    AVCCommonObj *video = encvid->common;
+    AVCMacroblock *currMB = video->currMB;
+    int org_pitch = (encvid->currInput->pitch) >> 1;
+    int pitch = (video->currPic->pitch) >> 1;
+    int pred_pitch = 16;
+    int16 *coef = video->block + 256;
+    uint8 *pred = video->pred_block;
+    int j, blk_x, blk_y, k, idx, b4;
+    int r0, r1, r2, r3, m0;
+    int Qq, Rq, qp_const, q_bits, quant;
+    int *level, *run, zero_run, ncoeff;
+    int data, lev;
+    int offset_cur[2], offset_pred[2], offset_coef[2];
+    uint8 nz_temp[4];
+    int coeff_cost;
+
+    /* Cr lives 8 samples to the right of Cb in the shared buffers */
+    if (cr)
+    {
+        coef += 8;
+        pred += 8;
+    }
+
+    if (currMB->mb_intra == 0) // inter mode
+    {
+        /* prediction already sits in the current picture buffer */
+        pred = curC;
+        pred_pitch = pitch;
+    }
+
+    /* do 4x4 transform */
+    /* horizontal */
+    for (j = 8; j > 0; j--)
+    {
+        for (blk_x = 2; blk_x > 0; blk_x--)
+        {
+            /* calculate the residue first */
+            r0 = *orgC++ - *pred++;
+            r1 = *orgC++ - *pred++;
+            r2 = *orgC++ - *pred++;
+            r3 = *orgC++ - *pred++;
+
+            r0 += r3; //ptr[0] + ptr[3];
+            r3 = r0 - (r3 << 1); //ptr[0] - ptr[3];
+            r1 += r2; //ptr[1] + ptr[2];
+            r2 = r1 - (r2 << 1); //ptr[1] - ptr[2];
+
+            *coef++ = r0 + r1;
+            *coef++ = (r3 << 1) + r2;
+            *coef++ = r0 - r1;
+            *coef++ = r3 - (r2 << 1);
+
+        }
+        coef += 8; // coef pitch is 16
+        pred += (pred_pitch - 8); // pred_pitch is 16
+        orgC += (org_pitch - 8);
+    }
+    pred -= (pred_pitch << 3);
+    coef -= 128;
+    /* vertical */
+    for (blk_y = 2; blk_y > 0; blk_y--)
+    {
+        for (j = 8; j > 0; j--)
+        {
+            r0 = coef[0] + coef[48];
+            r3 = coef[0] - coef[48];
+            r1 = coef[16] + coef[32];
+            r2 = coef[16] - coef[32];
+
+            coef[0] = r0 + r1;
+            coef[32] = r0 - r1;
+            coef[16] = (r3 << 1) + r2;
+            coef[48] = r3 - (r2 << 1);
+
+            coef++;
+        }
+        coef += 56;
+    }
+    /* then perform DC transform */
+    coef -= 128;
+
+    /* 2x2 transform of DC components*/
+    r0 = coef[0];
+    r1 = coef[4];
+    r2 = coef[64];
+    r3 = coef[68];
+
+    coef[0] = r0 + r1 + r2 + r3;
+    coef[4] = r0 - r1 + r2 - r3;
+    coef[64] = r0 + r1 - r2 - r3;
+    coef[68] = r0 - r1 - r2 + r3;
+
+    Qq = video->QPc_div_6;
+    Rq = video->QPc_mod_6;
+    quant = quant_coef[Rq][0];
+    q_bits = 15 + Qq;
+    qp_const = encvid->qp_const_c;
+
+    zero_run = 0;
+    ncoeff = 0;
+    run = encvid->runcdc + (cr << 2);
+    level = encvid->levelcdc + (cr << 2);
+
+    /* in zigzag scan order */
+    for (k = 0; k < 4; k++)
+    {
+        /* map the 2x2 DC position k to its raster offset in coef */
+        idx = ((k >> 1) << 6) + ((k & 1) << 2);
+        data = coef[idx];
+        if (data > 0)
+        {
+            /* DC uses a doubled rounding constant and one extra shift */
+            lev = data * quant + (qp_const << 1);
+        }
+        else
+        {
+            lev = -data * quant + (qp_const << 1);
+        }
+        lev >>= (q_bits + 1);
+        if (lev)
+        {
+            if (data > 0)
+            {
+                level[ncoeff] = lev;
+                coef[idx] = lev;
+            }
+            else
+            {
+                level[ncoeff] = -lev;
+                coef[idx] = -lev;
+            }
+            run[ncoeff++] = zero_run;
+            zero_run = 0;
+        }
+        else
+        {
+            zero_run++;
+            coef[idx] = 0;
+        }
+    }
+
+    encvid->numcoefcdc[cr] = ncoeff;
+
+    if (ncoeff)
+    {
+        currMB->CBP |= (1 << 4); // DC present
+        // do inverse transform
+        quant = dequant_coefres[Rq][0];
+
+        r0 = coef[0] + coef[4];
+        r1 = coef[0] - coef[4];
+        r2 = coef[64] + coef[68];
+        r3 = coef[64] - coef[68];
+
+        r0 += r2;
+        r2 = r0 - (r2 << 1);
+        r1 += r3;
+        r3 = r1 - (r3 << 1);
+
+        if (Qq >= 1)
+        {
+            Qq -= 1;
+            coef[0] = (r0 * quant) << Qq;
+            coef[4] = (r1 * quant) << Qq;
+            coef[64] = (r2 * quant) << Qq;
+            coef[68] = (r3 * quant) << Qq;
+            Qq++;
+        }
+        else
+        {
+            coef[0] = (r0 * quant) >> 1;
+            coef[4] = (r1 * quant) >> 1;
+            coef[64] = (r2 * quant) >> 1;
+            coef[68] = (r3 * quant) >> 1;
+        }
+    }
+
+    /* now do AC zigzag scan, quant, iquant and itrans */
+    if (cr)
+    {
+        run = encvid->run[20];
+        level = encvid->level[20];
+    }
+    else
+    {
+        run = encvid->run[16];
+        level = encvid->level[16];
+    }
+
+    /* offset btw 4x4 block */
+    offset_cur[0] = 0;
+    offset_cur[1] = (pitch << 2) - 8;
+    offset_pred[0] = 0;
+    offset_pred[1] = (pred_pitch << 2) - 8;
+    offset_coef[0] = 0;
+    offset_coef[1] = 56;
+
+    coeff_cost = 0;
+
+    for (b4 = 0; b4 < 4; b4++)
+    {
+        zero_run = 0;
+        ncoeff = 0;
+        for (k = 1; k < 16; k++) /* in zigzag scan order */
+        {
+            idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
+            data = coef[idx];
+            quant = quant_coef[Rq][k];
+            if (data > 0)
+            {
+                lev = data * quant + qp_const;
+            }
+            else
+            {
+                lev = -data * quant + qp_const;
+            }
+            lev >>= q_bits;
+            if (lev)
+            {
+                /* for RD performance*/
+                if (lev > 1)
+                    coeff_cost += MAX_VALUE; // set high cost, shall not be discarded
+                else
+                    coeff_cost += COEFF_COST[DISABLE_THRESHOLDING][zero_run];
+
+                /* dequant */
+                quant = dequant_coefres[Rq][k];
+                if (data > 0)
+                {
+                    level[ncoeff] = lev;
+                    coef[idx] = (lev * quant) << Qq;
+                }
+                else
+                {
+                    level[ncoeff] = -lev;
+                    coef[idx] = (-lev * quant) << Qq;
+                }
+                run[ncoeff++] = zero_run;
+                zero_run = 0;
+            }
+            else
+            {
+                zero_run++;
+                coef[idx] = 0;
+            }
+        }
+
+        nz_temp[b4] = ncoeff; // raster scan
+
+        // just advance the pointers for now, do IDCT later
+        coef += 4;
+        run += 16;
+        level += 16;
+        coef += offset_coef[b4&1];
+    }
+
+    /* rewind the pointers */
+    coef -= 128;
+
+    /* cheap-AC thresholding: if the whole 8x8 AC energy is not worth the
+       bits, drop all AC and reconstruct from DC only */
+    if (coeff_cost < _CHROMA_COEFF_COST_)
+    {
+        /* if it's not efficient to encode any blocks.
+        Just do DC only */
+        /* We can reset level and run also, but setting nz to zero should be enough. */
+        currMB->nz_coeff[16+(cr<<1)] = 0;
+        currMB->nz_coeff[17+(cr<<1)] = 0;
+        currMB->nz_coeff[20+(cr<<1)] = 0;
+        currMB->nz_coeff[21+(cr<<1)] = 0;
+
+        for (b4 = 0; b4 < 4; b4++)
+        {
+            // do DC-only inverse
+            m0 = coef[0] + 32; /* +32 rounding before >>6 */
+
+            for (j = 4; j > 0; j--)
+            {
+                r0 = pred[0] + (m0 >> 6);
+                if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */
+                r1 = *(pred += pred_pitch) + (m0 >> 6);
+                if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */
+                r2 = pred[pred_pitch] + (m0 >> 6);
+                if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */
+                r3 = pred[pred_pitch<<1] + (m0 >> 6);
+                if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */
+                *curC = r0;
+                *(curC += pitch) = r1;
+                *(curC += pitch) = r2;
+                curC[pitch] = r3;
+                curC -= (pitch << 1);
+                curC++;
+                pred += (1 - pred_pitch);
+            }
+            coef += 4;
+            curC += offset_cur[b4&1];
+            pred += offset_pred[b4&1];
+            coef += offset_coef[b4&1];
+        }
+    }
+    else // not dropping anything, continue with the IDCT
+    {
+        for (b4 = 0; b4 < 4; b4++)
+        {
+            ncoeff = nz_temp[b4] ; // in raster scan
+            currMB->nz_coeff[16+(b4&1)+(cr<<1)+((b4>>1)<<2)] = ncoeff; // in raster scan
+
+            if (ncoeff) // do a check on the nonzero-coeff
+            {
+                currMB->CBP |= (2 << 4);
+
+                // do inverse transform here
+                for (j = 4; j > 0; j--)
+                {
+                    r0 = coef[0] + coef[2];
+                    r1 = coef[0] - coef[2];
+                    r2 = (coef[1] >> 1) - coef[3];
+                    r3 = coef[1] + (coef[3] >> 1);
+
+                    coef[0] = r0 + r3;
+                    coef[1] = r1 + r2;
+                    coef[2] = r1 - r2;
+                    coef[3] = r0 - r3;
+
+                    coef += 16;
+                }
+                coef -= 64;
+                for (j = 4; j > 0; j--)
+                {
+                    r0 = coef[0] + coef[32];
+                    r1 = coef[0] - coef[32];
+                    r2 = (coef[16] >> 1) - coef[48];
+                    r3 = coef[16] + (coef[48] >> 1);
+
+                    r0 += r3;
+                    r3 = (r0 - (r3 << 1)); /* r0-r3 */
+                    r1 += r2;
+                    r2 = (r1 - (r2 << 1)); /* r1-r2 */
+                    /* +32 rounding before >>6, add prediction, clip */
+                    r0 += 32;
+                    r1 += 32;
+                    r2 += 32;
+                    r3 += 32;
+                    r0 = pred[0] + (r0 >> 6);
+                    if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */
+                    r1 = *(pred += pred_pitch) + (r1 >> 6);
+                    if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */
+                    r2 = pred[pred_pitch] + (r2 >> 6);
+                    if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */
+                    r3 = pred[pred_pitch<<1] + (r3 >> 6);
+                    if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */
+                    *curC = r0;
+                    *(curC += pitch) = r1;
+                    *(curC += pitch) = r2;
+                    curC[pitch] = r3;
+                    curC -= (pitch << 1);
+                    curC++;
+                    pred += (1 - pred_pitch);
+                    coef++;
+                }
+            }
+            else
+            {
+                // do DC-only inverse
+                m0 = coef[0] + 32; /* +32 rounding before >>6 */
+
+                for (j = 4; j > 0; j--)
+                {
+                    r0 = pred[0] + (m0 >> 6);
+                    if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */
+                    r1 = *(pred += pred_pitch) + (m0 >> 6);
+                    if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */
+                    r2 = pred[pred_pitch] + (m0 >> 6);
+                    if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */
+                    r3 = pred[pred_pitch<<1] + (m0 >> 6);
+                    if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */
+                    *curC = r0;
+                    *(curC += pitch) = r1;
+                    *(curC += pitch) = r2;
+                    curC[pitch] = r3;
+                    curC -= (pitch << 1);
+                    curC++;
+                    pred += (1 - pred_pitch);
+                }
+                coef += 4;
+            }
+            curC += offset_cur[b4&1];
+            pred += offset_pred[b4&1];
+            coef += offset_coef[b4&1];
+        }
+    }
+
+    return ;
+}
+
+
+/* only DC transform */
+/* Transform and quantize the 16 luma DC coefficients of an Intra16x16 MB.
+   The 4x4 DC butterfly is applied in-place to video->block, where the DC
+   samples of the sixteen 4x4 sub-blocks sit at stride 4 horizontally and
+   stride 64 vertically. Quantized values are written back into block[] and
+   the (level, run) pairs for entropy coding go to encvid->leveldc/rundc.
+   Returns the number of nonzero quantized DC coefficients. */
+int TransQuantIntra16DC(AVCEncObject *encvid)
+{
+    AVCCommonObj *video = encvid->common;
+    int16 *block = video->block;
+    int *level = encvid->leveldc;
+    int *run = encvid->rundc;
+    int16 *ptr = block;
+    int r0, r1, r2, r3, j;
+    int Qq = video->QPy_div_6;
+    int Rq = video->QPy_mod_6;
+    int q_bits, qp_const, quant;
+    int data, lev, zero_run;
+    int k, ncoeff, idx;
+
+    /* DC transform */
+    /* horizontal */
+    j = 4;
+    while (j)
+    {
+        r0 = ptr[0] + ptr[12];
+        r3 = ptr[0] - ptr[12];
+        r1 = ptr[4] + ptr[8];
+        r2 = ptr[4] - ptr[8];
+
+        ptr[0] = r0 + r1;
+        ptr[8] = r0 - r1;
+        ptr[4] = r3 + r2;
+        ptr[12] = r3 - r2;
+        ptr += 64;
+        j--;
+    }
+    /* vertical */
+    ptr = block;
+    j = 4;
+    while (j)
+    {
+        r0 = ptr[0] + ptr[192];
+        r3 = ptr[0] - ptr[192];
+        r1 = ptr[64] + ptr[128];
+        r2 = ptr[64] - ptr[128];
+
+        /* result is halved here — extra normalization of the luma DC transform */
+        ptr[0] = (r0 + r1) >> 1;
+        ptr[128] = (r0 - r1) >> 1;
+        ptr[64] = (r3 + r2) >> 1;
+        ptr[192] = (r3 - r2) >> 1;
+        ptr += 4;
+        j--;
+    }
+
+    quant = quant_coef[Rq][0];
+    q_bits = 15 + Qq;
+    qp_const = (1 << q_bits) / 3; // intra rounding offset (inter uses /6)
+
+    zero_run = 0;
+    ncoeff = 0;
+
+    for (k = 0; k < 16; k++) /* in zigzag scan order */
+    {
+        idx = ZIGZAG2RASTERDC[k];
+        data = block[idx];
+        /* quantize the magnitude; the sign is restored below */
+        if (data > 0)
+        {
+            lev = data * quant + (qp_const << 1);
+        }
+        else
+        {
+            lev = -data * quant + (qp_const << 1);
+        }
+        lev >>= (q_bits + 1); /* DC path shifts one more bit than the AC path */
+        if (lev)
+        {
+            if (data > 0)
+            {
+                level[ncoeff] = lev;
+                block[idx] = lev;
+            }
+            else
+            {
+                level[ncoeff] = -lev;
+                block[idx] = -lev;
+            }
+            run[ncoeff++] = zero_run;
+            zero_run = 0;
+        }
+        else
+        {
+            zero_run++;
+            block[idx] = 0;
+        }
+    }
+    return ncoeff;
+}
+
+/* Transform and quantize the four chroma DC coefficients of one chroma
+   component (cr: 0 = Cb, 1 = Cr). A 2x2 Hadamard is applied in-place to the
+   DC samples located at block[0], block[4], block[64] and block[68];
+   quantized values are written back and the (level, run) pairs are stored
+   in encvid->levelcdc/runcdc at offset (cr << 2).
+   Returns the number of nonzero quantized DC coefficients. */
+int TransQuantChromaDC(AVCEncObject *encvid, int16 *block, int slice_type, int cr)
+{
+    AVCCommonObj *video = encvid->common;
+    int *level, *run;
+    int r0, r1, r2, r3;
+    int Qq, Rq, q_bits, qp_const, quant;
+    int data, lev, zero_run;
+    int k, ncoeff, idx;
+
+    level = encvid->levelcdc + (cr << 2); /* cb or cr */
+    run = encvid->runcdc + (cr << 2);
+
+    /* 2x2 transform of DC components*/
+    r0 = block[0];
+    r1 = block[4];
+    r2 = block[64];
+    r3 = block[68];
+
+    block[0] = r0 + r1 + r2 + r3;
+    block[4] = r0 - r1 + r2 - r3;
+    block[64] = r0 + r1 - r2 - r3;
+    block[68] = r0 - r1 - r2 + r3;
+
+    Qq = video->QPc_div_6;
+    Rq = video->QPc_mod_6;
+    quant = quant_coef[Rq][0];
+    q_bits = 15 + Qq;
+    /* rounding offset: 1/3 for intra, 1/6 for inter slices */
+    if (slice_type == AVC_I_SLICE)
+    {
+        qp_const = (1 << q_bits) / 3;
+    }
+    else
+    {
+        qp_const = (1 << q_bits) / 6;
+    }
+
+    zero_run = 0;
+    ncoeff = 0;
+
+    for (k = 0; k < 4; k++) /* in zigzag scan order */
+    {
+        /* map scan index k to the raster position of the k-th DC sample */
+        idx = ((k >> 1) << 6) + ((k & 1) << 2);
+        data = block[idx];
+        if (data > 0)
+        {
+            lev = data * quant + (qp_const << 1);
+        }
+        else
+        {
+            lev = -data * quant + (qp_const << 1);
+        }
+        lev >>= (q_bits + 1); /* DC path shifts one extra bit */
+        if (lev)
+        {
+            if (data > 0)
+            {
+                level[ncoeff] = lev;
+                block[idx] = lev;
+            }
+            else
+            {
+                level[ncoeff] = -lev;
+                block[idx] = -lev;
+            }
+            run[ncoeff++] = zero_run;
+            zero_run = 0;
+        }
+        else
+        {
+            zero_run++;
+            block[idx] = 0;
+        }
+    }
+    return ncoeff;
+}
+
+
diff --git a/media/libstagefright/codecs/avc/enc/src/findhalfpel.cpp b/media/libstagefright/codecs/avc/enc/src/findhalfpel.cpp
new file mode 100644
index 0000000..38a2a15
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/findhalfpel.cpp
@@ -0,0 +1,622 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+#include "avcenc_lib.h"
+/* 3/29/01 fast half-pel search based on neighboring guess */
+/* value ranging from 0 to 4, high complexity (more accurate) to
+   low complexity (less accurate) */
+#define HP_DISTANCE_TH      5 // 2  /* half-pel distance threshold */
+
+#define PREF_16_VEC 129     /* 1MV bias versus 4MVs*/
+
+/* Distance table indexed by [hp_guess][k]; currently unused by
+   AVCFindHalfPelMB below (hp_guess is ignored there), kept for the
+   guess-based fast search variant. */
+const static int distance_tab[9][9] =   /* [hp_guess][k] */
+{
+    {0, 1, 1, 1, 1, 1, 1, 1, 1},
+    {1, 0, 1, 2, 3, 4, 3, 2, 1},
+    {1, 0, 0, 0, 1, 2, 3, 2, 1},
+    {1, 2, 1, 0, 1, 2, 3, 4, 3},
+    {1, 2, 1, 0, 0, 0, 1, 2, 3},
+    {1, 4, 3, 2, 1, 0, 1, 2, 3},
+    {1, 2, 3, 2, 1, 0, 0, 0, 1},
+    {1, 2, 3, 4, 3, 2, 1, 0, 1},
+    {1, 0, 1, 2, 3, 2, 1, 0, 0}
+};
+
+/* Clamp x into [0,255]: any value outside becomes 0 (negative) or 255. */
+#define CLIP_RESULT(x)      if((uint)x > 0xFF){ \
+                 x = 0xFF & (~(x>>31));}
+
+/* Clamp a packed 16.16 intermediate: saturate, else keep bits 16..23
+   of (x>>5) — used by the packed two-pixel vertical filter below. */
+#define CLIP_UPPER16(x)     if((uint)x >= 0x20000000){ \
+        x = 0xFF0000 & (~(x>>31));} \
+        else { \
+        x = (x>>5)&0xFF0000; \
+        }
+
+/*=====================================================================
+    Function:   AVCFindHalfPelMB
+    Date:       10/31/2007
+    Purpose:    Find half pel resolution MV surrounding the full-pel MV
+=====================================================================*/
+/* Refine the full-pel MV in 'mot' first to half-pel, then to quarter-pel,
+   using SATD + lambda-weighted MV cost. 'ncand' points at the full-pel
+   candidate in the reference picture, (cmvx, cmvy) is the MV predictor
+   used for the cost term. On return mot->x/y/sad are updated and
+   encvid->best_hpel_pos / best_qpel_pos record the winning sub-positions.
+   Returns the minimum SATD (motion cost excluded).
+   Note: xpos/ypos/hp_guess are currently unused. */
+int AVCFindHalfPelMB(AVCEncObject *encvid, uint8 *cur, AVCMV *mot, uint8 *ncand,
+                     int xpos, int ypos, int hp_guess, int cmvx, int cmvy)
+{
+    AVCPictureData *currPic = encvid->common->currPic;
+    int lx = currPic->pitch;
+    int d, dmin, satd_min;
+    uint8* cand;
+    int lambda_motion = encvid->lambda_motion;
+    uint8 *mvbits = encvid->mvbits;
+    int mvcost;
+    /* list of candidate to go through for half-pel search*/
+    uint8 *subpel_pred = (uint8*) encvid->subpel_pred; // all 16 sub-pel positions
+    uint8 **hpel_cand = (uint8**) encvid->hpel_cand; /* half-pel position */
+
+    /* spiral offsets (in half-pel units, i.e. quarter-pel*2) of the 8
+       half-pel neighbors; index 0 is the full-pel center */
+    int xh[9] = {0, 0, 2, 2, 2, 0, -2, -2, -2};
+    int yh[9] = {0, -2, -2, 0, 2, 2, 2, 0, -2};
+    int xq[8] = {0, 1, 1, 1, 0, -1, -1, -1};
+    int yq[8] = { -1, -1, 0, 1, 1, 1, 0, -1};
+    int h, hmin, q, qmin;
+
+    OSCL_UNUSED_ARG(xpos);
+    OSCL_UNUSED_ARG(ypos);
+    OSCL_UNUSED_ARG(hp_guess);
+
+    GenerateHalfPelPred(subpel_pred, ncand, lx);
+
+    cur = encvid->currYMB; // pre-load current original MB
+
+    cand = hpel_cand[0];
+
+    // find cost for the current full-pel position
+    dmin = SATD_MB(cand, cur, 65535); // get Hadamaard transform SAD
+    mvcost = MV_COST_S(lambda_motion, mot->x, mot->y, cmvx, cmvy);
+    satd_min = dmin;
+    dmin += mvcost;
+    hmin = 0;
+
+    /* find half-pel */
+    for (h = 1; h < 9; h++)
+    {
+        d = SATD_MB(hpel_cand[h], cur, dmin); /* dmin allows early exit inside SATD */
+        mvcost = MV_COST_S(lambda_motion, mot->x + xh[h], mot->y + yh[h], cmvx, cmvy);
+        d += mvcost;
+
+        if (d < dmin)
+        {
+            dmin = d;
+            hmin = h;
+            satd_min = d - mvcost;
+        }
+    }
+
+    mot->sad = dmin;
+    mot->x += xh[hmin];
+    mot->y += yh[hmin];
+    encvid->best_hpel_pos = hmin;
+
+    /*** search for quarter-pel ****/
+    GenerateQuartPelPred(encvid->bilin_base[hmin], &(encvid->qpel_cand[0][0]), hmin);
+
+    encvid->best_qpel_pos = qmin = -1; /* -1 means half-pel result stands */
+
+    for (q = 0; q < 8; q++)
+    {
+        d = SATD_MB(encvid->qpel_cand[q], cur, dmin);
+        mvcost = MV_COST_S(lambda_motion, mot->x + xq[q], mot->y + yq[q], cmvx, cmvy);
+        d += mvcost;
+        if (d < dmin)
+        {
+            dmin = d;
+            qmin = q;
+            satd_min = d - mvcost;
+        }
+    }
+
+    if (qmin != -1)
+    {
+        mot->sad = dmin;
+        mot->x += xq[qmin];
+        mot->y += yq[qmin];
+        encvid->best_qpel_pos = qmin;
+    }
+
+    return satd_min;
+}
+
+
+
+/** This function generates sub-pel prediction around the full-pel candidate.
+Each sub-pel position array is 20 pixel wide (for word-alignment) and 17 pixel tall. */
+/** The sub-pel position is labeled in spiral manner from the center. */
+/* Pipeline: (1) copy a 24x22 full-pel window around ncand into the first
+   plane of subpel_pred, (2) 6-tap [1,-5,20,20,-5,1] horizontal filter kept
+   at 16-bit precision in tmp_horz AND rounded into the H2Q plane,
+   (3) middle (half,half) position filtered vertically from tmp_horz with
+   >>10 rounding, (4) vertical-only half-pel plane filtered from the
+   full-pel plane. Plane strides are 24 bytes; tmp_horz stride is 18.
+   NOTE(review): the packed 2-pixels-per-word vertical loop at the end
+   skips the per-column clipping (VertInterpWClip call is commented out) —
+   presumably acceptable for typical content; confirm against upstream. */
+void GenerateHalfPelPred(uint8* subpel_pred, uint8 *ncand, int lx)
+{
+    /* let's do straightforward way first */
+    uint8 *ref;
+    uint8 *dst;
+    uint8 tmp8;
+    int32 tmp32;
+    int16 tmp_horz[18*22], *dst_16, *src_16;
+    register int a = 0, b = 0, c = 0, d = 0, e = 0, f = 0; // temp register
+    int msk;
+    int i, j;
+
+    /* first copy full-pel to the first array */
+    /* to be optimized later based on byte-offset load */
+    ref = ncand - 3 - lx - (lx << 1); /* move back (-3,-3) */
+    dst = subpel_pred;
+
+    dst -= 4; /* offset */
+    for (j = 0; j < 22; j++) /* 24x22 */
+    {
+        i = 6;
+        while (i > 0)
+        {
+            /* assemble 4 bytes into one little-endian word store */
+            tmp32 = *ref++;
+            tmp8 = *ref++;
+            tmp32 |= (tmp8 << 8);
+            tmp8 = *ref++;
+            tmp32 |= (tmp8 << 16);
+            tmp8 = *ref++;
+            tmp32 |= (tmp8 << 24);
+            *((uint32*)(dst += 4)) = tmp32;
+            i--;
+        }
+        ref += (lx - 24);
+    }
+
+    /* from the first array, we do horizontal interp */
+    ref = subpel_pred + 2;
+    dst_16 = tmp_horz; /* 17 x 22 */
+
+    /* top 4 rows: 16-bit intermediates only (no byte output needed here) */
+    for (j = 4; j > 0; j--)
+    {
+        for (i = 16; i > 0; i -= 4)
+        {
+            a = ref[-2];
+            b = ref[-1];
+            c = ref[0];
+            d = ref[1];
+            e = ref[2];
+            f = ref[3];
+            *dst_16++ = a + f - 5 * (b + e) + 20 * (c + d);
+            a = ref[4];
+            *dst_16++ = b + a - 5 * (c + f) + 20 * (d + e);
+            b = ref[5];
+            *dst_16++ = c + b - 5 * (d + a) + 20 * (e + f);
+            c = ref[6];
+            *dst_16++ = d + c - 5 * (e + b) + 20 * (f + a);
+
+            ref += 4;
+        }
+        /* do the 17th column here */
+        d = ref[3];
+        *dst_16 = e + d - 5 * (f + c) + 20 * (a + b);
+        dst_16 += 2; /* stride for tmp_horz is 18 */
+        ref += 8; /* stride for ref is 24 */
+        if (j == 3)  // move 18 lines down
+        {
+            dst_16 += 324;//18*18;
+            ref += 432;//18*24;
+        }
+    }
+
+    ref -= 480;//20*24;
+    dst_16 -= 360;//20*18;
+    dst = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* go to the 14th array 17x18*/
+
+    /* middle 18 rows: keep 16-bit intermediates AND emit rounded bytes */
+    for (j = 18; j > 0; j--)
+    {
+        for (i = 16; i > 0; i -= 4)
+        {
+            a = ref[-2];
+            b = ref[-1];
+            c = ref[0];
+            d = ref[1];
+            e = ref[2];
+            f = ref[3];
+            tmp32 = a + f - 5 * (b + e) + 20 * (c + d);
+            *dst_16++ = tmp32;
+            tmp32 = (tmp32 + 16) >> 5;
+            CLIP_RESULT(tmp32)
+            *dst++ = tmp32;
+
+            a = ref[4];
+            tmp32 = b + a - 5 * (c + f) + 20 * (d + e);
+            *dst_16++ = tmp32;
+            tmp32 = (tmp32 + 16) >> 5;
+            CLIP_RESULT(tmp32)
+            *dst++ = tmp32;
+
+            b = ref[5];
+            tmp32 = c + b - 5 * (d + a) + 20 * (e + f);
+            *dst_16++ = tmp32;
+            tmp32 = (tmp32 + 16) >> 5;
+            CLIP_RESULT(tmp32)
+            *dst++ = tmp32;
+
+            c = ref[6];
+            tmp32 = d + c - 5 * (e + b) + 20 * (f + a);
+            *dst_16++ = tmp32;
+            tmp32 = (tmp32 + 16) >> 5;
+            CLIP_RESULT(tmp32)
+            *dst++ = tmp32;
+
+            ref += 4;
+        }
+        /* do the 17th column here */
+        d = ref[3];
+        tmp32 = e + d - 5 * (f + c) + 20 * (a + b);
+        *dst_16 = tmp32;
+        tmp32 = (tmp32 + 16) >> 5;
+        CLIP_RESULT(tmp32)
+        *dst = tmp32;
+
+        dst += 8; /* stride for dst is 24 */
+        dst_16 += 2; /* stride for tmp_horz is 18 */
+        ref += 8; /* stride for ref is 24 */
+    }
+
+
+    /* Do middle point filtering*/
+    /* vertical 6-tap over the 16-bit horizontal results; (x+512)>>10
+       combines both filter normalizations in one rounding step */
+    src_16 = tmp_horz; /* 17 x 22 */
+    dst = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* 12th array 17x17*/
+    dst -= 24; // offset
+    for (i = 0; i < 17; i++)
+    {
+        for (j = 16; j > 0; j -= 4)
+        {
+            a = *src_16;
+            b = *(src_16 += 18);
+            c = *(src_16 += 18);
+            d = *(src_16 += 18);
+            e = *(src_16 += 18);
+            f = *(src_16 += 18);
+
+            tmp32 = a + f - 5 * (b + e) + 20 * (c + d);
+            tmp32 = (tmp32 + 512) >> 10;
+            CLIP_RESULT(tmp32)
+            *(dst += 24) = tmp32;
+
+            a = *(src_16 += 18);
+            tmp32 = b + a - 5 * (c + f) + 20 * (d + e);
+            tmp32 = (tmp32 + 512) >> 10;
+            CLIP_RESULT(tmp32)
+            *(dst += 24) = tmp32;
+
+            b = *(src_16 += 18);
+            tmp32 = c + b - 5 * (d + a) + 20 * (e + f);
+            tmp32 = (tmp32 + 512) >> 10;
+            CLIP_RESULT(tmp32)
+            *(dst += 24) = tmp32;
+
+            c = *(src_16 += 18);
+            tmp32 = d + c - 5 * (e + b) + 20 * (f + a);
+            tmp32 = (tmp32 + 512) >> 10;
+            CLIP_RESULT(tmp32)
+            *(dst += 24) = tmp32;
+
+            src_16 -= (18 << 2);
+        }
+
+        d = src_16[90]; // 18*5
+        tmp32 = e + d - 5 * (f + c) + 20 * (a + b);
+        tmp32 = (tmp32 + 512) >> 10;
+        CLIP_RESULT(tmp32)
+        dst[24] = tmp32;
+
+        src_16 -= ((18 << 4) - 1);
+        dst -= ((24 << 4) - 1);
+    }
+
+    /* do vertical interpolation */
+    ref = subpel_pred + 2;
+    dst = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE; /* 10th array 18x17 */
+    dst -= 24; // offset
+
+    /* first 2 columns: scalar 6-tap with full clipping */
+    for (i = 2; i > 0; i--)
+    {
+        for (j = 16; j > 0; j -= 4)
+        {
+            a = *ref;
+            b = *(ref += 24);
+            c = *(ref += 24);
+            d = *(ref += 24);
+            e = *(ref += 24);
+            f = *(ref += 24);
+
+            tmp32 = a + f - 5 * (b + e) + 20 * (c + d);
+            tmp32 = (tmp32 + 16) >> 5;
+            CLIP_RESULT(tmp32)
+            *(dst += 24) = tmp32;  // 10th
+
+            a = *(ref += 24);
+            tmp32 = b + a - 5 * (c + f) + 20 * (d + e);
+            tmp32 = (tmp32 + 16) >> 5;
+            CLIP_RESULT(tmp32)
+            *(dst += 24) = tmp32;  // 10th
+
+            b = *(ref += 24);
+            tmp32 = c + b - 5 * (d + a) + 20 * (e + f);
+            tmp32 = (tmp32 + 16) >> 5;
+            CLIP_RESULT(tmp32)
+            *(dst += 24) = tmp32;  // 10th
+
+            c = *(ref += 24);
+            tmp32 = d + c - 5 * (e + b) + 20 * (f + a);
+            tmp32 = (tmp32 + 16) >> 5;
+            CLIP_RESULT(tmp32)
+            *(dst += 24) = tmp32;  // 10th
+
+            ref -= (24 << 2);
+        }
+
+        d = ref[120]; // 24*5
+        tmp32 = e + d - 5 * (f + c) + 20 * (a + b);
+        tmp32 = (tmp32 + 16) >> 5;
+        CLIP_RESULT(tmp32)
+        dst[24] = tmp32;  // 10th
+
+        dst -= ((24 << 4) - 1);
+        ref -= ((24 << 4) - 1);
+    }
+
+    // note that using SIMD here doesn't help much, the cycle almost stays the same
+    // one can just use the above code and change the for(i=2 to for(i=18
+    /* remaining 16 columns: process two pixels per 32-bit word (even bytes
+       in 'a', odd bytes in 'b'), 0x100010 adds the +16 rounding to both */
+    for (i = 16; i > 0; i -= 4)
+    {
+        msk = 0;
+        for (j = 17; j > 0; j--)
+        {
+            a = *((uint32*)ref); /* load 4 bytes */
+            b = (a >> 8) & 0xFF00FF; /* second and fourth byte */
+            a &= 0xFF00FF;
+
+            c = *((uint32*)(ref + 120));
+            d = (c >> 8) & 0xFF00FF;
+            c &= 0xFF00FF;
+
+            a += c;
+            b += d;
+
+            e = *((uint32*)(ref + 72)); /* e, f */
+            f = (e >> 8) & 0xFF00FF;
+            e &= 0xFF00FF;
+
+            c = *((uint32*)(ref + 48)); /* c, d */
+            d = (c >> 8) & 0xFF00FF;
+            c &= 0xFF00FF;
+
+            c += e;
+            d += f;
+
+            a += 20 * c;
+            b += 20 * d;
+            a += 0x100010;
+            b += 0x100010;
+
+            e = *((uint32*)(ref += 24)); /* e, f */
+            f = (e >> 8) & 0xFF00FF;
+            e &= 0xFF00FF;
+
+            c = *((uint32*)(ref + 72)); /* c, d */
+            d = (c >> 8) & 0xFF00FF;
+            c &= 0xFF00FF;
+
+            c += e;
+            d += f;
+
+            a -= 5 * c;
+            b -= 5 * d;
+
+            c = a << 16;
+            d = b << 16;
+            CLIP_UPPER16(a)
+            CLIP_UPPER16(c)
+            CLIP_UPPER16(b)
+            CLIP_UPPER16(d)
+
+            a |= (c >> 16);
+            b |= (d >> 16);
+            //  a>>=5;
+            //  b>>=5;
+            /* clip */
+            //  msk |= b;  msk|=a;
+            //  a &= 0xFF00FF;
+            //  b &= 0xFF00FF;
+            a |= (b << 8);  /* pack it back */
+
+            *((uint16*)(dst += 24)) = a & 0xFFFF; //dst is not word-aligned.
+            *((uint16*)(dst + 2)) = a >> 16;
+
+        }
+        dst -= 404; // 24*17-4
+        ref -= 404;
+        /*  if(msk & 0xFF00FF00) // need clipping
+            {
+                VertInterpWClip(dst,ref); // re-do 4 column with clip
+            }*/
+    }
+
+    return ;
+}
+
+/* Scalar fallback for the packed vertical half-pel filter: re-runs the
+   6-tap [1,-5,20,20,-5,1] vertical interpolation with full [0,255]
+   clipping over a 4-column strip (both dst and ref use a 24-byte pitch).
+   Intended to be called when the packed loop in GenerateHalfPelPred
+   detects overflow (that call site is currently commented out). */
+void VertInterpWClip(uint8 *dst, uint8 *ref)
+{
+    int i, j;
+    int a, b, c, d, e, f;
+    int32 tmp32;
+
+    dst -= 4;
+    ref -= 4;
+
+    for (i = 4; i > 0; i--)
+    {
+        for (j = 16; j > 0; j -= 4)
+        {
+            a = *ref;
+            b = *(ref += 24);
+            c = *(ref += 24);
+            d = *(ref += 24);
+            e = *(ref += 24);
+            f = *(ref += 24);
+
+            tmp32 = a + f - 5 * (b + e) + 20 * (c + d);
+            tmp32 = (tmp32 + 16) >> 5;
+            CLIP_RESULT(tmp32)
+            *(dst += 24) = tmp32;  // 10th
+
+            a = *(ref += 24);
+            tmp32 = b + a - 5 * (c + f) + 20 * (d + e);
+            tmp32 = (tmp32 + 16) >> 5;
+            CLIP_RESULT(tmp32)
+            *(dst += 24) = tmp32;  // 10th
+
+            b = *(ref += 24);
+            tmp32 = c + b - 5 * (d + a) + 20 * (e + f);
+            tmp32 = (tmp32 + 16) >> 5;
+            CLIP_RESULT(tmp32)
+            *(dst += 24) = tmp32;  // 10th
+
+            c = *(ref += 24);
+            tmp32 = d + c - 5 * (e + b) + 20 * (f + a);
+            tmp32 = (tmp32 + 16) >> 5;
+            CLIP_RESULT(tmp32)
+            *(dst += 24) = tmp32;  // 10th
+
+            ref -= (24 << 2);
+        }
+
+        d = ref[120]; // 24*5
+        tmp32 = e + d - 5 * (f + c) + 20 * (a + b);
+        tmp32 = (tmp32 + 16) >> 5;
+        CLIP_RESULT(tmp32)
+        dst[24] = tmp32;  // 10th
+
+        dst -= ((24 << 4) - 1);
+        ref -= ((24 << 4) - 1);
+    }
+
+    return ;
+}
+
+
+/* Build the 8 quarter-pel candidate MBs around the winning half-pel
+   position by bilinear averaging of the four surrounding half/full-pel
+   planes in bilin_base[0..3] (pitch 24). Candidates are laid out
+   consecutively in qpel_cand, 384 bytes apart, labeled c1..c8 clockwise.
+   Even hpel_pos uses the "diamond" averaging pattern, odd uses "star". */
+void GenerateQuartPelPred(uint8 **bilin_base, uint8 *qpel_cand, int hpel_pos)
+{
+    // for even value of hpel_pos, start with pattern 1, otherwise, start with pattern 2
+    int i, j;
+
+    uint8 *c1 = qpel_cand;
+    uint8 *tl = bilin_base[0];
+    uint8 *tr = bilin_base[1];
+    uint8 *bl = bilin_base[2];
+    uint8 *br = bilin_base[3];
+    int a, b, c, d;
+    int offset = 1 - (384 * 7); /* rewind over 7 candidate planes, advance 1 pixel */
+
+    if (!(hpel_pos&1)) // diamond pattern
+    {
+        j = 16;
+        while (j--)
+        {
+            i = 16;
+            while (i--)
+            {
+                d = tr[24];
+                a = *tr++;
+                b = bl[1];
+                c = *br++;
+
+                /* (x + y + 1) >> 1 is the rounded bilinear average */
+                *c1 = (c + a + 1) >> 1;
+                *(c1 += 384) = (b + a + 1) >> 1; /* c2 */
+                *(c1 += 384) = (b + c + 1) >> 1; /* c3 */
+                *(c1 += 384) = (b + d + 1) >> 1; /* c4 */
+
+                b = *bl++;
+
+                *(c1 += 384) = (c + d + 1) >> 1; /* c5 */
+                *(c1 += 384) = (b + d + 1) >> 1; /* c6 */
+                *(c1 += 384) = (b + c + 1) >> 1; /* c7 */
+                *(c1 += 384) = (b + a + 1) >> 1; /* c8 */
+
+                c1 += offset;
+            }
+            // advance to the next line, pitch is 24
+            tl += 8;
+            tr += 8;
+            bl += 8;
+            br += 8;
+            c1 += 8;
+        }
+    }
+    else // star pattern
+    {
+        j = 16;
+        while (j--)
+        {
+            i = 16;
+            while (i--)
+            {
+                a = *br++;
+                b = *tr++;
+                c = tl[1];
+                *c1 = (a + b + 1) >> 1;
+                b = bl[1];
+                *(c1 += 384) = (a + c + 1) >> 1; /* c2 */
+                c = tl[25];
+                *(c1 += 384) = (a + b + 1) >> 1; /* c3 */
+                b = tr[23];
+                *(c1 += 384) = (a + c + 1) >> 1; /* c4 */
+                c = tl[24];
+                *(c1 += 384) = (a + b + 1) >> 1; /* c5 */
+                b = *bl++;
+                *(c1 += 384) = (a + c + 1) >> 1; /* c6 */
+                c = *tl++;
+                *(c1 += 384) = (a + b + 1) >> 1; /* c7 */
+                *(c1 += 384) = (a + c + 1) >> 1; /* c8 */
+
+                c1 += offset;
+            }
+            // advance to the next line, pitch is 24
+            tl += 8;
+            tr += 8;
+            bl += 8;
+            br += 8;
+            c1 += 8;
+        }
+    }
+
+    return ;
+}
+
+
+/* assuming cand always has a pitch of 24 */
+/* Compute the macroblock SATD between candidate and current MB.
+   The current dmin and the candidate pitch (24) are packed into one int
+   (dmin in the upper 16 bits, pitch in the lower 16) because
+   AVCSAD_Macroblock_C takes both through a single parameter; dmin lets
+   the SAD routine terminate early once the cost exceeds it. */
+int SATD_MB(uint8 *cand, uint8 *cur, int dmin)
+{
+    int cost;
+
+
+    dmin = (dmin << 16) | 24;
+    cost = AVCSAD_Macroblock_C(cand, cur, dmin, NULL);
+
+    return cost;
+}
+
+
+
+
+
diff --git a/media/libstagefright/codecs/avc/enc/src/header.cpp b/media/libstagefright/codecs/avc/enc/src/header.cpp
new file mode 100644
index 0000000..9acff9e
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/header.cpp
@@ -0,0 +1,917 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+#include "avcenc_lib.h"
+#include "avcenc_api.h"
+
+/** see subclause 7.4.2.1 */
+/* no need for checking the valid range , already done in SetEncodeParam(),
+if we have to send another SPS, the ranges should be verified first before
+users call PVAVCEncodeSPS() */
+/* Serialize the active sequence parameter set into 'stream'. Field order
+   and widths follow the seq_parameter_set_rbsp() syntax; status is checked
+   in batches rather than after every write. Returns AVCENC_SUCCESS or the
+   first bitstream-write error. */
+AVCEnc_Status EncodeSPS(AVCEncObject *encvid, AVCEncBitstream *stream)
+{
+    AVCCommonObj *video = encvid->common;
+    AVCSeqParamSet *seqParam = video->currSeqParams;
+    AVCVUIParams *vui = &(seqParam->vui_parameters);
+    int i;
+    AVCEnc_Status status = AVCENC_SUCCESS;
+
+    //DEBUG_LOG(userData,AVC_LOGTYPE_INFO,"EncodeSPS",-1,-1);
+
+    status = BitstreamWriteBits(stream, 8, seqParam->profile_idc);
+    status = BitstreamWrite1Bit(stream, seqParam->constrained_set0_flag);
+    status = BitstreamWrite1Bit(stream, seqParam->constrained_set1_flag);
+    status = BitstreamWrite1Bit(stream, seqParam->constrained_set2_flag);
+    status = BitstreamWrite1Bit(stream, seqParam->constrained_set3_flag);
+    status = BitstreamWriteBits(stream, 4, 0); /* forbidden zero bits */
+    if (status != AVCENC_SUCCESS)  /* we can check after each write also */
+    {
+        return status;
+    }
+
+    status = BitstreamWriteBits(stream, 8, seqParam->level_idc);
+    status = ue_v(stream, seqParam->seq_parameter_set_id);
+    status = ue_v(stream, seqParam->log2_max_frame_num_minus4);
+    status = ue_v(stream, seqParam->pic_order_cnt_type);
+    if (status != AVCENC_SUCCESS)
+    {
+        return status;
+    }
+
+    if (seqParam->pic_order_cnt_type == 0)
+    {
+        status = ue_v(stream, seqParam->log2_max_pic_order_cnt_lsb_minus4);
+    }
+    else if (seqParam->pic_order_cnt_type == 1)
+    {
+        status = BitstreamWrite1Bit(stream, seqParam->delta_pic_order_always_zero_flag);
+        status = se_v(stream, seqParam->offset_for_non_ref_pic); /* upto 32 bits */
+        status = se_v(stream, seqParam->offset_for_top_to_bottom_field); /* upto 32 bits */
+        status = ue_v(stream, seqParam->num_ref_frames_in_pic_order_cnt_cycle);
+
+        for (i = 0; i < (int)(seqParam->num_ref_frames_in_pic_order_cnt_cycle); i++)
+        {
+            status = se_v(stream, seqParam->offset_for_ref_frame[i]); /* upto 32 bits */
+        }
+    }
+    if (status != AVCENC_SUCCESS)
+    {
+        return status;
+    }
+
+    status = ue_v(stream, seqParam->num_ref_frames);
+    status = BitstreamWrite1Bit(stream, seqParam->gaps_in_frame_num_value_allowed_flag);
+    status = ue_v(stream, seqParam->pic_width_in_mbs_minus1);
+    status = ue_v(stream, seqParam->pic_height_in_map_units_minus1);
+    status = BitstreamWrite1Bit(stream, seqParam->frame_mbs_only_flag);
+    if (status != AVCENC_SUCCESS)
+    {
+        return status;
+    }
+    /* if frame_mbs_only_flag is 0, then write, mb_adaptive_frame_field_frame here */
+
+    status = BitstreamWrite1Bit(stream, seqParam->direct_8x8_inference_flag);
+    status = BitstreamWrite1Bit(stream, seqParam->frame_cropping_flag);
+    if (seqParam->frame_cropping_flag)
+    {
+        status = ue_v(stream, seqParam->frame_crop_left_offset);
+        status = ue_v(stream, seqParam->frame_crop_right_offset);
+        status = ue_v(stream, seqParam->frame_crop_top_offset);
+        status = ue_v(stream, seqParam->frame_crop_bottom_offset);
+    }
+    if (status != AVCENC_SUCCESS)
+    {
+        return status;
+    }
+
+    status = BitstreamWrite1Bit(stream, seqParam->vui_parameters_present_flag);
+    if (seqParam->vui_parameters_present_flag)
+    {
+        /* not supported */
+        //return AVCENC_SPS_FAIL;
+        EncodeVUI(stream, vui);
+    }
+
+    return status;
+}
+
+
+/* Serialize the VUI parameters (vui_parameters() syntax, Annex E).
+   Write statuses are not checked here; the caller relies on subsequent
+   stream-level error detection. */
+void EncodeVUI(AVCEncBitstream* stream, AVCVUIParams* vui)
+{
+    int temp;
+
+    temp = vui->aspect_ratio_info_present_flag;
+    BitstreamWrite1Bit(stream, temp);
+    if (temp)
+    {
+        BitstreamWriteBits(stream, 8, vui->aspect_ratio_idc);
+        if (vui->aspect_ratio_idc == 255) /* Extended_SAR: explicit width/height */
+        {
+            BitstreamWriteBits(stream, 16, vui->sar_width);
+            BitstreamWriteBits(stream, 16, vui->sar_height);
+        }
+    }
+    temp = vui->overscan_info_present_flag;
+    BitstreamWrite1Bit(stream, temp);
+    if (temp)
+    {
+        BitstreamWrite1Bit(stream, vui->overscan_appropriate_flag);
+    }
+    temp = vui->video_signal_type_present_flag;
+    BitstreamWrite1Bit(stream, temp);
+    if (temp)
+    {
+        BitstreamWriteBits(stream, 3, vui->video_format);
+        BitstreamWrite1Bit(stream, vui->video_full_range_flag);
+        temp = vui->colour_description_present_flag;
+        BitstreamWrite1Bit(stream, temp);
+        if (temp)
+        {
+            BitstreamWriteBits(stream, 8, vui->colour_primaries);
+            BitstreamWriteBits(stream, 8, vui->transfer_characteristics);
+            BitstreamWriteBits(stream, 8, vui->matrix_coefficients);
+        }
+    }
+    temp = vui->chroma_location_info_present_flag;
+    BitstreamWrite1Bit(stream, temp);
+    if (temp)
+    {
+        ue_v(stream, vui->chroma_sample_loc_type_top_field);
+        ue_v(stream, vui->chroma_sample_loc_type_bottom_field);
+    }
+
+    temp = vui->timing_info_present_flag;
+    BitstreamWrite1Bit(stream, temp);
+    if (temp)
+    {
+        BitstreamWriteBits(stream, 32, vui->num_units_in_tick);
+        BitstreamWriteBits(stream, 32, vui->time_scale);
+        BitstreamWrite1Bit(stream, vui->fixed_frame_rate_flag);
+    }
+
+    temp = vui->nal_hrd_parameters_present_flag;
+    BitstreamWrite1Bit(stream, temp);
+    if (temp)
+    {
+        EncodeHRD(stream, &(vui->nal_hrd_parameters));
+    }
+    temp = vui->vcl_hrd_parameters_present_flag;
+    BitstreamWrite1Bit(stream, temp);
+    if (temp)
+    {
+        EncodeHRD(stream, &(vui->vcl_hrd_parameters));
+    }
+    if (vui->nal_hrd_parameters_present_flag || vui->vcl_hrd_parameters_present_flag)
+    {
+        BitstreamWrite1Bit(stream, vui->low_delay_hrd_flag);
+    }
+    BitstreamWrite1Bit(stream, vui->pic_struct_present_flag);
+    temp = vui->bitstream_restriction_flag;
+    BitstreamWrite1Bit(stream, temp);
+    if (temp)
+    {
+        BitstreamWrite1Bit(stream, vui->motion_vectors_over_pic_boundaries_flag);
+        ue_v(stream, vui->max_bytes_per_pic_denom);
+        ue_v(stream, vui->max_bits_per_mb_denom);
+        ue_v(stream, vui->log2_max_mv_length_horizontal);
+        ue_v(stream, vui->log2_max_mv_length_vertical);
+        ue_v(stream, vui->max_dec_frame_reordering);
+        ue_v(stream, vui->max_dec_frame_buffering);
+    }
+
+    return ;
+}
+
+
+/* Serialize one set of HRD parameters (hrd_parameters() syntax, Annex E):
+   CPB count/scales, per-CPB rate/size/cbr entries, then the four delay
+   length fields. Write statuses are not checked here. */
+void EncodeHRD(AVCEncBitstream* stream, AVCHRDParams* hrd)
+{
+    int i;
+
+    ue_v(stream, hrd->cpb_cnt_minus1);
+    BitstreamWriteBits(stream, 4, hrd->bit_rate_scale);
+    BitstreamWriteBits(stream, 4, hrd->cpb_size_scale);
+    for (i = 0; i <= (int)hrd->cpb_cnt_minus1; i++)
+    {
+        ue_v(stream, hrd->bit_rate_value_minus1[i]);
+        ue_v(stream, hrd->cpb_size_value_minus1[i]);
+        ue_v(stream, hrd->cbr_flag[i]);
+    }
+    BitstreamWriteBits(stream, 5, hrd->initial_cpb_removal_delay_length_minus1);
+    BitstreamWriteBits(stream, 5, hrd->cpb_removal_delay_length_minus1);
+    BitstreamWriteBits(stream, 5, hrd->dpb_output_delay_length_minus1);
+    BitstreamWriteBits(stream, 5, hrd->time_offset_length);
+
+    return ;
+}
+
+
+
+/** see subclause 7.4.2.2 */
+/* Serialize the active picture parameter set (pic_parameter_set_rbsp()).
+   Range checking was already done in SetEncodeParam(); if another PPS has
+   to be sent later, the ranges should be re-verified first. */
+AVCEnc_Status EncodePPS(AVCEncObject *encvid, AVCEncBitstream *stream)
+{
+    AVCCommonObj *video = encvid->common;
+    AVCEnc_Status status = AVCENC_SUCCESS;
+    AVCPicParamSet *picParam = video->currPicParams;
+    int i, iGroup, numBits;
+    uint temp;
+
+    status = ue_v(stream, picParam->pic_parameter_set_id);
+    status = ue_v(stream, picParam->seq_parameter_set_id);
+    status = BitstreamWrite1Bit(stream, picParam->entropy_coding_mode_flag);
+    status = BitstreamWrite1Bit(stream, picParam->pic_order_present_flag);
+    if (status != AVCENC_SUCCESS)
+    {
+        return status;
+    }
+
+    status = ue_v(stream, picParam->num_slice_groups_minus1);
+    if (picParam->num_slice_groups_minus1 > 0) /* FMO slice-group map */
+    {
+        status = ue_v(stream, picParam->slice_group_map_type);
+        if (picParam->slice_group_map_type == 0)
+        {
+            for (iGroup = 0; iGroup <= (int)picParam->num_slice_groups_minus1; iGroup++)
+            {
+                status = ue_v(stream, picParam->run_length_minus1[iGroup]);
+            }
+        }
+        else if (picParam->slice_group_map_type == 2)
+        {
+            for (iGroup = 0; iGroup < (int)picParam->num_slice_groups_minus1; iGroup++)
+            {
+                status = ue_v(stream, picParam->top_left[iGroup]);
+                status = ue_v(stream, picParam->bottom_right[iGroup]);
+            }
+        }
+        else if (picParam->slice_group_map_type == 3 ||
+                 picParam->slice_group_map_type == 4 ||
+                 picParam->slice_group_map_type == 5)
+        {
+            status = BitstreamWrite1Bit(stream, picParam->slice_group_change_direction_flag);
+            status = ue_v(stream, picParam->slice_group_change_rate_minus1);
+        }
+        else /*if(picParam->slice_group_map_type == 6)*/
+        {
+            status = ue_v(stream, picParam->pic_size_in_map_units_minus1);
+
+            numBits = 0;/* ceil(log2(num_slice_groups_minus1+1)) bits */
+            i = picParam->num_slice_groups_minus1;
+            while (i > 0)
+            {
+                numBits++;
+                i >>= 1;
+            }
+
+            for (i = 0; i <= (int)picParam->pic_size_in_map_units_minus1; i++)
+            {
+                status = BitstreamWriteBits(stream, numBits, picParam->slice_group_id[i]);
+            }
+        }
+    }
+    if (status != AVCENC_SUCCESS)
+    {
+        return status;
+    }
+
+    status = ue_v(stream, picParam->num_ref_idx_l0_active_minus1);
+    status = ue_v(stream, picParam->num_ref_idx_l1_active_minus1);
+    status = BitstreamWrite1Bit(stream, picParam->weighted_pred_flag);
+    status = BitstreamWriteBits(stream, 2, picParam->weighted_bipred_idc);
+    if (status != AVCENC_SUCCESS)
+    {
+        return status;
+    }
+
+    status = se_v(stream, picParam->pic_init_qp_minus26);
+    status = se_v(stream, picParam->pic_init_qs_minus26);
+    status = se_v(stream, picParam->chroma_qp_index_offset);
+
+    /* pack the last three single-bit flags into one 3-bit write */
+    temp = picParam->deblocking_filter_control_present_flag << 2;
+    temp |= (picParam->constrained_intra_pred_flag << 1);
+    temp |= picParam->redundant_pic_cnt_present_flag;
+
+    status = BitstreamWriteBits(stream, 3, temp);
+
+    return status;
+}
+
+/** see subclause 7.4.3 */
+AVCEnc_Status EncodeSliceHeader(AVCEncObject *encvid, AVCEncBitstream *stream)
+{
+ AVCCommonObj *video = encvid->common;
+ AVCSliceHeader *sliceHdr = video->sliceHdr;
+ AVCPicParamSet *currPPS = video->currPicParams;
+ AVCSeqParamSet *currSPS = video->currSeqParams;
+ AVCEnc_Status status = AVCENC_SUCCESS;
+ int slice_type, temp, i;
+ int num_bits;
+
+ num_bits = (stream->write_pos << 3) - stream->bit_left;
+
+ status = ue_v(stream, sliceHdr->first_mb_in_slice);
+
+ slice_type = video->slice_type;
+
+ if (video->mbNum == 0) /* first mb in frame */
+ {
+ status = ue_v(stream, sliceHdr->slice_type);
+ }
+ else
+ {
+ status = ue_v(stream, slice_type);
+ }
+
+ status = ue_v(stream, sliceHdr->pic_parameter_set_id);
+
+ status = BitstreamWriteBits(stream, currSPS->log2_max_frame_num_minus4 + 4, sliceHdr->frame_num);
+
+ if (status != AVCENC_SUCCESS)
+ {
+ return status;
+ }
+ /* if frame_mbs_only_flag is 0, encode field_pic_flag, bottom_field_flag here */
+
+ if (video->nal_unit_type == AVC_NALTYPE_IDR)
+ {
+ status = ue_v(stream, sliceHdr->idr_pic_id);
+ }
+
+ if (currSPS->pic_order_cnt_type == 0)
+ {
+ status = BitstreamWriteBits(stream, currSPS->log2_max_pic_order_cnt_lsb_minus4 + 4,
+ sliceHdr->pic_order_cnt_lsb);
+
+ if (currPPS->pic_order_present_flag && !sliceHdr->field_pic_flag)
+ {
+ status = se_v(stream, sliceHdr->delta_pic_order_cnt_bottom); /* 32 bits */
+ }
+ }
+ if (currSPS->pic_order_cnt_type == 1 && !currSPS->delta_pic_order_always_zero_flag)
+ {
+ status = se_v(stream, sliceHdr->delta_pic_order_cnt[0]); /* 32 bits */
+ if (currPPS->pic_order_present_flag && !sliceHdr->field_pic_flag)
+ {
+ status = se_v(stream, sliceHdr->delta_pic_order_cnt[1]); /* 32 bits */
+ }
+ }
+
+ if (currPPS->redundant_pic_cnt_present_flag)
+ {
+ status = ue_v(stream, sliceHdr->redundant_pic_cnt);
+ }
+
+ if (slice_type == AVC_B_SLICE)
+ {
+ status = BitstreamWrite1Bit(stream, sliceHdr->direct_spatial_mv_pred_flag);
+ }
+
+ if (status != AVCENC_SUCCESS)
+ {
+ return status;
+ }
+
+ if (slice_type == AVC_P_SLICE || slice_type == AVC_SP_SLICE || slice_type == AVC_B_SLICE)
+ {
+ status = BitstreamWrite1Bit(stream, sliceHdr->num_ref_idx_active_override_flag);
+ if (sliceHdr->num_ref_idx_active_override_flag)
+ {
+ /* we shouldn't enter this part at all */
+ status = ue_v(stream, sliceHdr->num_ref_idx_l0_active_minus1);
+ if (slice_type == AVC_B_SLICE)
+ {
+ status = ue_v(stream, sliceHdr->num_ref_idx_l1_active_minus1);
+ }
+ }
+ }
+ if (status != AVCENC_SUCCESS)
+ {
+ return status;
+ }
+
+ /* ref_pic_list_reordering() */
+ status = ref_pic_list_reordering(video, stream, sliceHdr, slice_type);
+ if (status != AVCENC_SUCCESS)
+ {
+ return status;
+ }
+
+ if ((currPPS->weighted_pred_flag && (slice_type == AVC_P_SLICE || slice_type == AVC_SP_SLICE)) ||
+ (currPPS->weighted_bipred_idc == 1 && slice_type == AVC_B_SLICE))
+ {
+ // pred_weight_table(); // not supported !!
+ return AVCENC_PRED_WEIGHT_TAB_FAIL;
+ }
+
+ if (video->nal_ref_idc != 0)
+ {
+ status = dec_ref_pic_marking(video, stream, sliceHdr);
+ if (status != AVCENC_SUCCESS)
+ {
+ return status;
+ }
+ }
+
+ if (currPPS->entropy_coding_mode_flag && slice_type != AVC_I_SLICE && slice_type != AVC_SI_SLICE)
+ {
+ return AVCENC_CABAC_FAIL;
+ /* ue_v(stream,&(sliceHdr->cabac_init_idc));
+ if(sliceHdr->cabac_init_idc > 2){
+ // not supported !!!!
+ }*/
+ }
+
+ status = se_v(stream, sliceHdr->slice_qp_delta);
+ if (status != AVCENC_SUCCESS)
+ {
+ return status;
+ }
+
+ if (slice_type == AVC_SP_SLICE || slice_type == AVC_SI_SLICE)
+ {
+ if (slice_type == AVC_SP_SLICE)
+ {
+ status = BitstreamWrite1Bit(stream, sliceHdr->sp_for_switch_flag);
+ /* if sp_for_switch_flag is 0, P macroblocks in SP slice is decoded using
+ SP decoding process for non-switching pictures in 8.6.1 */
+ /* else, P macroblocks in SP slice is decoded using SP and SI decoding
+ process for switching picture in 8.6.2 */
+ }
+ status = se_v(stream, sliceHdr->slice_qs_delta);
+ if (status != AVCENC_SUCCESS)
+ {
+ return status;
+ }
+ }
+
+ if (currPPS->deblocking_filter_control_present_flag)
+ {
+
+ status = ue_v(stream, sliceHdr->disable_deblocking_filter_idc);
+
+ if (sliceHdr->disable_deblocking_filter_idc != 1)
+ {
+ status = se_v(stream, sliceHdr->slice_alpha_c0_offset_div2);
+
+ status = se_v(stream, sliceHdr->slice_beta_offset_div_2);
+ }
+ if (status != AVCENC_SUCCESS)
+ {
+ return status;
+ }
+ }
+
+ if (currPPS->num_slice_groups_minus1 > 0 && currPPS->slice_group_map_type >= 3
+ && currPPS->slice_group_map_type <= 5)
+ {
+ /* Ceil(Log2(PicSizeInMapUnits/(float)SliceGroupChangeRate + 1)) */
+ temp = video->PicSizeInMapUnits / video->SliceGroupChangeRate;
+ if (video->PicSizeInMapUnits % video->SliceGroupChangeRate)
+ {
+ temp++;
+ }
+ i = 0;
+ while (temp > 1)
+ {
+ temp >>= 1;
+ i++;
+ }
+
+ BitstreamWriteBits(stream, i, sliceHdr->slice_group_change_cycle);
+ }
+
+
+ encvid->rateCtrl->NumberofHeaderBits += (stream->write_pos << 3) - stream->bit_left - num_bits;
+
+ return AVCENC_SUCCESS;
+}
+
+/** see subclause 7.4.3.1 */
+/**
+Write the ref_pic_list_reordering() syntax of the slice header to the bitstream.
+Each reordering list (l0; l1 additionally for B slices) is a series of
+reordering_of_pic_nums_idc codes terminated by the value 3, as required by
+the standard; the terminator itself must be transmitted.
+\param "video"      Pointer to the common encoder object (unused here).
+\param "stream"     Pointer to the output bitstream.
+\param "sliceHdr"   Pointer to the slice header carrying the reordering syntax.
+\param "slice_type" Type of the current slice (AVC_I_SLICE, AVC_B_SLICE, ...).
+\return Status of the last bitstream write (AVCENC_SUCCESS on success). */
+AVCEnc_Status ref_pic_list_reordering(AVCCommonObj *video, AVCEncBitstream *stream, AVCSliceHeader *sliceHdr, int slice_type)
+{
+    (void)(video);
+    int i;
+    AVCEnc_Status status = AVCENC_SUCCESS;
+
+    if (slice_type != AVC_I_SLICE && slice_type != AVC_SI_SLICE)
+    {
+        status = BitstreamWrite1Bit(stream, sliceHdr->ref_pic_list_reordering_flag_l0);
+        if (sliceHdr->ref_pic_list_reordering_flag_l0)
+        {
+            i = 0;
+            do
+            {
+                status = ue_v(stream, sliceHdr->reordering_of_pic_nums_idc_l0[i]);
+                if (sliceHdr->reordering_of_pic_nums_idc_l0[i] == 0 ||
+                        sliceHdr->reordering_of_pic_nums_idc_l0[i] == 1)
+                {
+                    status = ue_v(stream, sliceHdr->abs_diff_pic_num_minus1_l0[i]);
+                    /* this check should be in InitSlice(), if we ever use it */
+                    /*if(sliceHdr->reordering_of_pic_nums_idc_l0[i] == 0 &&
+                    sliceHdr->abs_diff_pic_num_minus1_l0[i] > video->MaxPicNum/2 -1)
+                    {
+                        return AVCENC_REF_PIC_REORDER_FAIL; // out of range
+                    }
+                    if(sliceHdr->reordering_of_pic_nums_idc_l0[i] == 1 &&
+                    sliceHdr->abs_diff_pic_num_minus1_l0[i] > video->MaxPicNum/2 -2)
+                    {
+                        return AVCENC_REF_PIC_REORDER_FAIL; // out of range
+                    }*/
+                }
+                else if (sliceHdr->reordering_of_pic_nums_idc_l0[i] == 2)
+                {
+                    status = ue_v(stream, sliceHdr->long_term_pic_num_l0[i]);
+                }
+                i++;
+            }
+            /* Continue until the terminating idc (3) has been WRITTEN.
+               Testing element i-1 (the one just written) instead of the
+               next, unwritten element i both guarantees the mandatory
+               terminator reaches the bitstream and avoids reading one
+               entry past the end of the array. */
+            while (sliceHdr->reordering_of_pic_nums_idc_l0[i-1] != 3
+                    && i <= (int)sliceHdr->num_ref_idx_l0_active_minus1 + 1) ;
+        }
+    }
+    if (slice_type == AVC_B_SLICE)
+    {
+        status = BitstreamWrite1Bit(stream, sliceHdr->ref_pic_list_reordering_flag_l1);
+        if (sliceHdr->ref_pic_list_reordering_flag_l1)
+        {
+            i = 0;
+            do
+            {
+                status = ue_v(stream, sliceHdr->reordering_of_pic_nums_idc_l1[i]);
+                if (sliceHdr->reordering_of_pic_nums_idc_l1[i] == 0 ||
+                        sliceHdr->reordering_of_pic_nums_idc_l1[i] == 1)
+                {
+                    status = ue_v(stream, sliceHdr->abs_diff_pic_num_minus1_l1[i]);
+                    /* This check should be in InitSlice() if we ever use it
+                    if(sliceHdr->reordering_of_pic_nums_idc_l1[i] == 0 &&
+                    sliceHdr->abs_diff_pic_num_minus1_l1[i] > video->MaxPicNum/2 -1)
+                    {
+                        return AVCENC_REF_PIC_REORDER_FAIL; // out of range
+                    }
+                    if(sliceHdr->reordering_of_pic_nums_idc_l1[i] == 1 &&
+                    sliceHdr->abs_diff_pic_num_minus1_l1[i] > video->MaxPicNum/2 -2)
+                    {
+                        return AVCENC_REF_PIC_REORDER_FAIL; // out of range
+                    }*/
+                }
+                else if (sliceHdr->reordering_of_pic_nums_idc_l1[i] == 2)
+                {
+                    status = ue_v(stream, sliceHdr->long_term_pic_num_l1[i]);
+                }
+                i++;
+            }
+            /* same terminator/bounds fix as the l0 loop above */
+            while (sliceHdr->reordering_of_pic_nums_idc_l1[i-1] != 3
+                    && i <= (int)sliceHdr->num_ref_idx_l1_active_minus1 + 1) ;
+        }
+    }
+
+    return status;
+}
+
+/** see subclause 7.4.3.3 */
+/**
+Write the dec_ref_pic_marking() syntax of the slice header. For IDR pictures
+this also resets the encoder's long-term frame index state. For non-IDR
+pictures with adaptive marking, the list of memory_management_control_operation
+codes must be terminated in the bitstream by the value 0.
+\param "video"    Pointer to the common encoder object.
+\param "stream"   Pointer to the output bitstream.
+\param "sliceHdr" Pointer to the slice header carrying the marking syntax.
+\return Status of the last bitstream write, or AVCENC_DEC_REF_PIC_MARK_FAIL
+        if the operation list overruns MAX_DEC_REF_PIC_MARKING. */
+AVCEnc_Status dec_ref_pic_marking(AVCCommonObj *video, AVCEncBitstream *stream, AVCSliceHeader *sliceHdr)
+{
+    int i;
+    AVCEnc_Status status = AVCENC_SUCCESS;
+
+    if (video->nal_unit_type == AVC_NALTYPE_IDR)
+    {
+        status = BitstreamWrite1Bit(stream, sliceHdr->no_output_of_prior_pics_flag);
+        status = BitstreamWrite1Bit(stream, sliceHdr->long_term_reference_flag);
+        if (sliceHdr->long_term_reference_flag == 0) /* used for short-term */
+        {
+            video->MaxLongTermFrameIdx = -1; /* no long-term frame indx */
+        }
+        else /* used for long-term */
+        {
+            video->MaxLongTermFrameIdx = 0;
+            video->LongTermFrameIdx = 0;
+        }
+    }
+    else
+    {
+        status = BitstreamWrite1Bit(stream, sliceHdr->adaptive_ref_pic_marking_mode_flag); /* default to zero */
+        if (sliceHdr->adaptive_ref_pic_marking_mode_flag)
+        {
+            i = 0;
+            do
+            {
+                status = ue_v(stream, sliceHdr->memory_management_control_operation[i]);
+                if (sliceHdr->memory_management_control_operation[i] == 1 ||
+                        sliceHdr->memory_management_control_operation[i] == 3)
+                {
+                    status = ue_v(stream, sliceHdr->difference_of_pic_nums_minus1[i]);
+                }
+                if (sliceHdr->memory_management_control_operation[i] == 2)
+                {
+                    status = ue_v(stream, sliceHdr->long_term_pic_num[i]);
+                }
+                if (sliceHdr->memory_management_control_operation[i] == 3 ||
+                        sliceHdr->memory_management_control_operation[i] == 6)
+                {
+                    status = ue_v(stream, sliceHdr->long_term_frame_idx[i]);
+                }
+                if (sliceHdr->memory_management_control_operation[i] == 4)
+                {
+                    status = ue_v(stream, sliceHdr->max_long_term_frame_idx_plus1[i]);
+                }
+                i++;
+            }
+            /* Continue until the terminating operation (0) has been WRITTEN.
+               Testing element i-1 (just written) instead of the next,
+               unwritten element i guarantees the mandatory terminator is
+               transmitted and avoids an out-of-bounds read when i reaches
+               MAX_DEC_REF_PIC_MARKING. */
+            while (sliceHdr->memory_management_control_operation[i-1] != 0 && i < MAX_DEC_REF_PIC_MARKING);
+            if (i >= MAX_DEC_REF_PIC_MARKING && sliceHdr->memory_management_control_operation[i-1] != 0)
+            {
+                return AVCENC_DEC_REF_PIC_MARK_FAIL; /* we're screwed!!, not enough memory */
+            }
+        }
+    }
+
+    return status;
+}
+
+/* see subclause 8.2.1 Decoding process for picture order count.
+See also PostPOC() for initialization of some variables. */
+/**
+Derive the picture order count (POC) of the current picture for the POC mode
+selected in the active SPS, and fill in the POC-related slice header syntax
+elements that will be written by the slice header encoder (pic_order_cnt_lsb
+for mode 0, delta_pic_order_cnt[] for mode 1). The function mirrors the
+decoder-side derivation so that encoder and decoder state stay in sync.
+\param "encvid" Pointer to the encoder object.
+\return AVCENC_SUCCESS, AVCENC_CONSECUTIVE_NONREF (mode 2 forbids two
+        consecutive non-reference frames) or AVCENC_POC_FAIL (invalid
+        pic_order_cnt_type). */
+AVCEnc_Status InitPOC(AVCEncObject *encvid)
+{
+    AVCCommonObj *video = encvid->common;
+    AVCSeqParamSet *currSPS = video->currSeqParams;
+    AVCSliceHeader *sliceHdr = video->sliceHdr;
+    AVCFrameIO *currInput = encvid->currInput;
+    int i;
+
+    switch (currSPS->pic_order_cnt_type)
+    {
+        case 0: /* POC MODE 0 , subclause 8.2.1.1 */
+            /* encoding part */
+            /* dispOrdPOCRef anchors the display-order count at the last IDR;
+               pic_order_cnt_lsb is the display order relative to it, reduced
+               modulo MaxPicOrderCntLsb. */
+            if (video->nal_unit_type == AVC_NALTYPE_IDR)
+            {
+                encvid->dispOrdPOCRef = currInput->disp_order;
+            }
+            while (currInput->disp_order < encvid->dispOrdPOCRef)
+            {
+                encvid->dispOrdPOCRef -= video->MaxPicOrderCntLsb;
+            }
+            sliceHdr->pic_order_cnt_lsb = currInput->disp_order - encvid->dispOrdPOCRef;
+            while (sliceHdr->pic_order_cnt_lsb >= video->MaxPicOrderCntLsb)
+            {
+                sliceHdr->pic_order_cnt_lsb -= video->MaxPicOrderCntLsb;
+            }
+            /* decoding part */
+            /* Calculate the MSBs of current picture */
+            if (video->nal_unit_type == AVC_NALTYPE_IDR)
+            {
+                video->prevPicOrderCntMsb = 0;
+                video->prevPicOrderCntLsb = 0;
+            }
+            /* detect lsb wrap-around (in either direction) to adjust the MSB */
+            if (sliceHdr->pic_order_cnt_lsb < video->prevPicOrderCntLsb &&
+                    (video->prevPicOrderCntLsb - sliceHdr->pic_order_cnt_lsb) >= (video->MaxPicOrderCntLsb / 2))
+                video->PicOrderCntMsb = video->prevPicOrderCntMsb + video->MaxPicOrderCntLsb;
+            else if (sliceHdr->pic_order_cnt_lsb > video->prevPicOrderCntLsb &&
+                     (sliceHdr->pic_order_cnt_lsb - video->prevPicOrderCntLsb) > (video->MaxPicOrderCntLsb / 2))
+                video->PicOrderCntMsb = video->prevPicOrderCntMsb - video->MaxPicOrderCntLsb;
+            else
+                video->PicOrderCntMsb = video->prevPicOrderCntMsb;
+
+            /* JVT-I010 page 81 is different from JM7.3 */
+            if (!sliceHdr->field_pic_flag || !sliceHdr->bottom_field_flag)
+            {
+                video->PicOrderCnt = video->TopFieldOrderCnt = video->PicOrderCntMsb + sliceHdr->pic_order_cnt_lsb;
+            }
+
+            if (!sliceHdr->field_pic_flag)
+            {
+                video->BottomFieldOrderCnt = video->TopFieldOrderCnt + sliceHdr->delta_pic_order_cnt_bottom;
+            }
+            else if (sliceHdr->bottom_field_flag)
+            {
+                video->PicOrderCnt = video->BottomFieldOrderCnt = video->PicOrderCntMsb + sliceHdr->pic_order_cnt_lsb;
+            }
+
+            if (!sliceHdr->field_pic_flag)
+            {
+                video->PicOrderCnt = AVC_MIN(video->TopFieldOrderCnt, video->BottomFieldOrderCnt);
+            }
+
+            if (video->currPicParams->pic_order_present_flag && !sliceHdr->field_pic_flag)
+            {
+                sliceHdr->delta_pic_order_cnt_bottom = 0; /* defaulted to zero */
+            }
+
+            break;
+        case 1: /* POC MODE 1, subclause 8.2.1.2 */
+            /* calculate FrameNumOffset */
+            if (video->nal_unit_type == AVC_NALTYPE_IDR)
+            {
+                encvid->dispOrdPOCRef = currInput->disp_order; /* reset the reference point */
+                video->prevFrameNumOffset = 0;
+                video->FrameNumOffset = 0;
+            }
+            else if (video->prevFrameNum > sliceHdr->frame_num)
+            {
+                /* frame_num wrapped around MaxFrameNum */
+                video->FrameNumOffset = video->prevFrameNumOffset + video->MaxFrameNum;
+            }
+            else
+            {
+                video->FrameNumOffset = video->prevFrameNumOffset;
+            }
+            /* calculate absFrameNum */
+            if (currSPS->num_ref_frames_in_pic_order_cnt_cycle)
+            {
+                video->absFrameNum = video->FrameNumOffset + sliceHdr->frame_num;
+            }
+            else
+            {
+                video->absFrameNum = 0;
+            }
+
+            if (video->absFrameNum > 0 && video->nal_ref_idc == 0)
+            {
+                video->absFrameNum--;
+            }
+
+            /* derive picOrderCntCycleCnt and frameNumInPicOrderCntCycle */
+            if (video->absFrameNum > 0)
+            {
+                video->picOrderCntCycleCnt = (video->absFrameNum - 1) / currSPS->num_ref_frames_in_pic_order_cnt_cycle;
+                video->frameNumInPicOrderCntCycle = (video->absFrameNum - 1) % currSPS->num_ref_frames_in_pic_order_cnt_cycle;
+            }
+            /* derive expectedDeltaPerPicOrderCntCycle, this value can be computed up front. */
+            video->expectedDeltaPerPicOrderCntCycle = 0;
+            for (i = 0; i < (int)currSPS->num_ref_frames_in_pic_order_cnt_cycle; i++)
+            {
+                video->expectedDeltaPerPicOrderCntCycle += currSPS->offset_for_ref_frame[i];
+            }
+            /* derive expectedPicOrderCnt */
+            if (video->absFrameNum)
+            {
+                video->expectedPicOrderCnt = video->picOrderCntCycleCnt * video->expectedDeltaPerPicOrderCntCycle;
+                for (i = 0; i <= video->frameNumInPicOrderCntCycle; i++)
+                {
+                    video->expectedPicOrderCnt += currSPS->offset_for_ref_frame[i];
+                }
+            }
+            else
+            {
+                video->expectedPicOrderCnt = 0;
+            }
+
+            if (video->nal_ref_idc == 0)
+            {
+                video->expectedPicOrderCnt += currSPS->offset_for_non_ref_pic;
+            }
+            /* derive TopFieldOrderCnt and BottomFieldOrderCnt */
+            /* encoding part */
+            /* delta_pic_order_cnt[] carries the deviation of the actual
+               display order from the expected POC; written in the slice
+               header unless the SPS forces it to zero. */
+            if (!currSPS->delta_pic_order_always_zero_flag)
+            {
+                sliceHdr->delta_pic_order_cnt[0] = currInput->disp_order - encvid->dispOrdPOCRef - video->expectedPicOrderCnt;
+
+                if (video->currPicParams->pic_order_present_flag && !sliceHdr->field_pic_flag)
+                {
+                    sliceHdr->delta_pic_order_cnt[1] = sliceHdr->delta_pic_order_cnt[0]; /* should be calculated from currInput->bottom_field->disp_order */
+                }
+                else
+                {
+                    sliceHdr->delta_pic_order_cnt[1] = 0;
+                }
+            }
+            else
+            {
+                sliceHdr->delta_pic_order_cnt[0] = sliceHdr->delta_pic_order_cnt[1] = 0;
+            }
+
+            if (sliceHdr->field_pic_flag == 0)
+            {
+                video->TopFieldOrderCnt = video->expectedPicOrderCnt + sliceHdr->delta_pic_order_cnt[0];
+                video->BottomFieldOrderCnt = video->TopFieldOrderCnt + currSPS->offset_for_top_to_bottom_field + sliceHdr->delta_pic_order_cnt[1];
+
+                video->PicOrderCnt = AVC_MIN(video->TopFieldOrderCnt, video->BottomFieldOrderCnt);
+            }
+            else if (sliceHdr->bottom_field_flag == 0)
+            {
+                video->TopFieldOrderCnt = video->expectedPicOrderCnt + sliceHdr->delta_pic_order_cnt[0];
+                video->PicOrderCnt = video->TopFieldOrderCnt;
+            }
+            else
+            {
+                video->BottomFieldOrderCnt = video->expectedPicOrderCnt + currSPS->offset_for_top_to_bottom_field + sliceHdr->delta_pic_order_cnt[0];
+                video->PicOrderCnt = video->BottomFieldOrderCnt;
+            }
+            break;
+
+
+        case 2: /* POC MODE 2, subclause 8.2.1.3 */
+            /* decoding order must be the same as display order */
+            /* we don't check for that. The decoder will just output in decoding order. */
+            /* Check for 2 consecutive non-reference frame */
+            /* NOTE: in this mode dispOrdPOCRef is reused as a 0/1 flag that
+               remembers whether the previous frame was non-reference. */
+            if (video->nal_ref_idc == 0)
+            {
+                if (encvid->dispOrdPOCRef == 1)
+                {
+                    return AVCENC_CONSECUTIVE_NONREF;
+                }
+                encvid->dispOrdPOCRef = 1; /* act as a flag for non ref */
+            }
+            else
+            {
+                encvid->dispOrdPOCRef = 0;
+            }
+
+
+            if (video->nal_unit_type == AVC_NALTYPE_IDR)
+            {
+                video->FrameNumOffset = 0;
+            }
+            else if (video->prevFrameNum > sliceHdr->frame_num)
+            {
+                /* frame_num wrapped around MaxFrameNum */
+                video->FrameNumOffset = video->prevFrameNumOffset + video->MaxFrameNum;
+            }
+            else
+            {
+                video->FrameNumOffset = video->prevFrameNumOffset;
+            }
+            /* derive tempPicOrderCnt, we just use PicOrderCnt */
+            if (video->nal_unit_type == AVC_NALTYPE_IDR)
+            {
+                video->PicOrderCnt = 0;
+            }
+            else if (video->nal_ref_idc == 0)
+            {
+                video->PicOrderCnt = 2 * (video->FrameNumOffset + sliceHdr->frame_num) - 1;
+            }
+            else
+            {
+                video->PicOrderCnt = 2 * (video->FrameNumOffset + sliceHdr->frame_num);
+            }
+            /* derive TopFieldOrderCnt and BottomFieldOrderCnt */
+            if (sliceHdr->field_pic_flag == 0)
+            {
+                video->TopFieldOrderCnt = video->BottomFieldOrderCnt = video->PicOrderCnt;
+            }
+            else if (sliceHdr->bottom_field_flag)
+            {
+                video->BottomFieldOrderCnt = video->PicOrderCnt;
+            }
+            else
+            {
+                video->TopFieldOrderCnt = video->PicOrderCnt;
+            }
+            break;
+        default:
+            return AVCENC_POC_FAIL;
+    }
+
+    return AVCENC_SUCCESS;
+}
+
+/** see subclause 8.2.1 */
+/**
+Post-picture POC bookkeeping: remember the frame number and the POC state
+(prevPicOrderCntMsb/Lsb for mode 0, prevFrameNumOffset for modes 1 and 2)
+that InitPOC() of the next picture will read. When the current picture was
+marked with memory_management_control_operation equal to 5, the remembered
+state is reset as mandated by subclause 8.2.1.
+\param "video" Pointer to the common encoder object.
+\return AVCENC_SUCCESS. */
+AVCEnc_Status PostPOC(AVCCommonObj *video)
+{
+    AVCSliceHeader *sliceHdr = video->sliceHdr;
+    AVCSeqParamSet *currSPS = video->currSeqParams;
+
+    video->prevFrameNum = sliceHdr->frame_num;
+
+    if (currSPS->pic_order_cnt_type == 0) /* subclause 8.2.1.1 */
+    {
+        video->prevPicOrderCntMsb = video->mem_mgr_ctrl_eq_5 ? 0 : video->PicOrderCntMsb;
+        video->prevPicOrderCntLsb = video->mem_mgr_ctrl_eq_5 ?
+                                    video->TopFieldOrderCnt : sliceHdr->pic_order_cnt_lsb;
+    }
+    else if (currSPS->pic_order_cnt_type == 1 ||
+             currSPS->pic_order_cnt_type == 2) /* subclauses 8.2.1.2 and 8.2.1.3 */
+    {
+        video->prevFrameNumOffset = video->mem_mgr_ctrl_eq_5 ? 0 : video->FrameNumOffset;
+    }
+
+    return AVCENC_SUCCESS;
+}
+
diff --git a/media/libstagefright/codecs/avc/enc/src/init.cpp b/media/libstagefright/codecs/avc/enc/src/init.cpp
new file mode 100644
index 0000000..c258b57
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/init.cpp
@@ -0,0 +1,899 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+#include "avcenc_lib.h"
+#include "avcenc_api.h"
+
+#define LOG2_MAX_FRAME_NUM_MINUS4 12 /* 12 default */
+#define SLICE_GROUP_CHANGE_CYCLE 1 /* default */
+
+/* initialized variables to be used in SPS*/
+AVCEnc_Status SetEncodeParam(AVCHandle* avcHandle, AVCEncParams* encParam,
+ void* extSPS, void* extPPS)
+{
+ AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
+ AVCCommonObj *video = encvid->common;
+ AVCSeqParamSet *seqParam = video->currSeqParams;
+ AVCPicParamSet *picParam = video->currPicParams;
+ AVCSliceHeader *sliceHdr = video->sliceHdr;
+ AVCRateControl *rateCtrl = encvid->rateCtrl;
+ AVCEnc_Status status;
+ void *userData = avcHandle->userData;
+ int ii, maxFrameNum;
+
+ AVCSeqParamSet* extS = NULL;
+ AVCPicParamSet* extP = NULL;
+
+ if (extSPS) extS = (AVCSeqParamSet*) extSPS;
+ if (extPPS) extP = (AVCPicParamSet*) extPPS;
+
+ /* This part sets the default values of the encoding options this
+ library supports in seqParam, picParam and sliceHdr structures and
+ also copy the values from the encParam into the above 3 structures.
+
+ Some parameters will be assigned later when we encode SPS or PPS such as
+ the seq_parameter_id or pic_parameter_id. Also some of the slice parameters
+ have to be re-assigned per slice basis such as frame_num, slice_type,
+ first_mb_in_slice, pic_order_cnt_lsb, slice_qp_delta, slice_group_change_cycle */
+
+ /* profile_idc, constrained_setx_flag and level_idc is set by VerifyProfile(),
+ and VerifyLevel() functions later. */
+
+ encvid->fullsearch_enable = encParam->fullsearch;
+
+ encvid->outOfBandParamSet = ((encParam->out_of_band_param_set == AVC_ON) ? TRUE : FALSE);
+
+ /* parameters derived from the the encParam that are used in SPS */
+ if (extS)
+ {
+ video->MaxPicOrderCntLsb = 1 << (extS->log2_max_pic_order_cnt_lsb_minus4 + 4);
+ video->PicWidthInMbs = extS->pic_width_in_mbs_minus1 + 1;
+ video->PicHeightInMapUnits = extS->pic_height_in_map_units_minus1 + 1 ;
+ video->FrameHeightInMbs = (2 - extS->frame_mbs_only_flag) * video->PicHeightInMapUnits ;
+ }
+ else
+ {
+ video->MaxPicOrderCntLsb = 1 << (encParam->log2_max_poc_lsb_minus_4 + 4);
+ video->PicWidthInMbs = (encParam->width + 15) >> 4; /* round it to multiple of 16 */
+ video->FrameHeightInMbs = (encParam->height + 15) >> 4; /* round it to multiple of 16 */
+ video->PicHeightInMapUnits = video->FrameHeightInMbs;
+ }
+
+ video->PicWidthInSamplesL = video->PicWidthInMbs * 16 ;
+ if (video->PicWidthInSamplesL + 32 > 0xFFFF)
+ {
+ return AVCENC_NOT_SUPPORTED; // we use 2-bytes for pitch
+ }
+
+ video->PicWidthInSamplesC = video->PicWidthInMbs * 8 ;
+ video->PicHeightInMbs = video->FrameHeightInMbs;
+ video->PicSizeInMapUnits = video->PicWidthInMbs * video->PicHeightInMapUnits ;
+ video->PicHeightInSamplesL = video->PicHeightInMbs * 16;
+ video->PicHeightInSamplesC = video->PicHeightInMbs * 8;
+ video->PicSizeInMbs = video->PicWidthInMbs * video->PicHeightInMbs;
+
+ if (!extS && !extP)
+ {
+ maxFrameNum = (encParam->idr_period == -1) ? (1 << 16) : encParam->idr_period;
+ ii = 0;
+ while (maxFrameNum > 0)
+ {
+ ii++;
+ maxFrameNum >>= 1;
+ }
+ if (ii < 4) ii = 4;
+ else if (ii > 16) ii = 16;
+
+ seqParam->log2_max_frame_num_minus4 = ii - 4;//LOG2_MAX_FRAME_NUM_MINUS4; /* default */
+
+ video->MaxFrameNum = 1 << ii; //(LOG2_MAX_FRAME_NUM_MINUS4 + 4); /* default */
+ video->MaxPicNum = video->MaxFrameNum;
+
+ /************* set the SPS *******************/
+ seqParam->seq_parameter_set_id = 0; /* start with zero */
+ /* POC */
+ seqParam->pic_order_cnt_type = encParam->poc_type; /* POC type */
+ if (encParam->poc_type == 0)
+ {
+ if (/*encParam->log2_max_poc_lsb_minus_4<0 || (no need, it's unsigned)*/
+ encParam->log2_max_poc_lsb_minus_4 > 12)
+ {
+ return AVCENC_INVALID_POC_LSB;
+ }
+ seqParam->log2_max_pic_order_cnt_lsb_minus4 = encParam->log2_max_poc_lsb_minus_4;
+ }
+ else if (encParam->poc_type == 1)
+ {
+ seqParam->delta_pic_order_always_zero_flag = encParam->delta_poc_zero_flag;
+ seqParam->offset_for_non_ref_pic = encParam->offset_poc_non_ref;
+ seqParam->offset_for_top_to_bottom_field = encParam->offset_top_bottom;
+ seqParam->num_ref_frames_in_pic_order_cnt_cycle = encParam->num_ref_in_cycle;
+ if (encParam->offset_poc_ref == NULL)
+ {
+ return AVCENC_ENCPARAM_MEM_FAIL;
+ }
+ for (ii = 0; ii < encParam->num_ref_frame; ii++)
+ {
+ seqParam->offset_for_ref_frame[ii] = encParam->offset_poc_ref[ii];
+ }
+ }
+ /* number of reference frame */
+ if (encParam->num_ref_frame > 16 || encParam->num_ref_frame < 0)
+ {
+ return AVCENC_INVALID_NUM_REF;
+ }
+ seqParam->num_ref_frames = encParam->num_ref_frame; /* num reference frame range 0...16*/
+ seqParam->gaps_in_frame_num_value_allowed_flag = FALSE;
+ seqParam->pic_width_in_mbs_minus1 = video->PicWidthInMbs - 1;
+ seqParam->pic_height_in_map_units_minus1 = video->PicHeightInMapUnits - 1;
+ seqParam->frame_mbs_only_flag = TRUE;
+ seqParam->mb_adaptive_frame_field_flag = FALSE;
+ seqParam->direct_8x8_inference_flag = FALSE; /* default */
+ seqParam->frame_cropping_flag = FALSE;
+ seqParam->frame_crop_bottom_offset = 0;
+ seqParam->frame_crop_left_offset = 0;
+ seqParam->frame_crop_right_offset = 0;
+ seqParam->frame_crop_top_offset = 0;
+ seqParam->vui_parameters_present_flag = FALSE; /* default */
+ }
+ else if (extS) // use external SPS and PPS
+ {
+ seqParam->seq_parameter_set_id = extS->seq_parameter_set_id;
+ seqParam->log2_max_frame_num_minus4 = extS->log2_max_frame_num_minus4;
+ video->MaxFrameNum = 1 << (extS->log2_max_frame_num_minus4 + 4);
+ video->MaxPicNum = video->MaxFrameNum;
+ if (encParam->idr_period > (int)(video->MaxFrameNum) || (encParam->idr_period == -1))
+ {
+ encParam->idr_period = (int)video->MaxFrameNum;
+ }
+
+ seqParam->pic_order_cnt_type = extS->pic_order_cnt_type;
+ if (seqParam->pic_order_cnt_type == 0)
+ {
+ if (/*extS->log2_max_pic_order_cnt_lsb_minus4<0 || (no need it's unsigned)*/
+ extS->log2_max_pic_order_cnt_lsb_minus4 > 12)
+ {
+ return AVCENC_INVALID_POC_LSB;
+ }
+ seqParam->log2_max_pic_order_cnt_lsb_minus4 = extS->log2_max_pic_order_cnt_lsb_minus4;
+ }
+ else if (seqParam->pic_order_cnt_type == 1)
+ {
+ seqParam->delta_pic_order_always_zero_flag = extS->delta_pic_order_always_zero_flag;
+ seqParam->offset_for_non_ref_pic = extS->offset_for_non_ref_pic;
+ seqParam->offset_for_top_to_bottom_field = extS->offset_for_top_to_bottom_field;
+ seqParam->num_ref_frames_in_pic_order_cnt_cycle = extS->num_ref_frames_in_pic_order_cnt_cycle;
+ if (extS->offset_for_ref_frame == NULL)
+ {
+ return AVCENC_ENCPARAM_MEM_FAIL;
+ }
+ for (ii = 0; ii < (int) extS->num_ref_frames; ii++)
+ {
+ seqParam->offset_for_ref_frame[ii] = extS->offset_for_ref_frame[ii];
+ }
+ }
+ /* number of reference frame */
+ if (extS->num_ref_frames > 16 /*|| extS->num_ref_frames<0 (no need, it's unsigned)*/)
+ {
+ return AVCENC_INVALID_NUM_REF;
+ }
+ seqParam->num_ref_frames = extS->num_ref_frames; /* num reference frame range 0...16*/
+ seqParam->gaps_in_frame_num_value_allowed_flag = extS->gaps_in_frame_num_value_allowed_flag;
+ seqParam->pic_width_in_mbs_minus1 = extS->pic_width_in_mbs_minus1;
+ seqParam->pic_height_in_map_units_minus1 = extS->pic_height_in_map_units_minus1;
+ seqParam->frame_mbs_only_flag = extS->frame_mbs_only_flag;
+ if (extS->frame_mbs_only_flag != TRUE)
+ {
+ return AVCENC_NOT_SUPPORTED;
+ }
+ seqParam->mb_adaptive_frame_field_flag = extS->mb_adaptive_frame_field_flag;
+ if (extS->mb_adaptive_frame_field_flag != FALSE)
+ {
+ return AVCENC_NOT_SUPPORTED;
+ }
+
+ seqParam->direct_8x8_inference_flag = extS->direct_8x8_inference_flag;
+ seqParam->frame_cropping_flag = extS->frame_cropping_flag ;
+ if (extS->frame_cropping_flag != FALSE)
+ {
+ return AVCENC_NOT_SUPPORTED;
+ }
+
+ seqParam->frame_crop_bottom_offset = 0;
+ seqParam->frame_crop_left_offset = 0;
+ seqParam->frame_crop_right_offset = 0;
+ seqParam->frame_crop_top_offset = 0;
+ seqParam->vui_parameters_present_flag = extS->vui_parameters_present_flag;
+ if (extS->vui_parameters_present_flag)
+ {
+ memcpy(&(seqParam->vui_parameters), &(extS->vui_parameters), sizeof(AVCVUIParams));
+ }
+ }
+ else
+ {
+ return AVCENC_NOT_SUPPORTED;
+ }
+
+ /***************** now PPS ******************************/
+ if (!extP && !extS)
+ {
+ picParam->pic_parameter_set_id = (uint)(-1); /* start with zero */
+ picParam->seq_parameter_set_id = (uint)(-1); /* start with zero */
+ picParam->entropy_coding_mode_flag = 0; /* default to CAVLC */
+ picParam->pic_order_present_flag = 0; /* default for now, will need it for B-slice */
+ /* FMO */
+ if (encParam->num_slice_group < 1 || encParam->num_slice_group > MAX_NUM_SLICE_GROUP)
+ {
+ return AVCENC_INVALID_NUM_SLICEGROUP;
+ }
+ picParam->num_slice_groups_minus1 = encParam->num_slice_group - 1;
+
+ if (picParam->num_slice_groups_minus1 > 0)
+ {
+ picParam->slice_group_map_type = encParam->fmo_type;
+ switch (encParam->fmo_type)
+ {
+ case 0:
+ for (ii = 0; ii <= (int)picParam->num_slice_groups_minus1; ii++)
+ {
+ picParam->run_length_minus1[ii] = encParam->run_length_minus1[ii];
+ }
+ break;
+ case 2:
+ for (ii = 0; ii < (int)picParam->num_slice_groups_minus1; ii++)
+ {
+ picParam->top_left[ii] = encParam->top_left[ii];
+ picParam->bottom_right[ii] = encParam->bottom_right[ii];
+ }
+ break;
+ case 3:
+ case 4:
+ case 5:
+ if (encParam->change_dir_flag == AVC_ON)
+ {
+ picParam->slice_group_change_direction_flag = TRUE;
+ }
+ else
+ {
+ picParam->slice_group_change_direction_flag = FALSE;
+ }
+ if (/*encParam->change_rate_minus1 < 0 || (no need it's unsigned) */
+ encParam->change_rate_minus1 > video->PicSizeInMapUnits - 1)
+ {
+ return AVCENC_INVALID_CHANGE_RATE;
+ }
+ picParam->slice_group_change_rate_minus1 = encParam->change_rate_minus1;
+ video->SliceGroupChangeRate = picParam->slice_group_change_rate_minus1 + 1;
+ break;
+ case 6:
+ picParam->pic_size_in_map_units_minus1 = video->PicSizeInMapUnits - 1;
+
+ /* allocate picParam->slice_group_id */
+ picParam->slice_group_id = (uint*)avcHandle->CBAVC_Malloc(userData, sizeof(uint) * video->PicSizeInMapUnits, DEFAULT_ATTR);
+ if (picParam->slice_group_id == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+
+ if (encParam->slice_group == NULL)
+ {
+ return AVCENC_ENCPARAM_MEM_FAIL;
+ }
+ for (ii = 0; ii < (int)video->PicSizeInMapUnits; ii++)
+ {
+ picParam->slice_group_id[ii] = encParam->slice_group[ii];
+ }
+ break;
+ default:
+ return AVCENC_INVALID_FMO_TYPE;
+ }
+ }
+ picParam->num_ref_idx_l0_active_minus1 = encParam->num_ref_frame - 1; /* assume frame only */
+ picParam->num_ref_idx_l1_active_minus1 = 0; /* default value */
+ picParam->weighted_pred_flag = 0; /* no weighted prediction supported */
+ picParam->weighted_bipred_idc = 0; /* range 0,1,2 */
+ if (/*picParam->weighted_bipred_idc < 0 || (no need, it's unsigned) */
+ picParam->weighted_bipred_idc > 2)
+ {
+ return AVCENC_WEIGHTED_BIPRED_FAIL;
+ }
+ picParam->pic_init_qp_minus26 = 0; /* default, will be changed at slice level anyway */
+ if (picParam->pic_init_qp_minus26 < -26 || picParam->pic_init_qp_minus26 > 25)
+ {
+ return AVCENC_INIT_QP_FAIL; /* out of range */
+ }
+ picParam->pic_init_qs_minus26 = 0;
+ if (picParam->pic_init_qs_minus26 < -26 || picParam->pic_init_qs_minus26 > 25)
+ {
+ return AVCENC_INIT_QS_FAIL; /* out of range */
+ }
+
+ picParam->chroma_qp_index_offset = 0; /* default to zero for now */
+ if (picParam->chroma_qp_index_offset < -12 || picParam->chroma_qp_index_offset > 12)
+ {
+ return AVCENC_CHROMA_QP_FAIL; /* out of range */
+ }
+ /* deblocking */
+ picParam->deblocking_filter_control_present_flag = (encParam->db_filter == AVC_ON) ? TRUE : FALSE ;
+ /* constrained intra prediction */
+ picParam->constrained_intra_pred_flag = (encParam->constrained_intra_pred == AVC_ON) ? TRUE : FALSE;
+ picParam->redundant_pic_cnt_present_flag = 0; /* default */
+ }
+ else if (extP)// external PPS
+ {
+ picParam->pic_parameter_set_id = extP->pic_parameter_set_id - 1; /* to be increased by one */
+ picParam->seq_parameter_set_id = extP->seq_parameter_set_id;
+ picParam->entropy_coding_mode_flag = extP->entropy_coding_mode_flag;
+ if (extP->entropy_coding_mode_flag != 0) /* default to CAVLC */
+ {
+ return AVCENC_NOT_SUPPORTED;
+ }
+ picParam->pic_order_present_flag = extP->pic_order_present_flag; /* default for now, will need it for B-slice */
+ if (extP->pic_order_present_flag != 0)
+ {
+ return AVCENC_NOT_SUPPORTED;
+ }
+ /* FMO */
+ if (/*(extP->num_slice_groups_minus1<0) || (no need it's unsigned) */
+ (extP->num_slice_groups_minus1 > MAX_NUM_SLICE_GROUP - 1))
+ {
+ return AVCENC_INVALID_NUM_SLICEGROUP;
+ }
+ picParam->num_slice_groups_minus1 = extP->num_slice_groups_minus1;
+
+ if (picParam->num_slice_groups_minus1 > 0)
+ {
+ picParam->slice_group_map_type = extP->slice_group_map_type;
+ switch (extP->slice_group_map_type)
+ {
+ case 0:
+ for (ii = 0; ii <= (int)extP->num_slice_groups_minus1; ii++)
+ {
+ picParam->run_length_minus1[ii] = extP->run_length_minus1[ii];
+ }
+ break;
+ case 2:
+ for (ii = 0; ii < (int)picParam->num_slice_groups_minus1; ii++)
+ {
+ picParam->top_left[ii] = extP->top_left[ii];
+ picParam->bottom_right[ii] = extP->bottom_right[ii];
+ }
+ break;
+ case 3:
+ case 4:
+ case 5:
+ picParam->slice_group_change_direction_flag = extP->slice_group_change_direction_flag;
+ if (/*extP->slice_group_change_rate_minus1 < 0 || (no need, it's unsigned) */
+ extP->slice_group_change_rate_minus1 > video->PicSizeInMapUnits - 1)
+ {
+ return AVCENC_INVALID_CHANGE_RATE;
+ }
+ picParam->slice_group_change_rate_minus1 = extP->slice_group_change_rate_minus1;
+ video->SliceGroupChangeRate = picParam->slice_group_change_rate_minus1 + 1;
+ break;
+ case 6:
+ if (extP->pic_size_in_map_units_minus1 != video->PicSizeInMapUnits - 1)
+ {
+ return AVCENC_NOT_SUPPORTED;
+ }
+
+ picParam->pic_size_in_map_units_minus1 = extP->pic_size_in_map_units_minus1;
+
+ /* allocate picParam->slice_group_id */
+ picParam->slice_group_id = (uint*)avcHandle->CBAVC_Malloc(userData, sizeof(uint) * video->PicSizeInMapUnits, DEFAULT_ATTR);
+ if (picParam->slice_group_id == NULL)
+ {
+ return AVCENC_MEMORY_FAIL;
+ }
+
+ if (extP->slice_group_id == NULL)
+ {
+ return AVCENC_ENCPARAM_MEM_FAIL;
+ }
+ for (ii = 0; ii < (int)video->PicSizeInMapUnits; ii++)
+ {
+ picParam->slice_group_id[ii] = extP->slice_group_id[ii];
+ }
+ break;
+ default:
+ return AVCENC_INVALID_FMO_TYPE;
+ }
+ }
+ picParam->num_ref_idx_l0_active_minus1 = extP->num_ref_idx_l0_active_minus1;
+ picParam->num_ref_idx_l1_active_minus1 = extP->num_ref_idx_l1_active_minus1; /* default value */
+ if (picParam->num_ref_idx_l1_active_minus1 != 0)
+ {
+ return AVCENC_NOT_SUPPORTED;
+ }
+
+ if (extP->weighted_pred_flag)
+ {
+ return AVCENC_NOT_SUPPORTED;
+ }
+
+ picParam->weighted_pred_flag = 0; /* no weighted prediction supported */
+ picParam->weighted_bipred_idc = extP->weighted_bipred_idc; /* range 0,1,2 */
+ if (/*picParam->weighted_bipred_idc < 0 || (no need, it's unsigned) */
+ picParam->weighted_bipred_idc > 2)
+ {
+ return AVCENC_WEIGHTED_BIPRED_FAIL;
+ }
+ picParam->pic_init_qp_minus26 = extP->pic_init_qp_minus26; /* default, will be changed at slice level anyway */
+ if (picParam->pic_init_qp_minus26 < -26 || picParam->pic_init_qp_minus26 > 25)
+ {
+ return AVCENC_INIT_QP_FAIL; /* out of range */
+ }
+ picParam->pic_init_qs_minus26 = extP->pic_init_qs_minus26;
+ if (picParam->pic_init_qs_minus26 < -26 || picParam->pic_init_qs_minus26 > 25)
+ {
+ return AVCENC_INIT_QS_FAIL; /* out of range */
+ }
+
+ picParam->chroma_qp_index_offset = extP->chroma_qp_index_offset; /* default to zero for now */
+ if (picParam->chroma_qp_index_offset < -12 || picParam->chroma_qp_index_offset > 12)
+ {
+ return AVCENC_CHROMA_QP_FAIL; /* out of range */
+ }
+ /* deblocking */
+ picParam->deblocking_filter_control_present_flag = extP->deblocking_filter_control_present_flag;
+ /* constrained intra prediction */
+ picParam->constrained_intra_pred_flag = extP->constrained_intra_pred_flag;
+ if (extP->redundant_pic_cnt_present_flag != 0)
+ {
+ return AVCENC_NOT_SUPPORTED;
+ }
+ picParam->redundant_pic_cnt_present_flag = extP->redundant_pic_cnt_present_flag; /* default */
+ }
+ else
+ {
+ return AVCENC_NOT_SUPPORTED;
+ }
+
+ /****************** now set up some SliceHeader parameters ***********/
+ if (picParam->deblocking_filter_control_present_flag == TRUE)
+ {
+ /* these values only present when db_filter is ON */
+ if (encParam->disable_db_idc > 2)
+ {
+ return AVCENC_INVALID_DEBLOCK_IDC; /* out of range */
+ }
+ sliceHdr->disable_deblocking_filter_idc = encParam->disable_db_idc;
+
+ if (encParam->alpha_offset < -6 || encParam->alpha_offset > 6)
+ {
+ return AVCENC_INVALID_ALPHA_OFFSET;
+ }
+ sliceHdr->slice_alpha_c0_offset_div2 = encParam->alpha_offset;
+
+ if (encParam->beta_offset < -6 || encParam->beta_offset > 6)
+ {
+ return AVCENC_INVALID_BETA_OFFSET;
+ }
+ sliceHdr->slice_beta_offset_div_2 = encParam->beta_offset;
+ }
+ if (encvid->outOfBandParamSet == TRUE)
+ {
+ sliceHdr->idr_pic_id = 0;
+ }
+ else
+ {
+ sliceHdr->idr_pic_id = (uint)(-1); /* start with zero */
+ }
+ sliceHdr->field_pic_flag = FALSE;
+ sliceHdr->bottom_field_flag = FALSE; /* won't be used anyway */
+ video->MbaffFrameFlag = (seqParam->mb_adaptive_frame_field_flag && !sliceHdr->field_pic_flag);
+
+ /* the rest will be set in InitSlice() */
+
+ /* now the rate control and performance related parameters */
+ rateCtrl->scdEnable = (encParam->auto_scd == AVC_ON) ? TRUE : FALSE;
+ rateCtrl->idrPeriod = encParam->idr_period + 1;
+ rateCtrl->intraMBRate = encParam->intramb_refresh;
+ rateCtrl->dpEnable = (encParam->data_par == AVC_ON) ? TRUE : FALSE;
+
+ rateCtrl->subPelEnable = (encParam->sub_pel == AVC_ON) ? TRUE : FALSE;
+ rateCtrl->mvRange = encParam->search_range;
+
+ rateCtrl->subMBEnable = (encParam->submb_pred == AVC_ON) ? TRUE : FALSE;
+ rateCtrl->rdOptEnable = (encParam->rdopt_mode == AVC_ON) ? TRUE : FALSE;
+ rateCtrl->bidirPred = (encParam->bidir_pred == AVC_ON) ? TRUE : FALSE;
+
+ rateCtrl->rcEnable = (encParam->rate_control == AVC_ON) ? TRUE : FALSE;
+ rateCtrl->initQP = encParam->initQP;
+ rateCtrl->initQP = AVC_CLIP3(0, 51, rateCtrl->initQP);
+
+ rateCtrl->bitRate = encParam->bitrate;
+ rateCtrl->cpbSize = encParam->CPB_size;
+ rateCtrl->initDelayOffset = (rateCtrl->bitRate * encParam->init_CBP_removal_delay / 1000);
+
+ if (encParam->frame_rate == 0)
+ {
+ return AVCENC_INVALID_FRAMERATE;
+ }
+
+ rateCtrl->frame_rate = (OsclFloat)(encParam->frame_rate * 1.0 / 1000);
+// rateCtrl->srcInterval = encParam->src_interval;
+ rateCtrl->first_frame = 1; /* set this flag for the first time */
+
+ /* contrained_setx_flag will be set inside the VerifyProfile called below.*/
+ if (!extS && !extP)
+ {
+ seqParam->profile_idc = encParam->profile;
+ seqParam->constrained_set0_flag = FALSE;
+ seqParam->constrained_set1_flag = FALSE;
+ seqParam->constrained_set2_flag = FALSE;
+ seqParam->constrained_set3_flag = FALSE;
+ seqParam->level_idc = encParam->level;
+ }
+ else
+ {
+ seqParam->profile_idc = extS->profile_idc;
+ seqParam->constrained_set0_flag = extS->constrained_set0_flag;
+ seqParam->constrained_set1_flag = extS->constrained_set1_flag;
+ seqParam->constrained_set2_flag = extS->constrained_set2_flag;
+ seqParam->constrained_set3_flag = extS->constrained_set3_flag;
+ seqParam->level_idc = extS->level_idc;
+ }
+
+
+ status = VerifyProfile(encvid, seqParam, picParam);
+ if (status != AVCENC_SUCCESS)
+ {
+ return status;
+ }
+
+ status = VerifyLevel(encvid, seqParam, picParam);
+ if (status != AVCENC_SUCCESS)
+ {
+ return status;
+ }
+
+ return AVCENC_SUCCESS;
+}
+
+/* verify the profile setting */
+/* When profile_idc is 0, auto-select the most constrained profile whose tool
+   set covers the current configuration and raise the matching constrained_setX
+   flag; then validate the (given or derived) profile against what this library
+   actually implements (Baseline only). */
+AVCEnc_Status VerifyProfile(AVCEncObject *encvid, AVCSeqParamSet *seqParam, AVCPicParamSet *picParam)
+{
+    AVCRateControl *rc = encvid->rateCtrl;
+    AVCEnc_Status retStatus = AVCENC_SUCCESS;
+
+    if (seqParam->profile_idc == 0) /* no profile given: derive one */
+    {
+        /* Candidates are tested from least to most restrictive, so the last
+           (most constrained) profile that fits is the one finally kept.
+           num_slice_groups_minus1 is unsigned, so no lower-bound check. */
+        if (seqParam->direct_8x8_inference_flag == TRUE &&
+            picParam->entropy_coding_mode_flag == FALSE &&
+            picParam->num_slice_groups_minus1 <= 7)
+        {
+            seqParam->profile_idc = AVC_EXTENDED;
+            seqParam->constrained_set2_flag = TRUE;
+        }
+
+        if (rc->dpEnable == FALSE &&
+            picParam->redundant_pic_cnt_present_flag == FALSE &&
+            picParam->num_slice_groups_minus1 == 0)
+        {
+            seqParam->profile_idc = AVC_MAIN;
+            seqParam->constrained_set1_flag = TRUE;
+        }
+
+        if (rc->bidirPred == FALSE &&
+            rc->dpEnable == FALSE &&
+            seqParam->frame_mbs_only_flag == TRUE &&
+            picParam->weighted_pred_flag == FALSE &&
+            picParam->weighted_bipred_idc == 0 &&
+            picParam->entropy_coding_mode_flag == FALSE &&
+            picParam->num_slice_groups_minus1 <= 7)
+        {
+            seqParam->profile_idc = AVC_BASELINE;
+            seqParam->constrained_set0_flag = TRUE;
+        }
+
+        if (seqParam->profile_idc == 0) /* nothing fits the tool set */
+        {
+            return AVCENC_PROFILE_NOT_SUPPORTED;
+        }
+    }
+
+    /* Validate against the profiles supported by this library. */
+    if (seqParam->profile_idc == AVC_BASELINE)
+    {
+        /* reject any non-Baseline tool */
+        if (rc->bidirPred == TRUE ||
+            rc->dpEnable == TRUE ||
+            seqParam->frame_mbs_only_flag != TRUE ||
+            picParam->weighted_pred_flag == TRUE ||
+            picParam->weighted_bipred_idc != 0 ||
+            picParam->entropy_coding_mode_flag == TRUE ||
+            picParam->num_slice_groups_minus1 > 7)
+        {
+            retStatus = AVCENC_TOOLS_NOT_SUPPORTED;
+        }
+    }
+    else if (seqParam->profile_idc == AVC_MAIN || seqParam->profile_idc == AVC_EXTENDED)
+    {
+        retStatus = AVCENC_PROFILE_NOT_SUPPORTED; /* not implemented here */
+    }
+
+    return retStatus;
+}
+
+/* verify the level setting */
+/* When level_idc is 0, scan the level tables for the lowest level whose limits
+   (MB rate, frame size, bitrate, CPB, MV range, DPB size) accommodate the
+   current settings; then check the (given or derived) level is known and that
+   the configuration complies with its limits. */
+AVCEnc_Status VerifyLevel(AVCEncObject *encvid, AVCSeqParamSet *seqParam, AVCPicParamSet *picParam)
+{
+    (void)(picParam);
+
+    AVCRateControl *rc = encvid->rateCtrl;
+    AVCCommonObj *video = encvid->common;
+    int mbRate, idx;
+    int dpbSizeReq;
+
+    /* throughput (MBs/sec, rounded) and DPB demand of the current settings */
+    mbRate = (int)(video->PicSizeInMbs * rc->frame_rate + 0.5);
+    dpbSizeReq = (seqParam->num_ref_frames * video->PicSizeInMbs * 3) >> 6;
+
+    if (seqParam->level_idc == 0) /* pick the lowest level that fits */
+    {
+        for (idx = 0; idx < MAX_LEVEL_IDX; idx++)
+        {
+            if (mbRate > MaxMBPS[idx]) continue;
+            if (video->PicSizeInMbs > (uint)MaxFS[idx]) continue;
+            if (rc->bitRate > (int32)MaxBR[idx]*1000) continue;
+            if (rc->cpbSize > (int32)MaxCPB[idx]*1000) continue;
+            if (rc->mvRange > MaxVmvR[idx]) continue;
+            if (dpbSizeReq > MaxDPBX2[idx]*512) continue;
+
+            seqParam->level_idc = mapIdx2Lev[idx];
+            break;
+        }
+        if (seqParam->level_idc == 0) /* nothing in the tables fits */
+        {
+            return AVCENC_LEVEL_NOT_SUPPORTED;
+        }
+    }
+
+    idx = mapLev2Idx[seqParam->level_idc];
+
+    /* level 1b requires constrained_set3_flag per the spec signalling rules */
+    if (seqParam->level_idc == AVC_LEVEL1_B)
+    {
+        seqParam->constrained_set3_flag = 1;
+    }
+
+    if (idx == 255) /* level value has no table entry */
+    {
+        return AVCENC_LEVEL_NOT_SUPPORTED;
+    }
+
+    /* final compliance check against the chosen level's limits
+       (the DPB size is not re-checked here, matching the original logic) */
+    if (mbRate > MaxMBPS[idx] ||
+        video->PicSizeInMbs > (uint)MaxFS[idx] ||
+        rc->bitRate > (int32)MaxBR[idx]*1000 ||
+        rc->cpbSize > (int32)MaxCPB[idx]*1000 ||
+        rc->mvRange > MaxVmvR[idx])
+    {
+        return AVCENC_LEVEL_FAIL;
+    }
+
+    return AVCENC_SUCCESS;
+}
+
+/* initialize variables at the beginning of each frame */
+/* determine the picture type */
+/* encode POC */
+/* maybe we should do more stuff here. MotionEstimation+SCD and generate a new SPS and PPS */
+/* Per-frame setup: (re)configures the DPB on IDR, builds the FMO maps, grabs a
+   frame buffer from the DPB, assigns frame_num and POC, initializes the
+   reference list, runs motion estimation, and seeds rate control.
+   Returns AVCENC_NEW_IDR for IDR frames, AVCENC_SUCCESS for others, or a
+   failure/AVCENC_PICTURE_READY status propagated from the DPB. */
+AVCEnc_Status InitFrame(AVCEncObject *encvid)
+{
+    AVCStatus ret;
+    AVCEnc_Status status;
+    AVCCommonObj *video = encvid->common;
+    AVCSliceHeader *sliceHdr = video->sliceHdr;
+
+    /* look for the next frame in coding_order and look for available picture
+       in the DPB. Note, video->currFS->PicOrderCnt, currFS->FrameNum and currPic->PicNum
+       are set to wrong number in this function (right for decoder). */
+    if (video->nal_unit_type == AVC_NALTYPE_IDR)
+    {
+        // call init DPB in here.
+        ret = AVCConfigureSequence(encvid->avcHandle, video, TRUE);
+        if (ret != AVC_SUCCESS)
+        {
+            return AVCENC_FAIL;
+        }
+    }
+
+    /* flexible macroblock ordering (every frame)*/
+    /* populate video->mapUnitToSliceGroupMap and video->MbToSliceGroupMap */
+    /* It changes once per each PPS. */
+    FMOInit(video);
+
+    ret = DPBInitBuffer(encvid->avcHandle, video); // get new buffer
+
+    if (ret != AVC_SUCCESS)
+    {
+        return (AVCEnc_Status)ret; // AVCENC_PICTURE_READY, FAIL
+    }
+
+    DPBInitPic(video, 0); /* 0 is dummy */
+
+    /************* determine picture type IDR or non-IDR ***********/
+    video->currPicType = AVC_FRAME;
+    video->slice_data_partitioning = FALSE;
+    encvid->currInput->is_reference = 1; /* default to all frames */
+    video->nal_ref_idc = 1; /* need to set this for InitPOC */
+    video->currPic->isReference = TRUE;
+
+    /************* set frame_num ********************/
+    if (video->nal_unit_type == AVC_NALTYPE_IDR)
+    {
+        /* IDR resets the frame numbering */
+        video->prevFrameNum = video->MaxFrameNum;
+        video->PrevRefFrameNum = 0;
+        sliceHdr->frame_num = 0;
+    }
+    /* otherwise, it's set to previous reference frame access unit's frame_num in decoding order,
+       see the end of PVAVCDecodeSlice()*/
+    /* There's also restriction on the frame_num, see page 59 of JVT-I1010.doc. */
+    /* Basically, frame_num can't be repeated unless it's opposite fields or non reference fields */
+    else
+    {
+        sliceHdr->frame_num = (video->PrevRefFrameNum + 1) % video->MaxFrameNum;
+    }
+    video->CurrPicNum = sliceHdr->frame_num; /* for field_pic_flag = 0 */
+    //video->CurrPicNum = 2*sliceHdr->frame_num + 1; /* for field_pic_flag = 1 */
+
+    /* assign pic_order_cnt, video->PicOrderCnt */
+    status = InitPOC(encvid);
+    if (status != AVCENC_SUCCESS) /* incorrigable fail */
+    {
+        return status;
+    }
+
+    /* Initialize refListIdx for this picture */
+    RefListInit(video);
+
+    /************* motion estimation and scene analysis ************/
+    // , to move this to MB-based MV search for comparison
+    // use sub-optimal QP for mv search
+    /* NOTE(review): the return value is not checked here; the IDR/non-IDR
+       outcome is presumably reflected in video->nal_unit_type -- confirm. */
+    AVCMotionEstimation(encvid); /* AVCENC_SUCCESS or AVCENC_NEW_IDR */
+
+    /* after this point, the picture type will be fixed to either IDR or non-IDR */
+    video->currFS->PicOrderCnt = video->PicOrderCnt;
+    video->currFS->FrameNum = video->sliceHdr->frame_num;
+    video->currPic->PicNum = video->CurrPicNum;
+    video->mbNum = 0; /* start from zero MB */
+    encvid->currSliceGroup = 0; /* start from slice group #0 */
+    encvid->numIntraMB = 0; /* reset this counter */
+
+    if (video->nal_unit_type == AVC_NALTYPE_IDR)
+    {
+        /* IDR starts a new GOP for the rate controller */
+        RCInitGOP(encvid);
+
+        /* calculate picture QP */
+        RCInitFrameQP(encvid);
+
+        return AVCENC_NEW_IDR;
+    }
+
+    /* calculate picture QP */
+    RCInitFrameQP(encvid); /* get QP after MV search */
+
+    return AVCENC_SUCCESS;
+}
+
+/* initialize variables for this slice */
+/* Fills in slice-header defaults for the slice starting at video->mbNum,
+   derives the deblocking filter offsets, re-runs FMO when the slice-group map
+   changes per slice (map types 3..5), and computes slice_qp_delta from the
+   frame QP. Returns AVCENC_TOOLS_NOT_SUPPORTED for field coding or weighted
+   prediction, AVCENC_SUCCESS otherwise. */
+AVCEnc_Status InitSlice(AVCEncObject *encvid)
+{
+    AVCCommonObj *video = encvid->common;
+    AVCSliceHeader *sliceHdr = video->sliceHdr;
+    AVCPicParamSet *currPPS = video->currPicParams;
+    AVCSeqParamSet *currSPS = video->currSeqParams;
+    int slice_type = video->slice_type;
+
+    sliceHdr->first_mb_in_slice = video->mbNum;
+    if (video->mbNum) // not first slice of a frame
+    {
+        video->sliceHdr->slice_type = (AVCSliceType)slice_type;
+    }
+
+    /* sliceHdr->slice_type already set in InitFrame */
+
+    sliceHdr->pic_parameter_set_id = video->currPicParams->pic_parameter_set_id;
+
+    /* sliceHdr->frame_num already set in InitFrame */
+
+    if (!currSPS->frame_mbs_only_flag) /* we shouldn't need this check */
+    {
+        /* field coding is not supported by this encoder */
+        sliceHdr->field_pic_flag = sliceHdr->bottom_field_flag = FALSE;
+        return AVCENC_TOOLS_NOT_SUPPORTED;
+    }
+
+    /* sliceHdr->idr_pic_id already set in PVAVCEncodeNAL
+
+       sliceHdr->pic_order_cnt_lsb already set in InitFrame..InitPOC
+       sliceHdr->delta_pic_order_cnt_bottom already set in InitPOC
+
+       sliceHdr->delta_pic_order_cnt[0] already set in InitPOC
+       sliceHdr->delta_pic_order_cnt[1] already set in InitPOC
+    */
+
+    sliceHdr->redundant_pic_cnt = 0; /* default if(currPPS->redundant_pic_cnt_present_flag), range 0..127 */
+    sliceHdr->direct_spatial_mv_pred_flag = 0; // default if(slice_type == AVC_B_SLICE)
+
+    sliceHdr->num_ref_idx_active_override_flag = FALSE; /* default, if(slice_type== P,SP or B)*/
+    sliceHdr->num_ref_idx_l0_active_minus1 = 0; /* default, if (num_ref_idx_active_override_flag) */
+    sliceHdr->num_ref_idx_l1_active_minus1 = 0; /* default, if above and B_slice */
+    /* the above 2 values range from 0..15 for frame picture and 0..31 for field picture */
+
+    /* ref_pic_list_reordering(), currently we don't do anything */
+    sliceHdr->ref_pic_list_reordering_flag_l0 = FALSE; /* default */
+    sliceHdr->ref_pic_list_reordering_flag_l1 = FALSE; /* default */
+    /* if the above are TRUE, some other params must be set */
+
+    if ((currPPS->weighted_pred_flag && (slice_type == AVC_P_SLICE || slice_type == AVC_SP_SLICE)) ||
+        (currPPS->weighted_bipred_idc == 1 && slice_type == AVC_B_SLICE))
+    {
+        // pred_weight_table(); // not supported !!
+        return AVCENC_TOOLS_NOT_SUPPORTED;
+    }
+
+    /* dec_ref_pic_marking(), this will be done later*/
+    sliceHdr->no_output_of_prior_pics_flag = FALSE; /* default */
+    sliceHdr->long_term_reference_flag = FALSE; /* for IDR frame, do not make it long term */
+    sliceHdr->adaptive_ref_pic_marking_mode_flag = FALSE; /* default */
+    /* other params are not set here because they are not used */
+
+    sliceHdr->cabac_init_idc = 0; /* default, if entropy_coding_mode_flag && slice_type==I or SI, range 0..2 */
+    sliceHdr->slice_qp_delta = 0; /* default for now */
+    sliceHdr->sp_for_switch_flag = FALSE; /* default, if slice_type == SP */
+    sliceHdr->slice_qs_delta = 0; /* default, if slice_type == SP or SI */
+
+    /* derived variables from encParam */
+    /* deblocking filter */
+    video->FilterOffsetA = video->FilterOffsetB = 0;
+    if (currPPS->deblocking_filter_control_present_flag == TRUE)
+    {
+        /* the *_div2 syntax elements are signalled halved; scale back up */
+        video->FilterOffsetA = sliceHdr->slice_alpha_c0_offset_div2 << 1;
+        video->FilterOffsetB = sliceHdr->slice_beta_offset_div_2 << 1;
+    }
+
+    /* flexible macroblock ordering */
+    /* populate video->mapUnitToSliceGroupMap and video->MbToSliceGroupMap */
+    /* We already call it at the end of PVAVCEncInitialize(). It changes once per each PPS. */
+    if (video->currPicParams->num_slice_groups_minus1 > 0 && video->currPicParams->slice_group_map_type >= 3
+            && video->currPicParams->slice_group_map_type <= 5)
+    {
+        sliceHdr->slice_group_change_cycle = SLICE_GROUP_CHANGE_CYCLE; /* default, don't understand how to set it!!!*/
+
+        video->MapUnitsInSliceGroup0 =
+            AVC_MIN(sliceHdr->slice_group_change_cycle * video->SliceGroupChangeRate, video->PicSizeInMapUnits);
+
+        FMOInit(video);
+    }
+
+    /* calculate SliceQPy first */
+    /* calculate QSy first */
+
+    sliceHdr->slice_qp_delta = video->QPy - 26 - currPPS->pic_init_qp_minus26;
+    //sliceHdr->slice_qs_delta = video->QSy - 26 - currPPS->pic_init_qs_minus26;
+
+    return AVCENC_SUCCESS;
+}
+
diff --git a/media/libstagefright/codecs/avc/enc/src/intra_est.cpp b/media/libstagefright/codecs/avc/enc/src/intra_est.cpp
new file mode 100644
index 0000000..17e5985
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/intra_est.cpp
@@ -0,0 +1,2199 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+#include "avcenc_lib.h"
+
+/* Mode-decision tuning thresholds (both currently 0, i.e. no bias applied). */
+#define TH_I4 0 /* threshold biasing toward I16 mode instead of I4 mode */
+#define TH_Intra 0 /* threshold biasing toward INTER mode instead of intra mode */
+
+/* Fixed-mode overrides -- presumably used to pin the mode decision for
+   debugging/testing; not referenced in the search code visible here. */
+#define FIXED_INTRAPRED_MODE AVC_I16
+#define FIXED_I16_MODE AVC_I16_DC
+#define FIXED_I4_MODE AVC_I4_Diagonal_Down_Left
+#define FIXED_INTRA_CHROMA_MODE AVC_IC_DC
+
+/* Saturate x to [0,255]: if out of range, ~(x>>31) is 0x00 for negative x and
+   0xFF for positive overflow (relies on arithmetic right shift of the sign). */
+#define CLIP_RESULT(x) if((uint)x > 0xFF){ \
+ x = 0xFF & (~(x>>31));}
+
+
+/* Decide whether an intra-mode search is worth running for the current MB of a
+   P slice. Computes SBE, the sum of absolute differences between the
+   reconstructed border pixels of the top/left neighbor MBs and the co-located
+   source pixels (16+16 luma samples plus 8+8 samples in each chroma plane =
+   64 samples total), then compares the Average Boundary Error (SBE/64) against
+   the per-pixel inter cost (min_cost/384). Returns false (skip intra) when the
+   boundary error is already large relative to the inter cost. MBs on the last
+   MB column/row, or without both neighbors, always return true. */
+bool IntraDecisionABE(AVCEncObject *encvid, int min_cost, uint8 *curL, int picPitch)
+{
+    AVCCommonObj *video = encvid->common;
+    AVCFrameIO *currInput = encvid->currInput;
+    int orgPitch = currInput->pitch;
+    int x_pos = (video->mb_x) << 4;
+    int y_pos = (video->mb_y) << 4;
+    uint8 *orgY = currInput->YCbCr[0] + y_pos * orgPitch + x_pos;
+    int j;
+    uint8 *topL, *leftL, *orgY_2, *orgY_3;
+    int temp, SBE, offset;
+    OsclFloat ABE;
+    bool intra = true;
+
+    /* only decide for interior MBs that have both top and left neighbors */
+    if (((x_pos >> 4) != (int)video->PicWidthInMbs - 1) &&
+            ((y_pos >> 4) != (int)video->PicHeightInMbs - 1) &&
+            video->intraAvailA &&
+            video->intraAvailB)
+    {
+        SBE = 0;
+        /* top neighbor */
+        topL = curL - picPitch;
+        /* left neighbor */
+        leftL = curL - 1;
+        orgY_2 = orgY - orgPitch;
+
+        /* luma: 16 pixels along the top border + 16 down the left border */
+        for (j = 0; j < 16; j++)
+        {
+            temp = *topL++ - orgY[j];
+            SBE += ((temp >= 0) ? temp : -temp);
+            temp = *(leftL += picPitch) - *(orgY_2 += orgPitch);
+            SBE += ((temp >= 0) ? temp : -temp);
+        }
+
+        /* calculate chroma */
+        /* chroma pitch is picPitch/2; (y_pos>>2)*picPitch == (y_pos/2)*(picPitch/2) */
+        offset = (y_pos >> 2) * picPitch + (x_pos >> 1);
+        topL = video->currPic->Scb + offset;
+        orgY_2 = currInput->YCbCr[1] + offset + (y_pos >> 2) * (orgPitch - picPitch);
+
+        /* Cb: 8 top border + 8 left border pixels */
+        leftL = topL - 1;
+        topL -= (picPitch >> 1);
+        orgY_3 = orgY_2 - (orgPitch >> 1);
+        for (j = 0; j < 8; j++)
+        {
+            temp = *topL++ - orgY_2[j];
+            SBE += ((temp >= 0) ? temp : -temp);
+            temp = *(leftL += (picPitch >> 1)) - *(orgY_3 += (orgPitch >> 1));
+            SBE += ((temp >= 0) ? temp : -temp);
+        }
+
+        /* Cr: same walk over the second chroma plane */
+        topL = video->currPic->Scr + offset;
+        orgY_2 = currInput->YCbCr[2] + offset + (y_pos >> 2) * (orgPitch - picPitch);
+
+        leftL = topL - 1;
+        topL -= (picPitch >> 1);
+        orgY_3 = orgY_2 - (orgPitch >> 1);
+        for (j = 0; j < 8; j++)
+        {
+            temp = *topL++ - orgY_2[j];
+            SBE += ((temp >= 0) ? temp : -temp);
+            temp = *(leftL += (picPitch >> 1)) - *(orgY_3 += (orgPitch >> 1));
+            SBE += ((temp >= 0) ? temp : -temp);
+        }
+
+        /* compare mincost/384 and SBE/64 */
+        ABE = SBE / 64.0;
+        if (ABE*0.8 >= min_cost / 384.0)
+        {
+            intra = false;
+        }
+    }
+
+    return intra;
+}
+
+/* perform searching for MB mode */
+/* assuming that this is done inside the encoding loop,
+no need to call InitNeighborAvailability */
+/* Runs the intra mode search (I16 then I4) for the current macroblock and
+   updates encvid->min_cost[mbnum] / currMB mode fields. For P slices the
+   search is gated by IntraDecisionABE, and the already-reconstructed inter
+   pixels in currPic are saved to (and, if inter wins, restored from) the
+   subpel_pred scratch buffer, because the I4 search reconstructs in place. */
+void MBIntraSearch(AVCEncObject *encvid, int mbnum, uint8 *curL, int picPitch)
+{
+    AVCCommonObj *video = encvid->common;
+    AVCFrameIO *currInput = encvid->currInput;
+    AVCMacroblock *currMB = video->currMB;
+    int min_cost;
+    uint8 *orgY;
+    int x_pos = (video->mb_x) << 4;
+    int y_pos = (video->mb_y) << 4;
+    uint32 *saved_inter;
+    int j;
+    int orgPitch = currInput->pitch;
+    bool intra = true;
+
+    currMB->CBP = 0;
+
+    /* first do motion vector and variable block size search */
+    min_cost = encvid->min_cost[mbnum];
+
+    /* now perform intra prediction search */
+    /* need to add the check for encvid->intraSearch[video->mbNum] to skip intra
+       if it's not worth checking. */
+    if (video->slice_type == AVC_P_SLICE)
+    {
+        /* Decide whether intra search is necessary or not */
+        /* This one, we do it in the encoding loop so the neighboring pixel are the
+           actual reconstructed pixels. */
+        intra = IntraDecisionABE(encvid, min_cost, curL, picPitch);
+    }
+
+    if (intra == true || video->slice_type == AVC_I_SLICE)
+    {
+        orgY = currInput->YCbCr[0] + y_pos * orgPitch + x_pos;
+
+        /* i16 mode search */
+        /* generate all the predictions */
+        intrapred_luma_16x16(encvid);
+
+        /* evaluate them one by one */
+        find_cost_16x16(encvid, orgY, &min_cost);
+
+        if (video->slice_type == AVC_P_SLICE)
+        {
+            /* save current inter prediction */
+            /* copy the 16x16 reconstructed luma block, 4 bytes at a time;
+               curL/picPitch are adjusted so each "+= 4" lands on the next word */
+            saved_inter = encvid->subpel_pred; /* reuse existing buffer */
+            j = 16;
+            curL -= 4;
+            picPitch -= 16;
+            while (j--)
+            {
+                *saved_inter++ = *((uint32*)(curL += 4));
+                *saved_inter++ = *((uint32*)(curL += 4));
+                *saved_inter++ = *((uint32*)(curL += 4));
+                *saved_inter++ = *((uint32*)(curL += 4));
+                curL += picPitch;
+            }
+
+        }
+
+        /* i4 mode search */
+        /* note: this reconstructs intra pixels into currPic as it searches */
+        mb_intra4x4_search(encvid, &min_cost);
+
+        encvid->min_cost[mbnum] = min_cost; /* update min_cost */
+    }
+
+
+    if (currMB->mb_intra)
+    {
+        chroma_intra_search(encvid);
+
+        /* need to set this in order for the MBInterPrediction to work!! */
+        memset(currMB->mvL0, 0, sizeof(int32)*16);
+        currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] =
+                                    currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = -1;
+    }
+    else if (video->slice_type == AVC_P_SLICE && intra == true)
+    {
+        /* restore current inter prediction */
+        /* rewind curL back to the top-left of the MB (it advanced 16 rows in
+           the save loop above) and copy the saved words back */
+        saved_inter = encvid->subpel_pred; /* reuse existing buffer */
+        j = 16;
+        curL -= ((picPitch + 16) << 4);
+        while (j--)
+        {
+            *((uint32*)(curL += 4)) = *saved_inter++;
+            *((uint32*)(curL += 4)) = *saved_inter++;
+            *((uint32*)(curL += 4)) = *saved_inter++;
+            *((uint32*)(curL += 4)) = *saved_inter++;
+            curL += picPitch;
+        }
+    }
+
+    return ;
+}
+
+/* generate all the prediction values */
+/* Builds the Intra_16x16 luma prediction blocks (Vertical, Horizontal, DC and
+   Plane, availability permitting) from the reconstructed neighbor pixels into
+   encvid->pred_i16[mode]. Each prediction buffer has a row pitch of 16.
+   Neighbor sums for the DC mode are accumulated as a by-product of the
+   vertical/horizontal passes. */
+void intrapred_luma_16x16(AVCEncObject *encvid)
+{
+    AVCCommonObj *video = encvid->common;
+    AVCPictureData *currPic = video->currPic;
+
+    int x_pos = (video->mb_x) << 4;
+    int y_pos = (video->mb_y) << 4;
+    int pitch = currPic->pitch;
+
+    int offset = y_pos * pitch + x_pos;
+
+    uint8 *pred, *top, *left;
+    uint8 *curL = currPic->Sl + offset; /* point to reconstructed frame */
+    uint32 word1, word2, word3, word4;
+    uint32 sum = 0;
+
+    int a_16, b, c, factor_c;
+    uint8 *comp_ref_x0, *comp_ref_x1, *comp_ref_y0, *comp_ref_y1;
+    int H = 0, V = 0, tmp, value;
+    int i;
+
+    if (video->intraAvailB)
+    {
+        //get vertical prediction mode
+        /* replicate the 16 top-neighbor pixels down all 16 rows */
+        top = curL - pitch;
+
+        pred = encvid->pred_i16[AVC_I16_Vertical] - 16;
+
+        word1 = *((uint32*)(top)); /* read 4 bytes from top */
+        word2 = *((uint32*)(top + 4)); /* read 4 bytes from top */
+        word3 = *((uint32*)(top + 8)); /* read 4 bytes from top */
+        word4 = *((uint32*)(top + 12)); /* read 4 bytes from top */
+
+        for (i = 0; i < 16; i++)
+        {
+            *((uint32*)(pred += 16)) = word1;
+            *((uint32*)(pred + 4)) = word2;
+            *((uint32*)(pred + 8)) = word3;
+            *((uint32*)(pred + 12)) = word4;
+
+        }
+
+        /* accumulate the 16 top pixels for DC: bytes are summed pairwise in
+           two 16-bit lanes (0xFF00FF masks), then the lanes are folded */
+        sum = word1 & 0xFF00FF;
+        word1 = (word1 >> 8) & 0xFF00FF;
+        sum += word1;
+        word1 = (word2 & 0xFF00FF);
+        sum += word1;
+        word2 = (word2 >> 8) & 0xFF00FF;
+        sum += word2;
+        word1 = (word3 & 0xFF00FF);
+        sum += word1;
+        word3 = (word3 >> 8) & 0xFF00FF;
+        sum += word3;
+        word1 = (word4 & 0xFF00FF);
+        sum += word1;
+        word4 = (word4 >> 8) & 0xFF00FF;
+        sum += word4;
+
+        sum += (sum >> 16);
+        sum &= 0xFFFF;
+
+        if (!video->intraAvailA)
+        {
+            /* DC from top border only: round and divide by 16 */
+            sum = (sum + 8) >> 4;
+        }
+    }
+
+    if (video->intraAvailA)
+    {
+        // get horizontal mode
+        /* replicate each left-neighbor pixel across its row; keep summing
+           into `sum` for the DC mode */
+        left = curL - 1 - pitch;
+
+        pred = encvid->pred_i16[AVC_I16_Horizontal] - 16;
+
+        for (i = 0; i < 16; i++)
+        {
+            word1 = *(left += pitch);
+            sum += word1;
+
+            word1 = (word1 << 8) | word1;
+            word1 = (word1 << 16) | word1; /* make it 4 */
+
+            *(uint32*)(pred += 16) = word1;
+            *(uint32*)(pred + 4) = word1;
+            *(uint32*)(pred + 8) = word1;
+            *(uint32*)(pred + 12) = word1;
+        }
+
+        if (!video->intraAvailB)
+        {
+            /* left border only: 16 samples */
+            sum = (sum + 8) >> 4;
+        }
+        else
+        {
+            /* both borders: 32 samples */
+            sum = (sum + 16) >> 5;
+        }
+    }
+
+    // get DC mode
+    if (!video->intraAvailA && !video->intraAvailB)
+    {
+        /* no neighbors: DC is 128 in every byte */
+        sum = 0x80808080;
+    }
+    else
+    {
+        /* replicate the 8-bit mean into all four bytes of the word */
+        sum = (sum << 8) | sum;
+        sum = (sum << 16) | sum;
+    }
+
+    pred = encvid->pred_i16[AVC_I16_DC] - 16;
+    for (i = 0; i < 16; i++)
+    {
+        *((uint32*)(pred += 16)) = sum;
+        *((uint32*)(pred + 4)) = sum;
+        *((uint32*)(pred + 8)) = sum;
+        *((uint32*)(pred + 12)) = sum;
+    }
+
+    // get plane mode
+    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
+    {
+        pred = encvid->pred_i16[AVC_I16_Plane] - 16;
+
+        /* H and V are the weighted horizontal/vertical border gradients;
+           a_16, b, c are the plane parameters (a scaled by 16) */
+        comp_ref_x0 = curL - pitch + 8;
+        comp_ref_x1 = curL - pitch + 6;
+        comp_ref_y0 = curL - 1 + (pitch << 3);
+        comp_ref_y1 = curL - 1 + 6 * pitch;
+
+        for (i = 1; i < 8; i++)
+        {
+            H += i * (*comp_ref_x0++ - *comp_ref_x1--);
+            V += i * (*comp_ref_y0 - *comp_ref_y1);
+            comp_ref_y0 += pitch;
+            comp_ref_y1 -= pitch;
+        }
+
+        /* i == 8 term uses the top-left corner pixel */
+        H += i * (*comp_ref_x0++ - curL[-pitch-1]);
+        V += i * (*comp_ref_y0 - *comp_ref_y1);
+
+
+        a_16 = ((*(curL - pitch + 15) + *(curL - 1 + 15 * pitch)) << 4) + 16;;
+        b = (5 * H + 32) >> 6;
+        c = (5 * V + 32) >> 6;
+
+        /* per row: factor_c starts at a + c*(y-7) - 7b, then b is added per
+           pixel (15 times across the row); each value is >>5 and clipped */
+        tmp = 0;
+        for (i = 0; i < 16; i++)
+        {
+            factor_c = a_16 + c * (tmp++ - 7);
+            factor_c -= 7 * b;
+
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = value;
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = (word1) | (value << 8);
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = (word1) | (value << 16);
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = (word1) | (value << 24);
+            *((uint32*)(pred += 16)) = word1;
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = value;
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = (word1) | (value << 8);
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = (word1) | (value << 16);
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = (word1) | (value << 24);
+            *((uint32*)(pred + 4)) = word1;
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = value;
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = (word1) | (value << 8);
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = (word1) | (value << 16);
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = (word1) | (value << 24);
+            *((uint32*)(pred + 8)) = word1;
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = value;
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = (word1) | (value << 8);
+            value = factor_c >> 5;
+            factor_c += b;
+            CLIP_RESULT(value)
+            word1 = (word1) | (value << 16);
+            value = factor_c >> 5;
+            /* last pixel of the row: no trailing += b needed */
+            CLIP_RESULT(value)
+            word1 = (word1) | (value << 24);
+            *((uint32*)(pred + 12)) = word1;
+        }
+    }
+
+    return ;
+}
+
+
+/* evaluate each prediction mode of I16 */
+/* Scores each available Intra_16x16 prediction against the source with
+   cost_i16 (SATD). *min_cost enters holding the best cost so far (e.g. the
+   inter cost) and is lowered, together with the currMB mode fields, whenever
+   a mode beats it; the running minimum also drives cost_i16's early exit. */
+void find_cost_16x16(AVCEncObject *encvid, uint8 *orgY, int *min_cost)
+{
+    AVCCommonObj *video = encvid->common;
+    AVCMacroblock *currMB = video->currMB;
+    int cost;
+    int org_pitch = encvid->currInput->pitch;
+
+    /* evaluate vertical mode */
+    if (video->intraAvailB)
+    {
+        cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Vertical], *min_cost);
+        if (cost < *min_cost)
+        {
+            *min_cost = cost;
+            currMB->mbMode = AVC_I16;
+            currMB->mb_intra = 1;
+            currMB->i16Mode = AVC_I16_Vertical;
+        }
+    }
+
+
+    /* evaluate horizontal mode */
+    if (video->intraAvailA)
+    {
+        cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Horizontal], *min_cost);
+        if (cost < *min_cost)
+        {
+            *min_cost = cost;
+            currMB->mbMode = AVC_I16;
+            currMB->mb_intra = 1;
+            currMB->i16Mode = AVC_I16_Horizontal;
+        }
+    }
+
+    /* evaluate DC mode */
+    /* DC is always available (defaults to 128 without neighbors) */
+    cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_DC], *min_cost);
+    if (cost < *min_cost)
+    {
+        *min_cost = cost;
+        currMB->mbMode = AVC_I16;
+        currMB->mb_intra = 1;
+        currMB->i16Mode = AVC_I16_DC;
+    }
+
+    /* evaluate plane mode */
+    /* plane needs left, top and top-left neighbors */
+    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
+    {
+        cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Plane], *min_cost);
+        if (cost < *min_cost)
+        {
+            *min_cost = cost;
+            currMB->mbMode = AVC_I16;
+            currMB->mb_intra = 1;
+            currMB->i16Mode = AVC_I16_Plane;
+        }
+    }
+
+    return ;
+}
+
+
+/* SATD cost of a 16x16 prediction against the source block.
+   Applies a 4x4 Hadamard transform to the residual (horizontal then vertical
+   pass over a 16x16 int16 workspace), sums absolute coefficients while
+   excluding each 4x4 block's DC term, then runs a second-stage 4x4 Hadamard
+   over the 16 DC terms and adds its absolute coefficients. Returns cost>>1.
+   Exits early (returning the partial cost>>1) as soon as the partial cost
+   exceeds min_cost, so the returned value is only valid for comparison. */
+int cost_i16(uint8 *org, int org_pitch, uint8 *pred, int min_cost)
+{
+
+    int cost;
+    int j, k;
+    int16 res[256], *pres; // residue
+    int m0, m1, m2, m3;
+
+    // calculate SATD
+    org_pitch -= 16;
+    pres = res;
+    // horizontal transform
+    /* 4-point Hadamard butterfly on each group of 4 residual samples */
+    for (j = 0; j < 16; j++)
+    {
+        k = 4;
+        while (k > 0)
+        {
+            m0 = org[0] - pred[0];
+            m3 = org[3] - pred[3];
+            m0 += m3;
+            m3 = m0 - (m3 << 1);
+            m1 = org[1] - pred[1];
+            m2 = org[2] - pred[2];
+            m1 += m2;
+            m2 = m1 - (m2 << 1);
+            pres[0] = m0 + m1;
+            pres[2] = m0 - m1;
+            pres[1] = m2 + m3;
+            pres[3] = m3 - m2;
+
+            org += 4;
+            pres += 4;
+            pred += 4;
+            k--;
+        }
+        org += org_pitch;
+    }
+    /* vertical transform */
+    /* process 4 strips of 4 rows (j<<6 = 64 int16 per strip); columns where
+       k%4 == 0 hold each 4x4 block's DC, which is summed later instead */
+    cost = 0;
+    for (j = 0; j < 4; j++)
+    {
+        pres = res + (j << 6);
+        k = 16;
+        while (k > 0)
+        {
+            m0 = pres[0];
+            m3 = pres[3<<4];
+            m0 += m3;
+            m3 = m0 - (m3 << 1);
+            m1 = pres[1<<4];
+            m2 = pres[2<<4];
+            m1 += m2;
+            m2 = m1 - (m2 << 1);
+            pres[0] = m0 = m0 + m1;
+
+            if (k&0x3) // only sum up non DC values.
+            {
+                cost += ((m0 > 0) ? m0 : -m0);
+            }
+
+            m1 = m0 - (m1 << 1);
+            cost += ((m1 > 0) ? m1 : -m1);
+            m3 = m2 + m3;
+            cost += ((m3 > 0) ? m3 : -m3);
+            m2 = m3 - (m2 << 1);
+            cost += ((m2 > 0) ? m2 : -m2);
+
+            pres++;
+            k--;
+        }
+        if ((cost >> 1) > min_cost) /* early drop out */
+        {
+            return (cost >> 1);
+        }
+    }
+
+    /* Hadamard of the DC coefficient */
+    /* the 16 DC terms sit at res[i*4] of every 4th row; transform them as a
+       4x4 block (with the /4 normalization folded into the shifts) */
+    pres = res;
+    k = 4;
+    while (k > 0)
+    {
+        m0 = pres[0];
+        m3 = pres[3<<2];
+        m0 >>= 2;
+        m0 += (m3 >> 2);
+        m3 = m0 - (m3 >> 1);
+        m1 = pres[1<<2];
+        m2 = pres[2<<2];
+        m1 >>= 2;
+        m1 += (m2 >> 2);
+        m2 = m1 - (m2 >> 1);
+        pres[0] = (m0 + m1);
+        pres[2<<2] = (m0 - m1);
+        pres[1<<2] = (m2 + m3);
+        pres[3<<2] = (m3 - m2);
+        pres += (4 << 4);
+        k--;
+    }
+
+    pres = res;
+    k = 4;
+    while (k > 0)
+    {
+        m0 = pres[0];
+        m3 = pres[3<<6];
+        m0 += m3;
+        m3 = m0 - (m3 << 1);
+        m1 = pres[1<<6];
+        m2 = pres[2<<6];
+        m1 += m2;
+        m2 = m1 - (m2 << 1);
+        m0 = m0 + m1;
+        cost += ((m0 >= 0) ? m0 : -m0);
+        m1 = m0 - (m1 << 1);
+        cost += ((m1 >= 0) ? m1 : -m1);
+        m3 = m2 + m3;
+        cost += ((m3 >= 0) ? m3 : -m3);
+        m2 = m3 - (m2 << 1);
+        cost += ((m2 >= 0) ? m2 : -m2);
+        pres += 4;
+
+        if ((cost >> 1) > min_cost) /* early drop out */
+        {
+            return (cost >> 1);
+        }
+
+        k--;
+    }
+
+    return (cost >> 1);
+}
+
+
+/* Intra_4x4 mode search over the 16 4x4 luma blocks of the current MB.
+   Accumulates the total cost starting from a fixed bias of 4*round(6*lambda)
+   (presumably the extra rate of signalling I4 modes -- confirm), reconstructs
+   each block in place via dct_luma as it goes, and commits AVC_I4 as the MB
+   mode only when the total beats *min_cost. Aborts as soon as the running
+   cost exceeds *min_cost, leaving the reconstruction partially overwritten
+   (the caller, MBIntraSearch, restores the inter pixels in that case). */
+void mb_intra4x4_search(AVCEncObject *encvid, int *min_cost)
+{
+    AVCCommonObj *video = encvid->common;
+    AVCMacroblock *currMB = video->currMB;
+    AVCPictureData *currPic = video->currPic;
+    AVCFrameIO *currInput = encvid->currInput;
+    int pitch = currPic->pitch;
+    int org_pitch = currInput->pitch;
+    int offset;
+    uint8 *curL, *comp, *org4, *org8;
+    int y = video->mb_y << 4;
+    int x = video->mb_x << 4;
+
+    int b8, b4, cost4x4, blkidx;
+    int cost = 0;
+    int numcoef;
+    int dummy = 0;
+    int mb_intra = currMB->mb_intra; // save the original value
+
+    offset = y * pitch + x;
+
+    curL = currPic->Sl + offset;
+    org8 = currInput->YCbCr[0] + y * org_pitch + x;
+    video->pred_pitch = 4;
+
+    /* fixed mode-signalling bias: 4 * round(6.0 * lambda_mode) */
+    cost = (int)(6.0 * encvid->lambda_mode + 0.4999);
+    cost <<= 2;
+
+    currMB->mb_intra = 1; // temporary set this to one to enable the IDCT
+    // operation inside dct_luma
+
+    /* visit the four 8x8 quadrants; within each, the four 4x4 blocks,
+       mapping (b8,b4) to raster block index via blkIdx2blkXY */
+    for (b8 = 0; b8 < 4; b8++)
+    {
+        comp = curL;
+        org4 = org8;
+
+        for (b4 = 0; b4 < 4; b4++)
+        {
+            blkidx = blkIdx2blkXY[b8][b4];
+            cost4x4 = blk_intra4x4_search(encvid, blkidx, comp, org4);
+            cost += cost4x4;
+            if (cost > *min_cost)
+            {
+                /* I4 already worse than the best mode so far: give up */
+                currMB->mb_intra = mb_intra; // restore the value
+                return ;
+            }
+
+            /* do residue, Xfrm, Q, invQ, invXfrm, recon and save the DCT coefs.*/
+            video->pred_block = encvid->pred_i4[currMB->i4Mode[blkidx]];
+            numcoef = dct_luma(encvid, blkidx, comp, org4, &dummy);
+            currMB->nz_coeff[blkidx] = numcoef;
+            if (numcoef)
+            {
+                video->cbp4x4 |= (1 << blkidx);
+                currMB->CBP |= (1 << b8);
+            }
+
+            /* zig-zag within the quadrant: right, then down-left, right, ... */
+            if (b4&1)
+            {
+                comp += ((pitch << 2) - 4);
+                org4 += ((org_pitch << 2) - 4);
+            }
+            else
+            {
+                comp += 4;
+                org4 += 4;
+            }
+        }
+
+        if (b8&1)
+        {
+            curL += ((pitch << 3) - 8);
+            org8 += ((org_pitch << 3) - 8);
+        }
+        else
+        {
+            curL += 8;
+            org8 += 8;
+        }
+    }
+
+    currMB->mb_intra = mb_intra; // restore the value
+
+    if (cost < *min_cost)
+    {
+        *min_cost = cost;
+        currMB->mbMode = AVC_I4;
+        currMB->mb_intra = 1;
+    }
+
+    return ;
+}
+
+
/* Search for the best Intra 4x4 prediction mode for one 4x4 luma block.
   Generates prediction samples for every mode that is usable given the
   neighbor availability (left / top / top-right / top-left), scores each
   candidate with a 4x4 Hadamard SATD (cost_i4) plus a mode-signaling rate
   term, and records the winner in currMB->i4Mode[blkidx].
   'cur' points into the reconstructed picture (neighbor pixels are read
   from it), 'org' points to the original source block.
   Returns the minimum rate+distortion cost; also accumulates the pure SAD
   of the chosen modes into encvid->i4_sad across the 16 blocks of the MB.
   NOTE(review): neighbor rows are fetched/stored with uint32 accesses,
   which assumes 4-byte-aligned, little-endian frame/prediction buffers --
   confirm for the target platform. */
int blk_intra4x4_search(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org)
{
    AVCCommonObj *video = encvid->common;
    AVCNeighborAvailability availability;
    AVCMacroblock *currMB = video->currMB;
    bool top_left = FALSE;
    int pitch = video->currPic->pitch;
    uint8 mode_avail[AVCNumI4PredMode];  /* 1 iff that prediction mode may be used for this block */
    uint32 temp, DC;
    uint8 *pred;
    int org_pitch = encvid->currInput->pitch;
    uint16 min_cost, cost;

    /* neighbor pixels: P_A..P_H = top row (+ top-right), P_I..P_L = left column,
       P_X = top-left corner; unavailable neighbors default to 128 (mid-gray) */
    int P_x, Q_x, R_x, P_y, Q_y, R_y, D, D0, D1;
    int P0, Q0, R0, S0, P1, Q1, R1, P2, Q2;
    uint8 P_A, P_B, P_C, P_D, P_E, P_F, P_G, P_H, P_I, P_J, P_K, P_L, P_X;
    int r0, r1, r2, r3, r4, r5, r6, r7;
    int x0, x1, x2, x3, x4, x5;
    uint32 temp1, temp2;

    int ipmode, mostProbableMode;
    int fixedcost = 4 * encvid->lambda_mode;  /* rate cost of signaling a non-most-probable mode */
    int min_sad = 0x7FFF;

    /* determine neighbor availability: blocks interior to the MB always have
       left/top neighbors; edge blocks inherit the MB-level intra availability */
    availability.left = TRUE;
    availability.top = TRUE;
    if (blkidx <= 3) /* top row block (!block_y) */
    { /* check availability up */
        availability.top = video->intraAvailB ;
    }
    if (!(blkidx&0x3)) /* left column block (!block_x)*/
    { /* check availability left */
        availability.left = video->intraAvailA ;
    }
    availability.top_right = BlkTopRight[blkidx];

    /* BlkTopRight encodes: 0/1 = never/always, 2 = depends on MB above (B),
       3 = depends on MB above-right (C) */
    if (availability.top_right == 2)
    {
        availability.top_right = video->intraAvailB;
    }
    else if (availability.top_right == 3)
    {
        availability.top_right = video->intraAvailC;
    }

    /* fetch the 4 top neighbors A..D in one 32-bit load */
    if (availability.top == TRUE)
    {
        temp = *(uint32*)(cur - pitch);
        P_A = temp & 0xFF;
        P_B = (temp >> 8) & 0xFF;
        P_C = (temp >> 16) & 0xFF;
        P_D = (temp >> 24) & 0xFF;
    }
    else
    {
        P_A = P_B = P_C = P_D = 128;
    }

    /* fetch the 4 top-right neighbors E..H */
    if (availability.top_right == TRUE)
    {
        temp = *(uint32*)(cur - pitch + 4);
        P_E = temp & 0xFF;
        P_F = (temp >> 8) & 0xFF;
        P_G = (temp >> 16) & 0xFF;
        P_H = (temp >> 24) & 0xFF;
    }
    else
    {
        P_E = P_F = P_G = P_H = 128;
    }

    /* fetch the 4 left neighbors I..L, then restore 'cur' */
    if (availability.left == TRUE)
    {
        cur--;
        P_I = *cur;
        P_J = *(cur += pitch);
        P_K = *(cur += pitch);
        P_L = *(cur + pitch);   /* note: '+' not '+=', so cur stays on row K */
        cur -= (pitch << 1);
        cur++;
    }
    else
    {
        P_I = P_J = P_K = P_L = 128;
    }

    /* check if top-left pixel is available */
    if (((blkidx > 3) && (blkidx&0x3)) || ((blkidx > 3) && video->intraAvailA)
            || ((blkidx&0x3) && video->intraAvailB)
            || (video->intraAvailA && video->intraAvailD && video->intraAvailB))
    {
        top_left = TRUE;
        P_X = *(cur - pitch - 1);
    }
    else
    {
        P_X = 128;
    }

    //===== INTRA PREDICTION FOR 4x4 BLOCK =====
    /* each mode writes its 16 predicted samples into encvid->pred_i4[mode],
       packed 4 bytes per row via uint32 stores */
    /* vertical */
    mode_avail[AVC_I4_Vertical] = 0;
    if (availability.top)
    {
        mode_avail[AVC_I4_Vertical] = 1;
        pred = encvid->pred_i4[AVC_I4_Vertical];

        temp = (P_D << 24) | (P_C << 16) | (P_B << 8) | P_A ;
        *((uint32*)pred) = temp; /* write 4 at a time */
        *((uint32*)(pred += 4)) = temp;
        *((uint32*)(pred += 4)) = temp;
        *((uint32*)(pred += 4)) = temp;
    }
    /* horizontal */
    mode_avail[AVC_I4_Horizontal] = 0;
    mode_avail[AVC_I4_Horizontal_Up] = 0;
    if (availability.left)
    {
        mode_avail[AVC_I4_Horizontal] = 1;
        pred = encvid->pred_i4[AVC_I4_Horizontal];

        /* replicate each left neighbor across its row */
        temp = P_I | (P_I << 8);
        temp = temp | (temp << 16);
        *((uint32*)pred) = temp;
        temp = P_J | (P_J << 8);
        temp = temp | (temp << 16);
        *((uint32*)(pred += 4)) = temp;
        temp = P_K | (P_K << 8);
        temp = temp | (temp << 16);
        *((uint32*)(pred += 4)) = temp;
        temp = P_L | (P_L << 8);
        temp = temp | (temp << 16);
        *((uint32*)(pred += 4)) = temp;

        /* horizontal-up: interpolated values walk down the left column */
        mode_avail[AVC_I4_Horizontal_Up] = 1;
        pred = encvid->pred_i4[AVC_I4_Horizontal_Up];

        Q0 = (P_J + P_K + 1) >> 1;
        Q1 = (P_J + (P_K << 1) + P_L + 2) >> 2;
        P0 = ((P_I + P_J + 1) >> 1);
        P1 = ((P_I + (P_J << 1) + P_K + 2) >> 2);

        temp = P0 | (P1 << 8);      // [P0 P1 Q0 Q1]
        temp |= (Q0 << 16);         // [Q0 Q1 R0 DO]
        temp |= (Q1 << 24);         // [R0 D0 D1 D1]
        *((uint32*)pred) = temp;    // [D1 D1 D1 D1]

        D0 = (P_K + 3 * P_L + 2) >> 2;
        R0 = (P_K + P_L + 1) >> 1;

        temp = Q0 | (Q1 << 8);
        temp |= (R0 << 16);
        temp |= (D0 << 24);
        *((uint32*)(pred += 4)) = temp;

        D1 = P_L;  /* bottom-most samples saturate to the last left neighbor */

        temp = R0 | (D0 << 8);
        temp |= (D1 << 16);
        temp |= (D1 << 24);
        *((uint32*)(pred += 4)) = temp;

        temp = D1 | (D1 << 8);
        temp |= (temp << 16);
        *((uint32*)(pred += 4)) = temp;
    }
    /* DC: average of whichever neighbor sets are available, else 128 */
    mode_avail[AVC_I4_DC] = 1;
    pred = encvid->pred_i4[AVC_I4_DC];
    if (availability.left)
    {
        DC = P_I + P_J + P_K + P_L;

        if (availability.top)
        {
            DC = (P_A + P_B + P_C + P_D + DC + 4) >> 3;
        }
        else
        {
            DC = (DC + 2) >> 2;

        }
    }
    else if (availability.top)
    {
        DC = (P_A + P_B + P_C + P_D + 2) >> 2;

    }
    else
    {
        DC = 128;
    }

    temp = DC | (DC << 8);
    temp = temp | (temp << 16);
    *((uint32*)pred) = temp;
    *((uint32*)(pred += 4)) = temp;
    *((uint32*)(pred += 4)) = temp;
    *((uint32*)(pred += 4)) = temp;

    /* Down-left */
    mode_avail[AVC_I4_Diagonal_Down_Left] = 0;

    if (availability.top)
    {
        mode_avail[AVC_I4_Diagonal_Down_Left] = 1;

        pred = encvid->pred_i4[AVC_I4_Diagonal_Down_Left];

        r0 = P_A;
        r1 = P_B;
        r2 = P_C;
        r3 = P_D;

        /* 3-tap [1 2 1] filter along the top (+top-right) neighbors */
        r0 += (r1 << 1);
        r0 += r2;
        r0 += 2;
        r0 >>= 2;
        r1 += (r2 << 1);
        r1 += r3;
        r1 += 2;
        r1 >>= 2;

        if (availability.top_right)
        {
            r4 = P_E;
            r5 = P_F;
            r6 = P_G;
            r7 = P_H;

            r2 += (r3 << 1);
            r2 += r4;
            r2 += 2;
            r2 >>= 2;
            r3 += (r4 << 1);
            r3 += r5;
            r3 += 2;
            r3 >>= 2;
            r4 += (r5 << 1);
            r4 += r6;
            r4 += 2;
            r4 >>= 2;
            r5 += (r6 << 1);
            r5 += r7;
            r5 += 2;
            r5 >>= 2;
            r6 += (3 * r7);
            r6 += 2;
            r6 >>= 2;
            /* each successive row is the previous row shifted one sample left */
            temp = r0 | (r1 << 8);
            temp |= (r2 << 16);
            temp |= (r3 << 24);
            *((uint32*)pred) = temp;

            temp = (temp >> 8) | (r4 << 24);
            *((uint32*)(pred += 4)) = temp;

            temp = (temp >> 8) | (r5 << 24);
            *((uint32*)(pred += 4)) = temp;

            temp = (temp >> 8) | (r6 << 24);
            *((uint32*)(pred += 4)) = temp;
        }
        else
        {
            /* top-right unavailable: extend with P_D per the standard */
            r2 += (r3 * 3);
            r2 += 2;
            r2 >>= 2;
            r3 = ((r3 << 2) + 2);
            r3 >>= 2;

            temp = r0 | (r1 << 8);
            temp |= (r2 << 16);
            temp |= (r3 << 24);
            *((uint32*)pred) = temp;

            temp = (temp >> 8) | (r3 << 24);
            *((uint32*)(pred += 4)) = temp;

            temp = (temp >> 8) | (r3 << 24);
            *((uint32*)(pred += 4)) = temp;

            temp = (temp >> 8) | (r3 << 24);
            *((uint32*)(pred += 4)) = temp;

        }
    }

    /* Down Right */
    mode_avail[AVC_I4_Diagonal_Down_Right] = 0;
    /* Diagonal Vertical Right */
    mode_avail[AVC_I4_Vertical_Right] = 0;
    /* Horizontal Down */
    mode_avail[AVC_I4_Horizontal_Down] = 0;

    /* the three modes below all require the top-left neighbor */
    if (top_left == TRUE)
    {
        /* Down Right */
        mode_avail[AVC_I4_Diagonal_Down_Right] = 1;
        pred = encvid->pred_i4[AVC_I4_Diagonal_Down_Right];

        Q_x = (P_A + 2 * P_B + P_C + 2) >> 2;
        R_x = (P_B + 2 * P_C + P_D + 2) >> 2;
        P_x = (P_X + 2 * P_A + P_B + 2) >> 2;
        D = (P_A + 2 * P_X + P_I + 2) >> 2;
        P_y = (P_X + 2 * P_I + P_J + 2) >> 2;
        Q_y = (P_I + 2 * P_J + P_K + 2) >> 2;
        R_y = (P_J + 2 * P_K + P_L + 2) >> 2;

        /* we can pack these */
        temp = D | (P_x << 8);      //[D   P_x Q_x R_x]
        //[P_y D   P_x Q_x]
        temp |= (Q_x << 16);        //[Q_y P_y D   P_x]
        temp |= (R_x << 24);        //[R_y Q_y P_y D  ]
        *((uint32*)pred) = temp;

        temp = P_y | (D << 8);
        temp |= (P_x << 16);
        temp |= (Q_x << 24);
        *((uint32*)(pred += 4)) = temp;

        temp = Q_y | (P_y << 8);
        temp |= (D << 16);
        temp |= (P_x << 24);
        *((uint32*)(pred += 4)) = temp;

        temp = R_y | (Q_y << 8);
        temp |= (P_y << 16);
        temp |= (D << 24);
        *((uint32*)(pred += 4)) = temp;


        /* Diagonal Vertical Right */
        mode_avail[AVC_I4_Vertical_Right] = 1;
        pred = encvid->pred_i4[AVC_I4_Vertical_Right];

        /* X0 = sum+1 terms; X0>>1 is the 2-tap average, (X0+Y0)>>2 the 3-tap */
        Q0 = P_A + P_B + 1;
        R0 = P_B + P_C + 1;
        S0 = P_C + P_D + 1;
        P0 = P_X + P_A + 1;
        D = (P_I + 2 * P_X + P_A + 2) >> 2;

        P1 = (P0 + Q0) >> 2;
        Q1 = (Q0 + R0) >> 2;
        R1 = (R0 + S0) >> 2;

        P0 >>= 1;
        Q0 >>= 1;
        R0 >>= 1;
        S0 >>= 1;

        P2 = (P_X + 2 * P_I + P_J + 2) >> 2;
        Q2 = (P_I + 2 * P_J + P_K + 2) >> 2;

        temp = P0 | (Q0 << 8);  //[P0 Q0 R0 S0]
        //[D  P1 Q1 R1]
        temp |= (R0 << 16);     //[P2 P0 Q0 R0]
        temp |= (S0 << 24);     //[Q2 D  P1 Q1]
        *((uint32*)pred) = temp;

        temp = D | (P1 << 8);
        temp |= (Q1 << 16);
        temp |= (R1 << 24);
        *((uint32*)(pred += 4)) = temp;

        temp = P2 | (P0 << 8);
        temp |= (Q0 << 16);
        temp |= (R0 << 24);
        *((uint32*)(pred += 4)) = temp;

        temp = Q2 | (D << 8);
        temp |= (P1 << 16);
        temp |= (Q1 << 24);
        *((uint32*)(pred += 4)) = temp;


        /* Horizontal Down */
        mode_avail[AVC_I4_Horizontal_Down] = 1;
        pred = encvid->pred_i4[AVC_I4_Horizontal_Down];


        Q2 = (P_A + 2 * P_B + P_C + 2) >> 2;
        P2 = (P_X + 2 * P_A + P_B + 2) >> 2;
        D = (P_I + 2 * P_X + P_A + 2) >> 2;
        P0 = P_X + P_I + 1;
        Q0 = P_I + P_J + 1;
        R0 = P_J + P_K + 1;
        S0 = P_K + P_L + 1;

        P1 = (P0 + Q0) >> 2;
        Q1 = (Q0 + R0) >> 2;
        R1 = (R0 + S0) >> 2;

        P0 >>= 1;
        Q0 >>= 1;
        R0 >>= 1;
        S0 >>= 1;


        /* we can pack these */
        temp = P0 | (D << 8);   //[P0 D  P2 Q2]
        //[Q0 P1 P0 D ]
        temp |= (P2 << 16);     //[R0 Q1 Q0 P1]
        temp |= (Q2 << 24);     //[S0 R1 R0 Q1]
        *((uint32*)pred) = temp;

        temp = Q0 | (P1 << 8);
        temp |= (P0 << 16);
        temp |= (D << 24);
        *((uint32*)(pred += 4)) = temp;

        temp = R0 | (Q1 << 8);
        temp |= (Q0 << 16);
        temp |= (P1 << 24);
        *((uint32*)(pred += 4)) = temp;

        temp = S0 | (R1 << 8);
        temp |= (R0 << 16);
        temp |= (Q1 << 24);
        *((uint32*)(pred += 4)) = temp;

    }

    /* vertical left */
    mode_avail[AVC_I4_Vertical_Left] = 0;
    if (availability.top)
    {
        mode_avail[AVC_I4_Vertical_Left] = 1;
        pred = encvid->pred_i4[AVC_I4_Vertical_Left];

        x0 = P_A + P_B + 1;
        x1 = P_B + P_C + 1;
        x2 = P_C + P_D + 1;
        if (availability.top_right)
        {
            x3 = P_D + P_E + 1;
            x4 = P_E + P_F + 1;
            x5 = P_F + P_G + 1;
        }
        else
        {
            x3 = x4 = x5 = (P_D << 1) + 1;  /* extend with P_D when top-right missing */
        }

        /* rows alternate between 2-tap (temp1) and 3-tap (temp2) filtered samples */
        temp1 = (x0 >> 1);
        temp1 |= ((x1 >> 1) << 8);
        temp1 |= ((x2 >> 1) << 16);
        temp1 |= ((x3 >> 1) << 24);

        *((uint32*)pred) = temp1;

        temp2 = ((x0 + x1) >> 2);
        temp2 |= (((x1 + x2) >> 2) << 8);
        temp2 |= (((x2 + x3) >> 2) << 16);
        temp2 |= (((x3 + x4) >> 2) << 24);

        *((uint32*)(pred += 4)) = temp2;

        temp1 = (temp1 >> 8) | ((x4 >> 1) << 24); /* rotate out old value */
        *((uint32*)(pred += 4)) = temp1;

        temp2 = (temp2 >> 8) | (((x4 + x5) >> 2) << 24); /* rotate out old value */
        *((uint32*)(pred += 4)) = temp2;
    }

    //===== LOOP OVER ALL 4x4 INTRA PREDICTION MODES =====
    // can re-order the search here instead of going in order

    // find most probable mode
    encvid->mostProbableI4Mode[blkidx] = mostProbableMode = FindMostProbableI4Mode(video, blkidx);

    min_cost = 0xFFFF;

    /* cost = SATD + rate; the most probable mode is signaled for free */
    for (ipmode = 0; ipmode < AVCNumI4PredMode; ipmode++)
    {
        if (mode_avail[ipmode] == TRUE)
        {
            cost  = (ipmode == mostProbableMode) ? 0 : fixedcost;
            pred = encvid->pred_i4[ipmode];

            cost_i4(org, org_pitch, pred, &cost);

            if (cost < min_cost)
            {
                currMB->i4Mode[blkidx] = (AVCIntra4x4PredMode)ipmode;
                min_cost   = cost;
                min_sad = cost - ((ipmode == mostProbableMode) ? 0 : fixedcost);
            }
        }
    }

    /* accumulate the distortion-only SAD for the whole macroblock */
    if (blkidx == 0)
    {
        encvid->i4_sad = min_sad;
    }
    else
    {
        encvid->i4_sad += min_sad;
    }

    return min_cost;
}
+
+int FindMostProbableI4Mode(AVCCommonObj *video, int blkidx)
+{
+ int dcOnlyPredictionFlag;
+ AVCMacroblock *currMB = video->currMB;
+ int intra4x4PredModeA, intra4x4PredModeB, predIntra4x4PredMode;
+
+
+ dcOnlyPredictionFlag = 0;
+ if (blkidx&0x3)
+ {
+ intra4x4PredModeA = currMB->i4Mode[blkidx-1]; // block to the left
+ }
+ else /* for blk 0, 4, 8, 12 */
+ {
+ if (video->intraAvailA)
+ {
+ if (video->mblock[video->mbAddrA].mbMode == AVC_I4)
+ {
+ intra4x4PredModeA = video->mblock[video->mbAddrA].i4Mode[blkidx + 3];
+ }
+ else
+ {
+ intra4x4PredModeA = AVC_I4_DC;
+ }
+ }
+ else
+ {
+ dcOnlyPredictionFlag = 1;
+ goto PRED_RESULT_READY; // skip below
+ }
+ }
+
+ if (blkidx >> 2)
+ {
+ intra4x4PredModeB = currMB->i4Mode[blkidx-4]; // block above
+ }
+ else /* block 0, 1, 2, 3 */
+ {
+ if (video->intraAvailB)
+ {
+ if (video->mblock[video->mbAddrB].mbMode == AVC_I4)
+ {
+ intra4x4PredModeB = video->mblock[video->mbAddrB].i4Mode[blkidx+12];
+ }
+ else
+ {
+ intra4x4PredModeB = AVC_I4_DC;
+ }
+ }
+ else
+ {
+ dcOnlyPredictionFlag = 1;
+ }
+ }
+
+PRED_RESULT_READY:
+ if (dcOnlyPredictionFlag)
+ {
+ intra4x4PredModeA = intra4x4PredModeB = AVC_I4_DC;
+ }
+
+ predIntra4x4PredMode = AVC_MIN(intra4x4PredModeA, intra4x4PredModeB);
+
+ return predIntra4x4PredMode;
+}
+
+void cost_i4(uint8 *org, int org_pitch, uint8 *pred, uint16 *cost)
+{
+ int k;
+ int16 res[16], *pres;
+ int m0, m1, m2, m3, tmp1;
+ int satd = 0;
+
+ pres = res;
+ // horizontal transform
+ k = 4;
+ while (k > 0)
+ {
+ m0 = org[0] - pred[0];
+ m3 = org[3] - pred[3];
+ m0 += m3;
+ m3 = m0 - (m3 << 1);
+ m1 = org[1] - pred[1];
+ m2 = org[2] - pred[2];
+ m1 += m2;
+ m2 = m1 - (m2 << 1);
+ pres[0] = m0 + m1;
+ pres[2] = m0 - m1;
+ pres[1] = m2 + m3;
+ pres[3] = m3 - m2;
+
+ org += org_pitch;
+ pres += 4;
+ pred += 4;
+ k--;
+ }
+ /* vertical transform */
+ pres = res;
+ k = 4;
+ while (k > 0)
+ {
+ m0 = pres[0];
+ m3 = pres[12];
+ m0 += m3;
+ m3 = m0 - (m3 << 1);
+ m1 = pres[4];
+ m2 = pres[8];
+ m1 += m2;
+ m2 = m1 - (m2 << 1);
+ pres[0] = m0 + m1;
+ pres[8] = m0 - m1;
+ pres[4] = m2 + m3;
+ pres[12] = m3 - m2;
+
+ pres++;
+ k--;
+
+ }
+
+ pres = res;
+ k = 4;
+ while (k > 0)
+ {
+ tmp1 = *pres++;
+ satd += ((tmp1 >= 0) ? tmp1 : -tmp1);
+ tmp1 = *pres++;
+ satd += ((tmp1 >= 0) ? tmp1 : -tmp1);
+ tmp1 = *pres++;
+ satd += ((tmp1 >= 0) ? tmp1 : -tmp1);
+ tmp1 = *pres++;
+ satd += ((tmp1 >= 0) ? tmp1 : -tmp1);
+ k--;
+ }
+
+ satd = (satd + 1) >> 1;
+ *cost += satd;
+
+ return ;
+}
+
/* Search for the best intra chroma prediction mode of the current MB.
   Builds the 8x8 Cb and 8x8 Cr predictions for DC, Horizontal, Vertical
   and Plane modes (each stored side by side in a 16-byte-wide buffer
   encvid->pred_ic[mode]: Cb in columns 0-7, Cr in columns 8-15), then
   scores each available mode with SATDChroma against the original input
   and records the winner in currMB->intra_chroma_pred_mode.
   NOTE(review): prediction rows are read/written with uint32 accesses,
   which assumes 4-byte-aligned, little-endian buffers -- confirm. */
void chroma_intra_search(AVCEncObject *encvid)
{
    AVCCommonObj *video = encvid->common;
    AVCPictureData *currPic = video->currPic;

    /* chroma MB is 8x8; positions/pitch are half the luma values */
    int x_pos = video->mb_x << 3;
    int y_pos = video->mb_y << 3;
    int pitch = currPic->pitch >> 1;
    int offset = y_pos * pitch + x_pos;

    uint8 *comp_ref_x, *comp_ref_y, *pred;
    int  sum_x0, sum_x1, sum_y0, sum_y1;
    /* per-quadrant DC values, index 0 = Cb, 1 = Cr:
       pred_0 = top-left, pred_1 = top-right, pred_2 = bottom-left, pred_3 = bottom-right */
    int pred_0[2], pred_1[2], pred_2[2], pred_3[2];
    uint32 pred_a, pred_b, pred_c, pred_d;
    int i, j, component;
    int a_16, b, c, factor_c, topleft;
    int H, V, value;
    uint8 *comp_ref_x0, *comp_ref_x1,  *comp_ref_y0, *comp_ref_y1;

    uint8 *curCb = currPic->Scb + offset;   /* reconstructed Cb (neighbors read from here) */
    uint8 *curCr = currPic->Scr + offset;   /* reconstructed Cr */

    uint8 *orgCb, *orgCr;
    AVCFrameIO *currInput = encvid->currInput;
    AVCMacroblock *currMB = video->currMB;
    int org_pitch;
    int cost, mincost;

    /* evaluate DC mode */
    /* NOTE: bitwise '&' relies on the avail flags being 0/1 */
    if (video->intraAvailB & video->intraAvailA)
    {
        comp_ref_x = curCb - pitch;     /* row above */
        comp_ref_y = curCb - 1;         /* column to the left */

        /* i==0 processes Cb, i==1 processes Cr (pointers swapped at loop end) */
        for (i = 0; i < 2; i++)
        {
            /* horizontal sums: add byte pairs within a packed uint32 */
            pred_a = *((uint32*)comp_ref_x);
            comp_ref_x += 4;
            pred_b = (pred_a >> 8) & 0xFF00FF;
            pred_a &= 0xFF00FF;
            pred_a += pred_b;
            pred_a += (pred_a >> 16);
            sum_x0 = pred_a & 0xFFFF;   /* sum of the 4 left-top neighbors */

            pred_a = *((uint32*)comp_ref_x);
            pred_b = (pred_a >> 8) & 0xFF00FF;
            pred_a &= 0xFF00FF;
            pred_a += pred_b;
            pred_a += (pred_a >> 16);
            sum_x1 = pred_a & 0xFFFF;   /* sum of the 4 right-top neighbors */

            pred_1[i] = (sum_x1 + 2) >> 2;  /* top-right quadrant: top neighbors only */

            sum_y0 = *comp_ref_y;
            sum_y0 += *(comp_ref_y += pitch);
            sum_y0 += *(comp_ref_y += pitch);
            sum_y0 += *(comp_ref_y += pitch);   /* upper 4 left neighbors */

            sum_y1 = *(comp_ref_y += pitch);
            sum_y1 += *(comp_ref_y += pitch);
            sum_y1 += *(comp_ref_y += pitch);
            sum_y1 += *(comp_ref_y += pitch);   /* lower 4 left neighbors */

            pred_2[i] = (sum_y1 + 2) >> 2;  /* bottom-left quadrant: left neighbors only */

            pred_0[i] = (sum_y0 + sum_x0 + 4) >> 3;     /* top-left: both */
            pred_3[i] = (sum_y1 + sum_x1 + 4) >> 3;     /* bottom-right: both */

            comp_ref_x = curCr - pitch;
            comp_ref_y = curCr - 1;
        }
    }

    else if (video->intraAvailA)    /* only left neighbors available */
    {
        comp_ref_y = curCb - 1;
        for (i = 0; i < 2; i++)
        {
            sum_y0 = *comp_ref_y;
            sum_y0 += *(comp_ref_y += pitch);
            sum_y0 += *(comp_ref_y += pitch);
            sum_y0 += *(comp_ref_y += pitch);

            sum_y1 = *(comp_ref_y += pitch);
            sum_y1 += *(comp_ref_y += pitch);
            sum_y1 += *(comp_ref_y += pitch);
            sum_y1 += *(comp_ref_y += pitch);

            pred_0[i] = pred_1[i] = (sum_y0 + 2) >> 2;
            pred_2[i] = pred_3[i] = (sum_y1 + 2) >> 2;

            comp_ref_y = curCr - 1;
        }
    }
    else if (video->intraAvailB)    /* only top neighbors available */
    {
        comp_ref_x = curCb - pitch;
        for (i = 0; i < 2; i++)
        {
            pred_a = *((uint32*)comp_ref_x);
            comp_ref_x += 4;
            pred_b = (pred_a >> 8) & 0xFF00FF;
            pred_a &= 0xFF00FF;
            pred_a += pred_b;
            pred_a += (pred_a >> 16);
            sum_x0 = pred_a & 0xFFFF;

            pred_a = *((uint32*)comp_ref_x);
            pred_b = (pred_a >> 8) & 0xFF00FF;
            pred_a &= 0xFF00FF;
            pred_a += pred_b;
            pred_a += (pred_a >> 16);
            sum_x1 = pred_a & 0xFFFF;

            pred_0[i] = pred_2[i] = (sum_x0 + 2) >> 2;
            pred_1[i] = pred_3[i] = (sum_x1 + 2) >> 2;

            comp_ref_x = curCr - pitch;
        }
    }
    else    /* no neighbors: mid-gray */
    {
        pred_0[0] = pred_0[1] = pred_1[0] = pred_1[1] =
                                                pred_2[0] = pred_2[1] = pred_3[0] = pred_3[1] = 128;
    }

    /* fill the DC prediction buffer: top half (4 rows) ... */
    pred = encvid->pred_ic[AVC_IC_DC];

    pred_a = pred_0[0];
    pred_b = pred_1[0];
    pred_a |= (pred_a << 8);
    pred_a |= (pred_a << 16);
    pred_b |= (pred_b << 8);
    pred_b |= (pred_b << 16);

    pred_c = pred_0[1];
    pred_d = pred_1[1];
    pred_c |= (pred_c << 8);
    pred_c |= (pred_c << 16);
    pred_d |= (pred_d << 8);
    pred_d |= (pred_d << 16);


    for (j = 0; j < 4; j++) /* 4 lines */
    {
        *((uint32*)pred) = pred_a;          /* Cb left quadrant */
        *((uint32*)(pred + 4)) = pred_b;    /* Cb right quadrant */
        *((uint32*)(pred + 8)) = pred_c;    /* Cr left quadrant */
        *((uint32*)(pred + 12)) = pred_d;   /* Cr right quadrant */
        pred += 16; /* move to the next line */
    }

    /* ... then bottom half (4 rows) */
    pred_a = pred_2[0];
    pred_b = pred_3[0];
    pred_a |= (pred_a << 8);
    pred_a |= (pred_a << 16);
    pred_b |= (pred_b << 8);
    pred_b |= (pred_b << 16);

    pred_c = pred_2[1];
    pred_d = pred_3[1];
    pred_c |= (pred_c << 8);
    pred_c |= (pred_c << 16);
    pred_d |= (pred_d << 8);
    pred_d |= (pred_d << 16);

    for (j = 0; j < 4; j++) /* 4 lines */
    {
        *((uint32*)pred) = pred_a;
        *((uint32*)(pred + 4)) = pred_b;
        *((uint32*)(pred + 8)) = pred_c;
        *((uint32*)(pred + 12)) = pred_d;
        pred += 16; /* move to the next line */
    }

    /* predict horizontal mode: replicate each left neighbor across its row */
    if (video->intraAvailA)
    {
        comp_ref_y = curCb - 1;
        comp_ref_x = curCr - 1;
        pred = encvid->pred_ic[AVC_IC_Horizontal];

        for (i = 4; i < 6; i++)     /* two passes of 4 rows each = 8 rows */
        {
            for (j = 0; j < 4; j++)
            {
                pred_a = *comp_ref_y;
                comp_ref_y += pitch;
                pred_a |= (pred_a << 8);
                pred_a |= (pred_a << 16);
                *((uint32*)pred) = pred_a;
                *((uint32*)(pred + 4)) = pred_a;

                pred_a = *comp_ref_x;
                comp_ref_x += pitch;
                pred_a |= (pred_a << 8);
                pred_a |= (pred_a << 16);
                *((uint32*)(pred + 8)) = pred_a;
                *((uint32*)(pred + 12)) = pred_a;

                pred += 16;
            }
        }
    }

    /* vertical mode: replicate the row above down all 8 rows */
    if (video->intraAvailB)
    {
        comp_ref_x = curCb - pitch;
        comp_ref_y = curCr - pitch;
        pred = encvid->pred_ic[AVC_IC_Vertical];

        pred_a = *((uint32*)comp_ref_x);
        pred_b = *((uint32*)(comp_ref_x + 4));
        pred_c = *((uint32*)comp_ref_y);
        pred_d = *((uint32*)(comp_ref_y + 4));

        for (j = 0; j < 8; j++)
        {
            *((uint32*)pred) = pred_a;
            *((uint32*)(pred + 4)) = pred_b;
            *((uint32*)(pred + 8)) = pred_c;
            *((uint32*)(pred + 12)) = pred_d;
            pred += 16;
        }
    }

    /* Intra_Chroma_Plane: least-squares plane fit through the neighbors */
    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
    {
        comp_ref_x = curCb - pitch;
        comp_ref_y = curCb - 1;
        topleft = curCb[-pitch-1];

        pred = encvid->pred_ic[AVC_IC_Plane];
        for (component = 0; component < 2; component++)     /* 0 = Cb, 1 = Cr */
        {
            /* H/V gradients: weighted differences of mirrored neighbor pairs */
            H = V = 0;
            comp_ref_x0 = comp_ref_x + 4;
            comp_ref_x1 = comp_ref_x + 2;
            comp_ref_y0 = comp_ref_y + (pitch << 2);
            comp_ref_y1 = comp_ref_y + (pitch << 1);
            for (i = 1; i < 4; i++)
            {
                H += i * (*comp_ref_x0++ - *comp_ref_x1--);
                V += i * (*comp_ref_y0 - *comp_ref_y1);
                comp_ref_y0 += pitch;
                comp_ref_y1 -= pitch;
            }
            /* deliberate: i == 4 here -- the outermost pair pairs with the top-left sample */
            H += i * (*comp_ref_x0++ - topleft);
            V += i * (*comp_ref_y0 - *comp_ref_y1);

            /* plane parameters a, b, c per H.264 8.3.4.4 (a pre-scaled by 16) */
            a_16 = ((*(comp_ref_x + 7) + *(comp_ref_y + 7 * pitch)) << 4) + 16;
            b = (17 * H + 16) >> 5;
            c = (17 * V + 16) >> 5;

            /* pred_a counts rows 0..7 across the two 4-row passes */
            pred_a = 0;
            for (i = 4; i < 6; i++)
            {
                for (j = 0; j < 4; j++)
                {
                    /* factor_c = a + c*(y-3) - 3b, then advance by b per column */
                    factor_c = a_16 + c * (pred_a++ - 3);

                    factor_c -= 3 * b;

                    /* emit 8 clipped samples for this row, 4 per uint32 store */
                    value = factor_c >> 5;
                    factor_c += b;
                    CLIP_RESULT(value)
                    pred_b = value;
                    value = factor_c >> 5;
                    factor_c += b;
                    CLIP_RESULT(value)
                    pred_b |= (value << 8);
                    value = factor_c >> 5;
                    factor_c += b;
                    CLIP_RESULT(value)
                    pred_b |= (value << 16);
                    value = factor_c >> 5;
                    factor_c += b;
                    CLIP_RESULT(value)
                    pred_b |= (value << 24);
                    *((uint32*)pred) = pred_b;

                    value = factor_c >> 5;
                    factor_c += b;
                    CLIP_RESULT(value)
                    pred_b = value;
                    value = factor_c >> 5;
                    factor_c += b;
                    CLIP_RESULT(value)
                    pred_b |= (value << 8);
                    value = factor_c >> 5;
                    factor_c += b;
                    CLIP_RESULT(value)
                    pred_b |= (value << 16);
                    value = factor_c >> 5;
                    factor_c += b;
                    CLIP_RESULT(value)
                    pred_b |= (value << 24);
                    *((uint32*)(pred + 4)) = pred_b;
                    pred += 16;     /* skip over the other component's half of the row */
                }
            }

            pred -= 120; /* point to cr */
            comp_ref_x = curCr - pitch;
            comp_ref_y = curCr - 1;
            topleft = curCr[-pitch-1];
        }
    }

    /* now evaluate it */

    org_pitch = (currInput->pitch) >> 1;
    offset = x_pos + y_pos * org_pitch;

    orgCb = currInput->YCbCr[1] + offset;
    orgCr = currInput->YCbCr[2] + offset;

    /* DC is always available and seeds the minimum */
    mincost = 0x7fffffff;
    cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_DC], mincost);
    if (cost < mincost)
    {
        mincost = cost;
        currMB->intra_chroma_pred_mode = AVC_IC_DC;
    }

    if (video->intraAvailA)
    {
        cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Horizontal], mincost);
        if (cost < mincost)
        {
            mincost = cost;
            currMB->intra_chroma_pred_mode = AVC_IC_Horizontal;
        }
    }

    if (video->intraAvailB)
    {
        cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Vertical], mincost);
        if (cost < mincost)
        {
            mincost = cost;
            currMB->intra_chroma_pred_mode = AVC_IC_Vertical;
        }
    }

    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
    {
        cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Plane], mincost);
        if (cost < mincost)
        {
            mincost = cost;
            currMB->intra_chroma_pred_mode = AVC_IC_Plane;
        }
    }


    return ;
}
+
+
/* Compute the SATD between the original 8x8 Cb/Cr blocks and a chroma
   prediction buffer laid out as 16-byte-wide rows (Cb in columns 0-7,
   Cr in columns 8-15, matching encvid->pred_ic[]). A 4x4 Hadamard-style
   butterfly is applied horizontally then vertically over the interleaved
   residual, and the absolute coefficients are summed. Bails out early and
   returns the running sum once it exceeds min_cost. */
int SATDChroma(uint8 *orgCb, uint8 *orgCr, int org_pitch, uint8 *pred, int min_cost)
{
    int cost;
    /* first take difference between orgCb, orgCr and pred */
    /* res holds one residual row of 16 (8 Cb + 8 Cr) per 16 entries, 8 rows total */
    int16 res[128], *pres; // residue
    int m0, m1, m2, m3, tmp1;
    int j, k;

    pres = res;
    org_pitch -= 8;     /* loops below advance orgCb/orgCr by 8 per row */
    // horizontal transform
    for (j = 0; j < 8; j++)
    {
        /* two 4-wide butterflies over the Cb half of the row ... */
        k = 2;
        while (k > 0)
        {
            m0 = orgCb[0] - pred[0];
            m3 = orgCb[3] - pred[3];
            m0 += m3;
            m3 = m0 - (m3 << 1);
            m1 = orgCb[1] - pred[1];
            m2 = orgCb[2] - pred[2];
            m1 += m2;
            m2 = m1 - (m2 << 1);
            pres[0] = m0 + m1;
            pres[2] = m0 - m1;
            pres[1] = m2 + m3;
            pres[3] = m3 - m2;

            orgCb += 4;
            pres += 4;
            pred += 4;
            k--;
        }
        orgCb += org_pitch;
        /* ... then two over the Cr half (pred advances straight through) */
        k = 2;
        while (k > 0)
        {
            m0 = orgCr[0] - pred[0];
            m3 = orgCr[3] - pred[3];
            m0 += m3;
            m3 = m0 - (m3 << 1);
            m1 = orgCr[1] - pred[1];
            m2 = orgCr[2] - pred[2];
            m1 += m2;
            m2 = m1 - (m2 << 1);
            pres[0] = m0 + m1;
            pres[2] = m0 - m1;
            pres[1] = m2 + m3;
            pres[3] = m3 - m2;

            orgCr += 4;
            pres += 4;
            pred += 4;
            k--;
        }
        orgCr += org_pitch;
    }

    /* vertical transform */
    /* j selects the upper (rows 0-3) or lower (rows 4-7) 4-row band;
       within a band, rows are 16 entries apart, hence the <<4 offsets */
    for (j = 0; j < 2; j++)
    {
        pres = res + (j << 6);
        k = 16;
        while (k > 0)
        {
            m0 = pres[0];
            m3 = pres[3<<4];
            m0 += m3;
            m3 = m0 - (m3 << 1);
            m1 = pres[1<<4];
            m2 = pres[2<<4];
            m1 += m2;
            m2 = m1 - (m2 << 1);
            pres[0] = m0 + m1;
            pres[2<<4] = m0 - m1;
            pres[1<<4] = m2 + m3;
            pres[3<<4] = m3 - m2;

            pres++;
            k--;
        }
    }

    /* now sum of absolute value */
    /* unrolled by 8 with an early-out check per group */
    pres = res;
    cost = 0;
    k = 128;
    while (k > 0)
    {
        tmp1 = *pres++;
        cost += ((tmp1 >= 0) ? tmp1 : -tmp1);
        tmp1 = *pres++;
        cost += ((tmp1 >= 0) ? tmp1 : -tmp1);
        tmp1 = *pres++;
        cost += ((tmp1 >= 0) ? tmp1 : -tmp1);
        tmp1 = *pres++;
        cost += ((tmp1 >= 0) ? tmp1 : -tmp1);
        tmp1 = *pres++;
        cost += ((tmp1 >= 0) ? tmp1 : -tmp1);
        tmp1 = *pres++;
        cost += ((tmp1 >= 0) ? tmp1 : -tmp1);
        tmp1 = *pres++;
        cost += ((tmp1 >= 0) ? tmp1 : -tmp1);
        tmp1 = *pres++;
        cost += ((tmp1 >= 0) ? tmp1 : -tmp1);
        k -= 8;
        if (cost > min_cost) /* early drop out */
        {
            return cost;
        }
    }

    return cost;
}
+
+
+
+///////////////////////////////// old code, unused
+/* find the best intra mode based on original (unencoded) frame */
+/* output is
+ currMB->mb_intra, currMB->mbMode,
+ currMB->i16Mode (if currMB->mbMode == AVC_I16)
+ currMB->i4Mode[..] (if currMB->mbMode == AVC_I4) */
+
+#ifdef FIXED_INTRAPRED_MODE
/* Debug/test variant of MBIntraSearch: instead of searching, force every
   intra mode to the compile-time constants FIXED_INTRAPRED_MODE,
   FIXED_I16_MODE, FIXED_I4_MODE and FIXED_INTRA_CHROMA_MODE (defined
   elsewhere), falling back to DC wherever the fixed mode's required
   neighbors are unavailable. Also refreshes the intraAvail* flags and
   clears the MB's motion vectors / reference indices.
   Outputs: currMB->mb_intra, mbMode, i16Mode or i4Mode[], and
   intra_chroma_pred_mode. */
void MBIntraSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum)
{
    (void)(mbNum);

    AVCCommonObj *video = encvid->common;
    int indx, block_x, block_y;

    /* recompute intra neighbor availability; with constrained intra
       prediction only intra-coded neighbor MBs may be referenced */
    video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0;

    if (!video->currPicParams->constrained_intra_pred_flag)
    {
        video->intraAvailA = video->mbAvailA;
        video->intraAvailB = video->mbAvailB;
        video->intraAvailC = video->mbAvailC;
        video->intraAvailD = video->mbAvailD;
    }
    else
    {
        if (video->mbAvailA)
        {
            video->intraAvailA = video->mblock[video->mbAddrA].mb_intra;
        }
        if (video->mbAvailB)
        {
            video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ;
        }
        if (video->mbAvailC)
        {
            video->intraAvailC = video->mblock[video->mbAddrC].mb_intra;
        }
        if (video->mbAvailD)
        {
            video->intraAvailD = video->mblock[video->mbAddrD].mb_intra;
        }
    }

    currMB->mb_intra = TRUE;
    currMB->mbMode = FIXED_INTRAPRED_MODE;

    if (currMB->mbMode == AVC_I16)
    {
        /* force the fixed I16 mode, demoting to DC if its neighbors are missing */
        currMB->i16Mode = FIXED_I16_MODE;

        if (FIXED_I16_MODE == AVC_I16_Vertical && !video->intraAvailB)
        {
            currMB->i16Mode = AVC_I16_DC;
        }

        if (FIXED_I16_MODE == AVC_I16_Horizontal && !video->intraAvailA)
        {
            currMB->i16Mode = AVC_I16_DC;
        }

        if (FIXED_I16_MODE == AVC_I16_Plane && !(video->intraAvailA && video->intraAvailB && video->intraAvailD))
        {
            currMB->i16Mode = AVC_I16_DC;
        }
    }
    else //if(currMB->mbMode == AVC_I4)
    {
        /* force the fixed I4 mode per 4x4 block; each mode's neighbor
           requirement is checked against this block's position in the MB */
        for (indx = 0; indx < 16; indx++)
        {
            block_x = blkIdx2blkX[indx];
            block_y = blkIdx2blkY[indx];

            currMB->i4Mode[(block_y<<2)+block_x] = FIXED_I4_MODE;

            if (FIXED_I4_MODE == AVC_I4_Vertical && !(block_y > 0 || video->intraAvailB))
            {
                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
            }

            if (FIXED_I4_MODE == AVC_I4_Horizontal && !(block_x || video->intraAvailA))
            {
                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
            }

            if (FIXED_I4_MODE == AVC_I4_Diagonal_Down_Left &&
                    (block_y == 0 && !video->intraAvailB))
            {
                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
            }

            /* Down-Right / Vertical-Right / Horizontal-Down all need the
               top-left neighbor: same availability test for each */
            if (FIXED_I4_MODE == AVC_I4_Diagonal_Down_Right &&
                    !((block_y && block_x)
                      || (block_y && video->intraAvailA)
                      || (block_x && video->intraAvailB)
                      || (video->intraAvailA && video->intraAvailD && video->intraAvailB)))
            {
                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
            }

            if (FIXED_I4_MODE == AVC_I4_Vertical_Right &&
                    !((block_y && block_x)
                      || (block_y && video->intraAvailA)
                      || (block_x && video->intraAvailB)
                      || (video->intraAvailA && video->intraAvailD && video->intraAvailB)))
            {
                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
            }

            if (FIXED_I4_MODE == AVC_I4_Horizontal_Down &&
                    !((block_y && block_x)
                      || (block_y && video->intraAvailA)
                      || (block_x && video->intraAvailB)
                      || (video->intraAvailA && video->intraAvailD && video->intraAvailB)))
            {
                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
            }

            if (FIXED_I4_MODE == AVC_I4_Vertical_Left &&
                    (block_y == 0 && !video->intraAvailB))
            {
                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
            }

            if (FIXED_I4_MODE == AVC_I4_Horizontal_Up && !(block_x || video->intraAvailA))
            {
                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
            }
        }
    }

    /* force the fixed chroma mode, demoting to DC if neighbors are missing */
    currMB->intra_chroma_pred_mode = FIXED_INTRA_CHROMA_MODE;

    if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Horizontal && !(video->intraAvailA))
    {
        currMB->intra_chroma_pred_mode = AVC_IC_DC;
    }

    if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Vertical && !(video->intraAvailB))
    {
        currMB->intra_chroma_pred_mode = AVC_IC_DC;
    }

    if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Plane && !(video->intraAvailA && video->intraAvailB && video->intraAvailD))
    {
        currMB->intra_chroma_pred_mode = AVC_IC_DC;
    }

    /* also reset the motion vectors */
    /* set MV and Ref_Idx codes of Intra blocks in P-slices */
    memset(currMB->mvL0, 0, sizeof(int32)*16);
    currMB->ref_idx_L0[0] = -1;
    currMB->ref_idx_L0[1] = -1;
    currMB->ref_idx_L0[2] = -1;
    currMB->ref_idx_L0[3] = -1;

    // output from this function, currMB->mbMode should be set to either
    // AVC_I4, AVC_I16, or else in AVCMBMode enum, mbType, mb_intra, intra_chroma_pred_mode */
    return ;
}
+#else // faster combined prediction+SAD calculation
+void MBIntraSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum)
+{
+ AVCCommonObj *video = encvid->common;
+ AVCFrameIO *currInput = encvid->currInput;
+ uint8 *curL, *curCb, *curCr;
+ uint8 *comp, *pred_block;
+ int block_x, block_y, offset;
+ uint sad, sad4, sadI4, sadI16;
+ int component, SubBlock_indx, temp;
+ int pitch = video->currPic->pitch;
+
+ /* calculate the cost of each intra prediction mode and compare to the
+ inter mode */
+ /* full search for all intra prediction */
+ offset = (video->mb_y << 4) * pitch + (video->mb_x << 4);
+ curL = currInput->YCbCr[0] + offset;
+ pred_block = video->pred_block + 84;
+
+ /* Assuming that InitNeighborAvailability has been called prior to this function */
+ video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0;
+
+ if (!video->currPicParams->constrained_intra_pred_flag)
+ {
+ video->intraAvailA = video->mbAvailA;
+ video->intraAvailB = video->mbAvailB;
+ video->intraAvailC = video->mbAvailC;
+ video->intraAvailD = video->mbAvailD;
+ }
+ else
+ {
+ if (video->mbAvailA)
+ {
+ video->intraAvailA = video->mblock[video->mbAddrA].mb_intra;
+ }
+ if (video->mbAvailB)
+ {
+ video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ;
+ }
+ if (video->mbAvailC)
+ {
+ video->intraAvailC = video->mblock[video->mbAddrC].mb_intra;
+ }
+ if (video->mbAvailD)
+ {
+ video->intraAvailD = video->mblock[video->mbAddrD].mb_intra;
+ }
+ }
+
+ /* currently we're doing exhaustive search. Smart search will be used later */
+
+ /* I16 modes */
+ curL = currInput->YCbCr[0] + offset;
+ video->pintra_pred_top = curL - pitch;
+ video->pintra_pred_left = curL - 1;
+ if (video->mb_y)
+ {
+ video->intra_pred_topleft = *(curL - pitch - 1);
+ }
+
+ /* Intra_16x16_Vertical */
+ sadI16 = 65536;
+ /* check availability of top */
+ if (video->intraAvailB)
+ {
+ sad = SAD_I16_Vert(video, curL, sadI16);
+
+ if (sad < sadI16)
+ {
+ sadI16 = sad;
+ currMB->i16Mode = AVC_I16_Vertical;
+ }
+ }
+ /* Intra_16x16_Horizontal */
+ /* check availability of left */
+ if (video->intraAvailA)
+ {
+ sad = SAD_I16_HorzDC(video, curL, AVC_I16_Horizontal, sadI16);
+
+ if (sad < sadI16)
+ {
+ sadI16 = sad;
+ currMB->i16Mode = AVC_I16_Horizontal;
+ }
+ }
+
+ /* Intra_16x16_DC, default mode */
+ sad = SAD_I16_HorzDC(video, curL, AVC_I16_DC, sadI16);
+ if (sad < sadI16)
+ {
+ sadI16 = sad;
+ currMB->i16Mode = AVC_I16_DC;
+ }
+
+ /* Intra_16x16_Plane */
+ if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
+ {
+ sad = SAD_I16_Plane(video, curL, sadI16);
+
+ if (sad < sadI16)
+ {
+ sadI16 = sad;
+ currMB->i16Mode = AVC_I16_Plane;
+ }
+ }
+
+ sadI16 >>= 1; /* before comparison */
+
+ /* selection between intra4, intra16 or inter mode */
+ if (sadI16 < encvid->min_cost)
+ {
+ currMB->mb_intra = TRUE;
+ currMB->mbMode = AVC_I16;
+ encvid->min_cost = sadI16;
+ }
+
+ if (currMB->mb_intra) /* only do the chrominance search when intra is decided */
+ {
+ /* Note that we might be able to guess the type of prediction from
+ the luma prediction type */
+
+ /* now search for the best chroma intra prediction */
+ offset = (offset >> 2) + (video->mb_x << 2);
+ curCb = currInput->YCbCr[1] + offset;
+ curCr = currInput->YCbCr[2] + offset;
+
+ pitch >>= 1;
+ video->pintra_pred_top_cb = curCb - pitch;
+ video->pintra_pred_left_cb = curCb - 1;
+ video->pintra_pred_top_cr = curCr - pitch;
+ video->pintra_pred_left_cr = curCr - 1;
+
+ if (video->mb_y)
+ {
+ video->intra_pred_topleft_cb = *(curCb - pitch - 1);
+ video->intra_pred_topleft_cr = *(curCr - pitch - 1);
+ }
+
+ /* Intra_Chroma_DC */
+ sad4 = SAD_Chroma_DC(video, curCb, curCr, 65536);
+ currMB->intra_chroma_pred_mode = AVC_IC_DC;
+
+ /* Intra_Chroma_Horizontal */
+ if (video->intraAvailA)
+ {
+ /* check availability of left */
+ sad = SAD_Chroma_Horz(video, curCb, curCr, sad4);
+ if (sad < sad4)
+ {
+ sad4 = sad;
+ currMB->intra_chroma_pred_mode = AVC_IC_Horizontal;
+ }
+ }
+
+ /* Intra_Chroma_Vertical */
+ if (video->intraAvailB)
+ {
+ /* check availability of top */
+ sad = SAD_Chroma_Vert(video, curCb, curCr, sad4);
+
+ if (sad < sad4)
+ {
+ sad4 = sad;
+ currMB->intra_chroma_pred_mode = AVC_IC_Vertical;
+ }
+ }
+
+ /* Intra_Chroma_Plane */
+ if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
+ {
+ /* check availability of top and left */
+ Intra_Chroma_Plane(video, pitch);
+
+ sad = SADChroma(pred_block + 452, curCb, curCr, pitch);
+
+ if (sad < sad4)
+ {
+ sad4 = sad;
+ currMB->intra_chroma_pred_mode = AVC_IC_Plane;
+ }
+ }
+
+ /* also reset the motion vectors */
+ /* set MV and Ref_Idx codes of Intra blocks in P-slices */
+ memset(currMB->mvL0, 0, sizeof(int32)*16);
+ memset(currMB->ref_idx_L0, -1, sizeof(int16)*4);
+
+ }
+
+ // output from this function, currMB->mbMode should be set to either
+ // AVC_I4, AVC_I16, or else in AVCMBMode enum, mbType, mb_intra, intra_chroma_pred_mode */
+
+ return ;
+}
+#endif
+
+
diff --git a/media/libstagefright/codecs/avc/enc/src/motion_comp.cpp b/media/libstagefright/codecs/avc/enc/src/motion_comp.cpp
new file mode 100644
index 0000000..ac62d78
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/motion_comp.cpp
@@ -0,0 +1,2156 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
#include <cstdint>

#include "avcenc_lib.h"
#include "avcenc_int.h"
+
+
/* Clamp x into the 8-bit range [0, 255].  For x < 0 the arithmetic shift
 * x>>31 yields all ones, so ~(x>>31) is 0 and x becomes 0; for x > 255 the
 * shift yields 0, so x becomes 0xFF.  Relies on a sign-extending right
 * shift of negative values (true on the targeted compilers). */
#define CLIP_RESULT(x) if((uint)x > 0xFF){ \
    x = 0xFF & (~(x>>31));}

/* Chroma motion-compensation dispatch table.
 * Index = (blkwidth << 2) + (dy << 1) + dx, where dx/dy are single-bit
 * fractional-position flags; entries 4..7 are the narrow-block ("2")
 * variants — presumably selected for 2-pixel-wide blocks, TODO confirm
 * against the callers of this table. */
/* (blkwidth << 2) + (dy << 1) + dx */
static void (*const eChromaMC_SIMD[8])(uint8 *, int , int , int , uint8 *, int, int , int) =
{
    &eChromaFullMC_SIMD,
    &eChromaHorizontalMC_SIMD,
    &eChromaVerticalMC_SIMD,
    &eChromaDiagonalMC_SIMD,
    &eChromaFullMC_SIMD,
    &eChromaHorizontalMC2_SIMD,
    &eChromaVerticalMC2_SIMD,
    &eChromaDiagonalMC2_SIMD
};
/* Perform motion prediction and compensation with residue if exist. */
/* Walks every macroblock partition and sub-partition of the current MB,
 * reads its L0 motion vector and reference index, and motion-compensates
 * the luma plane and both chroma planes (4:2:0 layout) directly into the
 * current picture buffers (predBlock/predCb/predCr alias curL/curCb/curCr). */
void AVCMBMotionComp(AVCEncObject *encvid, AVCCommonObj *video)
{
    (void)(encvid);  /* unused */

    AVCMacroblock *currMB = video->currMB;
    AVCPictureData *currPic = video->currPic;
    int mbPartIdx, subMbPartIdx;
    int ref_idx;
    int offset_MbPart_indx = 0;
    int16 *mv;
    uint32 x_pos, y_pos;
    uint8 *curL, *curCb, *curCr;
    uint8 *ref_l, *ref_Cb, *ref_Cr;
    uint8 *predBlock, *predCb, *predCr;
    int block_x, block_y, offset_x, offset_y, offsetP, offset;
    int x_position = (video->mb_x << 4);  /* MB origin in luma pixels */
    int y_position = (video->mb_y << 4);
    int MbHeight, MbWidth, mbPartIdx_X, mbPartIdx_Y, offset_indx;
    int picWidth = currPic->width;
    int picPitch = currPic->pitch;
    int picHeight = currPic->height;
    uint32 tmp_word;

    tmp_word = y_position * picPitch;
    curL = currPic->Sl + tmp_word + x_position;
    /* chroma offset for 4:2:0: (y/2)*(pitch/2) + x/2 == (y*pitch)/4 + x/2 */
    offset = (tmp_word >> 2) + (x_position >> 1);
    curCb = currPic->Scb + offset;
    curCr = currPic->Scr + offset;

    predBlock = curL;
    predCb = curCb;
    predCr = curCr;

    GetMotionVectorPredictor(video, 1);

    for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++)
    {
        MbHeight = currMB->SubMbPartHeight[mbPartIdx];
        MbWidth = currMB->SubMbPartWidth[mbPartIdx];
        /* map the linear partition index onto a 2x2 grid of 8x8 partitions;
         * offset_MbPart_indx skips a column when partitions are 16 wide */
        mbPartIdx_X = ((mbPartIdx + offset_MbPart_indx) & 1);
        mbPartIdx_Y = (mbPartIdx + offset_MbPart_indx) >> 1;
        ref_idx = currMB->ref_idx_L0[(mbPartIdx_Y << 1) + mbPartIdx_X];
        offset_indx = 0;

        ref_l = video->RefPicList0[ref_idx]->Sl;
        ref_Cb = video->RefPicList0[ref_idx]->Scb;
        ref_Cr = video->RefPicList0[ref_idx]->Scr;

        for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++)
        {
            /* block_x/block_y index the 4x4 grid of the MB */
            block_x = (mbPartIdx_X << 1) + ((subMbPartIdx + offset_indx) & 1);
            block_y = (mbPartIdx_Y << 1) + (((subMbPartIdx + offset_indx) >> 1) & 1);
            /* mvL0 entries pack the x component in the low 16 bits and the
             * y component in the high 16 bits (read via an int16 pointer) */
            mv = (int16*)(currMB->mvL0 + block_x + (block_y << 2));
            offset_x = x_position + (block_x << 2);
            offset_y = y_position + (block_y << 2);
            x_pos = (offset_x << 2) + *mv++; /*quarter pel */
            y_pos = (offset_y << 2) + *mv; /*quarter pel */

            //offset = offset_y * currPic->width;
            //offsetC = (offset >> 2) + (offset_x >> 1);
            offsetP = (block_y << 2) * picPitch + (block_x << 2);
            eLumaMotionComp(ref_l, picPitch, picHeight, x_pos, y_pos,
                            /*comp_Sl + offset + offset_x,*/
                            predBlock + offsetP, picPitch, MbWidth, MbHeight);

            /* chroma destination offset: 2 chroma rows per 4 luma rows.
             * NOTE(review): this uses picWidth where the luma path uses
             * picPitch — only equivalent when pitch == width; confirm. */
            offsetP = (block_y * picWidth) + (block_x << 1);
            eChromaMotionComp(ref_Cb, picWidth >> 1, picHeight >> 1, x_pos, y_pos,
                              /*comp_Scb + offsetC,*/
                              predCb + offsetP, picPitch >> 1, MbWidth >> 1, MbHeight >> 1);
            eChromaMotionComp(ref_Cr, picWidth >> 1, picHeight >> 1, x_pos, y_pos,
                              /*comp_Scr + offsetC,*/
                              predCr + offsetP, picPitch >> 1, MbWidth >> 1, MbHeight >> 1);

            /* 8-pixel-wide sub-partitions consume two 4x4 columns, so skip
             * the next linear index */
            offset_indx = currMB->SubMbPartWidth[mbPartIdx] >> 3;
        }
        offset_MbPart_indx = currMB->MbPartWidth >> 4;
    }

    return ;
}
+
+
+/* preform the actual motion comp here */
+void eLumaMotionComp(uint8 *ref, int picpitch, int picheight,
+ int x_pos, int y_pos,
+ uint8 *pred, int pred_pitch,
+ int blkwidth, int blkheight)
+{
+ (void)(picheight);
+
+ int dx, dy;
+ int temp2[21][21]; /* for intermediate results */
+ uint8 *ref2;
+
+ dx = x_pos & 3;
+ dy = y_pos & 3;
+ x_pos = x_pos >> 2; /* round it to full-pel resolution */
+ y_pos = y_pos >> 2;
+
+ /* perform actual motion compensation */
+ if (dx == 0 && dy == 0)
+ { /* fullpel position *//* G */
+
+ ref += y_pos * picpitch + x_pos;
+
+ eFullPelMC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight);
+
+ } /* other positions */
+ else if (dy == 0)
+ { /* no vertical interpolation *//* a,b,c*/
+
+ ref += y_pos * picpitch + x_pos;
+
+ eHorzInterp1MC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight, dx);
+ }
+ else if (dx == 0)
+ { /*no horizontal interpolation *//* d,h,n */
+
+ ref += y_pos * picpitch + x_pos;
+
+ eVertInterp1MC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight, dy);
+ }
+ else if (dy == 2)
+ { /* horizontal cross *//* i, j, k */
+
+ ref += y_pos * picpitch + x_pos - 2; /* move to the left 2 pixels */
+
+ eVertInterp2MC(ref, picpitch, &temp2[0][0], 21, blkwidth + 5, blkheight);
+
+ eHorzInterp2MC(&temp2[0][2], 21, pred, pred_pitch, blkwidth, blkheight, dx);
+ }
+ else if (dx == 2)
+ { /* vertical cross */ /* f,q */
+
+ ref += (y_pos - 2) * picpitch + x_pos; /* move to up 2 lines */
+
+ eHorzInterp3MC(ref, picpitch, &temp2[0][0], 21, blkwidth, blkheight + 5);
+ eVertInterp3MC(&temp2[2][0], 21, pred, pred_pitch, blkwidth, blkheight, dy);
+ }
+ else
+ { /* diagonal *//* e,g,p,r */
+
+ ref2 = ref + (y_pos + (dy / 2)) * picpitch + x_pos;
+
+ ref += (y_pos * picpitch) + x_pos + (dx / 2);
+
+ eDiagonalInterpMC(ref2, ref, picpitch, pred, pred_pitch, blkwidth, blkheight);
+ }
+
+ return ;
+}
+
+void eCreateAlign(uint8 *ref, int picpitch, int y_pos,
+ uint8 *out, int blkwidth, int blkheight)
+{
+ int i, j;
+ int offset, out_offset;
+ uint32 prev_pix, result, pix1, pix2, pix4;
+
+ ref += y_pos * picpitch;// + x_pos;
+ out_offset = 24 - blkwidth;
+
+ //switch(x_pos&0x3){
+ switch (((uint32)ref)&0x3)
+ {
+ case 1:
+ offset = picpitch - blkwidth - 3;
+ for (j = 0; j < blkheight; j++)
+ {
+ pix1 = *ref++;
+ pix2 = *((uint16*)ref);
+ ref += 2;
+ result = (pix2 << 8) | pix1;
+
+ for (i = 3; i < blkwidth; i += 4)
+ {
+ pix4 = *((uint32*)ref);
+ ref += 4;
+ prev_pix = (pix4 << 24) & 0xFF000000; /* mask out byte belong to previous word */
+ result |= prev_pix;
+ *((uint32*)out) = result; /* write 4 bytes */
+ out += 4;
+ result = pix4 >> 8; /* for the next loop */
+ }
+ ref += offset;
+ out += out_offset;
+ }
+ break;
+ case 2:
+ offset = picpitch - blkwidth - 2;
+ for (j = 0; j < blkheight; j++)
+ {
+ result = *((uint16*)ref);
+ ref += 2;
+ for (i = 2; i < blkwidth; i += 4)
+ {
+ pix4 = *((uint32*)ref);
+ ref += 4;
+ prev_pix = (pix4 << 16) & 0xFFFF0000; /* mask out byte belong to previous word */
+ result |= prev_pix;
+ *((uint32*)out) = result; /* write 4 bytes */
+ out += 4;
+ result = pix4 >> 16; /* for the next loop */
+ }
+ ref += offset;
+ out += out_offset;
+ }
+ break;
+ case 3:
+ offset = picpitch - blkwidth - 1;
+ for (j = 0; j < blkheight; j++)
+ {
+ result = *ref++;
+ for (i = 1; i < blkwidth; i += 4)
+ {
+ pix4 = *((uint32*)ref);
+ ref += 4;
+ prev_pix = (pix4 << 8) & 0xFFFFFF00; /* mask out byte belong to previous word */
+ result |= prev_pix;
+ *((uint32*)out) = result; /* write 4 bytes */
+ out += 4;
+ result = pix4 >> 24; /* for the next loop */
+ }
+ ref += offset;
+ out += out_offset;
+ }
+ break;
+ }
+}
+
/* Horizontal luma interpolation at quarter-pel accuracy: the H.264 6-tap
 * (1,-5,20,20,-5,1) half-pel filter, averaged with the nearest full-pel
 * sample (p_ref[dx]) when dx is odd (quarter/three-quarter positions).
 * Processes four output pixels per iteration by packing two values as
 * 16-bit lanes of a 32-bit word; r13 accumulates bits that escape the
 * 8-bit lanes, and any row that needed clipping is recomputed with a
 * scalar pass using CLIP_RESULT.
 * NOTE(review): loop bounds truncate pointers to uint32 — valid only on
 * 32-bit targets; confirm before reusing on 64-bit builds.
 *   in/inpitch   : source, at the left-most full pel of the block
 *   out/outpitch : destination, assumed word aligned
 *   dx           : horizontal quarter-pel phase (1, 2 or 3)              */
void eHorzInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch,
                    int blkwidth, int blkheight, int dx)
{
    uint8 *p_ref;
    uint32 *p_cur;
    uint32 tmp, pkres;
    int result, curr_offset, ref_offset;
    int j;
    int32 r0, r1, r2, r3, r4, r5;
    int32 r13, r6;

    p_cur = (uint32*)out; /* assume it's word aligned */
    curr_offset = (outpitch - blkwidth) >> 2;
    p_ref = in;
    ref_offset = inpitch - blkwidth;

    if (dx&1)
    {
        /* quarter-pel phases 1 and 3: half-pel filter then average with the
         * full-pel neighbor at p_ref[dx] */
        dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */
        p_ref -= 2;
        r13 = 0;
        for (j = blkheight; j > 0; j--)
        {
            tmp = (uint32)(p_ref + blkwidth);
            r0 = p_ref[0];
            r1 = p_ref[2];
            r0 |= (r1 << 16);           /* 0,c,0,a */
            r1 = p_ref[1];
            r2 = p_ref[3];
            r1 |= (r2 << 16);           /* 0,d,0,b */
            while ((uint32)p_ref < tmp)
            {
                r2 = *(p_ref += 4); /* move pointer to e */
                r3 = p_ref[2];
                r2 |= (r3 << 16);           /* 0,g,0,e */
                r3 = p_ref[1];
                r4 = p_ref[3];
                r3 |= (r4 << 16);           /* 0,h,0,f */

                r4 = r0 + r3;       /* c+h, a+f */
                r5 = r0 + r1;   /* c+d, a+b */
                r6 = r2 + r3;   /* g+h, e+f */
                r5 >>= 16;
                r5 |= (r6 << 16);   /* e+f, c+d */
                r4 += r5 * 20;      /* c+20*e+20*f+h, a+20*c+20*d+f */
                r4 += 0x100010; /* +16, +16 */
                r5 = r1 + r2;       /* d+g, b+e */
                r4 -= r5 * 5;       /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */
                r4 >>= 5;
                r13 |= r4;      /* check clipping */

                r5 = p_ref[dx+2];
                r6 = p_ref[dx+4];
                r5 |= (r6 << 16);
                r4 += r5;
                r4 += 0x10001;
                r4 = (r4 >> 1) & 0xFF00FF;

                r5 = p_ref[4];  /* i */
                r6 = (r5 << 16);
                r5 = r6 | (r2 >> 16);/* 0,i,0,g */
                r5 += r1;       /* d+i, b+g */ /* r5 not free */
                r1 >>= 16;
                r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */
                r1 += r2;       /* f+g, d+e */
                r5 += 20 * r1;  /* d+20f+20g+i, b+20d+20e+g */
                r0 >>= 16;
                r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */
                r0 += r3;       /* e+h, c+f */
                r5 += 0x100010; /* 16,16 */
                r5 -= r0 * 5;       /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */
                r5 >>= 5;
                r13 |= r5;      /* check clipping */

                r0 = p_ref[dx+3];
                r1 = p_ref[dx+5];
                r0 |= (r1 << 16);
                r5 += r0;
                r5 += 0x10001;
                r5 = (r5 >> 1) & 0xFF00FF;

                r4 |= (r5 << 8);    /* pack them together */
                *p_cur++ = r4;
                r1 = r3;
                r0 = r2;
            }
            p_cur += curr_offset; /* move to the next line */
            p_ref += ref_offset;  /*    ref_offset = inpitch-blkwidth; */

            if (r13&0xFF000700) /* need clipping */
            {
                /* r13 caught an overflow: redo this row with scalar code */
                /* move back to the beginning of the line */
                p_ref -= (ref_offset + blkwidth);   /* input */
                p_cur -= (outpitch >> 2);

                tmp = (uint32)(p_ref + blkwidth);
                for (; (uint32)p_ref < tmp;)
                {

                    r0 = *p_ref++;
                    r1 = *p_ref++;
                    r2 = *p_ref++;
                    r3 = *p_ref++;
                    r4 = *p_ref++;
                    /* first pixel */
                    r5 = *p_ref++;
                    result = (r0 + r5);
                    r0 = (r1 + r4);
                    result -= (r0 * 5);//result -= r0; result -= (r0<<2);
                    r0 = (r2 + r3);
                    result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
                    result = (result + 16) >> 5;
                    CLIP_RESULT(result)
                    /* 3/4 pel,  no need to clip */
                    result = (result + p_ref[dx] + 1);
                    pkres = (result >> 1) ;
                    /* second pixel */
                    r0 = *p_ref++;
                    result = (r1 + r0);
                    r1 = (r2 + r5);
                    result -= (r1 * 5);//result -= r1; result -= (r1<<2);
                    r1 = (r3 + r4);
                    result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
                    result = (result + 16) >> 5;
                    CLIP_RESULT(result)
                    /* 3/4 pel,  no need to clip */
                    result = (result + p_ref[dx] + 1);
                    result = (result >> 1);
                    pkres |= (result << 8);
                    /* third pixel */
                    r1 = *p_ref++;
                    result = (r2 + r1);
                    r2 = (r3 + r0);
                    result -= (r2 * 5);//result -= r2; result -= (r2<<2);
                    r2 = (r4 + r5);
                    result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
                    result = (result + 16) >> 5;
                    CLIP_RESULT(result)
                    /* 3/4 pel,  no need to clip */
                    result = (result + p_ref[dx] + 1);
                    result = (result >> 1);
                    pkres |= (result << 16);
                    /* fourth pixel */
                    r2 = *p_ref++;
                    result = (r3 + r2);
                    r3 = (r4 + r1);
                    result -= (r3 * 5);//result -= r3; result -= (r3<<2);
                    r3 = (r5 + r0);
                    result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
                    result = (result + 16) >> 5;
                    CLIP_RESULT(result)
                    /* 3/4 pel,  no need to clip */
                    result = (result + p_ref[dx] + 1);
                    result = (result >> 1);
                    pkres |= (result << 24);
                    *p_cur++ = pkres; /* write 4 pixels */
                    p_ref -= 5;  /* offset back to the middle of filter */
                }
                p_cur += curr_offset;  /* move to the next line */
                p_ref += ref_offset;    /* move to the next line */
            }
        }
    }
    else
    {
        /* dx == 2: pure half-pel position, no averaging step */
        p_ref -= 2;
        r13 = 0;
        for (j = blkheight; j > 0; j--)
        {
            tmp = (uint32)(p_ref + blkwidth);
            r0 = p_ref[0];
            r1 = p_ref[2];
            r0 |= (r1 << 16);           /* 0,c,0,a */
            r1 = p_ref[1];
            r2 = p_ref[3];
            r1 |= (r2 << 16);           /* 0,d,0,b */
            while ((uint32)p_ref < tmp)
            {
                r2 = *(p_ref += 4); /* move pointer to e */
                r3 = p_ref[2];
                r2 |= (r3 << 16);           /* 0,g,0,e */
                r3 = p_ref[1];
                r4 = p_ref[3];
                r3 |= (r4 << 16);           /* 0,h,0,f */

                r4 = r0 + r3;       /* c+h, a+f */
                r5 = r0 + r1;   /* c+d, a+b */
                r6 = r2 + r3;   /* g+h, e+f */
                r5 >>= 16;
                r5 |= (r6 << 16);   /* e+f, c+d */
                r4 += r5 * 20;      /* c+20*e+20*f+h, a+20*c+20*d+f */
                r4 += 0x100010; /* +16, +16 */
                r5 = r1 + r2;       /* d+g, b+e */
                r4 -= r5 * 5;       /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */
                r4 >>= 5;
                r13 |= r4;      /* check clipping */
                r4 &= 0xFF00FF; /* mask */

                r5 = p_ref[4];  /* i */
                r6 = (r5 << 16);
                r5 = r6 | (r2 >> 16);/* 0,i,0,g */
                r5 += r1;       /* d+i, b+g */ /* r5 not free */
                r1 >>= 16;
                r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */
                r1 += r2;       /* f+g, d+e */
                r5 += 20 * r1;  /* d+20f+20g+i, b+20d+20e+g */
                r0 >>= 16;
                r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */
                r0 += r3;       /* e+h, c+f */
                r5 += 0x100010; /* 16,16 */
                r5 -= r0 * 5;       /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */
                r5 >>= 5;
                r13 |= r5;      /* check clipping */
                r5 &= 0xFF00FF; /* mask */

                r4 |= (r5 << 8);    /* pack them together */
                *p_cur++ = r4;
                r1 = r3;
                r0 = r2;
            }
            p_cur += curr_offset; /* move to the next line */
            p_ref += ref_offset;  /*    ref_offset = inpitch-blkwidth; */

            if (r13&0xFF000700) /* need clipping */
            {
                /* overflow detected: redo the row with scalar clipping */
                /* move back to the beginning of the line */
                p_ref -= (ref_offset + blkwidth);   /* input */
                p_cur -= (outpitch >> 2);

                tmp = (uint32)(p_ref + blkwidth);
                for (; (uint32)p_ref < tmp;)
                {

                    r0 = *p_ref++;
                    r1 = *p_ref++;
                    r2 = *p_ref++;
                    r3 = *p_ref++;
                    r4 = *p_ref++;
                    /* first pixel */
                    r5 = *p_ref++;
                    result = (r0 + r5);
                    r0 = (r1 + r4);
                    result -= (r0 * 5);//result -= r0; result -= (r0<<2);
                    r0 = (r2 + r3);
                    result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
                    result = (result + 16) >> 5;
                    CLIP_RESULT(result)
                    pkres  = result;
                    /* second pixel */
                    r0 = *p_ref++;
                    result = (r1 + r0);
                    r1 = (r2 + r5);
                    result -= (r1 * 5);//result -= r1; result -= (r1<<2);
                    r1 = (r3 + r4);
                    result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
                    result = (result + 16) >> 5;
                    CLIP_RESULT(result)
                    pkres |= (result << 8);
                    /* third pixel */
                    r1 = *p_ref++;
                    result = (r2 + r1);
                    r2 = (r3 + r0);
                    result -= (r2 * 5);//result -= r2; result -= (r2<<2);
                    r2 = (r4 + r5);
                    result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
                    result = (result + 16) >> 5;
                    CLIP_RESULT(result)
                    pkres |= (result << 16);
                    /* fourth pixel */
                    r2 = *p_ref++;
                    result = (r3 + r2);
                    r3 = (r4 + r1);
                    result -= (r3 * 5);//result -= r3; result -= (r3<<2);
                    r3 = (r5 + r0);
                    result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
                    result = (result + 16) >> 5;
                    CLIP_RESULT(result)
                    pkres |= (result << 24);
                    *p_cur++ = pkres;   /* write 4 pixels */
                    p_ref -= 5;
                }
                p_cur += curr_offset; /* move to the next line */
                p_ref += ref_offset;
            }
        }
    }

    return ;
}
+
/* Horizontal quarter/half-pel filtering applied to the 32-bit intermediate
 * sums produced by a preceding vertical half-pel pass (the dy == 2 "cross"
 * cases).  The two cascaded 6-tap filters contribute a combined scale of
 * 1024, hence the (result + 512) >> 10 rounding, followed by CLIP_RESULT.
 * For odd dx the half-pel result is additionally averaged with the
 * vertically filtered full-pel column at p_ref[dx] ((value + 16) >> 5).
 * NOTE(review): loop bounds truncate pointers to uint32 — 32-bit only.
 *   in/inpitch   : intermediate 32-bit sums (from eVertInterp2MC)
 *   out/outpitch : destination pixels, assumed word aligned
 *   dx           : horizontal quarter-pel phase (1, 2 or 3)             */
void eHorzInterp2MC(int *in, int inpitch, uint8 *out, int outpitch,
                    int blkwidth, int blkheight, int dx)
{
    int *p_ref;
    uint32 *p_cur;
    uint32 tmp, pkres;
    int result, result2, curr_offset, ref_offset;
    int j, r0, r1, r2, r3, r4, r5;

    p_cur = (uint32*)out; /* assume it's word aligned */
    curr_offset = (outpitch - blkwidth) >> 2;
    p_ref = in;
    ref_offset = inpitch - blkwidth;

    if (dx&1)
    {
        /* quarter-pel phases: filter then average with p_ref[dx] */
        dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */

        for (j = blkheight; j > 0 ; j--)
        {
            tmp = (uint32)(p_ref + blkwidth);
            for (; (uint32)p_ref < tmp;)
            {

                r0 = p_ref[-2];
                r1 = p_ref[-1];
                r2 = *p_ref++;
                r3 = *p_ref++;
                r4 = *p_ref++;
                /* first pixel */
                r5 = *p_ref++;
                result = (r0 + r5);
                r0 = (r1 + r4);
                result -= (r0 * 5);//result -= r0; result -= (r0<<2);
                r0 = (r2 + r3);
                result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
                result = (result + 512) >> 10;
                CLIP_RESULT(result)
                result2 = ((p_ref[dx] + 16) >> 5);
                CLIP_RESULT(result2)
                /* 3/4 pel,  no need to clip */
                result = (result + result2 + 1);
                pkres = (result >> 1);
                /* second pixel */
                r0 = *p_ref++;
                result = (r1 + r0);
                r1 = (r2 + r5);
                result -= (r1 * 5);//result -= r1; result -= (r1<<2);
                r1 = (r3 + r4);
                result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
                result = (result + 512) >> 10;
                CLIP_RESULT(result)
                result2 = ((p_ref[dx] + 16) >> 5);
                CLIP_RESULT(result2)
                /* 3/4 pel,  no need to clip */
                result = (result + result2 + 1);
                result = (result >> 1);
                pkres |= (result << 8);
                /* third pixel */
                r1 = *p_ref++;
                result = (r2 + r1);
                r2 = (r3 + r0);
                result -= (r2 * 5);//result -= r2; result -= (r2<<2);
                r2 = (r4 + r5);
                result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
                result = (result + 512) >> 10;
                CLIP_RESULT(result)
                result2 = ((p_ref[dx] + 16) >> 5);
                CLIP_RESULT(result2)
                /* 3/4 pel,  no need to clip */
                result = (result + result2 + 1);
                result = (result >> 1);
                pkres |= (result << 16);
                /* fourth pixel */
                r2 = *p_ref++;
                result = (r3 + r2);
                r3 = (r4 + r1);
                result -= (r3 * 5);//result -= r3; result -= (r3<<2);
                r3 = (r5 + r0);
                result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
                result = (result + 512) >> 10;
                CLIP_RESULT(result)
                result2 = ((p_ref[dx] + 16) >> 5);
                CLIP_RESULT(result2)
                /* 3/4 pel,  no need to clip */
                result = (result + result2 + 1);
                result = (result >> 1);
                pkres |= (result << 24);
                *p_cur++ = pkres; /* write 4 pixels */
                p_ref -= 3;  /* offset back to the middle of filter */
            }
            p_cur += curr_offset; /* move to the next line */
            p_ref += ref_offset;    /* move to the next line */
        }
    }
    else
    {
        /* dx == 2: pure half-pel, no averaging step */
        for (j = blkheight; j > 0 ; j--)
        {
            tmp = (uint32)(p_ref + blkwidth);
            for (; (uint32)p_ref < tmp;)
            {

                r0 = p_ref[-2];
                r1 = p_ref[-1];
                r2 = *p_ref++;
                r3 = *p_ref++;
                r4 = *p_ref++;
                /* first pixel */
                r5 = *p_ref++;
                result = (r0 + r5);
                r0 = (r1 + r4);
                result -= (r0 * 5);//result -= r0; result -= (r0<<2);
                r0 = (r2 + r3);
                result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
                result = (result + 512) >> 10;
                CLIP_RESULT(result)
                pkres = result;
                /* second pixel */
                r0 = *p_ref++;
                result = (r1 + r0);
                r1 = (r2 + r5);
                result -= (r1 * 5);//result -= r1; result -= (r1<<2);
                r1 = (r3 + r4);
                result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
                result = (result + 512) >> 10;
                CLIP_RESULT(result)
                pkres |= (result << 8);
                /* third pixel */
                r1 = *p_ref++;
                result = (r2 + r1);
                r2 = (r3 + r0);
                result -= (r2 * 5);//result -= r2; result -= (r2<<2);
                r2 = (r4 + r5);
                result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
                result = (result + 512) >> 10;
                CLIP_RESULT(result)
                pkres |= (result << 16);
                /* fourth pixel */
                r2 = *p_ref++;
                result = (r3 + r2);
                r3 = (r4 + r1);
                result -= (r3 * 5);//result -= r3; result -= (r3<<2);
                r3 = (r5 + r0);
                result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
                result = (result + 512) >> 10;
                CLIP_RESULT(result)
                pkres |= (result << 24);
                *p_cur++ = pkres; /* write 4 pixels */
                p_ref -= 3;  /* offset back to the middle of filter */
            }
            p_cur += curr_offset; /* move to the next line */
            p_ref += ref_offset;    /* move to the next line */
        }
    }

    return ;
}
+
+void eHorzInterp3MC(uint8 *in, int inpitch, int *out, int outpitch,
+ int blkwidth, int blkheight)
+{
+ uint8 *p_ref;
+ int *p_cur;
+ uint32 tmp;
+ int result, curr_offset, ref_offset;
+ int j, r0, r1, r2, r3, r4, r5;
+
+ p_cur = out;
+ curr_offset = (outpitch - blkwidth);
+ p_ref = in;
+ ref_offset = inpitch - blkwidth;
+
+ for (j = blkheight; j > 0 ; j--)
+ {
+ tmp = (uint32)(p_ref + blkwidth);
+ for (; (uint32)p_ref < tmp;)
+ {
+
+ r0 = p_ref[-2];
+ r1 = p_ref[-1];
+ r2 = *p_ref++;
+ r3 = *p_ref++;
+ r4 = *p_ref++;
+ /* first pixel */
+ r5 = *p_ref++;
+ result = (r0 + r5);
+ r0 = (r1 + r4);
+ result -= (r0 * 5);//result -= r0; result -= (r0<<2);
+ r0 = (r2 + r3);
+ result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
+ *p_cur++ = result;
+ /* second pixel */
+ r0 = *p_ref++;
+ result = (r1 + r0);
+ r1 = (r2 + r5);
+ result -= (r1 * 5);//result -= r1; result -= (r1<<2);
+ r1 = (r3 + r4);
+ result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
+ *p_cur++ = result;
+ /* third pixel */
+ r1 = *p_ref++;
+ result = (r2 + r1);
+ r2 = (r3 + r0);
+ result -= (r2 * 5);//result -= r2; result -= (r2<<2);
+ r2 = (r4 + r5);
+ result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
+ *p_cur++ = result;
+ /* fourth pixel */
+ r2 = *p_ref++;
+ result = (r3 + r2);
+ r3 = (r4 + r1);
+ result -= (r3 * 5);//result -= r3; result -= (r3<<2);
+ r3 = (r5 + r0);
+ result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
+ *p_cur++ = result;
+ p_ref -= 3; /* move back to the middle of the filter */
+ }
+ p_cur += curr_offset; /* move to the next line */
+ p_ref += ref_offset;
+ }
+
+ return ;
+}
/* Vertical luma interpolation at quarter-pel accuracy (6-tap half-pel
 * filter, averaged with the nearest full-pel row when dy is odd).
 * Works on 4-pixel-wide column strips, packing two pixels per 32-bit
 * word as 16-bit lanes; r13 accumulates bits that escape the 8-bit
 * lanes, and a strip that overflowed is redone column-by-column with
 * scalar CLIP_RESULT code.  A source that is not word aligned is first
 * copied into the aligned tmp_in scratch buffer via eCreateAlign.
 * NOTE(review): both the alignment test on 'in' and the loop bounds
 * truncate pointers to uint32 — valid only on 32-bit targets.
 *   in/inpitch   : source, at the top-most full pel of the block
 *   out/outpitch : destination pixels
 *   dy           : vertical quarter-pel phase (1, 2 or 3)              */
void eVertInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch,
                    int blkwidth, int blkheight, int dy)
{
    uint8 *p_cur, *p_ref;
    uint32 tmp;
    int result, curr_offset, ref_offset;
    int j, i;
    int32 r0, r1, r2, r3, r4, r5, r6, r7, r8, r13;
    uint8 tmp_in[24][24];

    /* not word-aligned */
    if (((uint32)in)&0x3)
    {
        eCreateAlign(in, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5);
        in = &tmp_in[2][0];
        inpitch = 24;
    }
    p_cur = out;
    curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */
    ref_offset = blkheight * inpitch; /* for limit */

    curr_offset += 3;

    if (dy&1)
    {
        /* quarter-pel phases 1 and 3: half-pel filter then average with
         * the full-pel row selected by dy */
        dy = (dy >> 1) ? 0 : -inpitch;

        for (j = 0; j < blkwidth; j += 4, in += 4)
        {
            r13 = 0;
            p_ref = in;
            p_cur -= outpitch;  /* compensate for the first offset */
            tmp = (uint32)(p_ref + ref_offset); /* limit */
            while ((uint32)p_ref < tmp)  /* the loop un-rolled  */
            {
                r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */
                p_ref += inpitch;
                r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */
                r0 &= 0xFF00FF;

                r1 = *((uint32*)(p_ref + (inpitch << 1)));  /* r1, r7, ref[3] */
                r7 = (r1 >> 8) & 0xFF00FF;
                r1 &= 0xFF00FF;

                r0 += r1;
                r6 += r7;

                r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */
                r8 = (r2 >> 8) & 0xFF00FF;
                r2 &= 0xFF00FF;

                r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */
                r7 = (r1 >> 8) & 0xFF00FF;
                r1 &= 0xFF00FF;
                r1 += r2;

                r7 += r8;

                r0 += 20 * r1;
                r6 += 20 * r7;
                r0 += 0x100010;
                r6 += 0x100010;

                r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */
                r8 = (r2 >> 8) & 0xFF00FF;
                r2 &= 0xFF00FF;

                r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */
                r7 = (r1 >> 8) & 0xFF00FF;
                r1 &= 0xFF00FF;
                r1 += r2;

                r7 += r8;

                r0 -= 5 * r1;
                r6 -= 5 * r7;

                r0 >>= 5;
                r6 >>= 5;
                /* clip */
                r13 |= r6;
                r13 |= r0;
                //CLIPPACK(r6,result)

                r1 = *((uint32*)(p_ref + dy));
                r2 = (r1 >> 8) & 0xFF00FF;
                r1 &= 0xFF00FF;
                r0 += r1;
                r6 += r2;
                r0 += 0x10001;
                r6 += 0x10001;
                r0 = (r0 >> 1) & 0xFF00FF;
                r6 = (r6 >> 1) & 0xFF00FF;

                r0 |= (r6 << 8);  /* pack it back */
                *((uint32*)(p_cur += outpitch)) = r0;
            }
            p_cur += curr_offset; /* offset to the next pixel */
            if (r13 & 0xFF000700) /* this column need clipping */
            {
                /* overflow detected: redo the 4 columns with scalar code */
                p_cur -= 4;
                for (i = 0; i < 4; i++)
                {
                    p_ref = in + i;
                    p_cur -= outpitch;  /* compensate for the first offset */

                    tmp = (uint32)(p_ref + ref_offset); /* limit */
                    while ((uint32)p_ref < tmp)
                    {                           /* loop un-rolled */
                        r0 = *(p_ref - (inpitch << 1));
                        r1 = *(p_ref - inpitch);
                        r2 = *p_ref;
                        r3 = *(p_ref += inpitch);  /* modify pointer before loading */
                        r4 = *(p_ref += inpitch);
                        /* first pixel */
                        r5 = *(p_ref += inpitch);
                        result = (r0 + r5);
                        r0 = (r1 + r4);
                        result -= (r0 * 5);//result -= r0; result -= (r0<<2);
                        r0 = (r2 + r3);
                        result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
                        result = (result + 16) >> 5;
                        CLIP_RESULT(result)
                        /* 3/4 pel,  no need to clip */
                        result = (result + p_ref[dy-(inpitch<<1)] + 1);
                        result = (result >> 1);
                        *(p_cur += outpitch) = result;
                        /* second pixel */
                        r0 = *(p_ref += inpitch);
                        result = (r1 + r0);
                        r1 = (r2 + r5);
                        result -= (r1 * 5);//result -= r1; result -= (r1<<2);
                        r1 = (r3 + r4);
                        result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
                        result = (result + 16) >> 5;
                        CLIP_RESULT(result)
                        /* 3/4 pel,  no need to clip */
                        result = (result + p_ref[dy-(inpitch<<1)] + 1);
                        result = (result >> 1);
                        *(p_cur += outpitch) = result;
                        /* third pixel */
                        r1 = *(p_ref += inpitch);
                        result = (r2 + r1);
                        r2 = (r3 + r0);
                        result -= (r2 * 5);//result -= r2; result -= (r2<<2);
                        r2 = (r4 + r5);
                        result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
                        result = (result + 16) >> 5;
                        CLIP_RESULT(result)
                        /* 3/4 pel,  no need to clip */
                        result = (result + p_ref[dy-(inpitch<<1)] + 1);
                        result = (result >> 1);
                        *(p_cur += outpitch) = result;
                        /* fourth pixel */
                        r2 = *(p_ref += inpitch);
                        result = (r3 + r2);
                        r3 = (r4 + r1);
                        result -= (r3 * 5);//result -= r3; result -= (r3<<2);
                        r3 = (r5 + r0);
                        result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
                        result = (result + 16) >> 5;
                        CLIP_RESULT(result)
                        /* 3/4 pel,  no need to clip */
                        result = (result + p_ref[dy-(inpitch<<1)] + 1);
                        result = (result >> 1);
                        *(p_cur += outpitch) = result;
                        p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
                    }
                    p_cur += (curr_offset - 3);
                }
            }
        }
    }
    else
    {
        /* dy == 2: pure half-pel position, no averaging step */
        for (j = 0; j < blkwidth; j += 4, in += 4)
        {
            r13 = 0;
            p_ref = in;
            p_cur -= outpitch;  /* compensate for the first offset */
            tmp = (uint32)(p_ref + ref_offset); /* limit */
            while ((uint32)p_ref < tmp)  /* the loop un-rolled  */
            {
                r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */
                p_ref += inpitch;
                r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */
                r0 &= 0xFF00FF;

                r1 = *((uint32*)(p_ref + (inpitch << 1)));  /* r1, r7, ref[3] */
                r7 = (r1 >> 8) & 0xFF00FF;
                r1 &= 0xFF00FF;

                r0 += r1;
                r6 += r7;

                r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */
                r8 = (r2 >> 8) & 0xFF00FF;
                r2 &= 0xFF00FF;

                r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */
                r7 = (r1 >> 8) & 0xFF00FF;
                r1 &= 0xFF00FF;
                r1 += r2;

                r7 += r8;

                r0 += 20 * r1;
                r6 += 20 * r7;
                r0 += 0x100010;
                r6 += 0x100010;

                r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */
                r8 = (r2 >> 8) & 0xFF00FF;
                r2 &= 0xFF00FF;

                r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */
                r7 = (r1 >> 8) & 0xFF00FF;
                r1 &= 0xFF00FF;
                r1 += r2;

                r7 += r8;

                r0 -= 5 * r1;
                r6 -= 5 * r7;

                r0 >>= 5;
                r6 >>= 5;
                /* clip */
                r13 |= r6;
                r13 |= r0;
                //CLIPPACK(r6,result)
                r0 &= 0xFF00FF;
                r6 &= 0xFF00FF;
                r0 |= (r6 << 8);  /* pack it back */
                *((uint32*)(p_cur += outpitch)) = r0;
            }
            p_cur += curr_offset; /* offset to the next pixel */
            if (r13 & 0xFF000700) /* this column need clipping */
            {
                /* overflow detected: redo the 4 columns with scalar code */
                p_cur -= 4;
                for (i = 0; i < 4; i++)
                {
                    p_ref = in + i;
                    p_cur -= outpitch;  /* compensate for the first offset */
                    tmp = (uint32)(p_ref + ref_offset); /* limit */
                    while ((uint32)p_ref < tmp)
                    {                           /* loop un-rolled */
                        r0 = *(p_ref - (inpitch << 1));
                        r1 = *(p_ref - inpitch);
                        r2 = *p_ref;
                        r3 = *(p_ref += inpitch);  /* modify pointer before loading */
                        r4 = *(p_ref += inpitch);
                        /* first pixel */
                        r5 = *(p_ref += inpitch);
                        result = (r0 + r5);
                        r0 = (r1 + r4);
                        result -= (r0 * 5);//result -= r0; result -= (r0<<2);
                        r0 = (r2 + r3);
                        result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
                        result = (result + 16) >> 5;
                        CLIP_RESULT(result)
                        *(p_cur += outpitch) = result;
                        /* second pixel */
                        r0 = *(p_ref += inpitch);
                        result = (r1 + r0);
                        r1 = (r2 + r5);
                        result -= (r1 * 5);//result -= r1; result -= (r1<<2);
                        r1 = (r3 + r4);
                        result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
                        result = (result + 16) >> 5;
                        CLIP_RESULT(result)
                        *(p_cur += outpitch) = result;
                        /* third pixel */
                        r1 = *(p_ref += inpitch);
                        result = (r2 + r1);
                        r2 = (r3 + r0);
                        result -= (r2 * 5);//result -= r2; result -= (r2<<2);
                        r2 = (r4 + r5);
                        result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
                        result = (result + 16) >> 5;
                        CLIP_RESULT(result)
                        *(p_cur += outpitch) = result;
                        /* fourth pixel */
                        r2 = *(p_ref += inpitch);
                        result = (r3 + r2);
                        r3 = (r4 + r1);
                        result -= (r3 * 5);//result -= r3; result -= (r3<<2);
                        r3 = (r5 + r0);
                        result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
                        result = (result + 16) >> 5;
                        CLIP_RESULT(result)
                        *(p_cur += outpitch) = result;
                        p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
                    }
                    p_cur += (curr_offset - 3);
                }
            }
        }
    }

    return ;
}
+
+void eVertInterp2MC(uint8 *in, int inpitch, int *out, int outpitch,
+ int blkwidth, int blkheight)
+{
+ int *p_cur;
+ uint8 *p_ref;
+ uint32 tmp;
+ int result, curr_offset, ref_offset;
+ int j, r0, r1, r2, r3, r4, r5;
+
+ p_cur = out;
+ curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */
+ ref_offset = blkheight * inpitch; /* for limit */
+
+ for (j = 0; j < blkwidth; j++)
+ {
+ p_cur -= outpitch; /* compensate for the first offset */
+ p_ref = in++;
+
+ tmp = (uint32)(p_ref + ref_offset); /* limit */
+ while ((uint32)p_ref < tmp)
+ { /* loop un-rolled */
+ r0 = *(p_ref - (inpitch << 1));
+ r1 = *(p_ref - inpitch);
+ r2 = *p_ref;
+ r3 = *(p_ref += inpitch); /* modify pointer before loading */
+ r4 = *(p_ref += inpitch);
+ /* first pixel */
+ r5 = *(p_ref += inpitch);
+ result = (r0 + r5);
+ r0 = (r1 + r4);
+ result -= (r0 * 5);//result -= r0; result -= (r0<<2);
+ r0 = (r2 + r3);
+ result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
+ *(p_cur += outpitch) = result;
+ /* second pixel */
+ r0 = *(p_ref += inpitch);
+ result = (r1 + r0);
+ r1 = (r2 + r5);
+ result -= (r1 * 5);//result -= r1; result -= (r1<<2);
+ r1 = (r3 + r4);
+ result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
+ *(p_cur += outpitch) = result;
+ /* third pixel */
+ r1 = *(p_ref += inpitch);
+ result = (r2 + r1);
+ r2 = (r3 + r0);
+ result -= (r2 * 5);//result -= r2; result -= (r2<<2);
+ r2 = (r4 + r5);
+ result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
+ *(p_cur += outpitch) = result;
+ /* fourth pixel */
+ r2 = *(p_ref += inpitch);
+ result = (r3 + r2);
+ r3 = (r4 + r1);
+ result -= (r3 * 5);//result -= r3; result -= (r3<<2);
+ r3 = (r5 + r0);
+ result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
+ *(p_cur += outpitch) = result;
+ p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */
+ }
+ p_cur += curr_offset;
+ }
+
+ return ;
+}
+
+/* Vertical luma interpolation on intermediate 32-bit samples ("3" variant:
+ * input is int*, i.e. already horizontally filtered, hence the 10-bit
+ * rounding (result+512)>>10).  Applies the H.264 6-tap filter
+ * (1,-5,20,20,-5,1) down each column, clips to 8 bits and stores to 'out'.
+ * When dy is odd, the half-pel result is averaged with the adjacent
+ * rounded sample (p_ref[dy]+16)>>5 to form the quarter-pel position.
+ * The column loop is unrolled 4x, so blkheight is assumed to be a
+ * multiple of 4 -- TODO confirm with callers.
+ * NOTE(review): loop limits cast pointers through uint32; this assumes a
+ * 32-bit address space and is not 64-bit safe -- confirm target ABI. */
+void eVertInterp3MC(int *in, int inpitch, uint8 *out, int outpitch,
+ int blkwidth, int blkheight, int dy)
+{
+ uint8 *p_cur;
+ int *p_ref;
+ uint32 tmp;
+ int result, result2, curr_offset, ref_offset;
+ int j, r0, r1, r2, r3, r4, r5;
+
+ p_cur = out;
+ curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */
+ ref_offset = blkheight * inpitch; /* for limit */
+
+ if (dy&1)
+ {
+ /* reuse 'dy' as the byte offset to the neighbor used for quarter-pel
+ * averaging: row above or below the half-pel center */
+ dy = (dy >> 1) ? -(inpitch << 1) : -(inpitch << 1) - inpitch;
+
+ for (j = 0; j < blkwidth; j++)
+ {
+ p_cur -= outpitch; /* compensate for the first offset */
+ p_ref = in++;
+
+ tmp = (uint32)(p_ref + ref_offset); /* limit */
+ while ((uint32)p_ref < tmp)
+ { /* loop un-rolled */
+ r0 = *(p_ref - (inpitch << 1));
+ r1 = *(p_ref - inpitch);
+ r2 = *p_ref;
+ r3 = *(p_ref += inpitch); /* modify pointer before loading */
+ r4 = *(p_ref += inpitch);
+ /* first pixel */
+ r5 = *(p_ref += inpitch);
+ result = (r0 + r5);
+ r0 = (r1 + r4);
+ result -= (r0 * 5);//result -= r0; result -= (r0<<2);
+ r0 = (r2 + r3);
+ result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
+ result = (result + 512) >> 10;
+ CLIP_RESULT(result)
+ result2 = ((p_ref[dy] + 16) >> 5);
+ CLIP_RESULT(result2)
+ /* 3/4 pel, no need to clip */
+ result = (result + result2 + 1);
+ result = (result >> 1);
+ *(p_cur += outpitch) = result;
+ /* second pixel */
+ r0 = *(p_ref += inpitch);
+ result = (r1 + r0);
+ r1 = (r2 + r5);
+ result -= (r1 * 5);//result -= r1; result -= (r1<<2);
+ r1 = (r3 + r4);
+ result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
+ result = (result + 512) >> 10;
+ CLIP_RESULT(result)
+ result2 = ((p_ref[dy] + 16) >> 5);
+ CLIP_RESULT(result2)
+ /* 3/4 pel, no need to clip */
+ result = (result + result2 + 1);
+ result = (result >> 1);
+ *(p_cur += outpitch) = result;
+ /* third pixel */
+ r1 = *(p_ref += inpitch);
+ result = (r2 + r1);
+ r2 = (r3 + r0);
+ result -= (r2 * 5);//result -= r2; result -= (r2<<2);
+ r2 = (r4 + r5);
+ result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
+ result = (result + 512) >> 10;
+ CLIP_RESULT(result)
+ result2 = ((p_ref[dy] + 16) >> 5);
+ CLIP_RESULT(result2)
+ /* 3/4 pel, no need to clip */
+ result = (result + result2 + 1);
+ result = (result >> 1);
+ *(p_cur += outpitch) = result;
+ /* fourth pixel */
+ r2 = *(p_ref += inpitch);
+ result = (r3 + r2);
+ r3 = (r4 + r1);
+ result -= (r3 * 5);//result -= r3; result -= (r3<<2);
+ r3 = (r5 + r0);
+ result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
+ result = (result + 512) >> 10;
+ CLIP_RESULT(result)
+ result2 = ((p_ref[dy] + 16) >> 5);
+ CLIP_RESULT(result2)
+ /* 3/4 pel, no need to clip */
+ result = (result + result2 + 1);
+ result = (result >> 1);
+ *(p_cur += outpitch) = result;
+ p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */
+ }
+ p_cur += curr_offset;
+ }
+ }
+ else
+ {
+ /* pure half-pel: 6-tap filter only, no neighbor averaging */
+ for (j = 0; j < blkwidth; j++)
+ {
+ p_cur -= outpitch; /* compensate for the first offset */
+ p_ref = in++;
+
+ tmp = (uint32)(p_ref + ref_offset); /* limit */
+ while ((uint32)p_ref < tmp)
+ { /* loop un-rolled */
+ r0 = *(p_ref - (inpitch << 1));
+ r1 = *(p_ref - inpitch);
+ r2 = *p_ref;
+ r3 = *(p_ref += inpitch); /* modify pointer before loading */
+ r4 = *(p_ref += inpitch);
+ /* first pixel */
+ r5 = *(p_ref += inpitch);
+ result = (r0 + r5);
+ r0 = (r1 + r4);
+ result -= (r0 * 5);//result -= r0; result -= (r0<<2);
+ r0 = (r2 + r3);
+ result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
+ result = (result + 512) >> 10;
+ CLIP_RESULT(result)
+ *(p_cur += outpitch) = result;
+ /* second pixel */
+ r0 = *(p_ref += inpitch);
+ result = (r1 + r0);
+ r1 = (r2 + r5);
+ result -= (r1 * 5);//result -= r1; result -= (r1<<2);
+ r1 = (r3 + r4);
+ result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
+ result = (result + 512) >> 10;
+ CLIP_RESULT(result)
+ *(p_cur += outpitch) = result;
+ /* third pixel */
+ r1 = *(p_ref += inpitch);
+ result = (r2 + r1);
+ r2 = (r3 + r0);
+ result -= (r2 * 5);//result -= r2; result -= (r2<<2);
+ r2 = (r4 + r5);
+ result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
+ result = (result + 512) >> 10;
+ CLIP_RESULT(result)
+ *(p_cur += outpitch) = result;
+ /* fourth pixel */
+ r2 = *(p_ref += inpitch);
+ result = (r3 + r2);
+ r3 = (r4 + r1);
+ result -= (r3 * 5);//result -= r3; result -= (r3<<2);
+ r3 = (r5 + r0);
+ result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
+ result = (result + 512) >> 10;
+ CLIP_RESULT(result)
+ *(p_cur += outpitch) = result;
+ p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */
+ }
+ p_cur += curr_offset;
+ }
+ }
+
+ return ;
+}
+
+/* Diagonal (half-pel in both x and y) luma interpolation with averaging.
+ * Pass 1: 6-tap horizontal filter over 'in1' into the tmp_res scratch
+ * buffer, computed two pixels per 32-bit register with 0xFF00FF masks;
+ * r13 accumulates overflow bits (r13 & 0xFF000700) and, when set, the row
+ * is recomputed with clipped scalar code.  Pass 2: 6-tap vertical filter
+ * over 'in2' (word-aligned via eCreateAlign into tmp_in when needed),
+ * averaged with the pass-1 intermediate and stored to 'out'; again a
+ * per-column scalar fallback handles clipping.
+ * Scratch rows are 24 bytes wide, which bounds blkwidth to <= 16 plus the
+ * filter apron -- presumably callers pass 4/8/16; verify.
+ * NOTE(review): uint32 pointer casts for loop limits assume 32-bit
+ * pointers -- not 64-bit safe. */
+void eDiagonalInterpMC(uint8 *in1, uint8 *in2, int inpitch,
+ uint8 *out, int outpitch,
+ int blkwidth, int blkheight)
+{
+ int j, i;
+ int result;
+ uint8 *p_cur, *p_ref, *p_tmp8;
+ int curr_offset, ref_offset;
+ uint8 tmp_res[24][24], tmp_in[24][24];
+ uint32 *p_tmp;
+ uint32 tmp, pkres, tmp_result;
+ int32 r0, r1, r2, r3, r4, r5;
+ int32 r6, r7, r8, r9, r10, r13;
+
+ ref_offset = inpitch - blkwidth;
+ p_ref = in1 - 2;
+ /* perform horizontal interpolation */
+ /* not word-aligned */
+ /* It is faster to read 1 byte at time to avoid calling CreateAlign */
+ /* if(((uint32)p_ref)&0x3)
+ {
+ CreateAlign(p_ref,inpitch,0,&tmp_in[0][0],blkwidth+8,blkheight);
+ p_ref = &tmp_in[0][0];
+ ref_offset = 24-blkwidth;
+ }*/
+
+ p_tmp = (uint32*) & (tmp_res[0][0]);
+ for (j = blkheight; j > 0; j--)
+ {
+ r13 = 0;
+ tmp = (uint32)(p_ref + blkwidth);
+
+ //r0 = *((uint32*)p_ref); /* d,c,b,a */
+ //r1 = (r0>>8)&0xFF00FF; /* 0,d,0,b */
+ //r0 &= 0xFF00FF; /* 0,c,0,a */
+ /* It is faster to read 1 byte at a time */
+ r0 = p_ref[0];
+ r1 = p_ref[2];
+ r0 |= (r1 << 16); /* 0,c,0,a */
+ r1 = p_ref[1];
+ r2 = p_ref[3];
+ r1 |= (r2 << 16); /* 0,d,0,b */
+
+ while ((uint32)p_ref < tmp)
+ {
+ //r2 = *((uint32*)(p_ref+=4));/* h,g,f,e */
+ //r3 = (r2>>8)&0xFF00FF; /* 0,h,0,f */
+ //r2 &= 0xFF00FF; /* 0,g,0,e */
+ /* It is faster to read 1 byte at a time */
+ r2 = *(p_ref += 4);
+ r3 = p_ref[2];
+ r2 |= (r3 << 16); /* 0,g,0,e */
+ r3 = p_ref[1];
+ r4 = p_ref[3];
+ r3 |= (r4 << 16); /* 0,h,0,f */
+
+ r4 = r0 + r3; /* c+h, a+f */
+ r5 = r0 + r1; /* c+d, a+b */
+ r6 = r2 + r3; /* g+h, e+f */
+ r5 >>= 16;
+ r5 |= (r6 << 16); /* e+f, c+d */
+ r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */
+ r4 += 0x100010; /* +16, +16 */
+ r5 = r1 + r2; /* d+g, b+e */
+ r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */
+ r4 >>= 5;
+ r13 |= r4; /* check clipping */
+ r4 &= 0xFF00FF; /* mask */
+
+ r5 = p_ref[4]; /* i */
+ r6 = (r5 << 16);
+ r5 = r6 | (r2 >> 16);/* 0,i,0,g */
+ r5 += r1; /* d+i, b+g */ /* r5 not free */
+ r1 >>= 16;
+ r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */
+ r1 += r2; /* f+g, d+e */
+ r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */
+ r0 >>= 16;
+ r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */
+ r0 += r3; /* e+h, c+f */
+ r5 += 0x100010; /* 16,16 */
+ r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */
+ r5 >>= 5;
+ r13 |= r5; /* check clipping */
+ r5 &= 0xFF00FF; /* mask */
+
+ r4 |= (r5 << 8); /* pack them together */
+ *p_tmp++ = r4;
+ r1 = r3;
+ r0 = r2;
+ }
+ p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */
+ p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */
+
+ if (r13&0xFF000700) /* need clipping */
+ {
+ /* redo the whole row with scalar code, clipping each pixel */
+ /* move back to the beginning of the line */
+ p_ref -= (ref_offset + blkwidth); /* input */
+ p_tmp -= 6; /* intermediate output */
+ tmp = (uint32)(p_ref + blkwidth);
+ while ((uint32)p_ref < tmp)
+ {
+ r0 = *p_ref++;
+ r1 = *p_ref++;
+ r2 = *p_ref++;
+ r3 = *p_ref++;
+ r4 = *p_ref++;
+ /* first pixel */
+ r5 = *p_ref++;
+ result = (r0 + r5);
+ r0 = (r1 + r4);
+ result -= (r0 * 5);//result -= r0; result -= (r0<<2);
+ r0 = (r2 + r3);
+ result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
+ result = (result + 16) >> 5;
+ CLIP_RESULT(result)
+ pkres = result;
+ /* second pixel */
+ r0 = *p_ref++;
+ result = (r1 + r0);
+ r1 = (r2 + r5);
+ result -= (r1 * 5);//result -= r1; result -= (r1<<2);
+ r1 = (r3 + r4);
+ result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
+ result = (result + 16) >> 5;
+ CLIP_RESULT(result)
+ pkres |= (result << 8);
+ /* third pixel */
+ r1 = *p_ref++;
+ result = (r2 + r1);
+ r2 = (r3 + r0);
+ result -= (r2 * 5);//result -= r2; result -= (r2<<2);
+ r2 = (r4 + r5);
+ result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
+ result = (result + 16) >> 5;
+ CLIP_RESULT(result)
+ pkres |= (result << 16);
+ /* fourth pixel */
+ r2 = *p_ref++;
+ result = (r3 + r2);
+ r3 = (r4 + r1);
+ result -= (r3 * 5);//result -= r3; result -= (r3<<2);
+ r3 = (r5 + r0);
+ result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
+ result = (result + 16) >> 5;
+ CLIP_RESULT(result)
+ pkres |= (result << 24);
+
+ *p_tmp++ = pkres; /* write 4 pixel */
+ p_ref -= 5;
+ }
+ p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */
+ p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */
+ }
+ }
+
+ /* perform vertical interpolation */
+ /* not word-aligned */
+ if (((uint32)in2)&0x3)
+ {
+ eCreateAlign(in2, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5);
+ in2 = &tmp_in[2][0];
+ inpitch = 24;
+ }
+
+ p_cur = out;
+ curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically up and one pixel right */
+ pkres = blkheight * inpitch; /* reuse it for limit */
+
+ curr_offset += 3;
+
+ for (j = 0; j < blkwidth; j += 4, in2 += 4)
+ {
+ r13 = 0;
+ p_ref = in2;
+ p_tmp8 = &(tmp_res[0][j]); /* intermediate result */
+ p_tmp8 -= 24; /* compensate for the first offset */
+ p_cur -= outpitch; /* compensate for the first offset */
+ tmp = (uint32)(p_ref + pkres); /* limit */
+ while ((uint32)p_ref < tmp) /* the loop un-rolled */
+ {
+ /* Read 1 byte at a time is too slow, too many read and pack ops, need to call CreateAlign */
+ /*p_ref8 = p_ref-(inpitch<<1); r0 = p_ref8[0]; r1 = p_ref8[2];
+ r0 |= (r1<<16); r6 = p_ref8[1]; r1 = p_ref8[3];
+ r6 |= (r1<<16); p_ref+=inpitch; */
+ r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */
+ p_ref += inpitch;
+ r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */
+ r0 &= 0xFF00FF;
+
+ /*p_ref8 = p_ref+(inpitch<<1);
+ r1 = p_ref8[0]; r7 = p_ref8[2]; r1 |= (r7<<16);
+ r7 = p_ref8[1]; r2 = p_ref8[3]; r7 |= (r2<<16);*/
+ r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */
+ r7 = (r1 >> 8) & 0xFF00FF;
+ r1 &= 0xFF00FF;
+
+ r0 += r1;
+ r6 += r7;
+
+ /*r2 = p_ref[0]; r8 = p_ref[2]; r2 |= (r8<<16);
+ r8 = p_ref[1]; r1 = p_ref[3]; r8 |= (r1<<16);*/
+ r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */
+ r8 = (r2 >> 8) & 0xFF00FF;
+ r2 &= 0xFF00FF;
+
+ /*p_ref8 = p_ref-inpitch; r1 = p_ref8[0]; r7 = p_ref8[2];
+ r1 |= (r7<<16); r1 += r2; r7 = p_ref8[1];
+ r2 = p_ref8[3]; r7 |= (r2<<16);*/
+ r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */
+ r7 = (r1 >> 8) & 0xFF00FF;
+ r1 &= 0xFF00FF;
+ r1 += r2;
+
+ r7 += r8;
+
+ r0 += 20 * r1;
+ r6 += 20 * r7;
+ r0 += 0x100010;
+ r6 += 0x100010;
+
+ /*p_ref8 = p_ref-(inpitch<<1); r2 = p_ref8[0]; r8 = p_ref8[2];
+ r2 |= (r8<<16); r8 = p_ref8[1]; r1 = p_ref8[3]; r8 |= (r1<<16);*/
+ r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */
+ r8 = (r2 >> 8) & 0xFF00FF;
+ r2 &= 0xFF00FF;
+
+ /*p_ref8 = p_ref+inpitch; r1 = p_ref8[0]; r7 = p_ref8[2];
+ r1 |= (r7<<16); r1 += r2; r7 = p_ref8[1];
+ r2 = p_ref8[3]; r7 |= (r2<<16);*/
+ r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */
+ r7 = (r1 >> 8) & 0xFF00FF;
+ r1 &= 0xFF00FF;
+ r1 += r2;
+
+ r7 += r8;
+
+ r0 -= 5 * r1;
+ r6 -= 5 * r7;
+
+ r0 >>= 5;
+ r6 >>= 5;
+ /* clip */
+ r13 |= r6;
+ r13 |= r0;
+ //CLIPPACK(r6,result)
+ /* add with horizontal results */
+ r10 = *((uint32*)(p_tmp8 += 24));
+ r9 = (r10 >> 8) & 0xFF00FF;
+ r10 &= 0xFF00FF;
+
+ r0 += r10;
+ r0 += 0x10001;
+ r0 = (r0 >> 1) & 0xFF00FF; /* mask to 8 bytes */
+
+ r6 += r9;
+ r6 += 0x10001;
+ r6 = (r6 >> 1) & 0xFF00FF; /* mask to 8 bytes */
+
+ r0 |= (r6 << 8); /* pack it back */
+ *((uint32*)(p_cur += outpitch)) = r0;
+ }
+ p_cur += curr_offset; /* offset to the next pixel */
+ if (r13 & 0xFF000700) /* this column need clipping */
+ {
+ /* redo this 4-pixel-wide column with scalar clipped code */
+ p_cur -= 4;
+ for (i = 0; i < 4; i++)
+ {
+ p_ref = in2 + i;
+ p_tmp8 = &(tmp_res[0][j+i]); /* intermediate result */
+ p_tmp8 -= 24; /* compensate for the first offset */
+ p_cur -= outpitch; /* compensate for the first offset */
+ tmp = (uint32)(p_ref + pkres); /* limit */
+ while ((uint32)p_ref < tmp) /* the loop un-rolled */
+ {
+ r0 = *(p_ref - (inpitch << 1));
+ r1 = *(p_ref - inpitch);
+ r2 = *p_ref;
+ r3 = *(p_ref += inpitch); /* modify pointer before loading */
+ r4 = *(p_ref += inpitch);
+ /* first pixel */
+ r5 = *(p_ref += inpitch);
+ result = (r0 + r5);
+ r0 = (r1 + r4);
+ result -= (r0 * 5);//result -= r0; result -= (r0<<2);
+ r0 = (r2 + r3);
+ result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
+ result = (result + 16) >> 5;
+ CLIP_RESULT(result)
+ tmp_result = *(p_tmp8 += 24); /* modify pointer before loading */
+ result = (result + tmp_result + 1); /* no clip */
+ result = (result >> 1);
+ *(p_cur += outpitch) = result;
+ /* second pixel */
+ r0 = *(p_ref += inpitch);
+ result = (r1 + r0);
+ r1 = (r2 + r5);
+ result -= (r1 * 5);//result -= r1; result -= (r1<<2);
+ r1 = (r3 + r4);
+ result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
+ result = (result + 16) >> 5;
+ CLIP_RESULT(result)
+ tmp_result = *(p_tmp8 += 24); /* intermediate result */
+ result = (result + tmp_result + 1); /* no clip */
+ result = (result >> 1);
+ *(p_cur += outpitch) = result;
+ /* third pixel */
+ r1 = *(p_ref += inpitch);
+ result = (r2 + r1);
+ r2 = (r3 + r0);
+ result -= (r2 * 5);//result -= r2; result -= (r2<<2);
+ r2 = (r4 + r5);
+ result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
+ result = (result + 16) >> 5;
+ CLIP_RESULT(result)
+ tmp_result = *(p_tmp8 += 24); /* intermediate result */
+ result = (result + tmp_result + 1); /* no clip */
+ result = (result >> 1);
+ *(p_cur += outpitch) = result;
+ /* fourth pixel */
+ r2 = *(p_ref += inpitch);
+ result = (r3 + r2);
+ r3 = (r4 + r1);
+ result -= (r3 * 5);//result -= r3; result -= (r3<<2);
+ r3 = (r5 + r0);
+ result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
+ result = (result + 16) >> 5;
+ CLIP_RESULT(result)
+ tmp_result = *(p_tmp8 += 24); /* intermediate result */
+ result = (result + tmp_result + 1); /* no clip */
+ result = (result >> 1);
+ *(p_cur += outpitch) = result;
+ p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */
+ }
+ p_cur += (curr_offset - 3);
+ }
+ }
+ }
+
+ return ;
+}
+
+/* position G */
+/* Full-pel (integer MV, position G) block copy of blkwidth x blkheight
+ * pixels from 'in' to 'out'.  When 'in' is not word-aligned the four bytes
+ * are gathered individually and written as one word; when aligned, whole
+ * 32-bit words are copied.  Assumes blkwidth is a multiple of 4 and 'out'
+ * is word-aligned -- TODO confirm with callers. */
+void eFullPelMC(uint8 *in, int inpitch, uint8 *out, int outpitch,
+ int blkwidth, int blkheight)
+{
+ int i, j;
+ int offset_in = inpitch - blkwidth;
+ int offset_out = outpitch - blkwidth;
+ uint32 temp;
+ uint8 byte;
+
+ if (((uint32)in)&3)
+ {
+ /* unaligned source: assemble each output word byte by byte */
+ for (j = blkheight; j > 0; j--)
+ {
+ for (i = blkwidth; i > 0; i -= 4)
+ {
+ temp = *in++;
+ byte = *in++;
+ temp |= (byte << 8);
+ byte = *in++;
+ temp |= (byte << 16);
+ byte = *in++;
+ temp |= (byte << 24);
+
+ *((uint32*)out) = temp; /* write 4 bytes */
+ out += 4;
+ }
+ out += offset_out;
+ in += offset_in;
+ }
+ }
+ else
+ {
+ /* aligned source: straight word copy */
+ for (j = blkheight; j > 0; j--)
+ {
+ for (i = blkwidth; i > 0; i -= 4)
+ {
+ temp = *((uint32*)in);
+ *((uint32*)out) = temp;
+ in += 4;
+ out += 4;
+ }
+ out += offset_out;
+ in += offset_in;
+ }
+ }
+ return ;
+}
+
+/* Pad the chroma reference frame edges when the motion-compensated block
+ * at (x_pos,y_pos) (in 1/8-pel units) reaches outside the picture.
+ * Vertical padding replicates the border row (12 bytes / 3 words per row,
+ * up to 8 rows above or below); horizontal padding replicates the border
+ * pixel 8 bytes to the left or right of each affected row.
+ * NOTE(review): assumes the frame allocation includes the edge margin
+ * being written (8 pixels each side plus word-alignment slack) -- confirm
+ * against the reference-frame allocator. */
+void ePadChroma(uint8 *ref, int picwidth, int picheight, int picpitch, int x_pos, int y_pos)
+{
+ int pad_height;
+ int pad_width;
+ uint8 *start;
+ uint32 word1, word2, word3;
+ int offset, j;
+
+
+ /* one extra row/col is needed when the subpel offset interpolates
+ across the block edge */
+ pad_height = 8 + ((y_pos & 7) ? 1 : 0);
+ pad_width = 8 + ((x_pos & 7) ? 1 : 0);
+
+ y_pos >>= 3;
+ x_pos >>= 3;
+ // pad vertical first
+ if (y_pos < 0) // need to pad up
+ {
+ if (x_pos < -8) start = ref - 8;
+ else if (x_pos + pad_width > picwidth + 7) start = ref + picwidth + 7 - pad_width;
+ else start = ref + x_pos;
+
+ /* word-align start */
+ offset = (uint32)start & 0x3;
+ if (offset) start -= offset;
+
+ word1 = *((uint32*)start);
+ word2 = *((uint32*)(start + 4));
+ word3 = *((uint32*)(start + 8));
+
+ /* pad up N rows */
+ j = -y_pos;
+ if (j > 8) j = 8;
+ while (j--)
+ {
+ *((uint32*)(start -= picpitch)) = word1;
+ *((uint32*)(start + 4)) = word2;
+ *((uint32*)(start + 8)) = word3;
+ }
+
+ }
+ else if (y_pos + pad_height >= picheight) /* pad down */
+ {
+ if (x_pos < -8) start = ref + picpitch * (picheight - 1) - 8;
+ else if (x_pos + pad_width > picwidth + 7) start = ref + picpitch * (picheight - 1) +
+ picwidth + 7 - pad_width;
+ else start = ref + picpitch * (picheight - 1) + x_pos;
+
+ /* word-align start */
+ offset = (uint32)start & 0x3;
+ if (offset) start -= offset;
+
+ word1 = *((uint32*)start);
+ word2 = *((uint32*)(start + 4));
+ word3 = *((uint32*)(start + 8));
+
+ /* pad down N rows */
+ j = y_pos + pad_height - picheight;
+ if (j > 8) j = 8;
+ while (j--)
+ {
+ *((uint32*)(start += picpitch)) = word1;
+ *((uint32*)(start + 4)) = word2;
+ *((uint32*)(start + 8)) = word3;
+ }
+ }
+
+ /* now pad horizontal */
+ if (x_pos < 0) // pad left
+ {
+ if (y_pos < -8) start = ref - (picpitch << 3);
+ else if (y_pos + pad_height > picheight + 7) start = ref + (picheight + 7 - pad_height) * picpitch;
+ else start = ref + y_pos * picpitch;
+
+ // now pad left 8 pixels for pad_height rows */
+ j = pad_height;
+ start -= picpitch;
+ while (j--)
+ {
+ /* replicate the border pixel into a word, then store 8 bytes */
+ word1 = *(start += picpitch);
+ word1 |= (word1 << 8);
+ word1 |= (word1 << 16);
+ *((uint32*)(start - 8)) = word1;
+ *((uint32*)(start - 4)) = word1;
+ }
+ }
+ else if (x_pos + pad_width >= picwidth) /* pad right */
+ {
+ if (y_pos < -8) start = ref - (picpitch << 3) + picwidth - 1;
+ else if (y_pos + pad_height > picheight + 7) start = ref + (picheight + 7 - pad_height) * picpitch + picwidth - 1;
+ else start = ref + y_pos * picpitch + picwidth - 1;
+
+ // now pad right 8 pixels for pad_height rows */
+ j = pad_height;
+ start -= picpitch;
+ while (j--)
+ {
+ /* replicate the border pixel into a word, then store 8 bytes */
+ word1 = *(start += picpitch);
+ word1 |= (word1 << 8);
+ word1 |= (word1 << 16);
+ *((uint32*)(start + 1)) = word1;
+ *((uint32*)(start + 5)) = word1;
+ }
+ }
+
+ return ;
+}
+
+
+/* Chroma motion-compensation entry point.  Pads the reference edges (in
+ * case the block reaches outside the picture), splits the 1/8-pel motion
+ * vector into an integer position plus fractional offsets (dx,dy), and
+ * dispatches to one of the eChromaMC_SIMD routines.  The table index
+ * encodes: bit0 = dx!=0, bit1 = dy!=0, bit2 = (blkwidth<<1)&7, which is
+ * presumably nonzero only for width-2 blocks -- verify against the
+ * eChromaMC_SIMD table layout. */
+void eChromaMotionComp(uint8 *ref, int picwidth, int picheight,
+ int x_pos, int y_pos,
+ uint8 *pred, int picpitch,
+ int blkwidth, int blkheight)
+{
+ int dx, dy;
+ int offset_dx, offset_dy;
+ int index;
+
+ ePadChroma(ref, picwidth, picheight, picpitch, x_pos, y_pos);
+
+ dx = x_pos & 7;
+ dy = y_pos & 7;
+ offset_dx = (dx + 7) >> 3; /* 1 iff dx != 0 */
+ offset_dy = (dy + 7) >> 3; /* 1 iff dy != 0 */
+ x_pos = x_pos >> 3; /* round it to full-pel resolution */
+ y_pos = y_pos >> 3;
+
+ ref += y_pos * picpitch + x_pos;
+
+ index = offset_dx + (offset_dy << 1) + ((blkwidth << 1) & 0x7);
+
+ (*(eChromaMC_SIMD[index]))(ref, picpitch , dx, dy, pred, picpitch, blkwidth, blkheight);
+ return ;
+}
+
+
+/* SIMD routines, unroll the loops in vertical direction, decreasing loops (things to be done) */
+/* Chroma bilinear interpolation with fractional offsets in both x and y.
+ * Pass 1: horizontal weighting (8-dx)*a + dx*b into the 'temp' scratch
+ * buffer, two pixels packed per 32-bit word (32-byte scratch rows).
+ * Pass 2: vertical weighting (8-dy)*top + dy*bottom with rounding
+ * (+32, >>6), repacking four 8-bit pixels per output word.
+ * The inner loops unroll by 4 horizontally and 2 vertically, and the
+ * final 'ref = temp + 4' comment says the column loop "can only iterate
+ * twice max" -- so blkwidth is assumed to be 4 or 8 and blkheight even;
+ * pOut is assumed word-aligned.  TODO confirm with dispatch table. */
+void eChromaDiagonalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
+ uint8 *pOut, int predPitch, int blkwidth, int blkheight)
+{
+ int32 r0, r1, r2, r3, result0, result1;
+ uint8 temp[288];
+ uint8 *ref, *out;
+ int i, j;
+ int dx_8 = 8 - dx;
+ int dy_8 = 8 - dy;
+
+ /* horizontal first */
+ out = temp;
+ for (i = 0; i < blkheight + 1; i++)
+ {
+ ref = pRef;
+ r0 = ref[0];
+ for (j = 0; j < blkwidth; j += 4)
+ {
+ r0 |= (ref[2] << 16);
+ result0 = dx_8 * r0;
+
+ r1 = ref[1] | (ref[3] << 16);
+ result0 += dx * r1;
+ *(int32 *)out = result0;
+
+ result0 = dx_8 * r1;
+
+ r2 = ref[4];
+ r0 = r0 >> 16;
+ r1 = r0 | (r2 << 16);
+ result0 += dx * r1;
+ *(int32 *)(out + 16) = result0;
+
+ ref += 4;
+ out += 4;
+ r0 = r2;
+ }
+ pRef += srcPitch;
+ out += (32 - blkwidth);
+ }
+
+// pRef -= srcPitch*(blkheight+1);
+ ref = temp;
+
+ for (j = 0; j < blkwidth; j += 4)
+ {
+ r0 = *(int32 *)ref;
+ r1 = *(int32 *)(ref + 16);
+ ref += 32;
+ out = pOut;
+ for (i = 0; i < (blkheight >> 1); i++)
+ {
+ result0 = dy_8 * r0 + 0x00200020;
+ r2 = *(int32 *)ref;
+ result0 += dy * r2;
+ result0 >>= 6;
+ result0 &= 0x00FF00FF;
+ r0 = r2;
+
+ result1 = dy_8 * r1 + 0x00200020;
+ r3 = *(int32 *)(ref + 16);
+ result1 += dy * r3;
+ result1 >>= 6;
+ result1 &= 0x00FF00FF;
+ r1 = r3;
+ *(int32 *)out = result0 | (result1 << 8);
+ out += predPitch;
+ ref += 32;
+
+ result0 = dy_8 * r0 + 0x00200020;
+ r2 = *(int32 *)ref;
+ result0 += dy * r2;
+ result0 >>= 6;
+ result0 &= 0x00FF00FF;
+ r0 = r2;
+
+ result1 = dy_8 * r1 + 0x00200020;
+ r3 = *(int32 *)(ref + 16);
+ result1 += dy * r3;
+ result1 >>= 6;
+ result1 &= 0x00FF00FF;
+ r1 = r3;
+ *(int32 *)out = result0 | (result1 << 8);
+ out += predPitch;
+ ref += 32;
+ }
+ pOut += 4;
+ ref = temp + 4; /* since it can only iterate twice max */
+ }
+ return;
+}
+
+/* Chroma bilinear interpolation, fractional offset in x only (dy == 0,
+ * unused).  Each output pixel is ((8-dx)*a + dx*b + 4) >> 3, computed two
+ * pixels per 32-bit word and packed four to an output word.  Assumes
+ * blkwidth is a multiple of 4 and pOut is word-aligned -- TODO confirm. */
+void eChromaHorizontalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
+ uint8 *pOut, int predPitch, int blkwidth, int blkheight)
+{
+ (void)(dy);
+
+ int32 r0, r1, r2, result0, result1;
+ uint8 *ref, *out;
+ int i, j;
+ int dx_8 = 8 - dx;
+
+ /* horizontal first */
+ for (i = 0; i < blkheight; i++)
+ {
+ ref = pRef;
+ out = pOut;
+
+ r0 = ref[0];
+ for (j = 0; j < blkwidth; j += 4)
+ {
+ r0 |= (ref[2] << 16); /* pixels 0 and 2 packed */
+ result0 = dx_8 * r0 + 0x00040004; /* +4 rounding per lane */
+
+ r1 = ref[1] | (ref[3] << 16); /* pixels 1 and 3 packed */
+ result0 += dx * r1;
+ result0 >>= 3;
+ result0 &= 0x00FF00FF;
+
+ result1 = dx_8 * r1 + 0x00040004;
+
+ r2 = ref[4];
+ r0 = r0 >> 16;
+ r1 = r0 | (r2 << 16);
+ result1 += dx * r1;
+ result1 >>= 3;
+ result1 &= 0x00FF00FF;
+
+ *(int32 *)out = result0 | (result1 << 8);
+
+ ref += 4;
+ out += 4;
+ r0 = r2; /* carry last pixel into next group */
+ }
+
+ pRef += srcPitch;
+ pOut += predPitch;
+ }
+ return;
+}
+
+/* Chroma bilinear interpolation, fractional offset in y only (dx == 0,
+ * unused).  Each output pixel is ((8-dy)*top + dy*bottom + 4) >> 3,
+ * processed a 4-pixel-wide column at a time with two pixels packed per
+ * 32-bit word.  Assumes blkwidth is a multiple of 4 and pOut is
+ * word-aligned -- TODO confirm. */
+void eChromaVerticalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
+ uint8 *pOut, int predPitch, int blkwidth, int blkheight)
+{
+ (void)(dx);
+
+ int32 r0, r1, r2, r3, result0, result1;
+ int i, j;
+ uint8 *ref, *out;
+ int dy_8 = 8 - dy;
+ /* vertical first */
+ for (i = 0; i < blkwidth; i += 4)
+ {
+ ref = pRef;
+ out = pOut;
+
+ r0 = ref[0] | (ref[2] << 16); /* even pixels of the top row */
+ r1 = ref[1] | (ref[3] << 16); /* odd pixels of the top row */
+ ref += srcPitch;
+ for (j = 0; j < blkheight; j++)
+ {
+ result0 = dy_8 * r0 + 0x00040004; /* +4 rounding per lane */
+ r2 = ref[0] | (ref[2] << 16);
+ result0 += dy * r2;
+ result0 >>= 3;
+ result0 &= 0x00FF00FF;
+ r0 = r2; /* next row becomes the new top */
+
+ result1 = dy_8 * r1 + 0x00040004;
+ r3 = ref[1] | (ref[3] << 16);
+ result1 += dy * r3;
+ result1 >>= 3;
+ result1 &= 0x00FF00FF;
+ r1 = r3;
+ *(int32 *)out = result0 | (result1 << 8);
+ ref += srcPitch;
+ out += predPitch;
+ }
+ pOut += 4;
+ pRef += 4;
+ }
+ return;
+}
+
+/* Chroma bilinear interpolation for 2-pixel-wide blocks (blkwidth unused,
+ * presumed 2), fractional offset in both x and y.  Pass 1: horizontal
+ * weighting a<<3 + dx*(b-a) for the two pixels, packed into one word per
+ * row of the 'temp' scratch (blkheight+1 rows, so blkheight <= 8).
+ * Pass 2: vertical weighting (8-dy)*top + dy*bottom with rounding
+ * (+32, >>6), then the two bytes are repacked into a 16-bit store.
+ * Assumes pOut is 2-byte aligned -- TODO confirm with callers. */
+void eChromaDiagonalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
+ uint8 *pOut, int predPitch, int blkwidth, int blkheight)
+{
+ (void)(blkwidth);
+
+ int32 r0, r1, temp0, temp1, result;
+ int32 temp[9];
+ int32 *out;
+ int i, r_temp;
+ int dy_8 = 8 - dy;
+
+ /* horizontal first */
+ out = temp;
+ for (i = 0; i < blkheight + 1; i++)
+ {
+ r_temp = pRef[1];
+ temp0 = (pRef[0] << 3) + dx * (r_temp - pRef[0]);
+ temp1 = (r_temp << 3) + dx * (pRef[2] - r_temp);
+ r0 = temp0 | (temp1 << 16);
+ *out++ = r0;
+ pRef += srcPitch;
+ }
+
+ pRef -= srcPitch * (blkheight + 1);
+
+ out = temp;
+
+ r0 = *out++;
+
+ for (i = 0; i < blkheight; i++)
+ {
+ result = dy_8 * r0 + 0x00200020;
+ r1 = *out++;
+ result += dy * r1;
+ result >>= 6;
+ result &= 0x00FF00FF;
+ /* low lane -> byte 0, high lane -> byte 1 */
+ *(int16 *)pOut = (result >> 8) | (result & 0xFF);
+ r0 = r1;
+ pOut += predPitch;
+ }
+ return;
+}
+
+/* Chroma bilinear interpolation for 2-pixel-wide blocks, fractional
+ * offset in x only (dy and blkwidth unused).  Each pixel is
+ * (a<<3 + dx*(b-a) + 4) >> 3, i.e. the rounded (8-dx)/dx blend of the two
+ * horizontal neighbors; the two results are packed into one 16-bit store.
+ * Assumes pOut is 2-byte aligned -- TODO confirm with callers. */
+void eChromaHorizontalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
+ uint8 *pOut, int predPitch, int blkwidth, int blkheight)
+{
+ (void)(dy);
+ (void)(blkwidth);
+
+ int i, temp, temp0, temp1;
+
+ /* horizontal first */
+ for (i = 0; i < blkheight; i++)
+ {
+ temp = pRef[1];
+ temp0 = ((pRef[0] << 3) + dx * (temp - pRef[0]) + 4) >> 3;
+ temp1 = ((temp << 3) + dx * (pRef[2] - temp) + 4) >> 3;
+
+ *(int16 *)pOut = temp0 | (temp1 << 8);
+ pRef += srcPitch;
+ pOut += predPitch;
+
+ }
+ return;
+}
+/* Chroma bilinear interpolation for 2-pixel-wide blocks, fractional
+ * offset in y only (dx and blkwidth unused).  The two pixels of a row are
+ * packed into one word (low/high 16-bit lanes) and blended with the row
+ * below as ((8-dy)*top + dy*bottom + 4) >> 3, then repacked into a 16-bit
+ * store.  Assumes pOut is 2-byte aligned -- TODO confirm with callers. */
+void eChromaVerticalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
+ uint8 *pOut, int predPitch, int blkwidth, int blkheight)
+{
+ (void)(dx);
+ (void)(blkwidth);
+
+ int32 r0, r1, result;
+ int i;
+ int dy_8 = 8 - dy;
+ r0 = pRef[0] | (pRef[1] << 16);
+ pRef += srcPitch;
+ for (i = 0; i < blkheight; i++)
+ {
+ result = dy_8 * r0 + 0x00040004; /* +4 rounding per lane */
+ r1 = pRef[0] | (pRef[1] << 16);
+ result += dy * r1;
+ result >>= 3;
+ result &= 0x00FF00FF;
+ /* low lane -> byte 0, high lane -> byte 1 */
+ *(int16 *)pOut = (result >> 8) | (result & 0xFF);
+ r0 = r1; /* current row becomes the new top */
+ pRef += srcPitch;
+ pOut += predPitch;
+ }
+ return;
+}
+
+/* Full-pel chroma copy (dx == dy == 0, both unused).  Copies a
+ * blkwidth x blkheight block two bytes at a time: byte-wise gather when
+ * pRef is odd-aligned, 16-bit loads/stores otherwise.  Assumes blkwidth
+ * is even and pOut is 2-byte aligned -- TODO confirm with callers. */
+void eChromaFullMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
+ uint8 *pOut, int predPitch, int blkwidth, int blkheight)
+{
+ (void)(dx);
+ (void)(dy);
+
+ int i, j;
+ int offset_in = srcPitch - blkwidth;
+ int offset_out = predPitch - blkwidth;
+ uint16 temp;
+ uint8 byte;
+
+ if (((uint32)pRef)&1)
+ {
+ /* unaligned source: assemble each halfword byte by byte */
+ for (j = blkheight; j > 0; j--)
+ {
+ for (i = blkwidth; i > 0; i -= 2)
+ {
+ temp = *pRef++;
+ byte = *pRef++;
+ temp |= (byte << 8);
+ *((uint16*)pOut) = temp; /* write 2 bytes */
+ pOut += 2;
+ }
+ pOut += offset_out;
+ pRef += offset_in;
+ }
+ }
+ else
+ {
+ /* aligned source: straight halfword copy */
+ for (j = blkheight; j > 0; j--)
+ {
+ for (i = blkwidth; i > 0; i -= 2)
+ {
+ temp = *((uint16*)pRef);
+ *((uint16*)pOut) = temp;
+ pRef += 2;
+ pOut += 2;
+ }
+ pOut += offset_out;
+ pRef += offset_in;
+ }
+ }
+ return ;
+}
diff --git a/media/libstagefright/codecs/avc/enc/src/motion_est.cpp b/media/libstagefright/codecs/avc/enc/src/motion_est.cpp
new file mode 100644
index 0000000..f650ef9
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/motion_est.cpp
@@ -0,0 +1,1774 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
#include "avcenc_lib.h"

#define MIN_GOP 1 /* minimum size of GOP, 1/23/01, need to be tested */

#define DEFAULT_REF_IDX 0 /* always from the first frame in the reflist */

#define ALL_CAND_EQUAL 10 /* any number greater than 5 will work */


/* Motion-search biases, from TMN 3.2 */
#define PREF_NULL_VEC 129 /* zero vector bias */
#define PREF_16_VEC 129 /* 1MV bias versus 4MVs*/
#define PREF_INTRA 3024//512 /* bias for INTRA coding */

/* Spiral-refinement bookkeeping: tab_exclude[last_loc][curr_loc] is 1 when
   the SAD at curr_loc was already evaluated in the previous refinement step
   (relative to the direction the search center moved), so the candidate can
   be skipped this step. */
const static int tab_exclude[9][9] = // [last_loc][curr_loc]
{
    {0, 0, 0, 0, 0, 0, 0, 0, 0},
    {0, 0, 0, 0, 1, 1, 1, 0, 0},
    {0, 0, 0, 0, 1, 1, 1, 1, 1},
    {0, 0, 0, 0, 0, 0, 1, 1, 1},
    {0, 1, 1, 0, 0, 0, 1, 1, 1},
    {0, 1, 1, 0, 0, 0, 0, 0, 1},
    {0, 1, 1, 1, 1, 0, 0, 0, 1},
    {0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 1, 1, 1, 1, 1, 0, 0}
}; //to decide whether to continue or compute

/* (dx,dy) increments that walk the 8 neighbors of the search center in
   spiral order, indexed by the current position k. */
const static int refine_next[8][2] = /* [curr_k][increment] */
{
    {0, 0}, {2, 0}, {1, 1}, {0, 2}, { -1, 1}, { -2, 0}, { -1, -1}, {0, -2}
};

#ifdef _SAD_STAT
uint32 num_MB = 0;   /* count of macroblocks searched (profiling only) */
uint32 num_cand = 0; /* count of SAD candidates evaluated (profiling only) */
#endif

/************************************************************************/
#define TH_INTER_2 100 /* temporary for now */

/* The FIXED_* macros below pin the inter-prediction decision to a single
   configuration; active only when FIXED_INTERPRED_MODE is defined (debug). */
//#define FIXED_INTERPRED_MODE  AVC_P16
#define FIXED_REF_IDX   0
#define FIXED_MVX 0
#define FIXED_MVY 0

// only use when AVC_P8 or AVC_P8ref0
#define FIXED_SUBMB_MODE    AVC_4x4
/*************************************************************************/
+
/* Initialize arrays necessary for motion search.
   Builds (a) the motion-vector bit-cost table: exp-Golomb code lengths
   indexed by signed MV difference through the centered pointer
   encvid->mvbits, and (b) the lookup tables of half-pel candidate
   positions and quarter-pel bilinear base pointers inside the interpolated
   sub-pel prediction window (encvid->subpel_pred).
   Returns AVCENC_MEMORY_FAIL if the bit-cost table cannot be allocated. */
AVCEnc_Status InitMotionSearchModule(AVCHandle *avcHandle)
{
    AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
    AVCRateControl *rateCtrl = encvid->rateCtrl;
    int search_range = rateCtrl->mvRange;
    int number_of_subpel_positions = 4 * (2 * search_range + 3);
    int max_mv_bits, max_mvd;
    int temp_bits = 0;
    uint8 *mvbits;
    int bits, imax, imin, i;
    uint8* subpel_pred = (uint8*) encvid->subpel_pred; // all 16 sub-pel positions


    /* temp_bits = ceil(log2(number_of_subpel_positions)) */
    while (number_of_subpel_positions > 0)
    {
        temp_bits++;
        number_of_subpel_positions >>= 1;
    }

    /* longest exp-Golomb MV code for this range, and the largest
       representable MV difference magnitude */
    max_mv_bits = 3 + 2 * temp_bits;
    max_mvd = (1 << (max_mv_bits >> 1)) - 1;

    /* table spans [-max_mvd, +max_mvd]; indexed via the centered pointer */
    encvid->mvbits_array = (uint8*) avcHandle->CBAVC_Malloc(encvid->avcHandle->userData,
                           sizeof(uint8) * (2 * max_mvd + 1), DEFAULT_ATTR);

    if (encvid->mvbits_array == NULL)
    {
        return AVCENC_MEMORY_FAIL;
    }

    mvbits = encvid->mvbits = encvid->mvbits_array + max_mvd;

    /* exp-Golomb code lengths: 1 bit for 0, then 3, 5, ... bits for
       progressively larger magnitudes, symmetric in sign */
    mvbits[0] = 1;
    for (bits = 3; bits <= max_mv_bits; bits += 2)
    {
        imax = 1 << (bits >> 1);
        imin = imax >> 1;

        for (i = imin; i < imax; i++)   mvbits[-i] = mvbits[i] = bits;
    }

    /* initialize half-pel search */
    /* hpel_cand[k] points into subpel_pred at the k-th half-pel candidate
       around the integer-pel center (k=0 is the center itself); row stride
       inside each interpolated plane is 24 */
    encvid->hpel_cand[0] = subpel_pred + REF_CENTER;
    encvid->hpel_cand[1] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1 ;
    encvid->hpel_cand[2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
    encvid->hpel_cand[3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
    encvid->hpel_cand[4] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
    encvid->hpel_cand[5] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;
    encvid->hpel_cand[6] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
    encvid->hpel_cand[7] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
    encvid->hpel_cand[8] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;

    /* For quarter-pel interpolation around best half-pel result */
    /* bilin_base[k][0..3] are the four half-pel neighbors that are averaged
       to produce the quarter-pel candidates around half-pel position k */

    encvid->bilin_base[0][0] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
    encvid->bilin_base[0][1] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
    encvid->bilin_base[0][2] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
    encvid->bilin_base[0][3] = subpel_pred + REF_CENTER;


    encvid->bilin_base[1][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE;
    encvid->bilin_base[1][1] = subpel_pred + REF_CENTER - 24;
    encvid->bilin_base[1][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
    encvid->bilin_base[1][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;

    encvid->bilin_base[2][0] = subpel_pred + REF_CENTER - 24;
    encvid->bilin_base[2][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
    encvid->bilin_base[2][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
    encvid->bilin_base[2][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;

    encvid->bilin_base[3][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
    encvid->bilin_base[3][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
    encvid->bilin_base[3][2] = subpel_pred + REF_CENTER;
    encvid->bilin_base[3][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;

    encvid->bilin_base[4][0] = subpel_pred + REF_CENTER;
    encvid->bilin_base[4][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
    encvid->bilin_base[4][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;
    encvid->bilin_base[4][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;

    encvid->bilin_base[5][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
    encvid->bilin_base[5][1] = subpel_pred + REF_CENTER;
    encvid->bilin_base[5][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
    encvid->bilin_base[5][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;

    encvid->bilin_base[6][0] = subpel_pred + REF_CENTER - 1;
    encvid->bilin_base[6][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
    encvid->bilin_base[6][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 24;
    encvid->bilin_base[6][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;

    encvid->bilin_base[7][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE;
    encvid->bilin_base[7][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
    encvid->bilin_base[7][2] = subpel_pred + REF_CENTER - 1;
    encvid->bilin_base[7][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;

    encvid->bilin_base[8][0] = subpel_pred + REF_CENTER - 25;
    encvid->bilin_base[8][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE;
    encvid->bilin_base[8][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE;
    encvid->bilin_base[8][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;


    return AVCENC_SUCCESS;
}
+
+/* Clean-up memory */
+void CleanMotionSearchModule(AVCHandle *avcHandle)
+{
+ AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
+
+ if (encvid->mvbits_array)
+ {
+ avcHandle->CBAVC_Free(avcHandle->userData, (int)(encvid->mvbits_array));
+ encvid->mvbits = NULL;
+ }
+
+ return ;
+}
+
+
+bool IntraDecisionABE(int *min_cost, uint8 *cur, int pitch, bool ave)
+{
+ int j;
+ uint8 *out;
+ int temp, SBE;
+ OsclFloat ABE;
+ bool intra = true;
+
+ SBE = 0;
+ /* top neighbor */
+ out = cur - pitch;
+ for (j = 0; j < 16; j++)
+ {
+ temp = out[j] - cur[j];
+ SBE += ((temp >= 0) ? temp : -temp);
+ }
+
+ /* left neighbor */
+ out = cur - 1;
+ out -= pitch;
+ cur -= pitch;
+ for (j = 0; j < 16; j++)
+ {
+ temp = *(out += pitch) - *(cur += pitch);
+ SBE += ((temp >= 0) ? temp : -temp);
+ }
+
+ /* compare mincost/384 and SBE/64 */
+ ABE = SBE / 32.0; //ABE = SBE/64.0; //
+ if (ABE >= *min_cost / 256.0) //if( ABE*0.8 >= min_cost/384.0) //
+ {
+ intra = false; // no possibility of intra, just use inter
+ }
+ else
+ {
+ if (ave == true)
+ {
+ *min_cost = (*min_cost + (int)(SBE * 8)) >> 1; // possibility of intra, averaging the cost
+ }
+ else
+ {
+ *min_cost = (int)(SBE * 8);
+ }
+ }
+
+ return intra;
+}
+
+/******* main function for macroblock prediction for the entire frame ***/
+/* if turns out to be IDR frame, set video->nal_unit_type to AVC_NALTYPE_IDR */
/* Frame-level motion estimation: finds one 16x16 motion vector per
   macroblock, accumulates the frame SAD for rate control, flags MBs worth
   an intra search in the encoding loop, and may upgrade the frame to an
   I-slice when scene-change detection fires.  For I-slices it only resets
   the per-MB costs and intra flags and returns. */
void AVCMotionEstimation(AVCEncObject *encvid)
{
    AVCCommonObj *video = encvid->common;
    int slice_type = video->slice_type;
    AVCFrameIO *currInput = encvid->currInput;
    AVCPictureData *refPic = video->RefPicList0[0];
    int i, j, k;
    int mbwidth = video->PicWidthInMbs;
    int mbheight = video->PicHeightInMbs;
    int totalMB = video->PicSizeInMbs;
    int pitch = currInput->pitch;
    AVCMacroblock *currMB, *mblock = video->mblock;
    AVCMV *mot_mb_16x16, *mot16x16 = encvid->mot16x16;
    // AVCMV *mot_mb_16x8, *mot_mb_8x16, *mot_mb_8x8, etc;
    AVCRateControl *rateCtrl = encvid->rateCtrl;
    uint8 *intraSearch = encvid->intraSearch;
    uint FS_en = encvid->fullsearch_enable;

    int NumIntraSearch, start_i, numLoop, incr_i;
    int mbnum, offset;
    uint8 *cur, *best_cand[5];
    int totalSAD = 0;    /* average SAD for rate control */
    int type_pred;
    int abe_cost;

#ifdef HTFM
    /***** HYPOTHESIS TESTING ********/  /* 2/28/01 */
    int collect = 0;
    HTFM_Stat htfm_stat;
    double newvar[16];
    double exp_lamda[15];
    /*********************************/
#endif
    int hp_guess = 0;
    uint32 mv_uint32;

    offset = 0;

    if (slice_type == AVC_I_SLICE)
    {
        /* cannot do I16 prediction here because it needs full decoding. */
        for (i = 0; i < totalMB; i++)
        {
            encvid->min_cost[i] = 0x7FFFFFFF; /* max value for int */
        }

        memset(intraSearch, 1, sizeof(uint8)*totalMB);

        encvid->firstIntraRefreshMBIndx = 0; /* reset this */

        return ;
    }
    else   // P_SLICE
    {
        for (i = 0; i < totalMB; i++)
        {
            mblock[i].mb_intra = 0;
        }
        memset(intraSearch, 1, sizeof(uint8)*totalMB);
    }

    /* lazily pad the reference frame edges before the first search on it */
    if (refPic->padded == 0)
    {
        AVCPaddingEdge(refPic);
        refPic->padded = 1;
    }
    /* Random INTRA update */
    if (rateCtrl->intraMBRate)
    {
        AVCRasterIntraUpdate(encvid, mblock, totalMB, rateCtrl->intraMBRate);
    }

    encvid->sad_extra_info = NULL;
#ifdef HTFM
    /***** HYPOTHESIS TESTING ********/
    InitHTFM(video, &htfm_stat, newvar, &collect);
    /*********************************/
#endif

    /* With SCD on, pass 1 covers a checkerboard half of the MBs so a scene
       change can be declared early; pass 2 covers the remainder. */
    if ((rateCtrl->scdEnable == 1)
            && ((rateCtrl->frame_rate < 5.0) || (video->sliceHdr->frame_num > MIN_GOP)))
        /* do not try to detect a new scene if low frame rate and too close to previous I-frame */
    {
        incr_i = 2;
        numLoop = 2;
        start_i = 1;
        type_pred = 0; /* for initial candidate selection */
    }
    else
    {
        incr_i = 1;
        numLoop = 1;
        start_i = 0;
        type_pred = 2;
    }

    /* First pass, loop thru half the macroblock */
    /* determine scene change */
    /* Second pass, for the rest of macroblocks */
    NumIntraSearch = 0; // to be intra searched in the encoding loop.
    while (numLoop--)
    {
        for (j = 0; j < mbheight; j++)
        {
            if (incr_i > 1)
                start_i = (start_i == 0 ? 1 : 0) ; /* toggle 0 and 1 */

            offset = pitch * (j << 4) + (start_i << 4);

            mbnum = j * mbwidth + start_i;

            for (i = start_i; i < mbwidth; i += incr_i)
            {
                video->mbNum = mbnum;
                video->currMB = currMB = mblock + mbnum;
                mot_mb_16x16 = mot16x16 + mbnum;

                cur = currInput->YCbCr[0] + offset;

                if (currMB->mb_intra == 0) /* for INTER mode */
                {
#if defined(HTFM)
                    HTFMPrepareCurMB_AVC(encvid, &htfm_stat, cur, pitch);
#else
                    AVCPrepareCurMB(encvid, cur, pitch);
#endif
                    /************************************************************/
                    /******** full-pel 1MV search **********************/

                    AVCMBMotionSearch(encvid, cur, best_cand, i << 4, j << 4, type_pred,
                                      FS_en, &hp_guess);

                    abe_cost = encvid->min_cost[mbnum] = mot_mb_16x16->sad;

                    /* set mbMode and MVs */
                    currMB->mbMode = AVC_P16;
                    currMB->MBPartPredMode[0][0] = AVC_Pred_L0;
                    /* pack (x,y) quarter-pel MV into one 32-bit word and
                       replicate it over all 16 4x4 sub-blocks */
                    mv_uint32 = ((mot_mb_16x16->y) << 16) | ((mot_mb_16x16->x) & 0xffff);
                    for (k = 0; k < 32; k += 2)
                    {
                        currMB->mvL0[k>>1] = mv_uint32;
                    }

                    /* make a decision whether it should be tested for intra or not */
                    if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0)
                    {
                        if (false == IntraDecisionABE(&abe_cost, cur, pitch, true))
                        {
                            intraSearch[mbnum] = 0;
                        }
                        else
                        {
                            NumIntraSearch++;
                            rateCtrl->MADofMB[mbnum] = abe_cost;
                        }
                    }
                    else // boundary MBs, always do intra search
                    {
                        NumIntraSearch++;
                    }

                    totalSAD += (int) rateCtrl->MADofMB[mbnum];//mot_mb_16x16->sad;
                }
                else    /* INTRA update, use for prediction */
                {
                    mot_mb_16x16[0].x = mot_mb_16x16[0].y = 0;

                    /* reset all other MVs to zero */
                    /* mot_mb_16x8, mot_mb_8x16, mot_mb_8x8, etc. */
                    abe_cost = encvid->min_cost[mbnum] = 0x7FFFFFFF; /* max value for int */

                    if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0)
                    {
                        IntraDecisionABE(&abe_cost, cur, pitch, false);

                        rateCtrl->MADofMB[mbnum] = abe_cost;
                        totalSAD += abe_cost;
                    }

                    NumIntraSearch++ ;
                    /* cannot do I16 prediction here because it needs full decoding. */
                    // intraSearch[mbnum] = 1;

                }

                mbnum += incr_i;
                offset += (incr_i << 4);

            } /* for i */
        } /* for j */

        /* since we cannot do intra/inter decision here, the SCD has to be
        based on other criteria such as motion vectors coherency or the SAD */
        if (incr_i > 1 && numLoop) /* scene change on and first loop */
        {
            //if(NumIntraSearch > ((totalMB>>3)<<1) + (totalMB>>3)) /* 75% of 50%MBs */
            if (NumIntraSearch*99 > (48*totalMB)) /* 20% of 50%MBs */
                /* need to do more investigation about this threshold since the NumIntraSearch
                only show potential intra MBs, not the actual one */
            {
                /* scene change: re-encode the whole frame as an I-slice */
                /* we can choose to just encode I_SLICE without IDR */
                //video->nal_unit_type = AVC_NALTYPE_IDR;
                video->nal_unit_type = AVC_NALTYPE_SLICE;
                video->sliceHdr->slice_type = AVC_I_ALL_SLICE;
                video->slice_type = AVC_I_SLICE;
                memset(intraSearch, 1, sizeof(uint8)*totalMB);
                i = totalMB;
                while (i--)
                {
                    mblock[i].mb_intra = 1;
                    encvid->min_cost[i] = 0x7FFFFFFF; /* max value for int */
                }

                /* only half the MBs were measured, so double the total */
                rateCtrl->totalSAD = totalSAD * 2;   /* SAD */

                return ;
            }
        }
        /******** no scene change, continue motion search **********************/
        start_i = 0;
        type_pred++; /* second pass */
    }

    rateCtrl->totalSAD = totalSAD;   /* SAD */

#ifdef HTFM
    /***** HYPOTHESIS TESTING ********/
    if (collect)
    {
        collect = 0;
        UpdateHTFM(encvid, newvar, exp_lamda, &htfm_stat);
    }
    /*********************************/
#endif

    return ;
}
+
/*=====================================================================
    Function:   PaddingEdge
    Date:       09/16/2000
    Purpose:    Pad edge of a Vop.  Replicates the luma border pixels into
                a 16-pixel margin on all four sides of refPic->Sl so that
                the motion search may read slightly outside the picture.
                NOTE(review): the corner/side fills use 4-byte stores into
                the 16-byte margins -- presumably the pitch/margins keep
                those addresses 4-byte aligned; confirm the frame
                allocation guarantees this.
=====================================================================*/

void AVCPaddingEdge(AVCPictureData *refPic)
{
    uint8 *src, *dst;
    int i;
    int pitch, width, height;
    uint32 temp1, temp2;

    width = refPic->width;
    height = refPic->height;
    pitch = refPic->pitch;

    /* pad top */
    src = refPic->Sl;

    temp1 = *src; /* top-left corner */
    temp2 = src[width-1]; /* top-right corner */
    /* replicate each corner byte into all 4 bytes of a word */
    temp1 |= (temp1 << 8);
    temp1 |= (temp1 << 16);
    temp2 |= (temp2 << 8);
    temp2 |= (temp2 << 16);

    dst = src - (pitch << 4); /* 16 rows above the first picture row */

    /* 16-byte left corner fill, first picture row copy, 16-byte right
       corner fill -- this builds one complete padded top row */
    *((uint32*)(dst - 16)) = temp1;
    *((uint32*)(dst - 12)) = temp1;
    *((uint32*)(dst - 8)) = temp1;
    *((uint32*)(dst - 4)) = temp1;

    memcpy(dst, src, width);

    *((uint32*)(dst += width)) = temp2;
    *((uint32*)(dst + 4)) = temp2;
    *((uint32*)(dst + 8)) = temp2;
    *((uint32*)(dst + 12)) = temp2;

    dst = dst - width - 16; /* back to the start of the padded row */

    /* replicate the padded top row into the remaining 15 margin rows */
    i = 15;
    while (i--)
    {
        memcpy(dst + pitch, dst, pitch);
        dst += pitch;
    }

    /* pad sides: for every picture row, fill the 16-byte left and right
       margins with the row's edge pixels */
    dst += (pitch + 16);
    src = dst;
    i = height;
    while (i--)
    {
        temp1 = *src;
        temp2 = src[width-1];
        temp1 |= (temp1 << 8);
        temp1 |= (temp1 << 16);
        temp2 |= (temp2 << 8);
        temp2 |= (temp2 << 16);

        *((uint32*)(dst - 16)) = temp1;
        *((uint32*)(dst - 12)) = temp1;
        *((uint32*)(dst - 8)) = temp1;
        *((uint32*)(dst - 4)) = temp1;

        *((uint32*)(dst += width)) = temp2;
        *((uint32*)(dst + 4)) = temp2;
        *((uint32*)(dst + 8)) = temp2;
        *((uint32*)(dst + 12)) = temp2;

        src += pitch;
        dst = src;
    }

    /* pad bottom: replicate the last padded row (margins included) into
       the 16 rows below the picture */
    dst -= 16;
    i = 16;
    while (i--)
    {
        memcpy(dst, dst - pitch, pitch);
        dst += pitch;
    }


    return ;
}
+
+/*===========================================================================
+ Function: AVCRasterIntraUpdate
+ Date: 2/26/01
+ Purpose: To raster-scan assign INTRA-update .
+ N macroblocks are updated (also was programmable).
+===========================================================================*/
+void AVCRasterIntraUpdate(AVCEncObject *encvid, AVCMacroblock *mblock, int totalMB, int numRefresh)
+{
+ int indx, i;
+
+ indx = encvid->firstIntraRefreshMBIndx;
+ for (i = 0; i < numRefresh && indx < totalMB; i++)
+ {
+ (mblock + indx)->mb_intra = 1;
+ encvid->intraSearch[indx++] = 1;
+ }
+
+ /* if read the end of frame, reset and loop around */
+ if (indx >= totalMB - 1)
+ {
+ indx = 0;
+ while (i < numRefresh && indx < totalMB)
+ {
+ (mblock + indx)->mb_intra = 1;
+ encvid->intraSearch[indx++] = 1;
+ i++;
+ }
+ }
+
+ encvid->firstIntraRefreshMBIndx = indx; /* update with a new value */
+
+ return ;
+}
+
+
+#ifdef HTFM
/* Set up hypothesis-testing fast matching (HTFM) for the current frame.
   Every 30th frame is a "collection" frame: statistics-gathering SAD
   routines are installed and *collect is set so UpdateHTFM() runs later;
   all other frames use the early-termination SAD routines driven by the
   previously computed thresholds.  Both variants need the 4x4-phase
   subsampling offset tables built at the bottom (offset: input frame,
   stride lx; offset2: padded reference, stride rx).
   NOTE(review): the parameter type is VideoEncData* while the rest of this
   file passes AVCEncObject* (and the caller in AVCMotionEstimation passes
   `video`) -- this #ifdef HTFM path looks carried over from the MPEG-4
   encoder and likely does not compile as-is; confirm before enabling. */
void InitHTFM(VideoEncData *encvid, HTFM_Stat *htfm_stat, double *newvar, int *collect)
{
    AVCCommonObj *video = encvid->common;
    int i;
    int lx = video->currPic->width;  // padding
    int lx2 = lx << 1;
    int lx3 = lx2 + lx;
    int rx = video->currPic->pitch;
    int rx2 = rx << 1;
    int rx3 = rx2 + rx;

    int *offset, *offset2;

    /* 4/11/01, collect data every 30 frames, doesn't have to be base layer */
    if (((int)video->sliceHdr->frame_num) % 30 == 1)
    {

        *collect = 1;

        htfm_stat->countbreak = 0;
        htfm_stat->abs_dif_mad_avg = 0;

        for (i = 0; i < 16; i++)
        {
            newvar[i] = 0.0;
        }
//      encvid->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM_Collect;
        encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM_Collect;
        encvid->functionPointer->SAD_MB_HalfPel[0] = NULL;
        encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFM_Collectxh;
        encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFM_Collectyh;
        encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFM_Collectxhyh;
        encvid->sad_extra_info = (void*)(htfm_stat);
        offset = htfm_stat->offsetArray;
        offset2 = htfm_stat->offsetRef;
    }
    else
    {
//      encvid->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM;
        encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM;
        encvid->functionPointer->SAD_MB_HalfPel[0] = NULL;
        encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFMxh;
        encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFMyh;
        encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFMxhyh;
        encvid->sad_extra_info = (void*)(encvid->nrmlz_th);
        /* offsets are stashed right after the 16 thresholds in nrmlz_th */
        offset = encvid->nrmlz_th + 16;
        offset2 = encvid->nrmlz_th + 32;
    }

    /* pseudo-random visiting order of the 16 phases of a 4x4 subsampling
       grid, expressed as byte offsets into the current (input) frame */
    offset[0] = 0;
    offset[1] = lx2 + 2;
    offset[2] = 2;
    offset[3] = lx2;
    offset[4] = lx + 1;
    offset[5] = lx3 + 3;
    offset[6] = lx + 3;
    offset[7] = lx3 + 1;
    offset[8] = lx;
    offset[9] = lx3 + 2;
    offset[10] = lx3 ;
    offset[11] = lx + 2 ;
    offset[12] = 1;
    offset[13] = lx2 + 3;
    offset[14] = lx2 + 1;
    offset[15] = 3;

    /* same visiting order for the padded reference frame (stride rx) */
    offset2[0] = 0;
    offset2[1] = rx2 + 2;
    offset2[2] = 2;
    offset2[3] = rx2;
    offset2[4] = rx + 1;
    offset2[5] = rx3 + 3;
    offset2[6] = rx + 3;
    offset2[7] = rx3 + 1;
    offset2[8] = rx;
    offset2[9] = rx3 + 2;
    offset2[10] = rx3 ;
    offset2[11] = rx + 2 ;
    offset2[12] = 1;
    offset2[13] = rx2 + 3;
    offset2[14] = rx2 + 1;
    offset2[15] = 3;

    return ;
}
+
+void UpdateHTFM(AVCEncObject *encvid, double *newvar, double *exp_lamda, HTFM_Stat *htfm_stat)
+{
+ if (htfm_stat->countbreak == 0)
+ htfm_stat->countbreak = 1;
+
+ newvar[0] = (double)(htfm_stat->abs_dif_mad_avg) / (htfm_stat->countbreak * 16.);
+
+ if (newvar[0] < 0.001)
+ {
+ newvar[0] = 0.001; /* to prevent floating overflow */
+ }
+ exp_lamda[0] = 1 / (newvar[0] * 1.4142136);
+ exp_lamda[1] = exp_lamda[0] * 1.5825;
+ exp_lamda[2] = exp_lamda[0] * 2.1750;
+ exp_lamda[3] = exp_lamda[0] * 3.5065;
+ exp_lamda[4] = exp_lamda[0] * 3.1436;
+ exp_lamda[5] = exp_lamda[0] * 3.5315;
+ exp_lamda[6] = exp_lamda[0] * 3.7449;
+ exp_lamda[7] = exp_lamda[0] * 4.5854;
+ exp_lamda[8] = exp_lamda[0] * 4.6191;
+ exp_lamda[9] = exp_lamda[0] * 5.4041;
+ exp_lamda[10] = exp_lamda[0] * 6.5974;
+ exp_lamda[11] = exp_lamda[0] * 10.5341;
+ exp_lamda[12] = exp_lamda[0] * 10.0719;
+ exp_lamda[13] = exp_lamda[0] * 12.0516;
+ exp_lamda[14] = exp_lamda[0] * 15.4552;
+
+ CalcThreshold(HTFM_Pf, exp_lamda, encvid->nrmlz_th);
+ return ;
+}
+
+
+void CalcThreshold(double pf, double exp_lamda[], int nrmlz_th[])
+{
+ int i;
+ double temp[15];
+ // printf("\nLamda: ");
+
+ /* parametric PREMODELling */
+ for (i = 0; i < 15; i++)
+ {
+ // printf("%g ",exp_lamda[i]);
+ if (pf < 0.5)
+ temp[i] = 1 / exp_lamda[i] * M4VENC_LOG(2 * pf);
+ else
+ temp[i] = -1 / exp_lamda[i] * M4VENC_LOG(2 * (1 - pf));
+ }
+
+ nrmlz_th[15] = 0;
+ for (i = 0; i < 15; i++) /* scale upto no.pixels */
+ nrmlz_th[i] = (int)(temp[i] * ((i + 1) << 4) + 0.5);
+
+ return ;
+}
+
+void HTFMPrepareCurMB_AVC(AVCEncObject *encvid, HTFM_Stat *htfm_stat, uint8 *cur, int pitch)
+{
+ AVCCommonObj *video = encvid->common;
+ uint32 *htfmMB = (uint32*)(encvid->currYMB);
+ uint8 *ptr, byte;
+ int *offset;
+ int i;
+ uint32 word;
+
+ if (((int)video->sliceHdr->frame_num) % 30 == 1)
+ {
+ offset = htfm_stat->offsetArray;
+ }
+ else
+ {
+ offset = encvid->nrmlz_th + 16;
+ }
+
+ for (i = 0; i < 16; i++)
+ {
+ ptr = cur + offset[i];
+ word = ptr[0];
+ byte = ptr[4];
+ word |= (byte << 8);
+ byte = ptr[8];
+ word |= (byte << 16);
+ byte = ptr[12];
+ word |= (byte << 24);
+ *htfmMB++ = word;
+
+ word = *(ptr += (pitch << 2));
+ byte = ptr[4];
+ word |= (byte << 8);
+ byte = ptr[8];
+ word |= (byte << 16);
+ byte = ptr[12];
+ word |= (byte << 24);
+ *htfmMB++ = word;
+
+ word = *(ptr += (pitch << 2));
+ byte = ptr[4];
+ word |= (byte << 8);
+ byte = ptr[8];
+ word |= (byte << 16);
+ byte = ptr[12];
+ word |= (byte << 24);
+ *htfmMB++ = word;
+
+ word = *(ptr += (pitch << 2));
+ byte = ptr[4];
+ word |= (byte << 8);
+ byte = ptr[8];
+ word |= (byte << 16);
+ byte = ptr[12];
+ word |= (byte << 24);
+ *htfmMB++ = word;
+ }
+
+ return ;
+}
+
+
+#endif // HTFM
+
+void AVCPrepareCurMB(AVCEncObject *encvid, uint8 *cur, int pitch)
+{
+ void* tmp = (void*)(encvid->currYMB);
+ uint32 *currYMB = (uint32*) tmp;
+ int i;
+
+ cur -= pitch;
+
+ for (i = 0; i < 16; i++)
+ {
+ *currYMB++ = *((uint32*)(cur += pitch));
+ *currYMB++ = *((uint32*)(cur + 4));
+ *currYMB++ = *((uint32*)(cur + 8));
+ *currYMB++ = *((uint32*)(cur + 12));
+ }
+
+ return ;
+}
+
+#ifdef FIXED_INTERPRED_MODE
+
/* due to the complexity of the predicted motion vector, we may not decide to skip
a macroblock here just yet. */
/* We will find the best motion vector and the best intra prediction mode for each block. */
/* output are
    currMB->NumMbPart, currMB->MbPartWidth, currMB->MbPartHeight,
    currMB->NumSubMbPart[], currMB->SubMbPartWidth[], currMB->SubMbPartHeight,
    currMB->MBPartPredMode[][] (L0 or L1 or BiPred)
    currMB->RefIdx[], currMB->ref_idx_L0[],
    currMB->mvL0[], currMB->mvL1[]
    */

/* Debug-only variant (active when FIXED_INTERPRED_MODE is defined): no
   search is performed; the MB is forced to the compile-time mode/submode
   with the fixed reference index and motion vector. */
AVCEnc_Status AVCMBMotionSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum,
                                int num_pass)
{
    AVCCommonObj *video = encvid->common;
    int mbPartIdx, subMbPartIdx;
    int16 *mv;
    int i;
    int SubMbPartHeight, SubMbPartWidth, NumSubMbPart;

    /* assign value to currMB->MBPartPredMode[][x],subMbMode[],NumSubMbPart[],SubMbPartWidth[],SubMbPartHeight[] */

    currMB->mbMode = FIXED_INTERPRED_MODE;
    currMB->mb_intra = 0;

    /* fill the partition geometry fields for the chosen fixed mode */
    if (currMB->mbMode == AVC_P16)
    {
        currMB->NumMbPart = 1;
        currMB->MbPartWidth = 16;
        currMB->MbPartHeight = 16;
        currMB->SubMbPartHeight[0] = 16;
        currMB->SubMbPartWidth[0] = 16;
        currMB->NumSubMbPart[0] = 1;
    }
    else if (currMB->mbMode == AVC_P16x8)
    {
        currMB->NumMbPart = 2;
        currMB->MbPartWidth = 16;
        currMB->MbPartHeight = 8;
        for (i = 0; i < 2; i++)
        {
            currMB->SubMbPartWidth[i] = 16;
            currMB->SubMbPartHeight[i] = 8;
            currMB->NumSubMbPart[i] = 1;
        }
    }
    else if (currMB->mbMode == AVC_P8x16)
    {
        currMB->NumMbPart = 2;
        currMB->MbPartWidth = 8;
        currMB->MbPartHeight = 16;
        for (i = 0; i < 2; i++)
        {
            currMB->SubMbPartWidth[i] = 8;
            currMB->SubMbPartHeight[i] = 16;
            currMB->NumSubMbPart[i] = 1;
        }
    }
    else if (currMB->mbMode == AVC_P8 || currMB->mbMode == AVC_P8ref0)
    {
        currMB->NumMbPart = 4;
        currMB->MbPartWidth = 8;
        currMB->MbPartHeight = 8;
        if (FIXED_SUBMB_MODE == AVC_8x8)
        {
            SubMbPartHeight = 8;
            SubMbPartWidth = 8;
            NumSubMbPart = 1;
        }
        else if (FIXED_SUBMB_MODE == AVC_8x4)
        {
            SubMbPartHeight = 4;
            SubMbPartWidth = 8;
            NumSubMbPart = 2;
        }
        else if (FIXED_SUBMB_MODE == AVC_4x8)
        {
            SubMbPartHeight = 8;
            SubMbPartWidth = 4;
            NumSubMbPart = 2;
        }
        else if (FIXED_SUBMB_MODE == AVC_4x4)
        {
            SubMbPartHeight = 4;
            SubMbPartWidth = 4;
            NumSubMbPart = 4;
        }

        for (i = 0; i < 4; i++)
        {
            currMB->subMbMode[i] = FIXED_SUBMB_MODE;
            currMB->SubMbPartHeight[i] = SubMbPartHeight;
            currMB->SubMbPartWidth[i] = SubMbPartWidth;
            currMB->NumSubMbPart[i] = NumSubMbPart;
        }
    }
    else /* it's probably intra mode */
    {
        return AVCENC_SUCCESS;
    }

    /* all partitions predict from list 0 with the fixed MV */
    for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++)
    {
        currMB->MBPartPredMode[mbPartIdx][0] = AVC_Pred_L0;
        currMB->ref_idx_L0[mbPartIdx] = FIXED_REF_IDX;
        currMB->RefIdx[mbPartIdx] = video->RefPicList0[FIXED_REF_IDX]->RefIdx;

        for (subMbPartIdx = 0; subMbPartIdx < 4; subMbPartIdx++)
        {
            mv = (int16*)(currMB->mvL0 + (mbPartIdx << 2) + subMbPartIdx);

            *mv++ = FIXED_MVX;
            *mv = FIXED_MVY;
        }
    }

    /* NOTE(review): the search path indexes min_cost as an array
       (encvid->min_cost[mbnum]); this scalar assignment presumably predates
       that change -- confirm the field type before enabling this #ifdef. */
    encvid->min_cost = 0;

    return AVCENC_SUCCESS;
}
+
+#else /* perform the search */
+
/* This option #1 search is very similar to PV's MPEG4 motion search algorithm.
  The search is done in hierarchical manner from 16x16 MB down to smaller and smaller
  partition. At each level, a decision can be made to stop the search if the expected
  prediction gain is not worth the computation. The decision can also be made at the finest
  level for more fullsearch-like behavior with the price of heavier computation. */
/* Full-pel 16x16 motion search for one macroblock: candidate-based spiral
   refinement (or full search for the first frames / when enabled), followed
   by optional half/quarter-pel refinement, then copies the best prediction
   into the current picture.  Results go to encvid->mot16x16[mbnum] (MV in
   quarter-pel units, SAD) and best_cand[0]; *hp_guess seeds the half-pel
   search with the most promising direction. */
void AVCMBMotionSearch(AVCEncObject *encvid, uint8 *cur, uint8 *best_cand[],
                       int i0, int j0, int type_pred, int FS_en, int *hp_guess)
{
    AVCCommonObj *video = encvid->common;
    AVCPictureData *currPic = video->currPic;
    AVCSeqParamSet *currSPS = video->currSeqParams;
    AVCRateControl *rateCtrl = encvid->rateCtrl;
    AVCMacroblock *currMB = video->currMB;
    uint8 *ref, *cand, *ncand;
    void *extra_info = encvid->sad_extra_info;
    int mbnum = video->mbNum;
    int width = currPic->width; /* 6/12/01, must be multiple of 16 */
    int height = currPic->height;
    AVCMV *mot16x16 = encvid->mot16x16;
    int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock;

    int range = rateCtrl->mvRange;

    int lx = currPic->pitch; /* padding */
    int i, j, imin, jmin, ilow, ihigh, jlow, jhigh;
    int d, dmin, dn[9];
    int k;
    int mvx[5], mvy[5];
    int num_can, center_again;
    int last_loc, new_loc = 0;
    int step, max_step = range >> 1;
    int next;

    int cmvx, cmvy; /* estimated predicted MV */
    int lev_idx;
    int lambda_motion = encvid->lambda_motion;
    uint8 *mvbits = encvid->mvbits;
    int mvshift = 2;
    int mvcost;

    int min_sad = 65535;

    ref = video->RefPicList0[DEFAULT_REF_IDX]->Sl; /* origin of actual frame */

    /* have to initialize these params, necessary for interprediction part */
    currMB->NumMbPart = 1;
    currMB->SubMbPartHeight[0] = 16;
    currMB->SubMbPartWidth[0] = 16;
    currMB->NumSubMbPart[0] = 1;
    currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] =
                                currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = DEFAULT_REF_IDX;
    currMB->ref_idx_L1[0] = currMB->ref_idx_L1[1] =
                                currMB->ref_idx_L1[2] = currMB->ref_idx_L1[3] = DEFAULT_REF_IDX;
    currMB->RefIdx[0] = currMB->RefIdx[1] =
                        currMB->RefIdx[2] = currMB->RefIdx[3] = video->RefPicList0[DEFAULT_REF_IDX]->RefIdx;

    cur = encvid->currYMB; /* use smaller memory space for current MB */

    /* find limit of the search (adjusting search range)*/
    lev_idx = mapLev2Idx[currSPS->level_idc];

    /* clamp the search window: the 2047 limit and the level-dependent
       MaxVmvR come from the standard; the -13 / width-3 margins leave the
       two extra pels the 6-tap half-pel filter needs inside the padding */
    /* we can make this part dynamic based on previous statistics */
    ilow = i0 - range;
    if (i0 - ilow > 2047) /* clip to conform with the standard */
    {
        ilow = i0 - 2047;
    }
    if (ilow < -13)  // change it from -15 to -13 because of 6-tap filter needs extra 2 lines.
    {
        ilow = -13;
    }

    ihigh = i0 + range - 1;
    if (ihigh - i0 > 2047) /* clip to conform with the standard */
    {
        ihigh = i0 + 2047;
    }
    if (ihigh > width - 3)
    {
        ihigh = width - 3;  // change from width-1 to width-3 for the same reason as above
    }

    jlow = j0 - range;
    if (j0 - jlow > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */
    {
        jlow = j0 - MaxVmvR[lev_idx] + 1;
    }
    if (jlow < -13)  // same reason as above
    {
        jlow = -13;
    }

    jhigh = j0 + range - 1;
    if (jhigh - j0 > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */
    {
        jhigh = j0 + MaxVmvR[lev_idx] - 1;
    }
    if (jhigh > height - 3) // same reason as above
    {
        jhigh = height - 3;
    }

    /* find initial motion vector & predicted MV*/
    AVCCandidateSelection(mvx, mvy, &num_can, i0 >> 4, j0 >> 4, encvid, type_pred, &cmvx, &cmvy);

    imin = i0;
    jmin = j0; /* needed for fullsearch */
    ncand = ref + i0 + j0 * lx;

    /* for first row of MB, fullsearch can be used */
    if (FS_en)
    {
        *hp_guess = 0; /* no guess for fast half-pel */

        dmin =  AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy);

        ncand = ref + imin + jmin * lx;
    }
    else
    {   /* fullsearch the top row to only upto (0,3) MB */
        /* upto 30% complexity saving with the same complexity */
        if (video->PrevRefFrameNum == 0 && j0 == 0 && i0 <= 64 && type_pred != 1)
        {
            *hp_guess = 0; /* no guess for fast half-pel */
            dmin =  AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy);
            ncand = ref + imin + jmin * lx;
        }
        else
        {
            /************** initialize candidate **************************/

            dmin = 65535;

            /* check if all are equal */
            /* candidate cost = SAD + lambda-weighted MV bit cost */
            if (num_can == ALL_CAND_EQUAL)
            {
                i = i0 + mvx[0];
                j = j0 + mvy[0];

                if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
                {
                    cand = ref + i + j * lx;

                    d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
                    mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
                    d +=  mvcost;

                    if (d < dmin)
                    {
                        dmin = d;
                        imin = i;
                        jmin = j;
                        ncand = cand;
                        min_sad = d - mvcost; // for rate control
                    }
                }
            }
            else
            {
                /************** evaluate unique candidates **********************/
                for (k = 0; k < num_can; k++)
                {
                    i = i0 + mvx[k];
                    j = j0 + mvy[k];

                    if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
                    {
                        cand = ref + i + j * lx;
                        d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
                        mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
                        d +=  mvcost;

                        if (d < dmin)
                        {
                            dmin = d;
                            imin = i;
                            jmin = j;
                            ncand = cand;
                            min_sad = d - mvcost; // for rate control
                        }
                    }
                }
            }

            /******************* local refinement ***************************/
            /* spiral walk of the 8 neighbors around the running best
               position, re-centering until no neighbor improves or the
               step budget is exhausted; dn[] caches neighbor SADs so the
               half-pel stage can pick a starting direction */
            center_again = 0;
            last_loc = new_loc = 0;
            //          ncand = ref + jmin*lx + imin;  /* center of the search */
            step = 0;
            dn[0] = dmin;
            while (!center_again && step <= max_step)
            {

                AVCMoveNeighborSAD(dn, last_loc);

                center_again = 1;
                i = imin;
                j = jmin - 1;
                cand = ref + i + j * lx;

                /*  starting from [0,-1] */
                /* spiral check one step at a time*/
                for (k = 2; k <= 8; k += 2)
                {
                    if (!tab_exclude[last_loc][k]) /* exclude last step computation */
                    {       /* not already computed */
                        if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
                        {
                            d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
                            mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
                            d += mvcost;

                            dn[k] = d; /* keep it for half pel use */

                            if (d < dmin)
                            {
                                ncand = cand;
                                dmin = d;
                                imin = i;
                                jmin = j;
                                center_again = 0;
                                new_loc = k;
                                min_sad = d - mvcost; // for rate control
                            }
                        }
                    }
                    if (k == 8)  /* end side search*/
                    {
                        if (!center_again)
                        {
                            k = -1; /* start diagonal search */
                            cand -= lx;
                            j--;
                        }
                    }
                    else
                    {
                        next = refine_next[k][0];
                        i += next;
                        cand += next;
                        next = refine_next[k][1];
                        j += next;
                        cand += lx * next;
                    }
                }
                last_loc = new_loc;
                step ++;
            }
            if (!center_again)
                AVCMoveNeighborSAD(dn, last_loc);

            *hp_guess = AVCFindMin(dn);

            encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0;
        }
    }

    /* store full-pel result; MVs are kept in quarter-pel units */
    mot16x16[mbnum].sad = dmin;
    mot16x16[mbnum].x = (imin - i0) << 2;
    mot16x16[mbnum].y = (jmin - j0) << 2;
    best_cand[0] = ncand;

    if (rateCtrl->subPelEnable)  // always enable half-pel search
    {
        /* find half-pel resolution motion vector */
        min_sad = AVCFindHalfPelMB(encvid, cur, mot16x16 + mbnum, best_cand[0], i0, j0, *hp_guess, cmvx, cmvy);

        encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0;


        if (encvid->best_qpel_pos == -1)
        {
            ncand = encvid->hpel_cand[encvid->best_hpel_pos];
        }
        else
        {
            ncand = encvid->qpel_cand[encvid->best_qpel_pos];
        }
    }
    else
    {
        encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0;
    }

    /** do motion comp here for now */
    ref = currPic->Sl + i0 + j0 * lx;
    /* copy from the best result to current Picture */
    /* sub-pel candidate buffers have a 24-byte pitch, hence the +8 skip */
    for (j = 0; j < 16; j++)
    {
        for (i = 0; i < 16; i++)
        {
            *ref++ = *ncand++;
        }
        ref += (lx - 16);
        ncand += 8;
    }

    return ;
}
+
+#endif
+
+/*===============================================================================
+ Function: AVCFullSearch
+ Date: 09/16/2000
+ Purpose: Perform full-search motion estimation over the range of search
+ region in a spiral-outward manner.
+ Input/Output: VideoEncData, current Vol, previous Vop, pointer to the left corner of
+ current VOP, current coord (also output), boundaries.
+===============================================================================*/
/* Exhaustive spiral-outward integer-pel motion search over the window
 * [ilow..ihigh] x [jlow..jhigh].  prev/cur point at the reference and current
 * luma data; (*imin, *jmin) holds the predicted start position on entry and
 * the best integer-pel position on exit.  (cmvx, cmvy) is the predicted MV
 * used for the rate term of the cost.  Returns the minimum cost
 * (SAD + lambda-weighted MV bits) and records the best pure SAD / 256 as the
 * per-MB MAD for rate control. */
int AVCFullSearch(AVCEncObject *encvid, uint8 *prev, uint8 *cur,
                  int *imin, int *jmin, int ilow, int ihigh, int jlow, int jhigh,
                  int cmvx, int cmvy)
{
    int range = encvid->rateCtrl->mvRange; /* search radius in integer pels */
    AVCPictureData *currPic = encvid->common->currPic;
    uint8 *cand;
    int i, j, k, l;
    int d, dmin;
    int i0 = *imin; /* current position */
    int j0 = *jmin;
    int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock;
    void *extra_info = encvid->sad_extra_info;
    int lx = currPic->pitch; /* with padding */

    int offset = i0 + j0 * lx;

    int lambda_motion = encvid->lambda_motion;
    /* NOTE: mvbits looks unused here, but the MV_COST macro references it by
       name — do not remove or rename it. */
    uint8 *mvbits = encvid->mvbits;
    int mvshift = 2; /* quarter-pel shift for the MV rate term */
    int mvcost;
    int min_sad;

    /* cost at the starting (predicted) position; the third argument packs
       (presumably an early-out SAD threshold) in the high 16 bits and the
       pitch in the low 16 — see SAD_Macroblock */
    cand = prev + offset;

    dmin = (*SAD_Macroblock)(cand, cur, (65535 << 16) | lx, (void*)extra_info);
    mvcost = MV_COST(lambda_motion, mvshift, 0, 0, cmvx, cmvy);
    min_sad = dmin; /* pure SAD, without the rate term */
    dmin += mvcost;

    /* perform spiral search: ring of radius k has 8k perimeter points,
       walked right along the top edge, then down, left, and up */
    for (k = 1; k <= range; k++)
    {

        i = i0 - k; /* start at the top-left corner of the ring */
        j = j0 - k;

        cand = prev + i + j * lx;

        for (l = 0; l < 8*k; l++)
        {
            /* no need for boundary checking again */
            if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
            {
                d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, (void*)extra_info);
                mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
                d += mvcost;

                if (d < dmin)
                {
                    dmin = d;
                    *imin = i;
                    *jmin = j;
                    min_sad = d - mvcost; /* keep pure SAD for rate control */
                }
            }

            /* advance along the current edge of the ring */
            if (l < (k << 1))          /* top edge: move right */
            {
                i++;
                cand++;
            }
            else if (l < (k << 2))     /* right edge: move down */
            {
                j++;
                cand += lx;
            }
            else if (l < ((k << 2) + (k << 1))) /* bottom edge: move left */
            {
                i--;
                cand--;
            }
            else                       /* left edge: move up */
            {
                j--;
                cand -= lx;
            }
        }
    }

    encvid->rateCtrl->MADofMB[encvid->common->mbNum] = (min_sad / 256.0); // for rate control

    return dmin;
}
+
+/*===============================================================================
+ Function: AVCCandidateSelection
+ Date: 09/16/2000
+ Purpose: Fill up the list of candidate using spatio-temporal correlation
+ among neighboring blocks.
+ Input/Output: type_pred = 0: first pass, 1: second pass, or no SCD
+ Modified: , 09/23/01, get rid of redundant candidates before passing back.
+ , 09/11/07, added return for modified predicted MV, this will be
+ needed for both fast search and fullsearch.
+===============================================================================*/
+
+void AVCCandidateSelection(int *mvx, int *mvy, int *num_can, int imb, int jmb,
+ AVCEncObject *encvid, int type_pred, int *cmvx, int *cmvy)
+{
+ AVCCommonObj *video = encvid->common;
+ AVCMV *mot16x16 = encvid->mot16x16;
+ AVCMV *pmot;
+ int mbnum = video->mbNum;
+ int mbwidth = video->PicWidthInMbs;
+ int mbheight = video->PicHeightInMbs;
+ int i, j, same, num1;
+
+ /* this part is for predicted MV */
+ int pmvA_x = 0, pmvA_y = 0, pmvB_x = 0, pmvB_y = 0, pmvC_x = 0, pmvC_y = 0;
+ int availA = 0, availB = 0, availC = 0;
+
+ *num_can = 0;
+
+ if (video->PrevRefFrameNum != 0) // previous frame is an IDR frame
+ {
+ /* Spatio-Temporal Candidate (five candidates) */
+ if (type_pred == 0) /* first pass */
+ {
+ pmot = &mot16x16[mbnum]; /* same coordinate previous frame */
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ if (imb >= (mbwidth >> 1) && imb > 0) /*left neighbor previous frame */
+ {
+ pmot = &mot16x16[mbnum-1];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ else if (imb + 1 < mbwidth) /*right neighbor previous frame */
+ {
+ pmot = &mot16x16[mbnum+1];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+
+ if (jmb < mbheight - 1) /*bottom neighbor previous frame */
+ {
+ pmot = &mot16x16[mbnum+mbwidth];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ else if (jmb > 0) /*upper neighbor previous frame */
+ {
+ pmot = &mot16x16[mbnum-mbwidth];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+
+ if (imb > 0 && jmb > 0) /* upper-left neighbor current frame*/
+ {
+ pmot = &mot16x16[mbnum-mbwidth-1];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ if (jmb > 0 && imb < mbheight - 1) /* upper right neighbor current frame*/
+ {
+ pmot = &mot16x16[mbnum-mbwidth+1];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ }
+ else /* second pass */
+ /* original ST1 algorithm */
+ {
+ pmot = &mot16x16[mbnum]; /* same coordinate previous frame */
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+
+ if (imb > 0) /*left neighbor current frame */
+ {
+ pmot = &mot16x16[mbnum-1];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ if (jmb > 0) /*upper neighbor current frame */
+ {
+ pmot = &mot16x16[mbnum-mbwidth];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ if (imb < mbwidth - 1) /*right neighbor previous frame */
+ {
+ pmot = &mot16x16[mbnum+1];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ if (jmb < mbheight - 1) /*bottom neighbor previous frame */
+ {
+ pmot = &mot16x16[mbnum+mbwidth];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ }
+
+ /* get predicted MV */
+ if (imb > 0) /* get MV from left (A) neighbor either on current or previous frame */
+ {
+ availA = 1;
+ pmot = &mot16x16[mbnum-1];
+ pmvA_x = pmot->x;
+ pmvA_y = pmot->y;
+ }
+
+ if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */
+ {
+ availB = 1;
+ pmot = &mot16x16[mbnum-mbwidth];
+ pmvB_x = pmot->x;
+ pmvB_y = pmot->y;
+
+ availC = 1;
+
+ if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */
+ {
+ pmot = &mot16x16[mbnum-mbwidth+1];
+ }
+ else /* get MV from top-left (D) neighbor of current frame */
+ {
+ pmot = &mot16x16[mbnum-mbwidth-1];
+ }
+ pmvC_x = pmot->x;
+ pmvC_y = pmot->y;
+ }
+
+ }
+ else /* only Spatial Candidate (four candidates)*/
+ {
+ if (type_pred == 0) /*first pass*/
+ {
+ if (imb > 1) /* neighbor two blocks away to the left */
+ {
+ pmot = &mot16x16[mbnum-2];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ if (imb > 0 && jmb > 0) /* upper-left neighbor */
+ {
+ pmot = &mot16x16[mbnum-mbwidth-1];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ if (jmb > 0 && imb < mbheight - 1) /* upper right neighbor */
+ {
+ pmot = &mot16x16[mbnum-mbwidth+1];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+
+ /* get predicted MV */
+ if (imb > 1) /* get MV from 2nd left (A) neighbor either of current frame */
+ {
+ availA = 1;
+ pmot = &mot16x16[mbnum-2];
+ pmvA_x = pmot->x;
+ pmvA_y = pmot->y;
+ }
+
+ if (jmb > 0 && imb > 0) /* get MV from top-left (B) neighbor of current frame */
+ {
+ availB = 1;
+ pmot = &mot16x16[mbnum-mbwidth-1];
+ pmvB_x = pmot->x;
+ pmvB_y = pmot->y;
+ }
+
+ if (jmb > 0 && imb < mbwidth - 1)
+ {
+ availC = 1;
+ pmot = &mot16x16[mbnum-mbwidth+1];
+ pmvC_x = pmot->x;
+ pmvC_y = pmot->y;
+ }
+ }
+//#ifdef SCENE_CHANGE_DETECTION
+ /* second pass (ST2 algorithm)*/
+ else
+ {
+ if (type_pred == 1) /* 4/7/01 */
+ {
+ if (imb > 0) /*left neighbor current frame */
+ {
+ pmot = &mot16x16[mbnum-1];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ if (jmb > 0) /*upper neighbor current frame */
+ {
+ pmot = &mot16x16[mbnum-mbwidth];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ if (imb < mbwidth - 1) /*right neighbor current frame */
+ {
+ pmot = &mot16x16[mbnum+1];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ if (jmb < mbheight - 1) /*bottom neighbor current frame */
+ {
+ pmot = &mot16x16[mbnum+mbwidth];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ }
+ //#else
+ else /* original ST1 algorithm */
+ {
+ if (imb > 0) /*left neighbor current frame */
+ {
+ pmot = &mot16x16[mbnum-1];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+
+ if (jmb > 0) /*upper-left neighbor current frame */
+ {
+ pmot = &mot16x16[mbnum-mbwidth-1];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+
+ }
+ if (jmb > 0) /*upper neighbor current frame */
+ {
+ pmot = &mot16x16[mbnum-mbwidth];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+
+ if (imb < mbheight - 1) /*upper-right neighbor current frame */
+ {
+ pmot = &mot16x16[mbnum-mbwidth+1];
+ mvx[(*num_can)] = (pmot->x) >> 2;
+ mvy[(*num_can)++] = (pmot->y) >> 2;
+ }
+ }
+ }
+
+ /* get predicted MV */
+ if (imb > 0) /* get MV from left (A) neighbor either on current or previous frame */
+ {
+ availA = 1;
+ pmot = &mot16x16[mbnum-1];
+ pmvA_x = pmot->x;
+ pmvA_y = pmot->y;
+ }
+
+ if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */
+ {
+ availB = 1;
+ pmot = &mot16x16[mbnum-mbwidth];
+ pmvB_x = pmot->x;
+ pmvB_y = pmot->y;
+
+ availC = 1;
+
+ if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */
+ {
+ pmot = &mot16x16[mbnum-mbwidth+1];
+ }
+ else /* get MV from top-left (D) neighbor of current frame */
+ {
+ pmot = &mot16x16[mbnum-mbwidth-1];
+ }
+ pmvC_x = pmot->x;
+ pmvC_y = pmot->y;
+ }
+ }
+//#endif
+ }
+
+ /* 3/23/01, remove redundant candidate (possible k-mean) */
+ num1 = *num_can;
+ *num_can = 1;
+ for (i = 1; i < num1; i++)
+ {
+ same = 0;
+ j = 0;
+ while (!same && j < *num_can)
+ {
+#if (CANDIDATE_DISTANCE==0)
+ if (mvx[i] == mvx[j] && mvy[i] == mvy[j])
+#else
+ // modified k-mean, 3/24/01, shouldn't be greater than 3
+ if (AVC_ABS(mvx[i] - mvx[j]) + AVC_ABS(mvy[i] - mvy[j]) < CANDIDATE_DISTANCE)
+#endif
+ same = 1;
+ j++;
+ }
+ if (!same)
+ {
+ mvx[*num_can] = mvx[i];
+ mvy[*num_can] = mvy[i];
+ (*num_can)++;
+ }
+ }
+
+ if (num1 == 5 && *num_can == 1)
+ *num_can = ALL_CAND_EQUAL; /* all are equal */
+
+ /* calculate predicted MV */
+
+ if (availA && !(availB || availC))
+ {
+ *cmvx = pmvA_x;
+ *cmvy = pmvA_y;
+ }
+ else
+ {
+ *cmvx = AVC_MEDIAN(pmvA_x, pmvB_x, pmvC_x);
+ *cmvy = AVC_MEDIAN(pmvA_y, pmvB_y, pmvC_y);
+ }
+
+ return ;
+}
+
+
+/*************************************************************
+ Function: AVCMoveNeighborSAD
+ Date: 3/27/01
+ Purpose: Move neighboring SAD around when center has shifted
+*************************************************************/
+
/* Re-center the ring of neighboring SAD values after the search center moved
 * to position new_loc (0 = no move, 1..8 = the eight surrounding positions).
 * SADs still valid around the new center are carried over; every other slot
 * is reset to 65536 ("unknown"), and dn[0] becomes the SAD of the new center. */
void AVCMoveNeighborSAD(int dn[], int new_loc)
{
    /* For each new center, {dst, src} pairs of neighbor slots whose SADs stay
       valid after re-centering; -1 terminates each list.  Derived from the
       3x3 spiral position layout used by the refinement loop. */
    static const int remap[9][12] =
    {
        /* 0 */ { -1 },
        /* 1 */ { 4, 2,  5, 0,  6, 8,  -1 },
        /* 2 */ { 4, 3,  5, 4,  6, 0,  7, 8,  8, 1,  -1 },
        /* 3 */ { 6, 4,  7, 0,  8, 2,  -1 },
        /* 4 */ { 1, 2,  2, 3,  6, 5,  7, 6,  8, 0,  -1 },
        /* 5 */ { 1, 0,  2, 4,  8, 6,  -1 },
        /* 6 */ { 1, 8,  2, 0,  3, 4,  4, 5,  8, 7,  -1 },
        /* 7 */ { 2, 8,  3, 0,  4, 6,  -1 },
        /* 8 */ { 2, 1,  3, 2,  4, 0,  5, 6,  6, 7,  -1 }
    };
    int tmp[9];
    int k;
    const int *map = remap[new_loc];

    /* snapshot the old ring and invalidate every slot */
    for (k = 0; k < 9; k++)
    {
        tmp[k] = dn[k];
        dn[k] = 65536;
    }

    /* carry over the SADs that remain adjacent to the new center */
    for (k = 0; map[k] >= 0; k += 2)
    {
        dn[map[k]] = tmp[map[k+1]];
    }

    /* the old SAD at new_loc is the SAD of the new center */
    dn[0] = tmp[new_loc];

    return ;
}
+
+/* 3/28/01, find minimal of dn[9] */
+
/* Return the index (1..8) of the smallest value among dn[1]..dn[8]; on ties
 * the lowest index wins.  dn[0] (the center SAD) is deliberately excluded. */
int AVCFindMin(int dn[])
{
    int best = 1;
    int i;

    for (i = 2; i < 9; i++)
    {
        if (dn[i] < dn[best])
        {
            best = i;
        }
    }

    return best;
}
+
+
+
diff --git a/media/libstagefright/codecs/avc/enc/src/rate_control.cpp b/media/libstagefright/codecs/avc/enc/src/rate_control.cpp
new file mode 100644
index 0000000..15b55fb
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/rate_control.cpp
@@ -0,0 +1,981 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+#include "avcenc_lib.h"
+#include <math.h>
+
+/* rate control variables */
+#define RC_MAX_QUANT 51
+#define RC_MIN_QUANT 0 //cap to 10 to prevent rate fluctuation
+
+#define MAD_MIN 1 /* handle the case of division by zero in RC */
+
+
+/* local functions */
+double QP2Qstep(int QP);
+int Qstep2QP(double Qstep);
+
+double ComputeFrameMAD(AVCCommonObj *video, AVCRateControl *rateCtrl);
+
+void targetBitCalculation(AVCEncObject *encvid, AVCCommonObj *video, AVCRateControl *rateCtrl, MultiPass *pMP);
+
+void calculateQuantizer_Multipass(AVCEncObject *encvid, AVCCommonObj *video,
+ AVCRateControl *rateCtrl, MultiPass *pMP);
+
+void updateRC_PostProc(AVCRateControl *rateCtrl, MultiPass *pMP);
+
+void AVCSaveRDSamples(MultiPass *pMP, int counter_samples);
+
+void updateRateControl(AVCRateControl *rateControl, int nal_type);
+
/* Return the frame-level quantization parameter most recently chosen by the
 * rate control (rateCtrl->Qc). */
int GetAvgFrameQP(AVCRateControl *rateCtrl)
{
    return rateCtrl->Qc;
}
+
/* Decide whether the frame captured at modTime (milliseconds) should be
 * encoded and, if so, assign it a frame number and a slice/NAL type: IDR for
 * the very first frame and at every idrPeriod (or MaxFrameNum wrap), P-slice
 * otherwise.  Returns AVCENC_FAIL for frames that arrive late or must be
 * skipped to hold the target bit rate; the caller then drops the frame. */
AVCEnc_Status RCDetermineFrameNum(AVCEncObject *encvid, AVCRateControl *rateCtrl, uint32 modTime, uint *frameNum)
{
    AVCCommonObj *video = encvid->common;
    AVCSliceHeader *sliceHdr = video->sliceHdr;
    uint32 modTimeRef = encvid->modTimeRef;
    int32 currFrameNum ;
    int frameInc;


    /* check with the buffer fullness to make sure that we have enough bits to encode this frame */
    /* we can use a threshold to guarantee minimum picture quality */
    /**********************************/

    /* for now, the default is to encode every frame, To Be Changed */
    if (rateCtrl->first_frame)
    {
        /* first frame anchors the timestamp reference and is always an IDR */
        encvid->modTimeRef = modTime;
        encvid->wrapModTime = 0;
        encvid->prevFrameNum = 0;
        encvid->prevProcFrameNum = 0;

        *frameNum = 0;

        /* set frame type to IDR-frame */
        video->nal_unit_type = AVC_NALTYPE_IDR;
        sliceHdr->slice_type = AVC_I_ALL_SLICE;
        video->slice_type = AVC_I_SLICE;

        return AVCENC_SUCCESS;
    }
    else
    {
        if (modTime < modTimeRef) /* modTime wrapped around */
        {
            encvid->wrapModTime += ((uint32)0xFFFFFFFF - modTimeRef) + 1;
            encvid->modTimeRef = modTimeRef = 0;
        }
        modTime += encvid->wrapModTime; /* wrapModTime is non zero after wrap-around */

        /* convert the capture time (ms) to a frame index; the +200 biases the
           truncating division so small timestamp jitter does not shift the
           frame number */
        currFrameNum = (int32)(((modTime - modTimeRef) * rateCtrl->frame_rate + 200) / 1000); /* add small roundings */

        if (currFrameNum <= (int32)encvid->prevProcFrameNum)
        {
            return AVCENC_FAIL; /* this is a late frame do not encode it */
        }

        frameInc = currFrameNum - encvid->prevProcFrameNum;

        if (frameInc < rateCtrl->skip_next_frame + 1)
        {
            return AVCENC_FAIL; /* frame skip required to maintain the target bit rate. */
        }

        /* charge the VBV model for any frames dropped beyond the requested skip */
        RCUpdateBuffer(video, rateCtrl, frameInc - rateCtrl->skip_next_frame);  /* in case more frames dropped */

        *frameNum = currFrameNum;

        /* This part would be similar to DetermineVopType of m4venc */
        if ((*frameNum >= (uint)rateCtrl->idrPeriod && rateCtrl->idrPeriod > 0) || (*frameNum > video->MaxFrameNum)) /* first frame or IDR*/
        {
            /* set frame type to IDR-frame */
            if (rateCtrl->idrPeriod)
            {
                /* shift the time reference forward by one IDR period so frame
                   numbering restarts from the new IDR */
                encvid->modTimeRef += (uint32)(rateCtrl->idrPeriod * 1000 / rateCtrl->frame_rate);
                *frameNum -= rateCtrl->idrPeriod;
            }
            else
            {
                encvid->modTimeRef += (uint32)(video->MaxFrameNum * 1000 / rateCtrl->frame_rate);
                *frameNum -= video->MaxFrameNum;
            }

            video->nal_unit_type = AVC_NALTYPE_IDR;
            sliceHdr->slice_type = AVC_I_ALL_SLICE;
            video->slice_type = AVC_I_SLICE;
            encvid->prevProcFrameNum = *frameNum; /* rebased frame number */
        }
        else
        {
            video->nal_unit_type = AVC_NALTYPE_SLICE;
            sliceHdr->slice_type = AVC_P_ALL_SLICE;
            video->slice_type = AVC_P_SLICE;
            encvid->prevProcFrameNum = currFrameNum;
        }

    }

    return AVCENC_SUCCESS;
}
+
+void RCUpdateBuffer(AVCCommonObj *video, AVCRateControl *rateCtrl, int frameInc)
+{
+ int tmp;
+ MultiPass *pMP = rateCtrl->pMP;
+
+ OSCL_UNUSED_ARG(video);
+
+ if (rateCtrl->rcEnable == TRUE)
+ {
+ if (frameInc > 1)
+ {
+ tmp = rateCtrl->bitsPerFrame * (frameInc - 1);
+ rateCtrl->VBV_fullness -= tmp;
+ pMP->counter_BTsrc += 10 * (frameInc - 1);
+
+ /* Check buffer underflow */
+ if (rateCtrl->VBV_fullness < rateCtrl->low_bound)
+ {
+ rateCtrl->VBV_fullness = rateCtrl->low_bound; // -rateCtrl->Bs/2;
+ rateCtrl->TMN_W = rateCtrl->VBV_fullness - rateCtrl->low_bound;
+ pMP->counter_BTsrc = pMP->counter_BTdst + (int)((OsclFloat)(rateCtrl->Bs / 2 - rateCtrl->low_bound) / 2.0 / (pMP->target_bits_per_frame / 10));
+ }
+ }
+ }
+}
+
+
/* Allocate and initialize the rate-control state: the per-MB MAD array and,
 * when rate control is enabled, the MultiPass structure with its 30x32 pool
 * of rate-distortion samples, the VBV buffer model, and the initial QP.
 * Returns AVCENC_MEMORY_FAIL (after freeing anything already allocated via
 * CleanupRateControlModule) if any allocation fails. */
AVCEnc_Status InitRateControlModule(AVCHandle *avcHandle)
{
    AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
    AVCCommonObj *video = encvid->common;
    AVCRateControl *rateCtrl = encvid->rateCtrl;
    double L1, L2, L3, bpp;
    int qp;
    int i, j;

    rateCtrl->basicUnit = video->PicSizeInMbs;

    /* one MAD slot per macroblock, filled during motion estimation */
    rateCtrl->MADofMB = (double*) avcHandle->CBAVC_Malloc(encvid->avcHandle->userData,
                        video->PicSizeInMbs * sizeof(double), DEFAULT_ATTR);

    if (!rateCtrl->MADofMB)
    {
        goto CLEANUP_RC;
    }

    if (rateCtrl->rcEnable == TRUE)
    {
        rateCtrl->pMP = (MultiPass*) avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, sizeof(MultiPass), DEFAULT_ATTR);
        if (!rateCtrl->pMP)
        {
            goto CLEANUP_RC;
        }
        memset(rateCtrl->pMP, 0, sizeof(MultiPass));
        rateCtrl->pMP->encoded_frames = -1; /* forget about the very first I frame */

        /* RDInfo **pRDSamples: 30 frames x 32 samples per frame */
        rateCtrl->pMP->pRDSamples = (RDInfo **)avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, (30 * sizeof(RDInfo *)), DEFAULT_ATTR);
        if (!rateCtrl->pMP->pRDSamples)
        {
            goto CLEANUP_RC;
        }

        for (i = 0; i < 30; i++)
        {
            rateCtrl->pMP->pRDSamples[i] = (RDInfo *)avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, (32 * sizeof(RDInfo)), DEFAULT_ATTR);
            if (!rateCtrl->pMP->pRDSamples[i])
            {
                goto CLEANUP_RC;
            }
            for (j = 0; j < 32; j++)    memset(&(rateCtrl->pMP->pRDSamples[i][j]), 0, sizeof(RDInfo));
        }
        /* RD-sample window of roughly one second, clamped to [5, 30] frames */
        rateCtrl->pMP->frameRange = (int)(rateCtrl->frame_rate * 1.0); /* 1.0s time frame*/
        rateCtrl->pMP->frameRange = AVC_MAX(rateCtrl->pMP->frameRange, 5);
        rateCtrl->pMP->frameRange = AVC_MIN(rateCtrl->pMP->frameRange, 30);

        rateCtrl->pMP->framePos = -1;


        rateCtrl->bitsPerFrame = (int32)(rateCtrl->bitRate / rateCtrl->frame_rate);

        /* BX rate control */
        rateCtrl->skip_next_frame = 0; /* must be initialized */

        rateCtrl->Bs = rateCtrl->cpbSize;
        rateCtrl->TMN_W = 0;
        rateCtrl->VBV_fullness = (int)(rateCtrl->Bs * 0.5); /* rateCtrl->Bs */
        rateCtrl->encoded_frames = 0;

        rateCtrl->TMN_TH = rateCtrl->bitsPerFrame;

        /* allowed bit variance, expressed in 1/10-frame-budget units */
        rateCtrl->max_BitVariance_num = (int)((OsclFloat)(rateCtrl->Bs - rateCtrl->VBV_fullness) / (rateCtrl->bitsPerFrame / 10.0)) - 5;
        if (rateCtrl->max_BitVariance_num < 0) rateCtrl->max_BitVariance_num += 5;

        // Set the initial buffer fullness
        /* According to the spec, the initial buffer fullness needs to be set to 1/3 */
        rateCtrl->VBV_fullness = (int)(rateCtrl->Bs / 3.0 - rateCtrl->Bs / 2.0); /* the buffer range is [-Bs/2, Bs/2] */
        rateCtrl->pMP->counter_BTsrc = (int)((rateCtrl->Bs / 2.0 - rateCtrl->Bs / 3.0) / (rateCtrl->bitsPerFrame / 10.0));
        rateCtrl->TMN_W = (int)(rateCtrl->VBV_fullness + rateCtrl->pMP->counter_BTsrc * (rateCtrl->bitsPerFrame / 10.0));

        rateCtrl->low_bound = -rateCtrl->Bs / 2;
        rateCtrl->VBV_fullness_offset = 0;

        /* Setting the bitrate and framerate */
        rateCtrl->pMP->bitrate = rateCtrl->bitRate;
        rateCtrl->pMP->framerate = rateCtrl->frame_rate;
        rateCtrl->pMP->target_bits_per_frame = rateCtrl->pMP->bitrate / rateCtrl->pMP->framerate;

        /*compute the initial QP from bits-per-pixel, with width-dependent thresholds */
        bpp = 1.0 * rateCtrl->bitRate / (rateCtrl->frame_rate * (video->PicSizeInMbs << 8));
        if (video->PicWidthInSamplesL == 176) /* QCIF-width */
        {
            L1 = 0.1;
            L2 = 0.3;
            L3 = 0.6;
        }
        else if (video->PicWidthInSamplesL == 352) /* CIF-width */
        {
            L1 = 0.2;
            L2 = 0.6;
            L3 = 1.2;
        }
        else /* larger resolutions */
        {
            L1 = 0.6;
            L2 = 1.4;
            L3 = 2.4;
        }

        if (rateCtrl->initQP == 0)
        {
            /* a larger bit budget per pixel permits a lower starting QP */
            if (bpp <= L1)
                qp = 35;
            else if (bpp <= L2)
                qp = 25;
            else if (bpp <= L3)
                qp = 20;
            else
                qp = 15;
            rateCtrl->initQP = qp;
        }

        rateCtrl->Qc = rateCtrl->initQP;
    }

    return AVCENC_SUCCESS;

CLEANUP_RC:

    /* free whatever was allocated before the failure */
    CleanupRateControlModule(avcHandle);
    return AVCENC_MEMORY_FAIL;

}
+
+
/* Free every rate-control allocation made by InitRateControlModule.  Safe to
 * call on a partially initialized module — this is also the error-cleanup
 * path for a failed init. */
void CleanupRateControlModule(AVCHandle *avcHandle)
{
    AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
    AVCRateControl *rateCtrl = encvid->rateCtrl;
    int i;

    if (rateCtrl->MADofMB)
    {
        /* NOTE(review): CBAVC_Free appears to take the address as an int;
           casting a pointer to int truncates on LP64 targets — confirm the
           callback signature before building for 64-bit. */
        avcHandle->CBAVC_Free(avcHandle->userData, (int)(rateCtrl->MADofMB));
    }

    if (rateCtrl->pMP)
    {
        if (rateCtrl->pMP->pRDSamples)
        {
            /* free the 30 per-frame RD-sample rows, then the row table */
            for (i = 0; i < 30; i++)
            {
                if (rateCtrl->pMP->pRDSamples[i])
                {
                    avcHandle->CBAVC_Free(avcHandle->userData, (int)rateCtrl->pMP->pRDSamples[i]);
                }
            }
            avcHandle->CBAVC_Free(avcHandle->userData, (int)rateCtrl->pMP->pRDSamples);
        }
        avcHandle->CBAVC_Free(avcHandle->userData, (int)(rateCtrl->pMP));
    }

    return ;
}
+
/* Per-GOP rate-control initialization.  Intentionally a no-op: the BX rate
 * control scheme used here keeps no GOP-level state. */
void RCInitGOP(AVCEncObject *encvid)
{
    /* in BX RC, there's no GOP-level RC */

    OSCL_UNUSED_ARG(encvid);

    return ;
}
+
+
/* Set up the frame-level QP before encoding a frame.  With rate control
 * enabled, the first frame uses initQP and later frames get their QP from
 * calculateQuantizer_Multipass(); the per-frame bit counters are reset and
 * the multipass bookkeeping (target bits, MAD, QP) is recorded.  Without
 * rate control the fixed initQP is used.  Also derives the lambda values
 * used by motion estimation and, for in-band parameter sets, records the
 * first frame's QP in the picture parameter set. */
void RCInitFrameQP(AVCEncObject *encvid)
{
    AVCCommonObj *video = encvid->common;
    AVCRateControl *rateCtrl = encvid->rateCtrl;
    AVCPicParamSet *picParam = video->currPicParams;
    MultiPass *pMP = rateCtrl->pMP;

    if (rateCtrl->rcEnable == TRUE)
    {
        /* frame layer rate control */
        if (rateCtrl->encoded_frames == 0)
        {
            video->QPy = rateCtrl->Qc = rateCtrl->initQP;
        }
        else
        {
            calculateQuantizer_Multipass(encvid, video, rateCtrl, pMP);
            video->QPy = rateCtrl->Qc;
        }

        rateCtrl->NumberofHeaderBits = 0;
        rateCtrl->NumberofTextureBits = 0;
        rateCtrl->numFrameBits = 0; // reset

        /* update pMP->framePos (circular index into the RD-sample window) */
        if (++pMP->framePos == pMP->frameRange) pMP->framePos = 0;

        if (rateCtrl->T == 0)
        {
            /* counters tick in 1/10-frame units, so 7.5*fps corresponds to
               the 0.75s window mentioned below */
            pMP->counter_BTdst = (int)(rateCtrl->frame_rate * 7.5 + 0.5); /* 0.75s time frame */
            pMP->counter_BTdst = AVC_MIN(pMP->counter_BTdst, (int)(rateCtrl->max_BitVariance_num / 2 * 0.40)); /* 0.75s time frame may go beyond VBV buffer if we set the buffer size smaller than 0.75s */
            pMP->counter_BTdst = AVC_MAX(pMP->counter_BTdst, (int)((rateCtrl->Bs / 2 - rateCtrl->VBV_fullness) * 0.30 / (rateCtrl->TMN_TH / 10.0) + 0.5)); /* At least 30% of VBV buffer size/2 */
            pMP->counter_BTdst = AVC_MIN(pMP->counter_BTdst, 20); /* Limit the target to be smaller than 3C */

            pMP->target_bits = rateCtrl->T = rateCtrl->TMN_TH = (int)(rateCtrl->TMN_TH * (1.0 + pMP->counter_BTdst * 0.1));
            pMP->diff_counter = pMP->counter_BTdst;
        }

        /* collect the necessary data: target bits, actual bits, mad and QP */
        pMP->target_bits = rateCtrl->T;
        pMP->QP = video->QPy;

        pMP->mad = (OsclFloat)rateCtrl->totalSAD / video->PicSizeInMbs; //ComputeFrameMAD(video, rateCtrl);
        if (pMP->mad < MAD_MIN) pMP->mad = MAD_MIN; /* MAD_MIN is defined as 1 in mp4def.h */

        pMP->bitrate = rateCtrl->bitRate; /* calculated in RCVopQPSetting */
        pMP->framerate = rateCtrl->frame_rate;

        /* first pass encoding */
        pMP->nRe_Quantized = 0;

    } // rcEnable
    else
    {
        video->QPy = rateCtrl->initQP;
    }

// printf(" %d ",video->QPy);

    if (video->CurrPicNum == 0 && encvid->outOfBandParamSet == FALSE)
    {
        /* in-band parameter sets: record the first frame's QP in the PPS */
        picParam->pic_init_qs_minus26 = 0;
        picParam->pic_init_qp_minus26 = video->QPy - 26;
    }

    // need this for motion estimation
    encvid->lambda_mode = QP2QUANT[AVC_MAX(0, video->QPy-SHIFT_QP)];
    encvid->lambda_motion = LAMBDA_FACTOR(encvid->lambda_mode);
    return ;
}
+
+/* Mad based variable bit allocation + QP calculation with a new quadratic method */
/* Compute rateCtrl->Qc for the current frame: run the MAD-based target-bit
 * allocation, then estimate the quantizer step from the closest previously
 * stored rate-distortion sample using a quadratic/linear model, and clamp
 * the result to [RC_MIN_QUANT, RC_MAX_QUANT].  Finally applies a small
 * bit-resource protection adjustment when the encoder is on budget. */
void calculateQuantizer_Multipass(AVCEncObject *encvid, AVCCommonObj *video,
                                  AVCRateControl *rateCtrl, MultiPass *pMP)
{
    int prev_actual_bits = 0, curr_target, /*pos=0,*/i, j;
    /* 0.625 is presumably the Qstep of QP 0 (see QP2Qstep) — the sentinel
       meaning "no usable previous sample yet" */
    OsclFloat Qstep, prev_QP = 0.625;

    OsclFloat curr_mad, prev_mad, curr_RD, prev_RD, average_mad, aver_QP;

    /* Mad based variable bit allocation */
    targetBitCalculation(encvid, video, rateCtrl, pMP);

    if (rateCtrl->T <= 0 || rateCtrl->totalSAD == 0)
    {
        /* no bit budget left: pin QP at the maximum; keep previous Qc otherwise */
        if (rateCtrl->T < 0)    rateCtrl->Qc = RC_MAX_QUANT;
        return;
    }

    /* ---------------------------------------------------------------------------------------------------*/
    /* current frame QP estimation */
    curr_target = rateCtrl->T;
    curr_mad = (OsclFloat)rateCtrl->totalSAD / video->PicSizeInMbs;
    if (curr_mad < MAD_MIN) curr_mad = MAD_MIN; /* MAD_MIN is defined as 1 in mp4def.h */
    curr_RD  = (OsclFloat)curr_target / curr_mad;

    if (rateCtrl->skip_next_frame == -1) // previous was skipped
    {
        /* reuse the sample recorded for the skipped frame's slot */
        i = pMP->framePos;
        prev_mad = pMP->pRDSamples[i][0].mad;
        prev_QP = pMP->pRDSamples[i][0].QP;
        prev_actual_bits = pMP->pRDSamples[i][0].actual_bits;
    }
    else
    {
        /* Another version of search the optimal point */
        prev_mad = 0.0;
        i = 0;
        while (i < pMP->frameRange && prev_mad < 0.001) /* find first one with nonzero prev_mad */
        {
            prev_mad = pMP->pRDSamples[i][0].mad;
            i++;
        }

        if (i < pMP->frameRange)
        {
            prev_actual_bits = pMP->pRDSamples[i-1][0].actual_bits;

            /* pick the stored frame whose MAD is closest to the current MAD.
               NOTE(review): j starts at 0, so if no later sample beats the
               first match, row 0's QP is used rather than row i-1's —
               confirm this is intended. */
            for (j = 0; i < pMP->frameRange; i++)
            {
                if (pMP->pRDSamples[i][0].mad != 0 &&
                        AVC_ABS(prev_mad - curr_mad) > AVC_ABS(pMP->pRDSamples[i][0].mad - curr_mad))
                {
                    prev_mad = pMP->pRDSamples[i][0].mad;
                    prev_actual_bits = pMP->pRDSamples[i][0].actual_bits;
                    j = i;
                }
            }
            prev_QP = QP2Qstep(pMP->pRDSamples[j][0].QP);

            /* within that frame, pick the sample whose bit count is closest
               to the current target */
            for (i = 1; i < pMP->samplesPerFrame[j]; i++)
            {
                if (AVC_ABS(prev_actual_bits - curr_target) > AVC_ABS(pMP->pRDSamples[j][i].actual_bits - curr_target))
                {
                    prev_actual_bits = pMP->pRDSamples[j][i].actual_bits;
                    prev_QP = QP2Qstep(pMP->pRDSamples[j][i].QP);
                }
            }
        }
    }

    // quadratic approximation
    if (prev_mad > 0.001) // only when prev_mad is greater than 0, otherwise keep using the same QP
    {
        prev_RD = (OsclFloat)prev_actual_bits / prev_mad;
        //rateCtrl->Qc = (Int)(prev_QP * sqrt(prev_actual_bits/curr_target) + 0.4);
        if (prev_QP == 0.625) // added this to allow getting out of QP = 0 easily
        {
            Qstep = (int)(prev_RD / curr_RD + 0.5);
        }
        else
        {
            //	rateCtrl->Qc =(Int)(prev_QP * M4VENC_SQRT(prev_RD/curr_RD) + 0.9);

            if (prev_RD / curr_RD > 0.5 && prev_RD / curr_RD < 2.0)
                Qstep = (int)(prev_QP * (sqrt(prev_RD / curr_RD) + prev_RD / curr_RD) / 2.0 + 0.9); /* Quadratic and linear approximation */
            else
                Qstep = (int)(prev_QP * (sqrt(prev_RD / curr_RD) + pow(prev_RD / curr_RD, 1.0 / 3.0)) / 2.0 + 0.9);
        }
        // lower bound on Qc should be a function of curr_mad
        // When mad is already low, lower bound on Qc doesn't have to be small.
        // Note, this doesn't work well for low complexity clip encoded at high bit rate
        // it doesn't hit the target bit rate due to this QP lower bound.
        /// if((curr_mad < 8) && (rateCtrl->Qc < 12))   rateCtrl->Qc = 12;
        //  else    if((curr_mad < 128) && (rateCtrl->Qc < 3)) rateCtrl->Qc = 3;

        rateCtrl->Qc = Qstep2QP(Qstep);

        if (rateCtrl->Qc < RC_MIN_QUANT) rateCtrl->Qc = RC_MIN_QUANT;
        if (rateCtrl->Qc > RC_MAX_QUANT)    rateCtrl->Qc = RC_MAX_QUANT;
    }

    /* active bit resource protection */
    aver_QP = (pMP->encoded_frames == 0 ? 0 : pMP->sum_QP / (OsclFloat)pMP->encoded_frames);
    average_mad = (pMP->encoded_frames == 0 ? 0 : pMP->sum_mad / (OsclFloat)pMP->encoded_frames); /* this function is called from the second encoded frame*/
    if (pMP->diff_counter == 0 &&
            ((OsclFloat)rateCtrl->Qc <= aver_QP*1.1 || curr_mad <= average_mad*1.1) &&
            pMP->counter_BTsrc <= (pMP->counter_BTdst + (int)(pMP->framerate*1.0 + 0.5)))
    {
        /* encoder is on budget: trim the threshold slightly and bank the savings */
        rateCtrl->TMN_TH -= (int)(pMP->target_bits_per_frame / 10.0);
        rateCtrl->T = rateCtrl->TMN_TH - rateCtrl->TMN_W;
        pMP->counter_BTsrc++;
        pMP->diff_counter--;
    }

}
+
/* Compute the target bit budget (rateCtrl->T) for the upcoming frame.
   Bit-transfer (BT) counters model bits borrowed from future frames
   (counter_BTsrc) or banked for them (counter_BTdst). The per-frame
   transfer is driven by how the current frame's MAD compares with the
   running average MAD, then clipped against the VBV buffer state. */
void targetBitCalculation(AVCEncObject *encvid, AVCCommonObj *video, AVCRateControl *rateCtrl, MultiPass *pMP)
{
    OSCL_UNUSED_ARG(encvid);
    OsclFloat curr_mad;//, average_mad;
    int diff_counter_BTsrc, diff_counter_BTdst, prev_counter_diff, curr_counter_diff, bound;
    /* BT = Bit Transfer, for pMP->counter_BTsrc, pMP->counter_BTdst */

    /* some stuff about frame dropping remained here to be done because pMP cannot be inserted into updateRateControl()*/
    updateRC_PostProc(rateCtrl, pMP);

    /* update pMP->counter_BTsrc and pMP->counter_BTdst to avoid integer overflow */
    if (pMP->counter_BTsrc > 1000 && pMP->counter_BTdst > 1000)
    {
        pMP->counter_BTsrc -= 1000;
        pMP->counter_BTdst -= 1000;
    }

    /* ---------------------------------------------------------------------------------------------------*/
    /* target calculation */
    /* per-MB average SAD of the frame just encoded */
    curr_mad = (OsclFloat)rateCtrl->totalSAD / video->PicSizeInMbs;
    if (curr_mad < MAD_MIN) curr_mad = MAD_MIN; /* MAD_MIN is defined as 1 in mp4def.h */
    diff_counter_BTsrc = diff_counter_BTdst = 0;
    pMP->diff_counter = 0;


    /*1.calculate average mad */
    pMP->sum_mad += curr_mad;
    //average_mad = (pMP->encoded_frames < 1 ? curr_mad : pMP->sum_mad/(OsclFloat)(pMP->encoded_frames+1)); /* this function is called from the second encoded frame*/
    //pMP->aver_mad = average_mad;
    if (pMP->encoded_frames >= 0) /* pMP->encoded_frames is set to -1 initially, so forget about the very first I frame */
        pMP->aver_mad = (pMP->aver_mad * pMP->encoded_frames + curr_mad) / (pMP->encoded_frames + 1);

    if (pMP->overlapped_win_size > 0 && pMP->encoded_frames_prev >= 0)
        pMP->aver_mad_prev = (pMP->aver_mad_prev * pMP->encoded_frames_prev + curr_mad) / (pMP->encoded_frames_prev + 1);

    /*2.average_mad, mad ==> diff_counter_BTsrc, diff_counter_BTdst */
    /* frames harder than ~1.1x the average earn extra bits (BTdst);
       easier frames give bits back (BTsrc); both in units of 10% of the
       nominal per-frame budget */
    if (pMP->overlapped_win_size == 0)
    {
        /* original version */
        if (curr_mad > pMP->aver_mad*1.1)
        {
            if (curr_mad / (pMP->aver_mad + 0.0001) > 2)
                diff_counter_BTdst = (int)(sqrt(curr_mad / (pMP->aver_mad + 0.0001)) * 10 + 0.4) - 10;
            //diff_counter_BTdst = (int)((sqrt(curr_mad/pMP->aver_mad)*2+curr_mad/pMP->aver_mad)/(3*0.1) + 0.4) - 10;
            else
                diff_counter_BTdst = (int)(curr_mad / (pMP->aver_mad + 0.0001) * 10 + 0.4) - 10;
        }
        else /* curr_mad <= average_mad*1.1 */
            //diff_counter_BTsrc = 10 - (int)((sqrt(curr_mad/pMP->aver_mad) + pow(curr_mad/pMP->aver_mad, 1.0/3.0))/(2.0*0.1) + 0.4);
            diff_counter_BTsrc = 10 - (int)(sqrt(curr_mad / (pMP->aver_mad + 0.0001)) * 10 + 0.5);

        /* actively fill in the possible gap */
        if (diff_counter_BTsrc == 0 && diff_counter_BTdst == 0 &&
                curr_mad <= pMP->aver_mad*1.1 && pMP->counter_BTsrc < pMP->counter_BTdst)
            diff_counter_BTsrc = 1;

    }
    else if (pMP->overlapped_win_size > 0)
    {
        /* transition time: use previous average mad "pMP->aver_mad_prev" instead of the current average mad "pMP->aver_mad" */
        if (curr_mad > pMP->aver_mad_prev*1.1)
        {
            if (curr_mad / pMP->aver_mad_prev > 2)
                diff_counter_BTdst = (int)(sqrt(curr_mad / (pMP->aver_mad_prev + 0.0001)) * 10 + 0.4) - 10;
            //diff_counter_BTdst = (int)((M4VENC_SQRT(curr_mad/pMP->aver_mad_prev)*2+curr_mad/pMP->aver_mad_prev)/(3*0.1) + 0.4) - 10;
            else
                diff_counter_BTdst = (int)(curr_mad / (pMP->aver_mad_prev + 0.0001) * 10 + 0.4) - 10;
        }
        else /* curr_mad <= average_mad*1.1 */
            //diff_counter_BTsrc = 10 - (Int)((sqrt(curr_mad/pMP->aver_mad_prev) + pow(curr_mad/pMP->aver_mad_prev, 1.0/3.0))/(2.0*0.1) + 0.4);
            diff_counter_BTsrc = 10 - (int)(sqrt(curr_mad / (pMP->aver_mad_prev + 0.0001)) * 10 + 0.5);

        /* actively fill in the possible gap */
        if (diff_counter_BTsrc == 0 && diff_counter_BTdst == 0 &&
                curr_mad <= pMP->aver_mad_prev*1.1 && pMP->counter_BTsrc < pMP->counter_BTdst)
            diff_counter_BTsrc = 1;

        if (--pMP->overlapped_win_size <= 0) pMP->overlapped_win_size = 0;
    }


    /* if difference is too much, do clipping */
    /* First, set the upper bound for current bit allocation variance: 80% of available buffer */
    bound = (int)((rateCtrl->Bs / 2 - rateCtrl->VBV_fullness) * 0.6 / (pMP->target_bits_per_frame / 10)); /* rateCtrl->Bs */
    diff_counter_BTsrc = AVC_MIN(diff_counter_BTsrc, bound);
    diff_counter_BTdst = AVC_MIN(diff_counter_BTdst, bound);

    /* Second, set another upper bound for current bit allocation: 4-5*bitrate/framerate */
    bound = 50;
//  if(video->encParams->RC_Type == CBR_LOWDELAY)
//  not necessary     bound = 10;  -- For Low delay */

    diff_counter_BTsrc = AVC_MIN(diff_counter_BTsrc, bound);
    diff_counter_BTdst = AVC_MIN(diff_counter_BTdst, bound);


    /* Third, check the buffer: keep the cumulative transfer within
       +/- max_BitVariance_num counter units */
    prev_counter_diff = pMP->counter_BTdst - pMP->counter_BTsrc;
    curr_counter_diff = prev_counter_diff + (diff_counter_BTdst - diff_counter_BTsrc);

    if (AVC_ABS(prev_counter_diff) >= rateCtrl->max_BitVariance_num || AVC_ABS(curr_counter_diff) >= rateCtrl->max_BitVariance_num)
    {   //diff_counter_BTsrc = diff_counter_BTdst = 0;

        if (curr_counter_diff > rateCtrl->max_BitVariance_num && diff_counter_BTdst)
        {
            diff_counter_BTdst = (rateCtrl->max_BitVariance_num - prev_counter_diff) + diff_counter_BTsrc;
            if (diff_counter_BTdst < 0) diff_counter_BTdst = 0;
        }

        else if (curr_counter_diff < -rateCtrl->max_BitVariance_num && diff_counter_BTsrc)
        {
            diff_counter_BTsrc = diff_counter_BTdst - (-rateCtrl->max_BitVariance_num - prev_counter_diff);
            if (diff_counter_BTsrc < 0) diff_counter_BTsrc = 0;
        }
    }


    /*3.diff_counter_BTsrc, diff_counter_BTdst ==> TMN_TH */
    /* apply the transfer: each counter unit is 10% of the nominal budget */
    rateCtrl->TMN_TH = (int)(pMP->target_bits_per_frame);
    pMP->diff_counter = 0;

    if (diff_counter_BTsrc)
    {
        rateCtrl->TMN_TH -= (int)(pMP->target_bits_per_frame * diff_counter_BTsrc * 0.1);
        pMP->diff_counter = -diff_counter_BTsrc;
    }
    else if (diff_counter_BTdst)
    {
        rateCtrl->TMN_TH += (int)(pMP->target_bits_per_frame * diff_counter_BTdst * 0.1);
        pMP->diff_counter = diff_counter_BTdst;
    }


    /*4.update pMP->counter_BTsrc, pMP->counter_BTdst */
    pMP->counter_BTsrc += diff_counter_BTsrc;
    pMP->counter_BTdst += diff_counter_BTdst;


    /*5.target bit calculation: subtract the accumulated budget overshoot */
    rateCtrl->T = rateCtrl->TMN_TH - rateCtrl->TMN_W;

    return ;
}
+
/* Fold the frame-skip decision made by updateRateControl() into the
   multipass bit-transfer counters, and clamp VBV fullness at its lower
   bound. Lives here because pMP is not visible from updateRateControl();
   called from targetBitCalculation(). */
void updateRC_PostProc(AVCRateControl *rateCtrl, MultiPass *pMP)
{
    if (rateCtrl->skip_next_frame > 0) /* next frame(s) will be skipped: reclaim their budget */
    {
        pMP->counter_BTsrc += 10 * rateCtrl->skip_next_frame;

    }
    else if (rateCtrl->skip_next_frame == -1) /* current frame was skipped: back out its statistics */
    {
        pMP->counter_BTdst -= pMP->diff_counter;
        pMP->counter_BTsrc += 10;

        /* remove the skipped frame's contribution from the running MAD/QP sums */
        pMP->sum_mad -= pMP->mad;
        pMP->aver_mad = (pMP->aver_mad * pMP->encoded_frames - pMP->mad) / (pMP->encoded_frames - 1 + 0.0001);
        pMP->sum_QP -= pMP->QP;
        pMP->encoded_frames --;
    }
    /* some stuff in update VBV_fullness remains here */
    //if(rateCtrl->VBV_fullness < -rateCtrl->Bs/2) /* rateCtrl->Bs */
    if (rateCtrl->VBV_fullness < rateCtrl->low_bound)
    {
        rateCtrl->VBV_fullness = rateCtrl->low_bound; // -rateCtrl->Bs/2;
        rateCtrl->TMN_W = rateCtrl->VBV_fullness - rateCtrl->low_bound; /* i.e. zero after the clamp above */
        pMP->counter_BTsrc = pMP->counter_BTdst + (int)((OsclFloat)(rateCtrl->Bs / 2 - rateCtrl->low_bound) / 2.0 / (pMP->target_bits_per_frame / 10));
    }
}
+
+
/* Derive the chroma QP and the per-QP quantization constants from the
   current macroblock's luma QP: QP/6 and QP%6 split for luma and chroma,
   the quantizer rounding offsets (intra vs inter), and the Lagrange
   lambdas used by mode decision / motion estimation. */
void RCInitChromaQP(AVCEncObject *encvid)
{
    AVCCommonObj *video = encvid->common;
    AVCMacroblock *currMB = video->currMB;
    int q_bits;

    /* we have to do the same thing for AVC_CLIP3(0,51,video->QSy) */

    /* (QP * 43) >> 8 is a fast floor(QP / 6) for the valid 0..51 range */
    video->QPy_div_6 = (currMB->QPy * 43) >> 8;
    video->QPy_mod_6 = currMB->QPy - 6 * video->QPy_div_6;
    /* chroma QP is the table-mapped, offset-adjusted luma QP */
    currMB->QPc = video->QPc = mapQPi2QPc[AVC_CLIP3(0, 51, currMB->QPy + video->currPicParams->chroma_qp_index_offset)];
    video->QPc_div_6 = (video->QPc * 43) >> 8;
    video->QPc_mod_6 = video->QPc - 6 * video->QPc_div_6;

    /* pre-calculate this to save computation */
    /* rounding offsets: 682/2048 ~ 1/3 (intra), 342/2048 ~ 1/6 (inter) */
    q_bits = 4 + video->QPy_div_6;
    if (video->slice_type == AVC_I_SLICE)
    {
        encvid->qp_const = 682 << q_bits;      // intra
    }
    else
    {
        encvid->qp_const = 342 << q_bits;      // inter
    }

    q_bits = 4 + video->QPc_div_6;
    if (video->slice_type == AVC_I_SLICE)
    {
        encvid->qp_const_c = 682 << q_bits;    // intra
    }
    else
    {
        encvid->qp_const_c = 342 << q_bits;    // inter
    }

    /* Lagrange multipliers for RD decisions, looked up from the QP2QUANT table */
    encvid->lambda_mode = QP2QUANT[AVC_MAX(0, currMB->QPy-SHIFT_QP)];
    encvid->lambda_motion = LAMBDA_FACTOR(encvid->lambda_mode);

    return ;
}
+
+
+void RCInitMBQP(AVCEncObject *encvid)
+{
+ AVCCommonObj *video = encvid->common;
+ AVCMacroblock *currMB = video->currMB;
+
+ currMB->QPy = video->QPy; /* set to previous value or picture level */
+
+ RCInitChromaQP(encvid);
+
+}
+
+void RCPostMB(AVCCommonObj *video, AVCRateControl *rateCtrl, int num_header_bits, int num_texture_bits)
+{
+ OSCL_UNUSED_ARG(video);
+ rateCtrl->numMBHeaderBits = num_header_bits;
+ rateCtrl->numMBTextureBits = num_texture_bits;
+ rateCtrl->NumberofHeaderBits += rateCtrl->numMBHeaderBits;
+ rateCtrl->NumberofTextureBits += rateCtrl->numMBTextureBits;
+}
+
+void RCRestoreQP(AVCMacroblock *currMB, AVCCommonObj *video, AVCEncObject *encvid)
+{
+ currMB->QPy = video->QPy; /* use previous QP */
+ RCInitChromaQP(encvid);
+
+ return ;
+}
+
+
/* Record the distortion measure ("MAD", SAD-based) of the current
   macroblock for rate control. Only intra MBs are measured here; inter
   MBs were already recorded during the motion-vector search. */
void RCCalculateMAD(AVCEncObject *encvid, AVCMacroblock *currMB, uint8 *orgL, int orgPitch)
{
    AVCCommonObj *video = encvid->common;
    AVCRateControl *rateCtrl = encvid->rateCtrl;
    uint32 dmin_lx;

    if (rateCtrl->rcEnable == TRUE)
    {
        if (currMB->mb_intra)
        {
            if (currMB->mbMode == AVC_I16)
            {
                /* pack dmin = 0xFFFF (top 16 bits) with the source pitch (low 16 bits) */
                dmin_lx = (0xFFFF << 16) | orgPitch;
                /* SAD between the original MB and the chosen I16 prediction */
                rateCtrl->MADofMB[video->mbNum] = AVCSAD_Macroblock_C(orgL,
                                                  encvid->pred_i16[currMB->i16Mode], dmin_lx, NULL);
            }
            else /* i4 */
            {
                /* i4_sad was accumulated during 4x4 mode decision; normalize per pixel */
                rateCtrl->MADofMB[video->mbNum] = encvid->i4_sad / 256.;
            }
        }
        /* for INTER, we have already saved it with the MV search */
    }

    return ;
}
+
+
+
/* Update rate-control state after a frame has been encoded: record an
   R-D sample, settle the bit-transfer counters against the actual bit
   count, update the VBV model, and decide whether the frame must be
   dropped.
   Returns AVCENC_SKIPPED_PICTURE when the just-encoded frame should be
   discarded, AVCENC_SUCCESS otherwise. */
AVCEnc_Status RCUpdateFrame(AVCEncObject *encvid)
{
    AVCCommonObj *video = encvid->common;
    AVCRateControl *rateCtrl = encvid->rateCtrl;
    AVCEnc_Status status = AVCENC_SUCCESS;
    MultiPass *pMP = rateCtrl->pMP;
    int diff_BTCounter;
    int nal_type = video->nal_unit_type;

    /* update the complexity weight of I, P, B frame */

    if (rateCtrl->rcEnable == TRUE)
    {
        pMP->actual_bits = rateCtrl->numFrameBits;
        /* per-MB average SAD of this frame */
        pMP->mad = (OsclFloat)rateCtrl->totalSAD / video->PicSizeInMbs; //ComputeFrameMAD(video, rateCtrl);

        AVCSaveRDSamples(pMP, 0);

        pMP->encoded_frames++;

        /* for pMP->samplesPerFrame */
        pMP->samplesPerFrame[pMP->framePos] = 0;

        pMP->sum_QP += pMP->QP;

        /* update pMP->counter_BTsrc, pMP->counter_BTdst */
        /* re-allocate the target bit again and then stop encoding */
        /* budget miss in units of 10% of bits-per-frame: positive means
           the frame came in under budget */
        diff_BTCounter = (int)((OsclFloat)(rateCtrl->TMN_TH - rateCtrl->TMN_W - pMP->actual_bits) /
                               (pMP->bitrate / (pMP->framerate + 0.0001) + 0.0001) / 0.1);
        if (diff_BTCounter >= 0)
            pMP->counter_BTsrc += diff_BTCounter; /* pMP->actual_bits is smaller */
        else
            pMP->counter_BTdst -= diff_BTCounter; /* pMP->actual_bits is bigger */

        rateCtrl->TMN_TH -= (int)((OsclFloat)pMP->bitrate / (pMP->framerate + 0.0001) * (diff_BTCounter * 0.1));
        rateCtrl->T = pMP->target_bits = rateCtrl->TMN_TH - rateCtrl->TMN_W;
        pMP->diff_counter -= diff_BTCounter;

        rateCtrl->Rc = rateCtrl->numFrameBits;  /* Total Bits for current frame */
        rateCtrl->Hc = rateCtrl->NumberofHeaderBits;    /* Total Bits in Header and Motion Vector */

        /* BX_RC */
        updateRateControl(rateCtrl, nal_type);
        if (rateCtrl->skip_next_frame == -1) // skip current frame
        {
            status = AVCENC_SKIPPED_PICTURE;
        }
    }

    rateCtrl->first_frame = 0;  // reset here after we encode the first frame.

    return status;
}
+
+void AVCSaveRDSamples(MultiPass *pMP, int counter_samples)
+{
+ /* for pMP->pRDSamples */
+ pMP->pRDSamples[pMP->framePos][counter_samples].QP = pMP->QP;
+ pMP->pRDSamples[pMP->framePos][counter_samples].actual_bits = pMP->actual_bits;
+ pMP->pRDSamples[pMP->framePos][counter_samples].mad = pMP->mad;
+ pMP->pRDSamples[pMP->framePos][counter_samples].R_D = (OsclFloat)pMP->actual_bits / (pMP->mad + 0.0001);
+
+ return ;
+}
+
/* Update the VBV buffer model with the actual bits of the just-encoded
   frame and decide frame skipping:
     skip_next_frame == -1 : drop the current frame (buffer overflowed)
     skip_next_frame ==  N : skip the next N frames
   IDR frames are never dropped. */
void updateRateControl(AVCRateControl *rateCtrl, int nal_type)
{
    int frame_bits;
    MultiPass *pMP = rateCtrl->pMP;

    /* BX rate control */
    frame_bits = (int)(rateCtrl->bitRate / rateCtrl->frame_rate); /* nominal budget per frame */
    rateCtrl->TMN_W += (rateCtrl->Rc - rateCtrl->TMN_TH);   /* accumulated overshoot vs target */
    rateCtrl->VBV_fullness += (rateCtrl->Rc - frame_bits); //rateCtrl->Rp);
    //if(rateCtrl->VBV_fullness < 0) rateCtrl->VBV_fullness = -1;

    rateCtrl->encoded_frames++;

    /* frame dropping */
    rateCtrl->skip_next_frame = 0;

    if ((rateCtrl->VBV_fullness > rateCtrl->Bs / 2) && nal_type != AVC_NALTYPE_IDR) /* skip the current frame */ /* rateCtrl->Bs */
    {
        /* undo this frame's contribution and mark it skipped */
        rateCtrl->TMN_W -= (rateCtrl->Rc - rateCtrl->TMN_TH);
        rateCtrl->VBV_fullness -= rateCtrl->Rc;
        rateCtrl->skip_next_frame = -1;
    }
    else if ((OsclFloat)(rateCtrl->VBV_fullness - rateCtrl->VBV_fullness_offset) > (rateCtrl->Bs / 2 - rateCtrl->VBV_fullness_offset)*0.95) /* skip next frame */
    {
        rateCtrl->VBV_fullness -= frame_bits; //rateCtrl->Rp;
        rateCtrl->skip_next_frame = 1;
        pMP->counter_BTsrc -= (int)((OsclFloat)(rateCtrl->Bs / 2 - rateCtrl->low_bound) / 2.0 / (pMP->target_bits_per_frame / 10));
        /* BX_1, skip more than 1 frames */
        //while(rateCtrl->VBV_fullness > rateCtrl->Bs*0.475)
        while ((rateCtrl->VBV_fullness - rateCtrl->VBV_fullness_offset) > (rateCtrl->Bs / 2 - rateCtrl->VBV_fullness_offset)*0.95)
        {
            rateCtrl->VBV_fullness -= frame_bits; //rateCtrl->Rp;
            rateCtrl->skip_next_frame++;
            pMP->counter_BTsrc -= (int)((OsclFloat)(rateCtrl->Bs / 2 - rateCtrl->low_bound) / 2.0 / (pMP->target_bits_per_frame / 10));
        }

        /* END BX_1 */
    }
}
+
+
+double ComputeFrameMAD(AVCCommonObj *video, AVCRateControl *rateCtrl)
+{
+ double TotalMAD;
+ int i;
+ TotalMAD = 0.0;
+ for (i = 0; i < (int)video->PicSizeInMbs; i++)
+ TotalMAD += rateCtrl->MADofMB[i];
+ TotalMAD /= video->PicSizeInMbs;
+ return TotalMAD;
+}
+
+
+
+
+
+/* convert from QP to Qstep */
+double QP2Qstep(int QP)
+{
+ int i;
+ double Qstep;
+ static const double QP2QSTEP[6] = { 0.625, 0.6875, 0.8125, 0.875, 1.0, 1.125 };
+
+ Qstep = QP2QSTEP[QP % 6];
+ for (i = 0; i < (QP / 6); i++)
+ Qstep *= 2;
+
+ return Qstep;
+}
+
+/* convert from step size to QP */
/* Convert a quantizer step size back to the nearest AVC QP (0..51).
   Inverse of QP2Qstep(): QP = 6*q_per + q_rem where
   Qstep = base[q_rem] * 2^q_per. Inputs outside the representable range
   clamp to QP 0 or 51.

   Improvement over the original: the range thresholds were computed at
   runtime by calling QP2Qstep(0), QP2Qstep(51) and QP2Qstep(5) on every
   invocation; they are exact constants (0.625, 224.0 = 0.875 * 2^8, and
   1.125), so they are inlined here — same behavior, no redundant calls,
   and the q_rem selection collapses to a midpoint table walk. */
int Qstep2QP(double Qstep)
{
    /* base step sizes for QP 0..5; each period of 6 doubles the step */
    static const double QP2QSTEP[6] = { 0.625, 0.6875, 0.8125, 0.875, 1.0, 1.125 };
    int q_per = 0, q_rem;

    if (Qstep < 0.625)          /* below QP2Qstep(0) */
        return 0;
    else if (Qstep > 224.0)     /* above QP2Qstep(51) */
        return 51;

    /* pull Qstep into the base range (QP 0..5), counting the periods */
    while (Qstep > 1.125)       /* QP2Qstep(5) */
    {
        Qstep /= 2;
        q_per += 1;
    }

    /* pick the base step whose interval (split at the midpoints between
       consecutive base steps) contains Qstep; falls through to 5 */
    for (q_rem = 0; q_rem < 5; q_rem++)
    {
        if (Qstep <= (QP2QSTEP[q_rem] + QP2QSTEP[q_rem + 1]) / 2)
            break;
    }

    return (q_per * 6 + q_rem);
}
+
+
+
diff --git a/media/libstagefright/codecs/avc/enc/src/residual.cpp b/media/libstagefright/codecs/avc/enc/src/residual.cpp
new file mode 100644
index 0000000..42eb910
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/residual.cpp
@@ -0,0 +1,389 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+#include "avcenc_lib.h"
+
/* Encode the current macroblock as I_PCM: signal the mb_type (ue(25)),
   pad the bitstream to a byte boundary, then write the raw luma and
   chroma samples straight into the bitstream while also copying them
   into the reconstructed picture.
   Returns the status of the last bitstream write (e.g. overrun). */
AVCEnc_Status EncodeIntraPCM(AVCEncObject *encvid)
{
    AVCEnc_Status status = AVCENC_SUCCESS;
    AVCCommonObj *video = encvid->common;
    AVCFrameIO *currInput = encvid->currInput;
    AVCEncBitstream *stream = encvid->bitstream;
    int x_position = (video->mb_x << 4);    /* MB origin in pixels */
    int y_position = (video->mb_y << 4);
    int orgPitch = currInput->pitch;
    int offset1 = y_position * orgPitch + x_position;   /* luma offset of this MB */
    int i, j;
    int offset;
    uint8 *pDst, *pSrc;
    uint code;

    /* mb_type for I_PCM */
    ue_v(stream, 25);

    /* pcm_alignment_zero_bit: pad with zeros to the next byte boundary */
    i = stream->bit_left & 0x7;
    if (i) /* not byte-aligned */
    {
        BitstreamWriteBits(stream, 0, i);
    }

    /* NOTE(review): pDst uses offset1 computed from the input pitch, so the
       reconstructed picture is assumed to share the input's luma pitch
       (PicWidthInSamplesL) — confirm against the caller's setup. */
    pSrc = currInput->YCbCr[0] + offset1;
    pDst = video->currPic->Sl + offset1;
    offset = video->PicWidthInSamplesL - 16;    /* pitch minus MB width */

    /* at this point bitstream is byte-aligned */
    /* luma: 16 rows, copied word-at-a-time to both bitstream and recon */
    j = 16;
    while (j > 0)
    {
#if (WORD_SIZE==32)
        for (i = 0; i < 4; i++)
        {
            code = *((uint*)pSrc);
            pSrc += 4;
            *((uint*)pDst) = code;
            pDst += 4;
            status = BitstreamWriteBits(stream, 32, code);
        }
#else
        for (i = 0; i < 8; i++)
        {
            code = *((uint*)pSrc);
            pSrc += 2;
            *((uint*)pDst) = code;
            pDst += 2;
            status = BitstreamWriteBits(stream, 16, code);
        }
#endif
        pDst += offset;
        pSrc += offset;
        j--;
    }
    if (status != AVCENC_SUCCESS)  /* check only once per line */
        return status;

    /* chroma Cb: offset (offset1 + x_position) >> 2 maps the luma MB
       offset to the half-resolution plane (assumes chroma pitch is half
       the luma pitch — TODO confirm) */
    pDst = video->currPic->Scb + ((offset1 + x_position) >> 2);
    pSrc = currInput->YCbCr[1] + ((offset1 + x_position) >> 2);
    offset >>= 1;   /* chroma pitch minus chroma MB width */

    j = 8;
    while (j > 0)
    {
#if (WORD_SIZE==32)
        for (i = 0; i < 2; i++)
        {
            code = *((uint*)pSrc);
            pSrc += 4;
            *((uint*)pDst) = code;
            pDst += 4;
            status = BitstreamWriteBits(stream, 32, code);
        }
#else
        for (i = 0; i < 4; i++)
        {
            code = *((uint*)pSrc);
            pSrc += 2;
            *((uint*)pDst) = code;
            pDst += 2;
            status = BitstreamWriteBits(stream, 16, code);
        }
#endif
        pDst += offset;
        pSrc += offset;
        j--;
    }

    if (status != AVCENC_SUCCESS)  /* check only once per line */
        return status;

    /* chroma Cr: same layout as Cb */
    pDst = video->currPic->Scr + ((offset1 + x_position) >> 2);
    pSrc = currInput->YCbCr[2] + ((offset1 + x_position) >> 2);

    j = 8;
    while (j > 0)
    {
#if (WORD_SIZE==32)
        for (i = 0; i < 2; i++)
        {
            code = *((uint*)pSrc);
            pSrc += 4;
            *((uint*)pDst) = code;
            pDst += 4;
            status = BitstreamWriteBits(stream, 32, code);
        }
#else
        for (i = 0; i < 4; i++)
        {
            code = *((uint*)pSrc);
            pSrc += 2;
            *((uint*)pDst) = code;
            pDst += 2;
            status = BitstreamWriteBits(stream, 16, code);
        }
#endif
        pDst += offset;
        pSrc += offset;
        j--;
    }

    return status;
}
+
+
/* CAVLC-encode one residual block (luma 4x4, Intra16 DC/AC, chroma DC
   2x2 or chroma AC) and write it to the bitstream: coeff_token, the
   trailing-one signs, the remaining levels, total_zeros and run_before.
   Mirrors the decoder-side ReadCoef4x4_CAVLC(); see ITU-T H.264,
   subclause 9.2.
   cindx selects the block (and, for the DC cases, doubles as the coded
   coefficient count — see the switch below); currMB->nz_coeff supplies
   TotalCoeff for the AC/luma cases. */
AVCEnc_Status enc_residual_block(AVCEncObject *encvid, AVCResidualType type, int cindx, AVCMacroblock *currMB)
{
    AVCEnc_Status status = AVCENC_SUCCESS;
    AVCCommonObj *video = encvid->common;
    int i, maxNumCoeff, nC;
    int cdc = 0, cac = 0;   /* flags: chroma DC / chroma AC block */
    int TrailingOnes;
    AVCEncBitstream *stream = encvid->bitstream;
    uint trailing_ones_sign_flag;
    int zerosLeft;
    int *level, *run;       /* level/run pairs produced by quantization */
    int TotalCoeff;
    const static int incVlc[] = {0, 3, 6, 12, 24, 48, 32768};  // maximum vlc = 6
    int escape, numPrefix, sufmask, suffix, shift, sign, value, absvalue, vlcnum, level_two_or_higher;
    int bindx = blkIdx2blkXY[cindx>>2][cindx&3] ; // raster scan index

    /* select the level/run arrays and TotalCoeff for this block type */
    switch (type)
    {
        case AVC_Luma:
            maxNumCoeff = 16;
            level = encvid->level[cindx];
            run = encvid->run[cindx];
            TotalCoeff = currMB->nz_coeff[bindx];
            break;
        case AVC_Intra16DC:
            maxNumCoeff = 16;
            level = encvid->leveldc;
            run = encvid->rundc;
            TotalCoeff = cindx; /* special case */
            bindx = 0;
            cindx = 0;
            break;
        case AVC_Intra16AC:
            maxNumCoeff = 15;
            level = encvid->level[cindx];
            run = encvid->run[cindx];
            TotalCoeff = currMB->nz_coeff[bindx];
            break;
        case AVC_ChromaDC:  /* how to differentiate Cb from Cr */
            maxNumCoeff = 4;
            cdc = 1;
            if (cindx >= 8) /* Cr: second half of the DC level/run arrays */
            {
                level = encvid->levelcdc + 4;
                run = encvid->runcdc + 4;
                TotalCoeff = cindx - 8; /* special case */
            }
            else            /* Cb */
            {
                level = encvid->levelcdc;
                run = encvid->runcdc;
                TotalCoeff = cindx; /* special case */
            }
            break;
        case AVC_ChromaAC:
            maxNumCoeff = 15;
            cac = 1;
            level = encvid->level[cindx];
            run = encvid->run[cindx];
            /* chroma AC blocks start at index 16; map to raster order */
            cindx -= 16;
            bindx = 16 + blkIdx2blkXY[cindx>>2][cindx&3];
            cindx += 16;
            TotalCoeff = currMB->nz_coeff[bindx];
            break;
        default:
            return AVCENC_FAIL;
    }


    /* find TrailingOnes: count the run of +/-1 levels at the high-frequency
       end, and sum the runs to get the total number of embedded zeros */
    TrailingOnes = 0;
    zerosLeft = 0;
    i = TotalCoeff - 1;
    nC = 1;
    while (i >= 0)
    {
        zerosLeft += run[i];
        if (nC && (level[i] == 1 || level[i] == -1))
        {
            TrailingOnes++;
        }
        else
        {
            nC = 0;
        }
        i--;
    }
    if (TrailingOnes > 3)
    {
        TrailingOnes = 3; /* clip it */
    }

    /* coeff_token: the VLC table depends on nC, predicted from the
       neighboring blocks' coefficient counts (fixed table for chroma DC) */
    if (!cdc)
    {
        if (!cac) /* not chroma */
        {
            nC = predict_nnz(video, bindx & 3, bindx >> 2);
        }
        else /* chroma ac but not chroma dc */
        {
            nC = predict_nnz_chroma(video, bindx & 3, bindx >> 2);
        }

        status = ce_TotalCoeffTrailingOnes(stream, TrailingOnes, TotalCoeff, nC);
    }
    else
    {
        nC = -1; /* Chroma DC level */
        status = ce_TotalCoeffTrailingOnesChromaDC(stream, TrailingOnes, TotalCoeff);
    }

    /* This part is done quite differently in ReadCoef4x4_CAVLC() */
    if (TotalCoeff > 0)
    {

        i = TotalCoeff - 1;

        if (TrailingOnes) /* keep reading the sign of those trailing ones */
        {
            nC = TrailingOnes;
            trailing_ones_sign_flag = 0;
            /* pack the sign bits MSB-first: 1 for negative, 0 otherwise */
            while (nC)
            {
                trailing_ones_sign_flag <<= 1;
                trailing_ones_sign_flag |= ((uint32)level[i--] >> 31); /* 0 or positive, 1 for negative */
                nC--;
            }

            /* instead of writing one bit at a time, read the whole thing at once */
            status = BitstreamWriteBits(stream, TrailingOnes, trailing_ones_sign_flag);
        }

        /* when all three trailing-one slots are used and there are more
           coefficients, the first remaining level is biased by one */
        level_two_or_higher = 1;
        if (TotalCoeff > 3 && TrailingOnes == 3)
        {
            level_two_or_higher = 0;
        }

        /* initial suffix length for the level VLC */
        if (TotalCoeff > 10 && TrailingOnes < 3)
        {
            vlcnum = 1;
        }
        else
        {
            vlcnum = 0;
        }

        /* then do this TotalCoeff-TrailingOnes times */
        for (i = TotalCoeff - TrailingOnes - 1; i >= 0; i--)
        {
            value = level[i];
            absvalue = (value >= 0) ? value : -value;

            if (level_two_or_higher)
            {
                /* remove the bias: this level is known to be >= 2 in magnitude */
                if (value > 0) value--;
                else    value++;
                level_two_or_higher = 0;
            }

            if (value >= 0)
            {
                sign = 0;
            }
            else
            {
                sign = 1;
                value = -value;
            }

            if (vlcnum == 0) // VLC1
            {
                /* level_prefix coding with two escape ranges (14+4 and 14+12 bit codes) */
                if (value < 8)
                {
                    status = BitstreamWriteBits(stream, value * 2 + sign - 1, 1);
                }
                else if (value < 8 + 8)
                {
                    status = BitstreamWriteBits(stream, 14 + 1 + 4, (1 << 4) | ((value - 8) << 1) | sign);
                }
                else
                {
                    status = BitstreamWriteBits(stream, 14 + 2 + 12, (1 << 12) | ((value - 16) << 1) | sign) ;
                }
            }
            else  // VLCN
            {
                /* prefix/suffix level code with suffix length vlcnum-1,
                   plus a 28-bit escape for very large levels */
                shift = vlcnum - 1;
                escape = (15 << shift) + 1;
                numPrefix = (value - 1) >> shift;
                sufmask = ~((0xffffffff) << shift);
                suffix = (value - 1) & sufmask;
                if (value < escape)
                {
                    status = BitstreamWriteBits(stream, numPrefix + vlcnum + 1, (1 << (shift + 1)) | (suffix << 1) | sign);
                }
                else
                {
                    status = BitstreamWriteBits(stream, 28, (1 << 12) | ((value - escape) << 1) | sign);
                }

            }

            /* adapt the suffix length to the magnitude just coded */
            if (absvalue > incVlc[vlcnum])
                vlcnum++;

            if (i == TotalCoeff - TrailingOnes - 1 && absvalue > 3)
                vlcnum = 2;
        }

        if (status != AVCENC_SUCCESS)  /* occasionally check the bitstream */
        {
            return status;
        }
        /* total_zeros is only coded when the block is not already full */
        if (TotalCoeff < maxNumCoeff)
        {
            if (!cdc)
            {
                ce_TotalZeros(stream, zerosLeft, TotalCoeff);
            }
            else
            {
                ce_TotalZerosChromaDC(stream, zerosLeft, TotalCoeff);
            }
        }
        else
        {
            zerosLeft = 0;
        }

        /* run_before for each coefficient except the last, while zeros remain */
        i = TotalCoeff - 1;
        while (i > 0) /* don't do the last one */
        {
            if (zerosLeft > 0)
            {
                ce_RunBefore(stream, run[i], zerosLeft);
            }

            zerosLeft = zerosLeft - run[i];
            i--;
        }
    }

    return status;
}
diff --git a/media/libstagefright/codecs/avc/enc/src/sad.cpp b/media/libstagefright/codecs/avc/enc/src/sad.cpp
new file mode 100644
index 0000000..ae7acd2
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/sad.cpp
@@ -0,0 +1,290 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+#include "avcenc_lib.h"
+#include "sad_inline.h"
+
+#define Cached_lx 176
+
+#ifdef _SAD_STAT
+uint32 num_sad_MB = 0;
+uint32 num_sad_Blk = 0;
+uint32 num_sad_MB_call = 0;
+uint32 num_sad_Blk_call = 0;
+
+#define NUM_SAD_MB_CALL() num_sad_MB_call++
+#define NUM_SAD_MB() num_sad_MB++
+#define NUM_SAD_BLK_CALL() num_sad_Blk_call++
+#define NUM_SAD_BLK() num_sad_Blk++
+
+#else
+
+#define NUM_SAD_MB_CALL()
+#define NUM_SAD_MB()
+#define NUM_SAD_BLK_CALL()
+#define NUM_SAD_BLK()
+
+#endif
+
+
+/* consist of
+int AVCSAD_Macroblock_C(uint8 *ref,uint8 *blk,int dmin,int lx,void *extra_info)
+int AVCSAD_MB_HTFM_Collect(uint8 *ref,uint8 *blk,int dmin,int lx,void *extra_info)
+int AVCSAD_MB_HTFM(uint8 *ref,uint8 *blk,int dmin,int lx,void *extra_info)
+*/
+
+
+/*==================================================================
+ Function: SAD_Macroblock
+ Date: 09/07/2000
+ Purpose: Compute SAD 16x16 between blk and ref.
+ To do: Uniform subsampling will be inserted later!
+ Hypothesis Testing Fast Matching to be used later!
+ Changes:
+ 11/7/00: implemented MMX
+ 1/24/01: implemented SSE
+==================================================================*/
+/********** C ************/
+int AVCSAD_Macroblock_C(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info)
+{
+ (void)(extra_info);
+
+ int32 x10;
+ int dmin = (uint32)dmin_lx >> 16;
+ int lx = dmin_lx & 0xFFFF;
+
+ NUM_SAD_MB_CALL();
+
+ x10 = simd_sad_mb(ref, blk, dmin, lx);
+
+ return x10;
+}
+
+#ifdef HTFM /* HTFM with uniform subsampling implementation 2/28/01 */
+/*===============================================================
+ Function: AVCAVCSAD_MB_HTFM_Collect and AVCSAD_MB_HTFM
+ Date: 3/2/1
+ Purpose: Compute the SAD on a 16x16 block using
+ uniform subsampling and hypothesis testing fast matching
+ for early dropout. SAD_MB_HP_HTFM_Collect is to collect
+ the statistics to compute the thresholds to be used in
+ SAD_MB_HP_HTFM.
+ Input/Output:
+ Changes:
+ ===============================================================*/
+
/* 16x16 SAD with uniform subsampling, collecting statistics for the HTFM
   (hypothesis testing fast matching) thresholds. The partial SAD after
   each of the 16 stages is kept in saddata[]; if it already exceeds the
   current best SAD (top 16 bits of dmin_lx) the search aborts early,
   after updating abs_dif_mad_avg / countbreak in the HTFM_Stat record. */
int AVCAVCSAD_MB_HTFM_Collect(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info)
{
    int i;
    int sad = 0;
    uint8 *p1;
    int lx4 = (dmin_lx << 2) & 0x3FFFC;   /* 4 * pitch (pitch is in the low 16 bits) */
    uint32 cur_word;
    int saddata[16], tmp, tmp2; /* used when collecting flag (global) is on */
    int difmad;
    int madstar;
    HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info;
    int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg);
    uint *countbreak = &(htfm_stat->countbreak);
    int *offsetRef = htfm_stat->offsetRef;   /* per-stage start offsets into ref */

    madstar = (uint32)dmin_lx >> 20;   /* dmin / 16 */

    NUM_SAD_MB_CALL();

    /* Each stage: 4 unrolled groups; a group compares 4 bytes of blk
       (read as one 32-bit word) with ref columns 0,4,8,12 of one row,
       then steps ref down 4 rows (lx4). blk is consumed linearly —
       assumed pre-arranged to match this sampling order (TODO confirm
       against the HTFM setup code). */
    blk -= 4;
    for (i = 0; i < 16; i++)
    {
        p1 = ref + offsetRef[i];
        cur_word = *((uint32*)(blk += 4));
        tmp = p1[12];
        tmp2 = (cur_word >> 24) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[8];
        tmp2 = (cur_word >> 16) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[4];
        tmp2 = (cur_word >> 8) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[0];
        p1 += lx4;
        tmp2 = (cur_word & 0xFF);
        sad = SUB_SAD(sad, tmp, tmp2);

        cur_word = *((uint32*)(blk += 4));
        tmp = p1[12];
        tmp2 = (cur_word >> 24) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[8];
        tmp2 = (cur_word >> 16) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[4];
        tmp2 = (cur_word >> 8) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[0];
        p1 += lx4;
        tmp2 = (cur_word & 0xFF);
        sad = SUB_SAD(sad, tmp, tmp2);

        cur_word = *((uint32*)(blk += 4));
        tmp = p1[12];
        tmp2 = (cur_word >> 24) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[8];
        tmp2 = (cur_word >> 16) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[4];
        tmp2 = (cur_word >> 8) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[0];
        p1 += lx4;
        tmp2 = (cur_word & 0xFF);
        sad = SUB_SAD(sad, tmp, tmp2);

        cur_word = *((uint32*)(blk += 4));
        tmp = p1[12];
        tmp2 = (cur_word >> 24) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[8];
        tmp2 = (cur_word >> 16) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[4];
        tmp2 = (cur_word >> 8) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[0];
        p1 += lx4;
        tmp2 = (cur_word & 0xFF);
        sad = SUB_SAD(sad, tmp, tmp2);

        NUM_SAD_MB();

        saddata[i] = sad;

        if (i > 0)
        {
            /* early out: partial SAD already beats the best candidate;
               record the stage-0 vs stage-1 MAD difference statistic */
            if ((uint32)sad > ((uint32)dmin_lx >> 16))
            {
                difmad = saddata[0] - ((saddata[1] + 1) >> 1);
                (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
                (*countbreak)++;
                return sad;
            }
        }
    }

    difmad = saddata[0] - ((saddata[1] + 1) >> 1);
    (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
    (*countbreak)++;
    return sad;
}
+
/* 16x16 SAD with uniform subsampling and HTFM early termination.
   extra_info holds the 16 normalized rejection thresholds (nrmlz_th)
   followed, at offset 32, by the per-stage ref offsets. After each of
   the 16 stages the partial SAD is tested against both the current best
   (top 16 bits of dmin_lx) and the threshold line sadstar - nrmlz_th[i];
   failing either test rejects the candidate with a sentinel of 65536. */
int AVCSAD_MB_HTFM(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info)
{
    int sad = 0;
    uint8 *p1;

    int i;
    int tmp, tmp2;
    int lx4 = (dmin_lx << 2) & 0x3FFFC;   /* 4 * pitch (pitch is in the low 16 bits) */
    int sadstar = 0, madstar;
    int *nrmlz_th = (int*) extra_info;
    int *offsetRef = (int*) extra_info + 32;
    uint32 cur_word;

    madstar = (uint32)dmin_lx >> 20;   /* dmin / 16: per-stage MAD budget */

    NUM_SAD_MB_CALL();

    /* Same stage layout as AVCAVCSAD_MB_HTFM_Collect: per group, 4 bytes
       of blk (one 32-bit word) vs ref columns 0,4,8,12 of a row, then
       step 4 rows; blk assumed pre-arranged to match (TODO confirm). */
    blk -= 4;
    for (i = 0; i < 16; i++)
    {
        p1 = ref + offsetRef[i];
        cur_word = *((uint32*)(blk += 4));
        tmp = p1[12];
        tmp2 = (cur_word >> 24) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[8];
        tmp2 = (cur_word >> 16) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[4];
        tmp2 = (cur_word >> 8) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[0];
        p1 += lx4;
        tmp2 = (cur_word & 0xFF);
        sad = SUB_SAD(sad, tmp, tmp2);

        cur_word = *((uint32*)(blk += 4));
        tmp = p1[12];
        tmp2 = (cur_word >> 24) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[8];
        tmp2 = (cur_word >> 16) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[4];
        tmp2 = (cur_word >> 8) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[0];
        p1 += lx4;
        tmp2 = (cur_word & 0xFF);
        sad = SUB_SAD(sad, tmp, tmp2);

        cur_word = *((uint32*)(blk += 4));
        tmp = p1[12];
        tmp2 = (cur_word >> 24) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[8];
        tmp2 = (cur_word >> 16) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[4];
        tmp2 = (cur_word >> 8) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[0];
        p1 += lx4;
        tmp2 = (cur_word & 0xFF);
        sad = SUB_SAD(sad, tmp, tmp2);

        cur_word = *((uint32*)(blk += 4));
        tmp = p1[12];
        tmp2 = (cur_word >> 24) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[8];
        tmp2 = (cur_word >> 16) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[4];
        tmp2 = (cur_word >> 8) & 0xFF;
        sad = SUB_SAD(sad, tmp, tmp2);
        tmp = p1[0];
        p1 += lx4;
        tmp2 = (cur_word & 0xFF);
        sad = SUB_SAD(sad, tmp, tmp2);

        NUM_SAD_MB();

        /* hypothesis test: continue only while the partial SAD stays under
           both the current best and the accumulated MAD-budget line */
        sadstar += madstar;
        if (((uint32)sad <= ((uint32)dmin_lx >> 16)) && (sad <= (sadstar - *nrmlz_th++)))
            ;
        else
            return 65536;
    }

    return sad;
}
+#endif /* HTFM */
+
+
+
diff --git a/media/libstagefright/codecs/avc/enc/src/sad_halfpel.cpp b/media/libstagefright/codecs/avc/enc/src/sad_halfpel.cpp
new file mode 100644
index 0000000..faf2198
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/sad_halfpel.cpp
@@ -0,0 +1,629 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+/* contains (suffix gives the half-pel component: xhyh = both, yh = vertical, xh = horizontal)
+int AVCSAD_MB_HalfPel_Cxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
+int AVCSAD_MB_HalfPel_Cyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
+int AVCSAD_MB_HalfPel_Cxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
+
+HTFM (hypothesis-testing fast matching) versions, compiled only when HTFM is defined:
+int AVCAVCSAD_MB_HP_HTFM_Collectxhyh/yh/xh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
+int AVCSAD_MB_HP_HTFMxhyh/yh/xh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
+*/
+
+#include "avcenc_lib.h"
+#include "sad_halfpel_inline.h"
+
+#ifdef _SAD_STAT
+uint32 num_sad_HP_MB = 0;
+uint32 num_sad_HP_Blk = 0;
+uint32 num_sad_HP_MB_call = 0;
+uint32 num_sad_HP_Blk_call = 0;
+#define NUM_SAD_HP_MB_CALL() num_sad_HP_MB_call++
+#define NUM_SAD_HP_MB() num_sad_HP_MB++
+#define NUM_SAD_HP_BLK_CALL() num_sad_HP_Blk_call++
+#define NUM_SAD_HP_BLK() num_sad_HP_Blk++
+#else
+#define NUM_SAD_HP_MB_CALL()
+#define NUM_SAD_HP_MB()
+#define NUM_SAD_HP_BLK_CALL()
+#define NUM_SAD_HP_BLK()
+#endif
+
+
+
+/*===============================================================
+ Function: SAD_MB_HalfPel
+ Date: 09/17/2000
+ Purpose: Compute the SAD on the half-pel resolution
+ Input/Output: hmem is assumed to be a pointer to the starting
+ point of the search in the 33x33 matrix search region
+ Changes:
+ 11/7/00: implemented MMX
+ ===============================================================*/
+/*==================================================================
+ Function: AVCSAD_MB_HalfPel_C
+ Date: 04/30/2001
+ Purpose: Compute SAD 16x16 between blk and ref in halfpel
+ resolution,
+ Changes:
+ ==================================================================*/
+/* One component is half-pel */
+/* 16x16 SAD against a half-pel position interpolated in BOTH x and y.
+ * Each reference sample is the rounded average of the 2x2 neighborhood
+ * p1/p2/p3/p4: (a+b+c+d+2)>>2.
+ * dmin_rx packs two values: bits 0-15 = reference row stride (rx),
+ * bits 16-31 = current best SAD used for the per-row early exit.
+ * Returns the accumulated SAD (partial if the early exit fires, in
+ * which case it is only guaranteed to exceed the current best). */
+int AVCSAD_MB_HalfPel_Cxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
+{
+    (void)(extra_info);
+
+    int i, j;
+    int sad = 0;
+    uint8 *kk, *p1, *p2, *p3, *p4;
+// int sumref=0;
+    int temp;
+    int rx = dmin_rx & 0xFFFF;
+
+    NUM_SAD_HP_MB_CALL();
+
+    /* four corners of the bilinear interpolation window */
+    p1 = ref;
+    p2 = ref + 1;
+    p3 = ref + rx;
+    p4 = ref + rx + 1;
+    kk = blk;
+
+    for (i = 0; i < 16; i++)
+    {
+        for (j = 0; j < 16; j++)
+        {
+
+            temp = ((p1[j] + p2[j] + p3[j] + p4[j] + 2) >> 2) - *kk++;
+            sad += AVC_ABS(temp);
+        }
+
+        NUM_SAD_HP_MB();
+
+        /* early exit: already worse than the best SAD so far */
+        if (sad > (int)((uint32)dmin_rx >> 16))
+            return sad;
+
+        p1 += rx;
+        p3 += rx;
+        p2 += rx;
+        p4 += rx;
+    }
+    return sad;
+}
+
+/* 16x16 SAD against a vertical half-pel position: each reference
+ * sample is the rounded average of the pixel and the one a full row
+ * below it ((a+b+1)>>1).
+ * dmin_rx packs stride (low 16 bits) and the current best SAD (high
+ * 16 bits) for the per-row early exit, as in AVCSAD_MB_HalfPel_Cxhyh. */
+int AVCSAD_MB_HalfPel_Cyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
+{
+    (void)(extra_info);
+
+    int i, j;
+    int sad = 0;
+    uint8 *kk, *p1, *p2;
+// int sumref=0;
+    int temp;
+    int rx = dmin_rx & 0xFFFF;
+
+    NUM_SAD_HP_MB_CALL();
+
+    p1 = ref;
+    p2 = ref + rx; /* either left/right or top/bottom pixel */
+    kk = blk;
+
+    for (i = 0; i < 16; i++)
+    {
+        for (j = 0; j < 16; j++)
+        {
+
+            temp = ((p1[j] + p2[j] + 1) >> 1) - *kk++;
+            sad += AVC_ABS(temp);
+        }
+
+        NUM_SAD_HP_MB();
+
+        /* early exit when the partial SAD exceeds the current best */
+        if (sad > (int)((uint32)dmin_rx >> 16))
+            return sad;
+        p1 += rx;
+        p2 += rx;
+    }
+    return sad;
+}
+
+/* 16x16 SAD against a horizontal half-pel position: each reference
+ * sample is the rounded average of two horizontally adjacent pixels
+ * ((p1[j]+p1[j+1]+1)>>1).
+ * dmin_rx packs stride (low 16 bits) and current best SAD (high 16
+ * bits) for the per-row early exit. */
+int AVCSAD_MB_HalfPel_Cxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
+{
+    (void)(extra_info);
+
+    int i, j;
+    int sad = 0;
+    uint8 *kk, *p1;
+    int temp;
+    int rx = dmin_rx & 0xFFFF;
+
+    NUM_SAD_HP_MB_CALL();
+
+    p1 = ref;
+    kk = blk;
+
+    for (i = 0; i < 16; i++)
+    {
+        for (j = 0; j < 16; j++)
+        {
+
+            temp = ((p1[j] + p1[j+1] + 1) >> 1) - *kk++;
+            sad += AVC_ABS(temp);
+        }
+
+        NUM_SAD_HP_MB();
+
+        /* early exit when the partial SAD exceeds the current best */
+        if (sad > (int)((uint32)dmin_rx >> 16))
+            return sad;
+        p1 += rx;
+    }
+    return sad;
+}
+
+#ifdef HTFM /* HTFM with uniform subsampling implementation, 2/28/01 */
+
+// Check here
+/* Statistics-collecting HTFM variant of the xh+yh half-pel 16x16 SAD.
+ * The macroblock is visited as 16 "stages" of 4 rows each, in the
+ * uniform-subsampling order given by htfm_stat->offsetRef[]; the
+ * cumulative SAD after each stage is recorded in saddata[].  On early
+ * exit (partial SAD already above the best-so-far in the high 16 bits
+ * of dmin_rx) or on completion, |saddata[0] - (saddata[1]+1)/2| is
+ * accumulated into htfm_stat->abs_dif_mad_avg and countbreak is
+ * incremented, to calibrate the HTFM thresholds used later.
+ * NOTE(review): madstar is computed but never used in this Collect
+ * variant; the comparison against dmin_rx>>16 is done unsigned here,
+ * unlike the (int) cast in the plain half-pel functions — presumably
+ * harmless since sad >= 0, but worth confirming.
+ * NOTE(review): the uint32 loads assume blk is 4-byte aligned —
+ * TODO confirm against callers. */
+int AVCAVCSAD_MB_HP_HTFM_Collectxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
+{
+    int i, j;
+    int sad = 0;
+    uint8 *p1, *p2;
+    int rx = dmin_rx & 0xFFFF;
+    int refwx4 = rx << 2;       /* stride for stepping 4 rows at once */
+    int saddata[16];      /* used when collecting flag (global) is on */
+    int difmad, tmp, tmp2;
+    int madstar;
+    HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info;
+    int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg);
+    UInt *countbreak = &(htfm_stat->countbreak);
+    int *offsetRef = htfm_stat->offsetRef;
+    uint32 cur_word;
+
+    madstar = (uint32)dmin_rx >> 20;
+
+    NUM_SAD_HP_MB_CALL();
+
+    blk -= 4;
+
+    for (i = 0; i < 16; i++) /* 16 stages */
+    {
+        p1 = ref + offsetRef[i];
+        p2 = p1 + rx;
+
+        j = 4;/* 4 lines */
+        do
+        {
+            /* load 4 current pixels at once; process bytes 12,8,4,0 of
+               the row, each via the rounded 4-tap ((sum+2)>>2) average */
+            cur_word = *((uint32*)(blk += 4));
+            tmp = p1[12] + p2[12];
+            tmp2 = p1[13] + p2[13];
+            tmp += tmp2;
+            tmp2 = (cur_word >> 24) & 0xFF;
+            tmp += 2;
+            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[8] + p2[8];
+            tmp2 = p1[9] + p2[9];
+            tmp += tmp2;
+            tmp2 = (cur_word >> 16) & 0xFF;
+            tmp += 2;
+            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[4] + p2[4];
+            tmp2 = p1[5] + p2[5];
+            tmp += tmp2;
+            tmp2 = (cur_word >> 8) & 0xFF;
+            tmp += 2;
+            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
+            tmp2 = p1[1] + p2[1];
+            tmp = p1[0] + p2[0];
+            p1 += refwx4;
+            p2 += refwx4;
+            tmp += tmp2;
+            tmp2 = (cur_word & 0xFF);
+            tmp += 2;
+            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
+        }
+        while (--j);
+
+        NUM_SAD_HP_MB();
+
+        saddata[i] = sad;
+
+        if (i > 0)
+        {
+            if (sad > ((uint32)dmin_rx >> 16))
+            {
+                /* record MAD-difference statistic before bailing out */
+                difmad = saddata[0] - ((saddata[1] + 1) >> 1);
+                (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
+                (*countbreak)++;
+                return sad;
+            }
+        }
+    }
+    difmad = saddata[0] - ((saddata[1] + 1) >> 1);
+    (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
+    (*countbreak)++;
+
+    return sad;
+}
+
+/* Statistics-collecting HTFM variant of the vertical (yh) half-pel
+ * 16x16 SAD.  Same 16-stage subsampled traversal and statistics
+ * update as AVCAVCSAD_MB_HP_HTFM_Collectxhyh, but each reference
+ * sample is the rounded 2-tap average of vertically adjacent pixels
+ * (p1[k]+p2[k]+1)>>1, evaluated via INTERP1_SUB_SAD.
+ * NOTE(review): madstar is computed but never used here. */
+int AVCAVCSAD_MB_HP_HTFM_Collectyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
+{
+    int i, j;
+    int sad = 0;
+    uint8 *p1, *p2;
+    int rx = dmin_rx & 0xFFFF;
+    int refwx4 = rx << 2;       /* stride for stepping 4 rows at once */
+    int saddata[16];      /* used when collecting flag (global) is on */
+    int difmad, tmp, tmp2;
+    int madstar;
+    HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info;
+    int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg);
+    UInt *countbreak = &(htfm_stat->countbreak);
+    int *offsetRef = htfm_stat->offsetRef;
+    uint32 cur_word;
+
+    madstar = (uint32)dmin_rx >> 20;
+
+    NUM_SAD_HP_MB_CALL();
+
+    blk -= 4;
+
+    for (i = 0; i < 16; i++) /* 16 stages */
+    {
+        p1 = ref + offsetRef[i];
+        p2 = p1 + rx;
+        j = 4;
+        do
+        {
+            /* tmp2 = a + b + 1 (rounding included); tmp = current pixel */
+            cur_word = *((uint32*)(blk += 4));
+            tmp = p1[12];
+            tmp2 = p2[12];
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word >> 24) & 0xFF;
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[8];
+            tmp2 = p2[8];
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word >> 16) & 0xFF;
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[4];
+            tmp2 = p2[4];
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word >> 8) & 0xFF;
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[0];
+            p1 += refwx4;
+            tmp2 = p2[0];
+            p2 += refwx4;
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word & 0xFF);
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+        }
+        while (--j);
+
+        NUM_SAD_HP_MB();
+
+        saddata[i] = sad;
+
+        if (i > 0)
+        {
+            if (sad > ((uint32)dmin_rx >> 16))
+            {
+                /* record MAD-difference statistic before bailing out */
+                difmad = saddata[0] - ((saddata[1] + 1) >> 1);
+                (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
+                (*countbreak)++;
+                return sad;
+            }
+        }
+    }
+    difmad = saddata[0] - ((saddata[1] + 1) >> 1);
+    (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
+    (*countbreak)++;
+
+    return sad;
+}
+
+/* Statistics-collecting HTFM variant of the horizontal (xh) half-pel
+ * 16x16 SAD.  Same 16-stage subsampled traversal and statistics
+ * update as the xhyh/yh Collect variants, but each reference sample
+ * is the rounded 2-tap average of horizontally adjacent pixels
+ * (p1[k]+p1[k+1]+1)>>1, evaluated via INTERP1_SUB_SAD.
+ * NOTE(review): madstar is computed but never used here. */
+int AVCAVCSAD_MB_HP_HTFM_Collectxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
+{
+    int i, j;
+    int sad = 0;
+    uint8 *p1;
+    int rx = dmin_rx & 0xFFFF;
+    int refwx4 = rx << 2;       /* stride for stepping 4 rows at once */
+    int saddata[16];      /* used when collecting flag (global) is on */
+    int difmad, tmp, tmp2;
+    int madstar;
+    HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info;
+    int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg);
+    UInt *countbreak = &(htfm_stat->countbreak);
+    int *offsetRef = htfm_stat->offsetRef;
+    uint32 cur_word;
+
+    madstar = (uint32)dmin_rx >> 20;
+
+    NUM_SAD_HP_MB_CALL();
+
+    blk -= 4;
+
+    for (i = 0; i < 16; i++) /* 16 stages */
+    {
+        p1 = ref + offsetRef[i];
+
+        j = 4; /* 4 lines */
+        do
+        {
+            /* tmp2 = a + b + 1 (rounding included); tmp = current pixel */
+            cur_word = *((uint32*)(blk += 4));
+            tmp = p1[12];
+            tmp2 = p1[13];
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word >> 24) & 0xFF;
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[8];
+            tmp2 = p1[9];
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word >> 16) & 0xFF;
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[4];
+            tmp2 = p1[5];
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word >> 8) & 0xFF;
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[0];
+            tmp2 = p1[1];
+            p1 += refwx4;
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word & 0xFF);
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+        }
+        while (--j);
+
+        NUM_SAD_HP_MB();
+
+        saddata[i] = sad;
+
+        if (i > 0)
+        {
+            if (sad > ((uint32)dmin_rx >> 16))
+            {
+                /* record MAD-difference statistic before bailing out */
+                difmad = saddata[0] - ((saddata[1] + 1) >> 1);
+                (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
+                (*countbreak)++;
+                return sad;
+            }
+        }
+    }
+    difmad = saddata[0] - ((saddata[1] + 1) >> 1);
+    (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad);
+    (*countbreak)++;
+
+    return sad;
+}
+
+/* HTFM (hypothesis-testing fast matching) xh+yh half-pel 16x16 SAD.
+ * Visits 16 stages of 4 rows each in the subsampled order given by
+ * offsetRef[] (stored after the 32 threshold entries in extra_info).
+ * After each stage the partial SAD is tested against (a) the running
+ * threshold sadstar - nrmlz_th[i], where sadstar accumulates madstar
+ * (= dmin_rx>>20) per stage, and (b) the best SAD so far in the high
+ * 16 bits of dmin_rx; failure returns the sentinel 65536 meaning
+ * "rejected, not a real SAD".  Reference samples use the rounded
+ * 4-tap average ((sum+2)>>2) via INTERP2_SUB_SAD. */
+int AVCSAD_MB_HP_HTFMxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
+{
+    int i, j;
+    int sad = 0, tmp, tmp2;
+    uint8 *p1, *p2;
+    int rx = dmin_rx & 0xFFFF;
+    int refwx4 = rx << 2;       /* stride for stepping 4 rows at once */
+    int sadstar = 0, madstar;
+    int *nrmlz_th = (int*) extra_info;
+    int *offsetRef = nrmlz_th + 32;
+    uint32 cur_word;
+
+    madstar = (uint32)dmin_rx >> 20;
+
+    NUM_SAD_HP_MB_CALL();
+
+    blk -= 4;
+
+    for (i = 0; i < 16; i++) /* 16 stages */
+    {
+        p1 = ref + offsetRef[i];
+        p2 = p1 + rx;
+
+        j = 4; /* 4 lines */
+        do
+        {
+            /* 4 pixels per word; bytes processed high-to-low (12,8,4,0) */
+            cur_word = *((uint32*)(blk += 4));
+            tmp = p1[12] + p2[12];
+            tmp2 = p1[13] + p2[13];
+            tmp += tmp2;
+            tmp2 = (cur_word >> 24) & 0xFF;
+            tmp += 2;
+            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[8] + p2[8];
+            tmp2 = p1[9] + p2[9];
+            tmp += tmp2;
+            tmp2 = (cur_word >> 16) & 0xFF;
+            tmp += 2;
+            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[4] + p2[4];
+            tmp2 = p1[5] + p2[5];
+            tmp += tmp2;
+            tmp2 = (cur_word >> 8) & 0xFF;
+            tmp += 2;
+            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
+            tmp2 = p1[1] + p2[1];
+            tmp = p1[0] + p2[0];
+            p1 += refwx4;
+            p2 += refwx4;
+            tmp += tmp2;
+            tmp2 = (cur_word & 0xFF);
+            tmp += 2;
+            sad = INTERP2_SUB_SAD(sad, tmp, tmp2);;
+        }
+        while (--j);
+
+        NUM_SAD_HP_MB();
+
+        sadstar += madstar;
+        /* HTFM hypothesis test: reject this candidate early */
+        if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16))
+        {
+            return 65536;
+        }
+    }
+
+    return sad;
+}
+
+/* HTFM vertical (yh) half-pel 16x16 SAD with early rejection.
+ * Same 16-stage subsampled traversal and threshold test as
+ * AVCSAD_MB_HP_HTFMxhyh (returns sentinel 65536 on rejection), but
+ * reference samples are the rounded 2-tap vertical average
+ * (p1[k]+p2[k]+1)>>1, evaluated via INTERP1_SUB_SAD. */
+int AVCSAD_MB_HP_HTFMyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
+{
+    int i, j;
+    int sad = 0, tmp, tmp2;
+    uint8 *p1, *p2;
+    int rx = dmin_rx & 0xFFFF;
+    int refwx4 = rx << 2;       /* stride for stepping 4 rows at once */
+    int sadstar = 0, madstar;
+    int *nrmlz_th = (int*) extra_info;
+    int *offsetRef = nrmlz_th + 32;
+    uint32 cur_word;
+
+    madstar = (uint32)dmin_rx >> 20;
+
+    NUM_SAD_HP_MB_CALL();
+
+    blk -= 4;
+
+    for (i = 0; i < 16; i++) /* 16 stages */
+    {
+        p1 = ref + offsetRef[i];
+        p2 = p1 + rx;
+        j = 4;
+        do
+        {
+            /* tmp2 = a + b + 1 (rounding included); tmp = current pixel */
+            cur_word = *((uint32*)(blk += 4));
+            tmp = p1[12];
+            tmp2 = p2[12];
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word >> 24) & 0xFF;
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[8];
+            tmp2 = p2[8];
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word >> 16) & 0xFF;
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[4];
+            tmp2 = p2[4];
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word >> 8) & 0xFF;
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[0];
+            p1 += refwx4;
+            tmp2 = p2[0];
+            p2 += refwx4;
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word & 0xFF);
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+        }
+        while (--j);
+
+        NUM_SAD_HP_MB();
+        sadstar += madstar;
+        /* HTFM hypothesis test: reject this candidate early */
+        if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16))
+        {
+            return 65536;
+        }
+    }
+
+    return sad;
+}
+
+/* HTFM horizontal (xh) half-pel 16x16 SAD with early rejection.
+ * Same 16-stage subsampled traversal and threshold test as
+ * AVCSAD_MB_HP_HTFMxhyh (returns sentinel 65536 on rejection), but
+ * reference samples are the rounded 2-tap horizontal average
+ * (p1[k]+p1[k+1]+1)>>1, evaluated via INTERP1_SUB_SAD. */
+int AVCSAD_MB_HP_HTFMxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info)
+{
+    int i, j;
+    int sad = 0, tmp, tmp2;
+    uint8 *p1;
+    int rx = dmin_rx & 0xFFFF;
+    int refwx4 = rx << 2;       /* stride for stepping 4 rows at once */
+    int sadstar = 0, madstar;
+    int *nrmlz_th = (int*) extra_info;
+    int *offsetRef = nrmlz_th + 32;
+    uint32 cur_word;
+
+    madstar = (uint32)dmin_rx >> 20;
+
+    NUM_SAD_HP_MB_CALL();
+
+    blk -= 4;
+
+    for (i = 0; i < 16; i++) /* 16 stages */
+    {
+        p1 = ref + offsetRef[i];
+
+        j = 4;/* 4 lines */
+        do
+        {
+            /* tmp2 = a + b + 1 (rounding included); tmp = current pixel */
+            cur_word = *((uint32*)(blk += 4));
+            tmp = p1[12];
+            tmp2 = p1[13];
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word >> 24) & 0xFF;
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[8];
+            tmp2 = p1[9];
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word >> 16) & 0xFF;
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[4];
+            tmp2 = p1[5];
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word >> 8) & 0xFF;
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+            tmp = p1[0];
+            tmp2 = p1[1];
+            p1 += refwx4;
+            tmp++;
+            tmp2 += tmp;
+            tmp = (cur_word & 0xFF);
+            sad = INTERP1_SUB_SAD(sad, tmp, tmp2);;
+        }
+        while (--j);
+
+        NUM_SAD_HP_MB();
+
+        sadstar += madstar;
+        /* HTFM hypothesis test: reject this candidate early */
+        if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16))
+        {
+            return 65536;
+        }
+    }
+
+    return sad;
+}
+
+#endif /* HTFM */
+
+
+
+
+
diff --git a/media/libstagefright/codecs/avc/enc/src/sad_halfpel_inline.h b/media/libstagefright/codecs/avc/enc/src/sad_halfpel_inline.h
new file mode 100644
index 0000000..3a21647
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/sad_halfpel_inline.h
@@ -0,0 +1,96 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+
+#ifndef _SAD_HALFPEL_INLINE_H_
+#define _SAD_HALFPEL_INLINE_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */
+
+    /* NOTE(review): despite the "ARM GNU COMPILER" label, this first
+     * branch is portable C.  The later #elif at the bottom of this
+     * header repeats the exact same condition, so the inline-asm
+     * versions there are unreachable — presumably intentional to
+     * select the C implementations; confirm before relying on the
+     * asm branch. */
+
+    /* sad += |(tmp2 >> 1) - tmp|.  Callers pass tmp2 = a + b + 1
+     * (2-tap sum with rounding bias) and tmp = current pixel, so this
+     * accumulates the abs diff against the rounded half-pel average. */
+    __inline int32 INTERP1_SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
+    {
+        tmp = (tmp2 >> 1) - tmp;
+        if (tmp > 0) sad += tmp;
+        else sad -= tmp;
+
+        return sad;
+    }
+
+    /* sad += |(tmp >> 2) - tmp2|.  Callers pass tmp = 4-tap sum + 2
+     * (rounding bias) and tmp2 = current pixel. */
+    __inline int32 INTERP2_SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
+    {
+        tmp = (tmp >> 2) - tmp2;
+        if (tmp > 0) sad += tmp;
+        else sad -= tmp;
+
+        return sad;
+    }
+
+#elif defined(__CC_ARM)  /* only work with arm v5 */
+
+    /* ARM RVCT/armcc embedded-asm version of INTERP1_SUB_SAD:
+     * sad += |(tmp2 >> 1) - tmp| computed branch-free with
+     * RSBS/RSBMI (negate if the subtraction went negative). */
+    __inline int32 INTERP1_SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
+    {
+        __asm
+        {
+            rsbs    tmp, tmp, tmp2, asr #1 ;
+            rsbmi   tmp, tmp, #0 ;
+            add     sad, sad, tmp ;
+        }
+
+        return sad;
+    }
+
+    /* armcc embedded-asm version of INTERP2_SUB_SAD:
+     * sad += |(tmp >> 2) - tmp2|, same negate-if-minus idiom. */
+    __inline int32 INTERP2_SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
+    {
+        __asm
+        {
+            rsbs    tmp, tmp2, tmp, asr #2 ;
+            rsbmi   tmp, tmp, #0 ;
+            add     sad, sad, tmp ;
+        }
+
+        return sad;
+    }
+
+#elif defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */
+
+    /* NOTE(review): this condition duplicates the first #if in this
+     * header, so this branch is unreachable dead code.  Also, the asm
+     * constraints look wrong for live use: tmp is read before being
+     * written but is declared write-only ("=r"), and sad likewise —
+     * they would need "+r" (or matching "0"/"1") constraints. */
+
+    __inline int32 INTERP1_SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
+    {
+__asm__ volatile("rsbs  %1, %1, %2, asr #1\n\trsbmi %1, %1, #0\n\tadd  %0, %0, %1": "=r"(sad), "=r"(tmp): "r"(tmp2));
+
+        return sad;
+    }
+
+    __inline int32 INTERP2_SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
+    {
+__asm__ volatile("rsbs  %1, %2, %1, asr #2\n\trsbmi %1, %1, #0\n\tadd  %0, %0, %1": "=r"(sad), "=r"(tmp): "r"(tmp2));
+
+        return sad;
+    }
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //_SAD_HALFPEL_INLINE_H_
+
diff --git a/media/libstagefright/codecs/avc/enc/src/sad_inline.h b/media/libstagefright/codecs/avc/enc/src/sad_inline.h
new file mode 100644
index 0000000..f39794f
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/sad_inline.h
@@ -0,0 +1,488 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+#ifndef _SAD_INLINE_H_
+#define _SAD_INLINE_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */
+
+    /* NOTE(review): despite the label, this first branch is portable C;
+     * the last #elif in this header repeats the same condition, so the
+     * GNU inline-asm branch below is unreachable. */
+
+    /* sad += |tmp - tmp2| (full-pel SAD accumulation step). */
+    __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
+    {
+        tmp = tmp - tmp2;
+        if (tmp > 0) sad += tmp;
+        else sad -= tmp;
+
+        return sad;
+    }
+
+    /* SWAR absolute difference of 4 packed bytes: returns a word whose
+     * four bytes are |src1.b[i] - src2.b[i]|.  x7 tracks which bytes
+     * borrowed during the packed subtract (mask = 0x80808080) so the
+     * borrow can be repaired and negative bytes absolute-valued. */
+    __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask)
+    {
+        int32 x7;
+
+        x7 = src2 ^ src1;       /* check odd/even combination */
+        if ((uint32)src2 >= (uint32)src1)
+        {
+            src1 = src2 - src1;     /* subs */
+        }
+        else
+        {
+            src1 = src1 - src2;
+        }
+        x7 = x7 ^ src1;     /* only odd bytes need to add carry */
+        x7 = mask & ((uint32)x7 >> 1);
+        x7 = (x7 << 8) - x7;
+        src1 = src1 + (x7 >> 7); /* add 0xFF to the negative byte, add back carry */
+        src1 = src1 ^(x7 >> 7); /* take absolute value of negative byte */
+
+        return src1;
+    }
+
+#define NUMBER 3
+#define SHIFT 24
+
+#include "sad_mb_offset.h"
+
+#undef NUMBER
+#define NUMBER 2
+#undef SHIFT
+#define SHIFT 16
+#include "sad_mb_offset.h"
+
+#undef NUMBER
+#define NUMBER 1
+#undef SHIFT
+#define SHIFT 8
+#include "sad_mb_offset.h"
+
+
+    /* 16x16 full-pel SAD using word-at-a-time (SWAR) processing.
+     * ref may be unaligned: the low 2 bits of its address select one
+     * of the sad_mb_offsetN variants (from sad_mb_offset.h); the
+     * aligned case is handled inline below.  Per row, two 8-pixel
+     * groups are diffed with sad_4pixel; low bytes accumulate in x5
+     * and high bytes in x4, combined afterwards into a running SAD in
+     * the high half-word of x10 and compared against dmin for early
+     * exit.  dmin = best SAD so far; lx = reference row stride.
+     * NOTE(review): the uint32 loads assume blk is 4-byte aligned —
+     * TODO confirm against callers. */
+    __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx)
+    {
+        int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
+
+        x9 = 0x80808080; /* const. */
+
+        x8 = (uint32)ref & 0x3;
+        if (x8 == 3)
+            goto SadMBOffset3;
+        if (x8 == 2)
+            goto SadMBOffset2;
+        if (x8 == 1)
+            goto SadMBOffset1;
+
+//  x5 = (x4<<8)-x4; /* x5 = x4*255; */
+        x4 = x5 = 0;
+
+        x6 = 0xFFFF00FF;
+
+        ref -= lx;
+        blk -= 16;
+
+        x8 = 16;        /* row counter */
+
+LOOP_SAD0:
+        /****** process 8 pixels ******/
+        x10 = *((uint32*)(ref += lx));
+        x11 = *((uint32*)(ref + 4));
+        x12 = *((uint32*)(blk += 16));
+        x14 = *((uint32*)(blk + 4));
+
+        /* process x11 & x14 */
+        x11 = sad_4pixel(x11, x14, x9);
+
+        /* process x12 & x10 */
+        x10 = sad_4pixel(x10, x12, x9);
+
+        x5 = x5 + x10; /* accumulate low bytes */
+        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
+        x4 = x4 + ((uint32)x10 >> 8);  /* accumulate high bytes */
+        x5 = x5 + x11;  /* accumulate low bytes */
+        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
+        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
+
+        /****** process 8 pixels ******/
+        x10 = *((uint32*)(ref + 8));
+        x11 = *((uint32*)(ref + 12));
+        x12 = *((uint32*)(blk + 8));
+        x14 = *((uint32*)(blk + 12));
+
+        /* process x11 & x14 */
+        x11 = sad_4pixel(x11, x14, x9);
+
+        /* process x12 & x10 */
+        x10 = sad_4pixel(x10, x12, x9);
+
+        x5 = x5 + x10;  /* accumulate low bytes */
+        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
+        x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
+        x5 = x5 + x11;  /* accumulate low bytes */
+        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
+        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
+
+        /****************/
+        x10 = x5 - (x4 << 8); /* extract low bytes */
+        x10 = x10 + x4;     /* add with high bytes */
+        x10 = x10 + (x10 << 16); /* add with lower half word */
+
+        if ((int)((uint32)x10 >> 16) <= dmin) /* compare with dmin */
+        {
+            if (--x8)
+            {
+                goto LOOP_SAD0;
+            }
+
+        }
+
+        return ((uint32)x10 >> 16);
+
+SadMBOffset3:
+
+        return sad_mb_offset3(ref, blk, lx, dmin);
+
+SadMBOffset2:
+
+        return sad_mb_offset2(ref, blk, lx, dmin);
+
+SadMBOffset1:
+
+        return sad_mb_offset1(ref, blk, lx, dmin);
+
+    }
+
+#elif defined(__CC_ARM)  /* only work with arm v5 */
+
+    /* armcc embedded-asm SUB_SAD: sad += |tmp2 - tmp| via RSBS/RSBMI. */
+    __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
+    {
+        __asm
+        {
+            rsbs    tmp, tmp, tmp2 ;
+            rsbmi   tmp, tmp, #0 ;
+            add     sad, sad, tmp ;
+        }
+
+        return sad;
+    }
+
+    /* armcc embedded-asm SWAR |src1 - src2| per byte (see the C
+     * version above for the algorithm). */
+    __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask)
+    {
+        int32 x7;
+
+        __asm
+        {
+            EOR     x7, src2, src1;     /* check odd/even combination */
+            SUBS    src1, src2, src1;
+            EOR     x7, x7, src1;
+            AND     x7, mask, x7, lsr #1;
+            ORRCC   x7, x7, #0x80000000;
+            RSB     x7, x7, x7, lsl #8;
+            ADD     src1, src1, x7, asr #7;   /* add 0xFF to the negative byte, add back carry */
+            EOR     src1, src1, x7, asr #7;    /* take absolute value of negative byte */
+        }
+
+        return src1;
+    }
+
+    /* Variant used with pre-negated/complemented packed data — uses a
+     * packed ADD plus rrx-based carry tracking instead of SUB.
+     * NOTE(review): only referenced via sum_accumulate/sad_mb_offset
+     * paths; confirm intended operand encoding before reuse. */
+    __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask)
+    {
+        int32 x7;
+
+        __asm
+        {
+            EOR      x7, src2, src1;        /* check odd/even combination */
+            ADDS     src1, src2, src1;
+            EOR      x7, x7, src1;      /* only odd bytes need to add carry */
+            ANDS     x7, mask, x7, rrx;
+            RSB      x7, x7, x7, lsl #8;
+            SUB      src1, src1, x7, asr #7;  /* add 0xFF to the negative byte, add back carry */
+            EOR      src1, src1, x7, asr #7; /* take absolute value of negative byte */
+        }
+
+        return src1;
+    }
+
+/* Accumulates the two packed byte-difference words x10/x11 into the
+ * low-byte (x5) and high-byte (x4) running sums, using x6 as the
+ * 0xFF00FF00-style mask.  Relies on x4,x5,x6,x10,x11 being live in
+ * the enclosing function. */
+#define sum_accumulate  __asm{      SBC  x5, x5, x10;  /* accumulate low bytes */ \
+        BIC  x10, x6, x10;   /* x10 & 0xFF00FF00 */ \
+        ADD  x4, x4, x10,lsr #8;   /* accumulate high bytes */ \
+        SBC  x5, x5, x11;  /* accumulate low bytes */ \
+        BIC  x11, x6, x11;  /* x11 & 0xFF00FF00 */ \
+        ADD  x4, x4, x11,lsr #8; } /* accumulate high bytes */
+
+
+#define NUMBER 3
+#define SHIFT 24
+#define INC_X8 0x08000001
+
+#include "sad_mb_offset.h"
+
+#undef NUMBER
+#define NUMBER 2
+#undef SHIFT
+#define SHIFT 16
+#undef INC_X8
+#define INC_X8 0x10000001
+#include "sad_mb_offset.h"
+
+#undef NUMBER
+#define NUMBER 1
+#undef SHIFT
+#define SHIFT 8
+#undef INC_X8
+#define INC_X8 0x08000001
+#include "sad_mb_offset.h"
+
+
+    /* armcc version of the 16x16 full-pel SAD.  Mixes C with embedded
+     * asm: the MOVS/BHI/BCS/BMI sequence tests the low 2 bits of ref
+     * and branches to the sad_mb_offsetN variants for unaligned cases;
+     * the aligned loop mirrors the C simd_sad_mb above, with x8 acting
+     * as a combined loop counter/flag updated by the final
+     * ADDLSS #0x10000001 (NOTE(review): "ADDLSS" mnemonic and the
+     * packed counter encoding are armcc-specific — verify against the
+     * RVCT documentation before touching). */
+    __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx)
+    {
+        int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
+
+        x9 = 0x80808080; /* const. */
+        x4 = x5 = 0;
+
+        __asm
+        {
+            MOVS    x8, ref, lsl #31 ;
+            BHI     SadMBOffset3;
+            BCS     SadMBOffset2;
+            BMI     SadMBOffset1;
+
+            MVN     x6, #0xFF00;
+        }
+LOOP_SAD0:
+        /****** process 8 pixels ******/
+        x11 = *((int32*)(ref + 12));
+        x10 = *((int32*)(ref + 8));
+        x14 = *((int32*)(blk + 12));
+        x12 = *((int32*)(blk + 8));
+
+        /* process x11 & x14 */
+        x11 = sad_4pixel(x11, x14, x9);
+
+        /* process x12 & x10 */
+        x10 = sad_4pixel(x10, x12, x9);
+
+        x5 = x5 + x10;  /* accumulate low bytes */
+        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
+        x4 = x4 + ((uint32)x10 >> 8);  /* accumulate high bytes */
+        x5 = x5 + x11;  /* accumulate low bytes */
+        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
+        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
+
+        __asm
+        {
+            /****** process 8 pixels ******/
+            LDR     x11, [ref, #4];
+            LDR     x10, [ref], lx ;
+            LDR     x14, [blk, #4];
+            LDR     x12, [blk], #16 ;
+        }
+
+        /* process x11 & x14 */
+        x11 = sad_4pixel(x11, x14, x9);
+
+        /* process x12 & x10 */
+        x10 = sad_4pixel(x10, x12, x9);
+
+        x5 = x5 + x10;  /* accumulate low bytes */
+        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
+        x4 = x4 + ((uint32)x10 >> 8);  /* accumulate high bytes */
+        x5 = x5 + x11;  /* accumulate low bytes */
+        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
+        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
+
+        /****************/
+        x10 = x5 - (x4 << 8); /* extract low bytes */
+        x10 = x10 + x4;     /* add with high bytes */
+        x10 = x10 + (x10 << 16); /* add with lower half word */
+
+        __asm
+        {
+            /****************/
+            RSBS    x11, dmin, x10, lsr #16;
+            ADDLSS  x8, x8, #0x10000001;
+            BLS     LOOP_SAD0;
+        }
+
+        return ((uint32)x10 >> 16);
+
+SadMBOffset3:
+
+        return sad_mb_offset3(ref, blk, lx, dmin, x8);
+
+SadMBOffset2:
+
+        return sad_mb_offset2(ref, blk, lx, dmin, x8);
+
+SadMBOffset1:
+
+        return sad_mb_offset1(ref, blk, lx, dmin, x8);
+    }
+
+
+#elif defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */
+
+    /* NOTE(review): this condition duplicates the first #if in this
+     * header, so this whole branch is unreachable dead code.  If it is
+     * ever revived, audit the asm constraints: several operands (sad,
+     * src1) are read before write but declared write-only ("=r");
+     * they would need "+r" or matching-digit constraints. */
+
+    __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
+    {
+__asm__ volatile("rsbs  %1, %1, %2\n\trsbmi %1, %1, #0\n\tadd  %0, %0, %1": "=r"(sad): "r"(tmp), "r"(tmp2));
+        return sad;
+    }
+
+    __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask)
+    {
+        int32 x7;
+
+__asm__ volatile("EOR  %1, %2, %0\n\tSUBS  %0, %2, %0\n\tEOR  %1, %1, %0\n\tAND  %1, %3, %1, lsr #1\n\tORRCC  %1, %1, #0x80000000\n\tRSB  %1, %1, %1, lsl #8\n\tADD  %0, %0, %1, asr #7\n\tEOR  %0, %0, %1, asr #7": "=r"(src1), "=&r"(x7): "r"(src2), "r"(mask));
+
+        return src1;
+    }
+
+    __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask)
+    {
+        int32 x7;
+
+__asm__ volatile("EOR  %1, %2, %0\n\tADDS  %0, %2, %0\n\tEOR  %1, %1, %0\n\tANDS  %1, %3, %1, rrx\n\tRSB  %1, %1, %1, lsl #8\n\tSUB  %0, %0, %1, asr #7\n\tEOR  %0, %0, %1, asr #7": "=r"(src1), "=&r"(x7): "r"(src2), "r"(mask));
+
+        return src1;
+    }
+
+/* GNU-asm byte-wise accumulate; same contract as the armcc
+ * sum_accumulate macro above (relies on x4,x5,x6,x10,x11 locals). */
+#define sum_accumulate  __asm__ volatile("SBC  %0, %0, %1\n\tBIC   %1, %4, %1\n\tADD   %2, %2, %1, lsr #8\n\tSBC   %0, %0, %3\n\tBIC   %3, %4, %3\n\tADD   %2, %2, %3, lsr #8": "=&r" (x5), "=&r" (x10), "=&r" (x4), "=&r" (x11): "r" (x6));
+
+#define NUMBER 3
+#define SHIFT 24
+#define INC_X8 0x08000001
+
+#include "sad_mb_offset.h"
+
+#undef NUMBER
+#define NUMBER 2
+#undef SHIFT
+#define SHIFT 16
+#undef INC_X8
+#define INC_X8 0x10000001
+#include "sad_mb_offset.h"
+
+#undef NUMBER
+#define NUMBER 1
+#undef SHIFT
+#define SHIFT 8
+#undef INC_X8
+#define INC_X8 0x08000001
+#include "sad_mb_offset.h"
+
+
+    /* GNU inline-asm version of the 16x16 full-pel SAD (in the
+     * unreachable duplicate-condition branch, see note above).
+     * Same structure as the plain-C version: alignment dispatch to
+     * sad_mb_offsetN, then 16 iterations of two 8-pixel SWAR groups
+     * with low/high byte accumulators x5/x4.
+     * NOTE(review): the post-indexed LDR asm declares ref/blk as
+     * write-only outputs ("=r") although their incoming values are
+     * used as base addresses — should be "+r" if this branch is ever
+     * enabled.  Also the dmin comparison here is unsigned, unlike the
+     * (int) cast used in the C version. */
+    __inline int32 simd_sad_mb(uint8 *ref, uint8 *blk, int dmin, int lx)
+    {
+        int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
+
+        x9 = 0x80808080; /* const. */
+        x4 = x5 = 0;
+
+        x8 = (uint32)ref & 0x3;
+        if (x8 == 3)
+            goto SadMBOffset3;
+        if (x8 == 2)
+            goto SadMBOffset2;
+        if (x8 == 1)
+            goto SadMBOffset1;
+
+        x8 = 16;
+///
+__asm__ volatile("MVN   %0, #0xFF00": "=r"(x6));
+
+LOOP_SAD0:
+        /****** process 8 pixels ******/
+        x11 = *((int32*)(ref + 12));
+        x10 = *((int32*)(ref + 8));
+        x14 = *((int32*)(blk + 12));
+        x12 = *((int32*)(blk + 8));
+
+        /* process x11 & x14 */
+        x11 = sad_4pixel(x11, x14, x9);
+
+        /* process x12 & x10 */
+        x10 = sad_4pixel(x10, x12, x9);
+
+        x5 = x5 + x10;  /* accumulate low bytes */
+        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
+        x4 = x4 + ((uint32)x10 >> 8);  /* accumulate high bytes */
+        x5 = x5 + x11;  /* accumulate low bytes */
+        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
+        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
+
+        /****** process 8 pixels ******/
+        x11 = *((int32*)(ref + 4));
+__asm__ volatile("LDR   %0, [%1], %2": "=&r"(x10), "=r"(ref): "r"(lx));
+        //x10 = *((int32*)ref); ref+=lx;
+        x14 = *((int32*)(blk + 4));
+__asm__ volatile("LDR   %0, [%1], #16": "=&r"(x12), "=r"(blk));
+
+        /* process x11 & x14 */
+        x11 = sad_4pixel(x11, x14, x9);
+
+        /* process x12 & x10 */
+        x10 = sad_4pixel(x10, x12, x9);
+
+        x5 = x5 + x10;  /* accumulate low bytes */
+        x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
+        x4 = x4 + ((uint32)x10 >> 8);  /* accumulate high bytes */
+        x5 = x5 + x11;  /* accumulate low bytes */
+        x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
+        x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
+
+        /****************/
+        x10 = x5 - (x4 << 8); /* extract low bytes */
+        x10 = x10 + x4;     /* add with high bytes */
+        x10 = x10 + (x10 << 16); /* add with lower half word */
+
+        /****************/
+
+        if (((uint32)x10 >> 16) <= dmin) /* compare with dmin */
+        {
+            if (--x8)
+            {
+                goto LOOP_SAD0;
+            }
+
+        }
+
+        return ((uint32)x10 >> 16);
+
+SadMBOffset3:
+
+        return sad_mb_offset3(ref, blk, lx, dmin);
+
+SadMBOffset2:
+
+        return sad_mb_offset2(ref, blk, lx, dmin);
+
+SadMBOffset1:
+
+        return sad_mb_offset1(ref, blk, lx, dmin);
+    }
+
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _SAD_INLINE_H_
+
diff --git a/media/libstagefright/codecs/avc/enc/src/sad_mb_offset.h b/media/libstagefright/codecs/avc/enc/src/sad_mb_offset.h
new file mode 100644
index 0000000..d5d4a42
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/sad_mb_offset.h
@@ -0,0 +1,311 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+
+#if defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */
+
+/* Template body (included multiple times by sad_inline.h with NUMBER and SHIFT
+   redefined) computing the 16x16 SAD between a byte-misaligned reference block
+   and an aligned source block, with early exit once the running SAD exceeds
+   dmin.  NOTE(review): SHIFT is presumably NUMBER*8 — set by the includer;
+   confirm against sad_inline.h. */
+#if (NUMBER==3)
+__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin)
+#elif (NUMBER==2)
+__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin)
+#elif (NUMBER==1)
+__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin)
+#endif
+{
+    int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
+
+    //  x5 = (x4<<8) - x4;
+    /* x4/x5 accumulate high/low bytes of the packed per-byte SADs;
+       x8 counts the 16 macroblock rows. */
+    x4 = x5 = 0;
+    x6 = 0xFFFF00FF;
+    x9 = 0x80808080; /* const. */
+    ref -= NUMBER; /* bic ref, ref, #3 */
+    ref -= lx;
+    blk -= 16;
+    x8 = 16;
+
+#if (NUMBER==3)
+LOOP_SAD3:
+#elif (NUMBER==2)
+LOOP_SAD2:
+#elif (NUMBER==1)
+LOOP_SAD1:
+#endif
+    /****** process 8 pixels ******/
+    /* Re-align: read three words and funnel-shift them so x10/x11 hold the
+       8 misaligned reference pixels for this row. */
+    x10 = *((uint32*)(ref += lx)); /* D C B A */
+    x11 = *((uint32*)(ref + 4));    /* H G F E */
+    x12 = *((uint32*)(ref + 8));    /* L K J I */
+
+    x10 = ((uint32)x10 >> SHIFT); /* 0 0 0 D */
+    x10 = x10 | (x11 << (32 - SHIFT));        /* G F E D */
+    x11 = ((uint32)x11 >> SHIFT); /* 0 0 0 H */
+    x11 = x11 | (x12 << (32 - SHIFT));        /* K J I H */
+
+    x12 = *((uint32*)(blk += 16));
+    x14 = *((uint32*)(blk + 4));
+
+    /* process x11 & x14 */
+    x11 = sad_4pixel(x11, x14, x9);
+
+    /* process x12 & x10 */
+    x10 = sad_4pixel(x10, x12, x9);
+
+    x5 = x5 + x10; /* accumulate low bytes */
+    x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
+    x4 = x4 + ((uint32)x10 >> 8);  /* accumulate high bytes */
+    x5 = x5 + x11; /* accumulate low bytes */
+    x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
+    x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
+
+    /****** process 8 pixels ******/
+    x10 = *((uint32*)(ref + 8)); /* D C B A */
+    x11 = *((uint32*)(ref + 12));   /* H G F E */
+    x12 = *((uint32*)(ref + 16));   /* L K J I */
+
+    x10 = ((uint32)x10 >> SHIFT); /* mvn x10, x10, lsr #24  = 0xFF 0xFF 0xFF ~D */
+    x10 = x10 | (x11 << (32 - SHIFT));        /* bic x10, x10, x11, lsl #8 = ~G ~F ~E ~D */
+    x11 = ((uint32)x11 >> SHIFT); /* 0xFF 0xFF 0xFF ~H */
+    x11 = x11 | (x12 << (32 - SHIFT));        /* ~K ~J ~I ~H */
+
+    x12 = *((uint32*)(blk + 8));
+    x14 = *((uint32*)(blk + 12));
+
+    /* process x11 & x14 */
+    x11 = sad_4pixel(x11, x14, x9);
+
+    /* process x12 & x10 */
+    x10 = sad_4pixel(x10, x12, x9);
+
+    x5 = x5 + x10; /* accumulate low bytes */
+    x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
+    x4 = x4 + ((uint32)x10 >> 8);  /* accumulate high bytes */
+    x5 = x5 + x11; /* accumulate low bytes */
+    x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
+    x4 = x4 + ((uint32)x11 >> 8);  /* accumulate high bytes */
+
+    /****************/
+    /* Fold the packed byte accumulators into the upper halfword of x10. */
+    x10 = x5 - (x4 << 8); /* extract low bytes */
+    x10 = x10 + x4;     /* add with high bytes */
+    x10 = x10 + (x10 << 16); /* add with lower half word */
+
+    /* Early termination: stop as soon as the partial SAD exceeds dmin. */
+    if ((int)((uint32)x10 >> 16) <= dmin) /* compare with dmin */
+    {
+        if (--x8)
+        {
+#if (NUMBER==3)
+            goto         LOOP_SAD3;
+#elif (NUMBER==2)
+            goto         LOOP_SAD2;
+#elif (NUMBER==1)
+            goto         LOOP_SAD1;
+#endif
+        }
+
+    }
+
+    return ((uint32)x10 >> 16);
+}
+
+#elif defined(__CC_ARM) /* only work with arm v5 */
+
+/* ARM RVCT (armcc) version of the misaligned 16x16 SAD with early exit.
+   Same algorithm as the GNU variant above, but the re-alignment and early
+   exit are written with armcc __asm blocks.  NOTE(review): sad_4pixelN and
+   sum_accumulate are presumably macros supplied by sad_inline.h — confirm. */
+#if (NUMBER==3)
+__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8)
+#elif (NUMBER==2)
+__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8)
+#elif (NUMBER==1)
+__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin, int32 x8)
+#endif
+{
+    int32 x4, x5, x6, x9, x10, x11, x12, x14;
+
+    x9 = 0x80808080; /* const. */
+    x4 = x5 = 0;
+
+    __asm{
+        MVN      x6, #0xff0000;
+#if (NUMBER==3)
+LOOP_SAD3:
+#elif (NUMBER==2)
+LOOP_SAD2:
+#elif (NUMBER==1)
+LOOP_SAD1:
+#endif
+        BIC      ref, ref, #3;
+    }
+    /****** process 8 pixels ******/
+    x11 = *((int32*)(ref + 12));
+    x12 = *((int32*)(ref + 16));
+    x10 = *((int32*)(ref + 8));
+    x14 = *((int32*)(blk + 12));
+
+    /* Funnel-shift the three reference words into two registers of
+       (bit-inverted) misaligned pixels; see the MVN/BIC comments in the
+       GNU variant. */
+    __asm{
+        MVN      x10, x10, lsr #SHIFT;
+        BIC      x10, x10, x11, lsl #(32-SHIFT);
+        MVN      x11, x11, lsr #SHIFT;
+        BIC      x11, x11, x12, lsl #(32-SHIFT);
+
+        LDR      x12, [blk, #8];
+    }
+
+    /* process x11 & x14 */
+    x11 = sad_4pixelN(x11, x14, x9);
+
+    /* process x12 & x10 */
+    x10 = sad_4pixelN(x10, x12, x9);
+
+    sum_accumulate;
+
+    __asm{
+        /****** process 8 pixels ******/
+        LDR      x11, [ref, #4];
+        LDR      x12, [ref, #8];
+        LDR      x10, [ref], lx ;
+        LDR      x14, [blk, #4];
+
+        MVN      x10, x10, lsr #SHIFT;
+        BIC      x10, x10, x11, lsl #(32-SHIFT);
+        MVN      x11, x11, lsr #SHIFT;
+        BIC      x11, x11, x12, lsl #(32-SHIFT);
+
+        LDR      x12, [blk], #16;
+    }
+
+    /* process x11 & x14 */
+    x11 = sad_4pixelN(x11, x14, x9);
+
+    /* process x12 & x10 */
+    x10 = sad_4pixelN(x10, x12, x9);
+
+    sum_accumulate;
+
+    /****************/
+    /* Fold packed byte accumulators; running SAD ends up in x10 >> 16. */
+    x10 = x5 - (x4 << 8); /* extract low bytes */
+    x10 = x10 + x4;     /* add with high bytes */
+    x10 = x10 + (x10 << 16); /* add with lower half word */
+
+    /* Early exit: loop only while (x10 >> 16) <= dmin and rows remain. */
+    __asm{
+        RSBS     x11, dmin, x10, lsr #16
+        ADDLSS   x8, x8, #INC_X8
+#if (NUMBER==3)
+        BLS      LOOP_SAD3;
+#elif (NUMBER==2)
+BLS      LOOP_SAD2;
+#elif (NUMBER==1)
+BLS      LOOP_SAD1;
+#endif
+    }
+
+    return ((uint32)x10 >> 16);
+}
+
+#elif defined(__GNUC__) && defined(__arm__) /* ARM GNU COMPILER */
+
+/* Second GNU/ARM variant using explicit MVN/BIC inline assembly.
+   NOTE(review): this branch is guarded by the same
+   `defined(__GNUC__) && defined(__arm__)` condition as the first branch of
+   this header, so this #elif can never be selected — dead code kept from the
+   original PacketVideo drop; confirm before removing. */
+#if (NUMBER==3)
+__inline int32 sad_mb_offset3(uint8 *ref, uint8 *blk, int lx, int dmin)
+#elif (NUMBER==2)
+__inline int32 sad_mb_offset2(uint8 *ref, uint8 *blk, int lx, int dmin)
+#elif (NUMBER==1)
+__inline int32 sad_mb_offset1(uint8 *ref, uint8 *blk, int lx, int dmin)
+#endif
+{
+    int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
+
+    x9 = 0x80808080; /* const. */
+    x4 = x5 = 0;
+    x8 = 16; //<<===========*******
+
+__asm__ volatile("MVN %0, #0xFF0000": "=r"(x6));
+
+#if (NUMBER==3)
+LOOP_SAD3:
+#elif (NUMBER==2)
+LOOP_SAD2:
+#elif (NUMBER==1)
+LOOP_SAD1:
+#endif
+__asm__ volatile("BIC  %0, %0, #3": "=r"(ref));
+    /****** process 8 pixels ******/
+    x11 = *((int32*)(ref + 12));
+    x12 = *((int32*)(ref + 16));
+    x10 = *((int32*)(ref + 8));
+    x14 = *((int32*)(blk + 12));
+
+/* Re-align the reference pixels with an MVN/BIC funnel shift; the asm
+   variant is selected at preprocessing time by the SHIFT constant. */
+#if (SHIFT==8)
+__asm__ volatile("MVN   %0, %0, lsr #8\n\tBIC   %0, %0, %1,lsl #24\n\tMVN   %1, %1,lsr #8\n\tBIC   %1, %1, %2,lsl #24": "=&r"(x10), "=&r"(x11): "r"(x12));
+#elif (SHIFT==16)
+__asm__ volatile("MVN   %0, %0, lsr #16\n\tBIC   %0, %0, %1,lsl #16\n\tMVN   %1, %1,lsr #16\n\tBIC   %1, %1, %2,lsl #16": "=&r"(x10), "=&r"(x11): "r"(x12));
+#elif (SHIFT==24)
+__asm__ volatile("MVN   %0, %0, lsr #24\n\tBIC   %0, %0, %1,lsl #8\n\tMVN   %1, %1,lsr #24\n\tBIC   %1, %1, %2,lsl #8": "=&r"(x10), "=&r"(x11): "r"(x12));
+#endif
+
+    x12 = *((int32*)(blk + 8));
+
+    /* process x11 & x14 */
+    x11 = sad_4pixelN(x11, x14, x9);
+
+    /* process x12 & x10 */
+    x10 = sad_4pixelN(x10, x12, x9);
+
+    sum_accumulate;
+
+    /****** process 8 pixels ******/
+    x11 = *((int32*)(ref + 4));
+    x12 = *((int32*)(ref + 8));
+    x10 = *((int32*)ref); ref += lx;
+    x14 = *((int32*)(blk + 4));
+
+#if (SHIFT==8)
+__asm__ volatile("MVN   %0, %0, lsr #8\n\tBIC   %0, %0, %1,lsl #24\n\tMVN   %1, %1,lsr #8\n\tBIC   %1, %1, %2,lsl #24": "=&r"(x10), "=&r"(x11): "r"(x12));
+#elif (SHIFT==16)
+__asm__ volatile("MVN   %0, %0, lsr #16\n\tBIC   %0, %0, %1,lsl #16\n\tMVN   %1, %1,lsr #16\n\tBIC   %1, %1, %2,lsl #16": "=&r"(x10), "=&r"(x11): "r"(x12));
+#elif (SHIFT==24)
+__asm__ volatile("MVN   %0, %0, lsr #24\n\tBIC   %0, %0, %1,lsl #8\n\tMVN   %1, %1,lsr #24\n\tBIC   %1, %1, %2,lsl #8": "=&r"(x10), "=&r"(x11): "r"(x12));
+#endif
+__asm__ volatile("LDR   %0, [%1], #16": "=&r"(x12), "=r"(blk));
+
+    /* process x11 & x14 */
+    x11 = sad_4pixelN(x11, x14, x9);
+
+    /* process x12 & x10 */
+    x10 = sad_4pixelN(x10, x12, x9);
+
+    sum_accumulate;
+
+    /****************/
+    /* Fold packed byte accumulators; running SAD ends up in x10 >> 16. */
+    x10 = x5 - (x4 << 8); /* extract low bytes */
+    x10 = x10 + x4;     /* add with high bytes */
+    x10 = x10 + (x10 << 16); /* add with lower half word */
+
+    /* Early exit once the partial SAD exceeds dmin. */
+    if (((uint32)x10 >> 16) <= (uint32)dmin) /* compare with dmin */
+    {
+        if (--x8)
+        {
+#if (NUMBER==3)
+            goto         LOOP_SAD3;
+#elif (NUMBER==2)
+goto         LOOP_SAD2;
+#elif (NUMBER==1)
+goto         LOOP_SAD1;
+#endif
+        }
+
+    }
+
+    return ((uint32)x10 >> 16);
+}
+
+#endif
+
diff --git a/media/libstagefright/codecs/avc/enc/src/slice.cpp b/media/libstagefright/codecs/avc/enc/src/slice.cpp
new file mode 100644
index 0000000..f6d066e
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/slice.cpp
@@ -0,0 +1,1025 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+#include "avcenc_lib.h"
+
+
+/**
+Encode all macroblocks belonging to the current slice group, advancing
+through the picture according to MbToSliceGroupMap (see subclause 7.3.4).
+\param "encvid" "Pointer to the encoder object."
+\return "AVCENC_SUCCESS for the end of a slice, AVCENC_PICTURE_READY when
+        no slice group remains, AVCENC_SLICE_EMPTY if a slice group has no
+        macroblocks, or an error status propagated from EncodeMB()."
+Side effect: flushes a pending mb_skip_run to the bitstream on exit. */
+AVCEnc_Status AVCEncodeSlice(AVCEncObject *encvid)
+{
+    AVCEnc_Status status = AVCENC_SUCCESS;
+    AVCCommonObj *video = encvid->common;
+    AVCPicParamSet *pps = video->currPicParams;
+    AVCSliceHeader *sliceHdr = video->sliceHdr;
+    AVCMacroblock *currMB ;
+    AVCEncBitstream *stream = encvid->bitstream;
+    uint slice_group_id;
+    int CurrMbAddr, slice_type;
+
+    slice_type = video->slice_type;
+
+    /* set the first mb in slice */
+    video->mbNum = CurrMbAddr = sliceHdr->first_mb_in_slice;// * (1+video->MbaffFrameFlag);
+    slice_group_id = video->MbToSliceGroupMap[CurrMbAddr];
+
+    video->mb_skip_run = 0;
+
+    /* while loop , see subclause 7.3.4 */
+    while (1)
+    {
+        video->mbNum = CurrMbAddr;
+        currMB = video->currMB = &(video->mblock[CurrMbAddr]);
+        currMB->slice_id = video->slice_id;  // for deblocking
+
+        video->mb_x = CurrMbAddr % video->PicWidthInMbs;
+        video->mb_y = CurrMbAddr / video->PicWidthInMbs;
+
+        /* initialize QP for this MB here*/
+        /* calculate currMB->QPy */
+        RCInitMBQP(encvid);
+
+        /* check the availability of neighboring macroblocks */
+        InitNeighborAvailability(video, CurrMbAddr);
+
+        /* Assuming that InitNeighborAvailability has been called prior to this function */
+        video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0;
+        /* this is necessary for all subsequent intra search */
+
+        /* With constrained intra prediction, only intra-coded neighbors may
+           be used as predictors; otherwise any available neighbor will do. */
+        if (!video->currPicParams->constrained_intra_pred_flag)
+        {
+            video->intraAvailA = video->mbAvailA;
+            video->intraAvailB = video->mbAvailB;
+            video->intraAvailC = video->mbAvailC;
+            video->intraAvailD = video->mbAvailD;
+        }
+        else
+        {
+            if (video->mbAvailA)
+            {
+                video->intraAvailA = video->mblock[video->mbAddrA].mb_intra;
+            }
+            if (video->mbAvailB)
+            {
+                video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ;
+            }
+            if (video->mbAvailC)
+            {
+                video->intraAvailC = video->mblock[video->mbAddrC].mb_intra;
+            }
+            if (video->mbAvailD)
+            {
+                video->intraAvailD = video->mblock[video->mbAddrD].mb_intra;
+            }
+        }
+
+        /* encode_one_macroblock() */
+        status = EncodeMB(encvid);
+        if (status != AVCENC_SUCCESS)
+        {
+            break;
+        }
+
+        /* go to next MB that belongs to the same slice group.
+           BUGFIX: test the bound before indexing MbToSliceGroupMap so we never
+           read one element past the end of the map. */
+        CurrMbAddr++;
+
+        while ((uint)CurrMbAddr < video->PicSizeInMbs &&
+                (uint)video->MbToSliceGroupMap[CurrMbAddr] != slice_group_id)
+        {
+            CurrMbAddr++;
+        }
+
+        if ((uint)CurrMbAddr >= video->PicSizeInMbs)
+        {
+            /* end of slice, return, but before that check to see if there are other slices
+            to be encoded. */
+            encvid->currSliceGroup++;
+            if (encvid->currSliceGroup > (int)pps->num_slice_groups_minus1) /* no more slice group */
+            {
+                status = AVCENC_PICTURE_READY;
+                break;
+            }
+            else
+            {
+                /* find first_mb_num for the next slice */
+                CurrMbAddr = 0;
+                while ((uint)CurrMbAddr < video->PicSizeInMbs &&
+                        video->MbToSliceGroupMap[CurrMbAddr] != encvid->currSliceGroup)
+                {
+                    CurrMbAddr++;
+                }
+                if ((uint)CurrMbAddr >= video->PicSizeInMbs)
+                {
+                    /* BUGFIX: propagate the error instead of overwriting it
+                       with AVCENC_SUCCESS below. */
+                    status = AVCENC_SLICE_EMPTY; /* error, one slice group has no MBs in it */
+                    break;
+                }
+
+                video->mbNum = CurrMbAddr;
+                status = AVCENC_SUCCESS;
+                break;
+            }
+        }
+    }
+
+    /* flush any trailing run of skipped macroblocks (P/B slices only) */
+    if (video->mb_skip_run > 0)
+    {
+        /* write skip_run */
+        if (slice_type != AVC_I_SLICE && slice_type != AVC_SI_SLICE)
+        {
+            ue_v(stream, video->mb_skip_run);
+            video->mb_skip_run = 0;
+        }
+        else    /* shouldn't happen */
+        {
+            status = AVCENC_FAIL;
+        }
+    }
+
+    return status;
+}
+
+
+/**
+Encode one macroblock: mode decision hook, prediction, transform/quantization,
+skip detection, and entropy (CAVLC) coding of the header and residuals.
+Also reports the bits spent on header vs. texture to the rate control.
+\param "encvid" "Pointer to the encoder object; video->mbNum selects the MB."
+\return "AVCENC_SUCCESS or an error status from the entropy-coding helpers." */
+AVCEnc_Status EncodeMB(AVCEncObject *encvid)
+{
+    AVCEnc_Status status = AVCENC_SUCCESS;
+    AVCCommonObj *video = encvid->common;
+    AVCPictureData *currPic = video->currPic;
+    AVCFrameIO  *currInput = encvid->currInput;
+    AVCMacroblock *currMB = video->currMB;
+    AVCMacroblock *MB_A, *MB_B;
+    AVCEncBitstream *stream = encvid->bitstream;
+    AVCRateControl *rateCtrl = encvid->rateCtrl;
+    uint8 *cur, *curL, *curCb, *curCr;
+    uint8 *orgL, *orgCb, *orgCr, *org4;
+    int CurrMbAddr = video->mbNum;
+    int picPitch = currPic->pitch;
+    int orgPitch = currInput->pitch;
+    int x_position = (video->mb_x << 4);
+    int y_position = (video->mb_y << 4);
+    int offset;
+    int b8, b4, blkidx;
+    AVCResidualType  resType;
+    int slice_type;
+    int numcoeff; /* output from residual_block_cavlc */
+    int cost16, cost8;
+
+    int num_bits, start_mb_bits, start_text_bits;
+
+    slice_type = video->slice_type;
+
+    /* now, point to the reconstructed frame */
+    /* luma offset is in pixels; chroma planes are half-size, hence the
+       (offset + x_position) >> 2 derivation below. */
+    offset = y_position * picPitch + x_position;
+    curL = currPic->Sl + offset;
+    orgL = currInput->YCbCr[0] + offset;
+    offset = (offset + x_position) >> 2;
+    curCb = currPic->Scb + offset;
+    curCr = currPic->Scr + offset;
+    orgCb = currInput->YCbCr[1] + offset;
+    orgCr = currInput->YCbCr[2] + offset;
+
+    /* re-base the source pointers when the input pitch differs from the
+       reconstructed-picture pitch */
+    if (orgPitch != picPitch)
+    {
+        offset = y_position * (orgPitch - picPitch);
+        orgL += offset;
+        offset >>= 2;
+        orgCb += offset;
+        orgCr += offset;
+    }
+
+    /******* determine MB prediction mode *******/
+    if (encvid->intraSearch[CurrMbAddr])
+    {
+        MBIntraSearch(encvid, CurrMbAddr, curL, picPitch);
+    }
+    /******* This part should be determined somehow ***************/
+    if (currMB->mbMode == AVC_I_PCM)
+    {
+        /* write down mb_type and PCM data */
+        /* and copy from currInput to currPic */
+        status = EncodeIntraPCM(encvid);
+
+
+        return status;
+    }
+
+    /****** for intra prediction, pred is already done *******/
+    /****** for I4, the recon is ready and Xfrm coefs are ready to be encoded *****/
+
+    //RCCalculateMAD(encvid,currMB,orgL,orgPitch); // no need to re-calculate MAD for Intra
+    // not used since totalSAD is used instead
+
+    /* compute the prediction */
+    /* output is video->pred_block */
+    if (!currMB->mb_intra)
+    {
+        AVCMBMotionComp(encvid, video); /* perform prediction and residue calculation */
+        /* we can do the loop here and call dct_luma */
+        video->pred_pitch = picPitch;
+        currMB->CBP = 0;
+        cost16 = 0;
+        cur = curL;
+        org4 = orgL;
+
+        /* transform/quantize the 16 4x4 luma blocks, tracking a coefficient
+           cost per 8x8 sub-block so cheap residuals can be zeroed out */
+        for (b8 = 0; b8 < 4; b8++)
+        {
+            cost8 = 0;
+
+            for (b4 = 0; b4 < 4; b4++)
+            {
+                blkidx = blkIdx2blkXY[b8][b4];
+                video->pred_block = cur;
+                numcoeff = dct_luma(encvid, blkidx, cur, org4, &cost8);
+                currMB->nz_coeff[blkidx] = numcoeff;
+                if (numcoeff)
+                {
+                    video->cbp4x4 |= (1 << blkidx);
+                    currMB->CBP |= (1 << b8);
+                }
+
+                if (b4&1)
+                {
+                    cur += ((picPitch << 2) - 4);
+                    org4 += ((orgPitch << 2) - 4);
+                }
+                else
+                {
+                    cur += 4;
+                    org4 += 4;
+                }
+            }
+
+            /* move the IDCT part out of dct_luma to accommodate the check
+               for coeff_cost. */
+
+            if ((currMB->CBP&(1 << b8)) && (cost8 <= _LUMA_COEFF_COST_))
+            {
+                cost8 = 0; // reset it
+
+                currMB->CBP ^= (1 << b8);
+                blkidx = blkIdx2blkXY[b8][0];
+
+                currMB->nz_coeff[blkidx] = 0;
+                currMB->nz_coeff[blkidx+1] = 0;
+                currMB->nz_coeff[blkidx+4] = 0;
+                currMB->nz_coeff[blkidx+5] = 0;
+            }
+
+            cost16 += cost8;
+
+            if (b8&1)
+            {
+                cur -= 8;
+                org4 -= 8;
+            }
+            else
+            {
+                cur += (8 - (picPitch << 3));
+                org4 += (8 - (orgPitch << 3));
+            }
+        }
+
+        /* after the whole MB, we do another check for coeff_cost */
+        if ((currMB->CBP&0xF) && (cost16 <= _LUMA_MB_COEFF_COST_))
+        {
+            currMB->CBP = 0;  // reset it to zero
+            memset(currMB->nz_coeff, 0, sizeof(uint8)*16);
+        }
+
+        // now we do IDCT
+        MBInterIdct(video, curL, currMB, picPitch);
+
+//      video->pred_block = video->pred + 256;
+    }
+    else    /* Intra prediction */
+    {
+        encvid->numIntraMB++;
+
+        if (currMB->mbMode == AVC_I16) /* do prediction for the whole macroblock */
+        {
+            currMB->CBP = 0;
+            /* get the prediction from encvid->pred_i16 */
+            dct_luma_16x16(encvid, curL, orgL);
+        }
+        video->pred_block = encvid->pred_ic[currMB->intra_chroma_pred_mode];
+    }
+
+    /* chrominance */
+    /* not need to do anything, the result is in encvid->pred_ic
+    chroma dct must be aware that prediction block can come from either intra or inter. */
+
+    dct_chroma(encvid, curCb, orgCb, 0);
+
+    dct_chroma(encvid, curCr, orgCr, 1);
+
+
+    /* 4.1 if there's nothing in there, video->mb_skip_run++ */
+    /* 4.2 if coded, check if there is a run of skipped MB, encodes it,
+            set video->QPyprev = currMB->QPy; */
+
+    /* 5. vlc encode */
+
+    /* check for skipped macroblock, INTER only */
+    if (!currMB->mb_intra)
+    {
+        /* decide whether this MB (for inter MB) should be skipped if there's nothing left. */
+        if (!currMB->CBP && currMB->NumMbPart == 1 && currMB->QPy == video->QPy)
+        {
+            if (currMB->MBPartPredMode[0][0] == AVC_Pred_L0 && currMB->ref_idx_L0[0] == 0)
+            {
+                MB_A = &video->mblock[video->mbAddrA];
+                MB_B = &video->mblock[video->mbAddrB];
+
+                /* a P-MB is skippable only when its MV equals the predicted
+                   (P_Skip) MV; the neighbor checks below mirror the decoder's
+                   P_Skip MV derivation */
+                if (!video->mbAvailA || !video->mbAvailB)
+                {
+                    if (currMB->mvL0[0] == 0) /* both mv components are zeros.*/
+                    {
+                        currMB->mbMode = AVC_SKIP;
+                        video->mvd_l0[0][0][0] = 0;
+                        video->mvd_l0[0][0][1] = 0;
+                    }
+                }
+                else
+                {
+                    if ((MB_A->ref_idx_L0[1] == 0 && MB_A->mvL0[3] == 0) ||
+                            (MB_B->ref_idx_L0[2] == 0 && MB_B->mvL0[12] == 0))
+                    {
+                        if (currMB->mvL0[0] == 0) /* both mv components are zeros.*/
+                        {
+                            currMB->mbMode = AVC_SKIP;
+                            video->mvd_l0[0][0][0] = 0;
+                            video->mvd_l0[0][0][1] = 0;
+                        }
+                    }
+                    else if (video->mvd_l0[0][0][0] == 0 && video->mvd_l0[0][0][1] == 0)
+                    {
+                        currMB->mbMode = AVC_SKIP;
+                    }
+                }
+            }
+
+            if (currMB->mbMode == AVC_SKIP)
+            {
+                video->mb_skip_run++;
+
+                /* set parameters */
+                /* not sure whether we need the followings */
+                if (slice_type == AVC_P_SLICE)
+                {
+                    currMB->mbMode = AVC_SKIP;
+                    currMB->MbPartWidth = currMB->MbPartHeight = 16;
+                    currMB->MBPartPredMode[0][0] = AVC_Pred_L0;
+                    currMB->NumMbPart = 1;
+                    currMB->NumSubMbPart[0] = currMB->NumSubMbPart[1] =
+                                              currMB->NumSubMbPart[2] = currMB->NumSubMbPart[3] = 1;
+                    currMB->SubMbPartWidth[0] = currMB->SubMbPartWidth[1] =
+                                                currMB->SubMbPartWidth[2] = currMB->SubMbPartWidth[3] = currMB->MbPartWidth;
+                    currMB->SubMbPartHeight[0] = currMB->SubMbPartHeight[1] =
+                                                 currMB->SubMbPartHeight[2] = currMB->SubMbPartHeight[3] = currMB->MbPartHeight;
+
+                }
+                else if (slice_type == AVC_B_SLICE)
+                {
+                    currMB->mbMode = AVC_SKIP;
+                    currMB->MbPartWidth = currMB->MbPartHeight = 8;
+                    currMB->MBPartPredMode[0][0] = AVC_Direct;
+                    currMB->NumMbPart = -1;
+                }
+
+                /* for skipped MB, always look at the first entry in RefPicList */
+                currMB->RefIdx[0] = currMB->RefIdx[1] =
+                                        currMB->RefIdx[2] = currMB->RefIdx[3] = video->RefPicList0[0]->RefIdx;
+
+                /* do not return yet, need to do some copies */
+            }
+        }
+    }
+    /* non-skipped MB */
+
+
+    /************* START ENTROPY CODING *************************/
+
+    /* bit positions are sampled before/after the header and after the
+       residuals so RCPostMB can split header vs. texture bits */
+    start_mb_bits = 32 + (encvid->bitstream->write_pos << 3) - encvid->bitstream->bit_left;
+
+    /* encode mb_type, mb_pred, sub_mb_pred, CBP */
+    if (slice_type != AVC_I_SLICE && slice_type != AVC_SI_SLICE && currMB->mbMode != AVC_SKIP)
+    {
+        //if(!pps->entropy_coding_mode_flag)  ALWAYS true
+        {
+            ue_v(stream, video->mb_skip_run);
+            video->mb_skip_run = 0;
+        }
+    }
+
+    if (currMB->mbMode != AVC_SKIP)
+    {
+        status = EncodeMBHeader(currMB, encvid);
+        if (status != AVCENC_SUCCESS)
+        {
+            return status;
+        }
+    }
+
+    start_text_bits = 32 + (encvid->bitstream->write_pos << 3) - encvid->bitstream->bit_left;
+
+    /**** now decoding part *******/
+    resType = AVC_Luma;
+
+    /* DC transform for luma I16 mode */
+    if (currMB->mbMode == AVC_I16)
+    {
+        /* vlc encode level/run */
+        status = enc_residual_block(encvid, AVC_Intra16DC, encvid->numcoefdc, currMB);
+        if (status != AVCENC_SUCCESS)
+        {
+            return status;
+        }
+        resType = AVC_Intra16AC;
+    }
+
+    /* VLC encoding for luma */
+    for (b8 = 0; b8 < 4; b8++)
+    {
+        if (currMB->CBP&(1 << b8))
+        {
+            for (b4 = 0; b4 < 4; b4++)
+            {
+                /* vlc encode level/run */
+                status = enc_residual_block(encvid, resType, (b8 << 2) + b4, currMB);
+                if (status != AVCENC_SUCCESS)
+                {
+                    return status;
+                }
+            }
+        }
+    }
+
+    /* chroma */
+    if (currMB->CBP & (3 << 4)) /* chroma DC residual present */
+    {
+        for (b8 = 0; b8 < 2; b8++) /* for iCbCr */
+        {
+            /* vlc encode level/run */
+            status = enc_residual_block(encvid, AVC_ChromaDC, encvid->numcoefcdc[b8] + (b8 << 3), currMB);
+            if (status != AVCENC_SUCCESS)
+            {
+                return status;
+            }
+        }
+    }
+
+    if (currMB->CBP & (2 << 4))
+    {
+        /* AC part */
+        for (b8 = 0; b8 < 2; b8++) /* for iCbCr */
+        {
+            for (b4 = 0; b4 < 4; b4++)  /* for each block inside Cb or Cr */
+            {
+                /* vlc encode level/run */
+                status = enc_residual_block(encvid, AVC_ChromaAC, 16 + (b8 << 2) + b4, currMB);
+                if (status != AVCENC_SUCCESS)
+                {
+                    return status;
+                }
+            }
+        }
+    }
+
+
+    num_bits = 32 + (encvid->bitstream->write_pos << 3) - encvid->bitstream->bit_left;
+
+    RCPostMB(video, rateCtrl, start_text_bits - start_mb_bits,
+             num_bits - start_text_bits);
+
+//  num_bits -= start_mb_bits;
+//  fprintf(fdebug,"MB #%d: %d bits\n",CurrMbAddr,num_bits);
+//  fclose(fdebug);
+    return status;
+}
+
+/* copy the content from predBlock back to the reconstructed YUV frame */
+void Copy_MB(uint8 *curL, uint8 *curCb, uint8 *curCr, uint8 *predBlock, int picPitch)
+{
+ int j, offset;
+ uint32 *dst, *dst2, *src;
+
+ dst = (uint32*)curL;
+ src = (uint32*)predBlock;
+
+ offset = (picPitch - 16) >> 2;
+
+ for (j = 0; j < 16; j++)
+ {
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+
+ dst += offset;
+ }
+
+ dst = (uint32*)curCb;
+ dst2 = (uint32*)curCr;
+ offset >>= 1;
+
+ for (j = 0; j < 8; j++)
+ {
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst2++ = *src++;
+ *dst2++ = *src++;
+
+ dst += offset;
+ dst2 += offset;
+ }
+ return ;
+}
+
+/* encode mb_type, mb_pred, sub_mb_pred, CBP */
+/* decide whether this MB (for inter MB) should be skipped */
+/* encode mb_type, mb_pred, sub_mb_pred, CBP */
+/* decide whether this MB (for inter MB) should be skipped */
+/* Writes the macroblock header (mb_type, prediction info, CBP and
+   mb_qp_delta) to the bitstream; also normalizes a chroma CBP of 11b
+   down to 10b and updates video->QPy when a qp delta is coded. */
+AVCEnc_Status EncodeMBHeader(AVCMacroblock *currMB, AVCEncObject *encvid)
+{
+    AVCEnc_Status status = AVCENC_SUCCESS;
+    uint mb_type;
+    AVCCommonObj *video = encvid->common;
+    AVCEncBitstream *stream = encvid->bitstream;
+
+    if (currMB->CBP > 47)   /* chroma CBP is 11 */
+    {
+        currMB->CBP -= 16;  /* remove the 5th bit from the right */
+    }
+
+    mb_type = InterpretMBType(currMB, video->slice_type);
+
+    status = ue_v(stream, mb_type);
+
+    if (currMB->mbMode == AVC_P8 || currMB->mbMode == AVC_P8ref0)
+    {
+        status = sub_mb_pred(video, currMB, stream);
+    }
+    else
+    {
+        status = mb_pred(video, currMB, stream) ;
+    }
+
+    if (currMB->mbMode != AVC_I16)
+    {
+        /* decode coded_block_pattern */
+        status = EncodeCBP(currMB, stream);
+    }
+
+    /* calculate currMB->mb_qp_delta = currMB->QPy - video->QPyprev */
+    if (currMB->CBP > 0 || currMB->mbMode == AVC_I16)
+    {
+        status = se_v(stream, currMB->QPy - video->QPy);
+        video->QPy = currMB->QPy; /* = (video->QPyprev + currMB->mb_qp_delta + 52)%52; */
+        // no need video->QPc = currMB->QPc;
+    }
+    else
+    {
+        if (currMB->QPy != video->QPy) // current QP is not the same as previous QP
+        {
+            /* restore these values */
+            RCRestoreQP(currMB, video, encvid);
+        }
+    }
+
+    return status;
+}
+
+
+/* inputs are mbMode, mb_intra, i16Mode, CBP, NumMbPart, MbPartWidth, MbPartHeight */
+uint InterpretMBType(AVCMacroblock *currMB, int slice_type)
+{
+ int CBP_chrom;
+ int mb_type;// part1, part2, part3;
+// const static int MapParts2Type[2][3][3]={{{4,8,12},{10,6,14},{16,18,20}},
+// {{5,9,13},{11,7,15},{17,19,21}}};
+
+ if (currMB->mb_intra)
+ {
+ if (currMB->mbMode == AVC_I4)
+ {
+ mb_type = 0;
+ }
+ else if (currMB->mbMode == AVC_I16)
+ {
+ CBP_chrom = (currMB->CBP & 0x30);
+ if (currMB->CBP&0xF)
+ {
+ currMB->CBP |= 0xF; /* either 0x0 or 0xF */
+ mb_type = 13;
+ }
+ else
+ {
+ mb_type = 1;
+ }
+ mb_type += (CBP_chrom >> 2) + currMB->i16Mode;
+ }
+ else /* if(currMB->mbMode == AVC_I_PCM) */
+ {
+ mb_type = 25;
+ }
+ }
+ else
+ { /* P-MB *//* note that the order of the enum AVCMBMode cannot be changed
+ since we use it here. */
+ mb_type = currMB->mbMode - AVC_P16;
+ }
+
+ if (slice_type == AVC_P_SLICE)
+ {
+ if (currMB->mb_intra)
+ {
+ mb_type += 5;
+ }
+ }
+ // following codes have not been tested yet, not needed.
+ /* else if(slice_type == AVC_B_SLICE)
+ {
+ if(currMB->mbMode == AVC_BDirect16)
+ {
+ mb_type = 0;
+ }
+ else if(currMB->mbMode == AVC_P16)
+ {
+ mb_type = currMB->MBPartPredMode[0][0] + 1; // 1 or 2
+ }
+ else if(currMB->mbMode == AVC_P8)
+ {
+ mb_type = 26;
+ }
+ else if(currMB->mbMode == AVC_P8ref0)
+ {
+ mb_type = 27;
+ }
+ else
+ {
+ part1 = currMB->mbMode - AVC_P16x8;
+ part2 = currMB->MBPartPredMode[0][0];
+ part3 = currMB->MBPartPredMode[1][0];
+ mb_type = MapParts2Type[part1][part2][part3];
+ }
+ }
+
+ if(slice_type == AVC_SI_SLICE)
+ {
+ mb_type++;
+ }
+ */
+ return (uint)mb_type;
+}
+
+//const static int mbPart2raster[3][4] = {{0,0,0,0},{1,1,0,0},{1,0,1,0}};
+
+/* see subclause 7.3.5.1 */
+AVCEnc_Status mb_pred(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream)
+{
+ AVCEnc_Status status = AVCENC_SUCCESS;
+ int mbPartIdx;
+ AVCSliceHeader *sliceHdr = video->sliceHdr;
+ int max_ref_idx;
+ uint code;
+
+ if (currMB->mbMode == AVC_I4 || currMB->mbMode == AVC_I16)
+ {
+ if (currMB->mbMode == AVC_I4)
+ {
+ /* perform prediction to get the actual intra 4x4 pred mode */
+ EncodeIntra4x4Mode(video, currMB, stream);
+ /* output will be in currMB->i4Mode[4][4] */
+ }
+
+ /* assume already set from MBPrediction() */
+ status = ue_v(stream, currMB->intra_chroma_pred_mode);
+ }
+ else if (currMB->MBPartPredMode[0][0] != AVC_Direct)
+ {
+
+ memset(currMB->ref_idx_L0, 0, sizeof(int16)*4);
+
+ /* see subclause 7.4.5.1 for the range of ref_idx_lX */
+ max_ref_idx = sliceHdr->num_ref_idx_l0_active_minus1;
+ /* if(video->MbaffFrameFlag && currMB->mb_field_decoding_flag)
+ max_ref_idx = 2*sliceHdr->num_ref_idx_l0_active_minus1 + 1;
+ */
+ /* decode ref index for L0 */
+ if (sliceHdr->num_ref_idx_l0_active_minus1 > 0)
+ {
+ for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++)
+ {
+ if (/*(sliceHdr->num_ref_idx_l0_active_minus1>0 || currMB->mb_field_decoding_flag) &&*/
+ currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L1)
+ {
+ code = currMB->ref_idx_L0[mbPartIdx];
+ status = te_v(stream, code, max_ref_idx);
+ }
+ }
+ }
+
+ /* see subclause 7.4.5.1 for the range of ref_idx_lX */
+ max_ref_idx = sliceHdr->num_ref_idx_l1_active_minus1;
+ /* if(video->MbaffFrameFlag && currMB->mb_field_decoding_flag)
+ max_ref_idx = 2*sliceHdr->num_ref_idx_l1_active_minus1 + 1;
+ */
+ /* decode ref index for L1 */
+ if (sliceHdr->num_ref_idx_l1_active_minus1 > 0)
+ {
+ for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++)
+ {
+ if (/*(sliceHdr->num_ref_idx_l1_active_minus1>0 || currMB->mb_field_decoding_flag) &&*/
+ currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L0)
+ {
+ status = te_v(stream, currMB->ref_idx_L1[mbPartIdx], max_ref_idx);
+ }
+ }
+ }
+
+ /* encode mvd_l0 */
+ for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++)
+ {
+ if (currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L1)
+ {
+ status = se_v(stream, video->mvd_l0[mbPartIdx][0][0]);
+ status = se_v(stream, video->mvd_l0[mbPartIdx][0][1]);
+ }
+ }
+ /* encode mvd_l1 */
+ for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++)
+ {
+ if (currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L0)
+ {
+ status = se_v(stream, video->mvd_l1[mbPartIdx][0][0]);
+ status = se_v(stream, video->mvd_l1[mbPartIdx][0][1]);
+ }
+ }
+ }
+
+ return status;
+}
+
+/* see subclause 7.3.5.2 */
+AVCEnc_Status sub_mb_pred(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream)
+{
+ AVCEnc_Status status = AVCENC_SUCCESS;
+ int mbPartIdx, subMbPartIdx;
+ AVCSliceHeader *sliceHdr = video->sliceHdr;
+ uint max_ref_idx;
+ uint slice_type = video->slice_type;
+ uint sub_mb_type[4];
+
+ /* this should move somewhere else where we don't have to make this check */
+ if (currMB->mbMode == AVC_P8ref0)
+ {
+ memset(currMB->ref_idx_L0, 0, sizeof(int16)*4);
+ }
+
+ /* we have to check the values to make sure they are valid */
+ /* assign values to currMB->sub_mb_type[] */
+ if (slice_type == AVC_P_SLICE)
+ {
+ InterpretSubMBTypeP(currMB, sub_mb_type);
+ }
+ /* no need to check for B-slice
+ else if(slice_type == AVC_B_SLICE)
+ {
+ InterpretSubMBTypeB(currMB,sub_mb_type);
+ }*/
+
+ for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++)
+ {
+ status = ue_v(stream, sub_mb_type[mbPartIdx]);
+ }
+
+ /* see subclause 7.4.5.1 for the range of ref_idx_lX */
+ max_ref_idx = sliceHdr->num_ref_idx_l0_active_minus1;
+ /* if(video->MbaffFrameFlag && currMB->mb_field_decoding_flag)
+ max_ref_idx = 2*sliceHdr->num_ref_idx_l0_active_minus1 + 1; */
+
+ for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++)
+ {
+ if ((sliceHdr->num_ref_idx_l0_active_minus1 > 0 /*|| currMB->mb_field_decoding_flag*/) &&
+ currMB->mbMode != AVC_P8ref0 && /*currMB->subMbMode[mbPartIdx]!=AVC_BDirect8 &&*/
+ currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L1)
+ {
+ status = te_v(stream, currMB->ref_idx_L0[mbPartIdx], max_ref_idx);
+ }
+ /* used in deblocking */
+ currMB->RefIdx[mbPartIdx] = video->RefPicList0[currMB->ref_idx_L0[mbPartIdx]]->RefIdx;
+ }
+ /* see subclause 7.4.5.1 for the range of ref_idx_lX */
+ max_ref_idx = sliceHdr->num_ref_idx_l1_active_minus1;
+ /* if(video->MbaffFrameFlag && currMB->mb_field_decoding_flag)
+ max_ref_idx = 2*sliceHdr->num_ref_idx_l1_active_minus1 + 1;*/
+
+ if (sliceHdr->num_ref_idx_l1_active_minus1 > 0)
+ {
+ for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++)
+ {
+ if (/*(sliceHdr->num_ref_idx_l1_active_minus1>0 || currMB->mb_field_decoding_flag) &&*/
+ /*currMB->subMbMode[mbPartIdx]!=AVC_BDirect8 &&*/
+ currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L0)
+ {
+ status = te_v(stream, currMB->ref_idx_L1[mbPartIdx], max_ref_idx);
+ }
+ }
+ }
+
+ for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++)
+ {
+ if (/*currMB->subMbMode[mbPartIdx]!=AVC_BDirect8 &&*/
+ currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L1)
+ {
+ for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++)
+ {
+ status = se_v(stream, video->mvd_l0[mbPartIdx][subMbPartIdx][0]);
+ status = se_v(stream, video->mvd_l0[mbPartIdx][subMbPartIdx][1]);
+ }
+ }
+ }
+
+ for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++)
+ {
+ if (/*currMB->subMbMode[mbPartIdx]!=AVC_BDirect8 &&*/
+ currMB->MBPartPredMode[mbPartIdx][0] != AVC_Pred_L0)
+ {
+ for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++)
+ {
+ status = se_v(stream, video->mvd_l1[mbPartIdx][subMbPartIdx][0]);
+ status = se_v(stream, video->mvd_l1[mbPartIdx][subMbPartIdx][1]);
+ }
+ }
+ }
+
+ return status;
+}
+
+/* input is mblock->sub_mb_type[] */
+void InterpretSubMBTypeP(AVCMacroblock *mblock, uint *sub_mb_type)
+{
+ int i;
+ /* see enum AVCMBType declaration */
+ /*const static AVCSubMBMode map2subMbMode[4] = {AVC_8x8,AVC_8x4,AVC_4x8,AVC_4x4};
+ const static int map2subPartWidth[4] = {8,8,4,4};
+ const static int map2subPartHeight[4] = {8,4,8,4};
+ const static int map2numSubPart[4] = {1,2,2,4};*/
+
+ for (i = 0; i < 4 ; i++)
+ {
+ sub_mb_type[i] = mblock->subMbMode[i] - AVC_8x8;
+ }
+
+ return ;
+}
+
+/* Map each 8x8 partition's (subMbMode, prediction mode) pair to the
+   B-slice sub_mb_type code word: 0 for direct, 1-3 for 8x8, 4-9 for
+   8x4/4x8, 10-12 for 4x4, offset by the L0/L1/Bi prediction mode. */
+void InterpretSubMBTypeB(AVCMacroblock *mblock, uint *sub_mb_type)
+{
+    int part;
+
+    for (part = 0; part < 4; part++)
+    {
+        int mode = mblock->subMbMode[part];
+        int pred = mblock->MBPartPredMode[part][0];
+
+        if (mode == AVC_BDirect8)
+        {
+            sub_mb_type[part] = 0;
+        }
+        else if (mode == AVC_8x8)
+        {
+            sub_mb_type[part] = (uint)(1 + pred);
+        }
+        else if (mode == AVC_4x4)
+        {
+            sub_mb_type[part] = (uint)(10 + pred);
+        }
+        else /* AVC_8x4 or AVC_4x8 */
+        {
+            sub_mb_type[part] = (uint)(4 + (pred << 1) + (mode - AVC_8x4));
+        }
+    }
+
+    return ;
+}
+
+/* see subclause 8.3.1 */
+AVCEnc_Status EncodeIntra4x4Mode(AVCCommonObj *video, AVCMacroblock *currMB, AVCEncBitstream *stream)
+{
+ int intra4x4PredModeA = 0;
+ int intra4x4PredModeB, predIntra4x4PredMode;
+ int component, SubBlock_indx, block_x, block_y;
+ int dcOnlyPredictionFlag;
+ uint flag;
+ int rem = 0;
+ int mode;
+ int bindx = 0;
+
+ for (component = 0; component < 4; component++) /* partition index */
+ {
+ block_x = ((component & 1) << 1);
+ block_y = ((component >> 1) << 1);
+
+ for (SubBlock_indx = 0; SubBlock_indx < 4; SubBlock_indx++) /* sub-partition index */
+ {
+ dcOnlyPredictionFlag = 0;
+ if (block_x > 0)
+ {
+ intra4x4PredModeA = currMB->i4Mode[(block_y << 2) + block_x - 1 ];
+ }
+ else
+ {
+ if (video->intraAvailA)
+ {
+ if (video->mblock[video->mbAddrA].mbMode == AVC_I4)
+ {
+ intra4x4PredModeA = video->mblock[video->mbAddrA].i4Mode[(block_y << 2) + 3];
+ }
+ else
+ {
+ intra4x4PredModeA = AVC_I4_DC;
+ }
+ }
+ else
+ {
+ dcOnlyPredictionFlag = 1;
+ }
+ }
+
+ if (block_y > 0)
+ {
+ intra4x4PredModeB = currMB->i4Mode[((block_y-1) << 2) + block_x];
+ }
+ else
+ {
+ if (video->intraAvailB)
+ {
+ if (video->mblock[video->mbAddrB].mbMode == AVC_I4)
+ {
+ intra4x4PredModeB = video->mblock[video->mbAddrB].i4Mode[(3 << 2) + block_x];
+ }
+ else
+ {
+ intra4x4PredModeB = AVC_I4_DC;
+ }
+ }
+ else
+ {
+ dcOnlyPredictionFlag = 1;
+ }
+ }
+
+ if (dcOnlyPredictionFlag)
+ {
+ intra4x4PredModeA = intra4x4PredModeB = AVC_I4_DC;
+ }
+
+ predIntra4x4PredMode = AVC_MIN(intra4x4PredModeA, intra4x4PredModeB);
+
+ flag = 0;
+ mode = currMB->i4Mode[(block_y<<2)+block_x];
+
+ if (mode == (AVCIntra4x4PredMode)predIntra4x4PredMode)
+ {
+ flag = 1;
+ }
+ else if (mode < predIntra4x4PredMode)
+ {
+ rem = mode;
+ }
+ else
+ {
+ rem = mode - 1;
+ }
+
+ BitstreamWrite1Bit(stream, flag);
+
+ if (!flag)
+ {
+ BitstreamWriteBits(stream, 3, rem);
+ }
+
+ bindx++;
+ block_y += (SubBlock_indx & 1) ;
+ block_x += (1 - 2 * (SubBlock_indx & 1)) ;
+ }
+ }
+
+ return AVCENC_SUCCESS;
+}
+
+
+
diff --git a/media/libstagefright/codecs/avc/enc/src/vlc_encode.cpp b/media/libstagefright/codecs/avc/enc/src/vlc_encode.cpp
new file mode 100644
index 0000000..222e709
--- /dev/null
+++ b/media/libstagefright/codecs/avc/enc/src/vlc_encode.cpp
@@ -0,0 +1,336 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+#include "avcenc_lib.h"
+
+/**
+See algorithm in subclause 9.1, Table 9-1, Table 9-2. */
+AVCEnc_Status ue_v(AVCEncBitstream *bitstream, uint codeNum)
+{
+ if (AVCENC_SUCCESS != SetEGBitstring(bitstream, codeNum))
+ return AVCENC_FAIL;
+
+ return AVCENC_SUCCESS;
+}
+
+/**
+See subclause 9.1.1, Table 9-3 */
+AVCEnc_Status se_v(AVCEncBitstream *bitstream, int value)
+{
+ uint codeNum;
+ AVCEnc_Status status;
+
+ if (value <= 0)
+ {
+ codeNum = -value * 2;
+ }
+ else
+ {
+ codeNum = value * 2 - 1;
+ }
+
+ status = ue_v(bitstream, codeNum);
+
+ return status;
+}
+
+AVCEnc_Status te_v(AVCEncBitstream *bitstream, uint value, uint range)
+{
+ AVCEnc_Status status;
+
+ if (range > 1)
+ {
+ return ue_v(bitstream, value);
+ }
+ else
+ {
+ status = BitstreamWrite1Bit(bitstream, 1 - value);
+ return status;
+ }
+}
+
+/**
+See subclause 9.1, Table 9-1, 9-2. */
+// compute leadingZeros and inforbits
+//codeNum = (1<<leadingZeros)-1+infobits;
+AVCEnc_Status SetEGBitstring(AVCEncBitstream *bitstream, uint codeNum)
+{
+ AVCEnc_Status status;
+ int leadingZeros;
+ int infobits;
+
+ if (!codeNum)
+ {
+ status = BitstreamWrite1Bit(bitstream, 1);
+ return status;
+ }
+
+ /* calculate leadingZeros and infobits */
+ leadingZeros = 1;
+ while ((uint)(1 << leadingZeros) < codeNum + 2)
+ {
+ leadingZeros++;
+ }
+ leadingZeros--;
+ infobits = codeNum - (1 << leadingZeros) + 1;
+
+ status = BitstreamWriteBits(bitstream, leadingZeros, 0);
+ infobits |= (1 << leadingZeros);
+ status = BitstreamWriteBits(bitstream, leadingZeros + 1, infobits);
+ return status;
+}
+
/* See Table 9-4: assignment of codeNum to values of coded_block_pattern.
   Indexed by [coded_block_pattern][0] for Intra_4x4 macroblocks and
   [coded_block_pattern][1] for Inter macroblocks; the entry is the ue(v)
   codeNum to emit. Spec-mandated constant data — do not edit by hand. */
const static uint8 MapCBP2code[48][2] =
{
    {3, 0}, {29, 2}, {30, 3}, {17, 7}, {31, 4}, {18, 8}, {37, 17}, {8, 13}, {32, 5}, {38, 18}, {19, 9}, {9, 14},
    {20, 10}, {10, 15}, {11, 16}, {2, 11}, {16, 1}, {33, 32}, {34, 33}, {21, 36}, {35, 34}, {22, 37}, {39, 44}, {4, 40},
    {36, 35}, {40, 45}, {23, 38}, {5, 41}, {24, 39}, {6, 42}, {7, 43}, {1, 19}, {41, 6}, {42, 24}, {43, 25}, {25, 20},
    {44, 26}, {26, 21}, {46, 46}, {12, 28}, {45, 27}, {47, 47}, {27, 22}, {13, 29}, {28, 23}, {14, 30}, {15, 31}, {0, 12}
};
+
+AVCEnc_Status EncodeCBP(AVCMacroblock *currMB, AVCEncBitstream *stream)
+{
+ AVCEnc_Status status;
+ uint codeNum;
+
+ if (currMB->mbMode == AVC_I4)
+ {
+ codeNum = MapCBP2code[currMB->CBP][0];
+ }
+ else
+ {
+ codeNum = MapCBP2code[currMB->CBP][1];
+ }
+
+ status = ue_v(stream, codeNum);
+
+ return status;
+}
+
/* Encode coeff_token (TotalCoeff, TrailingOnes) for a residual block.
   nC selects the VLC table: nC < 2 -> table 0, nC < 4 -> table 1,
   nC < 8 -> table 2, nC >= 8 -> 6-bit fixed-length code.
   Returns AVCENC_TRAILINGONES_FAIL if TrailingOnes exceeds 3.
   NOTE(review): presumably implements Table 9-5 of the H.264 spec —
   the table data below is spec-mandated; do not edit by hand. */
AVCEnc_Status ce_TotalCoeffTrailingOnes(AVCEncBitstream *stream, int TrailingOnes, int TotalCoeff, int nC)
{
    /* {length, code} pairs indexed by [vlcnum][TrailingOnes][TotalCoeff]. */
    const static uint8 totCoeffTrailOne[3][4][17][2] =
    {
        { // 0702
            {{1, 1}, {6, 5}, {8, 7}, {9, 7}, {10, 7}, {11, 7}, {13, 15}, {13, 11}, {13, 8}, {14, 15}, {14, 11}, {15, 15}, {15, 11}, {16, 15}, {16, 11}, {16, 7}, {16, 4}},
            {{0, 0}, {2, 1}, {6, 4}, {8, 6}, {9, 6}, {10, 6}, {11, 6}, {13, 14}, {13, 10}, {14, 14}, {14, 10}, {15, 14}, {15, 10}, {15, 1}, {16, 14}, {16, 10}, {16, 6}},
            {{0, 0}, {0, 0}, {3, 1}, {7, 5}, {8, 5}, {9, 5}, {10, 5}, {11, 5}, {13, 13}, {13, 9}, {14, 13}, {14, 9}, {15, 13}, {15, 9}, {16, 13}, {16, 9}, {16, 5}},
            {{0, 0}, {0, 0}, {0, 0}, {5, 3}, {6, 3}, {7, 4}, {8, 4}, {9, 4}, {10, 4}, {11, 4}, {13, 12}, {14, 12}, {14, 8}, {15, 12}, {15, 8}, {16, 12}, {16, 8}},
        },
        {
            {{2, 3}, {6, 11}, {6, 7}, {7, 7}, {8, 7}, {8, 4}, {9, 7}, {11, 15}, {11, 11}, {12, 15}, {12, 11}, {12, 8}, {13, 15}, {13, 11}, {13, 7}, {14, 9}, {14, 7}},
            {{0, 0}, {2, 2}, {5, 7}, {6, 10}, {6, 6}, {7, 6}, {8, 6}, {9, 6}, {11, 14}, {11, 10}, {12, 14}, {12, 10}, {13, 14}, {13, 10}, {14, 11}, {14, 8}, {14, 6}},
            {{0, 0}, {0, 0}, {3, 3}, {6, 9}, {6, 5}, {7, 5}, {8, 5}, {9, 5}, {11, 13}, {11, 9}, {12, 13}, {12, 9}, {13, 13}, {13, 9}, {13, 6}, {14, 10}, {14, 5}},
            {{0, 0}, {0, 0}, {0, 0}, {4, 5}, {4, 4}, {5, 6}, {6, 8}, {6, 4}, {7, 4}, {9, 4}, {11, 12}, {11, 8}, {12, 12}, {13, 12}, {13, 8}, {13, 1}, {14, 4}},
        },
        {
            {{4, 15}, {6, 15}, {6, 11}, {6, 8}, {7, 15}, {7, 11}, {7, 9}, {7, 8}, {8, 15}, {8, 11}, {9, 15}, {9, 11}, {9, 8}, {10, 13}, {10, 9}, {10, 5}, {10, 1}},
            {{0, 0}, {4, 14}, {5, 15}, {5, 12}, {5, 10}, {5, 8}, {6, 14}, {6, 10}, {7, 14}, {8, 14}, {8, 10}, {9, 14}, {9, 10}, {9, 7}, {10, 12}, {10, 8}, {10, 4}},
            {{0, 0}, {0, 0}, {4, 13}, {5, 14}, {5, 11}, {5, 9}, {6, 13}, {6, 9}, {7, 13}, {7, 10}, {8, 13}, {8, 9}, {9, 13}, {9, 9}, {10, 11}, {10, 7}, {10, 3}},
            {{0, 0}, {0, 0}, {0, 0}, {4, 12}, {4, 11}, {4, 10}, {4, 9}, {4, 8}, {5, 13}, {6, 12}, {7, 12}, {8, 12}, {8, 8}, {9, 12}, {10, 10}, {10, 6}, {10, 2}}
        }
    };


    AVCEnc_Status status = AVCENC_SUCCESS;
    uint code, len;
    int vlcnum;

    /* At most 3 trailing ones are signalled in coeff_token. */
    if (TrailingOnes > 3)
    {
        return AVCENC_TRAILINGONES_FAIL;
    }

    if (nC >= 8)
    {
        /* 6-bit fixed-length code: (TotalCoeff-1) in the high 4 bits,
           TrailingOnes in the low 2 bits; TotalCoeff == 0 is coded as 3. */
        if (TotalCoeff)
        {
            code = ((TotalCoeff - 1) << 2) | (TrailingOnes);
        }
        else
        {
            code = 3;
        }
        status = BitstreamWriteBits(stream, 6, code);
    }
    else
    {
        /* Select VLC table by the context value nC. */
        if (nC < 2)
        {
            vlcnum = 0;
        }
        else if (nC < 4)
        {
            vlcnum = 1;
        }
        else
        {
            vlcnum = 2;
        }

        len = totCoeffTrailOne[vlcnum][TrailingOnes][TotalCoeff][0];
        code = totCoeffTrailOne[vlcnum][TrailingOnes][TotalCoeff][1];
        status = BitstreamWriteBits(stream, len, code);
    }

    return status;
}
+
+AVCEnc_Status ce_TotalCoeffTrailingOnesChromaDC(AVCEncBitstream *stream, int TrailingOnes, int TotalCoeff)
+{
+ const static uint8 totCoeffTrailOneChrom[4][5][2] =
+ {
+ { {2, 1}, {6, 7}, {6, 4}, {6, 3}, {6, 2}},
+ { {0, 0}, {1, 1}, {6, 6}, {7, 3}, {8, 3}},
+ { {0, 0}, {0, 0}, {3, 1}, {7, 2}, {8, 2}},
+ { {0, 0}, {0, 0}, {0, 0}, {6, 5}, {7, 0}},
+ };
+
+ AVCEnc_Status status = AVCENC_SUCCESS;
+ uint code, len;
+
+ len = totCoeffTrailOneChrom[TrailingOnes][TotalCoeff][0];
+ code = totCoeffTrailOneChrom[TrailingOnes][TotalCoeff][1];
+ status = BitstreamWriteBits(stream, len, code);
+
+ return status;
+}
+
/* Encode total_zeros for a luma (or general 4x4) residual block.
   See Table 9-7 and 9-8: {length, code} selected by
   [TotalCoeff-1][total_zeros]. The rows are ragged because fewer
   total_zeros values are possible as TotalCoeff grows.
   Spec-mandated constant data — do not edit by hand.
   NOTE(review): assumes 1 <= TotalCoeff <= 15 and total_zeros within
   the row's valid range — callers must guarantee this. */
AVCEnc_Status ce_TotalZeros(AVCEncBitstream *stream, int total_zeros, int TotalCoeff)
{
    /* Code lengths, indexed by [TotalCoeff-1][total_zeros]. */
    const static uint8 lenTotalZeros[15][16] =
    {
        { 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9},
        { 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6},
        { 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6},
        { 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5},
        { 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5},
        { 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6},
        { 6, 5, 3, 3, 3, 2, 3, 4, 3, 6},
        { 6, 4, 5, 3, 2, 2, 3, 3, 6},
        { 6, 6, 4, 2, 2, 3, 2, 5},
        { 5, 5, 3, 2, 2, 2, 4},
        { 4, 4, 3, 3, 1, 3},
        { 4, 4, 2, 1, 3},
        { 3, 3, 1, 2},
        { 2, 2, 1},
        { 1, 1},
    };

    /* Code words, indexed the same way as lenTotalZeros. */
    const static uint8 codTotalZeros[15][16] =
    {
        {1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1},
        {7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0},
        {5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0},
        {3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0},
        {5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0},
        {1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0},
        {1, 1, 5, 4, 3, 3, 2, 1, 1, 0},
        {1, 1, 1, 3, 3, 2, 2, 1, 0},
        {1, 0, 1, 3, 2, 1, 1, 1, },
        {1, 0, 1, 3, 2, 1, 1, },
        {0, 1, 1, 2, 1, 3},
        {0, 1, 1, 1, 1},
        {0, 1, 1, 1},
        {0, 1, 1},
        {0, 1},
    };
    int len, code;
    AVCEnc_Status status;

    len = lenTotalZeros[TotalCoeff-1][total_zeros];
    code = codTotalZeros[TotalCoeff-1][total_zeros];

    status = BitstreamWriteBits(stream, len, code);

    return status;
}
+
+/* see Table 9-9 */
+AVCEnc_Status ce_TotalZerosChromaDC(AVCEncBitstream *stream, int total_zeros, int TotalCoeff)
+{
+ const static uint8 lenTotalZerosChromaDC[3][4] =
+ {
+ { 1, 2, 3, 3, },
+ { 1, 2, 2, 0, },
+ { 1, 1, 0, 0, },
+ };
+
+ const static uint8 codTotalZerosChromaDC[3][4] =
+ {
+ { 1, 1, 1, 0, },
+ { 1, 1, 0, 0, },
+ { 1, 0, 0, 0, },
+ };
+
+ int len, code;
+ AVCEnc_Status status;
+
+ len = lenTotalZerosChromaDC[TotalCoeff-1][total_zeros];
+ code = codTotalZerosChromaDC[TotalCoeff-1][total_zeros];
+
+ status = BitstreamWriteBits(stream, len, code);
+
+ return status;
+}
+
+/* see Table 9-10 */
+AVCEnc_Status ce_RunBefore(AVCEncBitstream *stream, int run_before, int zerosLeft)
+{
+ const static uint8 lenRunBefore[7][16] =
+ {
+ {1, 1},
+ {1, 2, 2},
+ {2, 2, 2, 2},
+ {2, 2, 2, 3, 3},
+ {2, 2, 3, 3, 3, 3},
+ {2, 3, 3, 3, 3, 3, 3},
+ {3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11},
+ };
+
+ const static uint8 codRunBefore[7][16] =
+ {
+ {1, 0},
+ {1, 1, 0},
+ {3, 2, 1, 0},
+ {3, 2, 1, 1, 0},
+ {3, 2, 3, 2, 1, 0},
+ {3, 0, 1, 3, 2, 5, 4},
+ {7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ };
+
+ int len, code;
+ AVCEnc_Status status;
+
+ if (zerosLeft <= 6)
+ {
+ len = lenRunBefore[zerosLeft-1][run_before];
+ code = codRunBefore[zerosLeft-1][run_before];
+ }
+ else
+ {
+ len = lenRunBefore[6][run_before];
+ code = codRunBefore[6][run_before];
+ }
+
+ status = BitstreamWriteBits(stream, len, code);
+
+
+ return status;
+}
diff --git a/media/libstagefright/include/AVCEncoder.h b/media/libstagefright/include/AVCEncoder.h
new file mode 100644
index 0000000..4fe2e30
--- /dev/null
+++ b/media/libstagefright/include/AVCEncoder.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef AVC_ENCODER_H_
+
+#define AVC_ENCODER_H_
+
+#include <media/stagefright/MediaBuffer.h>
+#include <media/stagefright/MediaSource.h>
+#include <utils/Vector.h>
+
+struct tagAVCHandle;
+struct tagAVCEncParam;
+
+namespace android {
+
+struct MediaBuffer;
+struct MediaBufferGroup;
+
// Software AVC (H.264) encoder exposed as a MediaSource: pulls raw video
// frames from mSource and produces encoded output buffers via read().
// Also observes its own output MediaBuffers so they can be recycled
// when the consumer releases them.
struct AVCEncoder : public MediaSource,
    public MediaBufferObserver {
    // source: raw video frame producer; meta: requested output format.
    AVCEncoder(const sp<MediaSource> &source,
            const sp<MetaData>& meta);

    virtual status_t start(MetaData *params);
    virtual status_t stop();

    // Returns the encoder's output format metadata.
    virtual sp<MetaData> getFormat();

    // Produces the next encoded buffer (blocking MediaSource contract).
    virtual status_t read(
            MediaBuffer **buffer, const ReadOptions *options);

    // MediaBufferObserver: called when a consumer releases an output buffer.
    virtual void signalBufferReturned(MediaBuffer *buffer);

    // Callbacks required by the encoder
    // NOTE(review): presumably invoked by the underlying PV AVC encoder
    // core through mHandle — confirm against AVCEncoder.cpp.
    int32_t allocOutputBuffers(unsigned int sizeInMbs, unsigned int numBuffers);
    void unbindOutputBuffer(int32_t index);
    int32_t bindOutputBuffer(int32_t index, uint8_t **yuv);

protected:
    virtual ~AVCEncoder();

private:
    sp<MediaSource> mSource;        // raw frame source
    sp<MetaData> mFormat;           // output format advertised by getFormat()
    sp<MetaData> mMeta;             // format/config requested by the caller

    int32_t mVideoWidth;
    int32_t mVideoHeight;
    int32_t mVideoFrameRate;
    int32_t mVideoBitRate;
    int32_t mVideoColorFormat;      // input color format of mSource frames
    int64_t mNumInputFrames;        // count of frames pulled from mSource
    status_t mInitCheck;            // result of initCheck(), gates start()
    bool mStarted;
    bool mSpsPpsHeaderReceived;     // true once codec-config (SPS/PPS) was emitted
    bool mReadyForNextFrame;
    int32_t mIsIDRFrame; // for set kKeyIsSyncFrame

    tagAVCHandle *mHandle;          // opaque handle into the AVC encoder core
    tagAVCEncParam *mEncParams;     // encoder configuration passed to the core
    MediaBuffer *mInputBuffer;      // current input frame being encoded
    uint8_t *mInputFrameData;       // scratch frame, presumably for color
                                    // conversion to YUV420 planar — TODO confirm
    MediaBufferGroup *mGroup;       // pool for output MediaBuffers
    Vector<MediaBuffer *> mOutputBuffers;


    // Validates meta and configures encoder parameters; sets mInitCheck.
    status_t initCheck(const sp<MetaData>& meta);
    void releaseOutputBuffers();

    // Not copyable.
    AVCEncoder(const AVCEncoder &);
    AVCEncoder &operator=(const AVCEncoder &);
};
+
+} // namespace android
+
+#endif // AVC_ENCODER_H_