diff options
Diffstat (limited to 'media/libstagefright/codecs/avc/enc/src/block.cpp')
-rw-r--r-- | media/libstagefright/codecs/avc/enc/src/block.cpp | 1283 |
1 files changed, 1283 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/avc/enc/src/block.cpp b/media/libstagefright/codecs/avc/enc/src/block.cpp new file mode 100644 index 0000000..01e26a6 --- /dev/null +++ b/media/libstagefright/codecs/avc/enc/src/block.cpp @@ -0,0 +1,1283 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 1998-2009 PacketVideo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ +#include "avcenc_lib.h" + +/* subtract with the prediction and do transformation */ +void trans(uint8 *cur, int pitch, uint8 *predBlock, int16 *dataBlock) +{ + int16 *ptr = dataBlock; + int r0, r1, r2, r3, j; + int curpitch = (uint)pitch >> 16; + int predpitch = (pitch & 0xFFFF); + + /* horizontal */ + j = 4; + while (j > 0) + { + /* calculate the residue first */ + r0 = cur[0] - predBlock[0]; + r1 = cur[1] - predBlock[1]; + r2 = cur[2] - predBlock[2]; + r3 = cur[3] - predBlock[3]; + + r0 += r3; //ptr[0] + ptr[3]; + r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; + r1 += r2; //ptr[1] + ptr[2]; + r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; + + ptr[0] = r0 + r1; + ptr[2] = r0 - r1; + ptr[1] = (r3 << 1) + r2; + ptr[3] = r3 - (r2 << 1); + + ptr += 16; + predBlock += predpitch; + cur += curpitch; + j--; + } + /* vertical */ + ptr = dataBlock; + j = 4; + while (j > 0) + { + r0 = ptr[0] + ptr[48]; + r3 = ptr[0] - ptr[48]; + r1 = ptr[16] + ptr[32]; + r2 = ptr[16] - ptr[32]; + + ptr[0] = r0 + r1; + ptr[32] = r0 - r1; + ptr[16] = (r3 << 1) + r2; + ptr[48] = r3 - (r2 << 1); + + ptr++; + j--; + } + + return ; +} + + +/* do residue transform quant invquant, invtrans and write output out */ +int dct_luma(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org, int *coef_cost) +{ + AVCCommonObj *video = encvid->common; + int org_pitch = encvid->currInput->pitch; + int pitch = video->currPic->pitch; + int16 *coef = video->block; + uint8 *pred = video->pred_block; // size 16 for a 4x4 block + int pred_pitch = video->pred_pitch; + int r0, r1, r2, r3, j, k, idx; + int *level, *run; + int Qq, Rq, q_bits, qp_const, quant; + int data, lev, zero_run; + int numcoeff; + + coef += ((blkidx & 0x3) << 2) + ((blkidx >> 2) << 6); /* point to the 4x4 block */ + + /* first take a 4x4 transform */ + /* horizontal */ + j = 4; + while (j > 0) + { + /* calculate the residue first */ + r0 = org[0] - pred[0]; /* OPTIMIZEABLE */ + r1 = org[1] - pred[1]; + r2 = org[2] - pred[2]; + r3 = org[3] - pred[3]; + + r0 += r3; //ptr[0] + ptr[3]; + r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; + r1 += r2; //ptr[1] + ptr[2]; + r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; + + coef[0] = r0 + r1; + coef[2] = r0 - r1; + coef[1] = (r3 << 1) + r2; + coef[3] = r3 - (r2 << 1); + + coef += 16; + org += org_pitch; + pred += pred_pitch; + j--; + } + /* vertical */ + coef -= 64; + pred -= (pred_pitch << 2); + j = 4; + while (j > 0) /* OPTIMIZABLE */ + { + r0 = coef[0] + coef[48]; + r3 = coef[0] - coef[48]; + r1 = coef[16] + coef[32]; + r2 = coef[16] - coef[32]; + + coef[0] = r0 + r1; + coef[32] = r0 - r1; + coef[16] = (r3 << 1) + r2; + coef[48] = r3 - (r2 << 1); + + coef++; + j--; + } + + coef -= 4; + + /* quant */ + level = encvid->level[ras2dec[blkidx]]; + run = encvid->run[ras2dec[blkidx]]; + + Rq = video->QPy_mod_6; + Qq = video->QPy_div_6; + qp_const = encvid->qp_const; + q_bits = 15 + Qq; + + zero_run = 0; + numcoeff = 0; + for (k = 0; k < 16; k++) + { + idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */ + data = coef[idx]; + quant = quant_coef[Rq][k]; + if (data > 0) + { + lev = data * quant + qp_const; + } + else + { + lev = -data * quant + qp_const; + } + lev >>= q_bits; + if (lev) + { + *coef_cost += ((lev > 1) ? MAX_VALUE : COEFF_COST[DISABLE_THRESHOLDING][zero_run]); + + /* dequant */ + quant = dequant_coefres[Rq][k]; + if (data > 0) + { + level[numcoeff] = lev; + coef[idx] = (lev * quant) << Qq; + } + else + { + level[numcoeff] = -lev; + coef[idx] = (-lev * quant) << Qq; + } + run[numcoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + if (video->currMB->mb_intra) // only do inverse transform with intra block + { + if (numcoeff) /* then do inverse transform */ + { + for (j = 4; j > 0; j--) /* horizontal */ + { + r0 = coef[0] + coef[2]; + r1 = coef[0] - coef[2]; + r2 = (coef[1] >> 1) - coef[3]; + r3 = coef[1] + (coef[3] >> 1); + + coef[0] = r0 + r3; + coef[1] = r1 + r2; + coef[2] = r1 - r2; + coef[3] = r0 - r3; + + coef += 16; + } + + coef -= 64; + for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */ + { + r0 = coef[0] + coef[32]; + r1 = coef[0] - coef[32]; + r2 = (coef[16] >> 1) - coef[48]; + r3 = coef[16] + (coef[48] >> 1); + r0 += r3; + r3 = (r0 - (r3 << 1)); /* r0-r3 */ + r1 += r2; + r2 = (r1 - (r2 << 1)); /* r1-r2 */ + r0 += 32; + r1 += 32; + r2 += 32; + r3 += 32; + + r0 = pred[0] + (r0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = *(pred += pred_pitch) + (r1 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = *(pred += pred_pitch) + (r2 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[pred_pitch] + (r3 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + + *cur = r0; + *(cur += pitch) = r1; + *(cur += pitch) = r2; + cur[pitch] = r3; + cur -= (pitch << 1); + cur++; + pred -= (pred_pitch << 1); + pred++; + coef++; + } + } + else // copy from pred to cur + { + *((uint32*)cur) = *((uint32*)pred); + *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch)); + *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch)); + *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch)); + } + } + + return numcoeff; +} + + +void MBInterIdct(AVCCommonObj *video, uint8 *curL, AVCMacroblock *currMB, int picPitch) +{ + int16 *coef, *coef8 = video->block; + uint8 *cur; // the same as curL + int b8, b4; + int r0, r1, r2, r3, j, blkidx; + + for (b8 = 0; b8 < 4; b8++) + { + cur = curL; + coef = coef8; + + if (currMB->CBP&(1 << b8)) + { + for (b4 = 0; b4 < 4; b4++) + { + blkidx = blkIdx2blkXY[b8][b4]; + /* do IDCT */ + if (currMB->nz_coeff[blkidx]) + { + for (j = 4; j > 0; j--) /* horizontal */ + { + r0 = coef[0] + coef[2]; + r1 = coef[0] - coef[2]; + r2 = (coef[1] >> 1) - coef[3]; + r3 = coef[1] + (coef[3] >> 1); + + coef[0] = r0 + r3; + coef[1] = r1 + r2; + coef[2] = r1 - r2; + coef[3] = r0 - r3; + + coef += 16; + } + + coef -= 64; + for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */ + { + r0 = coef[0] + coef[32]; + r1 = coef[0] - coef[32]; + r2 = (coef[16] >> 1) - coef[48]; + r3 = coef[16] + (coef[48] >> 1); + r0 += r3; + r3 = (r0 - (r3 << 1)); /* r0-r3 */ + r1 += r2; + r2 = (r1 - (r2 << 1)); /* r1-r2 */ + r0 += 32; + r1 += 32; + r2 += 32; + r3 += 32; + + r0 = cur[0] + (r0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + *cur = r0; + r1 = *(cur += picPitch) + (r1 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + *cur = r1; + r2 = *(cur += picPitch) + (r2 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + *cur = r2; + r3 = cur[picPitch] + (r3 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + cur[picPitch] = r3; + + cur -= (picPitch << 1); + cur++; + coef++; + } + cur -= 4; + coef -= 4; + } + if (b4&1) + { + cur += ((picPitch << 2) - 4); + coef += 60; + } + else + { + cur += 4; + coef += 4; + } + } + } + + if (b8&1) + { + curL += ((picPitch << 3) - 8); + coef8 += 120; + } + else + { + curL += 8; + coef8 += 8; + } + } + + return ; +} + +/* performa dct, quant, iquant, idct for the entire MB */ +void dct_luma_16x16(AVCEncObject *encvid, uint8 *curL, uint8 *orgL) +{ + AVCCommonObj *video = encvid->common; + int pitch = video->currPic->pitch; + int org_pitch = encvid->currInput->pitch; + AVCMacroblock *currMB = video->currMB; + int16 *coef = video->block; + uint8 *pred = encvid->pred_i16[currMB->i16Mode]; + int blk_x, blk_y, j, k, idx, b8, b4; + int r0, r1, r2, r3, m0, m1, m2 , m3; + int data, lev; + int *level, *run, zero_run, ncoeff; + int Rq, Qq, quant, q_bits, qp_const; + int offset_cur[4], offset_pred[4], offset; + + /* horizontal */ + for (j = 16; j > 0; j--) + { + for (blk_x = 4; blk_x > 0; blk_x--) + { + /* calculate the residue first */ + r0 = *orgL++ - *pred++; + r1 = *orgL++ - *pred++; + r2 = *orgL++ - *pred++; + r3 = *orgL++ - *pred++; + + r0 += r3; //ptr[0] + ptr[3]; + r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; + r1 += r2; //ptr[1] + ptr[2]; + r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; + + *coef++ = r0 + r1; + *coef++ = (r3 << 1) + r2; + *coef++ = r0 - r1; + *coef++ = r3 - (r2 << 1); + } + orgL += (org_pitch - 16); + } + pred -= 256; + coef -= 256; + /* vertical */ + for (blk_y = 4; blk_y > 0; blk_y--) + { + for (j = 16; j > 0; j--) + { + r0 = coef[0] + coef[48]; + r3 = coef[0] - coef[48]; + r1 = coef[16] + coef[32]; + r2 = coef[16] - coef[32]; + + coef[0] = r0 + r1; + coef[32] = r0 - r1; + coef[16] = (r3 << 1) + r2; + coef[48] = r3 - (r2 << 1); + + coef++; + } + coef += 48; + } + + /* then perform DC transform */ + coef -= 256; + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[12]; + r3 = coef[0] - coef[12]; + r1 = coef[4] + coef[8]; + r2 = coef[4] - coef[8]; + + coef[0] = r0 + r1; + coef[8] = r0 - r1; + coef[4] = r3 + r2; + coef[12] = r3 - r2; + coef += 64; + } + coef -= 256; + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[192]; + r3 = coef[0] - coef[192]; + r1 = coef[64] + coef[128]; + r2 = coef[64] - coef[128]; + + coef[0] = (r0 + r1) >> 1; + coef[128] = (r0 - r1) >> 1; + coef[64] = (r3 + r2) >> 1; + coef[192] = (r3 - r2) >> 1; + coef += 4; + } + + coef -= 16; + // then quantize DC + level = encvid->leveldc; + run = encvid->rundc; + + Rq = video->QPy_mod_6; + Qq = video->QPy_div_6; + quant = quant_coef[Rq][0]; + q_bits = 15 + Qq; + qp_const = encvid->qp_const; + + zero_run = 0; + ncoeff = 0; + for (k = 0; k < 16; k++) /* in zigzag scan order */ + { + idx = ZIGZAG2RASTERDC[k]; + data = coef[idx]; + if (data > 0) // quant + { + lev = data * quant + (qp_const << 1); + } + else + { + lev = -data * quant + (qp_const << 1); + } + lev >>= (q_bits + 1); + if (lev) // dequant + { + if (data > 0) + { + level[ncoeff] = lev; + coef[idx] = lev; + } + else + { + level[ncoeff] = -lev; + coef[idx] = -lev; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + /* inverse transform DC */ + encvid->numcoefdc = ncoeff; + if (ncoeff) + { + quant = dequant_coefres[Rq][0]; + + for (j = 0; j < 4; j++) + { + m0 = coef[0] + coef[4]; + m1 = coef[0] - coef[4]; + m2 = coef[8] + coef[12]; + m3 = coef[8] - coef[12]; + + + coef[0] = m0 + m2; + coef[4] = m0 - m2; + coef[8] = m1 - m3; + coef[12] = m1 + m3; + coef += 64; + } + + coef -= 256; + + if (Qq >= 2) /* this way should be faster than JM */ + { /* they use (((m4*scale)<<(QPy/6))+2)>>2 for both cases. */ + Qq -= 2; + for (j = 0; j < 4; j++) + { + m0 = coef[0] + coef[64]; + m1 = coef[0] - coef[64]; + m2 = coef[128] + coef[192]; + m3 = coef[128] - coef[192]; + + coef[0] = ((m0 + m2) * quant) << Qq; + coef[64] = ((m0 - m2) * quant) << Qq; + coef[128] = ((m1 - m3) * quant) << Qq; + coef[192] = ((m1 + m3) * quant) << Qq; + coef += 4; + } + Qq += 2; /* restore the value */ + } + else + { + Qq = 2 - Qq; + offset = 1 << (Qq - 1); + + for (j = 0; j < 4; j++) + { + m0 = coef[0] + coef[64]; + m1 = coef[0] - coef[64]; + m2 = coef[128] + coef[192]; + m3 = coef[128] - coef[192]; + + coef[0] = (((m0 + m2) * quant + offset) >> Qq); + coef[64] = (((m0 - m2) * quant + offset) >> Qq); + coef[128] = (((m1 - m3) * quant + offset) >> Qq); + coef[192] = (((m1 + m3) * quant + offset) >> Qq); + coef += 4; + } + Qq = 2 - Qq; /* restore the value */ + } + coef -= 16; /* back to the origin */ + } + + /* now zigzag scan ac coefs, quant, iquant and itrans */ + run = encvid->run[0]; + level = encvid->level[0]; + + /* offset btw 4x4 block */ + offset_cur[0] = 0; + offset_cur[1] = (pitch << 2) - 8; + + /* offset btw 8x8 block */ + offset_cur[2] = 8 - (pitch << 3); + offset_cur[3] = -8; + + /* similarly for pred */ + offset_pred[0] = 0; + offset_pred[1] = 56; + offset_pred[2] = -120; + offset_pred[3] = -8; + + currMB->CBP = 0; + + for (b8 = 0; b8 < 4; b8++) + { + for (b4 = 0; b4 < 4; b4++) + { + + zero_run = 0; + ncoeff = 0; + + for (k = 1; k < 16; k++) + { + idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */ + data = coef[idx]; + quant = quant_coef[Rq][k]; + if (data > 0) + { + lev = data * quant + qp_const; + } + else + { + lev = -data * quant + qp_const; + } + lev >>= q_bits; + if (lev) + { /* dequant */ + quant = dequant_coefres[Rq][k]; + if (data > 0) + { + level[ncoeff] = lev; + coef[idx] = (lev * quant) << Qq; + } + else + { + level[ncoeff] = -lev; + coef[idx] = (-lev * quant) << Qq; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + currMB->nz_coeff[blkIdx2blkXY[b8][b4]] = ncoeff; /* in raster scan !!! */ + if (ncoeff) + { + currMB->CBP |= (1 << b8); + + // do inverse transform here + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[2]; + r1 = coef[0] - coef[2]; + r2 = (coef[1] >> 1) - coef[3]; + r3 = coef[1] + (coef[3] >> 1); + + coef[0] = r0 + r3; + coef[1] = r1 + r2; + coef[2] = r1 - r2; + coef[3] = r0 - r3; + + coef += 16; + } + coef -= 64; + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[32]; + r1 = coef[0] - coef[32]; + r2 = (coef[16] >> 1) - coef[48]; + r3 = coef[16] + (coef[48] >> 1); + + r0 += r3; + r3 = (r0 - (r3 << 1)); /* r0-r3 */ + r1 += r2; + r2 = (r1 - (r2 << 1)); /* r1-r2 */ + r0 += 32; + r1 += 32; + r2 += 32; + r3 += 32; + r0 = pred[0] + (r0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = pred[16] + (r1 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[32] + (r2 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[48] + (r3 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curL = r0; + *(curL += pitch) = r1; + *(curL += pitch) = r2; + curL[pitch] = r3; + curL -= (pitch << 1); + curL++; + pred++; + coef++; + } + } + else // do DC-only inverse + { + m0 = coef[0] + 32; + + for (j = 4; j > 0; j--) + { + r0 = pred[0] + (m0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = pred[16] + (m0 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[32] + (m0 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[48] + (m0 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curL = r0; + *(curL += pitch) = r1; + *(curL += pitch) = r2; + curL[pitch] = r3; + curL -= (pitch << 1); + curL++; + pred++; + } + coef += 4; + } + + run += 16; // follow coding order + level += 16; + curL += offset_cur[b4&1]; + pred += offset_pred[b4&1]; + coef += offset_pred[b4&1]; + } + + curL += offset_cur[2 + (b8&1)]; + pred += offset_pred[2 + (b8&1)]; + coef += offset_pred[2 + (b8&1)]; + } + + return ; +} + + +void dct_chroma(AVCEncObject *encvid, uint8 *curC, uint8 *orgC, int cr) +{ + AVCCommonObj *video = encvid->common; + AVCMacroblock *currMB = video->currMB; + int org_pitch = (encvid->currInput->pitch) >> 1; + int pitch = (video->currPic->pitch) >> 1; + int pred_pitch = 16; + int16 *coef = video->block + 256; + uint8 *pred = video->pred_block; + int j, blk_x, blk_y, k, idx, b4; + int r0, r1, r2, r3, m0; + int Qq, Rq, qp_const, q_bits, quant; + int *level, *run, zero_run, ncoeff; + int data, lev; + int offset_cur[2], offset_pred[2], offset_coef[2]; + uint8 nz_temp[4]; + int coeff_cost; + + if (cr) + { + coef += 8; + pred += 8; + } + + if (currMB->mb_intra == 0) // inter mode + { + pred = curC; + pred_pitch = pitch; + } + + /* do 4x4 transform */ + /* horizontal */ + for (j = 8; j > 0; j--) + { + for (blk_x = 2; blk_x > 0; blk_x--) + { + /* calculate the residue first */ + r0 = *orgC++ - *pred++; + r1 = *orgC++ - *pred++; + r2 = *orgC++ - *pred++; + r3 = *orgC++ - *pred++; + + r0 += r3; //ptr[0] + ptr[3]; + r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; + r1 += r2; //ptr[1] + ptr[2]; + r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; + + *coef++ = r0 + r1; + *coef++ = (r3 << 1) + r2; + *coef++ = r0 - r1; + *coef++ = r3 - (r2 << 1); + + } + coef += 8; // coef pitch is 16 + pred += (pred_pitch - 8); // pred_pitch is 16 + orgC += (org_pitch - 8); + } + pred -= (pred_pitch << 3); + coef -= 128; + /* vertical */ + for (blk_y = 2; blk_y > 0; blk_y--) + { + for (j = 8; j > 0; j--) + { + r0 = coef[0] + coef[48]; + r3 = coef[0] - coef[48]; + r1 = coef[16] + coef[32]; + r2 = coef[16] - coef[32]; + + coef[0] = r0 + r1; + coef[32] = r0 - r1; + coef[16] = (r3 << 1) + r2; + coef[48] = r3 - (r2 << 1); + + coef++; + } + coef += 56; + } + /* then perform DC transform */ + coef -= 128; + + /* 2x2 transform of DC components*/ + r0 = coef[0]; + r1 = coef[4]; + r2 = coef[64]; + r3 = coef[68]; + + coef[0] = r0 + r1 + r2 + r3; + coef[4] = r0 - r1 + r2 - r3; + coef[64] = r0 + r1 - r2 - r3; + coef[68] = r0 - r1 - r2 + r3; + + Qq = video->QPc_div_6; + Rq = video->QPc_mod_6; + quant = quant_coef[Rq][0]; + q_bits = 15 + Qq; + qp_const = encvid->qp_const_c; + + zero_run = 0; + ncoeff = 0; + run = encvid->runcdc + (cr << 2); + level = encvid->levelcdc + (cr << 2); + + /* in zigzag scan order */ + for (k = 0; k < 4; k++) + { + idx = ((k >> 1) << 6) + ((k & 1) << 2); + data = coef[idx]; + if (data > 0) + { + lev = data * quant + (qp_const << 1); + } + else + { + lev = -data * quant + (qp_const << 1); + } + lev >>= (q_bits + 1); + if (lev) + { + if (data > 0) + { + level[ncoeff] = lev; + coef[idx] = lev; + } + else + { + level[ncoeff] = -lev; + coef[idx] = -lev; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + encvid->numcoefcdc[cr] = ncoeff; + + if (ncoeff) + { + currMB->CBP |= (1 << 4); // DC present + // do inverse transform + quant = dequant_coefres[Rq][0]; + + r0 = coef[0] + coef[4]; + r1 = coef[0] - coef[4]; + r2 = coef[64] + coef[68]; + r3 = coef[64] - coef[68]; + + r0 += r2; + r2 = r0 - (r2 << 1); + r1 += r3; + r3 = r1 - (r3 << 1); + + if (Qq >= 1) + { + Qq -= 1; + coef[0] = (r0 * quant) << Qq; + coef[4] = (r1 * quant) << Qq; + coef[64] = (r2 * quant) << Qq; + coef[68] = (r3 * quant) << Qq; + Qq++; + } + else + { + coef[0] = (r0 * quant) >> 1; + coef[4] = (r1 * quant) >> 1; + coef[64] = (r2 * quant) >> 1; + coef[68] = (r3 * quant) >> 1; + } + } + + /* now do AC zigzag scan, quant, iquant and itrans */ + if (cr) + { + run = encvid->run[20]; + level = encvid->level[20]; + } + else + { + run = encvid->run[16]; + level = encvid->level[16]; + } + + /* offset btw 4x4 block */ + offset_cur[0] = 0; + offset_cur[1] = (pitch << 2) - 8; + offset_pred[0] = 0; + offset_pred[1] = (pred_pitch << 2) - 8; + offset_coef[0] = 0; + offset_coef[1] = 56; + + coeff_cost = 0; + + for (b4 = 0; b4 < 4; b4++) + { + zero_run = 0; + ncoeff = 0; + for (k = 1; k < 16; k++) /* in zigzag scan order */ + { + idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */ + data = coef[idx]; + quant = quant_coef[Rq][k]; + if (data > 0) + { + lev = data * quant + qp_const; + } + else + { + lev = -data * quant + qp_const; + } + lev >>= q_bits; + if (lev) + { + /* for RD performance*/ + if (lev > 1) + coeff_cost += MAX_VALUE; // set high cost, shall not be discarded + else + coeff_cost += COEFF_COST[DISABLE_THRESHOLDING][zero_run]; + + /* dequant */ + quant = dequant_coefres[Rq][k]; + if (data > 0) + { + level[ncoeff] = lev; + coef[idx] = (lev * quant) << Qq; + } + else + { + level[ncoeff] = -lev; + coef[idx] = (-lev * quant) << Qq; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + coef[idx] = 0; + } + } + + nz_temp[b4] = ncoeff; // raster scan + + // just advance the pointers for now, do IDCT later + coef += 4; + run += 16; + level += 16; + coef += offset_coef[b4&1]; + } + + /* rewind the pointers */ + coef -= 128; + + if (coeff_cost < _CHROMA_COEFF_COST_) + { + /* if it's not efficient to encode any blocks. + Just do DC only */ + /* We can reset level and run also, but setting nz to zero should be enough. */ + currMB->nz_coeff[16+(cr<<1)] = 0; + currMB->nz_coeff[17+(cr<<1)] = 0; + currMB->nz_coeff[20+(cr<<1)] = 0; + currMB->nz_coeff[21+(cr<<1)] = 0; + + for (b4 = 0; b4 < 4; b4++) + { + // do DC-only inverse + m0 = coef[0] + 32; + + for (j = 4; j > 0; j--) + { + r0 = pred[0] + (m0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = *(pred += pred_pitch) + (m0 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[pred_pitch] + (m0 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[pred_pitch<<1] + (m0 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curC = r0; + *(curC += pitch) = r1; + *(curC += pitch) = r2; + curC[pitch] = r3; + curC -= (pitch << 1); + curC++; + pred += (1 - pred_pitch); + } + coef += 4; + curC += offset_cur[b4&1]; + pred += offset_pred[b4&1]; + coef += offset_coef[b4&1]; + } + } + else // not dropping anything, continue with the IDCT + { + for (b4 = 0; b4 < 4; b4++) + { + ncoeff = nz_temp[b4] ; // in raster scan + currMB->nz_coeff[16+(b4&1)+(cr<<1)+((b4>>1)<<2)] = ncoeff; // in raster scan + + if (ncoeff) // do a check on the nonzero-coeff + { + currMB->CBP |= (2 << 4); + + // do inverse transform here + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[2]; + r1 = coef[0] - coef[2]; + r2 = (coef[1] >> 1) - coef[3]; + r3 = coef[1] + (coef[3] >> 1); + + coef[0] = r0 + r3; + coef[1] = r1 + r2; + coef[2] = r1 - r2; + coef[3] = r0 - r3; + + coef += 16; + } + coef -= 64; + for (j = 4; j > 0; j--) + { + r0 = coef[0] + coef[32]; + r1 = coef[0] - coef[32]; + r2 = (coef[16] >> 1) - coef[48]; + r3 = coef[16] + (coef[48] >> 1); + + r0 += r3; + r3 = (r0 - (r3 << 1)); /* r0-r3 */ + r1 += r2; + r2 = (r1 - (r2 << 1)); /* r1-r2 */ + r0 += 32; + r1 += 32; + r2 += 32; + r3 += 32; + r0 = pred[0] + (r0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = *(pred += pred_pitch) + (r1 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[pred_pitch] + (r2 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[pred_pitch<<1] + (r3 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curC = r0; + *(curC += pitch) = r1; + *(curC += pitch) = r2; + curC[pitch] = r3; + curC -= (pitch << 1); + curC++; + pred += (1 - pred_pitch); + coef++; + } + } + else + { + // do DC-only inverse + m0 = coef[0] + 32; + + for (j = 4; j > 0; j--) + { + r0 = pred[0] + (m0 >> 6); + if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ + r1 = *(pred += pred_pitch) + (m0 >> 6); + if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ + r2 = pred[pred_pitch] + (m0 >> 6); + if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ + r3 = pred[pred_pitch<<1] + (m0 >> 6); + if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ + *curC = r0; + *(curC += pitch) = r1; + *(curC += pitch) = r2; + curC[pitch] = r3; + curC -= (pitch << 1); + curC++; + pred += (1 - pred_pitch); + } + coef += 4; + } + curC += offset_cur[b4&1]; + pred += offset_pred[b4&1]; + coef += offset_coef[b4&1]; + } + } + + return ; +} + + +/* only DC transform */ +int TransQuantIntra16DC(AVCEncObject *encvid) +{ + AVCCommonObj *video = encvid->common; + int16 *block = video->block; + int *level = encvid->leveldc; + int *run = encvid->rundc; + int16 *ptr = block; + int r0, r1, r2, r3, j; + int Qq = video->QPy_div_6; + int Rq = video->QPy_mod_6; + int q_bits, qp_const, quant; + int data, lev, zero_run; + int k, ncoeff, idx; + + /* DC transform */ + /* horizontal */ + j = 4; + while (j) + { + r0 = ptr[0] + ptr[12]; + r3 = ptr[0] - ptr[12]; + r1 = ptr[4] + ptr[8]; + r2 = ptr[4] - ptr[8]; + + ptr[0] = r0 + r1; + ptr[8] = r0 - r1; + ptr[4] = r3 + r2; + ptr[12] = r3 - r2; + ptr += 64; + j--; + } + /* vertical */ + ptr = block; + j = 4; + while (j) + { + r0 = ptr[0] + ptr[192]; + r3 = ptr[0] - ptr[192]; + r1 = ptr[64] + ptr[128]; + r2 = ptr[64] - ptr[128]; + + ptr[0] = (r0 + r1) >> 1; + ptr[128] = (r0 - r1) >> 1; + ptr[64] = (r3 + r2) >> 1; + ptr[192] = (r3 - r2) >> 1; + ptr += 4; + j--; + } + + quant = quant_coef[Rq][0]; + q_bits = 15 + Qq; + qp_const = (1 << q_bits) / 3; // intra + + zero_run = 0; + ncoeff = 0; + + for (k = 0; k < 16; k++) /* in zigzag scan order */ + { + idx = ZIGZAG2RASTERDC[k]; + data = block[idx]; + if (data > 0) + { + lev = data * quant + (qp_const << 1); + } + else + { + lev = -data * quant + (qp_const << 1); + } + lev >>= (q_bits + 1); + if (lev) + { + if (data > 0) + { + level[ncoeff] = lev; + block[idx] = lev; + } + else + { + level[ncoeff] = -lev; + block[idx] = -lev; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + block[idx] = 0; + } + } + return ncoeff; +} + +int TransQuantChromaDC(AVCEncObject *encvid, int16 *block, int slice_type, int cr) +{ + AVCCommonObj *video = encvid->common; + int *level, *run; + int r0, r1, r2, r3; + int Qq, Rq, q_bits, qp_const, quant; + int data, lev, zero_run; + int k, ncoeff, idx; + + level = encvid->levelcdc + (cr << 2); /* cb or cr */ + run = encvid->runcdc + (cr << 2); + + /* 2x2 transform of DC components*/ + r0 = block[0]; + r1 = block[4]; + r2 = block[64]; + r3 = block[68]; + + block[0] = r0 + r1 + r2 + r3; + block[4] = r0 - r1 + r2 - r3; + block[64] = r0 + r1 - r2 - r3; + block[68] = r0 - r1 - r2 + r3; + + Qq = video->QPc_div_6; + Rq = video->QPc_mod_6; + quant = quant_coef[Rq][0]; + q_bits = 15 + Qq; + if (slice_type == AVC_I_SLICE) + { + qp_const = (1 << q_bits) / 3; + } + else + { + qp_const = (1 << q_bits) / 6; + } + + zero_run = 0; + ncoeff = 0; + + for (k = 0; k < 4; k++) /* in zigzag scan order */ + { + idx = ((k >> 1) << 6) + ((k & 1) << 2); + data = block[idx]; + if (data > 0) + { + lev = data * quant + (qp_const << 1); + } + else + { + lev = -data * quant + (qp_const << 1); + } + lev >>= (q_bits + 1); + if (lev) + { + if (data > 0) + { + level[ncoeff] = lev; + block[idx] = lev; + } + else + { + level[ncoeff] = -lev; + block[idx] = -lev; + } + run[ncoeff++] = zero_run; + zero_run = 0; + } + else + { + zero_run++; + block[idx] = 0; + } + } + return ncoeff; +} + + |