diff options
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c')
-rwxr-xr-x | media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c | 2315 |
1 files changed, 2315 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c new file mode 100755 index 0000000..c948776 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c @@ -0,0 +1,2315 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*------------------------------------------------------------------------------ + + Table of contents + + 1. Include headers + 2. External compiler flags + 3. Module defines + 4. Local function prototypes + 5. Functions + +------------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------------ + 1. Include headers +------------------------------------------------------------------------------*/ + +#include "basetype.h" +#include "h264bsd_reconstruct.h" +#include "h264bsd_macroblock_layer.h" +#include "h264bsd_image.h" +#include "h264bsd_util.h" + +#ifdef H264DEC_OMXDL +#include "omxtypes.h" +#include "omxVC.h" +#include "armVC.h" +#endif /* H264DEC_OMXDL */ + +/*------------------------------------------------------------------------------ + 2. External compiler flags +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + 3. Module defines +------------------------------------------------------------------------------*/ + +/* Switch off the following Lint messages for this file: + * Info 701: Shift left of signed quantity (int) + * Info 702: Shift right of signed quantity (int) + */ +/*lint -e701 -e702 */ + +/* Luma fractional-sample positions + * + * G a b c H + * d e f g + * h i j k m + * n p q r + * M s N + * + * G, H, M and N are integer sample positions + * a-s are fractional samples that need to be interpolated. + */ +#ifndef H264DEC_OMXDL +static const u32 lumaFracPos[4][4] = { + /* G d h n a e i p b f j q c g k r */ + {0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}, {12, 13, 14, 15}}; +#endif /* H264DEC_OMXDL */ + +/* clipping table, defined in h264bsd_intra_prediction.c */ +extern const u8 h264bsdClip[]; + +/*------------------------------------------------------------------------------ + 4. Local function prototypes +------------------------------------------------------------------------------*/ + +#ifndef H264DEC_OMXDL + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateChromaHor + + Functional description: + This function performs chroma interpolation in horizontal direction. + Overfilling is done only if needed. Reference image (pRef) is + read at correct position and the predicted part is written to + macroblock's chrominance (predPartChroma) + Inputs: + pRef pointer to reference frame Cb top-left corner + x0 integer x-coordinate for prediction + y0 integer y-coordinate for prediction + width width of the reference frame chrominance in pixels + height height of the reference frame chrominance in pixels + xFrac horizontal fraction for prediction in 1/8 pixels + chromaPartWidth width of the predicted part in pixels + chromaPartHeight height of the predicted part in pixels + Outputs: + predPartChroma pointer where predicted part is written + +------------------------------------------------------------------------------*/ +#ifndef H264DEC_ARM11 +void h264bsdInterpolateChromaHor( + u8 *pRef, + u8 *predPartChroma, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 xFrac, + u32 chromaPartWidth, + u32 chromaPartHeight) +{ + +/* Variables */ + + u32 x, y, tmp1, tmp2, tmp3, tmp4, c, val; + u8 *ptrA, *cbr; + u32 comp; + u8 block[9*8*2]; + +/* Code */ + + ASSERT(predPartChroma); + ASSERT(chromaPartWidth); + ASSERT(chromaPartHeight); + ASSERT(xFrac < 8); + ASSERT(pRef); + + if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) || + (y0 < 0) || ((u32)y0+chromaPartHeight > height)) + { + h264bsdFillBlock(pRef, block, x0, y0, width, height, + chromaPartWidth + 1, chromaPartHeight, chromaPartWidth + 1); + pRef += width * height; + h264bsdFillBlock(pRef, block + (chromaPartWidth+1)*chromaPartHeight, + x0, y0, width, height, chromaPartWidth + 1, + chromaPartHeight, chromaPartWidth + 1); + + pRef = block; + x0 = 0; + y0 = 0; + width = chromaPartWidth+1; + height = chromaPartHeight; + } + + val = 8 - xFrac; + + for (comp = 0; comp <= 1; comp++) + { + + ptrA = pRef + (comp * height + (u32)y0) * width + x0; + cbr = predPartChroma + comp * 8 * 8; + + /* 2x2 pels per iteration + * bilinear horizontal interpolation */ + for (y = (chromaPartHeight >> 1); y; y--) + { + for (x = (chromaPartWidth >> 1); x; x--) + { + tmp1 = ptrA[width]; + tmp2 = *ptrA++; + tmp3 = ptrA[width]; + tmp4 = *ptrA++; + c = ((val * tmp1 + xFrac * tmp3) << 3) + 32; + c >>= 6; + cbr[8] = (u8)c; + c = ((val * tmp2 + xFrac * tmp4) << 3) + 32; + c >>= 6; + *cbr++ = (u8)c; + tmp1 = ptrA[width]; + tmp2 = *ptrA; + c = ((val * tmp3 + xFrac * tmp1) << 3) + 32; + c >>= 6; + cbr[8] = (u8)c; + c = ((val * tmp4 + xFrac * tmp2) << 3) + 32; + c >>= 6; + *cbr++ = (u8)c; + } + cbr += 2*8 - chromaPartWidth; + ptrA += 2*width - chromaPartWidth; + } + } + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateChromaVer + + Functional description: + This function performs chroma interpolation in vertical direction. + Overfilling is done only if needed. Reference image (pRef) is + read at correct position and the predicted part is written to + macroblock's chrominance (predPartChroma) + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateChromaVer( + u8 *pRef, + u8 *predPartChroma, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 yFrac, + u32 chromaPartWidth, + u32 chromaPartHeight) +{ + +/* Variables */ + + u32 x, y, tmp1, tmp2, tmp3, c, val; + u8 *ptrA, *cbr; + u32 comp; + u8 block[9*8*2]; + +/* Code */ + + ASSERT(predPartChroma); + ASSERT(chromaPartWidth); + ASSERT(chromaPartHeight); + ASSERT(yFrac < 8); + ASSERT(pRef); + + if ((x0 < 0) || ((u32)x0+chromaPartWidth > width) || + (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height)) + { + h264bsdFillBlock(pRef, block, x0, y0, width, height, chromaPartWidth, + chromaPartHeight + 1, chromaPartWidth); + pRef += width * height; + h264bsdFillBlock(pRef, block + chromaPartWidth*(chromaPartHeight+1), + x0, y0, width, height, chromaPartWidth, + chromaPartHeight + 1, chromaPartWidth); + + pRef = block; + x0 = 0; + y0 = 0; + width = chromaPartWidth; + height = chromaPartHeight+1; + } + + val = 8 - yFrac; + + for (comp = 0; comp <= 1; comp++) + { + + ptrA = pRef + (comp * height + (u32)y0) * width + x0; + cbr = predPartChroma + comp * 8 * 8; + + /* 2x2 pels per iteration + * bilinear vertical interpolation */ + for (y = (chromaPartHeight >> 1); y; y--) + { + for (x = (chromaPartWidth >> 1); x; x--) + { + tmp3 = ptrA[width*2]; + tmp2 = ptrA[width]; + tmp1 = *ptrA++; + c = ((val * tmp2 + yFrac * tmp3) << 3) + 32; + c >>= 6; + cbr[8] = (u8)c; + c = ((val * tmp1 + yFrac * tmp2) << 3) + 32; + c >>= 6; + *cbr++ = (u8)c; + tmp3 = ptrA[width*2]; + tmp2 = ptrA[width]; + tmp1 = *ptrA++; + c = ((val * tmp2 + yFrac * tmp3) << 3) + 32; + c >>= 6; + cbr[8] = (u8)c; + c = ((val * tmp1 + yFrac * tmp2) << 3) + 32; + c >>= 6; + *cbr++ = (u8)c; + } + cbr += 2*8 - chromaPartWidth; + ptrA += 2*width - chromaPartWidth; + } + } + +} +#endif +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateChromaHorVer + + Functional description: + This function performs chroma interpolation in horizontal and + vertical direction. Overfilling is done only if needed. Reference + image (ref) is read at correct position and the predicted part + is written to macroblock's chrominance (predPartChroma) + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateChromaHorVer( + u8 *ref, + u8 *predPartChroma, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 xFrac, + u32 yFrac, + u32 chromaPartWidth, + u32 chromaPartHeight) +{ + u8 block[9*9*2]; + u32 x, y, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, valX, valY, plus32 = 32; + u32 comp; + u8 *ptrA, *cbr; + +/* Code */ + + ASSERT(predPartChroma); + ASSERT(chromaPartWidth); + ASSERT(chromaPartHeight); + ASSERT(xFrac < 8); + ASSERT(yFrac < 8); + ASSERT(ref); + + if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) || + (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height)) + { + h264bsdFillBlock(ref, block, x0, y0, width, height, + chromaPartWidth + 1, chromaPartHeight + 1, chromaPartWidth + 1); + ref += width * height; + h264bsdFillBlock(ref, block + (chromaPartWidth+1)*(chromaPartHeight+1), + x0, y0, width, height, chromaPartWidth + 1, + chromaPartHeight + 1, chromaPartWidth + 1); + + ref = block; + x0 = 0; + y0 = 0; + width = chromaPartWidth+1; + height = chromaPartHeight+1; + } + + valX = 8 - xFrac; + valY = 8 - yFrac; + + for (comp = 0; comp <= 1; comp++) + { + + ptrA = ref + (comp * height + (u32)y0) * width + x0; + cbr = predPartChroma + comp * 8 * 8; + + /* 2x2 pels per iteration + * bilinear vertical and horizontal interpolation */ + for (y = (chromaPartHeight >> 1); y; y--) + { + tmp1 = *ptrA; + tmp3 = ptrA[width]; + tmp5 = ptrA[width*2]; + tmp1 *= valY; + tmp1 += tmp3 * yFrac; + tmp3 *= valY; + tmp3 += tmp5 * yFrac; + for (x = (chromaPartWidth >> 1); x; x--) + { + tmp2 = *++ptrA; + tmp4 = ptrA[width]; + tmp6 = ptrA[width*2]; + tmp2 *= valY; + tmp2 += tmp4 * yFrac; + tmp4 *= valY; + tmp4 += tmp6 * yFrac; + tmp1 = tmp1 * valX + plus32; + tmp3 = tmp3 * valX + plus32; + tmp1 += tmp2 * xFrac; + tmp1 >>= 6; + tmp3 += tmp4 * xFrac; + tmp3 >>= 6; + cbr[8] = (u8)tmp3; + *cbr++ = (u8)tmp1; + + tmp1 = *++ptrA; + tmp3 = ptrA[width]; + tmp5 = ptrA[width*2]; + tmp1 *= valY; + tmp1 += tmp3 * yFrac; + tmp3 *= valY; + tmp3 += tmp5 * yFrac; + tmp2 = tmp2 * valX + plus32; + tmp4 = tmp4 * valX + plus32; + tmp2 += tmp1 * xFrac; + tmp2 >>= 6; + tmp4 += tmp3 * xFrac; + tmp4 >>= 6; + cbr[8] = (u8)tmp4; + *cbr++ = (u8)tmp2; + } + cbr += 2*8 - chromaPartWidth; + ptrA += 2*width - chromaPartWidth; + } + } + +} + +/*------------------------------------------------------------------------------ + + Function: PredictChroma + + Functional description: + Top level chroma prediction function that calls the appropriate + interpolation function. The output is written to macroblock array. + +------------------------------------------------------------------------------*/ + +static void PredictChroma( + u8 *mbPartChroma, + u32 xAL, + u32 yAL, + u32 partWidth, + u32 partHeight, + mv_t *mv, + image_t *refPic) +{ + +/* Variables */ + + u32 xFrac, yFrac, width, height, chromaPartWidth, chromaPartHeight; + i32 xInt, yInt; + u8 *ref; + +/* Code */ + + ASSERT(mv); + ASSERT(refPic); + ASSERT(refPic->data); + ASSERT(refPic->width); + ASSERT(refPic->height); + + width = 8 * refPic->width; + height = 8 * refPic->height; + + xInt = (xAL >> 1) + (mv->hor >> 3); + yInt = (yAL >> 1) + (mv->ver >> 3); + xFrac = mv->hor & 0x7; + yFrac = mv->ver & 0x7; + + chromaPartWidth = partWidth >> 1; + chromaPartHeight = partHeight >> 1; + ref = refPic->data + 256 * refPic->width * refPic->height; + + if (xFrac && yFrac) + { + h264bsdInterpolateChromaHorVer(ref, mbPartChroma, xInt, yInt, width, + height, xFrac, yFrac, chromaPartWidth, chromaPartHeight); + } + else if (xFrac) + { + h264bsdInterpolateChromaHor(ref, mbPartChroma, xInt, yInt, width, + height, xFrac, chromaPartWidth, chromaPartHeight); + } + else if (yFrac) + { + h264bsdInterpolateChromaVer(ref, mbPartChroma, xInt, yInt, width, + height, yFrac, chromaPartWidth, chromaPartHeight); + } + else + { + h264bsdFillBlock(ref, mbPartChroma, xInt, yInt, width, height, + chromaPartWidth, chromaPartHeight, 8); + ref += width * height; + h264bsdFillBlock(ref, mbPartChroma + 8*8, xInt, yInt, width, height, + chromaPartWidth, chromaPartHeight, 8); + } + +} + + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateVerHalf + + Functional description: + Function to perform vertical interpolation of pixel position 'h' + for a block. Overfilling is done only if needed. Reference + image (ref) is read at correct position and the predicted part + is written to macroblock array (mb) + +------------------------------------------------------------------------------*/ +#ifndef H264DEC_ARM11 +void h264bsdInterpolateVerHalf( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight) +{ + u32 p1[21*21/4+1]; + u32 i, j; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + u8 *ptrC, *ptrV; + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + + if ((x0 < 0) || ((u32)x0+partWidth > width) || + (y0 < 0) || ((u32)y0+partHeight+5 > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth, partHeight+5, partWidth); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth; + } + + ref += (u32)y0 * width + (u32)x0; + + ptrC = ref + width; + ptrV = ptrC + 5*width; + + /* 4 pixels per iteration, interpolate using 5 vertical samples */ + for (i = (partHeight >> 2); i; i--) + { + /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */ + for (j = partWidth; j; j--) + { + tmp4 = ptrV[-(i32)width*2]; + tmp5 = ptrV[-(i32)width]; + tmp1 = ptrV[width]; + tmp2 = ptrV[width*2]; + tmp6 = *ptrV++; + + tmp7 = tmp4 + tmp1; + tmp2 -= (tmp7 << 2); + tmp2 -= tmp7; + tmp2 += 16; + tmp7 = tmp5 + tmp6; + tmp3 = ptrC[width*2]; + tmp2 += (tmp7 << 4); + tmp2 += (tmp7 << 2); + tmp2 += tmp3; + tmp2 = clp[tmp2>>5]; + tmp1 += 16; + mb[48] = (u8)tmp2; + + tmp7 = tmp3 + tmp6; + tmp1 -= (tmp7 << 2); + tmp1 -= tmp7; + tmp7 = tmp4 + tmp5; + tmp2 = ptrC[width]; + tmp1 += (tmp7 << 4); + tmp1 += (tmp7 << 2); + tmp1 += tmp2; + tmp1 = clp[tmp1>>5]; + tmp6 += 16; + mb[32] = (u8)tmp1; + + tmp7 = tmp2 + tmp5; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp7 = tmp4 + tmp3; + tmp1 = *ptrC; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp6 += tmp1; + tmp6 = clp[tmp6>>5]; + tmp5 += 16; + mb[16] = (u8)tmp6; + + tmp1 += tmp4; + tmp5 -= (tmp1 << 2); + tmp5 -= tmp1; + tmp3 += tmp2; + tmp6 = ptrC[-(i32)width]; + tmp5 += (tmp3 << 4); + tmp5 += (tmp3 << 2); + tmp5 += tmp6; + tmp5 = clp[tmp5>>5]; + *mb++ = (u8)tmp5; + ptrC++; + } + ptrC += 4*width - partWidth; + ptrV += 4*width - partWidth; + mb += 4*16 - partWidth; + } + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateVerQuarter + + Functional description: + Function to perform vertical interpolation of pixel position 'd' + or 'n' for a block. Overfilling is done only if needed. Reference + image (ref) is read at correct position and the predicted part + is written to macroblock array (mb) + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateVerQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 verOffset) /* 0 for pixel d, 1 for pixel n */ +{ + u32 p1[21*21/4+1]; + u32 i, j; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + u8 *ptrC, *ptrV, *ptrInt; + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + + if ((x0 < 0) || ((u32)x0+partWidth > width) || + (y0 < 0) || ((u32)y0+partHeight+5 > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth, partHeight+5, partWidth); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth; + } + + ref += (u32)y0 * width + (u32)x0; + + ptrC = ref + width; + ptrV = ptrC + 5*width; + + /* Pointer to integer sample position, either M or R */ + ptrInt = ptrC + (2+verOffset)*width; + + /* 4 pixels per iteration + * interpolate using 5 vertical samples and average between + * interpolated value and integer sample value */ + for (i = (partHeight >> 2); i; i--) + { + /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */ + for (j = partWidth; j; j--) + { + tmp4 = ptrV[-(i32)width*2]; + tmp5 = ptrV[-(i32)width]; + tmp1 = ptrV[width]; + tmp2 = ptrV[width*2]; + tmp6 = *ptrV++; + + tmp7 = tmp4 + tmp1; + tmp2 -= (tmp7 << 2); + tmp2 -= tmp7; + tmp2 += 16; + tmp7 = tmp5 + tmp6; + tmp3 = ptrC[width*2]; + tmp2 += (tmp7 << 4); + tmp2 += (tmp7 << 2); + tmp2 += tmp3; + tmp2 = clp[tmp2>>5]; + tmp7 = ptrInt[width*2]; + tmp1 += 16; + tmp2++; + mb[48] = (u8)((tmp2 + tmp7) >> 1); + + tmp7 = tmp3 + tmp6; + tmp1 -= (tmp7 << 2); + tmp1 -= tmp7; + tmp7 = tmp4 + tmp5; + tmp2 = ptrC[width]; + tmp1 += (tmp7 << 4); + tmp1 += (tmp7 << 2); + tmp1 += tmp2; + tmp1 = clp[tmp1>>5]; + tmp7 = ptrInt[width]; + tmp6 += 16; + tmp1++; + mb[32] = (u8)((tmp1 + tmp7) >> 1); + + tmp7 = tmp2 + tmp5; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp7 = tmp4 + tmp3; + tmp1 = *ptrC; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp6 += tmp1; + tmp6 = clp[tmp6>>5]; + tmp7 = *ptrInt; + tmp5 += 16; + tmp6++; + mb[16] = (u8)((tmp6 + tmp7) >> 1); + + tmp1 += tmp4; + tmp5 -= (tmp1 << 2); + tmp5 -= tmp1; + tmp3 += tmp2; + tmp6 = ptrC[-(i32)width]; + tmp5 += (tmp3 << 4); + tmp5 += (tmp3 << 2); + tmp5 += tmp6; + tmp5 = clp[tmp5>>5]; + tmp7 = ptrInt[-(i32)width]; + tmp5++; + *mb++ = (u8)((tmp5 + tmp7) >> 1); + ptrC++; + ptrInt++; + } + ptrC += 4*width - partWidth; + ptrV += 4*width - partWidth; + ptrInt += 4*width - partWidth; + mb += 4*16 - partWidth; + } + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateHorHalf + + Functional description: + Function to perform horizontal interpolation of pixel position 'b' + for a block. Overfilling is done only if needed. Reference + image (ref) is read at correct position and the predicted part + is written to macroblock array (mb) + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateHorHalf( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight) +{ + u32 p1[21*21/4+1]; + u8 *ptrJ; + u32 x, y; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + ASSERT((partWidth&0x3) == 0); + ASSERT((partHeight&0x3) == 0); + + if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || + (y0 < 0) || ((u32)y0+partHeight > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth+5, partHeight, partWidth+5); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth + 5; + } + + ref += (u32)y0 * width + (u32)x0; + + ptrJ = ref + 5; + + for (y = partHeight; y; y--) + { + tmp6 = *(ptrJ - 5); + tmp5 = *(ptrJ - 4); + tmp4 = *(ptrJ - 3); + tmp3 = *(ptrJ - 2); + tmp2 = *(ptrJ - 1); + + /* calculate 4 pels per iteration */ + for (x = (partWidth >> 2); x; x--) + { + /* First pixel */ + tmp6 += 16; + tmp7 = tmp3 + tmp4; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp7 = tmp2 + tmp5; + tmp1 = *ptrJ++; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp6 += tmp1; + tmp6 = clp[tmp6>>5]; + /* Second pixel */ + tmp5 += 16; + tmp7 = tmp2 + tmp3; + *mb++ = (u8)tmp6; + tmp5 += (tmp7 << 4); + tmp5 += (tmp7 << 2); + tmp7 = tmp1 + tmp4; + tmp6 = *ptrJ++; + tmp5 -= (tmp7 << 2); + tmp5 -= tmp7; + tmp5 += tmp6; + tmp5 = clp[tmp5>>5]; + /* Third pixel */ + tmp4 += 16; + tmp7 = tmp1 + tmp2; + *mb++ = (u8)tmp5; + tmp4 += (tmp7 << 4); + tmp4 += (tmp7 << 2); + tmp7 = tmp6 + tmp3; + tmp5 = *ptrJ++; + tmp4 -= (tmp7 << 2); + tmp4 -= tmp7; + tmp4 += tmp5; + tmp4 = clp[tmp4>>5]; + /* Fourth pixel */ + tmp3 += 16; + tmp7 = tmp6 + tmp1; + *mb++ = (u8)tmp4; + tmp3 += (tmp7 << 4); + tmp3 += (tmp7 << 2); + tmp7 = tmp5 + tmp2; + tmp4 = *ptrJ++; + tmp3 -= (tmp7 << 2); + tmp3 -= tmp7; + tmp3 += tmp4; + tmp3 = clp[tmp3>>5]; + tmp7 = tmp4; + tmp4 = tmp6; + tmp6 = tmp2; + tmp2 = tmp7; + *mb++ = (u8)tmp3; + tmp3 = tmp5; + tmp5 = tmp1; + } + ptrJ += width - partWidth; + mb += 16 - partWidth; + } + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateHorQuarter + + Functional description: + Function to perform horizontal interpolation of pixel position 'a' + or 'c' for a block. Overfilling is done only if needed. Reference + image (ref) is read at correct position and the predicted part + is written to macroblock array (mb) + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateHorQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 horOffset) /* 0 for pixel a, 1 for pixel c */ +{ + u32 p1[21*21/4+1]; + u8 *ptrJ; + u32 x, y; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + + if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || + (y0 < 0) || ((u32)y0+partHeight > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth+5, partHeight, partWidth+5); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth + 5; + } + + ref += (u32)y0 * width + (u32)x0; + + ptrJ = ref + 5; + + for (y = partHeight; y; y--) + { + tmp6 = *(ptrJ - 5); + tmp5 = *(ptrJ - 4); + tmp4 = *(ptrJ - 3); + tmp3 = *(ptrJ - 2); + tmp2 = *(ptrJ - 1); + + /* calculate 4 pels per iteration */ + for (x = (partWidth >> 2); x; x--) + { + /* First pixel */ + tmp6 += 16; + tmp7 = tmp3 + tmp4; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp7 = tmp2 + tmp5; + tmp1 = *ptrJ++; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp6 += tmp1; + tmp6 = clp[tmp6>>5]; + tmp5 += 16; + if (!horOffset) + tmp6 += tmp4; + else + tmp6 += tmp3; + *mb++ = (u8)((tmp6 + 1) >> 1); + /* Second pixel */ + tmp7 = tmp2 + tmp3; + tmp5 += (tmp7 << 4); + tmp5 += (tmp7 << 2); + tmp7 = tmp1 + tmp4; + tmp6 = *ptrJ++; + tmp5 -= (tmp7 << 2); + tmp5 -= tmp7; + tmp5 += tmp6; + tmp5 = clp[tmp5>>5]; + tmp4 += 16; + if (!horOffset) + tmp5 += tmp3; + else + tmp5 += tmp2; + *mb++ = (u8)((tmp5 + 1) >> 1); + /* Third pixel */ + tmp7 = tmp1 + tmp2; + tmp4 += (tmp7 << 4); + tmp4 += (tmp7 << 2); + tmp7 = tmp6 + tmp3; + tmp5 = *ptrJ++; + tmp4 -= (tmp7 << 2); + tmp4 -= tmp7; + tmp4 += tmp5; + tmp4 = clp[tmp4>>5]; + tmp3 += 16; + if (!horOffset) + tmp4 += tmp2; + else + tmp4 += tmp1; + *mb++ = (u8)((tmp4 + 1) >> 1); + /* Fourth pixel */ + tmp7 = tmp6 + tmp1; + tmp3 += (tmp7 << 4); + tmp3 += (tmp7 << 2); + tmp7 = tmp5 + tmp2; + tmp4 = *ptrJ++; + tmp3 -= (tmp7 << 2); + tmp3 -= tmp7; + tmp3 += tmp4; + tmp3 = clp[tmp3>>5]; + if (!horOffset) + tmp3 += tmp1; + else + tmp3 += tmp6; + *mb++ = (u8)((tmp3 + 1) >> 1); + tmp3 = tmp5; + tmp5 = tmp1; + tmp7 = tmp4; + tmp4 = tmp6; + tmp6 = tmp2; + tmp2 = tmp7; + } + ptrJ += width - partWidth; + mb += 16 - partWidth; + } + +} + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateHorVerQuarter + + Functional description: + Function to perform horizontal and vertical interpolation of pixel + position 'e', 'g', 'p' or 'r' for a block. Overfilling is done only + if needed. Reference image (ref) is read at correct position and + the predicted part is written to macroblock array (mb) + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateHorVerQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 horVerOffset) /* 0 for pixel e, 1 for pixel g, + 2 for pixel p, 3 for pixel r */ +{ + u32 p1[21*21/4+1]; + u8 *ptrC, *ptrJ, *ptrV; + u32 x, y; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + + if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || + (y0 < 0) || ((u32)y0+partHeight+5 > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth+5, partHeight+5, partWidth+5); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth+5; + } + + /* Ref points to G + (-2, -2) */ + ref += (u32)y0 * width + (u32)x0; + + /* ptrJ points to either J or Q, depending on vertical offset */ + ptrJ = ref + (((horVerOffset & 0x2) >> 1) + 2) * width + 5; + + /* ptrC points to either C or D, depending on horizontal offset */ + ptrC = ref + width + 2 + (horVerOffset & 0x1); + + for (y = partHeight; y; y--) + { + tmp6 = *(ptrJ - 5); + tmp5 = *(ptrJ - 4); + tmp4 = *(ptrJ - 3); + tmp3 = *(ptrJ - 2); + tmp2 = *(ptrJ - 1); + + /* Horizontal interpolation, calculate 4 pels per iteration */ + for (x = (partWidth >> 2); x; x--) + { + /* First pixel */ + tmp6 += 16; + tmp7 = tmp3 + tmp4; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp7 = tmp2 + tmp5; + tmp1 = *ptrJ++; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp6 += tmp1; + tmp6 = clp[tmp6>>5]; + /* Second pixel */ + tmp5 += 16; + tmp7 = tmp2 + tmp3; + *mb++ = (u8)tmp6; + tmp5 += (tmp7 << 4); + tmp5 += (tmp7 << 2); + tmp7 = tmp1 + tmp4; + tmp6 = *ptrJ++; + tmp5 -= (tmp7 << 2); + tmp5 -= tmp7; + tmp5 += tmp6; + tmp5 = clp[tmp5>>5]; + /* Third pixel */ + tmp4 += 16; + tmp7 = tmp1 + tmp2; + *mb++ = (u8)tmp5; + tmp4 += (tmp7 << 4); + tmp4 += (tmp7 << 2); + tmp7 = tmp6 + tmp3; + tmp5 = *ptrJ++; + tmp4 -= (tmp7 << 2); + tmp4 -= tmp7; + tmp4 += tmp5; + tmp4 = clp[tmp4>>5]; + /* Fourth pixel */ + tmp3 += 16; + tmp7 = tmp6 + tmp1; + *mb++ = (u8)tmp4; + tmp3 += (tmp7 << 4); + tmp3 += (tmp7 << 2); + tmp7 = tmp5 + tmp2; + tmp4 = *ptrJ++; + tmp3 -= (tmp7 << 2); + tmp3 -= tmp7; + tmp3 += tmp4; + tmp3 = clp[tmp3>>5]; + tmp7 = tmp4; + tmp4 = tmp6; + tmp6 = tmp2; + tmp2 = tmp7; + *mb++ = (u8)tmp3; + tmp3 = tmp5; + tmp5 = tmp1; + } + ptrJ += width - partWidth; + mb += 16 - partWidth; + } + + mb -= 16*partHeight; + ptrV = ptrC + 5*width; + + for (y = (partHeight >> 2); y; y--) + { + /* Vertical interpolation and averaging, 4 pels per iteration */ + for (x = partWidth; x; x--) + { + tmp4 = ptrV[-(i32)width*2]; + tmp5 = ptrV[-(i32)width]; + tmp1 = ptrV[width]; + tmp2 = ptrV[width*2]; + tmp6 = *ptrV++; + + tmp7 = tmp4 + tmp1; + tmp2 -= (tmp7 << 2); + tmp2 -= tmp7; + tmp2 += 16; + tmp7 = tmp5 + tmp6; + tmp3 = ptrC[width*2]; + tmp2 += (tmp7 << 4); + tmp2 += (tmp7 << 2); + tmp2 += tmp3; + tmp7 = clp[tmp2>>5]; + tmp2 = mb[48]; + tmp1 += 16; + tmp7++; + mb[48] = (u8)((tmp2 + tmp7) >> 1); + + tmp7 = tmp3 + tmp6; + tmp1 -= (tmp7 << 2); + tmp1 -= tmp7; + tmp7 = tmp4 + tmp5; + tmp2 = ptrC[width]; + tmp1 += (tmp7 << 4); + tmp1 += (tmp7 << 2); + tmp1 += tmp2; + tmp7 = clp[tmp1>>5]; + tmp1 = mb[32]; + tmp6 += 16; + tmp7++; + mb[32] = (u8)((tmp1 + tmp7) >> 1); + + tmp1 = *ptrC; + tmp7 = tmp2 + tmp5; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp7 = tmp4 + tmp3; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp6 += tmp1; + tmp7 = clp[tmp6>>5]; + tmp6 = mb[16]; + tmp5 += 16; + tmp7++; + mb[16] = (u8)((tmp6 + tmp7) >> 1); + + tmp6 = ptrC[-(i32)width]; + tmp1 += tmp4; + tmp5 -= (tmp1 << 2); + tmp5 -= tmp1; + tmp3 += tmp2; + tmp5 += (tmp3 << 4); + tmp5 += (tmp3 << 2); + tmp5 += tmp6; + tmp7 = clp[tmp5>>5]; + tmp5 = *mb; + tmp7++; + *mb++ = (u8)((tmp5 + tmp7) >> 1); + ptrC++; + + } + ptrC += 4*width - partWidth; + ptrV += 4*width - partWidth; + mb += 4*16 - partWidth; + } + +} +#endif + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateMidHalf + + Functional description: + Function to perform horizontal and vertical interpolation of pixel + position 'j' for a block. Overfilling is done only if needed. + Reference image (ref) is read at correct position and the predicted + part is written to macroblock array (mb) + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateMidHalf( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight) +{ + u32 p1[21*21/4+1]; + u32 x, y; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + i32 *ptrC, *ptrV, *b1; + u8 *ptrJ; + i32 table[21*16]; + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + + if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || + (y0 < 0) || ((u32)y0+partHeight+5 > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth+5, partHeight+5, partWidth+5); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth+5; + } + + ref += (u32)y0 * width + (u32)x0; + + b1 = table; + ptrJ = ref + 5; + + /* First step: calculate intermediate values for + * horizontal interpolation */ + for (y = partHeight + 5; y; y--) + { + tmp6 = *(ptrJ - 5); + tmp5 = *(ptrJ - 4); + tmp4 = *(ptrJ - 3); + tmp3 = *(ptrJ - 2); + tmp2 = *(ptrJ - 1); + + /* 4 pels per iteration */ + for (x = (partWidth >> 2); x; x--) + { + /* First pixel */ + tmp7 = tmp3 + tmp4; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp7 = tmp2 + tmp5; + tmp1 = *ptrJ++; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp6 += tmp1; + *b1++ = tmp6; + /* Second pixel */ + tmp7 = tmp2 + tmp3; + tmp5 += (tmp7 << 4); + tmp5 += (tmp7 << 2); + tmp7 = tmp1 + tmp4; + tmp6 = *ptrJ++; + tmp5 -= (tmp7 << 2); + tmp5 -= tmp7; + tmp5 += tmp6; + *b1++ = tmp5; + /* Third pixel */ + tmp7 = tmp1 + tmp2; + tmp4 += (tmp7 << 4); + tmp4 += (tmp7 << 2); + tmp7 = tmp6 + tmp3; + tmp5 = *ptrJ++; + tmp4 -= (tmp7 << 2); + tmp4 -= tmp7; + tmp4 += tmp5; + *b1++ = tmp4; + /* Fourth pixel */ + tmp7 = tmp6 + tmp1; + tmp3 += (tmp7 << 4); + tmp3 += (tmp7 << 2); + tmp7 = tmp5 + tmp2; + tmp4 = *ptrJ++; + tmp3 -= (tmp7 << 2); + tmp3 -= tmp7; + tmp3 += tmp4; + *b1++ = tmp3; + tmp7 = tmp4; + tmp4 = tmp6; + tmp6 = tmp2; + tmp2 = tmp7; + tmp3 = tmp5; + tmp5 = tmp1; + } + ptrJ += width - partWidth; + } + + /* Second step: calculate vertical interpolation */ + ptrC = table + partWidth; + ptrV = ptrC + 5*partWidth; + for (y = (partHeight >> 2); y; y--) + { + /* 4 pels per iteration */ + for (x = partWidth; x; x--) + { + tmp4 = ptrV[-(i32)partWidth*2]; + tmp5 = ptrV[-(i32)partWidth]; + tmp1 = ptrV[partWidth]; + tmp2 = ptrV[partWidth*2]; + tmp6 = *ptrV++; + + tmp7 = tmp4 + tmp1; + tmp2 -= (tmp7 << 2); + tmp2 -= tmp7; + tmp2 += 512; + tmp7 = tmp5 + tmp6; + tmp3 = ptrC[partWidth*2]; + tmp2 += (tmp7 << 4); + tmp2 += (tmp7 << 2); + tmp2 += tmp3; + tmp7 = clp[tmp2>>10]; + tmp1 += 512; + mb[48] = (u8)tmp7; + + tmp7 = tmp3 + tmp6; + tmp1 -= (tmp7 << 2); + tmp1 -= tmp7; + tmp7 = tmp4 + tmp5; + tmp2 = ptrC[partWidth]; + tmp1 += (tmp7 << 4); + tmp1 += (tmp7 << 2); + tmp1 += tmp2; + tmp7 = clp[tmp1>>10]; + tmp6 += 512; + mb[32] = (u8)tmp7; + + tmp1 = *ptrC; + tmp7 = tmp2 + tmp5; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp7 = tmp4 + tmp3; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp6 += tmp1; + tmp7 = clp[tmp6>>10]; + tmp5 += 512; + mb[16] = (u8)tmp7; + + tmp6 = ptrC[-(i32)partWidth]; + tmp1 += tmp4; + tmp5 -= (tmp1 << 2); + tmp5 -= tmp1; + tmp3 += tmp2; + tmp5 += (tmp3 << 4); + tmp5 += (tmp3 << 2); + tmp5 += tmp6; + tmp7 = clp[tmp5>>10]; + *mb++ = (u8)tmp7; + ptrC++; + } + mb += 4*16 - partWidth; + ptrC += 3*partWidth; + ptrV += 3*partWidth; + } + +} + + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateMidVerQuarter + + Functional description: + Function to perform horizontal and vertical interpolation of pixel + position 'f' or 'q' for a block. Overfilling is done only if needed. + Reference image (ref) is read at correct position and the predicted + part is written to macroblock array (mb) + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateMidVerQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 verOffset) /* 0 for pixel f, 1 for pixel q */ +{ + u32 p1[21*21/4+1]; + u32 x, y; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + i32 *ptrC, *ptrV, *ptrInt, *b1; + u8 *ptrJ; + i32 table[21*16]; + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + + if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || + (y0 < 0) || ((u32)y0+partHeight+5 > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth+5, partHeight+5, partWidth+5); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth+5; + } + + ref += (u32)y0 * width + (u32)x0; + + b1 = table; + ptrJ = ref + 5; + + /* First step: calculate intermediate values for + * horizontal interpolation */ + for (y = partHeight + 5; y; y--) + { + tmp6 = *(ptrJ - 5); + tmp5 = *(ptrJ - 4); + tmp4 = *(ptrJ - 3); + tmp3 = *(ptrJ - 2); + tmp2 = *(ptrJ - 1); + for (x = (partWidth >> 2); x; x--) + { + /* First pixel */ + tmp7 = tmp3 + tmp4; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp7 = tmp2 + tmp5; + tmp1 = *ptrJ++; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp6 += tmp1; + *b1++ = tmp6; + /* Second pixel */ + tmp7 = tmp2 + tmp3; + tmp5 += (tmp7 << 4); + tmp5 += (tmp7 << 2); + tmp7 = tmp1 + tmp4; + tmp6 = *ptrJ++; + tmp5 -= (tmp7 << 2); + tmp5 -= tmp7; + tmp5 += tmp6; + *b1++ = tmp5; + /* Third pixel */ + tmp7 = tmp1 + tmp2; + tmp4 += (tmp7 << 4); + tmp4 += (tmp7 << 2); + tmp7 = tmp6 + tmp3; + tmp5 = *ptrJ++; + tmp4 -= (tmp7 << 2); + tmp4 -= tmp7; + tmp4 += tmp5; + *b1++ = tmp4; + /* Fourth pixel */ + tmp7 = tmp6 + tmp1; + tmp3 += (tmp7 << 4); + tmp3 += (tmp7 << 2); + tmp7 = tmp5 + tmp2; + tmp4 = *ptrJ++; + tmp3 -= (tmp7 << 2); + tmp3 -= tmp7; + tmp3 += tmp4; + *b1++ = tmp3; + tmp7 = tmp4; + tmp4 = tmp6; + tmp6 = tmp2; + tmp2 = tmp7; + tmp3 = tmp5; + tmp5 = tmp1; + } + ptrJ += width - partWidth; + } + + /* Second step: calculate vertical interpolation and average */ + ptrC = table + partWidth; + ptrV = ptrC + 5*partWidth; + /* Pointer to integer sample position, either M or R */ + ptrInt = ptrC + (2+verOffset)*partWidth; + for (y = (partHeight >> 2); y; y--) + { + for (x = partWidth; x; x--) + { + tmp4 = ptrV[-(i32)partWidth*2]; + tmp5 = ptrV[-(i32)partWidth]; + tmp1 = ptrV[partWidth]; + tmp2 = ptrV[partWidth*2]; + tmp6 = *ptrV++; + + tmp7 = tmp4 + tmp1; + tmp2 -= (tmp7 << 2); + tmp2 -= tmp7; + tmp2 += 512; + tmp7 = tmp5 + tmp6; + tmp3 = ptrC[partWidth*2]; + tmp2 += (tmp7 << 4); + tmp2 += (tmp7 << 2); + tmp7 = ptrInt[partWidth*2]; + tmp2 += tmp3; + tmp2 = clp[tmp2>>10]; + tmp7 += 16; + tmp7 = clp[tmp7>>5]; + tmp1 += 512; + tmp2++; + mb[48] = (u8)((tmp7 + tmp2) >> 1); + + tmp7 = tmp3 + tmp6; + tmp1 -= (tmp7 << 2); + tmp1 -= tmp7; + tmp7 = tmp4 + tmp5; + tmp2 = ptrC[partWidth]; + tmp1 += (tmp7 << 4); + tmp1 += (tmp7 << 2); + tmp7 = ptrInt[partWidth]; + tmp1 += tmp2; + tmp1 = clp[tmp1>>10]; + tmp7 += 16; + tmp7 = clp[tmp7>>5]; + tmp6 += 512; + tmp1++; + mb[32] = (u8)((tmp7 + tmp1) >> 1); + + tmp1 = *ptrC; + tmp7 = tmp2 + tmp5; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp7 = tmp4 + tmp3; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp7 = *ptrInt; + tmp6 += tmp1; + tmp6 = clp[tmp6>>10]; + tmp7 += 16; + tmp7 = clp[tmp7>>5]; + tmp5 += 512; + tmp6++; + mb[16] = (u8)((tmp7 + tmp6) >> 1); + + tmp6 = ptrC[-(i32)partWidth]; + tmp1 += tmp4; + tmp5 -= (tmp1 << 2); + tmp5 -= tmp1; + tmp3 += tmp2; + tmp5 += (tmp3 << 4); + tmp5 += (tmp3 << 2); + tmp7 = ptrInt[-(i32)partWidth]; + tmp5 += tmp6; + tmp5 = clp[tmp5>>10]; + tmp7 += 16; + tmp7 = clp[tmp7>>5]; + tmp5++; + *mb++ = (u8)((tmp7 + tmp5) >> 1); + ptrC++; + ptrInt++; + } + mb += 4*16 - partWidth; + ptrC += 3*partWidth; + ptrV += 3*partWidth; + ptrInt += 3*partWidth; + } + +} + + +/*------------------------------------------------------------------------------ + + Function: h264bsdInterpolateMidHorQuarter + + Functional description: + Function to perform horizontal and vertical interpolation of pixel + position 'i' or 'k' for a block. Overfilling is done only if needed. + Reference image (ref) is read at correct position and the predicted + part is written to macroblock array (mb) + +------------------------------------------------------------------------------*/ + +void h264bsdInterpolateMidHorQuarter( + u8 *ref, + u8 *mb, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 partWidth, + u32 partHeight, + u32 horOffset) /* 0 for pixel i, 1 for pixel k */ +{ + u32 p1[21*21/4+1]; + u32 x, y; + i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + i32 *ptrJ, *ptrInt, *h1; + u8 *ptrC, *ptrV; + i32 table[21*16]; + i32 tableWidth = (i32)partWidth+5; + const u8 *clp = h264bsdClip + 512; + + /* Code */ + + ASSERT(ref); + ASSERT(mb); + + if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || + (y0 < 0) || ((u32)y0+partHeight+5 > height)) + { + h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, + partWidth+5, partHeight+5, partWidth+5); + + x0 = 0; + y0 = 0; + ref = (u8*)p1; + width = partWidth+5; + } + + ref += (u32)y0 * width + (u32)x0; + + h1 = table + tableWidth; + ptrC = ref + width; + ptrV = ptrC + 5*width; + + /* First step: calculate intermediate values for + * vertical interpolation */ + for (y = (partHeight >> 2); y; y--) + { + for (x = (u32)tableWidth; x; x--) + { + tmp4 = ptrV[-(i32)width*2]; + tmp5 = ptrV[-(i32)width]; + tmp1 = ptrV[width]; + tmp2 = ptrV[width*2]; + tmp6 = *ptrV++; + + tmp7 = tmp4 + tmp1; + tmp2 -= (tmp7 << 2); + tmp2 -= tmp7; + tmp7 = tmp5 + tmp6; + tmp3 = ptrC[width*2]; + tmp2 += (tmp7 << 4); + tmp2 += (tmp7 << 2); + tmp2 += tmp3; + h1[tableWidth*2] = tmp2; + + tmp7 = tmp3 + tmp6; + tmp1 -= (tmp7 << 2); + tmp1 -= tmp7; + tmp7 = tmp4 + tmp5; + tmp2 = ptrC[width]; + tmp1 += (tmp7 << 4); + tmp1 += (tmp7 << 2); + tmp1 += tmp2; + h1[tableWidth] = tmp1; + + tmp1 = *ptrC; + tmp7 = tmp2 + tmp5; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp7 = tmp4 + tmp3; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp6 += tmp1; + *h1 = tmp6; + + tmp6 = ptrC[-(i32)width]; + tmp1 += tmp4; + tmp5 -= (tmp1 << 2); + tmp5 -= tmp1; + tmp3 += tmp2; + tmp5 += (tmp3 << 4); + tmp5 += (tmp3 << 2); + tmp5 += tmp6; + h1[-tableWidth] = tmp5; + h1++; + ptrC++; + } + ptrC += 4*width - partWidth - 5; + ptrV += 4*width - partWidth - 5; + h1 += 3*tableWidth; + } + + /* Second step: calculate horizontal interpolation and average */ + ptrJ = table + 5; + /* Pointer to integer sample position, either G or H */ + ptrInt = table + 2 + horOffset; + for (y = partHeight; y; y--) + { + tmp6 = *(ptrJ - 5); + tmp5 = *(ptrJ - 4); + tmp4 = *(ptrJ - 3); + tmp3 = *(ptrJ - 2); + tmp2 = *(ptrJ - 1); + for (x = (partWidth>>2); x; x--) + { + /* First pixel */ + tmp6 += 512; + tmp7 = tmp3 + tmp4; + tmp6 += (tmp7 << 4); + tmp6 += (tmp7 << 2); + tmp7 = tmp2 + tmp5; + tmp1 = *ptrJ++; + tmp6 -= (tmp7 << 2); + tmp6 -= tmp7; + tmp7 = *ptrInt++; + tmp6 += tmp1; + tmp6 = clp[tmp6 >> 10]; + tmp7 += 16; + tmp7 = clp[tmp7 >> 5]; + tmp5 += 512; + tmp6++; + *mb++ = (u8)((tmp6 + tmp7) >> 1); + /* Second pixel */ + tmp7 = tmp2 + tmp3; + tmp5 += (tmp7 << 4); + tmp5 += (tmp7 << 2); + tmp7 = tmp1 + tmp4; + tmp6 = *ptrJ++; + tmp5 -= (tmp7 << 2); + tmp5 -= tmp7; + tmp7 = *ptrInt++; + tmp5 += tmp6; + tmp5 = clp[tmp5 >> 10]; + tmp7 += 16; + tmp7 = clp[tmp7 >> 5]; + tmp4 += 512; + tmp5++; + *mb++ = (u8)((tmp5 + tmp7) >> 1); + /* Third pixel */ + tmp7 = tmp1 + tmp2; + tmp4 += (tmp7 << 4); + tmp4 += (tmp7 << 2); + tmp7 = tmp6 + tmp3; + tmp5 = *ptrJ++; + tmp4 -= (tmp7 << 2); + tmp4 -= tmp7; + tmp7 = *ptrInt++; + tmp4 += tmp5; + tmp4 = clp[tmp4 >> 10]; + tmp7 += 16; + tmp7 = clp[tmp7 >> 5]; + tmp3 += 512; + tmp4++; + *mb++ = (u8)((tmp4 + tmp7) >> 1); + /* Fourth pixel */ + tmp7 = tmp6 + tmp1; + tmp3 += (tmp7 << 4); + tmp3 += (tmp7 << 2); + tmp7 = tmp5 + tmp2; + tmp4 = *ptrJ++; + tmp3 -= (tmp7 << 2); + tmp3 -= tmp7; + tmp7 = *ptrInt++; + tmp3 += tmp4; + tmp3 = clp[tmp3 >> 10]; + tmp7 += 16; + tmp7 = clp[tmp7 >> 5]; + tmp3++; + *mb++ = (u8)((tmp3 + tmp7) >> 1); + tmp3 = tmp5; + tmp5 = tmp1; + tmp7 = tmp4; + tmp4 = tmp6; + tmp6 = tmp2; + tmp2 = tmp7; + } + ptrJ += 5; + ptrInt += 5; + mb += 16 - partWidth; + } + +} + + +/*------------------------------------------------------------------------------ + + Function: h264bsdPredictSamples + + Functional description: + This function reconstructs a prediction for a macroblock partition. + The prediction is either copied or interpolated using the reference + frame and the motion vector. Both luminance and chrominance parts are + predicted. The prediction is stored in given macroblock array (data). + Inputs: + data pointer to macroblock array (384 bytes) for output + mv pointer to motion vector used for prediction + refPic pointer to reference picture structure + xA x-coordinate for current macroblock + yA y-coordinate for current macroblock + partX x-offset for partition in macroblock + partY y-offset for partition in macroblock + partWidth width of partition + partHeight height of partition + Outputs: + data macroblock array (16x16+8x8+8x8) where predicted + partition is stored at correct position + +------------------------------------------------------------------------------*/ + +void h264bsdPredictSamples( + u8 *data, + mv_t *mv, + image_t *refPic, + u32 xA, + u32 yA, + u32 partX, + u32 partY, + u32 partWidth, + u32 partHeight) + +{ + +/* Variables */ + + u32 xFrac, yFrac, width, height; + i32 xInt, yInt; + u8 *lumaPartData; + +/* Code */ + + ASSERT(data); + ASSERT(mv); + ASSERT(partWidth); + ASSERT(partHeight); + ASSERT(refPic); + ASSERT(refPic->data); + ASSERT(refPic->width); + ASSERT(refPic->height); + + /* luma */ + lumaPartData = data + 16*partY + partX; + + xFrac = mv->hor & 0x3; + yFrac = mv->ver & 0x3; + + width = 16 * refPic->width; + height = 16 * refPic->height; + + xInt = (i32)xA + (i32)partX + (mv->hor >> 2); + yInt = (i32)yA + (i32)partY + (mv->ver >> 2); + + ASSERT(lumaFracPos[xFrac][yFrac] < 16); + + switch (lumaFracPos[xFrac][yFrac]) + { + case 0: /* G */ + h264bsdFillBlock(refPic->data, lumaPartData, + xInt,yInt,width,height,partWidth,partHeight,16); + break; + case 1: /* d */ + h264bsdInterpolateVerQuarter(refPic->data, lumaPartData, + xInt, yInt-2, width, height, partWidth, partHeight, 0); + break; + case 2: /* h */ + h264bsdInterpolateVerHalf(refPic->data, lumaPartData, + xInt, yInt-2, width, height, partWidth, partHeight); + break; + case 3: /* n */ + h264bsdInterpolateVerQuarter(refPic->data, lumaPartData, + xInt, yInt-2, width, height, partWidth, partHeight, 1); + break; + case 4: /* a */ + h264bsdInterpolateHorQuarter(refPic->data, lumaPartData, + xInt-2, yInt, width, height, partWidth, partHeight, 0); + break; + case 5: /* e */ + h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 0); + break; + case 6: /* i */ + h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 0); + break; + case 7: /* p */ + h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 2); + break; + case 8: /* b */ + h264bsdInterpolateHorHalf(refPic->data, lumaPartData, + xInt-2, yInt, width, height, partWidth, partHeight); + break; + case 9: /* f */ + h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 0); + break; + case 10: /* j */ + h264bsdInterpolateMidHalf(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight); + break; + case 11: /* q */ + h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 1); + break; + case 12: /* c */ + h264bsdInterpolateHorQuarter(refPic->data, lumaPartData, + xInt-2, yInt, width, height, partWidth, partHeight, 1); + break; + case 13: /* g */ + h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 1); + break; + case 14: /* k */ + h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 1); + break; + default: /* case 15, r */ + h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, + xInt-2, yInt-2, width, height, partWidth, partHeight, 3); + break; + } + + /* chroma */ + PredictChroma( + data + 16*16 + (partY>>1)*8 + (partX>>1), + xA + partX, + yA + partY, + partWidth, + partHeight, + mv, + refPic); + +} + +#else /* H264DEC_OMXDL */ +/*------------------------------------------------------------------------------ + + Function: h264bsdPredictSamples + + Functional description: + This function reconstructs a prediction for a macroblock partition. + The prediction is either copied or interpolated using the reference + frame and the motion vector. Both luminance and chrominance parts are + predicted. The prediction is stored in given macroblock array (data). + Inputs: + data pointer to macroblock array (384 bytes) for output + mv pointer to motion vector used for prediction + refPic pointer to reference picture structure + xA x-coordinate for current macroblock + yA y-coordinate for current macroblock + partX x-offset for partition in macroblock + partY y-offset for partition in macroblock + partWidth width of partition + partHeight height of partition + Outputs: + data macroblock array (16x16+8x8+8x8) where predicted + partition is stored at correct position + +------------------------------------------------------------------------------*/ + +/*lint -e{550} Symbol 'res' not accessed */ +void h264bsdPredictSamples( + u8 *data, + mv_t *mv, + image_t *refPic, + u32 colAndRow, + u32 part, + u8 *pFill) + +{ + +/* Variables */ + + u32 xFrac, yFrac; + u32 width, height; + i32 xInt, yInt, x0, y0; + u8 *partData, *ref; + OMXSize roi; + u32 fillWidth; + u32 fillHeight; + OMXResult res; + u32 xA, yA; + u32 partX, partY; + u32 partWidth, partHeight; + +/* Code */ + + ASSERT(data); + ASSERT(mv); + ASSERT(refPic); + ASSERT(refPic->data); + ASSERT(refPic->width); + ASSERT(refPic->height); + + xA = (colAndRow & 0xFFFF0000) >> 16; + yA = (colAndRow & 0x0000FFFF); + + partX = (part & 0xFF000000) >> 24; + partY = (part & 0x00FF0000) >> 16; + partWidth = (part & 0x0000FF00) >> 8; + partHeight = (part & 0x000000FF); + + ASSERT(partWidth); + ASSERT(partHeight); + + /* luma */ + partData = data + 16*partY + partX; + + xFrac = mv->hor & 0x3; + yFrac = mv->ver & 0x3; + + width = 16 * refPic->width; + height = 16 * refPic->height; + + xInt = (i32)xA + (i32)partX + (mv->hor >> 2); + yInt = (i32)yA + (i32)partY + (mv->ver >> 2); + + x0 = (xFrac) ? xInt-2 : xInt; + y0 = (yFrac) ? yInt-2 : yInt; + + if (xFrac) + { + if (partWidth == 16) + fillWidth = 32; + else + fillWidth = 16; + } + else + fillWidth = (partWidth*2); + if (yFrac) + fillHeight = partHeight+5; + else + fillHeight = partHeight; + + + if ((x0 < 0) || ((u32)x0+fillWidth > width) || + (y0 < 0) || ((u32)y0+fillHeight > height)) + { + h264bsdFillBlock(refPic->data, (u8*)pFill, x0, y0, width, height, + fillWidth, fillHeight, fillWidth); + + x0 = 0; + y0 = 0; + ref = pFill; + width = fillWidth; + if (yFrac) + ref += 2*width; + if (xFrac) + ref += 2; + } + else + { + /*lint --e(737) Loss of sign */ + ref = refPic->data + yInt*width + xInt; + } + /* Luma interpolation */ + roi.width = (i32)partWidth; + roi.height = (i32)partHeight; + + res = omxVCM4P10_InterpolateLuma(ref, (i32)width, partData, 16, + (i32)xFrac, (i32)yFrac, roi); + ASSERT(res == 0); + + /* Chroma */ + width = 8 * refPic->width; + height = 8 * refPic->height; + + x0 = ((xA + partX) >> 1) + (mv->hor >> 3); + y0 = ((yA + partY) >> 1) + (mv->ver >> 3); + xFrac = mv->hor & 0x7; + yFrac = mv->ver & 0x7; + + ref = refPic->data + 256 * refPic->width * refPic->height; + + roi.width = (i32)(partWidth >> 1); + fillWidth = ((partWidth >> 1) + 8) & ~0x7; + roi.height = (i32)(partHeight >> 1); + fillHeight = (partHeight >> 1) + 1; + + if ((x0 < 0) || ((u32)x0+fillWidth > width) || + (y0 < 0) || ((u32)y0+fillHeight > height)) + { + h264bsdFillBlock(ref, pFill, x0, y0, width, height, + fillWidth, fillHeight, fillWidth); + ref += width * height; + h264bsdFillBlock(ref, pFill + fillWidth*fillHeight, + x0, y0, width, height, fillWidth, + fillHeight, fillWidth); + + ref = pFill; + x0 = 0; + y0 = 0; + width = fillWidth; + height = fillHeight; + } + + partData = data + 16*16 + (partY>>1)*8 + (partX>>1); + + /* Chroma interpolation */ + /*lint --e(737) Loss of sign */ + ref += y0 * width + x0; + res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8, + (u32)roi.width, (u32)roi.height, xFrac, yFrac); + ASSERT(res == 0); + partData += 8 * 8; + ref += height * width; + res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8, + (u32)roi.width, (u32)roi.height, xFrac, yFrac); + ASSERT(res == 0); + +} + +#endif /* H264DEC_OMXDL */ + + +/*------------------------------------------------------------------------------ + + Function: FillRow1 + + Functional description: + This function gets a row of reference pels in a 'normal' case when no + overfilling is necessary. + +------------------------------------------------------------------------------*/ + +static void FillRow1( + u8 *ref, + u8 *fill, + i32 left, + i32 center, + i32 right) +{ + + ASSERT(ref); + ASSERT(fill); + + H264SwDecMemcpy(fill, ref, (u32)center); + + /*lint -e(715) */ +} + + +/*------------------------------------------------------------------------------ + + Function: h264bsdFillRow7 + + Functional description: + This function gets a row of reference pels when horizontal coordinate + is partly negative or partly greater than reference picture width + (overfilling some pels on left and/or right edge). + Inputs: + ref pointer to reference samples + left amount of pixels to overfill on left-edge + center amount of pixels to copy + right amount of pixels to overfill on right-edge + Outputs: + fill pointer where samples are stored + +------------------------------------------------------------------------------*/ +#ifndef H264DEC_NEON +void h264bsdFillRow7( + u8 *ref, + u8 *fill, + i32 left, + i32 center, + i32 right) +{ + u8 tmp; + + ASSERT(ref); + ASSERT(fill); + + if (left) + tmp = *ref; + + for ( ; left; left--) + /*lint -esym(644,tmp) tmp is initialized if used */ + *fill++ = tmp; + + for ( ; center; center--) + *fill++ = *ref++; + + if (right) + tmp = ref[-1]; + + for ( ; right; right--) + /*lint -esym(644,tmp) tmp is initialized if used */ + *fill++ = tmp; +} +#endif +/*------------------------------------------------------------------------------ + + Function: h264bsdFillBlock + + Functional description: + This function gets a block of reference pels. It determines whether + overfilling is needed or not and repeatedly calls an appropriate + function (by using a function pointer) that fills one row the block. + Inputs: + ref pointer to reference frame + x0 x-coordinate for block + y0 y-coordinate for block + width width of reference frame + height height of reference frame + blockWidth width of block + blockHeight height of block + fillScanLength length of a line in output array (pixels) + Outputs: + fill pointer to array where output block is written + +------------------------------------------------------------------------------*/ + +void h264bsdFillBlock( + u8 *ref, + u8 *fill, + i32 x0, + i32 y0, + u32 width, + u32 height, + u32 blockWidth, + u32 blockHeight, + u32 fillScanLength) + +{ + +/* Variables */ + + i32 xstop, ystop; + void (*fp)(u8*, u8*, i32, i32, i32); + i32 left, x, right; + i32 top, y, bottom; + +/* Code */ + + ASSERT(ref); + ASSERT(fill); + ASSERT(width); + ASSERT(height); + ASSERT(fill); + ASSERT(blockWidth); + ASSERT(blockHeight); + + xstop = x0 + (i32)blockWidth; + ystop = y0 + (i32)blockHeight; + + /* Choose correct function whether overfilling on left-edge or right-edge + * is needed or not */ + if (x0 >= 0 && xstop <= (i32)width) + fp = FillRow1; + else + fp = h264bsdFillRow7; + + if (ystop < 0) + y0 = -(i32)blockHeight; + + if (xstop < 0) + x0 = -(i32)blockWidth; + + if (y0 > (i32)height) + y0 = (i32)height; + + if (x0 > (i32)width) + x0 = (i32)width; + + xstop = x0 + (i32)blockWidth; + ystop = y0 + (i32)blockHeight; + + if (x0 > 0) + ref += x0; + + if (y0 > 0) + ref += y0 * (i32)width; + + left = x0 < 0 ? -x0 : 0; + right = xstop > (i32)width ? xstop - (i32)width : 0; + x = (i32)blockWidth - left - right; + + top = y0 < 0 ? -y0 : 0; + bottom = ystop > (i32)height ? ystop - (i32)height : 0; + y = (i32)blockHeight - top - bottom; + + /* Top-overfilling */ + for ( ; top; top-- ) + { + (*fp)(ref, fill, left, x, right); + fill += fillScanLength; + } + + /* Lines inside reference image */ + for ( ; y; y-- ) + { + (*fp)(ref, fill, left, x, right); + ref += width; + fill += fillScanLength; + } + + ref -= width; + + /* Bottom-overfilling */ + for ( ; bottom; bottom-- ) + { + (*fp)(ref, fill, left, x, right); + fill += fillScanLength; + } +} + +/*lint +e701 +e702 */ + + |