summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/avc/dec/src/pred_inter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/avc/dec/src/pred_inter.cpp')
-rw-r--r--media/libstagefright/codecs/avc/dec/src/pred_inter.cpp2329
1 files changed, 0 insertions, 2329 deletions
diff --git a/media/libstagefright/codecs/avc/dec/src/pred_inter.cpp b/media/libstagefright/codecs/avc/dec/src/pred_inter.cpp
deleted file mode 100644
index ba36c37..0000000
--- a/media/libstagefright/codecs/avc/dec/src/pred_inter.cpp
+++ /dev/null
@@ -1,2329 +0,0 @@
-/* ------------------------------------------------------------------
- * Copyright (C) 1998-2009 PacketVideo
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
- * express or implied.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- * -------------------------------------------------------------------
- */
-#include "avcdec_lib.h"
-
-
-#define CLIP_RESULT(x) if((uint)x > 0xFF){ \
- x = 0xFF & (~(x>>31));}
-
-/* (blkwidth << 2) + (dy << 1) + dx */
-static void (*const ChromaMC_SIMD[8])(uint8 *, int , int , int , uint8 *, int, int , int) =
-{
- &ChromaFullMC_SIMD,
- &ChromaHorizontalMC_SIMD,
- &ChromaVerticalMC_SIMD,
- &ChromaDiagonalMC_SIMD,
- &ChromaFullMC_SIMD,
- &ChromaHorizontalMC2_SIMD,
- &ChromaVerticalMC2_SIMD,
- &ChromaDiagonalMC2_SIMD
-};
-/* Perform motion prediction and compensation with residue if exist. */
-void InterMBPrediction(AVCCommonObj *video)
-{
- AVCMacroblock *currMB = video->currMB;
- AVCPictureData *currPic = video->currPic;
- int mbPartIdx, subMbPartIdx;
- int ref_idx;
- int offset_MbPart_indx = 0;
- int16 *mv;
- uint32 x_pos, y_pos;
- uint8 *curL, *curCb, *curCr;
- uint8 *ref_l, *ref_Cb, *ref_Cr;
- uint8 *predBlock, *predCb, *predCr;
- int block_x, block_y, offset_x, offset_y, offsetP, offset;
- int x_position = (video->mb_x << 4);
- int y_position = (video->mb_y << 4);
- int MbHeight, MbWidth, mbPartIdx_X, mbPartIdx_Y, offset_indx;
- int picWidth = currPic->pitch;
- int picHeight = currPic->height;
- int16 *dataBlock;
- uint32 cbp4x4;
- uint32 tmp_word;
-
- tmp_word = y_position * picWidth;
- curL = currPic->Sl + tmp_word + x_position;
- offset = (tmp_word >> 2) + (x_position >> 1);
- curCb = currPic->Scb + offset;
- curCr = currPic->Scr + offset;
-
-#ifdef USE_PRED_BLOCK
- predBlock = video->pred + 84;
- predCb = video->pred + 452;
- predCr = video->pred + 596;
-#else
- predBlock = curL;
- predCb = curCb;
- predCr = curCr;
-#endif
-
- GetMotionVectorPredictor(video, false);
-
- for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++)
- {
- MbHeight = currMB->SubMbPartHeight[mbPartIdx];
- MbWidth = currMB->SubMbPartWidth[mbPartIdx];
- mbPartIdx_X = ((mbPartIdx + offset_MbPart_indx) & 1);
- mbPartIdx_Y = (mbPartIdx + offset_MbPart_indx) >> 1;
- ref_idx = currMB->ref_idx_L0[(mbPartIdx_Y << 1) + mbPartIdx_X];
- offset_indx = 0;
-
- ref_l = video->RefPicList0[ref_idx]->Sl;
- ref_Cb = video->RefPicList0[ref_idx]->Scb;
- ref_Cr = video->RefPicList0[ref_idx]->Scr;
-
- for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++)
- {
- block_x = (mbPartIdx_X << 1) + ((subMbPartIdx + offset_indx) & 1); // check this
- block_y = (mbPartIdx_Y << 1) + (((subMbPartIdx + offset_indx) >> 1) & 1);
- mv = (int16*)(currMB->mvL0 + block_x + (block_y << 2));
- offset_x = x_position + (block_x << 2);
- offset_y = y_position + (block_y << 2);
- x_pos = (offset_x << 2) + *mv++; /*quarter pel */
- y_pos = (offset_y << 2) + *mv; /*quarter pel */
-
- //offset = offset_y * currPic->width;
- //offsetC = (offset >> 2) + (offset_x >> 1);
-#ifdef USE_PRED_BLOCK
- offsetP = (block_y * 80) + (block_x << 2);
- LumaMotionComp(ref_l, picWidth, picHeight, x_pos, y_pos,
- /*comp_Sl + offset + offset_x,*/
- predBlock + offsetP, 20, MbWidth, MbHeight);
-#else
- offsetP = (block_y << 2) * picWidth + (block_x << 2);
- LumaMotionComp(ref_l, picWidth, picHeight, x_pos, y_pos,
- /*comp_Sl + offset + offset_x,*/
- predBlock + offsetP, picWidth, MbWidth, MbHeight);
-#endif
-
-#ifdef USE_PRED_BLOCK
- offsetP = (block_y * 24) + (block_x << 1);
- ChromaMotionComp(ref_Cb, picWidth >> 1, picHeight >> 1, x_pos, y_pos,
- /*comp_Scb + offsetC,*/
- predCb + offsetP, 12, MbWidth >> 1, MbHeight >> 1);
- ChromaMotionComp(ref_Cr, picWidth >> 1, picHeight >> 1, x_pos, y_pos,
- /*comp_Scr + offsetC,*/
- predCr + offsetP, 12, MbWidth >> 1, MbHeight >> 1);
-#else
- offsetP = (block_y * picWidth) + (block_x << 1);
- ChromaMotionComp(ref_Cb, picWidth >> 1, picHeight >> 1, x_pos, y_pos,
- /*comp_Scb + offsetC,*/
- predCb + offsetP, picWidth >> 1, MbWidth >> 1, MbHeight >> 1);
- ChromaMotionComp(ref_Cr, picWidth >> 1, picHeight >> 1, x_pos, y_pos,
- /*comp_Scr + offsetC,*/
- predCr + offsetP, picWidth >> 1, MbWidth >> 1, MbHeight >> 1);
-#endif
-
- offset_indx = currMB->SubMbPartWidth[mbPartIdx] >> 3;
- }
- offset_MbPart_indx = currMB->MbPartWidth >> 4;
- }
-
- /* used in decoder, used to be if(!encFlag) */
-
- /* transform in raster scan order */
- dataBlock = video->block;
- cbp4x4 = video->cbp4x4;
- /* luma */
- for (block_y = 4; block_y > 0; block_y--)
- {
- for (block_x = 4; block_x > 0; block_x--)
- {
-#ifdef USE_PRED_BLOCK
- if (cbp4x4&1)
- {
- itrans(dataBlock, predBlock, predBlock, 20);
- }
-#else
- if (cbp4x4&1)
- {
- itrans(dataBlock, curL, curL, picWidth);
- }
-#endif
- cbp4x4 >>= 1;
- dataBlock += 4;
-#ifdef USE_PRED_BLOCK
- predBlock += 4;
-#else
- curL += 4;
-#endif
- }
- dataBlock += 48;
-#ifdef USE_PRED_BLOCK
- predBlock += 64;
-#else
- curL += ((picWidth << 2) - 16);
-#endif
- }
-
- /* chroma */
- picWidth = (picWidth >> 1);
- for (block_y = 2; block_y > 0; block_y--)
- {
- for (block_x = 2; block_x > 0; block_x--)
- {
-#ifdef USE_PRED_BLOCK
- if (cbp4x4&1)
- {
- ictrans(dataBlock, predCb, predCb, 12);
- }
-#else
- if (cbp4x4&1)
- {
- ictrans(dataBlock, curCb, curCb, picWidth);
- }
-#endif
- cbp4x4 >>= 1;
- dataBlock += 4;
-#ifdef USE_PRED_BLOCK
- predCb += 4;
-#else
- curCb += 4;
-#endif
- }
- for (block_x = 2; block_x > 0; block_x--)
- {
-#ifdef USE_PRED_BLOCK
- if (cbp4x4&1)
- {
- ictrans(dataBlock, predCr, predCr, 12);
- }
-#else
- if (cbp4x4&1)
- {
- ictrans(dataBlock, curCr, curCr, picWidth);
- }
-#endif
- cbp4x4 >>= 1;
- dataBlock += 4;
-#ifdef USE_PRED_BLOCK
- predCr += 4;
-#else
- curCr += 4;
-#endif
- }
- dataBlock += 48;
-#ifdef USE_PRED_BLOCK
- predCb += 40;
- predCr += 40;
-#else
- curCb += ((picWidth << 2) - 8);
- curCr += ((picWidth << 2) - 8);
-#endif
- }
-
-#ifdef MB_BASED_DEBLOCK
- SaveNeighborForIntraPred(video, offset);
-#endif
-
- return ;
-}
-
-
-/* preform the actual motion comp here */
-void LumaMotionComp(uint8 *ref, int picwidth, int picheight,
- int x_pos, int y_pos,
- uint8 *pred, int pred_pitch,
- int blkwidth, int blkheight)
-{
- int dx, dy;
- uint8 temp[24][24]; /* for padding, make the size multiple of 4 for packing */
- int temp2[21][21]; /* for intermediate results */
- uint8 *ref2;
-
- dx = x_pos & 3;
- dy = y_pos & 3;
- x_pos = x_pos >> 2; /* round it to full-pel resolution */
- y_pos = y_pos >> 2;
-
- /* perform actual motion compensation */
- if (dx == 0 && dy == 0)
- { /* fullpel position *//* G */
- if (x_pos >= 0 && x_pos + blkwidth <= picwidth && y_pos >= 0 && y_pos + blkheight <= picheight)
- {
- ref += y_pos * picwidth + x_pos;
- FullPelMC(ref, picwidth, pred, pred_pitch, blkwidth, blkheight);
- }
- else
- {
- CreatePad(ref, picwidth, picheight, x_pos, y_pos, &temp[0][0], blkwidth, blkheight);
- FullPelMC(&temp[0][0], 24, pred, pred_pitch, blkwidth, blkheight);
- }
-
- } /* other positions */
- else if (dy == 0)
- { /* no vertical interpolation *//* a,b,c*/
-
- if (x_pos - 2 >= 0 && x_pos + 3 + blkwidth <= picwidth && y_pos >= 0 && y_pos + blkheight <= picheight)
- {
- ref += y_pos * picwidth + x_pos;
-
- HorzInterp1MC(ref, picwidth, pred, pred_pitch, blkwidth, blkheight, dx);
- }
- else /* need padding */
- {
- CreatePad(ref, picwidth, picheight, x_pos - 2, y_pos, &temp[0][0], blkwidth + 5, blkheight);
-
- HorzInterp1MC(&temp[0][2], 24, pred, pred_pitch, blkwidth, blkheight, dx);
- }
- }
- else if (dx == 0)
- { /*no horizontal interpolation *//* d,h,n */
-
- if (x_pos >= 0 && x_pos + blkwidth <= picwidth && y_pos - 2 >= 0 && y_pos + 3 + blkheight <= picheight)
- {
- ref += y_pos * picwidth + x_pos;
-
- VertInterp1MC(ref, picwidth, pred, pred_pitch, blkwidth, blkheight, dy);
- }
- else /* need padding */
- {
- CreatePad(ref, picwidth, picheight, x_pos, y_pos - 2, &temp[0][0], blkwidth, blkheight + 5);
-
- VertInterp1MC(&temp[2][0], 24, pred, pred_pitch, blkwidth, blkheight, dy);
- }
- }
- else if (dy == 2)
- { /* horizontal cross *//* i, j, k */
-
- if (x_pos - 2 >= 0 && x_pos + 3 + blkwidth <= picwidth && y_pos - 2 >= 0 && y_pos + 3 + blkheight <= picheight)
- {
- ref += y_pos * picwidth + x_pos - 2; /* move to the left 2 pixels */
-
- VertInterp2MC(ref, picwidth, &temp2[0][0], 21, blkwidth + 5, blkheight);
-
- HorzInterp2MC(&temp2[0][2], 21, pred, pred_pitch, blkwidth, blkheight, dx);
- }
- else /* need padding */
- {
- CreatePad(ref, picwidth, picheight, x_pos - 2, y_pos - 2, &temp[0][0], blkwidth + 5, blkheight + 5);
-
- VertInterp2MC(&temp[2][0], 24, &temp2[0][0], 21, blkwidth + 5, blkheight);
-
- HorzInterp2MC(&temp2[0][2], 21, pred, pred_pitch, blkwidth, blkheight, dx);
- }
- }
- else if (dx == 2)
- { /* vertical cross */ /* f,q */
-
- if (x_pos - 2 >= 0 && x_pos + 3 + blkwidth <= picwidth && y_pos - 2 >= 0 && y_pos + 3 + blkheight <= picheight)
- {
- ref += (y_pos - 2) * picwidth + x_pos; /* move to up 2 lines */
-
- HorzInterp3MC(ref, picwidth, &temp2[0][0], 21, blkwidth, blkheight + 5);
- VertInterp3MC(&temp2[2][0], 21, pred, pred_pitch, blkwidth, blkheight, dy);
- }
- else /* need padding */
- {
- CreatePad(ref, picwidth, picheight, x_pos - 2, y_pos - 2, &temp[0][0], blkwidth + 5, blkheight + 5);
- HorzInterp3MC(&temp[0][2], 24, &temp2[0][0], 21, blkwidth, blkheight + 5);
- VertInterp3MC(&temp2[2][0], 21, pred, pred_pitch, blkwidth, blkheight, dy);
- }
- }
- else
- { /* diagonal *//* e,g,p,r */
-
- if (x_pos - 2 >= 0 && x_pos + 3 + (dx / 2) + blkwidth <= picwidth &&
- y_pos - 2 >= 0 && y_pos + 3 + blkheight + (dy / 2) <= picheight)
- {
- ref2 = ref + (y_pos + (dy / 2)) * picwidth + x_pos;
-
- ref += (y_pos * picwidth) + x_pos + (dx / 2);
-
- DiagonalInterpMC(ref2, ref, picwidth, pred, pred_pitch, blkwidth, blkheight);
- }
- else /* need padding */
- {
- CreatePad(ref, picwidth, picheight, x_pos - 2, y_pos - 2, &temp[0][0], blkwidth + 5 + (dx / 2), blkheight + 5 + (dy / 2));
-
- ref2 = &temp[2 + (dy/2)][2];
-
- ref = &temp[2][2 + (dx/2)];
-
- DiagonalInterpMC(ref2, ref, 24, pred, pred_pitch, blkwidth, blkheight);
- }
- }
-
- return ;
-}
-
-void CreateAlign(uint8 *ref, int picwidth, int y_pos,
- uint8 *out, int blkwidth, int blkheight)
-{
- int i, j;
- int offset, out_offset;
- uint32 prev_pix, result, pix1, pix2, pix4;
-
- out_offset = 24 - blkwidth;
-
- //switch(x_pos&0x3){
- switch (((uint32)ref)&0x3)
- {
- case 1:
- ref += y_pos * picwidth;
- offset = picwidth - blkwidth - 3;
- for (j = 0; j < blkheight; j++)
- {
- pix1 = *ref++;
- pix2 = *((uint16*)ref);
- ref += 2;
- result = (pix2 << 8) | pix1;
-
- for (i = 3; i < blkwidth; i += 4)
- {
- pix4 = *((uint32*)ref);
- ref += 4;
- prev_pix = (pix4 << 24) & 0xFF000000; /* mask out byte belong to previous word */
- result |= prev_pix;
- *((uint32*)out) = result; /* write 4 bytes */
- out += 4;
- result = pix4 >> 8; /* for the next loop */
- }
- ref += offset;
- out += out_offset;
- }
- break;
- case 2:
- ref += y_pos * picwidth;
- offset = picwidth - blkwidth - 2;
- for (j = 0; j < blkheight; j++)
- {
- result = *((uint16*)ref);
- ref += 2;
- for (i = 2; i < blkwidth; i += 4)
- {
- pix4 = *((uint32*)ref);
- ref += 4;
- prev_pix = (pix4 << 16) & 0xFFFF0000; /* mask out byte belong to previous word */
- result |= prev_pix;
- *((uint32*)out) = result; /* write 4 bytes */
- out += 4;
- result = pix4 >> 16; /* for the next loop */
- }
- ref += offset;
- out += out_offset;
- }
- break;
- case 3:
- ref += y_pos * picwidth;
- offset = picwidth - blkwidth - 1;
- for (j = 0; j < blkheight; j++)
- {
- result = *ref++;
- for (i = 1; i < blkwidth; i += 4)
- {
- pix4 = *((uint32*)ref);
- ref += 4;
- prev_pix = (pix4 << 8) & 0xFFFFFF00; /* mask out byte belong to previous word */
- result |= prev_pix;
- *((uint32*)out) = result; /* write 4 bytes */
- out += 4;
- result = pix4 >> 24; /* for the next loop */
- }
- ref += offset;
- out += out_offset;
- }
- break;
- }
-}
-
-void CreatePad(uint8 *ref, int picwidth, int picheight, int x_pos, int y_pos,
- uint8 *out, int blkwidth, int blkheight)
-{
- int x_inc0, x_mid;
- int y_inc, y_inc0, y_inc1, y_mid;
- int i, j;
- int offset;
-
- if (x_pos < 0)
- {
- x_inc0 = 0; /* increment for the first part */
- x_mid = ((blkwidth + x_pos > 0) ? -x_pos : blkwidth); /* stopping point */
- x_pos = 0;
- }
- else if (x_pos + blkwidth > picwidth)
- {
- x_inc0 = 1; /* increasing */
- x_mid = ((picwidth > x_pos) ? picwidth - x_pos - 1 : 0); /* clip negative to zero, encode fool proof! */
- }
- else /* normal case */
- {
- x_inc0 = 1;
- x_mid = blkwidth; /* just one run */
- }
-
-
- /* boundary for y_pos, taking the result from x_pos into account */
- if (y_pos < 0)
- {
- y_inc0 = (x_inc0 ? - x_mid : -blkwidth + x_mid); /* offset depending on x_inc1 and x_inc0 */
- y_inc1 = picwidth + y_inc0;
- y_mid = ((blkheight + y_pos > 0) ? -y_pos : blkheight); /* clip to prevent memory corruption */
- y_pos = 0;
- }
- else if (y_pos + blkheight > picheight)
- {
- y_inc1 = (x_inc0 ? - x_mid : -blkwidth + x_mid); /* saturate */
- y_inc0 = picwidth + y_inc1; /* increasing */
- y_mid = ((picheight > y_pos) ? picheight - 1 - y_pos : 0);
- }
- else /* normal case */
- {
- y_inc1 = (x_inc0 ? - x_mid : -blkwidth + x_mid);
- y_inc0 = picwidth + y_inc1;
- y_mid = blkheight;
- }
-
- /* clip y_pos and x_pos */
- if (y_pos > picheight - 1) y_pos = picheight - 1;
- if (x_pos > picwidth - 1) x_pos = picwidth - 1;
-
- ref += y_pos * picwidth + x_pos;
-
- y_inc = y_inc0; /* start with top half */
-
- offset = 24 - blkwidth; /* to use in offset out */
- blkwidth -= x_mid; /* to use in the loop limit */
-
- if (x_inc0 == 0)
- {
- for (j = 0; j < blkheight; j++)
- {
- if (j == y_mid) /* put a check here to reduce the code size (for unrolling the loop) */
- {
- y_inc = y_inc1; /* switch to lower half */
- }
- for (i = x_mid; i > 0; i--) /* first or third quarter */
- {
- *out++ = *ref;
- }
- for (i = blkwidth; i > 0; i--) /* second or fourth quarter */
- {
- *out++ = *ref++;
- }
- out += offset;
- ref += y_inc;
- }
- }
- else
- {
- for (j = 0; j < blkheight; j++)
- {
- if (j == y_mid) /* put a check here to reduce the code size (for unrolling the loop) */
- {
- y_inc = y_inc1; /* switch to lower half */
- }
- for (i = x_mid; i > 0; i--) /* first or third quarter */
- {
- *out++ = *ref++;
- }
- for (i = blkwidth; i > 0; i--) /* second or fourth quarter */
- {
- *out++ = *ref;
- }
- out += offset;
- ref += y_inc;
- }
- }
-
- return ;
-}
-
-void HorzInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch,
- int blkwidth, int blkheight, int dx)
-{
- uint8 *p_ref;
- uint32 *p_cur;
- uint32 tmp, pkres;
- int result, curr_offset, ref_offset;
- int j;
- int32 r0, r1, r2, r3, r4, r5;
- int32 r13, r6;
-
- p_cur = (uint32*)out; /* assume it's word aligned */
- curr_offset = (outpitch - blkwidth) >> 2;
- p_ref = in;
- ref_offset = inpitch - blkwidth;
-
- if (dx&1)
- {
- dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */
- p_ref -= 2;
- r13 = 0;
- for (j = blkheight; j > 0; j--)
- {
- tmp = (uint32)(p_ref + blkwidth);
- r0 = p_ref[0];
- r1 = p_ref[2];
- r0 |= (r1 << 16); /* 0,c,0,a */
- r1 = p_ref[1];
- r2 = p_ref[3];
- r1 |= (r2 << 16); /* 0,d,0,b */
- while ((uint32)p_ref < tmp)
- {
- r2 = *(p_ref += 4); /* move pointer to e */
- r3 = p_ref[2];
- r2 |= (r3 << 16); /* 0,g,0,e */
- r3 = p_ref[1];
- r4 = p_ref[3];
- r3 |= (r4 << 16); /* 0,h,0,f */
-
- r4 = r0 + r3; /* c+h, a+f */
- r5 = r0 + r1; /* c+d, a+b */
- r6 = r2 + r3; /* g+h, e+f */
- r5 >>= 16;
- r5 |= (r6 << 16); /* e+f, c+d */
- r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */
- r4 += 0x100010; /* +16, +16 */
- r5 = r1 + r2; /* d+g, b+e */
- r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */
- r4 >>= 5;
- r13 |= r4; /* check clipping */
-
- r5 = p_ref[dx+2];
- r6 = p_ref[dx+4];
- r5 |= (r6 << 16);
- r4 += r5;
- r4 += 0x10001;
- r4 = (r4 >> 1) & 0xFF00FF;
-
- r5 = p_ref[4]; /* i */
- r6 = (r5 << 16);
- r5 = r6 | (r2 >> 16);/* 0,i,0,g */
- r5 += r1; /* d+i, b+g */ /* r5 not free */
- r1 >>= 16;
- r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */
- r1 += r2; /* f+g, d+e */
- r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */
- r0 >>= 16;
- r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */
- r0 += r3; /* e+h, c+f */
- r5 += 0x100010; /* 16,16 */
- r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */
- r5 >>= 5;
- r13 |= r5; /* check clipping */
-
- r0 = p_ref[dx+3];
- r1 = p_ref[dx+5];
- r0 |= (r1 << 16);
- r5 += r0;
- r5 += 0x10001;
- r5 = (r5 >> 1) & 0xFF00FF;
-
- r4 |= (r5 << 8); /* pack them together */
- *p_cur++ = r4;
- r1 = r3;
- r0 = r2;
- }
- p_cur += curr_offset; /* move to the next line */
- p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */
-
- if (r13&0xFF000700) /* need clipping */
- {
- /* move back to the beginning of the line */
- p_ref -= (ref_offset + blkwidth); /* input */
- p_cur -= (outpitch >> 2);
-
- tmp = (uint32)(p_ref + blkwidth);
- for (; (uint32)p_ref < tmp;)
- {
-
- r0 = *p_ref++;
- r1 = *p_ref++;
- r2 = *p_ref++;
- r3 = *p_ref++;
- r4 = *p_ref++;
- /* first pixel */
- r5 = *p_ref++;
- result = (r0 + r5);
- r0 = (r1 + r4);
- result -= (r0 * 5);//result -= r0; result -= (r0<<2);
- r0 = (r2 + r3);
- result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- /* 3/4 pel, no need to clip */
- result = (result + p_ref[dx] + 1);
- pkres = (result >> 1) ;
- /* second pixel */
- r0 = *p_ref++;
- result = (r1 + r0);
- r1 = (r2 + r5);
- result -= (r1 * 5);//result -= r1; result -= (r1<<2);
- r1 = (r3 + r4);
- result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- /* 3/4 pel, no need to clip */
- result = (result + p_ref[dx] + 1);
- result = (result >> 1);
- pkres |= (result << 8);
- /* third pixel */
- r1 = *p_ref++;
- result = (r2 + r1);
- r2 = (r3 + r0);
- result -= (r2 * 5);//result -= r2; result -= (r2<<2);
- r2 = (r4 + r5);
- result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- /* 3/4 pel, no need to clip */
- result = (result + p_ref[dx] + 1);
- result = (result >> 1);
- pkres |= (result << 16);
- /* fourth pixel */
- r2 = *p_ref++;
- result = (r3 + r2);
- r3 = (r4 + r1);
- result -= (r3 * 5);//result -= r3; result -= (r3<<2);
- r3 = (r5 + r0);
- result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- /* 3/4 pel, no need to clip */
- result = (result + p_ref[dx] + 1);
- result = (result >> 1);
- pkres |= (result << 24);
- *p_cur++ = pkres; /* write 4 pixels */
- p_ref -= 5; /* offset back to the middle of filter */
- }
- p_cur += curr_offset; /* move to the next line */
- p_ref += ref_offset; /* move to the next line */
- }
- }
- }
- else
- {
- p_ref -= 2;
- r13 = 0;
- for (j = blkheight; j > 0; j--)
- {
- tmp = (uint32)(p_ref + blkwidth);
- r0 = p_ref[0];
- r1 = p_ref[2];
- r0 |= (r1 << 16); /* 0,c,0,a */
- r1 = p_ref[1];
- r2 = p_ref[3];
- r1 |= (r2 << 16); /* 0,d,0,b */
- while ((uint32)p_ref < tmp)
- {
- r2 = *(p_ref += 4); /* move pointer to e */
- r3 = p_ref[2];
- r2 |= (r3 << 16); /* 0,g,0,e */
- r3 = p_ref[1];
- r4 = p_ref[3];
- r3 |= (r4 << 16); /* 0,h,0,f */
-
- r4 = r0 + r3; /* c+h, a+f */
- r5 = r0 + r1; /* c+d, a+b */
- r6 = r2 + r3; /* g+h, e+f */
- r5 >>= 16;
- r5 |= (r6 << 16); /* e+f, c+d */
- r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */
- r4 += 0x100010; /* +16, +16 */
- r5 = r1 + r2; /* d+g, b+e */
- r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */
- r4 >>= 5;
- r13 |= r4; /* check clipping */
- r4 &= 0xFF00FF; /* mask */
-
- r5 = p_ref[4]; /* i */
- r6 = (r5 << 16);
- r5 = r6 | (r2 >> 16);/* 0,i,0,g */
- r5 += r1; /* d+i, b+g */ /* r5 not free */
- r1 >>= 16;
- r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */
- r1 += r2; /* f+g, d+e */
- r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */
- r0 >>= 16;
- r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */
- r0 += r3; /* e+h, c+f */
- r5 += 0x100010; /* 16,16 */
- r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */
- r5 >>= 5;
- r13 |= r5; /* check clipping */
- r5 &= 0xFF00FF; /* mask */
-
- r4 |= (r5 << 8); /* pack them together */
- *p_cur++ = r4;
- r1 = r3;
- r0 = r2;
- }
- p_cur += curr_offset; /* move to the next line */
- p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */
-
- if (r13&0xFF000700) /* need clipping */
- {
- /* move back to the beginning of the line */
- p_ref -= (ref_offset + blkwidth); /* input */
- p_cur -= (outpitch >> 2);
-
- tmp = (uint32)(p_ref + blkwidth);
- for (; (uint32)p_ref < tmp;)
- {
-
- r0 = *p_ref++;
- r1 = *p_ref++;
- r2 = *p_ref++;
- r3 = *p_ref++;
- r4 = *p_ref++;
- /* first pixel */
- r5 = *p_ref++;
- result = (r0 + r5);
- r0 = (r1 + r4);
- result -= (r0 * 5);//result -= r0; result -= (r0<<2);
- r0 = (r2 + r3);
- result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- pkres = result;
- /* second pixel */
- r0 = *p_ref++;
- result = (r1 + r0);
- r1 = (r2 + r5);
- result -= (r1 * 5);//result -= r1; result -= (r1<<2);
- r1 = (r3 + r4);
- result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- pkres |= (result << 8);
- /* third pixel */
- r1 = *p_ref++;
- result = (r2 + r1);
- r2 = (r3 + r0);
- result -= (r2 * 5);//result -= r2; result -= (r2<<2);
- r2 = (r4 + r5);
- result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- pkres |= (result << 16);
- /* fourth pixel */
- r2 = *p_ref++;
- result = (r3 + r2);
- r3 = (r4 + r1);
- result -= (r3 * 5);//result -= r3; result -= (r3<<2);
- r3 = (r5 + r0);
- result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- pkres |= (result << 24);
- *p_cur++ = pkres; /* write 4 pixels */
- p_ref -= 5;
- }
- p_cur += curr_offset; /* move to the next line */
- p_ref += ref_offset;
- }
- }
- }
-
- return ;
-}
-
-void HorzInterp2MC(int *in, int inpitch, uint8 *out, int outpitch,
- int blkwidth, int blkheight, int dx)
-{
- int *p_ref;
- uint32 *p_cur;
- uint32 tmp, pkres;
- int result, result2, curr_offset, ref_offset;
- int j, r0, r1, r2, r3, r4, r5;
-
- p_cur = (uint32*)out; /* assume it's word aligned */
- curr_offset = (outpitch - blkwidth) >> 2;
- p_ref = in;
- ref_offset = inpitch - blkwidth;
-
- if (dx&1)
- {
- dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */
-
- for (j = blkheight; j > 0 ; j--)
- {
- tmp = (uint32)(p_ref + blkwidth);
- for (; (uint32)p_ref < tmp;)
- {
-
- r0 = p_ref[-2];
- r1 = p_ref[-1];
- r2 = *p_ref++;
- r3 = *p_ref++;
- r4 = *p_ref++;
- /* first pixel */
- r5 = *p_ref++;
- result = (r0 + r5);
- r0 = (r1 + r4);
- result -= (r0 * 5);//result -= r0; result -= (r0<<2);
- r0 = (r2 + r3);
- result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- result2 = ((p_ref[dx] + 16) >> 5);
- CLIP_RESULT(result2)
- /* 3/4 pel, no need to clip */
- result = (result + result2 + 1);
- pkres = (result >> 1);
- /* second pixel */
- r0 = *p_ref++;
- result = (r1 + r0);
- r1 = (r2 + r5);
- result -= (r1 * 5);//result -= r1; result -= (r1<<2);
- r1 = (r3 + r4);
- result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- result2 = ((p_ref[dx] + 16) >> 5);
- CLIP_RESULT(result2)
- /* 3/4 pel, no need to clip */
- result = (result + result2 + 1);
- result = (result >> 1);
- pkres |= (result << 8);
- /* third pixel */
- r1 = *p_ref++;
- result = (r2 + r1);
- r2 = (r3 + r0);
- result -= (r2 * 5);//result -= r2; result -= (r2<<2);
- r2 = (r4 + r5);
- result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- result2 = ((p_ref[dx] + 16) >> 5);
- CLIP_RESULT(result2)
- /* 3/4 pel, no need to clip */
- result = (result + result2 + 1);
- result = (result >> 1);
- pkres |= (result << 16);
- /* fourth pixel */
- r2 = *p_ref++;
- result = (r3 + r2);
- r3 = (r4 + r1);
- result -= (r3 * 5);//result -= r3; result -= (r3<<2);
- r3 = (r5 + r0);
- result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- result2 = ((p_ref[dx] + 16) >> 5);
- CLIP_RESULT(result2)
- /* 3/4 pel, no need to clip */
- result = (result + result2 + 1);
- result = (result >> 1);
- pkres |= (result << 24);
- *p_cur++ = pkres; /* write 4 pixels */
- p_ref -= 3; /* offset back to the middle of filter */
- }
- p_cur += curr_offset; /* move to the next line */
- p_ref += ref_offset; /* move to the next line */
- }
- }
- else
- {
- for (j = blkheight; j > 0 ; j--)
- {
- tmp = (uint32)(p_ref + blkwidth);
- for (; (uint32)p_ref < tmp;)
- {
-
- r0 = p_ref[-2];
- r1 = p_ref[-1];
- r2 = *p_ref++;
- r3 = *p_ref++;
- r4 = *p_ref++;
- /* first pixel */
- r5 = *p_ref++;
- result = (r0 + r5);
- r0 = (r1 + r4);
- result -= (r0 * 5);//result -= r0; result -= (r0<<2);
- r0 = (r2 + r3);
- result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- pkres = result;
- /* second pixel */
- r0 = *p_ref++;
- result = (r1 + r0);
- r1 = (r2 + r5);
- result -= (r1 * 5);//result -= r1; result -= (r1<<2);
- r1 = (r3 + r4);
- result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- pkres |= (result << 8);
- /* third pixel */
- r1 = *p_ref++;
- result = (r2 + r1);
- r2 = (r3 + r0);
- result -= (r2 * 5);//result -= r2; result -= (r2<<2);
- r2 = (r4 + r5);
- result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- pkres |= (result << 16);
- /* fourth pixel */
- r2 = *p_ref++;
- result = (r3 + r2);
- r3 = (r4 + r1);
- result -= (r3 * 5);//result -= r3; result -= (r3<<2);
- r3 = (r5 + r0);
- result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- pkres |= (result << 24);
- *p_cur++ = pkres; /* write 4 pixels */
- p_ref -= 3; /* offset back to the middle of filter */
- }
- p_cur += curr_offset; /* move to the next line */
- p_ref += ref_offset; /* move to the next line */
- }
- }
-
- return ;
-}
-
-void HorzInterp3MC(uint8 *in, int inpitch, int *out, int outpitch,
- int blkwidth, int blkheight)
-{
- uint8 *p_ref;
- int *p_cur;
- uint32 tmp;
- int result, curr_offset, ref_offset;
- int j, r0, r1, r2, r3, r4, r5;
-
- p_cur = out;
- curr_offset = (outpitch - blkwidth);
- p_ref = in;
- ref_offset = inpitch - blkwidth;
-
- for (j = blkheight; j > 0 ; j--)
- {
- tmp = (uint32)(p_ref + blkwidth);
- for (; (uint32)p_ref < tmp;)
- {
-
- r0 = p_ref[-2];
- r1 = p_ref[-1];
- r2 = *p_ref++;
- r3 = *p_ref++;
- r4 = *p_ref++;
- /* first pixel */
- r5 = *p_ref++;
- result = (r0 + r5);
- r0 = (r1 + r4);
- result -= (r0 * 5);//result -= r0; result -= (r0<<2);
- r0 = (r2 + r3);
- result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
- *p_cur++ = result;
- /* second pixel */
- r0 = *p_ref++;
- result = (r1 + r0);
- r1 = (r2 + r5);
- result -= (r1 * 5);//result -= r1; result -= (r1<<2);
- r1 = (r3 + r4);
- result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
- *p_cur++ = result;
- /* third pixel */
- r1 = *p_ref++;
- result = (r2 + r1);
- r2 = (r3 + r0);
- result -= (r2 * 5);//result -= r2; result -= (r2<<2);
- r2 = (r4 + r5);
- result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
- *p_cur++ = result;
- /* fourth pixel */
- r2 = *p_ref++;
- result = (r3 + r2);
- r3 = (r4 + r1);
- result -= (r3 * 5);//result -= r3; result -= (r3<<2);
- r3 = (r5 + r0);
- result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
- *p_cur++ = result;
- p_ref -= 3; /* move back to the middle of the filter */
- }
- p_cur += curr_offset; /* move to the next line */
- p_ref += ref_offset;
- }
-
- return ;
-}
-void VertInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch,
- int blkwidth, int blkheight, int dy)
-{
- uint8 *p_cur, *p_ref;
- uint32 tmp;
- int result, curr_offset, ref_offset;
- int j, i;
- int32 r0, r1, r2, r3, r4, r5, r6, r7, r8, r13;
- uint8 tmp_in[24][24];
-
- /* not word-aligned */
- if (((uint32)in)&0x3)
- {
- CreateAlign(in, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5);
- in = &tmp_in[2][0];
- inpitch = 24;
- }
- p_cur = out;
- curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */
- ref_offset = blkheight * inpitch; /* for limit */
-
- curr_offset += 3;
-
- if (dy&1)
- {
- dy = (dy >> 1) ? 0 : -inpitch;
-
- for (j = 0; j < blkwidth; j += 4, in += 4)
- {
- r13 = 0;
- p_ref = in;
- p_cur -= outpitch; /* compensate for the first offset */
- tmp = (uint32)(p_ref + ref_offset); /* limit */
- while ((uint32)p_ref < tmp) /* the loop un-rolled */
- {
- r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */
- p_ref += inpitch;
- r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */
- r0 &= 0xFF00FF;
-
- r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */
- r7 = (r1 >> 8) & 0xFF00FF;
- r1 &= 0xFF00FF;
-
- r0 += r1;
- r6 += r7;
-
- r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */
- r8 = (r2 >> 8) & 0xFF00FF;
- r2 &= 0xFF00FF;
-
- r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */
- r7 = (r1 >> 8) & 0xFF00FF;
- r1 &= 0xFF00FF;
- r1 += r2;
-
- r7 += r8;
-
- r0 += 20 * r1;
- r6 += 20 * r7;
- r0 += 0x100010;
- r6 += 0x100010;
-
- r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */
- r8 = (r2 >> 8) & 0xFF00FF;
- r2 &= 0xFF00FF;
-
- r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */
- r7 = (r1 >> 8) & 0xFF00FF;
- r1 &= 0xFF00FF;
- r1 += r2;
-
- r7 += r8;
-
- r0 -= 5 * r1;
- r6 -= 5 * r7;
-
- r0 >>= 5;
- r6 >>= 5;
- /* clip */
- r13 |= r6;
- r13 |= r0;
- //CLIPPACK(r6,result)
-
- r1 = *((uint32*)(p_ref + dy));
- r2 = (r1 >> 8) & 0xFF00FF;
- r1 &= 0xFF00FF;
- r0 += r1;
- r6 += r2;
- r0 += 0x10001;
- r6 += 0x10001;
- r0 = (r0 >> 1) & 0xFF00FF;
- r6 = (r6 >> 1) & 0xFF00FF;
-
- r0 |= (r6 << 8); /* pack it back */
- *((uint32*)(p_cur += outpitch)) = r0;
- }
- p_cur += curr_offset; /* offset to the next pixel */
- if (r13 & 0xFF000700) /* this column need clipping */
- {
- p_cur -= 4;
- for (i = 0; i < 4; i++)
- {
- p_ref = in + i;
- p_cur -= outpitch; /* compensate for the first offset */
-
- tmp = (uint32)(p_ref + ref_offset); /* limit */
- while ((uint32)p_ref < tmp)
- { /* loop un-rolled */
- r0 = *(p_ref - (inpitch << 1));
- r1 = *(p_ref - inpitch);
- r2 = *p_ref;
- r3 = *(p_ref += inpitch); /* modify pointer before loading */
- r4 = *(p_ref += inpitch);
- /* first pixel */
- r5 = *(p_ref += inpitch);
- result = (r0 + r5);
- r0 = (r1 + r4);
- result -= (r0 * 5);//result -= r0; result -= (r0<<2);
- r0 = (r2 + r3);
- result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- /* 3/4 pel, no need to clip */
- result = (result + p_ref[dy-(inpitch<<1)] + 1);
- result = (result >> 1);
- *(p_cur += outpitch) = result;
- /* second pixel */
- r0 = *(p_ref += inpitch);
- result = (r1 + r0);
- r1 = (r2 + r5);
- result -= (r1 * 5);//result -= r1; result -= (r1<<2);
- r1 = (r3 + r4);
- result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- /* 3/4 pel, no need to clip */
- result = (result + p_ref[dy-(inpitch<<1)] + 1);
- result = (result >> 1);
- *(p_cur += outpitch) = result;
- /* third pixel */
- r1 = *(p_ref += inpitch);
- result = (r2 + r1);
- r2 = (r3 + r0);
- result -= (r2 * 5);//result -= r2; result -= (r2<<2);
- r2 = (r4 + r5);
- result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- /* 3/4 pel, no need to clip */
- result = (result + p_ref[dy-(inpitch<<1)] + 1);
- result = (result >> 1);
- *(p_cur += outpitch) = result;
- /* fourth pixel */
- r2 = *(p_ref += inpitch);
- result = (r3 + r2);
- r3 = (r4 + r1);
- result -= (r3 * 5);//result -= r3; result -= (r3<<2);
- r3 = (r5 + r0);
- result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- /* 3/4 pel, no need to clip */
- result = (result + p_ref[dy-(inpitch<<1)] + 1);
- result = (result >> 1);
- *(p_cur += outpitch) = result;
- p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */
- }
- p_cur += (curr_offset - 3);
- }
- }
- }
- }
- else
- {
- for (j = 0; j < blkwidth; j += 4, in += 4)
- {
- r13 = 0;
- p_ref = in;
- p_cur -= outpitch; /* compensate for the first offset */
- tmp = (uint32)(p_ref + ref_offset); /* limit */
- while ((uint32)p_ref < tmp) /* the loop un-rolled */
- {
- r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */
- p_ref += inpitch;
- r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */
- r0 &= 0xFF00FF;
-
- r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */
- r7 = (r1 >> 8) & 0xFF00FF;
- r1 &= 0xFF00FF;
-
- r0 += r1;
- r6 += r7;
-
- r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */
- r8 = (r2 >> 8) & 0xFF00FF;
- r2 &= 0xFF00FF;
-
- r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */
- r7 = (r1 >> 8) & 0xFF00FF;
- r1 &= 0xFF00FF;
- r1 += r2;
-
- r7 += r8;
-
- r0 += 20 * r1;
- r6 += 20 * r7;
- r0 += 0x100010;
- r6 += 0x100010;
-
- r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */
- r8 = (r2 >> 8) & 0xFF00FF;
- r2 &= 0xFF00FF;
-
- r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */
- r7 = (r1 >> 8) & 0xFF00FF;
- r1 &= 0xFF00FF;
- r1 += r2;
-
- r7 += r8;
-
- r0 -= 5 * r1;
- r6 -= 5 * r7;
-
- r0 >>= 5;
- r6 >>= 5;
- /* clip */
- r13 |= r6;
- r13 |= r0;
- //CLIPPACK(r6,result)
- r0 &= 0xFF00FF;
- r6 &= 0xFF00FF;
- r0 |= (r6 << 8); /* pack it back */
- *((uint32*)(p_cur += outpitch)) = r0;
- }
- p_cur += curr_offset; /* offset to the next pixel */
- if (r13 & 0xFF000700) /* this column need clipping */
- {
- p_cur -= 4;
- for (i = 0; i < 4; i++)
- {
- p_ref = in + i;
- p_cur -= outpitch; /* compensate for the first offset */
- tmp = (uint32)(p_ref + ref_offset); /* limit */
- while ((uint32)p_ref < tmp)
- { /* loop un-rolled */
- r0 = *(p_ref - (inpitch << 1));
- r1 = *(p_ref - inpitch);
- r2 = *p_ref;
- r3 = *(p_ref += inpitch); /* modify pointer before loading */
- r4 = *(p_ref += inpitch);
- /* first pixel */
- r5 = *(p_ref += inpitch);
- result = (r0 + r5);
- r0 = (r1 + r4);
- result -= (r0 * 5);//result -= r0; result -= (r0<<2);
- r0 = (r2 + r3);
- result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- *(p_cur += outpitch) = result;
- /* second pixel */
- r0 = *(p_ref += inpitch);
- result = (r1 + r0);
- r1 = (r2 + r5);
- result -= (r1 * 5);//result -= r1; result -= (r1<<2);
- r1 = (r3 + r4);
- result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- *(p_cur += outpitch) = result;
- /* third pixel */
- r1 = *(p_ref += inpitch);
- result = (r2 + r1);
- r2 = (r3 + r0);
- result -= (r2 * 5);//result -= r2; result -= (r2<<2);
- r2 = (r4 + r5);
- result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- *(p_cur += outpitch) = result;
- /* fourth pixel */
- r2 = *(p_ref += inpitch);
- result = (r3 + r2);
- r3 = (r4 + r1);
- result -= (r3 * 5);//result -= r3; result -= (r3<<2);
- r3 = (r5 + r0);
- result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- *(p_cur += outpitch) = result;
- p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */
- }
- p_cur += (curr_offset - 3);
- }
- }
- }
- }
-
- return ;
-}
-
-void VertInterp2MC(uint8 *in, int inpitch, int *out, int outpitch,
- int blkwidth, int blkheight)
-{
- int *p_cur;
- uint8 *p_ref;
- uint32 tmp;
- int result, curr_offset, ref_offset;
- int j, r0, r1, r2, r3, r4, r5;
-
- p_cur = out;
- curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */
- ref_offset = blkheight * inpitch; /* for limit */
-
- for (j = 0; j < blkwidth; j++)
- {
- p_cur -= outpitch; /* compensate for the first offset */
- p_ref = in++;
-
- tmp = (uint32)(p_ref + ref_offset); /* limit */
- while ((uint32)p_ref < tmp)
- { /* loop un-rolled */
- r0 = *(p_ref - (inpitch << 1));
- r1 = *(p_ref - inpitch);
- r2 = *p_ref;
- r3 = *(p_ref += inpitch); /* modify pointer before loading */
- r4 = *(p_ref += inpitch);
- /* first pixel */
- r5 = *(p_ref += inpitch);
- result = (r0 + r5);
- r0 = (r1 + r4);
- result -= (r0 * 5);//result -= r0; result -= (r0<<2);
- r0 = (r2 + r3);
- result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
- *(p_cur += outpitch) = result;
- /* second pixel */
- r0 = *(p_ref += inpitch);
- result = (r1 + r0);
- r1 = (r2 + r5);
- result -= (r1 * 5);//result -= r1; result -= (r1<<2);
- r1 = (r3 + r4);
- result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
- *(p_cur += outpitch) = result;
- /* third pixel */
- r1 = *(p_ref += inpitch);
- result = (r2 + r1);
- r2 = (r3 + r0);
- result -= (r2 * 5);//result -= r2; result -= (r2<<2);
- r2 = (r4 + r5);
- result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
- *(p_cur += outpitch) = result;
- /* fourth pixel */
- r2 = *(p_ref += inpitch);
- result = (r3 + r2);
- r3 = (r4 + r1);
- result -= (r3 * 5);//result -= r3; result -= (r3<<2);
- r3 = (r5 + r0);
- result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
- *(p_cur += outpitch) = result;
- p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */
- }
- p_cur += curr_offset;
- }
-
- return ;
-}
-
-void VertInterp3MC(int *in, int inpitch, uint8 *out, int outpitch,
- int blkwidth, int blkheight, int dy)
-{
- uint8 *p_cur;
- int *p_ref;
- uint32 tmp;
- int result, result2, curr_offset, ref_offset;
- int j, r0, r1, r2, r3, r4, r5;
-
- p_cur = out;
- curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */
- ref_offset = blkheight * inpitch; /* for limit */
-
- if (dy&1)
- {
- dy = (dy >> 1) ? -(inpitch << 1) : -(inpitch << 1) - inpitch;
-
- for (j = 0; j < blkwidth; j++)
- {
- p_cur -= outpitch; /* compensate for the first offset */
- p_ref = in++;
-
- tmp = (uint32)(p_ref + ref_offset); /* limit */
- while ((uint32)p_ref < tmp)
- { /* loop un-rolled */
- r0 = *(p_ref - (inpitch << 1));
- r1 = *(p_ref - inpitch);
- r2 = *p_ref;
- r3 = *(p_ref += inpitch); /* modify pointer before loading */
- r4 = *(p_ref += inpitch);
- /* first pixel */
- r5 = *(p_ref += inpitch);
- result = (r0 + r5);
- r0 = (r1 + r4);
- result -= (r0 * 5);//result -= r0; result -= (r0<<2);
- r0 = (r2 + r3);
- result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- result2 = ((p_ref[dy] + 16) >> 5);
- CLIP_RESULT(result2)
- /* 3/4 pel, no need to clip */
- result = (result + result2 + 1);
- result = (result >> 1);
- *(p_cur += outpitch) = result;
- /* second pixel */
- r0 = *(p_ref += inpitch);
- result = (r1 + r0);
- r1 = (r2 + r5);
- result -= (r1 * 5);//result -= r1; result -= (r1<<2);
- r1 = (r3 + r4);
- result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- result2 = ((p_ref[dy] + 16) >> 5);
- CLIP_RESULT(result2)
- /* 3/4 pel, no need to clip */
- result = (result + result2 + 1);
- result = (result >> 1);
- *(p_cur += outpitch) = result;
- /* third pixel */
- r1 = *(p_ref += inpitch);
- result = (r2 + r1);
- r2 = (r3 + r0);
- result -= (r2 * 5);//result -= r2; result -= (r2<<2);
- r2 = (r4 + r5);
- result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- result2 = ((p_ref[dy] + 16) >> 5);
- CLIP_RESULT(result2)
- /* 3/4 pel, no need to clip */
- result = (result + result2 + 1);
- result = (result >> 1);
- *(p_cur += outpitch) = result;
- /* fourth pixel */
- r2 = *(p_ref += inpitch);
- result = (r3 + r2);
- r3 = (r4 + r1);
- result -= (r3 * 5);//result -= r3; result -= (r3<<2);
- r3 = (r5 + r0);
- result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- result2 = ((p_ref[dy] + 16) >> 5);
- CLIP_RESULT(result2)
- /* 3/4 pel, no need to clip */
- result = (result + result2 + 1);
- result = (result >> 1);
- *(p_cur += outpitch) = result;
- p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */
- }
- p_cur += curr_offset;
- }
- }
- else
- {
- for (j = 0; j < blkwidth; j++)
- {
- p_cur -= outpitch; /* compensate for the first offset */
- p_ref = in++;
-
- tmp = (uint32)(p_ref + ref_offset); /* limit */
- while ((uint32)p_ref < tmp)
- { /* loop un-rolled */
- r0 = *(p_ref - (inpitch << 1));
- r1 = *(p_ref - inpitch);
- r2 = *p_ref;
- r3 = *(p_ref += inpitch); /* modify pointer before loading */
- r4 = *(p_ref += inpitch);
- /* first pixel */
- r5 = *(p_ref += inpitch);
- result = (r0 + r5);
- r0 = (r1 + r4);
- result -= (r0 * 5);//result -= r0; result -= (r0<<2);
- r0 = (r2 + r3);
- result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- *(p_cur += outpitch) = result;
- /* second pixel */
- r0 = *(p_ref += inpitch);
- result = (r1 + r0);
- r1 = (r2 + r5);
- result -= (r1 * 5);//result -= r1; result -= (r1<<2);
- r1 = (r3 + r4);
- result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- *(p_cur += outpitch) = result;
- /* third pixel */
- r1 = *(p_ref += inpitch);
- result = (r2 + r1);
- r2 = (r3 + r0);
- result -= (r2 * 5);//result -= r2; result -= (r2<<2);
- r2 = (r4 + r5);
- result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- *(p_cur += outpitch) = result;
- /* fourth pixel */
- r2 = *(p_ref += inpitch);
- result = (r3 + r2);
- r3 = (r4 + r1);
- result -= (r3 * 5);//result -= r3; result -= (r3<<2);
- r3 = (r5 + r0);
- result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
- result = (result + 512) >> 10;
- CLIP_RESULT(result)
- *(p_cur += outpitch) = result;
- p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */
- }
- p_cur += curr_offset;
- }
- }
-
- return ;
-}
-
-void DiagonalInterpMC(uint8 *in1, uint8 *in2, int inpitch,
- uint8 *out, int outpitch,
- int blkwidth, int blkheight)
-{
- int j, i;
- int result;
- uint8 *p_cur, *p_ref, *p_tmp8;
- int curr_offset, ref_offset;
- uint8 tmp_res[24][24], tmp_in[24][24];
- uint32 *p_tmp;
- uint32 tmp, pkres, tmp_result;
- int32 r0, r1, r2, r3, r4, r5;
- int32 r6, r7, r8, r9, r10, r13;
-
- ref_offset = inpitch - blkwidth;
- p_ref = in1 - 2;
- /* perform horizontal interpolation */
- /* not word-aligned */
- /* It is faster to read 1 byte at time to avoid calling CreateAlign */
- /* if(((uint32)p_ref)&0x3)
- {
- CreateAlign(p_ref,inpitch,0,&tmp_in[0][0],blkwidth+8,blkheight);
- p_ref = &tmp_in[0][0];
- ref_offset = 24-blkwidth;
- }*/
-
- p_tmp = (uint32*) & (tmp_res[0][0]);
- for (j = blkheight; j > 0; j--)
- {
- r13 = 0;
- tmp = (uint32)(p_ref + blkwidth);
-
- //r0 = *((uint32*)p_ref); /* d,c,b,a */
- //r1 = (r0>>8)&0xFF00FF; /* 0,d,0,b */
- //r0 &= 0xFF00FF; /* 0,c,0,a */
- /* It is faster to read 1 byte at a time, */
- r0 = p_ref[0];
- r1 = p_ref[2];
- r0 |= (r1 << 16); /* 0,c,0,a */
- r1 = p_ref[1];
- r2 = p_ref[3];
- r1 |= (r2 << 16); /* 0,d,0,b */
-
- while ((uint32)p_ref < tmp)
- {
- //r2 = *((uint32*)(p_ref+=4));/* h,g,f,e */
- //r3 = (r2>>8)&0xFF00FF; /* 0,h,0,f */
- //r2 &= 0xFF00FF; /* 0,g,0,e */
- /* It is faster to read 1 byte at a time, */
- r2 = *(p_ref += 4);
- r3 = p_ref[2];
- r2 |= (r3 << 16); /* 0,g,0,e */
- r3 = p_ref[1];
- r4 = p_ref[3];
- r3 |= (r4 << 16); /* 0,h,0,f */
-
- r4 = r0 + r3; /* c+h, a+f */
- r5 = r0 + r1; /* c+d, a+b */
- r6 = r2 + r3; /* g+h, e+f */
- r5 >>= 16;
- r5 |= (r6 << 16); /* e+f, c+d */
- r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */
- r4 += 0x100010; /* +16, +16 */
- r5 = r1 + r2; /* d+g, b+e */
- r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */
- r4 >>= 5;
- r13 |= r4; /* check clipping */
- r4 &= 0xFF00FF; /* mask */
-
- r5 = p_ref[4]; /* i */
- r6 = (r5 << 16);
- r5 = r6 | (r2 >> 16);/* 0,i,0,g */
- r5 += r1; /* d+i, b+g */ /* r5 not free */
- r1 >>= 16;
- r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */
- r1 += r2; /* f+g, d+e */
- r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */
- r0 >>= 16;
- r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */
- r0 += r3; /* e+h, c+f */
- r5 += 0x100010; /* 16,16 */
- r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */
- r5 >>= 5;
- r13 |= r5; /* check clipping */
- r5 &= 0xFF00FF; /* mask */
-
- r4 |= (r5 << 8); /* pack them together */
- *p_tmp++ = r4;
- r1 = r3;
- r0 = r2;
- }
- p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */
- p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */
-
- if (r13&0xFF000700) /* need clipping */
- {
- /* move back to the beginning of the line */
- p_ref -= (ref_offset + blkwidth); /* input */
- p_tmp -= 6; /* intermediate output */
- tmp = (uint32)(p_ref + blkwidth);
- while ((uint32)p_ref < tmp)
- {
- r0 = *p_ref++;
- r1 = *p_ref++;
- r2 = *p_ref++;
- r3 = *p_ref++;
- r4 = *p_ref++;
- /* first pixel */
- r5 = *p_ref++;
- result = (r0 + r5);
- r0 = (r1 + r4);
- result -= (r0 * 5);//result -= r0; result -= (r0<<2);
- r0 = (r2 + r3);
- result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- pkres = result;
- /* second pixel */
- r0 = *p_ref++;
- result = (r1 + r0);
- r1 = (r2 + r5);
- result -= (r1 * 5);//result -= r1; result -= (r1<<2);
- r1 = (r3 + r4);
- result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- pkres |= (result << 8);
- /* third pixel */
- r1 = *p_ref++;
- result = (r2 + r1);
- r2 = (r3 + r0);
- result -= (r2 * 5);//result -= r2; result -= (r2<<2);
- r2 = (r4 + r5);
- result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- pkres |= (result << 16);
- /* fourth pixel */
- r2 = *p_ref++;
- result = (r3 + r2);
- r3 = (r4 + r1);
- result -= (r3 * 5);//result -= r3; result -= (r3<<2);
- r3 = (r5 + r0);
- result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- pkres |= (result << 24);
-
- *p_tmp++ = pkres; /* write 4 pixel */
- p_ref -= 5;
- }
- p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */
- p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */
- }
- }
-
- /* perform vertical interpolation */
- /* not word-aligned */
- if (((uint32)in2)&0x3)
- {
- CreateAlign(in2, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5);
- in2 = &tmp_in[2][0];
- inpitch = 24;
- }
-
- p_cur = out;
- curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically up and one pixel right */
- pkres = blkheight * inpitch; /* reuse it for limit */
-
- curr_offset += 3;
-
- for (j = 0; j < blkwidth; j += 4, in2 += 4)
- {
- r13 = 0;
- p_ref = in2;
- p_tmp8 = &(tmp_res[0][j]); /* intermediate result */
- p_tmp8 -= 24; /* compensate for the first offset */
- p_cur -= outpitch; /* compensate for the first offset */
- tmp = (uint32)(p_ref + pkres); /* limit */
- while ((uint32)p_ref < tmp) /* the loop un-rolled */
- {
- /* Read 1 byte at a time is too slow, too many read and pack ops, need to call CreateAlign, */
- /*p_ref8 = p_ref-(inpitch<<1); r0 = p_ref8[0]; r1 = p_ref8[2];
- r0 |= (r1<<16); r6 = p_ref8[1]; r1 = p_ref8[3];
- r6 |= (r1<<16); p_ref+=inpitch; */
- r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */
- p_ref += inpitch;
- r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */
- r0 &= 0xFF00FF;
-
- /*p_ref8 = p_ref+(inpitch<<1);
- r1 = p_ref8[0]; r7 = p_ref8[2]; r1 |= (r7<<16);
- r7 = p_ref8[1]; r2 = p_ref8[3]; r7 |= (r2<<16);*/
- r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */
- r7 = (r1 >> 8) & 0xFF00FF;
- r1 &= 0xFF00FF;
-
- r0 += r1;
- r6 += r7;
-
- /*r2 = p_ref[0]; r8 = p_ref[2]; r2 |= (r8<<16);
- r8 = p_ref[1]; r1 = p_ref[3]; r8 |= (r1<<16);*/
- r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */
- r8 = (r2 >> 8) & 0xFF00FF;
- r2 &= 0xFF00FF;
-
- /*p_ref8 = p_ref-inpitch; r1 = p_ref8[0]; r7 = p_ref8[2];
- r1 |= (r7<<16); r1 += r2; r7 = p_ref8[1];
- r2 = p_ref8[3]; r7 |= (r2<<16);*/
- r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */
- r7 = (r1 >> 8) & 0xFF00FF;
- r1 &= 0xFF00FF;
- r1 += r2;
-
- r7 += r8;
-
- r0 += 20 * r1;
- r6 += 20 * r7;
- r0 += 0x100010;
- r6 += 0x100010;
-
- /*p_ref8 = p_ref-(inpitch<<1); r2 = p_ref8[0]; r8 = p_ref8[2];
- r2 |= (r8<<16); r8 = p_ref8[1]; r1 = p_ref8[3]; r8 |= (r1<<16);*/
- r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */
- r8 = (r2 >> 8) & 0xFF00FF;
- r2 &= 0xFF00FF;
-
- /*p_ref8 = p_ref+inpitch; r1 = p_ref8[0]; r7 = p_ref8[2];
- r1 |= (r7<<16); r1 += r2; r7 = p_ref8[1];
- r2 = p_ref8[3]; r7 |= (r2<<16);*/
- r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */
- r7 = (r1 >> 8) & 0xFF00FF;
- r1 &= 0xFF00FF;
- r1 += r2;
-
- r7 += r8;
-
- r0 -= 5 * r1;
- r6 -= 5 * r7;
-
- r0 >>= 5;
- r6 >>= 5;
- /* clip */
- r13 |= r6;
- r13 |= r0;
- //CLIPPACK(r6,result)
- /* add with horizontal results */
- r10 = *((uint32*)(p_tmp8 += 24));
- r9 = (r10 >> 8) & 0xFF00FF;
- r10 &= 0xFF00FF;
-
- r0 += r10;
- r0 += 0x10001;
- r0 = (r0 >> 1) & 0xFF00FF; /* mask to 8 bytes */
-
- r6 += r9;
- r6 += 0x10001;
- r6 = (r6 >> 1) & 0xFF00FF; /* mask to 8 bytes */
-
- r0 |= (r6 << 8); /* pack it back */
- *((uint32*)(p_cur += outpitch)) = r0;
- }
- p_cur += curr_offset; /* offset to the next pixel */
- if (r13 & 0xFF000700) /* this column need clipping */
- {
- p_cur -= 4;
- for (i = 0; i < 4; i++)
- {
- p_ref = in2 + i;
- p_tmp8 = &(tmp_res[0][j+i]); /* intermediate result */
- p_tmp8 -= 24; /* compensate for the first offset */
- p_cur -= outpitch; /* compensate for the first offset */
- tmp = (uint32)(p_ref + pkres); /* limit */
- while ((uint32)p_ref < tmp) /* the loop un-rolled */
- {
- r0 = *(p_ref - (inpitch << 1));
- r1 = *(p_ref - inpitch);
- r2 = *p_ref;
- r3 = *(p_ref += inpitch); /* modify pointer before loading */
- r4 = *(p_ref += inpitch);
- /* first pixel */
- r5 = *(p_ref += inpitch);
- result = (r0 + r5);
- r0 = (r1 + r4);
- result -= (r0 * 5);//result -= r0; result -= (r0<<2);
- r0 = (r2 + r3);
- result += (r0 * 20);//result += (r0<<4); result += (r0<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- tmp_result = *(p_tmp8 += 24); /* modify pointer before loading */
- result = (result + tmp_result + 1); /* no clip */
- result = (result >> 1);
- *(p_cur += outpitch) = result;
- /* second pixel */
- r0 = *(p_ref += inpitch);
- result = (r1 + r0);
- r1 = (r2 + r5);
- result -= (r1 * 5);//result -= r1; result -= (r1<<2);
- r1 = (r3 + r4);
- result += (r1 * 20);//result += (r1<<4); result += (r1<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- tmp_result = *(p_tmp8 += 24); /* intermediate result */
- result = (result + tmp_result + 1); /* no clip */
- result = (result >> 1);
- *(p_cur += outpitch) = result;
- /* third pixel */
- r1 = *(p_ref += inpitch);
- result = (r2 + r1);
- r2 = (r3 + r0);
- result -= (r2 * 5);//result -= r2; result -= (r2<<2);
- r2 = (r4 + r5);
- result += (r2 * 20);//result += (r2<<4); result += (r2<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- tmp_result = *(p_tmp8 += 24); /* intermediate result */
- result = (result + tmp_result + 1); /* no clip */
- result = (result >> 1);
- *(p_cur += outpitch) = result;
- /* fourth pixel */
- r2 = *(p_ref += inpitch);
- result = (r3 + r2);
- r3 = (r4 + r1);
- result -= (r3 * 5);//result -= r3; result -= (r3<<2);
- r3 = (r5 + r0);
- result += (r3 * 20);//result += (r3<<4); result += (r3<<2);
- result = (result + 16) >> 5;
- CLIP_RESULT(result)
- tmp_result = *(p_tmp8 += 24); /* intermediate result */
- result = (result + tmp_result + 1); /* no clip */
- result = (result >> 1);
- *(p_cur += outpitch) = result;
- p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */
- }
- p_cur += (curr_offset - 3);
- }
- }
- }
-
- return ;
-}
-
-/* position G */
-void FullPelMC(uint8 *in, int inpitch, uint8 *out, int outpitch,
- int blkwidth, int blkheight)
-{
- int i, j;
- int offset_in = inpitch - blkwidth;
- int offset_out = outpitch - blkwidth;
- uint32 temp;
- uint8 byte;
-
- if (((uint32)in)&3)
- {
- for (j = blkheight; j > 0; j--)
- {
- for (i = blkwidth; i > 0; i -= 4)
- {
- temp = *in++;
- byte = *in++;
- temp |= (byte << 8);
- byte = *in++;
- temp |= (byte << 16);
- byte = *in++;
- temp |= (byte << 24);
-
- *((uint32*)out) = temp; /* write 4 bytes */
- out += 4;
- }
- out += offset_out;
- in += offset_in;
- }
- }
- else
- {
- for (j = blkheight; j > 0; j--)
- {
- for (i = blkwidth; i > 0; i -= 4)
- {
- temp = *((uint32*)in);
- *((uint32*)out) = temp;
- in += 4;
- out += 4;
- }
- out += offset_out;
- in += offset_in;
- }
- }
- return ;
-}
-
-void ChromaMotionComp(uint8 *ref, int picwidth, int picheight,
- int x_pos, int y_pos,
- uint8 *pred, int pred_pitch,
- int blkwidth, int blkheight)
-{
- int dx, dy;
- int offset_dx, offset_dy;
- int index;
- uint8 temp[24][24];
-
- dx = x_pos & 7;
- dy = y_pos & 7;
- offset_dx = (dx + 7) >> 3;
- offset_dy = (dy + 7) >> 3;
- x_pos = x_pos >> 3; /* round it to full-pel resolution */
- y_pos = y_pos >> 3;
-
- if ((x_pos >= 0 && x_pos + blkwidth + offset_dx <= picwidth) && (y_pos >= 0 && y_pos + blkheight + offset_dy <= picheight))
- {
- ref += y_pos * picwidth + x_pos;
- }
- else
- {
- CreatePad(ref, picwidth, picheight, x_pos, y_pos, &temp[0][0], blkwidth + offset_dx, blkheight + offset_dy);
- ref = &temp[0][0];
- picwidth = 24;
- }
-
- index = offset_dx + (offset_dy << 1) + ((blkwidth << 1) & 0x7);
-
- (*(ChromaMC_SIMD[index]))(ref, picwidth , dx, dy, pred, pred_pitch, blkwidth, blkheight);
- return ;
-}
-
-
-/* SIMD routines, unroll the loops in vertical direction, decreasing loops (things to be done) */
-void ChromaDiagonalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
- uint8 *pOut, int predPitch, int blkwidth, int blkheight)
-{
- int32 r0, r1, r2, r3, result0, result1;
- uint8 temp[288];
- uint8 *ref, *out;
- int i, j;
- int dx_8 = 8 - dx;
- int dy_8 = 8 - dy;
-
- /* horizontal first */
- out = temp;
- for (i = 0; i < blkheight + 1; i++)
- {
- ref = pRef;
- r0 = ref[0];
- for (j = 0; j < blkwidth; j += 4)
- {
- r0 |= (ref[2] << 16);
- result0 = dx_8 * r0;
-
- r1 = ref[1] | (ref[3] << 16);
- result0 += dx * r1;
- *(int32 *)out = result0;
-
- result0 = dx_8 * r1;
-
- r2 = ref[4];
- r0 = r0 >> 16;
- r1 = r0 | (r2 << 16);
- result0 += dx * r1;
- *(int32 *)(out + 16) = result0;
-
- ref += 4;
- out += 4;
- r0 = r2;
- }
- pRef += srcPitch;
- out += (32 - blkwidth);
- }
-
-// pRef -= srcPitch*(blkheight+1);
- ref = temp;
-
- for (j = 0; j < blkwidth; j += 4)
- {
- r0 = *(int32 *)ref;
- r1 = *(int32 *)(ref + 16);
- ref += 32;
- out = pOut;
- for (i = 0; i < (blkheight >> 1); i++)
- {
- result0 = dy_8 * r0 + 0x00200020;
- r2 = *(int32 *)ref;
- result0 += dy * r2;
- result0 >>= 6;
- result0 &= 0x00FF00FF;
- r0 = r2;
-
- result1 = dy_8 * r1 + 0x00200020;
- r3 = *(int32 *)(ref + 16);
- result1 += dy * r3;
- result1 >>= 6;
- result1 &= 0x00FF00FF;
- r1 = r3;
- *(int32 *)out = result0 | (result1 << 8);
- out += predPitch;
- ref += 32;
-
- result0 = dy_8 * r0 + 0x00200020;
- r2 = *(int32 *)ref;
- result0 += dy * r2;
- result0 >>= 6;
- result0 &= 0x00FF00FF;
- r0 = r2;
-
- result1 = dy_8 * r1 + 0x00200020;
- r3 = *(int32 *)(ref + 16);
- result1 += dy * r3;
- result1 >>= 6;
- result1 &= 0x00FF00FF;
- r1 = r3;
- *(int32 *)out = result0 | (result1 << 8);
- out += predPitch;
- ref += 32;
- }
- pOut += 4;
- ref = temp + 4; /* since it can only iterate twice max */
- }
- return;
-}
-
-void ChromaHorizontalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
- uint8 *pOut, int predPitch, int blkwidth, int blkheight)
-{
- OSCL_UNUSED_ARG(dy);
- int32 r0, r1, r2, result0, result1;
- uint8 *ref, *out;
- int i, j;
- int dx_8 = 8 - dx;
-
- /* horizontal first */
- for (i = 0; i < blkheight; i++)
- {
- ref = pRef;
- out = pOut;
-
- r0 = ref[0];
- for (j = 0; j < blkwidth; j += 4)
- {
- r0 |= (ref[2] << 16);
- result0 = dx_8 * r0 + 0x00040004;
-
- r1 = ref[1] | (ref[3] << 16);
- result0 += dx * r1;
- result0 >>= 3;
- result0 &= 0x00FF00FF;
-
- result1 = dx_8 * r1 + 0x00040004;
-
- r2 = ref[4];
- r0 = r0 >> 16;
- r1 = r0 | (r2 << 16);
- result1 += dx * r1;
- result1 >>= 3;
- result1 &= 0x00FF00FF;
-
- *(int32 *)out = result0 | (result1 << 8);
-
- ref += 4;
- out += 4;
- r0 = r2;
- }
-
- pRef += srcPitch;
- pOut += predPitch;
- }
- return;
-}
-
-void ChromaVerticalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
- uint8 *pOut, int predPitch, int blkwidth, int blkheight)
-{
- OSCL_UNUSED_ARG(dx);
- int32 r0, r1, r2, r3, result0, result1;
- int i, j;
- uint8 *ref, *out;
- int dy_8 = 8 - dy;
- /* vertical first */
- for (i = 0; i < blkwidth; i += 4)
- {
- ref = pRef;
- out = pOut;
-
- r0 = ref[0] | (ref[2] << 16);
- r1 = ref[1] | (ref[3] << 16);
- ref += srcPitch;
- for (j = 0; j < blkheight; j++)
- {
- result0 = dy_8 * r0 + 0x00040004;
- r2 = ref[0] | (ref[2] << 16);
- result0 += dy * r2;
- result0 >>= 3;
- result0 &= 0x00FF00FF;
- r0 = r2;
-
- result1 = dy_8 * r1 + 0x00040004;
- r3 = ref[1] | (ref[3] << 16);
- result1 += dy * r3;
- result1 >>= 3;
- result1 &= 0x00FF00FF;
- r1 = r3;
- *(int32 *)out = result0 | (result1 << 8);
- ref += srcPitch;
- out += predPitch;
- }
- pOut += 4;
- pRef += 4;
- }
- return;
-}
-
-void ChromaDiagonalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
- uint8 *pOut, int predPitch, int blkwidth, int blkheight)
-{
- OSCL_UNUSED_ARG(blkwidth);
- int32 r0, r1, temp0, temp1, result;
- int32 temp[9];
- int32 *out;
- int i, r_temp;
- int dy_8 = 8 - dy;
-
- /* horizontal first */
- out = temp;
- for (i = 0; i < blkheight + 1; i++)
- {
- r_temp = pRef[1];
- temp0 = (pRef[0] << 3) + dx * (r_temp - pRef[0]);
- temp1 = (r_temp << 3) + dx * (pRef[2] - r_temp);
- r0 = temp0 | (temp1 << 16);
- *out++ = r0;
- pRef += srcPitch;
- }
-
- pRef -= srcPitch * (blkheight + 1);
-
- out = temp;
-
- r0 = *out++;
-
- for (i = 0; i < blkheight; i++)
- {
- result = dy_8 * r0 + 0x00200020;
- r1 = *out++;
- result += dy * r1;
- result >>= 6;
- result &= 0x00FF00FF;
- *(int16 *)pOut = (result >> 8) | (result & 0xFF);
- r0 = r1;
- pOut += predPitch;
- }
- return;
-}
-
-void ChromaHorizontalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
- uint8 *pOut, int predPitch, int blkwidth, int blkheight)
-{
- OSCL_UNUSED_ARG(dy);
- OSCL_UNUSED_ARG(blkwidth);
- int i, temp, temp0, temp1;
-
- /* horizontal first */
- for (i = 0; i < blkheight; i++)
- {
- temp = pRef[1];
- temp0 = ((pRef[0] << 3) + dx * (temp - pRef[0]) + 4) >> 3;
- temp1 = ((temp << 3) + dx * (pRef[2] - temp) + 4) >> 3;
-
- *(int16 *)pOut = temp0 | (temp1 << 8);
- pRef += srcPitch;
- pOut += predPitch;
-
- }
- return;
-}
-void ChromaVerticalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
- uint8 *pOut, int predPitch, int blkwidth, int blkheight)
-{
- OSCL_UNUSED_ARG(dx);
- OSCL_UNUSED_ARG(blkwidth);
- int32 r0, r1, result;
- int i;
- int dy_8 = 8 - dy;
- r0 = pRef[0] | (pRef[1] << 16);
- pRef += srcPitch;
- for (i = 0; i < blkheight; i++)
- {
- result = dy_8 * r0 + 0x00040004;
- r1 = pRef[0] | (pRef[1] << 16);
- result += dy * r1;
- result >>= 3;
- result &= 0x00FF00FF;
- *(int16 *)pOut = (result >> 8) | (result & 0xFF);
- r0 = r1;
- pRef += srcPitch;
- pOut += predPitch;
- }
- return;
-}
-
-void ChromaFullMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
- uint8 *pOut, int predPitch, int blkwidth, int blkheight)
-{
- OSCL_UNUSED_ARG(dx);
- OSCL_UNUSED_ARG(dy);
- int i, j;
- int offset_in = srcPitch - blkwidth;
- int offset_out = predPitch - blkwidth;
- uint16 temp;
- uint8 byte;
-
- if (((uint32)pRef)&1)
- {
- for (j = blkheight; j > 0; j--)
- {
- for (i = blkwidth; i > 0; i -= 2)
- {
- temp = *pRef++;
- byte = *pRef++;
- temp |= (byte << 8);
- *((uint16*)pOut) = temp; /* write 2 bytes */
- pOut += 2;
- }
- pOut += offset_out;
- pRef += offset_in;
- }
- }
- else
- {
- for (j = blkheight; j > 0; j--)
- {
- for (i = blkwidth; i > 0; i -= 2)
- {
- temp = *((uint16*)pRef);
- *((uint16*)pOut) = temp;
- pRef += 2;
- pOut += 2;
- }
- pOut += offset_out;
- pRef += offset_in;
- }
- }
- return ;
-}