/* ------------------------------------------------------------------ * Copyright (C) 1998-2009 PacketVideo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. * See the License for the specific language governing permissions * and limitations under the License. * ------------------------------------------------------------------- */ #include "mp4enc_lib.h" #include "mp4lib_int.h" #include "dct_inline.h" #define FDCT_SHIFT 10 #ifdef __cplusplus extern "C" { #endif /**************************************************************************/ /* Function: BlockDCT_AANwSub Date: 7/31/01 Input: Output: out[64] ==> next block Purpose: Do subtraction for zero MV first Modified: **************************************************************************/ Void BlockDCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width) { Short *dst; Int k0, k1, k2, k3, k4, k5, k6, k7; Int round; Int k12 = 0x022A02D4; Int k14 = 0x0188053A; Int abs_sum; Int mask; Int tmp, tmp2; Int ColTh; dst = out + 64 ; ColTh = *dst; out += 128; round = 1 << (FDCT_SHIFT - 1); do /* fdct_nextrow */ { /* assuming the block is word-aligned */ mask = 0x1FE; tmp = *((Int*) cur); /* contains 4 pixels */ tmp2 = *((Int*) pred); /* prediction 4 pixels */ k0 = tmp2 & 0xFF; k1 = mask & (tmp << 1); k0 = k1 - (k0 << 1); k1 = (tmp2 >> 8) & 0xFF; k2 = mask & (tmp >> 7); k1 = k2 - (k1 << 1); k2 = (tmp2 >> 16) & 0xFF; k3 = mask & (tmp >> 15); k2 = k3 - (k2 << 1); k3 = (tmp2 >> 24) & 0xFF; k4 = mask & (tmp >> 23); k3 = k4 - (k3 << 1); tmp = *((Int*)(cur + 4)); /* another 4 pixels */ tmp2 = *((Int*)(pred + 4)); k4 = tmp2 & 0xFF; k5 = mask & (tmp << 1); k4 = k5 - (k4 << 1); k5 = (tmp2 >> 8) & 0xFF; k6 = mask & (tmp >> 7); k5 = k6 - (k5 << 1); k6 = (tmp2 >> 16) & 0xFF; k7 = mask & (tmp >> 15); k6 = k7 - (k6 << 1); k7 = (tmp2 >> 24) & 0xFF; tmp = mask & (tmp >> 23); k7 = tmp - (k7 << 1); cur += width; pred += 16; /* fdct_1 */ k0 = k0 + k7; k7 = k0 - (k7 << 1); k1 = k1 + k6; k6 = k1 - (k6 << 1); k2 = k2 + k5; k5 = k2 - (k5 << 1); k3 = k3 + k4; k4 = k3 - (k4 << 1); k0 = k0 + k3; k3 = k0 - (k3 << 1); k1 = k1 + k2; k2 = k1 - (k2 << 1); k0 = k0 + k1; k1 = k0 - (k1 << 1); /**********/ dst[0] = k0; dst[4] = k1; /* col. 4 */ /* fdct_2 */ k4 = k4 + k5; k5 = k5 + k6; k6 = k6 + k7; k2 = k2 + k3; /* MUL2C k2,k5,724,FDCT_SHIFT */ /* k0, k1 become scratch */ /* assume FAST MULTIPLY */ k1 = mla724(k12, k5, round); k0 = mla724(k12, k2, round); k5 = k1 >> FDCT_SHIFT; k2 = k0 >> FDCT_SHIFT; /*****************/ k2 = k2 + k3; k3 = (k3 << 1) - k2; /********/ dst[2] = k2; /* col. 2 */ k3 <<= 1; /* scale up col. 6 */ dst[6] = k3; /* col. 6 */ /* fdct_3 */ /* ROTATE k4,k6,392,946, FDCT_SHIFT */ /* assume FAST MULTIPLY */ /* k0, k1 are output */ k0 = k4 - k6; k1 = mla392(k0, k14, round); k0 = mla554(k4, k12, k1); k1 = mla1338(k6, k14, k1); k4 = k0 >> FDCT_SHIFT; k6 = k1 >> FDCT_SHIFT; /***********************/ k5 = k5 + k7; k7 = (k7 << 1) - k5; k4 = k4 + k7; k7 = (k7 << 1) - k4; k5 = k5 + k6; k4 <<= 1; /* scale up col.5 */ k6 = k5 - (k6 << 1); /********/ dst[5] = k4; /* col. 5 */ k6 <<= 2; /* scale up col. 7 */ dst[1] = k5; /* col. 1 */ dst[7] = k6; /* col. 7 */ dst[3] = k7; /* col. 3 */ dst += 8; } while (dst < out); out -= 64; dst = out + 8; /* Vertical Block Loop */ do /* Vertical 8xDCT loop */ { k0 = out[0]; k1 = out[8]; k2 = out[16]; k3 = out[24]; k4 = out[32]; k5 = out[40]; k6 = out[48]; k7 = out[56]; /* deadzone thresholding for column */ abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); if (abs_sum < ColTh) { out[0] = 0x7fff; out++; continue; } /* fdct_1 */ k0 = k0 + k7; k7 = k0 - (k7 << 1); k1 = k1 + k6; k6 = k1 - (k6 << 1); k2 = k2 + k5; k5 = k2 - (k5 << 1); k3 = k3 + k4; k4 = k3 - (k4 << 1); k0 = k0 + k3; k3 = k0 - (k3 << 1); k1 = k1 + k2; k2 = k1 - (k2 << 1); k0 = k0 + k1; k1 = k0 - (k1 << 1); /**********/ out[32] = k1; /* row 4 */ out[0] = k0; /* row 0 */ /* fdct_2 */ k4 = k4 + k5; k5 = k5 + k6; k6 = k6 + k7; k2 = k2 + k3; /* MUL2C k2,k5,724,FDCT_SHIFT */ /* k0, k1 become scratch */ /* assume FAST MULTIPLY */ k1 = mla724(k12, k5, round); k0 = mla724(k12, k2, round); k5 = k1 >> FDCT_SHIFT; k2 = k0 >> FDCT_SHIFT; /*****************/ k2 = k2 + k3; k3 = (k3 << 1) - k2; k3 <<= 1; /* scale up col. 6 */ /********/ out[48] = k3; /* row 6 */ out[16] = k2; /* row 2 */ /* fdct_3 */ /* ROTATE k4,k6,392,946, FDCT_SHIFT */ /* assume FAST MULTIPLY */ /* k0, k1 are output */ k0 = k4 - k6; k1 = mla392(k0, k14, round); k0 = mla554(k4, k12, k1); k1 = mla1338(k6, k14, k1); k4 = k0 >> FDCT_SHIFT; k6 = k1 >> FDCT_SHIFT; /***********************/ k5 = k5 + k7; k7 = (k7 << 1) - k5; k4 = k4 + k7; k7 = (k7 << 1) - k4; k5 = k5 + k6; k4 <<= 1; /* scale up col. 5 */ k6 = k5 - (k6 << 1); /********/ out[24] = k7 ; /* row 3 */ k6 <<= 2; /* scale up col. 7 */ out[56] = k6 ; /* row 7 */ out[8] = k5 ; /* row 1 */ out[40] = k4 ; /* row 5 */ out++; } while ((uintptr_t)out < (uintptr_t)dst) ; return ; } /**************************************************************************/ /* Function: Block4x4DCT_AANwSub Date: 7/31/01 Input: Output: out[64] ==> next block Purpose: Do subtraction for zero MV first before 4x4 DCT Modified: **************************************************************************/ Void Block4x4DCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width) { Short *dst; Int k0, k1, k2, k3, k4, k5, k6, k7; Int round; Int k12 = 0x022A02D4; Int k14 = 0x0188053A; Int mask; Int tmp, tmp2; Int abs_sum; Int ColTh; dst = out + 64 ; ColTh = *dst; out += 128; round = 1 << (FDCT_SHIFT - 1); do /* fdct_nextrow */ { /* assuming the block is word-aligned */ mask = 0x1FE; tmp = *((Int*) cur); /* contains 4 pixels */ tmp2 = *((Int*) pred); /* prediction 4 pixels */ k0 = tmp2 & 0xFF; k1 = mask & (tmp << 1); k0 = k1 - (k0 << 1); k1 = (tmp2 >> 8) & 0xFF; k2 = mask & (tmp >> 7); k1 = k2 - (k1 << 1); k2 = (tmp2 >> 16) & 0xFF; k3 = mask & (tmp >> 15); k2 = k3 - (k2 << 1); k3 = (tmp2 >> 24) & 0xFF; k4 = mask & (tmp >> 23); k3 = k4 - (k3 << 1); tmp = *((Int*)(cur + 4)); /* another 4 pixels */ tmp2 = *((Int*)(pred + 4)); k4 = tmp2 & 0xFF; k5 = mask & (tmp << 1); k4 = k5 - (k4 << 1); k5 = (tmp2 >> 8) & 0xFF; k6 = mask & (tmp >> 7); k5 = k6 - (k5 << 1); k6 = (tmp2 >> 16) & 0xFF; k7 = mask & (tmp >> 15); k6 = k7 - (k6 << 1); k7 = (tmp2 >> 24) & 0xFF; tmp = mask & (tmp >> 23); k7 = tmp - (k7 << 1); cur += width; pred += 16; /* fdct_1 */ k0 = k0 + k7; k7 = k0 - (k7 << 1); k1 = k1 + k6; k6 = k1 - (k6 << 1); k2 = k2 + k5; k5 = k2 - (k5 << 1); k3 = k3 + k4; k4 = k3 - (k4 << 1); k0 = k0 + k3; k3 = k0 - (k3 << 1); k1 = k1 + k2; k2 = k1 - (k2 << 1); k0 = k0 + k1; /**********/ dst[0] = k0; /* fdct_2 */ k4 = k4 + k5; k5 = k5 + k6; k6 = k6 + k7; k2 = k2 + k3; /* MUL2C k2,k5,724,FDCT_SHIFT */ /* k0, k1 become scratch */ /* assume FAST MULTIPLY */ k1 = mla724(k12, k5, round); k0 = mla724(k12, k2, round); k5 = k1 >> FDCT_SHIFT; k2 = k0 >> FDCT_SHIFT; /*****************/ k2 = k2 + k3; /********/ dst[2] = k2; /* col. 2 */ /* fdct_3 */ /* ROTATE k4,k6,392,946, FDCT_SHIFT */ /* assume FAST MULTIPLY */ /* k0, k1 are output */ k0 = k4 - k6; k1 = mla392(k0, k14, round); k0 = mla554(k4, k12, k1); k1 = mla1338(k6, k14, k1); k4 = k0 >> FDCT_SHIFT; k6 = k1 >> FDCT_SHIFT; /***********************/ k5 = k5 + k7; k7 = (k7 << 1) - k5; k7 = k7 - k4; k5 = k5 + k6; /********/ dst[1] = k5; /* col. 1 */ dst[3] = k7; /* col. 3 */ dst += 8; } while (dst < out); out -= 64; dst = out + 4; /* Vertical Block Loop */ do /* Vertical 8xDCT loop */ { k0 = out[0]; k1 = out[8]; k2 = out[16]; k3 = out[24]; k4 = out[32]; k5 = out[40]; k6 = out[48]; k7 = out[56]; abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); if (abs_sum < ColTh) { out[0] = 0x7fff; out++; continue; } /* fdct_1 */ k0 = k0 + k7; k7 = k0 - (k7 << 1); k1 = k1 + k6; k6 = k1 - (k6 << 1); k2 = k2 + k5; k5 = k2 - (k5 << 1); k3 = k3 + k4; k4 = k3 - (k4 << 1); k0 = k0 + k3; k3 = k0 - (k3 << 1); k1 = k1 + k2; k2 = k1 - (k2 << 1); k0 = k0 + k1; /**********/ out[0] = k0; /* row 0 */ /* fdct_2 */ k4 = k4 + k5; k5 = k5 + k6; k6 = k6 + k7; k2 = k2 + k3; /* MUL2C k2,k5,724,FDCT_SHIFT */ /* k0, k1 become scratch */ /* assume FAST MULTIPLY */ k1 = mla724(k12, k5, round); k0 = mla724(k12, k2, round); k5 = k1 >> FDCT_SHIFT; k2 = k0 >> FDCT_SHIFT; /*****************/ k2 = k2 + k3; /********/ out[16] = k2; /* row 2 */ /* fdct_3 */ /* ROTATE k4,k6,392,946, FDCT_SHIFT */ /* assume FAST MULTIPLY */ /* k0, k1 are output */ k0 = k4 - k6; k1 = mla392(k0, k14, round); k0 = mla554(k4, k12, k1); k1 = mla1338(k6, k14, k1); k4 = k0 >> FDCT_SHIFT; k6 = k1 >> FDCT_SHIFT; /***********************/ k5 = k5 + k7; k7 = (k7 << 1) - k5; k7 = k7 - k4 ; k5 = k5 + k6; /********/ out[24] = k7 ; /* row 3 */ out[8] = k5 ; /* row 1 */ out++; } while ((uintptr_t)out < (uintptr_t)dst) ; return ; } /**************************************************************************/ /* Function: Block2x2DCT_AANwSub Date: 7/31/01 Input: Output: out[64] ==> next block Purpose: Do subtraction for zero MV first before 2x2 DCT Modified: **************************************************************************/ Void Block2x2DCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width) { Short *dst; Int k0, k1, k2, k3, k4, k5, k6, k7; Int round; Int k12 = 0x022A02D4; Int k14 = 0x018803B2; Int mask; Int tmp, tmp2; Int abs_sum; Int ColTh; dst = out + 64 ; ColTh = *dst; out += 128; round = 1 << (FDCT_SHIFT - 1); do /* fdct_nextrow */ { /* assuming the block is word-aligned */ mask = 0x1FE; tmp = *((Int*) cur); /* contains 4 pixels */ tmp2 = *((Int*) pred); /* prediction 4 pixels */ k0 = tmp2 & 0xFF; k1 = mask & (tmp << 1); k0 = k1 - (k0 << 1); k1 = (tmp2 >> 8) & 0xFF; k2 = mask & (tmp >> 7); k1 = k2 - (k1 << 1); k2 = (tmp2 >> 16) & 0xFF; k3 = mask & (tmp >> 15); k2 = k3 - (k2 << 1); k3 = (tmp2 >> 24) & 0xFF; k4 = mask & (tmp >> 23); k3 = k4 - (k3 << 1); tmp = *((Int*)(cur + 4)); /* another 4 pixels */ tmp2 = *((Int*)(pred + 4)); k4 = tmp2 & 0xFF; k5 = mask & (tmp << 1); k4 = k5 - (k4 << 1); k5 = (tmp2 >> 8) & 0xFF; k6 = mask & (tmp >> 7); k5 = k6 - (k5 << 1); k6 = (tmp2 >> 16) & 0xFF; k7 = mask & (tmp >> 15); k6 = k7 - (k6 << 1); k7 = (tmp2 >> 24) & 0xFF; tmp = mask & (tmp >> 23); k7 = tmp - (k7 << 1); cur += width; pred += 16; /* fdct_1 */ k0 = k0 + k7; k7 = k0 - (k7 << 1); k1 = k1 + k6; k6 = k1 - (k6 << 1); k2 = k2 + k5; k5 = k2 - (k5 << 1); k3 = k3 + k4; k4 = k3 - (k4 << 1); k0 = k0 + k3; k3 = k0 - (k3 << 1); k1 = k1 + k2; k2 = k1 - (k2 << 1); k0 = k0 + k1; /**********/ dst[0] = k0; /* fdct_2 */ k4 = k4 + k5; k5 = k5 + k6; k6 = k6 + k7; /* MUL2C k2,k5,724,FDCT_SHIFT */ /* k0, k1 become scratch */ /* assume FAST MULTIPLY */ k1 = mla724(k12, k5, round); k5 = k1 >> FDCT_SHIFT; /*****************/ /********/ /* fdct_3 */ /* ROTATE k4,k6,392,946, FDCT_SHIFT */ /* assume FAST MULTIPLY */ /* k0, k1 are output */ k1 = mla392(k4, k14, round); k1 = mla946(k6, k14, k1); k6 = k1 >> FDCT_SHIFT; /***********************/ k5 = k5 + k7; k5 = k5 + k6; /********/ dst[1] = k5; dst += 8; } while (dst < out); out -= 64; dst = out + 2; /* Vertical Block Loop */ do /* Vertical 8xDCT loop */ { k0 = out[0]; k1 = out[8]; k2 = out[16]; k3 = out[24]; k4 = out[32]; k5 = out[40]; k6 = out[48]; k7 = out[56]; abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); if (abs_sum < ColTh) { out[0] = 0x7fff; out++; continue; } /* fdct_1 */ k0 = k0 + k7; k7 = k0 - (k7 << 1); k1 = k1 + k6; k6 = k1 - (k6 << 1); k2 = k2 + k5; k5 = k2 - (k5 << 1); k3 = k3 + k4; k4 = k3 - (k4 << 1); k0 = k0 + k3; k3 = k0 - (k3 << 1); k1 = k1 + k2; k2 = k1 - (k2 << 1); k0 = k0 + k1; /**********/ out[0] = k0; /* row 0 */ /* fdct_2 */ k4 = k4 + k5; k5 = k5 + k6; k6 = k6 + k7; /* MUL2C k2,k5,724,FDCT_SHIFT */ /* k0, k1 become scratch */ /* assume FAST MULTIPLY */ k1 = mla724(k12, k5, round); k5 = k1 >> FDCT_SHIFT; /*****************/ /********/ /* fdct_3 */ /* ROTATE k4,k6,392,946, FDCT_SHIFT */ /* assume FAST MULTIPLY */ /* k0, k1 are output */ k1 = mla392(k4, k14, round); k1 = mla946(k6, k14, k1); k6 = k1 >> FDCT_SHIFT; /***********************/ k5 = k5 + k7; k5 = k5 + k6; /********/ out[8] = k5 ; /* row 1 */ out++; } while ((uintptr_t)out < (uintptr_t)dst) ; return ; } /**************************************************************************/ /* Function: BlockDCT_AANIntra Date: 8/9/01 Input: rec Output: out[64] ==> next block Purpose: Input directly from rec frame. Modified: **************************************************************************/ Void BlockDCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width) { Short *dst; Int k0, k1, k2, k3, k4, k5, k6, k7; Int round; Int k12 = 0x022A02D4; Int k14 = 0x0188053A; Int abs_sum; Int mask; Int *curInt, tmp; Int ColTh; OSCL_UNUSED_ARG(dummy2); dst = out + 64 ; ColTh = *dst; out += 128; round = 1 << (FDCT_SHIFT - 1); do /* fdct_nextrow */ { mask = 0x1FE; curInt = (Int*) cur; tmp = curInt[0]; /* contains 4 pixels */ k0 = mask & (tmp << 1); k1 = mask & (tmp >> 7); k2 = mask & (tmp >> 15); k3 = mask & (tmp >> 23); tmp = curInt[1]; /* another 4 pixels */ k4 = mask & (tmp << 1); k5 = mask & (tmp >> 7); k6 = mask & (tmp >> 15); k7 = mask & (tmp >> 23); cur += width; /* fdct_1 */ k0 = k0 + k7; k7 = k0 - (k7 << 1); k1 = k1 + k6; k6 = k1 - (k6 << 1); k2 = k2 + k5; k5 = k2 - (k5 << 1); k3 = k3 + k4; k4 = k3 - (k4 << 1); k0 = k0 + k3; k3 = k0 - (k3 << 1); k1 = k1 + k2; k2 = k1 - (k2 << 1); k0 = k0 + k1; k1 = k0 - (k1 << 1); /**********/ dst[0] = k0; dst[4] = k1; /* col. 4 */ /* fdct_2 */ k4 = k4 + k5; k5 = k5 + k6; k6 = k6 + k7; k2 = k2 + k3; /* MUL2C k2,k5,724,FDCT_SHIFT */ /* k0, k1 become scratch */ /* assume FAST MULTIPLY */ k1 = mla724(k12, k5, round); k0 = mla724(k12, k2, round); k5 = k1 >> FDCT_SHIFT; k2 = k0 >> FDCT_SHIFT; /*****************/ k2 = k2 + k3; k3 = (k3 << 1) - k2; /********/ dst[2] = k2; /* col. 2 */ k3 <<= 1; /* scale up col. 6 */ dst[6] = k3; /* col. 6 */ /* fdct_3 */ /* ROTATE k4,k6,392,946, FDCT_SHIFT */ /* assume FAST MULTIPLY */ /* k0, k1 are output */ k0 = k4 - k6; k1 = mla392(k0, k14, round); k0 = mla554(k4, k12, k1); k1 = mla1338(k6, k14, k1); k4 = k0 >> FDCT_SHIFT; k6 = k1 >> FDCT_SHIFT; /***********************/ k5 = k5 + k7; k7 = (k7 << 1) - k5; k4 = k4 + k7; k7 = (k7 << 1) - k4; k5 = k5 + k6; k4 <<= 1; /* scale up col.5 */ k6 = k5 - (k6 << 1); /********/ dst[5] = k4; /* col. 5 */ k6 <<= 2; /* scale up col. 7 */ dst[1] = k5; /* col. 1 */ dst[7] = k6; /* col. 7 */ dst[3] = k7; /* col. 3 */ dst += 8; } while (dst < out); out -= 64; dst = out + 8; /* Vertical Block Loop */ do /* Vertical 8xDCT loop */ { k0 = out[0]; k1 = out[8]; k2 = out[16]; k3 = out[24]; k4 = out[32]; k5 = out[40]; k6 = out[48]; k7 = out[56]; /* deadzone thresholding for column */ abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); if (abs_sum < ColTh) { out[0] = 0x7fff; out++; continue; } /* fdct_1 */ k0 = k0 + k7; k7 = k0 - (k7 << 1); k1 = k1 + k6; k6 = k1 - (k6 << 1); k2 = k2 + k5; k5 = k2 - (k5 << 1); k3 = k3 + k4; k4 = k3 - (k4 << 1); k0 = k0 + k3; k3 = k0 - (k3 << 1); k1 = k1 + k2; k2 = k1 - (k2 << 1); k0 = k0 + k1; k1 = k0 - (k1 << 1); /**********/ out[32] = k1; /* row 4 */ out[0] = k0; /* row 0 */ /* fdct_2 */ k4 = k4 + k5; k5 = k5 + k6; k6 = k6 + k7; k2 = k2 + k3; /* MUL2C k2,k5,724,FDCT_SHIFT */ /* k0, k1 become scratch */ /* assume FAST MULTIPLY */ k1 = mla724(k12, k5, round); k0 = mla724(k12, k2, round); k5 = k1 >> FDCT_SHIFT; k2 = k0 >> FDCT_SHIFT; /*****************/ k2 = k2 + k3; k3 = (k3 << 1) - k2; k3 <<= 1; /* scale up col. 6 */ /********/ out[48] = k3; /* row 6 */ out[16] = k2; /* row 2 */ /* fdct_3 */ /* ROTATE k4,k6,392,946, FDCT_SHIFT */ /* assume FAST MULTIPLY */ /* k0, k1 are output */ k0 = k4 - k6; k1 = mla392(k0, k14, round); k0 = mla554(k4, k12, k1); k1 = mla1338(k6, k14, k1); k4 = k0 >> FDCT_SHIFT; k6 = k1 >> FDCT_SHIFT; /***********************/ k5 = k5 + k7; k7 = (k7 << 1) - k5; k4 = k4 + k7; k7 = (k7 << 1) - k4; k5 = k5 + k6; k4 <<= 1; /* scale up col. 5 */ k6 = k5 - (k6 << 1); /********/ out[24] = k7 ; /* row 3 */ k6 <<= 2; /* scale up col. 7 */ out[56] = k6 ; /* row 7 */ out[8] = k5 ; /* row 1 */ out[40] = k4 ; /* row 5 */ out++; } while ((uintptr_t)out < (uintptr_t)dst) ; return ; } /**************************************************************************/ /* Function: Block4x4DCT_AANIntra Date: 8/9/01 Input: prev Output: out[64] ==> next block Purpose: Input directly from prev frame. output 2x2 DCT Modified: **************************************************************************/ Void Block4x4DCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width) { Short *dst; Int k0, k1, k2, k3, k4, k5, k6, k7; Int round; Int k12 = 0x022A02D4; Int k14 = 0x0188053A; Int mask; Int *curInt, tmp; Int abs_sum; Int ColTh; OSCL_UNUSED_ARG(dummy2); dst = out + 64 ; ColTh = *dst; out += 128; round = 1 << (FDCT_SHIFT - 1); do /* fdct_nextrow */ { mask = 0x1FE; curInt = (Int*) cur; tmp = curInt[0]; /* contains 4 pixels */ k0 = mask & (tmp << 1); k1 = mask & (tmp >> 7); k2 = mask & (tmp >> 15); k3 = mask & (tmp >> 23); tmp = curInt[1]; /* another 4 pixels */ k4 = mask & (tmp << 1); k5 = mask & (tmp >> 7); k6 = mask & (tmp >> 15); k7 = mask & (tmp >> 23); cur += width; /* fdct_1 */ k0 = k0 + k7; k7 = k0 - (k7 << 1); k1 = k1 + k6; k6 = k1 - (k6 << 1); k2 = k2 + k5; k5 = k2 - (k5 << 1); k3 = k3 + k4; k4 = k3 - (k4 << 1); k0 = k0 + k3; k3 = k0 - (k3 << 1); k1 = k1 + k2; k2 = k1 - (k2 << 1); k0 = k0 + k1; /**********/ dst[0] = k0; /* fdct_2 */ k4 = k4 + k5; k5 = k5 + k6; k6 = k6 + k7; k2 = k2 + k3; /* MUL2C k2,k5,724,FDCT_SHIFT */ /* k0, k1 become scratch */ /* assume FAST MULTIPLY */ k1 = mla724(k12, k5, round); k0 = mla724(k12, k2, round); k5 = k1 >> FDCT_SHIFT; k2 = k0 >> FDCT_SHIFT; /*****************/ k2 = k2 + k3; /********/ dst[2] = k2; /* col. 2 */ /* fdct_3 */ /* ROTATE k4,k6,392,946, FDCT_SHIFT */ /* assume FAST MULTIPLY */ /* k0, k1 are output */ k0 = k4 - k6; k1 = mla392(k0, k14, round); k0 = mla554(k4, k12, k1); k1 = mla1338(k6, k14, k1); k4 = k0 >> FDCT_SHIFT; k6 = k1 >> FDCT_SHIFT; /***********************/ k5 = k5 + k7; k7 = (k7 << 1) - k5; k7 = k7 - k4; k5 = k5 + k6; /********/ dst[1] = k5; /* col. 1 */ dst[3] = k7; /* col. 3 */ dst += 8; } while (dst < out); out -= 64; dst = out + 4; /* Vertical Block Loop */ do /* Vertical 8xDCT loop */ { k0 = out[0]; k1 = out[8]; k2 = out[16]; k3 = out[24]; k4 = out[32]; k5 = out[40]; k6 = out[48]; k7 = out[56]; abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); if (abs_sum < ColTh) { out[0] = 0x7fff; out++; continue; } /* fdct_1 */ k0 = k0 + k7; k7 = k0 - (k7 << 1); k1 = k1 + k6; k6 = k1 - (k6 << 1); k2 = k2 + k5; k5 = k2 - (k5 << 1); k3 = k3 + k4; k4 = k3 - (k4 << 1); k0 = k0 + k3; k3 = k0 - (k3 << 1); k1 = k1 + k2; k2 = k1 - (k2 << 1); k0 = k0 + k1; /**********/ out[0] = k0; /* row 0 */ /* fdct_2 */ k4 = k4 + k5; k5 = k5 + k6; k6 = k6 + k7; k2 = k2 + k3; /* MUL2C k2,k5,724,FDCT_SHIFT */ /* k0, k1 become scratch */ /* assume FAST MULTIPLY */ k1 = mla724(k12, k5, round); k0 = mla724(k12, k2, round); k5 = k1 >> FDCT_SHIFT; k2 = k0 >> FDCT_SHIFT; /*****************/ k2 = k2 + k3; /********/ out[16] = k2; /* row 2 */ /* fdct_3 */ /* ROTATE k4,k6,392,946, FDCT_SHIFT */ /* assume FAST MULTIPLY */ /* k0, k1 are output */ k0 = k4 - k6; k1 = mla392(k0, k14, round); k0 = mla554(k4, k12, k1); k1 = mla1338(k6, k14, k1); k4 = k0 >> FDCT_SHIFT; k6 = k1 >> FDCT_SHIFT; /***********************/ k5 = k5 + k7; k7 = (k7 << 1) - k5; k7 = k7 - k4 ; k5 = k5 + k6; /********/ out[24] = k7 ; /* row 3 */ out[8] = k5 ; /* row 1 */ out++; } while ((uintptr_t)out < (uintptr_t)dst) ; return ; } /**************************************************************************/ /* Function: Block2x2DCT_AANIntra Date: 8/9/01 Input: prev Output: out[64] ==> next block Purpose: Input directly from prev frame. output 2x2 DCT Modified: **************************************************************************/ Void Block2x2DCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width) { Short *dst; Int k0, k1, k2, k3, k4, k5, k6, k7; Int round; Int k12 = 0x022A02D4; Int k14 = 0x018803B2; Int mask; Int *curInt, tmp; Int abs_sum; Int ColTh; OSCL_UNUSED_ARG(dummy2); dst = out + 64 ; ColTh = *dst; out += 128; round = 1 << (FDCT_SHIFT - 1); do /* fdct_nextrow */ { mask = 0x1FE; curInt = (Int*) cur; tmp = curInt[0]; /* contains 4 pixels */ k0 = mask & (tmp << 1); k1 = mask & (tmp >> 7); k2 = mask & (tmp >> 15); k3 = mask & (tmp >> 23); tmp = curInt[1]; /* another 4 pixels */ k4 = mask & (tmp << 1); k5 = mask & (tmp >> 7); k6 = mask & (tmp >> 15); k7 = mask & (tmp >> 23); cur += width; /* fdct_1 */ k0 = k0 + k7; k7 = k0 - (k7 << 1); k1 = k1 + k6; k6 = k1 - (k6 << 1); k2 = k2 + k5; k5 = k2 - (k5 << 1); k3 = k3 + k4; k4 = k3 - (k4 << 1); k0 = k0 + k3; k3 = k0 - (k3 << 1); k1 = k1 + k2; k2 = k1 - (k2 << 1); k0 = k0 + k1; /**********/ dst[0] = k0; /* fdct_2 */ k4 = k4 + k5; k5 = k5 + k6; k6 = k6 + k7; /* MUL2C k2,k5,724,FDCT_SHIFT */ /* k0, k1 become scratch */ /* assume FAST MULTIPLY */ k1 = mla724(k12, k5, round); k5 = k1 >> FDCT_SHIFT; /*****************/ /********/ /* fdct_3 */ /* ROTATE k4,k6,392,946, FDCT_SHIFT */ /* assume FAST MULTIPLY */ /* k0, k1 are output */ k1 = mla392(k4, k14, round); k1 = mla946(k6, k14, k1); k6 = k1 >> FDCT_SHIFT; /***********************/ k5 = k5 + k7; k5 = k5 + k6; /********/ dst[1] = k5; dst += 8; } while (dst < out); out -= 64; dst = out + 2; /* Vertical Block Loop */ do /* Vertical 8xDCT loop */ { k0 = out[0]; k1 = out[8]; k2 = out[16]; k3 = out[24]; k4 = out[32]; k5 = out[40]; k6 = out[48]; k7 = out[56]; abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); if (abs_sum < ColTh) { out[0] = 0x7fff; out++; continue; } /* fdct_1 */ k0 = k0 + k7; k7 = k0 - (k7 << 1); k1 = k1 + k6; k6 = k1 - (k6 << 1); k2 = k2 + k5; k5 = k2 - (k5 << 1); k3 = k3 + k4; k4 = k3 - (k4 << 1); k0 = k0 + k3; k3 = k0 - (k3 << 1); k1 = k1 + k2; k2 = k1 - (k2 << 1); k0 = k0 + k1; /**********/ out[0] = k0; /* row 0 */ /* fdct_2 */ k4 = k4 + k5; k5 = k5 + k6; k6 = k6 + k7; /* MUL2C k2,k5,724,FDCT_SHIFT */ /* k0, k1 become scratch */ /* assume FAST MULTIPLY */ k1 = mla724(k12, k5, round); k5 = k1 >> FDCT_SHIFT; /*****************/ /********/ /* fdct_3 */ /* ROTATE k4,k6,392,946, FDCT_SHIFT */ /* assume FAST MULTIPLY */ /* k0, k1 are output */ k1 = mla392(k4, k14, round); k1 = mla946(k6, k14, k1); k6 = k1 >> FDCT_SHIFT; /***********************/ k5 = k5 + k7; k5 = k5 + k6; /********/ out[8] = k5 ; /* row 1 */ out++; } while ((uintptr_t)out < (uintptr_t)dst) ; return ; } /**************************************************************************/ /* Function: Block1x1DCTwSub Date: 8/9/01 Input: block Output: y Purpose: Compute DC value only Modified: **************************************************************************/ void Block1x1DCTwSub(Short *out, UChar *cur, UChar *pred, Int width) { UChar *end; Int temp = 0; Int offset2; offset2 = width - 8; end = pred + (16 << 3); do { temp += (*cur++ - *pred++); temp += (*cur++ - *pred++); temp += (*cur++ - *pred++); temp += (*cur++ - *pred++); temp += (*cur++ - *pred++); temp += (*cur++ - *pred++); temp += (*cur++ - *pred++); temp += (*cur++ - *pred++); cur += offset2; pred += 8; } while (pred < end) ; out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = 0; out[0] = temp >> 3; return ; } /**************************************************************************/ /* Function: Block1x1DCTIntra Date: 8/9/01 Input: prev Output: out Purpose: Compute DC value only Modified: **************************************************************************/ void Block1x1DCTIntra(Short *out, UChar *cur, UChar *dummy2, Int width) { UChar *end; Int temp = 0; ULong word; OSCL_UNUSED_ARG(dummy2); end = cur + (width << 3); do { word = *((ULong*)cur); temp += (word >> 24); temp += ((word >> 16) & 0xFF); temp += ((word >> 8) & 0xFF); temp += (word & 0xFF); word = *((ULong*)(cur + 4)); temp += (word >> 24); temp += ((word >> 16) & 0xFF); temp += ((word >> 8) & 0xFF); temp += (word & 0xFF); cur += width; } while (cur < end) ; out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = 0; out[0] = temp >> 3; return ; } #ifdef __cplusplus } #endif