diff options
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src')
18 files changed, 3682 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s new file mode 100644 index 0000000..7801e57 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s @@ -0,0 +1,75 @@ +; /** +; * +; * File Name: armVCM4P2_Clip8_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains module for Clipping 16 bit value to [0,255] Range +; * Each of the 8 loop passes loads eight 16-bit samples from pSrc (two LDMIA of +; * two words each), saturates them with USAT16 #8, stores them as eight bytes +; * at pDst[0..7] and then advances pDst by step. +; */ + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + + M_VARIANTS ARM1136JS + + + + IF ARM1136JS + +;//Input Arguments + +pSrc RN 0 +pDst RN 1 +step RN 2 + +;// Local variables + +x0 RN 3 +x1 RN 4 +x2 RN 5 +x3 RN 6 + +Count RN 14 + + + M_START armVCM4P2_Clip8,r6 + + + MOV Count,#8 ;// 8 loop passes, 8 output bytes per pass +loop + + LDMIA pSrc!,{x0, x1} + SUBS Count,Count, #1 ;// count down; these flags are consumed by the BGT at the bottom of the loop + LDMIA pSrc!,{x2, x3} + USAT16 x0, #8, x0 ;// clip two samples to [0,255] + USAT16 x1, #8, x1 ;// clip two samples to [0,255] + STRB x0, [pDst] ;// low halfword of x0 -> byte 0 + MOV x0, x0, LSR #16 ;// bring the high halfword down + STRB x0, [pDst,#1] + STRB x1, [pDst,#2] + MOV x1, x1, LSR #16 + STRB x1, [pDst,#3] + + USAT16 x2, #8, x2 ;// clip two samples to [0,255] + USAT16 x3, #8, x3 ;// clip two samples to [0,255] + STRB x2, [pDst,#4] + MOV x2, x2, LSR #16 + STRB x2, [pDst,#5] + STRB x3, [pDst,#6] + MOV x3, x3, LSR #16 + STRB x3, [pDst,#7] + ADD pDst,pDst,step ;// Increment pDst by step value + + BGT loop ;// Continue loop while Count > 0 (Count runs 8 down to 0, it never reaches 64) + + M_END + ENDIF + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s new file mode 100644 index 0000000..9e30900 --- /dev/null +++ 
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s @@ -0,0 +1,398 @@ +;/** +; * +; * File Name: armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains modules for zigzag scanning and VLC decoding +; * for inter, intra block. +; * +; * +; * +; * Function: omxVCM4P2_DecodeVLCZigzag_AC_unsafe +; * +; * Description: +; * Performs VLC decoding and inverse zigzag scan +; * +; * The routine first zero-fills 128 bytes at pDst, then decodes (Last, Run, Level) +; * symbols and stores each Level as a halfword at pDst + zigzagTable[Count]. +; * The four table pointers (VLC, LMAX, RMAX, zigzag) are read from the stack. +; * NOTE(review): Count (r6) is read but never initialised inside this routine - +; * presumably it is an extra input supplied in r6 by the caller; confirm. +; * +; * +; */ + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + + + M_VARIANTS ARM1136JS + + + + + + IF ARM1136JS + + + + + +;//Input Arguments + +ppBitStream RN 0 +pBitOffset RN 1 +pDst RN 2 +shortVideoHeader RN 3 + + +;//Local Variables + +Return RN 0 + +pVlcTableL0L1 RN 4 +pLMAXTableL0L1 RN 4 +pRMAXTableL0L1 RN 4 +pZigzagTable RN 4 + +ftype RN 0 +temp3 RN 4 +temp RN 5 +Count RN 6 +Escape RN 5 + +;// armVCM4P2_FillVLDBuffer +zigzag RN 0 +storeLevel RN 1 +temp2 RN 4 +temp1 RN 5 +sign RN 5 +Last RN 7 +storeRun RN 14 + + +packRetIndex RN 5 + + +markerbit RN 5 + +;// Scratch Registers + +RBitStream RN 8 +RBitBuffer RN 9 +RBitCount RN 10 + +T1 RN 11 +T2 RN 12 +LR RN 14 + + + + M_ALLOC4 pppBitStream,4 + M_ALLOC4 ppOffset,4 + M_ALLOC4 pLinkRegister,4 + + M_START armVCM4P2_DecodeVLCZigzag_AC_unsafe + + ;// get the table addresses from stack + M_ARG ppVlcTableL0L1,4 + M_ARG ppLMAXTableL0L1,4 + M_ARG ppRMAXTableL0L1,4 + M_ARG ppZigzagTable,4 + + ;// Store ALL zeros at pDst + + MOV temp1,#0 ;// temp1 (r5) = 0, one of the four zero fill registers; this does NOT initialise Count (r6) + MOV Last,#0 + M_STR LR,pLinkRegister ;// Store Link Register on Stack + MOV temp2,#0 + MOV LR,#0 + + ;// Initialize the Macro and Store all zeros to pDst + + STM pDst!,{temp2,temp1,Last,LR} + M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount + STM pDst!,{temp2,temp1,Last,LR} + M_BD_INIT1 T1, T2, T2 + STM 
pDst!,{temp2,temp1,Last,LR} + M_BD_INIT2 T1, T2, T2 + STM pDst!,{temp2,temp1,Last,LR} + M_STR ppBitStream,pppBitStream ;// Store ppBitstream on stack + STM pDst!,{temp2,temp1,Last,LR} + M_STR pBitOffset,ppOffset ;// Store pBitOffset on stack + STM pDst!,{temp2,temp1,Last,LR} + + STM pDst!,{temp2,temp1,Last,LR} + STM pDst!,{temp2,temp1,Last,LR} + + + SUB pDst,pDst,#128 ;// Restore pDst (8 STMs of 16 bytes each were written above) + + ;// The armVCM4P2_GetVLCBits begins + +getVLCbits + + M_BD_LOOK8 Escape,7 ;// Load Escape Value + LSR Escape,Escape,#25 + CMP Escape,#3 ;// check for escape mode + MOVNE ftype,#0 + BNE notEscapemode ;// Branch if not in Escape mode 3 + + M_BD_VSKIP8 #7,T1 + CMP shortVideoHeader,#0 ;// Check shortVideoHeader flag to know the type of Escape mode + BEQ endFillVLD + + ;// Escape Mode 4 + + M_BD_READ8 Last,1,T1 + M_BD_READ8 storeRun,6,T1 + M_BD_READ8 storeLevel,8,T1 + + + ;// Check whether the Reserved values for Level are used and Exit with an Error Message if it is so + + TEQ storeLevel,#0 + TEQNE storeLevel,#128 + BEQ ExitError + + ADD temp2,storeRun,Count + CMP temp2,#64 + BGE ExitError ;// error if Count+storeRun >= 64 + + + ;// Load address of zigzagTable + + M_LDR pZigzagTable,ppZigzagTable ;// Loading the Address of Zigzag table + + + ;// armVCM4P2_FillVLDBuffer + + SXTB storeLevel,storeLevel ;// Sign Extend storeLevel to 32 bits + + + ;// To Reflect Runlength + + ADD Count,Count,storeRun + LDRB zigzag,[pZigzagTable,Count] + ADD Count,Count,#1 + STRH storeLevel,[pDst,zigzag] ;// store Level + + B ExitOk ;// NOTE(review): leaves after this one coefficient whatever Last is; ExitOk returns OMX_Sts_NoErr only when Last==1 - confirm this is intended + + + +endFillVLD + + + ;// Load Ftype( Escape Mode) value based on the two successive bits in the bitstream + + M_BD_READ8 temp1,1,T1 + CMP temp1,#0 + MOVEQ ftype,#1 + BEQ notEscapemode + M_BD_READ8 temp1,1,T1 + CMP temp1,#1 + MOVEQ ftype,#3 + MOVNE ftype,#2 + + +notEscapemode + + ;// Load optimized packed VLC table with last=0 and Last=1 + + M_LDR pVlcTableL0L1,ppVlcTableL0L1 ;// Load Combined VLC Table + + + CMP ftype,#3 ;// If ftype >= 3 perform Fixed Length Decoding 
(Escape Mode 3) + BGE EscapeMode3 ;// Else continue normal VLC Decoding + + ;// Variable length decoding, "armUnPackVLC32" + + + M_BD_VLD packRetIndex,T1,T2,pVlcTableL0L1,4,2 + + + LDR temp3,=0xFFF + + CMP packRetIndex,temp3 ;// Check for invalid symbol + BEQ ExitError ;// if invalid symbol occurs exit with an error message + + AND Last,packRetIndex,#2 ;// Get Last from packed Index (Last is 0 or 2 from here until it is shifted at storeLevelL1) + + + + + LSR storeRun,packRetIndex,#7 ;// Get Run Value from Packed index + AND storeLevel,packRetIndex,#0x7c ;// storeLevel=packRetIndex[2-6],storeLevel[0-1]=0 + + + M_LDR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Load LMAX table + + + LSR storeLevel,storeLevel,#2 ;// Level value + + CMP ftype,#1 + BNE ftype2 + + ;// ftype==1; Escape mode =1 + + + ADD temp1, pLMAXTableL0L1, Last, LSL#4 ;// Last is 0 or 2 here, so Last LSL#4 adds 32 to the table address when last=1 + LDRB temp1,[temp1,storeRun] ;// LMAX entry selected by run + + + ADD storeLevel,temp1,storeLevel + +ftype2 + + ;// ftype =2; Escape mode =2 + + M_LDR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Load RMAX Table + + CMP ftype,#2 + BNE FillVLDL1 + + ADD temp1, pRMAXTableL0L1, Last, LSL#4 ;// Last is 0 or 2 here, so Last LSL#4 adds 32 to the table address when last=1 + SUB temp2,storeLevel,#1 ;// RMAX entry selected by level-1 + LDRB temp1,[temp1,temp2] + + + ADD storeRun,storeRun,#1 + ADD storeRun,temp1 + +FillVLDL1 + + + ;// armVCM4P2_FillVLDBuffer + + M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable + + M_BD_READ8 sign,1,T1 + + CMP sign,#1 + RSBEQ storeLevel,storeLevel,#0 ;// negate the level when the sign bit is set + + ADD temp1,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63 + CMP temp1,#64 + BGE ExitError + + + + + + + ;// To Reflect Runlength + + ADD Count,Count,storeRun + +storeLevelL1 + + LDRB zigzag,[pZigzagTable,Count] + CMP Last,#2 ;// Check if the Level val is Last non zero val (Last is still 0 or 2 here) + ADD Count,Count,#1 + LSR Last,Last,#1 ;// normalise Last to 0 or 1 for the test at ExitOk + STRH storeLevel,[pDst,zigzag] + + BNE end + + B ExitOk + + + + ;// Fixed Length Decoding Escape Mode 3 + +EscapeMode3 + + M_BD_READ8 Last,1,T1 + M_BD_READ8 storeRun,6,T1 + + ADD temp2,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63 + CMP 
temp2,#64 + BGE ExitError + + M_BD_READ8 markerbit,1,T1 + TEQ markerbit,#0 ;// Exit with an error message if marker bit is zero + BEQ ExitError + + M_BD_READ16 storeLevel,12,T1 + + TST storeLevel,#0x800 ;// test if the level is negative + SUBNE storeLevel,storeLevel,#4096 ;// sign extend the 12-bit level + CMP storeLevel,#0 + CMPNE storeLevel,#-2048 + BEQ ExitError ;// Exit with an error message if Level==0 or -2048 + + M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable + + M_BD_READ8 markerbit,1,T1 + + + ;// armVCM4P2_FillVLDBuffer ( Sign not used as storeLevel is preprocessed) + + + + ;// To Reflect Run Length + + ADD Count,Count,storeRun + + + +storeLevelLast + + LDRB zigzag,[pZigzagTable,Count] + CMP Last,#1 + ADD Count,Count,#1 + STRH storeLevel,[pDst,zigzag] + + BNE end + + B ExitOk + +end + + CMP Count,#64 ;//Run the Loop until Count reaches 64 + + BLT getVLCbits + + +ExitOk + ;// Exit When VLC Decoding is done Successfully + + ;// Loading ppBitStream and pBitOffset from stack + + CMP Last,#1 ;// EQ selects OMX_Sts_NoErr below; assumes M_LDR and M_BD_FINI leave the flags intact - TODO confirm + M_LDR ppBitStream,pppBitStream + M_LDR pBitOffset,ppOffset + + ;//Ending the macro + + M_BD_FINI ppBitStream,pBitOffset + + MOVEQ Return,#OMX_Sts_NoErr + MOVNE Return,#OMX_Sts_Err + M_LDR LR,pLinkRegister ;// Load the Link Register Back + B exit2 + +ExitError + ;// Exit When an Error occurs + + M_LDR ppBitStream,pppBitStream + M_LDR pBitOffset,ppOffset + ;//Ending the macro + + M_BD_FINI ppBitStream,pBitOffset + M_LDR LR,pLinkRegister + MOV Return,#OMX_Sts_Err + +exit2 + + + M_END + ENDIF + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c new file mode 100644 index 0000000..ba4d058 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c @@ -0,0 +1,211 @@ + /** + * + * File Name: armVCM4P2_Huff_Tables_VLC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 
7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armVCM4P2_Huff_Tables_VLC.c + * Description: Contains all the Huffman tables used in MPEG4 codec + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armCOMM_Bitstream.h" + + + + +// Contains optimized and Packed VLC tables with Last=0 and Last=1 + +// optimized Packed VLC table Entry Format +// --------------------------------------- +// +// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +// +------------------------------------------------+ +// | Len | Run | Level |L | 1 | +// +------------------------------------------------+ +// | Offset | 0 | +// +------------------------------------------------+ +// If the table entry is a leaf entry then bit 0 set: +// Len = Number of bits overread (0 to 7) 3 bits +// Run = RunLength of the Symbol (0 to 63) 6 bits +// Level = Level of the Symbol (0 to 31) 5 bits +// L = Last Value of the Symbol (0 or 1) 1 bit +// +// If the table entry is an internal node then bit 0 is clear: +// Offset = Number of (16-bit) half words from the table +// start to the next table node +// +// The table is accessed by successive lookups on the +// next Step bits of the input bitstream until a leaf node +// is obtained. The Step sizes are supplied to the VLD macro. + +// The VLC tables used for Intra and non intra coefficients in non Escape mode +// contain symbols with both Last=0 and Last=1. 
+// If a symbol is not found in the table it will be coded as 0xFFF + + +const OMX_U16 armVCM4P2_InterVlcL0L1[200] = { + 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x1b09, + 0x4009, 0x4009, 0x4009, 0x4009, 0x2109, 0x2109, 0x0209, 0x0011, + 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058, + 0x3fff, 0x3fff, 0x0038, 0x0040, 0x2115, 0x2115, 0x201d, 0x201d, + 0x2059, 0x2059, 0x2051, 0x2051, 0x1c0d, 0x1b0d, 0x1a0d, 0x190d, + 0x0911, 0x0811, 0x0711, 0x0611, 0x0511, 0x0319, 0x0219, 0x0121, + 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088, + 0x2061, 0x2061, 0x2129, 0x2129, 0x3709, 0x3709, 0x3809, 0x3809, + 0x3d0d, 0x3d0d, 0x3e0d, 0x3e0d, 0x3f0d, 0x3f0d, 0x200d, 0x200d, + 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x0131, 0x0221, 0x0419, 0x0519, + 0x0619, 0x0a11, 0x1909, 0x1a09, 0x210d, 0x220d, 0x230d, 0x240d, + 0x250d, 0x260d, 0x270d, 0x280d, 0x00c0, 0x00c8, 0x00d0, 0x00d8, + 0x0049, 0x0041, 0x380d, 0x380d, 0x370d, 0x370d, 0x360d, 0x360d, + 0x350d, 0x350d, 0x340d, 0x340d, 0x330d, 0x330d, 0x320d, 0x320d, + 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x310d, 0x310d, 0x2015, 0x2015, + 0x3609, 0x3609, 0x3509, 0x3509, 0x3409, 0x3409, 0x3309, 0x3309, + 0x3209, 0x3209, 0x3109, 0x3109, 0x0110, 0x0130, 0x0138, 0x0140, + 0x0118, 0x0120, 0x0128, 0x100d, 0x3009, 0x3009, 0x2f09, 0x2f09, + 0x2411, 0x2411, 0x2311, 0x2311, 0x2039, 0x2039, 0x2031, 0x2031, + 0x0f0d, 0x0e0d, 0x0d0d, 0x0c0d, 0x0b0d, 0x0a0d, 0x090d, 0x0e09, + 0x0d09, 0x0211, 0x0119, 0x0029, 0x0150, 0x0158, 0x0160, 0x0168, + 0x280d, 0x280d, 0x270d, 0x270d, 0x260d, 0x260d, 0x250d, 0x250d, + 0x2c09, 0x2c09, 0xb759, 0xb759, 0x2a09, 0x2a09, 0x2021, 0x2021, + 0x040d, 0x030d, 0x0b35, 0x010d, 0x0909, 0x0809, 0x0709, 0x0609, + 0x0111, 0x0019, 0x2509, 0x2509, 0x2409, 0x2409, 0x2309, 0x2309 +}; + + +const OMX_U16 armVCM4P2_IntraVlcL0L1[200] = { + 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x0f09, + 0x4009, 0x4009, 0x4009, 0x4009, 0x2011, 0x2011, 0x0109, 0x0019, + 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 
0x0048, 0x0050, 0x0058, + 0x3fff, 0x3fff, 0x0038, 0x0040, 0x203d, 0x203d, 0x2035, 0x2035, + 0x20b1, 0x20b1, 0x20a9, 0x20a9, 0x0215, 0x011d, 0x002d, 0x0d09, + 0x0519, 0x0811, 0x0419, 0x0321, 0x0221, 0x0139, 0x00a1, 0x0099, + 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088, + 0x20b9, 0x20b9, 0x20c1, 0x20c1, 0x2141, 0x2141, 0x2911, 0x2911, + 0x2315, 0x2315, 0x2415, 0x2415, 0x2f0d, 0x2f0d, 0x300d, 0x300d, + 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x00c9, 0x00d1, 0x00d9, 0x0149, + 0x0619, 0x0151, 0x0229, 0x0719, 0x0e09, 0x0045, 0x0515, 0x0615, + 0x110d, 0x120d, 0x130d, 0x140d, 0x00c0, 0x00c8, 0x00d0, 0x00d8, + 0x0091, 0x0089, 0x2e0d, 0x2e0d, 0x2d0d, 0x2d0d, 0x2c0d, 0x2c0d, + 0x2b0d, 0x2b0d, 0x2a0d, 0x2a0d, 0x2115, 0x2115, 0x2025, 0x2025, + 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x2c09, 0x2c09, 0x2b09, 0x2b09, + 0x2711, 0x2711, 0x2611, 0x2611, 0x2511, 0x2511, 0x2319, 0x2319, + 0x2219, 0x2219, 0x2131, 0x2131, 0x0110, 0x0130, 0x0138, 0x0140, + 0x0118, 0x0120, 0x0128, 0x080d, 0x2129, 0x2129, 0x2081, 0x2081, + 0x2411, 0x2411, 0x2079, 0x2079, 0x2071, 0x2071, 0x2069, 0x2069, + 0x1bb5, 0x060d, 0x001d, 0xd3f9, 0x0909, 0x0809, 0x090d, 0x0311, + 0x0121, 0x0061, 0x0059, 0x0051, 0x0150, 0x0158, 0x0160, 0x0168, + 0x240d, 0x240d, 0x230d, 0x230d, 0x2609, 0x2609, 0x250d, 0x250d, + 0x2709, 0x2709, 0x2211, 0x2211, 0x2119, 0x2119, 0x2049, 0x2049, + 0x0015, 0x0509, 0x020d, 0x010d, 0x0409, 0x0309, 0x0041, 0x0039, + 0x0111, 0x0031, 0x2209, 0x2209, 0x2029, 0x2029, 0x2021, 0x2021 +}; + +const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64] = { + 0x0020, 0x000b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2001, 0x2001, + 0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003, + 0x0028, 0x000f, 0x200d, 0x200d, 0x0030, 0x0013, 0x2011, 0x2011, + 0x0038, 0x0017, 0x2015, 0x2015, 0x3fff, 0x3fff, 0x2019, 0x2019, + + 0x0020, 0x0009, 0x2007, 0x2007, 0x4005, 0x4005, 0x4005, 0x4005, + 0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001, + 0x0028, 0x000d, 0x200b, 0x200b, 0x0030, 0x0011, 0x200f, 
0x200f, + 0x0038, 0x0015, 0x2013, 0x2013, 0x1fff, 0x0019, 0x2017, 0x2017 +}; + + +const OMX_U16 armVCM4P2_aVlcMVD[124] = { + 0x0010, 0x00f0, 0x0043, 0x003f, 0x4041, 0x4041, 0x4041, 0x4041, + 0x0018, 0x00d8, 0x0047, 0x003b, 0x0020, 0x0080, 0x00a8, 0x00d0, + 0x0028, 0x0048, 0x0070, 0x0078, 0x1fff, 0x0030, 0x0038, 0x0040, + 0x0081, 0x0001, 0x007f, 0x0003, 0x207d, 0x207d, 0x2005, 0x2005, + 0x207b, 0x207b, 0x2007, 0x2007, 0x0050, 0x0058, 0x0060, 0x0068, + 0x2079, 0x2079, 0x2009, 0x2009, 0x2077, 0x2077, 0x200b, 0x200b, + 0x2075, 0x2075, 0x200d, 0x200d, 0x2073, 0x2073, 0x200f, 0x200f, + 0x0071, 0x0011, 0x006f, 0x0013, 0x006d, 0x0015, 0x006b, 0x0017, + 0x0088, 0x0090, 0x0098, 0x00a0, 0x0069, 0x0019, 0x0067, 0x001b, + 0x0065, 0x001d, 0x0063, 0x001f, 0x0061, 0x0021, 0x005f, 0x0023, + 0x005d, 0x0025, 0x005b, 0x0027, 0x00b0, 0x00b8, 0x00c0, 0x00c8, + 0x0059, 0x0029, 0x0057, 0x002b, 0x2055, 0x2055, 0x202d, 0x202d, + 0x2053, 0x2053, 0x202f, 0x202f, 0x2051, 0x2051, 0x2031, 0x2031, + 0x204f, 0x204f, 0x2033, 0x2033, 0x00e0, 0x00e8, 0x0049, 0x0039, + 0x204d, 0x204d, 0x2035, 0x2035, 0x204b, 0x204b, 0x2037, 0x2037, + 0x2045, 0x2045, 0x203d, 0x203d +}; + +/* LMAX table for Inter (Last == 0 and Last=1) + Level - 1 Indexed (NOTE(review): the values read like a per-run table, 12, 6, 4, 3 ... for run 0, 1, 2, 3 - confirm the index) + padded armVCM4P2_InterL0L1LMAX[27-31] with zeros to access entries for Last=1 effectively + (the Last=1 entries therefore start at index 32) + +*/ +const OMX_U8 armVCM4P2_InterL0L1LMAX[73] = +{ + 12, 6, 4, 3, 3, 3, 3, 2, + 2, 2, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 0, 0, + 3, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1 +}; + +/* RMAX table for Inter (Last == 0 and Last=1) + Level - 1 Indexed + padded armVCM4P2_InterL0L1RMAX[12-31] with zeros to access entries for Last=1 table effectively */ + + +const OMX_U8 armVCM4P2_InterL0L1RMAX[35] = +{ + 26, 10, 6, 2, 1, 1, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, 40, 1, 0 +}; + +/* LMAX table for Intra (Last == 
0 and Last=1) + Level - 1 Indexed (NOTE(review): the values read like a per-run table, 27, 10, 5, 4 ... for run 0, 1, 2, 3 - confirm the index) + padded armVCM4P2_IntraL0L1LMAX[15-31] with zeros to access entries for Last=1 effectively + (the Last=1 entries therefore start at index 32) + +*/ +const OMX_U8 armVCM4P2_IntraL0L1LMAX[53] = +{ + 27, 10, 5, 4, 3, 3, 3, + 3, 2, 2, 1, 1, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 8, 3, 2, 2, 2, 2, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 +}; + + +/* RMAX table for Intra (Last == 0 and Last=1) + Level - 1 Indexed + padded armVCM4P2_IntraL0L1RMAX[27-31] with zeros to access entries for Last=1 table effectively */ + + +const OMX_U8 armVCM4P2_IntraL0L1RMAX[40] = +{ + 14, 9, 7, 3, 2, 1, 1, + 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + + 20, 6, 1, 0, 0, 0, 0, 0 + +}; diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c new file mode 100644 index 0000000..25cf8db --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c @@ -0,0 +1,75 @@ + /** + * + * File Name: armVCM4P2_Lookup_Tables.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * File: armVCM4P2_Lookup_Tables.c + * Description: Contains all the Lookup tables used in MPEG4 codec + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + + /* * Table Entries contain Dc Scaler values + * Entries 0..31 and 32..63 are two separate 32-entry sets; the ranges below + * are written so that they match the data in the array. + * armVCM4P2_DCScaler[i]= 8 for i=1 to 4 and i=33 to 36 + * = 2*i for i=5 to 8 + * = i+8 for i=9 to 24 + * = 2*i-16 for i=25 to 31 + * = (i-32+13)/2 for i=37 to 56 + * = i-6-32 for i=57 to 63 + * = 255 for i=0 and i=32 + */ + +const OMX_U8 armVCM4P2_DCScaler[64]={ + 0xff, 0x8, 0x8, 0x8, 0x8, 0xa, 0xc, 0xe, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, + 0xff, 0x8, 0x8, 0x8, 0x8, 0x9, 0x9, 0xa, + 0xa, 0xb, 0xb, 0xc, 0xc, 0xd, 0xd, 0xe, + 0xe, 0xf, 0xf, 0x10, 0x10, 0x11, 0x11, 0x12, + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, + +}; + + + /* Table Entries Contain reciprocal of 1 to 63 + * armVCM4P2_Reciprocal_QP_S16[i]=round(32767/i) + * armVCM4P2_Reciprocal_QP_S16[0]= 0 + */ + +const OMX_S16 armVCM4P2_Reciprocal_QP_S16[64]={ + 0x0000,0x7fff,0x4000,0x2aaa,0x2000,0x1999,0x1555,0x1249, + 0x1000,0x0e39,0x0ccd,0x0ba3,0x0aab,0x09d9,0x0925,0x0888, + 0x0800,0x0787,0x071c,0x06bd,0x0666,0x0618,0x05d1,0x0591, + 0x0555,0x051f,0x04ec,0x04be,0x0492,0x046a,0x0444,0x0421, + 0x0400,0x03e1,0x03c4,0x03a8,0x038e,0x0376,0x035e,0x0348, + 0x0333,0x031f,0x030c,0x02fa,0x02e9,0x02d8,0x02c8,0x02b9, + 0x02ab,0x029d,0x028f,0x0282,0x0276,0x026a,0x025f,0x0254, + 0x0249,0x023f,0x0235,0x022b,0x0222,0x0219,0x0211,0x0208 + +}; + + /* Table Entries Contain reciprocal of 1 to 63 + * armVCM4P2_Reciprocal_QP_S32[i]=round(131071/i) + * armVCM4P2_Reciprocal_QP_S32[0]= 0 + */ + +const OMX_S32 armVCM4P2_Reciprocal_QP_S32[64]={ + 0x00000000,0x0001ffff,0x00010000,0x0000aaaa, 0x00008000, 0x00006666, 0x00005555, 0x00004924, + 0x00004000,0x000038e3,0x00003333,0x00002e8c, 0x00002aab, 0x00002762, 0x00002492, 0x00002222, + 0x00002000,0x00001e1e,0x00001c72,0x00001af2, 0x0000199a, 0x00001861, 
0x00001746, 0x00001643, + 0x00001555,0x0000147b,0x000013b1,0x000012f6, 0x00001249, 0x000011a8, 0x00001111, 0x00001084, + 0x00001000,0x00000f84,0x00000f0f,0x00000ea1, 0x00000e39, 0x00000dd6, 0x00000d79, 0x00000d21, + 0x00000ccd,0x00000c7d,0x00000c31,0x00000be8, 0x00000ba3, 0x00000b61, 0x00000b21, 0x00000ae5, + 0x00000aab,0x00000a73,0x00000a3d,0x00000a0a, 0x000009d9, 0x000009a9, 0x0000097b, 0x0000094f, + 0x00000925,0x000008fb,0x000008d4,0x000008ae, 0x00000889, 0x00000865, 0x00000842, 0x00000820 + +}; diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s new file mode 100644 index 0000000..3f92d85 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s @@ -0,0 +1,104 @@ +;// +;// +;// File Name: armVCM4P2_SetPredDir_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + +; ** +; * Function: armVCM4P2_SetPredDir +; * +; * Description: +; * Performs detecting the prediction direction +; * Compares |left DC - top-left DC| with |top-left DC - top DC|; when the first +; * is smaller the direction is OMX_VC_VERTICAL and predQP = pQpBuf[1], +; * otherwise the direction is OMX_VC_HORIZONTAL and predQP = pQpBuf[0]. +; * +; * Remarks: +; * predQP and pQpBuf are the 5th and 6th arguments and are read from the stack. +; * +; * Parameters: +; * [in] blockIndex block index indicating the component type and +; * position as defined in subclause 6.1.3.8, of ISO/IEC +; * 14496-2. Furthermore, indexes 6 to 9 indicate the +; * alpha blocks spatially corresponding to luminance +; * blocks 0 to 3 in the same macroblock. +; * [in] pCoefBufRow pointer to the coefficient row buffer +; * [in] pCoefBufCol pointer to the coefficient column buffer +; * [in] pQpBuf pointer to the quantization parameter buffer +; * [out]predQP quantization parameter of the predictor block +; * [out]predDir indicates the prediction direction which takes one +; * of the following values: +; * OMX_VC_HORIZONTAL predict horizontally +; * OMX_VC_VERTICAL predict vertically +; * +; * Return Value: +; * Standard OMXResult result. See enumeration for possible result codes. 
+; * The code below always returns OMX_Sts_NoErr. +; */ + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE omxVC_s.h + + + M_VARIANTS ARM1136JS + + + IF ARM1136JS + +;// Input Arguments +BlockIndex RN 0 +pCoefBufRow RN 1 +pCoefBufCol RN 2 +predDir RN 3 +predQP RN 4 +pQpBuf RN 5 + +;// Local Variables + +Return RN 0 +blockDCLeft RN 6 +blockDCTop RN 7 +blockDCTopLeft RN 8 +temp1 RN 9 +temp2 RN 14 + + M_START armVCM4P2_SetPredDir,r9 + + M_ARG ppredQP,4 + M_ARG ppQpBuf,4 + + LDRH blockDCTopLeft,[pCoefBufRow,#-16] ;// top-left DC: halfword 16 bytes before pCoefBufRow. NOTE(review): LDRH zero-extends - confirm DC values are never negative here + LDRH blockDCLeft,[pCoefBufCol] + + TEQ BlockIndex,#3 ;// block 3 takes its top DC from [pCoefBufCol,#-16], every other block from [pCoefBufRow] + LDREQH blockDCTop,[pCoefBufCol,#-16] + LDRNEH blockDCTop,[pCoefBufRow] + + SUBS temp1,blockDCLeft,blockDCTopLeft + RSBLT temp1,temp1,#0 ;// temp1 = |left - topLeft| + SUBS temp2,blockDCTopLeft,blockDCTop + RSBLT temp2,temp2,#0 ;// temp2 = |topLeft - top| + + M_LDR pQpBuf,ppQpBuf + M_LDR predQP,ppredQP + CMP temp1,temp2 ;// LT: vertical, predQP = pQpBuf[1]; GE: horizontal, predQP = pQpBuf[0] + MOV temp2,#OMX_VC_VERTICAL ;// plain MOV, the flags from the CMP stay valid for the conditional instructions + LDRLTB temp1,[pQpBuf,#1] + STRLT temp2,[predDir] + STRLT temp1,[predQP] + MOV temp2,#OMX_VC_HORIZONTAL + LDRGEB temp1,[pQpBuf] + STRGE temp2,[predDir] + MOV Return,#OMX_Sts_NoErr + STRGE temp1,[predQP] + + + + M_END + + ENDIF + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c new file mode 100644 index 0000000..ed17f9b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c @@ -0,0 +1,61 @@ +/** + * + * File Name: armVCM4P2_Zigzag_Tables.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * File: armVCM4P2_ZigZag_Tables.c + * Description: Contains the zigzag tables + * + */ + +#include "omxtypes.h" + +/* Contains Double the values in the reference Zigzag Table + * Contains Classical,Vertical and Horizontal Zigzagscan tables in one array + * (64 entries each, in that order). Every 64-entry table is a permutation of + * the even values 0..126, i.e. each doubled scan position occurs exactly once. + * Entry 59 of the classical table is 108 (2 * 54); it used to be 104, which + * duplicated entry 50 and left position 54 unreachable. + */ + +const OMX_U8 armVCM4P2_aClassicalZigzagScan [192] = +{ + 0, 2, 16, 32, 18, 4, 6, 20, + 34, 48, 64, 50, 36, 22, 8, 10, + 24, 38, 52, 66, 80, 96, 82, 68, + 54, 40, 26, 12, 14, 28, 42, 56, + 70, 84, 98, 112, 114, 100, 86, 72, + 58, 44, 30, 46, 60, 74, 88, 102, + 116, 118, 104, 90, 76, 62, 78, 92, + 106, 120, 122, 108, 94, 110, 124, 126, + + 0, 16, 32, 48, 2, 18, 4, 20, + 34, 50, 64, 80, 96, 112, 114, 98, + 82, 66, 52, 36, 6, 22, 8, 24, + 38, 54, 68, 84, 100, 116, 70, 86, + 102, 118, 40, 56, 10, 26, 12, 28, + 42, 58, 72, 88, 104, 120, 74, 90, + 106, 122, 44, 60, 14, 30, 46, 62, + 76, 92, 108, 124, 78, 94, 110, 126, + + 0, 2, 4, 6, 16, 18, 32, 34, + 20, 22, 8, 10, 12, 14, 30, 28, + 26, 24, 38, 36, 48, 50, 64, 66, + 52, 54, 40, 42, 44, 46, 56, 58, + 60, 62, 68, 70, 80, 82, 96, 98, + 84, 86, 72, 74, 76, 78, 88, 90, + 92, 94, 100, 102, 112, 114, 116, 118, + 104, 106, 108, 110, 120, 122, 124, 126 + + +}; + + + + + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c new file mode 100644 index 0000000..b63d295 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c @@ -0,0 +1,102 @@ +/** + * + * File Name: omxVCM4P2_DecodeBlockCoef_Inter.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Description: + * Contains modules for inter reconstruction + * + */ + + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Inter + * + * Description: + * Decodes the INTER block coefficients. Inverse quantization, inversely zigzag + * positioning and IDCT, with appropriate clipping on each step, are performed + * on the coefficients. The results (residuals) are placed in a contiguous array + * of 64 elements. For INTER block, the output buffer holds the residuals for + * further reconstruction. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream buffer. There is no boundary + * check for the bit stream buffer. + * [in] pBitOffset pointer to the bit position in the byte pointed + * to by *ppBitStream. *pBitOffset is valid within + * [0-7] + * [in] QP quantization parameter + * [in] shortVideoHeader a flag indicating presence of short_video_header; + * shortVideoHeader==1 indicates using quantization method defined in short + * video header mode, and shortVideoHeader==0 indicates normal quantization method. + * [out] ppBitStream *ppBitStream is updated after the block is decoded, so that it points to the + * current byte in the bit stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the current bit position in the + * byte pointed by *ppBitStream + * [out] pDst pointer to the decoded residual buffer (a contiguous array of 64 elements of + * OMX_S16 data type). Must be 16-byte aligned. 
+ * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following pointers is Null: ppBitStream, *ppBitStream, pBitOffset , pDst + * - At least one of the below case: + * - *pBitOffset exceeds [0,7], QP <= 0; + * - pDst not 16-byte aligned + * OMX_Sts_Err - status error + * + * Note: this function performs no argument checks itself; it returns the first + * non-OMX_Sts_NoErr code reported by one of the three stages it calls. + * + */ +OMXResult omxVCM4P2_DecodeBlockCoef_Inter( + const OMX_U8 ** ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_INT QP, + OMX_INT shortVideoHeader +) +{ + /* 64 elements are needed but to align it to 16 bytes need + 15 more elements of padding */ + OMX_S16 tempBuf[79]; + OMX_S16 *pTempBuf1; + OMXResult errorCode; + /* Aligning the local buffers */ + pTempBuf1 = armAlignTo16Bytes(tempBuf); + + + /* VLD and zigzag: the decoded coefficients are written to the aligned scratch buffer */ + errorCode = omxVCM4P2_DecodeVLCZigzag_Inter(ppBitStream, pBitOffset, + pTempBuf1,shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Dequantization, in place on the scratch buffer */ + errorCode = omxVCM4P2_QuantInvInter_I( + pTempBuf1, + QP); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Inverse transform from the scratch buffer into pDst */ + errorCode = omxVCM4P2_IDCT8x8blk(pTempBuf1, pDst); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + return OMX_Sts_NoErr; +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c new file mode 100644 index 0000000..c609a60 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c @@ -0,0 +1,208 @@ +/** + * + * File Name: omxVCM4P2_DecodeBlockCoef_Intra.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Description: + * Contains modules for intra reconstruction + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Intra + * + * Description: + * Decodes the INTRA block coefficients. Inverse quantization, inversely zigzag + * positioning, and IDCT, with appropriate clipping on each step, are performed + * on the coefficients. The results are then placed in the output frame/plane on + * a pixel basis. For INTRA block, the output values are clipped to [0, 255] and + * written to corresponding block buffer within the destination plane. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream buffer. There is no boundary + * check for the bit stream buffer. + * [in] pBitOffset pointer to the bit position in the byte pointed + * to by *ppBitStream. *pBitOffset is valid within + * [0-7]. + * [in] step width of the destination plane + * [in/out] pCoefBufRow [in] pointer to the coefficient row buffer + * [out] updated coefficient rwo buffer + * [in/out] pCoefBufCol [in] pointer to the coefficient column buffer + * [out] updated coefficient column buffer + * [in] curQP quantization parameter of the macroblock which + * the current block belongs to + * [in] pQpBuf Pointer to a 2-element QP array. pQpBuf[0] holds the QP of the 8x8 block left to + * the current block(QPa). pQpBuf[1] holds the QP of the 8x8 block just above the + * current block(QPc). + * Note, in case the corresponding block is out of VOP bound, the QP value will have + * no effect to the intra-prediction process. Refer to subclause "7.4.3.3 Adaptive + * ac coefficient prediction" of ISO/IEC 14496-2(MPEG4 Part2) for accurate description. + * [in] blockIndex block index indicating the component type and + * position as defined in subclause 6.1.3.8, + * Figure 6-5 of ISO/IEC 14496-2. 
+ * [in] intraDCVLC a code determined by intra_dc_vlc_thr and QP. + * This allows a mechanism to switch between two VLC + * for coding of Intra DC coefficients as per Table + * 6-21 of ISO/IEC 14496-2. + * [in] ACPredFlag a flag equal to ac_pred_flag (of luminance) indicating + * if the ac coefficients of the first row or first + * column are differentially coded for intra coded + * macroblock. + * [in] shortVideoHeader a flag indicating presence of short_video_header; + * shortVideoHeader==1 selects linear intra DC mode, + * and shortVideoHeader==0 selects nonlinear intra DC mode. + * [out] ppBitStream *ppBitStream is updated after the block is + * decoded, so that it points to the current byte + * in the bit stream buffer + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream + * [out] pDst pointer to the block in the destination plane. + * pDst should be 16-byte aligned. + * [out] pCoefBufRow pointer to the updated coefficient row buffer. + * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, + * pCoefBufRow, pCoefBufCol, pQPBuf, pDst. + * or + * - At least one of the below case: *pBitOffset exceeds [0,7], curQP exceeds (1, 31), + * blockIndex exceeds [0,9], step is not the multiple of 8, intraDCVLC is zero while + * blockIndex greater than 5. 
+ * or + * - pDst is not 16-byte aligned + * OMX_Sts_Err - status error. NOTE(review): no argument checks are visible in the body below; any error status is whatever the called helpers report - confirm + * + */ + +OMXResult omxVCM4P2_DecodeBlockCoef_Intra( + const OMX_U8 ** ppBitStream, + OMX_INT *pBitOffset, + OMX_U8 *pDst, + OMX_INT step, + OMX_S16 *pCoefBufRow, + OMX_S16 *pCoefBufCol, + OMX_U8 curQP, + const OMX_U8 *pQPBuf, + OMX_INT blockIndex, + OMX_INT intraDCVLC, + OMX_INT ACPredFlag, + OMX_INT shortVideoHeader + ) +{ + OMX_S16 tempBuf1[79], tempBuf2[79]; + OMX_S16 *pTempBuf1, *pTempBuf2; + OMX_INT predDir, predACDir; + OMX_INT predQP; + OMXVCM4P2VideoComponent videoComp; + OMXResult errorCode; + + + /* Work on 16-byte aligned views of the local buffers (79 elements each, presumably 64 coefficients plus alignment slack) */ + pTempBuf1 = armAlignTo16Bytes(tempBuf1); + pTempBuf2 = armAlignTo16Bytes(tempBuf2); + + /* Derive predDir and predQP for this block; AC prediction then uses predDir only when ACPredFlag is non-zero, otherwise OMX_VC_NONE */ + armVCM4P2_SetPredDir( + blockIndex, + pCoefBufRow, + pCoefBufCol, + &predDir, + &predQP, + pQPBuf); + + predACDir = predDir; + + + if (ACPredFlag == 0) + { + predACDir = OMX_VC_NONE; + } + + /* Block indices 0..3 are treated as luminance, every other index as chrominance */ + if (blockIndex <= 3) + { + videoComp = OMX_VC_LUMINANCE; + } + else + { + videoComp = OMX_VC_CHROMINANCE; + } + + + /* VLC decode + zigzag into pTempBuf1: intraDCVLC == 1 selects the IntraDCVLC decoder, any other value the IntraACVLC decoder */ + if (intraDCVLC == 1) + { + errorCode = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC( + ppBitStream, + pBitOffset, + pTempBuf1, + predACDir, + shortVideoHeader, + videoComp); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + } + else + { + errorCode = omxVCM4P2_DecodeVLCZigzag_IntraACVLC( + ppBitStream, + pBitOffset, + pTempBuf1, + predACDir, + shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + } + + /* AC/DC prediction on pTempBuf1, using the row/column coefficient buffers and predQP/predDir from above */ + errorCode = omxVCM4P2_PredictReconCoefIntra( + pTempBuf1, + pCoefBufRow, + pCoefBufCol, + curQP, + predQP, + predDir, + ACPredFlag, + videoComp); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Inverse quantization of pTempBuf1 (single-buffer _I variant) */ + errorCode = omxVCM4P2_QuantInvIntra_I( + pTempBuf1, + curQP, + videoComp, + shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /*
Inverse transform: 8x8 IDCT from pTempBuf1 into pTempBuf2 */ + errorCode = omxVCM4P2_IDCT8x8blk (pTempBuf1, pTempBuf2); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Clip the IDCT output in pTempBuf2 to 0..255 and write it into the + destination plane at pDst, using step as the row pitch */ + + armVCM4P2_Clip8(pTempBuf2,pDst,step); + + + return OMX_Sts_NoErr; +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s new file mode 100644 index 0000000..a1861da --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s @@ -0,0 +1,364 @@ +; ********** +; * +; * File Name: omxVCM4P2_DecodePadMV_PVOP_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; ** +; * Function: omxVCM4P2_DecodePadMV_PVOP +; * +; * Description: +; * Decodes and pads four motion vectors of the non-intra macroblock in P-VOP. +; * The motion vector padding process is specified in subclause 7.6.1.6 of +; * ISO/IEC 14496-2. +; * +; * Remarks: +; * +; * +; * Parameters: +; * [in] ppBitStream pointer to the pointer to the current byte in +; * the bit stream buffer +; * [in] pBitOffset pointer to the bit position in the byte pointed +; * to by *ppBitStream. *pBitOffset is valid within +; * [0-7]. +; * [in] pSrcMVLeftMB pointers to the motion vector buffers of the +; * macroblocks specially at the left side of the current macroblock +; * respectively. +; * [in] pSrcMVUpperMB pointers to the motion vector buffers of the +; * macroblocks specially at the upper side of the current macroblock +; * respectively. +; * [in] pSrcMVUpperRightMB pointers to the motion vector buffers of the +; * macroblocks specially at the upper-right side of the current macroblock +; * respectively.
+; * [in] fcodeForward a code equal to vop_fcode_forward in MPEG-4
+; * bit stream syntax
+; * [in] MBType the type of the current macroblock. If MBType
+; * is not equal to OMX_VC_INTER4V, the destination
+; * motion vector buffer is still filled with the
+; * same decoded vector.
+; * [out] ppBitStream *ppBitStream is updated after the block is decoded,
+; * so that it points to the current byte in the bit
+; * stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDstMVCurMB pointer to the motion vector buffer of the current
+; * macroblock which contains four decoded motion vectors
+; *
+; * Return Value:
+; * OMX_Sts_NoErr -no error
+; *   (INTRA / INTRA_Q: all four vectors of pDstMVCurMB are zeroed and M_BD_FINI is skipped)
+; *
+; * OMX_Sts_Err - status error
+; *   (an MVD VLC lookup returned the invalid-symbol marker 0xFFF)
+; *
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        INCLUDE armCOMM_BitDec_s.h
+        INCLUDE omxVC_s.h
+
+        M_VARIANTS ARM1136JS
+
+
+
+
+        IF ARM1136JS
+
+;//Input Arguments
+
+ppBitStream          RN 0
+pBitOffset           RN 1
+pSrcMVLeftMB         RN 2
+pSrcMVUpperMB        RN 3
+pSrcMVUpperRightMB   RN 4
+pDstMVCurMB          RN 5
+fcodeForward         RN 6
+MBType               RN 7
+
+;//Local Variables
+
+zero                 RN 4
+one                  RN 4
+scaleFactor          RN 1
+
+
+Return               RN 0
+
+VlcMVD               RN 0
+index                RN 4
+Count                RN 7
+
+mvHorData            RN 4
+mvHorResidual        RN 0
+
+mvVerData            RN 4
+mvVerResidual        RN 0
+
+temp                 RN 1
+;// temp, Range, scaleFactor and diffMVdy all name r1: never keep two of them live at the same time
+temp1                RN 3
+High                 RN 4
+Low                  RN 2
+Range                RN 1
+
+BlkCount             RN 14
+
+diffMVdx             RN 0
+diffMVdy             RN 1
+
+;// Scratch Registers
+
+RBitStream           RN 8
+RBitCount            RN 9
+RBitBuffer           RN 10
+
+T1                   RN 11
+T2                   RN 12
+LR                   RN 14
+
+        IMPORT armVCM4P2_aVlcMVD
+        IMPORT omxVCM4P2_FindMVpred
+
+        ;// Allocate stack memory
+
+        M_ALLOC4 ppDstMVCurMB,4
+        M_ALLOC4 pDstMVPredME,4
+        M_ALLOC4 pBlkCount,4
+
+        M_ALLOC4 pppBitStream,4
+        M_ALLOC4 ppBitOffset,4
+        M_ALLOC4 ppSrcMVLeftMB,4
+        M_ALLOC4 ppSrcMVUpperMB,4
+
+        M_ALLOC4 pdiffMVdx,4
+        M_ALLOC4 pdiffMVdy,4
+        M_ALLOC4 pHigh,4
+
+
+
+
+        M_START omxVCM4P2_DecodePadMV_PVOP,r11
+
+        M_ARG pSrcMVUpperRightMBonStack,4            ;// pSrcMVUpperRightMB is passed on the stack
+        M_ARG pDstMVCurMBonStack,4                   ;// pDstMVCurMB is passed on the stack
+        M_ARG fcodeForwardonStack,4                  ;// fcodeForward is passed on the stack
+        M_ARG MBTypeonStack,4                        ;// MBType is passed on the stack
+
+
+
+
+
+        ;// Initializing the BitStream Macro
+
+        M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+        M_LDR MBType,MBTypeonStack                   ;// Load MBType from stack
+        M_LDR pDstMVCurMB,pDstMVCurMBonStack         ;// Load pDstMVCurMB from stack
+        MOV zero,#0
+;// INTRA / INTRA_Q: zero all four 4-byte motion vectors (byte offsets 0, 4, 8, 12) and return
+        TEQ MBType,#OMX_VC_INTRA                     ;// Check if MBType=OMX_VC_INTRA
+        TEQNE MBType,#OMX_VC_INTRA_Q                 ;// check if MBType=OMX_VC_INTRA_Q
+        STREQ zero,[pDstMVCurMB]                     ;// pDstMVCurMB[0] = 0
+        M_BD_INIT1 T1, T2, T2
+        STREQ zero,[pDstMVCurMB,#4]                  ;// pDstMVCurMB[1] = 0
+        M_BD_INIT2 T1, T2, T2
+        STREQ zero,[pDstMVCurMB,#8]                  ;// pDstMVCurMB[2] = 0 (was a second store to #4)
+        MOVEQ Return,#OMX_Sts_NoErr
+        MOV BlkCount,#0
+        STREQ zero,[pDstMVCurMB,#12]                 ;// pDstMVCurMB[3] = 0 (was a third store to #4)
+
+        BEQ ExitOK
+
+        TEQ MBType,#OMX_VC_INTER4V                   ;// Check if MBType=OMX_VC_INTER4V
+        TEQNE MBType,#OMX_VC_INTER4V_Q               ;// Check if MBType=OMX_VC_INTER4V_Q
+        MOVEQ Count,#4                               ;// four vectors to decode (Count overwrites MBType: both are r7)
+
+        TEQ MBType,#OMX_VC_INTER                     ;// Check if MBType=OMX_VC_INTER
+        TEQNE MBType,#OMX_VC_INTER_Q                 ;// Check if MBType=OMX_VC_INTER_Q
+        MOVEQ Count,#1                               ;// one vector to decode, replicated after the loop
+
+        M_LDR fcodeForward,fcodeForwardonStack       ;// Load fcodeForward from stack
+
+        ;// Storing the values temporarily on stack
+
+        M_STR ppBitStream,pppBitStream
+        M_STR pBitOffset,ppBitOffset
+
+
+        SUB temp,fcodeForward,#1                     ;// temp=fcodeForward-1
+        MOV one,#1
+        M_STR pSrcMVLeftMB,ppSrcMVLeftMB
+        LSL scaleFactor,one,temp                     ;// scaleFactor=1<<(fcodeForward-1)
+        M_STR pSrcMVUpperMB,ppSrcMVUpperMB
+        LSL scaleFactor,scaleFactor,#5
+        M_STR scaleFactor,pHigh                      ;// [pHigh]=32*scaleFactor
+
+        ;// VLD Decoding
+
+
+Loop
+
+        LDR VlcMVD, =armVCM4P2_aVlcMVD               ;// Load the optimized MVD VLC table
+
+        ;// Horizontal Data and Residual calculation
+
+        LDR temp,=0xFFF
+        M_BD_VLD index,T1,T2,VlcMVD,3,2              ;// variable length decoding using the macro
+
+        TEQ index,temp
+        BEQ ExitError                                ;// Exit with an error if the decoded symbol is invalid (0xFFF)
+
+        SUB mvHorData,index,#32                      ;// mvHorData=index-32
+        MOV mvHorResidual,#1                         ;// mvHorResidual=1
+        CMP fcodeForward,#1
+        TEQNE mvHorData,#0
+        MOVEQ diffMVdx,mvHorData                     ;// if scaleFactor=1(fcodeForward=1) or mvHorData=0 diffMVdx=mvHorData
+        BEQ VerticalData
+
+        SUB temp,fcodeForward,#1
+        M_BD_VREAD8 mvHorResidual,temp,T1,T2         ;// get mvHorResidual from bitstream if fcodeForward>1 and mvHorData!=0
+
+        CMP mvHorData,#0
+        RSBLT mvHorData,mvHorData,#0                 ;// mvHorData=abs(mvHorData)
+        SUB mvHorResidual,mvHorResidual,fcodeForward
+        SMLABB diffMVdx,mvHorData,fcodeForward,mvHorResidual ;// diffMVdx=abs(mvHorData)*fcodeForward+mvHorResidual-fcodeForward; NOTE(review): ISO/IEC 14496-2 7.6.3 scales by f=1<<(fcodeForward-1), which differs from fcodeForward once fcodeForward>=3 - confirm
+        ADD diffMVdx,diffMVdx,#1
+        RSBLT diffMVdx,diffMVdx,#0                   ;// restore the sign (flags still come from CMP mvHorData,#0)
+
+        ;// Vertical Data and Residual calculation
+
+VerticalData
+
+        M_STR diffMVdx,pdiffMVdx                     ;// Store the diffMVdx on stack
+        LDR VlcMVD, =armVCM4P2_aVlcMVD               ;// Loading the address of optimized VLC tables
+
+        LDR temp,=0xFFF
+        M_BD_VLD index,T1,T2,VlcMVD,3,2              ;// VLC decoding using the macro
+
+        TEQ index,temp
+        BEQ ExitError                                ;// Exit with an error if an invalid symbol occurs
+
+        SUB mvVerData,index,#32                      ;// mvVerData=index-32
+        MOV mvVerResidual,#1
+        CMP fcodeForward,#1
+        TEQNE mvVerData,#0
+        MOVEQ diffMVdy,mvVerData                     ;// diffMVdy = mvVerData if scaleFactor=1(fcodeForward=1) or mvVerData=0
+        BEQ FindMVPred
+
+        SUB temp,fcodeForward,#1
+        M_BD_VREAD8 mvVerResidual,temp,T1,T2         ;// Get mvVerResidual from bit stream if fcodeForward>1 and mvVerData!=0
+
+
+        CMP mvVerData,#0
+        RSBLT mvVerData,mvVerData,#0
+        SUB mvVerResidual,mvVerResidual,fcodeForward
+        SMLABB diffMVdy,mvVerData,fcodeForward,mvVerResidual ;// diffMVdy=abs(mvVerData)*fcodeForward+mvVerResidual-fcodeForward (same review note as for diffMVdx)
+        ADD diffMVdy,diffMVdy,#1
+        RSBLT diffMVdy,diffMVdy,#0
+
+        ;//Calling the Function omxVCM4P2_FindMVpred
+
+FindMVPred
+
+        M_STR diffMVdy,pdiffMVdy
+        ADD temp,pDstMVCurMB,BlkCount,LSL #2         ;// temp=&pDstMVCurMB[BlkCount]
+        M_STR temp,ppDstMVCurMB                      ;// store temp on stack for passing as an argument to FindMVPred
+
+        MOV temp,#0
+        M_STR temp,pDstMVPredME                      ;// Pass pDstMVPredME=NULL as an argument
+        M_STR BlkCount,pBlkCount                     ;// Pass BlkCount as argument through stack
+
+        MOV temp,pSrcMVLeftMB                        ;// temp (RN 1)=pSrcMVLeftMB
+        M_LDR pSrcMVUpperRightMB,pSrcMVUpperRightMBonStack
+        MOV pSrcMVLeftMB,pSrcMVUpperMB               ;// pSrcMVLeftMB ( RN 2) = pSrcMVUpperMB
+        MOV ppBitStream,pDstMVCurMB                  ;// ppBitStream ( RN 0) = pDstMVCurMB
+        MOV pSrcMVUpperMB,pSrcMVUpperRightMB         ;// pSrcMVUpperMB( RN 3) = pSrcMVUpperRightMB
+        BL omxVCM4P2_FindMVpred                      ;// Branch to subroutine omxVCM4P2_FindMVpred
+
+        ;// Store Horizontal Motion Vector
+
+        M_LDR BlkCount,pBlkCount                     ;// Load BlkCount from stack
+        M_LDR High,pHigh                             ;// High=32*scaleFactor
+        LSL temp1,BlkCount,#2                        ;// temp1=BlkCount*4
+        M_LDR diffMVdx,pdiffMVdx                     ;// Load diffMVdx
+
+        LDRSH temp,[pDstMVCurMB,temp1]               ;// temp=pDstMVCurMB[BlkCount].dx as written by FindMVpred
+
+
+        RSB Low,High,#0                              ;// Low = -32*scaleFactor
+        ADD diffMVdx,temp,diffMVdx                   ;// diffMVdx=pDstMVCurMB[BlkCount]+diffMVdx
+        ADD Range,High,High                          ;// Range=64*ScaleFactor (overwrites temp, which is no longer needed)
+        SUB High,High,#1                             ;// High= 32*scaleFactor-1
+
+        CMP diffMVdx,Low                             ;// If diffMVdx<Low
+        ADDLT diffMVdx,diffMVdx,Range                ;// diffMVdx+=Range
+
+        CMP diffMVdx,High
+        SUBGT diffMVdx,diffMVdx,Range                ;// If diffMVdx > High diffMVdx-=Range
+        STRH diffMVdx,[pDstMVCurMB,temp1]
+
+        ;// Store Vertical
+
+        ADD temp1,temp1,#2                           ;// temp1=4*BlkCount+2
+        M_LDR diffMVdx,pdiffMVdy                     ;// Load diffMVdy (kept in the diffMVdx register, r0)
+        LDRSH T1,[pDstMVCurMB,temp1]                 ;// T1=pDstMVCurMB[BlkCount].dy; temp cannot be used here because it is r1 = Range, which is still needed below
+        ADD BlkCount,BlkCount,#1                     ;// BlkCount=BlkCount+1
+        ADD diffMVdx,T1,diffMVdx
+        CMP diffMVdx,Low
+        ADDLT diffMVdx,diffMVdx,Range                ;// If diffMVdy<Low diffMVdy+=Range
+        CMP diffMVdx,High
+        SUBGT diffMVdx,diffMVdx,Range                ;// If diffMVdy > High diffMVdy-=Range
+        STRH diffMVdx,[pDstMVCurMB,temp1]
+
+        CMP BlkCount,Count
+        M_LDR pSrcMVLeftMB,ppSrcMVLeftMB
+        M_LDR pSrcMVUpperMB,ppSrcMVUpperMB
+
+        BLT Loop                                     ;// If BlkCount<Count Continue the Loop
+
+
+        ;// If MBType=OMX_VC_INTER or MBtype=OMX_VC_INTER_Q copy pDstMVCurMB[0] to
+        ;// pDstMVCurMB[1], pDstMVCurMB[2], pDstMVCurMB[3]
+
+        M_LDR MBType,MBTypeonStack
+
+        TEQ MBType,#OMX_VC_INTER
+        TEQNE MBType,#OMX_VC_INTER_Q
+        LDREQ temp,[pDstMVCurMB]
+        M_LDR ppBitStream,pppBitStream
+        STREQ temp,[pDstMVCurMB,#4]
+
+        STREQ temp,[pDstMVCurMB,#8]
+        STREQ temp,[pDstMVCurMB,#12]
+
+
+        M_LDR pBitOffset,ppBitOffset
+        ;//Ending the macro
+        M_BD_FINI ppBitStream,pBitOffset             ;// Finishing the Macro
+
+
+        MOV Return,#OMX_Sts_NoErr
+        B ExitOK
+
+ExitError
+
+        M_LDR ppBitStream,pppBitStream
+        M_LDR pBitOffset,ppBitOffset
+        ;//Ending the macro
+        M_BD_FINI ppBitStream,pBitOffset
+
+        MOV Return,#OMX_Sts_Err
+
+ExitOK
+
+        M_END
+        ENDIF
+        END
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
new file mode 100644
index 0000000..c43b253
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
@@ -0,0 +1,132 @@
+;/**
+; *
+; * File Name: omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_Inter
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one inter coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bitstream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within [0-7].
+; * [in] shortVideoHeader binary flag indicating presence of short_video_header;
+; * escape modes 0-3 are used if shortVideoHeader==0,
+; * and escape mode 4 is used when shortVideoHeader==1.
+; * [out] ppBitStream *ppBitStream is updated after the block is +; * decoded, so that it points to the current byte +; * in the bit stream buffer +; * [out] pBitOffset *pBitOffset is updated so that it points to the +; * current bit position in the byte pointed by +; * *ppBitStream +; * [out] pDst pointer to the coefficient buffer of current +; * block. Must be 16-byte aligned +; * +; * Return Value: +; * OMX_Sts_BadArgErr - bad arguments +; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or +; * -pDst is not 16-byte aligned, or +; * -*pBitOffset exceeds [0,7]. +; * OMX_Sts_Err - status error +; * -At least one mark bit is equal to zero +; * -Encountered an illegal stream code that cannot be found in the VLC table +; * -Encountered an illegal code in the VLC FLC table +; * -The number of coefficients is greater than 64 +; * NOTE(review): no argument checks are visible in this wrapper; the status presumably comes from armVCM4P2_DecodeVLCZigzag_AC_unsafe - confirm +; */ + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + + + M_VARIANTS ARM1136JS + + + + + + IF ARM1136JS + + ;// Import various tables needed for the function + + + IMPORT armVCM4P2_InterVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0 + ;// Packed in Run:Level:Last format + IMPORT armVCM4P2_InterL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1 + IMPORT armVCM4P2_InterL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1 + IMPORT armVCM4P2_aClassicalZigzagScan ;// contains classical Zigzag table entries with double the original values + IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe + + + +;//Input Arguments + +ppBitStream RN 0 +pBitOffset RN 1 +pDst RN 2 +shortVideoHeader RN 3 + +;//Local Variables (the four table pointers share r4; each is stored to its stack slot before the next one is loaded) + +Return RN 0 + +pVlcTableL0L1 RN 4 +pLMAXTableL0L1 RN 4 +pRMAXTableL0L1 RN 4 +pZigzagTable RN 4 +Count RN 6 + + + + ;// Allocate stack slots for the addresses of the VLC, LMAX, RMAX and zigzag tables + + + M_ALLOC4 ppVlcTableL0L1,4 + M_ALLOC4 ppLMAXTableL0L1,4 + M_ALLOC4 ppRMAXTableL0L1,4 + M_ALLOC4 ppZigzagTable,4 + + 
+ M_START omxVCM4P2_DecodeVLCZigzag_Inter,r12 + + + + + LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load zigzag table address + M_STR pZigzagTable,ppZigzagTable ;// Store zigzag table address on stack to pass as argument to unsafe function + LDR pVlcTableL0L1, =armVCM4P2_InterVlcL0L1 ;// Load optimized VLC table with both L=0 and L=1 entries + M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store optimized VLC table address on stack + LDR pLMAXTableL0L1, =armVCM4P2_InterL0L1LMAX ;// Load Interleaved L=0 and L=1 LMAX Tables + M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX table address on stack + LDR pRMAXTableL0L1, =armVCM4P2_InterL0L1RMAX ;// Load Interleaved L=0 and L=1 RMAX Tables + MOV Count,#0 ;// Count (r6) = 0: start index handed to the unsafe routine + M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// store RMAX table address on stack + + + BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// r0-r3 still hold the caller's arguments; the callee's status in r0 is returned unchanged + + + + M_END + ENDIF + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s new file mode 100644 index 0000000..166729e --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s @@ -0,0 +1,136 @@ +;/** +; * +; * File Name: omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains modules for zigzag scanning and VLC decoding +; * for intra block. +; * +; * +; * +; * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC +; * +; * Description: +; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; * +; * Remarks: +; * PredDir arrives in r3 and selects the zigzag table (base + PredDir*64 bytes); shortVideoHeader is read from the stack. +; * Parameters: +; * [in] ppBitStream pointer to the pointer to the current byte in +; * the bitstream buffer +; * [in] pBitOffset pointer to the bit position in the byte pointed +; * to by *ppBitStream. *pBitOffset is valid within [0-7]. +; * [in] shortVideoHeader binary flag indicating presence of short_video_header; +; * escape modes 0-3 are used if shortVideoHeader==0, +; * and escape mode 4 is used when shortVideoHeader==1. +; * [out] ppBitStream *ppBitStream is updated after the block is +; * decoded, so that it points to the current byte +; * in the bit stream buffer +; * [out] pBitOffset *pBitOffset is updated so that it points to the +; * current bit position in the byte pointed by +; * *ppBitStream +; * [out] pDst pointer to the coefficient buffer of current +; * block. Must be 16-byte aligned +; * +; * Return Value: +; * OMX_Sts_BadArgErr - bad arguments +; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or +; * -pDst is not 16-byte aligned, or +; * -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error +; * -At least one mark bit is equal to zero +; * -Encountered an illegal stream code that cannot be found in the VLC table +; * -Encountered an illegal code in the VLC FLC table +; * -The number of coefficients is greater than 64 +; * +; */ + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + + + M_VARIANTS ARM1136JS + + + + + + IF ARM1136JS + + ;// Import various tables needed for the function + + + IMPORT armVCM4P2_IntraVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0 + ;// Packed in Run:Level:Last format + IMPORT armVCM4P2_IntraL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1 + IMPORT armVCM4P2_IntraL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1 + IMPORT armVCM4P2_aClassicalZigzagScan ;// contains classical Zigzag table entries with double the original values + IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe + +;//Input Arguments (PredDir and shortVideoHeader share r3: PredDir is consumed before shortVideoHeader is loaded from the stack) + +ppBitStream RN 0 +pBitOffset RN 1 +pDst RN 2 +PredDir RN 3 +shortVideoHeader RN 3 + +;//Local Variables + +Return RN 0 + +pVlcTableL0L1 RN 4 +pLMAXTableL0L1 RN 4 +pRMAXTableL0L1 RN 4 +pZigzagTable RN 4 +Count RN 6 + + + + ;// Allocate stack memory to store optimized VLC,Zigzag, RMAX, LMAX Table Addresses + + M_ALLOC4 ppVlcTableL0L1,4 + M_ALLOC4 ppLMAXTableL0L1,4 + M_ALLOC4 ppRMAXTableL0L1,4 + M_ALLOC4 ppZigzagTable,4 + + + M_START omxVCM4P2_DecodeVLCZigzag_IntraACVLC,r12 + + M_ARG shortVideoHeaderonStack,4 ;// shortVideoHeader is passed on the stack + + LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load Address of the Zigzag table + ADD pZigzagTable, pZigzagTable, PredDir, LSL #6 ;// Select the zigzag table for this PredDir: base + PredDir*64 bytes + + M_STR pZigzagTable,ppZigzagTable ;// Store Zigzag table address on stack + LDR pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1 ;// Load optimized packed VLC Table with both L=0 and L=1 entries + M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store VLC Table address on stack + LDR
pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX ;// Load LMAX Table + M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX Table address on Stack + LDR pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX ;// Load RMAX Table + MOV Count,#0 ;// Count (r6) = 0: start index handed to the unsafe routine + + M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Store RMAX Table address on stack + + + + M_LDR shortVideoHeader,shortVideoHeaderonStack ;// r3 now carries shortVideoHeader; PredDir is no longer needed + + BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// Call Unsafe Function + + + + + M_END + ENDIF + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s new file mode 100644 index 0000000..d19cb13 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s @@ -0,0 +1,224 @@ +;/** +; * +; * File Name: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains modules for zigzag scanning and VLC decoding +; * for intra block. +; * +; * +; * +; * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC +; * +; * Description: +; * Performs VLC decoding and inverse zigzag scan for one intra coded block. +; * +; * Remarks: +; * Decodes the intra DC coefficient itself, then hands the remaining coefficients to armVCM4P2_DecodeVLCZigzag_AC_unsafe with start index 1. +; * Parameters: +; * [in] ppBitStream pointer to the pointer to the current byte in +; * the bitstream buffer +; * [in] pBitOffset pointer to the bit position in the byte pointed +; * to by *ppBitStream. *pBitOffset is valid within [0-7]. +; * [in] shortVideoHeader binary flag indicating presence of short_video_header; +; * escape modes 0-3 are used if shortVideoHeader==0, +; * and escape mode 4 is used when shortVideoHeader==1.
+; * [out] ppBitStream *ppBitStream is updated after the block is +; * decoded, so that it points to the current byte +; * in the bit stream buffer +; * [out] pBitOffset *pBitOffset is updated so that it points to the +; * current bit position in the byte pointed by +; * *ppBitStream +; * [out] pDst pointer to the coefficient buffer of current +; * block. Must be 16-byte aligned +; * +; * Return Value: +; * OMX_Sts_BadArgErr - bad arguments +; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or +; * -pDst is not 16-byte aligned, or +; * -*pBitOffset exceeds [0,7]. +; * OMX_Sts_Err - status error +; * -At least one mark bit is equal to zero +; * -Encountered an illegal stream code that cannot be found in the VLC table +; * -Encountered an illegal code in the VLC FLC table +; * -The number of coefficients is greater than 64 +; * +; */ + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + + + M_VARIANTS ARM1136JS + + + + + + IF ARM1136JS :LOR: CortexA8 + + + ;// Import various tables needed for the function + + + IMPORT armVCM4P2_IntraVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0 + ;// Packed in Run:Level:Last format + IMPORT armVCM4P2_IntraL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1 + IMPORT armVCM4P2_IntraL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1 + IMPORT armVCM4P2_aClassicalZigzagScan ;// contains Classical, Horizontal, Vertical Zigzag table entries with double the original values + IMPORT armVCM4P2_aIntraDCLumaChromaIndex ;// Contains Optimized DCLuma and DCChroma Index table Entries + + + IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe + +;//Input Arguments + +ppBitStream RN 0 +pBitOffset RN 1 +pDst RN 2 +PredDir RN 3 +shortVideoHeader RN 3 +videoComp RN 5 +;//Local Variables + +Return RN 0 + +pDCLumaChromaIndex RN 4 +pDCChromaIndex RN 7 +pVlcTableL0L1 RN 4 +pLMAXTableL0L1 RN 4 +pRMAXTableL0L1 RN 4
+pZigzagTable RN 4 +Count RN 6 +DCValueSize RN 6 +powOfSize RN 7 +temp1 RN 5 + + +;// Scratch Registers + +RBitStream RN 8 +RBitBuffer RN 9 +RBitCount RN 10 + +T1 RN 11 +T2 RN 12 +DCVal RN 14 + + + ;// Allocate stack memory to store optimized VLC,Zigzag, RMAX, LMAX Table Addresses + + M_ALLOC4 ppVlcTableL0L1,4 + M_ALLOC4 ppLMAXTableL0L1,4 + M_ALLOC4 ppRMAXTableL0L1,4 + M_ALLOC4 ppZigzagTable,4 + M_ALLOC4 pDCCoeff,4 + + + + M_START omxVCM4P2_DecodeVLCZigzag_IntraDCVLC,r12 + + M_ARG shortVideoHeaderonStack,4 ;// shortVideoHeader is passed on the stack + M_ARG videoComponstack,4 ;// videoComp is passed on the stack + + + ;// Decode DC Coefficient + + + LDR pDCLumaChromaIndex, =armVCM4P2_aIntraDCLumaChromaIndex ;// Load Optimized VLC Table for Luminance and Chrominance + + ;// Initializing the Bitstream Macro + + M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount + M_LDR videoComp,videoComponstack + M_BD_INIT1 T1, T2, T2 + ADD pDCLumaChromaIndex,pDCLumaChromaIndex,videoComp, LSL #6 ;// index table for this component: base + videoComp*64 bytes + M_BD_INIT2 T1, T2, T2 + + + M_BD_VLD DCValueSize,T1,T2,pDCLumaChromaIndex,4,2 ;// VLC Decode using optimized Luminance and Chrominance VLC Table + + + + +DecodeDC + + CMP DCValueSize,#12 + BGT ExitError + + CMP DCValueSize,#0 + MOVEQ DCVal,#0 ;// If DCValueSize is zero then DC coeff =0 + BEQ ACDecode ;// Branch to perform AC Coeff Decoding + + M_BD_VREAD16 DCVal,DCValueSize,T1,T2 ;// Get DC Value From Bit stream + + + MOV powOfSize,#1 + LSL powOfSize,DCValueSize ;// powOfSize=pow(2,DCValueSize) + CMP DCVal,powOfSize,LSR #1 ;// Compare DCVal with powOfSize/2 + ADDLT DCVal,DCVal,#1 + SUBLT DCVal,DCVal,powOfSize ;// If less than powOfSize/2 DCVal=DCVal-powOfSize+1 + ;// Else DCVal= the bits fetched from the bit stream + +CheckDCValueSize + + CMP DCValueSize,#8 ;// If DCValueSize greater than 8 check marker bit + + BLE ACDecode + + M_BD_READ8 temp1,1,T1 + TEQ temp1,#0 ;// If Marker bit is zero Exit with an Error Message + BEQ ExitError + + + + ;// Decode AC Coefficient + +ACDecode + + M_STR
DCVal,pDCCoeff ;// Store Decoded DC Coeff on Stack (DCVal is r14, which the BL below overwrites) + M_BD_FINI ppBitStream,pBitOffset ;// Terminating the Bit stream Macro + + LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load Zigzag table address + ADD pZigzagTable, pZigzagTable, PredDir, LSL #6 ;// Select the zigzag table for this PredDir: base + PredDir*64 bytes + + M_STR pZigzagTable,ppZigzagTable ;// Store zigzag table address on stack + LDR pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1 ;// Load Optimized VLC Table With both Last=0 and Last=1 Entries + M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store Optimized VLC Table address on stack + LDR pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX ;// Load LMAX Table + M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX table address on stack + LDR pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX ;// Load RMAX Table + MOV Count,#1 ;// Count (r6) = 1: start index handed to the unsafe routine, the DC coefficient was decoded above + + M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Store RMAX Table address on Stack + + + M_LDR shortVideoHeader,shortVideoHeaderonStack ;// r3 now carries shortVideoHeader; PredDir is no longer needed + + BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// Call the Unsafe Function + + M_LDR DCVal,pDCCoeff ;// Get the Decoded DC Value From Stack + STRH DCVal,[pDst] ;// Write the DC value to the first halfword of pDst; r0 is not touched after the call above + B ExitOK + + + +ExitError + + M_BD_FINI ppBitStream,pBitOffset ;// Terminating the Bit Stream Macro in case of an Error + MOV Return,#OMX_Sts_Err ;// Exit with an Error Message +ExitOK + + M_END + ENDIF + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s new file mode 100644 index 0000000..a4bfa71 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s @@ -0,0 +1,194 @@ +;// +;// +;// File Name: omxVCM4P2_FindMVpred_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// +;// +;// + +;// Function: +;// omxVCM4P2_FindMVpred +;// + ;// Include headers + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armVCCOMM_s.h + + ;// Define cpu variants + M_VARIANTS ARM1136JS + + + IF ARM1136JS + + M_TABLE armVCM4P2_pBlkIndexTable + DCD OMXVCBlk0, OMXVCBlk1 + DCD OMXVCBlk2, OMXVCBlk3 + +;//-------------------------------------------- +;// Declare input registers +;//-------------------------------------------- + +pSrcMVCurMB RN 0 +pSrcCandMV1 RN 1 +pSrcCandMV2 RN 2 +pSrcCandMV3 RN 3 +pDstMVPred RN 4 +pDstMVPredME RN 5 +iBlk RN 6 + +pTable RN 4 +CandMV RN 12 + +pCandMV1 RN 7 +pCandMV2 RN 8 +pCandMV3 RN 9 + +CandMV1dx RN 0 +CandMV1dy RN 1 +CandMV2dx RN 2 +CandMV2dy RN 3 +CandMV3dx RN 10 +CandMV3dy RN 11 + +temp RN 14 + +zero RN 14 +return RN 0 + +; ---------------------------------------------- +; Main routine +; ---------------------------------------------- + + M_ALLOC4 MV, 4 + + ;// Function header + M_START omxVCM4P2_FindMVpred, r11 + + ;// Define stack arguments + M_ARG ppDstMVPred, 4 + M_ARG ppDstMVPredME, 4 + M_ARG Blk, 4 + + M_ADR CandMV, MV + MOV zero, #0 + M_LDR iBlk, Blk + + ;// Set the default value for these + ;// to be used if pSrcCandMV[1|2|3] == NULL + MOV pCandMV1, CandMV + MOV pCandMV2, CandMV + MOV pCandMV3, CandMV + + STR zero, [CandMV] ;// the default candidate (stack slot MV) is the zero vector + + ;// Branch to the case based on blk number + M_SWITCH iBlk + M_CASE OMXVCBlk0 ;// iBlk=0 + M_CASE OMXVCBlk1 ;// iBlk=1 + M_CASE OMXVCBlk2 ;// iBlk=2 + M_CASE OMXVCBlk3 ;// iBlk=3 + M_ENDSWITCH + +OMXVCBlk0 + CMP pSrcCandMV1, #0 + ADDNE pCandMV1, pSrcCandMV1, #4 + + CMP pSrcCandMV2, #0 + ADDNE pCandMV2, pSrcCandMV2, #8 + + CMP pSrcCandMV3, #0 + ADDNE pCandMV3, pSrcCandMV3, #8 + CMPEQ pSrcCandMV1, #0 + + MOVEQ pCandMV3, pCandMV2 + MOVEQ pCandMV1, pCandMV2 + + CMP pSrcCandMV1, #0 + CMPEQ pSrcCandMV2, #0 + + MOVEQ pCandMV1, pCandMV3 + MOVEQ pCandMV2, pCandMV3 + + CMP pSrcCandMV2, #0 + CMPEQ pSrcCandMV3, #0 + + MOVEQ pCandMV2, pCandMV1 + MOVEQ pCandMV3, pCandMV1 + + B BlkEnd +
+OMXVCBlk1 + MOV pCandMV1, pSrcMVCurMB + CMP pSrcCandMV3, #0 + ADDNE pCandMV3, pSrcCandMV3, #8 + + CMP pSrcCandMV2, #0 + ADDNE pCandMV2, pSrcCandMV2, #12 + + CMPEQ pSrcCandMV3, #0 + + MOVEQ pCandMV2, pCandMV1 + MOVEQ pCandMV3, pCandMV1 + + B BlkEnd + +OMXVCBlk2 + CMP pSrcCandMV1, #0 + MOV pCandMV2, pSrcMVCurMB + ADD pCandMV3, pSrcMVCurMB, #4 + ADDNE pCandMV1, pSrcCandMV1, #12 + B BlkEnd + +OMXVCBlk3 + ADD pCandMV1, pSrcMVCurMB, #8 + MOV pCandMV2, pSrcMVCurMB + ADD pCandMV3, pSrcMVCurMB, #4 + +BlkEnd + + ;// Load dx of the three candidates; the post-increment by 2 + ;// leaves each pointer on the matching dy halfword + LDRSH CandMV1dx, [pCandMV1], #2 + LDRSH CandMV2dx, [pCandMV2], #2 + LDRSH CandMV3dx, [pCandMV3], #2 + + ;// Load argument from the stack + M_LDR pDstMVPredME, ppDstMVPredME + + LDRSH CandMV1dy, [pCandMV1] + LDRSH CandMV2dy, [pCandMV2] + LDRSH CandMV3dy, [pCandMV3] + + CMP pDstMVPredME, #0 + + ;// Store the candidate MV's into the pDstMVPredME, + ;// these can be used in the fast algorithm if implemented + + STRHNE CandMV1dx, [pDstMVPredME], #2 + STRHNE CandMV1dy, [pDstMVPredME], #2 + STRHNE CandMV2dx, [pDstMVPredME], #2 + STRHNE CandMV2dy, [pDstMVPredME], #2 + STRHNE CandMV3dx, [pDstMVPredME], #2 + STRHNE CandMV3dy, [pDstMVPredME] + + ; Find the median of the 3 candidate MV's + M_MEDIAN3 CandMV1dx, CandMV2dx, CandMV3dx, temp + + ;// Load argument from the stack + M_LDR pDstMVPred, ppDstMVPred + + M_MEDIAN3 CandMV1dy, CandMV2dy, CandMV3dy, temp + + STRH CandMV3dx, [pDstMVPred], #2 ;// the third M_MEDIAN3 operand is what gets stored, so the macro presumably leaves the median there - confirm + STRH CandMV3dy, [pDstMVPred] + + MOV return, #OMX_Sts_NoErr + + M_END + ENDIF ;// ARM1136JS + + END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
new file mode 100644
index 0000000..bfeb540
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
@@ -0,0 +1,73 @@
+;//
+;//
+;// File Name:  omxVCM4P2_IDCT8x8blk_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;//     omxVCM4P2_IDCT8x8blk
+;//     Thin wrapper: sets up r2/r3 and expands the shared M_IDCT macro
+        ;// Include headers
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        ;// Define cpu variants
+        M_VARIANTS ARM1136JS
+
+        INCLUDE armCOMM_IDCT_s.h
+
+        IMPORT armCOMM_IDCTPreScale
+        ;//
+        ;// Function prototype
+        ;//
+        ;//     OMXResult
+        ;//     omxVCM4P2_IDCT8x8blk(const OMX_S16* pSrc,
+        ;//                                       OMX_S16* pDst)
+        ;//
+
+    IF ARM1136JS :LOR: CortexA8
+        M_ALLOC4    ppDest, 4               ;// stack locals; not referenced by name in this file,
+        M_ALLOC4    pStride, 4              ;// presumably used inside M_IDCT - confirm in armCOMM_IDCT_s.h
+        M_ALLOC8    pBlk, 2*8*8             ;// 128 bytes = 8x8 halfwords
+    ENDIF
+
+    IF ARM1136JS                            ;// NOTE(review): narrower guard than the M_END block below
+        M_START omxVCM4P2_IDCT8x8blk, r11
+    ENDIF
+
+
+    IF ARM1136JS :LOR: CortexA8
+
+;// Declare input registers
+pSrc            RN 0
+pDst            RN 1
+
+;// Declare other intermediate registers
+Result          RN 0
+
+;// Prototype for macro M_IDCT
+;// pSrc            RN 0  ;// source data buffer
+;// Stride          RN 1  ;// destination stride in bytes
+;// pDest           RN 2  ;// destination data buffer
+;// pScale          RN 3  ;// pointer to scaling table
+
+pSrc    RN 0
+Stride  RN 1                                ;// shares r1 with pDst; never loaded in this file
+pDest   RN 2
+pScale  RN 3
+
+        MOV         pDest, pDst             ;// M_IDCT takes the destination in r2 (see prototype above)
+        LDR         pScale, =armCOMM_IDCTPreScale
+        M_IDCT      s9, s16, 16             ;// NOTE(review): presumably input format, output format, byte stride - confirm
+        MOV         Result, #OMX_Sts_NoErr  ;// always reports success
+        M_END
+    ENDIF
+        ;// ARM1136JS :LOR: CortexA8
+
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
new file mode 100644
index 0000000..20965bf
--- /dev/null
+++
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
@@ -0,0 +1,713 @@
+;//
+;//
+;// File Name:  omxVCM4P2_MCReconBlock_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;// Predicts an 8x8 byte block (copy or half-pixel averaging of the source)
+;// and then optionally adds a 16-bit residue, saturating to [0,255]
+
+;// Include standard headers
+    INCLUDE omxtypes_s.h
+    INCLUDE armCOMM_s.h
+
+;// Import symbols required from other files
+
+    M_VARIANTS ARM1136JS
+
+;// ***************************************************************************
+;// ARM1136JS implementation
+;// ***************************************************************************
+    IF  ARM1136JS
+
+;// ***************************************************************************
+;// MACRO DEFINITIONS
+;// ***************************************************************************
+    ;// Description:
+    ;//
+    ;//     dest[j] = (x[j] + y[j] + round) >> 1,   j=0..3
+    ;//
+    ;// Similar to UHADD8 instruction, but with a rounding value of 1 added to
+    ;// each sum before dividing by two, if round is 1
+    ;//
+    ;// Syntax:
+    ;// M_UHADD8R   $dest, $x, $y, $round, $mask
+    ;//
+    ;// Inputs:
+    ;// $x        four packed bytes,   x[3] :  x[2]  :  x[1]  :  x[0]
+    ;// $y        four packed bytes,   y[3] :  y[2]  :  y[1]  :  y[0]
+    ;// $round    0 if no rounding to be added, 1 if rounding to be done
+    ;// $mask     some register set to 0x80808080
+    ;//
+    ;// Outputs:
+    ;// $dest     four packed bytes,   z[3] :  z[2]  :  z[1]  :  z[0]
+
+    MACRO
+    M_UHADD8R   $dest, $x, $y, $round, $mask
+    IF $round = 1
+        IF  $dest /= $y
+            MVN         $dest, $x
+            UHSUB8      $dest, $y, $dest
+            EOR         $dest, $dest, $mask
+        ELSE
+            MVN         $dest, $y
+            UHSUB8      $dest, $x, $dest
+            EOR         $dest, $dest, $mask
+        ENDIF
+    ELSE
+        UHADD8      $dest, $x, $y
+    ENDIF
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Load 8 bytes from $pSrc (aligned or unaligned locations)
+    ;//
+    ;// Syntax:
+    ;// M_LOAD_X    $pSrc, $srcStep, $out0, $out1, $scratch, $offset
+    ;//
+    ;// Inputs:
+    ;// $pSrc       4 byte aligned source pointer to an address just less than
+    ;//             or equal to the data location
+    ;// $srcStep    The stride on source
+    ;// $scratch    A scratch register, used internally for temp calculations
+    ;// $offset     Difference of source data location to the source pointer
+    ;//             Use when $offset != 0 (unaligned load)
+    ;//
+    ;// Outputs:
+    ;// $pSrc       In case the macro accepts stride, it increments the pSrc by
+    ;//             that value, else unchanged
+    ;// $out0       four packed bytes,   z[3] :  z[2]  :  z[1]  :  z[0]
+    ;// $out1       four packed bytes,   z[7] :  z[6]  :  z[5]  :  z[4]
+    ;//
+    ;// Note: {$out0, $out1, $scratch} should be registers with ascending
+    ;// register numbering. In case offset is 0, $scratch is not modified.
+
+    MACRO
+    M_LOAD_X    $pSrc, $srcStep, $out0, $out1, $scratch, $offset
+        IF $offset = 0
+            LDM         $pSrc, {$out0, $out1}
+            ADD         $pSrc, $pSrc, $srcStep
+        ELSE
+            LDM         $pSrc, {$out0, $out1, $scratch}
+            ADD         $pSrc, $pSrc, $srcStep
+
+            MOV         $out0, $out0, LSR #8 * $offset
+            ORR         $out0, $out0, $out1, LSL #(32 - 8 * ($offset))
+            MOV         $out1, $out1, LSR #8 * $offset
+            ORR         $out1, $out1, $scratch, LSL #(32 - 8 * ($offset))
+        ENDIF
+    MEND
+
+;// ***************************************************************************
+    ;// Description:
+    ;// Loads three words for X interpolation, update pointer to next row. For
+    ;// X interpolation, given a truncated-4byteAligned source pointer,
+    ;// invariably three contiguous words are required from there to get the
+    ;// nine bytes from the source pointer for filtering.
+    ;//
+    ;// Syntax:
+    ;// M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3
+    ;//
+    ;// Inputs:
+    ;// $pSrc       4 byte aligned source pointer to an address just less than
+    ;//             or equal to the data location
+    ;//
+    ;// $srcStep    The stride on source
+    ;//
+    ;// $offset     Difference of source data location to the source pointer
+    ;//             Use when $offset != 0 (unaligned load)
+    ;//
+    ;// Outputs:
+    ;// $pSrc       Incremented by $srcStep
+    ;//
+    ;// $word0, $word1, $word2, $word3
+    ;//             Three of these are outputs based on the $offset parameter.
+    ;//             The outputs are specifically generated to be processed by
+    ;//             the M_EXT_XINT macro. Following is the illustration to show
+    ;//             how the nine bytes are spanned for different offsets from
+    ;//             notTruncatedForAlignmentSourcePointer.
+    ;//
+    ;//              ------------------------------------------------------
+    ;//             | Offset | Aligned Ptr | word0 | word1 | word2 | word3 |
+    ;//             |------------------------------------------------------|
+    ;//             |    0   |       0     | 0123  | 4567  | 8xxx  |       |
+    ;//             |    1   |      -1     | x012  | 3456  | 78xx  |       |
+    ;//             |    2   |      -2     | xx01  | 2345  | 678x  |       |
+    ;//             |    3   |      -3     | xxx0  |       | 1234  | 5678  |
+    ;//              ------------------------------------------------------
+    ;//
+    ;//             where the numbering (0-8) is to designate the 9 bytes from
+    ;//             start of a particular row. The illustration doesn't take in
+    ;//             account the positioning of bytes with in the word and the
+    ;//             macro combination with M_EXT_XINT will work only in little
+    ;//             endian environs
+    ;//
+    ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending
+    ;// register numbering
+
+    MACRO
+    M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3
+        IF $offset /= 3
+            LDM         $pSrc, {$word0, $word1, $word2}
+        ELSE
+            LDM         $pSrc, {$word0, $word2, $word3}
+        ENDIF
+        ADD         $pSrc, $pSrc, $srcStep
+    MEND
+
+;// ***************************************************************************
+    ;// Description:
+    ;// Extract four registers of four pixels for X interpolation
+    ;//
+    ;// Syntax:
+    ;// M_EXT_XINT $offset, $word0, $word1, $word2, $word3
+    ;//
+    ;// Inputs:
+    ;// $offset     Difference of source data location to the source pointer
+    ;//             Use when $offset != 0 (unaligned load)
+    ;//
+    ;// $word0, $word1, $word2, $word3
+    ;//             Three of these are inputs based on the $offset parameter.
+    ;//             The inputs are specifically selected to be processed by
+    ;//             the M_EXT_XINT macro.
+    ;//
+    ;//              ------------------------------------------------------
+    ;//             | Offset | Aligned Ptr | word0 | word1 | word2 | word3 |
+    ;//             |------------------------------------------------------|
+    ;//             |    0   |       0     | 0123  | 4567  | 8xxx  | yyyy  |
+    ;//             |    1   |      -1     | x012  | 3456  | 78xx  | yyyy  |
+    ;//             |    2   |      -2     | xx01  | 2345  | 678x  | yyyy  |
+    ;//             |    3   |      -3     | xxx0  | yyyy  | 1234  | 5678  |
+    ;//              ------------------------------------------------------
+    ;//
+    ;// Outputs:
+    ;// $word0, $word1, $word2, $word3
+    ;//             Bytes from the original source pointer (not truncated for
+    ;//             4 byte alignment) as shown in the table.
+    ;//              -------------------------------
+    ;//             | word0 | word1 | word2 | word3 |
+    ;//             |-------------------------------|
+    ;//             | 0123  | 4567  | 1234  | 5678  |
+    ;//              -------------------------------
+    ;//
+    ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending
+    ;// register numbering
+
+    MACRO
+    M_EXT_XINT $offset, $word0, $word1, $word2, $word3
+        IF $offset = 0
+            ; $word0 and $word1 are ok
+            ; $word2, $word3 are just 8 shifted versions
+            MOV         $word3, $word1, LSR #8
+            ORR         $word3, $word3, $word2, LSL #24
+            MOV         $word2, $word0, LSR #8
+            ORR         $word2, $word2, $word1, LSL #24
+        ELIF $offset = 3
+            ; $word2 and $word3 are ok (taken care while loading itself)
+            ; set $word0 & $word1
+            MOV         $word0, $word0, LSR #24
+            ORR         $word0, $word0, $word2, LSL #8
+            MOV         $word1, $word2, LSR #24
+            ORR         $word1, $word1, $word3, LSL #8
+        ELSE
+            MOV         $word0, $word0, LSR #8 * $offset
+            ORR         $word0, $word0, $word1, LSL #(32 - 8 * ($offset))
+            MOV         $word1, $word1, LSR #8 * $offset
+            ORR         $word1, $word1, $word2, LSL #(32 - 8 * ($offset))
+
+            MOV         $word3, $word1, LSR #8
+            ORR         $word3, $word3, $word2, LSL #(32 - 8 * (($offset)+1))
+            MOV         $word2, $word0, LSR #8
+            ORR         $word2, $word2, $word1, LSL #24
+        ENDIF
+    MEND
+
+;// ***************************************************************************
+    ;// Description:
+    ;// Computes half-sum and xor of two inputs and puts them in the input
+    ;// registers in that order
+    ;//
+    ;// Syntax:
+    ;// M_HSUM_XOR      $v0, $v1, $tmp
+    ;//
+    ;// Inputs:
+    ;// $v0         a, first input
+    ;// $v1         b, second input
+    ;// $tmp        scratch register
+    ;//
+    ;// Outputs:
+    ;// $v0         (a + b)/2
+    ;// $v1         a ^ b
+
+    MACRO
+    M_HSUM_XOR      $v0, $v1, $tmp
+        UHADD8      $tmp, $v0, $v1     ;// s0 = (a + b)/2 per byte
+        EOR         $v1, $v0, $v1      ;// l0 = a ^ b
+        MOV         $v0, $tmp          ;// s0
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Calculates average of 4 values (a,b,c,d) for HalfPixelXY predict type in
+    ;// mcReconBlock module. Very specific to the implementation of
+    ;// M_MCRECONBLOCK_HalfPixelXY done here. Uses "tmp" as scratch register and
+    ;// "yMask" for mask variable "0x1010101x" set in it. In yMask 4 lsbs are
+    ;// not significant and are used by the callee for row counter (y)
+    ;//
+    ;// Some points to note are:
+    ;// 1. Input is pair of pair-averages and Xors
+    ;// 2. $sum1 and $lsb1 are not modified and hence can be reused in another
+    ;//    running average
+    ;// 3. Output is in the first argument
+    ;//
+    ;// Syntax:
+    ;// M_AVG4      $sum0, $lsb0, $sum1, $lsb1, $rndVal
+    ;//
+    ;// Inputs:
+    ;// $sum0       (a + b) >> 1, where a and b are 1st and 2nd inputs to be averaged
+    ;// $lsb0       (a ^ b)
+    ;// $sum1       (c + d) >> 1. Not modified
+    ;// $lsb1       (c ^ d)       Not modified
+    ;// $rndVal     Assembler Variable. 0 for rounding, 1 for no rounding
+    ;//
+    ;// Outputs:
+    ;// $sum0       (a + b + c + d + 1) / 4 : If no rounding
+    ;//             (a + b + c + d + 2) / 4 : If rounding
+
+    MACRO
+    M_AVG4          $sum0, $lsb0, $sum1, $lsb1, $rndVal
+        LCLS OP1
+        LCLS OP2
+        IF $rndVal = 0 ;// rounding case
+OP1 SETS "AND"
+OP2 SETS "ORR"
+        ELSE           ;// Not rounding case
+OP1 SETS "ORR"
+OP2 SETS "AND"
+        ENDIF
+
+        LCLS lsb2
+        LCLS sum2
+        LCLS dest
+
+lsb2  SETS "tmp"
+sum2  SETS "$lsb0"
+dest  SETS "$sum0"
+
+        $OP1        $lsb0, $lsb0, $lsb1          ;// e0 = e0 OP1 e1 (AND if rounding, ORR if not)
+        EOR         $lsb2, $sum0, $sum1          ;// e2 = s0 ^ s1
+        $OP2        $lsb2, $lsb2, $lsb0          ;// e2 = e2 OP2 e0 (ORR if rounding, AND if not)
+        AND         $lsb2, $lsb2, yMask, LSR # 4 ;// e2 = e2 & mask
+        UHADD8      $sum2, $sum0, $sum1          ;// s2 = (s0 + s1)/2
+        UADD8       $dest, $sum2, $lsb2          ;// dest =  s2 + e2
+    MEND
+;// ***************************************************************************
+;// Motion compensation handler macros
+;// ***************************************************************************
+    ;// Description:
+    ;// Implement motion compensation routines using the named registers in
+    ;// callee function. Each of the following 4 implement the 4 predict type
+    ;// Each handles 8 cases each ie all the combinations of 4 types of source
+    ;// alignment offsets and 2 types of rounding flag
+    ;//
+    ;// Syntax:
+    ;// M_MCRECONBLOCK_IntegerPixel $rndVal, $offset
+    ;// M_MCRECONBLOCK_HalfPixelX   $rndVal, $offset
+    ;// M_MCRECONBLOCK_HalfPixelY   $rndVal, $offset
+    ;// M_MCRECONBLOCK_HalfPixelXY  $rndVal, $offset
+    ;//
+    ;// Inputs:
+    ;// $rndVal     Assembler Variable. 0 for rounding, 1 for no rounding
+    ;// $offset     $pSrc MOD 4 value. Offset from 4 byte aligned location.
+    ;//
+    ;// Outputs:
+    ;// Outputs come in the named registers of the callee functions
+    ;// The macro loads the data from the source pointer, processes it and
+    ;// stores in the destination pointer. Does the whole prediction cycle
+    ;// of Motion Compensation routine for a particular predictType
+    ;// After this only residue addition to the predicted values remain
+
+    MACRO
+    M_MCRECONBLOCK_IntegerPixel $rndVal, $offset
+    ;// Algorithmic Description:
+    ;// This handles motion compensation for IntegerPixel predictType. Both
+    ;// rounding cases are handled by the same code base. It is just a copy
+    ;// from source to destination. Two lines are done per loop to reduce
+    ;// stalls. Loop has been software pipelined as well for that purpose.
+    ;//
+    ;// M_LOAD_X loads a whole row in two registers and then they are stored
+
+CaseIntegerPixelRnd0Offset$offset
+CaseIntegerPixelRnd1Offset$offset
+    M_LOAD_X    pSrc, srcStep, tmp1, tmp2, tmp3, $offset
+    M_LOAD_X    pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+YloopIntegerPixelOffset$offset
+    SUBS        y, y, #2
+    STRD        tmp1, tmp2, [pDst], dstStep
+    STRD        tmp3, tmp4, [pDst], dstStep
+    M_LOAD_X    pSrc, srcStep, tmp1, tmp2, tmp3, $offset
+    M_LOAD_X    pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+    BGT         YloopIntegerPixelOffset$offset
+
+    B           SwitchPredictTypeEnd
+    MEND
+;// ***************************************************************************
+    MACRO
+    M_MCRECONBLOCK_HalfPixelX $rndVal, $offset
+    ;// Algorithmic Description:
+    ;// This handles motion compensation for HalfPixelX predictType. The two
+    ;// rounding cases are handled by the different code base and spanned by
+    ;// different macro calls. Loop has been software pipelined to reduce
+    ;// stalls.
+    ;//
+    ;// Filtering involves averaging a pixel with the next horizontal pixel.
+    ;// M_LOAD_XINT and M_EXT_XINT combination generate 4 registers, 2 with
+    ;// all pixels in a row with 4 pixel in each register and another 2
+    ;// registers with pixels corresponding to one horizontally shifted pixel
+    ;// corresponding to the initial row pixels. These are set of packed
+    ;// registers appropriate to do 4 lane SIMD.
+    ;// After that M_UHADD8R macro does the averaging taking care of the
+    ;// rounding as required
+
+CaseHalfPixelXRnd$rndVal.Offset$offset
+    IF $rndVal = 0
+        LDR mask, =0x80808080
+    ENDIF
+
+    M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4
+YloopHalfPixelXRnd$rndVal.Offset$offset
+    SUBS        y, y, #1
+    M_EXT_XINT  $offset, tmp1, tmp2, tmp3, tmp4
+    M_UHADD8R   tmp5, tmp1, tmp3, (1-$rndVal), mask
+    M_UHADD8R   tmp6, tmp2, tmp4, (1-$rndVal), mask
+    STRD        tmp5, tmp6, [pDst], dstStep
+    M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4
+    BGT         YloopHalfPixelXRnd$rndVal.Offset$offset
+
+    B           SwitchPredictTypeEnd
+    MEND
+;// ***************************************************************************
+    MACRO
+    M_MCRECONBLOCK_HalfPixelY $rndVal, $offset
+    ;// Algorithmic Description:
+    ;// This handles motion compensation for HalfPixelY predictType. The two
+    ;// rounding cases are handled by the different code base and spanned by
+    ;// different macro calls. PreLoading is used to avoid reload of same data.
+    ;//
+    ;// Filtering involves averaging a pixel with the next vertical pixel.
+    ;// M_LOAD_X generates 2 registers with all pixels in a row with 4 pixel in
+    ;// each register. These are set of packed registers appropriate to do
+    ;// 4 lane SIMD. After that M_UHADD8R macro does the averaging taking care
+    ;// of the rounding as required
+
+CaseHalfPixelYRnd$rndVal.Offset$offset
+    IF $rndVal = 0
+        LDR mask, =0x80808080
+    ENDIF
+
+    M_LOAD_X    pSrc, srcStep, tmp1, tmp2, tmp5, $offset ;// Pre-load
+YloopHalfPixelYRnd$rndVal.Offset$offset
+    SUBS        y, y, #2
+    ;// Processing one line
+    M_LOAD_X    pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+    M_UHADD8R   tmp1, tmp1, tmp3, (1-$rndVal), mask
+    M_UHADD8R   tmp2, tmp2, tmp4, (1-$rndVal), mask
+    STRD        tmp1, tmp2, [pDst], dstStep
+    ;// Processing another line
+    M_LOAD_X    pSrc, srcStep, tmp1, tmp2, tmp5, $offset
+    M_UHADD8R   tmp3, tmp3, tmp1, (1-$rndVal), mask
+    M_UHADD8R   tmp4, tmp4, tmp2, (1-$rndVal), mask
+    STRD        tmp3, tmp4, [pDst], dstStep
+
+    BGT         YloopHalfPixelYRnd$rndVal.Offset$offset
+
+    B           SwitchPredictTypeEnd
+    MEND
+;// ***************************************************************************
+    MACRO
+    M_MCRECONBLOCK_HalfPixelXY $rndVal, $offset
+    ;// Algorithmic Description:
+    ;// This handles motion compensation for HalfPixelXY predictType. The two
+    ;// rounding cases are handled by the different code base and spanned by
+    ;// different macro calls. PreLoading is used to avoid reload of same data.
+    ;//
+    ;// Filtering involves averaging a pixel with the next vertical, horizontal
+    ;// and right-down diagonal pixels. Just as in HalfPixelX case, M_LOAD_XINT
+    ;// and M_EXT_XINT combination generates 4 registers with a row and its
+    ;// 1 pixel right shifted version, with 4 pixels in one register. Another
+    ;// call of that macro-combination gets another row. Then M_HSUM_XOR is
+    ;// called to get mutual half-sum and xor combinations of a row with its
+    ;// shifted version as they are inputs to the M_AVG4 macro which computes
+    ;// the 4 element average with rounding. Note that it is the half-sum/xor
+    ;// values that are preserved for next row as they can be re-used in the
+    ;// next call to the M_AVG4 and saves recomputation.
+    ;// Due to lack of register, the row counter and a masking value required
+    ;// in M_AVG4 are packed into a single register yMask where the last nibble
+    ;// holds the row counter values and rest holds the masking variable left
+    ;// shifted by 4
+
+CaseHalfPixelXYRnd$rndVal.Offset$offset
+    LDR         yMask, =((0x01010101 << 4) + 8)
+
+    M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b'
+    M_EXT_XINT  $offset, t00, t01, t10, t11
+    M_HSUM_XOR  t00, t10, tmp               ;// s0, l0
+    M_HSUM_XOR  t01, t11, tmp               ;// s0', l0'
+
+YloopHalfPixelXYRnd$rndVal.Offset$offset
+    ;// Processing one line
+    ;// t00, t01, t10, t11 required from previous loop
+    M_LOAD_XINT pSrc, srcStep, $offset, t20, t21, t30, t31 ;// Load c, c', d, d'
+    SUB         yMask, yMask, #2            ;// row counter (low nibble) -= 2
+    M_EXT_XINT  $offset, t20, t21, t30, t31
+    M_HSUM_XOR  t20, t30, tmp               ;// s1, l1
+    M_HSUM_XOR  t21, t31, tmp               ;// s1', l1'
+    M_AVG4      t00, t10, t20, t30, $rndVal ;// s0, l0, s1, l1
+    M_AVG4      t01, t11, t21, t31, $rndVal ;// s0', l0', s1', l1'
+    STRD        t00, t01, [pDst], dstStep   ;// store the average
+
+    ;// Processing another line
+    ;// t20, t21, t30, t31 required from above
+    M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b'
+    TST         yMask, #7                   ;// Z set once the row counter reaches 0
+    M_EXT_XINT  $offset, t00, t01, t10, t11
+    M_HSUM_XOR  t00, t10, tmp
+    M_HSUM_XOR  t01, t11, tmp
+    M_AVG4      t20, t30, t00, t10, $rndVal
+    M_AVG4      t21, t31, t01, t11, $rndVal
+    STRD        t20, t21, [pDst], dstStep
+
+    BNE         YloopHalfPixelXYRnd$rndVal.Offset$offset ;// NE, not GT: TST leaves V unchanged (stale from caller)
+
+    IF $offset/=3 :LOR: $rndVal/=1
+        B           SwitchPredictTypeEnd
+    ENDIF
+    MEND
+;// ***************************************************************************
+;// Motion compensation handler macros end here
+;// ***************************************************************************
+    ;// Description:
+    ;// Populates all 4 kinds of offsets "cases" for each predictType and rndVal
+    ;// combination in the "switch" to prediction processing code segment
+    ;//
+    ;// Syntax:
+    ;// M_CASE_OFFSET $rnd, $predictType
+    ;//
+    ;// Inputs:
+    ;// $rnd            0 for rounding, 1 for no rounding
+    ;// $predictType    The prediction mode
+    ;//
+    ;// Outputs:
+    ;// Populated list of "M_CASE"s for the "M_SWITCH" macro
+
+    MACRO
+    M_CASE_OFFSET $rnd, $predictType
+        M_CASE      Case$predictType.Rnd$rnd.Offset0
+        M_CASE      Case$predictType.Rnd$rnd.Offset1
+        M_CASE      Case$predictType.Rnd$rnd.Offset2
+        M_CASE      Case$predictType.Rnd$rnd.Offset3
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Populates all 2 kinds of rounding "cases" for each predictType in the
+    ;// "switch" to prediction processing code segment
+    ;//
+    ;// Syntax:
+    ;// M_CASE_MCRECONBLOCK $predictType
+    ;//
+    ;// Inputs:
+    ;// $predictType    The prediction mode
+    ;//
+    ;// Outputs:
+    ;// Populated list of "M_CASE_OFFSET" macros
+
+    MACRO
+    M_CASE_MCRECONBLOCK $predictType
+        M_CASE_OFFSET  0, $predictType ;// 0 for rounding
+        M_CASE_OFFSET  1, $predictType ;// 1 for no rounding
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Populates all 8 kinds of rounding and offset combinations handling macros
+    ;// for the specified predictType. In case of "IntegerPixel" predictType,
+    ;// rounding is not required so same code segment handles both cases
+    ;//
+    ;// Syntax:
+    ;// M_MCRECONBLOCK    $predictType
+    ;//
+    ;// Inputs:
+    ;// $predictType    The prediction mode
+    ;//
+    ;// Outputs:
+    ;// Populated list of "M_MCRECONBLOCK_<predictType>" macros for specified
+    ;// predictType. Each
+    ;//                 M_MCRECONBLOCK_<predictType> $rnd, $offset
+    ;// is a code segment (starting with a label indicating the predictType,
+    ;// rounding and offset combination)
+    ;// Four calls of this macro with the 4 prediction modes populate all the 32
+    ;// handlers
+
+    MACRO
+    M_MCRECONBLOCK $predictType
+        M_MCRECONBLOCK_$predictType 0, 0
+        M_MCRECONBLOCK_$predictType 0, 1
+        M_MCRECONBLOCK_$predictType 0, 2
+        M_MCRECONBLOCK_$predictType 0, 3
+    IF "$predictType" /= "IntegerPixel" ;// If not IntegerPixel then rounding makes a difference
+        M_MCRECONBLOCK_$predictType 1, 0
+        M_MCRECONBLOCK_$predictType 1, 1
+        M_MCRECONBLOCK_$predictType 1, 2
+        M_MCRECONBLOCK_$predictType 1, 3
+    ENDIF
+    MEND
+;// ***************************************************************************
+;// Input/Output Registers
+pSrc                  RN 0
+srcStep               RN 1
+arg_pSrcResidue       RN 2
+pSrcResidue           RN 12
+pDst                  RN 3
+dstStep               RN 2
+predictType           RN 10
+rndVal                RN 11
+mask                  RN 11
+
+;// Local Scratch Registers
+zero                  RN 12
+y                     RN 14
+
+tmp1                  RN 4
+tmp2                  RN 5
+tmp3                  RN 6
+tmp4                  RN 7
+tmp5                  RN 8
+tmp6                  RN 9
+tmp7                  RN 10
+tmp8                  RN 11
+tmp9                  RN 12
+
+t00                   RN 4
+t01                   RN 5
+t10                   RN 6
+t11                   RN 7
+t20                   RN 8
+t21                   RN 9
+t30                   RN 10
+t31                   RN 11
+tmp                   RN 12
+
+yMask                 RN 14
+
+dst                   RN 1
+return                RN 0
+
+    ;// Allocate memory on stack
+    M_ALLOC4    Stk_pDst,           4
+    M_ALLOC4    Stk_pSrcResidue,    4
+    ;// Function header
+    M_START     omxVCM4P2_MCReconBlock, r11
+    ;// Define stack arguments
+    M_ARG       Arg_dstStep,        4
+    M_ARG       Arg_predictType,    4
+    M_ARG       Arg_rndVal,         4
+    ;// Save on stack
+    M_STR       pDst, Stk_pDst
+    M_STR       arg_pSrcResidue, Stk_pSrcResidue
+    ;// Load argument from the stack
+    M_LDR       dstStep, Arg_dstStep
+    M_LDR       predictType, Arg_predictType
+    M_LDR       rndVal, Arg_rndVal
+
+    MOV         y, #8
+
+    ;// Switch index = predictType*8 + rndVal*4 + (pSrc & 3)
+    AND         tmp1, pSrc, #3
+    ORR         predictType, tmp1, predictType, LSL #3
+    ORR         predictType, predictType, rndVal, LSL #2
+    ;// Truncating source pointer to align to 4 byte location
+    BIC         pSrc, pSrc, #3
+
+    ;// Implementation takes care of all combinations of different
+    ;// predictTypes, rounding cases and source pointer offsets to alignment
+    ;// of 4 bytes in different code bases unless one of these parameter wasn't
+    ;// making any difference to the implementation. Below M_CASE_MCRECONBLOCK
+    ;// macros branch into 8 M_CASE macros for all combinations of the 2
+    ;// rounding cases and 4 offsets of the pSrc pointer to the 4 byte
+    ;// alignment.
+    M_SWITCH    predictType
+        M_CASE_MCRECONBLOCK IntegerPixel
+        M_CASE_MCRECONBLOCK HalfPixelX
+        M_CASE_MCRECONBLOCK HalfPixelY
+        M_CASE_MCRECONBLOCK HalfPixelXY
+    M_ENDSWITCH
+
+    ;// The M_MCRECONBLOCK macros populate the code bases by calling all 8
+    ;// particular macros (4 in case of IntegerPixel as rounding makes no
+    ;// difference there) to generate the code for all cases of rounding and
+    ;// offsets. LTORG is used to segment the code as code size bloated beyond
+    ;// 4KB.
+    M_MCRECONBLOCK IntegerPixel
+    M_MCRECONBLOCK HalfPixelX
+    LTORG
+    M_MCRECONBLOCK HalfPixelY
+    M_MCRECONBLOCK HalfPixelXY
+SwitchPredictTypeEnd
+
+    ;// Residue Addition
+    ;// This is done in 2 lane SIMD though loads are further optimized and
+    ;// 4 bytes are loaded in case of destination buffer. Algorithmic
+    ;// details are in inlined comments
+    M_LDR       pSrcResidue, Stk_pSrcResidue
+    CMP         pSrcResidue, #0
+    BEQ         pSrcResidueConditionEnd
+pSrcResidueNotNull
+    M_LDR       pDst, Stk_pDst
+    MOV         y, #8
+    SUB         dstStep, dstStep, #4       ;// first store of each row already advances pDst by 4
+Yloop_pSrcResidueNotNull
+    SUBS        y, y, #1
+    LDR         dst, [pDst]                ;// dst = [dcba]
+    LDMIA       pSrcResidue!, {tmp1, tmp2} ;// tmp1=[BA] tmp2=[DC]
+    PKHBT       tmp3, tmp1, tmp2, LSL #16  ;// Deltaval1 = [C A]
+    PKHTB       tmp4, tmp2, tmp1, ASR #16  ;// DeltaVal2 = [D B]
+    UXTB16      tmp1, dst                  ;// tmp1 = [0c0a]
+    UXTB16      tmp2, dst, ROR #8          ;// tmp2 = [0d0b]
+    QADD16      tmp1, tmp1, tmp3           ;// Add and saturate to 16 bits
+    QADD16      tmp2, tmp2, tmp4
+    USAT16      tmp1, #8, tmp1
+    USAT16      tmp2, #8, tmp2             ;// armClip(0, 255, tmp2)
+    ORR         tmp1, tmp1, tmp2, LSL #8   ;// tmp1 = [dcba]
+    STR         tmp1, [pDst], #4
+
+    LDR         dst, [pDst]                ;// same steps for pixels 4..7 of the row
+    LDMIA       pSrcResidue!, {tmp1, tmp2}
+    PKHBT       tmp3, tmp1, tmp2, LSL #16
+    PKHTB       tmp4, tmp2, tmp1, ASR #16
+    UXTB16      tmp1, dst
+    UXTB16      tmp2, dst, ROR #8
+    QADD16      tmp1, tmp1, tmp3
+    QADD16      tmp2, tmp2, tmp4
+    USAT16      tmp1, #8, tmp1
+    USAT16      tmp2, #8, tmp2
+    ORR         tmp1, tmp1, tmp2, LSL #8
+    STR         tmp1, [pDst], dstStep
+
+    BGT         Yloop_pSrcResidueNotNull
+pSrcResidueConditionEnd
+
+    MOV         return, #OMX_Sts_NoErr
+
+    M_END
+    ENDIF ;// ARM1136JS
+
+;// ***************************************************************************
+;// CortexA8 implementation
+;// ***************************************************************************
+    END
+;// ***************************************************************************
+;// omxVCM4P2_MCReconBlock ends
+;// ***************************************************************************
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
new file mode 100644
index 0000000..213444a
--- /dev/null
+++
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
@@ -0,0 +1,283 @@
+; **********
+; *
+; * File Name:  omxVCM4P2_PredictReconCoefIntra_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains module for DC/AC coefficient prediction
+; *
+; *
+; * Function: omxVCM4P2_PredictReconCoefIntra
+; *
+; * Description:
+; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+; * to the function call, prediction direction (predDir) should be selected
+; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]  pSrcDst      pointer to the coefficient buffer which contains the
+; *                    quantized coefficient residuals (PQF) of the current
+; *                    block; must be aligned on a 4-byte boundary. The
+; *                    output coefficients are saturated to the range
+; *                    [-2048, 2047].
+; * [in]  pPredBufRow  pointer to the coefficient row buffer; must be aligned
+; *                    on a 4-byte boundary.
+; * [in]  pPredBufCol  pointer to the coefficient column buffer; must be
+; *                    aligned on a 4-byte boundary.
+; * [in]  curQP        quantization parameter of the current block. curQP may
+; *                    equal to predQP especially when the current block and
+; *                    the predictor block are in the same macroblock.
+; * [in]  predQP       quantization parameter of the predictor block
+; * [in]  predDir      indicates the prediction direction which takes one
+; *                    of the following values:
+; *                    OMX_VIDEO_HORIZONTAL    predict horizontally
+; *                    OMX_VIDEO_VERTICAL      predict vertically
+; * [in]  ACPredFlag   a flag indicating if AC prediction should be
+; *                    performed. It is equal to ac_pred_flag in the bit
+; *                    stream syntax of MPEG-4
+; * [in]  videoComp    video component type (luminance, chrominance or
+; *                    alpha) of the current block
+; * [out] pSrcDst      pointer to the coefficient buffer which contains
+; *                    the quantized coefficients (QF) of the current
+; *                    block
+; * [out] pPredBufRow  pointer to the updated coefficient row buffer
+; * [out] pPredBufCol  pointer to the updated coefficient column buffer
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - Bad arguments
+; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
+; * - At least one the following cases: curQP <= 0, predQP <= 0, curQP >31,
+; *   predQP > 31, preDir exceeds [1,2].
+; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not
+; *   4-byte aligned.
+; *
+; *********
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+       M_VARIANTS ARM1136JS
+
+
+
+       IMPORT        armVCM4P2_Reciprocal_QP_S32
+       IMPORT        armVCM4P2_Reciprocal_QP_S16
+       IMPORT        armVCM4P2_DCScaler
+
+
+
+        IF ARM1136JS
+
+
+;// Input Arguments
+
+pSrcDst          RN 0
+pPredBufRow      RN 1
+pPredBufCol      RN 2
+curQP            RN 3
+QP               RN 3
+predQP           RN 4
+predDir          RN 5
+ACPredFlag       RN 6
+videoComp        RN 7
+
+;// Local Variables
+
+temp2            RN 5
+negCurQP         RN 7
+negdcScaler      RN 7
+tempPred         RN 8
+
+dcScaler         RN 4
+CoeffTable       RN 9
+absCoeffDC       RN 9
+temp3            RN 6
+absCoeffAC       RN 6
+
+shortVideoHeader RN 9
+predCoeffTable   RN 10
+Count            RN 10
+temp1            RN 12
+index            RN 12
+Rem              RN 14
+temp             RN 11
+Return           RN 0
+
+
+
+        M_START   omxVCM4P2_PredictReconCoefIntra,r12
+
+        ;// Assigning pointers to Input arguments on Stack
+
+        M_ARG           predQPonStack,4
+        M_ARG           predDironStack,4
+        M_ARG           ACPredFlagonStack,4
+        M_ARG           videoComponStack,4
+
+        ;// DC Prediction
+
+        M_LDR           videoComp,videoComponStack                     ;// Load videoComp From Stack
+
+        M_LDR           predDir,predDironStack                         ;// Load Prediction direction
+
+        ;// dcScaler Calculation
+
+        LDR             index, =armVCM4P2_DCScaler
+        ADD             index,index,videoComp,LSL #5                   ;// 32 byte entries per videoComp value
+        LDRB            dcScaler,[index,QP]
+
+
+calDCVal
+
+
+        LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S16   ;// Loading the table with entries 32767/(1 to 63)
+
+        CMP             predDir,#2                                     ;// Check if the Prediction direction is vertical
+
+        ;// Calculate tempPred by performing Division
+
+        LDREQSH         absCoeffDC,[pPredBufRow]                       ;// If vertical load the coeff from Row Prediction Buffer
+        LDRNESH         absCoeffDC,[pPredBufCol]                       ;// If horizontal load the coeff from column Prediction Buffer
+
+        RSB             negdcScaler,dcScaler,#0                        ;// negdcScaler=-dcScaler
+
+        MOV             temp1,absCoeffDC                               ;// temp1=prediction coeff
+        CMP             temp1,#0
+        RSBLT           absCoeffDC,temp1,#0                            ;//absCoeffDC=abs(temp1)
+
+        ADD             temp,dcScaler,dcScaler
+        LDRH            temp,[predCoeffTable,temp]                     ;// Load value from coeff table for performing division using multiplication
+
+        SMULBB          tempPred,temp,absCoeffDC                       ;// tempPred=pPredBufRow(Col)[0]*32767/dcScaler
+        ADD             temp3,dcScaler,#1
+        LSR             tempPred,tempPred,#15                          ;// tempPred=pPredBufRow(Col)[0]/dcScaler
+        LSR             temp3,temp3,#1                                 ;// temp3=round(dcScaler/2)
+
+        MLA             Rem,negdcScaler,tempPred,absCoeffDC            ;// Rem = pPredBufRow(Col)[0]-tempPred*dcScaler
+
+
+        LDRH            temp,[pPredBufCol]
+        CMP             Rem,temp3
+        ADDGE           tempPred,#1                                    ;// If Rem>=round(dcScaler/2);tempPred=tempPred+1
+        CMP             temp1,#0
+        RSBLT           tempPred,tempPred,#0                           ;// if pPredBufRow(Col)[0]<0; tempPred=-tempPred
+
+
+        STRH            temp,[pPredBufRow,#-16]                        ;// pPredBufRow[-8]=old pPredBufCol[0] (loaded above)
+
+        LDRH            temp,[pSrcDst]                                 ;// temp=pSrcDst[0]
+        M_LDR           ACPredFlag,ACPredFlagonStack
+        ADD             temp,temp,tempPred                             ;// temp=pSrcDst[0]+tempPred
+        SSAT16          temp,#12,temp                                  ;// clip temp to [-2048,2047]
+
+        SMULBB          temp1,temp,dcScaler                            ;// temp1=clipped(pSrcDst[0])*dcScaler
+        M_LDR           predQP,predQPonStack
+        STRH            temp,[pSrcDst]
+        CMP             ACPredFlag,#1                                  ;// Check if the AC prediction flag is set or not
+        STRH            temp1,[pPredBufCol]                            ;// store temp1 to pPredBufCol
+
+        ;// AC Prediction
+
+
+        BNE             Exit                                           ;// If not set Exit
+
+        LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S32   ;// Loading the table with entries 0x1ffff/(1 to 63)
+        MOV             temp1,#4
+        MUL             temp1,curQP,temp1                              ;// temp1=curQP*4 (byte offset of a word entry)
+        CMP             predDir,#2                                     ;// Check the Prediction direction
+        RSB             negCurQP,curQP,#0
+        LDR             CoeffTable,[predCoeffTable,temp1]              ;// CoeffTable=0x1ffff/curQP
+        ADD             curQP,curQP,#1                                 ;// curQP=curQP+1
+        LSR             curQP,curQP,#1                                 ;// curQP=round(curQP/2)
+        MOV             Count,#2                                       ;// Initializing the Loop Count
+        BNE             Horizontal                                     ;// If the Prediction direction is horizontal branch to Horizontal
+
+
+
+loop1
+        ;// Calculate tempPred
+
+        LDRSH           absCoeffAC,[pPredBufRow,Count]                 ;// absCoeffAC=pPredBufRow[i], 1=<i<=7
+        MOV             temp1,absCoeffAC
+        CMP             temp1,#0                                       ;// compare pPredBufRow[i] with zero, 1=<i<=7
+        RSBLT           absCoeffAC,temp1,#0                            ;// absCoeffAC= abs(pPredBufRow[i])
+
+        SMULBB          absCoeffAC,absCoeffAC,predQP                   ;// absCoeffAC=abs(pPredBufRow[i])*predQP
+        MUL             tempPred,absCoeffAC,CoeffTable                 ;// tempPred=pPredBufRow[i]*predQP*0x1ffff/curQP
+        LSR             tempPred,tempPred,#17
+
+        MLA             Rem,negCurQP,tempPred,absCoeffAC               ;// Rem=abs(pPredBufRow[i])*predQP-tempPred*curQP
+        LDRH            temp,[pSrcDst,Count]                           ;// temp=pSrcDst[i],1<=i<8
+
+        CMP             Rem,curQP
+        ADDGE           tempPred,#1                                    ;// if Rem>=round(curQP/2); tempPred=tempPred+1
+        CMP             temp1,#0
+        RSBLT           tempPred,tempPred,#0                           ;// if pPredBufRow[i]<0 ; tempPred=-tempPred
+
+        ;// Update source and Row Prediction buffers
+
+        ADD             temp,temp,tempPred                             ;// temp=tempPred+pSrcDst[i]
+        SSAT16          temp,#12,temp                                  ;// Clip temp to [-2048,2047]
+        STRH            temp,[pSrcDst,Count]
+        STRH            temp,[pPredBufRow,Count]                       ;// pPredBufRow[i]=temp
+        ADD             Count,Count,#2                                 ;// i=i+1
+        CMP             Count,#16                                      ;// compare if i=8
+        BLT             loop1
+        B               Exit                                           ;// Branch to exit
+
+Horizontal
+
+        MOV             Count,#16                                      ;// Initializing i=8
+
+loop2
+
+        LSR             temp2,Count,#3                                 ;// temp2=i>>3
+
+        ;// Calculate tempPred
+
+        LDRSH           absCoeffAC,[pPredBufCol,temp2]                 ;// signed load (was LDRH): absCoeffAC=pPredBufCol[i>>3]
+        MOV             temp1,absCoeffAC
+        CMP             temp1,#0                                       ;// compare pPredBufCol[i>>3] with zero
+        RSBLT           absCoeffAC,temp1,#0                            ;// absCoeffAC=abs(pPredBufCol[i>>3])
+
+        SMULBB          absCoeffAC,absCoeffAC,predQP                   ;// absCoeffAC=abs(pPredBufCol[i>>3])*predQP
+        MUL             tempPred,absCoeffAC,CoeffTable                 ;// tempPred=pPredBufCol[i>>3]*predQP*0x1ffff/curQP
+        LSR             tempPred,tempPred,#17                          ;// tempPred=pPredBufCol[i>>3]*predQP/curQP
+
+        MLA             Rem,negCurQP,tempPred,absCoeffAC
+        LDRH            temp,[pSrcDst,Count]                           ;// temp=pSrcDst[i]
+
+        CMP             Rem,curQP                                      ;// Compare Rem with round(curQP/2)
+        ADDGE           tempPred,#1                                    ;// tempPred=tempPred+1 if Rem>=round(curQP/2)
+        CMP             temp1,#0
+        RSBLT           tempPred,tempPred,#0                           ;// if pPredBufCol[i>>3]<0 tempPred=-tempPred
+
+        ;// Update source and Column Prediction buffers
+
+        ADD             temp,temp,tempPred                             ;// temp=pSrcDst[i]+tempPred
+        SSAT16          temp,#12,temp                                  ;// Clip temp to [-2048,2047]
+        STRH            temp,[pSrcDst,Count]                           ;// pSrcDst[i]= clipped value
+        STRH            temp,[pPredBufCol,temp2]                       ;// pPredBufCol[i>>3]=temp
+        ADD             Count,Count,#16                                ;// i=i+8
+        CMP             Count,#128                                     ;// compare i with 64
+        BLT             loop2
+
+
+Exit
+
+        MOV             Return,#OMX_Sts_NoErr
+
+        M_END
+        ENDIF
+        END
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
new file mode 100644
index 0000000..c9591cb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
@@ -0,0 +1,141 @@
+;/**
+; *
+; * File Name:  omxVCM4P2_QuantInvInter_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for inter reconstruction
+; *
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvInter_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst          pointer to the input (quantized) intra/inter block.
Must be 16-byte aligned.
+; * [in] QP               quantization parameter (quantiser_scale)
+; * [in] videoComp        (Intra version only.) Video component type of the
+; *                       current block. Takes one of the following flags:
+; *                       OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; *                       OMX_VC_ALPHA.
+; * [in] shortVideoHeader a flag indicating presence of short_video_header;
+; *                       shortVideoHeader==1 selects linear intra DC mode,
+; *                       and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out] pSrcDst         pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; *    - If pSrcDst is NULL or is not 16-byte aligned.
+; *      or
+; *    - If QP <= 0.
+; *      or
+; *    - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+; */
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+
+
+        IF ARM1136JS
+
+;//Input Arguments
+pSrcDst          RN 0
+QP               RN 1
+
+;//Local Variables
+Return           RN 0
+Count            RN 4
+tempVal21        RN 2
+tempVal43        RN 3
+QP1              RN 5
+X2               RN 6
+X3               RN 14
+Result1          RN 8
+Result2          RN 9
+two              RN 7
+
+        ;// In-place inverse quantization of 64 16-bit coefficients at pSrcDst.
+        ;// For every coefficient c:
+        ;//     c == 0 : result = 0
+        ;//     c != 0 : result = sign(c) * (2*|c|*QP + QP1), clipped to [-2048,2047]
+        ;// where QP1 = QP when QP is odd and QP-1 when QP is even.
+        ;// This routine performs no argument validation and always returns
+        ;// OMX_Sts_NoErr in r0.
+
+        M_START omxVCM4P2_QuantInvInter_I,r9
+
+        MOV      Count,#64                          ;// 64 coefficients, four per iteration
+        TST      QP,#1
+        LDRD     tempVal21,[pSrcDst]                ;// Loads first two values of pSrcDst to tempVal21,
+                                                    ;// next two values to tempVal43
+        SUBEQ    QP1,QP,#1                          ;// QP1=QP if QP is odd , QP1=QP-1 if QP is even
+        MOVNE    QP1,QP
+        MOV      two,#2
+
+
+
+Loop
+
+
+        SMULBB   X2,tempVal21,two                   ;// X2= first val(lower 16 bits of tempVal21)*2
+        CMP      X2,#0
+
+        RSBLT    X2,X2,#0                           ;// X2=2*absoluteval(first val)
+        SMLABBNE X2,QP,X2,QP1                       ;// X2=2*absval(first val)*QP+QP if QP is odd
+                                                    ;// X2=2*absval(first val)*QP+QP-1 if QP is even
+        SMULTB   X3,tempVal21,two                   ;// X3= second val(top 16 bits of tempVal21)*2
+        RSBLT    X2,X2,#0                           ;// restore the sign of the first val (flags still from CMP X2)
+
+        CMP      X3,#0
+
+        RSBLT    X3,X3,#0                           ;// X3=2*absoluteval(second val)
+        SMLABBNE X3,QP,X3,QP1                       ;// X3=2*absval(second val)*QP+QP1, left at 0 for a zero input
+
+        RSBLT    X3,X3,#0                           ;// restore the sign of the second val
+
+        ;// Clip the full 32-bit results BEFORE packing. PKHBT keeps only the low
+        ;// 16 bits of each operand, so packing first would let values above 32767
+        ;// wrap (e.g. val=600, QP=31 gives 37231 -> 0x916F) and then saturate to
+        ;// the wrong rail.
+        SSAT     X2,#12,X2                          ;// clip to range [-2048,2047]
+        SSAT     X3,#12,X3                          ;// clip to range [-2048,2047]
+        PKHBT    Result1,X2,X3,LSL #16              ;// Result1[0-15]=X2[0-15],Result1[16-31]=X3[0-15]
+        SMULBB   X2,tempVal43,two                   ;// X2= third val(lower 16 bits of tempVal43)*2
+        CMP      X2,#0
+
+
+
+        RSBLT    X2,X2,#0                           ;// X2=2*absoluteval(third val)
+        SMLABBNE X2,QP,X2,QP1
+        SMULTB   X3,tempVal43,two                   ;// X3= fourth val(top 16 bits of tempVal43)*2
+        RSBLT    X2,X2,#0                           ;// restore the sign of the third val
+        CMP      X3,#0
+
+        ;// Read-ahead for the next iteration; the load does not change the flags.
+        ;// NOTE(review): on the final iteration this reads the 8 bytes that follow
+        ;// the 128-byte block; the loaded values are never used.
+        LDRD     tempVal21,[pSrcDst,#8]             ;// Load next four Values to tempVal21,tempVal43
+
+        RSBLT    X3,X3,#0                           ;// X3=2*absoluteval(fourth val)
+        SMLABBNE X3,QP,X3,QP1
+        RSBLT    X3,X3,#0                           ;// restore the sign of the fourth val
+        SSAT     X2,#12,X2                          ;// clip to range [-2048,2047]
+        SSAT     X3,#12,X3                          ;// clip to range [-2048,2047]
+        PKHBT    Result2,X2,X3,LSL #16              ;// Result2[0-15]=X2[0-15],Result2[16-31]=X3[0-15]
+
+        SUBS     Count,Count,#4                     ;// Decrement Count by 4 and continue if it has not reached 0
+        STRD     Result1,[pSrcDst],#8               ;// Store Double words and increment the pointer to point the next store address
+
+
+
+        BGT      Loop
+
+        MOV      Return,#OMX_Sts_NoErr
+
+        M_END
+        ENDIF
+        END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
new file mode 100644
index 0000000..6328e01
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
@@ -0,0 +1,188 @@
+;/**
+; *
+; * File Name:  omxVCM4P2_QuantInvIntra_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for intra reconstruction
+; *
+; *
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvIntra_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst          pointer to the input (quantized) intra/inter block.
Must be 16-byte aligned.
+; * [in] QP               quantization parameter (quantiser_scale)
+; * [in] videoComp        (Intra version only.) Video component type of the
+; *                       current block. Takes one of the following flags:
+; *                       OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; *                       OMX_VC_ALPHA.
+; * [in] shortVideoHeader a flag indicating presence of short_video_header;
+; *                       shortVideoHeader==1 selects linear intra DC mode,
+; *                       and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out] pSrcDst         pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; *    - If pSrcDst is NULL or is not 16-byte aligned.
+; *      or
+; *    - If QP <= 0.
+; *      or
+; *    - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+
+        IMPORT armVCM4P2_DCScaler
+
+
+
+        IF ARM1136JS
+
+;//Input Arguments
+pSrcDst          RN 0
+QP               RN 1
+videoComp        RN 2
+shortVideoHeader RN 3
+
+;//Local Variables
+;// (dcScaler/tempVal21 share r4 and index/QP1 share r6; each pair is live
+;//  at different times)
+Return           RN 0
+dcScaler         RN 4
+temp             RN 12
+index            RN 6
+
+tempVal21        RN 4
+tempVal43        RN 5
+QP1              RN 6
+X2               RN 7
+X3               RN 14
+Result1          RN 8
+Result2          RN 9
+two              RN 10
+Count            RN 11
+
+
+
+        ;// In-place inverse quantization of 64 16-bit coefficients at pSrcDst.
+        ;// The DC result (pSrcDst[0]*dcScaler, clipped) is computed first and
+        ;// held in temp; the loop then processes all 64 entries with the AC
+        ;// formula, and the DC result is written over entry 0 at the end.
+        ;// This routine performs no argument validation and always returns
+        ;// OMX_Sts_NoErr in r0.
+
+        M_START omxVCM4P2_QuantInvIntra_I,r11
+
+
+
+        ;// Perform Inverse Quantization for DC coefficient
+
+        TEQ      shortVideoHeader,#0                ;// Test if short Video Header flag =0
+        MOVNE    dcScaler,#8                        ;// if shortVideoHeader is non zero dcScaler=8
+        BNE      calDCVal
+        LDR      index, =armVCM4P2_DCScaler         ;// otherwise dcScaler=armVCM4P2_DCScaler[videoComp*32+QP]
+        ADD      index,index,videoComp,LSL #5
+        LDRB     dcScaler,[index,QP]
+
+
+        ;//M_CalDCScalar  shortVideoHeader,videoComp, QP
+
+calDCVal
+
+        LDRH     temp,[pSrcDst]
+        SMULBB   temp,temp,dcScaler                 ;// dcCoeff = dcScaler * Quantized DC coefficient(from memory)
+        SSAT     temp,#12,temp                      ;// Saturating to 12 bits
+
+
+        MOV      Count,#64                          ;// 64 coefficients, four per iteration
+        TST      QP,#1
+        LDRD     tempVal21,[pSrcDst]                ;// Loads first two values of pSrcDst to tempVal21,
+                                                    ;// next two values to tempVal43
+        SUBEQ    QP1,QP,#1                          ;// QP1=QP if QP is odd , QP1=QP-1 if QP is even
+        MOVNE    QP1,QP
+        MOV      two,#2
+
+
+
+
+
+Loop
+
+
+        SMULBB   X2,tempVal21,two                   ;// X2= first val(lower 16 bits of tempVal21)*2
+        CMP      X2,#0
+
+        RSBLT    X2,X2,#0                           ;// X2=2*absoluteval(first val)
+        SMLABBNE X2,QP,X2,QP1                       ;// X2=2*absval(first val)*QP+QP if QP is odd
+                                                    ;// X2=2*absval(first val)*QP+QP-1 if QP is even
+        SMULTB   X3,tempVal21,two                   ;// X3= second val(top 16 bits of tempVal21)*2
+        RSBLT    X2,X2,#0                           ;// restore the sign of the first val (flags still from CMP X2)
+
+        CMP      X3,#0
+
+        RSBLT    X3,X3,#0                           ;// X3=2*absoluteval(second val)
+        SMLABBNE X3,QP,X3,QP1                       ;// X3=2*absval(second val)*QP+QP1, left at 0 for a zero input
+
+        RSBLT    X3,X3,#0                           ;// restore the sign of the second val
+
+        ;// Clip the full 32-bit results BEFORE packing. PKHBT keeps only the low
+        ;// 16 bits of each operand, so packing first would let values above 32767
+        ;// wrap (e.g. val=600, QP=31 gives 37231 -> 0x916F) and then saturate to
+        ;// the wrong rail.
+        SSAT     X2,#12,X2                          ;// clip to range [-2048,2047]
+        SSAT     X3,#12,X3                          ;// clip to range [-2048,2047]
+        PKHBT    Result1,X2,X3,LSL #16              ;// Result1[0-15]=X2[0-15],Result1[16-31]=X3[0-15]
+        SMULBB   X2,tempVal43,two                   ;// X2= third val(lower 16 bits of tempVal43)*2
+        CMP      X2,#0
+
+
+
+        RSBLT    X2,X2,#0                           ;// X2=2*absoluteval(third val)
+        SMLABBNE X2,QP,X2,QP1
+        SMULTB   X3,tempVal43,two                   ;// X3= fourth val(top 16 bits of tempVal43)*2
+        RSBLT    X2,X2,#0                           ;// restore the sign of the third val
+        CMP      X3,#0
+
+        ;// Read-ahead for the next iteration; the load does not change the flags.
+        ;// NOTE(review): on the final iteration this reads the 8 bytes that follow
+        ;// the 128-byte block; the loaded values are never used.
+        LDRD     tempVal21,[pSrcDst,#8]             ;// Load next four Values to tempVal21,tempVal43
+
+        RSBLT    X3,X3,#0                           ;// X3=2*absoluteval(fourth val)
+        SMLABBNE X3,QP,X3,QP1
+        RSBLT    X3,X3,#0                           ;// restore the sign of the fourth val
+        SSAT     X2,#12,X2                          ;// clip to range [-2048,2047]
+        SSAT     X3,#12,X3                          ;// clip to range [-2048,2047]
+        PKHBT    Result2,X2,X3,LSL #16              ;// Result2[0-15]=X2[0-15],Result2[16-31]=X3[0-15]
+
+        SUBS     Count,Count,#4                     ;// Decrement Count by 4 and continue if it has not reached 0
+        STRD     Result1,[pSrcDst],#8               ;// Store Double words and increment the pointer to point the next store address
+
+
+
+        BGT      Loop
+
+        SUB      pSrcDst,pSrcDst,#128               ;// rewind to the start of the block (64 coefficients * 2 bytes)
+
+        ;// Storing the Inverse Quantized DC coefficient
+
+        STRH     temp,[pSrcDst],#2
+
+
+
+        MOV      Return,#OMX_Sts_NoErr
+
+
+
+
+        M_END
+        ENDIF
+        END
+
|