summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2')
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h37
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h25
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s75
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s398
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c211
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c75
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s104
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c61
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c102
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c208
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s364
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s132
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s136
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s224
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s194
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s73
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s713
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s283
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s141
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s188
20 files changed, 3744 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
new file mode 100644
index 0000000..22115d3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
@@ -0,0 +1,37 @@
+/**
+ *
+ * File Name: armVCM4P2_Huff_Tables_VLC.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ *
+ * File: armVCM4P2_Huff_Tables.h
+ * Description: Declares Tables used for Huffman coding and decoding
+ * in MP4P2 codec.
+ *
+ */
+
+#ifndef _OMXHUFFTAB_H_
+#define _OMXHUFFTAB_H_
+
+/* Packed VLC decode tables; each holds both the Last=0 and the Last=1 symbols. */
+extern const OMX_U16 armVCM4P2_IntraVlcL0L1[200];
+
+
+extern const OMX_U16 armVCM4P2_InterVlcL0L1[200];
+
+extern const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64]; /* two 32-entry halves in one array */
+//extern const OMX_U16 armVCM4P2_aIntraDCChromaIndex[32];
+extern const OMX_U16 armVCM4P2_aVlcMVD[124];
+/* LMAX/RMAX escape tables: the Last=0 part starts at index 0, the Last=1 part at index 32. */
+extern const OMX_U8 armVCM4P2_InterL0L1LMAX[73];
+extern const OMX_U8 armVCM4P2_InterL0L1RMAX[35];
+extern const OMX_U8 armVCM4P2_IntraL0L1LMAX[53];
+extern const OMX_U8 armVCM4P2_IntraL0L1RMAX[40];
+
+#endif /* _OMXHUFFTAB_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
new file mode 100644
index 0000000..d5f865c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
@@ -0,0 +1,25 @@
+/**
+ *
+ * File Name: armVCM4P2_ZigZag_Tables.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ *
+ * File: armVCM4P2_Zigzag_Tables.h
+ * Description: Declares Tables used for Zigzag scan in MP4P2 codec.
+ *
+ */
+
+#ifndef _OMXZIGZAGTAB_H
+#define _OMXZIGZAGTAB_H
+/* One 192-entry array holding three 64-entry scan orders: classical, vertical, horizontal. */
+extern const OMX_U8 armVCM4P2_aClassicalZigzagScan [192];
+//extern const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64];
+//extern const OMX_U8 armVCM4P2_aVerticalZigzagScan [64];
+
+#endif /* _OMXZIGZAGTAB_H */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s
new file mode 100644
index 0000000..7801e57
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s
@@ -0,0 +1,75 @@
+; /**
+; *
+; * File Name: armVCM4P2_Clip8_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains module for Clipping 16 bit value to [0,255] Range
+; */
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+
+ M_VARIANTS ARM1136JS
+;// armVCM4P2_Clip8: reads 8 rows of eight 16-bit samples from consecutive memory at
+;// pSrc, saturates every sample to [0,255] and stores 8 bytes per row at pDst,
+;// moving pDst forward by 'step' bytes after each row.
+ IF ARM1136JS
+
+;//Input Arguments
+
+pSrc RN 0
+pDst RN 1
+step RN 2
+
+;// Local variables
+
+x0 RN 3
+x1 RN 4
+x2 RN 5
+x3 RN 6
+
+Count RN 14 ;// row counter, kept in r14
+
+
+ M_START armVCM4P2_Clip8,r6
+
+
+ MOV Count,#8 ;// 8 rows to process
+loop
+
+ LDMIA pSrc!,{x0, x1} ;// samples 0-3 of the row, two per word
+ SUBS Count,Count, #1 ;// count down
+ LDMIA pSrc!,{x2, x3} ;// samples 4-7 of the row, two per word
+ USAT16 x0, #8, x0 ;// clip two samples to [0,255]
+ USAT16 x1, #8, x1 ;// clip two samples to [0,255]
+ STRB x0, [pDst] ;// low halfword result -> byte 0
+ MOV x0, x0, LSR #16 ;// bring the high halfword result down
+ STRB x0, [pDst,#1]
+ STRB x1, [pDst,#2]
+ MOV x1, x1, LSR #16
+ STRB x1, [pDst,#3]
+
+ USAT16 x2, #8, x2 ;// clip two samples to [0,255]
+ USAT16 x3, #8, x3 ;// clip two samples to [0,255]
+ STRB x2, [pDst,#4]
+ MOV x2, x2, LSR #16
+ STRB x2, [pDst,#5]
+ STRB x3, [pDst,#6]
+ MOV x3, x3, LSR #16
+ STRB x3, [pDst,#7]
+ ADD pDst,pDst,step ;// Increment pDst by step value
+
+ BGT loop ;// Repeat until all 8 rows are done (flags come from the SUBS above)
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
new file mode 100644
index 0000000..9e30900
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
@@ -0,0 +1,398 @@
+;/**
+; *
+; * File Name: armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter, intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_AC_unsafe
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan
+; *
+; *
+; *
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+;// armVCM4P2_DecodeVLCZigzag_AC_unsafe: zero-fills 128 bytes at pDst, then decodes
+;// (Last, Run, Level) symbols from the bitstream and stores each level as a halfword
+;// at pDst + zigzag[Count]. It returns OMX_Sts_NoErr only when it leaves with Last==1;
+;// an invalid code, a reserved level, a zero marker bit or Count+Run >= 64 returns
+;// OMX_Sts_Err. The four table pointers are stack arguments (see M_ARG below).
+ IF ARM1136JS
+
+;// NOTE(review): Count (r6) is read below before this routine ever writes it, so the
+;// starting coefficient index is presumably supplied by the caller - confirm.
+
+
+;//Input Arguments
+
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+shortVideoHeader RN 3
+
+
+;//Local Variables
+
+Return RN 0
+;// All four table pointers share r4; each one is reloaded from the stack right before use
+pVlcTableL0L1 RN 4
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+
+ftype RN 0
+temp3 RN 4
+temp RN 5
+Count RN 6
+Escape RN 5
+
+;// armVCM4P2_FillVLDBuffer
+zigzag RN 0
+storeLevel RN 1
+temp2 RN 4
+temp1 RN 5
+sign RN 5
+Last RN 7
+storeRun RN 14
+
+
+packRetIndex RN 5
+
+
+markerbit RN 5
+
+;// Scratch Registers
+
+RBitStream RN 8
+RBitBuffer RN 9
+RBitCount RN 10
+
+T1 RN 11
+T2 RN 12
+LR RN 14
+
+
+;// Stack slots for the values whose registers (r0, r1, r14) are reused as locals
+ M_ALLOC4 pppBitStream,4
+ M_ALLOC4 ppOffset,4
+ M_ALLOC4 pLinkRegister,4
+
+ M_START armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+ ;// get the table addresses from stack
+ M_ARG ppVlcTableL0L1,4
+ M_ARG ppLMAXTableL0L1,4
+ M_ARG ppRMAXTableL0L1,4
+ M_ARG ppZigzagTable,4
+
+ ;// Store ALL zeros at pDst
+
+ MOV temp1,#0 ;// zero word for the pDst clear below (temp1 is r5, not Count)
+ MOV Last,#0
+ M_STR LR,pLinkRegister ;// Store Link Register on Stack
+ MOV temp2,#0
+ MOV LR,#0
+
+ ;// Initialize the Macro and Store all zeros to pDst
+
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_BD_INIT1 T1, T2, T2
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_BD_INIT2 T1, T2, T2
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_STR ppBitStream,pppBitStream ;// Store ppBitstream on stack
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_STR pBitOffset,ppOffset ;// Store pBitOffset on stack
+ STM pDst!,{temp2,temp1,Last,LR}
+
+ STM pDst!,{temp2,temp1,Last,LR}
+ STM pDst!,{temp2,temp1,Last,LR}
+
+
+ SUB pDst,pDst,#128 ;// Restore pDst (8 STMs x 16 bytes = 128 bytes cleared)
+
+ ;// The armVCM4P2_GetVLCBits begins
+
+getVLCbits
+
+ M_BD_LOOK8 Escape,7 ;// Load Escape Value
+ LSR Escape,Escape,#25
+ CMP Escape,#3 ;// check for escape mode
+ MOVNE ftype,#0 ;// ftype 0 = ordinary (non-escape) VLC symbol
+ BNE notEscapemode ;// Branch if the escape code (value 3) is not present
+
+ M_BD_VSKIP8 #7,T1
+ CMP shortVideoHeader,#0 ;// Check shortVideoHeader flag to know the type of Escape mode
+ BEQ endFillVLD ;// shortVideoHeader==0: go read the escape type bits
+
+ ;// Escape Mode 4
+
+ M_BD_READ8 Last,1,T1
+ M_BD_READ8 storeRun,6,T1
+ M_BD_READ8 storeLevel,8,T1
+
+
+ ;// Check whether the Reserved values for Level are used and Exit with an Error Message if it is so
+
+ TEQ storeLevel,#0
+ TEQNE storeLevel,#128
+ BEQ ExitError
+
+ ADD temp2,storeRun,Count
+ CMP temp2,#64
+ BGE ExitError ;// error if Count+storeRun >= 64
+
+
+ ;// Load address of zigzagTable
+
+ M_LDR pZigzagTable,ppZigzagTable ;// Loading the Address of Zigzag table
+
+
+ ;// armVCM4P2_FillVLDBuffer
+
+ SXTB storeLevel,storeLevel ;// Sign Extend storeLevel to 32 bits
+
+
+ ;// To Reflect Runlength
+
+ ADD Count,Count,storeRun
+ LDRB zigzag,[pZigzagTable,Count]
+ ADD Count,Count,#1
+ STRH storeLevel,[pDst,zigzag] ;// store Level
+
+ B ExitOk
+
+ ;// NOTE(review): this path leaves after a single coefficient whatever Last was;
+ ;// ExitOk then reports OMX_Sts_Err unless Last==1 - confirm this is intended.
+endFillVLD
+
+
+ ;// Load Ftype( Escape Mode) value based on the two successive bits in the bitstream
+
+ M_BD_READ8 temp1,1,T1
+ CMP temp1,#0
+ MOVEQ ftype,#1 ;// first bit 0 -> escape type 1
+ BEQ notEscapemode
+ M_BD_READ8 temp1,1,T1
+ CMP temp1,#1
+ MOVEQ ftype,#3 ;// bits 1,1 -> escape type 3
+ MOVNE ftype,#2 ;// bits 1,0 -> escape type 2
+
+
+notEscapemode
+
+ ;// Load optimized packed VLC table with last=0 and Last=1
+
+ M_LDR pVlcTableL0L1,ppVlcTableL0L1 ;// Load Combined VLC Table
+
+
+ CMP ftype,#3 ;// If ftype >=3 perform Fixed Length Decoding (Escape Mode 3)
+ BGE EscapeMode3 ;// Else continue normal VLC Decoding
+
+ ;// Variable length decoding, "armUnPackVLC32"
+
+
+ M_BD_VLD packRetIndex,T1,T2,pVlcTableL0L1,4,2
+
+
+ LDR temp3,=0xFFF
+
+ CMP packRetIndex,temp3 ;// Check for invalid symbol
+ BEQ ExitError ;// if invalid symbol occurs exit with an error message
+
+ AND Last,packRetIndex,#2 ;// Get Last from packed Index (Last is 0 or 2 from here on)
+
+
+
+
+ LSR storeRun,packRetIndex,#7 ;// Get Run Value from Packed index
+ AND storeLevel,packRetIndex,#0x7c ;// storeLevel=packRetIndex[2-6],storeLevel[0-1]=0
+
+
+ M_LDR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Load LMAX table
+
+
+ LSR storeLevel,storeLevel,#2 ;// Level value
+
+ CMP ftype,#1
+ BNE ftype2
+
+ ;// ftype==1; Escape mode =1
+
+
+ ADD temp1, pLMAXTableL0L1, Last, LSL#4 ;// Last is 0 or 2, so Last<<4 adds 32 to the table address when Last is set
+ LDRB temp1,[temp1,storeRun]
+
+
+ ADD storeLevel,temp1,storeLevel
+
+ftype2
+
+ ;// ftype =2; Escape mode =2
+
+ M_LDR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Load RMAX Table
+
+ CMP ftype,#2
+ BNE FillVLDL1
+
+ ADD temp1, pRMAXTableL0L1, Last, LSL#4 ;// Last is 0 or 2, so Last<<4 adds 32 to the table address when Last is set
+ SUB temp2,storeLevel,#1 ;// RMAX is indexed by level-1
+ LDRB temp1,[temp1,temp2]
+
+
+ ADD storeRun,storeRun,#1
+ ADD storeRun,temp1
+
+FillVLDL1
+
+
+ ;// armVCM4P2_FillVLDBuffer
+
+ M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable
+
+ M_BD_READ8 sign,1,T1
+
+ CMP sign,#1
+ RSBEQ storeLevel,storeLevel,#0 ;// sign bit set -> negate the level
+
+ ADD temp1,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63
+ CMP temp1,#64
+ BGE ExitError
+
+
+
+
+
+
+ ;// To Reflect Run length
+
+ ADD Count,Count,storeRun
+
+storeLevelL1
+
+ LDRB zigzag,[pZigzagTable,Count]
+ CMP Last,#2 ;// Check if the Level val is Last non zero val (Last is still 0 or 2 here)
+ ADD Count,Count,#1
+ LSR Last,Last,#1 ;// bring Last down to 0/1, the form ExitOk tests
+ STRH storeLevel,[pDst,zigzag]
+
+ BNE end ;// flags from the CMP above: not the last coefficient, keep decoding
+
+ B ExitOk
+
+
+
+ ;// Fixed Length Decoding Escape Mode 3
+
+EscapeMode3
+
+ M_BD_READ8 Last,1,T1
+ M_BD_READ8 storeRun,6,T1
+
+ ADD temp2,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63
+ CMP temp2,#64
+ BGE ExitError
+
+ M_BD_READ8 markerbit,1,T1
+ TEQ markerbit,#0 ;// Exit with an error message if marker bit is zero
+ BEQ ExitError
+
+ M_BD_READ16 storeLevel,12,T1
+
+ TST storeLevel,#0x800 ;// test if the level is negative
+ SUBNE storeLevel,storeLevel,#4096 ;// sign-extend the 12-bit level
+ CMP storeLevel,#0
+ CMPNE storeLevel,#-2048
+ BEQ ExitError ;// Exit with an error message if Level==0 or -2048
+
+ M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable
+
+ M_BD_READ8 markerbit,1,T1
+
+
+ ;// armVCM4P2_FillVLDBuffer ( Sign not used as storeLevel is preprocessed)
+
+
+
+ ;// To Reflect Run Length
+
+ ADD Count,Count,storeRun
+
+
+
+storeLevelLast
+
+ LDRB zigzag,[pZigzagTable,Count]
+ CMP Last,#1
+ ADD Count,Count,#1
+ STRH storeLevel,[pDst,zigzag]
+
+ BNE end ;// flags from the CMP above: not the last coefficient, keep decoding
+
+ B ExitOk
+
+end
+
+ CMP Count,#64 ;//Run the Loop until Count reaches 64
+
+ BLT getVLCbits
+
+ ;// Falling through: Count reached 64 without a Last symbol, so ExitOk returns OMX_Sts_Err
+ExitOk
+ ;// Exit When VLC Decoding is done Successfully
+
+ ;// Loading ppBitStream and pBitOffset from stack
+
+ CMP Last,#1 ;// result is used by MOVEQ/MOVNE below (assumes the macros in between keep the flags)
+ M_LDR ppBitStream,pppBitStream
+ M_LDR pBitOffset,ppOffset
+
+ ;//Ending the macro
+
+ M_BD_FINI ppBitStream,pBitOffset
+
+ MOVEQ Return,#OMX_Sts_NoErr
+ MOVNE Return,#OMX_Sts_Err
+ M_LDR LR,pLinkRegister ;// Load the Link Register Back
+ B exit2
+
+ExitError
+ ;// Exit When an Error occurs
+
+ M_LDR ppBitStream,pppBitStream
+ M_LDR pBitOffset,ppOffset
+ ;//Ending the macro
+
+ M_BD_FINI ppBitStream,pBitOffset
+ M_LDR LR,pLinkRegister
+ MOV Return,#OMX_Sts_Err
+
+exit2
+
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
new file mode 100644
index 0000000..ba4d058
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
@@ -0,0 +1,211 @@
+ /**
+ *
+ * File Name: armVCM4P2_Huff_Tables_VLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_Huff_Tables_VLC.c
+ * Description: Contains all the Huffman tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armCOMM_Bitstream.h"
+
+
+
+
+// Contains optimized and Packed VLC tables with Last=0 and Last=1
+
+// optimized Packed VLC table Entry Format
+// ---------------------------------------
+//
+// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+// +------------------------------------------------+
+// | Len | Run | Level |L | 1 |
+// +------------------------------------------------+
+// | Offset | 0 |
+// +------------------------------------------------+
+// If the table entry is a leaf entry then bit 0 set:
+// Len = Number of bits overread (0 to 7) 3 bits
+// Run = RunLength of the Symbol (0 to 63) 6 bits
+// Level = Level of the Symbol (0 to 31) 5 bits
+// L = Last Value of the Symbol (0 or 1) 1 bit
+//
+// If the table entry is an internal node then bit 0 is clear:
+// Offset = Number of (16-bit) half words from the table
+// start to the next table node
+//
+// The table is accessed by successive lookup up on the
+// next Step bits of the input bitstream until a leaf node
+// is obtained. The Step sizes are supplied to the VLD macro.
+
+// The VLC tables used for Intra and non-Intra coefficients in non-Escape mode
+// contain symbols with both Last=0 and Last=1.
+// If a symbol is not found in the table it will be coded as 0xFFF
+
+// Packed VLC table for Inter (non-Intra) coefficients, entry format as described above.
+const OMX_U16 armVCM4P2_InterVlcL0L1[200] = {
+ 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x1b09,
+ 0x4009, 0x4009, 0x4009, 0x4009, 0x2109, 0x2109, 0x0209, 0x0011,
+ 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058,
+ 0x3fff, 0x3fff, 0x0038, 0x0040, 0x2115, 0x2115, 0x201d, 0x201d,
+ 0x2059, 0x2059, 0x2051, 0x2051, 0x1c0d, 0x1b0d, 0x1a0d, 0x190d,
+ 0x0911, 0x0811, 0x0711, 0x0611, 0x0511, 0x0319, 0x0219, 0x0121,
+ 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088,
+ 0x2061, 0x2061, 0x2129, 0x2129, 0x3709, 0x3709, 0x3809, 0x3809,
+ 0x3d0d, 0x3d0d, 0x3e0d, 0x3e0d, 0x3f0d, 0x3f0d, 0x200d, 0x200d,
+ 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x0131, 0x0221, 0x0419, 0x0519,
+ 0x0619, 0x0a11, 0x1909, 0x1a09, 0x210d, 0x220d, 0x230d, 0x240d,
+ 0x250d, 0x260d, 0x270d, 0x280d, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+ 0x0049, 0x0041, 0x380d, 0x380d, 0x370d, 0x370d, 0x360d, 0x360d,
+ 0x350d, 0x350d, 0x340d, 0x340d, 0x330d, 0x330d, 0x320d, 0x320d,
+ 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x310d, 0x310d, 0x2015, 0x2015,
+ 0x3609, 0x3609, 0x3509, 0x3509, 0x3409, 0x3409, 0x3309, 0x3309,
+ 0x3209, 0x3209, 0x3109, 0x3109, 0x0110, 0x0130, 0x0138, 0x0140,
+ 0x0118, 0x0120, 0x0128, 0x100d, 0x3009, 0x3009, 0x2f09, 0x2f09,
+ 0x2411, 0x2411, 0x2311, 0x2311, 0x2039, 0x2039, 0x2031, 0x2031,
+ 0x0f0d, 0x0e0d, 0x0d0d, 0x0c0d, 0x0b0d, 0x0a0d, 0x090d, 0x0e09,
+ 0x0d09, 0x0211, 0x0119, 0x0029, 0x0150, 0x0158, 0x0160, 0x0168,
+ 0x280d, 0x280d, 0x270d, 0x270d, 0x260d, 0x260d, 0x250d, 0x250d,
+ 0x2c09, 0x2c09, 0xb759, 0xb759, 0x2a09, 0x2a09, 0x2021, 0x2021,
+ 0x040d, 0x030d, 0x0b35, 0x010d, 0x0909, 0x0809, 0x0709, 0x0609,
+ 0x0111, 0x0019, 0x2509, 0x2509, 0x2409, 0x2409, 0x2309, 0x2309
+};
+
+
+const OMX_U16 armVCM4P2_IntraVlcL0L1[200] = { // packed VLC table for Intra coefficients, entry format as described above
+ 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x0f09,
+ 0x4009, 0x4009, 0x4009, 0x4009, 0x2011, 0x2011, 0x0109, 0x0019,
+ 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058,
+ 0x3fff, 0x3fff, 0x0038, 0x0040, 0x203d, 0x203d, 0x2035, 0x2035,
+ 0x20b1, 0x20b1, 0x20a9, 0x20a9, 0x0215, 0x011d, 0x002d, 0x0d09,
+ 0x0519, 0x0811, 0x0419, 0x0321, 0x0221, 0x0139, 0x00a1, 0x0099,
+ 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088,
+ 0x20b9, 0x20b9, 0x20c1, 0x20c1, 0x2141, 0x2141, 0x2911, 0x2911,
+ 0x2315, 0x2315, 0x2415, 0x2415, 0x2f0d, 0x2f0d, 0x300d, 0x300d,
+ 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x00c9, 0x00d1, 0x00d9, 0x0149,
+ 0x0619, 0x0151, 0x0229, 0x0719, 0x0e09, 0x0045, 0x0515, 0x0615,
+ 0x110d, 0x120d, 0x130d, 0x140d, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+ 0x0091, 0x0089, 0x2e0d, 0x2e0d, 0x2d0d, 0x2d0d, 0x2c0d, 0x2c0d,
+ 0x2b0d, 0x2b0d, 0x2a0d, 0x2a0d, 0x2115, 0x2115, 0x2025, 0x2025,
+ 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x2c09, 0x2c09, 0x2b09, 0x2b09,
+ 0x2711, 0x2711, 0x2611, 0x2611, 0x2511, 0x2511, 0x2319, 0x2319,
+ 0x2219, 0x2219, 0x2131, 0x2131, 0x0110, 0x0130, 0x0138, 0x0140,
+ 0x0118, 0x0120, 0x0128, 0x080d, 0x2129, 0x2129, 0x2081, 0x2081,
+ 0x2411, 0x2411, 0x2079, 0x2079, 0x2071, 0x2071, 0x2069, 0x2069,
+ 0x1bb5, 0x060d, 0x001d, 0xd3f9, 0x0909, 0x0809, 0x090d, 0x0311,
+ 0x0121, 0x0061, 0x0059, 0x0051, 0x0150, 0x0158, 0x0160, 0x0168,
+ 0x240d, 0x240d, 0x230d, 0x230d, 0x2609, 0x2609, 0x250d, 0x250d,
+ 0x2709, 0x2709, 0x2211, 0x2211, 0x2119, 0x2119, 0x2049, 0x2049,
+ 0x0015, 0x0509, 0x020d, 0x010d, 0x0409, 0x0309, 0x0041, 0x0039,
+ 0x0111, 0x0031, 0x2209, 0x2209, 0x2029, 0x2029, 0x2021, 0x2021
+};
+
+const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64] = { // two 32-entry tables; presumably luma first, then chroma (per the name) - confirm
+ 0x0020, 0x000b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2001, 0x2001,
+ 0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003,
+ 0x0028, 0x000f, 0x200d, 0x200d, 0x0030, 0x0013, 0x2011, 0x2011,
+ 0x0038, 0x0017, 0x2015, 0x2015, 0x3fff, 0x3fff, 0x2019, 0x2019,
+ // second 32-entry table starts at index 32
+ 0x0020, 0x0009, 0x2007, 0x2007, 0x4005, 0x4005, 0x4005, 0x4005,
+ 0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001,
+ 0x0028, 0x000d, 0x200b, 0x200b, 0x0030, 0x0011, 0x200f, 0x200f,
+ 0x0038, 0x0015, 0x2013, 0x2013, 0x1fff, 0x0019, 0x2017, 0x2017
+};
+
+
+const OMX_U16 armVCM4P2_aVlcMVD[124] = { // VLC table; by its name for motion vector differences (MVD) - confirm against its user
+ 0x0010, 0x00f0, 0x0043, 0x003f, 0x4041, 0x4041, 0x4041, 0x4041,
+ 0x0018, 0x00d8, 0x0047, 0x003b, 0x0020, 0x0080, 0x00a8, 0x00d0,
+ 0x0028, 0x0048, 0x0070, 0x0078, 0x1fff, 0x0030, 0x0038, 0x0040,
+ 0x0081, 0x0001, 0x007f, 0x0003, 0x207d, 0x207d, 0x2005, 0x2005,
+ 0x207b, 0x207b, 0x2007, 0x2007, 0x0050, 0x0058, 0x0060, 0x0068,
+ 0x2079, 0x2079, 0x2009, 0x2009, 0x2077, 0x2077, 0x200b, 0x200b,
+ 0x2075, 0x2075, 0x200d, 0x200d, 0x2073, 0x2073, 0x200f, 0x200f,
+ 0x0071, 0x0011, 0x006f, 0x0013, 0x006d, 0x0015, 0x006b, 0x0017,
+ 0x0088, 0x0090, 0x0098, 0x00a0, 0x0069, 0x0019, 0x0067, 0x001b,
+ 0x0065, 0x001d, 0x0063, 0x001f, 0x0061, 0x0021, 0x005f, 0x0023,
+ 0x005d, 0x0025, 0x005b, 0x0027, 0x00b0, 0x00b8, 0x00c0, 0x00c8,
+ 0x0059, 0x0029, 0x0057, 0x002b, 0x2055, 0x2055, 0x202d, 0x202d,
+ 0x2053, 0x2053, 0x202f, 0x202f, 0x2051, 0x2051, 0x2031, 0x2031,
+ 0x204f, 0x204f, 0x2033, 0x2033, 0x00e0, 0x00e8, 0x0049, 0x0039,
+ 0x204d, 0x204d, 0x2035, 0x2035, 0x204b, 0x204b, 0x2037, 0x2037,
+ 0x2045, 0x2045, 0x203d, 0x203d
+};
+
+/* LMAX table for Inter blocks (Last == 0 and Last == 1 parts in one array)
+ Indexed by run, matching how armVCM4P2_DecodeVLCZigzag_AC_unsafe reads the LMAX table passed to it
+ padded armVCM4P2_InterL0L1LMAX[27-31] with zeros so that the Last=1 entries start at index 32
+
+*/
+const OMX_U8 armVCM4P2_InterL0L1LMAX[73] =
+{
+ 12, 6, 4, 3, 3, 3, 3, 2,
+ 2, 2, 2, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 0, 0, 0, 0, 0,
+ 3, 2, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1
+};
+
+/* RMAX table for Inter blocks (Last == 0 and Last == 1 parts in one array)
+ Level - 1 Indexed
+ padded armVCM4P2_InterL0L1RMAX[12-31] with zeros so that the Last=1 entries start at index 32 */
+
+
+const OMX_U8 armVCM4P2_InterL0L1RMAX[35] =
+{
+ 26, 10, 6, 2, 1, 1,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0, 40, 1, 0
+};
+
+/* LMAX table for Intra blocks (Last == 0 and Last == 1 parts in one array)
+ Indexed by run, matching how armVCM4P2_DecodeVLCZigzag_AC_unsafe reads the LMAX table passed to it
+ padded armVCM4P2_IntraL0L1LMAX[15-31] with zeros so that the Last=1 entries start at index 32
+
+*/
+const OMX_U8 armVCM4P2_IntraL0L1LMAX[53] =
+{
+ 27, 10, 5, 4, 3, 3, 3,
+ 3, 2, 2, 1, 1, 1, 1, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 8, 3, 2, 2, 2, 2, 2, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1
+};
+
+
+/* RMAX table for Intra blocks (Last == 0 and Last == 1 parts in one array)
+ Level - 1 Indexed
+ padded armVCM4P2_IntraL0L1RMAX[27-31] with zeros so that the Last=1 entries start at index 32 */
+
+
+const OMX_U8 armVCM4P2_IntraL0L1RMAX[40] =
+{
+ 14, 9, 7, 3, 2, 1, 1,
+ 1, 1, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,
+
+ 20, 6, 1, 0, 0, 0, 0, 0
+
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
new file mode 100644
index 0000000..25cf8db
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
@@ -0,0 +1,75 @@
+ /**
+ *
+ * File Name: armVCM4P2_Lookup_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_Lookup_Tables.c
+ * Description: Contains all the Lookup tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+ /* * Table Entries contain Dc Scaler values
+ * armVCM4P2_DCScaler[i]= 8 for i=1 to 4 and i=33 to 36
+ * = 2*i for i=5 to 8
+ * = i+8 for i=9 to 24
+ * = 2*i-16 for i=25 to 31
+ * = (i-32+13)/2 for i=37 to 56
+ * = i-6-32 for i=57 to 63
+ * = 255 for i=0 and i=32
+ */
+
+const OMX_U8 armVCM4P2_DCScaler[64]={
+ 0xff, 0x8, 0x8, 0x8, 0x8, 0xa, 0xc, 0xe,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e,
+ 0xff, 0x8, 0x8, 0x8, 0x8, 0x9, 0x9, 0xa,
+ 0xa, 0xb, 0xb, 0xc, 0xc, 0xd, 0xd, 0xe,
+ 0xe, 0xf, 0xf, 0x10, 0x10, 0x11, 0x11, 0x12,
+ 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+
+};
+
+
+ /* Table entries contain the reciprocals of 1 to 63, scaled by 32767
+ * armVCM4P2_Reciprocal_QP_S16[i]=round(32767/i) for i=1 to 63
+ * armVCM4P2_Reciprocal_QP_S16[0]= 0 (placeholder, 0 has no reciprocal)
+ */
+
+const OMX_S16 armVCM4P2_Reciprocal_QP_S16[64]={
+ 0x0000,0x7fff,0x4000,0x2aaa,0x2000,0x1999,0x1555,0x1249,
+ 0x1000,0x0e39,0x0ccd,0x0ba3,0x0aab,0x09d9,0x0925,0x0888,
+ 0x0800,0x0787,0x071c,0x06bd,0x0666,0x0618,0x05d1,0x0591,
+ 0x0555,0x051f,0x04ec,0x04be,0x0492,0x046a,0x0444,0x0421,
+ 0x0400,0x03e1,0x03c4,0x03a8,0x038e,0x0376,0x035e,0x0348,
+ 0x0333,0x031f,0x030c,0x02fa,0x02e9,0x02d8,0x02c8,0x02b9,
+ 0x02ab,0x029d,0x028f,0x0282,0x0276,0x026a,0x025f,0x0254,
+ 0x0249,0x023f,0x0235,0x022b,0x0222,0x0219,0x0211,0x0208
+
+};
+
+ /* Table entries contain the reciprocals of 1 to 63, scaled by 131071
+ * armVCM4P2_Reciprocal_QP_S32[i]=round(131071/i) for i=1 to 63
+ * armVCM4P2_Reciprocal_QP_S32[0]= 0 (placeholder, 0 has no reciprocal)
+ */
+
+const OMX_S32 armVCM4P2_Reciprocal_QP_S32[64]={
+ 0x00000000,0x0001ffff,0x00010000,0x0000aaaa, 0x00008000, 0x00006666, 0x00005555, 0x00004924,
+ 0x00004000,0x000038e3,0x00003333,0x00002e8c, 0x00002aab, 0x00002762, 0x00002492, 0x00002222,
+ 0x00002000,0x00001e1e,0x00001c72,0x00001af2, 0x0000199a, 0x00001861, 0x00001746, 0x00001643,
+ 0x00001555,0x0000147b,0x000013b1,0x000012f6, 0x00001249, 0x000011a8, 0x00001111, 0x00001084,
+ 0x00001000,0x00000f84,0x00000f0f,0x00000ea1, 0x00000e39, 0x00000dd6, 0x00000d79, 0x00000d21,
+ 0x00000ccd,0x00000c7d,0x00000c31,0x00000be8, 0x00000ba3, 0x00000b61, 0x00000b21, 0x00000ae5,
+ 0x00000aab,0x00000a73,0x00000a3d,0x00000a0a, 0x000009d9, 0x000009a9, 0x0000097b, 0x0000094f,
+ 0x00000925,0x000008fb,0x000008d4,0x000008ae, 0x00000889, 0x00000865, 0x00000842, 0x00000820
+
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
new file mode 100644
index 0000000..3f92d85
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
@@ -0,0 +1,104 @@
+;//
+;//
+;// File Name: armVCM4P2_SetPredDir_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+; **
+; * Function: armVCM4P2_SetPredDir
+; *
+; * Description:
+; * Performs detecting the prediction direction
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] blockIndex block index indicating the component type and
+; * position as defined in subclause 6.1.3.8, of ISO/IEC
+; * 14496-2. Furthermore, indexes 6 to 9 indicate the
+; * alpha blocks spatially corresponding to luminance
+; * blocks 0 to 3 in the same macroblock.
+; * [in] pCoefBufRow pointer to the coefficient row buffer
+; * [in] pQpBuf pointer to the quantization parameter buffer
+; * [out]predQP quantization parameter of the predictor block
+; * [out]predDir indicates the prediction direction which takes one
+; * of the following values:
+; * OMX_VC_HORIZONTAL predict horizontally
+; * OMX_VC_VERTICAL predict vertically
+; *
+; * Return Value:
+; * Standard OMXResult result. See enumeration for possible result codes.
+; *
+; */
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE omxVC_s.h
+
+
+ M_VARIANTS ARM1136JS
+;// Picks the prediction direction from the left, top-left and top DC values and
+;// writes the direction to [predDir] and the matching QP byte to [predQP].
+ IF ARM1136JS
+
+;// Input Arguments
+BlockIndex RN 0
+pCoefBufRow RN 1
+pCoefBufCol RN 2
+predDir RN 3
+predQP RN 4
+pQpBuf RN 5
+;// predQP and pQpBuf are fetched from the stack arguments ppredQP / ppQpBuf below
+;// Local Variables
+
+Return RN 0
+blockDCLeft RN 6
+blockDCTop RN 7
+blockDCTopLeft RN 8
+temp1 RN 9
+temp2 RN 14
+
+ M_START armVCM4P2_SetPredDir,r9
+
+ M_ARG ppredQP,4
+ M_ARG ppQpBuf,4
+ ;// Top-left DC sits 16 bytes before pCoefBufRow; left DC is the first entry of pCoefBufCol
+ LDRH blockDCTopLeft,[pCoefBufRow,#-16]
+ LDRH blockDCLeft,[pCoefBufCol]
+ ;// Top DC: block 3 reads it 16 bytes before pCoefBufCol, every other block from pCoefBufRow
+ TEQ BlockIndex,#3
+ LDREQH blockDCTop,[pCoefBufCol,#-16]
+ LDRNEH blockDCTop,[pCoefBufRow]
+ ;// temp1 = |left - topLeft|, temp2 = |topLeft - top|
+ SUBS temp1,blockDCLeft,blockDCTopLeft
+ RSBLT temp1,temp1,#0
+ SUBS temp2,blockDCTopLeft,blockDCTop
+ RSBLT temp2,temp2,#0
+ ;// temp1 < temp2 -> VERTICAL with QP = pQpBuf[1]; otherwise HORIZONTAL with QP = pQpBuf[0]
+ M_LDR pQpBuf,ppQpBuf
+ M_LDR predQP,ppredQP
+ CMP temp1,temp2 ;// these flags drive every LT/GE instruction below
+ MOV temp2,#OMX_VC_VERTICAL ;// temp2 is reused as the direction constant
+ LDRLTB temp1,[pQpBuf,#1]
+ STRLT temp2,[predDir]
+ STRLT temp1,[predQP]
+ MOV temp2,#OMX_VC_HORIZONTAL
+ LDRGEB temp1,[pQpBuf]
+ STRGE temp2,[predDir]
+ MOV Return,#OMX_Sts_NoErr ;// this routine always reports success
+ STRGE temp1,[predQP]
+
+ ;// NOTE(review): the DC values are loaded with LDRH (zero-extended) - confirm they are never negative
+
+ M_END
+
+ ENDIF
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
new file mode 100644
index 0000000..ed17f9b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
@@ -0,0 +1,61 @@
+/**
+ *
+ * File Name: armVCM4P2_Zigzag_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_ZigZag_Tables.c
+ * Description: Contains the zigzag tables
+ *
+ */
+
+#include "omxtypes.h"
+
+/* Zigzag scan tables: classical, vertical and horizontal, 64 entries each, stored
+ * back to back in one array. Every entry is double the reference scan position,
+ * so it can be used directly as the byte offset of a 16-bit coefficient.
+ * Each 64-entry section therefore holds every even value 0..126 exactly once. */
+const OMX_U8 armVCM4P2_aClassicalZigzagScan [192] =
+{ /* entries 0..63: classical zigzag scan */
+ 0, 2, 16, 32, 18, 4, 6, 20,
+ 34, 48, 64, 50, 36, 22, 8, 10,
+ 24, 38, 52, 66, 80, 96, 82, 68,
+ 54, 40, 26, 12, 14, 28, 42, 56,
+ 70, 84, 98, 112, 114, 100, 86, 72,
+ 58, 44, 30, 46, 60, 74, 88, 102,
+ 116, 118, 104, 90, 76, 62, 78, 92,
+ 106, 120, 122, 108, 94, 110, 124, 126,
+ /* entries 64..127: vertical scan */
+ 0, 16, 32, 48, 2, 18, 4, 20,
+ 34, 50, 64, 80, 96, 112, 114, 98,
+ 82, 66, 52, 36, 6, 22, 8, 24,
+ 38, 54, 68, 84, 100, 116, 70, 86,
+ 102, 118, 40, 56, 10, 26, 12, 28,
+ 42, 58, 72, 88, 104, 120, 74, 90,
+ 106, 122, 44, 60, 14, 30, 46, 62,
+ 76, 92, 108, 124, 78, 94, 110, 126,
+ /* entries 128..191: horizontal scan */
+ 0, 2, 4, 6, 16, 18, 32, 34,
+ 20, 22, 8, 10, 12, 14, 30, 28,
+ 26, 24, 38, 36, 48, 50, 64, 66,
+ 52, 54, 40, 42, 44, 46, 56, 58,
+ 60, 62, 68, 70, 80, 82, 96, 98,
+ 84, 86, 72, 74, 76, 78, 88, 90,
+ 92, 94, 100, 102, 112, 114, 116, 118,
+ 104, 106, 108, 110, 120, 122, 124, 126
+
+
+};
+
+
+
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
new file mode 100644
index 0000000..b63d295
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
@@ -0,0 +1,102 @@
+/**
+ *
+ * File Name: omxVCM4P2_DecodeBlockCoef_Inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for inter reconstruction
+ *
+ */
+
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Inter
+ *
+ * Description:
+ * Decodes the INTER block coefficients. Inverse quantization, inversely zigzag
+ * positioning and IDCT, with appropriate clipping on each step, are performed
+ * on the coefficients. The results (residuals) are placed in a contiguous array
+ * of 64 elements. For INTER block, the output buffer holds the residuals for
+ * further reconstruction.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream buffer. There is no boundary
+ * check for the bit stream buffer.
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * to by *ppBitStream. *pBitOffset is valid within
+ * [0-7]
+ * [in] QP quantization parameter
+ * [in] shortVideoHeader a flag indicating presence of short_video_header;
+ * shortVideoHeader==1 indicates using quantization method defined in short
+ * video header mode, and shortVideoHeader==0 indicates normal quantization method.
+ * [out] ppBitStream *ppBitStream is updated after the block is decoded, so that it points to the
+ * current byte in the bit stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the current bit position in the
+ * byte pointed by *ppBitStream
+ * [out] pDst pointer to the decoded residual buffer (a contiguous array of 64 elements of
+ * OMX_S16 data type). Must be 16-byte aligned.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following pointers is Null: ppBitStream, *ppBitStream, pBitOffset , pDst
+ * - At least one of the below case:
+ * - *pBitOffset exceeds [0,7], QP <= 0;
+ * - pDst not 16-byte aligned
+ * OMX_Sts_Err - status error
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter(
+ const OMX_U8 ** ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_S16 * pDst,
+ OMX_INT QP,
+ OMX_INT shortVideoHeader
+)
+{
+    /* One 8x8 block needs 64 coefficients; the array carries 15 extra
+       elements so that a 16-byte aligned window of 64 always fits. */
+    OMX_S16 coefScratch[79];
+    /* Aligned view of the scratch array shared by all three stages */
+    OMX_S16 *pCoef = armAlignTo16Bytes(coefScratch);
+    OMXResult status;
+
+    /* Stage 1: VLC decoding and inverse zigzag scan into pCoef */
+    status = omxVCM4P2_DecodeVLCZigzag_Inter(
+        ppBitStream,
+        pBitOffset,
+        pCoef,
+        shortVideoHeader);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 2: inverse quantisation; only pCoef is passed, so the result
+       replaces the input */
+    status = omxVCM4P2_QuantInvInter_I(pCoef, QP);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 3: inverse DCT from pCoef into the caller's pDst */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pDst);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
new file mode 100644
index 0000000..c609a60
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
@@ -0,0 +1,208 @@
+/**
+ *
+ * File Name: omxVCM4P2_DecodeBlockCoef_Intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for intra reconstruction
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Intra
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inversely zigzag
+ * positioning, and IDCT, with appropriate clipping on each step, are performed
+ * on the coefficients. The results are then placed in the output frame/plane on
+ * a pixel basis. For INTRA block, the output values are clipped to [0, 255] and
+ * written to corresponding block buffer within the destination plane.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream buffer. There is no boundary
+ * check for the bit stream buffer.
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * to by *ppBitStream. *pBitOffset is valid within
+ * [0-7].
+ * [in] step width of the destination plane
+ * [in/out] pCoefBufRow [in] pointer to the coefficient row buffer
+ * [out] updated coefficient row buffer
+ * [in/out] pCoefBufCol [in] pointer to the coefficient column buffer
+ * [out] updated coefficient column buffer
+ * [in] curQP quantization parameter of the macroblock which
+ * the current block belongs to
+ * [in] pQpBuf Pointer to a 2-element QP array. pQpBuf[0] holds the QP of the 8x8 block left to
+ * the current block(QPa). pQpBuf[1] holds the QP of the 8x8 block just above the
+ * current block(QPc).
+ * Note, in case the corresponding block is out of VOP bound, the QP value will have
+ * no effect to the intra-prediction process. Refer to subclause "7.4.3.3 Adaptive
+ * ac coefficient prediction" of ISO/IEC 14496-2(MPEG4 Part2) for accurate description.
+ * [in] blockIndex block index indicating the component type and
+ * position as defined in subclause 6.1.3.8,
+ * Figure 6-5 of ISO/IEC 14496-2.
+ * [in] intraDCVLC a code determined by intra_dc_vlc_thr and QP.
+ * This allows a mechanism to switch between two VLC
+ * for coding of Intra DC coefficients as per Table
+ * 6-21 of ISO/IEC 14496-2.
+ * [in] ACPredFlag a flag equal to ac_pred_flag (of luminance) indicating
+ * if the ac coefficients of the first row or first
+ * column are differentially coded for intra coded
+ * macroblock.
+ * [in] shortVideoHeader a flag indicating presence of short_video_header;
+ * shortVideoHeader==1 selects linear intra DC mode,
+ * and shortVideoHeader==0 selects nonlinear intra DC mode.
+ * [out] ppBitStream *ppBitStream is updated after the block is
+ * decoded, so that it points to the current byte
+ * in the bit stream buffer
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream
+ * [out] pDst pointer to the block in the destination plane.
+ * pDst should be 16-byte aligned.
+ * [out] pCoefBufRow pointer to the updated coefficient row buffer.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset,
+ * pCoefBufRow, pCoefBufCol, pQPBuf, pDst.
+ * or
+ * - At least one of the below case: *pBitOffset exceeds [0,7], curQP exceeds (1, 31),
+ * blockIndex exceeds [0,9], step is not the multiple of 8, intraDCVLC is zero while
+ * blockIndex greater than 5.
+ * or
+ * - pDst is not 16-byte aligned
+ * OMX_Sts_Err - status error
+ *
+ */
+
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra(
+ const OMX_U8 ** ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_U8 *pDst,
+ OMX_INT step,
+ OMX_S16 *pCoefBufRow,
+ OMX_S16 *pCoefBufCol,
+ OMX_U8 curQP,
+ const OMX_U8 *pQPBuf,
+ OMX_INT blockIndex,
+ OMX_INT intraDCVLC,
+ OMX_INT ACPredFlag,
+ OMX_INT shortVideoHeader
+ )
+{
+    /*
+     * Scratch storage for one 8x8 block at two stages of the pipeline. Each
+     * array has 64 + 15 elements so that a 16-byte aligned window of 64
+     * elements always fits inside it.
+     */
+    OMX_S16 coefScratch[79];    /* coefficients: decode -> predict -> dequant */
+    OMX_S16 idctScratch[79];    /* IDCT output, before clipping               */
+    OMX_S16 *pCoef = armAlignTo16Bytes(coefScratch);
+    OMX_S16 *pIdctOut = armAlignTo16Bytes(idctScratch);
+
+    OMX_INT dcPredDir;                  /* direction chosen by SetPredDir      */
+    OMX_INT acPredDir;                  /* direction given to the VLC decoders */
+    OMX_INT predictorQP;                /* QP output of SetPredDir             */
+    OMXVCM4P2VideoComponent component;  /* luminance or chrominance            */
+    OMXResult status;
+
+    /* Step 1: choose the prediction direction and the predictor QP */
+    armVCM4P2_SetPredDir(
+        blockIndex,
+        pCoefBufRow,
+        pCoefBufCol,
+        &dcPredDir,
+        &predictorQP,
+        pQPBuf);
+
+    /* With AC prediction switched off the VLC decoders are handed
+       OMX_VC_NONE; otherwise they get the direction selected above. */
+    acPredDir = (ACPredFlag == 0) ? OMX_VC_NONE : dcPredDir;
+
+    /* Block indices up to 3 are handled as luminance, every other index
+       as chrominance. */
+    component = (blockIndex <= 3) ? OMX_VC_LUMINANCE : OMX_VC_CHROMINANCE;
+
+    /* Step 2: VLC decoding and inverse scan into pCoef. intraDCVLC == 1
+       selects the IntraDCVLC decoder, which additionally needs to know the
+       component; any other value selects the IntraACVLC decoder. */
+    if (intraDCVLC == 1)
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC(
+            ppBitStream,
+            pBitOffset,
+            pCoef,
+            acPredDir,
+            shortVideoHeader,
+            component);
+    }
+    else
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraACVLC(
+            ppBitStream,
+            pBitOffset,
+            pCoef,
+            acPredDir,
+            shortVideoHeader);
+    }
+    /* Either decoder reports failure the same way, so check once here */
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Step 3: DC/AC prediction using the row and column coefficient
+       buffers. Note that this stage takes the unmasked dcPredDir together
+       with ACPredFlag, not acPredDir. */
+    status = omxVCM4P2_PredictReconCoefIntra(
+        pCoef,
+        pCoefBufRow,
+        pCoefBufCol,
+        curQP,
+        predictorQP,
+        dcPredDir,
+        ACPredFlag,
+        component);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Step 4: inverse quantisation; pCoef is the only buffer passed, so the
+       result replaces the input. */
+    status = omxVCM4P2_QuantInvIntra_I(
+        pCoef,
+        curQP,
+        component,
+        shortVideoHeader);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Step 5: inverse DCT from the coefficient block into pIdctOut */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pIdctOut);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Step 6: clip the linear IDCT result to 0..255 and place it in the
+       destination plane; step is forwarded as given by the caller. */
+    armVCM4P2_Clip8(
+        pIdctOut,
+        pDst,
+        step);
+
+    /* All stages completed without error */
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
new file mode 100644
index 0000000..a1861da
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
@@ -0,0 +1,364 @@
+; **********
+; *
+; * File Name: omxVCM4P2_DecodePadMV_PVOP_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; **
+; * Function: omxVCM4P2_DecodePadMV_PVOP
+; *
+; * Description:
+; * Decodes and pads four motion vectors of the non-intra macroblock in P-VOP.
+; * The motion vector padding process is specified in subclause 7.6.1.6 of
+; * ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bit stream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within
+; * [0-7].
+; * [in] pSrcMVLeftMB pointer to the motion vector buffer of the
+; * macroblock spatially to the left of the current
+; * macroblock.
+; * [in] pSrcMVUpperMB pointer to the motion vector buffer of the
+; * macroblock spatially above the current
+; * macroblock.
+; * [in] pSrcMVUpperRightMB pointer to the motion vector buffer of the
+; * macroblock spatially above and to the right of the current
+; * macroblock.
+; * [in] fcodeForward a code equal to vop_fcode_forward in MPEG-4
+; * bit stream syntax
+; * [in] MBType the type of the current macroblock. If MBType
+; * is not equal to OMX_VC_INTER4V, the destination
+; * motion vector buffer is still filled with the
+; * same decoded vector.
+; * [out] ppBitStream *ppBitStream is updated after the block is decoded,
+; * so that it points to the current byte in the bit
+; * stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDstMVCurMB pointer to the motion vector buffer of the current
+; * macroblock which contains four decoded motion vectors
+; *
+; * Return Value:
+; * OMX_Sts_NoErr -no error
+; *
+; *
+; * OMX_Sts_Err - status error
+; *
+; *
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+ INCLUDE omxVC_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments (r0-r3 are used as received; r4-r7 are loaded from the stack inside the function)
+
+ppBitStream RN 0
+pBitOffset RN 1
+pSrcMVLeftMB RN 2
+pSrcMVUpperMB RN 3
+pSrcMVUpperRightMB RN 4
+pDstMVCurMB RN 5
+fcodeForward RN 6
+MBType RN 7
+
+;//Local Variables (many names alias one register, so their live ranges must not overlap)
+
+zero RN 4 ;// r4 is also one, index, mvHorData, mvVerData, High and pSrcMVUpperRightMB
+one RN 4
+scaleFactor RN 1 ;// r1 is also pBitOffset, temp, Range and diffMVdy
+
+
+Return RN 0 ;// r0 is also ppBitStream, VlcMVD, mvHorResidual, mvVerResidual and diffMVdx
+
+VlcMVD RN 0
+index RN 4
+Count RN 7 ;// r7 is also MBType
+
+mvHorData RN 4
+mvHorResidual RN 0
+
+mvVerData RN 4
+mvVerResidual RN 0
+
+temp RN 1
+
+temp1 RN 3 ;// r3 is also pSrcMVUpperMB
+High RN 4
+Low RN 2 ;// r2 is also pSrcMVLeftMB
+Range RN 1
+
+BlkCount RN 14 ;// same register as LR below, so it is kept on the stack across the BL
+
+diffMVdx RN 0
+diffMVdy RN 1
+
+;// Scratch Registers (bit-stream reader state and macro temporaries)
+
+RBitStream RN 8
+RBitCount RN 9
+RBitBuffer RN 10
+
+T1 RN 11
+T2 RN 12
+LR RN 14
+
+ IMPORT armVCM4P2_aVlcMVD
+ IMPORT omxVCM4P2_FindMVpred
+
+ ;// Allocate stack memory; the first three slots are filled before the call to omxVCM4P2_FindMVpred
+ ;// NOTE(review): presumably allocated first so they sit where the callee expects its stack arguments - confirm against M_ALLOC4
+ M_ALLOC4 ppDstMVCurMB,4 ;// address of pDstMVCurMB[BlkCount]
+ M_ALLOC4 pDstMVPredME,4 ;// always written as 0 (NULL)
+ M_ALLOC4 pBlkCount,4 ;// current block number, also reloaded after the call
+ ;// Saved copies of the register arguments that get overwritten while decoding
+ M_ALLOC4 pppBitStream,4
+ M_ALLOC4 ppBitOffset,4
+ M_ALLOC4 ppSrcMVLeftMB,4
+ M_ALLOC4 ppSrcMVUpperMB,4
+ ;// Decoded differences and the range limit, kept across the call
+ M_ALLOC4 pdiffMVdx,4
+ M_ALLOC4 pdiffMVdy,4
+ M_ALLOC4 pHigh,4 ;// holds 32*scaleFactor
+
+
+
+
+ M_START omxVCM4P2_DecodePadMV_PVOP,r11
+ ;// Arguments 5..8 arrive on the stack and are fetched with M_LDR when needed
+ M_ARG pSrcMVUpperRightMBonStack,4 ;// stack slot of argument pSrcMVUpperRightMB
+ M_ARG pDstMVCurMBonStack,4 ;// stack slot of argument pDstMVCurMB
+ M_ARG fcodeForwardonStack,4 ;// stack slot of argument fcodeForward
+ M_ARG MBTypeonStack,4 ;// stack slot of argument MBType
+
+
+
+
+
+ ;// Start the bit-stream reader; INIT1/INIT2 are interleaved with the MBType test below
+
+ M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+ M_LDR MBType,MBTypeonStack ;// Load MBType from stack
+ M_LDR pDstMVCurMB,pDstMVCurMBonStack ;// Load pDstMVCurMB from stack
+ MOV zero,#0
+
+ TEQ MBType,#OMX_VC_INTRA ;// Check if MBType=OMX_VC_INTRA
+ TEQNE MBType,#OMX_VC_INTRA_Q ;// check if MBType=OMX_VC_INTRA_Q
+ STREQ zero,[pDstMVCurMB] ;// Intra: clear all four vectors (4 bytes each, offsets 0,4,8,12)
+ M_BD_INIT1 T1, T2, T2 ;// NOTE: the conditional instructions that follow rely on the flags of the TEQ pair being intact
+ STREQ zero,[pDstMVCurMB,#4]
+ M_BD_INIT2 T1, T2, T2
+ STREQ zero,[pDstMVCurMB,#8]
+ MOVEQ Return,#OMX_Sts_NoErr
+ MOV BlkCount,#0 ;// first block; does not touch the flags
+ STREQ zero,[pDstMVCurMB,#12]
+
+ BEQ ExitOK ;// Intra macroblock: nothing to decode
+
+ TEQ MBType,#OMX_VC_INTER4V ;// Check if MBType=OMX_VC_INTER4V
+ TEQNE MBType,#OMX_VC_INTER4V_Q ;// Check if MBType=OMX_VC_INTER4V_Q
+ MOVEQ Count,#4 ;// four vectors to decode (Count shares r7 with MBType)
+
+ TEQ MBType,#OMX_VC_INTER ;// Check if MBType=OMX_VC_INTER
+ TEQNE MBType,#OMX_VC_INTER_Q ;// Check if MBType=OMX_VC_INTER_Q
+ MOVEQ Count,#1 ;// one vector to decode
+
+ M_LDR fcodeForward,fcodeForwardonStack ;// Load fcodeForward from stack
+
+ ;// Save the register arguments that are overwritten while decoding
+
+ M_STR ppBitStream,pppBitStream
+ M_STR pBitOffset,ppBitOffset
+
+
+ SUB temp,fcodeForward,#1 ;// temp=fcodeForward-1
+ MOV one,#1
+ M_STR pSrcMVLeftMB,ppSrcMVLeftMB
+ LSL scaleFactor,one,temp ;// scaleFactor=1<<(fcodeForward-1)
+ M_STR pSrcMVUpperMB,ppSrcMVUpperMB
+ LSL scaleFactor,scaleFactor,#5
+ M_STR scaleFactor,pHigh ;// [pHigh]=32*scaleFactor
+
+ ;// VLD Decoding: one pass of Loop per motion vector
+
+
+Loop
+
+ LDR VlcMVD, =armVCM4P2_aVlcMVD ;// Load the optimized MVD VLC table
+
+ ;// Horizontal Data and Residual calculation
+
+ LDR temp,=0xFFF ;// value compared against the decoded index to detect an invalid symbol
+ M_BD_VLD index,T1,T2,VlcMVD,3,2 ;// variable length decoding using the macro
+
+ TEQ index,temp
+ BEQ ExitError ;// Exit with an error if the decoded symbol is invalid
+
+ SUB mvHorData,index,#32 ;// mvHorData=index-32
+ MOV mvHorResidual,#1 ;// mvHorResidual=1
+ CMP fcodeForward,#1
+ TEQNE mvHorData,#0
+ MOVEQ diffMVdx,mvHorData ;// if fcodeForward=1 or mvHorData=0 then diffMVdx=mvHorData
+ BEQ VerticalData
+
+ SUB temp,fcodeForward,#1
+ M_BD_VREAD8 mvHorResidual,temp,T1,T2 ;// read fcodeForward-1 residual bits (only if fcodeForward!=1 and mvHorData!=0)
+
+ CMP mvHorData,#0 ;// flags of this compare also drive the final RSBLT
+ RSBLT mvHorData,mvHorData,#0 ;// mvHorData=abs(mvHorData)
+ SUB mvHorResidual,mvHorResidual,fcodeForward
+ SMLABB diffMVdx,mvHorData,fcodeForward,mvHorResidual ;// diffMVdx=abs(mvHorData)*fcodeForward+mvHorResidual-fcodeForward
+ ADD diffMVdx,diffMVdx,#1 ;// NOTE(review): multiplier is fcodeForward, not scaleFactor=1<<(fcodeForward-1) - confirm against ISO/IEC 14496-2
+ RSBLT diffMVdx,diffMVdx,#0 ;// restore the sign of the original mvHorData
+
+ ;// Vertical Data and Residual calculation
+
+VerticalData
+
+ M_STR diffMVdx,pdiffMVdx ;// Store the diffMVdx on stack
+ LDR VlcMVD, =armVCM4P2_aVlcMVD ;// Loading the address of optimized VLC tables
+
+ LDR temp,=0xFFF ;// value compared against the decoded index to detect an invalid symbol
+ M_BD_VLD index,T1,T2,VlcMVD,3,2 ;// VLC decoding using the macro
+
+ TEQ index,temp
+ BEQ ExitError ;// Exit with an error if the decoded symbol is invalid
+
+ SUB mvVerData,index,#32 ;// mvVerData=index-32
+ MOV mvVerResidual,#1
+ CMP fcodeForward,#1
+ TEQNE mvVerData,#0
+ MOVEQ diffMVdy,mvVerData ;// if fcodeForward=1 or mvVerData=0 then diffMVdy=mvVerData
+ BEQ FindMVPred
+
+ SUB temp,fcodeForward,#1
+ M_BD_VREAD8 mvVerResidual,temp,T1,T2 ;// read fcodeForward-1 residual bits (only if fcodeForward!=1 and mvVerData!=0)
+
+
+ CMP mvVerData,#0 ;// flags of this compare also drive the final RSBLT
+ RSBLT mvVerData,mvVerData,#0 ;// mvVerData=abs(mvVerData)
+ SUB mvVerResidual,mvVerResidual,fcodeForward
+ SMLABB diffMVdy,mvVerData,fcodeForward,mvVerResidual ;// diffMVdy=abs(mvVerData)*fcodeForward+mvVerResidual-fcodeForward
+ ADD diffMVdy,diffMVdy,#1 ;// same scaling as the horizontal component, see the review note there
+ RSBLT diffMVdy,diffMVdy,#0 ;// restore the sign of the original mvVerData
+
+ ;// Calling the function omxVCM4P2_FindMVpred
+
+FindMVPred
+
+ M_STR diffMVdy,pdiffMVdy
+ ADD temp,pDstMVCurMB,BlkCount,LSL #2 ;// temp=&pDstMVCurMB[BlkCount]
+ M_STR temp,ppDstMVCurMB ;// store temp on stack for passing as an argument to FindMVPred
+
+ MOV temp,#0
+ M_STR temp,pDstMVPredME ;// Pass pDstMVPredME=NULL as an argument
+ M_STR BlkCount,pBlkCount ;// Pass BlkCount as argument through stack (also preserves it, r14 is overwritten by BL)
+
+ MOV temp,pSrcMVLeftMB ;// temp (RN 1)=pSrcMVLeftMB
+ M_LDR pSrcMVUpperRightMB,pSrcMVUpperRightMBonStack
+ MOV pSrcMVLeftMB,pSrcMVUpperMB ;// pSrcMVLeftMB ( RN 2) = pSrcMVUpperMB
+ MOV ppBitStream,pDstMVCurMB ;// ppBitStream ( RN 0) = pDstMVCurMB
+ MOV pSrcMVUpperMB,pSrcMVUpperRightMB ;// pSrcMVUpperMB( RN 3) = pSrcMVUpperRightMB
+ BL omxVCM4P2_FindMVpred ;// Branch to subroutine omxVCM4P2_FindMVpred
+
+ ;// Store Horizontal Motion Vector
+
+ M_LDR BlkCount,pBlkCount ;// Load BlkCount from stack
+ M_LDR High,pHigh ;// High=32*scaleFactor
+ LSL temp1,BlkCount,#2 ;// temp1=BlkCount*4
+ M_LDR diffMVdx,pdiffMVdx ;// Load diffMVdx
+
+ LDRSH temp,[pDstMVCurMB,temp1] ;// temp=first halfword of pDstMVCurMB[BlkCount] (presumably the predictor written by FindMVpred)
+
+
+ RSB Low,High,#0 ;// Low = -32*scaleFactor
+ ADD diffMVdx,temp,diffMVdx ;// diffMVdx=pDstMVCurMB[BlkCount]+diffMVdx
+ ADD Range,High,High ;// Range=64*ScaleFactor
+ SUB High,High,#1 ;// High= 32*scaleFactor-1
+
+ CMP diffMVdx,Low ;// If diffMVdx<Low
+ ADDLT diffMVdx,diffMVdx,Range ;// diffMVdx+=Range
+
+ CMP diffMVdx,High
+ SUBGT diffMVdx,diffMVdx,Range ;// If diffMVdx > High diffMVdx-=Range
+ STRH diffMVdx,[pDstMVCurMB,temp1]
+
+ ;// Store Vertical (the diffMVdx register is reused for the vertical component)
+
+ ADD temp1,temp1,#2 ;// temp1=4*BlkCount+2
+ M_LDR diffMVdx,pdiffMVdy ;// Load diffMVdy
+ LDRSH temp,[pDstMVCurMB,temp1] ;// temp=second halfword of pDstMVCurMB[BlkCount]
+ ADD BlkCount,BlkCount,#1 ;// BlkCount=BlkCount+1
+ ADD diffMVdx,temp,diffMVdx
+ CMP diffMVdx,Low
+ ADDLT diffMVdx,diffMVdx,Range ;// If diffMVdy<Low diffMVdy+=Range
+ CMP diffMVdx,High
+ SUBGT diffMVdx,diffMVdx,Range ;// If diffMVdy > High diffMVdy-=Range
+ STRH diffMVdx,[pDstMVCurMB,temp1]
+
+ CMP BlkCount,Count
+ M_LDR pSrcMVLeftMB,ppSrcMVLeftMB ;// r2 was used as Low above, restore the argument
+ M_LDR pSrcMVUpperMB,ppSrcMVUpperMB ;// r3 was used as temp1 above, restore the argument
+
+ BLT Loop ;// If BlkCount<Count Continue the Loop
+
+
+ ;// If MBType=OMX_VC_INTER or MBtype=OMX_VC_INTER_Q copy pDstMVCurMB[0] to
+ ;// pDstMVCurMB[1], pDstMVCurMB[2], pDstMVCurMB[3]
+
+ M_LDR MBType,MBTypeonStack ;// reload, r7 was reused as Count
+
+ TEQ MBType,#OMX_VC_INTER
+ TEQNE MBType,#OMX_VC_INTER_Q
+ LDREQ temp,[pDstMVCurMB]
+ M_LDR ppBitStream,pppBitStream
+ STREQ temp,[pDstMVCurMB,#4]
+
+ STREQ temp,[pDstMVCurMB,#8]
+ STREQ temp,[pDstMVCurMB,#12]
+
+
+ M_LDR pBitOffset,ppBitOffset
+ ;// Write the reader state back through the restored ppBitStream/pBitOffset
+ M_BD_FINI ppBitStream,pBitOffset ;// Finishing the Macro
+
+
+ MOV Return,#OMX_Sts_NoErr
+ B ExitOK
+
+ExitError
+
+ M_LDR ppBitStream,pppBitStream
+ M_LDR pBitOffset,ppBitOffset
+ ;// Write the reader state back before reporting the error
+ M_BD_FINI ppBitStream,pBitOffset
+
+ MOV Return,#OMX_Sts_Err
+
+ExitOK
+
+ M_END
+ ENDIF
+ END
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
new file mode 100644
index 0000000..c43b253
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
@@ -0,0 +1,132 @@
+;/**
+; *
+; * File Name: omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_Inter
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one inter coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bitstream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within [0-7].
+; * [in] shortVideoHeader binary flag indicating presence of short_video_header;
+; * escape modes 0-3 are used if shortVideoHeader==0,
+; * and escape mode 4 is used when shortVideoHeader==1.
+; * [out] ppBitStream *ppBitStream is updated after the block is
+; * decoded, so that it points to the current byte
+; * in the bit stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDst pointer to the coefficient buffer of current
+; * block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; * -pDst is not 16-byte aligned, or
+; * -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; * -At least one mark bit is equal to zero
+; * -Encountered an illegal stream code that cannot be found in the VLC table
+; * -Encountered and illegal code in the VLC FLC table
+; * -The number of coefficients is greater than 64
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS
+ ;// Wrapper: stores the inter table addresses on the stack and calls armVCM4P2_DecodeVLCZigzag_AC_unsafe
+ ;// Import various tables needed for the function
+
+
+ IMPORT armVCM4P2_InterVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+ ;// Packed in Run:Level:Last format
+ IMPORT armVCM4P2_InterL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_InterL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_aClassicalZigzagScan ;// contains classical Zigzag table entries with double the original values
+ IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+
+
+;//Input Arguments (left untouched in r0-r3 up to the BL below)
+
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+shortVideoHeader RN 3
+
+;//Local Variables
+
+Return RN 0
+
+pVlcTableL0L1 RN 4 ;// r4 is reused for each table address in turn
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+Count RN 6
+
+
+
+ ;// Allocate stack memory to store the VLC,Zigzag,LMAX and RMAX tables
+
+
+ M_ALLOC4 ppVlcTableL0L1,4
+ M_ALLOC4 ppLMAXTableL0L1,4
+ M_ALLOC4 ppRMAXTableL0L1,4
+ M_ALLOC4 ppZigzagTable,4
+
+
+ M_START omxVCM4P2_DecodeVLCZigzag_Inter,r12
+
+
+
+
+ LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load zigzag table (start of the array, i.e. the classical scan)
+ M_STR pZigzagTable,ppZigzagTable ;// Store zigzag table on stack to pass as argument to unsafe function
+ LDR pVlcTableL0L1, =armVCM4P2_InterVlcL0L1 ;// Load optimized VLC table with both L=0 and L=1 entries
+ M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store optimized VLC table address on stack
+ LDR pLMAXTableL0L1, =armVCM4P2_InterL0L1LMAX ;// Load Interleaved L=0 and L=1 LMAX Tables
+ M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX table address on stack
+ LDR pRMAXTableL0L1, =armVCM4P2_InterL0L1RMAX ;// Load Interleaved L=0 and L=1 RMAX Tables
+ MOV Count,#0 ;// set start=0
+ M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// store RMAX table address on stack
+
+
+ BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// call Unsafe Function for VLC Zigzag Decoding
+
+ ;// No instruction here changes r0 after the call; presumably the callee's status is returned as is
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
new file mode 100644
index 0000000..166729e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
@@ -0,0 +1,136 @@
+;/**
+; *
+; * File Name: omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bitstream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within [0-7].
+; * [in] shortVideoHeader binary flag indicating presence of short_video_header;
+; * escape modes 0-3 are used if shortVideoHeader==0,
+; * and escape mode 4 is used when shortVideoHeader==1.
+; * [out] ppBitStream *ppBitStream is updated after the block is
+; * decoded, so that it points to the current byte
+; * in the bit stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDst pointer to the coefficient buffer of current
+; * block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; * -pDst is not 16-byte aligned, or
+; * -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; * -At least one mark bit is equal to zero
+; * -Encountered an illegal stream code that cannot be found in the VLC table
+; * -Encountered and illegal code in the VLC FLC table
+; * -The number of coefficients is greater than 64
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS
+ ;// Wrapper: picks a zigzag table by PredDir, stores the intra table addresses on the stack and calls the unsafe AC decoder
+ ;// Import various tables needed for the function
+
+
+ IMPORT armVCM4P2_IntraVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+ ;// Packed in Run:Level:Last format
+ IMPORT armVCM4P2_IntraL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_IntraL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_aClassicalZigzagScan ;// contains classical Zigzag table entries with double the original values
+ IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+;//Input Arguments
+
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+PredDir RN 3 ;// value of r3 on entry
+shortVideoHeader RN 3 ;// loaded into r3 from the stack just before the call
+
+;//Local Variables
+
+Return RN 0
+
+pVlcTableL0L1 RN 4 ;// r4 is reused for each table address in turn
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+Count RN 6
+
+
+
+ ;// Allocate stack memory to store optimized VLC,Zigzag, RMAX, LMAX Table Addresses
+
+ M_ALLOC4 ppVlcTableL0L1,4
+ M_ALLOC4 ppLMAXTableL0L1,4
+ M_ALLOC4 ppRMAXTableL0L1,4
+ M_ALLOC4 ppZigzagTable,4
+
+
+ M_START omxVCM4P2_DecodeVLCZigzag_IntraACVLC,r12
+
+ M_ARG shortVideoHeaderonStack,4 ;// stack slot of argument shortVideoHeader
+
+ LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load Address of the Zigzag table
+ ADD pZigzagTable, pZigzagTable, PredDir, LSL #6 ;// Select the sub-table at byte offset PredDir*64
+
+ M_STR pZigzagTable,ppZigzagTable ;// Store Zigzag table address on stack
+ LDR pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1 ;// Load optimized packed VLC Table with both L=0 and L=1 entries
+ M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store VLC Table address on stack
+ LDR pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX ;// Load LMAX Table
+ M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX Table address on Stack
+ LDR pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX ;// Load RMAX Table
+ MOV Count,#0 ;// Set Start=0
+
+ M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Store RMAX Table address on stack
+
+
+ ;// PredDir is no longer needed, so r3 can now take shortVideoHeader
+ M_LDR shortVideoHeader,shortVideoHeaderonStack ;// get the Input Argument from stack
+
+ BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// Call Unsafe Function
+
+
+ ;// No instruction here changes r0 after the call; presumably the callee's status is returned as is
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
new file mode 100644
index 0000000..d19cb13
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
@@ -0,0 +1,224 @@
+;/**
+; *
+; * File Name: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bitstream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within [0-7].
+; * [in] shortVideoHeader binary flag indicating presence of short_video_header;
+; * escape modes 0-3 are used if shortVideoHeader==0,
+; * and escape mode 4 is used when shortVideoHeader==1.
+; * [out] ppBitStream *ppBitStream is updated after the block is
+; * decoded, so that it points to the current byte
+; * in the bit stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDst pointer to the coefficient buffer of current
+; * block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; * -pDst is not 16-byte aligned, or
+; * -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; * -At least one mark bit is equal to zero
+; * -Encountered an illegal stream code that cannot be found in the VLC table
+; * -Encountered and illegal code in the VLC FLC table
+; * -The number of coefficients is greater than 64
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS :LOR: CortexA8
+ ;// Decodes the DC coefficient here, then calls armVCM4P2_DecodeVLCZigzag_AC_unsafe with start=1 for the rest
+
+ ;// Import various tables needed for the function
+
+
+ IMPORT armVCM4P2_IntraVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+ ;// Packed in Run:Level:Last format
+ IMPORT armVCM4P2_IntraL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_IntraL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_aClassicalZigzagScan ;// contains Classical, Horizontal, Vertical Zigzag table entries with double the original values
+ IMPORT armVCM4P2_aIntraDCLumaChromaIndex ;// Contains Optimized DCLuma and DCChroma Index table Entries
+
+
+ IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+;//Input Arguments
+
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+PredDir RN 3 ;// value of r3 on entry
+shortVideoHeader RN 3 ;// loaded into r3 from the stack just before the call
+videoComp RN 5 ;// loaded from the stack; r5 is later reused as temp1
+;//Local Variables
+
+Return RN 0
+
+pDCLumaChromaIndex RN 4 ;// r4 is reused for each table address in turn
+pDCChromaIndex RN 7
+pVlcTableL0L1 RN 4
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+Count RN 6 ;// r6 is also DCValueSize
+DCValueSize RN 6
+powOfSize RN 7
+temp1 RN 5
+
+
+;// Scratch Registers (bit-stream reader state and macro temporaries)
+
+RBitStream RN 8
+RBitBuffer RN 9
+RBitCount RN 10
+
+T1 RN 11
+T2 RN 12
+DCVal RN 14 ;// r14 is overwritten by BL, so DCVal is kept on the stack across the call
+
+
+ ;// Allocate stack memory to store optimized VLC,Zigzag, RMAX, LMAX Table Addresses
+
+ M_ALLOC4 ppVlcTableL0L1,4
+ M_ALLOC4 ppLMAXTableL0L1,4
+ M_ALLOC4 ppRMAXTableL0L1,4
+ M_ALLOC4 ppZigzagTable,4
+ M_ALLOC4 pDCCoeff,4 ;// decoded DC value, saved across the call
+
+
+
+ M_START omxVCM4P2_DecodeVLCZigzag_IntraDCVLC,r12
+
+ M_ARG shortVideoHeaderonStack,4 ;// stack slot of argument shortVideoHeader
+ M_ARG videoComponstack,4 ;// stack slot of argument videoComp
+
+
+ ;// Decode DC Coefficient
+
+
+ LDR pDCLumaChromaIndex, =armVCM4P2_aIntraDCLumaChromaIndex ;// Load Optimized VLC Table for Luminance and Chrominance
+
+ ;// Initializing the Bitstream Macro
+
+ M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+ M_LDR videoComp,videoComponstack
+ M_BD_INIT1 T1, T2, T2
+ ADD pDCLumaChromaIndex,pDCLumaChromaIndex,videoComp, LSL #6 ;// Select the table part at byte offset videoComp*64
+ M_BD_INIT2 T1, T2, T2
+
+
+ M_BD_VLD DCValueSize,T1,T2,pDCLumaChromaIndex,4,2 ;// VLC Decode using optimized Luminance and Chrominance VLC Table
+
+
+
+
+DecodeDC
+
+ CMP DCValueSize,#12 ;// A decoded size above 12 is rejected
+ BGT ExitError
+
+ CMP DCValueSize,#0
+ MOVEQ DCVal,#0 ;// If DCValueSize is zero then DC coeff =0
+ BEQ ACDecode ;// Branch to perform AC Coeff Decoding
+
+ M_BD_VREAD16 DCVal,DCValueSize,T1,T2 ;// Get DC Value From Bit stream
+
+
+ MOV powOfSize,#1
+ LSL powOfSize,DCValueSize ;// powOfSize=pow(2,DCValueSize)
+ CMP DCVal,powOfSize,LSR #1 ;// Compare DCVal with powOfSize/2
+ ADDLT DCVal,DCVal,#1
+ SUBLT DCVal,DCVal,powOfSize ;// If less than powOfSize/2 DCVal=DCVal-powOfSize+1
+ ;// Else DCVal= fetchbits from bit stream
+
+CheckDCValueSize
+
+ CMP DCValueSize,#8 ;// If DCValueSize greater than 8 check marker bit
+
+ BLE ACDecode
+
+ M_BD_READ8 temp1,1,T1 ;// Read the single marker bit (temp1 reuses r5, videoComp is no longer needed)
+ TEQ temp1,#0 ;// If Marker bit is zero Exit with an Error Message
+ BEQ ExitError
+
+
+
+ ;// Decode AC Coefficient
+
+ACDecode
+
+ M_STR DCVal,pDCCoeff ;// Store Decoded DC Coeff on Stack
+ M_BD_FINI ppBitStream,pBitOffset ;// Terminating the Bit stream Macro
+
+ LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load Zigzag table address
+ ADD pZigzagTable, pZigzagTable, PredDir, LSL #6 ;// Select the sub-table at byte offset PredDir*64
+
+ M_STR pZigzagTable,ppZigzagTable ;// Store zigzag table on stack
+ LDR pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1 ;// Load Optimized VLC Table With both Last=0 and Last=1 Entries
+ M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store Optimized VLC Table on stack
+ LDR pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX ;// Load LMAX Table
+ M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX table on stack
+ LDR pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX ;// Load RMAX Table
+ MOV Count,#1 ;// Set Start =1
+
+ M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Store RMAX Table on Stack
+
+ ;// PredDir is no longer needed, so r3 can now take shortVideoHeader
+ M_LDR shortVideoHeader,shortVideoHeaderonStack ;// Load the Input Argument From Stack
+
+ BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// Call the Unsafe Function
+
+ M_LDR DCVal,pDCCoeff ;// Get the Decoded DC Value From Stack
+ STRH DCVal,[pDst] ;// Store the DC Value as the first halfword of pDst
+ B ExitOK ;// r0 is not changed after the call above
+
+
+
+ExitError
+
+ M_BD_FINI ppBitStream,pBitOffset ;// Terminating the Bit Stream Macro in case of an Error
+ MOV Return,#OMX_Sts_Err ;// Exit with an Error Message
+ExitOK
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
new file mode 100644
index 0000000..a4bfa71
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
@@ -0,0 +1,194 @@
+;//
+;//
+;// File Name: omxVCM4P2_FindMVpred_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;// omxVCM4P2_FindMVpred
+;//
+ ;// Include headers
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armVCCOMM_s.h
+
+ ;// Define cpu variants
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+
+ M_TABLE armVCM4P2_pBlkIndexTable ;// Table of the four case labels; not referenced by the code below
+ DCD OMXVCBlk0, OMXVCBlk1
+ DCD OMXVCBlk2, OMXVCBlk3
+
+;//--------------------------------------------
+;// Declare input registers (r0-r3 arrive in registers, r4-r6 are loaded from stack arguments)
+;//--------------------------------------------
+
+pSrcMVCurMB RN 0
+pSrcCandMV1 RN 1
+pSrcCandMV2 RN 2
+pSrcCandMV3 RN 3
+pDstMVPred RN 4
+pDstMVPredME RN 5
+iBlk RN 6
+
+pTable RN 4 ;// Shares r4 with pDstMVPred; not used by the code below
+CandMV RN 12 ;// Address of the local default (zero) motion vector
+
+pCandMV1 RN 7 ;// Pointers to the three candidate MVs finally selected
+pCandMV2 RN 8
+pCandMV3 RN 9
+
+CandMV1dx RN 0 ;// The loaded components reuse r0-r3, so the source pointers are dead once loading starts
+CandMV1dy RN 1
+CandMV2dx RN 2
+CandMV2dy RN 3
+CandMV3dx RN 10
+CandMV3dy RN 11
+
+temp RN 14 ;// Scratch for M_MEDIAN3; shares r14 with zero
+
+zero RN 14
+return RN 0
+
+; ----------------------------------------------
+; Main routine
+; ----------------------------------------------
+
+ M_ALLOC4 MV, 4 ;// 4-byte local that holds the default (zero) candidate MV
+
+ ;// Function header
+ M_START omxVCM4P2_FindMVpred, r11
+
+ ;// Define stack arguments
+ M_ARG ppDstMVPred, 4
+ M_ARG ppDstMVPredME, 4
+ M_ARG Blk, 4
+
+ M_ADR CandMV, MV ;// CandMV = address of the local MV slot
+ MOV zero, #0
+ M_LDR iBlk, Blk ;// Block index that selects the case below
+
+ ;// Set the default value for these
+ ;// to be used if pSrcCandMV[1|2|3] == NULL
+ MOV pCandMV1, CandMV
+ MOV pCandMV2, CandMV
+ MOV pCandMV3, CandMV
+
+ STR zero, [CandMV] ;// Default candidate MV = (0, 0)
+
+ ;// Branch to the case based on blk number
+ M_SWITCH iBlk
+ M_CASE OMXVCBlk0 ;// iBlk=0
+ M_CASE OMXVCBlk1 ;// iBlk=1
+ M_CASE OMXVCBlk2 ;// iBlk=2
+ M_CASE OMXVCBlk3 ;// iBlk=3
+ M_ENDSWITCH
+
+OMXVCBlk0
+ CMP pSrcCandMV1, #0
+ ADDNE pCandMV1, pSrcCandMV1, #4 ;// Candidate 1 present: MV at byte offset 4 (each MV is 4 bytes: dx, dy)
+
+ CMP pSrcCandMV2, #0
+ ADDNE pCandMV2, pSrcCandMV2, #8 ;// Candidate 2 present: MV at byte offset 8
+
+ CMP pSrcCandMV3, #0
+ ADDNE pCandMV3, pSrcCandMV3, #8 ;// Candidate 3 present: MV at byte offset 8
+ CMPEQ pSrcCandMV1, #0 ;// EQ only when candidates 3 and 1 are both NULL
+
+ MOVEQ pCandMV3, pCandMV2 ;// ...then candidate 2 stands in for both of them
+ MOVEQ pCandMV1, pCandMV2
+
+ CMP pSrcCandMV1, #0
+ CMPEQ pSrcCandMV2, #0 ;// EQ only when candidates 1 and 2 are both NULL
+
+ MOVEQ pCandMV1, pCandMV3 ;// ...then candidate 3 stands in for both of them
+ MOVEQ pCandMV2, pCandMV3
+
+ CMP pSrcCandMV2, #0
+ CMPEQ pSrcCandMV3, #0 ;// EQ only when candidates 2 and 3 are both NULL
+
+ MOVEQ pCandMV2, pCandMV1 ;// ...then candidate 1 stands in for both of them
+ MOVEQ pCandMV3, pCandMV1
+
+ B BlkEnd
+
+OMXVCBlk1
+ MOV pCandMV1, pSrcMVCurMB ;// Candidate 1 is the first MV of the current MB
+ CMP pSrcCandMV3, #0
+ ADDNE pCandMV3, pSrcCandMV3, #8
+
+ CMP pSrcCandMV2, #0
+ ADDNE pCandMV2, pSrcCandMV2, #12
+
+ CMPEQ pSrcCandMV3, #0 ;// EQ only when candidates 2 and 3 are both NULL
+
+ MOVEQ pCandMV2, pCandMV1 ;// ...then candidate 1 stands in for both of them
+ MOVEQ pCandMV3, pCandMV1
+
+ B BlkEnd
+
+OMXVCBlk2
+ CMP pSrcCandMV1, #0
+ MOV pCandMV2, pSrcMVCurMB ;// Candidates 2 and 3 come from the current MB (byte offsets 0 and 4)
+ ADD pCandMV3, pSrcMVCurMB, #4
+ ADDNE pCandMV1, pSrcCandMV1, #12 ;// Uses the flags of the CMP above; MOV/ADD without S leave them intact
+ B BlkEnd
+
+OMXVCBlk3
+ ADD pCandMV1, pSrcMVCurMB, #8 ;// All three candidates come from the current MB (byte offsets 8, 0 and 4)
+ MOV pCandMV2, pSrcMVCurMB
+ ADD pCandMV3, pSrcMVCurMB, #4
+
+BlkEnd
+
+ ;// Load the x components of the three candidate MVs; the
+ ;// post-increment leaves each pointer at the matching y component
+ LDRSH CandMV1dx, [pCandMV1], #2
+ LDRSH CandMV2dx, [pCandMV2], #2
+ LDRSH CandMV3dx, [pCandMV3], #2
+
+ ;// Load argument from the stack
+ M_LDR pDstMVPredME, ppDstMVPredME
+
+ LDRSH CandMV1dy, [pCandMV1]
+ LDRSH CandMV2dy, [pCandMV2]
+ LDRSH CandMV3dy, [pCandMV3]
+
+ CMP pDstMVPredME, #0 ;// NULL pDstMVPredME: the six conditional stores below are skipped
+
+ ;// Store the candidate MV's into the pDstMVPredME,
+ ;// these can be used in the fast algorithm if implemented
+
+ STRHNE CandMV1dx, [pDstMVPredME], #2
+ STRHNE CandMV1dy, [pDstMVPredME], #2
+ STRHNE CandMV2dx, [pDstMVPredME], #2
+ STRHNE CandMV2dy, [pDstMVPredME], #2
+ STRHNE CandMV3dx, [pDstMVPredME], #2
+ STRHNE CandMV3dy, [pDstMVPredME]
+
+ ; Find the median of the 3 candidate MV's (x components first)
+ M_MEDIAN3 CandMV1dx, CandMV2dx, CandMV3dx, temp
+
+ ;// Load argument from the stack
+ M_LDR pDstMVPred, ppDstMVPred
+
+ M_MEDIAN3 CandMV1dy, CandMV2dy, CandMV3dy, temp ;// Same for the y components
+
+ STRH CandMV3dx, [pDstMVPred], #2 ;// The result is read from the third M_MEDIAN3 operand
+ STRH CandMV3dy, [pDstMVPred]
+
+ MOV return, #OMX_Sts_NoErr
+
+ M_END
+ ENDIF ;// ARM1136JS
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
new file mode 100644
index 0000000..bfeb540
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
@@ -0,0 +1,73 @@
+;//
+;//
+;// File Name: omxVCM4P2_IDCT8x8blk_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;// omxVCM4P2_IDCT8x8blk
+;//
+ ;// Include headers
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ ;// Define cpu variants
+ M_VARIANTS ARM1136JS
+
+ INCLUDE armCOMM_IDCT_s.h
+
+ IMPORT armCOMM_IDCTPreScale
+ ;//
+ ;// Function prototype
+ ;//
+ ;// OMXResult
+ ;// omxVCM4P2_IDCT8x8blk(const OMX_S16* pSrc,
+ ;// OMX_S16* pDst)
+ ;//
+
+ IF ARM1136JS :LOR: CortexA8
+ M_ALLOC4 ppDest, 4 ;// Locals not referenced by name below; presumably used inside M_IDCT - confirm in armCOMM_IDCT_s.h
+ M_ALLOC4 pStride, 4
+ M_ALLOC8 pBlk, 2*8*8 ;// 128 bytes, 8-byte aligned allocation
+ ENDIF
+
+ IF ARM1136JS
+ M_START omxVCM4P2_IDCT8x8blk, r11 ;// Function header is emitted for the ARM1136JS variant only
+ ENDIF
+
+
+ IF ARM1136JS :LOR: CortexA8
+
+;// Declare input registers
+pSrc RN 0
+pDst RN 1
+
+;// Declare other intermediate registers
+Result RN 0
+
+;// Prototype for macro M_IDCT
+;// pSrc RN 0 ;// source data buffer
+;// Stride RN 1 ;// destination stride in bytes
+;// pDest RN 2 ;// destination data buffer
+;// pScale RN 3 ;// pointer to scaling table
+
+pSrc RN 0
+Stride RN 1
+pDest RN 2
+pScale RN 3
+
+ MOV pDest, pDst ;// M_IDCT expects the destination in r2; r1 (Stride) is not written here
+ LDR pScale, =armCOMM_IDCTPreScale
+ M_IDCT s9, s16, 16 ;// NOTE(review): the trailing 16 is presumably the output stride in bytes - confirm against M_IDCT
+ MOV Result, #OMX_Sts_NoErr ;// Always reports success
+ M_END
+ ENDIF
+ ;// ARM1136JS :LOR: CortexA8
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
new file mode 100644
index 0000000..20965bf
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
@@ -0,0 +1,713 @@
+;//
+;//
+;// File Name: omxVCM4P2_MCReconBlock_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;//
+;//
+
+;// Include standard headers
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Import symbols required from other files
+
+ M_VARIANTS ARM1136JS
+
+;// ***************************************************************************
+;// ARM1136JS implementation
+;// ***************************************************************************
+ IF ARM1136JS
+
+;// ***************************************************************************
+;// MACRO DEFINITIONS
+;// ***************************************************************************
+ ;// Description:
+ ;//
+ ;// dest[j] = (x[j] + y[j] + round) >> 1, j=0..3
+ ;//
+ ;// Similar to UHADD8 instruction, but with a rounding value of 1 added to
+ ;// each sum before dividing by two, if round is 1
+ ;//
+ ;// Syntax:
+ ;// M_UHADD8R $dest, $x, $y, $round, $mask
+ ;//
+ ;// Inputs:
+ ;// $x four packed bytes, x[3] : x[2] : x[1] : x[0]
+ ;// $y four packed bytes, y[3] : y[2] : y[1] : y[0]
+ ;// $round 0 if no rounding to be added, 1 if rounding to be done
+ ;// $mask some register set to 0x80808080
+ ;//
+ ;// Outputs:
+ ;// $dest four packed bytes, z[3] : z[2] : z[1] : z[0]
+
+ MACRO
+ M_UHADD8R $dest, $x, $y, $round, $mask
+ IF $round = 1
+ IF $dest /= $y ;// dest may alias x here, but y must survive the MVN
+ MVN $dest, $x ;// dest = 255 - x in every byte
+ UHSUB8 $dest, $y, $dest ;// Per byte: (y - (255 - x)) >> 1, i.e. ((x + y + 1) >> 1) - 128 modulo 256
+ EOR $dest, $dest, $mask ;// Flipping bit 7 of every byte removes the -128 bias
+ ELSE ;// dest aliases y: swap the roles of x and y
+ MVN $dest, $y
+ UHSUB8 $dest, $x, $dest
+ EOR $dest, $dest, $mask
+ ENDIF
+ ELSE
+ UHADD8 $dest, $x, $y ;// No rounding: plain per-byte (x + y) >> 1; the mask is not used
+ ENDIF
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Load 8 bytes from $pSrc (aligned or unaligned locations)
+ ;//
+ ;// Syntax:
+ ;// M_LOAD_X $pSrc, $srcStep, $out0, $out1, $scratch, $offset
+ ;//
+ ;// Inputs:
+ ;// $pSrc 4 byte aligned source pointer to an address just less than
+ ;// or equal to the data location
+ ;// $srcStep The stride on source
+ ;// $scratch A scratch register, used internally for temp calculations
+ ;// $offset Difference of source data location to the source pointer
+ ;// Use when $offset != 0 (unaligned load)
+ ;//
+ ;// Outputs:
+ ;// $pSrc In case the macro accepts stride, it increments the pSrc by
+ ;// that value, else unchanged
+ ;// $out0 four packed bytes, z[3] : z[2] : z[1] : z[0]
+ ;// $out1 four packed bytes, z[7] : z[6] : z[5] : z[4]
+ ;//
+ ;// Note: {$out0, $out1, $scratch} should be registers with ascending
+ ;// register numbering. In case offset is 0, $scratch is not modified.
+
+ MACRO
+ M_LOAD_X $pSrc, $srcStep, $out0, $out1, $scratch, $offset
+ IF $offset = 0
+ LDM $pSrc, {$out0, $out1} ;// Aligned case: two words are the eight bytes
+ ADD $pSrc, $pSrc, $srcStep ;// Step to the next row
+ ELSE
+ LDM $pSrc, {$out0, $out1, $scratch} ;// Unaligned case: the eight bytes span three words
+ ADD $pSrc, $pSrc, $srcStep ;// Step to the next row
+
+ MOV $out0, $out0, LSR #8 * $offset ;// Drop the leading bytes that precede the data
+ ORR $out0, $out0, $out1, LSL #(32 - 8 * ($offset)) ;// Refill the top from the next word
+ MOV $out1, $out1, LSR #8 * $offset
+ ORR $out1, $out1, $scratch, LSL #(32 - 8 * ($offset))
+ ENDIF
+ MEND
+
+;// ***************************************************************************
+ ;// Description:
+ ;// Loads three words for X interpolation, update pointer to next row. For
+ ;// X interpolation, given a truncated-4byteAligned source pointer,
+ ;// invariably three contiguous words are required from there to get the
+ ;// nine bytes from the source pointer for filtering.
+ ;//
+ ;// Syntax:
+ ;// M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3
+ ;//
+ ;// Inputs:
+ ;// $pSrc 4 byte aligned source pointer to an address just less than
+ ;// or equal to the data location
+ ;//
+ ;// $srcStep The stride on source
+ ;//
+ ;// $offset Difference of source data location to the source pointer
+ ;// Use when $offset != 0 (unaligned load)
+ ;//
+ ;// Outputs:
+ ;// $pSrc Incremented by $srcStep
+ ;//
+ ;// $word0, $word1, $word2, $word3
+ ;// Three of these are outputs based on the $offset parameter.
+ ;// The outputs are specifically generated to be processed by
+ ;// the M_EXT_XINT macro. Following is the illustration to show
+ ;// how the nine bytes are spanned for different offsets from
+ ;// notTruncatedForAlignmentSourcePointer.
+ ;//
+ ;// ------------------------------------------------------
+ ;// | Offset | Aligned Ptr | word0 | word1 | word2 | word3 |
+ ;// |------------------------------------------------------|
+ ;// | 0 | 0 | 0123 | 4567 | 8xxx | |
+ ;// | 1 | -1 | x012 | 3456 | 78xx | |
+ ;// | 2 | -2 | xx01 | 2345 | 678x | |
+ ;// | 3 | -3 | xxx0 | | 1234 | 5678 |
+ ;// ------------------------------------------------------
+ ;//
+ ;// where the numbering (0-8) is to designate the 9 bytes from
+ ;// start of a particular row. The illustration doesn't take in
+ ;// account the positioning of bytes with in the word and the
+ ;// macro combination with M_EXT_XINT will work only in little
+ ;// endian environs
+ ;//
+ ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending
+ ;// register numbering
+
+ MACRO
+ M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3
+ IF $offset /= 3
+ LDM $pSrc, {$word0, $word1, $word2} ;// Offsets 0..2: three consecutive words into word0..word2
+ ELSE
+ LDM $pSrc, {$word0, $word2, $word3} ;// Offset 3: second and third words land in word2/word3, word1 is left free
+ ENDIF
+ ADD $pSrc, $pSrc, $srcStep ;// Step to the next row
+ MEND
+
+;// ***************************************************************************
+ ;// Description:
+ ;// Extract four registers of four pixels for X interpolation
+ ;//
+ ;// Syntax:
+ ;// M_EXT_XINT $offset, $word0, $word1, $word2, $word3
+ ;//
+ ;// Inputs:
+ ;// $offset Difference of source data location to the source pointer
+ ;// Use when $offset != 0 (unaligned load)
+ ;//
+ ;// $word0, $word1, $word2, $word3
+ ;// Three of these are inputs based on the $offset parameter.
+ ;// The inputs are specifically selected to be processed by
+ ;// the M_EXT_XINT macro.
+ ;//
+ ;// ------------------------------------------------------
+ ;// | Offset | Aligned Ptr | word0 | word1 | word2 | word3 |
+ ;// |------------------------------------------------------|
+ ;// | 0 | 0 | 0123 | 4567 | 8xxx | yyyy |
+ ;// | 1 | -1 | x012 | 3456 | 78xx | yyyy |
+ ;// | 2 | -2 | xx01 | 2345 | 678x | yyyy |
+ ;// | 3 | -3 | xxx0 | yyyy | 1234 | 5678 |
+ ;// ------------------------------------------------------
+ ;//
+ ;// Outputs:
+ ;// $word0, $word1, $word2, $word3
+ ;// Bytes from the original source pointer (not truncated for
+ ;// 4 byte alignment) as shown in the table.
+ ;// -------------------------------
+ ;// | word0 | word1 | word2 | word3 |
+ ;// |-------------------------------|
+ ;// | 0123 | 4567 | 1234 | 5678 |
+ ;// -------------------------------
+ ;//
+ ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending
+ ;// register numbering
+
+ MACRO
+ M_EXT_XINT $offset, $word0, $word1, $word2, $word3
+ IF $offset = 0
+ ; $word0 and $word1 are ok
+ ; $word2, $word3 are just 8 shifted versions
+ MOV $word3, $word1, LSR #8 ;// word3 is built first because it still needs the loaded word2
+ ORR $word3, $word3, $word2, LSL #24
+ MOV $word2, $word0, LSR #8
+ ORR $word2, $word2, $word1, LSL #24
+ ELIF $offset = 3
+ ; $word2 and $word3 are ok (taken care while loading itself)
+ ; set $word0 & $word1
+ MOV $word0, $word0, LSR #24 ;// Keep only the top byte of the first loaded word
+ ORR $word0, $word0, $word2, LSL #8
+ MOV $word1, $word2, LSR #24
+ ORR $word1, $word1, $word3, LSL #8
+ ELSE
+ MOV $word0, $word0, LSR #8 * $offset ;// Offsets 1 and 2: realign word0/word1 first
+ ORR $word0, $word0, $word1, LSL #(32 - 8 * ($offset))
+ MOV $word1, $word1, LSR #8 * $offset
+ ORR $word1, $word1, $word2, LSL #(32 - 8 * ($offset))
+
+ MOV $word3, $word1, LSR #8 ;// Then derive the one-byte-shifted pair from the realigned words
+ ORR $word3, $word3, $word2, LSL #(32 - 8 * (($offset)+1))
+ MOV $word2, $word0, LSR #8
+ ORR $word2, $word2, $word1, LSL #24
+ ENDIF
+ MEND
+
+;// ***************************************************************************
+ ;// Description:
+ ;// Computes half-sum and xor of two inputs and puts them in the input
+ ;// registers in that order
+ ;//
+ ;// Syntax:
+ ;// M_HSUM_XOR $v0, $v1, $tmp
+ ;//
+ ;// Inputs:
+ ;// $v0 a, first input
+ ;// $v1 b, second input
+ ;// $tmp scratch register
+ ;//
+ ;// Outputs:
+ ;// $v0 (a + b)/2
+ ;// $v1 a ^ b
+
+ MACRO
+ M_HSUM_XOR $v0, $v1, $tmp
+ UHADD8 $tmp, $v0, $v1 ;// tmp = (a + b) / 2 per byte
+ EOR $v1, $v0, $v1 ;// l0 = a ^ b
+ MOV $v0, $tmp ;// s0 = (a + b) / 2
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Calculates average of 4 values (a,b,c,d) for HalfPixelXY predict type in
+ ;// mcReconBlock module. Very specific to the implementation of
+ ;// M_MCRECONBLOCK_HalfPixelXY done here. Uses "tmp" as scratch register and
+ ;// "yMask" for mask variable "0x1010101x" set in it. In yMask 4 lsbs are
+ ;// not significant and are used by the callee for row counter (y)
+ ;//
+ ;// Some points to note are:
+ ;// 1. Input is pair of pair-averages and Xors
+ ;// 2. $sum1 and $lsb1 are not modified and hence can be reused in another
+ ;// running average
+ ;// 3. Output is in the first argument
+ ;//
+ ;// Syntax:
+ ;// M_AVG4 $sum0, $lsb0, $sum1, $lsb1, $rndVal
+ ;//
+ ;// Inputs:
+ ;// $sum0 (a + b) >> 1, where a and b are 1st and 2nd inputs to be averaged
+ ;// $lsb0 (a ^ b)
+ ;// $sum1 (c + d) >> 1. Not modified
+ ;// $lsb1 (c ^ d) Not modified
+ ;// $rndVal Assembler Variable. 0 for rounding, 1 for no rounding
+ ;//
+ ;// Outputs:
+ ;// $sum0 (a + b + c + d + 1) / 4 : If no rounding
+ ;// (a + b + c + d + 2) / 4 : If rounding
+
+ MACRO
+ M_AVG4 $sum0, $lsb0, $sum1, $lsb1, $rndVal
+ LCLS OP1
+ LCLS OP2
+ IF $rndVal = 0 ;// rounding case
+OP1 SETS "AND"
+OP2 SETS "ORR"
+ ELSE ;// Not rounding case
+OP1 SETS "ORR"
+OP2 SETS "AND"
+ ENDIF
+
+ LCLS lsb2
+ LCLS sum2
+ LCLS dest
+
+lsb2 SETS "tmp"
+sum2 SETS "$lsb0"
+dest SETS "$sum0"
+
+ $OP1 $lsb0, $lsb0, $lsb1 ;// e0 = e0 & e1 (rounding) or e0 | e1 (no rounding)
+ EOR $lsb2, $sum0, $sum1 ;// e2 = s0 ^ s1
+ $OP2 $lsb2, $lsb2, $lsb0 ;// e2 = e2 | e0 (rounding) or e2 & e0 (no rounding)
+ AND $lsb2, $lsb2, yMask, LSR # 4 ;// e2 = e2 & mask; the shift drops the row counter nibble of yMask
+ UHADD8 $sum2, $sum0, $sum1 ;// s2 = (s0 + s1)/2, written over the lsb0 register
+ UADD8 $dest, $sum2, $lsb2 ;// dest = s2 + e2, written over the sum0 register
+ MEND
+;// ***************************************************************************
+;// Motion compensation handler macros
+;// ***************************************************************************
+ ;// Description:
+ ;// Implement motion compensation routines using the named registers in
+ ;// callee function. Each of the following 4 implement the 4 predict type
+ ;// Each handles 8 cases each ie all the combinations of 4 types of source
+ ;// alignment offsets and 2 types of rounding flag
+ ;//
+ ;// Syntax:
+ ;// M_MCRECONBLOCK_IntegerPixel $rndVal, $offset
+ ;// M_MCRECONBLOCK_HalfPixelX $rndVal, $offset
+ ;// M_MCRECONBLOCK_HalfPixelY $rndVal, $offset
+ ;// M_MCRECONBLOCK_HalfPixelXY $rndVal, $offset
+ ;//
+ ;// Inputs:
+ ;// $rndVal Assembler Variable. 0 for rounding, 1 for no rounding
+ ;// $offset $pSrc MOD 4 value. Offset from 4 byte aligned location.
+ ;//
+ ;// Outputs:
+ ;// Outputs come in the named registers of the callee functions
+ ;// The macro loads the data from the source pointer, processes it and
+ ;// stores in the destination pointer. Does the whole prediction cycle
+ ;// of Motion Compensation routine for a particular predictType
+ ;// After this only residue addition to the predicted values remain
+
+ MACRO
+ M_MCRECONBLOCK_IntegerPixel $rndVal, $offset
+ ;// Algorithmic Description:
+ ;// This handles motion compensation for IntegerPixel predictType. Both
+ ;// rounding cases are handled by the same code base. It is just a copy
+ ;// from source to destination. Two lines are done per loop to reduce
+ ;// stalls. Loop has been software pipelined as well for that purpose.
+ ;//
+ ;// M_LOAD_X loads a whole row in two registers and then they are stored
+ ;// NOTE(review): two rows are pre-loaded and two more loaded per pass, so the last pass loads two rows that are never stored
+CaseIntegerPixelRnd0Offset$offset
+CaseIntegerPixelRnd1Offset$offset
+ M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp3, $offset ;// Pre-load row 0
+ M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset ;// Pre-load row 1
+YloopIntegerPixelOffset$offset
+ SUBS y, y, #2 ;// Two rows per pass; these flags feed the BGT below
+ STRD tmp1, tmp2, [pDst], dstStep
+ STRD tmp3, tmp4, [pDst], dstStep
+ M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp3, $offset ;// Load the next pair of rows
+ M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+ BGT YloopIntegerPixelOffset$offset
+
+ B SwitchPredictTypeEnd
+ MEND
+;// ***************************************************************************
+ MACRO
+ M_MCRECONBLOCK_HalfPixelX $rndVal, $offset
+ ;// Algorithmic Description:
+ ;// This handles motion compensation for HalfPixelX predictType. The two
+ ;// rounding cases are handled by the different code base and spanned by
+ ;// different macro calls. Loop has been software pipelined to reduce
+ ;// stalls.
+ ;//
+ ;// Filtering involves averaging a pixel with the next horizontal pixel.
+ ;// M_LOAD_XINT and M_EXT_XINT combination generate 4 registers, 2 with
+ ;// all pixels in a row with 4 pixel in each register and another 2
+ ;// registers with pixels corresponding to one horizontally shifted pixel
+ ;// corresponding to the initial row pixels. These are set of packed
+ ;// registers appropriate to do 4 lane SIMD.
+ ;// After that M_UHADD8R macro does the averaging taking care of the
+ ;// rounding as required
+ ;// NOTE(review): one row is pre-loaded and one more loaded per pass, so the last pass loads a row that is never used
+CaseHalfPixelXRnd$rndVal.Offset$offset
+ IF $rndVal = 0
+ LDR mask, =0x80808080 ;// Only the rounding form of M_UHADD8R reads the mask
+ ENDIF
+
+ M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4 ;// Pre-load row 0
+YloopHalfPixelXRnd$rndVal.Offset$offset
+ SUBS y, y, #1 ;// One row per pass; these flags feed the BGT below
+ M_EXT_XINT $offset, tmp1, tmp2, tmp3, tmp4
+ M_UHADD8R tmp5, tmp1, tmp3, (1-$rndVal), mask ;// Average pixels 0..3 with their right neighbours
+ M_UHADD8R tmp6, tmp2, tmp4, (1-$rndVal), mask ;// Average pixels 4..7 with their right neighbours
+ STRD tmp5, tmp6, [pDst], dstStep
+ M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4 ;// Load the next row
+ BGT YloopHalfPixelXRnd$rndVal.Offset$offset
+
+ B SwitchPredictTypeEnd
+ MEND
+;// ***************************************************************************
+ MACRO
+ M_MCRECONBLOCK_HalfPixelY $rndVal, $offset
+ ;// Algorithmic Description:
+ ;// This handles motion compensation for HalfPixelY predictType. The two
+ ;// rounding cases are handled by the different code base and spanned by
+ ;// different macro calls. PreLoading is used to avoid reload of same data.
+ ;//
+ ;// Filtering involves averaging a pixel with the next vertical pixel.
+ ;// M_LOAD_X generates 2 registers with all pixels in a row with 4 pixel in
+ ;// each register. These are set of packed registers appropriate to do
+ ;// 4 lane SIMD. After that M_UHADD8R macro does the averaging taking care
+ ;// of the rounding as required
+ ;// Each pass consumes two new rows and reuses the row kept from the previous pass
+CaseHalfPixelYRnd$rndVal.Offset$offset
+ IF $rndVal = 0
+ LDR mask, =0x80808080 ;// Only the rounding form of M_UHADD8R reads the mask
+ ENDIF
+
+ M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp5, $offset ;// Pre-load
+YloopHalfPixelYRnd$rndVal.Offset$offset
+ SUBS y, y, #2 ;// Two rows per pass; these flags feed the BGT below
+ ;// Processing one line
+ M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+ M_UHADD8R tmp1, tmp1, tmp3, (1-$rndVal), mask ;// Average the kept row with the new row
+ M_UHADD8R tmp2, tmp2, tmp4, (1-$rndVal), mask
+ STRD tmp1, tmp2, [pDst], dstStep
+ ;// Processing another line
+ M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp5, $offset ;// This row is also kept for the next pass
+ M_UHADD8R tmp3, tmp3, tmp1, (1-$rndVal), mask
+ M_UHADD8R tmp4, tmp4, tmp2, (1-$rndVal), mask
+ STRD tmp3, tmp4, [pDst], dstStep
+
+ BGT YloopHalfPixelYRnd$rndVal.Offset$offset
+
+ B SwitchPredictTypeEnd
+ MEND
+;// ***************************************************************************
+ MACRO
+ M_MCRECONBLOCK_HalfPixelXY $rndVal, $offset
+ ;// Algorithmic Description:
+ ;// This handles motion compensation for HalfPixelXY predictType. The two
+ ;// rounding cases are handled by the different code base and spanned by
+ ;// different macro calls. PreLoading is used to avoid reload of same data.
+ ;//
+ ;// Filtering involves averaging a pixel with the next vertical, horizontal
+ ;// and right-down diagonal pixels. Just as in HalfPixelX case, M_LOAD_XINT
+ ;// and M_EXT_XINT combination generates 4 registers with a row and its
+ ;// 1 pixel right shifted version, with 4 pixels in one register. Another
+ ;// call of that macro-combination gets another row. Then M_HSUM_XOR is
+ ;// called to get mutual half-sum and xor combinations of a row with its
+ ;// shifted version as they are inputs to the M_AVG4 macro which computes
+ ;// the 4 element average with rounding. Note that it is the half-sum/xor
+ ;// values that are preserved for next row as they can be re-used in the
+ ;// next call to the M_AVG4 and saves recomputation.
+ ;// Due to lack of register, the row counter and a masking value required
+ ;// in M_AVG4 are packed into a single register yMask where the last nibble
+ ;// holds the row counter values and rest holds the masking variable left
+ ;// shifted by 4
+ ;// The loop is closed with BNE on the Z flag of TST, the only flag that test defines for the counter
+CaseHalfPixelXYRnd$rndVal.Offset$offset
+ LDR yMask, =((0x01010101 << 4) + 8) ;// Mask 0x01010101 in the upper bits, row counter 8 in the low nibble
+
+ M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b'
+ M_EXT_XINT $offset, t00, t01, t10, t11
+ M_HSUM_XOR t00, t10, tmp ;// s0, l0
+ M_HSUM_XOR t01, t11, tmp ;// s0', l0'
+
+YloopHalfPixelXYRnd$rndVal.Offset$offset
+ ;// Processsing one line
+ ;// t00, t01, t10, t11 required from previous loop
+ M_LOAD_XINT pSrc, srcStep, $offset, t20, t21, t30, t31 ;// Load c, c', d, d'
+ SUB yMask, yMask, #2 ;// Two rows per pass; only the counter nibble changes
+ M_EXT_XINT $offset, t20, t21, t30, t31
+ M_HSUM_XOR t20, t30, tmp ;// s1, l1
+ M_HSUM_XOR t21, t31, tmp ;// s1', l1'
+ M_AVG4 t00, t10, t20, t30, $rndVal ;// s0, l0, s1, l1
+ M_AVG4 t01, t11, t21, t31, $rndVal ;// s0', l0', s1', l1'
+ STRD t00, t01, [pDst], dstStep ;// store the average
+
+ ;// Processsing another line
+ ;// t20, t21, t30, t31 required from above
+ M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b'
+ TST yMask, #7 ;// Z is set once the row counter reaches zero; V is not written by TST
+ M_EXT_XINT $offset, t00, t01, t10, t11
+ M_HSUM_XOR t00, t10, tmp
+ M_HSUM_XOR t01, t11, tmp
+ M_AVG4 t20, t30, t00, t10, $rndVal
+ M_AVG4 t21, t31, t01, t11, $rndVal
+ STRD t20, t21, [pDst], dstStep
+
+ BNE YloopHalfPixelXYRnd$rndVal.Offset$offset ;// Loop while the counter is non-zero; GT would also read a stale V flag
+
+ IF $offset/=3 :LOR: $rndVal/=1 ;// The last handler emitted falls through to SwitchPredictTypeEnd
+ B SwitchPredictTypeEnd
+ ENDIF
+ MEND
+;// ***************************************************************************
+;// Motion compensation handler macros end here
+;// ***************************************************************************
+ ;// Description:
+ ;// Populates all 4 kinds of offsets "cases" for each predictType and rndVal
+ ;// combination in the "switch" to prediction processing code segment
+ ;//
+ ;// Syntax:
+ ;// M_CASE_OFFSET $rnd, $predictType
+ ;//
+ ;// Inputs:
+ ;// $rnd 0 for rounding, 1 for no rounding
+ ;// $predictType The prediction mode
+ ;//
+ ;// Outputs:
+ ;// Populated list of "M_CASE"s for the "M_SWITCH" macro
+
+ MACRO
+ M_CASE_OFFSET $rnd, $predictType
+ M_CASE Case$predictType.Rnd$rnd.Offset0 ;// One switch entry per source alignment offset, in ascending order
+ M_CASE Case$predictType.Rnd$rnd.Offset1
+ M_CASE Case$predictType.Rnd$rnd.Offset2
+ M_CASE Case$predictType.Rnd$rnd.Offset3
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Populates all 2 kinds of rounding "cases" for each predictType in the
+ ;// "switch" to prediction processing code segment
+ ;//
+ ;// Syntax:
+ ;// M_CASE_MCRECONBLOCK $predictType
+ ;//
+ ;// Inputs:
+ ;// $predictType The prediction mode
+ ;//
+ ;// Outputs:
+ ;// Populated list of "M_CASE_OFFSET" macros
+
+ MACRO
+ M_CASE_MCRECONBLOCK $predictType
+ M_CASE_OFFSET 0, $predictType ;// 0 for rounding: switch entries 0..3 of this predictType
+ M_CASE_OFFSET 1, $predictType ;// 1 for no rounding: switch entries 4..7 of this predictType
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Populates all 8 kinds of rounding and offset combinations handling macros
+ ;// for the specified predictType. In case of "IntegerPixel" predictType,
+ ;// rounding is not required so same code segment handles both cases
+ ;//
+ ;// Syntax:
+ ;// M_MCRECONBLOCK $predictType
+ ;//
+ ;// Inputs:
+ ;// $predictType The prediction mode
+ ;//
+ ;// Outputs:
+ ;// Populated list of "M_MCRECONBLOCK_<predictType>" macros for specified
+ ;// predictType. Each
+ ;// M_MCRECONBLOCK_<predictType> $rnd, $offset
+ ;// is an code segment (starting with a label indicating the predictType,
+ ;// rounding and offset combination)
+ ;// Four calls of this macro with the 4 prediction modes populate all the 32
+ ;// handlers
+
+ MACRO
+ M_MCRECONBLOCK $predictType
+ M_MCRECONBLOCK_$predictType 0, 0 ;// Arguments are rndVal, offset
+ M_MCRECONBLOCK_$predictType 0, 1
+ M_MCRECONBLOCK_$predictType 0, 2
+ M_MCRECONBLOCK_$predictType 0, 3
+ IF "$predictType" /= "IntegerPixel" ;// If not IntegerPixel then rounding makes a difference
+ M_MCRECONBLOCK_$predictType 1, 0
+ M_MCRECONBLOCK_$predictType 1, 1
+ M_MCRECONBLOCK_$predictType 1, 2
+ M_MCRECONBLOCK_$predictType 1, 3
+ ENDIF
+ MEND
+;// ***************************************************************************
+;// Input/Output Registers
+pSrc RN 0
+srcStep RN 1
+arg_pSrcResidue RN 2 ;// Incoming r2; saved to the stack before r2 is reused as dstStep
+pSrcResidue RN 12
+pDst RN 3
+dstStep RN 2
+predictType RN 10 ;// Becomes the combined switch index
+rndVal RN 11
+mask RN 11 ;// Shares r11 with rndVal; loaded only after the switch has consumed rndVal
+
+;// Local Scratch Registers
+zero RN 12
+y RN 14 ;// Row counter; shares r14 with yMask
+
+tmp1 RN 4
+tmp2 RN 5
+tmp3 RN 6
+tmp4 RN 7
+tmp5 RN 8
+tmp6 RN 9
+tmp7 RN 10
+tmp8 RN 11
+tmp9 RN 12
+
+t00 RN 4
+t01 RN 5
+t10 RN 6
+t11 RN 7
+t20 RN 8
+t21 RN 9
+t30 RN 10
+t31 RN 11
+tmp RN 12
+
+yMask RN 14
+
+dst RN 1 ;// Shares r1 with srcStep; used only in the residue loop
+return RN 0
+
+ ;// Allocate memory on stack
+ M_ALLOC4 Stk_pDst, 4
+ M_ALLOC4 Stk_pSrcResidue, 4
+ ;// Function header
+ M_START omxVCM4P2_MCReconBlock, r11
+ ;// Define stack arguments
+ M_ARG Arg_dstStep, 4
+ M_ARG Arg_predictType, 4
+ M_ARG Arg_rndVal, 4
+ ;// Save on stack
+ M_STR pDst, Stk_pDst
+ M_STR arg_pSrcResidue, Stk_pSrcResidue
+ ;// Load argument from the stack
+ M_LDR dstStep, Arg_dstStep
+ M_LDR predictType, Arg_predictType
+ M_LDR rndVal, Arg_rndVal
+
+ MOV y, #8 ;// Eight rows per block
+
+ AND tmp1, pSrc, #3 ;// Byte offset of pSrc from 4-byte alignment
+ ORR predictType, tmp1, predictType, LSL #3 ;// index = predictType*8 + offset
+ ORR predictType, predictType, rndVal, LSL #2 ;// index += rndVal*4, matching the M_CASE order below
+ ;// Truncating source pointer to align to 4 byte location
+ BIC pSrc, pSrc, #3
+
+ ;// Implementation takes care of all combinations of different
+ ;// predictTypes, rounding cases and source pointer offsets to alignment
+ ;// of 4 bytes in different code bases unless one of these parameter wasn't
+ ;// making any difference to the implementation. Below M_CASE_MCRECONBLOCK
+ ;// macros branch into 8 M_CASE macros for all combinations of the 2
+ ;// rounding cases and 4 offsets of the pSrc pointer to the 4 byte
+ ;// alignment.
+ M_SWITCH predictType
+ M_CASE_MCRECONBLOCK IntegerPixel
+ M_CASE_MCRECONBLOCK HalfPixelX
+ M_CASE_MCRECONBLOCK HalfPixelY
+ M_CASE_MCRECONBLOCK HalfPixelXY
+ M_ENDSWITCH
+
+ ;// The M_MCRECONBLOCK macros populate the code bases by calling all 8
+ ;// particular macros (4 in case of IntegerPixel as rounding makes no
+ ;// difference there) to generate the code for all cases of rounding and
+ ;// offsets. LTORG is used to segment the code as code size bloated beyond
+ ;// 4KB.
+ M_MCRECONBLOCK IntegerPixel
+ M_MCRECONBLOCK HalfPixelX
+ LTORG
+ M_MCRECONBLOCK HalfPixelY
+ M_MCRECONBLOCK HalfPixelXY
+SwitchPredictTypeEnd
+
+ ;// Residue Addition
+ ;// This is done in 2 lane SIMD though loads are further optimized and
+ ;// 4 bytes are loaded in case of destination buffer. Algorithmic
+ ;// details are in inlined comments
+ M_LDR pSrcResidue, Stk_pSrcResidue
+ CMP pSrcResidue, #0
+ BEQ pSrcResidueConditionEnd ;// NULL residue pointer: the prediction alone is the result
+pSrcResidueNotNull
+ M_LDR pDst, Stk_pDst ;// Reload pDst: the prediction loops advanced it
+ MOV y, #8
+ SUB dstStep, dstStep, #4 ;// The first store of each row already advances pDst by 4
+Yloop_pSrcResidueNotNull
+ SUBS y, y, #1 ;// One row per pass; these flags feed the BGT at the bottom
+ LDR dst, [pDst] ;// dst = [dcba]
+ LDMIA pSrcResidue!, {tmp1, tmp2} ;// tmp1=[BA] tmp2=[DC]
+ PKHBT tmp3, tmp1, tmp2, LSL #16 ;// Deltaval1 = [C A]
+ PKHTB tmp4, tmp2, tmp1, ASR #16 ;// DeltaVal2 = [D B]
+ UXTB16 tmp1, dst ;// tmp1 = [0c0a]
+ UXTB16 tmp2, dst, ROR #8 ;// tmp2 = [0d0b]
+ QADD16 tmp1, tmp1, tmp3 ;// Add and saturate to 16 bits
+ QADD16 tmp2, tmp2, tmp4
+ USAT16 tmp1, #8, tmp1 ;// armClip(0, 255, tmp1)
+ USAT16 tmp2, #8, tmp2 ;// armClip(0, 255, tmp2)
+ ORR tmp1, tmp1, tmp2, LSL #8 ;// tmp1 = [dcba]
+ STR tmp1, [pDst], #4
+ ;// Same sequence for pixels 4..7 of the row
+ LDR dst, [pDst]
+ LDMIA pSrcResidue!, {tmp1, tmp2}
+ PKHBT tmp3, tmp1, tmp2, LSL #16
+ PKHTB tmp4, tmp2, tmp1, ASR #16
+ UXTB16 tmp1, dst
+ UXTB16 tmp2, dst, ROR #8
+ QADD16 tmp1, tmp1, tmp3
+ QADD16 tmp2, tmp2, tmp4
+ USAT16 tmp1, #8, tmp1
+ USAT16 tmp2, #8, tmp2
+ ORR tmp1, tmp1, tmp2, LSL #8
+ STR tmp1, [pDst], dstStep ;// dstStep was reduced by 4 above, so this lands on the next row start
+
+ BGT Yloop_pSrcResidueNotNull
+pSrcResidueConditionEnd
+
+ MOV return, #OMX_Sts_NoErr ;// Always reports success
+
+ M_END
+ ENDIF ;// ARM1136JS
+
+;// ***************************************************************************
+;// CortexA8 implementation
+;// ***************************************************************************
+ END
+;// ***************************************************************************
+;// omxVCM4P2_MCReconBlock ends
+;// ***************************************************************************
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
new file mode 100644
index 0000000..213444a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
@@ -0,0 +1,283 @@
+; **********
+; *
+; * File Name: omxVCM4P2_PredictReconCoefIntra_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains module for DC/AC coefficient prediction
+; *
+; *
+; * Function: omxVCM4P2_PredictReconCoefIntra
+; *
+; * Description:
+; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+; * to the function call, prediction direction (predDir) should be selected
+; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst pointer to the coefficient buffer which contains the
+; * quantized coefficient residuals (PQF) of the current
+; * block; must be aligned on a 4-byte boundary. The
+; * output coefficients are saturated to the range
+; * [-2048, 2047].
+; * [in] pPredBufRow pointer to the coefficient row buffer; must be aligned
+; * on a 4-byte boundary.
+; * [in] pPredBufCol pointer to the coefficient column buffer; must be
+; * aligned on a 4-byte boundary.
+; * [in] curQP quantization parameter of the current block. curQP may
+; * be equal to predQP, especially when the current block and
+; * the predictor block are in the same macroblock.
+; * [in] predQP quantization parameter of the predictor block
+; * [in] predDir indicates the prediction direction which takes one
+; * of the following values:
+; * OMX_VIDEO_HORIZONTAL predict horizontally
+; * OMX_VIDEO_VERTICAL predict vertically
+; * [in] ACPredFlag a flag indicating if AC prediction should be
+; * performed. It is equal to ac_pred_flag in the bit
+; * stream syntax of MPEG-4
+; * [in] videoComp video component type (luminance, chrominance or
+; * alpha) of the current block
+; * [out] pSrcDst pointer to the coefficient buffer which contains
+; * the quantized coefficients (QF) of the current
+; * block
+; * [out] pPredBufRow pointer to the updated coefficient row buffer
+; * [out] pPredBufCol pointer to the updated coefficient column buffer
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - Bad arguments
+; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
+; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP > 31,
+; * predQP > 31, predDir outside [1,2].
+; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not
+; * 4-byte aligned.
+; *
+; *********
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+
+ IMPORT armVCM4P2_Reciprocal_QP_S32
+ IMPORT armVCM4P2_Reciprocal_QP_S16
+ IMPORT armVCM4P2_DCScaler
+
+
+
+ IF ARM1136JS
+
+
+;// Input Arguments
+;// pSrcDst, pPredBufRow, pPredBufCol and curQP are used directly from r0-r3.
+;// predQP, predDir, ACPredFlag and videoComp are fetched from the stack with
+;// M_LDR inside the function before they are used.
+
+pSrcDst RN 0
+pPredBufRow RN 1
+pPredBufCol RN 2
+curQP RN 3
+QP RN 3 ;// second name for the curQP register
+predQP RN 4 ;// shares r4 with dcScaler
+predDir RN 5 ;// shares r5 with temp2
+ACPredFlag RN 6 ;// shares r6 with temp3/absCoeffAC
+videoComp RN 7 ;// shares r7 with negCurQP/negdcScaler
+
+;// Local Variables
+;// Many names below alias the same physical register; a name is only valid
+;// until another name mapped to that register is written.
+
+temp2 RN 5
+negCurQP RN 7
+negdcScaler RN 7
+tempPred RN 8
+
+dcScaler RN 4
+CoeffTable RN 9
+absCoeffDC RN 9
+temp3 RN 6
+absCoeffAC RN 6
+
+shortVideoHeader RN 9 ;// not referenced by omxVCM4P2_PredictReconCoefIntra
+predCoeffTable RN 10
+Count RN 10
+temp1 RN 12
+index RN 12
+Rem RN 14 ;// r14 (lr) used as a scratch register
+temp RN 11
+Return RN 0 ;// return value overwrites pSrcDst at exit
+
+
+
+ ;// omxVCM4P2_PredictReconCoefIntra
+ ;//
+ ;// Reconstructs the DC coefficient of an intra block from its predictor and,
+ ;// when ACPredFlag == 1, also the seven AC coefficients of the first row
+ ;// (predDir == 2) or of the first column (any other predDir).
+ ;// Each reconstructed value is saturated to [-2048, 2047] and written back
+ ;// to pSrcDst; the prediction buffers are updated as noted inline.
+ ;// No argument checking is done here; r0 always returns OMX_Sts_NoErr.
+
+ M_START omxVCM4P2_PredictReconCoefIntra,r12
+
+ ;// Assigning pointers to Input arguments on Stack
+
+ M_ARG predQPonStack,4
+ M_ARG predDironStack,4
+ M_ARG ACPredFlagonStack,4
+ M_ARG videoComponStack,4
+
+ ;// DC Prediction
+
+ M_LDR videoComp,videoComponStack ;// Load videoComp From Stack
+
+ M_LDR predDir,predDironStack ;// Load Prediction direction
+
+ ;// dcScaler Calculation: byte at armVCM4P2_DCScaler + 32*videoComp + QP
+
+ LDR index, =armVCM4P2_DCScaler
+ ADD index,index,videoComp,LSL #5
+ LDRB dcScaler,[index,QP]
+
+
+calDCVal
+
+
+ LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S16 ;// Loading the table with entries 32767/(1 to 63)
+
+ CMP predDir,#2 ;// Check if the Prediction direction is vertical
+
+ ;// Calculate tempPred by performing Division
+
+ LDREQSH absCoeffDC,[pPredBufRow] ;// If vertical load the coeff from Row Prediction Buffer
+ LDRNESH absCoeffDC,[pPredBufCol] ;// If horizontal load the coeff from column Prediction Buffer
+
+ RSB negdcScaler,dcScaler,#0 ;// negdcScaler=-dcScaler
+
+ MOV temp1,absCoeffDC ;// temp1=prediction coeff (signed copy, kept for the sign test)
+ CMP temp1,#0
+ RSBLT absCoeffDC,temp1,#0 ;// absCoeffDC=abs(temp1)
+
+ ADD temp,dcScaler,dcScaler ;// byte offset of halfword entry [dcScaler]
+ LDRH temp,[predCoeffTable,temp] ;// Load value from coeff table for performing division using multiplication
+
+ SMULBB tempPred,temp,absCoeffDC ;// tempPred=abs(pPredBufRow(Col)[0])*32767/dcScaler
+ ADD temp3,dcScaler,#1
+ LSR tempPred,tempPred,#15 ;// tempPred=abs(pPredBufRow(Col)[0])/dcScaler
+ LSR temp3,temp3,#1 ;// temp3=round(dcScaler/2)
+
+ MLA Rem,negdcScaler,tempPred,absCoeffDC ;// Rem = abs(pPredBufRow(Col)[0])-tempPred*dcScaler
+
+
+ LDRH temp,[pPredBufCol] ;// temp=pPredBufCol[0] before it is overwritten below
+ CMP Rem,temp3
+ ADDGE tempPred,#1 ;// If Rem>=round(dcScaler/2);tempPred=tempPred+1
+ CMP temp1,#0
+ RSBLT tempPred,tempPred,#0 ;// if pPredBufRow(Col)[0]<0; tempPred=-tempPred
+
+
+ STRH temp,[pPredBufRow,#-16] ;// old pPredBufCol[0] is saved 16 bytes before pPredBufRow
+
+ LDRH temp,[pSrcDst] ;// temp=pSrcDst[0]
+ M_LDR ACPredFlag,ACPredFlagonStack
+ ADD temp,temp,tempPred ;// temp=pSrcDst[0]+tempPred
+ SSAT16 temp,#12,temp ;// clip temp to [-2048,2047]
+
+ SMULBB temp1,temp,dcScaler ;// temp1=clipped(pSrcDst[0])*dcScaler
+ M_LDR predQP,predQPonStack ;// predQP replaces dcScaler in r4 from here on
+ STRH temp,[pSrcDst]
+ CMP ACPredFlag,#1 ;// Check if the AC prediction flag is set or not
+ STRH temp1,[pPredBufCol] ;// store temp1 to pPredBufCol
+
+ ;// AC Prediction
+
+
+ BNE Exit ;// If not set Exit
+
+ LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S32 ;// Loading the table with entries 0x1ffff/(1 to 63)
+ MOV temp1,#4
+ MUL temp1,curQP,temp1 ;// byte offset of word entry [curQP]
+ CMP predDir,#2 ;// Check the Prediction direction (flags consumed by BNE Horizontal)
+ RSB negCurQP,curQP,#0
+ LDR CoeffTable,[predCoeffTable,temp1] ;// CoeffTable=0x1ffff/curQP
+ ADD curQP,curQP,#1 ;// curQP=curQP+1
+ LSR curQP,curQP,#1 ;// curQP=round(curQP/2)
+ MOV Count,#2 ;// Initializing the Loop Count (byte offset of element 1)
+ BNE Horizontal ;// If the Prediction direction is horizontal branch to Horizontal
+
+
+
+loop1
+ ;// Calculate tempPred
+
+ LDRSH absCoeffAC,[pPredBufRow,Count] ;// absCoeffAC=pPredBufRow[i], 1=<i<=7
+ MOV temp1,absCoeffAC
+ CMP temp1,#0 ;// compare pPredBufRow[i] with zero, 1=<i<=7
+ RSBLT absCoeffAC,temp1,#0 ;// absCoeffAC= abs(pPredBufRow[i])
+
+ SMULBB absCoeffAC,absCoeffAC,predQP ;// absCoeffAC=abs(pPredBufRow[i])*predQP
+ MUL tempPred,absCoeffAC,CoeffTable ;// tempPred=abs(pPredBufRow[i])*predQP*0x1ffff/curQP
+ LSR tempPred,tempPred,#17 ;// tempPred=abs(pPredBufRow[i])*predQP/curQP
+
+ MLA Rem,negCurQP,tempPred,absCoeffAC ;// Rem=abs(pPredBufRow[i])*predQP-tempPred*curQP
+ LDRH temp,[pSrcDst,Count] ;// temp=pSrcDst[i],1<=i<8
+
+ CMP Rem,curQP
+ ADDGE tempPred,#1 ;// if Rem>=round(curQP/2); tempPred=tempPred+1
+ CMP temp1,#0
+ RSBLT tempPred,tempPred,#0 ;// if pPredBufRow[i]<0 ; tempPred=-tempPred
+
+ ;// Update source and Row Prediction buffers
+
+ ADD temp,temp,tempPred ;// temp=tempPred+pSrcDst[i]
+ SSAT16 temp,#12,temp ;// Clip temp to [-2048,2047]
+ STRH temp,[pSrcDst,Count]
+ STRH temp,[pPredBufRow,Count] ;// pPredBufRow[i]=temp
+ ADD Count,Count,#2 ;// i=i+1
+ CMP Count,#16 ;// compare if i=8
+ BLT loop1
+ B Exit ;// Branch to exit
+
+Horizontal
+
+ MOV Count,#16 ;// Initializing i=8 (Count is a byte offset into pSrcDst)
+
+loop2
+
+ LSR temp2,Count,#3 ;// temp2=byte offset of pPredBufCol[i>>3]
+
+ ;// Calculate tempPred
+
+ ;// Signed load, as in loop1: with a zero-extending LDRH the sign test
+ ;// below never sees a negative predictor, so abs() is skipped and the
+ ;// logical shift by 17 is applied to a negative product.
+ LDRSH absCoeffAC,[pPredBufCol,temp2] ;// absCoeffAC=pPredBufCol[i>>3]
+ MOV temp1,absCoeffAC
+ CMP temp1,#0 ;// compare pPredBufCol[i>>3] with zero
+ RSBLT absCoeffAC,temp1,#0 ;// absCoeffAC=abs(pPredBufCol[i>>3])
+
+ SMULBB absCoeffAC,absCoeffAC,predQP ;// absCoeffAC=abs(pPredBufCol[i>>3])*predQP
+ MUL tempPred,absCoeffAC,CoeffTable ;// tempPred=abs(pPredBufCol[i>>3])*predQP*0x1ffff/curQP
+ LSR tempPred,tempPred,#17 ;// tempPred=abs(pPredBufCol[i>>3])*predQP/curQP
+
+ MLA Rem,negCurQP,tempPred,absCoeffAC ;// Rem=abs(pPredBufCol[i>>3])*predQP-tempPred*curQP
+ LDRH temp,[pSrcDst,Count] ;// temp=pSrcDst[i]
+
+ CMP Rem,curQP ;// Compare Rem with round(curQP/2)
+ ADDGE tempPred,#1 ;// tempPred=tempPred+1 if Rem>=round(curQP/2)
+ CMP temp1,#0
+ RSBLT tempPred,tempPred,#0 ;// if pPredBufCol[i>>3]<0 tempPred=-tempPred
+
+ ;// Update source and Column Prediction buffers
+
+ ADD temp,temp,tempPred ;// temp=pSrcDst[i]+tempPred
+ SSAT16 temp,#12,temp ;// Clip temp to [-2048,2047]
+ STRH temp,[pSrcDst,Count] ;// pSrcDst[i]= clipped value
+ STRH temp,[pPredBufCol,temp2] ;// pPredBufCol[i>>3]=temp
+ ADD Count,Count,#16 ;// i=i+8
+ CMP Count,#128 ;// compare i with 64
+ BLT loop2
+
+
+Exit
+
+ MOV Return,#OMX_Sts_NoErr
+
+ M_END
+ ENDIF
+ END
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
new file mode 100644
index 0000000..c9591cb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
@@ -0,0 +1,141 @@
+;/**
+; *
+; * File Name: omxVCM4P2_QuantInvInter_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for inter reconstruction
+; *
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvInter_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in] QP quantization parameter (quantiser_scale)
+; * [in] videoComp (Intra version only.) Video component type of the
+; * current block. Takes one of the following flags:
+; * OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; * OMX_VC_ALPHA.
+; * [in] shortVideoHeader a flag indicating presence of short_video_header;
+; * shortVideoHeader==1 selects linear intra DC mode,
+; * and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out] pSrcDst pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; * - If pSrcDst is NULL or is not 16-byte aligned.
+; * or
+; * - If QP <= 0.
+; * or
+; * - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+; */
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments
+pSrcDst RN 0 ;// coefficient block pointer, advanced by the loop
+QP RN 1
+
+;//Local Variables
+Return RN 0 ;// return value overwrites pSrcDst at exit
+Count RN 4 ;// coefficients still to process
+tempVal21 RN 2 ;// r2/r3 are loaded together by LDRD:
+tempVal43 RN 3 ;// two packed 16-bit coefficients per register
+QP1 RN 5 ;// additive term: QP if QP is odd, QP-1 if QP is even
+X2 RN 6 ;// working value for the low-halfword coefficient
+X3 RN 14 ;// working value for the high-halfword coefficient (uses lr)
+Result1 RN 8 ;// r8/r9 are stored together by STRD
+Result2 RN 9
+two RN 7 ;// holds the constant 2
+
+ ;// omxVCM4P2_QuantInvInter_I
+ ;//
+ ;// In-place inverse quantisation of the 64 16-bit coefficients at pSrcDst.
+ ;// For every coefficient c:
+ ;// c == 0 : result = 0
+ ;// c != 0 : result = sign(c)*(2*abs(c)*QP + QP1)
+ ;// where QP1 = QP when QP is odd and QP-1 when QP is even.
+ ;// Each result is saturated to [-2048, 2047].
+ ;// Four coefficients are handled per iteration; the next four are loaded
+ ;// while the current ones are being finished.
+ ;// No argument checking is done here; r0 always returns OMX_Sts_NoErr.
+
+ M_START omxVCM4P2_QuantInvInter_I,r9
+
+ MOV Count,#64
+ TST QP,#1
+ LDRD tempVal21,[pSrcDst] ;// Loads first two values of pSrcDst to tempVal21,
+ ;// next two values to tempVal43
+ SUBEQ QP1,QP,#1 ;// QP1=QP if QP is odd , QP1=QP-1 if QP is even
+ MOVNE QP1,QP
+ MOV two,#2
+
+
+
+Loop
+
+ ;// The multiply instructions below do not change the N/Z flags, so each
+ ;// CMP stays valid for the three conditional instructions that follow it.
+
+ SMULBB X2,tempVal21,two ;// X2= first val(lower 16 bits of tempVal21)*2
+ CMP X2,#0
+
+ RSBLT X2,X2,#0 ;// X2=2*absoluteval(first val)
+ SMLABBNE X2,QP,X2,QP1 ;// X2=2*absval(first val)*QP+QP if QP is odd
+ ;// X2=2*absval(first val)*QP+QP-1 if QP is even
+ SMULTB X3,tempVal21,two ;// X3= second val(top 16 bits of tempVal21)*2
+ RSBLT X2,X2,#0 ;// restore the sign of the first val
+
+ CMP X3,#0
+
+ RSBLT X3,X3,#0
+ SMLABBNE X3,QP,X3,QP1
+
+ RSBLT X3,X3,#0
+
+ ;// Saturate the full 32-bit results before packing. 2*abs(c)*QP+QP1 can
+ ;// exceed 16 bits, and packing first would hand SSAT16 a wrapped value
+ ;// (possibly of the opposite sign).
+ SSAT X2,#12,X2 ;// clip to range [-2048,2047]
+ SSAT X3,#12,X3 ;// clip to range [-2048,2047]
+ PKHBT Result1,X2,X3,LSL #16 ;// Result1[0-15]=X2[0-15],Result1[16-31]=X3[0-15]
+ SMULBB X2,tempVal43,two ;// X2= third val(lower 16 bits of tempVal43)*2
+ CMP X2,#0
+
+
+
+ RSBLT X2,X2,#0
+ SMLABBNE X2,QP,X2,QP1
+ SMULTB X3,tempVal43,two ;// X3= fourth val(top 16 bits of tempVal43)*2
+ RSBLT X2,X2,#0
+ CMP X3,#0
+
+ ;// NOTE(review): on the last iteration this look-ahead load reads the 8
+ ;// bytes that follow the 64-coefficient block; the loaded values are
+ ;// not used. Confirm callers always provide readable memory there.
+ LDRD tempVal21,[pSrcDst,#8] ;// Load next four Values to tempVal21,tempVal43
+
+ RSBLT X3,X3,#0
+ SMLABBNE X3,QP,X3,QP1
+ RSBLT X3,X3,#0
+ SSAT X2,#12,X2 ;// clip to range [-2048,2047]
+ SSAT X3,#12,X3 ;// clip to range [-2048,2047]
+ PKHBT Result2,X2,X3,LSL #16 ;// Result2[0-15]=X2[0-15],Result2[16-31]=X3[0-15]
+
+ SUBS Count,Count,#4 ;// Decrement Count by 4 and continue if it has not reached 0
+ STRD Result1,[pSrcDst],#8 ;// Store Double words and increment the pointer to point the next store address
+
+
+
+ BGT Loop
+
+ MOV Return,#OMX_Sts_NoErr
+
+ M_END
+ ENDIF
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
new file mode 100644
index 0000000..6328e01
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
@@ -0,0 +1,188 @@
+;/**
+; *
+; * File Name: omxVCM4P2_QuantInvIntra_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for inter reconstruction
+; *
+; *
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvIntra_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in] QP quantization parameter (quantiser_scale)
+; * [in] videoComp (Intra version only.) Video component type of the
+; * current block. Takes one of the following flags:
+; * OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; * OMX_VC_ALPHA.
+; * [in] shortVideoHeader a flag indicating presence of short_video_header;
+; * shortVideoHeader==1 selects linear intra DC mode,
+; * and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out] pSrcDst pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; * - If pSrcDst is NULL or is not 16-byte aligned.
+; * or
+; * - If QP <= 0.
+; * or
+; * - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+ IMPORT armVCM4P2_DCScaler
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments
+pSrcDst RN 0 ;// coefficient block pointer, advanced by the loop
+QP RN 1
+videoComp RN 2
+shortVideoHeader RN 3
+
+;//Local Variables
+Return RN 0 ;// return value overwrites pSrcDst at exit
+dcScaler RN 4 ;// shares r4 with tempVal21; only live during the DC computation
+temp RN 12 ;// holds the dequantized DC value across the AC loop
+index RN 6 ;// shares r6 with QP1; only live during the dcScaler lookup
+
+tempVal21 RN 4 ;// r4/r5 are loaded together by LDRD:
+tempVal43 RN 5 ;// two packed 16-bit coefficients per register
+QP1 RN 6 ;// additive term: QP if QP is odd, QP-1 if QP is even
+X2 RN 7 ;// working value for the low-halfword coefficient
+X3 RN 14 ;// working value for the high-halfword coefficient (uses lr)
+Result1 RN 8 ;// r8/r9 are stored together by STRD
+Result2 RN 9
+two RN 10 ;// holds the constant 2
+Count RN 11 ;// coefficients still to process
+
+
+
+
+ ;// omxVCM4P2_QuantInvIntra_I
+ ;//
+ ;// In-place inverse quantisation of the 64 16-bit coefficients at pSrcDst.
+ ;// DC: pSrcDst[0]*dcScaler saturated to [-2048, 2047], where dcScaler is 8
+ ;// when shortVideoHeader != 0 and otherwise the byte at
+ ;// armVCM4P2_DCScaler + 32*videoComp + QP.
+ ;// AC: for every coefficient c:
+ ;// c == 0 : result = 0
+ ;// c != 0 : result = sign(c)*(2*abs(c)*QP + QP1)
+ ;// where QP1 = QP when QP is odd and QP-1 when QP is even,
+ ;// saturated to [-2048, 2047].
+ ;// The loop runs over all 64 entries (including index 0) and the DC value
+ ;// computed up front is written over entry 0 afterwards.
+ ;// No argument checking is done here; r0 always returns OMX_Sts_NoErr.
+
+ M_START omxVCM4P2_QuantInvIntra_I,r11
+
+
+
+ ;// Perform Inverse Quantization for DC coefficient
+
+ TEQ shortVideoHeader,#0 ;// Test if short Video Header flag =0
+ MOVNE dcScaler,#8 ;// if shortVideoHeader is non zero dcScaler=8
+ BNE calDCVal
+ LDR index, =armVCM4P2_DCScaler
+ ADD index,index,videoComp,LSL #5
+ LDRB dcScaler,[index,QP]
+
+
+ ;//M_CalDCScalar shortVideoHeader,videoComp, QP
+
+calDCVal
+
+ LDRH temp,[pSrcDst]
+ SMULBB temp,temp,dcScaler ;// dcCoeff = dcScaler * Quantized DC coefficient(from memory)
+ SSAT temp,#12,temp ;// Saturating to 12 bits
+
+
+ MOV Count,#64
+ TST QP,#1
+ LDRD tempVal21,[pSrcDst] ;// Loads first two values of pSrcDst to tempVal21,
+ ;// next two values to tempVal43
+ ;// (this overwrites dcScaler, which is no longer needed)
+ SUBEQ QP1,QP,#1 ;// QP1=QP if QP is odd , QP1=QP-1 if QP is even
+ MOVNE QP1,QP
+ MOV two,#2
+
+
+
+
+
+Loop
+
+ ;// The multiply instructions below do not change the N/Z flags, so each
+ ;// CMP stays valid for the three conditional instructions that follow it.
+
+ SMULBB X2,tempVal21,two ;// X2= first val(lower 16 bits of tempVal21)*2
+ CMP X2,#0
+
+ RSBLT X2,X2,#0 ;// X2=2*absoluteval(first val)
+ SMLABBNE X2,QP,X2,QP1 ;// X2=2*absval(first val)*QP+QP if QP is odd
+ ;// X2=2*absval(first val)*QP+QP-1 if QP is even
+ SMULTB X3,tempVal21,two ;// X3= second val(top 16 bits of tempVal21)*2
+ RSBLT X2,X2,#0 ;// restore the sign of the first val
+
+ CMP X3,#0
+
+ RSBLT X3,X3,#0
+ SMLABBNE X3,QP,X3,QP1
+
+ RSBLT X3,X3,#0
+
+ ;// Saturate the full 32-bit results before packing. 2*abs(c)*QP+QP1 can
+ ;// exceed 16 bits, and packing first would hand SSAT16 a wrapped value
+ ;// (possibly of the opposite sign).
+ SSAT X2,#12,X2 ;// clip to range [-2048,2047]
+ SSAT X3,#12,X3 ;// clip to range [-2048,2047]
+ PKHBT Result1,X2,X3,LSL #16 ;// Result1[0-15]=X2[0-15],Result1[16-31]=X3[0-15]
+ SMULBB X2,tempVal43,two ;// X2= third val(lower 16 bits of tempVal43)*2
+ CMP X2,#0
+
+
+
+ RSBLT X2,X2,#0
+ SMLABBNE X2,QP,X2,QP1
+ SMULTB X3,tempVal43,two ;// X3= fourth val(top 16 bits of tempVal43)*2
+ RSBLT X2,X2,#0
+ CMP X3,#0
+
+ ;// NOTE(review): on the last iteration this look-ahead load reads the 8
+ ;// bytes that follow the 64-coefficient block; the loaded values are
+ ;// not used. Confirm callers always provide readable memory there.
+ LDRD tempVal21,[pSrcDst,#8] ;// Load next four Values to tempVal21,tempVal43
+
+ RSBLT X3,X3,#0
+ SMLABBNE X3,QP,X3,QP1
+ RSBLT X3,X3,#0
+ SSAT X2,#12,X2 ;// clip to range [-2048,2047]
+ SSAT X3,#12,X3 ;// clip to range [-2048,2047]
+ PKHBT Result2,X2,X3,LSL #16 ;// Result2[0-15]=X2[0-15],Result2[16-31]=X3[0-15]
+
+ SUBS Count,Count,#4 ;// Decrement Count by 4 and continue if it has not reached 0
+ STRD Result1,[pSrcDst],#8 ;// Store Double words and increment the pointer to point the next store address
+
+
+
+ BGT Loop
+
+ SUB pSrcDst,pSrcDst,#128 ;// rewind to the start of the block (64 halfwords)
+
+ ;// Storing the Inverse Quantized DC coefficient
+ ;// (replaces the value the loop wrote at index 0)
+
+ STRH temp,[pSrcDst],#2
+
+
+
+ MOV Return,#OMX_Sts_NoErr
+
+
+
+
+ M_END
+ ENDIF
+ END
+